iscsi_tcp.c 27.9 KB
Newer Older
1 2 3 4 5
/*
 * iSCSI Initiator over TCP/IP Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
6 7
 * Copyright (C) 2005 - 2006 Mike Christie
 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
 * maintained by open-iscsi@googlegroups.com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * See the file COPYING included with this distribution for more details.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 */

#include <linux/types.h>
#include <linux/inet.h>
31
#include <linux/slab.h>
32
#include <linux/file.h>
33 34 35 36 37
#include <linux/blkdev.h>
#include <linux/crypto.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
38
#include <linux/module.h>
39 40
#include <net/tcp.h>
#include <scsi/scsi_cmnd.h>
41
#include <scsi/scsi_device.h>
42 43 44 45 46 47
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include "iscsi_tcp.h"

48 49
/* Module metadata: license, description and author list. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("iSCSI/TCP data-path");
MODULE_AUTHOR("Mike Christie <michaelc@cs.wisc.edu>, "
	      "Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");

54 55 56
/*
 * Forward declarations. The template pointer is filled in by
 * iscsi_sw_tcp_init(); the host template and the iscsi_transport are
 * defined with their initializers further down, but are referenced by
 * functions that appear before those definitions.
 */
static struct scsi_transport_template *iscsi_sw_tcp_scsi_transport;
static struct scsi_host_template iscsi_sw_tcp_sht;
static struct iscsi_transport iscsi_sw_tcp_transport;
57

58
/*
 * max_lun module parameter (read-only permissions). The value is copied
 * into shost->max_lun for every host created by this driver and is
 * validated in iscsi_sw_tcp_init(). Defaults to all bits set.
 */
static unsigned int iscsi_max_lun = ~0U;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/*
 * Debug switch for this module, exposed as the debug_iscsi_tcp module
 * parameter with S_IRUGO | S_IWUSR permissions. Non-zero enables the
 * messages emitted through ISCSI_SW_TCP_DBG().
 */
static int iscsi_sw_tcp_dbg;
module_param_named(debug_iscsi_tcp, iscsi_sw_tcp_dbg, int,
		   S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(debug_iscsi_tcp, "Turn on debugging for iscsi_tcp module "
		 "Set to 1 to turn on, and zero to turn off. Default is off.");

/*
 * ISCSI_SW_TCP_DBG - print a KERN_INFO message for @_conn, prefixed with
 * the calling function's name, when iscsi_sw_tcp_dbg is non-zero.
 *
 * The expansion deliberately does NOT end in a semicolon: the caller's
 * own ';' terminates the do/while, so the macro behaves as exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define ISCSI_SW_TCP_DBG(_conn, dbg_fmt, arg...)		\
	do {							\
		if (iscsi_sw_tcp_dbg)				\
			iscsi_conn_printk(KERN_INFO, _conn,	\
					     "%s " dbg_fmt,	\
					     __func__, ##arg);	\
	} while (0)


76
/**
 * iscsi_sw_tcp_recv - TCP receive in sendfile fashion
 * @rd_desc: read descriptor; arg.data carries the iscsi_conn
 * @skb: socket buffer
 * @offset: offset in skb
 * @len: skb->len - offset (not used by the body)
 *
 * Feeds @skb to iscsi_tcp_recv_skb() repeatedly, advancing @offset by
 * what each call consumed, until a call consumes nothing or reports
 * ISCSI_TCP_SKB_DONE.
 *
 * Returns the total number of bytes consumed from @skb.
 */
static int iscsi_sw_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
			     unsigned int offset, size_t len)
{
	struct iscsi_conn *conn = rd_desc->arg.data;
	unsigned int total_consumed = 0;
	unsigned int consumed;
	int status;

	ISCSI_SW_TCP_DBG(conn, "in %d bytes\n", skb->len - offset);

	for (;;) {
		status = 0;
		consumed = iscsi_tcp_recv_skb(conn, skb, offset, 0, &status);
		offset += consumed;
		total_consumed += consumed;

		/* stop once nothing was taken or the skb is finished */
		if (consumed == 0 || status == ISCSI_TCP_SKB_DONE)
			break;
	}

	ISCSI_SW_TCP_DBG(conn, "read %d bytes status %d\n",
			 skb->len - offset, status);

	return total_consumed;
}

104 105 106 107 108 109 110
/**
 * iscsi_sw_sk_state_check - check socket state
 * @sk: socket
 *
 * If the socket is in CLOSE or CLOSE_WAIT we should
 * not close the connection if there is still some
 * data pending.
111 112
 *
 * Must be called with sk_callback_lock.
113 114 115
 */
static inline int iscsi_sw_sk_state_check(struct sock *sk)
{
116
	struct iscsi_conn *conn = sk->sk_user_data;
117

118
	if ((sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) &&
119
	    (conn->session->state != ISCSI_STATE_LOGGING_OUT) &&
120 121 122 123 124
	    !atomic_read(&sk->sk_rmem_alloc)) {
		ISCSI_SW_TCP_DBG(conn, "TCP_CLOSE|TCP_CLOSE_WAIT\n");
		iscsi_conn_failure(conn, ISCSI_ERR_TCP_CONN_CLOSE);
		return -ECONNRESET;
	}
125 126 127
	return 0;
}

128
static void iscsi_sw_tcp_data_ready(struct sock *sk)
129
{
130 131
	struct iscsi_conn *conn;
	struct iscsi_tcp_conn *tcp_conn;
132 133 134
	read_descriptor_t rd_desc;

	read_lock(&sk->sk_callback_lock);
135 136 137 138 139 140
	conn = sk->sk_user_data;
	if (!conn) {
		read_unlock(&sk->sk_callback_lock);
		return;
	}
	tcp_conn = conn->dd_data;
141

142
	/*
143
	 * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
144
	 * We set count to 1 because we want the network layer to
145
	 * hand us all the skbs that are available. iscsi_tcp_recv
146 147
	 * handled pdus that cross buffers or pdus that still need data.
	 */
148
	rd_desc.arg.data = conn;
149
	rd_desc.count = 1;
150
	tcp_read_sock(sk, &rd_desc, iscsi_sw_tcp_recv);
151

152
	iscsi_sw_sk_state_check(sk);
153

154 155
	/* If we had to (atomically) map a highmem page,
	 * unmap it now. */
156
	iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
157
	read_unlock(&sk->sk_callback_lock);
158 159
}

160
static void iscsi_sw_tcp_state_change(struct sock *sk)
161
{
162
	struct iscsi_tcp_conn *tcp_conn;
163
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
164 165 166 167 168
	struct iscsi_conn *conn;
	struct iscsi_session *session;
	void (*old_state_change)(struct sock *);

	read_lock(&sk->sk_callback_lock);
169 170 171 172 173
	conn = sk->sk_user_data;
	if (!conn) {
		read_unlock(&sk->sk_callback_lock);
		return;
	}
174 175
	session = conn->session;

176
	iscsi_sw_sk_state_check(sk);
177

178
	tcp_conn = conn->dd_data;
179 180
	tcp_sw_conn = tcp_conn->dd_data;
	old_state_change = tcp_sw_conn->old_state_change;
181 182 183 184 185 186 187 188 189 190

	read_unlock(&sk->sk_callback_lock);

	old_state_change(sk);
}

/**
 * iscsi_write_space - Called when more output buffer space is available
 * @sk: socket space is available for
 **/
191
static void iscsi_sw_tcp_write_space(struct sock *sk)
192
{
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
	struct iscsi_conn *conn;
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
	void (*old_write_space)(struct sock *);

	read_lock_bh(&sk->sk_callback_lock);
	conn = sk->sk_user_data;
	if (!conn) {
		read_unlock_bh(&sk->sk_callback_lock);
		return;
	}

	tcp_conn = conn->dd_data;
	tcp_sw_conn = tcp_conn->dd_data;
	old_write_space = tcp_sw_conn->old_write_space;
	read_unlock_bh(&sk->sk_callback_lock);

	old_write_space(sk);
211

212
	ISCSI_SW_TCP_DBG(conn, "iscsi_write_space\n");
213
	iscsi_conn_queue_work(conn);
214 215
}

216
static void iscsi_sw_tcp_conn_set_callbacks(struct iscsi_conn *conn)
217
{
218
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
219 220
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct sock *sk = tcp_sw_conn->sock->sk;
221 222 223 224

	/* assign new callbacks */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = conn;
225 226 227 228 229 230
	tcp_sw_conn->old_data_ready = sk->sk_data_ready;
	tcp_sw_conn->old_state_change = sk->sk_state_change;
	tcp_sw_conn->old_write_space = sk->sk_write_space;
	sk->sk_data_ready = iscsi_sw_tcp_data_ready;
	sk->sk_state_change = iscsi_sw_tcp_state_change;
	sk->sk_write_space = iscsi_sw_tcp_write_space;
231 232 233
	write_unlock_bh(&sk->sk_callback_lock);
}

234
static void
235
iscsi_sw_tcp_conn_restore_callbacks(struct iscsi_conn *conn)
236
{
237 238
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
239
	struct sock *sk = tcp_sw_conn->sock->sk;
240 241 242 243

	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data    = NULL;
244 245 246
	sk->sk_data_ready   = tcp_sw_conn->old_data_ready;
	sk->sk_state_change = tcp_sw_conn->old_state_change;
	sk->sk_write_space  = tcp_sw_conn->old_write_space;
247
	sk->sk_no_check_tx = 0;
248 249 250 251
	write_unlock_bh(&sk->sk_callback_lock);
}

/**
252
 * iscsi_sw_tcp_xmit_segment - transmit segment
253
 * @tcp_conn: the iSCSI TCP connection
254 255 256 257 258 259 260 261 262 263
 * @segment: the buffer to transmnit
 *
 * This function transmits as much of the buffer as
 * the network layer will accept, and returns the number of
 * bytes transmitted.
 *
 * If CRC hashing is enabled, the function will compute the
 * hash as it goes. When the entire segment has been transmitted,
 * it will retrieve the hash value and send it as well.
 */
264
static int iscsi_sw_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
265 266
				     struct iscsi_segment *segment)
{
267
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
268 269 270 271
	struct socket *sk = tcp_sw_conn->sock;
	unsigned int copied = 0;
	int r = 0;

272
	while (!iscsi_tcp_segment_done(tcp_conn, segment, 0, r)) {
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
		struct scatterlist *sg;
		unsigned int offset, copy;
		int flags = 0;

		r = 0;
		offset = segment->copied;
		copy = segment->size - offset;

		if (segment->total_copied + segment->size < segment->total_size)
			flags |= MSG_MORE;

		/* Use sendpage if we can; else fall back to sendmsg */
		if (!segment->data) {
			sg = segment->sg;
			offset += segment->sg_offset + sg->offset;
			r = tcp_sw_conn->sendpage(sk, sg_page(sg), offset,
						  copy, flags);
		} else {
			struct msghdr msg = { .msg_flags = flags };
			struct kvec iov = {
				.iov_base = segment->data + offset,
				.iov_len = copy
			};

			r = kernel_sendmsg(sk, &msg, &iov, 1, copy);
		}

		if (r < 0) {
			iscsi_tcp_segment_unmap(segment);
			return r;
		}
		copied += r;
	}
	return copied;
}

/**
 * iscsi_sw_tcp_xmit - TCP transmit
311
 **/
312
static int iscsi_sw_tcp_xmit(struct iscsi_conn *conn)
313
{
314
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
315 316
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct iscsi_segment *segment = &tcp_sw_conn->out.segment;
317 318
	unsigned int consumed = 0;
	int rc = 0;
319

320
	while (1) {
321
		rc = iscsi_sw_tcp_xmit_segment(tcp_conn, segment);
322 323
		/*
		 * We may not have been able to send data because the conn
Lucas De Marchi's avatar
Lucas De Marchi committed
324
		 * is getting stopped. libiscsi will know so propagate err
325 326 327 328 329
		 * for it to do the right thing.
		 */
		if (rc == -EAGAIN)
			return rc;
		else if (rc < 0) {
330
			rc = ISCSI_ERR_XMIT_FAILED;
331
			goto error;
332
		} else if (rc == 0)
333 334 335 336 337 338 339
			break;

		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			if (segment->done != NULL) {
				rc = segment->done(tcp_conn, segment);
340
				if (rc != 0)
341 342 343
					goto error;
			}
		}
344 345
	}

346
	ISCSI_SW_TCP_DBG(conn, "xmit %d bytes\n", consumed);
347 348 349 350 351 352 353

	conn->txdata_octets += consumed;
	return consumed;

error:
	/* Transmit error. We could initiate error recovery
	 * here. */
354
	ISCSI_SW_TCP_DBG(conn, "Error sending PDU, errno=%d\n", rc);
355 356
	iscsi_conn_failure(conn, rc);
	return -EIO;
357 358 359
}

/**
360 361
 * iscsi_tcp_xmit_qlen - return the number of bytes queued for xmit
 */
362
static inline int iscsi_sw_tcp_xmit_qlen(struct iscsi_conn *conn)
363
{
364
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
365 366
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct iscsi_segment *segment = &tcp_sw_conn->out.segment;
367

368
	return segment->total_copied - segment->total_size;
369 370
}

371
static int iscsi_sw_tcp_pdu_xmit(struct iscsi_task *task)
372
{
373
	struct iscsi_conn *conn = task->conn;
374 375 376 377
	unsigned long pflags = current->flags;
	int rc = 0;

	current->flags |= PF_MEMALLOC;
378

379 380
	while (iscsi_sw_tcp_xmit_qlen(conn)) {
		rc = iscsi_sw_tcp_xmit(conn);
381 382 383 384
		if (rc == 0) {
			rc = -EAGAIN;
			break;
		}
385
		if (rc < 0)
386 387
			break;
		rc = 0;
388
	}
389

390 391
	tsk_restore_flags(current, pflags, PF_MEMALLOC);
	return rc;
392 393
}

394 395 396 397
/*
 * This is called when we're done sending the header.
 * Simply copy the data_segment to the send segment, and return.
 */
398 399
static int iscsi_sw_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn,
				      struct iscsi_segment *segment)
400
{
401 402 403
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;

	tcp_sw_conn->out.segment = tcp_sw_conn->out.data_segment;
404 405 406 407
	ISCSI_SW_TCP_DBG(tcp_conn->iscsi_conn,
			 "Header done. Next segment size %u total_size %u\n",
			 tcp_sw_conn->out.segment.size,
			 tcp_sw_conn->out.segment.total_size);
408 409 410
	return 0;
}

411 412
/*
 * iscsi_sw_tcp_send_hdr_prep - set up the out segment for a PDU header.
 * @conn: iSCSI connection
 * @hdr: header buffer; must have room for the digest after @hdrlen bytes
 *       when header digests are enabled
 * @hdrlen: header length in bytes, without digest
 *
 * Clears the data segment, optionally appends the header digest, records
 * @hdr for later length checks and initialises the out segment as a
 * linear buffer whose done callback is iscsi_sw_tcp_send_hdr_done().
 */
static void iscsi_sw_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr,
				       size_t hdrlen)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	size_t seg_len = hdrlen;

	ISCSI_SW_TCP_DBG(conn, "%s\n", conn->hdrdgst_en ?
			 "digest enabled" : "digest disabled");

	/* Clear the data segment - needs to be filled in by the
	 * caller using iscsi_tcp_send_data_prep() */
	memset(&tcp_sw_conn->out.data_segment, 0,
	       sizeof(tcp_sw_conn->out.data_segment));

	/* If header digest is enabled, compute the CRC and
	 * place the digest into the same buffer, right behind
	 * the header. We make sure that both iscsi_tcp_task
	 * and mtask have sufficient room.
	 */
	if (conn->hdrdgst_en) {
		iscsi_tcp_dgst_header(&tcp_sw_conn->tx_hash, hdr, hdrlen,
				      hdr + hdrlen);
		seg_len += ISCSI_DIGEST_SIZE;
	}

	/* Remember header pointer for later, when we need
	 * to decide whether there's a payload to go along
	 * with the header. */
	tcp_sw_conn->out.hdr = hdr;

	iscsi_segment_init_linear(&tcp_sw_conn->out.segment, hdr, seg_len,
				  iscsi_sw_tcp_send_hdr_done, NULL);
}

/*
 * Prepare the send buffer for the payload data.
 * Padding and checksumming will all be taken care
 * of by the iscsi_segment routines.
 */
static int
451 452 453
iscsi_sw_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg,
			    unsigned int count, unsigned int offset,
			    unsigned int len)
454 455
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
456
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
457 458 459
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

460 461 462
	ISCSI_SW_TCP_DBG(conn, "offset=%d, datalen=%d %s\n", offset, len,
			 conn->datadgst_en ?
			 "digest enabled" : "digest disabled");
463 464 465

	/* Make sure the datalen matches what the caller
	   said he would send. */
466
	hdr_spec_len = ntoh24(tcp_sw_conn->out.hdr->dlength);
467 468 469
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
470
		tx_hash = &tcp_sw_conn->tx_hash;
471

472 473 474
	return iscsi_segment_seek_sg(&tcp_sw_conn->out.data_segment,
				     sg, count, offset, len,
				     NULL, tx_hash);
475 476 477
}

static void
478
iscsi_sw_tcp_send_linear_data_prep(struct iscsi_conn *conn, void *data,
479 480 481
				   size_t len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
482
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
483 484 485
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

486 487
	ISCSI_SW_TCP_DBG(conn, "datalen=%zd %s\n", len, conn->datadgst_en ?
			 "digest enabled" : "digest disabled");
488 489 490

	/* Make sure the datalen matches what the caller
	   said he would send. */
491
	hdr_spec_len = ntoh24(tcp_sw_conn->out.hdr->dlength);
492 493 494
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
495
		tx_hash = &tcp_sw_conn->tx_hash;
496

497
	iscsi_segment_init_linear(&tcp_sw_conn->out.data_segment,
498
				data, len, NULL, tx_hash);
499 500
}

501 502
static int iscsi_sw_tcp_pdu_init(struct iscsi_task *task,
				 unsigned int offset, unsigned int count)
503 504 505 506
{
	struct iscsi_conn *conn = task->conn;
	int err = 0;

507
	iscsi_sw_tcp_send_hdr_prep(conn, task->hdr, task->hdr_len);
508 509 510 511 512

	if (!count)
		return 0;

	if (!task->sc)
513
		iscsi_sw_tcp_send_linear_data_prep(conn, task->data, count);
514 515 516
	else {
		struct scsi_data_buffer *sdb = scsi_out(task->sc);

517 518 519
		err = iscsi_sw_tcp_send_data_prep(conn, sdb->table.sgl,
						  sdb->table.nents, offset,
						  count);
520 521 522
	}

	if (err) {
523
		/* got invalid offset/len */
524 525 526 527 528
		return -EIO;
	}
	return 0;
}

529
/*
 * iscsi_sw_tcp_pdu_alloc - point the task at its header buffer.
 *
 * The header buffer sits in the task's private area directly after
 * struct iscsi_tcp_task. hdr_max leaves ISCSI_DIGEST_SIZE bytes of the
 * buffer out of the usable header space. @opcode is not used. Always
 * returns 0.
 */
static int iscsi_sw_tcp_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
{
	task->hdr = task->dd_data + sizeof(struct iscsi_tcp_task);
	task->hdr_max = sizeof(struct iscsi_sw_tcp_hdrbuf) - ISCSI_DIGEST_SIZE;

	return 0;
}

538
static struct iscsi_cls_conn *
539 540
iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session,
			 uint32_t conn_idx)
541
{
542 543 544
	struct iscsi_conn *conn;
	struct iscsi_cls_conn *cls_conn;
	struct iscsi_tcp_conn *tcp_conn;
545
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
546

547 548
	cls_conn = iscsi_tcp_conn_setup(cls_session, sizeof(*tcp_sw_conn),
					conn_idx);
549 550 551
	if (!cls_conn)
		return NULL;
	conn = cls_conn->dd_data;
552
	tcp_conn = conn->dd_data;
553
	tcp_sw_conn = tcp_conn->dd_data;
554

555 556 557 558
	tcp_sw_conn->tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						     CRYPTO_ALG_ASYNC);
	tcp_sw_conn->tx_hash.flags = 0;
	if (IS_ERR(tcp_sw_conn->tx_hash.tfm))
559
		goto free_conn;
560

561 562 563 564
	tcp_sw_conn->rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						     CRYPTO_ALG_ASYNC);
	tcp_sw_conn->rx_hash.flags = 0;
	if (IS_ERR(tcp_sw_conn->rx_hash.tfm))
565
		goto free_tx_tfm;
566
	tcp_conn->rx_hash = &tcp_sw_conn->rx_hash;
567

568
	return cls_conn;
569

570
free_tx_tfm:
571
	crypto_free_hash(tcp_sw_conn->tx_hash.tfm);
572
free_conn:
573 574 575 576 577
	iscsi_conn_printk(KERN_ERR, conn,
			  "Could not create connection due to crc32c "
			  "loading error. Make sure the crc32c "
			  "module is built as a module or into the "
			  "kernel\n");
578
	iscsi_tcp_conn_teardown(cls_conn);
579
	return NULL;
580 581
}

582
static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn)
583
{
584
	struct iscsi_session *session = conn->session;
585
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
586 587
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct socket *sock = tcp_sw_conn->sock;
588

589
	if (!sock)
590 591
		return;

592
	sock_hold(sock->sk);
593
	iscsi_sw_tcp_conn_restore_callbacks(conn);
594
	sock_put(sock->sk);
595

596
	spin_lock_bh(&session->lock);
597
	tcp_sw_conn->sock = NULL;
598
	spin_unlock_bh(&session->lock);
599
	sockfd_put(sock);
600 601
}

602
static void iscsi_sw_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
603
{
604 605
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
606
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
607

608
	iscsi_sw_tcp_release_conn(conn);
609

610 611 612 613
	if (tcp_sw_conn->tx_hash.tfm)
		crypto_free_hash(tcp_sw_conn->tx_hash.tfm);
	if (tcp_sw_conn->rx_hash.tfm)
		crypto_free_hash(tcp_sw_conn->rx_hash.tfm);
614

615
	iscsi_tcp_conn_teardown(cls_conn);
616
}
617

618
static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
619 620
{
	struct iscsi_conn *conn = cls_conn->dd_data;
621
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
622
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
623
	struct socket *sock = tcp_sw_conn->sock;
624 625

	/* userspace may have goofed up and not bound us */
626
	if (!sock)
627
		return;
628

629 630
	sock->sk->sk_err = EIO;
	wake_up_interruptible(sk_sleep(sock->sk));
631

632 633 634 635
	/* stop xmit side */
	iscsi_suspend_tx(conn);

	/* stop recv side and release socket */
636
	iscsi_sw_tcp_release_conn(conn);
637 638

	iscsi_conn_stop(cls_conn, flag);
639 640
}

641
static int
642 643 644
iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
		       struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
		       int is_leading)
645
{
646
	struct iscsi_session *session = cls_session->dd_data;
647 648
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
649
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
650 651 652
	struct sock *sk;
	struct socket *sock;
	int err;
653

654
	/* lookup for existing socket */
655
	sock = sockfd_lookup((int)transport_eph, &err);
656
	if (!sock) {
657 658
		iscsi_conn_printk(KERN_ERR, conn,
				  "sockfd_lookup failed %d\n", err);
659
		return -EEXIST;
660 661
	}

662 663
	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
	if (err)
664
		goto free_socket;
665

666
	spin_lock_bh(&session->lock);
667
	/* bind iSCSI connection and socket */
668
	tcp_sw_conn->sock = sock;
669
	spin_unlock_bh(&session->lock);
670

671 672
	/* setup Socket parameters */
	sk = sock->sk;
673
	sk->sk_reuse = SK_CAN_REUSE;
674 675
	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
	sk->sk_allocation = GFP_ATOMIC;
676
	sk_set_memalloc(sk);
677

678 679
	iscsi_sw_tcp_conn_set_callbacks(conn);
	tcp_sw_conn->sendpage = tcp_sw_conn->sock->ops->sendpage;
680 681 682
	/*
	 * set receive state machine into initial state
	 */
683
	iscsi_tcp_hdr_recv_prep(tcp_conn);
684
	return 0;
685 686 687 688

free_socket:
	sockfd_put(sock);
	return err;
689 690
}

691 692 693
static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn,
				       enum iscsi_param param, char *buf,
				       int buflen)
694
{
695
	struct iscsi_conn *conn = cls_conn->dd_data;
696
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
697
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
698 699 700

	switch(param) {
	case ISCSI_PARAM_HDRDGST_EN:
701
		iscsi_set_param(cls_conn, param, buf, buflen);
702 703
		break;
	case ISCSI_PARAM_DATADGST_EN:
704
		iscsi_set_param(cls_conn, param, buf, buflen);
705 706
		tcp_sw_conn->sendpage = conn->datadgst_en ?
			sock_no_sendpage : tcp_sw_conn->sock->ops->sendpage;
707 708
		break;
	case ISCSI_PARAM_MAX_R2T:
709
		return iscsi_tcp_set_max_r2t(conn, buf);
710
	default:
711
		return iscsi_set_param(cls_conn, param, buf, buflen);
712 713 714 715 716
	}

	return 0;
}

717 718
static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
				       enum iscsi_param param, char *buf)
719
{
720
	struct iscsi_conn *conn = cls_conn->dd_data;
721 722 723 724
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct sockaddr_in6 addr;
	int rc, len;
725 726

	switch(param) {
727 728
	case ISCSI_PARAM_CONN_PORT:
	case ISCSI_PARAM_CONN_ADDRESS:
729
	case ISCSI_PARAM_LOCAL_PORT:
730
		spin_lock_bh(&conn->session->lock);
731
		if (!tcp_sw_conn || !tcp_sw_conn->sock) {
732
			spin_unlock_bh(&conn->session->lock);
733 734
			return -ENOTCONN;
		}
735 736 737 738 739 740
		if (param == ISCSI_PARAM_LOCAL_PORT)
			rc = kernel_getsockname(tcp_sw_conn->sock,
						(struct sockaddr *)&addr, &len);
		else
			rc = kernel_getpeername(tcp_sw_conn->sock,
						(struct sockaddr *)&addr, &len);
741
		spin_unlock_bh(&conn->session->lock);
742 743 744 745 746
		if (rc)
			return rc;

		return iscsi_conn_get_addr_param((struct sockaddr_storage *)
						 &addr, param, buf);
747
	default:
748
		return iscsi_conn_get_param(cls_conn, param, buf);
749 750
	}

751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766
	return 0;
}

static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
				       enum iscsi_host_param param, char *buf)
{
	struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(shost);
	struct iscsi_session *session = tcp_sw_host->session;
	struct iscsi_conn *conn;
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
	struct sockaddr_in6 addr;
	int rc, len;

	switch (param) {
	case ISCSI_HOST_PARAM_IPADDRESS:
767 768 769
		if (!session)
			return -ENOTCONN;

770
		spin_lock_bh(&session->lock);
771 772
		conn = session->leadconn;
		if (!conn) {
773
			spin_unlock_bh(&session->lock);
774 775 776 777 778 779
			return -ENOTCONN;
		}
		tcp_conn = conn->dd_data;

		tcp_sw_conn = tcp_conn->dd_data;
		if (!tcp_sw_conn->sock) {
780
			spin_unlock_bh(&session->lock);
781 782 783 784 785
			return -ENOTCONN;
		}

		rc = kernel_getsockname(tcp_sw_conn->sock,
					(struct sockaddr *)&addr, &len);
786
		spin_unlock_bh(&session->lock);
787 788 789 790
		if (rc)
			return rc;

		return iscsi_conn_get_addr_param((struct sockaddr_storage *)
791 792
						 &addr,
						 (enum iscsi_param)param, buf);
793 794 795 796 797
	default:
		return iscsi_host_get_param(shost, param, buf);
	}

	return 0;
798 799
}

800
static void
801 802
iscsi_sw_tcp_conn_get_stats(struct iscsi_cls_conn *cls_conn,
			    struct iscsi_stats *stats)
803
{
804
	struct iscsi_conn *conn = cls_conn->dd_data;
805
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
806
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
807 808 809

	stats->custom_length = 3;
	strcpy(stats->custom[0].desc, "tx_sendpage_failures");
810
	stats->custom[0].value = tcp_sw_conn->sendpage_failures_cnt;
811
	strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
812
	stats->custom[1].value = tcp_sw_conn->discontiguous_hdr_cnt;
813 814
	strcpy(stats->custom[2].desc, "eh_abort_cnt");
	stats->custom[2].value = conn->eh_abort_cnt;
815 816

	iscsi_tcp_conn_get_stats(cls_conn, stats);
817 818
}

819
static struct iscsi_cls_session *
820
iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
821
			    uint16_t qdepth, uint32_t initial_cmdsn)
822
{
823 824
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
825
	struct iscsi_sw_tcp_host *tcp_sw_host;
826
	struct Scsi_Host *shost;
827

828 829
	if (ep) {
		printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);
830 831 832
		return NULL;
	}

833 834
	shost = iscsi_host_alloc(&iscsi_sw_tcp_sht,
				 sizeof(struct iscsi_sw_tcp_host), 1);
835
	if (!shost)
836
		return NULL;
837
	shost->transportt = iscsi_sw_tcp_scsi_transport;
838
	shost->cmd_per_lun = qdepth;
839 840 841
	shost->max_lun = iscsi_max_lun;
	shost->max_id = 0;
	shost->max_channel = 0;
842
	shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;
843

844
	if (iscsi_host_add(shost, NULL))
845 846
		goto free_host;

847
	cls_session = iscsi_session_setup(&iscsi_sw_tcp_transport, shost,
848
					  cmds_max, 0,
849 850
					  sizeof(struct iscsi_tcp_task) +
					  sizeof(struct iscsi_sw_tcp_hdrbuf),
851
					  initial_cmdsn, 0);
852 853 854
	if (!cls_session)
		goto remove_host;
	session = cls_session->dd_data;
855 856
	tcp_sw_host = iscsi_host_priv(shost);
	tcp_sw_host->session = session;
857

858
	shost->can_queue = session->scsi_cmds_max;
859
	if (iscsi_tcp_r2tpool_alloc(session))
860
		goto remove_session;
861 862
	return cls_session;

863
remove_session:
864
	iscsi_session_teardown(cls_session);
865
remove_host:
866
	iscsi_host_remove(shost);
867
free_host:
868
	iscsi_host_free(shost);
869 870 871
	return NULL;
}

872
static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
873
{
874
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
875 876 877 878
	struct iscsi_session *session = cls_session->dd_data;

	if (WARN_ON_ONCE(session->leadconn))
		return;
879

880
	iscsi_tcp_r2tpool_free(cls_session->dd_data);
881
	iscsi_session_teardown(cls_session);
882

883 884
	iscsi_host_remove(shost);
	iscsi_host_free(shost);
885 886
}

887
static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param)
888 889
{
	switch (param_type) {
890 891 892 893 894 895 896 897 898 899
	case ISCSI_HOST_PARAM:
		switch (param) {
		case ISCSI_HOST_PARAM_NETDEV_NAME:
		case ISCSI_HOST_PARAM_HWADDRESS:
		case ISCSI_HOST_PARAM_IPADDRESS:
		case ISCSI_HOST_PARAM_INITIATOR_NAME:
			return S_IRUGO;
		default:
			return 0;
		}
900 901 902 903 904 905 906 907
	case ISCSI_PARAM:
		switch (param) {
		case ISCSI_PARAM_MAX_RECV_DLENGTH:
		case ISCSI_PARAM_MAX_XMIT_DLENGTH:
		case ISCSI_PARAM_HDRDGST_EN:
		case ISCSI_PARAM_DATADGST_EN:
		case ISCSI_PARAM_CONN_ADDRESS:
		case ISCSI_PARAM_CONN_PORT:
908
		case ISCSI_PARAM_LOCAL_PORT:
909 910 911 912 913
		case ISCSI_PARAM_EXP_STATSN:
		case ISCSI_PARAM_PERSISTENT_ADDRESS:
		case ISCSI_PARAM_PERSISTENT_PORT:
		case ISCSI_PARAM_PING_TMO:
		case ISCSI_PARAM_RECV_TMO:
914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933
		case ISCSI_PARAM_INITIAL_R2T_EN:
		case ISCSI_PARAM_MAX_R2T:
		case ISCSI_PARAM_IMM_DATA_EN:
		case ISCSI_PARAM_FIRST_BURST:
		case ISCSI_PARAM_MAX_BURST:
		case ISCSI_PARAM_PDU_INORDER_EN:
		case ISCSI_PARAM_DATASEQ_INORDER_EN:
		case ISCSI_PARAM_ERL:
		case ISCSI_PARAM_TARGET_NAME:
		case ISCSI_PARAM_TPGT:
		case ISCSI_PARAM_USERNAME:
		case ISCSI_PARAM_PASSWORD:
		case ISCSI_PARAM_USERNAME_IN:
		case ISCSI_PARAM_PASSWORD_IN:
		case ISCSI_PARAM_FAST_ABORT:
		case ISCSI_PARAM_ABORT_TMO:
		case ISCSI_PARAM_LU_RESET_TMO:
		case ISCSI_PARAM_TGT_RESET_TMO:
		case ISCSI_PARAM_IFACE_NAME:
		case ISCSI_PARAM_INITIATOR_NAME:
934 935 936 937 938 939 940 941 942
			return S_IRUGO;
		default:
			return 0;
		}
	}

	return 0;
}

943 944 945 946 947 948
/*
 * iscsi_sw_tcp_slave_alloc - slave_alloc hook of the host template.
 *
 * Sets QUEUE_FLAG_BIDI on the new device's request queue and returns 0.
 */
static int iscsi_sw_tcp_slave_alloc(struct scsi_device *sdev)
{
	set_bit(QUEUE_FLAG_BIDI, &sdev->request_queue->queue_flags);
	return 0;
}

949
static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev)
950
{
951
	blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_ANY);
952 953 954 955
	blk_queue_dma_alignment(sdev->request_queue, 0);
	return 0;
}

956
static struct scsi_host_template iscsi_sw_tcp_sht = {
957
	.module			= THIS_MODULE,
958
	.name			= "iSCSI Initiator over TCP/IP",
959
	.queuecommand           = iscsi_queuecommand,
960
	.change_queue_depth	= scsi_change_queue_depth,
961
	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
962
	.sg_tablesize		= 4096,
963
	.max_sectors		= 0xFFFF,
964 965
	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
	.eh_abort_handler       = iscsi_eh_abort,
966
	.eh_device_reset_handler= iscsi_eh_device_reset,
967
	.eh_target_reset_handler = iscsi_eh_recover_target,
968
	.use_clustering         = DISABLE_CLUSTERING,
969
	.slave_alloc            = iscsi_sw_tcp_slave_alloc,
970
	.slave_configure        = iscsi_sw_tcp_slave_configure,
971
	.target_alloc		= iscsi_target_alloc,
972 973
	.proc_name		= "iscsi_tcp",
	.this_id		= -1,
974
	.track_queue_depth	= 1,
975 976
};

977
static struct iscsi_transport iscsi_sw_tcp_transport = {
978 979 980 981
	.owner			= THIS_MODULE,
	.name			= "tcp",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
				  | CAP_DATADGST,
982
	/* session management */
983 984
	.create_session		= iscsi_sw_tcp_session_create,
	.destroy_session	= iscsi_sw_tcp_session_destroy,
985
	/* connection management */
986 987 988
	.create_conn		= iscsi_sw_tcp_conn_create,
	.bind_conn		= iscsi_sw_tcp_conn_bind,
	.destroy_conn		= iscsi_sw_tcp_conn_destroy,
989
	.attr_is_visible	= iscsi_sw_tcp_attr_is_visible,
990 991
	.set_param		= iscsi_sw_tcp_conn_set_param,
	.get_conn_param		= iscsi_sw_tcp_conn_get_param,
992
	.get_session_param	= iscsi_session_get_param,
993
	.start_conn		= iscsi_conn_start,
994
	.stop_conn		= iscsi_sw_tcp_conn_stop,
995
	/* iscsi host params */
996
	.get_host_param		= iscsi_sw_tcp_host_get_param,
997
	.set_host_param		= iscsi_host_set_param,
998
	/* IO */
999
	.send_pdu		= iscsi_conn_send_pdu,
1000
	.get_stats		= iscsi_sw_tcp_conn_get_stats,
1001
	/* iscsi task/cmd helpers */
1002 1003 1004
	.init_task		= iscsi_tcp_task_init,
	.xmit_task		= iscsi_tcp_task_xmit,
	.cleanup_task		= iscsi_tcp_cleanup_task,
1005
	/* low level pdu helpers */
1006 1007 1008
	.xmit_pdu		= iscsi_sw_tcp_pdu_xmit,
	.init_pdu		= iscsi_sw_tcp_pdu_init,
	.alloc_pdu		= iscsi_sw_tcp_pdu_alloc,
1009
	/* recovery */
1010
	.session_recovery_timedout = iscsi_session_recovery_timedout,
1011 1012
};

1013
/*
 * iscsi_sw_tcp_init - module entry point.
 *
 * Rejects a max_lun parameter of 0 with -EINVAL, then registers the
 * transport and keeps the returned SCSI transport template.
 *
 * Returns 0 on success or -ENODEV when registration fails.
 */
static int __init iscsi_sw_tcp_init(void)
{
	if (!iscsi_max_lun) {
		printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
		       iscsi_max_lun);
		return -EINVAL;
	}

	iscsi_sw_tcp_scsi_transport =
		iscsi_register_transport(&iscsi_sw_tcp_transport);

	return iscsi_sw_tcp_scsi_transport ? 0 : -ENODEV;
}

1029
/*
 * iscsi_sw_tcp_exit - module exit point; unregisters the transport that
 * iscsi_sw_tcp_init() registered.
 */
static void __exit iscsi_sw_tcp_exit(void)
{
	iscsi_unregister_transport(&iscsi_sw_tcp_transport);
}

1034 1035
/* Register the module's init and exit entry points. */
module_init(iscsi_sw_tcp_init);
module_exit(iscsi_sw_tcp_exit);