Commit 209fae14 authored by Dan Williams's avatar Dan Williams

isci: atomic device lookup and reference counting

We have unsafe references to remote devices that are notified to
disappear at lldd_dev_gone.  In order to clean this up we need a single
canonical source for device lookups and stable references once a lookup
succeeds.  Towards that end guarantee that domain_device.lldd_dev is
NULL as soon as we start the process of stopping a device.  Any code
path that wants to safely lookup a remote device must do so through
task->dev->lldd_dev (isci_lookup_device()).

For in-flight references outside of scic_lock we need reference counting
to ensure that the device is not recycled before we are done with it.
Simplify device back references to just scic_sds_request.target_device
which is now the only permissible internal reference that is maintained
relative to the reference count.

There were two occasions where we wanted new i/o's to be treated as
SAS_TASK_UNDELIVERED but where the domain_dev->lldd_dev link is still
intact.  Introduce a 'gone' flag to prevent i/o while waiting for libsas
to take action on the port down event.

One 'core' leftover is that we currently call
scic_remote_device_destruct() from isci_remote_device_deconstruct()
which is called when the 'core' says the device is stopped.  It would be
more natural for the final put to trigger
isci_remote_device_deconstruct() but this implementation is deferred as
it requires other changes.
Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
parent 360b03ed
......@@ -1327,7 +1327,7 @@ void isci_host_deinit(struct isci_host *ihost)
struct isci_remote_device *idev, *d;
list_for_each_entry_safe(idev, d, &iport->remote_dev_list, node) {
isci_remote_device_change_state(idev, isci_stopping);
if (test_bit(IDEV_ALLOCATED, &idev->flags))
isci_remote_device_stop(ihost, idev);
}
}
......
......@@ -321,8 +321,7 @@ static void isci_port_link_down(struct isci_host *isci_host,
dev_dbg(&isci_host->pdev->dev,
"%s: isci_device = %p\n",
__func__, isci_device);
isci_remote_device_change_state(isci_device,
isci_stopping);
set_bit(IDEV_GONE, &isci_device->flags);
}
}
isci_port_change_state(isci_port, isci_stopping);
......
......@@ -94,7 +94,7 @@ static void isci_remote_device_not_ready(struct isci_host *ihost,
"%s: isci_device = %p\n", __func__, idev);
if (reason == SCIC_REMOTE_DEVICE_NOT_READY_STOP_REQUESTED)
isci_remote_device_change_state(idev, isci_stopping);
set_bit(IDEV_GONE, &idev->flags);
else
/* device ready is actually a "not ready for io" state. */
isci_remote_device_change_state(idev, isci_ready);
......@@ -449,8 +449,10 @@ static void scic_sds_remote_device_start_request(struct scic_sds_remote_device *
/* cleanup requests that failed after starting on the port */
if (status != SCI_SUCCESS)
scic_sds_port_complete_io(sci_port, sci_dev, sci_req);
else
else {
kref_get(&sci_dev_to_idev(sci_dev)->kref);
scic_sds_remote_device_increment_request_count(sci_dev);
}
}
enum sci_status scic_sds_remote_device_start_io(struct scic_sds_controller *scic,
......@@ -656,6 +658,8 @@ enum sci_status scic_sds_remote_device_complete_io(struct scic_sds_controller *s
"%s: Port:0x%p Device:0x%p Request:0x%p Status:0x%x "
"could not complete\n", __func__, sci_port,
sci_dev, sci_req, status);
else
isci_put_device(sci_dev_to_idev(sci_dev));
return status;
}
......@@ -860,23 +864,11 @@ static void isci_remote_device_deconstruct(struct isci_host *ihost, struct isci_
* here should go through isci_remote_device_nuke_requests.
* If we hit this condition, we will need a way to complete
* io requests in process */
while (!list_empty(&idev->reqs_in_process)) {
dev_err(&ihost->pdev->dev,
"%s: ** request list not empty! **\n", __func__);
BUG();
}
BUG_ON(!list_empty(&idev->reqs_in_process));
scic_remote_device_destruct(&idev->sci);
idev->domain_dev->lldd_dev = NULL;
idev->domain_dev = NULL;
idev->isci_port = NULL;
list_del_init(&idev->node);
clear_bit(IDEV_START_PENDING, &idev->flags);
clear_bit(IDEV_STOP_PENDING, &idev->flags);
clear_bit(IDEV_EH, &idev->flags);
wake_up(&ihost->eventq);
isci_put_device(idev);
}
/**
......@@ -1314,6 +1306,22 @@ isci_remote_device_alloc(struct isci_host *ihost, struct isci_port *iport)
return idev;
}
void isci_remote_device_release(struct kref *kref)
{
struct isci_remote_device *idev = container_of(kref, typeof(*idev), kref);
struct isci_host *ihost = idev->isci_port->isci_host;
idev->domain_dev = NULL;
idev->isci_port = NULL;
clear_bit(IDEV_START_PENDING, &idev->flags);
clear_bit(IDEV_STOP_PENDING, &idev->flags);
clear_bit(IDEV_GONE, &idev->flags);
clear_bit(IDEV_EH, &idev->flags);
smp_mb__before_clear_bit();
clear_bit(IDEV_ALLOCATED, &idev->flags);
wake_up(&ihost->eventq);
}
/**
* isci_remote_device_stop() - This function is called internally to stop the
* remote device.
......@@ -1330,7 +1338,11 @@ enum sci_status isci_remote_device_stop(struct isci_host *ihost, struct isci_rem
dev_dbg(&ihost->pdev->dev,
"%s: isci_device = %p\n", __func__, idev);
spin_lock_irqsave(&ihost->scic_lock, flags);
idev->domain_dev->lldd_dev = NULL; /* disable new lookups */
set_bit(IDEV_GONE, &idev->flags);
isci_remote_device_change_state(idev, isci_stopping);
spin_unlock_irqrestore(&ihost->scic_lock, flags);
/* Kill all outstanding requests. */
isci_remote_device_nuke_requests(ihost, idev);
......@@ -1342,14 +1354,10 @@ enum sci_status isci_remote_device_stop(struct isci_host *ihost, struct isci_rem
spin_unlock_irqrestore(&ihost->scic_lock, flags);
/* Wait for the stop complete callback. */
if (status == SCI_SUCCESS) {
if (WARN_ONCE(status != SCI_SUCCESS, "failed to stop device\n"))
/* nothing to wait for */;
else
wait_for_device_stop(ihost, idev);
clear_bit(IDEV_ALLOCATED, &idev->flags);
}
dev_dbg(&ihost->pdev->dev,
"%s: idev = %p - after completion wait\n",
__func__, idev);
return status;
}
......@@ -1416,39 +1424,33 @@ int isci_remote_device_found(struct domain_device *domain_dev)
if (!isci_device)
return -ENODEV;
kref_init(&isci_device->kref);
INIT_LIST_HEAD(&isci_device->node);
domain_dev->lldd_dev = isci_device;
spin_lock_irq(&isci_host->scic_lock);
isci_device->domain_dev = domain_dev;
isci_device->isci_port = isci_port;
isci_remote_device_change_state(isci_device, isci_starting);
spin_lock_irq(&isci_host->scic_lock);
list_add_tail(&isci_device->node, &isci_port->remote_dev_list);
set_bit(IDEV_START_PENDING, &isci_device->flags);
status = isci_remote_device_construct(isci_port, isci_device);
spin_unlock_irq(&isci_host->scic_lock);
dev_dbg(&isci_host->pdev->dev,
"%s: isci_device = %p\n",
__func__, isci_device);
if (status != SCI_SUCCESS) {
spin_lock_irq(&isci_host->scic_lock);
isci_remote_device_deconstruct(
isci_host,
isci_device
);
if (status == SCI_SUCCESS) {
/* device came up, advertise it to the world */
domain_dev->lldd_dev = isci_device;
} else
isci_put_device(isci_device);
spin_unlock_irq(&isci_host->scic_lock);
return -ENODEV;
}
/* wait for the device ready callback. */
wait_for_device_start(isci_host, isci_device);
return 0;
return status == SCI_SUCCESS ? 0 : -ENODEV;
}
/**
* isci_device_is_reset_pending() - This function will check if there is any
......
......@@ -56,6 +56,7 @@
#ifndef _ISCI_REMOTE_DEVICE_H_
#define _ISCI_REMOTE_DEVICE_H_
#include <scsi/libsas.h>
#include <linux/kref.h>
#include "scu_remote_node_context.h"
#include "remote_node_context.h"
#include "port.h"
......@@ -134,7 +135,9 @@ struct isci_remote_device {
#define IDEV_STOP_PENDING 1
#define IDEV_ALLOCATED 2
#define IDEV_EH 3
#define IDEV_GONE 4
unsigned long flags;
struct kref kref;
struct isci_port *isci_port;
struct domain_device *domain_dev;
struct list_head node;
......@@ -145,6 +148,26 @@ struct isci_remote_device {
#define ISCI_REMOTE_DEVICE_START_TIMEOUT 5000
/* device reference routines must be called under scic_lock */
static inline struct isci_remote_device *isci_lookup_device(struct domain_device *dev)
{
struct isci_remote_device *idev = dev->lldd_dev;
if (idev && !test_bit(IDEV_GONE, &idev->flags)) {
kref_get(&idev->kref);
return idev;
}
return NULL;
}
void isci_remote_device_release(struct kref *kref);
static inline void isci_put_device(struct isci_remote_device *idev)
{
if (idev)
kref_put(&idev->kref, isci_remote_device_release);
}
enum sci_status isci_remote_device_stop(struct isci_host *ihost,
struct isci_remote_device *idev);
void isci_remote_device_nuke_requests(struct isci_host *ihost,
......
......@@ -2313,7 +2313,7 @@ static void isci_request_set_open_reject_status(
* none.
*/
static void isci_request_handle_controller_specific_errors(
struct isci_remote_device *isci_device,
struct isci_remote_device *idev,
struct isci_request *request,
struct sas_task *task,
enum service_response *response_ptr,
......@@ -2353,8 +2353,7 @@ static void isci_request_handle_controller_specific_errors(
* that we ignore the quiesce state, since we are
* concerned about the actual device state.
*/
if ((isci_device->status == isci_stopping) ||
(isci_device->status == isci_stopped))
if (!idev)
*status_ptr = SAS_DEVICE_UNKNOWN;
else
*status_ptr = SAS_ABORTED_TASK;
......@@ -2367,8 +2366,7 @@ static void isci_request_handle_controller_specific_errors(
/* Task in the target is not done. */
*response_ptr = SAS_TASK_UNDELIVERED;
if ((isci_device->status == isci_stopping) ||
(isci_device->status == isci_stopped))
if (!idev)
*status_ptr = SAS_DEVICE_UNKNOWN;
else
*status_ptr = SAM_STAT_TASK_ABORTED;
......@@ -2399,8 +2397,7 @@ static void isci_request_handle_controller_specific_errors(
* that we ignore the quiesce state, since we are
* concerned about the actual device state.
*/
if ((isci_device->status == isci_stopping) ||
(isci_device->status == isci_stopped))
if (!idev)
*status_ptr = SAS_DEVICE_UNKNOWN;
else
*status_ptr = SAS_ABORTED_TASK;
......@@ -2629,7 +2626,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
struct ssp_response_iu *resp_iu;
void *resp_buf;
unsigned long task_flags;
struct isci_remote_device *isci_device = request->isci_device;
struct isci_remote_device *idev = isci_lookup_device(task->dev);
enum service_response response = SAS_TASK_UNDELIVERED;
enum exec_status status = SAS_ABORTED_TASK;
enum isci_request_status request_status;
......@@ -2672,9 +2669,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
* that we ignore the quiesce state, since we are
* concerned about the actual device state.
*/
if ((isci_device->status == isci_stopping)
|| (isci_device->status == isci_stopped)
)
if (!idev)
status = SAS_DEVICE_UNKNOWN;
else
status = SAS_ABORTED_TASK;
......@@ -2697,8 +2692,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
request->complete_in_target = true;
response = SAS_TASK_UNDELIVERED;
if ((isci_device->status == isci_stopping) ||
(isci_device->status == isci_stopped))
if (!idev)
/* The device has been /is being stopped. Note that
* we ignore the quiesce state, since we are
* concerned about the actual device state.
......@@ -2728,8 +2722,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
* that we ignore the quiesce state, since we are
* concerned about the actual device state.
*/
if ((isci_device->status == isci_stopping) ||
(isci_device->status == isci_stopped))
if (!idev)
status = SAS_DEVICE_UNKNOWN;
else
status = SAS_ABORTED_TASK;
......@@ -2861,8 +2854,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
* that we ignore the quiesce state, since we are
* concerned about the actual device state.
*/
if ((isci_device->status == isci_stopping) ||
(isci_device->status == isci_stopped))
if (!idev)
status = SAS_DEVICE_UNKNOWN;
else
status = SAS_ABORTED_TASK;
......@@ -2873,7 +2865,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
case SCI_FAILURE_CONTROLLER_SPECIFIC_IO_ERR:
isci_request_handle_controller_specific_errors(
isci_device, request, task, &response, &status,
idev, request, task, &response, &status,
&complete_to_host);
break;
......@@ -2902,8 +2894,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
/* Fail the I/O so it can be retried. */
response = SAS_TASK_UNDELIVERED;
if ((isci_device->status == isci_stopping) ||
(isci_device->status == isci_stopped))
if (!idev)
status = SAS_DEVICE_UNKNOWN;
else
status = SAS_ABORTED_TASK;
......@@ -2926,8 +2917,7 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
* that we ignore the quiesce state, since we are
* concerned about the actual device state.
*/
if ((isci_device->status == isci_stopping) ||
(isci_device->status == isci_stopped))
if (!idev)
status = SAS_DEVICE_UNKNOWN;
else
status = SAS_ABORTED_TASK;
......@@ -2953,8 +2943,10 @@ static void isci_request_io_request_complete(struct isci_host *isci_host,
/* complete the io request to the core. */
scic_controller_complete_io(&isci_host->sci,
&isci_device->sci,
request->sci.target_device,
&request->sci);
isci_put_device(idev);
/* set terminated handle so it cannot be completed or
* terminated again, and to cause any calls into abort
* task to recognize the already completed case.
......@@ -3511,7 +3503,6 @@ static enum sci_status isci_io_request_build(
}
static struct isci_request *isci_request_alloc_core(struct isci_host *ihost,
struct isci_remote_device *idev,
gfp_t gfp_flags)
{
dma_addr_t handle;
......@@ -3528,7 +3519,6 @@ static struct isci_request *isci_request_alloc_core(struct isci_host *ihost,
spin_lock_init(&ireq->state_lock);
ireq->request_daddr = handle;
ireq->isci_host = ihost;
ireq->isci_device = idev;
ireq->io_request_completion = NULL;
ireq->terminated = false;
......@@ -3546,12 +3536,11 @@ static struct isci_request *isci_request_alloc_core(struct isci_host *ihost,
static struct isci_request *isci_request_alloc_io(struct isci_host *ihost,
struct sas_task *task,
struct isci_remote_device *idev,
gfp_t gfp_flags)
{
struct isci_request *ireq;
ireq = isci_request_alloc_core(ihost, idev, gfp_flags);
ireq = isci_request_alloc_core(ihost, gfp_flags);
if (ireq) {
ireq->ttype_ptr.io_task_ptr = task;
ireq->ttype = io_task;
......@@ -3562,12 +3551,11 @@ static struct isci_request *isci_request_alloc_io(struct isci_host *ihost,
struct isci_request *isci_request_alloc_tmf(struct isci_host *ihost,
struct isci_tmf *isci_tmf,
struct isci_remote_device *idev,
gfp_t gfp_flags)
{
struct isci_request *ireq;
ireq = isci_request_alloc_core(ihost, idev, gfp_flags);
ireq = isci_request_alloc_core(ihost, gfp_flags);
if (ireq) {
ireq->ttype_ptr.tmf_task_ptr = isci_tmf;
ireq->ttype = tmf_task;
......@@ -3575,21 +3563,16 @@ struct isci_request *isci_request_alloc_tmf(struct isci_host *ihost,
return ireq;
}
int isci_request_execute(struct isci_host *ihost, struct sas_task *task,
gfp_t gfp_flags)
int isci_request_execute(struct isci_host *ihost, struct isci_remote_device *idev,
struct sas_task *task, gfp_t gfp_flags)
{
enum sci_status status = SCI_FAILURE_UNSUPPORTED_PROTOCOL;
struct scic_sds_remote_device *sci_dev;
struct isci_remote_device *idev;
struct isci_request *ireq;
unsigned long flags;
int ret = 0;
idev = task->dev->lldd_dev;
sci_dev = &idev->sci;
/* do common allocation and init of request object. */
ireq = isci_request_alloc_io(ihost, task, idev, gfp_flags);
ireq = isci_request_alloc_io(ihost, task, gfp_flags);
if (!ireq)
goto out;
......@@ -3605,8 +3588,7 @@ int isci_request_execute(struct isci_host *ihost, struct sas_task *task,
spin_lock_irqsave(&ihost->scic_lock, flags);
/* send the request, let the core assign the IO TAG. */
status = scic_controller_start_io(&ihost->sci, sci_dev,
&ireq->sci,
status = scic_controller_start_io(&ihost->sci, &idev->sci, &ireq->sci,
SCI_CONTROLLER_INVALID_IO_TAG);
if (status != SCI_SUCCESS &&
status != SCI_FAILURE_REMOTE_DEVICE_RESET_REQUIRED) {
......
......@@ -285,7 +285,6 @@ struct isci_request {
struct isci_tmf *tmf_task_ptr; /* When ttype==tmf_task */
} ttype_ptr;
struct isci_host *isci_host;
struct isci_remote_device *isci_device;
/* For use in the requests_to_{complete|abort} lists: */
struct list_head completed_node;
/* For use in the reqs_in_process list: */
......@@ -681,12 +680,10 @@ static inline void isci_request_free(struct isci_host *isci_host,
struct isci_request *isci_request_alloc_tmf(struct isci_host *ihost,
struct isci_tmf *isci_tmf,
struct isci_remote_device *idev,
gfp_t gfp_flags);
int isci_request_execute(struct isci_host *isci_host,
struct sas_task *task,
gfp_t gfp_flags);
int isci_request_execute(struct isci_host *ihost, struct isci_remote_device *idev,
struct sas_task *task, gfp_t gfp_flags);
/**
* isci_request_unmap_sgl() - This function unmaps the DMA address of a given
......
......@@ -213,11 +213,10 @@ int isci_task_send_lu_reset_sata(
/* Send the soft reset to the target */
#define ISCI_SRST_TIMEOUT_MS 25000 /* 25 second timeout. */
isci_task_build_tmf(&tmf, isci_device, isci_tmf_sata_srst_high,
NULL, NULL
);
isci_task_build_tmf(&tmf, isci_tmf_sata_srst_high, NULL, NULL);
ret = isci_task_execute_tmf(isci_host, &tmf, ISCI_SRST_TIMEOUT_MS);
ret = isci_task_execute_tmf(isci_host, isci_device, &tmf,
ISCI_SRST_TIMEOUT_MS);
if (ret != TMF_RESP_FUNC_COMPLETE) {
dev_warn(&isci_host->pdev->dev,
......
This diff is collapsed.
......@@ -213,18 +213,15 @@ int isci_bus_reset_handler(struct scsi_cmnd *cmd);
void isci_task_build_tmf(
struct isci_tmf *tmf,
struct isci_remote_device *isci_device,
enum isci_tmf_function_codes code,
void (*tmf_sent_cb)(enum isci_tmf_cb_state,
struct isci_tmf *,
void *),
void *cb_data);
int isci_task_execute_tmf(
struct isci_host *isci_host,
struct isci_tmf *tmf,
unsigned long timeout_ms);
int isci_task_execute_tmf(struct isci_host *isci_host,
struct isci_remote_device *idev,
struct isci_tmf *tmf, unsigned long timeout_ms);
/**
* enum isci_completion_selection - This enum defines the possible actions to
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment