Commit 3d36a0df authored by Stefan Richter

firewire: keep highlevel drivers attached during brief connection loss

There are situations when nodes vanish from the bus and come back
quickly thereafter:
  - When certain bus-powered hubs are plugged in,
  - when certain devices are plugged into 6-port hubs,
  - when certain disk enclosures are switched from self-power to bus
    power or vice versa and break the daisy chain during the transition,
  - when the user unplugs a cable and quickly plugs it back in, e.g.
    to reorder a daisy chain (works on Mac OS X if done quickly enough),
  - when certain hubs temporarily malfunction during high bus traffic.

Until now, firewire-core reported affected nodes as lost to the
highlevel drivers (firewire-sbp2 and userspace drivers).  We now delay
the destruction of device representations until at least two seconds
after the last bus reset.  If a "new" device is detected in this
period whose bus information block and root directory header match those
of a device which is pending deletion, we resurrect that device and
send update calls to highlevel drivers.
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
parent 8cd0bbbd
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <linux/device.h> #include <linux/device.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/jiffies.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/rwsem.h> #include <linux/rwsem.h>
#include <linux/semaphore.h> #include <linux/semaphore.h>
...@@ -634,12 +635,38 @@ struct fw_device *fw_device_get_by_devt(dev_t devt) ...@@ -634,12 +635,38 @@ struct fw_device *fw_device_get_by_devt(dev_t devt)
return device; return device;
} }
/*
* These defines control the retry behavior for reading the config
* rom. It shouldn't be necessary to tweak these; if the device
* doesn't respond to a config rom read within 10 seconds, it's not
* going to respond at all. As for the initial delay, a lot of
* devices will be able to respond within half a second after bus
* reset. On the other hand, it's not really worth being more
* aggressive than that, since it scales pretty well; if 10 devices
* are plugged in, they're all getting read within one second.
*/
#define MAX_RETRIES 10
#define RETRY_DELAY (3 * HZ)
#define INITIAL_DELAY (HZ / 2)
#define SHUTDOWN_DELAY (2 * HZ)
static void fw_device_shutdown(struct work_struct *work) static void fw_device_shutdown(struct work_struct *work)
{ {
struct fw_device *device = struct fw_device *device =
container_of(work, struct fw_device, work.work); container_of(work, struct fw_device, work.work);
int minor = MINOR(device->device.devt); int minor = MINOR(device->device.devt);
if (time_is_after_jiffies(device->card->reset_jiffies + SHUTDOWN_DELAY)) {
schedule_delayed_work(&device->work, SHUTDOWN_DELAY);
return;
}
if (atomic_cmpxchg(&device->state,
FW_DEVICE_GONE,
FW_DEVICE_SHUTDOWN) != FW_DEVICE_GONE)
return;
fw_device_cdev_remove(device); fw_device_cdev_remove(device);
device_for_each_child(&device->device, NULL, shutdown_unit); device_for_each_child(&device->device, NULL, shutdown_unit);
device_unregister(&device->device); device_unregister(&device->device);
...@@ -647,6 +674,7 @@ static void fw_device_shutdown(struct work_struct *work) ...@@ -647,6 +674,7 @@ static void fw_device_shutdown(struct work_struct *work)
down_write(&fw_device_rwsem); down_write(&fw_device_rwsem);
idr_remove(&fw_device_idr, minor); idr_remove(&fw_device_idr, minor);
up_write(&fw_device_rwsem); up_write(&fw_device_rwsem);
fw_device_put(device); fw_device_put(device);
} }
...@@ -654,25 +682,63 @@ static struct device_type fw_device_type = { ...@@ -654,25 +682,63 @@ static struct device_type fw_device_type = {
.release = fw_device_release, .release = fw_device_release,
}; };
static void fw_device_update(struct work_struct *work);
/* /*
* These defines control the retry behavior for reading the config * If a device was pending for deletion because its node went away but its
* rom. It shouldn't be necessary to tweak these; if the device * bus info block and root directory header matches that of a newly discovered
* doesn't respond to a config rom read within 10 seconds, it's not * device, revive the existing fw_device.
* going to respond at all. As for the initial delay, a lot of * The newly allocated fw_device becomes obsolete instead.
* devices will be able to respond within half a second after bus
* reset. On the other hand, it's not really worth being more
* aggressive than that, since it scales pretty well; if 10 devices
* are plugged in, they're all getting read within one second.
*/ */
static int lookup_existing_device(struct device *dev, void *data)
{
struct fw_device *old = fw_device(dev);
struct fw_device *new = data;
struct fw_card *card = new->card;
int match = 0;
down_read(&fw_device_rwsem); /* serialize config_rom access */
spin_lock_irq(&card->lock); /* serialize node access */
if (memcmp(old->config_rom, new->config_rom, 6 * 4) == 0 &&
atomic_cmpxchg(&old->state,
FW_DEVICE_GONE,
FW_DEVICE_RUNNING) == FW_DEVICE_GONE) {
struct fw_node *current_node = new->node;
struct fw_node *obsolete_node = old->node;
new->node = obsolete_node;
new->node->data = new;
old->node = current_node;
old->node->data = old;
old->max_speed = new->max_speed;
old->node_id = current_node->node_id;
smp_wmb(); /* update node_id before generation */
old->generation = card->generation;
old->config_rom_retries = 0;
fw_notify("rediscovered device %s\n", dev_name(dev));
#define MAX_RETRIES 10 PREPARE_DELAYED_WORK(&old->work, fw_device_update);
#define RETRY_DELAY (3 * HZ) schedule_delayed_work(&old->work, 0);
#define INITIAL_DELAY (HZ / 2)
if (current_node == card->root_node)
fw_schedule_bm_work(card, 0);
match = 1;
}
spin_unlock_irq(&card->lock);
up_read(&fw_device_rwsem);
return match;
}
static void fw_device_init(struct work_struct *work) static void fw_device_init(struct work_struct *work)
{ {
struct fw_device *device = struct fw_device *device =
container_of(work, struct fw_device, work.work); container_of(work, struct fw_device, work.work);
struct device *revived_dev;
int minor, err; int minor, err;
/* /*
...@@ -696,6 +762,15 @@ static void fw_device_init(struct work_struct *work) ...@@ -696,6 +762,15 @@ static void fw_device_init(struct work_struct *work)
return; return;
} }
revived_dev = device_find_child(device->card->device,
device, lookup_existing_device);
if (revived_dev) {
put_device(revived_dev);
fw_device_release(&device->device);
return;
}
device_initialize(&device->device); device_initialize(&device->device);
fw_device_get(device); fw_device_get(device);
...@@ -734,9 +809,10 @@ static void fw_device_init(struct work_struct *work) ...@@ -734,9 +809,10 @@ static void fw_device_init(struct work_struct *work)
* fw_node_event(). * fw_node_event().
*/ */
if (atomic_cmpxchg(&device->state, if (atomic_cmpxchg(&device->state,
FW_DEVICE_INITIALIZING, FW_DEVICE_INITIALIZING,
FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN) { FW_DEVICE_RUNNING) == FW_DEVICE_GONE) {
fw_device_shutdown(work); PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
schedule_delayed_work(&device->work, SHUTDOWN_DELAY);
} else { } else {
if (device->config_rom_retries) if (device->config_rom_retries)
fw_notify("created device %s: GUID %08x%08x, S%d00, " fw_notify("created device %s: GUID %08x%08x, S%d00, "
...@@ -847,8 +923,8 @@ static void fw_device_refresh(struct work_struct *work) ...@@ -847,8 +923,8 @@ static void fw_device_refresh(struct work_struct *work)
case REREAD_BIB_UNCHANGED: case REREAD_BIB_UNCHANGED:
if (atomic_cmpxchg(&device->state, if (atomic_cmpxchg(&device->state,
FW_DEVICE_INITIALIZING, FW_DEVICE_INITIALIZING,
FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN) FW_DEVICE_RUNNING) == FW_DEVICE_GONE)
goto gone; goto gone;
fw_device_update(work); fw_device_update(work);
...@@ -879,8 +955,8 @@ static void fw_device_refresh(struct work_struct *work) ...@@ -879,8 +955,8 @@ static void fw_device_refresh(struct work_struct *work)
create_units(device); create_units(device);
if (atomic_cmpxchg(&device->state, if (atomic_cmpxchg(&device->state,
FW_DEVICE_INITIALIZING, FW_DEVICE_INITIALIZING,
FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN) FW_DEVICE_RUNNING) == FW_DEVICE_GONE)
goto gone; goto gone;
fw_notify("refreshed device %s\n", dev_name(&device->device)); fw_notify("refreshed device %s\n", dev_name(&device->device));
...@@ -890,8 +966,9 @@ static void fw_device_refresh(struct work_struct *work) ...@@ -890,8 +966,9 @@ static void fw_device_refresh(struct work_struct *work)
give_up: give_up:
fw_notify("giving up on refresh of device %s\n", dev_name(&device->device)); fw_notify("giving up on refresh of device %s\n", dev_name(&device->device));
gone: gone:
atomic_set(&device->state, FW_DEVICE_SHUTDOWN); atomic_set(&device->state, FW_DEVICE_GONE);
fw_device_shutdown(work); PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
schedule_delayed_work(&device->work, SHUTDOWN_DELAY);
out: out:
if (node_id == card->root_node->node_id) if (node_id == card->root_node->node_id)
fw_schedule_bm_work(card, 0); fw_schedule_bm_work(card, 0);
...@@ -995,9 +1072,9 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) ...@@ -995,9 +1072,9 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
*/ */
device = node->data; device = node->data;
if (atomic_xchg(&device->state, if (atomic_xchg(&device->state,
FW_DEVICE_SHUTDOWN) == FW_DEVICE_RUNNING) { FW_DEVICE_GONE) == FW_DEVICE_RUNNING) {
PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown); PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
schedule_delayed_work(&device->work, 0); schedule_delayed_work(&device->work, SHUTDOWN_DELAY);
} }
break; break;
} }
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
enum fw_device_state { enum fw_device_state {
FW_DEVICE_INITIALIZING, FW_DEVICE_INITIALIZING,
FW_DEVICE_RUNNING, FW_DEVICE_RUNNING,
FW_DEVICE_GONE,
FW_DEVICE_SHUTDOWN, FW_DEVICE_SHUTDOWN,
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment