Print this page
OS-1987 disks in zpools never go away when pulled

@@ -19,11 +19,12 @@
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
  */
 
 #include <sys/zfs_context.h>
 #include <sys/zfs_zone.h>
 #include <sys/spa_impl.h>

@@ -40,10 +41,152 @@
  * Virtual device vector for disks.
  */
 
 extern ldi_ident_t zfs_li;
 
+static void vdev_disk_close(vdev_t *);
+
+typedef struct vdev_disk_buf {
+        buf_t   vdb_buf;
+        zio_t   *vdb_io;
+} vdev_disk_buf_t;
+
+typedef struct vdev_disk_ldi_cb {
+        list_node_t             lcb_next;
+        ldi_callback_id_t       lcb_id;
+} vdev_disk_ldi_cb_t;
+
+static void vdev_disk_alloc(vdev_t *vd)
+{
+        vdev_disk_t *dvd;
+
+        dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
+        /*
+         * Create the LDI event callback list.
+         */
+        list_create(&dvd->vd_ldi_cbs, sizeof (vdev_disk_ldi_cb_t),
+            offsetof(vdev_disk_ldi_cb_t, lcb_next));
+}
+
+static void vdev_disk_free(vdev_t *vd)
+{
+        vdev_disk_t *dvd = vd->vdev_tsd;
+        vdev_disk_ldi_cb_t *lcb;
+
+        /*
+         * We have already closed the LDI handle. Clean up the LDI event
+         * callbacks and free vd->vdev_tsd.
+         */
+        while ((lcb = list_head(&dvd->vd_ldi_cbs)) != NULL) {
+                list_remove(&dvd->vd_ldi_cbs, lcb);
+                (void) ldi_ev_remove_callbacks(lcb->lcb_id);
+                kmem_free(lcb, sizeof (vdev_disk_ldi_cb_t));
+        }
+        list_destroy(&dvd->vd_ldi_cbs);
+        kmem_free(dvd, sizeof (vdev_disk_t));
+        vd->vdev_tsd = NULL;
+}
+
+/* ARGSUSED */
+static int
+vdev_disk_off_notify(ldi_handle_t lh, ldi_ev_cookie_t ecookie, void *arg,
+    void *ev_data)
+{
+        vdev_t *vd = (vdev_t *)arg;
+        vdev_disk_t *dvd = vd->vdev_tsd;
+
+        /*
+         * Ignore events other than offline.
+         */
+        if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
+                return (LDI_EV_SUCCESS);
+
+        /*
+         * All LDI handles must be closed for the state change to succeed, so
+         * call on vdev_disk_close() to do this.
+         *
+         * We inform vdev_disk_close that it is being called from offline
+         * notify context so it will defer cleanup of LDI event callbacks and
+         * freeing of vd->vdev_tsd to the offline finalize or a reopen.
+         */
+        dvd->vd_ldi_offline = B_TRUE;
+        vdev_disk_close(vd);
+
+        /*
+         * Now that the device is closed, request that the spa_async_thread
+         * mark the device as REMOVED and notify FMA of the removal.
+         */
+        zfs_post_remove(vd->vdev_spa, vd);
+        vd->vdev_remove_wanted = B_TRUE;
+        spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
+
+        return (LDI_EV_SUCCESS);
+}
+
+/* ARGSUSED */
+static void
+vdev_disk_off_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie,
+    int ldi_result, void *arg, void *ev_data)
+{
+        vdev_t *vd = (vdev_t *)arg;
+        vdev_disk_t *dvd = vd->vdev_tsd;
+        vdev_disk_ldi_cb_t *lcb;
+
+        /*
+         * Ignore events other than offline.
+         */
+        if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
+                return;
+
+        /*
+         * We have already closed the LDI handle in notify.
+         * Clean up the LDI event callbacks and free vd->vdev_tsd.
+         */
+        vdev_disk_free(vd);
+
+        /*
+         * Request that the vdev be reopened if the offline state change was
+         * unsuccessful.
+         */
+        if (ldi_result != LDI_EV_SUCCESS) {
+                vd->vdev_probe_wanted = B_TRUE;
+                spa_async_request(vd->vdev_spa, SPA_ASYNC_PROBE);
+        }
+}
+
+static ldi_ev_callback_t vdev_disk_off_callb = {
+        .cb_vers = LDI_EV_CB_VERS,
+        .cb_notify = vdev_disk_off_notify,
+        .cb_finalize = vdev_disk_off_finalize
+};
+
+/* ARGSUSED */
+static void
+vdev_disk_dgrd_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie,
+    int ldi_result, void *arg, void *ev_data)
+{
+        vdev_t *vd = (vdev_t *)arg;
+
+        /*
+         * Ignore events other than degrade.
+         */
+        if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_DEGRADE) != 0)
+                return;
+
+        /*
+         * Degrade events always succeed. Mark the vdev as degraded.
+         * This status is purely informative for the user.
+         */
+        (void) vdev_degrade(vd->vdev_spa, vd->vdev_guid, 0);
+}
+
+static ldi_ev_callback_t vdev_disk_dgrd_callb = {
+        .cb_vers = LDI_EV_CB_VERS,
+        .cb_notify = NULL,
+        .cb_finalize = vdev_disk_dgrd_finalize
+};
+
 static void
 vdev_disk_hold(vdev_t *vd)
 {
         ddi_devid_t devid;
         char *minor;

@@ -134,12 +277,14 @@
 static int
 vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
     uint64_t *ashift)
 {
         spa_t *spa = vd->vdev_spa;
-        vdev_disk_t *dvd;
+        vdev_disk_t *dvd = vd->vdev_tsd;
         struct dk_minfo_ext dkmext;
+        ldi_ev_cookie_t ecookie;
+        vdev_disk_ldi_cb_t *lcb;
         int error;
         dev_t dev;
         int otyp;
 
         /*

@@ -152,17 +297,29 @@
 
         /*
          * Reopen the device if it's not currently open. Otherwise,
          * just update the physical size of the device.
          */
-        if (vd->vdev_tsd != NULL) {
+        if (dvd != NULL) {
+                if (dvd->vd_ldi_offline && dvd->vd_lh == NULL) {
+                        /*
+                         * If we are opening a device in its offline notify
+                         * context, the LDI handle was just closed. Clean
+                         * up the LDI event callbacks and free vd->vdev_tsd.
+                         */
+                        vdev_disk_free(vd);
+                } else {
                 ASSERT(vd->vdev_reopening);
-                dvd = vd->vdev_tsd;
                 goto skip_open;
         }
+        }
 
-        dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
+        /*
+         * Create vd->vdev_tsd.
+         */
+        vdev_disk_alloc(vd);
+        dvd = vd->vdev_tsd;
 
         /*
          * When opening a disk device, we want to preserve the user's original
          * intent.  We always want to open the device by the path the user gave
          * us, even if it is one of multiple paths to the same device.  But we

@@ -192,27 +349,32 @@
                 ddi_devid_t devid;
 
                 if (vd->vdev_wholedisk == -1ULL) {
                         size_t len = strlen(vd->vdev_path) + 3;
                         char *buf = kmem_alloc(len, KM_SLEEP);
-                        ldi_handle_t lh;
 
                         (void) snprintf(buf, len, "%ss0", vd->vdev_path);
 
-                        if (ldi_open_by_name(buf, spa_mode(spa), kcred,
-                            &lh, zfs_li) == 0) {
+                        error = ldi_open_by_name(buf, spa_mode(spa), kcred,
+                            &dvd->vd_lh, zfs_li);
+                        if (error == 0) {
                                 spa_strfree(vd->vdev_path);
                                 vd->vdev_path = buf;
                                 vd->vdev_wholedisk = 1ULL;
-                                (void) ldi_close(lh, spa_mode(spa), kcred);
                         } else {
                                 kmem_free(buf, len);
                         }
                 }
 
-                error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred,
-                    &dvd->vd_lh, zfs_li);
+                /*
+                 * If we have not yet opened the device, try to open it by the
+                 * specified path.
+                 */
+                if (error != 0) {
+                        error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
+                            kcred, &dvd->vd_lh, zfs_li);
+                }
 
                 /*
                  * Compare the devid to the stored value.
                  */
                 if (error == 0 && vd->vdev_devid != NULL &&

@@ -292,10 +454,31 @@
                 if (minorname)
                         kmem_free(minorname, strlen(minorname) + 1);
                 kmem_free(physpath, MAXPATHLEN);
         }
 
+        /*
+         * Register callbacks for the LDI offline event.
+         */
+        if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) ==
+            LDI_EV_SUCCESS) {
+                lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
+                list_insert_tail(&dvd->vd_ldi_cbs, lcb);
+                (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
+                    &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id);
+        }
+
+        /*
+         * Register callbacks for the LDI degrade event.
+         */
+        if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) ==
+            LDI_EV_SUCCESS) {
+                lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
+                list_insert_tail(&dvd->vd_ldi_cbs, lcb);
+                (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
+                    &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id);
+        }
 skip_open:
         /*
          * Determine the actual size of the device.
          */
         if (ldi_get_size(dvd->vd_lh, psize) != 0) {

@@ -343,26 +526,40 @@
 
 static void
 vdev_disk_close(vdev_t *vd)
 {
         vdev_disk_t *dvd = vd->vdev_tsd;
+        vdev_disk_ldi_cb_t *lcb;
 
         if (vd->vdev_reopening || dvd == NULL)
                 return;
 
-        if (dvd->vd_minor != NULL)
+        if (dvd->vd_minor != NULL) {
                 ddi_devid_str_free(dvd->vd_minor);
+                dvd->vd_minor = NULL;
+        }
 
-        if (dvd->vd_devid != NULL)
+        if (dvd->vd_devid != NULL) {
                 ddi_devid_free(dvd->vd_devid);
+                dvd->vd_devid = NULL;
+        }
 
-        if (dvd->vd_lh != NULL)
+        if (dvd->vd_lh != NULL) {
                 (void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
+                dvd->vd_lh = NULL;
+        }
 
         vd->vdev_delayed_close = B_FALSE;
-        kmem_free(dvd, sizeof (vdev_disk_t));
-        vd->vdev_tsd = NULL;
+        /*
+         * If we closed the LDI handle due to an offline notify from LDI,
+         * don't free vd->vdev_tsd or unregister the callbacks here;
+         * the offline finalize callback or a reopen will take care of it.
+         */
+        if (dvd->vd_ldi_offline)
+                return;
+
+        vdev_disk_free(vd);
 }
 
 int
 vdev_disk_physio(vdev_t *vd, caddr_t data,
     size_t size, uint64_t offset, int flags)

@@ -371,11 +568,11 @@
 
         /*
          * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
          * Nothing to be done here but return failure.
          */
-        if (dvd == NULL)
+        if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL))
                 return (EIO);
 
         ASSERT(vd->vdev_ops == &vdev_disk_ops);
         return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags));
 }

@@ -458,10 +655,19 @@
         vdev_buf_t *vb;
         struct dk_callback *dkc;
         buf_t *bp;
         int error;
 
+        /*
+         * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
+         * Nothing to be done here but return failure.
+         */
+        if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) {
+                zio->io_error = ENXIO;
+                return (ZIO_PIPELINE_CONTINUE);
+        }
+
         if (zio->io_type == ZIO_TYPE_IOCTL) {
                 /* XXPOLICY */
                 if (!vdev_readable(vd)) {
                         zio->io_error = ENXIO;
                         return (ZIO_PIPELINE_CONTINUE);