Print this page
OS-1987 disks in zpools never go away when pulled

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/vdev_disk.c
          +++ new/usr/src/uts/common/fs/zfs/vdev_disk.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2012 by Delphix. All rights reserved.
  24      - * Copyright (c) 2012, Joyent, Inc. All rights reserved.
       24 + * Copyright (c) 2013, Joyent, Inc. All rights reserved.
       25 + * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
  25   26   */
  26   27  
  27   28  #include <sys/zfs_context.h>
  28   29  #include <sys/zfs_zone.h>
  29   30  #include <sys/spa_impl.h>
  30   31  #include <sys/refcount.h>
  31   32  #include <sys/vdev_disk.h>
  32   33  #include <sys/vdev_impl.h>
  33   34  #include <sys/fs/zfs.h>
  34   35  #include <sys/zio.h>
  35   36  #include <sys/sunldi.h>
  36   37  #include <sys/efi_partition.h>
  37   38  #include <sys/fm/fs/zfs.h>
  38   39  
  39   40  /*
  40   41   * Virtual device vector for disks.
  41   42   */
  42   43  
  43   44  extern ldi_ident_t zfs_li;
  44   45  
       46 +static void vdev_disk_close(vdev_t *);
       47 +
       48 +typedef struct vdev_disk_buf {
                   /* buf_t embedded by value as the first member. */
       49 +        buf_t   vdb_buf;
                   /*
                    * zio paired with vdb_buf.
                    * NOTE(review): no user of this type is visible in this excerpt;
                    * presumably the zio that issued the buf -- confirm.
                    */
       50 +        zio_t   *vdb_io;
       51 +} vdev_disk_buf_t;
       52 +
       53 +typedef struct vdev_disk_ldi_cb {
                   /* Linkage on the per-disk vd_ldi_cbs list (see list_create()). */
       54 +        list_node_t             lcb_next;
                   /*
                    * Callback id written by ldi_ev_register_callbacks() and later
                    * handed to ldi_ev_remove_callbacks() when the record is freed.
                    */
       55 +        ldi_callback_id_t       lcb_id;
       56 +} vdev_disk_ldi_cb_t;
       57 +
       58 +static void vdev_disk_alloc(vdev_t *vd)
       59 +{
                   /*
                    * Allocate the zero-filled per-disk state for 'vd' and store it in
                    * vd->vdev_tsd. The allocation uses KM_SLEEP and its result is used
                    * without a NULL check. Nothing is returned; the caller picks the
                    * new state up from vd->vdev_tsd.
                    */
       60 +        vdev_disk_t *dvd;
       61 +
       62 +        dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
       63 +        /*
       64 +         * Create the LDI event callback list.
       65 +         */
       66 +        list_create(&dvd->vd_ldi_cbs, sizeof (vdev_disk_ldi_cb_t),
       67 +            offsetof(vdev_disk_ldi_cb_t, lcb_next));
       68 +}
       69 +
       70 +static void vdev_disk_free(vdev_t *vd)
       71 +{
          +        /*
          +         * Tear down the per-disk state hanging off vd->vdev_tsd: unregister
          +         * and free every LDI event callback record, destroy the list, free
          +         * the vdev_disk_t and clear vd->vdev_tsd. The LDI handle itself is
          +         * not closed here.
          +         */
       72 +        vdev_disk_t *dvd = vd->vdev_tsd;
       73 +        vdev_disk_ldi_cb_t *lcb;
       74 +
          +        /*
          +         * Nothing to do if the state is already gone. The offline finalize
          +         * callback calls this function unconditionally, so tolerate a vdev
          +         * whose vdev_tsd has already been released instead of
          +         * dereferencing a NULL pointer.
          +         */
          +        if (dvd == NULL)
          +                return;
          +
       75 +        /*
       76 +         * We have already closed the LDI handle. Clean up the LDI event
       77 +         * callbacks and free vd->vdev_tsd.
       78 +         */
       79 +        while ((lcb = list_head(&dvd->vd_ldi_cbs)) != NULL) {
       80 +                list_remove(&dvd->vd_ldi_cbs, lcb);
       81 +                (void) ldi_ev_remove_callbacks(lcb->lcb_id);
       82 +                kmem_free(lcb, sizeof (vdev_disk_ldi_cb_t));
       83 +        }
       84 +        list_destroy(&dvd->vd_ldi_cbs);
       85 +        kmem_free(dvd, sizeof (vdev_disk_t));
       86 +        vd->vdev_tsd = NULL;
       87 +}
       88 +
       89 +/* ARGSUSED */
       90 +static int
       91 +vdev_disk_off_notify(ldi_handle_t lh, ldi_ev_cookie_t ecookie, void *arg,
       92 +    void *ev_data)
       93 +{
                   /*
                    * LDI notify callback for the offline event. 'arg' is the vdev_t
                    * that was passed at registration time; 'lh' and 'ev_data' are not
                    * used. Always returns LDI_EV_SUCCESS.
                    *
                    * NOTE(review): dvd is dereferenced below without a NULL check;
                    * this assumes vd->vdev_tsd is still allocated whenever the
                    * callback fires -- confirm.
                    */
       94 +        vdev_t *vd = (vdev_t *)arg;
       95 +        vdev_disk_t *dvd = vd->vdev_tsd;
       96 +
       97 +        /*
       98 +         * Ignore events other than offline.
       99 +         */
      100 +        if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
      101 +                return (LDI_EV_SUCCESS);
      102 +
      103 +        /*
      104 +         * All LDI handles must be closed for the state change to succeed, so
      105 +         * call on vdev_disk_close() to do this.
      106 +         *
      107 +         * We inform vdev_disk_close that it is being called from offline
      108 +         * notify context so it will defer cleanup of LDI event callbacks and
      109 +         * freeing of vd->vdev_tsd to the offline finalize or a reopen.
      110 +         */
                   /* The flag must be set before the close so the close sees it. */
      111 +        dvd->vd_ldi_offline = B_TRUE;
      112 +        vdev_disk_close(vd);
      113 +
      114 +        /*
      115 +         * Now that the device is closed, request that the spa_async_thread
      116 +         * mark the device as REMOVED and notify FMA of the removal.
      117 +         */
      118 +        zfs_post_remove(vd->vdev_spa, vd);
      119 +        vd->vdev_remove_wanted = B_TRUE;
      120 +        spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
      121 +
      122 +        return (LDI_EV_SUCCESS);
      123 +}
      124 +
      125 +/* ARGSUSED */
      126 +static void
      127 +vdev_disk_off_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie,
      128 +    int ldi_result, void *arg, void *ev_data)
      129 +{
          +        /*
          +         * LDI finalize callback for the offline event. 'arg' is the vdev_t
          +         * that was passed at registration time; 'ldi_result' reports
          +         * whether the offline state change succeeded. 'lh' and 'ev_data'
          +         * are not used.
          +         */
      130 +        vdev_t *vd = (vdev_t *)arg;
      133 +
      134 +        /*
      135 +         * Ignore events other than offline.
      136 +         */
      137 +        if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
      138 +                return;
      139 +
      140 +        /*
      141 +         * We have already closed the LDI handle in notify.
      142 +         * Clean up the LDI event callbacks and free vd->vdev_tsd.
      143 +         */
      144 +        vdev_disk_free(vd);
      145 +
      146 +        /*
      147 +         * Request that the vdev be reopened if the offline state change was
      148 +         * unsuccessful.
      149 +         */
      150 +        if (ldi_result != LDI_EV_SUCCESS) {
      151 +                vd->vdev_probe_wanted = B_TRUE;
      152 +                spa_async_request(vd->vdev_spa, SPA_ASYNC_PROBE);
      153 +        }
      154 +}
      155 +
      156 +static ldi_ev_callback_t vdev_disk_off_callb = {
                   /*
                    * Callback vector registered against the LDI_EV_OFFLINE cookie in
                    * vdev_disk_open(): notify runs first, finalize receives the result.
                    */
      157 +        .cb_vers = LDI_EV_CB_VERS,
      158 +        .cb_notify = vdev_disk_off_notify,
      159 +        .cb_finalize = vdev_disk_off_finalize
      160 +};
      161 +
      162 +/* ARGSUSED */
      163 +static void
      164 +vdev_disk_dgrd_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie,
      165 +    int ldi_result, void *arg, void *ev_data)
      166 +{
                   /*
                    * LDI finalize callback for the degrade event. 'arg' is the vdev_t
                    * that was passed at registration time; 'lh', 'ldi_result' and
                    * 'ev_data' are not used.
                    */
      167 +        vdev_t *vd = (vdev_t *)arg;
      168 +
      169 +        /*
      170 +         * Ignore events other than degrade.
      171 +         */
      172 +        if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_DEGRADE) != 0)
      173 +                return;
      174 +
      175 +        /*
      176 +         * Degrade events always succeed. Mark the vdev as degraded.
      177 +         * This status is purely informative for the user.
      178 +         */
                   /* The return value of vdev_degrade() is deliberately discarded. */
      179 +        (void) vdev_degrade(vd->vdev_spa, vd->vdev_guid, 0);
      180 +}
      181 +
      182 +static ldi_ev_callback_t vdev_disk_dgrd_callb = {
                   /*
                    * Callback vector registered against the LDI_EV_DEGRADE cookie in
                    * vdev_disk_open(). Only a finalize handler is supplied; no notify
                    * handler is installed for this event.
                    */
      183 +        .cb_vers = LDI_EV_CB_VERS,
      184 +        .cb_notify = NULL,
      185 +        .cb_finalize = vdev_disk_dgrd_finalize
      186 +};
      187 +
  45  188  static void
  46  189  vdev_disk_hold(vdev_t *vd)
  47  190  {
  48  191          ddi_devid_t devid;
  49  192          char *minor;
  50  193  
  51  194          ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
  52  195  
  53  196          /*
  54  197           * We must have a pathname, and it must be absolute.
↓ open down ↓ 74 lines elided ↑ open up ↑
 129  272          }
 130  273          kmem_free(dk_ioc.dki_data, efisize);
 131  274          return (avail_space);
 132  275  }
 133  276  
 134  277  static int
 135  278  vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
 136  279      uint64_t *ashift)
 137  280  {
 138  281          spa_t *spa = vd->vdev_spa;
 139      -        vdev_disk_t *dvd;
      282 +        vdev_disk_t *dvd = vd->vdev_tsd;
 140  283          struct dk_minfo_ext dkmext;
      284 +        ldi_ev_cookie_t ecookie;
      285 +        vdev_disk_ldi_cb_t *lcb;
 141  286          int error;
 142  287          dev_t dev;
 143  288          int otyp;
 144  289  
 145  290          /*
 146  291           * We must have a pathname, and it must be absolute.
 147  292           */
 148  293          if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
 149  294                  vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
 150  295                  return (EINVAL);
 151  296          }
 152  297  
 153  298          /*
 154  299           * Reopen the device if it's not currently open. Otherwise,
 155  300           * just update the physical size of the device.
 156  301           */
 157      -        if (vd->vdev_tsd != NULL) {
 158      -                ASSERT(vd->vdev_reopening);
 159      -                dvd = vd->vdev_tsd;
 160      -                goto skip_open;
      302 +        if (dvd != NULL) {
      303 +                if (dvd->vd_ldi_offline && dvd->vd_lh == NULL) {
      304 +                        /*
      305 +                         * If we are opening a device in its offline notify
      306 +                         * context, the LDI handle was just closed. Clean
      307 +                         * up the LDI event callbacks and free vd->vdev_tsd.
      308 +                         */
      309 +                        vdev_disk_free(vd);
      310 +                } else {
      311 +                        ASSERT(vd->vdev_reopening);
      312 +                        goto skip_open;
      313 +                }
 161  314          }
 162  315  
 163      -        dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
      316 +        /*
      317 +         * Create vd->vdev_tsd.
      318 +         */
      319 +        vdev_disk_alloc(vd);
      320 +        dvd = vd->vdev_tsd;
 164  321  
 165  322          /*
 166  323           * When opening a disk device, we want to preserve the user's original
 167  324           * intent.  We always want to open the device by the path the user gave
 168  325           * us, even if it is one of multiple paths to the same device.  But we
 169  326           * also want to be able to survive disks being removed/recabled.
 170  327           * Therefore the sequence of opening devices is:
 171  328           *
 172  329           * 1. Try opening the device by path.  For legacy pools without the
 173  330           *    'whole_disk' property, attempt to fix the path by appending 's0'.
↓ open down ↓ 13 lines elided ↑ open up ↑
 187  344          }
 188  345  
 189  346          error = EINVAL;         /* presume failure */
 190  347  
 191  348          if (vd->vdev_path != NULL) {
 192  349                  ddi_devid_t devid;
 193  350  
 194  351                  if (vd->vdev_wholedisk == -1ULL) {
 195  352                          size_t len = strlen(vd->vdev_path) + 3;
 196  353                          char *buf = kmem_alloc(len, KM_SLEEP);
 197      -                        ldi_handle_t lh;
 198  354  
 199  355                          (void) snprintf(buf, len, "%ss0", vd->vdev_path);
 200  356  
 201      -                        if (ldi_open_by_name(buf, spa_mode(spa), kcred,
 202      -                            &lh, zfs_li) == 0) {
      357 +                        error = ldi_open_by_name(buf, spa_mode(spa), kcred,
      358 +                            &dvd->vd_lh, zfs_li);
      359 +                        if (error == 0) {
 203  360                                  spa_strfree(vd->vdev_path);
 204  361                                  vd->vdev_path = buf;
 205  362                                  vd->vdev_wholedisk = 1ULL;
 206      -                                (void) ldi_close(lh, spa_mode(spa), kcred);
 207  363                          } else {
 208  364                                  kmem_free(buf, len);
 209  365                          }
 210  366                  }
 211  367  
 212      -                error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred,
 213      -                    &dvd->vd_lh, zfs_li);
      368 +                /*
      369 +                 * If we have not yet opened the device, try to open it by the
      370 +                 * specified path.
      371 +                 */
      372 +                if (error != 0) {
      373 +                        error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
      374 +                            kcred, &dvd->vd_lh, zfs_li);
      375 +                }
 214  376  
 215  377                  /*
 216  378                   * Compare the devid to the stored value.
 217  379                   */
 218  380                  if (error == 0 && vd->vdev_devid != NULL &&
 219  381                      ldi_get_devid(dvd->vd_lh, &devid) == 0) {
 220  382                          if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
 221  383                                  error = EINVAL;
 222  384                                  (void) ldi_close(dvd->vd_lh, spa_mode(spa),
 223  385                                      kcred);
↓ open down ↓ 63 lines elided ↑ open up ↑
 287  449                                  spa_strfree(vd->vdev_physpath);
 288  450                          (void) strlcat(physpath, ":", MAXPATHLEN);
 289  451                          (void) strlcat(physpath, minorname, MAXPATHLEN);
 290  452                          vd->vdev_physpath = spa_strdup(physpath);
 291  453                  }
 292  454                  if (minorname)
 293  455                          kmem_free(minorname, strlen(minorname) + 1);
 294  456                  kmem_free(physpath, MAXPATHLEN);
 295  457          }
 296  458  
      459 +        /*
      460 +         * Register callbacks for the LDI offline event.
      461 +         */
      462 +        if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) ==
      463 +            LDI_EV_SUCCESS) {
      464 +                lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
      465 +                list_insert_tail(&dvd->vd_ldi_cbs, lcb);
      466 +                (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
      467 +                    &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id);
      468 +        }
      469 +
      470 +        /*
      471 +         * Register callbacks for the LDI degrade event.
      472 +         */
      473 +        if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) ==
      474 +            LDI_EV_SUCCESS) {
      475 +                lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
      476 +                list_insert_tail(&dvd->vd_ldi_cbs, lcb);
      477 +                (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
      478 +                    &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id);
      479 +        }
 297  480  skip_open:
 298  481          /*
 299  482           * Determine the actual size of the device.
 300  483           */
 301  484          if (ldi_get_size(dvd->vd_lh, psize) != 0) {
 302  485                  vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
 303  486                  return (EINVAL);
 304  487          }
 305  488  
 306  489          /*
↓ open down ↓ 31 lines elided ↑ open up ↑
 338  521           */
 339  522          vd->vdev_nowritecache = B_FALSE;
 340  523  
 341  524          return (0);
 342  525  }
 343  526  
 344  527  static void
 345  528  vdev_disk_close(vdev_t *vd)
 346  529  {
          +        /*
          +         * Release the resources held in vd->vdev_tsd: the minor name, the
          +         * devid and the LDI handle. Each pointer is cleared after it is
          +         * released because the vdev_disk_t may outlive this call (see the
          +         * vd_ldi_offline check below). Does nothing while the vdev is
          +         * being reopened or when there is no per-disk state.
          +         */
 347  530          vdev_disk_t *dvd = vd->vdev_tsd;
 348  532  
 349  533          if (vd->vdev_reopening || dvd == NULL)
 350  534                  return;
 351  535  
 352      -        if (dvd->vd_minor != NULL)
      536 +        if (dvd->vd_minor != NULL) {
 353  537                  ddi_devid_str_free(dvd->vd_minor);
      538 +                dvd->vd_minor = NULL;
      539 +        }
 354  540  
 355      -        if (dvd->vd_devid != NULL)
      541 +        if (dvd->vd_devid != NULL) {
 356  542                  ddi_devid_free(dvd->vd_devid);
      543 +                dvd->vd_devid = NULL;
      544 +        }
 357  545  
 358      -        if (dvd->vd_lh != NULL)
      546 +        if (dvd->vd_lh != NULL) {
 359  547                  (void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
      548 +                dvd->vd_lh = NULL;
      549 +        }
 360  550  
 361  551          vd->vdev_delayed_close = B_FALSE;
 362      -        kmem_free(dvd, sizeof (vdev_disk_t));
 363      -        vd->vdev_tsd = NULL;
      552 +        /*
      553 +         * If we closed the LDI handle due to an offline notify from LDI,
      554 +         * don't free vd->vdev_tsd or unregister the callbacks here;
      555 +         * the offline finalize callback or a reopen will take care of it.
      556 +         */
      557 +        if (dvd->vd_ldi_offline)
      558 +                return;
      559 +
      560 +        vdev_disk_free(vd);
 364  561  }
 365  562  
 366  563  int
 367  564  vdev_disk_physio(vdev_t *vd, caddr_t data,
 368  565      size_t size, uint64_t offset, int flags)
 369  566  {
                   /*
                    * Perform physical I/O against the disk backing 'vd' by forwarding
                    * 'data', 'size', 'offset' and 'flags' to vdev_disk_ldi_physio() on
                    * the vdev's LDI handle. Returns EIO when the vdev has no per-disk
                    * state, or when it was offlined and its handle is already closed;
                    * otherwise returns whatever vdev_disk_ldi_physio() returns.
                    */
 370  567          vdev_disk_t *dvd = vd->vdev_tsd;
 371  568  
 372  569          /*
 373  570           * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
 374  571           * Nothing to be done here but return failure.
 375  572           */
 376      -        if (dvd == NULL)
      573 +        if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL))
 377  574                  return (EIO);
 378  575  
 379  576          ASSERT(vd->vdev_ops == &vdev_disk_ops);
 380  577          return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags));
 381  578  }
 382  579  
 383  580  int
 384  581  vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data,
 385  582      size_t size, uint64_t offset, int flags)
 386  583  {
↓ open down ↓ 66 lines elided ↑ open up ↑
 453  650  static int
 454  651  vdev_disk_io_start(zio_t *zio)
 455  652  {
 456  653          vdev_t *vd = zio->io_vd;
 457  654          vdev_disk_t *dvd = vd->vdev_tsd;
 458  655          vdev_buf_t *vb;
 459  656          struct dk_callback *dkc;
 460  657          buf_t *bp;
 461  658          int error;
 462  659  
      660 +        /*
      661 +         * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
      662 +         * Nothing to be done here but return failure.
      663 +         */
      664 +        if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) {
      665 +                zio->io_error = ENXIO;
      666 +                return (ZIO_PIPELINE_CONTINUE);
      667 +        }
      668 +
 463  669          if (zio->io_type == ZIO_TYPE_IOCTL) {
 464  670                  /* XXPOLICY */
 465  671                  if (!vdev_readable(vd)) {
 466  672                          zio->io_error = ENXIO;
 467  673                          return (ZIO_PIPELINE_CONTINUE);
 468  674                  }
 469  675  
 470  676                  switch (zio->io_cmd) {
 471  677  
 472  678                  case DKIOCFLUSHWRITECACHE:
↓ open down ↓ 198 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX