Print this page
LOCAL: listen for ldi notifications of disk offline/degrade

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/vdev_disk.c
          +++ new/usr/src/uts/common/fs/zfs/vdev_disk.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2012 by Delphix. All rights reserved.
  24   24   * Copyright (c) 2012, Joyent, Inc. All rights reserved.
       25 + * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
  25   26   */
  26   27  
  27   28  #include <sys/zfs_context.h>
  28   29  #include <sys/zfs_zone.h>
  29   30  #include <sys/spa_impl.h>
  30   31  #include <sys/refcount.h>
  31   32  #include <sys/vdev_disk.h>
  32   33  #include <sys/vdev_impl.h>
  33   34  #include <sys/fs/zfs.h>
  34   35  #include <sys/zio.h>
  35   36  #include <sys/sunldi.h>
  36   37  #include <sys/efi_partition.h>
  37   38  #include <sys/fm/fs/zfs.h>
  38   39  
  39   40  /*
  40   41   * Virtual device vector for disks.
  41   42   */
  42   43  
  43   44  extern ldi_ident_t zfs_li;
  44   45  
       46 +static void vdev_disk_close(vdev_t *);
       47 +
  45   48  typedef struct vdev_disk_buf {
  46   49          buf_t   vdb_buf;
  47   50          zio_t   *vdb_io;
  48   51  } vdev_disk_buf_t;
  49   52  
       53 +typedef struct vdev_disk_ldi_cb {
       54 +        list_node_t             lcb_next;
       55 +        ldi_callback_id_t       lcb_id;
       56 +} vdev_disk_ldi_cb_t;
       57 +
       58 +static void vdev_disk_alloc(vdev_t *vd)
       59 +{
       60 +        vdev_disk_t *dvd;
       61 +
       62 +        dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
       63 +        /*
       64 +         * Create the LDI event callback list.
       65 +         */
       66 +        list_create(&dvd->vd_ldi_cbs, sizeof (vdev_disk_ldi_cb_t),
       67 +            offsetof(vdev_disk_ldi_cb_t, lcb_next));
       68 +}
       69 +
       70 +static void vdev_disk_free(vdev_t *vd)
       71 +{
       72 +        vdev_disk_t *dvd = vd->vdev_tsd;
       73 +        vdev_disk_ldi_cb_t *lcb;
       74 +
       75 +        /*
       76 +         * We have already closed the LDI handle. Clean up the LDI event
       77 +         * callbacks and free vd->vdev_tsd.
       78 +         */
       79 +        while ((lcb = list_head(&dvd->vd_ldi_cbs)) != NULL) {
       80 +                list_remove(&dvd->vd_ldi_cbs, lcb);
       81 +                (void) ldi_ev_remove_callbacks(lcb->lcb_id);
       82 +                kmem_free(lcb, sizeof (vdev_disk_ldi_cb_t));
       83 +        }
       84 +        list_destroy(&dvd->vd_ldi_cbs);
       85 +        kmem_free(dvd, sizeof (vdev_disk_t));
       86 +        vd->vdev_tsd = NULL;
       87 +}
       88 +
       89 +/* ARGSUSED */
       90 +static int
       91 +vdev_disk_off_notify(ldi_handle_t lh, ldi_ev_cookie_t ecookie, void *arg,
       92 +    void *ev_data)
       93 +{
       94 +        vdev_t *vd = (vdev_t *)arg;
       95 +        vdev_disk_t *dvd = vd->vdev_tsd;
       96 +
       97 +        /*
       98 +         * Ignore events other than offline.
       99 +         */
      100 +        if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
      101 +                return (LDI_EV_SUCCESS);
      102 +
      103 +        /*
      104 +         * All LDI handles must be closed for the state change to succeed, so
      105 +         * call on vdev_disk_close() to do this.
      106 +         *
      107 +         * We inform vdev_disk_close that it is being called from offline
      108 +         * notify context so it will defer cleanup of LDI event callbacks and
      109 +         * freeing of vd->vdev_tsd to the offline finalize or a reopen.
      110 +         */
      111 +        dvd->vd_ldi_offline = B_TRUE;
      112 +        vdev_disk_close(vd);
      113 +
      114 +        /*
      115 +         * Now that the device is closed, request that the spa_async_thread
      116 +         * mark the device as REMOVED and notify FMA of the removal.
      117 +         */
      118 +        zfs_post_remove(vd->vdev_spa, vd);
      119 +        vd->vdev_remove_wanted = B_TRUE;
      120 +        spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
      121 +
      122 +        return (LDI_EV_SUCCESS);
      123 +}
      124 +
      125 +/* ARGSUSED */
  50  126  static void
      127 +vdev_disk_off_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie,
      128 +    int ldi_result, void *arg, void *ev_data)
      129 +{
      130 +        vdev_t *vd = (vdev_t *)arg;
      131 +        vdev_disk_t *dvd = vd->vdev_tsd;
      132 +        vdev_disk_ldi_cb_t *lcb;
      133 +
      134 +        /*
      135 +         * Ignore events other than offline.
      136 +         */
      137 +        if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
      138 +                return;
      139 +
      140 +        /*
      141 +         * We have already closed the LDI handle in notify.
      142 +         * Clean up the LDI event callbacks and free vd->vdev_tsd.
      143 +         */
      144 +        vdev_disk_free(vd);
      145 +
      146 +        /*
      147 +         * Request that the vdev be reopened if the offline state change was
      148 +         * unsuccessful.
      149 +         */
      150 +        if (ldi_result != LDI_EV_SUCCESS) {
      151 +                vd->vdev_probe_wanted = B_TRUE;
      152 +                spa_async_request(vd->vdev_spa, SPA_ASYNC_PROBE);
      153 +        }
      154 +}
      155 +
      156 +static ldi_ev_callback_t vdev_disk_off_callb = {
      157 +        .cb_vers = LDI_EV_CB_VERS,
      158 +        .cb_notify = vdev_disk_off_notify,
      159 +        .cb_finalize = vdev_disk_off_finalize
      160 +};
      161 +
      162 +/* ARGSUSED */
      163 +static void
      164 +vdev_disk_dgrd_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie,
      165 +    int ldi_result, void *arg, void *ev_data)
      166 +{
      167 +        vdev_t *vd = (vdev_t *)arg;
      168 +
      169 +        /*
      170 +         * Ignore events other than degrade.
      171 +         */
      172 +        if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_DEGRADE) != 0)
      173 +                return;
      174 +
      175 +        /*
      176 +         * Degrade events always succeed. Mark the vdev as degraded.
      177 +         * This status is purely informative for the user.
      178 +         */
      179 +        (void) vdev_degrade(vd->vdev_spa, vd->vdev_guid, 0);
      180 +}
      181 +
      182 +static ldi_ev_callback_t vdev_disk_dgrd_callb = {
      183 +        .cb_vers = LDI_EV_CB_VERS,
      184 +        .cb_notify = NULL,
      185 +        .cb_finalize = vdev_disk_dgrd_finalize
      186 +};
      187 +
      188 +static void
  51  189  vdev_disk_hold(vdev_t *vd)
  52  190  {
  53  191          ddi_devid_t devid;
  54  192          char *minor;
  55  193  
  56  194          ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
  57  195  
  58  196          /*
  59  197           * We must have a pathname, and it must be absolute.
  60  198           */
↓ open down ↓ 73 lines elided ↑ open up ↑
 134  272          }
 135  273          kmem_free(dk_ioc.dki_data, efisize);
 136  274          return (avail_space);
 137  275  }
 138  276  
 139  277  static int
 140  278  vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
 141  279      uint64_t *ashift)
 142  280  {
 143  281          spa_t *spa = vd->vdev_spa;
 144      -        vdev_disk_t *dvd;
      282 +        vdev_disk_t *dvd = vd->vdev_tsd;
 145  283          struct dk_minfo_ext dkmext;
      284 +        ldi_ev_cookie_t ecookie;
      285 +        vdev_disk_ldi_cb_t *lcb;
 146  286          int error;
 147  287          dev_t dev;
 148  288          int otyp;
 149  289  
 150  290          /*
 151  291           * We must have a pathname, and it must be absolute.
 152  292           */
 153  293          if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
 154  294                  vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
 155  295                  return (EINVAL);
 156  296          }
 157  297  
 158  298          /*
 159  299           * Reopen the device if it's not currently open. Otherwise,
 160  300           * just update the physical size of the device.
 161  301           */
 162      -        if (vd->vdev_tsd != NULL) {
 163      -                ASSERT(vd->vdev_reopening);
 164      -                dvd = vd->vdev_tsd;
 165      -                goto skip_open;
      302 +        if (dvd != NULL) {
      303 +                if (dvd->vd_ldi_offline && dvd->vd_lh == NULL) {
      304 +                        /*
      305 +                         * If we are opening a device in its offline notify
      306 +                         * context, the LDI handle was just closed. Clean
      307 +                         * up the LDI event callbacks and free vd->vdev_tsd.
      308 +                         */
      309 +                        vdev_disk_free(vd);
      310 +                } else {
      311 +                        VERIFY(vd->vdev_reopening);
      312 +                        goto skip_open;
      313 +                }
 166  314          }
 167  315  
 168      -        dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
      316 +        /*
      317 +         * Create vd->vdev_tsd.
      318 +         */
      319 +        vdev_disk_alloc(vd);
      320 +        dvd = vd->vdev_tsd;
 169  321  
 170  322          /*
 171  323           * When opening a disk device, we want to preserve the user's original
 172  324           * intent.  We always want to open the device by the path the user gave
 173  325           * us, even if it is one of multiple paths to the same device.  But we
 174  326           * also want to be able to survive disks being removed/recabled.
 175  327           * Therefore the sequence of opening devices is:
 176  328           *
 177  329           * 1. Try opening the device by path.  For legacy pools without the
 178  330           *    'whole_disk' property, attempt to fix the path by appending 's0'.
↓ open down ↓ 13 lines elided ↑ open up ↑
 192  344          }
 193  345  
 194  346          error = EINVAL;         /* presume failure */
 195  347  
 196  348          if (vd->vdev_path != NULL) {
 197  349                  ddi_devid_t devid;
 198  350  
 199  351                  if (vd->vdev_wholedisk == -1ULL) {
 200  352                          size_t len = strlen(vd->vdev_path) + 3;
 201  353                          char *buf = kmem_alloc(len, KM_SLEEP);
 202      -                        ldi_handle_t lh;
 203  354  
 204  355                          (void) snprintf(buf, len, "%ss0", vd->vdev_path);
 205  356  
 206      -                        if (ldi_open_by_name(buf, spa_mode(spa), kcred,
 207      -                            &lh, zfs_li) == 0) {
      357 +                        error = ldi_open_by_name(buf, spa_mode(spa), kcred,
      358 +                            &dvd->vd_lh, zfs_li);
      359 +                        if (error == 0) {
 208  360                                  spa_strfree(vd->vdev_path);
 209  361                                  vd->vdev_path = buf;
 210  362                                  vd->vdev_wholedisk = 1ULL;
 211      -                                (void) ldi_close(lh, spa_mode(spa), kcred);
 212  363                          } else {
 213  364                                  kmem_free(buf, len);
 214  365                          }
 215  366                  }
 216  367  
 217      -                error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred,
 218      -                    &dvd->vd_lh, zfs_li);
      368 +                /*
      369 +                 * If we have not yet opened the device, try to open it by the
      370 +                 * specified path.
      371 +                 */
      372 +                if (error != 0) {
      373 +                        error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
      374 +                            kcred, &dvd->vd_lh, zfs_li);
      375 +                }
 219  376  
 220  377                  /*
 221  378                   * Compare the devid to the stored value.
 222  379                   */
 223  380                  if (error == 0 && vd->vdev_devid != NULL &&
 224  381                      ldi_get_devid(dvd->vd_lh, &devid) == 0) {
 225  382                          if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
 226  383                                  error = EINVAL;
 227  384                                  (void) ldi_close(dvd->vd_lh, spa_mode(spa),
 228  385                                      kcred);
↓ open down ↓ 63 lines elided ↑ open up ↑
 292  449                                  spa_strfree(vd->vdev_physpath);
 293  450                          (void) strlcat(physpath, ":", MAXPATHLEN);
 294  451                          (void) strlcat(physpath, minorname, MAXPATHLEN);
 295  452                          vd->vdev_physpath = spa_strdup(physpath);
 296  453                  }
 297  454                  if (minorname)
 298  455                          kmem_free(minorname, strlen(minorname) + 1);
 299  456                  kmem_free(physpath, MAXPATHLEN);
 300  457          }
 301  458  
      459 +        /*
      460 +         * Register callbacks for the LDI offline event.
      461 +         */
      462 +        if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) ==
      463 +            LDI_EV_SUCCESS) {
      464 +                lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
      465 +                list_insert_tail(&dvd->vd_ldi_cbs, lcb);
      466 +                (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
      467 +                    &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id);
      468 +        }
      469 +
      470 +        /*
      471 +         * Register callbacks for the LDI degrade event.
      472 +         */
      473 +        if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) ==
      474 +            LDI_EV_SUCCESS) {
      475 +                lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
      476 +                list_insert_tail(&dvd->vd_ldi_cbs, lcb);
      477 +                (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
      478 +                    &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id);
      479 +        }
 302  480  skip_open:
 303  481          /*
 304  482           * Determine the actual size of the device.
 305  483           */
 306  484          if (ldi_get_size(dvd->vd_lh, psize) != 0) {
 307  485                  vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
 308  486                  return (EINVAL);
 309  487          }
 310  488  
 311  489          /*
↓ open down ↓ 31 lines elided ↑ open up ↑
 343  521           */
 344  522          vd->vdev_nowritecache = B_FALSE;
 345  523  
 346  524          return (0);
 347  525  }
 348  526  
 349  527  static void
 350  528  vdev_disk_close(vdev_t *vd)
 351  529  {
 352  530          vdev_disk_t *dvd = vd->vdev_tsd;
      531 +        vdev_disk_ldi_cb_t *lcb;
 353  532  
 354  533          if (vd->vdev_reopening || dvd == NULL)
 355  534                  return;
 356  535  
 357      -        if (dvd->vd_minor != NULL)
      536 +        if (dvd->vd_minor != NULL) {
 358  537                  ddi_devid_str_free(dvd->vd_minor);
      538 +                dvd->vd_minor = NULL;
      539 +        }
 359  540  
 360      -        if (dvd->vd_devid != NULL)
      541 +        if (dvd->vd_devid != NULL) {
 361  542                  ddi_devid_free(dvd->vd_devid);
      543 +                dvd->vd_devid = NULL;
      544 +        }
 362  545  
 363      -        if (dvd->vd_lh != NULL)
      546 +        if (dvd->vd_lh != NULL) {
 364  547                  (void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
      548 +                dvd->vd_lh = NULL;
      549 +        }
 365  550  
 366  551          vd->vdev_delayed_close = B_FALSE;
 367      -        kmem_free(dvd, sizeof (vdev_disk_t));
 368      -        vd->vdev_tsd = NULL;
      552 +        /*
      553 +         * If we closed the LDI handle due to an offline notify from LDI,
      554 +         * don't free vd->vdev_tsd or unregister the callbacks here;
      555 +         * the offline finalize callback or a reopen will take care of it.
      556 +         */
      557 +        if (dvd->vd_ldi_offline)
      558 +                return;
      559 +
      560 +        vdev_disk_free(vd);
 369  561  }
 370  562  
 371  563  int
 372  564  vdev_disk_physio(vdev_t *vd, caddr_t data,
 373  565      size_t size, uint64_t offset, int flags)
 374  566  {
 375  567          vdev_disk_t *dvd = vd->vdev_tsd;
 376  568  
 377  569          /*
 378  570           * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
 379  571           * Nothing to be done here but return failure.
 380  572           */
 381      -        if (dvd == NULL)
      573 +        if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL))
 382  574                  return (EIO);
 383  575  
 384  576          ASSERT(vd->vdev_ops == &vdev_disk_ops);
 385  577          return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags));
 386  578  }
 387  579  
 388  580  int
 389  581  vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data,
 390  582      size_t size, uint64_t offset, int flags)
 391  583  {
↓ open down ↓ 66 lines elided ↑ open up ↑
 458  650  static int
 459  651  vdev_disk_io_start(zio_t *zio)
 460  652  {
 461  653          vdev_t *vd = zio->io_vd;
 462  654          vdev_disk_t *dvd = vd->vdev_tsd;
 463  655          vdev_disk_buf_t *vdb;
 464  656          struct dk_callback *dkc;
 465  657          buf_t *bp;
 466  658          int error;
 467  659  
      660 +        /*
      661 +         * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
      662 +         * Nothing to be done here but return failure.
      663 +         */
      664 +        if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) {
      665 +                zio->io_error = ENXIO;
      666 +                return (ZIO_PIPELINE_CONTINUE);
      667 +        }
      668 +
 468  669          if (zio->io_type == ZIO_TYPE_IOCTL) {
 469  670                  /* XXPOLICY */
 470  671                  if (!vdev_readable(vd)) {
 471  672                          zio->io_error = ENXIO;
 472  673                          return (ZIO_PIPELINE_CONTINUE);
 473  674                  }
 474  675  
 475  676                  switch (zio->io_cmd) {
 476  677  
 477  678                  case DKIOCFLUSHWRITECACHE:
↓ open down ↓ 198 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX