Print this page
7367 blkdev: support block size larger than 512
Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/io/blkdev/blkdev.c
          +++ new/usr/src/uts/common/io/blkdev/blkdev.c
↓ open down ↓ 15 lines elided ↑ open up ↑
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  24   24   * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
  25   25   * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
       26 + * Copyright 2017 The MathWorks, Inc.  All rights reserved.
  26   27   */
  27   28  
  28   29  #include <sys/types.h>
  29   30  #include <sys/ksynch.h>
  30   31  #include <sys/kmem.h>
  31   32  #include <sys/file.h>
  32   33  #include <sys/errno.h>
  33   34  #include <sys/open.h>
  34   35  #include <sys/buf.h>
  35   36  #include <sys/uio.h>
↓ open down ↓ 128 lines elided ↑ open up ↑
 164  165      void *);
 165  166  static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
 166  167  static int bd_xfer_ctor(void *, void *, int);
 167  168  static void bd_xfer_dtor(void *, void *);
 168  169  static void bd_sched(bd_t *);
 169  170  static void bd_submit(bd_t *, bd_xfer_impl_t *);
 170  171  static void bd_runq_exit(bd_xfer_impl_t *, int);
 171  172  static void bd_update_state(bd_t *);
 172  173  static int bd_check_state(bd_t *, enum dkio_state *);
 173  174  static int bd_flush_write_cache(bd_t *, struct dk_callback *);
      175 +static int bd_check_uio(dev_t, struct uio *);
 174  176  
 175  177  struct cmlb_tg_ops bd_tg_ops = {
 176  178          TG_DK_OPS_VERSION_1,
 177  179          bd_tg_rdwr,
 178  180          bd_tg_getinfo,
 179  181  };
 180  182  
 181  183  static struct cb_ops bd_cb_ops = {
 182  184          bd_open,                /* open */
 183  185          bd_close,               /* close */
↓ open down ↓ 562 lines elided ↑ open up ↑
 746  748          xi = kmem_cache_alloc(bd->d_cache, kmflag);
 747  749          if (xi == NULL) {
 748  750                  bioerror(bp, ENOMEM);
 749  751                  return (NULL);
 750  752          }
 751  753  
 752  754          ASSERT(bp);
 753  755  
 754  756          xi->i_bp = bp;
 755  757          xi->i_func = func;
 756      -        xi->i_blkno = bp->b_lblkno;
      758 +        xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);
 757  759  
 758  760          if (bp->b_bcount == 0) {
 759  761                  xi->i_len = 0;
 760  762                  xi->i_nblks = 0;
 761  763                  xi->i_kaddr = NULL;
 762  764                  xi->i_resid = 0;
 763  765                  xi->i_num_win = 0;
 764  766                  goto done;
 765  767          }
 766  768  
↓ open down ↓ 44 lines elided ↑ open up ↑
 811  813                          rv = 0;
 812  814                          break;
 813  815                  case DDI_DMA_PARTIAL_MAP:
 814  816                          xi->i_cur_win = 0;
 815  817  
 816  818                          if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
 817  819                              DDI_SUCCESS) ||
 818  820                              (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
 819  821                              &len, &xi->i_dmac, &xi->i_ndmac) !=
 820  822                              DDI_SUCCESS) ||
 821      -                            (P2PHASE(len, shift) != 0)) {
      823 +                            (P2PHASE(len, (1U << shift)) != 0)) {
 822  824                                  (void) ddi_dma_unbind_handle(xi->i_dmah);
 823  825                                  rv = EFAULT;
 824  826                                  goto done;
 825  827                          }
 826  828                          xi->i_len = len;
 827  829                          xi->i_nblks = xi->i_len >> shift;
 828  830                          xi->i_resid = bp->b_bcount;
 829  831                          rv = 0;
 830  832                          break;
 831  833                  case DDI_DMA_NORESOURCES:
↓ open down ↓ 196 lines elided ↑ open up ↑
1028 1030  bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1029 1031  {
1030 1032          minor_t         inst;
1031 1033          minor_t         part;
1032 1034          diskaddr_t      pstart;
1033 1035          diskaddr_t      psize;
1034 1036          bd_t            *bd;
1035 1037          bd_xfer_impl_t  *xi;
1036 1038          buf_t           *bp;
1037 1039          int             rv;
     1040 +        uint32_t        shift;
     1041 +        daddr_t         d_blkno;
     1042 +        int     d_nblk;
1038 1043  
1039 1044          rw_enter(&bd_lock, RW_READER);
1040 1045  
1041 1046          part = BDPART(dev);
1042 1047          inst = BDINST(dev);
1043 1048  
1044 1049          if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1045 1050                  rw_exit(&bd_lock);
1046 1051                  return (ENXIO);
1047 1052          }
     1053 +        shift = bd->d_blkshift;
     1054 +        d_blkno = blkno >> (shift - DEV_BSHIFT);
     1055 +        d_nblk = nblk >> (shift - DEV_BSHIFT);
1048 1056          /*
1049 1057           * Query cmlb for the partition, but do it synchronously unless we
1050 1058           * already have the partition (which we probably should).
1051 1059           */
1052 1060          if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1053 1061              (void *)1)) {
1054 1062                  rw_exit(&bd_lock);
1055 1063                  return (ENXIO);
1056 1064          }
1057 1065  
1058      -        if ((blkno + nblk) > psize) {
     1066 +        if ((d_blkno + d_nblk) > psize) {
1059 1067                  rw_exit(&bd_lock);
1060 1068                  return (EINVAL);
1061 1069          }
1062 1070          bp = getrbuf(KM_NOSLEEP);
1063 1071          if (bp == NULL) {
1064 1072                  rw_exit(&bd_lock);
1065 1073                  return (ENOMEM);
1066 1074          }
1067 1075  
1068      -        bp->b_bcount = nblk << bd->d_blkshift;
     1076 +        bp->b_bcount = nblk << DEV_BSHIFT;
1069 1077          bp->b_resid = bp->b_bcount;
1070 1078          bp->b_lblkno = blkno;
1071 1079          bp->b_un.b_addr = caddr;
1072 1080  
1073 1081          xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
1074 1082          if (xi == NULL) {
1075 1083                  rw_exit(&bd_lock);
1076 1084                  freerbuf(bp);
1077 1085                  return (ENOMEM);
1078 1086          }
1079      -        xi->i_blkno = blkno + pstart;
     1087 +        xi->i_blkno = d_blkno + pstart;
1080 1088          xi->i_flags = BD_XFER_POLL;
1081 1089          bd_submit(bd, xi);
1082 1090          rw_exit(&bd_lock);
1083 1091  
1084 1092          /*
1085 1093           * Generally, we should have run this entirely synchronously
1086 1094           * at this point and the biowait call should be a no-op.  If
1087 1095           * it didn't happen this way, it's a bug in the underlying
1088 1096           * driver not honoring BD_XFER_POLL.
1089 1097           */
↓ open down ↓ 16 lines elided ↑ open up ↑
1106 1114           * In a non-debug kernel, bd_strategy will catch !bd as
1107 1115           * well, and will fail nicely.
1108 1116           */
1109 1117          ASSERT(bd);
1110 1118  
1111 1119          if (bp->b_bcount > bd->d_maxxfer)
1112 1120                  bp->b_bcount = bd->d_maxxfer;
1113 1121  }
1114 1122  
1115 1123  static int
     1124 +bd_check_uio(dev_t dev, struct uio *uio)
     1125 +{
     1126 +        bd_t            *bd;
     1127 +        uint32_t        shift;
     1128 +
     1129 +        if ((bd = ddi_get_soft_state(bd_state, BDINST(dev))) == NULL) {
     1130 +                return (ENXIO);
     1131 +        }
     1132 +
     1133 +        shift = bd->d_blkshift;
     1134 +        if ((P2PHASE(uio->uio_loffset, (1U << shift)) != 0) ||
     1135 +            (P2PHASE(uio->uio_iov->iov_len, (1U << shift)) != 0)) {
     1136 +                return (EINVAL);
     1137 +        }
     1138 +
     1139 +        return (0);
     1140 +}
     1141 +
     1142 +static int
1116 1143  bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1117 1144  {
1118 1145          _NOTE(ARGUNUSED(credp));
     1146 +        int     ret = bd_check_uio(dev, uio);
     1147 +        if (ret != 0) {
     1148 +                return (ret);
     1149 +        }
1119 1150          return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1120 1151  }
1121 1152  
1122 1153  static int
1123 1154  bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1124 1155  {
1125 1156          _NOTE(ARGUNUSED(credp));
     1157 +        int     ret = bd_check_uio(dev, uio);
     1158 +        if (ret != 0) {
     1159 +                return (ret);
     1160 +        }
1126 1161          return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1127 1162  }
1128 1163  
1129 1164  static int
1130 1165  bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1131 1166  {
1132 1167          _NOTE(ARGUNUSED(credp));
     1168 +        int     ret = bd_check_uio(dev, aio->aio_uio);
     1169 +        if (ret != 0) {
     1170 +                return (ret);
     1171 +        }
1133 1172          return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1134 1173  }
1135 1174  
1136 1175  static int
1137 1176  bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1138 1177  {
1139 1178          _NOTE(ARGUNUSED(credp));
     1179 +        int     ret = bd_check_uio(dev, aio->aio_uio);
     1180 +        if (ret != 0) {
     1181 +                return (ret);
     1182 +        }
1140 1183          return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1141 1184  }
1142 1185  
1143 1186  static int
1144 1187  bd_strategy(struct buf *bp)
1145 1188  {
1146 1189          minor_t         inst;
1147 1190          minor_t         part;
1148 1191          bd_t            *bd;
1149 1192          diskaddr_t      p_lba;
1150 1193          diskaddr_t      p_nblks;
1151 1194          diskaddr_t      b_nblks;
1152 1195          bd_xfer_impl_t  *xi;
1153 1196          uint32_t        shift;
1154 1197          int             (*func)(void *, bd_xfer_t *);
     1198 +        diskaddr_t      lblkno;
1155 1199  
1156 1200          part = BDPART(bp->b_edev);
1157 1201          inst = BDINST(bp->b_edev);
1158 1202  
1159 1203          ASSERT(bp);
1160 1204  
1161 1205          bp->b_resid = bp->b_bcount;
1162 1206  
1163 1207          if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1164 1208                  bioerror(bp, ENXIO);
↓ open down ↓ 2 lines elided ↑ open up ↑
1167 1211          }
1168 1212  
1169 1213          if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1170 1214              NULL, NULL, 0)) {
1171 1215                  bioerror(bp, ENXIO);
1172 1216                  biodone(bp);
1173 1217                  return (0);
1174 1218          }
1175 1219  
1176 1220          shift = bd->d_blkshift;
1177      -
1178      -        if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
1179      -            (bp->b_lblkno > p_nblks)) {
1180      -                bioerror(bp, ENXIO);
     1221 +        lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
     1222 +        if ((P2PHASE(bp->b_lblkno, (1U << (shift - DEV_BSHIFT))) != 0) ||
     1223 +            (P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
     1224 +            (lblkno > p_nblks)) {
     1225 +                bioerror(bp, EINVAL);
1181 1226                  biodone(bp);
1182 1227                  return (0);
1183 1228          }
1184 1229          b_nblks = bp->b_bcount >> shift;
1185      -        if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
     1230 +        if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
1186 1231                  biodone(bp);
1187 1232                  return (0);
1188 1233          }
1189 1234  
1190      -        if ((b_nblks + bp->b_lblkno) > p_nblks) {
1191      -                bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
     1235 +        if ((b_nblks + lblkno) > p_nblks) {
     1236 +                bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
1192 1237                  bp->b_bcount -= bp->b_resid;
1193 1238          } else {
1194 1239                  bp->b_resid = 0;
1195 1240          }
1196 1241          func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1197 1242  
1198 1243          xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1199 1244          if (xi == NULL) {
1200 1245                  xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1201 1246          }
1202 1247          if (xi == NULL) {
1203 1248                  /* bd_xfer_alloc will have done bioerror */
1204 1249                  biodone(bp);
1205 1250                  return (0);
1206 1251          }
1207      -        xi->i_blkno = bp->b_lblkno + p_lba;
     1252 +        xi->i_blkno = lblkno + p_lba;
1208 1253  
1209 1254          bd_submit(bd, xi);
1210 1255  
1211 1256          return (0);
1212 1257  }
1213 1258  
1214 1259  static int
1215 1260  bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1216 1261  {
1217 1262          minor_t         inst;
↓ open down ↓ 680 lines elided ↑ open up ↑
1898 1943                      &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1899 1944          } else {
1900 1945                  /* Advance memory window. */
1901 1946                  xi->i_kaddr += xi->i_len;
1902 1947                  xi->i_offset += xi->i_len;
1903 1948                  len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1904 1949          }
1905 1950  
1906 1951  
1907 1952          if ((rv != DDI_SUCCESS) ||
1908      -            (P2PHASE(len, (1U << xi->i_blkshift) != 0))) {
     1953 +            (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
1909 1954                  bd_runq_exit(xi, EFAULT);
1910 1955  
1911 1956                  bp->b_resid += xi->i_resid;
1912 1957                  bd_xfer_free(xi);
1913 1958                  bioerror(bp, EFAULT);
1914 1959                  biodone(bp);
1915 1960                  return;
1916 1961          }
1917 1962          xi->i_len = len;
1918 1963          xi->i_nblks = len >> xi->i_blkshift;
↓ open down ↓ 104 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX