Print this page
7367 blkdev: support block size larger than 512
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/io/blkdev/blkdev.c
          +++ new/usr/src/uts/common/io/blkdev/blkdev.c
↓ open down ↓ 15 lines elided ↑ open up ↑
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  24   24   * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
  25   25   * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
       26 + * Copyright 2017 The MathWorks, Inc.  All rights reserved.
  26   27   */
  27   28  
  28   29  #include <sys/types.h>
  29   30  #include <sys/ksynch.h>
  30   31  #include <sys/kmem.h>
  31   32  #include <sys/file.h>
  32   33  #include <sys/errno.h>
  33   34  #include <sys/open.h>
  34   35  #include <sys/buf.h>
  35   36  #include <sys/uio.h>
↓ open down ↓ 710 lines elided ↑ open up ↑
 746  747          xi = kmem_cache_alloc(bd->d_cache, kmflag);
 747  748          if (xi == NULL) {
 748  749                  bioerror(bp, ENOMEM);
 749  750                  return (NULL);
 750  751          }
 751  752  
 752  753          ASSERT(bp);
 753  754  
 754  755          xi->i_bp = bp;
 755  756          xi->i_func = func;
 756      -        xi->i_blkno = bp->b_lblkno;
      757 +        xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);
 757  758  
 758  759          if (bp->b_bcount == 0) {
 759  760                  xi->i_len = 0;
 760  761                  xi->i_nblks = 0;
 761  762                  xi->i_kaddr = NULL;
 762  763                  xi->i_resid = 0;
 763  764                  xi->i_num_win = 0;
 764  765                  goto done;
 765  766          }
 766  767  
↓ open down ↓ 9 lines elided ↑ open up ↑
 776  777          xi->i_blkshift = shift;
 777  778  
 778  779          if (!bd->d_use_dma) {
 779  780                  bp_mapin(bp);
 780  781                  rv = 0;
 781  782                  xi->i_offset = 0;
 782  783                  xi->i_num_win =
 783  784                      (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
 784  785                  xi->i_cur_win = 0;
 785  786                  xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
 786      -                xi->i_nblks = xi->i_len >> shift;
      787 +                xi->i_nblks = howmany(xi->i_len, (1U << shift));
 787  788                  xi->i_kaddr = bp->b_un.b_addr;
 788  789                  xi->i_resid = bp->b_bcount;
 789  790          } else {
 790  791  
 791  792                  /*
 792  793                   * We have to use consistent DMA if the address is misaligned.
 793  794                   */
 794  795                  if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
 795  796                      ((uintptr_t)bp->b_un.b_addr & 0x7)) {
 796  797                          dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
↓ open down ↓ 2 lines elided ↑ open up ↑
 799  800                  }
 800  801  
 801  802                  status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
 802  803                      NULL, &xi->i_dmac, &xi->i_ndmac);
 803  804                  switch (status) {
 804  805                  case DDI_DMA_MAPPED:
 805  806                          xi->i_num_win = 1;
 806  807                          xi->i_cur_win = 0;
 807  808                          xi->i_offset = 0;
 808  809                          xi->i_len = bp->b_bcount;
 809      -                        xi->i_nblks = xi->i_len >> shift;
      810 +                        xi->i_nblks = howmany(xi->i_len, (1U << shift));
 810  811                          xi->i_resid = bp->b_bcount;
 811  812                          rv = 0;
 812  813                          break;
 813  814                  case DDI_DMA_PARTIAL_MAP:
 814  815                          xi->i_cur_win = 0;
 815  816  
 816  817                          if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
 817  818                              DDI_SUCCESS) ||
 818  819                              (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
 819  820                              &len, &xi->i_dmac, &xi->i_ndmac) !=
 820  821                              DDI_SUCCESS) ||
 821      -                            (P2PHASE(len, shift) != 0)) {
      822 +                            (P2PHASE(len, (1U << DEV_BSHIFT)) != 0)) {
 822  823                                  (void) ddi_dma_unbind_handle(xi->i_dmah);
 823  824                                  rv = EFAULT;
 824  825                                  goto done;
 825  826                          }
 826  827                          xi->i_len = len;
 827      -                        xi->i_nblks = xi->i_len >> shift;
      828 +                        xi->i_nblks = howmany(xi->i_len, (1U << shift));
 828  829                          xi->i_resid = bp->b_bcount;
 829  830                          rv = 0;
 830  831                          break;
 831  832                  case DDI_DMA_NORESOURCES:
 832  833                          rv = EAGAIN;
 833  834                          goto done;
 834  835                  case DDI_DMA_TOOBIG:
 835  836                          rv = EINVAL;
 836  837                          goto done;
 837  838                  case DDI_DMA_NOMAPPING:
↓ open down ↓ 190 lines elided ↑ open up ↑
1028 1029  bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1029 1030  {
1030 1031          minor_t         inst;
1031 1032          minor_t         part;
1032 1033          diskaddr_t      pstart;
1033 1034          diskaddr_t      psize;
1034 1035          bd_t            *bd;
1035 1036          bd_xfer_impl_t  *xi;
1036 1037          buf_t           *bp;
1037 1038          int             rv;
     1039 +        uint32_t        shift;
     1040 +        daddr_t         d_blkno;
     1041 +        int     d_nblk;
1038 1042  
1039 1043          rw_enter(&bd_lock, RW_READER);
1040 1044  
1041 1045          part = BDPART(dev);
1042 1046          inst = BDINST(dev);
1043 1047  
1044 1048          if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1045 1049                  rw_exit(&bd_lock);
1046 1050                  return (ENXIO);
1047 1051          }
     1052 +        shift = bd->d_blkshift;
     1053 +        d_blkno = blkno >> (shift - DEV_BSHIFT);
     1054 +        d_nblk = howmany((nblk << DEV_BSHIFT), (1U << shift));
1048 1055          /*
1049 1056           * do cmlb, but do it synchronously unless we already have the
1050 1057           * partition (which we probably should.)
1051 1058           */
1052 1059          if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1053 1060              (void *)1)) {
1054 1061                  rw_exit(&bd_lock);
1055 1062                  return (ENXIO);
1056 1063          }
1057 1064  
1058      -        if ((blkno + nblk) > psize) {
     1065 +        if ((d_blkno + d_nblk) > psize) {
1059 1066                  rw_exit(&bd_lock);
1060 1067                  return (EINVAL);
1061 1068          }
1062 1069          bp = getrbuf(KM_NOSLEEP);
1063 1070          if (bp == NULL) {
1064 1071                  rw_exit(&bd_lock);
1065 1072                  return (ENOMEM);
1066 1073          }
1067 1074  
1068      -        bp->b_bcount = nblk << bd->d_blkshift;
     1075 +        bp->b_bcount = nblk << DEV_BSHIFT;
1069 1076          bp->b_resid = bp->b_bcount;
1070 1077          bp->b_lblkno = blkno;
1071 1078          bp->b_un.b_addr = caddr;
1072 1079  
1073 1080          xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
1074 1081          if (xi == NULL) {
1075 1082                  rw_exit(&bd_lock);
1076 1083                  freerbuf(bp);
1077 1084                  return (ENOMEM);
1078 1085          }
1079      -        xi->i_blkno = blkno + pstart;
     1086 +        xi->i_blkno = d_blkno + pstart;
1080 1087          xi->i_flags = BD_XFER_POLL;
1081 1088          bd_submit(bd, xi);
1082 1089          rw_exit(&bd_lock);
1083 1090  
1084 1091          /*
1085 1092           * Generally, we should have run this entirely synchronously
1086 1093           * at this point and the biowait call should be a no-op.  If
1087 1094           * it didn't happen this way, it's a bug in the underlying
1088 1095           * driver not honoring BD_XFER_POLL.
1089 1096           */
↓ open down ↓ 55 lines elided ↑ open up ↑
1145 1152  {
1146 1153          minor_t         inst;
1147 1154          minor_t         part;
1148 1155          bd_t            *bd;
1149 1156          diskaddr_t      p_lba;
1150 1157          diskaddr_t      p_nblks;
1151 1158          diskaddr_t      b_nblks;
1152 1159          bd_xfer_impl_t  *xi;
1153 1160          uint32_t        shift;
1154 1161          int             (*func)(void *, bd_xfer_t *);
     1162 +        diskaddr_t      lblkno;
1155 1163  
1156 1164          part = BDPART(bp->b_edev);
1157 1165          inst = BDINST(bp->b_edev);
1158 1166  
1159 1167          ASSERT(bp);
1160 1168  
1161 1169          bp->b_resid = bp->b_bcount;
1162 1170  
1163 1171          if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1164 1172                  bioerror(bp, ENXIO);
↓ open down ↓ 2 lines elided ↑ open up ↑
1167 1175          }
1168 1176  
1169 1177          if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1170 1178              NULL, NULL, 0)) {
1171 1179                  bioerror(bp, ENXIO);
1172 1180                  biodone(bp);
1173 1181                  return (0);
1174 1182          }
1175 1183  
1176 1184          shift = bd->d_blkshift;
1177      -
1178      -        if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
1179      -            (bp->b_lblkno > p_nblks)) {
     1185 +        lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
     1186 +        if ((P2PHASE(bp->b_bcount, (1U << DEV_BSHIFT)) != 0) ||
     1187 +            (lblkno > p_nblks)) {
1180 1188                  bioerror(bp, ENXIO);
1181 1189                  biodone(bp);
1182 1190                  return (0);
1183 1191          }
1184      -        b_nblks = bp->b_bcount >> shift;
1185      -        if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
     1192 +        b_nblks = howmany(bp->b_bcount, (1U << shift));
     1193 +        if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
1186 1194                  biodone(bp);
1187 1195                  return (0);
1188 1196          }
1189 1197  
1190      -        if ((b_nblks + bp->b_lblkno) > p_nblks) {
1191      -                bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
     1198 +        if ((b_nblks + lblkno) > p_nblks) {
     1199 +                bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
1192 1200                  bp->b_bcount -= bp->b_resid;
1193 1201          } else {
1194 1202                  bp->b_resid = 0;
1195 1203          }
1196 1204          func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1197 1205  
1198 1206          xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1199 1207          if (xi == NULL) {
1200 1208                  xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1201 1209          }
1202 1210          if (xi == NULL) {
1203 1211                  /* bd_xfer_alloc will have done bioerror */
1204 1212                  biodone(bp);
1205 1213                  return (0);
1206 1214          }
1207      -        xi->i_blkno = bp->b_lblkno + p_lba;
     1215 +        xi->i_blkno = lblkno + p_lba;
1208 1216  
1209 1217          bd_submit(bd, xi);
1210 1218  
1211 1219          return (0);
1212 1220  }
1213 1221  
1214 1222  static int
1215 1223  bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1216 1224  {
1217 1225          minor_t         inst;
↓ open down ↓ 161 lines elided ↑ open up ↑
1379 1387          int             kmflag;
1380 1388  
1381 1389          /*
1382 1390           * If we are running in polled mode (such as during dump(9e)
1383 1391           * execution), then we cannot sleep for kernel allocations.
1384 1392           */
1385 1393          kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;
1386 1394  
1387 1395          bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1388 1396  
1389      -        if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
     1397 +        if (P2PHASE(length, (1U << DEV_BSHIFT)) != 0) {
1390 1398                  /* Length must be a multiple of (1U << DEV_BSHIFT) bytes. */
1391 1399                  return (EINVAL);
1392 1400          }
1393 1401  
1394 1402          if ((bp = getrbuf(kmflag)) == NULL) {
1395 1403                  return (ENOMEM);
1396 1404          }
1397 1405  
1398 1406          switch (cmd) {
1399 1407          case TG_READ:
↓ open down ↓ 498 lines elided ↑ open up ↑
1898 1906                      &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1899 1907          } else {
1900 1908                  /* Advance memory window. */
1901 1909                  xi->i_kaddr += xi->i_len;
1902 1910                  xi->i_offset += xi->i_len;
1903 1911                  len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1904 1912          }
1905 1913  
1906 1914  
1907 1915          if ((rv != DDI_SUCCESS) ||
1908      -            (P2PHASE(len, (1U << xi->i_blkshift) != 0))) {
     1916 +            (P2PHASE(len, (1U << DEV_BSHIFT) != 0))) {
1909 1917                  bd_runq_exit(xi, EFAULT);
1910 1918  
1911 1919                  bp->b_resid += xi->i_resid;
1912 1920                  bd_xfer_free(xi);
1913 1921                  bioerror(bp, EFAULT);
1914 1922                  biodone(bp);
1915 1923                  return;
1916 1924          }
1917 1925          xi->i_len = len;
1918      -        xi->i_nblks = len >> xi->i_blkshift;
     1926 +        xi->i_nblks = howmany(len, (1U << xi->i_blkshift));
1919 1927  
1920 1928          /* Submit next window to hardware. */
1921 1929          rv = xi->i_func(bd->d_private, &xi->i_public);
1922 1930          if (rv != 0) {
1923 1931                  bd_runq_exit(xi, rv);
1924 1932  
1925 1933                  atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1926 1934  
1927 1935                  bp->b_resid += xi->i_resid;
1928 1936                  bd_xfer_free(xi);
↓ open down ↓ 94 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX