7367 blkdev: support block size larger than 512
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com>


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  24  * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
  25  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/ksynch.h>
  30 #include <sys/kmem.h>
  31 #include <sys/file.h>
  32 #include <sys/errno.h>
  33 #include <sys/open.h>
  34 #include <sys/buf.h>
  35 #include <sys/uio.h>
  36 #include <sys/aio_req.h>
  37 #include <sys/cred.h>
  38 #include <sys/modctl.h>
  39 #include <sys/cmlb.h>
  40 #include <sys/conf.h>
  41 #include <sys/devops.h>
  42 #include <sys/list.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/dkio.h>
  45 #include <sys/vtoc.h>


 736         int                     (*cb)(caddr_t);
 737         size_t                  len;
 738         uint32_t                shift;
 739 
 740         if (kmflag == KM_SLEEP) {
 741                 cb = DDI_DMA_SLEEP;
 742         } else {
 743                 cb = DDI_DMA_DONTWAIT;
 744         }
 745 
 746         xi = kmem_cache_alloc(bd->d_cache, kmflag);
 747         if (xi == NULL) {
 748                 bioerror(bp, ENOMEM);
 749                 return (NULL);
 750         }
 751 
 752         ASSERT(bp);
 753 
 754         xi->i_bp = bp;
 755         xi->i_func = func;
 756         xi->i_blkno = bp->b_lblkno;
 757 
 758         if (bp->b_bcount == 0) {
 759                 xi->i_len = 0;
 760                 xi->i_nblks = 0;
 761                 xi->i_kaddr = NULL;
 762                 xi->i_resid = 0;
 763                 xi->i_num_win = 0;
 764                 goto done;
 765         }
 766 
 767         if (bp->b_flags & B_READ) {
 768                 dir = DDI_DMA_READ;
 769                 xi->i_func = bd->d_ops.o_read;
 770         } else {
 771                 dir = DDI_DMA_WRITE;
 772                 xi->i_func = bd->d_ops.o_write;
 773         }
 774 
 775         shift = bd->d_blkshift;
 776         xi->i_blkshift = shift;
 777 
 778         if (!bd->d_use_dma) {
 779                 bp_mapin(bp);
 780                 rv = 0;
 781                 xi->i_offset = 0;
 782                 xi->i_num_win =
 783                     (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
 784                 xi->i_cur_win = 0;
 785                 xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
 786                 xi->i_nblks = xi->i_len >> shift;
 787                 xi->i_kaddr = bp->b_un.b_addr;
 788                 xi->i_resid = bp->b_bcount;
 789         } else {
 790 
 791                 /*
 792                  * We have to use consistent DMA if the address is misaligned.
 793                  */
 794                 if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
 795                     ((uintptr_t)bp->b_un.b_addr & 0x7)) {
 796                         dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
 797                 } else {
 798                         dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
 799                 }
 800 
 801                 status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
 802                     NULL, &xi->i_dmac, &xi->i_ndmac);
 803                 switch (status) {
 804                 case DDI_DMA_MAPPED:
 805                         xi->i_num_win = 1;
 806                         xi->i_cur_win = 0;
 807                         xi->i_offset = 0;
 808                         xi->i_len = bp->b_bcount;
 809                         xi->i_nblks = xi->i_len >> shift;
 810                         xi->i_resid = bp->b_bcount;
 811                         rv = 0;
 812                         break;
 813                 case DDI_DMA_PARTIAL_MAP:
 814                         xi->i_cur_win = 0;
 815 
 816                         if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
 817                             DDI_SUCCESS) ||
 818                             (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
 819                             &len, &xi->i_dmac, &xi->i_ndmac) !=
 820                             DDI_SUCCESS) ||
 821                             (P2PHASE(len, shift) != 0)) {
 822                                 (void) ddi_dma_unbind_handle(xi->i_dmah);
 823                                 rv = EFAULT;
 824                                 goto done;
 825                         }
 826                         xi->i_len = len;
 827                         xi->i_nblks = xi->i_len >> shift;
 828                         xi->i_resid = bp->b_bcount;
 829                         rv = 0;
 830                         break;
 831                 case DDI_DMA_NORESOURCES:
 832                         rv = EAGAIN;
 833                         goto done;
 834                 case DDI_DMA_TOOBIG:
 835                         rv = EINVAL;
 836                         goto done;
 837                 case DDI_DMA_NOMAPPING:
 838                 case DDI_DMA_INUSE:
 839                 default:
 840                         rv = EFAULT;
 841                         goto done;
 842                 }
 843         }
 844 
 845 done:
 846         if (rv != 0) {
 847                 kmem_cache_free(bd->d_cache, xi);
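
Two checks in the DMA path above are easy to misread: the 8-byte address test that forces consistent DMA, and P2PHASE(), which rejects lengths that are not a whole multiple of a power-of-two boundary. A minimal userland sketch of both, illustrative only, using the P2PHASE() definition from <sys/sysmacros.h>:

#include <stdio.h>
#include <stdint.h>

/* P2PHASE() as defined in illumos <sys/sysmacros.h>. */
#define	P2PHASE(x, align)	((x) & ((align) - 1))

int
main(void)
{
	uint32_t shift = 9;	/* 512-byte blocks */

	/* Whole-block lengths have zero phase; partial blocks do not. */
	printf("%u\n", P2PHASE(1536U, 1U << shift));	/* 0: 3 whole blocks */
	printf("%u\n", P2PHASE(1300U, 1U << shift));	/* 276: partial block */

	/*
	 * The consistent-DMA test above: any of the low three address
	 * bits set means the buffer is not 8-byte aligned.
	 */
	uintptr_t addr = 0x1003;
	printf("%d\n", (addr & 0x7) != 0);		/* 1: misaligned */
	return (0);
}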


1018 
1019         if (last) {
1020                 cmlb_invalidate(bd->d_cmlbh, 0);
1021         }
1022         rw_exit(&bd_lock);
1023 
1024         return (0);
1025 }
1026 
1027 static int
1028 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1029 {
1030         minor_t         inst;
1031         minor_t         part;
1032         diskaddr_t      pstart;
1033         diskaddr_t      psize;
1034         bd_t            *bd;
1035         bd_xfer_impl_t  *xi;
1036         buf_t           *bp;
1037         int             rv;



1038 
1039         rw_enter(&bd_lock, RW_READER);
1040 
1041         part = BDPART(dev);
1042         inst = BDINST(dev);
1043 
1044         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1045                 rw_exit(&bd_lock);
1046                 return (ENXIO);
1047         }



1048         /*
1049          * do cmlb, but do it synchronously unless we already have the
1050          * partition (which we probably should.)
1051          */
1052         if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1053             (void *)1)) {
1054                 rw_exit(&bd_lock);
1055                 return (ENXIO);
1056         }
1057 
1058         if ((blkno + nblk) > psize) {
1059                 rw_exit(&bd_lock);
1060                 return (EINVAL);
1061         }
1062         bp = getrbuf(KM_NOSLEEP);
1063         if (bp == NULL) {
1064                 rw_exit(&bd_lock);
1065                 return (ENOMEM);
1066         }
1067 
1068         bp->b_bcount = nblk << bd->d_blkshift;
1069         bp->b_resid = bp->b_bcount;
1070         bp->b_lblkno = blkno;
1071         bp->b_un.b_addr = caddr;
1072 
1073         xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
1074         if (xi == NULL) {
1075                 rw_exit(&bd_lock);
1076                 freerbuf(bp);
1077                 return (ENOMEM);
1078         }
1079         xi->i_blkno = blkno + pstart;
1080         xi->i_flags = BD_XFER_POLL;
1081         bd_submit(bd, xi);
1082         rw_exit(&bd_lock);
1083 
1084         /*
1085          * Generally, we should have run this entirely synchronously
1086          * at this point and the biowait call should be a no-op.  If
1087          * it didn't happen this way, it's a bug in the underlying
1088          * driver not honoring BD_XFER_POLL.
1089          */
1090         (void) biowait(bp);
1091         rv = geterror(bp);
1092         freerbuf(bp);
1093         return (rv);
1094 }
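
The comment above states the BD_XFER_POLL contract, but the other half of that contract lives in the client driver. A hypothetical sketch of how a blkdev client might honor the flag; the mydrv_* types and helpers are invented for illustration and are not part of the blkdev API:

#include <sys/blkdev.h>
#include <sys/sunddi.h>

static int
mydrv_write(void *arg, bd_xfer_t *xfer)
{
	mydrv_state_t	*sc = arg;
	mydrv_cmd_t	*cmd;

	if ((cmd = mydrv_cmd_setup(sc, xfer)) == NULL)
		return (ENOMEM);
	mydrv_cmd_start(sc, cmd);

	if (xfer->x_flags & BD_XFER_POLL) {
		/* Dump context: interrupts may be unavailable, so spin. */
		while (!mydrv_cmd_done(sc, cmd))
			drv_usecwait(10);
		bd_xfer_done(xfer, mydrv_cmd_error(cmd));
	}
	/* Otherwise the interrupt handler calls bd_xfer_done() later. */
	return (0);
}

Completing the transfer before o_write returns is what makes the biowait() in bd_dump() a no-op.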
1095 
1096 void
1097 bd_minphys(struct buf *bp)
1098 {
1099         minor_t inst;


1135 
1136 static int
1137 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1138 {
1139         _NOTE(ARGUNUSED(credp));
1140         return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1141 }
1142 
1143 static int
1144 bd_strategy(struct buf *bp)
1145 {
1146         minor_t         inst;
1147         minor_t         part;
1148         bd_t            *bd;
1149         diskaddr_t      p_lba;
1150         diskaddr_t      p_nblks;
1151         diskaddr_t      b_nblks;
1152         bd_xfer_impl_t  *xi;
1153         uint32_t        shift;
1154         int             (*func)(void *, bd_xfer_t *);

1155 
1156         part = BDPART(bp->b_edev);
1157         inst = BDINST(bp->b_edev);
1158 
1159         ASSERT(bp);
1160 
1161         bp->b_resid = bp->b_bcount;
1162 
1163         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1164                 bioerror(bp, ENXIO);
1165                 biodone(bp);
1166                 return (0);
1167         }
1168 
1169         if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1170             NULL, NULL, 0)) {
1171                 bioerror(bp, ENXIO);
1172                 biodone(bp);
1173                 return (0);
1174         }
1175 
1176         shift = bd->d_blkshift;
1177 
1178         if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
1179             (bp->b_lblkno > p_nblks)) {
1180                 bioerror(bp, ENXIO);
1181                 biodone(bp);
1182                 return (0);
1183         }
1184         b_nblks = bp->b_bcount >> shift;
1185         if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
1186                 biodone(bp);
1187                 return (0);
1188         }
1189 
1190         if ((b_nblks + bp->b_lblkno) > p_nblks) {
1191                 bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
1192                 bp->b_bcount -= bp->b_resid;
1193         } else {
1194                 bp->b_resid = 0;
1195         }
1196         func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1197 
1198         xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1199         if (xi == NULL) {
1200                 xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1201         }
1202         if (xi == NULL) {
1203         /* bd_xfer_alloc will have done bioerror */
1204                 biodone(bp);
1205                 return (0);
1206         }
1207         xi->i_blkno = bp->b_lblkno + p_lba;
1208 
1209         bd_submit(bd, xi);
1210 
1211         return (0);
1212 }
1213 
1214 static int
1215 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1216 {
1217         minor_t         inst;
1218         uint16_t        part;
1219         bd_t            *bd;
1220         void            *ptr = (void *)arg;
1221         int             rv;
1222 
1223         part = BDPART(dev);
1224         inst = BDINST(dev);
1225 
1226         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1227                 return (ENXIO);


1369 
1370 static int
1371 bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
1372     size_t length, void *tg_cookie)
1373 {
1374         bd_t            *bd;
1375         buf_t           *bp;
1376         bd_xfer_impl_t  *xi;
1377         int             rv;
1378         int             (*func)(void *, bd_xfer_t *);
1379         int             kmflag;
1380 
1381         /*
1382          * If we are running in polled mode (such as during dump(9e)
1383          * execution), then we cannot sleep for kernel allocations.
1384          */
1385         kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;
1386 
1387         bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1388 
1389         if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
1390                 /* We can only transfer whole blocks at a time! */
1391                 return (EINVAL);
1392         }
1393 
1394         if ((bp = getrbuf(kmflag)) == NULL) {
1395                 return (ENOMEM);
1396         }
1397 
1398         switch (cmd) {
1399         case TG_READ:
1400                 bp->b_flags = B_READ;
1401                 func = bd->d_ops.o_read;
1402                 break;
1403         case TG_WRITE:
1404                 bp->b_flags = B_WRITE;
1405                 func = bd->d_ops.o_write;
1406                 break;
1407         default:
1408                 freerbuf(bp);
1409                 return (EINVAL);


1888                 bd_xfer_free(xi);
1889                 biodone(bp);
1890                 return;
1891         }
1892 
1893         xi->i_blkno += xi->i_nblks;
1894 
1895         if (bd->d_use_dma) {
1896                 /* More transfer still pending... advance to next DMA window. */
1897                 rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
1898                     &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1899         } else {
1900                 /* Advance memory window. */
1901                 xi->i_kaddr += xi->i_len;
1902                 xi->i_offset += xi->i_len;
1903                 len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1904         }
1905 
1906 
1907         if ((rv != DDI_SUCCESS) ||
1908             (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
1909                 bd_runq_exit(xi, EFAULT);
1910 
1911                 bp->b_resid += xi->i_resid;
1912                 bd_xfer_free(xi);
1913                 bioerror(bp, EFAULT);
1914                 biodone(bp);
1915                 return;
1916         }
1917         xi->i_len = len;
1918         xi->i_nblks = len >> xi->i_blkshift;
1919 
1920         /* Submit next window to hardware. */
1921         rv = xi->i_func(bd->d_private, &xi->i_public);
1922         if (rv != 0) {
1923                 bd_runq_exit(xi, rv);
1924 
1925                 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1926 
1927                 bp->b_resid += xi->i_resid;
1928                 bd_xfer_free(xi);
1929                 bioerror(bp, rv);
1930                 biodone(bp);
1931         }
1932 }
1933 
1934 void
1935 bd_error(bd_xfer_t *xfer, int error)
1936 {
1937         bd_xfer_impl_t  *xi = (void *)xfer;
1938         bd_t            *bd = xi->i_bd;




   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  24  * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
  25  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright 2017 The MathWorks, Inc.  All rights reserved.
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/ksynch.h>
  31 #include <sys/kmem.h>
  32 #include <sys/file.h>
  33 #include <sys/errno.h>
  34 #include <sys/open.h>
  35 #include <sys/buf.h>
  36 #include <sys/uio.h>
  37 #include <sys/aio_req.h>
  38 #include <sys/cred.h>
  39 #include <sys/modctl.h>
  40 #include <sys/cmlb.h>
  41 #include <sys/conf.h>
  42 #include <sys/devops.h>
  43 #include <sys/list.h>
  44 #include <sys/sysmacros.h>
  45 #include <sys/dkio.h>
  46 #include <sys/vtoc.h>


 737         int                     (*cb)(caddr_t);
 738         size_t                  len;
 739         uint32_t                shift;
 740 
 741         if (kmflag == KM_SLEEP) {
 742                 cb = DDI_DMA_SLEEP;
 743         } else {
 744                 cb = DDI_DMA_DONTWAIT;
 745         }
 746 
 747         xi = kmem_cache_alloc(bd->d_cache, kmflag);
 748         if (xi == NULL) {
 749                 bioerror(bp, ENOMEM);
 750                 return (NULL);
 751         }
 752 
 753         ASSERT(bp);
 754 
 755         xi->i_bp = bp;
 756         xi->i_func = func;
 757         xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);
 758 
 759         if (bp->b_bcount == 0) {
 760                 xi->i_len = 0;
 761                 xi->i_nblks = 0;
 762                 xi->i_kaddr = NULL;
 763                 xi->i_resid = 0;
 764                 xi->i_num_win = 0;
 765                 goto done;
 766         }
 767 
 768         if (bp->b_flags & B_READ) {
 769                 dir = DDI_DMA_READ;
 770                 xi->i_func = bd->d_ops.o_read;
 771         } else {
 772                 dir = DDI_DMA_WRITE;
 773                 xi->i_func = bd->d_ops.o_write;
 774         }
 775 
 776         shift = bd->d_blkshift;
 777         xi->i_blkshift = shift;
 778 
 779         if (!bd->d_use_dma) {
 780                 bp_mapin(bp);
 781                 rv = 0;
 782                 xi->i_offset = 0;
 783                 xi->i_num_win =
 784                     (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
 785                 xi->i_cur_win = 0;
 786                 xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
 787                 xi->i_nblks = howmany(xi->i_len, (1U << shift));
 788                 xi->i_kaddr = bp->b_un.b_addr;
 789                 xi->i_resid = bp->b_bcount;
 790         } else {
 791 
 792                 /*
 793                  * We have to use consistent DMA if the address is misaligned.
 794                  */
 795                 if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
 796                     ((uintptr_t)bp->b_un.b_addr & 0x7)) {
 797                         dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
 798                 } else {
 799                         dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
 800                 }
 801 
 802                 status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
 803                     NULL, &xi->i_dmac, &xi->i_ndmac);
 804                 switch (status) {
 805                 case DDI_DMA_MAPPED:
 806                         xi->i_num_win = 1;
 807                         xi->i_cur_win = 0;
 808                         xi->i_offset = 0;
 809                         xi->i_len = bp->b_bcount;
 810                         xi->i_nblks = howmany(xi->i_len, (1U << shift));
 811                         xi->i_resid = bp->b_bcount;
 812                         rv = 0;
 813                         break;
 814                 case DDI_DMA_PARTIAL_MAP:
 815                         xi->i_cur_win = 0;
 816 
 817                         if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
 818                             DDI_SUCCESS) ||
 819                             (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
 820                             &len, &xi->i_dmac, &xi->i_ndmac) !=
 821                             DDI_SUCCESS) ||
 822                             (P2PHASE(len, (1U << DEV_BSHIFT)) != 0)) {
 823                                 (void) ddi_dma_unbind_handle(xi->i_dmah);
 824                                 rv = EFAULT;
 825                                 goto done;
 826                         }
 827                         xi->i_len = len;
 828                         xi->i_nblks = howmany(xi->i_len, (1U << shift));
 829                         xi->i_resid = bp->b_bcount;
 830                         rv = 0;
 831                         break;
 832                 case DDI_DMA_NORESOURCES:
 833                         rv = EAGAIN;
 834                         goto done;
 835                 case DDI_DMA_TOOBIG:
 836                         rv = EINVAL;
 837                         goto done;
 838                 case DDI_DMA_NOMAPPING:
 839                 case DDI_DMA_INUSE:
 840                 default:
 841                         rv = EFAULT;
 842                         goto done;
 843                 }
 844         }
 845 
 846 done:
 847         if (rv != 0) {
 848                 kmem_cache_free(bd->d_cache, xi);
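
This is the heart of the change: b_lblkno arrives from the buf(9S) layer in DEV_BSIZE (512-byte) units, while the device deals in 1 << d_blkshift byte blocks, so block addresses are converted by shifting and counts are rounded up with howmany(). A worked userland sketch, assuming a 4096-byte block device (d_blkshift == 12):

#include <stdio.h>
#include <stdint.h>

#define	DEV_BSHIFT	9				/* 512-byte units */
#define	howmany(x, y)	(((x) + ((y) - 1)) / (y))	/* round up */

int
main(void)
{
	uint32_t shift = 12;		/* 4096-byte device blocks */
	uint64_t b_lblkno = 24;		/* in 512-byte units */

	/* 24 * 512 == 12288 bytes, which is device block 3. */
	printf("%llu\n",
	    (unsigned long long)(b_lblkno >> (shift - DEV_BSHIFT)));

	/* A 6144-byte transfer still occupies two whole device blocks. */
	printf("%u\n", howmany(6144U, 1U << shift));
	return (0);
}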


1019 
1020         if (last) {
1021                 cmlb_invalidate(bd->d_cmlbh, 0);
1022         }
1023         rw_exit(&bd_lock);
1024 
1025         return (0);
1026 }
1027 
1028 static int
1029 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1030 {
1031         minor_t         inst;
1032         minor_t         part;
1033         diskaddr_t      pstart;
1034         diskaddr_t      psize;
1035         bd_t            *bd;
1036         bd_xfer_impl_t  *xi;
1037         buf_t           *bp;
1038         int             rv;
1039         uint32_t        shift;
1040         daddr_t         d_blkno;
1041         int     d_nblk;
1042 
1043         rw_enter(&bd_lock, RW_READER);
1044 
1045         part = BDPART(dev);
1046         inst = BDINST(dev);
1047 
1048         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1049                 rw_exit(&bd_lock);
1050                 return (ENXIO);
1051         }
1052         shift = bd->d_blkshift;
1053         d_blkno = blkno >> (shift - DEV_BSHIFT);
1054         d_nblk = howmany((nblk << DEV_BSHIFT), (1U << shift));
1055         /*
1056          * do cmlb, but do it synchronously unless we already have the
1057          * partition (which we probably should.)
1058          */
1059         if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1060             (void *)1)) {
1061                 rw_exit(&bd_lock);
1062                 return (ENXIO);
1063         }
1064 
1065         if ((d_blkno + d_nblk) > psize) {
1066                 rw_exit(&bd_lock);
1067                 return (EINVAL);
1068         }
1069         bp = getrbuf(KM_NOSLEEP);
1070         if (bp == NULL) {
1071                 rw_exit(&bd_lock);
1072                 return (ENOMEM);
1073         }
1074 
1075         bp->b_bcount = nblk << DEV_BSHIFT;
1076         bp->b_resid = bp->b_bcount;
1077         bp->b_lblkno = blkno;
1078         bp->b_un.b_addr = caddr;
1079 
1080         xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
1081         if (xi == NULL) {
1082                 rw_exit(&bd_lock);
1083                 freerbuf(bp);
1084                 return (ENOMEM);
1085         }
1086         xi->i_blkno = d_blkno + pstart;
1087         xi->i_flags = BD_XFER_POLL;
1088         bd_submit(bd, xi);
1089         rw_exit(&bd_lock);
1090 
1091         /*
1092          * Generally, we should have run this entirely synchronously
1093          * at this point and the biowait call should be a no-op.  If
1094          * it didn't happen this way, it's a bug in the underlying
1095          * driver not honoring BD_XFER_POLL.
1096          */
1097         (void) biowait(bp);
1098         rv = geterror(bp);
1099         freerbuf(bp);
1100         return (rv);
1101 }
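
bd_dump() gets the same treatment: the (blkno, nblk) pair it receives is in 512-byte units and must be converted to device blocks before the bounds check against psize. Illustrative arithmetic under the same assumed 4096-byte block size (shift == 12):

	blkno = 16 (512-byte units)  ->  d_blkno = 16 >> (12 - 9) = 2
	nblk  = 9  (512-byte units)  ->  d_nblk  = howmany(9 << 9, 1 << 12)
	                                         = howmany(4608, 4096) = 2

Rounding d_nblk up keeps the (d_blkno + d_nblk) > psize check conservative when the dump range ends in the middle of a device block.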
1102 
1103 void
1104 bd_minphys(struct buf *bp)
1105 {
1106         minor_t inst;


1142 
1143 static int
1144 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1145 {
1146         _NOTE(ARGUNUSED(credp));
1147         return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1148 }
1149 
1150 static int
1151 bd_strategy(struct buf *bp)
1152 {
1153         minor_t         inst;
1154         minor_t         part;
1155         bd_t            *bd;
1156         diskaddr_t      p_lba;
1157         diskaddr_t      p_nblks;
1158         diskaddr_t      b_nblks;
1159         bd_xfer_impl_t  *xi;
1160         uint32_t        shift;
1161         int             (*func)(void *, bd_xfer_t *);
1162         diskaddr_t      lblkno;
1163 
1164         part = BDPART(bp->b_edev);
1165         inst = BDINST(bp->b_edev);
1166 
1167         ASSERT(bp);
1168 
1169         bp->b_resid = bp->b_bcount;
1170 
1171         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1172                 bioerror(bp, ENXIO);
1173                 biodone(bp);
1174                 return (0);
1175         }
1176 
1177         if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1178             NULL, NULL, 0)) {
1179                 bioerror(bp, ENXIO);
1180                 biodone(bp);
1181                 return (0);
1182         }
1183 
1184         shift = bd->d_blkshift;
1185         lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
1186         if ((P2PHASE(bp->b_bcount, (1U << DEV_BSHIFT)) != 0) ||
1187             (lblkno > p_nblks)) {
1188                 bioerror(bp, ENXIO);
1189                 biodone(bp);
1190                 return (0);
1191         }
1192         b_nblks = howmany(bp->b_bcount, (1U << shift));
1193         if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
1194                 biodone(bp);
1195                 return (0);
1196         }
1197 
1198         if ((b_nblks + lblkno) > p_nblks) {
1199                 bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
1200                 bp->b_bcount -= bp->b_resid;
1201         } else {
1202                 bp->b_resid = 0;
1203         }
1204         func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1205 
1206         xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1207         if (xi == NULL) {
1208                 xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1209         }
1210         if (xi == NULL) {
1211                 /* bd_xfer_alloc will have done bioerror */
1212                 biodone(bp);
1213                 return (0);
1214         }
1215         xi->i_blkno = lblkno + p_lba;
1216 
1217         bd_submit(bd, xi);
1218 
1219         return (0);
1220 }
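
When a request would run past the end of the partition, bd_strategy() trims it and reports the untransferred tail in b_resid. A worked example with the same assumed 4096-byte block size and a partition of p_nblks == 100 device blocks:

	lblkno = 98, b_bcount = 16384  ->  b_nblks = howmany(16384, 4096) = 4
	98 + 4 > 100, so b_resid = (98 + 4 - 100) << 12 = 8192
	b_bcount drops to 8192 and only the two in-range blocks are submitted.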
1221 
1222 static int
1223 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1224 {
1225         minor_t         inst;
1226         uint16_t        part;
1227         bd_t            *bd;
1228         void            *ptr = (void *)arg;
1229         int             rv;
1230 
1231         part = BDPART(dev);
1232         inst = BDINST(dev);
1233 
1234         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1235                 return (ENXIO);


1377 
1378 static int
1379 bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
1380     size_t length, void *tg_cookie)
1381 {
1382         bd_t            *bd;
1383         buf_t           *bp;
1384         bd_xfer_impl_t  *xi;
1385         int             rv;
1386         int             (*func)(void *, bd_xfer_t *);
1387         int             kmflag;
1388 
1389         /*
1390          * If we are running in polled mode (such as during dump(9e)
1391          * execution), then we cannot sleep for kernel allocations.
1392          */
1393         kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;
1394 
1395         bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1396 
1397         if (P2PHASE(length, (1U << DEV_BSHIFT)) != 0) {
1398                 /* We can only transfer whole blocks at a time! */
1399                 return (EINVAL);
1400         }
1401 
1402         if ((bp = getrbuf(kmflag)) == NULL) {
1403                 return (ENOMEM);
1404         }
1405 
1406         switch (cmd) {
1407         case TG_READ:
1408                 bp->b_flags = B_READ;
1409                 func = bd->d_ops.o_read;
1410                 break;
1411         case TG_WRITE:
1412                 bp->b_flags = B_WRITE;
1413                 func = bd->d_ops.o_write;
1414                 break;
1415         default:
1416                 freerbuf(bp);
1417                 return (EINVAL);


1896                 bd_xfer_free(xi);
1897                 biodone(bp);
1898                 return;
1899         }
1900 
1901         xi->i_blkno += xi->i_nblks;
1902 
1903         if (bd->d_use_dma) {
1904                 /* More transfer still pending... advance to next DMA window. */
1905                 rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
1906                     &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1907         } else {
1908                 /* Advance memory window. */
1909                 xi->i_kaddr += xi->i_len;
1910                 xi->i_offset += xi->i_len;
1911                 len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1912         }
1913 
1914 
1915         if ((rv != DDI_SUCCESS) ||
1916             (P2PHASE(len, (1U << DEV_BSHIFT)) != 0)) {
1917                 bd_runq_exit(xi, EFAULT);
1918 
1919                 bp->b_resid += xi->i_resid;
1920                 bd_xfer_free(xi);
1921                 bioerror(bp, EFAULT);
1922                 biodone(bp);
1923                 return;
1924         }
1925         xi->i_len = len;
1926         xi->i_nblks = howmany(len, (1U << xi->i_blkshift));
1927 
1928         /* Submit next window to hardware. */
1929         rv = xi->i_func(bd->d_private, &xi->i_public);
1930         if (rv != 0) {
1931                 bd_runq_exit(xi, rv);
1932 
1933                 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1934 
1935                 bp->b_resid += xi->i_resid;
1936                 bd_xfer_free(xi);
1937                 bioerror(bp, rv);
1938                 biodone(bp);
1939         }
1940 }
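
For a partially-mapped DMA transfer, the completion path above advances through the remaining windows one at a time before the buf is finally retired. A sketch of the progression, assuming three 64 KiB windows on a 4096-byte block device (so 16 device blocks per window):

	window 0 completes: i_blkno += 16, ddi_dma_getwin() maps window 1
	window 1 completes: i_blkno += 16, ddi_dma_getwin() maps window 2
	window 2 completes: nothing left to map; biodone() finishes the buf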
1941 
1942 void
1943 bd_error(bd_xfer_t *xfer, int error)
1944 {
1945         bd_xfer_impl_t  *xi = (void *)xfer;
1946         bd_t            *bd = xi->i_bd;