7367 blkdev: support block size larger than 512
Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
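
The listings below are from the webrev for this change: the first copy of blkdev.c shows the file before the fix, and the second copy (carrying the 2017 MathWorks copyright and the new bd_check_uio() helper) shows it afterwards. The core of the change is a unit conversion: buf(9S) block numbers (b_lblkno) are expressed in 512-byte DEV_BSIZE units, while the device operates on blocks of (1 << d_blkshift) bytes, so the driver now shifts addresses down by (d_blkshift - DEV_BSHIFT) before handing them to the hardware. The following minimal userland sketch of that conversion is illustrative only and not part of the webrev; the block size and address in it are hypothetical.

	/*
	 * Illustrative sketch only: bd_xfer_alloc() and bd_strategy() in the
	 * new file convert buf(9S) addresses, which are in 512-byte DEV_BSIZE
	 * units, into device blocks of (1 << d_blkshift) bytes.
	 */
	#include <stdint.h>
	#include <stdio.h>

	#define	DEV_BSHIFT	9		/* 512-byte units used by buf(9S) */

	int
	main(void)
	{
		uint64_t b_lblkno = 24;		/* address in 512-byte blocks (hypothetical) */
		uint32_t d_blkshift = 12;	/* a 4096-byte device block (hypothetical) */

		/* Same shift as "bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT)" below. */
		uint64_t d_blkno = b_lblkno >> (d_blkshift - DEV_BSHIFT);

		(void) printf("512-byte block %llu maps to device block %llu\n",
		    (unsigned long long)b_lblkno, (unsigned long long)d_blkno);
		return (0);
	}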


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  24  * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
  25  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/ksynch.h>
  30 #include <sys/kmem.h>
  31 #include <sys/file.h>
  32 #include <sys/errno.h>
  33 #include <sys/open.h>
  34 #include <sys/buf.h>
  35 #include <sys/uio.h>
  36 #include <sys/aio_req.h>
  37 #include <sys/cred.h>
  38 #include <sys/modctl.h>
  39 #include <sys/cmlb.h>
  40 #include <sys/conf.h>
  41 #include <sys/devops.h>
  42 #include <sys/list.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/dkio.h>
  45 #include <sys/vtoc.h>


 154 static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 155 static int bd_dump(dev_t, caddr_t, daddr_t, int);
 156 static int bd_read(dev_t, struct uio *, cred_t *);
 157 static int bd_write(dev_t, struct uio *, cred_t *);
 158 static int bd_aread(dev_t, struct aio_req *, cred_t *);
 159 static int bd_awrite(dev_t, struct aio_req *, cred_t *);
 160 static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
 161     caddr_t, int *);
 162 
 163 static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
 164     void *);
 165 static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
 166 static int bd_xfer_ctor(void *, void *, int);
 167 static void bd_xfer_dtor(void *, void *);
 168 static void bd_sched(bd_t *);
 169 static void bd_submit(bd_t *, bd_xfer_impl_t *);
 170 static void bd_runq_exit(bd_xfer_impl_t *, int);
 171 static void bd_update_state(bd_t *);
 172 static int bd_check_state(bd_t *, enum dkio_state *);
 173 static int bd_flush_write_cache(bd_t *, struct dk_callback *);
 174 
 175 struct cmlb_tg_ops bd_tg_ops = {
 176         TG_DK_OPS_VERSION_1,
 177         bd_tg_rdwr,
 178         bd_tg_getinfo,
 179 };
 180 
 181 static struct cb_ops bd_cb_ops = {
 182         bd_open,                /* open */
 183         bd_close,               /* close */
 184         bd_strategy,            /* strategy */
 185         nodev,                  /* print */
 186         bd_dump,                /* dump */
 187         bd_read,                /* read */
 188         bd_write,               /* write */
 189         bd_ioctl,               /* ioctl */
 190         nodev,                  /* devmap */
 191         nodev,                  /* mmap */
 192         nodev,                  /* segmap */
 193         nochpoll,               /* poll */


 736         int                     (*cb)(caddr_t);
 737         size_t                  len;
 738         uint32_t                shift;
 739 
 740         if (kmflag == KM_SLEEP) {
 741                 cb = DDI_DMA_SLEEP;
 742         } else {
 743                 cb = DDI_DMA_DONTWAIT;
 744         }
 745 
 746         xi = kmem_cache_alloc(bd->d_cache, kmflag);
 747         if (xi == NULL) {
 748                 bioerror(bp, ENOMEM);
 749                 return (NULL);
 750         }
 751 
 752         ASSERT(bp);
 753 
 754         xi->i_bp = bp;
 755         xi->i_func = func;
 756         xi->i_blkno = bp->b_lblkno;
 757 
 758         if (bp->b_bcount == 0) {
 759                 xi->i_len = 0;
 760                 xi->i_nblks = 0;
 761                 xi->i_kaddr = NULL;
 762                 xi->i_resid = 0;
 763                 xi->i_num_win = 0;
 764                 goto done;
 765         }
 766 
 767         if (bp->b_flags & B_READ) {
 768                 dir = DDI_DMA_READ;
 769                 xi->i_func = bd->d_ops.o_read;
 770         } else {
 771                 dir = DDI_DMA_WRITE;
 772                 xi->i_func = bd->d_ops.o_write;
 773         }
 774 
 775         shift = bd->d_blkshift;
 776         xi->i_blkshift = shift;


 801                 status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
 802                     NULL, &xi->i_dmac, &xi->i_ndmac);
 803                 switch (status) {
 804                 case DDI_DMA_MAPPED:
 805                         xi->i_num_win = 1;
 806                         xi->i_cur_win = 0;
 807                         xi->i_offset = 0;
 808                         xi->i_len = bp->b_bcount;
 809                         xi->i_nblks = xi->i_len >> shift;
 810                         xi->i_resid = bp->b_bcount;
 811                         rv = 0;
 812                         break;
 813                 case DDI_DMA_PARTIAL_MAP:
 814                         xi->i_cur_win = 0;
 815 
 816                         if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
 817                             DDI_SUCCESS) ||
 818                             (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
 819                             &len, &xi->i_dmac, &xi->i_ndmac) !=
 820                             DDI_SUCCESS) ||
 821                             (P2PHASE(len, shift) != 0)) {
 822                                 (void) ddi_dma_unbind_handle(xi->i_dmah);
 823                                 rv = EFAULT;
 824                                 goto done;
 825                         }
 826                         xi->i_len = len;
 827                         xi->i_nblks = xi->i_len >> shift;
 828                         xi->i_resid = bp->b_bcount;
 829                         rv = 0;
 830                         break;
 831                 case DDI_DMA_NORESOURCES:
 832                         rv = EAGAIN;
 833                         goto done;
 834                 case DDI_DMA_TOOBIG:
 835                         rv = EINVAL;
 836                         goto done;
 837                 case DDI_DMA_NOMAPPING:
 838                 case DDI_DMA_INUSE:
 839                 default:
 840                         rv = EFAULT;
 841                         goto done;


1018 
1019         if (last) {
1020                 cmlb_invalidate(bd->d_cmlbh, 0);
1021         }
1022         rw_exit(&bd_lock);
1023 
1024         return (0);
1025 }
1026 
1027 static int
1028 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1029 {
1030         minor_t         inst;
1031         minor_t         part;
1032         diskaddr_t      pstart;
1033         diskaddr_t      psize;
1034         bd_t            *bd;
1035         bd_xfer_impl_t  *xi;
1036         buf_t           *bp;
1037         int             rv;
1038 
1039         rw_enter(&bd_lock, RW_READER);
1040 
1041         part = BDPART(dev);
1042         inst = BDINST(dev);
1043 
1044         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1045                 rw_exit(&bd_lock);
1046                 return (ENXIO);
1047         }
1048         /*
1049          * do cmlb, but do it synchronously unless we already have the
1050          * partition (which we probably should.)
1051          */
1052         if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1053             (void *)1)) {
1054                 rw_exit(&bd_lock);
1055                 return (ENXIO);
1056         }
1057 
1058         if ((blkno + nblk) > psize) {
1059                 rw_exit(&bd_lock);
1060                 return (EINVAL);
1061         }
1062         bp = getrbuf(KM_NOSLEEP);
1063         if (bp == NULL) {
1064                 rw_exit(&bd_lock);
1065                 return (ENOMEM);
1066         }
1067 
1068         bp->b_bcount = nblk << bd->d_blkshift;
1069         bp->b_resid = bp->b_bcount;
1070         bp->b_lblkno = blkno;
1071         bp->b_un.b_addr = caddr;
1072 
1073         xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
1074         if (xi == NULL) {
1075                 rw_exit(&bd_lock);
1076                 freerbuf(bp);
1077                 return (ENOMEM);
1078         }
1079         xi->i_blkno = blkno + pstart;
1080         xi->i_flags = BD_XFER_POLL;
1081         bd_submit(bd, xi);
1082         rw_exit(&bd_lock);
1083 
1084         /*
1085          * Generally, we should have run this entirely synchronously
1086          * at this point and the biowait call should be a no-op.  If
1087          * it didn't happen this way, it's a bug in the underlying
1088          * driver not honoring BD_XFER_POLL.
1089          */
1090         (void) biowait(bp);
1091         rv = geterror(bp);
1092         freerbuf(bp);
1093         return (rv);
1094 }
1095 
1096 void
1097 bd_minphys(struct buf *bp)
1098 {
1099         minor_t inst;
1100         bd_t    *bd;
1101         inst = BDINST(bp->b_edev);
1102 
1103         bd = ddi_get_soft_state(bd_state, inst);
1104 
1105         /*
1106          * In a non-debug kernel, bd_strategy will catch !bd as
1107          * well, and will fail nicely.
1108          */
1109         ASSERT(bd);
1110 
1111         if (bp->b_bcount > bd->d_maxxfer)
1112                 bp->b_bcount = bd->d_maxxfer;
1113 }
1114 
1115 static int
1116 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1117 {
1118         _NOTE(ARGUNUSED(credp));
1119         return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1120 }
1121 
1122 static int
1123 bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1124 {
1125         _NOTE(ARGUNUSED(credp));
1126         return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1127 }
1128 
1129 static int
1130 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1131 {
1132         _NOTE(ARGUNUSED(credp));
1133         return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1134 }
1135 
1136 static int
1137 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1138 {
1139         _NOTE(ARGUNUSED(credp));

1140         return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1141 }
1142 
1143 static int
1144 bd_strategy(struct buf *bp)
1145 {
1146         minor_t         inst;
1147         minor_t         part;
1148         bd_t            *bd;
1149         diskaddr_t      p_lba;
1150         diskaddr_t      p_nblks;
1151         diskaddr_t      b_nblks;
1152         bd_xfer_impl_t  *xi;
1153         uint32_t        shift;
1154         int             (*func)(void *, bd_xfer_t *);
1155 
1156         part = BDPART(bp->b_edev);
1157         inst = BDINST(bp->b_edev);
1158 
1159         ASSERT(bp);
1160 
1161         bp->b_resid = bp->b_bcount;
1162 
1163         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1164                 bioerror(bp, ENXIO);
1165                 biodone(bp);
1166                 return (0);
1167         }
1168 
1169         if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1170             NULL, NULL, 0)) {
1171                 bioerror(bp, ENXIO);
1172                 biodone(bp);
1173                 return (0);
1174         }
1175 
1176         shift = bd->d_blkshift;
1177 
1178         if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
1179             (bp->b_lblkno > p_nblks)) {
1180                 bioerror(bp, ENXIO);
1181                 biodone(bp);
1182                 return (0);
1183         }
1184         b_nblks = bp->b_bcount >> shift;
1185         if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
1186                 biodone(bp);
1187                 return (0);
1188         }
1189 
1190         if ((b_nblks + bp->b_lblkno) > p_nblks) {
1191                 bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
1192                 bp->b_bcount -= bp->b_resid;
1193         } else {
1194                 bp->b_resid = 0;
1195         }
1196         func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1197 
1198         xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1199         if (xi == NULL) {
1200                 xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1201         }
1202         if (xi == NULL) {
1203                 /* bd_request_alloc will have done bioerror */
1204                 biodone(bp);
1205                 return (0);
1206         }
1207         xi->i_blkno = bp->b_lblkno + p_lba;
1208 
1209         bd_submit(bd, xi);
1210 
1211         return (0);
1212 }
1213 
1214 static int
1215 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1216 {
1217         minor_t         inst;
1218         uint16_t        part;
1219         bd_t            *bd;
1220         void            *ptr = (void *)arg;
1221         int             rv;
1222 
1223         part = BDPART(dev);
1224         inst = BDINST(dev);
1225 
1226         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1227                 return (ENXIO);


1888                 bd_xfer_free(xi);
1889                 biodone(bp);
1890                 return;
1891         }
1892 
1893         xi->i_blkno += xi->i_nblks;
1894 
1895         if (bd->d_use_dma) {
1896                 /* More transfer still pending... advance to next DMA window. */
1897                 rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
1898                     &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1899         } else {
1900                 /* Advance memory window. */
1901                 xi->i_kaddr += xi->i_len;
1902                 xi->i_offset += xi->i_len;
1903                 len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1904         }
1905 
1906 
1907         if ((rv != DDI_SUCCESS) ||
1908             (P2PHASE(len, (1U << xi->i_blkshift) != 0))) {
1909                 bd_runq_exit(xi, EFAULT);
1910 
1911                 bp->b_resid += xi->i_resid;
1912                 bd_xfer_free(xi);
1913                 bioerror(bp, EFAULT);
1914                 biodone(bp);
1915                 return;
1916         }
1917         xi->i_len = len;
1918         xi->i_nblks = len >> xi->i_blkshift;
1919 
1920         /* Submit next window to hardware. */
1921         rv = xi->i_func(bd->d_private, &xi->i_public);
1922         if (rv != 0) {
1923                 bd_runq_exit(xi, rv);
1924 
1925                 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1926 
1927                 bp->b_resid += xi->i_resid;
1928                 bd_xfer_free(xi);




   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  24  * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
  25  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright 2017 The MathWorks, Inc.  All rights reserved.
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/ksynch.h>
  31 #include <sys/kmem.h>
  32 #include <sys/file.h>
  33 #include <sys/errno.h>
  34 #include <sys/open.h>
  35 #include <sys/buf.h>
  36 #include <sys/uio.h>
  37 #include <sys/aio_req.h>
  38 #include <sys/cred.h>
  39 #include <sys/modctl.h>
  40 #include <sys/cmlb.h>
  41 #include <sys/conf.h>
  42 #include <sys/devops.h>
  43 #include <sys/list.h>
  44 #include <sys/sysmacros.h>
  45 #include <sys/dkio.h>
  46 #include <sys/vtoc.h>


 155 static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 156 static int bd_dump(dev_t, caddr_t, daddr_t, int);
 157 static int bd_read(dev_t, struct uio *, cred_t *);
 158 static int bd_write(dev_t, struct uio *, cred_t *);
 159 static int bd_aread(dev_t, struct aio_req *, cred_t *);
 160 static int bd_awrite(dev_t, struct aio_req *, cred_t *);
 161 static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
 162     caddr_t, int *);
 163 
 164 static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
 165     void *);
 166 static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
 167 static int bd_xfer_ctor(void *, void *, int);
 168 static void bd_xfer_dtor(void *, void *);
 169 static void bd_sched(bd_t *);
 170 static void bd_submit(bd_t *, bd_xfer_impl_t *);
 171 static void bd_runq_exit(bd_xfer_impl_t *, int);
 172 static void bd_update_state(bd_t *);
 173 static int bd_check_state(bd_t *, enum dkio_state *);
 174 static int bd_flush_write_cache(bd_t *, struct dk_callback *);
 175 static int bd_check_uio(dev_t, struct uio *);
 176 
 177 struct cmlb_tg_ops bd_tg_ops = {
 178         TG_DK_OPS_VERSION_1,
 179         bd_tg_rdwr,
 180         bd_tg_getinfo,
 181 };
 182 
 183 static struct cb_ops bd_cb_ops = {
 184         bd_open,                /* open */
 185         bd_close,               /* close */
 186         bd_strategy,            /* strategy */
 187         nodev,                  /* print */
 188         bd_dump,                /* dump */
 189         bd_read,                /* read */
 190         bd_write,               /* write */
 191         bd_ioctl,               /* ioctl */
 192         nodev,                  /* devmap */
 193         nodev,                  /* mmap */
 194         nodev,                  /* segmap */
 195         nochpoll,               /* poll */


 738         int                     (*cb)(caddr_t);
 739         size_t                  len;
 740         uint32_t                shift;
 741 
 742         if (kmflag == KM_SLEEP) {
 743                 cb = DDI_DMA_SLEEP;
 744         } else {
 745                 cb = DDI_DMA_DONTWAIT;
 746         }
 747 
 748         xi = kmem_cache_alloc(bd->d_cache, kmflag);
 749         if (xi == NULL) {
 750                 bioerror(bp, ENOMEM);
 751                 return (NULL);
 752         }
 753 
 754         ASSERT(bp);
 755 
 756         xi->i_bp = bp;
 757         xi->i_func = func;
 758         xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);
 759 
 760         if (bp->b_bcount == 0) {
 761                 xi->i_len = 0;
 762                 xi->i_nblks = 0;
 763                 xi->i_kaddr = NULL;
 764                 xi->i_resid = 0;
 765                 xi->i_num_win = 0;
 766                 goto done;
 767         }
 768 
 769         if (bp->b_flags & B_READ) {
 770                 dir = DDI_DMA_READ;
 771                 xi->i_func = bd->d_ops.o_read;
 772         } else {
 773                 dir = DDI_DMA_WRITE;
 774                 xi->i_func = bd->d_ops.o_write;
 775         }
 776 
 777         shift = bd->d_blkshift;
 778         xi->i_blkshift = shift;


 803                 status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
 804                     NULL, &xi->i_dmac, &xi->i_ndmac);
 805                 switch (status) {
 806                 case DDI_DMA_MAPPED:
 807                         xi->i_num_win = 1;
 808                         xi->i_cur_win = 0;
 809                         xi->i_offset = 0;
 810                         xi->i_len = bp->b_bcount;
 811                         xi->i_nblks = xi->i_len >> shift;
 812                         xi->i_resid = bp->b_bcount;
 813                         rv = 0;
 814                         break;
 815                 case DDI_DMA_PARTIAL_MAP:
 816                         xi->i_cur_win = 0;
 817 
 818                         if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
 819                             DDI_SUCCESS) ||
 820                             (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
 821                             &len, &xi->i_dmac, &xi->i_ndmac) !=
 822                             DDI_SUCCESS) ||
 823                             (P2PHASE(len, (1U << shift)) != 0)) {
 824                                 (void) ddi_dma_unbind_handle(xi->i_dmah);
 825                                 rv = EFAULT;
 826                                 goto done;
 827                         }
 828                         xi->i_len = len;
 829                         xi->i_nblks = xi->i_len >> shift;
 830                         xi->i_resid = bp->b_bcount;
 831                         rv = 0;
 832                         break;
 833                 case DDI_DMA_NORESOURCES:
 834                         rv = EAGAIN;
 835                         goto done;
 836                 case DDI_DMA_TOOBIG:
 837                         rv = EINVAL;
 838                         goto done;
 839                 case DDI_DMA_NOMAPPING:
 840                 case DDI_DMA_INUSE:
 841                 default:
 842                         rv = EFAULT;
 843                         goto done;


1020 
1021         if (last) {
1022                 cmlb_invalidate(bd->d_cmlbh, 0);
1023         }
1024         rw_exit(&bd_lock);
1025 
1026         return (0);
1027 }
1028 
1029 static int
1030 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1031 {
1032         minor_t         inst;
1033         minor_t         part;
1034         diskaddr_t      pstart;
1035         diskaddr_t      psize;
1036         bd_t            *bd;
1037         bd_xfer_impl_t  *xi;
1038         buf_t           *bp;
1039         int             rv;
1040         uint32_t        shift;
1041         daddr_t         d_blkno;
1042         int     d_nblk;
1043 
1044         rw_enter(&bd_lock, RW_READER);
1045 
1046         part = BDPART(dev);
1047         inst = BDINST(dev);
1048 
1049         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1050                 rw_exit(&bd_lock);
1051                 return (ENXIO);
1052         }
1053         shift = bd->d_blkshift;
1054         d_blkno = blkno >> (shift - DEV_BSHIFT);
1055         d_nblk = nblk >> (shift - DEV_BSHIFT);
1056         /*
1057          * do cmlb, but do it synchronously unless we already have the
1058          * partition (which we probably should.)
1059          */
1060         if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1061             (void *)1)) {
1062                 rw_exit(&bd_lock);
1063                 return (ENXIO);
1064         }
1065 
1066         if ((d_blkno + d_nblk) > psize) {
1067                 rw_exit(&bd_lock);
1068                 return (EINVAL);
1069         }
1070         bp = getrbuf(KM_NOSLEEP);
1071         if (bp == NULL) {
1072                 rw_exit(&bd_lock);
1073                 return (ENOMEM);
1074         }
1075 
1076         bp->b_bcount = nblk << DEV_BSHIFT;
1077         bp->b_resid = bp->b_bcount;
1078         bp->b_lblkno = blkno;
1079         bp->b_un.b_addr = caddr;
1080 
1081         xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
1082         if (xi == NULL) {
1083                 rw_exit(&bd_lock);
1084                 freerbuf(bp);
1085                 return (ENOMEM);
1086         }
1087         xi->i_blkno = d_blkno + pstart;
1088         xi->i_flags = BD_XFER_POLL;
1089         bd_submit(bd, xi);
1090         rw_exit(&bd_lock);
1091 
1092         /*
1093          * Generally, we should have run this entirely synchronously
1094          * at this point and the biowait call should be a no-op.  If
1095          * it didn't happen this way, it's a bug in the underlying
1096          * driver not honoring BD_XFER_POLL.
1097          */
1098         (void) biowait(bp);
1099         rv = geterror(bp);
1100         freerbuf(bp);
1101         return (rv);
1102 }
1103 
1104 void
1105 bd_minphys(struct buf *bp)
1106 {
1107         minor_t inst;
1108         bd_t    *bd;
1109         inst = BDINST(bp->b_edev);
1110 
1111         bd = ddi_get_soft_state(bd_state, inst);
1112 
1113         /*
1114          * In a non-debug kernel, bd_strategy will catch !bd as
1115          * well, and will fail nicely.
1116          */
1117         ASSERT(bd);
1118 
1119         if (bp->b_bcount > bd->d_maxxfer)
1120                 bp->b_bcount = bd->d_maxxfer;
1121 }
1122 
1123 static int
1124 bd_check_uio(dev_t dev, struct uio *uio)
1125 {
1126         bd_t            *bd;
1127         uint32_t        shift;
1128 
1129         if ((bd = ddi_get_soft_state(bd_state, BDINST(dev))) == NULL) {
1130                 return (ENXIO);
1131         }
1132 
1133         shift = bd->d_blkshift;
1134         if ((P2PHASE(uio->uio_loffset, (1U << shift)) != 0) ||
1135             (P2PHASE(uio->uio_iov->iov_len, (1U << shift)) != 0)) {
1136                 return (EINVAL);
1137         }
1138 
1139         return (0);
1140 }
1141 
1142 static int
1143 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1144 {
1145         _NOTE(ARGUNUSED(credp));
1146         int     ret = bd_check_uio(dev, uio);
1147         if (ret != 0) {
1148                 return (ret);
1149         }
1150         return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1151 }
1152 
1153 static int
1154 bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1155 {
1156         _NOTE(ARGUNUSED(credp));
1157         int     ret = bd_check_uio(dev, uio);
1158         if (ret != 0) {
1159                 return (ret);
1160         }
1161         return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1162 }
1163 
1164 static int
1165 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1166 {
1167         _NOTE(ARGUNUSED(credp));
1168         int     ret = bd_check_uio(dev, aio->aio_uio);
1169         if (ret != 0) {
1170                 return (ret);
1171         }
1172         return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1173 }
1174 
1175 static int
1176 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1177 {
1178         _NOTE(ARGUNUSED(credp));
1179         int     ret = bd_check_uio(dev, aio->aio_uio);
1180         if (ret != 0) {
1181                 return (ret);
1182         }
1183         return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1184 }
1185 
1186 static int
1187 bd_strategy(struct buf *bp)
1188 {
1189         minor_t         inst;
1190         minor_t         part;
1191         bd_t            *bd;
1192         diskaddr_t      p_lba;
1193         diskaddr_t      p_nblks;
1194         diskaddr_t      b_nblks;
1195         bd_xfer_impl_t  *xi;
1196         uint32_t        shift;
1197         int             (*func)(void *, bd_xfer_t *);
1198         diskaddr_t      lblkno;
1199 
1200         part = BDPART(bp->b_edev);
1201         inst = BDINST(bp->b_edev);
1202 
1203         ASSERT(bp);
1204 
1205         bp->b_resid = bp->b_bcount;
1206 
1207         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1208                 bioerror(bp, ENXIO);
1209                 biodone(bp);
1210                 return (0);
1211         }
1212 
1213         if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1214             NULL, NULL, 0)) {
1215                 bioerror(bp, ENXIO);
1216                 biodone(bp);
1217                 return (0);
1218         }
1219 
1220         shift = bd->d_blkshift;
1221         lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
1222         if ((P2PHASE(bp->b_lblkno, (1U << (shift - DEV_BSHIFT))) != 0) ||
1223             (P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
1224             (lblkno > p_nblks)) {
1225                 bioerror(bp, EINVAL);
1226                 biodone(bp);
1227                 return (0);
1228         }
1229         b_nblks = bp->b_bcount >> shift;
1230         if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
1231                 biodone(bp);
1232                 return (0);
1233         }
1234 
1235         if ((b_nblks + lblkno) > p_nblks) {
1236                 bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
1237                 bp->b_bcount -= bp->b_resid;
1238         } else {
1239                 bp->b_resid = 0;
1240         }
1241         func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1242 
1243         xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1244         if (xi == NULL) {
1245                 xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1246         }
1247         if (xi == NULL) {
1248                 /* bd_request_alloc will have done bioerror */
1249                 biodone(bp);
1250                 return (0);
1251         }
1252         xi->i_blkno = lblkno + p_lba;
1253 
1254         bd_submit(bd, xi);
1255 
1256         return (0);
1257 }
1258 
1259 static int
1260 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1261 {
1262         minor_t         inst;
1263         uint16_t        part;
1264         bd_t            *bd;
1265         void            *ptr = (void *)arg;
1266         int             rv;
1267 
1268         part = BDPART(dev);
1269         inst = BDINST(dev);
1270 
1271         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1272                 return (ENXIO);


1933                 bd_xfer_free(xi);
1934                 biodone(bp);
1935                 return;
1936         }
1937 
1938         xi->i_blkno += xi->i_nblks;
1939 
1940         if (bd->d_use_dma) {
1941                 /* More transfer still pending... advance to next DMA window. */
1942                 rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
1943                     &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1944         } else {
1945                 /* Advance memory window. */
1946                 xi->i_kaddr += xi->i_len;
1947                 xi->i_offset += xi->i_len;
1948                 len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1949         }
1950 
1951 
1952         if ((rv != DDI_SUCCESS) ||
1953             (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
1954                 bd_runq_exit(xi, EFAULT);
1955 
1956                 bp->b_resid += xi->i_resid;
1957                 bd_xfer_free(xi);
1958                 bioerror(bp, EFAULT);
1959                 biodone(bp);
1960                 return;
1961         }
1962         xi->i_len = len;
1963         xi->i_nblks = len >> xi->i_blkshift;
1964 
1965         /* Submit next window to hardware. */
1966         rv = xi->i_func(bd->d_private, &xi->i_public);
1967         if (rv != 0) {
1968                 bd_runq_exit(xi, rv);
1969 
1970                 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1971 
1972                 bp->b_resid += xi->i_resid;
1973                 bd_xfer_free(xi);
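
Besides the unit conversion, the diff also corrects the second argument to P2PHASE() in several alignment checks: the old code passed the shift count itself, while the new code passes the block size (1U << shift). The short sketch below is illustrative only and uses a hypothetical 4096-byte block; P2PHASE(x, align) masks x against (align - 1), matching the definition in sys/sysmacros.h, so a misaligned length slips through when the align argument is the shift count rather than the block size.

	#include <stdint.h>
	#include <stdio.h>

	#define	P2PHASE(x, align)	((x) & ((align) - 1))	/* as in sys/sysmacros.h */

	int
	main(void)
	{
		uint32_t shift = 12;	/* hypothetical 4096-byte block */
		size_t len = 4608;	/* one block plus 512 bytes: misaligned */

		/* New form: remainder of len modulo the 4096-byte block size (512). */
		(void) printf("P2PHASE(len, 1U << shift) = %zu\n",
		    P2PHASE(len, (size_t)1 << shift));

		/* Old form: masks against (shift - 1), so the misalignment is missed (0). */
		(void) printf("P2PHASE(len, shift)       = %zu\n",
		    P2PHASE(len, (size_t)shift));
		return (0);
	}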