1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/ksynch.h>
  28 #include <sys/errno.h>
  29 #include <sys/file.h>
  30 #include <sys/open.h>
  31 #include <sys/cred.h>
  32 #include <sys/kmem.h>
  33 #include <sys/uio.h>
  34 #include <sys/ddi.h>
  35 #include <sys/sdt.h>
  36 
  37 #define __NSC_GEN__
  38 #include "nsc_dev.h"
  39 #include "nsc_disk.h"
  40 #include "../nsctl.h"
  41 
  42 
  43 #define _I(x)   (((long)(&((nsc_io_t *)0)->x))/sizeof (long))
  44 
  45 nsc_def_t _nsc_disk_def[] = {
  46         { "UserRead",   (uintptr_t)nsc_ioerr,   _I(uread) },
  47         { "UserWrite",  (uintptr_t)nsc_ioerr,   _I(uwrite) },
  48         { "PartSize",   (uintptr_t)nsc_null,    _I(partsize) },
  49         { "MaxFbas",    (uintptr_t)nsc_null,    _I(maxfbas) },
  50         { "Control",    (uintptr_t)nsc_ioerr,   _I(control) },
  51         { NULL,         (uintptr_t)NULL,        0 }
  52 };
  53 
  54 
  55 extern nsc_mem_t *_nsc_local_mem;
  56 
  57 static int _nsc_uread(dev_t, uio_t *, cred_t *, nsc_fd_t *);
  58 static int _nsc_uwrite(dev_t, uio_t *, cred_t *, nsc_fd_t *);
  59 static int _nsc_rw_uio(nsc_fd_t *, uio_t *, uio_rw_t);
  60 
  61 static int _nsc_free_dhandle(nsc_dbuf_t *);
  62 static int _nsc_alloc_dbuf(blind_t, nsc_off_t, nsc_size_t, int, nsc_dbuf_t **);
  63 static int _nsc_free_dbuf(nsc_dbuf_t *);
  64 static void _nsc_wait_dbuf(nsc_dbuf_t *);
  65 static int _nsc_read_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int);
  66 static int _nsc_write_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int);
  67 static int _nsc_zero_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int);
  68 static int _nsc_dbuf_io(int (*)(), nsc_dbuf_t *, nsc_off_t, nsc_size_t, int);
  69 
  70 static nsc_dbuf_t *_nsc_alloc_dhandle(void (*)(), void (*)(), void (*)());
  71 
  72 
  73 /*
  74  * void
  75  * _nsc_add_disk (nsc_io_t *io)
  76  *      Add disk interface functions.
  77  *
  78  * Calling/Exit State:
  79  *      Updates the I/O module with the appropriate
  80  *      interface routines.
  81  *
  82  * Description:
  83  *      Add functions to the I/O module to provide a disk
  84  *      or cache interface as appropriate.
  85  */
  86 void
  87 _nsc_add_disk(nsc_io_t *io)
  88 {
  89         if ((io->alloc_buf != nsc_ioerr && io->free_buf != nsc_fatal) ||
  90             (io->flag & NSC_FILTER)) {
  91                 if (io->uread == nsc_ioerr)
  92                         io->uread = _nsc_uread;
  93 
  94                 if (io->uwrite == nsc_ioerr &&
  95                     (io->write != nsc_fatal || (io->flag & NSC_FILTER)))
  96                         io->uwrite = _nsc_uwrite;
  97 
  98                 return;
  99         }
 100 
 101         if (io->alloc_h != (nsc_buf_t *(*)())nsc_null ||
 102             io->free_h != nsc_fatal || io->alloc_buf != nsc_ioerr ||
 103             io->free_buf != nsc_fatal || io->read != nsc_fatal ||
 104             io->write != nsc_fatal || io->zero != nsc_fatal)
 105                 return;
 106 
 107         if (io->uread == nsc_ioerr && io->uwrite == nsc_ioerr)
 108                 return;
 109 
 110         /*
 111          * Layer the generic nsc_buf_t provider onto a uio_t provider.
 112          */
 113 
 114         io->alloc_h = (nsc_buf_t *(*)())_nsc_alloc_dhandle;
 115         io->free_h = _nsc_free_dhandle;
 116         io->alloc_buf = _nsc_alloc_dbuf;
 117         io->free_buf = _nsc_free_dbuf;
 118 
 119         io->read = _nsc_read_dbuf;
 120         io->write = _nsc_write_dbuf;
 121         io->zero = _nsc_zero_dbuf;
 122 
 123         io->provide |= NSC_ANON;
 124 }
 125 
 126 
 127 int
 128 nsc_uread(nsc_fd_t *fd, void *uiop, void *crp)
 129 {
 130         return (*fd->sf_aio->uread)(fd->sf_cd, uiop, crp, fd);
 131 }
 132 
 133 
 134 int
 135 nsc_uwrite(nsc_fd_t *fd, void *uiop, void *crp)
 136 {
 137         if ((fd->sf_avail & NSC_WRITE) == 0)
 138                 return (EIO);
 139 
 140         return (*fd->sf_aio->uwrite)(fd->sf_cd, uiop, crp, fd);
 141 }
 142 
 143 
 144 int
 145 nsc_partsize(nsc_fd_t *fd, nsc_size_t *valp)
 146 {
 147         *valp = 0;
 148         return (*fd->sf_aio->partsize)(fd->sf_cd, valp);
 149 }
 150 
 151 
 152 int
 153 nsc_maxfbas(nsc_fd_t *fd, int flag, nsc_size_t *valp)
 154 {
 155         *valp = 0;
 156         return (*fd->sf_aio->maxfbas)(fd->sf_cd, flag, valp);
 157 }
 158 
 159 int
 160 nsc_control(nsc_fd_t *fd, int command, void *argp, int argl)
 161 {
 162         return (*fd->sf_aio->control)(fd->sf_cd, command, argp, argl);
 163 }
 164 
 165 
 166 /* ARGSUSED */
 167 
 168 static int
 169 _nsc_uread(dev_t dev, uio_t *uiop, cred_t *crp, nsc_fd_t *fd)
 170 {
 171         return (_nsc_rw_uio(fd, uiop, UIO_READ));
 172 }
 173 
 174 
 175 /* ARGSUSED */
 176 
 177 static int
 178 _nsc_uwrite(dev_t dev, uio_t *uiop, cred_t *crp, nsc_fd_t *fd)
 179 {
 180         return (_nsc_rw_uio(fd, uiop, UIO_WRITE));
 181 }
 182 
 183 
 184 static int
 185 _nsc_rw_uio(nsc_fd_t *fd, uio_t *uiop, uio_rw_t rw)
 186 {
 187         nsc_size_t buflen, len, limit, chunk;
 188         nsc_off_t pos, off;
 189         nsc_buf_t *buf;
 190         nsc_vec_t *vec;
 191         size_t n;
 192         int rc;
 193 
 194         pos = FPOS_TO_FBA(uiop);
 195         off = FPOS_TO_OFF(uiop);
 196         len = FBA_LEN(uiop->uio_resid + off);
 197 
 198         DTRACE_PROBE3(_nsc_rw_uio_io,
 199                 uint64_t, pos,
 200                 uint64_t, off,
 201                 uint64_t, len);
 202 
 203         /* prevent non-FBA bounded I/O - this is a disk driver! */
 204         if (off != 0 || FBA_OFF(uiop->uio_resid) != 0)
 205                 return (EINVAL);
 206 
 207         if ((rc = nsc_partsize(fd, &limit)) != 0)
 208                 return (rc);
 209 
 210         if ((rc = nsc_maxfbas(fd, 0, &chunk)) != 0)
 211                 return (rc);
 212 
 213         DTRACE_PROBE2(_nsc_rw_uio_limit,
 214                 uint64_t, limit,
 215                 uint64_t, chunk);
 216 
 217         if (limit && pos >= limit) {
 218                 if (pos > limit || rw == UIO_WRITE)
 219                         return (ENXIO);
 220                 return (0);
 221         }
 222 
 223         if (limit && pos + len > limit)
 224                 len = limit - pos;
 225 
 226         while (len > 0) {
 227                 buflen = min(len, chunk);
 228 
 229                 buf = NULL;     /* always use a temporary buffer */
 230                 if ((rc = nsc_alloc_buf(fd, pos, buflen,
 231                     (rw == UIO_READ) ? NSC_RDBUF : NSC_WRBUF, &buf)) > 0)
 232                         return (rc);
 233 
 234                 vec = buf->sb_vec;
 235 
 236                 for (rc = 0;
 237                     !rc && uiop->uio_resid && vec->sv_addr;
 238                     vec++, off = 0) {
 239                         n = min(vec->sv_len - off, uiop->uio_resid);
 240                         rc = uiomove((char *)vec->sv_addr + off,
 241                             n, rw, uiop);
 242                 }
 243 
 244                 if (rw == UIO_WRITE) {
 245                         if (rc) {
 246                                 (void) nsc_uncommit(buf, pos, buflen, 0);
 247                         } else if ((rc = nsc_write(buf, pos, buflen, 0)) < 0) {
 248                                 rc = 0;
 249                         }
 250                 }
 251 
 252                 (void) nsc_free_buf(buf);
 253 
 254                 len -= buflen;
 255                 pos += buflen;
 256         }
 257 
 258         return (rc);
 259 }
 260 
 261 
 262 /* ARGSUSED */
 263 
 264 static nsc_dbuf_t *
 265 _nsc_alloc_dhandle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)())
 266 {
 267         nsc_dbuf_t *h;
 268 
 269         if ((h = nsc_kmem_zalloc(sizeof (nsc_dbuf_t),
 270                         KM_SLEEP, _nsc_local_mem)) == NULL)
 271                 return (NULL);
 272 
 273         h->db_disc = d_cb;
 274         h->db_flag = NSC_HALLOCATED;
 275 
 276         return (h);
 277 }
 278 
 279 
 280 static int
 281 _nsc_free_dhandle(nsc_dbuf_t *h)
 282 {
 283         nsc_kmem_free(h, sizeof (*h));
 284         return (0);
 285 }
 286 
 287 
 288 static int
 289 _nsc_alloc_dbuf(blind_t cd, nsc_off_t pos, nsc_size_t len,
 290     int flag, nsc_dbuf_t **hp)
 291 {
 292         nsc_dbuf_t *h = *hp;
 293         int rc;
 294 
 295         if (cd == NSC_ANON_CD) {
 296                 flag &= ~(NSC_READ | NSC_WRITE | NSC_RDAHEAD);
 297         } else {
 298                 if (h->db_maxfbas == 0) {
 299                         rc = nsc_maxfbas(h->db_fd, 0, &h->db_maxfbas);
 300                         if (rc != 0)
 301                                 return (rc);
 302                         else if (h->db_maxfbas == 0)
 303                                 return (EIO);
 304                 }
 305 
 306                 if (len > h->db_maxfbas)
 307                         return (ENOSPC);
 308         }
 309 
 310         if (flag & NSC_NODATA) {
 311                 ASSERT(!(flag & NSC_RDBUF));
 312                 h->db_addr = NULL;
 313         } else {
 314                 if (h->db_disc)
 315                         (*h->db_disc)(h);
 316 
 317                 if (!(h->db_addr = nsc_kmem_alloc(FBA_SIZE(len), KM_SLEEP, 0)))
 318                         return (ENOMEM);
 319         }
 320 
 321         h->db_pos = pos;
 322         h->db_len = len;
 323         h->db_error = 0;
 324         h->db_flag |= flag;
 325 
 326         if (flag & NSC_NODATA) {
 327                 h->db_vec = NULL;
 328         } else {
 329                 h->db_vec = &h->db_bvec[0];
 330                 h->db_bvec[0].sv_len = FBA_SIZE(len);
 331                 h->db_bvec[0].sv_addr = (void *)h->db_addr;
 332                 h->db_bvec[0].sv_vme = 0;
 333 
 334                 h->db_bvec[1].sv_len = 0;
 335                 h->db_bvec[1].sv_addr = 0;
 336                 h->db_bvec[1].sv_vme = 0;
 337         }
 338 
 339         if ((flag & NSC_RDAHEAD) || (cd == NSC_ANON_CD))
 340                 return (NSC_DONE);
 341 
 342         _nsc_wait_dbuf(h);
 343 
 344         if (flag & NSC_RDBUF) {
 345                 if ((rc = _nsc_dbuf_io(nsc_uread, h, pos, len, flag)) != 0) {
 346                         (void) _nsc_free_dbuf(h);
 347                         return (rc);
 348                 }
 349         }
 350 
 351         return (NSC_DONE);
 352 }
 353 
 354 
 355 static void
 356 _nsc_wait_dbuf(nsc_dbuf_t *h)
 357 {
 358         nsc_iodev_t *iodev = h->db_fd->sf_iodev;
 359         void (*fn)() = h->db_disc;
 360         nsc_dbuf_t *hp;
 361 
 362         mutex_enter(&iodev->si_lock);
 363 
 364         h->db_next = iodev->si_active;
 365         iodev->si_active = h;
 366 
 367         /* CONSTCOND */
 368 
 369         while (1) {
 370                 for (hp = h->db_next; hp; hp = hp->db_next)
 371                         if (h->db_pos + h->db_len > hp->db_pos &&
 372                             h->db_pos < hp->db_pos + hp->db_len) break;
 373 
 374                 if (!hp)
 375                         break;
 376 
 377                 if (fn)
 378                         (*fn)(h), fn = NULL;
 379 
 380                 cv_wait(&iodev->si_cv, &iodev->si_lock);
 381         }
 382 
 383         mutex_exit(&iodev->si_lock);
 384 }
 385 
 386 
 387 static int
 388 _nsc_free_dbuf(nsc_dbuf_t *h)
 389 {
 390         nsc_dbuf_t **hpp, *hp;
 391         nsc_iodev_t *iodev;
 392         int wake = 0;
 393 
 394         if (h->db_fd && !(h->db_flag & NSC_ABUF)) {
 395                 iodev = h->db_fd->sf_iodev;
 396 
 397                 mutex_enter(&iodev->si_lock);
 398 
 399                 hpp = (nsc_dbuf_t **)&iodev->si_active;
 400 
 401                 for (; *hpp; hpp = &hp->db_next) {
 402                         if ((hp = *hpp) == h) {
 403                                 *hpp = h->db_next;
 404                                 break;
 405                         }
 406 
 407                         if (h->db_pos + h->db_len > hp->db_pos &&
 408                             h->db_pos < hp->db_pos + hp->db_len) wake = 1;
 409 
 410                 }
 411                 if (wake)
 412                         cv_broadcast(&iodev->si_cv);
 413 
 414                 mutex_exit(&iodev->si_lock);
 415         }
 416 
 417         if (!(h->db_flag & NSC_NODATA) && h->db_addr)
 418                 nsc_kmem_free(h->db_addr, FBA_SIZE(h->db_len));
 419 
 420         h->db_addr = NULL;
 421         h->db_flag &= NSC_HALLOCATED; /* clear flags, preserve NSC_HALLOCATED */
 422 
 423         if ((h->db_flag & NSC_HALLOCATED) == 0)
 424                 (void) _nsc_free_dhandle(h);
 425 
 426 
 427         return (0);
 428 }
 429 
 430 
 431 static int
 432 _nsc_read_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
 433 {
 434         return (_nsc_dbuf_io(nsc_uread, h, pos, len, flag));
 435 }
 436 
 437 
 438 static int
 439 _nsc_write_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
 440 {
 441         return (_nsc_dbuf_io(nsc_uwrite, h, pos, len, flag));
 442 }
 443 
 444 
 445 static int
 446 _nsc_zero_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
 447 {
 448         return (_nsc_dbuf_io(NULL, h, pos, len, flag));
 449 }
 450 
 451 
 452 static int
 453 _nsc_dbuf_io(int (*fn)(), nsc_dbuf_t *h, nsc_off_t pos,
 454     nsc_size_t len, int flag)
 455 {
 456         nsc_vec_t *vp = NULL;
 457         cred_t *crp = NULL;
 458         iovec_t *iovp;
 459         nsc_size_t thisio;              /* bytes in this io */
 460         nsc_size_t todo;                /* anticipated bytes to go */
 461         nsc_size_t truedo;              /* actual bytes to go */
 462         nsc_off_t xpos;                 /* offset of this io */
 463         int destidx;
 464         nsc_size_t firstentryfix;       /* value used for first entry */
 465 
 466         int (*iofn)();
 467         int rc = 0;
 468 
 469         if (!h->db_vec || (h->db_flag & NSC_ABUF))
 470                 return (EIO);
 471 
 472         if (pos < h->db_pos || pos + len > h->db_pos + h->db_len)
 473                 return (EINVAL);
 474 
 475         if (!len)
 476                 return (0);
 477         if (fn == nsc_uread && (flag & NSC_RDAHEAD))
 478                 return (0);
 479 
 480         if (h->db_disc)
 481                 (*h->db_disc)(h);
 482 
 483         crp = ddi_get_cred();
 484         bzero(&h->db_uio, sizeof (uio_t));
 485         bzero(&h->db_iov[0], (_NSC_DBUF_NVEC * sizeof (iovec_t)));
 486 
 487         todo = FBA_SIZE(len);
 488 
 489         /*
 490          * determine where in the vector array we should start.
 491          */
 492         vp = h->db_vec;
 493         xpos = pos - h->db_pos;
 494         for (; xpos >= FBA_NUM(vp->sv_len); vp++)
 495                 xpos -= FBA_NUM(vp->sv_len);
 496 
 497         firstentryfix = FBA_SIZE(xpos);
 498 
 499         xpos = pos;
 500 
 501         /*
 502          * Loop performing i/o to the underlying driver.
 503          */
 504         while (todo) {
 505                 destidx = 0;
 506                 thisio = 0;
 507                 iofn = fn;
 508 
 509                 /*
 510                  * Copy up to _NSC_DBUF_NVEC vector entries from the
 511                  * nsc_vec_t into the iovec_t so that the number of
 512                  * i/o operations is minimised.
 513                  */
 514                 while (destidx < _NSC_DBUF_NVEC && todo) {
 515                         iovp = &h->db_iov[destidx];
 516 
 517                         ASSERT(FBA_LEN(vp->sv_len) == FBA_NUM(vp->sv_len));
 518                         ASSERT((vp->sv_len - firstentryfix) && vp->sv_addr);
 519 
 520                         truedo = min(vp->sv_len - firstentryfix, todo);
 521                         iovp->iov_base = (caddr_t)vp->sv_addr + firstentryfix;
 522                         firstentryfix = 0;
 523                         iovp->iov_len = (size_t)truedo;
 524                         if (!iofn) {
 525                                 bzero(iovp->iov_base, iovp->iov_len);
 526                         }
 527                         thisio += truedo;
 528                         todo -= truedo;
 529                         destidx++;
 530                         vp++;
 531                 }
 532 
 533                 h->db_uio.uio_iovcnt = destidx;
 534                 h->db_uio.uio_iov = &h->db_iov[0];
 535                 h->db_uio.uio_segflg = UIO_SYSSPACE;
 536                 h->db_uio.uio_resid = (size_t)thisio;
 537 
 538                 SET_FPOS(&h->db_uio, xpos);
 539 
 540                 if (!iofn) {
 541                         iofn = nsc_uwrite;
 542                 }
 543 
 544                 rc = (*iofn)(h->db_fd, &h->db_uio, crp);
 545                 if (rc != 0) {
 546                         break;
 547                 }
 548 
 549                 ASSERT(FBA_LEN(thisio) == FBA_NUM(thisio));
 550                 xpos += FBA_LEN(thisio);
 551         }
 552 
 553         return (rc);
 554 }