1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/ksynch.h> 28 #include <sys/errno.h> 29 #include <sys/file.h> 30 #include <sys/open.h> 31 #include <sys/cred.h> 32 #include <sys/kmem.h> 33 #include <sys/uio.h> 34 #include <sys/ddi.h> 35 #include <sys/sdt.h> 36 37 #define __NSC_GEN__ 38 #include "nsc_dev.h" 39 #include "nsc_disk.h" 40 #include "../nsctl.h" 41 42 43 #define _I(x) (((long)(&((nsc_io_t *)0)->x))/sizeof (long)) 44 45 nsc_def_t _nsc_disk_def[] = { 46 "UserRead", (uintptr_t)nsc_ioerr, _I(uread), 47 "UserWrite", (uintptr_t)nsc_ioerr, _I(uwrite), 48 "PartSize", (uintptr_t)nsc_null, _I(partsize), 49 "MaxFbas", (uintptr_t)nsc_null, _I(maxfbas), 50 "Control", (uintptr_t)nsc_ioerr, _I(control), 51 0, 0, 0 52 }; 53 54 55 extern nsc_mem_t *_nsc_local_mem; 56 57 static int _nsc_uread(dev_t, uio_t *, cred_t *, nsc_fd_t *); 58 static int _nsc_uwrite(dev_t, uio_t *, cred_t *, nsc_fd_t *); 59 static int _nsc_rw_uio(nsc_fd_t *, uio_t *, uio_rw_t); 60 61 static int _nsc_free_dhandle(nsc_dbuf_t *); 62 static int _nsc_alloc_dbuf(blind_t, nsc_off_t, nsc_size_t, int, nsc_dbuf_t **); 63 static int _nsc_free_dbuf(nsc_dbuf_t *); 64 static void _nsc_wait_dbuf(nsc_dbuf_t *); 65 static int _nsc_read_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int); 66 static int _nsc_write_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int); 67 static int _nsc_zero_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int); 68 static int _nsc_dbuf_io(int (*)(), nsc_dbuf_t *, nsc_off_t, nsc_size_t, int); 69 70 static nsc_dbuf_t *_nsc_alloc_dhandle(void (*)(), void (*)(), void (*)()); 71 72 73 /* 74 * void 75 * _nsc_add_disk (nsc_io_t *io) 76 * Add disk interface functions. 77 * 78 * Calling/Exit State: 79 * Updates the I/O module with the appropriate 80 * interface routines. 81 * 82 * Description: 83 * Add functions to the I/O module to provide a disk 84 * or cache interface as appropriate. 85 */ 86 void 87 _nsc_add_disk(nsc_io_t *io) 88 { 89 if ((io->alloc_buf != nsc_ioerr && io->free_buf != nsc_fatal) || 90 (io->flag & NSC_FILTER)) { 91 if (io->uread == nsc_ioerr) 92 io->uread = _nsc_uread; 93 94 if (io->uwrite == nsc_ioerr && 95 (io->write != nsc_fatal || (io->flag & NSC_FILTER))) 96 io->uwrite = _nsc_uwrite; 97 98 return; 99 } 100 101 if (io->alloc_h != (nsc_buf_t *(*)())nsc_null || 102 io->free_h != nsc_fatal || io->alloc_buf != nsc_ioerr || 103 io->free_buf != nsc_fatal || io->read != nsc_fatal || 104 io->write != nsc_fatal || io->zero != nsc_fatal) 105 return; 106 107 if (io->uread == nsc_ioerr && io->uwrite == nsc_ioerr) 108 return; 109 110 /* 111 * Layer the generic nsc_buf_t provider onto a uio_t provider. 112 */ 113 114 io->alloc_h = (nsc_buf_t *(*)())_nsc_alloc_dhandle; 115 io->free_h = _nsc_free_dhandle; 116 io->alloc_buf = _nsc_alloc_dbuf; 117 io->free_buf = _nsc_free_dbuf; 118 119 io->read = _nsc_read_dbuf; 120 io->write = _nsc_write_dbuf; 121 io->zero = _nsc_zero_dbuf; 122 123 io->provide |= NSC_ANON; 124 } 125 126 127 int 128 nsc_uread(nsc_fd_t *fd, void *uiop, void *crp) 129 { 130 return (*fd->sf_aio->uread)(fd->sf_cd, uiop, crp, fd); 131 } 132 133 134 int 135 nsc_uwrite(nsc_fd_t *fd, void *uiop, void *crp) 136 { 137 if ((fd->sf_avail & NSC_WRITE) == 0) 138 return (EIO); 139 140 return (*fd->sf_aio->uwrite)(fd->sf_cd, uiop, crp, fd); 141 } 142 143 144 int 145 nsc_partsize(nsc_fd_t *fd, nsc_size_t *valp) 146 { 147 *valp = 0; 148 return (*fd->sf_aio->partsize)(fd->sf_cd, valp); 149 } 150 151 152 int 153 nsc_maxfbas(nsc_fd_t *fd, int flag, nsc_size_t *valp) 154 { 155 *valp = 0; 156 return (*fd->sf_aio->maxfbas)(fd->sf_cd, flag, valp); 157 } 158 159 int 160 nsc_control(nsc_fd_t *fd, int command, void *argp, int argl) 161 { 162 return (*fd->sf_aio->control)(fd->sf_cd, command, argp, argl); 163 } 164 165 166 /* ARGSUSED */ 167 168 static int 169 _nsc_uread(dev_t dev, uio_t *uiop, cred_t *crp, nsc_fd_t *fd) 170 { 171 return (_nsc_rw_uio(fd, uiop, UIO_READ)); 172 } 173 174 175 /* ARGSUSED */ 176 177 static int 178 _nsc_uwrite(dev_t dev, uio_t *uiop, cred_t *crp, nsc_fd_t *fd) 179 { 180 return (_nsc_rw_uio(fd, uiop, UIO_WRITE)); 181 } 182 183 184 static int 185 _nsc_rw_uio(nsc_fd_t *fd, uio_t *uiop, uio_rw_t rw) 186 { 187 nsc_size_t buflen, len, limit, chunk; 188 nsc_off_t pos, off; 189 nsc_buf_t *buf; 190 nsc_vec_t *vec; 191 size_t n; 192 int rc; 193 194 pos = FPOS_TO_FBA(uiop); 195 off = FPOS_TO_OFF(uiop); 196 len = FBA_LEN(uiop->uio_resid + off); 197 198 DTRACE_PROBE3(_nsc_rw_uio_io, 199 uint64_t, pos, 200 uint64_t, off, 201 uint64_t, len); 202 203 /* prevent non-FBA bounded I/O - this is a disk driver! */ 204 if (off != 0 || FBA_OFF(uiop->uio_resid) != 0) 205 return (EINVAL); 206 207 if ((rc = nsc_partsize(fd, &limit)) != 0) 208 return (rc); 209 210 if ((rc = nsc_maxfbas(fd, 0, &chunk)) != 0) 211 return (rc); 212 213 DTRACE_PROBE2(_nsc_rw_uio_limit, 214 uint64_t, limit, 215 uint64_t, chunk); 216 217 if (limit && pos >= limit) { 218 if (pos > limit || rw == UIO_WRITE) 219 return (ENXIO); 220 return (0); 221 } 222 223 if (limit && pos + len > limit) 224 len = limit - pos; 225 226 while (len > 0) { 227 buflen = min(len, chunk); 228 229 buf = NULL; /* always use a temporary buffer */ 230 if ((rc = nsc_alloc_buf(fd, pos, buflen, 231 (rw == UIO_READ) ? NSC_RDBUF : NSC_WRBUF, &buf)) > 0) 232 return (rc); 233 234 vec = buf->sb_vec; 235 236 for (rc = 0; 237 !rc && uiop->uio_resid && vec->sv_addr; 238 vec++, off = 0) { 239 n = min(vec->sv_len - off, uiop->uio_resid); 240 rc = uiomove((char *)vec->sv_addr + off, 241 n, rw, uiop); 242 } 243 244 if (rw == UIO_WRITE) { 245 if (rc) { 246 (void) nsc_uncommit(buf, pos, buflen, 0); 247 } else if ((rc = nsc_write(buf, pos, buflen, 0)) < 0) { 248 rc = 0; 249 } 250 } 251 252 (void) nsc_free_buf(buf); 253 254 len -= buflen; 255 pos += buflen; 256 } 257 258 return (rc); 259 } 260 261 262 /* ARGSUSED */ 263 264 static nsc_dbuf_t * 265 _nsc_alloc_dhandle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)()) 266 { 267 nsc_dbuf_t *h; 268 269 if ((h = nsc_kmem_zalloc(sizeof (nsc_dbuf_t), 270 KM_SLEEP, _nsc_local_mem)) == NULL) 271 return (NULL); 272 273 h->db_disc = d_cb; 274 h->db_flag = NSC_HALLOCATED; 275 276 return (h); 277 } 278 279 280 static int 281 _nsc_free_dhandle(nsc_dbuf_t *h) 282 { 283 nsc_kmem_free(h, sizeof (*h)); 284 return (0); 285 } 286 287 288 static int 289 _nsc_alloc_dbuf(blind_t cd, nsc_off_t pos, nsc_size_t len, 290 int flag, nsc_dbuf_t **hp) 291 { 292 nsc_dbuf_t *h = *hp; 293 int rc; 294 295 if (cd == NSC_ANON_CD) { 296 flag &= ~(NSC_READ | NSC_WRITE | NSC_RDAHEAD); 297 } else { 298 if (h->db_maxfbas == 0) { 299 rc = nsc_maxfbas(h->db_fd, 0, &h->db_maxfbas); 300 if (rc != 0) 301 return (rc); 302 else if (h->db_maxfbas == 0) 303 return (EIO); 304 } 305 306 if (len > h->db_maxfbas) 307 return (ENOSPC); 308 } 309 310 if (flag & NSC_NODATA) { 311 ASSERT(!(flag & NSC_RDBUF)); 312 h->db_addr = NULL; 313 } else { 314 if (h->db_disc) 315 (*h->db_disc)(h); 316 317 if (!(h->db_addr = nsc_kmem_alloc(FBA_SIZE(len), KM_SLEEP, 0))) 318 return (ENOMEM); 319 } 320 321 h->db_pos = pos; 322 h->db_len = len; 323 h->db_error = 0; 324 h->db_flag |= flag; 325 326 if (flag & NSC_NODATA) { 327 h->db_vec = NULL; 328 } else { 329 h->db_vec = &h->db_bvec[0]; 330 h->db_bvec[0].sv_len = FBA_SIZE(len); 331 h->db_bvec[0].sv_addr = (void *)h->db_addr; 332 h->db_bvec[0].sv_vme = 0; 333 334 h->db_bvec[1].sv_len = 0; 335 h->db_bvec[1].sv_addr = 0; 336 h->db_bvec[1].sv_vme = 0; 337 } 338 339 if ((flag & NSC_RDAHEAD) || (cd == NSC_ANON_CD)) 340 return (NSC_DONE); 341 342 _nsc_wait_dbuf(h); 343 344 if (flag & NSC_RDBUF) { 345 if ((rc = _nsc_dbuf_io(nsc_uread, h, pos, len, flag)) != 0) { 346 (void) _nsc_free_dbuf(h); 347 return (rc); 348 } 349 } 350 351 return (NSC_DONE); 352 } 353 354 355 static void 356 _nsc_wait_dbuf(nsc_dbuf_t *h) 357 { 358 nsc_iodev_t *iodev = h->db_fd->sf_iodev; 359 void (*fn)() = h->db_disc; 360 nsc_dbuf_t *hp; 361 362 mutex_enter(&iodev->si_lock); 363 364 h->db_next = iodev->si_active; 365 iodev->si_active = h; 366 367 /* CONSTCOND */ 368 369 while (1) { 370 for (hp = h->db_next; hp; hp = hp->db_next) 371 if (h->db_pos + h->db_len > hp->db_pos && 372 h->db_pos < hp->db_pos + hp->db_len) break; 373 374 if (!hp) 375 break; 376 377 if (fn) 378 (*fn)(h), fn = NULL; 379 380 cv_wait(&iodev->si_cv, &iodev->si_lock); 381 } 382 383 mutex_exit(&iodev->si_lock); 384 } 385 386 387 static int 388 _nsc_free_dbuf(nsc_dbuf_t *h) 389 { 390 nsc_dbuf_t **hpp, *hp; 391 nsc_iodev_t *iodev; 392 int wake = 0; 393 394 if (h->db_fd && !(h->db_flag & NSC_ABUF)) { 395 iodev = h->db_fd->sf_iodev; 396 397 mutex_enter(&iodev->si_lock); 398 399 hpp = (nsc_dbuf_t **)&iodev->si_active; 400 401 for (; *hpp; hpp = &hp->db_next) { 402 if ((hp = *hpp) == h) { 403 *hpp = h->db_next; 404 break; 405 } 406 407 if (h->db_pos + h->db_len > hp->db_pos && 408 h->db_pos < hp->db_pos + hp->db_len) wake = 1; 409 410 } 411 if (wake) 412 cv_broadcast(&iodev->si_cv); 413 414 mutex_exit(&iodev->si_lock); 415 } 416 417 if (!(h->db_flag & NSC_NODATA) && h->db_addr) 418 nsc_kmem_free(h->db_addr, FBA_SIZE(h->db_len)); 419 420 h->db_addr = NULL; 421 h->db_flag &= NSC_HALLOCATED; /* clear flags, preserve NSC_HALLOCATED */ 422 423 if ((h->db_flag & NSC_HALLOCATED) == 0) 424 (void) _nsc_free_dhandle(h); 425 426 427 return (0); 428 } 429 430 431 static int 432 _nsc_read_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag) 433 { 434 return (_nsc_dbuf_io(nsc_uread, h, pos, len, flag)); 435 } 436 437 438 static int 439 _nsc_write_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag) 440 { 441 return (_nsc_dbuf_io(nsc_uwrite, h, pos, len, flag)); 442 } 443 444 445 static int 446 _nsc_zero_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag) 447 { 448 return (_nsc_dbuf_io(NULL, h, pos, len, flag)); 449 } 450 451 452 static int 453 _nsc_dbuf_io(int (*fn)(), nsc_dbuf_t *h, nsc_off_t pos, 454 nsc_size_t len, int flag) 455 { 456 nsc_vec_t *vp = NULL; 457 cred_t *crp = NULL; 458 iovec_t *iovp; 459 nsc_size_t thisio; /* bytes in this io */ 460 nsc_size_t todo; /* anticipated bytes to go */ 461 nsc_size_t truedo; /* actual bytes to go */ 462 nsc_off_t xpos; /* offset of this io */ 463 int destidx; 464 nsc_size_t firstentryfix; /* value used for first entry */ 465 466 int (*iofn)(); 467 int rc = 0; 468 469 if (!h->db_vec || (h->db_flag & NSC_ABUF)) 470 return (EIO); 471 472 if (pos < h->db_pos || pos + len > h->db_pos + h->db_len) 473 return (EINVAL); 474 475 if (!len) 476 return (0); 477 if (fn == nsc_uread && (flag & NSC_RDAHEAD)) 478 return (0); 479 480 if (h->db_disc) 481 (*h->db_disc)(h); 482 483 crp = ddi_get_cred(); 484 bzero(&h->db_uio, sizeof (uio_t)); 485 bzero(&h->db_iov[0], (_NSC_DBUF_NVEC * sizeof (iovec_t))); 486 487 todo = FBA_SIZE(len); 488 489 /* 490 * determine where in the vector array we should start. 491 */ 492 vp = h->db_vec; 493 xpos = pos - h->db_pos; 494 for (; xpos >= FBA_NUM(vp->sv_len); vp++) 495 xpos -= FBA_NUM(vp->sv_len); 496 497 firstentryfix = FBA_SIZE(xpos); 498 499 xpos = pos; 500 501 /* 502 * Loop performing i/o to the underlying driver. 503 */ 504 while (todo) { 505 destidx = 0; 506 thisio = 0; 507 iofn = fn; 508 509 /* 510 * Copy up to _NSC_DBUF_NVEC vector entries from the 511 * nsc_vec_t into the iovec_t so that the number of 512 * i/o operations is minimised. 513 */ 514 while (destidx < _NSC_DBUF_NVEC && todo) { 515 iovp = &h->db_iov[destidx]; 516 517 ASSERT(FBA_LEN(vp->sv_len) == FBA_NUM(vp->sv_len)); 518 ASSERT((vp->sv_len - firstentryfix) && vp->sv_addr); 519 520 truedo = min(vp->sv_len - firstentryfix, todo); 521 iovp->iov_base = (caddr_t)vp->sv_addr + firstentryfix; 522 firstentryfix = 0; 523 iovp->iov_len = (size_t)truedo; 524 if (!iofn) { 525 bzero(iovp->iov_base, iovp->iov_len); 526 } 527 thisio += truedo; 528 todo -= truedo; 529 destidx++; 530 vp++; 531 } 532 533 h->db_uio.uio_iovcnt = destidx; 534 h->db_uio.uio_iov = &h->db_iov[0]; 535 h->db_uio.uio_segflg = UIO_SYSSPACE; 536 h->db_uio.uio_resid = (size_t)thisio; 537 538 SET_FPOS(&h->db_uio, xpos); 539 540 if (!iofn) { 541 iofn = nsc_uwrite; 542 } 543 544 rc = (*iofn)(h->db_fd, &h->db_uio, crp); 545 if (rc != 0) { 546 break; 547 } 548 549 ASSERT(FBA_LEN(thisio) == FBA_NUM(thisio)); 550 xpos += FBA_LEN(thisio); 551 } 552 553 return (rc); 554 }