/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

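/*
 * xpvtap driver
 *    Presents Xen blkif (paravirtualized block) requests from a guest domain
 *    to a user-space disk emulation app through a character device. The app
 *    mmap(2)s two regions of the minor node: the shared user ring at offset 0
 *    and a gref buffer at offset PAGESIZE. Guest requests are mapped into
 *    the app's address space and pushed onto the user ring (poll(2) tells the
 *    app when requests are pending); the app pushes its responses back with
 *    the XPVTAP_IOCTL_RESP_PUSH ioctl, and the driver forwards them to the
 *    guest ring.
 */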

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/policy.h>

#include <sys/vmsystm.h>
#include <vm/hat_i86.h>
#include <vm/hat_pte.h>
#include <vm/seg_kmem.h>
#include <vm/seg_mf.h>

#include <xen/io/blkif_impl.h>
#include <xen/io/blk_common.h>
#include <xen/io/xpvtap.h>


static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *cred, int *rval);
static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
    size_t len, size_t *maplen, uint_t model);
static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p);
static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

static struct cb_ops xpvtap_cb_ops = {
	xpvtap_open,		/* cb_open */
	xpvtap_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	xpvtap_ioctl,		/* cb_ioctl */
	xpvtap_devmap,		/* cb_devmap */
	nodev,			/* cb_mmap */
	xpvtap_segmap,		/* cb_segmap */
	xpvtap_chpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
	CB_REV
};

static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);

static struct dev_ops xpvtap_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xpvtap_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xpvtap_attach,		/* devo_attach */
	xpvtap_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&xpvtap_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL			/* power */
};


static struct modldrv xpvtap_modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	"xpvtap driver",	/* Name of the module. */
	&xpvtap_dev_ops,	/* driver ops */
};

static struct modlinkage xpvtap_modlinkage = {
	MODREV_1,
	{ (void *) &xpvtap_modldrv, NULL }
};


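/* per-instance soft state; one xpvtap_state_t is allocated per attach */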
void *xpvtap_statep;


static xpvtap_state_t *xpvtap_drv_init(int instance);
static void xpvtap_drv_fini(xpvtap_state_t *state);
static uint_t xpvtap_intr(caddr_t arg);

typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
    xpvtap_rs_hdl_t *handle);
static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
    xpvtap_rs_cleanup_t callback, void *arg);

static int xpvtap_segmf_register(xpvtap_state_t *state);
static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);

static int xpvtap_user_init(xpvtap_state_t *state);
static void xpvtap_user_fini(xpvtap_state_t *state);
static int xpvtap_user_ring_init(xpvtap_state_t *state);
static void xpvtap_user_ring_fini(xpvtap_state_t *state);
static int xpvtap_user_thread_init(xpvtap_state_t *state);
static void xpvtap_user_thread_fini(xpvtap_state_t *state);
static void xpvtap_user_thread_start(caddr_t arg);
static void xpvtap_user_thread_stop(xpvtap_state_t *state);
static void xpvtap_user_thread(void *arg);

static void xpvtap_user_app_stop(caddr_t arg);

static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid);
static int xpvtap_user_request_push(xpvtap_state_t *state,
    blkif_request_t *req, uint_t uid);
static int xpvtap_user_response_get(xpvtap_state_t *state,
    blkif_response_t *resp, uint_t *uid);
static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);


/*
 * _init()
 */
int
_init(void)
{
	int e;

	e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
	if (e != 0) {
		return (e);
	}

	e = mod_install(&xpvtap_modlinkage);
	if (e != 0) {
		ddi_soft_state_fini(&xpvtap_statep);
		return (e);
	}

	return (0);
}


/*
 * _info()
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xpvtap_modlinkage, modinfop));
}


/*
 * _fini()
 */
int
_fini(void)
{
	int e;

	e = mod_remove(&xpvtap_modlinkage);
	if (e != 0) {
		return (e);
	}

	ddi_soft_state_fini(&xpvtap_statep);

	return (0);
}


/*
 * xpvtap_attach()
 */
static int
xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	blk_ringinit_args_t args;
	xpvtap_state_t *state;
	int instance;
	int e;


	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	/* initialize our state info */
	instance = ddi_get_instance(dip);
	state = xpvtap_drv_init(instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}
	state->bt_dip = dip;

	/* Initialize the guest ring */
	args.ar_dip = state->bt_dip;
	args.ar_intr = xpvtap_intr;
	args.ar_intr_arg = (caddr_t)state;
	args.ar_ringup = xpvtap_user_thread_start;
	args.ar_ringup_arg = (caddr_t)state;
	args.ar_ringdown = xpvtap_user_app_stop;
	args.ar_ringdown_arg = (caddr_t)state;
	e = blk_ring_init(&args, &state->bt_guest_ring);
	if (e != DDI_SUCCESS) {
		goto attachfail_ringinit;
	}

	/* create the minor node (for ioctl/mmap) */
	e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
	    DDI_PSEUDO, 0);
	if (e != DDI_SUCCESS) {
		goto attachfail_minor_node;
	}

	/* Report that driver was loaded */
	ddi_report_dev(dip);

	return (DDI_SUCCESS);

attachfail_minor_node:
	blk_ring_fini(&state->bt_guest_ring);
attachfail_ringinit:
	xpvtap_drv_fini(state);
	return (DDI_FAILURE);
}


/*
 * xpvtap_detach()
 */
static int
xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	xpvtap_state_t *state;
	int instance;


	instance = ddi_get_instance(dip);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	xpvtap_user_thread_stop(state);
	blk_ring_fini(&state->bt_guest_ring);
	xpvtap_drv_fini(state);
	ddi_remove_minor_node(dip, NULL);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_getinfo()
 */
/*ARGSUSED*/
static int
xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	xpvtap_state_t *state;
	int instance;
	dev_t dev;
	int e;


	dev = (dev_t)arg;
	instance = getminor(dev);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		state = ddi_get_soft_state(xpvtap_statep, instance);
		if (state == NULL) {
			return (DDI_FAILURE);
		}
		*result = (void *)state->bt_dip;
		e = DDI_SUCCESS;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		e = DDI_SUCCESS;
		break;

	default:
		e = DDI_FAILURE;
		break;
	}

	return (e);
}


/*
 * xpvtap_open()
 */
/*ARGSUSED*/
static int
xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;
	int instance;


	if (secpolicy_xvm_control(cred)) {
		return (EPERM);
	}

	instance = getminor(*devp);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/* we should only be opened once */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		mutex_exit(&state->bt_open.bo_mutex);
		return (EBUSY);
	}
	state->bt_open.bo_opened = B_TRUE;
	mutex_exit(&state->bt_open.bo_mutex);

	/*
	 * save the app's address space. We need it for mapping/unmapping
	 * grefs since we will be doing that in a separate kernel thread.
	 */
	state->bt_map.um_as = curproc->p_as;

	return (0);
}


/*
 * xpvtap_close()
 */
/*ARGSUSED*/
static int
xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;
	int instance;


	instance = getminor(devp);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/*
	 * wake the thread so it can clean up, then wait for it to exit so we
	 * can be sure it's not in the middle of processing a request/response.
	 */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	state->bt_thread.ut_exit = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	if (!state->bt_thread.ut_exit_done) {
		cv_wait(&state->bt_thread.ut_exit_done_cv,
		    &state->bt_thread.ut_mutex);
	}
	ASSERT(state->bt_thread.ut_exit_done);
	mutex_exit(&state->bt_thread.ut_mutex);

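	/*
	 * forget the app's address space; the as callback registered in
	 * xpvtap_segmap() (xpvtap_segmf_unregister) tears down the actual
	 * gref mappings when the app's mapping goes away.
	 */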
	state->bt_map.um_as = NULL;
	state->bt_map.um_guest_pages = NULL;

	/*
	 * when the ring is brought down, a userland hotplug script is run
	 * which tries to bring the userland app down. We'll wait for a bit
	 * for the user app to exit. Notify the thread waiting that the app
	 * has closed the driver.
	 */
	mutex_enter(&state->bt_open.bo_mutex);
	ASSERT(state->bt_open.bo_opened);
	state->bt_open.bo_opened = B_FALSE;
	cv_signal(&state->bt_open.bo_exit_cv);
	mutex_exit(&state->bt_open.bo_mutex);

	return (0);
}


/*
 * xpvtap_ioctl()
 */
/*ARGSUSED*/
static int
xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval)
{
	xpvtap_state_t *state;
	int instance;


	if (secpolicy_xvm_control(cred)) {
		return (EPERM);
	}

	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}

	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	switch (cmd) {
	case XPVTAP_IOCTL_RESP_PUSH:
		/*
		 * wake thread, thread handles guest requests and user app
		 * responses.
		 */
		mutex_enter(&state->bt_thread.ut_mutex);
		state->bt_thread.ut_wake = B_TRUE;
		cv_signal(&state->bt_thread.ut_wake_cv);
		mutex_exit(&state->bt_thread.ut_mutex);
		break;

	default:
		cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
		return (ENXIO);
	}

	return (0);
}


/*
 * xpvtap_segmap()
 */
/*ARGSUSED*/
static int
xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p)
{
	struct segmf_crargs a;
	xpvtap_state_t *state;
	int instance;
	int e;


	if (secpolicy_xvm_control(cred_p)) {
		return (EPERM);
	}

	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* the user app should be doing a MAP_SHARED mapping */
	if ((flags & MAP_TYPE) != MAP_SHARED) {
		return (EINVAL);
	}

	/*
	 * if this is the user ring (offset = 0), devmap it (which ends up in
	 * xpvtap_devmap). devmap will alloc and map the ring into the
	 * app's VA space.
	 */
	if (off == 0) {
		e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
		    prot, maxprot, flags, cred_p);
		return (e);
	}

	/* this should be the mmap for the gref pages (offset = PAGESIZE) */
	if (off != PAGESIZE) {
		return (EINVAL);
	}

	/* make sure we get the size we're expecting */
	if (len != XPVTAP_GREF_BUFSIZE) {
		return (EINVAL);
	}

	/*
	 * reserve user app VA space for the gref pages and use segmf to
	 * manage the backing store for the physical memory. segmf will
	 * map in/out the grefs and fault them in/out.
	 */
	ASSERT(asp == state->bt_map.um_as);
	as_rangelock(asp);
	if ((flags & MAP_FIXED) == 0) {
		map_addr(addrp, len, 0, 0, flags);
		if (*addrp == NULL) {
			as_rangeunlock(asp);
			return (ENOMEM);
		}
	} else {
		/* User specified address */
		(void) as_unmap(asp, *addrp, len);
	}
	a.dev = dev;
	a.prot = (uchar_t)prot;
	a.maxprot = (uchar_t)maxprot;
	e = as_map(asp, *addrp, len, segmf_create, &a);
	if (e != 0) {
		as_rangeunlock(asp);
		return (e);
	}
	as_rangeunlock(asp);

	/*
	 * Stash the user app's base address and the size of the gref mapping;
	 * per-request slot addresses are computed from these later.
	 */
	state->bt_map.um_guest_pages = (caddr_t)*addrp;
	state->bt_map.um_guest_size = (size_t)len;

	/* register an as callback so we can cleanup when the app goes away */
	e = as_add_callback(asp, xpvtap_segmf_unregister, state,
	    AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
	if (e != 0) {
		(void) as_unmap(asp, *addrp, len);
		return (EINVAL);
	}

	/* wake thread to see if there are requests already queued up */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (0);
}


/*
 * xpvtap_devmap()
 */
/*ARGSUSED*/
static int
xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;
	int e;


	instance = getminor(dev);
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	/* we should only get here if the offset was == 0 */
	if (off != 0) {
		return (EINVAL);
	}

	/* we should only be mapping in one page */
	if (len != PAGESIZE) {
		return (EINVAL);
	}

	/*
	 * we already allocated the user ring during driver attach, all we
	 * need to do is map it into the user app's VA.
	 */
	usring = &state->bt_user_ring;
	e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
	    PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
	if (e < 0) {
		return (e);
	}

	/* return the size to complete the devmap */
	*maplen = PAGESIZE;

	return (0);
}


/*
 * xpvtap_chpoll()
 */
static int
xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	xpvtap_user_ring_t *usring;
	xpvtap_state_t *state;
	int instance;


	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
		*reventsp = 0;
		return (EINVAL);
	}

	/*
	 * if we have pushed requests on the user ring since the last poll,
	 * wake up the user app
	 */
	usring = &state->bt_user_ring;
	if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {

		/*
		 * XXX - is this faster here or xpvtap_user_request_push??
		 * prelim data says here. Because less membars or because
		 * user thread will spin in poll requests before getting to
		 * responses?
		 */
		RING_PUSH_REQUESTS(&usring->ur_ring);

		usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
		*reventsp = POLLIN | POLLRDNORM;

	/* no new requests */
	} else {
		*reventsp = 0;
		if (!anyyet) {
			*phpp = &state->bt_pollhead;
		}
	}

	return (0);
}


/*
 * xpvtap_drv_init()
 */
static xpvtap_state_t *
xpvtap_drv_init(int instance)
{
	xpvtap_state_t *state;
	int e;


	e = ddi_soft_state_zalloc(xpvtap_statep, instance);
	if (e != DDI_SUCCESS) {
		return (NULL);
	}
	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL) {
		goto drvinitfail_get_soft_state;
	}

	state->bt_instance = instance;
	mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
	state->bt_open.bo_opened = B_FALSE;
	state->bt_map.um_registered = B_FALSE;

	/* initialize user ring, thread, mapping state */
	e = xpvtap_user_init(state);
	if (e != DDI_SUCCESS) {
		goto drvinitfail_userinit;
	}

	return (state);

drvinitfail_userinit:
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
drvinitfail_get_soft_state:
	(void) ddi_soft_state_free(xpvtap_statep, instance);
	return (NULL);
}


/*
 * xpvtap_drv_fini()
 */
static void
xpvtap_drv_fini(xpvtap_state_t *state)
{
	xpvtap_user_fini(state);
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
	(void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
}


/*
 * xpvtap_intr()
 *    this routine will be called when we have a request on the guest ring.
 */
static uint_t
xpvtap_intr(caddr_t arg)
{
	xpvtap_state_t *state;


	state = (xpvtap_state_t *)arg;

	/* wake thread, thread handles guest requests and user app responses */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (DDI_INTR_CLAIMED);
}


/*
 * xpvtap_segmf_register()
 */
static int
xpvtap_segmf_register(xpvtap_state_t *state)
{
	struct seg *seg;
	uint64_t pte_ma;
	struct as *as;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	as = state->bt_map.um_as;
	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	if (pgcnt == 0) {
		return (DDI_FAILURE);
	}

	AS_LOCK_ENTER(as, RW_READER);

	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as);
		return (DDI_FAILURE);
	}

	/*
	 * lock down the htables so the HAT can't steal them. Register the
	 * PTE MA's for each gref page with seg_mf so we can do user space
	 * gref mappings.
	 */
	for (i = 0; i < pgcnt; i++) {
		hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
		hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
		    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
		hat_release_mapping(as->a_hat, uaddr);
		segmf_add_gref_pte(seg, uaddr, pte_ma);
		uaddr += PAGESIZE;
	}

	state->bt_map.um_registered = B_TRUE;

	AS_LOCK_EXIT(as);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_segmf_unregister()
 *    as_callback routine
 */
/*ARGSUSED*/
static void
xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
{
	xpvtap_state_t *state;
	caddr_t uaddr;
	uint_t pgcnt;
	int i;


	state = (xpvtap_state_t *)arg;
	if (!state->bt_map.um_registered) {
		/* remove the callback (which is this routine) */
		(void) as_delete_callback(as, arg);
		return;
	}

	pgcnt = btopr(state->bt_map.um_guest_size);
	uaddr = state->bt_map.um_guest_pages;

	/* unmap any outstanding req's grefs */
	xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);

	/* Unlock the gref pages */
	for (i = 0; i < pgcnt; i++) {
		AS_LOCK_ENTER(as, RW_WRITER);
		hat_prepare_mapping(as->a_hat, uaddr, NULL);
		hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
		hat_release_mapping(as->a_hat, uaddr);
		AS_LOCK_EXIT(as);
		uaddr += PAGESIZE;
	}

	/* remove the callback (which is this routine) */
	(void) as_delete_callback(as, arg);

	state->bt_map.um_registered = B_FALSE;
}


/*
 * xpvtap_user_init()
 */
static int
xpvtap_user_init(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;
	int e;


	map = &state->bt_map;

	/* Setup the ring between the driver and user app */
	e = xpvtap_user_ring_init(state);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
	 * is the same number of requests as the guest ring. Initialize the
	 * state we use to track request IDs to the user app. These IDs will
	 * also identify which group of gref pages correspond with the
	 * request.
	 */
	xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);

	/*
	 * allocate the space to store a copy of each outstanding request. We
	 * will need to reference the ID and the number of segments when we
	 * get the response from the user app.
	 */
	map->um_outstanding_reqs = kmem_zalloc(
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
	    KM_SLEEP);

	/*
	 * initialize the thread we use to process guest requests and user
	 * responses.
	 */
	e = xpvtap_user_thread_init(state);
	if (e != DDI_SUCCESS) {
		goto userinitfail_user_thread_init;
	}

	return (DDI_SUCCESS);

userinitfail_user_thread_init:
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
	return (DDI_FAILURE);
}


/*
 * xpvtap_user_ring_init()
 */
static int
xpvtap_user_ring_init(xpvtap_state_t *state)
{
	xpvtap_user_ring_t *usring;


	usring = &state->bt_user_ring;

	/* allocate and initialize the page for the shared user ring */
	usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
	    DDI_UMEM_SLEEP, &usring->ur_cookie);
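	/*
	 * the driver is the frontend of the user ring: it produces requests
	 * and consumes responses, while the user app acts as the backend.
	 */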
	SHARED_RING_INIT(usring->ur_sring);
	FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
	usring->ur_prod_polled = 0;

	return (DDI_SUCCESS);
}


/*
 * xpvtap_user_thread_init()
 */
static int
xpvtap_user_thread_init(xpvtap_state_t *state)
{
	xpvtap_user_thread_t *thread;
	char taskqname[32];


	thread = &state->bt_thread;

	mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
	cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
	thread->ut_wake = B_FALSE;
	thread->ut_exit = B_FALSE;
	thread->ut_exit_done = B_TRUE;

	/* create but don't start the user thread */
	(void) sprintf(taskqname, "xvptap_%d", state->bt_instance);
	thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
	    TASKQ_DEFAULTPRI, 0);
	if (thread->ut_taskq == NULL) {
		goto userinitthrfail_taskq_create;
	}

	return (DDI_SUCCESS);

userinitthrfail_taskq_dispatch:
	ddi_taskq_destroy(thread->ut_taskq);
userinitthrfail_taskq_create:
	cv_destroy(&thread->ut_exit_done_cv);
	cv_destroy(&thread->ut_wake_cv);
	mutex_destroy(&thread->ut_mutex);

	return (DDI_FAILURE);
}


/*
 * xpvtap_user_thread_start()
 */
static void
xpvtap_user_thread_start(caddr_t arg)
{
	xpvtap_user_thread_t *thread;
	xpvtap_state_t *state;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

	/* start the user thread */
	thread->ut_exit_done = B_FALSE;
	e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
	    DDI_SLEEP);
	if (e != DDI_SUCCESS) {
		thread->ut_exit_done = B_TRUE;
		cmn_err(CE_WARN, "Unable to start user thread\n");
	}
}


/*
 * xpvtap_user_thread_stop()
 */
static void
xpvtap_user_thread_stop(xpvtap_state_t *state)
{
	/* wake thread so it can exit */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	state->bt_thread.ut_exit = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	if (!state->bt_thread.ut_exit_done) {
		cv_wait(&state->bt_thread.ut_exit_done_cv,
		    &state->bt_thread.ut_mutex);
	}
	mutex_exit(&state->bt_thread.ut_mutex);
	ASSERT(state->bt_thread.ut_exit_done);
}


/*
 * xpvtap_user_fini()
 */
static void
xpvtap_user_fini(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map;


	map = &state->bt_map;

	xpvtap_user_thread_fini(state);
	xpvtap_rs_fini(&map->um_rs);
	kmem_free(map->um_outstanding_reqs,
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
	xpvtap_user_ring_fini(state);
}


/*
 * xpvtap_user_ring_fini()
 */
static void
xpvtap_user_ring_fini(xpvtap_state_t *state)
{
	ddi_umem_free(state->bt_user_ring.ur_cookie);
}


/*
 * xpvtap_user_thread_fini()
 */
static void
xpvtap_user_thread_fini(xpvtap_state_t *state)
{
	ddi_taskq_destroy(state->bt_thread.ut_taskq);
	cv_destroy(&state->bt_thread.ut_exit_done_cv);
	cv_destroy(&state->bt_thread.ut_wake_cv);
	mutex_destroy(&state->bt_thread.ut_mutex);
}


/*
 * xpvtap_user_thread()
 */
static void
xpvtap_user_thread(void *arg)
{
	xpvtap_user_thread_t *thread;
	blkif_response_t resp;
	xpvtap_state_t *state;
	blkif_request_t req;
	boolean_t b;
	uint_t uid;
	int e;


	state = (xpvtap_state_t *)arg;
	thread = &state->bt_thread;

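	/*
	 * main loop: map each new guest request into the user app's VA and
	 * push it on the user ring, then pull completed responses off the
	 * user ring, unmap their grefs, and forward the responses to the
	 * guest ring.
	 */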
xpvtap_thread_start:
	/* See if we are supposed to exit */
	mutex_enter(&thread->ut_mutex);
	if (thread->ut_exit) {
		thread->ut_exit_done = B_TRUE;
		cv_signal(&state->bt_thread.ut_exit_done_cv);
		mutex_exit(&thread->ut_mutex);
		return;
	}

	/*
	 * if we aren't supposed to be awake, wait until someone wakes us.
	 * when we wake up, check for a kill or someone telling us to exit.
	 */
	if (!thread->ut_wake) {
		e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
		if ((e == 0) || (thread->ut_exit)) {
			thread->ut_exit = B_TRUE;
			mutex_exit(&thread->ut_mutex);
			goto xpvtap_thread_start;
		}
	}

	/* if someone didn't wake us, go back to the start of the thread */
	if (!thread->ut_wake) {
		mutex_exit(&thread->ut_mutex);
		goto xpvtap_thread_start;
	}

	/* we are awake */
	thread->ut_wake = B_FALSE;
	mutex_exit(&thread->ut_mutex);

	/* process requests from the guest */
	do {
		/*
		 * check for requests from the guest. if we don't have any,
		 * break out of the loop.
		 */
		e = blk_ring_request_get(state->bt_guest_ring, &req);
		if (e == B_FALSE) {
			break;
		}

		/* we got a request, map the grefs into the user app's VA */
		e = xpvtap_user_request_map(state, &req, &uid);
		if (e != DDI_SUCCESS) {
			/*
			 * If we couldn't map the request (e.g. user app hasn't
			 * opened the device yet), requeue it and try again
			 * later
			 */
			blk_ring_request_requeue(state->bt_guest_ring);
			break;
		}

		/* push the request to the user app */
		e = xpvtap_user_request_push(state, &req, uid);
		if (e != DDI_SUCCESS) {
			resp.id = req.id;
			resp.operation = req.operation;
			resp.status = BLKIF_RSP_ERROR;
			blk_ring_response_put(state->bt_guest_ring, &resp);
		}
	} while (!thread->ut_exit);

	/* process responses from the user app */
	do {
		/*
		 * check for responses from the user app. if we don't have any,
		 * break out of the loop.
		 */
		b = xpvtap_user_response_get(state, &resp, &uid);
		if (b != B_TRUE) {
			break;
		}

		/*
		 * if we got a response, unmap the grefs from the matching
		 * request.
		 */
		xpvtap_user_request_unmap(state, uid);

		/* push the response to the guest */
		blk_ring_response_put(state->bt_guest_ring, &resp);
	} while (!thread->ut_exit);

	goto xpvtap_thread_start;
}


/*
 * xpvtap_user_request_map()
 */
static int
xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid)
{
	grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct seg *seg;
	struct as *as;
	domid_t domid;
	caddr_t uaddr;
	uint_t flags;
	int i;
	int e;


	domid = xvdi_get_oeid(state->bt_dip);

	as = state->bt_map.um_as;
	if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
		return (DDI_FAILURE);
	}

	/* has to happen after segmap returns */
	if (!state->bt_map.um_registered) {
		/* register the pte's with segmf */
		e = xpvtap_segmf_register(state);
		if (e != DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/* alloc an ID for the user ring */
	e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
	if (e != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/* if we don't have any segments to map, we're done */
	if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
	    (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
	    (req->nr_segments == 0)) {
		return (DDI_SUCCESS);
	}

	/* get the address of this request's slot in the app's gref buffer */
	uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);

	AS_LOCK_ENTER(as, RW_READER);
	seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
	if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
	    (seg->s_base + seg->s_size))) {
		AS_LOCK_EXIT(as);
		return (DDI_FAILURE);
	}

	/* if we are reading from disk, we are writing into memory */
	flags = 0;
	if (req->operation == BLKIF_OP_READ) {
		flags |= SEGMF_GREF_WR;
	}

	/* Load the grefs into seg_mf */
	for (i = 0; i < req->nr_segments; i++) {
		gref[i] = req->seg[i].gref;
	}
	(void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
	    domid);

	AS_LOCK_EXIT(as);

	return (DDI_SUCCESS);
}


/*
 * xpvtap_user_request_push()
 */
static int
xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
    uint_t uid)
{
	blkif_request_t *outstanding_req;
	blkif_front_ring_t *uring;
	blkif_request_t *target;
	xpvtap_user_map_t *map;


	uring = &state->bt_user_ring.ur_ring;
	map = &state->bt_map;

	target = RING_GET_REQUEST(uring, uring->req_prod_pvt);

	/*
	 * Save the request from the frontend. It is used for ID mapping and
	 * for unmapping the grefs on response/cleanup.
	 */
	outstanding_req = &map->um_outstanding_reqs[uid];
	bcopy(req, outstanding_req, sizeof (*outstanding_req));

	/* put the request on the user ring */
	bcopy(req, target, sizeof (*req));
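	/*
	 * the user app sees our ring ID instead of the guest's request ID;
	 * the original ID is restored from um_outstanding_reqs when the
	 * response comes back.
	 */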
	target->id = (uint64_t)uid;
	uring->req_prod_pvt++;

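	/* let the app know (via poll/select) that a new request is pending */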
	pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);

	return (DDI_SUCCESS);
}


static void
xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
{
	blkif_request_t *req;
	struct seg *seg;
	struct as *as;
	caddr_t uaddr;
	int e;


	as = state->bt_map.um_as;
	if (as == NULL) {
		return;
	}

	/* get a copy of the original request */
	req = &state->bt_map.um_outstanding_reqs[uid];

	/* unmap the grefs for this request */
	if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
	    (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
	    (req->nr_segments != 0)) {
		uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
		AS_LOCK_ENTER(as, RW_READER);
		seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
		if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
		    (seg->s_base + seg->s_size))) {
			AS_LOCK_EXIT(as);
			xpvtap_rs_free(state->bt_map.um_rs, uid);
			return;
		}

		e = segmf_release_grefs(seg, uaddr, req->nr_segments);
		if (e != 0) {
			cmn_err(CE_WARN, "unable to release grefs");
		}

		AS_LOCK_EXIT(as);
	}

	/* free up the user ring id */
	xpvtap_rs_free(state->bt_map.um_rs, uid);
}


static int
xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
    uint_t *uid)
{
	blkif_front_ring_t *uring;
	blkif_response_t *target;


	uring = &state->bt_user_ring.ur_ring;

	if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
		return (B_FALSE);
	}

	target = NULL;
	target = RING_GET_RESPONSE(uring, uring->rsp_cons);
	if (target == NULL) {
		return (B_FALSE);
	}

	/* copy out the user app response */
	bcopy(target, resp, sizeof (*resp));
	uring->rsp_cons++;

	/* restore the guest's request ID from the original request */
	*uid = (uint_t)resp->id;
	resp->id = state->bt_map.um_outstanding_reqs[*uid].id;

	return (B_TRUE);
}


/*
 * xpvtap_user_app_stop()
 */
static void
xpvtap_user_app_stop(caddr_t arg)
{
	xpvtap_state_t *state;
	clock_t rc;

	state = (xpvtap_state_t *)arg;

	/*
	 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
	 * problem, we just won't auto-detach the driver.
	 */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
		    &state->bt_open.bo_mutex, drv_usectohz(10000000),
		    TR_CLOCK_TICK);
		if (rc <= 0) {
			cmn_err(CE_NOTE, "!user process still has driver open, "
			    "deferring detach\n");
		}
	}
	mutex_exit(&state->bt_open.bo_mutex);
}


/*
 * xpvtap_rs_init()
 *    Initialize the resource structure. init() returns a handle to be used
 *    for the rest of the resource functions. This code is written assuming
 *    that min_val will be close to 0. Therefore, we will allocate the free
 *    buffer only taking max_val into account.
 */
static void
xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;
	uint_t array_size;
	uint_t index;


	ASSERT(handle != NULL);
	ASSERT(min_val < max_val);

	/* alloc space for resource structure */
	rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);

	/*
	 * Test to see if max_val is a multiple of 64. If so, we don't need to
	 * allocate an extra 64-bit word. Allocate space for the free bitmap
	 * (8 bytes per uint64_t).
	 */
	if ((max_val & 0x3F) == 0) {
		rstruct->rs_free_size = (max_val >> 6) * 8;
	} else {
		rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
	}
	rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);
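	/*
	 * rs_free is a bitmap: bit (N & 0x3F) of word (N >> 6) is set when
	 * resource N is free.
	 */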

	/* Initialize resource structure */
	rstruct->rs_min = min_val;
	rstruct->rs_last = min_val;
	rstruct->rs_max = max_val;
	mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
	rstruct->rs_flushing = B_FALSE;

	/* Mark all resources as free */
	array_size = rstruct->rs_free_size >> 3;
	for (index = 0; index < array_size; index++) {
		rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
	}

	/* setup handle which is returned from this function */
	*handle = rstruct;
}


/*
 * xpvtap_rs_fini()
 *    Frees up the space allocated in init(). Notice that a pointer to the
 *    handle is used for the parameter. fini() will set the handle to NULL
 *    before returning.
 */
static void
xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)*handle;

	mutex_destroy(&rstruct->rs_mutex);
	kmem_free(rstruct->rs_free, rstruct->rs_free_size);
	kmem_free(rstruct, sizeof (xpvtap_rs_t));

	/* set handle to null. This helps catch bugs. */
	*handle = NULL;
}


/*
 * xpvtap_rs_alloc()
 *    alloc a resource. If alloc fails, we are out of resources.
 */
static int
xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);
	ASSERT(resource != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	/*
	 * Find a free resource. This will return out of the loop once it finds
	 * a free resource. There are a total of 'max'-'min'+1 resources.
	 * Performs a round robin allocation.
	 */
	for (index = min; index <= max; index++) {

		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is free */
		if ((free & ((uint64_t)1 << last)) != 0) {
			/* we are using this resource */
			*resource = rstruct->rs_last;

			/* take it out of the free list */
			rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);

			/*
			 * increment the last count so we start checking the
			 * next resource on the next alloc(). Note the rollover
			 * at 'max'+1.
			 */
			rstruct->rs_last++;
			if (rstruct->rs_last > max) {
				rstruct->rs_last = rstruct->rs_min;
			}

			/* unlock the resource structure */
			mutex_exit(&rstruct->rs_mutex);

			return (DDI_SUCCESS);
		}

		/*
		 * This resource is not free, let's go to the next one. Note
		 * the rollover at 'max'.
		 */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);

	return (DDI_FAILURE);
}


/*
 * xpvtap_rs_free()
 *    Free the previously alloc'd resource. Once a resource has been free'd,
 *    it can be used again when alloc is called.
 */
static void
xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint_t offset;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;
	ASSERT(resource >= rstruct->rs_min);
	ASSERT(resource <= rstruct->rs_max);

	if (!rstruct->rs_flushing) {
		mutex_enter(&rstruct->rs_mutex);
	}

	/* Put the resource back in the free list */
	array_idx = resource >> 6;
	offset = resource & 0x3F;
	rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);

	if (!rstruct->rs_flushing) {
		mutex_exit(&rstruct->rs_mutex);
	}
}


/*
 * xpvtap_rs_flush()
 */
static void
xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
    void *arg)
{
	xpvtap_rs_t *rstruct;
	uint_t array_idx;
	uint64_t free;
	uint_t index;
	uint_t last;
	uint_t min;
	uint_t max;


	ASSERT(handle != NULL);

	rstruct = (xpvtap_rs_t *)handle;

	mutex_enter(&rstruct->rs_mutex);
	min = rstruct->rs_min;
	max = rstruct->rs_max;

	rstruct->rs_flushing = B_TRUE;

	/*
	 * for all resources not free, call the callback routine to clean it
	 * up.
	 */
	for (index = min; index <= max; index++) {

		array_idx = rstruct->rs_last >> 6;
		free = rstruct->rs_free[array_idx];
		last = rstruct->rs_last & 0x3F;

		/* if the next resource to check is not free */
		if ((free & ((uint64_t)1 << last)) == 0) {
			/* call the callback to cleanup */
			(*callback)(arg, rstruct->rs_last);

			/* put it back in the free list */
			rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
		}

		/* go to the next one. Note the rollover at 'max' */
		rstruct->rs_last++;
		if (rstruct->rs_last > max) {
			rstruct->rs_last = rstruct->rs_min;
		}
	}

	mutex_exit(&rstruct->rs_mutex);
}