1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27 /*
 * This driver attempts to emulate some of the behaviors of
 * Linux terminal devices (/dev/ptmx and /dev/pts/[0-9][0-9]*) on Solaris
30 *
31 * It does this by layering over the /dev/ptmx device and intercepting
32 * opens to it.
33 *
34 * This driver makes the following assumptions about the way the ptm/pts
35 * drivers on Solaris work:
36 *
37 * - all opens of the /dev/ptmx device node return a unique dev_t.
38 *
 * - the dev_t minor node value for each open ptm instance corresponds
 *   to its associated slave terminal device number.  ie. the path to
 *   the slave terminal device associated with an open ptm instance
 *   whose dev_t minor node value is 5, is /dev/pts/5.
43 *
44 * - the ptm driver always allocates the lowest numbered slave terminal
45 * device possible.
46 */
47
48 #include <sys/conf.h>
49 #include <sys/ddi.h>
50 #include <sys/devops.h>
51 #include <sys/file.h>
52 #include <sys/filio.h>
53 #include <sys/kstr.h>
54 #include <sys/ldlinux.h>
55 #include <sys/lx_ptm.h>
56 #include <sys/modctl.h>
57 #include <sys/pathname.h>
58 #include <sys/ptms.h>
59 #include <sys/ptyvar.h>
60 #include <sys/stat.h>
61 #include <sys/stropts.h>
62 #include <sys/sunddi.h>
63 #include <sys/sunldi.h>
64 #include <sys/sysmacros.h>
65 #include <sys/types.h>
66
/* Paths and driver name of the native terminal devices we layer over. */
#define	LP_PTM_PATH		"/dev/ptmx"
#define	LP_PTS_PATH		"/dev/pts/"
#define	LP_PTS_DRV_NAME		"pts"

/* Delay values, expressed in microseconds. */
#define	LP_PTS_USEC_DELAY	(5 * 1000)	/* 5 ms */
#define	LP_PTS_USEC_DELAY_MAX	(5 * MILLISEC)	/* 5 ms */
72
73 /*
 * this driver is layered on top of the ptm driver.  we'd like to
 * make this driver's minor name space a mirror of the ptm driver's
 * namespace, but we can't actually do this.  the reason is that the
 * ptm driver is opened via the clone driver.  therefore no minor nodes
 * of the ptm driver are actually accessible via the filesystem.
 * since we're not a streams device we can't be opened by the clone
 * driver.  therefore we need to have at least one minor node accessible
 * via the filesystem so that consumers can open it.  we use the device
 * node with a minor number of 0 for this purpose.  what this means is
 * that minor node 0 can't be used to map ptm minor node 0.  since this
 * minor node is now reserved we need to shift our ptm minor node
 * mappings by one.  ie. a ptm minor node with a value of 0 will
 * correspond to our minor node with a value of 1.  these mappings are
 * managed with the following macros.
88 */
/* our dev_t -> handle array index (the underlying ptm minor number) */
#define	DEVT_TO_INDEX(x)	LX_PTM_DEV_TO_PTS(x)
/* handle array index -> our minor number (minor 0 is reserved) */
#define	INDEX_TO_MINOR(x)	((x) + 1)
91
/*
 * The layered handle array grows in steps of this many entries.  The
 * step is meant to match the increment the ptm driver uses when it
 * grows the pty device space (PTY_MAXDELTA).
 */
#define	LP_PTY_INC	128
97
98 /*
99 * lx_ptm_ops contains state information about outstanding operations on the
100 * underlying master terminal device. Currently we only track information
101 * for read operations.
102 *
 * Note that this data has not been rolled directly into the lx_ptm_handle
 * structure because we can't put mutexes or condition variables into the
 * lx_ptm_handle structure.  The reason is that the array of lx_ptm_handle
106 * structures linked to from the global lx_ptm state can be resized
107 * dynamically, and when it's resized, the new array is at a different
108 * memory location and the old array memory is discarded. Mutexs and cvs
109 * are accessed based off their address, so if this array was re-sized while
110 * there were outstanding operations on any mutexs or cvs in the array
111 * then the system would tip over. In the future the lx_ptm_handle structure
112 * array should probably be replaced with either an array of pointers to
113 * lx_ptm_handle structures or some other kind of data structure containing
114 * pointers to lx_ptm_handle structures. Then the lx_ptm_ops structure
115 * could be folded directly into the lx_ptm_handle structures. (This will
116 * also require the definition of a new locking mechanism to protect the
117 * contents of lx_ptm_handle structures.)
118 */
119 typedef struct lx_ptm_ops {
120 int lpo_rops;
121 kcondvar_t lpo_rops_cv;
122 kmutex_t lpo_rops_lock;
123 } lx_ptm_ops_t;
124
125 /*
126 * Every open of the master terminal device in a zone results in a new
127 * lx_ptm_handle handle allocation. These handles are stored in an array
128 * hanging off the lx_ptm_state structure.
129 */
130 typedef struct lx_ptm_handle {
131 /* Device handle to the underlying real /dev/ptmx master terminal. */
132 ldi_handle_t lph_handle;
133
134 /* Flag to indicate if TIOCPKT mode has been enabled. */
135 int lph_pktio;
136
137 /* Number of times the slave device has been opened/closed. */
138 int lph_eofed;
139
140 /* Callback handler in the ptm driver to check if slave is open. */
141 ptmptsopencb_t lph_ppocb;
142
143 /* Pointer to state for operations on underlying device. */
144 lx_ptm_ops_t *lph_lpo;
145 } lx_ptm_handle_t;
146
147 /*
148 * Global state for the lx_ptm driver.
149 */
150 typedef struct lx_ptm_state {
151 /* lx_ptm device devinfo pointer */
152 dev_info_t *lps_dip;
153
154 /* LDI ident used to open underlying real /dev/ptmx master terminals. */
155 ldi_ident_t lps_li;
156
157 /* pts drivers major number */
158 major_t lps_pts_major;
159
160 /* rw lock used to manage access and growth of lps_lh_array */
161 krwlock_t lps_lh_rwlock;
162
163 /* number of elements in lps_lh_array */
164 uint_t lps_lh_count;
165
166 /* Array of handles to underlying real /dev/ptmx master terminals. */
167 lx_ptm_handle_t *lps_lh_array;
168 } lx_ptm_state_t;
169
170 /* Pointer to the lx_ptm global state structure. */
171 static lx_ptm_state_t lps;
172
173 /*
174 * List of modules to be autopushed onto slave terminal devices when they
175 * are opened in an lx branded zone.
176 */
177 static char *lx_pts_mods[] = {
178 "ptem",
179 "ldterm",
180 "ttcompat",
181 LDLINUX_MOD,
182 NULL
183 };
184
185 static void
186 lx_ptm_lh_grow(uint_t index)
187 {
188 uint_t new_lh_count, old_lh_count;
189 lx_ptm_handle_t *new_lh_array, *old_lh_array;
190
191 /*
192 * allocate a new array. we drop the rw lock on the array so that
193 * readers can still access devices in case our memory allocation
194 * blocks.
195 */
196 new_lh_count = MAX(lps.lps_lh_count + LP_PTY_INC, index + 1);
197 new_lh_array =
198 kmem_zalloc(sizeof (lx_ptm_handle_t) * new_lh_count, KM_SLEEP);
199
200 /*
201 * double check that we still actually need to increase the size
202 * of the array
203 */
204 rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
205 if (index < lps.lps_lh_count) {
206 /* someone beat us to it so there's nothing more to do */
207 rw_exit(&lps.lps_lh_rwlock);
208 kmem_free(new_lh_array,
209 sizeof (lx_ptm_handle_t) * new_lh_count);
210 return;
211 }
212
213 /* copy the existing data into the new array */
214 ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
215 ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
216 if (lps.lps_lh_count != 0) {
217 bcopy(lps.lps_lh_array, new_lh_array,
218 sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
219 }
220
221 /* save info on the old array */
222 old_lh_array = lps.lps_lh_array;
223 old_lh_count = lps.lps_lh_count;
224
225 /* install the new array */
226 lps.lps_lh_array = new_lh_array;
227 lps.lps_lh_count = new_lh_count;
228
229 rw_exit(&lps.lps_lh_rwlock);
230
231 /* free the old array */
232 if (old_lh_array != NULL) {
233 kmem_free(old_lh_array,
234 sizeof (lx_ptm_handle_t) * old_lh_count);
235 }
236 }
237
238 static void
239 lx_ptm_lh_insert(uint_t index, ldi_handle_t lh)
240 {
241 lx_ptm_ops_t *lpo;
242
243 ASSERT(lh != NULL);
244
245 /* Allocate and initialize the ops structure */
246 lpo = kmem_zalloc(sizeof (lx_ptm_ops_t), KM_SLEEP);
247 mutex_init(&lpo->lpo_rops_lock, NULL, MUTEX_DEFAULT, NULL);
248 cv_init(&lpo->lpo_rops_cv, NULL, CV_DEFAULT, NULL);
249
250 rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
251
252 /* check if we need to grow the size of the layered handle array */
253 if (index >= lps.lps_lh_count) {
254 rw_exit(&lps.lps_lh_rwlock);
255 lx_ptm_lh_grow(index);
256 rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
257 }
258
259 ASSERT(index < lps.lps_lh_count);
260 ASSERT(lps.lps_lh_array[index].lph_handle == NULL);
261 ASSERT(lps.lps_lh_array[index].lph_pktio == 0);
262 ASSERT(lps.lps_lh_array[index].lph_eofed == 0);
263 ASSERT(lps.lps_lh_array[index].lph_lpo == NULL);
264
265 /* insert the new handle and return */
266 lps.lps_lh_array[index].lph_handle = lh;
267 lps.lps_lh_array[index].lph_pktio = 0;
268 lps.lps_lh_array[index].lph_eofed = 0;
269 lps.lps_lh_array[index].lph_lpo = lpo;
270
271 rw_exit(&lps.lps_lh_rwlock);
272 }
273
274 static ldi_handle_t
275 lx_ptm_lh_remove(uint_t index)
276 {
277 ldi_handle_t lh;
278
279 rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
280
281 ASSERT(index < lps.lps_lh_count);
282 ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
283 ASSERT(lps.lps_lh_array[index].lph_lpo->lpo_rops == 0);
284 ASSERT(!MUTEX_HELD(&lps.lps_lh_array[index].lph_lpo->lpo_rops_lock));
285
286 /* free the write handle */
287 kmem_free(lps.lps_lh_array[index].lph_lpo, sizeof (lx_ptm_ops_t));
288 lps.lps_lh_array[index].lph_lpo = NULL;
289
290 /* remove the handle and return it */
291 lh = lps.lps_lh_array[index].lph_handle;
292 lps.lps_lh_array[index].lph_handle = NULL;
293 lps.lps_lh_array[index].lph_pktio = 0;
294 lps.lps_lh_array[index].lph_eofed = 0;
295 rw_exit(&lps.lps_lh_rwlock);
296 return (lh);
297 }
298
299 static void
300 lx_ptm_lh_get_ppocb(uint_t index, ptmptsopencb_t *ppocb)
301 {
302 rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
303
304 ASSERT(index < lps.lps_lh_count);
305 ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
306
307 *ppocb = lps.lps_lh_array[index].lph_ppocb;
308 rw_exit(&lps.lps_lh_rwlock);
309 }
310
311 static void
312 lx_ptm_lh_set_ppocb(uint_t index, ptmptsopencb_t *ppocb)
313 {
314 rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
315
316 ASSERT(index < lps.lps_lh_count);
317 ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
318
319 lps.lps_lh_array[index].lph_ppocb = *ppocb;
320 rw_exit(&lps.lps_lh_rwlock);
321 }
322
323 static ldi_handle_t
324 lx_ptm_lh_lookup(uint_t index)
325 {
326 ldi_handle_t lh;
327
328 rw_enter(&lps.lps_lh_rwlock, RW_READER);
329
330 ASSERT(index < lps.lps_lh_count);
331 ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
332
333 /* return the handle */
334 lh = lps.lps_lh_array[index].lph_handle;
335 rw_exit(&lps.lps_lh_rwlock);
336 return (lh);
337 }
338
339 static lx_ptm_ops_t *
340 lx_ptm_lpo_lookup(uint_t index)
341 {
342 lx_ptm_ops_t *lpo;
343
344 rw_enter(&lps.lps_lh_rwlock, RW_READER);
345
346 ASSERT(index < lps.lps_lh_count);
347 ASSERT(lps.lps_lh_array[index].lph_lpo != NULL);
348
349 /* return the handle */
350 lpo = lps.lps_lh_array[index].lph_lpo;
351 rw_exit(&lps.lps_lh_rwlock);
352 return (lpo);
353 }
354
355 static int
356 lx_ptm_lh_pktio_get(uint_t index)
357 {
358 int pktio;
359
360 rw_enter(&lps.lps_lh_rwlock, RW_READER);
361
362 ASSERT(index < lps.lps_lh_count);
363 ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
364
365 /* return the pktio state */
366 pktio = lps.lps_lh_array[index].lph_pktio;
367 rw_exit(&lps.lps_lh_rwlock);
368 return (pktio);
369 }
370
371 static void
372 lx_ptm_lh_pktio_set(uint_t index, int pktio)
373 {
374 rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
375
376 ASSERT(index < lps.lps_lh_count);
377 ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
378
379 /* set the pktio state */
380 lps.lps_lh_array[index].lph_pktio = pktio;
381 rw_exit(&lps.lps_lh_rwlock);
382 }
383
384 static int
385 lx_ptm_lh_eofed_get(uint_t index)
386 {
387 int eofed;
388
389 rw_enter(&lps.lps_lh_rwlock, RW_READER);
390
391 ASSERT(index < lps.lps_lh_count);
392 ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
393
394 /* return the eofed state */
395 eofed = lps.lps_lh_array[index].lph_eofed;
396 rw_exit(&lps.lps_lh_rwlock);
397 return (eofed);
398 }
399
400 static void
401 lx_ptm_lh_eofed_set(uint_t index)
402 {
403 rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
404
405 ASSERT(index < lps.lps_lh_count);
406 ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
407
408 /* set the eofed state */
409 lps.lps_lh_array[index].lph_eofed++;
410 rw_exit(&lps.lps_lh_rwlock);
411 }
412
413 static int
414 lx_ptm_read_start(dev_t dev)
415 {
416 lx_ptm_ops_t *lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
417
418 mutex_enter(&lpo->lpo_rops_lock);
419 ASSERT(lpo->lpo_rops >= 0);
420
421 /* Wait for other read operations to finish */
422 while (lpo->lpo_rops != 0) {
423 if (cv_wait_sig(&lpo->lpo_rops_cv, &lpo->lpo_rops_lock) == 0) {
424 mutex_exit(&lpo->lpo_rops_lock);
425 return (-1);
426 }
427 }
428
429 /* Start a read operation */
430 VERIFY(++lpo->lpo_rops == 1);
431 mutex_exit(&lpo->lpo_rops_lock);
432 return (0);
433 }
434
435 static void
436 lx_ptm_read_end(dev_t dev)
437 {
438 lx_ptm_ops_t *lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
439
440 mutex_enter(&lpo->lpo_rops_lock);
441 ASSERT(lpo->lpo_rops >= 0);
442
443 /* End a read operation */
444 VERIFY(--lpo->lpo_rops == 0);
445 cv_signal(&lpo->lpo_rops_cv);
446
447 mutex_exit(&lpo->lpo_rops_lock);
448 }
449
450 static int
451 lx_ptm_pts_isopen(dev_t dev)
452 {
453 ptmptsopencb_t ppocb;
454
455 lx_ptm_lh_get_ppocb(DEVT_TO_INDEX(dev), &ppocb);
456 return (ppocb.ppocb_func(ppocb.ppocb_arg));
457 }
458
459 static void
460 lx_ptm_eof_read(ldi_handle_t lh)
461 {
462 struct uio uio;
463 iovec_t iov;
464 char junk[1];
465
466 /*
467 * We can remove any EOF message from the head of the stream by
468 * doing a zero byte read from the stream.
469 */
470 iov.iov_len = 0;
471 iov.iov_base = junk;
472 uio.uio_iovcnt = 1;
473 uio.uio_iov = &iov;
474 uio.uio_resid = iov.iov_len;
475 uio.uio_offset = 0;
476 uio.uio_segflg = UIO_SYSSPACE;
477 uio.uio_fmode = 0;
478 uio.uio_extflg = 0;
479 uio.uio_llimit = MAXOFFSET_T;
480 (void) ldi_read(lh, &uio, kcred);
481 }
482
483 static int
484 lx_ptm_eof_drop_1(dev_t dev, int *rvalp)
485 {
486 ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
487 int err, msg_size, msg_count;
488
489 *rvalp = 0;
490
491 /*
492 * Check if there is an EOF message (represented by a zero length
493 * data message) at the head of the stream. Note that the
494 * I_NREAD ioctl is a streams framework ioctl so it will succeed
495 * even if there have been previous write errors on this stream.
496 */
497 if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size,
498 FKIOCTL, kcred, &msg_count)) != 0)
499 return (err);
500
501 if ((msg_count == 0) || (msg_size != 0)) {
502 /* No EOF message found */
503 return (0);
504 }
505
506 /* Record the fact that the slave device has been closed. */
507 lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
508
509 /* drop the EOF */
510 lx_ptm_eof_read(lh);
511 *rvalp = 1;
512 return (0);
513 }
514
515 static int
516 lx_ptm_eof_drop(dev_t dev, int *rvalp)
517 {
518 int rval, err;
519
520 if (rvalp != NULL)
521 *rvalp = 0;
522 for (;;) {
523 if ((err = lx_ptm_eof_drop_1(dev, &rval)) != 0)
524 return (err);
525 if (rval == 0)
526 return (0);
527 if (rvalp != NULL)
528 *rvalp = 1;
529 }
530 }
531
532 static int
533 lx_ptm_data_check(dev_t dev, int ignore_eof, int *rvalp)
534 {
535 ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
536 int err;
537
538 *rvalp = 0;
539 if (ignore_eof) {
540 int size, rval;
541
542 if ((err = ldi_ioctl(lh, FIONREAD, (intptr_t)&size,
543 FKIOCTL, kcred, &rval)) != 0)
544 return (err);
545 if (size != 0)
546 *rvalp = 1;
547 } else {
548 int msg_size, msg_count;
549
550 if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size,
551 FKIOCTL, kcred, &msg_count)) != 0)
552 return (err);
553 if (msg_count != 0)
554 *rvalp = 1;
555 }
556 return (0);
557 }
558
559 static int
560 lx_ptm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
561 {
562 int err;
563
564 if (cmd != DDI_ATTACH)
565 return (DDI_FAILURE);
566
567 if (ddi_create_minor_node(dip, LX_PTM_MINOR_NODE, S_IFCHR,
568 ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
569 return (DDI_FAILURE);
570
571 err = ldi_ident_from_dip(dip, &lps.lps_li);
572 if (err != 0) {
573 ddi_remove_minor_node(dip, ddi_get_name(dip));
574 return (DDI_FAILURE);
575 }
576
577 lps.lps_dip = dip;
578 lps.lps_pts_major = ddi_name_to_major(LP_PTS_DRV_NAME);
579
580 rw_init(&lps.lps_lh_rwlock, NULL, RW_DRIVER, NULL);
581 lps.lps_lh_count = 0;
582 lps.lps_lh_array = NULL;
583
584 return (DDI_SUCCESS);
585 }
586
587 /*ARGSUSED*/
588 static int
589 lx_ptm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
590 {
591 if (cmd != DDI_DETACH)
592 return (DDI_FAILURE);
593
594 ldi_ident_release(lps.lps_li);
595 lps.lps_dip = NULL;
596
597 ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
598 ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
599 if (lps.lps_lh_array != NULL) {
600 kmem_free(lps.lps_lh_array,
601 sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
602 lps.lps_lh_array = NULL;
603 lps.lps_lh_count = 0;
604 }
605
606 return (DDI_SUCCESS);
607 }
608
609 /*ARGSUSED*/
610 static int
611 lx_ptm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
612 {
613 struct strioctl iocb;
614 ptmptsopencb_t ppocb = { NULL, NULL };
615 ldi_handle_t lh;
616 major_t maj, our_major = getmajor(*devp);
617 minor_t min, lastmin;
618 uint_t index, anchor = 1;
619 dev_t ptm_dev;
620 int err, rval = 0;
621
622 /*
623 * Don't support the FNDELAY flag and FNONBLOCK until we either
624 * find a Linux app that opens /dev/ptmx with the O_NDELAY
625 * or O_NONBLOCK flags explicitly, or until we create test cases
626 * to determine how reads of master terminal devices opened with
627 * these flags behave in different situations on Linux. Supporting
628 * these flags will involve enhancing our read implementation
629 * and changing the way it deals with EOF notifications.
630 */
631 if (flag & (FNDELAY | FNONBLOCK))
632 return (ENOTSUP);
633
634 /*
635 * we're layered on top of the ptm driver so open that driver
636 * first. (note that we're opening /dev/ptmx in the global
637 * zone, not ourselves in the Linux zone.)
638 */
639 err = ldi_open_by_name(LP_PTM_PATH, flag, credp, &lh, lps.lps_li);
640 if (err != 0)
641 return (err);
642
643 /* get the devt returned by the ptmx open */
644 err = ldi_get_dev(lh, &ptm_dev);
645 if (err != 0) {
646 (void) ldi_close(lh, flag, credp);
647 return (err);
648 }
649
650 /*
651 * we're a cloning driver so here's well change the devt that we
652 * return. the ptmx is also a cloning driver so we'll just use
653 * it's minor number as our minor number (it already manages it's
654 * minor name space so no reason to duplicate the effort.)
655 */
656 index = getminor(ptm_dev);
657 *devp = makedevice(our_major, INDEX_TO_MINOR(index));
658
659 /* Get a callback function to query if the pts device is open. */
660 iocb.ic_cmd = PTMPTSOPENCB;
661 iocb.ic_timout = 0;
662 iocb.ic_len = sizeof (ppocb);
663 iocb.ic_dp = (char *)&ppocb;
664
665 err = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, kcred, &rval);
666 if ((err != 0) || (rval != 0)) {
667 (void) ldi_close(lh, flag, credp);
668 return (EIO); /* XXX return something else here? */
669 }
670 ASSERT(ppocb.ppocb_func != NULL);
671
672 /*
673 * now setup autopush for the terminal slave device. this is
674 * necessary so that when a Linux program opens the device we
675 * can push required strmod modules onto the stream. in Solaris
676 * this is normally done by the application that actually
677 * allocates the terminal.
678 */
679 maj = lps.lps_pts_major;
680 min = index;
681 lastmin = 0;
682 err = kstr_autopush(SET_AUTOPUSH, &maj, &min, &lastmin,
683 &anchor, lx_pts_mods);
684 if (err != 0) {
685 (void) ldi_close(lh, flag, credp);
686 return (EIO); /* XXX return something else here? */
687 }
688
689 /* save off this layered handle for future accesses */
690 lx_ptm_lh_insert(index, lh);
691 lx_ptm_lh_set_ppocb(index, &ppocb);
692 return (0);
693 }
694
695 /*ARGSUSED*/
696 static int
697 lx_ptm_close(dev_t dev, int flag, int otyp, cred_t *credp)
698 {
699 ldi_handle_t lh;
700 major_t maj;
701 minor_t min, lastmin;
702 uint_t index;
703 int err;
704
705 index = DEVT_TO_INDEX(dev);
706
707 /*
708 * we must cleanup all the state associated with this major/minor
709 * terminal pair before actually closing the ptm master device.
710 * this is required because once the close of the ptm device is
711 * complete major/minor terminal pair is immediatly available for
712 * re-use in any zone.
713 */
714
715 /* free up our saved reference for this layered handle */
716 lh = lx_ptm_lh_remove(index);
717
718 /* unconfigure autopush for the associated terminal slave device */
719 maj = lps.lps_pts_major;
720 min = index;
721 lastmin = 0;
722 do {
723 /*
724 * we loop here because we don't want to release this ptm
725 * node if autopush can't be disabled on the associated
726 * slave device because then bad things could happen if
727 * another brand were to get this terminal allocated
728 * to them.
729 *
730 * XXX should we ever give up?
731 */
732 err = kstr_autopush(CLR_AUTOPUSH, &maj, &min, &lastmin,
733 0, NULL);
734 } while (err != 0);
735
736 err = ldi_close(lh, flag, credp);
737
738 /*
739 * note that we don't have to bother with changing the permissions
740 * on the associated slave device here. the reason is that no one
741 * can actually open the device untill it's associated master
742 * device is re-opened, which will result in the permissions on
743 * it being reset.
744 */
745 return (err);
746 }
747
748 static int
749 lx_ptm_read_loop(dev_t dev, struct uio *uiop, cred_t *credp, int *loop)
750 {
751 ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
752 int err, rval;
753 struct uio uio = *uiop;
754
755 *loop = 0;
756
757 /*
758 * Here's another way that Linux master terminals behave differently
759 * from Solaris master terminals. If you do a read on a Linux
760 * master terminal (that was opened witout NDELAY and NONBLOCK)
761 * who's corrosponding slave terminal is currently closed and
762 * has been opened and closed at least once, Linux return -1 and
763 * set errno to EIO where as Solaris blocks.
764 */
765 if (lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev))) {
766 /* Slave has been opened and closed at least once. */
767 if (lx_ptm_pts_isopen(dev) == 0) {
768 /*
769 * Slave is closed. Make sure that data is avaliable
770 * before attempting a read.
771 */
772 if ((err = lx_ptm_data_check(dev, 0, &rval)) != 0)
773 return (err);
774
775 /* If there is no data available then return. */
776 if (rval == 0)
777 return (EIO);
778 }
779 }
780
781 /* Actually do the read operation. */
782 if ((err = ldi_read(lh, uiop, credp)) != 0)
783 return (err);
784
785 /* If read returned actual data then return. */
786 if (uio.uio_resid != uiop->uio_resid)
787 return (0);
788
789 /*
790 * This was a zero byte read (ie, an EOF). This indicates
791 * that the slave terinal device has been closed. Record
792 * the fact that the slave device has been closed and retry
793 * the read operation.
794 */
795 lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
796 *loop = 1;
797 return (0);
798 }
799
800 static int
801 lx_ptm_read(dev_t dev, struct uio *uiop, cred_t *credp)
802 {
803 int pktio = lx_ptm_lh_pktio_get(DEVT_TO_INDEX(dev));
804 int err, loop;
805 struct uio uio;
806 struct iovec iovp;
807
808 ASSERT(uiop->uio_iovcnt > 0);
809
810 /*
811 * If packet mode has been enabled (via TIOCPKT) we need to pad
812 * all read requests with a leading byte that indicates any
813 * relevant control status information.
814 */
815 if (pktio != 0) {
816 /*
817 * We'd like to write the control information into
818 * the current buffer but we can't yet. We don't
819 * want to modify userspace memory here only to have
820 * the read operation fail later. So instead
821 * what we'll do here is read one character from the
822 * beginning of the memory pointed to by the uio
823 * structure. This will advance the output pointer
824 * by one. Then when the read completes successfully
825 * we can update the byte that we passed over. Before
826 * we do the read make a copy of the current uiop and
827 * iovec structs so we can write to them later.
828 */
829 uio = *uiop;
830 iovp = *uiop->uio_iov;
831 uio.uio_iov = &iovp;
832
833 if (uwritec(uiop) == -1)
834 return (EFAULT);
835 }
836
837 do {
838 /*
839 * Before we actually attempt a read operation we need
840 * to make sure there's some buffer space to actually
841 * read in some data. We do this because if we're in
842 * pktio mode and the caller only requested one byte,
843 * then we've already used up that one byte and we
844 * don't want to pass this read request. Doing a 0
845 * byte read (unless there is a problem with the stream
846 * head) always returns succcess. Normally when a streams
847 * read returns 0 bytes we interpret that as an EOF on
848 * the stream (ie, the slave side has been opened and
849 * closed) and we ignore it and re-try the read operation.
850 * So if we pass on a 0 byte read here lx_ptm_read_loop()
851 * will tell us to loop around and we'll end up in an
852 * infinite loop.
853 */
854 if (uiop->uio_resid == 0)
855 break;
856
857 /*
858 * Serialize all reads. We need to do this so that we can
859 * properly emulate the behavior of master terminals on Linux.
860 * In reality this serializaion should not pose any kind of
861 * performance problem since it would be very strange to have
862 * multiple threads trying to read from the same master
863 * terminal device concurrently.
864 */
865 if (lx_ptm_read_start(dev) != 0)
866 return (EINTR);
867
868 err = lx_ptm_read_loop(dev, uiop, credp, &loop);
869 lx_ptm_read_end(dev);
870 if (err != 0)
871 return (err);
872 } while (loop != 0);
873
874 if (pktio != 0) {
875 uint8_t pktio_data = TIOCPKT_DATA;
876
877 /*
878 * Note that the control status information we
879 * pass back is faked up in the sense that we
880 * don't actually report any events, we always
881 * report a status of 0.
882 */
883 if (uiomove(&pktio_data, 1, UIO_READ, &uio) != 0)
884 return (EFAULT);
885 }
886
887 return (0);
888 }
889
890 static int
891 lx_ptm_write(dev_t dev, struct uio *uiop, cred_t *credp)
892 {
893 ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
894 int err;
895
896 err = ldi_write(lh, uiop, credp);
897
898 return (err);
899 }
900
901 static int
902 lx_ptm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
903 int *rvalp)
904 {
905 ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
906 int err;
907
908 /*
909 * here we need to make sure that we never allow the
910 * I_SETSIG and I_ESETSIG ioctls to pass through. we
911 * do this because we can't support them.
912 *
913 * the native Solaris ptm device supports these ioctls because
914 * they are streams framework ioctls and all streams devices
915 * support them by default. these ioctls cause the current
916 * process to be registered with a stream and receive signals
917 * when certain stream events occur.
918 *
919 * a problem arises with cleanup of these registrations
920 * for layered drivers.
921 *
922 * normally the streams framework is notified whenever a
923 * process closes any reference to a stream and it goes ahead
924 * and cleans up these registrations. but actual device drivers
925 * are not notified when a process performs a close operation
926 * unless the process is closing the last opened reference to
927 * the device on the entire system.
928 *
929 * so while we could pass these ioctls on and allow processes
930 * to register for signal delivery, we would never receive
931 * any notification when those processes exit (or close a
932 * stream) and we wouldn't be able to unregister them.
933 *
934 * luckily these operations are streams specific and Linux
935 * doesn't support streams devices. so it doesn't actually
936 * seem like we need to support these ioctls. if it turns
937 * out that we do need to support them for some reason in
938 * the future, the current driver model will have to be
939 * enhanced to better support streams device layering.
940 */
941 if ((cmd == I_SETSIG) || (cmd == I_ESETSIG))
942 return (EINVAL);
943
944 /*
945 * here we fake up support for TIOCPKT. Linux applications expect
946 * /etc/ptmx to support this ioctl, but on Solaris it doesn't.
947 * (it is supported on older bsd style ptys.) so we'll fake
948 * up support for it here.
949 *
950 * the reason that this ioctl is emulated here instead of in
951 * userland is that this ioctl affects the results returned
952 * from read() operations. if this ioctl was emulated in
953 * userland the brand library would need to intercept all
954 * read operations and check to see if pktio was enabled
955 * for the fd being read from. since this ioctl only needs
956 * to be supported on the ptmx device it makes more sense
957 * to support it here where we can easily update the results
958 * returned for read() operations performed on ourselves.
959 */
960 if (cmd == TIOCPKT) {
961 int pktio;
962
963 if (ddi_copyin((void *)arg, &pktio, sizeof (pktio),
964 mode) != DDI_SUCCESS)
965 return (EFAULT);
966
967 if (pktio == 0)
968 lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 0);
969 else
970 lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 1);
971
972 return (0);
973 }
974
975 err = ldi_ioctl(lh, cmd, arg, mode, credp, rvalp);
976
977 return (err);
978 }
979
980 static int
981 lx_ptm_poll_loop(dev_t dev, short events, int anyyet, short *reventsp,
982 struct pollhead **phpp, int *loop)
983 {
984 ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
985 short reventsp2;
986 int err, rval;
987
988 *loop = 0;
989
990 /*
991 * If the slave device has been opened and closed at least
992 * once and the slave device is currently closed, then poll
993 * always needs to returns immediatly.
994 */
995 if ((lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev)) != 0) &&
996 (lx_ptm_pts_isopen(dev) == 0)) {
997 /* In this case always return POLLHUP */
998 *reventsp = POLLHUP;
999
1000 /*
1001 * Check if there really is data on the stream.
1002 * If so set the correct return flags.
1003 */
1004 if ((err = lx_ptm_data_check(dev, 1, &rval)) != 0) {
1005 /* Something went wrong. */
1006 return (err);
1007 }
1008 if (rval != 0)
1009 *reventsp |= (events & (POLLIN | POLLRDNORM));
1010
1011 /*
1012 * Is the user checking for writability? Note that for ptm
1013 * devices Linux seems to ignore the POLLWRBAND write flag.
1014 */
1015 if ((events & POLLWRNORM) == 0)
1016 return (0);
1017
1018 /*
1019 * To check if the stream is writable we have to actually
1020 * call poll, but make sure to set anyyet to 1 to prevent
1021 * the streams framework from setting up callbacks.
1022 */
1023 if ((err = ldi_poll(lh, POLLWRNORM, 1, &reventsp2, NULL)) != 0)
1024 return (err);
1025
1026 *reventsp |= (reventsp2 & POLLWRNORM);
1027 } else {
1028 int lockstate;
1029
1030 /* The slave device is open, do the poll */
1031 if ((err = ldi_poll(lh, events, anyyet, reventsp, phpp)) != 0)
1032 return (err);
1033
1034 /*
1035 * Drop any leading EOFs on the stream.
1036 *
1037 * Note that we have to use pollunlock() here to avoid
1038 * recursive mutex enters in the poll framework. The
1039 * reason is that if there is an EOF message on the stream
1040 * then the act of reading from the queue to remove the
1041 * message can cause the ptm drivers event service
1042 * routine to be invoked, and if there is no open
1043 * slave device then the ptm driver may generate
1044 * error messages and put them on the stream. This
1045 * in turn will generate a poll event and the poll
1046 * framework will try to invoke any poll callbacks
1047 * associated with the stream. In the process of
1048 * doing that the poll framework will try to aquire
1049 * locks that we are already holding. So we need to
1050 * drop those locks here before we do our read.
1051 */
1052 lockstate = pollunlock();
1053 err = lx_ptm_eof_drop(dev, &rval);
1054 pollrelock(lockstate);
1055 if (err)
1056 return (err);
1057
1058 /* If no EOF was dropped then return */
1059 if (rval == 0)
1060 return (0);
1061
1062 /*
1063 * An EOF was removed from the stream. Retry the entire
1064 * poll operation from the top because polls on the ptm
1065 * device should behave differently now.
1066 */
1067 *loop = 1;
1068 }
1069 return (0);
1070 }
1071
1072 static int
1073 lx_ptm_poll(dev_t dev, short events, int anyyet, short *reventsp,
1074 struct pollhead **phpp)
1075 {
1076 int loop, err;
1077
1078 do {
1079 /* Serialize ourself wrt read operations. */
1080 if (lx_ptm_read_start(dev) != 0)
1081 return (EINTR);
1082
1083 err = lx_ptm_poll_loop(dev,
1084 events, anyyet, reventsp, phpp, &loop);
1085 lx_ptm_read_end(dev);
1086 if (err != 0)
1087 return (err);
1088 } while (loop != 0);
1089 return (0);
1090 }
1091
1092 static struct cb_ops lx_ptm_cb_ops = {
1093 lx_ptm_open, /* open */
1094 lx_ptm_close, /* close */
1095 nodev, /* strategy */
1096 nodev, /* print */
1097 nodev, /* dump */
1098 lx_ptm_read, /* read */
1099 lx_ptm_write, /* write */
1100 lx_ptm_ioctl, /* ioctl */
1101 nodev, /* devmap */
1102 nodev, /* mmap */
1103 nodev, /* segmap */
1104 lx_ptm_poll, /* chpoll */
1105 ddi_prop_op, /* prop_op */
1106 NULL, /* cb_str */
1107 D_NEW | D_MP,
1108 CB_REV,
1109 NULL,
1110 NULL
1111 };
1112
1113 static struct dev_ops lx_ptm_ops = {
1114 DEVO_REV,
1115 0,
1116 ddi_getinfo_1to1,
1117 nulldev,
1118 nulldev,
1119 lx_ptm_attach,
1120 lx_ptm_detach,
1121 nodev,
1122 &lx_ptm_cb_ops,
1123 NULL,
1124 NULL,
1125 ddi_quiesce_not_needed, /* quiesce */
1126 };
1127
1128 static struct modldrv modldrv = {
1129 &mod_driverops, /* type of module */
1130 "Linux master terminal driver", /* description of module */
1131 &lx_ptm_ops /* driver ops */
1132 };
1133
1134 static struct modlinkage modlinkage = {
1135 MODREV_1,
1136 &modldrv,
1137 NULL
1138 };
1139
1140 int
1141 _init(void)
1142 {
1143 return (mod_install(&modlinkage));
1144 }
1145
1146 int
1147 _info(struct modinfo *modinfop)
1148 {
1149 return (mod_info(&modlinkage, modinfop));
1150 }
1151
1152 int
1153 _fini(void)
1154 {
1155 return (mod_remove(&modlinkage));
1156 }