Bring back LX zones.
--- old/usr/src/uts/i86pc/ml/syscall_asm.s
+++ new/usr/src/uts/i86pc/ml/syscall_asm.s
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
26 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
27 27 /* All Rights Reserved */
28 28
29 29 /* Copyright (c) 1987, 1988 Microsoft Corporation */
30 30 /* All Rights Reserved */
31 31
32 32 #include <sys/asm_linkage.h>
33 33 #include <sys/asm_misc.h>
34 34 #include <sys/regset.h>
35 35 #include <sys/psw.h>
36 36 #include <sys/x86_archext.h>
37 37 #include <sys/machbrand.h>
38 38 #include <sys/privregs.h>
39 39
40 40 #if defined(__lint)
41 41
42 42 #include <sys/types.h>
43 43 #include <sys/thread.h>
44 44 #include <sys/systm.h>
45 45
46 46 #else /* __lint */
47 47
48 48 #include <sys/segments.h>
49 49 #include <sys/pcb.h>
50 50 #include <sys/trap.h>
51 51 #include <sys/ftrace.h>
52 52 #include <sys/traptrace.h>
53 53 #include <sys/clock.h>
54 54 #include <sys/panic.h>
55 55 #include "assym.h"
56 56
57 57 #endif /* __lint */
58 58
59 59 /*
60 60 * We implement two flavours of system call entry points
61 61 *
62 62 * - {int,lcall}/iret (i386)
63 63 * - sysenter/sysexit (Pentium II and beyond)
64 64 *
65 65 * The basic pattern used in the handlers is to check to see if we can
66 66 * do the fast (simple) version of the system call; if we can't, we use various
67 67 * C routines that handle corner cases and debugging.
68 68 *
69 69 * To reduce the amount of assembler replication, yet keep the system call
70 70 * implementations vaguely comprehensible, the common code in the body
71 71 * of the handlers is broken up into a set of preprocessor definitions
72 72 * below.
73 73 */
74 74
75 75 /*
76 76 * When we have SYSCALLTRACE defined, we sneak an extra
77 77 * predicate into a couple of tests.
78 78 */
79 79 #if defined(SYSCALLTRACE)
80 80 #define ORL_SYSCALLTRACE(r32) \
81 81 orl syscalltrace, r32
82 82 #else
83 83 #define ORL_SYSCALLTRACE(r32)
84 84 #endif
85 85
86 86 /*
87 87 * This check is false whenever we want to go fast i.e.
88 88 *
89 89 * if (code >= NSYSCALL ||
90 90 * t->t_pre_sys || (t->t_proc_flag & TP_WATCHPT) != 0)
91 91 * do full version
92 92 * #ifdef SYSCALLTRACE
93 93 * if (syscalltrace)
94 94 * do full version
95 95 * #endif
96 96 *
97 97 * Preconditions:
98 98 * - t curthread
99 99 * - code contains the syscall number
100 100 * Postconditions:
101 101 * - %ecx and %edi are smashed
102 102 * - condition code flag ZF is cleared if pre-sys is too complex
103 103 */
104 104 #define CHECK_PRESYS_NE(t, code) \
105 105 movzbl T_PRE_SYS(t), %edi; \
106 106 movzwl T_PROC_FLAG(t), %ecx; \
107 107 andl $TP_WATCHPT, %ecx; \
108 108 orl %ecx, %edi; \
109 109 cmpl $NSYSCALL, code; \
110 110 setae %cl; \
111 111 movzbl %cl, %ecx; \
112 112 orl %ecx, %edi; \
113 113 ORL_SYSCALLTRACE(%edi)
114 114
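As a hedged C rendering of the predicate above (my reconstruction, not from the source): the macro ORs every slow-path condition into %edi, so ZF ends up clear exactly when the full version is required.

    /* Sketch of what CHECK_PRESYS_NE computes; names as in the comment above. */
    int slow = t->t_pre_sys |
        ((t->t_proc_flag & TP_WATCHPT) != 0) |
        (code >= NSYSCALL);             /* the cmpl/setae/movzbl sequence */
    #ifdef SYSCALLTRACE
    slow |= syscalltrace;               /* folded in by ORL_SYSCALLTRACE */
    #endif
    /* slow != 0 <=> ZF clear <=> the jne after the macro takes the full path */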
115 115 /*
116 116 * Check if a brand_mach_ops callback is defined for the specified callback_id
117 117 * type. If so invoke it with the user's %gs value loaded and the following
118 118 * data on the stack:
119 119 *          --------------------------------------
120 120 *          | user's %ss                          |
121 121 *     |    | user's %esp                         |
122 122 *     |    | EFLAGS register                     |
123 123 *     |    | user's %cs                          |
124 124 *     |    | user's %eip (user return address)   |
125 125 *     |    | 'scratch space'                     |
126 126 *     |    | user's %ebx                         |
127 127 *     |    | user's %gs selector                 |
128 128 *     v    | lwp pointer                         |
129 129 *          | callback wrapper return addr        |
130 130 *          --------------------------------------
131 131 *
132 132 * If the brand code returns, we assume that we are meant to execute the
133 133 * normal system call path.
134 134 *
135 135 * The interface to the brand callbacks on the 32-bit kernel assumes %ebx
136 136 * is available as a scratch register within the callback. If the callback
137 137 * returns within the kernel then this macro will restore %ebx. If the
138 138 * callback is going to return directly to userland then it should restore
139 139 * %ebx before returning to userland.
140 140 */
141 141 #define BRAND_CALLBACK(callback_id) \
142 142 subl $4, %esp /* save some scratch space */ ;\
143 143 pushl %ebx /* save %ebx to use for scratch */ ;\
144 144 pushl %gs /* save the user %gs */ ;\
145 145 movl $KGS_SEL, %ebx ;\
146 146 movw %bx, %gs /* switch to the kernel's %gs */ ;\
147 147 movl %gs:CPU_THREAD, %ebx /* load the thread pointer */ ;\
148 148 movl T_LWP(%ebx), %ebx /* load the lwp pointer */ ;\
149 149 pushl %ebx /* push the lwp pointer */ ;\
150 150 movl LWP_PROCP(%ebx), %ebx /* load the proc pointer */ ;\
151 151 movl P_BRAND(%ebx), %ebx /* load the brand pointer */ ;\
152 152 movl B_MACHOPS(%ebx), %ebx /* load the machops pointer */ ;\
153 153 movl _CONST(_MUL(callback_id, CPTRSIZE))(%ebx), %ebx ;\
154 154 cmpl $0, %ebx ;\
155 155 je 1f ;\
156 156 movl %ebx, 12(%esp) /* save callback to scratch */ ;\
157 157 movl 4(%esp), %ebx /* grab the user %gs */ ;\
158 158 movw %bx, %gs /* restore the user %gs */ ;\
159 159 call *12(%esp) /* call callback in scratch */ ;\
160 160 1: movl 4(%esp), %ebx /* restore user %gs (re-do if */ ;\
161 161 movw %bx, %gs /* branch due to no callback) */ ;\
162 162 movl 8(%esp), %ebx /* restore user's %ebx */ ;\
163 163 addl $16, %esp /* restore stack ptr */
164 164
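The pointer chase in BRAND_CALLBACK reads, as a hedged C sketch (field names inferred from the T_LWP, LWP_PROCP, P_BRAND and B_MACHOPS offsets the macro uses):

    /* b_machops is treated as an array of code pointers indexed by
     * callback_id, matching _MUL(callback_id, CPTRSIZE) in the macro. */
    void (**cbs)(void) = (void (**)(void))
        curthread->t_lwp->lwp_procp->p_brand->b_machops;
    if (cbs[callback_id] != NULL)
        cbs[callback_id]();     /* called with the user's %gs back in place */
    /* if the callback returns (or was NULL), fall through to the normal path */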
165 165 #define MSTATE_TRANSITION(from, to) \
166 166 pushl $to; \
167 167 pushl $from; \
168 168 call syscall_mstate; \
169 169 addl $0x8, %esp
170 170
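MSTATE_TRANSITION is an ordinary cdecl call: $from is pushed last so it arrives as the first argument, and the addl $0x8 is the caller popping both arguments afterwards. In C it is simply syscall_mstate(from, to), e.g. syscall_mstate(LMS_USER, LMS_SYSTEM) on entry.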
171 171 /*
172 172 * aka CPU_STATS_ADDQ(CPU, sys.syscall, 1)
173 173 * This must be called with interrupts or preemption disabled.
174 174 */
175 175 #define CPU_STATS_SYS_SYSCALL_INC \
176 176 addl $1, %gs:CPU_STATS_SYS_SYSCALL; \
177 177 adcl $0, %gs:CPU_STATS_SYS_SYSCALL+4;
178 178
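The addl/adcl pair is the usual 32-bit idiom for bumping a 64-bit counter: add one to the low word, then fold the carry into the high word. A hedged C equivalent:

    uint64_t *cnt = &CPU->cpu_stats.sys.syscall;   /* %gs-relative in the macro */
    *cnt += 1;      /* addl on bits 31..0, adcl $0 on bits 63..32 */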
179 179 #if !defined(__lint)
180 180
181 181 /*
182 182 * ASSERT(lwptoregs(lwp) == rp);
183 183 *
184 184 * this may seem obvious, but very odd things happen if this
185 185 * assertion is false
186 186 *
187 187 * Preconditions:
188 188 * -none-
189 189 * Postconditions (if assertion is true):
190 190 * %esi and %edi are smashed
191 191 */
192 192 #if defined(DEBUG)
193 193
194 194 __lwptoregs_msg:
195 195 .string "syscall_asm.s:%d lwptoregs(%p) [%p] != rp [%p]"
196 196
197 197 #define ASSERT_LWPTOREGS(t, rp) \
198 198 movl T_LWP(t), %esi; \
199 199 movl LWP_REGS(%esi), %edi; \
200 200 cmpl rp, %edi; \
201 201 je 7f; \
202 202 pushl rp; \
203 203 pushl %edi; \
204 204 pushl %esi; \
205 205 pushl $__LINE__; \
206 206 pushl $__lwptoregs_msg; \
207 207 call panic; \
208 208 7:
209 209 #else
210 210 #define ASSERT_LWPTOREGS(t, rp)
211 211 #endif
212 212
213 213 #endif /* __lint */
214 214
215 215 /*
216 216 * This is an assembler version of this fragment:
217 217 *
218 218 * lwp->lwp_state = LWP_SYS;
219 219 * lwp->lwp_ru.sysc++;
220 220 * lwp->lwp_eosys = NORMALRETURN;
221 221 * lwp->lwp_ap = argp;
222 222 *
223 223 * Preconditions:
224 224 * -none-
225 225 * Postconditions:
226 226 * -none-
227 227 */
228 228 #define SET_LWP(lwp, argp) \
229 229 movb $LWP_SYS, LWP_STATE(lwp); \
230 230 addl $1, LWP_RU_SYSC(lwp); \
231 231 adcl $0, LWP_RU_SYSC+4(lwp); \
232 232 movb $NORMALRETURN, LWP_EOSYS(lwp); \
233 233 movl argp, LWP_AP(lwp)
234 234
235 235 /*
236 236 * Set up the thread, lwp, find the handler, and copy
237 237 * in the arguments from userland to the kernel stack.
238 238 *
239 239 * Preconditions:
240 240 * - %eax contains the syscall number
241 241 * Postconditions:
242 242 * - %eax contains a pointer to the sysent structure
243 243 * - %ecx is zeroed
244 244 * - %esi, %edi are smashed
245 245 * - %esp is SYS_DROPped ready for the syscall
246 246 */
247 247 #define SIMPLE_SYSCALL_PRESYS(t, faultlabel) \
248 248 movl T_LWP(t), %esi; \
249 249 movw %ax, T_SYSNUM(t); \
250 250 subl $SYS_DROP, %esp; \
251 251 shll $SYSENT_SIZE_SHIFT, %eax; \
252 252 SET_LWP(%esi, %esp); \
253 253 leal sysent(%eax), %eax; \
254 254 movzbl SY_NARG(%eax), %ecx; \
255 255 testl %ecx, %ecx; \
256 256 jz 4f; \
257 257 movl %esp, %edi; \
258 258 movl SYS_DROP + REGOFF_UESP(%esp), %esi; \
259 259 movl $faultlabel, T_LOFAULT(t); \
260 260 addl $4, %esi; \
261 261 rep; \
262 262 smovl; \
263 263 movl %ecx, T_LOFAULT(t); \
264 264 4:
265 265
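Read as hedged C (my reconstruction), the macro body is roughly the fragment below. The t_lofault window is the key part: a fault while copying arguments off the untrusted user stack branches to faultlabel instead of panicking. Note also that the closing movl reuses %ecx, which rep; smovl has counted down to zero, to clear t_lofault without another immediate.

    /* 'rp' is the saved regs; 'argp' is the args area the macro carved out
     * by subtracting SYS_DROP from %esp. */
    t->t_sysnum = (short)code;
    struct sysent *callp = &sysent[code];        /* shll + leal scale the index */
    if (callp->sy_narg != 0) {
        long *uargs = (long *)rp->r_uesp + 1;    /* addl $4: skip the ret addr */
        t->t_lofault = (uintptr_t)faultlabel;    /* arm fault recovery */
        bcopy(uargs, argp, callp->sy_narg * sizeof (long));   /* rep; smovl */
        t->t_lofault = 0;                        /* %ecx == 0 after the rep */
    }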
266 266 /*
267 267 * Check to see if a simple return is possible i.e.
268 268 *
269 269 * if ((t->t_post_sys_ast | syscalltrace) != 0)
270 270 * do full version;
271 271 *
272 272 * Preconditions:
273 273 * - t is curthread
274 274 * Postconditions:
275 275 * - condition code NE is set if post-sys is too complex
276 276 * - rtmp is zeroed if it isn't (we rely on this!)
277 277 */
278 278 #define CHECK_POSTSYS_NE(t, rtmp) \
279 279 xorl rtmp, rtmp; \
280 280 ORL_SYSCALLTRACE(rtmp); \
281 281 orl T_POST_SYS_AST(t), rtmp; \
282 282 cmpl $0, rtmp
283 283
284 284 /*
285 285 * Fix up the lwp, thread, and eflags for a successful return
286 286 *
287 287 * Preconditions:
288 288 * - zwreg contains zero
289 289 * Postconditions:
290 290 * - %esp has been unSYS_DROPped
291 291 * - %esi is smashed (points to lwp)
292 292 */
293 293 #define SIMPLE_SYSCALL_POSTSYS(t, zwreg) \
294 294 movl T_LWP(t), %esi; \
295 295 addl $SYS_DROP, %esp; \
296 296 movw zwreg, T_SYSNUM(t); \
297 297 movb $LWP_USER, LWP_STATE(%esi); \
298 298 andb $_CONST(0xffff - PS_C), REGOFF_EFL(%esp)
299 299
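The one non-obvious line here is the final andb: it clears PS_C (carry) in the saved EFLAGS. The kernel signals syscall failure to userland by returning with carry set and the error number in %eax, so a successful fast-path return must leave carry clear. In hedged C terms:

    rp->r_efl &= ~PS_C;     /* carry clear at return => caller sees success */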
300 300 /*
301 301 * System call handler. This is the destination of both the call
302 302 * gate (lcall 0x27) _and_ the interrupt gate (int 0x91). For our purposes,
303 303 * there are two significant differences between an interrupt gate and a call
304 304 * gate:
305 305 *
306 306 * 1) An interrupt gate runs the handler with interrupts disabled, whereas a
307 307 * call gate runs the handler with whatever EFLAGS settings were in effect at
308 308 * the time of the call.
309 309 *
310 310 * 2) An interrupt gate pushes the contents of the EFLAGS register at the time
311 311 * of the interrupt onto the stack, whereas a call gate does not.
312 312 *
313 313 * Because we use the following code sequence to handle system calls made from
314 314 * _both_ a call gate _and_ an interrupt gate, these two differences must be
315 315 * respected. With regard to number 1) above, the handler must ensure that a sane
316 316 * EFLAGS snapshot is stored on the stack so that when the kernel returns back
317 317 * to the user via iret (which returns to user with the EFLAGS value saved on
318 318 * the stack), interrupts are re-enabled.
319 319 *
320 320 * With regard to number 2) above, the handler must always put a current snapshot
321 321 * of EFLAGS onto the stack in the appropriate place. If we came in via an
322 322 * interrupt gate, we will be clobbering the EFLAGS value that was pushed by
323 323 * the interrupt gate. This is OK, as the only bit that was changed by the
324 324 * hardware was the IE (interrupt enable) bit, which for an interrupt gate is
325 325 * now off. If we were to do nothing, the stack would contain an EFLAGS with
326 326 * IE off, resulting in us eventually returning back to the user with interrupts
327 327 * disabled. The solution is to turn on the IE bit in the EFLAGS value saved on
328 328 * the stack.
329 329 *
330 330 * Another subtlety which deserves mention is the difference between the two
331 331 * descriptors. The call gate descriptor is set to instruct the hardware to copy
332 332 * one parameter from the user stack to the kernel stack, whereas the interrupt
333 333 * gate descriptor doesn't use the parameter passing mechanism at all. The
334 334 * kernel doesn't actually use the parameter that is copied by the hardware; the
335 335 * only reason it does this is so that there is a space on the stack large
336 336 * enough to hold an EFLAGS register value, which happens to be in the correct
337 337 * place for use by iret when we go back to userland. How convenient.
338 338 *
339 339 * Stack frame description in syscall() and callees.
340 340 *
341 341 * |------------|
342 342 * | regs | +(8*4)+4 registers
343 343 * |------------|
344 344 * | 8 args | <- %esp MAXSYSARGS (currently 8) arguments
345 345 * |------------|
346 346 *
347 347 */
348 348 #define SYS_DROP _CONST(_MUL(MAXSYSARGS, 4))
349 349
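Concretely, that is MAXSYSARGS * 4 = 8 * 4 = 32 bytes: the fixed scratch area carved out below the saved registers, into which SIMPLE_SYSCALL_PRESYS copies the user's arguments and which SIMPLE_SYSCALL_POSTSYS later gives back.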
350 350 #if defined(__lint)
351 351
352 352 /*ARGSUSED*/
353 353 void
354 354 sys_call()
355 355 {}
356 356
357 357 void
358 358 _allsyscalls()
359 359 {}
360 360
361 361 size_t _allsyscalls_size;
362 362
363 363 #else /* __lint */
364 364
365 365 ENTRY_NP2(brand_sys_call, _allsyscalls)
366 366 BRAND_CALLBACK(BRAND_CB_SYSCALL)
367 367
368 368 ALTENTRY(sys_call)
369 369 / on entry eax = system call number
370 370
371 371 / set up the stack to look as in reg.h
372 372 subl $8, %esp / pad the stack with ERRCODE and TRAPNO
373 373
374 374 SYSCALL_PUSH
375 375
376 376 #ifdef TRAPTRACE
377 377 TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSCALL) / Uses labels "8" and "9"
378 378 TRACE_REGS(%edi, %esp, %ebx, %ecx) / Uses label "9"
379 379 pushl %eax
380 380 TRACE_STAMP(%edi) / Clobbers %eax, %edx, uses "9"
381 381 popl %eax
382 382 movl %eax, TTR_SYSNUM(%edi)
383 383 #endif
384 384
385 385 _watch_do_syscall:
386 386 movl %esp, %ebp
387 387
388 388 / Interrupts may be enabled here, so we must make sure this thread
389 389 / doesn't migrate off the CPU while it updates the CPU stats.
390 390 /
391 391 / XXX This is only true if we got here via call gate thru the LDT for
392 392 / old style syscalls. Perhaps this preempt++-- will go away soon?
393 393 movl %gs:CPU_THREAD, %ebx
394 394 addb $1, T_PREEMPT(%ebx)
395 395 CPU_STATS_SYS_SYSCALL_INC
396 396 subb $1, T_PREEMPT(%ebx)
397 397
398 398 ENABLE_INTR_FLAGS
399 399
400 400 pushl %eax / preserve across mstate call
401 401 MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
402 402 popl %eax
403 403
404 404 movl %gs:CPU_THREAD, %ebx
405 405
406 406 ASSERT_LWPTOREGS(%ebx, %esp)
407 407
408 408 CHECK_PRESYS_NE(%ebx, %eax)
409 409 jne _full_syscall_presys
410 410 SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)
411 411
412 412 _syslcall_call:
413 413 call *SY_CALLC(%eax)
414 414
415 415 _syslcall_done:
416 416 CHECK_POSTSYS_NE(%ebx, %ecx)
417 417 jne _full_syscall_postsys
418 418 SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
419 419 movl %eax, REGOFF_EAX(%esp)
420 420 movl %edx, REGOFF_EDX(%esp)
421 421
422 422 MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
423 423
424 424 /
425 425 / get back via iret
426 426 /
427 427 CLI(%edx)
428 428 jmp sys_rtt_syscall
429 429
430 430 _full_syscall_presys:
431 431 movl T_LWP(%ebx), %esi
432 432 subl $SYS_DROP, %esp
433 433 movb $LWP_SYS, LWP_STATE(%esi)
434 434 pushl %esp
435 435 pushl %ebx
436 436 call syscall_entry
437 437 addl $8, %esp
438 438 jmp _syslcall_call
439 439
440 440 _full_syscall_postsys:
441 441 addl $SYS_DROP, %esp
442 442 pushl %edx
443 443 pushl %eax
444 444 pushl %ebx
445 445 call syscall_exit
446 446 addl $12, %esp
447 447 MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
448 448 jmp _sys_rtt
449 449
450 450 _syscall_fault:
451 451 push $0xe / EFAULT
452 452 call set_errno
453 453 addl $4, %esp
454 454 xorl %eax, %eax / fake syscall_err()
455 455 xorl %edx, %edx
456 456 jmp _syslcall_done
457 457 SET_SIZE(sys_call)
458 458 SET_SIZE(brand_sys_call)
459 459
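The _syscall_fault path amounts to failing the call with EFAULT and faking the zeroed return values that syscall_err() would have produced; as a hedged sketch (rval1/rval2 are illustrative names only):

    (void) set_errno(EFAULT);   /* the pushed $0xe */
    rval1 = rval2 = 0;          /* %eax/%edx zeroed before rejoining _syslcall_done */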
460 460 #endif /* __lint */
461 461
462 462 /*
463 463 * System call handler via the sysenter instruction
464 464 *
465 465 * Here's how syscall entry usually works (see sys_call for details).
466 466 *
467 467 * There, the caller (lcall or int) in userland has arranged that:
468 468 *
469 469 * - %eax contains the syscall number
470 470 * - the user stack contains the args to the syscall
471 471 *
472 472 * Normally the lcall instruction into the call gate causes the processor
473 473 * to push %ss, %esp, <top-of-stack>, %cs, %eip onto the kernel stack.
474 474 * The sys_call handler then leaves space for r_trapno and r_err, and
475 475 * pusha's {%eax, %ecx, %edx, %ebx, %esp, %ebp, %esi, %edi}, followed
476 476 * by %ds, %es, %fs and %gs to capture a 'struct regs' on the stack.
477 477 * Then the kernel sets %ds, %es and %gs to kernel selectors, and finally
478 478 * extracts %efl and puts it into r_efl (which happens to live at the offset
479 479 * that <top-of-stack> was copied into). Note that the value in r_efl has
480 480 * the IF (interrupt enable) flag turned on. (The int instruction into the
481 481 * interrupt gate does essentially the same thing, only instead of
482 482 * <top-of-stack> we get eflags - see comment above.)
483 483 *
484 484 * In the sysenter case, things are a lot more primitive.
485 485 *
486 486 * The caller in userland has arranged that:
487 487 *
488 488 * - %eax contains the syscall number
489 489 * - %ecx contains the user %esp
490 490 * - %edx contains the return %eip
491 491 * - the user stack contains the args to the syscall
492 492 *
493 493 * e.g.
494 494 * <args on the stack>
495 495 * mov $SYS_callnum, %eax
496 496 * mov $1f, %edx / return %eip
497 497 * mov %esp, %ecx / return %esp
498 498 * sysenter
499 499 * 1:
500 500 *
501 501 * Hardware and (privileged) initialization code have arranged that by
502 502 * the time the sysenter instruction completes:
503 503 *
504 504 * - %eip is pointing to sys_sysenter (below).
505 505 * - %cs and %ss are set to kernel text and stack (data) selectors.
506 506 * - %esp is pointing at the lwp's stack
507 507 * - Interrupts have been disabled.
508 508 *
509 509 * The task for the sysenter handler is:
510 510 *
511 511 * - recreate the same regs structure on the stack and the same
512 512 * kernel state as if we'd come in on an lcall
513 513 * - do the normal work of a syscall
514 514 * - execute the system call epilogue, use sysexit to return to userland.
515 515 *
516 516 * Note that we are unable to return both "rvals" to userland with this
517 517 * call, as %edx is used by the sysexit instruction.
518 518 *
519 519 * One final complication in this routine is its interaction with
520 520 * single-stepping in a debugger. For most of the system call mechanisms,
521 521 * the CPU automatically clears the single-step flag before we enter the
522 522 * kernel. The sysenter mechanism does not clear the flag, so a user
523 523 * single-stepping through a libc routine may suddenly find him/herself
524 524 * single-stepping through the kernel. To detect this, kmdb compares the
525 525 * trap %pc to the [brand_]sys_sysenter addresses on each single-step trap.
526 526 * If it finds that we have single-stepped to a sysenter entry point, it
527 527 * explicitly clears the flag and executes the sys_sysenter routine.
528 528 *
529 529 * One final complication in this final complication is the fact that we
530 530 * have two different entry points for sysenter: brand_sys_sysenter and
531 531 * sys_sysenter. If we enter at brand_sys_sysenter and start single-stepping
532 532 * through the kernel with kmdb, we will eventually hit the instruction at
533 533 * sys_sysenter. kmdb cannot distinguish between that valid single-step
534 534 * and the undesirable one mentioned above. To avoid this situation, we
535 535 * simply add a jump over the instruction at sys_sysenter to make it
536 536 * impossible to single-step to it.
537 537 */
538 538 #if defined(__lint)
539 539
540 540 void
541 541 sys_sysenter()
542 542 {}
543 543
544 544 #else /* __lint */
545 545
546 546 ENTRY_NP(brand_sys_sysenter)
547 547 pushl %edx
548 548 BRAND_CALLBACK(BRAND_CB_SYSENTER)
549 549 popl %edx
550 550 /*
551 551 * Jump over sys_sysenter to allow single-stepping as described
552 552 * above.
553 553 */
554 554 ja 1f
555 555
556 556 ALTENTRY(sys_sysenter)
557 557 nop
558 558 1:
559 559 /
560 560 / do what the call gate would've done to the stack ..
561 561 /
562 562 pushl $UDS_SEL / (really %ss, but it's the same ..)
563 563 pushl %ecx / userland makes this a copy of %esp
564 564 pushfl
565 565 orl $PS_IE, (%esp) / turn interrupts on when we return to user
566 566 pushl $UCS_SEL
567 567 pushl %edx / userland makes this a copy of %eip
568 568 /
569 569 / done. finish building the stack frame
570 570 /
571 571 subl $8, %esp / leave space for ERR and TRAPNO
572 572
573 573 SYSENTER_PUSH
574 574
575 575 #ifdef TRAPTRACE
576 576 TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSENTER) / uses labels 8 and 9
577 577 TRACE_REGS(%edi, %esp, %ebx, %ecx) / uses label 9
578 578 pushl %eax
579 579 TRACE_STAMP(%edi) / clobbers %eax, %edx, uses label 9
580 580 popl %eax
581 581 movl %eax, TTR_SYSNUM(%edi)
582 582 #endif
583 583 movl %esp, %ebp
584 584
585 585 CPU_STATS_SYS_SYSCALL_INC
586 586
587 587 ENABLE_INTR_FLAGS
588 588
589 589 pushl %eax / preserve across mstate call
590 590 MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
591 591 popl %eax
592 592
593 593 movl %gs:CPU_THREAD, %ebx
594 594
595 595 ASSERT_LWPTOREGS(%ebx, %esp)
596 596
597 597 CHECK_PRESYS_NE(%ebx, %eax)
598 598 jne _full_syscall_presys
599 599 SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)
600 600
601 601 _sysenter_call:
602 602 call *SY_CALLC(%eax)
603 603
604 604 _sysenter_done:
605 605 CHECK_POSTSYS_NE(%ebx, %ecx)
606 606 jne _full_syscall_postsys
607 607 SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
608 608 /
609 609 / sysexit uses %edx to restore %eip, so we can't use it
610 610 / to return a value, sigh.
611 611 /
612 612 movl %eax, REGOFF_EAX(%esp)
613 613 / movl %edx, REGOFF_EDX(%esp)
614 614
615 615 / Interrupts will be turned on by the 'sti' executed just before
616 616 / sysexit. The following ensures that restoring the user's EFLAGS
617 617 / doesn't enable interrupts too soon.
618 618 andl $_BITNOT(PS_IE), REGOFF_EFL(%esp)
619 619
620 620 MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
621 621
622 622 cli
623 623
623 623
624 624 SYSCALL_POP
625 625
626 626 popl %edx / sysexit: %edx -> %eip
627 627 addl $4, %esp / get CS off the stack
628 628 popfl / EFL
629 629 popl %ecx / sysexit: %ecx -> %esp
630 630 sti
631 631 sysexit
632 632 SET_SIZE(sys_sysenter)
633 633 SET_SIZE(brand_sys_sysenter)
634 +#endif /* __lint */
635 +
636 +#if defined(__lint)
637 +/*
638 + * System call via an int80. This entry point is only used by the Linux
639 + * application environment. Unlike the sysenter path, there is no default
640 + * action to take if no callback is registered for this process.
641 + */
642 +void
643 +sys_int80()
644 +{}
645 +
646 +#else /* __lint */
647 +
648 + ENTRY_NP(brand_sys_int80)
649 + BRAND_CALLBACK(BRAND_CB_INT80)
650 +
651 + ALTENTRY(sys_int80)
652 + /*
653 + * We hit an int80, but this process isn't of a brand with an int80
654 + * handler. Bad process! Make it look as if the INT failed.
655 + * Modify %eip to point before the INT, push the expected error
656 + * code and fake a GP fault.
657 + *
658 + */
659 + subl $2, (%esp) /* int insn 2-bytes */
660 + pushl $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
661 + jmp gptrap / GP fault
662 + SET_SIZE(sys_int80)
663 + SET_SIZE(brand_sys_int80)
634 664
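A worked check on the pushed error code (my arithmetic, assuming T_INT80 is vector 0x80 and GATE_DESC_SIZE is 8): 0x80 * 8 + 2 = 0x402, which matches the hardware #GP error-code format for a fault through the IDT, i.e. selector index 0x80 with the IDT bit (bit 1) set. Combined with backing the saved %eip up over the two-byte int instruction, the process observes exactly what the CPU itself would have delivered had int $0x80 hit a not-present IDT gate.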
635 665 /*
636 666 * Declare a uintptr_t which covers the entire pc range of syscall
637 667 * handlers for the stack walkers that need this.
638 668 */
639 669 .align CPTRSIZE
640 670 .globl _allsyscalls_size
641 671 .type _allsyscalls_size, @object
642 672 _allsyscalls_size:
643 673 .NWORD . - _allsyscalls
644 674 SET_SIZE(_allsyscalls_size)
645 675
646 676 #endif /* __lint */
647 677
648 678 /*
649 679 * These are the thread context handlers for lwps using sysenter/sysexit.
650 680 */
651 681
652 682 #if defined(__lint)
653 683
654 684 /*ARGSUSED*/
655 685 void
656 686 sep_save(void *ksp)
657 687 {}
658 688
659 689 /*ARGSUSED*/
660 690 void
661 691 sep_restore(void *ksp)
662 692 {}
663 693
664 694 #else /* __lint */
665 695
666 696 /*
667 697 * setting this value to zero as we switch away causes the
668 698 * stack-pointer-on-sysenter to be NULL, ensuring that we
669 699 * don't silently corrupt another (preempted) thread stack
670 700 * when running an lwp that (somehow) didn't get sep_restore'd
671 701 */
672 702 ENTRY_NP(sep_save)
673 703 xorl %edx, %edx
674 704 xorl %eax, %eax
675 705 movl $MSR_INTC_SEP_ESP, %ecx
676 706 wrmsr
677 707 ret
678 708 SET_SIZE(sep_save)
679 709
680 710 /*
681 711 * Update the kernel stack pointer as we resume onto this cpu.
682 712 */
683 713 ENTRY_NP(sep_restore)
684 714 movl 4(%esp), %eax /* per-lwp kernel sp */
685 715 xorl %edx, %edx
686 716 movl $MSR_INTC_SEP_ESP, %ecx
687 717 wrmsr
688 718 ret
689 719 SET_SIZE(sep_restore)
690 720
691 721 #endif /* __lint */
692 722
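In hedged C terms the pair above is just two writes to the IA32_SYSENTER_ESP MSR, assuming an illumos-style wrmsr(msr, value) helper (sep_save ignores its ksp argument and stores zero):

    /* MSR_INTC_SEP_ESP names IA32_SYSENTER_ESP (0x175) */
    void
    sep_save(void *ksp)
    {
            wrmsr(MSR_INTC_SEP_ESP, 0);     /* NULL ksp while switched out */
    }

    void
    sep_restore(void *ksp)
    {
            wrmsr(MSR_INTC_SEP_ESP, (uint64_t)(uintptr_t)ksp);
    }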
693 723 /*
694 724 * Call syscall(). Called from trap() on watchpoint at lcall 0,7
695 725 */
696 726
697 727 #if defined(__lint)
698 728
699 729 void
700 730 watch_syscall(void)
701 731 {}
702 732
703 733 #else /* __lint */
704 734
705 735 ENTRY_NP(watch_syscall)
706 736 CLI(%eax)
707 737 movl %gs:CPU_THREAD, %ebx
708 738 movl T_STACK(%ebx), %esp / switch to the thread stack
709 739 movl REGOFF_EAX(%esp), %eax / recover original syscall#
710 740 jmp _watch_do_syscall
711 741 SET_SIZE(watch_syscall)
712 742
713 743 #endif /* __lint */
(70 lines elided)