de-linting of .s files
--- old/usr/src/uts/sun4u/cpu/spitfire_copy.s
+++ new/usr/src/uts/sun4u/cpu/spitfire_copy.s
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License, Version 1.0 only
6 6 * (the "License"). You may not use this file except in compliance
7 7 * with the License.
8 8 *
9 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 10 * or http://www.opensolaris.org/os/licensing.
11 11 * See the License for the specific language governing permissions
12 12 * and limitations under the License.
13 13 *
14 14 * When distributing Covered Code, include this CDDL HEADER in each
15 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 16 * If applicable, add the following below this CDDL HEADER, with the
17 17 * fields enclosed by brackets "[]" replaced with your own identifying
18 18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 19 *
20 20 * CDDL HEADER END
21 21 */
22 22 /*
23 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 -#pragma ident "%Z%%M% %I% %E% SMI"
28 -
29 27 #include <sys/param.h>
30 28 #include <sys/errno.h>
31 29 #include <sys/asm_linkage.h>
32 30 #include <sys/vtrace.h>
33 31 #include <sys/machthread.h>
34 32 #include <sys/clock.h>
35 33 #include <sys/asi.h>
36 34 #include <sys/fsr.h>
37 35 #include <sys/privregs.h>
38 36
39 -#if !defined(lint)
40 37 #include "assym.h"
41 -#endif /* lint */
42 38
43 39
44 40 /*
45 41 * Pseudo-code to aid in understanding the control flow of the
46 42 * bcopy routine.
47 43 *
48 44 * On entry to bcopy:
49 45 *
50 46 * %l6 = curthread->t_lofault;
51 47 * used_block_copy = FALSE; ! %l6 |= 1
52 48 * if (%l6 != NULL) {
53 49 * curthread->t_lofault = .copyerr;
54 50 * caller_error_handler = TRUE ! %l6 |= 2
55 51 * }
56 52 *
57 53 * if (length < VIS_COPY)
58 54 * goto regular_copy;
59 55 *
60 56 * if (!use_vis)
61 57 * goto regular_copy;
62 58 *
63 59 * if (curthread->t_lwp == NULL) {
64 60 * ! Kernel threads do not have pcb's in which to store
65 61 * ! the floating point state, disallow preemption during
66 62 * ! the copy.
67 63 * kpreempt_disable(curthread);
68 64 * }
69 65 *
70 66 * old_fprs = %fprs;
71 67 * old_gsr = %gsr;
72 68 * if (%fprs.fef) {
73 69 * ! If we need to save 4 blocks of fpregs then make sure
74 70 * ! the length is still appropriate for that extra overhead.
75 71 * if (length < (large_length + (64 * 4))) {
76 72 * if (curthread->t_lwp == NULL)
77 73 * kpreempt_enable(curthread);
78 74 * goto regular_copy;
79 75 * }
80 76 * %fprs.fef = 1;
81 77 * save current fpregs on stack using blockstore
82 78 * } else {
83 79 * %fprs.fef = 1;
84 80 * }
85 81 *
86 82 * used_block_copy = 1; ! %l6 |= 1
87 83 * do_blockcopy_here;
88 84 *
89 85 * In lofault handler:
90 86 * curthread->t_lofault = .copyerr2;
91 87 * Continue on with the normal exit handler
92 88 *
93 89 * On exit:
94 90 * call_kpreempt = 0;
95 91 * if (used_block_copy) { ! %l6 & 1
96 92 * %gsr = old_gsr;
97 93 * if (old_fprs & FPRS_FEF)
98 94 * restore fpregs from stack using blockload
99 95 * else
100 96 * zero fpregs
101 97 * %fprs = old_fprs;
102 98 * if (curthread->t_lwp == NULL) {
103 99 * kpreempt_enable(curthread);
104 100 * call_kpreempt = 1;
105 101 * }
106 102 * }
107 103 * curthread->t_lofault = (%l6 & ~3);
108 104 * if (call_kpreempt)
109 105 * kpreempt(%pil);
110 106 * return (0)
111 107 *
112 108 * In second lofault handler (.copyerr2):
113 109 * We've tried to restore fp state from the stack and failed. To
114 110 * prevent returning with a corrupted fp state, we will panic.
115 111 */
116 112
117 113 /*
118 114 * Notes on preserving existing fp state:
119 115 *
120 116 * When a copyOP decides to use fp we may have to preserve existing
121 117 * floating point state. It is not the caller's state that we need to
122 118 * preserve - the rest of the kernel does not use fp and, anyway, fp
123 119 * registers are volatile across a call. Some examples:
124 120 *
125 121 * - userland has fp state and is interrupted (device interrupt
126 122 * or trap) and within the interrupt/trap handling we use
127 123 * bcopy()
128 124 * - another (higher level) interrupt or trap handler uses bcopy
129 125 * while a bcopy from an earlier interrupt is still active
130 126 * - an asynchronous error trap occurs while fp state exists (in
131 127 * userland or in kernel copy) and the tl0 component of the handling
132 128 * uses bcopy
133 129 * - a user process with fp state incurs a copy-on-write fault and
134 130 * hwblkpagecopy always uses fp
135 131 *
136 132 * We therefore need a per-call place in which to preserve fp state -
137 133 * using our stack is ideal (and since fp copy cannot be leaf optimized
138 134 * because of calls it makes, this is no hardship).
139 135 *
140 136 * To make sure that floating point state is always saved and restored
141 137 * correctly, the following "big rules" must be followed when the floating
142 138 * point registers will be used:
143 139 *
144 140 * 1. %l6 always holds the caller's lofault handler. Also in this register,
145 141 * Bit 1 (FPUSED_FLAG) indicates that the floating point registers are in
146 142 * use. Bit 2 (BCOPY_FLAG) indicates that the call was to bcopy.
147 143 *
148 144 * 2. The FPUSED flag indicates that all FP state has been successfully stored
149 145 * on the stack. It should not be set until this save has been completed.
150 146 *
151 147 * 3. The FPUSED flag should not be cleared on exit until all FP state has
152 148 * been restored from the stack. If an error occurs while restoring
153 149 * data from the stack, the error handler can check this flag to see if
154 150 * a restore is necessary.
155 151 *
156 152 * 4. Code run under the new lofault handler must be kept to a minimum. In
157 153 * particular, any calls to kpreempt() should not be made until after the
158 154 * lofault handler has been restored.
159 155 */
160 156
161 157 /*
162 158 * This shadows sys/machsystm.h which can't be included due to the lack of
163 159 * _ASM guards in include files it references. Change it here, change it there.
164 160 */
165 161 #define VIS_COPY_THRESHOLD 900
166 162
167 163 /*
168 164 * For counts less than or equal to this number of bytes we always copy byte-for-byte
169 165 */
170 166 #define SMALL_LIMIT 7
171 167
172 168 /*
173 169 * Flags set in the lower bits of the t_lofault address:
174 170 * FPUSED_FLAG: The FP registers were in use and must be restored
175 171 * BCOPY_FLAG: Set for bcopy calls, cleared for kcopy calls
176 172 * COPY_FLAGS: Both of the above
177 173 *
178 174 * Other flags:
179 175 * KPREEMPT_FLAG: kpreempt needs to be called
180 176 */
181 177 #define FPUSED_FLAG 1
182 178 #define BCOPY_FLAG 2
183 179 #define COPY_FLAGS (FPUSED_FLAG | BCOPY_FLAG)
184 180 #define KPREEMPT_FLAG 4
185 181
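/*
 * FPUSED_FLAG and BCOPY_FLAG travel in the low bits of the saved
 * t_lofault value held in %l6; this works because handler addresses
 * are word aligned, leaving the low bits free.  A minimal C sketch of
 * the idea (illustrative only, types simplified, not lifted from this
 * file):
 *
 *	l6 = curthread->t_lofault | BCOPY_FLAG;
 *	...
 *	l6 |= FPUSED_FLAG;	// set only once fp state is on the stack
 *	...
 *	curthread->t_lofault = l6 & ~COPY_FLAGS;	// strip on exit
 *
 * KPREEMPT_FLAG, by contrast, is carried in a scratch register (%l1),
 * never in the saved handler value.
 */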
186 182 /*
187 183 * Size of stack frame in order to accommodate a 64-byte aligned
188 184 * floating-point register save area and 2 32-bit temp locations.
189 185 */
190 186 #define HWCOPYFRAMESIZE ((64 * 5) + (2 * 4))
191 187
192 188 #define SAVED_FPREGS_OFFSET (64 * 5)
193 189 #define SAVED_FPRS_OFFSET (SAVED_FPREGS_OFFSET + 4)
194 190 #define SAVED_GSR_OFFSET (SAVED_FPRS_OFFSET + 4)
195 191
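/*
 * Sketch of why the register save area is sized at 64 * 5 for 256 bytes
 * of data (an inference from the constants above, not text from the
 * original): %fp is not 64-byte aligned, so the code reserves one extra
 * block and rounds down, e.g.
 *
 *	save = (%fp + STACK_BIAS - 257) & ~63;
 *	// save .. save + 255 is 64-byte aligned and falls entirely
 *	// within the (64 * 5)-byte region below the frame pointer.
 */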
196 192 /*
197 193 * Common macros used by the various versions of the block copy
198 194 * routines in this file.
199 195 */
200 196
201 197 #define FZERO \
202 198 fzero %f0 ;\
203 199 fzero %f2 ;\
204 200 faddd %f0, %f2, %f4 ;\
205 201 fmuld %f0, %f2, %f6 ;\
206 202 faddd %f0, %f2, %f8 ;\
207 203 fmuld %f0, %f2, %f10 ;\
208 204 faddd %f0, %f2, %f12 ;\
209 205 fmuld %f0, %f2, %f14 ;\
210 206 faddd %f0, %f2, %f16 ;\
211 207 fmuld %f0, %f2, %f18 ;\
212 208 faddd %f0, %f2, %f20 ;\
213 209 fmuld %f0, %f2, %f22 ;\
214 210 faddd %f0, %f2, %f24 ;\
215 211 fmuld %f0, %f2, %f26 ;\
216 212 faddd %f0, %f2, %f28 ;\
217 213 fmuld %f0, %f2, %f30 ;\
218 214 faddd %f0, %f2, %f32 ;\
219 215 fmuld %f0, %f2, %f34 ;\
220 216 faddd %f0, %f2, %f36 ;\
221 217 fmuld %f0, %f2, %f38 ;\
222 218 faddd %f0, %f2, %f40 ;\
223 219 fmuld %f0, %f2, %f42 ;\
224 220 faddd %f0, %f2, %f44 ;\
225 221 fmuld %f0, %f2, %f46 ;\
226 222 faddd %f0, %f2, %f48 ;\
227 223 fmuld %f0, %f2, %f50 ;\
228 224 faddd %f0, %f2, %f52 ;\
229 225 fmuld %f0, %f2, %f54 ;\
230 226 faddd %f0, %f2, %f56 ;\
231 227 fmuld %f0, %f2, %f58 ;\
232 228 faddd %f0, %f2, %f60 ;\
233 229 fmuld %f0, %f2, %f62
234 230
235 231
236 232 #define FALIGN_D0 \
237 233 faligndata %d0, %d2, %d48 ;\
238 234 faligndata %d2, %d4, %d50 ;\
239 235 faligndata %d4, %d6, %d52 ;\
240 236 faligndata %d6, %d8, %d54 ;\
241 237 faligndata %d8, %d10, %d56 ;\
242 238 faligndata %d10, %d12, %d58 ;\
243 239 faligndata %d12, %d14, %d60 ;\
244 240 faligndata %d14, %d16, %d62
245 241
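/*
 * The FALIGN_D* macros below all do the same thing on different register
 * windows: faligndata concatenates two 64-bit registers and extracts the
 * eight bytes starting at the byte offset in GSR.align (set earlier by
 * alignaddr).  A simplified big-endian C model, assuming 0 <= align <= 7:
 *
 *	uint64_t
 *	faligndata_model(uint64_t hi, uint64_t lo, unsigned align)
 *	{
 *		if (align == 0)
 *			return (hi);
 *		return ((hi << (8 * align)) | (lo >> (8 * (8 - align))));
 *	}
 *
 * Each macro applies this to eight successive register pairs, producing
 * one realigned 64-byte block in %d48..%d62.
 */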
246 242 #define FALIGN_D16 \
247 243 faligndata %d16, %d18, %d48 ;\
248 244 faligndata %d18, %d20, %d50 ;\
249 245 faligndata %d20, %d22, %d52 ;\
250 246 faligndata %d22, %d24, %d54 ;\
251 247 faligndata %d24, %d26, %d56 ;\
252 248 faligndata %d26, %d28, %d58 ;\
253 249 faligndata %d28, %d30, %d60 ;\
254 250 faligndata %d30, %d32, %d62
255 251
256 252 #define FALIGN_D32 \
257 253 faligndata %d32, %d34, %d48 ;\
258 254 faligndata %d34, %d36, %d50 ;\
259 255 faligndata %d36, %d38, %d52 ;\
260 256 faligndata %d38, %d40, %d54 ;\
261 257 faligndata %d40, %d42, %d56 ;\
262 258 faligndata %d42, %d44, %d58 ;\
263 259 faligndata %d44, %d46, %d60 ;\
264 260 faligndata %d46, %d0, %d62
265 261
266 262 #define FALIGN_D2 \
267 263 faligndata %d2, %d4, %d48 ;\
268 264 faligndata %d4, %d6, %d50 ;\
269 265 faligndata %d6, %d8, %d52 ;\
270 266 faligndata %d8, %d10, %d54 ;\
271 267 faligndata %d10, %d12, %d56 ;\
272 268 faligndata %d12, %d14, %d58 ;\
273 269 faligndata %d14, %d16, %d60 ;\
274 270 faligndata %d16, %d18, %d62
275 271
276 272 #define FALIGN_D18 \
277 273 faligndata %d18, %d20, %d48 ;\
278 274 faligndata %d20, %d22, %d50 ;\
279 275 faligndata %d22, %d24, %d52 ;\
280 276 faligndata %d24, %d26, %d54 ;\
281 277 faligndata %d26, %d28, %d56 ;\
282 278 faligndata %d28, %d30, %d58 ;\
283 279 faligndata %d30, %d32, %d60 ;\
284 280 faligndata %d32, %d34, %d62
285 281
286 282 #define FALIGN_D34 \
287 283 faligndata %d34, %d36, %d48 ;\
288 284 faligndata %d36, %d38, %d50 ;\
289 285 faligndata %d38, %d40, %d52 ;\
290 286 faligndata %d40, %d42, %d54 ;\
291 287 faligndata %d42, %d44, %d56 ;\
292 288 faligndata %d44, %d46, %d58 ;\
293 289 faligndata %d46, %d0, %d60 ;\
294 290 faligndata %d0, %d2, %d62
295 291
296 292 #define FALIGN_D4 \
297 293 faligndata %d4, %d6, %d48 ;\
298 294 faligndata %d6, %d8, %d50 ;\
299 295 faligndata %d8, %d10, %d52 ;\
300 296 faligndata %d10, %d12, %d54 ;\
301 297 faligndata %d12, %d14, %d56 ;\
302 298 faligndata %d14, %d16, %d58 ;\
303 299 faligndata %d16, %d18, %d60 ;\
304 300 faligndata %d18, %d20, %d62
305 301
306 302 #define FALIGN_D20 \
307 303 faligndata %d20, %d22, %d48 ;\
308 304 faligndata %d22, %d24, %d50 ;\
309 305 faligndata %d24, %d26, %d52 ;\
310 306 faligndata %d26, %d28, %d54 ;\
311 307 faligndata %d28, %d30, %d56 ;\
312 308 faligndata %d30, %d32, %d58 ;\
313 309 faligndata %d32, %d34, %d60 ;\
314 310 faligndata %d34, %d36, %d62
315 311
316 312 #define FALIGN_D36 \
317 313 faligndata %d36, %d38, %d48 ;\
318 314 faligndata %d38, %d40, %d50 ;\
319 315 faligndata %d40, %d42, %d52 ;\
320 316 faligndata %d42, %d44, %d54 ;\
321 317 faligndata %d44, %d46, %d56 ;\
322 318 faligndata %d46, %d0, %d58 ;\
323 319 faligndata %d0, %d2, %d60 ;\
324 320 faligndata %d2, %d4, %d62
325 321
326 322 #define FALIGN_D6 \
327 323 faligndata %d6, %d8, %d48 ;\
328 324 faligndata %d8, %d10, %d50 ;\
329 325 faligndata %d10, %d12, %d52 ;\
330 326 faligndata %d12, %d14, %d54 ;\
331 327 faligndata %d14, %d16, %d56 ;\
332 328 faligndata %d16, %d18, %d58 ;\
333 329 faligndata %d18, %d20, %d60 ;\
334 330 faligndata %d20, %d22, %d62
335 331
336 332 #define FALIGN_D22 \
337 333 faligndata %d22, %d24, %d48 ;\
338 334 faligndata %d24, %d26, %d50 ;\
339 335 faligndata %d26, %d28, %d52 ;\
340 336 faligndata %d28, %d30, %d54 ;\
341 337 faligndata %d30, %d32, %d56 ;\
342 338 faligndata %d32, %d34, %d58 ;\
343 339 faligndata %d34, %d36, %d60 ;\
344 340 faligndata %d36, %d38, %d62
345 341
346 342 #define FALIGN_D38 \
347 343 faligndata %d38, %d40, %d48 ;\
348 344 faligndata %d40, %d42, %d50 ;\
349 345 faligndata %d42, %d44, %d52 ;\
350 346 faligndata %d44, %d46, %d54 ;\
351 347 faligndata %d46, %d0, %d56 ;\
352 348 faligndata %d0, %d2, %d58 ;\
353 349 faligndata %d2, %d4, %d60 ;\
354 350 faligndata %d4, %d6, %d62
355 351
356 352 #define FALIGN_D8 \
357 353 faligndata %d8, %d10, %d48 ;\
358 354 faligndata %d10, %d12, %d50 ;\
359 355 faligndata %d12, %d14, %d52 ;\
360 356 faligndata %d14, %d16, %d54 ;\
361 357 faligndata %d16, %d18, %d56 ;\
362 358 faligndata %d18, %d20, %d58 ;\
363 359 faligndata %d20, %d22, %d60 ;\
364 360 faligndata %d22, %d24, %d62
365 361
366 362 #define FALIGN_D24 \
367 363 faligndata %d24, %d26, %d48 ;\
368 364 faligndata %d26, %d28, %d50 ;\
369 365 faligndata %d28, %d30, %d52 ;\
370 366 faligndata %d30, %d32, %d54 ;\
371 367 faligndata %d32, %d34, %d56 ;\
372 368 faligndata %d34, %d36, %d58 ;\
373 369 faligndata %d36, %d38, %d60 ;\
374 370 faligndata %d38, %d40, %d62
375 371
376 372 #define FALIGN_D40 \
377 373 faligndata %d40, %d42, %d48 ;\
378 374 faligndata %d42, %d44, %d50 ;\
379 375 faligndata %d44, %d46, %d52 ;\
380 376 faligndata %d46, %d0, %d54 ;\
381 377 faligndata %d0, %d2, %d56 ;\
382 378 faligndata %d2, %d4, %d58 ;\
383 379 faligndata %d4, %d6, %d60 ;\
384 380 faligndata %d6, %d8, %d62
385 381
386 382 #define FALIGN_D10 \
387 383 faligndata %d10, %d12, %d48 ;\
388 384 faligndata %d12, %d14, %d50 ;\
389 385 faligndata %d14, %d16, %d52 ;\
390 386 faligndata %d16, %d18, %d54 ;\
391 387 faligndata %d18, %d20, %d56 ;\
392 388 faligndata %d20, %d22, %d58 ;\
393 389 faligndata %d22, %d24, %d60 ;\
394 390 faligndata %d24, %d26, %d62
395 391
396 392 #define FALIGN_D26 \
397 393 faligndata %d26, %d28, %d48 ;\
398 394 faligndata %d28, %d30, %d50 ;\
399 395 faligndata %d30, %d32, %d52 ;\
400 396 faligndata %d32, %d34, %d54 ;\
401 397 faligndata %d34, %d36, %d56 ;\
402 398 faligndata %d36, %d38, %d58 ;\
403 399 faligndata %d38, %d40, %d60 ;\
404 400 faligndata %d40, %d42, %d62
405 401
406 402 #define FALIGN_D42 \
407 403 faligndata %d42, %d44, %d48 ;\
408 404 faligndata %d44, %d46, %d50 ;\
409 405 faligndata %d46, %d0, %d52 ;\
410 406 faligndata %d0, %d2, %d54 ;\
411 407 faligndata %d2, %d4, %d56 ;\
412 408 faligndata %d4, %d6, %d58 ;\
413 409 faligndata %d6, %d8, %d60 ;\
414 410 faligndata %d8, %d10, %d62
415 411
416 412 #define FALIGN_D12 \
417 413 faligndata %d12, %d14, %d48 ;\
418 414 faligndata %d14, %d16, %d50 ;\
419 415 faligndata %d16, %d18, %d52 ;\
420 416 faligndata %d18, %d20, %d54 ;\
421 417 faligndata %d20, %d22, %d56 ;\
422 418 faligndata %d22, %d24, %d58 ;\
423 419 faligndata %d24, %d26, %d60 ;\
424 420 faligndata %d26, %d28, %d62
425 421
426 422 #define FALIGN_D28 \
427 423 faligndata %d28, %d30, %d48 ;\
428 424 faligndata %d30, %d32, %d50 ;\
429 425 faligndata %d32, %d34, %d52 ;\
430 426 faligndata %d34, %d36, %d54 ;\
431 427 faligndata %d36, %d38, %d56 ;\
432 428 faligndata %d38, %d40, %d58 ;\
433 429 faligndata %d40, %d42, %d60 ;\
434 430 faligndata %d42, %d44, %d62
435 431
436 432 #define FALIGN_D44 \
437 433 faligndata %d44, %d46, %d48 ;\
438 434 faligndata %d46, %d0, %d50 ;\
439 435 faligndata %d0, %d2, %d52 ;\
440 436 faligndata %d2, %d4, %d54 ;\
441 437 faligndata %d4, %d6, %d56 ;\
442 438 faligndata %d6, %d8, %d58 ;\
443 439 faligndata %d8, %d10, %d60 ;\
444 440 faligndata %d10, %d12, %d62
445 441
446 442 #define FALIGN_D14 \
447 443 faligndata %d14, %d16, %d48 ;\
448 444 faligndata %d16, %d18, %d50 ;\
449 445 faligndata %d18, %d20, %d52 ;\
450 446 faligndata %d20, %d22, %d54 ;\
451 447 faligndata %d22, %d24, %d56 ;\
452 448 faligndata %d24, %d26, %d58 ;\
453 449 faligndata %d26, %d28, %d60 ;\
454 450 faligndata %d28, %d30, %d62
455 451
456 452 #define FALIGN_D30 \
457 453 faligndata %d30, %d32, %d48 ;\
458 454 faligndata %d32, %d34, %d50 ;\
459 455 faligndata %d34, %d36, %d52 ;\
460 456 faligndata %d36, %d38, %d54 ;\
461 457 faligndata %d38, %d40, %d56 ;\
462 458 faligndata %d40, %d42, %d58 ;\
463 459 faligndata %d42, %d44, %d60 ;\
464 460 faligndata %d44, %d46, %d62
465 461
466 462 #define FALIGN_D46 \
467 463 faligndata %d46, %d0, %d48 ;\
468 464 faligndata %d0, %d2, %d50 ;\
469 465 faligndata %d2, %d4, %d52 ;\
470 466 faligndata %d4, %d6, %d54 ;\
471 467 faligndata %d6, %d8, %d56 ;\
472 468 faligndata %d8, %d10, %d58 ;\
473 469 faligndata %d10, %d12, %d60 ;\
474 470 faligndata %d12, %d14, %d62
475 471
476 472
477 473 /*
478 474 * Copy a block of storage, returning an error code if `from' or
479 475 * `to' takes a kernel pagefault which cannot be resolved.
480 476 * Returns errno value on pagefault error, 0 if all ok
481 477 */
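/*
 * A sketch of the caller-visible contract (illustrative, with assumed
 * variable names):
 *
 *	int err = kcopy(src, dst, len);
 *	if (err != 0)
 *		return (err);	// unresolved kernel pagefault, e.g. EFAULT
 *
 * Unlike bcopy(), kcopy() always installs its own t_lofault handler and
 * reports a fault through the return value rather than by invoking any
 * pre-existing handler.
 */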
482 478
483 479
484 480
485 -#if defined(lint)
486 -
487 -/* ARGSUSED */
488 -int
489 -kcopy(const void *from, void *to, size_t count)
490 -{ return(0); }
491 -
492 -#else /* lint */
493 -
494 481 .seg ".text"
495 482 .align 4
496 483
497 484 ENTRY(kcopy)
498 485
499 486 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
500 487 set .copyerr, %l6 ! copyerr is lofault value
501 488 ldn [THREAD_REG + T_LOFAULT], %l7 ! save existing handler
502 489 membar #Sync ! sync error barrier (see copy.s)
503 490 stn %l6, [THREAD_REG + T_LOFAULT] ! set t_lofault
504 491 !
505 492 ! Note that we carefully do *not* flag the setting of
506 493 ! t_lofault.
507 494 !
508 495 ba,pt %ncc, .do_copy ! common code
509 496 mov %l7, %l6
510 497
511 498 /*
512 499 * We got here because of a fault during kcopy, or during bcopy if a fault
513 500 * handler existed when bcopy was called.
514 501 * Errno value is in %g1.
515 502 */
516 503 .copyerr:
517 504 set .copyerr2, %l1
518 505 membar #Sync ! sync error barrier
519 506 stn %l1, [THREAD_REG + T_LOFAULT] ! set t_lofault
520 507 btst FPUSED_FLAG, %l6
521 508 bz %icc, 1f
522 509 and %l6, BCOPY_FLAG, %l1 ! copy flag to %l1
523 510
524 511 membar #Sync
525 512
526 513 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
527 514 wr %o2, 0, %gsr
528 515
529 516 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
530 517 btst FPRS_FEF, %o3
531 518 bz %icc, 4f
532 519 nop
533 520
534 521 ! restore fpregs from stack
535 522 membar #Sync
536 523 add %fp, STACK_BIAS - 257, %o2
537 524 and %o2, -64, %o2
538 525 ldda [%o2]ASI_BLK_P, %d0
539 526 add %o2, 64, %o2
540 527 ldda [%o2]ASI_BLK_P, %d16
541 528 add %o2, 64, %o2
542 529 ldda [%o2]ASI_BLK_P, %d32
543 530 add %o2, 64, %o2
544 531 ldda [%o2]ASI_BLK_P, %d48
545 532 membar #Sync
546 533
547 534 ba,pt %ncc, 2f
548 535 wr %o3, 0, %fprs ! restore fprs
549 536
550 537 4:
551 538 FZERO ! zero all of the fpregs
552 539 wr %o3, 0, %fprs ! restore fprs
553 540
554 541 2: ldn [THREAD_REG + T_LWP], %o2
555 542 tst %o2
556 543 bnz,pt %ncc, 1f
557 544 nop
558 545
559 546 ldsb [THREAD_REG + T_PREEMPT], %l0
560 547 deccc %l0
561 548 bnz,pn %ncc, 1f
562 549 stb %l0, [THREAD_REG + T_PREEMPT]
563 550
564 551 ! Check for a kernel preemption request
565 552 ldn [THREAD_REG + T_CPU], %l0
566 553 ldub [%l0 + CPU_KPRUNRUN], %l0
567 554 tst %l0
568 555 bnz,a,pt %ncc, 1f ! Need to call kpreempt?
569 556 or %l1, KPREEMPT_FLAG, %l1 ! If so, set the flag
570 557
571 558 !
572 559 ! Need to cater for the different expectations of kcopy
573 560 ! and bcopy. kcopy will *always* set a t_lofault handler.
574 561 ! If it fires, we're expected to just return the error code
575 562 ! and *not* to invoke any existing error handler. As far as
576 563 ! bcopy is concerned, we only set t_lofault if there was an
577 564 ! existing lofault handler. In that case we're expected to
578 565 ! existing lofault handler. In that case we're expected to
579 566 ! invoke the previously existing handler after resetting the
579 566 ! t_lofault value.
580 567 !
581 568 1:
582 569 andn %l6, COPY_FLAGS, %l6 ! remove flags from lofault address
583 570 membar #Sync ! sync error barrier
584 571 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
585 572
586 573 ! call kpreempt if necessary
587 574 btst KPREEMPT_FLAG, %l1
588 575 bz,pt %icc, 2f
589 576 nop
590 577 call kpreempt
591 578 rdpr %pil, %o0 ! pass %pil
592 579 2:
593 580 btst BCOPY_FLAG, %l1
594 581 bnz,pn %ncc, 3f
595 582 nop
596 583 ret
597 584 restore %g1, 0, %o0
598 585
599 586 3:
600 587 !
601 588 ! We're here via bcopy. There *must* have been an error handler
602 589 ! in place, otherwise we would have died a nasty death already.
603 590 !
604 591 jmp %l6 ! goto real handler
605 592 restore %g0, 0, %o0 ! dispose of copy window
606 593
607 594 /*
608 595 * We got here because of a fault in .copyerr. We can't safely restore fp
609 596 * state, so we panic.
610 597 */
611 598 fp_panic_msg:
612 599 .asciz "Unable to restore fp state after copy operation"
613 600
614 601 .align 4
615 602 .copyerr2:
616 603 set fp_panic_msg, %o0
617 604 call panic
618 605 nop
619 606 SET_SIZE(kcopy)
620 -#endif /* lint */
621 607
622 608
623 609 /*
624 610 * Copy a block of storage - must not overlap (from + len <= to).
625 611 * Registers: l6 - saved t_lofault
626 612 *
627 613 * Copy a page of memory.
628 614 * Assumes double word alignment and a count >= 256.
629 615 */
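/*
 * A simplified sketch of the size dispatch performed at .do_copy below
 * (the real code also checks use_hw_bcopy, source/destination separation
 * and whether live fp state must be saved first):
 *
 *	if (count < 12)
 *		goto bytecp;		// byte-at-a-time copy
 *	else if (count < VIS_COPY_THRESHOLD)
 *		goto bcb_punt;		// word/doubleword copy
 *	else
 *		goto do_blockcopy;	// VIS 64-byte block copy
 */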
630 -#if defined(lint)
631 616
632 -/* ARGSUSED */
633 -void
634 -bcopy(const void *from, void *to, size_t count)
635 -{}
636 -
637 -#else /* lint */
638 -
639 617 ENTRY(bcopy)
640 618
641 619 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
642 620 ldn [THREAD_REG + T_LOFAULT], %l6 ! save t_lofault
643 621 tst %l6
644 622 !
645 623 ! We've already captured whether t_lofault was zero on entry.
646 624 ! We need to mark ourselves as being from bcopy since both
647 625 ! kcopy and bcopy use the same code path. If BCOPY_FLAG is set
648 626 ! and the saved lofault was zero, we won't reset lofault on
649 627 ! returning.
650 628 !
651 629 or %l6, BCOPY_FLAG, %l6
652 630 bz,pt %ncc, .do_copy
653 631 sethi %hi(.copyerr), %o2
654 632 or %o2, %lo(.copyerr), %o2
655 633 membar #Sync ! sync error barrier
656 634 stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector
657 635
658 636 .do_copy:
659 637 cmp %i2, 12 ! for small counts
660 638 blu %ncc, .bytecp ! just copy bytes
661 639 .empty
662 640
663 641 cmp %i2, VIS_COPY_THRESHOLD ! for large counts
664 642 blu,pt %ncc, .bcb_punt
665 643 .empty
666 644
667 645 !
668 646 ! Check to see if VIS acceleration is enabled
669 647 !
670 648 sethi %hi(use_hw_bcopy), %o2
671 649 ld [%o2 + %lo(use_hw_bcopy)], %o2
672 650 tst %o2
673 651 bz,pn %icc, .bcb_punt
674 652 nop
675 653
676 654 subcc %i1, %i0, %i3
677 655 bneg,a,pn %ncc, 1f
678 656 neg %i3
679 657 1:
680 658 /*
681 659 * Compare against 256 since we should be checking block addresses
682 660 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
683 661 * src = dest + (64 * 3) + 63.
684 662 */
685 663 cmp %i3, 256
686 664 blu,pn %ncc, .bcb_punt
687 665 nop
688 666
689 667 ldn [THREAD_REG + T_LWP], %o3
690 668 tst %o3
691 669 bnz,pt %ncc, 1f
692 670 nop
693 671
694 672 ! kpreempt_disable();
695 673 ldsb [THREAD_REG + T_PREEMPT], %o2
696 674 inc %o2
697 675 stb %o2, [THREAD_REG + T_PREEMPT]
698 676
699 677 1:
700 678 rd %fprs, %o2 ! check for unused fp
701 679 st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
702 680 btst FPRS_FEF, %o2
703 681 bz,a %icc, .do_blockcopy
704 682 wr %g0, FPRS_FEF, %fprs
705 683
706 684 .bcb_fpregs_inuse:
707 685 cmp %i2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger
708 686 bgeu %ncc, 1f ! if we have to save the fpregs)
709 687 nop
710 688
711 689 tst %o3
712 690 bnz,pt %ncc, .bcb_punt
713 691 nop
714 692
715 693 ldsb [THREAD_REG + T_PREEMPT], %l0
716 694 deccc %l0
717 695 bnz,pn %icc, .bcb_punt
718 696 stb %l0, [THREAD_REG + T_PREEMPT]
719 697
720 698 ! Check for a kernel preemption request
721 699 ldn [THREAD_REG + T_CPU], %l0
722 700 ldub [%l0 + CPU_KPRUNRUN], %l0
723 701 tst %l0
724 702 bz,pt %icc, .bcb_punt
725 703 nop
726 704
727 705 ! Attempt to preempt
728 706 call kpreempt
729 707 rdpr %pil, %o0 ! pass %pil
730 708
731 709 ba,pt %ncc, .bcb_punt
732 710 nop
733 711
734 712 1:
735 713 wr %g0, FPRS_FEF, %fprs
736 714
737 715 ! save in-use fpregs on stack
738 716 membar #Sync
739 717 add %fp, STACK_BIAS - 257, %o2
740 718 and %o2, -64, %o2
741 719 stda %d0, [%o2]ASI_BLK_P
742 720 add %o2, 64, %o2
743 721 stda %d16, [%o2]ASI_BLK_P
744 722 add %o2, 64, %o2
745 723 stda %d32, [%o2]ASI_BLK_P
746 724 add %o2, 64, %o2
747 725 stda %d48, [%o2]ASI_BLK_P
748 726 membar #Sync
749 727
750 728 .do_blockcopy:
751 729 membar #StoreStore|#StoreLoad|#LoadStore
752 730
753 731 rd %gsr, %o2
754 732 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
755 733
756 734 ! Set the lower bit in the saved t_lofault to indicate
757 735 ! that we need to clear the %fprs register on the way
758 736 ! out
759 737 or %l6, FPUSED_FLAG, %l6
760 738
761 739 ! Swap src/dst since the code below is memcpy code
762 740 ! and memcpy/bcopy have different calling sequences
763 741 mov %i1, %i5
764 742 mov %i0, %i1
765 743 mov %i5, %i0
766 744
767 745 !!! This code is nearly identical to the version in the sun4u
768 746 !!! libc_psr. Most bugfixes made to that file should be
769 747 !!! merged into this routine.
770 748
771 749 andcc %i0, 7, %o3
772 750 bz,pt %ncc, blkcpy
773 751 sub %o3, 8, %o3
774 752 neg %o3
775 753 sub %i2, %o3, %i2
776 754
777 755 ! Align Destination on double-word boundary
778 756
779 757 2: ldub [%i1], %o4
780 758 inc %i1
781 759 inc %i0
782 760 deccc %o3
783 761 bgu %ncc, 2b
784 762 stb %o4, [%i0 - 1]
785 763 blkcpy:
786 764 andcc %i0, 63, %i3
787 765 bz,pn %ncc, blalign ! now block aligned
788 766 sub %i3, 64, %i3
789 767 neg %i3 ! bytes till block aligned
790 768 sub %i2, %i3, %i2 ! update %i2 with new count
791 769
792 770 ! Copy %i3 bytes till dst is block (64 byte) aligned. use
793 771 ! double word copies.
794 772
795 773 alignaddr %i1, %g0, %g1
796 774 ldd [%g1], %d0
797 775 add %g1, 8, %g1
798 776 6:
799 777 ldd [%g1], %d2
800 778 add %g1, 8, %g1
801 779 subcc %i3, 8, %i3
802 780 faligndata %d0, %d2, %d8
803 781 std %d8, [%i0]
804 782 add %i1, 8, %i1
805 783 bz,pn %ncc, blalign
806 784 add %i0, 8, %i0
807 785 ldd [%g1], %d0
808 786 add %g1, 8, %g1
809 787 subcc %i3, 8, %i3
810 788 faligndata %d2, %d0, %d8
811 789 std %d8, [%i0]
812 790 add %i1, 8, %i1
813 791 bgu,pn %ncc, 6b
814 792 add %i0, 8, %i0
815 793
816 794 blalign:
817 795 membar #StoreLoad
818 796 ! %i2 = total length
819 797 ! %i3 = blocks (length - 64) / 64
820 798 ! %i4 = doubles remaining (length - blocks)
821 799 sub %i2, 64, %i3
822 800 andn %i3, 63, %i3
823 801 sub %i2, %i3, %i4
824 802 andn %i4, 7, %i4
825 803 sub %i4, 16, %i4
826 804 sub %i2, %i4, %i2
827 805 sub %i2, %i3, %i2
828 806
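/*
 * The same split expressed in C (a restatement of the arithmetic above,
 * with illustrative names):
 *
 *	blocks  = (len - 64) & ~63;		// %i3: whole 64-byte blocks
 *	doubles = ((len - blocks) & ~7) - 16;	// %i4: trailing 8-byte moves
 *	bytes   = len - blocks - doubles;	// %i2: final byte loop
 */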
829 807 andn %i1, 0x3f, %l7 ! blk aligned address
830 808 alignaddr %i1, %g0, %g0 ! gen %gsr
831 809
832 810 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5
833 811 andcc %l5, 7, %i5 ! mask everything except the low 3 bits
834 812 add %i1, %i4, %i1
835 813 add %i1, %i3, %i1
836 814
837 815 ldda [%l7]ASI_BLK_P, %d0
838 816 add %l7, 64, %l7
839 817 ldda [%l7]ASI_BLK_P, %d16
840 818 add %l7, 64, %l7
841 819 ldda [%l7]ASI_BLK_P, %d32
842 820 add %l7, 64, %l7
843 821 sub %i3, 128, %i3
844 822
845 823 ! switch statement to get us to the right 8 byte blk within a
846 824 ! 64 byte block
847 825 cmp %i5, 4
848 826 bgeu,a hlf
849 827 cmp %i5, 6
850 828 cmp %i5, 2
851 829 bgeu,a sqtr
852 830 nop
853 831 cmp %i5, 1
854 832 be,a seg1
855 833 nop
856 834 ba,pt %ncc, seg0
857 835 nop
858 836 sqtr:
859 837 be,a seg2
860 838 nop
861 839 ba,pt %ncc, seg3
862 840 nop
863 841
864 842 hlf:
865 843 bgeu,a fqtr
866 844 nop
867 845 cmp %i5, 5
868 846 be,a seg5
869 847 nop
870 848 ba,pt %ncc, seg4
871 849 nop
872 850 fqtr:
873 851 be,a seg6
874 852 nop
875 853 ba,pt %ncc, seg7
876 854 nop
877 855
878 856
879 857 seg0:
880 858 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
881 859 FALIGN_D0
882 860 ldda [%l7]ASI_BLK_P, %d0
883 861 stda %d48, [%i0]ASI_BLK_P
884 862 add %l7, 64, %l7
885 863 subcc %i3, 64, %i3
886 864 bz,pn %ncc, 0f
887 865 add %i0, 64, %i0
888 866 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
889 867 FALIGN_D16
890 868 ldda [%l7]ASI_BLK_P, %d16
891 869 stda %d48, [%i0]ASI_BLK_P
892 870 add %l7, 64, %l7
893 871 subcc %i3, 64, %i3
894 872 bz,pn %ncc, 1f
895 873 add %i0, 64, %i0
896 874 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
897 875 FALIGN_D32
898 876 ldda [%l7]ASI_BLK_P, %d32
899 877 stda %d48, [%i0]ASI_BLK_P
900 878 add %l7, 64, %l7
901 879 subcc %i3, 64, %i3
902 880 bz,pn %ncc, 2f
903 881 add %i0, 64, %i0
904 882 ba,a,pt %ncc, seg0
905 883
906 884 0:
907 885 FALIGN_D16
908 886 stda %d48, [%i0]ASI_BLK_P
909 887 add %i0, 64, %i0
910 888 membar #Sync
911 889 FALIGN_D32
912 890 stda %d48, [%i0]ASI_BLK_P
913 891 ba,pt %ncc, blkd0
914 892 add %i0, 64, %i0
915 893
916 894 1:
917 895 FALIGN_D32
918 896 stda %d48, [%i0]ASI_BLK_P
919 897 add %i0, 64, %i0
920 898 membar #Sync
921 899 FALIGN_D0
922 900 stda %d48, [%i0]ASI_BLK_P
923 901 ba,pt %ncc, blkd16
924 902 add %i0, 64, %i0
925 903
926 904 2:
927 905 FALIGN_D0
928 906 stda %d48, [%i0]ASI_BLK_P
929 907 add %i0, 64, %i0
930 908 membar #Sync
931 909 FALIGN_D16
932 910 stda %d48, [%i0]ASI_BLK_P
933 911 ba,pt %ncc, blkd32
934 912 add %i0, 64, %i0
935 913
936 914 seg1:
937 915 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
938 916 FALIGN_D2
939 917 ldda [%l7]ASI_BLK_P, %d0
940 918 stda %d48, [%i0]ASI_BLK_P
941 919 add %l7, 64, %l7
942 920 subcc %i3, 64, %i3
943 921 bz,pn %ncc, 0f
944 922 add %i0, 64, %i0
945 923 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
946 924 FALIGN_D18
947 925 ldda [%l7]ASI_BLK_P, %d16
948 926 stda %d48, [%i0]ASI_BLK_P
949 927 add %l7, 64, %l7
950 928 subcc %i3, 64, %i3
951 929 bz,pn %ncc, 1f
952 930 add %i0, 64, %i0
953 931 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
954 932 FALIGN_D34
955 933 ldda [%l7]ASI_BLK_P, %d32
956 934 stda %d48, [%i0]ASI_BLK_P
957 935 add %l7, 64, %l7
958 936 subcc %i3, 64, %i3
959 937 bz,pn %ncc, 2f
960 938 add %i0, 64, %i0
961 939 ba,a,pt %ncc, seg1
962 940 0:
963 941 FALIGN_D18
964 942 stda %d48, [%i0]ASI_BLK_P
965 943 add %i0, 64, %i0
966 944 membar #Sync
967 945 FALIGN_D34
968 946 stda %d48, [%i0]ASI_BLK_P
969 947 ba,pt %ncc, blkd2
970 948 add %i0, 64, %i0
971 949
972 950 1:
973 951 FALIGN_D34
974 952 stda %d48, [%i0]ASI_BLK_P
975 953 add %i0, 64, %i0
976 954 membar #Sync
977 955 FALIGN_D2
978 956 stda %d48, [%i0]ASI_BLK_P
979 957 ba,pt %ncc, blkd18
980 958 add %i0, 64, %i0
981 959
982 960 2:
983 961 FALIGN_D2
984 962 stda %d48, [%i0]ASI_BLK_P
985 963 add %i0, 64, %i0
986 964 membar #Sync
987 965 FALIGN_D18
988 966 stda %d48, [%i0]ASI_BLK_P
989 967 ba,pt %ncc, blkd34
990 968 add %i0, 64, %i0
991 969
992 970 seg2:
993 971 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
994 972 FALIGN_D4
995 973 ldda [%l7]ASI_BLK_P, %d0
996 974 stda %d48, [%i0]ASI_BLK_P
997 975 add %l7, 64, %l7
998 976 subcc %i3, 64, %i3
999 977 bz,pn %ncc, 0f
1000 978 add %i0, 64, %i0
1001 979 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1002 980 FALIGN_D20
1003 981 ldda [%l7]ASI_BLK_P, %d16
1004 982 stda %d48, [%i0]ASI_BLK_P
1005 983 add %l7, 64, %l7
1006 984 subcc %i3, 64, %i3
1007 985 bz,pn %ncc, 1f
1008 986 add %i0, 64, %i0
1009 987 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1010 988 FALIGN_D36
1011 989 ldda [%l7]ASI_BLK_P, %d32
1012 990 stda %d48, [%i0]ASI_BLK_P
1013 991 add %l7, 64, %l7
1014 992 subcc %i3, 64, %i3
1015 993 bz,pn %ncc, 2f
1016 994 add %i0, 64, %i0
1017 995 ba,a,pt %ncc, seg2
1018 996
1019 997 0:
1020 998 FALIGN_D20
1021 999 stda %d48, [%i0]ASI_BLK_P
1022 1000 add %i0, 64, %i0
1023 1001 membar #Sync
1024 1002 FALIGN_D36
1025 1003 stda %d48, [%i0]ASI_BLK_P
1026 1004 ba,pt %ncc, blkd4
1027 1005 add %i0, 64, %i0
1028 1006
1029 1007 1:
1030 1008 FALIGN_D36
1031 1009 stda %d48, [%i0]ASI_BLK_P
1032 1010 add %i0, 64, %i0
1033 1011 membar #Sync
1034 1012 FALIGN_D4
1035 1013 stda %d48, [%i0]ASI_BLK_P
1036 1014 ba,pt %ncc, blkd20
1037 1015 add %i0, 64, %i0
1038 1016
1039 1017 2:
1040 1018 FALIGN_D4
1041 1019 stda %d48, [%i0]ASI_BLK_P
1042 1020 add %i0, 64, %i0
1043 1021 membar #Sync
1044 1022 FALIGN_D20
1045 1023 stda %d48, [%i0]ASI_BLK_P
1046 1024 ba,pt %ncc, blkd36
1047 1025 add %i0, 64, %i0
1048 1026
1049 1027 seg3:
1050 1028 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1051 1029 FALIGN_D6
1052 1030 ldda [%l7]ASI_BLK_P, %d0
1053 1031 stda %d48, [%i0]ASI_BLK_P
1054 1032 add %l7, 64, %l7
1055 1033 subcc %i3, 64, %i3
1056 1034 bz,pn %ncc, 0f
1057 1035 add %i0, 64, %i0
1058 1036 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1059 1037 FALIGN_D22
1060 1038 ldda [%l7]ASI_BLK_P, %d16
1061 1039 stda %d48, [%i0]ASI_BLK_P
1062 1040 add %l7, 64, %l7
1063 1041 subcc %i3, 64, %i3
1064 1042 bz,pn %ncc, 1f
1065 1043 add %i0, 64, %i0
1066 1044 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1067 1045 FALIGN_D38
1068 1046 ldda [%l7]ASI_BLK_P, %d32
1069 1047 stda %d48, [%i0]ASI_BLK_P
1070 1048 add %l7, 64, %l7
1071 1049 subcc %i3, 64, %i3
1072 1050 bz,pn %ncc, 2f
1073 1051 add %i0, 64, %i0
1074 1052 ba,a,pt %ncc, seg3
1075 1053
1076 1054 0:
1077 1055 FALIGN_D22
1078 1056 stda %d48, [%i0]ASI_BLK_P
1079 1057 add %i0, 64, %i0
1080 1058 membar #Sync
1081 1059 FALIGN_D38
1082 1060 stda %d48, [%i0]ASI_BLK_P
1083 1061 ba,pt %ncc, blkd6
1084 1062 add %i0, 64, %i0
1085 1063
1086 1064 1:
1087 1065 FALIGN_D38
1088 1066 stda %d48, [%i0]ASI_BLK_P
1089 1067 add %i0, 64, %i0
1090 1068 membar #Sync
1091 1069 FALIGN_D6
1092 1070 stda %d48, [%i0]ASI_BLK_P
1093 1071 ba,pt %ncc, blkd22
1094 1072 add %i0, 64, %i0
1095 1073
1096 1074 2:
1097 1075 FALIGN_D6
1098 1076 stda %d48, [%i0]ASI_BLK_P
1099 1077 add %i0, 64, %i0
1100 1078 membar #Sync
1101 1079 FALIGN_D22
1102 1080 stda %d48, [%i0]ASI_BLK_P
1103 1081 ba,pt %ncc, blkd38
1104 1082 add %i0, 64, %i0
1105 1083
1106 1084 seg4:
1107 1085 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1108 1086 FALIGN_D8
1109 1087 ldda [%l7]ASI_BLK_P, %d0
1110 1088 stda %d48, [%i0]ASI_BLK_P
1111 1089 add %l7, 64, %l7
1112 1090 subcc %i3, 64, %i3
1113 1091 bz,pn %ncc, 0f
1114 1092 add %i0, 64, %i0
1115 1093 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1116 1094 FALIGN_D24
1117 1095 ldda [%l7]ASI_BLK_P, %d16
1118 1096 stda %d48, [%i0]ASI_BLK_P
1119 1097 add %l7, 64, %l7
1120 1098 subcc %i3, 64, %i3
1121 1099 bz,pn %ncc, 1f
1122 1100 add %i0, 64, %i0
1123 1101 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1124 1102 FALIGN_D40
1125 1103 ldda [%l7]ASI_BLK_P, %d32
1126 1104 stda %d48, [%i0]ASI_BLK_P
1127 1105 add %l7, 64, %l7
1128 1106 subcc %i3, 64, %i3
1129 1107 bz,pn %ncc, 2f
1130 1108 add %i0, 64, %i0
1131 1109 ba,a,pt %ncc, seg4
1132 1110
1133 1111 0:
1134 1112 FALIGN_D24
1135 1113 stda %d48, [%i0]ASI_BLK_P
1136 1114 add %i0, 64, %i0
1137 1115 membar #Sync
1138 1116 FALIGN_D40
1139 1117 stda %d48, [%i0]ASI_BLK_P
1140 1118 ba,pt %ncc, blkd8
1141 1119 add %i0, 64, %i0
1142 1120
1143 1121 1:
1144 1122 FALIGN_D40
1145 1123 stda %d48, [%i0]ASI_BLK_P
1146 1124 add %i0, 64, %i0
1147 1125 membar #Sync
1148 1126 FALIGN_D8
1149 1127 stda %d48, [%i0]ASI_BLK_P
1150 1128 ba,pt %ncc, blkd24
1151 1129 add %i0, 64, %i0
1152 1130
1153 1131 2:
1154 1132 FALIGN_D8
1155 1133 stda %d48, [%i0]ASI_BLK_P
1156 1134 add %i0, 64, %i0
1157 1135 membar #Sync
1158 1136 FALIGN_D24
1159 1137 stda %d48, [%i0]ASI_BLK_P
1160 1138 ba,pt %ncc, blkd40
1161 1139 add %i0, 64, %i0
1162 1140
1163 1141 seg5:
1164 1142 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1165 1143 FALIGN_D10
1166 1144 ldda [%l7]ASI_BLK_P, %d0
1167 1145 stda %d48, [%i0]ASI_BLK_P
1168 1146 add %l7, 64, %l7
1169 1147 subcc %i3, 64, %i3
1170 1148 bz,pn %ncc, 0f
1171 1149 add %i0, 64, %i0
1172 1150 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1173 1151 FALIGN_D26
1174 1152 ldda [%l7]ASI_BLK_P, %d16
1175 1153 stda %d48, [%i0]ASI_BLK_P
1176 1154 add %l7, 64, %l7
1177 1155 subcc %i3, 64, %i3
1178 1156 bz,pn %ncc, 1f
1179 1157 add %i0, 64, %i0
1180 1158 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1181 1159 FALIGN_D42
1182 1160 ldda [%l7]ASI_BLK_P, %d32
1183 1161 stda %d48, [%i0]ASI_BLK_P
1184 1162 add %l7, 64, %l7
1185 1163 subcc %i3, 64, %i3
1186 1164 bz,pn %ncc, 2f
1187 1165 add %i0, 64, %i0
1188 1166 ba,a,pt %ncc, seg5
1189 1167
1190 1168 0:
1191 1169 FALIGN_D26
1192 1170 stda %d48, [%i0]ASI_BLK_P
1193 1171 add %i0, 64, %i0
1194 1172 membar #Sync
1195 1173 FALIGN_D42
1196 1174 stda %d48, [%i0]ASI_BLK_P
1197 1175 ba,pt %ncc, blkd10
1198 1176 add %i0, 64, %i0
1199 1177
1200 1178 1:
1201 1179 FALIGN_D42
1202 1180 stda %d48, [%i0]ASI_BLK_P
1203 1181 add %i0, 64, %i0
1204 1182 membar #Sync
1205 1183 FALIGN_D10
1206 1184 stda %d48, [%i0]ASI_BLK_P
1207 1185 ba,pt %ncc, blkd26
1208 1186 add %i0, 64, %i0
1209 1187
1210 1188 2:
1211 1189 FALIGN_D10
1212 1190 stda %d48, [%i0]ASI_BLK_P
1213 1191 add %i0, 64, %i0
1214 1192 membar #Sync
1215 1193 FALIGN_D26
1216 1194 stda %d48, [%i0]ASI_BLK_P
1217 1195 ba,pt %ncc, blkd42
1218 1196 add %i0, 64, %i0
1219 1197
1220 1198 seg6:
1221 1199 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1222 1200 FALIGN_D12
1223 1201 ldda [%l7]ASI_BLK_P, %d0
1224 1202 stda %d48, [%i0]ASI_BLK_P
1225 1203 add %l7, 64, %l7
1226 1204 subcc %i3, 64, %i3
1227 1205 bz,pn %ncc, 0f
1228 1206 add %i0, 64, %i0
1229 1207 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1230 1208 FALIGN_D28
1231 1209 ldda [%l7]ASI_BLK_P, %d16
1232 1210 stda %d48, [%i0]ASI_BLK_P
1233 1211 add %l7, 64, %l7
1234 1212 subcc %i3, 64, %i3
1235 1213 bz,pn %ncc, 1f
1236 1214 add %i0, 64, %i0
1237 1215 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1238 1216 FALIGN_D44
1239 1217 ldda [%l7]ASI_BLK_P, %d32
1240 1218 stda %d48, [%i0]ASI_BLK_P
1241 1219 add %l7, 64, %l7
1242 1220 subcc %i3, 64, %i3
1243 1221 bz,pn %ncc, 2f
1244 1222 add %i0, 64, %i0
1245 1223 ba,a,pt %ncc, seg6
1246 1224
1247 1225 0:
1248 1226 FALIGN_D28
1249 1227 stda %d48, [%i0]ASI_BLK_P
1250 1228 add %i0, 64, %i0
1251 1229 membar #Sync
1252 1230 FALIGN_D44
1253 1231 stda %d48, [%i0]ASI_BLK_P
1254 1232 ba,pt %ncc, blkd12
1255 1233 add %i0, 64, %i0
1256 1234
1257 1235 1:
1258 1236 FALIGN_D44
1259 1237 stda %d48, [%i0]ASI_BLK_P
1260 1238 add %i0, 64, %i0
1261 1239 membar #Sync
1262 1240 FALIGN_D12
1263 1241 stda %d48, [%i0]ASI_BLK_P
1264 1242 ba,pt %ncc, blkd28
1265 1243 add %i0, 64, %i0
1266 1244
1267 1245 2:
1268 1246 FALIGN_D12
1269 1247 stda %d48, [%i0]ASI_BLK_P
1270 1248 add %i0, 64, %i0
1271 1249 membar #Sync
1272 1250 FALIGN_D28
1273 1251 stda %d48, [%i0]ASI_BLK_P
1274 1252 ba,pt %ncc, blkd44
1275 1253 add %i0, 64, %i0
1276 1254
1277 1255 seg7:
1278 1256 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1279 1257 FALIGN_D14
1280 1258 ldda [%l7]ASI_BLK_P, %d0
1281 1259 stda %d48, [%i0]ASI_BLK_P
1282 1260 add %l7, 64, %l7
1283 1261 subcc %i3, 64, %i3
1284 1262 bz,pn %ncc, 0f
1285 1263 add %i0, 64, %i0
1286 1264 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
1287 1265 FALIGN_D30
1288 1266 ldda [%l7]ASI_BLK_P, %d16
1289 1267 stda %d48, [%i0]ASI_BLK_P
1290 1268 add %l7, 64, %l7
1291 1269 subcc %i3, 64, %i3
1292 1270 bz,pn %ncc, 1f
1293 1271 add %i0, 64, %i0
1294 1272 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
1295 1273 FALIGN_D46
1296 1274 ldda [%l7]ASI_BLK_P, %d32
1297 1275 stda %d48, [%i0]ASI_BLK_P
1298 1276 add %l7, 64, %l7
1299 1277 subcc %i3, 64, %i3
1300 1278 bz,pn %ncc, 2f
1301 1279 add %i0, 64, %i0
1302 1280 ba,a,pt %ncc, seg7
1303 1281
1304 1282 0:
1305 1283 FALIGN_D30
1306 1284 stda %d48, [%i0]ASI_BLK_P
1307 1285 add %i0, 64, %i0
1308 1286 membar #Sync
1309 1287 FALIGN_D46
1310 1288 stda %d48, [%i0]ASI_BLK_P
1311 1289 ba,pt %ncc, blkd14
1312 1290 add %i0, 64, %i0
1313 1291
1314 1292 1:
1315 1293 FALIGN_D46
1316 1294 stda %d48, [%i0]ASI_BLK_P
1317 1295 add %i0, 64, %i0
1318 1296 membar #Sync
1319 1297 FALIGN_D14
1320 1298 stda %d48, [%i0]ASI_BLK_P
1321 1299 ba,pt %ncc, blkd30
1322 1300 add %i0, 64, %i0
1323 1301
1324 1302 2:
1325 1303 FALIGN_D14
1326 1304 stda %d48, [%i0]ASI_BLK_P
1327 1305 add %i0, 64, %i0
1328 1306 membar #Sync
1329 1307 FALIGN_D30
1330 1308 stda %d48, [%i0]ASI_BLK_P
1331 1309 ba,pt %ncc, blkd46
1332 1310 add %i0, 64, %i0
1333 1311
1334 1312
1335 1313 !
1336 1314 ! dribble out the last partial block
1337 1315 !
1338 1316 blkd0:
1339 1317 subcc %i4, 8, %i4
1340 1318 blu,pn %ncc, blkdone
1341 1319 faligndata %d0, %d2, %d48
1342 1320 std %d48, [%i0]
1343 1321 add %i0, 8, %i0
1344 1322 blkd2:
1345 1323 subcc %i4, 8, %i4
1346 1324 blu,pn %ncc, blkdone
1347 1325 faligndata %d2, %d4, %d48
1348 1326 std %d48, [%i0]
1349 1327 add %i0, 8, %i0
1350 1328 blkd4:
1351 1329 subcc %i4, 8, %i4
1352 1330 blu,pn %ncc, blkdone
1353 1331 faligndata %d4, %d6, %d48
1354 1332 std %d48, [%i0]
1355 1333 add %i0, 8, %i0
1356 1334 blkd6:
1357 1335 subcc %i4, 8, %i4
1358 1336 blu,pn %ncc, blkdone
1359 1337 faligndata %d6, %d8, %d48
1360 1338 std %d48, [%i0]
1361 1339 add %i0, 8, %i0
1362 1340 blkd8:
1363 1341 subcc %i4, 8, %i4
1364 1342 blu,pn %ncc, blkdone
1365 1343 faligndata %d8, %d10, %d48
1366 1344 std %d48, [%i0]
1367 1345 add %i0, 8, %i0
1368 1346 blkd10:
1369 1347 subcc %i4, 8, %i4
1370 1348 blu,pn %ncc, blkdone
1371 1349 faligndata %d10, %d12, %d48
1372 1350 std %d48, [%i0]
1373 1351 add %i0, 8, %i0
1374 1352 blkd12:
1375 1353 subcc %i4, 8, %i4
1376 1354 blu,pn %ncc, blkdone
1377 1355 faligndata %d12, %d14, %d48
1378 1356 std %d48, [%i0]
1379 1357 add %i0, 8, %i0
1380 1358 blkd14:
1381 1359 subcc %i4, 8, %i4
1382 1360 blu,pn %ncc, blkdone
1383 1361 fsrc1 %d14, %d0
1384 1362 ba,a,pt %ncc, blkleft
1385 1363
1386 1364 blkd16:
1387 1365 subcc %i4, 8, %i4
1388 1366 blu,pn %ncc, blkdone
1389 1367 faligndata %d16, %d18, %d48
1390 1368 std %d48, [%i0]
1391 1369 add %i0, 8, %i0
1392 1370 blkd18:
1393 1371 subcc %i4, 8, %i4
1394 1372 blu,pn %ncc, blkdone
1395 1373 faligndata %d18, %d20, %d48
1396 1374 std %d48, [%i0]
1397 1375 add %i0, 8, %i0
1398 1376 blkd20:
1399 1377 subcc %i4, 8, %i4
1400 1378 blu,pn %ncc, blkdone
1401 1379 faligndata %d20, %d22, %d48
1402 1380 std %d48, [%i0]
1403 1381 add %i0, 8, %i0
1404 1382 blkd22:
1405 1383 subcc %i4, 8, %i4
1406 1384 blu,pn %ncc, blkdone
1407 1385 faligndata %d22, %d24, %d48
1408 1386 std %d48, [%i0]
1409 1387 add %i0, 8, %i0
1410 1388 blkd24:
1411 1389 subcc %i4, 8, %i4
1412 1390 blu,pn %ncc, blkdone
1413 1391 faligndata %d24, %d26, %d48
1414 1392 std %d48, [%i0]
1415 1393 add %i0, 8, %i0
1416 1394 blkd26:
1417 1395 subcc %i4, 8, %i4
1418 1396 blu,pn %ncc, blkdone
1419 1397 faligndata %d26, %d28, %d48
1420 1398 std %d48, [%i0]
1421 1399 add %i0, 8, %i0
1422 1400 blkd28:
1423 1401 subcc %i4, 8, %i4
1424 1402 blu,pn %ncc, blkdone
1425 1403 faligndata %d28, %d30, %d48
1426 1404 std %d48, [%i0]
1427 1405 add %i0, 8, %i0
1428 1406 blkd30:
1429 1407 subcc %i4, 8, %i4
1430 1408 blu,pn %ncc, blkdone
1431 1409 fsrc1 %d30, %d0
1432 1410 ba,a,pt %ncc, blkleft
1433 1411 blkd32:
1434 1412 subcc %i4, 8, %i4
1435 1413 blu,pn %ncc, blkdone
1436 1414 faligndata %d32, %d34, %d48
1437 1415 std %d48, [%i0]
1438 1416 add %i0, 8, %i0
1439 1417 blkd34:
1440 1418 subcc %i4, 8, %i4
1441 1419 blu,pn %ncc, blkdone
1442 1420 faligndata %d34, %d36, %d48
1443 1421 std %d48, [%i0]
1444 1422 add %i0, 8, %i0
1445 1423 blkd36:
1446 1424 subcc %i4, 8, %i4
1447 1425 blu,pn %ncc, blkdone
1448 1426 faligndata %d36, %d38, %d48
1449 1427 std %d48, [%i0]
1450 1428 add %i0, 8, %i0
1451 1429 blkd38:
1452 1430 subcc %i4, 8, %i4
1453 1431 blu,pn %ncc, blkdone
1454 1432 faligndata %d38, %d40, %d48
1455 1433 std %d48, [%i0]
1456 1434 add %i0, 8, %i0
1457 1435 blkd40:
1458 1436 subcc %i4, 8, %i4
1459 1437 blu,pn %ncc, blkdone
1460 1438 faligndata %d40, %d42, %d48
1461 1439 std %d48, [%i0]
1462 1440 add %i0, 8, %i0
1463 1441 blkd42:
1464 1442 subcc %i4, 8, %i4
1465 1443 blu,pn %ncc, blkdone
1466 1444 faligndata %d42, %d44, %d48
1467 1445 std %d48, [%i0]
1468 1446 add %i0, 8, %i0
1469 1447 blkd44:
1470 1448 subcc %i4, 8, %i4
1471 1449 blu,pn %ncc, blkdone
1472 1450 faligndata %d44, %d46, %d48
1473 1451 std %d48, [%i0]
1474 1452 add %i0, 8, %i0
1475 1453 blkd46:
1476 1454 subcc %i4, 8, %i4
1477 1455 blu,pn %ncc, blkdone
1478 1456 fsrc1 %d46, %d0
1479 1457
1480 1458 blkleft:
1481 1459 1:
1482 1460 ldd [%l7], %d2
1483 1461 add %l7, 8, %l7
1484 1462 subcc %i4, 8, %i4
1485 1463 faligndata %d0, %d2, %d8
1486 1464 std %d8, [%i0]
1487 1465 blu,pn %ncc, blkdone
1488 1466 add %i0, 8, %i0
1489 1467 ldd [%l7], %d0
1490 1468 add %l7, 8, %l7
1491 1469 subcc %i4, 8, %i4
1492 1470 faligndata %d2, %d0, %d8
1493 1471 std %d8, [%i0]
1494 1472 bgeu,pt %ncc, 1b
1495 1473 add %i0, 8, %i0
1496 1474
1497 1475 blkdone:
1498 1476 tst %i2
1499 1477 bz,pt %ncc, .bcb_exit
1500 1478 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0
1501 1479
1502 1480 7: ldub [%i1], %i4
1503 1481 inc %i1
1504 1482 inc %i0
1505 1483 deccc %i2
1506 1484 bgu,pt %ncc, 7b
1507 1485 stb %i4, [%i0 - 1]
1508 1486
1509 1487 .bcb_exit:
1510 1488 membar #StoreLoad|#StoreStore
1511 1489 btst FPUSED_FLAG, %l6
1512 1490 bz %icc, 1f
1513 1491 and %l6, COPY_FLAGS, %l1 ! Store flags in %l1
1514 1492 ! We can't clear the flags from %l6 yet.
1515 1493 ! If there's an error, .copyerr will
1516 1494 ! need them
1517 1495
1518 1496 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
1519 1497 wr %o2, 0, %gsr
1520 1498
1521 1499 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
1522 1500 btst FPRS_FEF, %o3
1523 1501 bz %icc, 4f
1524 1502 nop
1525 1503
1526 1504 ! restore fpregs from stack
1527 1505 membar #Sync
1528 1506 add %fp, STACK_BIAS - 257, %o2
1529 1507 and %o2, -64, %o2
1530 1508 ldda [%o2]ASI_BLK_P, %d0
1531 1509 add %o2, 64, %o2
1532 1510 ldda [%o2]ASI_BLK_P, %d16
1533 1511 add %o2, 64, %o2
1534 1512 ldda [%o2]ASI_BLK_P, %d32
1535 1513 add %o2, 64, %o2
1536 1514 ldda [%o2]ASI_BLK_P, %d48
1537 1515 membar #Sync
1538 1516
1539 1517 ba,pt %ncc, 2f
1540 1518 wr %o3, 0, %fprs ! restore fprs
1541 1519
1542 1520 4:
1543 1521 FZERO ! zero all of the fpregs
1544 1522 wr %o3, 0, %fprs ! restore fprs
1545 1523
1546 1524 2: ldn [THREAD_REG + T_LWP], %o2
1547 1525 tst %o2
1548 1526 bnz,pt %ncc, 1f
1549 1527 nop
1550 1528
1551 1529 ldsb [THREAD_REG + T_PREEMPT], %l0
1552 1530 deccc %l0
1553 1531 bnz,pn %ncc, 1f
1554 1532 stb %l0, [THREAD_REG + T_PREEMPT]
1555 1533
1556 1534 ! Check for a kernel preemption request
1557 1535 ldn [THREAD_REG + T_CPU], %l0
1558 1536 ldub [%l0 + CPU_KPRUNRUN], %l0
1559 1537 tst %l0
1560 1538 bnz,a,pt %ncc, 1f ! Need to call kpreempt?
1561 1539 or %l1, KPREEMPT_FLAG, %l1 ! If so, set the flag
1562 1540
1563 1541 1:
1564 1542 btst BCOPY_FLAG, %l1
1565 1543 bz,pn %icc, 3f
1566 1544 andncc %l6, COPY_FLAGS, %l6
1567 1545
1568 1546 !
1569 1547 ! Here via bcopy. Check to see if the handler was NULL.
1570 1548 ! If so, just return quietly. Otherwise, reset the
1571 1549 ! handler and go home.
1572 1550 !
1573 1551 bnz,pn %ncc, 3f
1574 1552 nop
1575 1553
1576 1554 !
1577 1555 ! Null handler. Check for kpreempt flag, call if necessary,
1578 1556 ! then return.
1579 1557 !
1580 1558 btst KPREEMPT_FLAG, %l1
1581 1559 bz,pt %icc, 2f
1582 1560 nop
1583 1561 call kpreempt
1584 1562 rdpr %pil, %o0 ! pass %pil
1585 1563 2:
1586 1564 ret
1587 1565 restore %g0, 0, %o0
1588 1566
1589 1567 !
1590 1568 ! Here via kcopy or bcopy with a handler. Reset the
1591 1569 ! fault handler.
1592 1570 !
1593 1571 3:
1594 1572 membar #Sync
1595 1573 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
1596 1574
1597 1575 ! call kpreempt if necessary
1598 1576 btst KPREEMPT_FLAG, %l1
1599 1577 bz,pt %icc, 4f
1600 1578 nop
1601 1579 call kpreempt
1602 1580 rdpr %pil, %o0
1603 1581 4:
1604 1582 ret
1605 1583 restore %g0, 0, %o0
1606 1584
1607 1585 .bcb_punt:
1608 1586 !
1609 1587 ! use aligned transfers where possible
1610 1588 !
1611 1589 xor %i0, %i1, %o4 ! xor from and to address
1612 1590 btst 7, %o4 ! if lower three bits zero
1613 1591 bz %icc, .aldoubcp ! can align on double boundary
1614 1592 .empty ! assembler complaints about label
1615 1593
1616 1594 xor %i0, %i1, %o4 ! xor from and to address
1617 1595 btst 3, %o4 ! if lower two bits zero
1618 1596 bz %icc, .alwordcp ! can align on word boundary
1619 1597 btst 3, %i0 ! delay slot, from address unaligned?
1620 1598 !
1621 1599 ! use aligned reads and writes where possible
1622 1600 ! this differs from wordcp in that it copes
1623 1601 ! with odd alignment between source and destination
1624 1602 ! using word reads and writes with the proper shifts
1625 1603 ! in between to align transfers to and from memory
1626 1604 ! i0 - src address, i1 - dest address, i2 - count
1627 1605 ! i3, i4 - tmps used for generating a complete word
1628 1606 ! i5 (word to write)
1629 1607 ! l0 size in bits of upper part of source word (US)
1630 1608 ! l1 size in bits of lower part of source word (LS = 32 - US)
1631 1609 ! l2 size in bits of upper part of destination word (UD)
1632 1610 ! l3 size in bits of lower part of destination word (LD = 32 - UD)
1633 1611 ! l4 number of bytes leftover after aligned transfers complete
1634 1612 ! l5 the number 32
1635 1613 !
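/*
 * The steady-state word loop further below (label 2: after .xfer) can be
 * sketched in C using the register roles listed above (names are
 * illustrative):
 *
 *	for (; aligned_count >= 4; aligned_count -= 4) {
 *		w = *src32++;				// ld [%i0]
 *		*dst32++ = leftover | (w >> US);	// merge and store
 *		leftover = w << LS;			// LS == 32 - US
 *	}
 */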
1636 1614 mov 32, %l5 ! load an oft-needed constant
1637 1615 bz .align_dst_only
1638 1616 btst 3, %i1 ! is destination address aligned?
1639 1617 clr %i4 ! clear registers used in either case
1640 1618 bz %icc, .align_src_only
1641 1619 clr %l0
1642 1620 !
1643 1621 ! both source and destination addresses are unaligned
1644 1622 !
1645 1623 1: ! align source
1646 1624 ldub [%i0], %i3 ! read a byte from source address
1647 1625 add %i0, 1, %i0 ! increment source address
1648 1626 or %i4, %i3, %i4 ! or in with previous bytes (if any)
1649 1627 btst 3, %i0 ! is source aligned?
1650 1628 add %l0, 8, %l0 ! increment size of upper source (US)
1651 1629 bnz,a 1b
1652 1630 sll %i4, 8, %i4 ! make room for next byte
1653 1631
1654 1632 sub %l5, %l0, %l1 ! generate shift left count (LS)
1655 1633 sll %i4, %l1, %i4 ! prepare to get rest
1656 1634 ld [%i0], %i3 ! read a word
1657 1635 add %i0, 4, %i0 ! increment source address
1658 1636 srl %i3, %l0, %i5 ! upper src bits into lower dst bits
1659 1637 or %i4, %i5, %i5 ! merge
1660 1638 mov 24, %l3 ! align destination
1661 1639 1:
1662 1640 srl %i5, %l3, %i4 ! prepare to write a single byte
1663 1641 stb %i4, [%i1] ! write a byte
1664 1642 add %i1, 1, %i1 ! increment destination address
1665 1643 sub %i2, 1, %i2 ! decrement count
1666 1644 btst 3, %i1 ! is destination aligned?
1667 1645 bnz,a 1b
1668 1646 sub %l3, 8, %l3 ! delay slot, decrement shift count (LD)
1669 1647 sub %l5, %l3, %l2 ! generate shift left count (UD)
1670 1648 sll %i5, %l2, %i5 ! move leftover into upper bytes
1671 1649 cmp %l2, %l0 ! cmp # reqd to fill dst w old src left
1672 1650 bgu %ncc, .more_needed ! need more to fill than we have
1673 1651 nop
1674 1652
1675 1653 sll %i3, %l1, %i3 ! clear upper used byte(s)
1676 1654 srl %i3, %l1, %i3
1677 1655 ! get the odd bytes between alignments
1678 1656 sub %l0, %l2, %l0 ! regenerate shift count
1679 1657 sub %l5, %l0, %l1 ! generate new shift left count (LS)
1680 1658 and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0
1681 1659 andn %i2, 3, %i2 ! # of aligned bytes that can be moved
1682 1660 srl %i3, %l0, %i4
1683 1661 or %i5, %i4, %i5
1684 1662 st %i5, [%i1] ! write a word
1685 1663 subcc %i2, 4, %i2 ! decrement count
1686 1664 bz %ncc, .unalign_out
1687 1665 add %i1, 4, %i1 ! increment destination address
1688 1666
1689 1667 b 2f
1690 1668 sll %i3, %l1, %i5 ! get leftover into upper bits
1691 1669 .more_needed:
1692 1670 sll %i3, %l0, %i3 ! save remaining byte(s)
1693 1671 srl %i3, %l0, %i3
1694 1672 sub %l2, %l0, %l1 ! regenerate shift count
1695 1673 sub %l5, %l1, %l0 ! generate new shift left count
1696 1674 sll %i3, %l1, %i4 ! move to fill empty space
1697 1675 b 3f
1698 1676 or %i5, %i4, %i5 ! merge to complete word
1699 1677 !
1700 1678 ! the source address is aligned and destination is not
1701 1679 !
1702 1680 .align_dst_only:
1703 1681 ld [%i0], %i4 ! read a word
1704 1682 add %i0, 4, %i0 ! increment source address
1705 1683 mov 24, %l0 ! initial shift alignment count
1706 1684 1:
1707 1685 srl %i4, %l0, %i3 ! prepare to write a single byte
1708 1686 stb %i3, [%i1] ! write a byte
1709 1687 add %i1, 1, %i1 ! increment destination address
1710 1688 sub %i2, 1, %i2 ! decrement count
1711 1689 btst 3, %i1 ! is destination aligned?
1712 1690 bnz,a 1b
1713 1691 sub %l0, 8, %l0 ! delay slot, decrement shift count
1714 1692 .xfer:
1715 1693 sub %l5, %l0, %l1 ! generate shift left count
1716 1694 sll %i4, %l1, %i5 ! get leftover
1717 1695 3:
1718 1696 and %i2, 3, %l4 ! must do remaining bytes if count%4 > 0
1719 1697 andn %i2, 3, %i2 ! # of aligned bytes that can be moved
1720 1698 2:
1721 1699 ld [%i0], %i3 ! read a source word
1722 1700 add %i0, 4, %i0 ! increment source address
1723 1701 srl %i3, %l0, %i4 ! upper src bits into lower dst bits
1724 1702 or %i5, %i4, %i5 ! merge with upper dest bits (leftover)
1725 1703 st %i5, [%i1] ! write a destination word
1726 1704 subcc %i2, 4, %i2 ! decrement count
1727 1705 bz %ncc, .unalign_out ! check if done
1728 1706 add %i1, 4, %i1 ! increment destination address
1729 1707 b 2b ! loop
1730 1708 sll %i3, %l1, %i5 ! get leftover
1731 1709 .unalign_out:
1732 1710 tst %l4 ! any bytes leftover?
1733 1711 bz %ncc, .cpdone
1734 1712 .empty ! allow next instruction in delay slot
1735 1713 1:
1736 1714 sub %l0, 8, %l0 ! decrement shift
1737 1715 srl %i3, %l0, %i4 ! upper src byte into lower dst byte
1738 1716 stb %i4, [%i1] ! write a byte
1739 1717 subcc %l4, 1, %l4 ! decrement count
1740 1718 bz %ncc, .cpdone ! done?
1741 1719 add %i1, 1, %i1 ! increment destination
1742 1720 tst %l0 ! any more previously read bytes
1743 1721 bnz %ncc, 1b ! we have leftover bytes
1744 1722 mov %l4, %i2 ! delay slot, mv cnt where dbytecp wants
1745 1723 b .dbytecp ! let dbytecp do the rest
1746 1724 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst
1747 1725 !
1748 1726 ! the destination address is aligned and the source is not
1749 1727 !
1750 1728 .align_src_only:
1751 1729 ldub [%i0], %i3 ! read a byte from source address
1752 1730 add %i0, 1, %i0 ! increment source address
1753 1731 or %i4, %i3, %i4 ! or in with previous bytes (if any)
1754 1732 btst 3, %i0 ! is source aligned?
1755 1733 add %l0, 8, %l0 ! increment shift count (US)
1756 1734 bnz,a .align_src_only
1757 1735 sll %i4, 8, %i4 ! make room for next byte
1758 1736 b,a .xfer
1759 1737 !
1760 1738 ! if from address unaligned for double-word moves,
1761 1739 ! move bytes till it is, if count is < 56 it could take
1762 1740 ! longer to align the thing than to do the transfer
1763 1741 ! in word size chunks right away
1764 1742 !
1765 1743 .aldoubcp:
1766 1744 cmp %i2, 56 ! if count < 56, use wordcp, it takes
1767 1745 blu,a %ncc, .alwordcp ! longer to align doubles than words
1768 1746 mov 3, %o0 ! mask for word alignment
1769 1747 call .alignit ! copy bytes until aligned
1770 1748 mov 7, %o0 ! mask for double alignment
1771 1749 !
1772 1750 ! source and destination are now double-word aligned
1773 1751 ! i3 has aligned count returned by alignit
1774 1752 !
1775 1753 and %i2, 7, %i2 ! unaligned leftover count
1776 1754 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst
1777 1755 5:
1778 1756 ldx [%i0+%i1], %o4 ! read from address
1779 1757 stx %o4, [%i1] ! write at destination address
1780 1758 subcc %i3, 8, %i3 ! dec count
1781 1759 bgu %ncc, 5b
1782 1760 add %i1, 8, %i1 ! delay slot, inc to address
1783 1761 cmp %i2, 4 ! see if we can copy a word
1784 1762 blu %ncc, .dbytecp ! if 3 or less bytes use bytecp
1785 1763 .empty
1786 1764 !
1787 1765 ! for leftover bytes we fall into wordcp, if needed
1788 1766 !
1789 1767 .wordcp:
1790 1768 and %i2, 3, %i2 ! unaligned leftover count
1791 1769 5:
1792 1770 ld [%i0+%i1], %o4 ! read from address
1793 1771 st %o4, [%i1] ! write at destination address
1794 1772 subcc %i3, 4, %i3 ! dec count
1795 1773 bgu %ncc, 5b
1796 1774 add %i1, 4, %i1 ! delay slot, inc to address
1797 1775 b,a .dbytecp
1798 1776
1799 1777 ! we come here to align copies on word boundaries
1800 1778 .alwordcp:
1801 1779 call .alignit ! go word-align it
1802 1780 mov 3, %o0 ! bits that must be zero to be aligned
1803 1781 b .wordcp
1804 1782 sub %i0, %i1, %i0 ! i0 gets the difference of src and dst
1805 1783
1806 1784 !
1807 1785 ! byte copy, works with any alignment
1808 1786 !
1809 1787 .bytecp:
1810 1788 b .dbytecp
1811 1789 sub %i0, %i1, %i0 ! i0 gets difference of src and dst
1812 1790
1813 1791 !
1814 1792 ! differenced byte copy, works with any alignment
1815 1793 ! assumes dest in %i1 and (source - dest) in %i0
1816 1794 !
1817 1795 1:
1818 1796 stb %o4, [%i1] ! write to address
1819 1797 inc %i1 ! inc to address
1820 1798 .dbytecp:
1821 1799 deccc %i2 ! dec count
1822 1800 bgeu,a %ncc, 1b ! loop till done
1823 1801 ldub [%i0+%i1], %o4 ! read from address
1824 1802 !
1825 1803 ! FPUSED_FLAG will not have been set in any path leading to
1826 1804 ! this point. No need to deal with it.
1827 1805 !
1828 1806 .cpdone:
1829 1807 btst BCOPY_FLAG, %l6
1830 1808 bz,pn %icc, 2f
1831 1809 andncc %l6, BCOPY_FLAG, %l6
1832 1810 !
1833 1811 ! Here via bcopy. Check to see if the handler was NULL.
1834 1812 ! If so, just return quietly. Otherwise, reset the
1835 1813 ! handler and go home.
1836 1814 !
1837 1815 bnz,pn %ncc, 2f
1838 1816 nop
1839 1817 !
1840 1818 ! Null handler.
1841 1819 !
1842 1820 ret
1843 1821 restore %g0, 0, %o0
1844 1822 !
1845 1823 	! Here via kcopy or bcopy with a handler. Reset the
1846 1824 ! fault handler.
1847 1825 !
1848 1826 2:
1849 1827 membar #Sync
1850 1828 stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
1851 1829 ret
1852 1830 restore %g0, 0, %o0 ! return (0)
1853 1831
1854 1832 /*
1855 1833 * Common code used to align transfers on word and doubleword
1856 1834  * boundaries. Aligns source and destination and returns a count
1857 1835 * of aligned bytes to transfer in %i3
1858 1836 */
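/*
 * Illustrative C sketch of this helper (not the literal code): "mask"
 * stands for the argument passed in %o0, and the loop below is the
 * byte copy at 1b.
 *
 *	while (from & mask) {		copy a byte at a time
 *		*to++ = *from++;
 *		count--;
 *	}
 *	return (count & ~mask);		aligned byte count, in %i3
 */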
1859 1837 1:
1860 1838 inc %i0 ! inc from
1861 1839 stb %o4, [%i1] ! write a byte
1862 1840 inc %i1 ! inc to
1863 1841 dec %i2 ! dec count
1864 1842 .alignit:
1865 1843 btst %o0, %i0 ! %o0 is bit mask to check for alignment
1866 1844 bnz,a 1b
1867 1845 ldub [%i0], %o4 ! read next byte
1868 1846
1869 1847 retl
1870 1848 andn %i2, %o0, %i3 ! return size of aligned bytes
1871 1849 SET_SIZE(bcopy)
1872 1850
1873 -#endif /* lint */
1874 -
1875 1851 /*
1876 1852 * Block copy with possibly overlapped operands.
1877 1853 */
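/*
 * Rough C equivalent of the routine below (illustrative only; the
 * assembly is the real implementation):
 *
 *	void
 *	ovbcopy(const void *from, void *to, size_t count)
 *	{
 *		const char *src = from;
 *		char *dst = to;
 *
 *		if (count == 0)
 *			return;
 *		if ((size_t)(src < dst ? dst - src : src - dst) >= count) {
 *			bcopy(from, to, count);		no overlap
 *		} else if (src < dst) {
 *			while (count-- != 0)		copy backwards
 *				dst[count] = src[count];
 *		} else {
 *			while (count-- != 0)		copy forwards
 *				*dst++ = *src++;
 *		}
 *	}
 */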
1878 1854
1879 -#if defined(lint)
1880 -
1881 -/*ARGSUSED*/
1882 -void
1883 -ovbcopy(const void *from, void *to, size_t count)
1884 -{}
1885 -
1886 -#else /* lint */
1887 -
1888 1855 ENTRY(ovbcopy)
1889 1856 tst %o2 ! check count
1890 1857 bgu,a %ncc, 1f ! nothing to do or bad arguments
1891 1858 subcc %o0, %o1, %o3 ! difference of from and to address
1892 1859
1893 1860 retl ! return
1894 1861 nop
1895 1862 1:
1896 1863 bneg,a %ncc, 2f
1897 1864 neg %o3 ! if < 0, make it positive
1898 1865 2: cmp %o2, %o3 ! cmp size and abs(from - to)
1899 1866 bleu %ncc, bcopy ! if size <= abs(diff): use bcopy,
1900 1867 .empty ! no overlap
1901 1868 cmp %o0, %o1 ! compare from and to addresses
1902 1869 blu %ncc, .ov_bkwd ! if from < to, copy backwards
1903 1870 nop
1904 1871 !
1905 1872 ! Copy forwards.
1906 1873 !
1907 1874 .ov_fwd:
1908 1875 ldub [%o0], %o3 ! read from address
1909 1876 inc %o0 ! inc from address
1910 1877 stb %o3, [%o1] ! write to address
1911 1878 deccc %o2 ! dec count
1912 1879 bgu %ncc, .ov_fwd ! loop till done
1913 1880 inc %o1 ! inc to address
1914 1881
1915 1882 retl ! return
1916 1883 nop
1917 1884 !
1918 1885 ! Copy backwards.
1919 1886 !
1920 1887 .ov_bkwd:
1921 1888 deccc %o2 ! dec count
1922 1889 ldub [%o0 + %o2], %o3 ! get byte at end of src
1923 1890 bgu %ncc, .ov_bkwd ! loop till done
1924 1891 stb %o3, [%o1 + %o2] ! delay slot, store at end of dst
1925 1892
1926 1893 retl ! return
1927 1894 nop
1928 1895 SET_SIZE(ovbcopy)
1929 1896
1930 -#endif /* lint */
1931 -
1932 1897 /*
1933 1898 * hwblkpagecopy()
1934 1899 *
1935 1900 * Copies exactly one page. This routine assumes the caller (ppcopy)
1936 1901 * has already disabled kernel preemption and has checked
1937 1902 * use_hw_bcopy.
1938 1903 */
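/*
 * Hypothetical caller-side sketch (ppcopy is the real caller; the
 * variable names here are illustrative), following the assumptions
 * stated above:
 *
 *	if (use_hw_bcopy) {
 *		kpreempt_disable(curthread);
 *		hwblkpagecopy(src_va, dst_va);	copies exactly PAGESIZE bytes
 *		kpreempt_enable(curthread);
 *	}
 */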
1939 -#ifdef lint
1940 -/*ARGSUSED*/
1941 -void
1942 -hwblkpagecopy(const void *src, void *dst)
1943 -{ }
1944 -#else /* lint */
1945 1904 ENTRY(hwblkpagecopy)
1946 1905 ! get another window w/space for three aligned blocks of saved fpregs
1947 1906 save %sp, -SA(MINFRAME + 4*64), %sp
1948 1907
1949 1908 ! %i0 - source address (arg)
1950 1909 ! %i1 - destination address (arg)
1951 1910 ! %i2 - length of region (not arg)
1952 1911 ! %l0 - saved fprs
1953 1912 ! %l1 - pointer to saved fpregs
1954 1913
1955 1914 rd %fprs, %l0 ! check for unused fp
1956 1915 btst FPRS_FEF, %l0
1957 1916 bz 1f
1958 1917 membar #Sync
1959 1918
1960 1919 ! save in-use fpregs on stack
1961 1920 add %fp, STACK_BIAS - 193, %l1
1962 1921 and %l1, -64, %l1
1963 1922 stda %d0, [%l1]ASI_BLK_P
1964 1923 add %l1, 64, %l3
1965 1924 stda %d16, [%l3]ASI_BLK_P
1966 1925 add %l3, 64, %l3
1967 1926 stda %d32, [%l3]ASI_BLK_P
1968 1927 membar #Sync
1969 1928
1970 1929 1: wr %g0, FPRS_FEF, %fprs
1971 1930 ldda [%i0]ASI_BLK_P, %d0
1972 1931 add %i0, 64, %i0
1973 1932 set PAGESIZE - 64, %i2
1974 1933
1975 1934 2: ldda [%i0]ASI_BLK_P, %d16
1976 1935 fsrc1 %d0, %d32
1977 1936 fsrc1 %d2, %d34
1978 1937 fsrc1 %d4, %d36
1979 1938 fsrc1 %d6, %d38
1980 1939 fsrc1 %d8, %d40
1981 1940 fsrc1 %d10, %d42
1982 1941 fsrc1 %d12, %d44
1983 1942 fsrc1 %d14, %d46
1984 1943 stda %d32, [%i1]ASI_BLK_P
1985 1944 add %i0, 64, %i0
1986 1945 subcc %i2, 64, %i2
1987 1946 bz,pn %ncc, 3f
1988 1947 add %i1, 64, %i1
1989 1948 ldda [%i0]ASI_BLK_P, %d0
1990 1949 fsrc1 %d16, %d32
1991 1950 fsrc1 %d18, %d34
1992 1951 fsrc1 %d20, %d36
1993 1952 fsrc1 %d22, %d38
1994 1953 fsrc1 %d24, %d40
1995 1954 fsrc1 %d26, %d42
1996 1955 fsrc1 %d28, %d44
1997 1956 fsrc1 %d30, %d46
1998 1957 stda %d32, [%i1]ASI_BLK_P
1999 1958 add %i0, 64, %i0
2000 1959 sub %i2, 64, %i2
2001 1960 ba,pt %ncc, 2b
2002 1961 add %i1, 64, %i1
2003 1962
2004 1963 3: membar #Sync
2005 1964 btst FPRS_FEF, %l0
2006 1965 bz 4f
2007 1966 stda %d16, [%i1]ASI_BLK_P
2008 1967
2009 1968 ! restore fpregs from stack
2010 1969 membar #Sync
2011 1970 ldda [%l1]ASI_BLK_P, %d0
2012 1971 add %l1, 64, %l3
2013 1972 ldda [%l3]ASI_BLK_P, %d16
2014 1973 add %l3, 64, %l3
2015 1974 ldda [%l3]ASI_BLK_P, %d32
2016 1975
2017 1976 4: wr %l0, 0, %fprs ! restore fprs
2018 1977 membar #Sync
2019 1978 ret
2020 1979 restore %g0, 0, %o0
2021 1980 SET_SIZE(hwblkpagecopy)
2022 -#endif /* lint */
2023 1981
2024 1982
2025 1983 /*
2026 1984 * Transfer data to and from user space -
2027 1985 * Note that these routines can cause faults
2028 1986 * It is assumed that the kernel has nothing at
2029 1987 * less than KERNELBASE in the virtual address space.
2030 1988 *
2031 1989 * Note that copyin(9F) and copyout(9F) are part of the
2032 1990 * DDI/DKI which specifies that they return '-1' on "errors."
2033 1991 *
2034 1992 * Sigh.
2035 1993 *
2036 1994  * So there are two extremely similar routines - xcopyin() and xcopyout()
2037 1995 * which return the errno that we've faithfully computed. This
2038 1996 * allows other callers (e.g. uiomove(9F)) to work correctly.
2039 1997 * Given that these are used pretty heavily, we expand the calling
2040 1998 * sequences inline for all flavours (rather than making wrappers).
2041 1999 *
2042 2000 * There are also stub routines for xcopyout_little and xcopyin_little,
2043 2001 * which currently are intended to handle requests of <= 16 bytes from
2044 2002 * do_unaligned. Future enhancement to make them handle 8k pages efficiently
2045 2003 * is left as an exercise...
2046 2004 */
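/*
 * Return-convention sketch (illustrative; kbuf/ubuf/len are just
 * placeholder names):
 *
 *	if (copyout(kbuf, ubuf, len) != 0)	DDI/DKI flavour: 0 or -1
 *		return (EFAULT);
 *
 *	error = xcopyout(kbuf, ubuf, len);	x-flavour: 0 or an errno
 *	if (error != 0)
 *		return (error);
 */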
2047 2005
2048 2006 /*
2049 2007 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
2050 2008 *
2051 2009 * General theory of operation:
2052 2010 *
2053 2011 * The only difference between default_copy{in,out} and
2054 2012 * default_xcopy{in,out} is in the error handling routine they invoke
2055 2013 * when a memory access error is seen. default_xcopyOP returns the errno
2056 2014 * while default_copyOP returns -1 (see above). copy{in,out}_noerr set
2057 2015 * a special flag (by oring the value 2 into the fault handler address)
2058 2016 * if they are called with a fault handler already in place. That flag
2059 2017 * causes the default handlers to trampoline to the previous handler
2060 2018 * upon an error.
2061 2019 *
2062 2020 * None of the copyops routines grab a window until it's decided that
2063 2021 * we need to do a HW block copy operation. This saves a window
2064 2022 * spill/fill when we're called during socket ops. The typical IO
2065 2023 * path won't cause spill/fill traps.
2066 2024 *
2067 2025 * This code uses a set of 4 limits for the maximum size that will
2068 2026 * be copied given a particular input/output address alignment.
2069 2027  * The default limits are:
2070 2028 *
2071 2029 * single byte aligned - 900 (hw_copy_limit_1)
2072 2030 * two byte aligned - 1800 (hw_copy_limit_2)
2073 2031 * four byte aligned - 3600 (hw_copy_limit_4)
2074 2032 * eight byte aligned - 7200 (hw_copy_limit_8)
2075 2033 *
2076 2034 * If the value for a particular limit is zero, the copy will be done
2077 2035 * via the copy loops rather than VIS.
2078 2036 *
2079 2037 * Flow:
2080 2038 *
2081 2039 * If count == zero return zero.
2082 2040 *
2083 2041 * Store the previous lo_fault handler into %g6.
2084 2042 * Place our secondary lofault handler into %g5.
2085 2043 * Place the address of our nowindow fault handler into %o3.
2086 2044 * Place the address of the windowed fault handler into %o4.
2087 2045 * --> We'll use this handler if we end up grabbing a window
2088 2046 * --> before we use VIS instructions.
2089 2047 *
2090 2048 * If count is less than or equal to SMALL_LIMIT (7) we
2091 2049 * always do a byte for byte copy.
2092 2050 *
2093 2051 * If count is > SMALL_LIMIT, we check the alignment of the input
2094 2052 * and output pointers. Based on the alignment we check count
2095 2053 * against a soft limit of VIS_COPY_THRESHOLD (900 on spitfire). If
2096 2054 * we're larger than VIS_COPY_THRESHOLD, we check against a limit based
2097 2055  * on the detected alignment. If we exceed that limit, we copy
2098 2056 * via VIS instructions.
2099 2057 *
2100 2058 * If we don't exceed one of the limits, we store -count in %o3,
2101 2059 * we store the number of chunks (8, 4, 2 or 1 byte) operated
2102 2060 * on in our basic copy loop in %o2. Following this we branch
2103 2061 * to the appropriate copy loop and copy that many chunks.
2104 2062 * Since we've been adding the chunk size to %o3 each time through
2105 2063  * as well as decrementing %o2, we can tell if any data
2106 2064 * is left to be copied by examining %o3. If that is zero, we're
2107 2065 * done and can go home. If not, we figure out what the largest
2108 2066 * chunk size left to be copied is and branch to that copy loop
2109 2067 * unless there's only one byte left. We load that as we're
2110 2068 * branching to code that stores it just before we return.
2111 2069 *
2112 2070 * There is one potential situation in which we start to do a VIS
2113 2071 * copy but decide to punt and return to the copy loops. There is
2114 2072 * (in the default configuration) a window of 256 bytes between
2115 2073 * the single byte aligned copy limit and what VIS treats as its
2116 2074 * minimum if floating point is in use in the calling app. We need
2117 2075 * to be prepared to handle this. See the .small_copyOP label for
2118 2076 * details.
2119 2077 *
2120 2078 * Fault handlers are invoked if we reference memory that has no
2121 2079 * current mapping. All forms share the same copyio_fault handler.
2122 2080 * This routine handles fixing up the stack and general housecleaning.
2123 2081 * Each copy operation has a simple fault handler that is then called
2124 2082  * to do the work specific to the individual operation. The handlers
2125 2083 * for default_copyOP and copyOP_noerr are found at the end of
2126 2084 * default_copyout. The handlers for default_xcopyOP are found at the
2127 2085 * end of xdefault_copyin.
2128 2086 */
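/*
 * Pseudo-code sketch of the size/alignment dispatch described above
 * (illustrative, not the literal code path):
 *
 *	if (count == 0)
 *		return (0);
 *	if (count <= SMALL_LIMIT)		! 7 bytes
 *		goto byte_for_byte_loop;
 *	align = 1, 2, 4 or 8, from the low bits of (src | dst);
 *	if (count <= VIS_COPY_THRESHOLD)	! 900 on spitfire
 *		goto copy_loop_for_align;
 *	limit = hw_copy_limit_<align>;		! 900/1800/3600/7200 default
 *	if (limit == 0 || count <= limit)
 *		goto copy_loop_for_align;
 *	goto big_copyOP;			! VIS block copy
 */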
2129 2087
2130 2088 /*
2131 2089 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
2132 2090 */
2133 2091
2134 -#if defined(lint)
2135 -
2136 -/*ARGSUSED*/
2137 -int
2138 -copyout(const void *kaddr, void *uaddr, size_t count)
2139 -{ return (0); }
2140 -
2141 -#else /* lint */
2142 -
2143 2092 /*
2144 2093 * We save the arguments in the following registers in case of a fault:
2145 2094 * kaddr - %g2
2146 2095 * uaddr - %g3
2147 2096 * count - %g4
2148 2097 */
2149 2098 #define SAVE_SRC %g2
2150 2099 #define SAVE_DST %g3
2151 2100 #define SAVE_COUNT %g4
2152 2101
2153 2102 #define REAL_LOFAULT %g5
2154 2103 #define SAVED_LOFAULT %g6
2155 2104
2156 2105 /*
2157 2106 * Generic copyio fault handler. This is the first line of defense when a
2158 2107 * fault occurs in (x)copyin/(x)copyout. In order for this to function
2159 2108 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
2160 2109 * This allows us to share common code for all the flavors of the copy
2161 2110 * operations, including the _noerr versions.
2162 2111 *
2163 2112 * Note that this function will restore the original input parameters before
2164 2113 * calling REAL_LOFAULT. So the real handler can vector to the appropriate
2165 2114 * member of the t_copyop structure, if needed.
2166 2115 */
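/*
 * Sketch of what a "real" handler such as .copyout_err does with the
 * restored arguments (illustrative C; CP_COPYOUT is the offset used
 * there, and cp_copyout stands for the corresponding t_copyop member):
 *
 *	if (curthread->t_copyops != NULL)
 *		return (curthread->t_copyops->cp_copyout(kaddr, uaddr, count));
 *	return (-1);
 */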
2167 2116 ENTRY(copyio_fault)
2168 2117 btst FPUSED_FLAG, SAVED_LOFAULT
2169 2118 bz 1f
2170 2119 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
2171 2120
2172 2121 membar #Sync
2173 2122
2174 2123 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
2175 2124 wr %o2, 0, %gsr ! restore gsr
2176 2125
2177 2126 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
2178 2127 btst FPRS_FEF, %o3
2179 2128 bz 4f
2180 2129 nop
2181 2130
2182 2131 ! restore fpregs from stack
2183 2132 membar #Sync
2184 2133 add %fp, STACK_BIAS - 257, %o2
2185 2134 and %o2, -64, %o2
2186 2135 ldda [%o2]ASI_BLK_P, %d0
2187 2136 add %o2, 64, %o2
2188 2137 ldda [%o2]ASI_BLK_P, %d16
2189 2138 add %o2, 64, %o2
2190 2139 ldda [%o2]ASI_BLK_P, %d32
2191 2140 add %o2, 64, %o2
2192 2141 ldda [%o2]ASI_BLK_P, %d48
2193 2142 membar #Sync
2194 2143
2195 2144 ba,pt %ncc, 1f
2196 2145 wr %o3, 0, %fprs ! restore fprs
2197 2146
2198 2147 4:
2199 2148 FZERO ! zero all of the fpregs
2200 2149 wr %o3, 0, %fprs ! restore fprs
2201 2150
2202 2151 1:
2203 2152
2204 2153 restore
2205 2154
2206 2155 mov SAVE_SRC, %o0
2207 2156 mov SAVE_DST, %o1
2208 2157 jmp REAL_LOFAULT
2209 2158 mov SAVE_COUNT, %o2
2210 2159 SET_SIZE(copyio_fault)
2211 2160
2212 2161 ENTRY(copyio_fault_nowindow)
2213 2162 membar #Sync
2214 2163 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
2215 2164
2216 2165 mov SAVE_SRC, %o0
2217 2166 mov SAVE_DST, %o1
2218 2167 jmp REAL_LOFAULT
2219 2168 mov SAVE_COUNT, %o2
2220 2169 SET_SIZE(copyio_fault_nowindow)
2221 2170
2222 2171 ENTRY(copyout)
2223 2172 sethi %hi(.copyout_err), REAL_LOFAULT
2224 2173 or REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT
2225 2174
2226 2175 .do_copyout:
2227 2176 !
2228 2177 ! Check the length and bail if zero.
2229 2178 !
2230 2179 tst %o2
2231 2180 bnz,pt %ncc, 1f
2232 2181 nop
2233 2182 retl
2234 2183 clr %o0
2235 2184 1:
2236 2185 sethi %hi(copyio_fault), %o4
2237 2186 or %o4, %lo(copyio_fault), %o4
2238 2187 sethi %hi(copyio_fault_nowindow), %o3
2239 2188 ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
2240 2189 or %o3, %lo(copyio_fault_nowindow), %o3
2241 2190 membar #Sync
2242 2191 stn %o3, [THREAD_REG + T_LOFAULT]
2243 2192
2244 2193 mov %o0, SAVE_SRC
2245 2194 mov %o1, SAVE_DST
2246 2195 mov %o2, SAVE_COUNT
2247 2196
2248 2197 !
2249 2198 ! Check to see if we're more than SMALL_LIMIT (7 bytes).
2250 2199 ! Run in leaf mode, using the %o regs as our input regs.
2251 2200 !
2252 2201 subcc %o2, SMALL_LIMIT, %o3
2253 2202 bgu,a,pt %ncc, .dco_ns
2254 2203 or %o0, %o1, %o3
2255 2204 !
2256 2205 ! What was previously ".small_copyout"
2257 2206 ! Do full differenced copy.
2258 2207 !
2259 2208 .dcobcp:
2260 2209 sub %g0, %o2, %o3 ! negate count
2261 2210 add %o0, %o2, %o0 ! make %o0 point at the end
2262 2211 add %o1, %o2, %o1 ! make %o1 point at the end
2263 2212 ba,pt %ncc, .dcocl
2264 2213 ldub [%o0 + %o3], %o4 ! load first byte
2265 2214 !
2266 2215 	! %o0 and %o1 point at the end and remain pointing at the end
2267 2216 	! of their buffers. We pull things out by adding %o3 (which is
2268 2217 	! the negation of the length) to the buffer end which gives us
2269 2218 	! the current location in the buffers. By incrementing %o3 we walk
2270 2219 ! through both buffers without having to bump each buffer's
2271 2220 ! pointer. A very fast 4 instruction loop.
2272 2221 !
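	! An illustrative C view of this loop (off plays the role of %o3,
	! and s/d already point one byte past the end of their buffers):
	!
	!	for (off = -(ssize_t)count; off < 0; off++)
	!		d[off] = s[off];	! ldub / stba ...ASI_USER
	!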
2273 2222 .align 16
2274 2223 .dcocl:
2275 2224 stba %o4, [%o1 + %o3]ASI_USER
2276 2225 inccc %o3
2277 2226 bl,a,pt %ncc, .dcocl
2278 2227 ldub [%o0 + %o3], %o4
2279 2228 !
2280 2229 ! We're done. Go home.
2281 2230 !
2282 2231 membar #Sync
2283 2232 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
2284 2233 retl
2285 2234 clr %o0
2286 2235 !
2287 2236 ! Try aligned copies from here.
2288 2237 !
2289 2238 .dco_ns:
2290 2239 ! %o0 = kernel addr (to be copied from)
2291 2240 ! %o1 = user addr (to be copied to)
2292 2241 ! %o2 = length
2293 2242 ! %o3 = %o1 | %o2 (used for alignment checking)
2294 2243 ! %o4 is alternate lo_fault
2295 2244 ! %o5 is original lo_fault
2296 2245 !
2297 2246 ! See if we're single byte aligned. If we are, check the
2298 2247 ! limit for single byte copies. If we're smaller or equal,
2299 2248 ! bounce to the byte for byte copy loop. Otherwise do it in
2300 2249 ! HW (if enabled).
2301 2250 !
2302 2251 btst 1, %o3
2303 2252 bz,pt %icc, .dcoh8
2304 2253 btst 7, %o3
2305 2254 !
2306 2255 ! Single byte aligned. Do we do it via HW or via
2307 2256 ! byte for byte? Do a quick no memory reference
2308 2257 ! check to pick up small copies.
2309 2258 !
2310 2259 subcc %o2, VIS_COPY_THRESHOLD, %o3
2311 2260 bleu,pt %ncc, .dcobcp
2312 2261 sethi %hi(hw_copy_limit_1), %o3
2313 2262 !
2314 2263 ! Big enough that we need to check the HW limit for
2315 2264 ! this size copy.
2316 2265 !
2317 2266 ld [%o3 + %lo(hw_copy_limit_1)], %o3
2318 2267 !
2319 2268 ! Is HW copy on? If not, do everything byte for byte.
2320 2269 !
2321 2270 tst %o3
2322 2271 bz,pn %icc, .dcobcp
2323 2272 subcc %o3, %o2, %o3
2324 2273 !
2325 2274 ! If we're less than or equal to the single byte copy limit,
2326 2275 ! bop to the copy loop.
2327 2276 !
2328 2277 bge,pt %ncc, .dcobcp
2329 2278 nop
2330 2279 !
2331 2280 ! We're big enough and copy is on. Do it with HW.
2332 2281 !
2333 2282 ba,pt %ncc, .big_copyout
2334 2283 nop
2335 2284 .dcoh8:
2336 2285 !
2337 2286 ! 8 byte aligned?
2338 2287 !
2339 2288 bnz,a %ncc, .dcoh4
2340 2289 btst 3, %o3
2341 2290 !
2342 2291 ! See if we're in the "small range".
2343 2292 ! If so, go off and do the copy.
2344 2293 ! If not, load the hard limit. %o3 is
2345 2294 ! available for reuse.
2346 2295 !
2347 2296 subcc %o2, VIS_COPY_THRESHOLD, %o3
2348 2297 bleu,pt %ncc, .dcos8
2349 2298 sethi %hi(hw_copy_limit_8), %o3
2350 2299 ld [%o3 + %lo(hw_copy_limit_8)], %o3
2351 2300 !
2352 2301 ! If it's zero, there's no HW bcopy.
2353 2302 ! Bop off to the aligned copy.
2354 2303 !
2355 2304 tst %o3
2356 2305 bz,pn %icc, .dcos8
2357 2306 subcc %o3, %o2, %o3
2358 2307 !
2359 2308 ! We're negative if our size is larger than hw_copy_limit_8.
2360 2309 !
2361 2310 bge,pt %ncc, .dcos8
2362 2311 nop
2363 2312 !
2364 2313 ! HW assist is on and we're large enough. Do it.
2365 2314 !
2366 2315 ba,pt %ncc, .big_copyout
2367 2316 nop
2368 2317 .dcos8:
2369 2318 !
2370 2319 ! Housekeeping for copy loops. Uses same idea as in the byte for
2371 2320 ! byte copy loop above.
2372 2321 !
2373 2322 add %o0, %o2, %o0
2374 2323 add %o1, %o2, %o1
2375 2324 sub %g0, %o2, %o3
2376 2325 ba,pt %ncc, .dodebc
2377 2326 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy
2378 2327 !
2379 2328 ! 4 byte aligned?
2380 2329 !
2381 2330 .dcoh4:
2382 2331 bnz,pn %ncc, .dcoh2
2383 2332 !
2384 2333 ! See if we're in the "small range".
2385 2334 	! If so, go off and do the copy.
2386 2335 ! If not, load the hard limit. %o3 is
2387 2336 ! available for reuse.
2388 2337 !
2389 2338 subcc %o2, VIS_COPY_THRESHOLD, %o3
2390 2339 bleu,pt %ncc, .dcos4
2391 2340 sethi %hi(hw_copy_limit_4), %o3
2392 2341 ld [%o3 + %lo(hw_copy_limit_4)], %o3
2393 2342 !
2394 2343 ! If it's zero, there's no HW bcopy.
2395 2344 ! Bop off to the aligned copy.
2396 2345 !
2397 2346 tst %o3
2398 2347 bz,pn %icc, .dcos4
2399 2348 subcc %o3, %o2, %o3
2400 2349 !
2401 2350 ! We're negative if our size is larger than hw_copy_limit_4.
2402 2351 !
2403 2352 bge,pt %ncc, .dcos4
2404 2353 nop
2405 2354 !
2406 2355 ! HW assist is on and we're large enough. Do it.
2407 2356 !
2408 2357 ba,pt %ncc, .big_copyout
2409 2358 nop
2410 2359 .dcos4:
2411 2360 add %o0, %o2, %o0
2412 2361 add %o1, %o2, %o1
2413 2362 sub %g0, %o2, %o3
2414 2363 ba,pt %ncc, .dodfbc
2415 2364 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy
2416 2365 !
2417 2366 ! We must be 2 byte aligned. Off we go.
2418 2367 ! The check for small copies was done in the
2419 2368 ! delay at .dcoh4
2420 2369 !
2421 2370 .dcoh2:
2422 2371 ble %ncc, .dcos2
2423 2372 sethi %hi(hw_copy_limit_2), %o3
2424 2373 ld [%o3 + %lo(hw_copy_limit_2)], %o3
2425 2374 tst %o3
2426 2375 bz,pn %icc, .dcos2
2427 2376 subcc %o3, %o2, %o3
2428 2377 bge,pt %ncc, .dcos2
2429 2378 nop
2430 2379 !
2431 2380 ! HW is on and we're big enough. Do it.
2432 2381 !
2433 2382 ba,pt %ncc, .big_copyout
2434 2383 nop
2435 2384 .dcos2:
2436 2385 add %o0, %o2, %o0
2437 2386 add %o1, %o2, %o1
2438 2387 sub %g0, %o2, %o3
2439 2388 ba,pt %ncc, .dodtbc
2440 2389 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy
2441 2390 .small_copyout:
2442 2391 !
2443 2392 ! Why are we doing this AGAIN? There are certain conditions in
2444 2393 ! big_copyout that will cause us to forego the HW assisted copies
2445 2394 ! and bounce back to a non-HW assisted copy. This dispatches those
2446 2395 ! copies. Note that we branch around this in the main line code.
2447 2396 !
2448 2397 ! We make no check for limits or HW enablement here. We've
2449 2398 ! already been told that we're a poster child so just go off
2450 2399 ! and do it.
2451 2400 !
2452 2401 or %o0, %o1, %o3
2453 2402 btst 1, %o3
2454 2403 bnz %icc, .dcobcp ! Most likely
2455 2404 btst 7, %o3
2456 2405 bz %icc, .dcos8
2457 2406 btst 3, %o3
2458 2407 bz %icc, .dcos4
2459 2408 nop
2460 2409 ba,pt %ncc, .dcos2
2461 2410 nop
2462 2411 .align 32
2463 2412 .dodebc:
2464 2413 ldx [%o0 + %o3], %o4
2465 2414 deccc %o2
2466 2415 stxa %o4, [%o1 + %o3]ASI_USER
2467 2416 bg,pt %ncc, .dodebc
2468 2417 addcc %o3, 8, %o3
2469 2418 !
2470 2419 ! End of copy loop. Check to see if we're done. Most
2471 2420 ! eight byte aligned copies end here.
2472 2421 !
2473 2422 bz,pt %ncc, .dcofh
2474 2423 nop
2475 2424 !
2476 2425 ! Something is left - do it byte for byte.
2477 2426 !
2478 2427 ba,pt %ncc, .dcocl
2479 2428 ldub [%o0 + %o3], %o4 ! load next byte
2480 2429 !
2481 2430 ! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
2482 2431 !
2483 2432 .align 32
2484 2433 .dodfbc:
2485 2434 lduw [%o0 + %o3], %o4
2486 2435 deccc %o2
2487 2436 sta %o4, [%o1 + %o3]ASI_USER
2488 2437 bg,pt %ncc, .dodfbc
2489 2438 addcc %o3, 4, %o3
2490 2439 !
2491 2440 ! End of copy loop. Check to see if we're done. Most
2492 2441 ! four byte aligned copies end here.
2493 2442 !
2494 2443 bz,pt %ncc, .dcofh
2495 2444 nop
2496 2445 !
2497 2446 ! Something is left. Do it byte for byte.
2498 2447 !
2499 2448 ba,pt %ncc, .dcocl
2500 2449 ldub [%o0 + %o3], %o4 ! load next byte
2501 2450 !
2502 2451 ! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
2503 2452 ! copy.
2504 2453 !
2505 2454 .align 32
2506 2455 .dodtbc:
2507 2456 lduh [%o0 + %o3], %o4
2508 2457 deccc %o2
2509 2458 stha %o4, [%o1 + %o3]ASI_USER
2510 2459 bg,pt %ncc, .dodtbc
2511 2460 addcc %o3, 2, %o3
2512 2461 !
2513 2462 ! End of copy loop. Anything left?
2514 2463 !
2515 2464 bz,pt %ncc, .dcofh
2516 2465 nop
2517 2466 !
2518 2467 ! Deal with the last byte
2519 2468 !
2520 2469 ldub [%o0 + %o3], %o4
2521 2470 stba %o4, [%o1 + %o3]ASI_USER
2522 2471 .dcofh:
2523 2472 membar #Sync
2524 2473 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
2525 2474 retl
2526 2475 clr %o0
2527 2476
2528 2477 .big_copyout:
2529 2478 !
2530 2479 ! Are we using the FP registers?
2531 2480 !
2532 2481 rd %fprs, %o3 ! check for unused fp
2533 2482 btst FPRS_FEF, %o3
2534 2483 bnz %icc, .copyout_fpregs_inuse
2535 2484 nop
2536 2485 !
2537 2486 ! We're going to go off and do a block copy.
2538 2487 	! Switch fault handlers and grab a window. We
2539 2488 ! don't do a membar #Sync since we've done only
2540 2489 ! kernel data to this point.
2541 2490 !
2542 2491 stn %o4, [THREAD_REG + T_LOFAULT]
2543 2492 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
2544 2493 !
2545 2494 ! %o3 is now %i3. Save original %fprs.
2546 2495 !
2547 2496 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
2548 2497 ba,pt %ncc, .do_block_copyout ! Not in use. Go off and do it.
2549 2498 wr %g0, FPRS_FEF, %fprs ! clear %fprs
2550 2499 !
2551 2500 .copyout_fpregs_inuse:
2552 2501 !
2553 2502 ! We're here if the FP regs are in use. Need to see if the request
2554 2503 ! exceeds our suddenly larger minimum.
2555 2504 !
2556 2505 	cmp	%i2, VIS_COPY_THRESHOLD+(64*4)	! for large counts (larger minimum)
2557 2506 bl %ncc, .small_copyout
2558 2507 nop
2559 2508 !
2560 2509 ! We're going to go off and do a block copy.
2561 2510 ! Change to the heavy duty fault handler and grab a window first.
2562 2511 !
2563 2512 stn %o4, [THREAD_REG + T_LOFAULT]
2564 2513 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
2565 2514 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
2566 2515 !
2567 2516 ! save in-use fpregs on stack
2568 2517 !
2569 2518 wr %g0, FPRS_FEF, %fprs
2570 2519 membar #Sync
2571 2520 add %fp, STACK_BIAS - 257, %o2
2572 2521 and %o2, -64, %o2
2573 2522 stda %d0, [%o2]ASI_BLK_P
2574 2523 add %o2, 64, %o2
2575 2524 stda %d16, [%o2]ASI_BLK_P
2576 2525 add %o2, 64, %o2
2577 2526 stda %d32, [%o2]ASI_BLK_P
2578 2527 add %o2, 64, %o2
2579 2528 stda %d48, [%o2]ASI_BLK_P
2580 2529 membar #Sync
2581 2530
2582 2531 .do_block_copyout:
2583 2532 membar #StoreStore|#StoreLoad|#LoadStore
2584 2533
2585 2534 rd %gsr, %o2
2586 2535 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
2587 2536
2588 2537 ! Set the lower bit in the saved t_lofault to indicate
2589 2538 ! that we need to clear the %fprs register on the way
2590 2539 ! out
2591 2540 or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
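	! Illustrative sketch of the flag protocol: on the way out,
	! .copyout_exit effectively does
	!
	!	if (saved_lofault & FPUSED_FLAG) {
	!		restore %gsr;
	!		reload the saved fpregs, or FZERO them;
	!		restore %fprs;
	!	}
	!	curthread->t_lofault = saved_lofault & ~FPUSED_FLAG;
	!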
2592 2541
2593 2542 ! Swap src/dst since the code below is memcpy code
2594 2543 ! and memcpy/bcopy have different calling sequences
2595 2544 mov %i1, %i5
2596 2545 mov %i0, %i1
2597 2546 mov %i5, %i0
2598 2547
2599 2548 !!! This code is nearly identical to the version in the sun4u
2600 2549 !!! libc_psr. Most bugfixes made to that file should be
2601 2550 !!! merged into this routine.
2602 2551
2603 2552 andcc %i0, 7, %o3
2604 2553 bz %ncc, copyout_blkcpy
2605 2554 sub %o3, 8, %o3
2606 2555 neg %o3
2607 2556 sub %i2, %o3, %i2
2608 2557
2609 2558 ! Align Destination on double-word boundary
2610 2559
2611 2560 2: ldub [%i1], %o4
2612 2561 inc %i1
2613 2562 stba %o4, [%i0]ASI_USER
2614 2563 deccc %o3
2615 2564 bgu %ncc, 2b
2616 2565 inc %i0
2617 2566 copyout_blkcpy:
2618 2567 andcc %i0, 63, %i3
2619 2568 bz,pn %ncc, copyout_blalign ! now block aligned
2620 2569 sub %i3, 64, %i3
2621 2570 neg %i3 ! bytes till block aligned
2622 2571 sub %i2, %i3, %i2 ! update %i2 with new count
2623 2572
2624 2573 	! Copy %i3 bytes till dst is block (64 byte) aligned. Use
2625 2574 ! double word copies.
2626 2575
2627 2576 alignaddr %i1, %g0, %g1
2628 2577 ldd [%g1], %d0
2629 2578 add %g1, 8, %g1
2630 2579 6:
2631 2580 ldd [%g1], %d2
2632 2581 add %g1, 8, %g1
2633 2582 subcc %i3, 8, %i3
2634 2583 faligndata %d0, %d2, %d8
2635 2584 stda %d8, [%i0]ASI_USER
2636 2585 add %i1, 8, %i1
2637 2586 bz,pn %ncc, copyout_blalign
2638 2587 add %i0, 8, %i0
2639 2588 ldd [%g1], %d0
2640 2589 add %g1, 8, %g1
2641 2590 subcc %i3, 8, %i3
2642 2591 faligndata %d2, %d0, %d8
2643 2592 stda %d8, [%i0]ASI_USER
2644 2593 add %i1, 8, %i1
2645 2594 bgu,pn %ncc, 6b
2646 2595 add %i0, 8, %i0
2647 2596
2648 2597 copyout_blalign:
2649 2598 membar #StoreLoad
2650 2599 ! %i2 = total length
2651 2600 ! %i3 = blocks (length - 64) / 64
2652 2601 ! %i4 = doubles remaining (length - blocks)
2653 2602 sub %i2, 64, %i3
2654 2603 andn %i3, 63, %i3
2655 2604 sub %i2, %i3, %i4
2656 2605 andn %i4, 7, %i4
2657 2606 sub %i4, 16, %i4
2658 2607 sub %i2, %i4, %i2
2659 2608 sub %i2, %i3, %i2
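	! Equivalent arithmetic, for illustration (len is the value of
	! %i2 on entry to copyout_blalign; all three results are byte
	! counts):
	!
	!	i3 = (len - 64) & ~63;		! full blocks to stream
	!	i4 = ((len - i3) & ~7) - 16;	! trailing 8-byte copies
	!	i2 = len - i3 - i4;		! leftover bytes (loop at 7:)
	!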
2660 2609
2661 2610 andn %i1, 0x3f, %l7 ! blk aligned address
2662 2611 alignaddr %i1, %g0, %g0 ! gen %gsr
2663 2612
2664 2613 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5
2665 2614 	andcc	%l5, 7, %i5		! mask everything except bits 0, 1 and 2
2666 2615 add %i1, %i4, %i1
2667 2616 add %i1, %i3, %i1
2668 2617
2669 2618 ldda [%l7]ASI_BLK_P, %d0
2670 2619 add %l7, 64, %l7
2671 2620 ldda [%l7]ASI_BLK_P, %d16
2672 2621 add %l7, 64, %l7
2673 2622 ldda [%l7]ASI_BLK_P, %d32
2674 2623 add %l7, 64, %l7
2675 2624 sub %i3, 128, %i3
2676 2625
2677 2626 ! switch statement to get us to the right 8 byte blk within a
2678 2627 ! 64 byte block
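	! In C terms this compare tree is just a switch on
	! i5 = (src >> 3) & 7, i.e. which 8-byte word of the 64-byte
	! block the source starts in:
	!
	!	switch (i5) {
	!	case 0: goto copyout_seg0;
	!	case 1: goto copyout_seg1;
	!	case 2: goto copyout_seg2;
	!	case 3: goto copyout_seg3;
	!	case 4: goto copyout_seg4;
	!	case 5: goto copyout_seg5;
	!	case 6: goto copyout_seg6;
	!	case 7: goto copyout_seg7;
	!	}
	!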
2679 2628
2680 2629 cmp %i5, 4
2681 2630 bgeu,a copyout_hlf
2682 2631 cmp %i5, 6
2683 2632 cmp %i5, 2
2684 2633 bgeu,a copyout_sqtr
2685 2634 nop
2686 2635 cmp %i5, 1
2687 2636 be,a copyout_seg1
2688 2637 nop
2689 2638 ba,pt %ncc, copyout_seg0
2690 2639 nop
2691 2640 copyout_sqtr:
2692 2641 be,a copyout_seg2
2693 2642 nop
2694 2643 ba,pt %ncc, copyout_seg3
2695 2644 nop
2696 2645
2697 2646 copyout_hlf:
2698 2647 bgeu,a copyout_fqtr
2699 2648 nop
2700 2649 cmp %i5, 5
2701 2650 be,a copyout_seg5
2702 2651 nop
2703 2652 ba,pt %ncc, copyout_seg4
2704 2653 nop
2705 2654 copyout_fqtr:
2706 2655 be,a copyout_seg6
2707 2656 nop
2708 2657 ba,pt %ncc, copyout_seg7
2709 2658 nop
2710 2659
2711 2660 copyout_seg0:
2712 2661 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2713 2662 FALIGN_D0
2714 2663 ldda [%l7]ASI_BLK_P, %d0
2715 2664 stda %d48, [%i0]ASI_BLK_AIUS
2716 2665 add %l7, 64, %l7
2717 2666 subcc %i3, 64, %i3
2718 2667 bz,pn %ncc, 0f
2719 2668 add %i0, 64, %i0
2720 2669 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2721 2670 FALIGN_D16
2722 2671 ldda [%l7]ASI_BLK_P, %d16
2723 2672 stda %d48, [%i0]ASI_BLK_AIUS
2724 2673 add %l7, 64, %l7
2725 2674 subcc %i3, 64, %i3
2726 2675 bz,pn %ncc, 1f
2727 2676 add %i0, 64, %i0
2728 2677 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2729 2678 FALIGN_D32
2730 2679 ldda [%l7]ASI_BLK_P, %d32
2731 2680 stda %d48, [%i0]ASI_BLK_AIUS
2732 2681 add %l7, 64, %l7
2733 2682 subcc %i3, 64, %i3
2734 2683 bz,pn %ncc, 2f
2735 2684 add %i0, 64, %i0
2736 2685 ba,a,pt %ncc, copyout_seg0
2737 2686
2738 2687 0:
2739 2688 FALIGN_D16
2740 2689 stda %d48, [%i0]ASI_BLK_AIUS
2741 2690 add %i0, 64, %i0
2742 2691 membar #Sync
2743 2692 FALIGN_D32
2744 2693 stda %d48, [%i0]ASI_BLK_AIUS
2745 2694 ba,pt %ncc, copyout_blkd0
2746 2695 add %i0, 64, %i0
2747 2696
2748 2697 1:
2749 2698 FALIGN_D32
2750 2699 stda %d48, [%i0]ASI_BLK_AIUS
2751 2700 add %i0, 64, %i0
2752 2701 membar #Sync
2753 2702 FALIGN_D0
2754 2703 stda %d48, [%i0]ASI_BLK_AIUS
2755 2704 ba,pt %ncc, copyout_blkd16
2756 2705 add %i0, 64, %i0
2757 2706
2758 2707 2:
2759 2708 FALIGN_D0
2760 2709 stda %d48, [%i0]ASI_BLK_AIUS
2761 2710 add %i0, 64, %i0
2762 2711 membar #Sync
2763 2712 FALIGN_D16
2764 2713 stda %d48, [%i0]ASI_BLK_AIUS
2765 2714 ba,pt %ncc, copyout_blkd32
2766 2715 add %i0, 64, %i0
2767 2716
2768 2717 copyout_seg1:
2769 2718 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2770 2719 FALIGN_D2
2771 2720 ldda [%l7]ASI_BLK_P, %d0
2772 2721 stda %d48, [%i0]ASI_BLK_AIUS
2773 2722 add %l7, 64, %l7
2774 2723 subcc %i3, 64, %i3
2775 2724 bz,pn %ncc, 0f
2776 2725 add %i0, 64, %i0
2777 2726 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2778 2727 FALIGN_D18
2779 2728 ldda [%l7]ASI_BLK_P, %d16
2780 2729 stda %d48, [%i0]ASI_BLK_AIUS
2781 2730 add %l7, 64, %l7
2782 2731 subcc %i3, 64, %i3
2783 2732 bz,pn %ncc, 1f
2784 2733 add %i0, 64, %i0
2785 2734 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2786 2735 FALIGN_D34
2787 2736 ldda [%l7]ASI_BLK_P, %d32
2788 2737 stda %d48, [%i0]ASI_BLK_AIUS
2789 2738 add %l7, 64, %l7
2790 2739 subcc %i3, 64, %i3
2791 2740 bz,pn %ncc, 2f
2792 2741 add %i0, 64, %i0
2793 2742 ba,a,pt %ncc, copyout_seg1
2794 2743 0:
2795 2744 FALIGN_D18
2796 2745 stda %d48, [%i0]ASI_BLK_AIUS
2797 2746 add %i0, 64, %i0
2798 2747 membar #Sync
2799 2748 FALIGN_D34
2800 2749 stda %d48, [%i0]ASI_BLK_AIUS
2801 2750 ba,pt %ncc, copyout_blkd2
2802 2751 add %i0, 64, %i0
2803 2752
2804 2753 1:
2805 2754 FALIGN_D34
2806 2755 stda %d48, [%i0]ASI_BLK_AIUS
2807 2756 add %i0, 64, %i0
2808 2757 membar #Sync
2809 2758 FALIGN_D2
2810 2759 stda %d48, [%i0]ASI_BLK_AIUS
2811 2760 ba,pt %ncc, copyout_blkd18
2812 2761 add %i0, 64, %i0
2813 2762
2814 2763 2:
2815 2764 FALIGN_D2
2816 2765 stda %d48, [%i0]ASI_BLK_AIUS
2817 2766 add %i0, 64, %i0
2818 2767 membar #Sync
2819 2768 FALIGN_D18
2820 2769 stda %d48, [%i0]ASI_BLK_AIUS
2821 2770 ba,pt %ncc, copyout_blkd34
2822 2771 add %i0, 64, %i0
2823 2772
2824 2773 copyout_seg2:
2825 2774 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2826 2775 FALIGN_D4
2827 2776 ldda [%l7]ASI_BLK_P, %d0
2828 2777 stda %d48, [%i0]ASI_BLK_AIUS
2829 2778 add %l7, 64, %l7
2830 2779 subcc %i3, 64, %i3
2831 2780 bz,pn %ncc, 0f
2832 2781 add %i0, 64, %i0
2833 2782 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2834 2783 FALIGN_D20
2835 2784 ldda [%l7]ASI_BLK_P, %d16
2836 2785 stda %d48, [%i0]ASI_BLK_AIUS
2837 2786 add %l7, 64, %l7
2838 2787 subcc %i3, 64, %i3
2839 2788 bz,pn %ncc, 1f
2840 2789 add %i0, 64, %i0
2841 2790 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2842 2791 FALIGN_D36
2843 2792 ldda [%l7]ASI_BLK_P, %d32
2844 2793 stda %d48, [%i0]ASI_BLK_AIUS
2845 2794 add %l7, 64, %l7
2846 2795 subcc %i3, 64, %i3
2847 2796 bz,pn %ncc, 2f
2848 2797 add %i0, 64, %i0
2849 2798 ba,a,pt %ncc, copyout_seg2
2850 2799
2851 2800 0:
2852 2801 FALIGN_D20
2853 2802 stda %d48, [%i0]ASI_BLK_AIUS
2854 2803 add %i0, 64, %i0
2855 2804 membar #Sync
2856 2805 FALIGN_D36
2857 2806 stda %d48, [%i0]ASI_BLK_AIUS
2858 2807 ba,pt %ncc, copyout_blkd4
2859 2808 add %i0, 64, %i0
2860 2809
2861 2810 1:
2862 2811 FALIGN_D36
2863 2812 stda %d48, [%i0]ASI_BLK_AIUS
2864 2813 add %i0, 64, %i0
2865 2814 membar #Sync
2866 2815 FALIGN_D4
2867 2816 stda %d48, [%i0]ASI_BLK_AIUS
2868 2817 ba,pt %ncc, copyout_blkd20
2869 2818 add %i0, 64, %i0
2870 2819
2871 2820 2:
2872 2821 FALIGN_D4
2873 2822 stda %d48, [%i0]ASI_BLK_AIUS
2874 2823 add %i0, 64, %i0
2875 2824 membar #Sync
2876 2825 FALIGN_D20
2877 2826 stda %d48, [%i0]ASI_BLK_AIUS
2878 2827 ba,pt %ncc, copyout_blkd36
2879 2828 add %i0, 64, %i0
2880 2829
2881 2830 copyout_seg3:
2882 2831 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2883 2832 FALIGN_D6
2884 2833 ldda [%l7]ASI_BLK_P, %d0
2885 2834 stda %d48, [%i0]ASI_BLK_AIUS
2886 2835 add %l7, 64, %l7
2887 2836 subcc %i3, 64, %i3
2888 2837 bz,pn %ncc, 0f
2889 2838 add %i0, 64, %i0
2890 2839 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2891 2840 FALIGN_D22
2892 2841 ldda [%l7]ASI_BLK_P, %d16
2893 2842 stda %d48, [%i0]ASI_BLK_AIUS
2894 2843 add %l7, 64, %l7
2895 2844 subcc %i3, 64, %i3
2896 2845 bz,pn %ncc, 1f
2897 2846 add %i0, 64, %i0
2898 2847 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2899 2848 FALIGN_D38
2900 2849 ldda [%l7]ASI_BLK_P, %d32
2901 2850 stda %d48, [%i0]ASI_BLK_AIUS
2902 2851 add %l7, 64, %l7
2903 2852 subcc %i3, 64, %i3
2904 2853 bz,pn %ncc, 2f
2905 2854 add %i0, 64, %i0
2906 2855 ba,a,pt %ncc, copyout_seg3
2907 2856
2908 2857 0:
2909 2858 FALIGN_D22
2910 2859 stda %d48, [%i0]ASI_BLK_AIUS
2911 2860 add %i0, 64, %i0
2912 2861 membar #Sync
2913 2862 FALIGN_D38
2914 2863 stda %d48, [%i0]ASI_BLK_AIUS
2915 2864 ba,pt %ncc, copyout_blkd6
2916 2865 add %i0, 64, %i0
2917 2866
2918 2867 1:
2919 2868 FALIGN_D38
2920 2869 stda %d48, [%i0]ASI_BLK_AIUS
2921 2870 add %i0, 64, %i0
2922 2871 membar #Sync
2923 2872 FALIGN_D6
2924 2873 stda %d48, [%i0]ASI_BLK_AIUS
2925 2874 ba,pt %ncc, copyout_blkd22
2926 2875 add %i0, 64, %i0
2927 2876
2928 2877 2:
2929 2878 FALIGN_D6
2930 2879 stda %d48, [%i0]ASI_BLK_AIUS
2931 2880 add %i0, 64, %i0
2932 2881 membar #Sync
2933 2882 FALIGN_D22
2934 2883 stda %d48, [%i0]ASI_BLK_AIUS
2935 2884 ba,pt %ncc, copyout_blkd38
2936 2885 add %i0, 64, %i0
2937 2886
2938 2887 copyout_seg4:
2939 2888 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2940 2889 FALIGN_D8
2941 2890 ldda [%l7]ASI_BLK_P, %d0
2942 2891 stda %d48, [%i0]ASI_BLK_AIUS
2943 2892 add %l7, 64, %l7
2944 2893 subcc %i3, 64, %i3
2945 2894 bz,pn %ncc, 0f
2946 2895 add %i0, 64, %i0
2947 2896 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
2948 2897 FALIGN_D24
2949 2898 ldda [%l7]ASI_BLK_P, %d16
2950 2899 stda %d48, [%i0]ASI_BLK_AIUS
2951 2900 add %l7, 64, %l7
2952 2901 subcc %i3, 64, %i3
2953 2902 bz,pn %ncc, 1f
2954 2903 add %i0, 64, %i0
2955 2904 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
2956 2905 FALIGN_D40
2957 2906 ldda [%l7]ASI_BLK_P, %d32
2958 2907 stda %d48, [%i0]ASI_BLK_AIUS
2959 2908 add %l7, 64, %l7
2960 2909 subcc %i3, 64, %i3
2961 2910 bz,pn %ncc, 2f
2962 2911 add %i0, 64, %i0
2963 2912 ba,a,pt %ncc, copyout_seg4
2964 2913
2965 2914 0:
2966 2915 FALIGN_D24
2967 2916 stda %d48, [%i0]ASI_BLK_AIUS
2968 2917 add %i0, 64, %i0
2969 2918 membar #Sync
2970 2919 FALIGN_D40
2971 2920 stda %d48, [%i0]ASI_BLK_AIUS
2972 2921 ba,pt %ncc, copyout_blkd8
2973 2922 add %i0, 64, %i0
2974 2923
2975 2924 1:
2976 2925 FALIGN_D40
2977 2926 stda %d48, [%i0]ASI_BLK_AIUS
2978 2927 add %i0, 64, %i0
2979 2928 membar #Sync
2980 2929 FALIGN_D8
2981 2930 stda %d48, [%i0]ASI_BLK_AIUS
2982 2931 ba,pt %ncc, copyout_blkd24
2983 2932 add %i0, 64, %i0
2984 2933
2985 2934 2:
2986 2935 FALIGN_D8
2987 2936 stda %d48, [%i0]ASI_BLK_AIUS
2988 2937 add %i0, 64, %i0
2989 2938 membar #Sync
2990 2939 FALIGN_D24
2991 2940 stda %d48, [%i0]ASI_BLK_AIUS
2992 2941 ba,pt %ncc, copyout_blkd40
2993 2942 add %i0, 64, %i0
2994 2943
2995 2944 copyout_seg5:
2996 2945 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2997 2946 FALIGN_D10
2998 2947 ldda [%l7]ASI_BLK_P, %d0
2999 2948 stda %d48, [%i0]ASI_BLK_AIUS
3000 2949 add %l7, 64, %l7
3001 2950 subcc %i3, 64, %i3
3002 2951 bz,pn %ncc, 0f
3003 2952 add %i0, 64, %i0
3004 2953 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3005 2954 FALIGN_D26
3006 2955 ldda [%l7]ASI_BLK_P, %d16
3007 2956 stda %d48, [%i0]ASI_BLK_AIUS
3008 2957 add %l7, 64, %l7
3009 2958 subcc %i3, 64, %i3
3010 2959 bz,pn %ncc, 1f
3011 2960 add %i0, 64, %i0
3012 2961 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3013 2962 FALIGN_D42
3014 2963 ldda [%l7]ASI_BLK_P, %d32
3015 2964 stda %d48, [%i0]ASI_BLK_AIUS
3016 2965 add %l7, 64, %l7
3017 2966 subcc %i3, 64, %i3
3018 2967 bz,pn %ncc, 2f
3019 2968 add %i0, 64, %i0
3020 2969 ba,a,pt %ncc, copyout_seg5
3021 2970
3022 2971 0:
3023 2972 FALIGN_D26
3024 2973 stda %d48, [%i0]ASI_BLK_AIUS
3025 2974 add %i0, 64, %i0
3026 2975 membar #Sync
3027 2976 FALIGN_D42
3028 2977 stda %d48, [%i0]ASI_BLK_AIUS
3029 2978 ba,pt %ncc, copyout_blkd10
3030 2979 add %i0, 64, %i0
3031 2980
3032 2981 1:
3033 2982 FALIGN_D42
3034 2983 stda %d48, [%i0]ASI_BLK_AIUS
3035 2984 add %i0, 64, %i0
3036 2985 membar #Sync
3037 2986 FALIGN_D10
3038 2987 stda %d48, [%i0]ASI_BLK_AIUS
3039 2988 ba,pt %ncc, copyout_blkd26
3040 2989 add %i0, 64, %i0
3041 2990
3042 2991 2:
3043 2992 FALIGN_D10
3044 2993 stda %d48, [%i0]ASI_BLK_AIUS
3045 2994 add %i0, 64, %i0
3046 2995 membar #Sync
3047 2996 FALIGN_D26
3048 2997 stda %d48, [%i0]ASI_BLK_AIUS
3049 2998 ba,pt %ncc, copyout_blkd42
3050 2999 add %i0, 64, %i0
3051 3000
3052 3001 copyout_seg6:
3053 3002 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3054 3003 FALIGN_D12
3055 3004 ldda [%l7]ASI_BLK_P, %d0
3056 3005 stda %d48, [%i0]ASI_BLK_AIUS
3057 3006 add %l7, 64, %l7
3058 3007 subcc %i3, 64, %i3
3059 3008 bz,pn %ncc, 0f
3060 3009 add %i0, 64, %i0
3061 3010 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3062 3011 FALIGN_D28
3063 3012 ldda [%l7]ASI_BLK_P, %d16
3064 3013 stda %d48, [%i0]ASI_BLK_AIUS
3065 3014 add %l7, 64, %l7
3066 3015 subcc %i3, 64, %i3
3067 3016 bz,pn %ncc, 1f
3068 3017 add %i0, 64, %i0
3069 3018 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3070 3019 FALIGN_D44
3071 3020 ldda [%l7]ASI_BLK_P, %d32
3072 3021 stda %d48, [%i0]ASI_BLK_AIUS
3073 3022 add %l7, 64, %l7
3074 3023 subcc %i3, 64, %i3
3075 3024 bz,pn %ncc, 2f
3076 3025 add %i0, 64, %i0
3077 3026 ba,a,pt %ncc, copyout_seg6
3078 3027
3079 3028 0:
3080 3029 FALIGN_D28
3081 3030 stda %d48, [%i0]ASI_BLK_AIUS
3082 3031 add %i0, 64, %i0
3083 3032 membar #Sync
3084 3033 FALIGN_D44
3085 3034 stda %d48, [%i0]ASI_BLK_AIUS
3086 3035 ba,pt %ncc, copyout_blkd12
3087 3036 add %i0, 64, %i0
3088 3037
3089 3038 1:
3090 3039 FALIGN_D44
3091 3040 stda %d48, [%i0]ASI_BLK_AIUS
3092 3041 add %i0, 64, %i0
3093 3042 membar #Sync
3094 3043 FALIGN_D12
3095 3044 stda %d48, [%i0]ASI_BLK_AIUS
3096 3045 ba,pt %ncc, copyout_blkd28
3097 3046 add %i0, 64, %i0
3098 3047
3099 3048 2:
3100 3049 FALIGN_D12
3101 3050 stda %d48, [%i0]ASI_BLK_AIUS
3102 3051 add %i0, 64, %i0
3103 3052 membar #Sync
3104 3053 FALIGN_D28
3105 3054 stda %d48, [%i0]ASI_BLK_AIUS
3106 3055 ba,pt %ncc, copyout_blkd44
3107 3056 add %i0, 64, %i0
3108 3057
3109 3058 copyout_seg7:
3110 3059 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3111 3060 FALIGN_D14
3112 3061 ldda [%l7]ASI_BLK_P, %d0
3113 3062 stda %d48, [%i0]ASI_BLK_AIUS
3114 3063 add %l7, 64, %l7
3115 3064 subcc %i3, 64, %i3
3116 3065 bz,pn %ncc, 0f
3117 3066 add %i0, 64, %i0
3118 3067 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3119 3068 FALIGN_D30
3120 3069 ldda [%l7]ASI_BLK_P, %d16
3121 3070 stda %d48, [%i0]ASI_BLK_AIUS
3122 3071 add %l7, 64, %l7
3123 3072 subcc %i3, 64, %i3
3124 3073 bz,pn %ncc, 1f
3125 3074 add %i0, 64, %i0
3126 3075 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3127 3076 FALIGN_D46
3128 3077 ldda [%l7]ASI_BLK_P, %d32
3129 3078 stda %d48, [%i0]ASI_BLK_AIUS
3130 3079 add %l7, 64, %l7
3131 3080 subcc %i3, 64, %i3
3132 3081 bz,pn %ncc, 2f
3133 3082 add %i0, 64, %i0
3134 3083 ba,a,pt %ncc, copyout_seg7
3135 3084
3136 3085 0:
3137 3086 FALIGN_D30
3138 3087 stda %d48, [%i0]ASI_BLK_AIUS
3139 3088 add %i0, 64, %i0
3140 3089 membar #Sync
3141 3090 FALIGN_D46
3142 3091 stda %d48, [%i0]ASI_BLK_AIUS
3143 3092 ba,pt %ncc, copyout_blkd14
3144 3093 add %i0, 64, %i0
3145 3094
3146 3095 1:
3147 3096 FALIGN_D46
3148 3097 stda %d48, [%i0]ASI_BLK_AIUS
3149 3098 add %i0, 64, %i0
3150 3099 membar #Sync
3151 3100 FALIGN_D14
3152 3101 stda %d48, [%i0]ASI_BLK_AIUS
3153 3102 ba,pt %ncc, copyout_blkd30
3154 3103 add %i0, 64, %i0
3155 3104
3156 3105 2:
3157 3106 FALIGN_D14
3158 3107 stda %d48, [%i0]ASI_BLK_AIUS
3159 3108 add %i0, 64, %i0
3160 3109 membar #Sync
3161 3110 FALIGN_D30
3162 3111 stda %d48, [%i0]ASI_BLK_AIUS
3163 3112 ba,pt %ncc, copyout_blkd46
3164 3113 add %i0, 64, %i0
3165 3114
3166 3115
3167 3116 !
3168 3117 ! dribble out the last partial block
3169 3118 !
3170 3119 copyout_blkd0:
3171 3120 subcc %i4, 8, %i4
3172 3121 blu,pn %ncc, copyout_blkdone
3173 3122 faligndata %d0, %d2, %d48
3174 3123 stda %d48, [%i0]ASI_USER
3175 3124 add %i0, 8, %i0
3176 3125 copyout_blkd2:
3177 3126 subcc %i4, 8, %i4
3178 3127 blu,pn %ncc, copyout_blkdone
3179 3128 faligndata %d2, %d4, %d48
3180 3129 stda %d48, [%i0]ASI_USER
3181 3130 add %i0, 8, %i0
3182 3131 copyout_blkd4:
3183 3132 subcc %i4, 8, %i4
3184 3133 blu,pn %ncc, copyout_blkdone
3185 3134 faligndata %d4, %d6, %d48
3186 3135 stda %d48, [%i0]ASI_USER
3187 3136 add %i0, 8, %i0
3188 3137 copyout_blkd6:
3189 3138 subcc %i4, 8, %i4
3190 3139 blu,pn %ncc, copyout_blkdone
3191 3140 faligndata %d6, %d8, %d48
3192 3141 stda %d48, [%i0]ASI_USER
3193 3142 add %i0, 8, %i0
3194 3143 copyout_blkd8:
3195 3144 subcc %i4, 8, %i4
3196 3145 blu,pn %ncc, copyout_blkdone
3197 3146 faligndata %d8, %d10, %d48
3198 3147 stda %d48, [%i0]ASI_USER
3199 3148 add %i0, 8, %i0
3200 3149 copyout_blkd10:
3201 3150 subcc %i4, 8, %i4
3202 3151 blu,pn %ncc, copyout_blkdone
3203 3152 faligndata %d10, %d12, %d48
3204 3153 stda %d48, [%i0]ASI_USER
3205 3154 add %i0, 8, %i0
3206 3155 copyout_blkd12:
3207 3156 subcc %i4, 8, %i4
3208 3157 blu,pn %ncc, copyout_blkdone
3209 3158 faligndata %d12, %d14, %d48
3210 3159 stda %d48, [%i0]ASI_USER
3211 3160 add %i0, 8, %i0
3212 3161 copyout_blkd14:
3213 3162 subcc %i4, 8, %i4
3214 3163 blu,pn %ncc, copyout_blkdone
3215 3164 fsrc1 %d14, %d0
3216 3165 ba,a,pt %ncc, copyout_blkleft
3217 3166
3218 3167 copyout_blkd16:
3219 3168 subcc %i4, 8, %i4
3220 3169 blu,pn %ncc, copyout_blkdone
3221 3170 faligndata %d16, %d18, %d48
3222 3171 stda %d48, [%i0]ASI_USER
3223 3172 add %i0, 8, %i0
3224 3173 copyout_blkd18:
3225 3174 subcc %i4, 8, %i4
3226 3175 blu,pn %ncc, copyout_blkdone
3227 3176 faligndata %d18, %d20, %d48
3228 3177 stda %d48, [%i0]ASI_USER
3229 3178 add %i0, 8, %i0
3230 3179 copyout_blkd20:
3231 3180 subcc %i4, 8, %i4
3232 3181 blu,pn %ncc, copyout_blkdone
3233 3182 faligndata %d20, %d22, %d48
3234 3183 stda %d48, [%i0]ASI_USER
3235 3184 add %i0, 8, %i0
3236 3185 copyout_blkd22:
3237 3186 subcc %i4, 8, %i4
3238 3187 blu,pn %ncc, copyout_blkdone
3239 3188 faligndata %d22, %d24, %d48
3240 3189 stda %d48, [%i0]ASI_USER
3241 3190 add %i0, 8, %i0
3242 3191 copyout_blkd24:
3243 3192 subcc %i4, 8, %i4
3244 3193 blu,pn %ncc, copyout_blkdone
3245 3194 faligndata %d24, %d26, %d48
3246 3195 stda %d48, [%i0]ASI_USER
3247 3196 add %i0, 8, %i0
3248 3197 copyout_blkd26:
3249 3198 subcc %i4, 8, %i4
3250 3199 blu,pn %ncc, copyout_blkdone
3251 3200 faligndata %d26, %d28, %d48
3252 3201 stda %d48, [%i0]ASI_USER
3253 3202 add %i0, 8, %i0
3254 3203 copyout_blkd28:
3255 3204 subcc %i4, 8, %i4
3256 3205 blu,pn %ncc, copyout_blkdone
3257 3206 faligndata %d28, %d30, %d48
3258 3207 stda %d48, [%i0]ASI_USER
3259 3208 add %i0, 8, %i0
3260 3209 copyout_blkd30:
3261 3210 subcc %i4, 8, %i4
3262 3211 blu,pn %ncc, copyout_blkdone
3263 3212 fsrc1 %d30, %d0
3264 3213 ba,a,pt %ncc, copyout_blkleft
3265 3214 copyout_blkd32:
3266 3215 subcc %i4, 8, %i4
3267 3216 blu,pn %ncc, copyout_blkdone
3268 3217 faligndata %d32, %d34, %d48
3269 3218 stda %d48, [%i0]ASI_USER
3270 3219 add %i0, 8, %i0
3271 3220 copyout_blkd34:
3272 3221 subcc %i4, 8, %i4
3273 3222 blu,pn %ncc, copyout_blkdone
3274 3223 faligndata %d34, %d36, %d48
3275 3224 stda %d48, [%i0]ASI_USER
3276 3225 add %i0, 8, %i0
3277 3226 copyout_blkd36:
3278 3227 subcc %i4, 8, %i4
3279 3228 blu,pn %ncc, copyout_blkdone
3280 3229 faligndata %d36, %d38, %d48
3281 3230 stda %d48, [%i0]ASI_USER
3282 3231 add %i0, 8, %i0
3283 3232 copyout_blkd38:
3284 3233 subcc %i4, 8, %i4
3285 3234 blu,pn %ncc, copyout_blkdone
3286 3235 faligndata %d38, %d40, %d48
3287 3236 stda %d48, [%i0]ASI_USER
3288 3237 add %i0, 8, %i0
3289 3238 copyout_blkd40:
3290 3239 subcc %i4, 8, %i4
3291 3240 blu,pn %ncc, copyout_blkdone
3292 3241 faligndata %d40, %d42, %d48
3293 3242 stda %d48, [%i0]ASI_USER
3294 3243 add %i0, 8, %i0
3295 3244 copyout_blkd42:
3296 3245 subcc %i4, 8, %i4
3297 3246 blu,pn %ncc, copyout_blkdone
3298 3247 faligndata %d42, %d44, %d48
3299 3248 stda %d48, [%i0]ASI_USER
3300 3249 add %i0, 8, %i0
3301 3250 copyout_blkd44:
3302 3251 subcc %i4, 8, %i4
3303 3252 blu,pn %ncc, copyout_blkdone
3304 3253 faligndata %d44, %d46, %d48
3305 3254 stda %d48, [%i0]ASI_USER
3306 3255 add %i0, 8, %i0
3307 3256 copyout_blkd46:
3308 3257 subcc %i4, 8, %i4
3309 3258 blu,pn %ncc, copyout_blkdone
3310 3259 fsrc1 %d46, %d0
3311 3260
3312 3261 copyout_blkleft:
3313 3262 1:
3314 3263 ldd [%l7], %d2
3315 3264 add %l7, 8, %l7
3316 3265 subcc %i4, 8, %i4
3317 3266 faligndata %d0, %d2, %d8
3318 3267 stda %d8, [%i0]ASI_USER
3319 3268 blu,pn %ncc, copyout_blkdone
3320 3269 add %i0, 8, %i0
3321 3270 ldd [%l7], %d0
3322 3271 add %l7, 8, %l7
3323 3272 subcc %i4, 8, %i4
3324 3273 faligndata %d2, %d0, %d8
3325 3274 stda %d8, [%i0]ASI_USER
3326 3275 bgeu,pt %ncc, 1b
3327 3276 add %i0, 8, %i0
3328 3277
3329 3278 copyout_blkdone:
3330 3279 tst %i2
3331 3280 bz,pt %ncc, .copyout_exit
3332 3281 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0
3333 3282
3334 3283 7: ldub [%i1], %i4
3335 3284 inc %i1
3336 3285 stba %i4, [%i0]ASI_USER
3337 3286 inc %i0
3338 3287 deccc %i2
3339 3288 bgu %ncc, 7b
3340 3289 nop
3341 3290
3342 3291 .copyout_exit:
3343 3292 membar #StoreLoad|#StoreStore
3344 3293 btst FPUSED_FLAG, SAVED_LOFAULT
3345 3294 bz 1f
3346 3295 nop
3347 3296
3348 3297 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
3349 3298 wr %o2, 0, %gsr ! restore gsr
3350 3299
3351 3300 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
3352 3301 btst FPRS_FEF, %o3
3353 3302 bz 4f
3354 3303 nop
3355 3304
3356 3305 ! restore fpregs from stack
3357 3306 membar #Sync
3358 3307 add %fp, STACK_BIAS - 257, %o2
3359 3308 and %o2, -64, %o2
3360 3309 ldda [%o2]ASI_BLK_P, %d0
3361 3310 add %o2, 64, %o2
3362 3311 ldda [%o2]ASI_BLK_P, %d16
3363 3312 add %o2, 64, %o2
3364 3313 ldda [%o2]ASI_BLK_P, %d32
3365 3314 add %o2, 64, %o2
3366 3315 ldda [%o2]ASI_BLK_P, %d48
3367 3316 membar #Sync
3368 3317
3369 3318 ba,pt %ncc, 1f
3370 3319 wr %o3, 0, %fprs ! restore fprs
3371 3320
3372 3321 4:
3373 3322 FZERO ! zero all of the fpregs
3374 3323 wr %o3, 0, %fprs ! restore fprs
3375 3324
3376 3325 1:
3377 3326 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
3378 3327 membar #Sync ! sync error barrier
3379 3328 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
3380 3329 ret
3381 3330 restore %g0, 0, %o0
3382 3331
3383 3332 .copyout_err:
3384 3333 ldn [THREAD_REG + T_COPYOPS], %o4
3385 3334 brz %o4, 2f
3386 3335 nop
3387 3336 ldn [%o4 + CP_COPYOUT], %g2
3388 3337 jmp %g2
3389 3338 nop
3390 3339 2:
3391 3340 retl
3392 3341 mov -1, %o0
3393 3342 SET_SIZE(copyout)
3394 3343
3395 -#endif /* lint */
3396 3344
3397 -
3398 -#ifdef lint
3399 -
3400 -/*ARGSUSED*/
3401 -int
3402 -xcopyout(const void *kaddr, void *uaddr, size_t count)
3403 -{ return (0); }
3404 -
3405 -#else /* lint */
3406 -
3407 3345 ENTRY(xcopyout)
3408 3346 sethi %hi(.xcopyout_err), REAL_LOFAULT
3409 3347 b .do_copyout
3410 3348 or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
3411 3349 .xcopyout_err:
3412 3350 ldn [THREAD_REG + T_COPYOPS], %o4
3413 3351 brz %o4, 2f
3414 3352 nop
3415 3353 ldn [%o4 + CP_XCOPYOUT], %g2
3416 3354 jmp %g2
3417 3355 nop
3418 3356 2:
3419 3357 retl
3420 3358 mov %g1, %o0
3421 3359 SET_SIZE(xcopyout)
3422 3360
3423 -#endif /* lint */
3424 -
3425 -#ifdef lint
3426 -
3427 -/*ARGSUSED*/
3428 -int
3429 -xcopyout_little(const void *kaddr, void *uaddr, size_t count)
3430 -{ return (0); }
3431 -
3432 -#else /* lint */
3433 -
3434 3361 ENTRY(xcopyout_little)
3435 3362 sethi %hi(.little_err), %o4
3436 3363 ldn [THREAD_REG + T_LOFAULT], %o5
3437 3364 or %o4, %lo(.little_err), %o4
3438 3365 membar #Sync ! sync error barrier
3439 3366 stn %o4, [THREAD_REG + T_LOFAULT]
3440 3367
3441 3368 subcc %g0, %o2, %o3
3442 3369 add %o0, %o2, %o0
3443 3370 bz,pn %ncc, 2f ! check for zero bytes
3444 3371 sub %o2, 1, %o4
3445 3372 add %o0, %o4, %o0 ! start w/last byte
3446 3373 add %o1, %o2, %o1
3447 3374 ldub [%o0+%o3], %o4
3448 3375
3449 3376 1: stba %o4, [%o1+%o3]ASI_AIUSL
3450 3377 inccc %o3
3451 3378 sub %o0, 2, %o0 ! get next byte
3452 3379 bcc,a,pt %ncc, 1b
3453 3380 ldub [%o0+%o3], %o4
3454 3381
3455 3382 2: membar #Sync ! sync error barrier
3456 3383 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
3457 3384 retl
3458 3385 mov %g0, %o0 ! return (0)
3459 3386 SET_SIZE(xcopyout_little)
3460 3387
3461 -#endif /* lint */
3462 -
3463 3388 /*
3464 3389 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
3465 3390 */
3466 3391
3467 -#if defined(lint)
3468 -
3469 -/*ARGSUSED*/
3470 -int
3471 -copyin(const void *uaddr, void *kaddr, size_t count)
3472 -{ return (0); }
3473 -
3474 -#else /* lint */
3475 -
3476 3392 ENTRY(copyin)
3477 3393 sethi %hi(.copyin_err), REAL_LOFAULT
3478 3394 or REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT
3479 3395
3480 3396 .do_copyin:
3481 3397 !
3482 3398 ! Check the length and bail if zero.
3483 3399 !
3484 3400 tst %o2
3485 3401 bnz,pt %ncc, 1f
3486 3402 nop
3487 3403 retl
3488 3404 clr %o0
3489 3405 1:
3490 3406 sethi %hi(copyio_fault), %o4
3491 3407 or %o4, %lo(copyio_fault), %o4
3492 3408 sethi %hi(copyio_fault_nowindow), %o3
3493 3409 ldn [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
3494 3410 or %o3, %lo(copyio_fault_nowindow), %o3
3495 3411 membar #Sync
3496 3412 stn %o3, [THREAD_REG + T_LOFAULT]
3497 3413
3498 3414 mov %o0, SAVE_SRC
3499 3415 mov %o1, SAVE_DST
3500 3416 mov %o2, SAVE_COUNT
3501 3417
3502 3418 !
3503 3419 ! Check to see if we're more than SMALL_LIMIT.
3504 3420 !
3505 3421 subcc %o2, SMALL_LIMIT, %o3
3506 3422 bgu,a,pt %ncc, .dci_ns
3507 3423 or %o0, %o1, %o3
3508 3424 !
3509 3425 ! What was previously ".small_copyin"
3510 3426 !
3511 3427 .dcibcp:
3512 3428 sub %g0, %o2, %o3 ! setup for copy loop
3513 3429 add %o0, %o2, %o0
3514 3430 add %o1, %o2, %o1
3515 3431 ba,pt %ncc, .dcicl
3516 3432 lduba [%o0 + %o3]ASI_USER, %o4
3517 3433 !
3518 3434 ! %o0 and %o1 point at the end and remain pointing at the end
3519 3435 ! of their buffers. We pull things out by adding %o3 (which is
3520 3436 ! the negation of the length) to the buffer end which gives us
3521 3437 	! the current location in the buffers. By incrementing %o3 we walk
3522 3438 ! through both buffers without having to bump each buffer's
3523 3439 ! pointer. A very fast 4 instruction loop.
3524 3440 !
3525 3441 .align 16
3526 3442 .dcicl:
3527 3443 stb %o4, [%o1 + %o3]
3528 3444 inccc %o3
3529 3445 bl,a,pt %ncc, .dcicl
3530 3446 lduba [%o0 + %o3]ASI_USER, %o4
3531 3447 !
3532 3448 ! We're done. Go home.
3533 3449 !
3534 3450 membar #Sync
3535 3451 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
3536 3452 retl
3537 3453 clr %o0
3538 3454 !
3539 3455 ! Try aligned copies from here.
3540 3456 !
3541 3457 .dci_ns:
3542 3458 !
3543 3459 ! See if we're single byte aligned. If we are, check the
3544 3460 ! limit for single byte copies. If we're smaller, or equal,
3545 3461 ! bounce to the byte for byte copy loop. Otherwise do it in
3546 3462 ! HW (if enabled).
3547 3463 !
3548 3464 btst 1, %o3
3549 3465 bz,a,pt %icc, .dcih8
3550 3466 btst 7, %o3
3551 3467 !
3552 3468 ! We're single byte aligned.
3553 3469 !
3554 3470 subcc %o2, VIS_COPY_THRESHOLD, %o3
3555 3471 bleu,pt %ncc, .dcibcp
3556 3472 sethi %hi(hw_copy_limit_1), %o3
3557 3473 ld [%o3 + %lo(hw_copy_limit_1)], %o3
3558 3474 !
3559 3475 	! Is HW copy on? If not, do everything byte for byte.
3560 3476 !
3561 3477 tst %o3
3562 3478 bz,pn %icc, .dcibcp
3563 3479 subcc %o3, %o2, %o3
3564 3480 !
3565 3481 ! Are we bigger than the HW limit? If not
3566 3482 ! go to byte for byte.
3567 3483 !
3568 3484 bge,pt %ncc, .dcibcp
3569 3485 nop
3570 3486 !
3571 3487 ! We're big enough and copy is on. Do it with HW.
3572 3488 !
3573 3489 ba,pt %ncc, .big_copyin
3574 3490 nop
3575 3491 .dcih8:
3576 3492 !
3577 3493 ! 8 byte aligned?
3578 3494 !
3579 3495 bnz,a %ncc, .dcih4
3580 3496 btst 3, %o3
3581 3497 !
3582 3498 ! We're eight byte aligned.
3583 3499 !
3584 3500 subcc %o2, VIS_COPY_THRESHOLD, %o3
3585 3501 bleu,pt %ncc, .dcis8
3586 3502 sethi %hi(hw_copy_limit_8), %o3
3587 3503 ld [%o3 + %lo(hw_copy_limit_8)], %o3
3588 3504 !
3589 3505 ! Is HW assist on? If not, do it with the aligned copy.
3590 3506 !
3591 3507 tst %o3
3592 3508 bz,pn %icc, .dcis8
3593 3509 subcc %o3, %o2, %o3
3594 3510 bge %ncc, .dcis8
3595 3511 nop
3596 3512 ba,pt %ncc, .big_copyin
3597 3513 nop
3598 3514 .dcis8:
3599 3515 !
3600 3516 ! Housekeeping for copy loops. Uses same idea as in the byte for
3601 3517 ! byte copy loop above.
3602 3518 !
3603 3519 add %o0, %o2, %o0
3604 3520 add %o1, %o2, %o1
3605 3521 sub %g0, %o2, %o3
3606 3522 ba,pt %ncc, .didebc
3607 3523 srl %o2, 3, %o2 ! Number of 8 byte chunks to copy
3608 3524 !
3609 3525 ! 4 byte aligned?
3610 3526 !
3611 3527 .dcih4:
3612 3528 bnz %ncc, .dcih2
3613 3529 subcc %o2, VIS_COPY_THRESHOLD, %o3
3614 3530 bleu,pt %ncc, .dcis4
3615 3531 sethi %hi(hw_copy_limit_4), %o3
3616 3532 ld [%o3 + %lo(hw_copy_limit_4)], %o3
3617 3533 !
3618 3534 ! Is HW assist on? If not, do it with the aligned copy.
3619 3535 !
3620 3536 tst %o3
3621 3537 bz,pn %icc, .dcis4
3622 3538 subcc %o3, %o2, %o3
3623 3539 !
3624 3540 	! We're negative if our size is larger than hw_copy_limit_4.
3625 3541 !
3626 3542 bge %ncc, .dcis4
3627 3543 nop
3628 3544 ba,pt %ncc, .big_copyin
3629 3545 nop
3630 3546 .dcis4:
3631 3547 !
3632 3548 ! Housekeeping for copy loops. Uses same idea as in the byte
3633 3549 ! for byte copy loop above.
3634 3550 !
3635 3551 add %o0, %o2, %o0
3636 3552 add %o1, %o2, %o1
3637 3553 sub %g0, %o2, %o3
3638 3554 ba,pt %ncc, .didfbc
3639 3555 srl %o2, 2, %o2 ! Number of 4 byte chunks to copy
3640 3556 .dcih2:
3641 3557 !
3642 3558 ! We're two byte aligned. Check for "smallness"
3643 3559 ! done in delay at .dcih4
3644 3560 !
3645 3561 bleu,pt %ncc, .dcis2
3646 3562 sethi %hi(hw_copy_limit_2), %o3
3647 3563 ld [%o3 + %lo(hw_copy_limit_2)], %o3
3648 3564 !
3649 3565 ! Is HW assist on? If not, do it with the aligned copy.
3650 3566 !
3651 3567 tst %o3
3652 3568 bz,pn %icc, .dcis2
3653 3569 subcc %o3, %o2, %o3
3654 3570 !
3655 3571 ! Are we larger than the HW limit?
3656 3572 !
3657 3573 bge %ncc, .dcis2
3658 3574 nop
3659 3575 !
3660 3576 ! HW assist is on and we're large enough to use it.
3661 3577 !
3662 3578 ba,pt %ncc, .big_copyin
3663 3579 nop
3664 3580 !
3665 3581 ! Housekeeping for copy loops. Uses same idea as in the byte
3666 3582 ! for byte copy loop above.
3667 3583 !
3668 3584 .dcis2:
3669 3585 add %o0, %o2, %o0
3670 3586 add %o1, %o2, %o1
3671 3587 sub %g0, %o2, %o3
3672 3588 ba,pt %ncc, .didtbc
3673 3589 srl %o2, 1, %o2 ! Number of 2 byte chunks to copy
3674 3590 !
3675 3591 .small_copyin:
3676 3592 !
3677 3593 ! Why are we doing this AGAIN? There are certain conditions in
3678 3594 ! big copyin that will cause us to forgo the HW assisted copys
3679 3595 	! big copyin that will cause us to forgo the HW assisted copies
3680 3596 ! those copies. Note that we branch around this in the main line
3681 3597 ! code.
3682 3598 !
3683 3599 ! We make no check for limits or HW enablement here. We've
3684 3600 ! already been told that we're a poster child so just go off
3685 3601 ! and do it.
3686 3602 !
3687 3603 or %o0, %o1, %o3
3688 3604 btst 1, %o3
3689 3605 bnz %icc, .dcibcp ! Most likely
3690 3606 btst 7, %o3
3691 3607 bz %icc, .dcis8
3692 3608 btst 3, %o3
3693 3609 bz %icc, .dcis4
3694 3610 nop
3695 3611 ba,pt %ncc, .dcis2
3696 3612 nop
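	! The alignment dispatch above, sketched in C (a is an illustrative
	! name standing for %o3 = src | dst):
	!
	!	if (a & 1)		goto byte_loop;	/* .dcibcp */
	!	else if ((a & 7) == 0)	goto copy8;	/* .dcis8  */
	!	else if ((a & 3) == 0)	goto copy4;	/* .dcis4  */
	!	else			goto copy2;	/* .dcis2  */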
3697 3613 !
3698 3614 ! Eight byte aligned copies. A steal from the original .small_copyin
3699 3615 ! with modifications. %o2 is number of 8 byte chunks to copy. When
3700 3616 ! done, we examine %o3. If this is < 0, we have 1 - 7 bytes more
3701 3617 ! to copy.
3702 3618 !
3703 3619 .align 32
3704 3620 .didebc:
3705 3621 ldxa [%o0 + %o3]ASI_USER, %o4
3706 3622 deccc %o2
3707 3623 stx %o4, [%o1 + %o3]
3708 3624 bg,pt %ncc, .didebc
3709 3625 addcc %o3, 8, %o3
3710 3626 !
3711 3627 ! End of copy loop. Most 8 byte aligned copies end here.
3712 3628 !
3713 3629 bz,pt %ncc, .dcifh
3714 3630 nop
3715 3631 !
3716 3632 ! Something is left. Do it byte for byte.
3717 3633 !
3718 3634 ba,pt %ncc, .dcicl
3719 3635 lduba [%o0 + %o3]ASI_USER, %o4
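	! Why %o3 < 0 means 1-7 bytes remain: %o3 started at -len and the loop
	! above added 8 exactly (len >> 3) times, so on exit
	!	%o3 == -len + 8 * (len >> 3) == -(len & 7)
	! which is negative only when len was not a multiple of 8; the byte
	! loop then walks %o3 back up to zero.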
3720 3636 !
3721 3637 ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
3722 3638 !
3723 3639 .align 32
3724 3640 .didfbc:
3725 3641 lduwa [%o0 + %o3]ASI_USER, %o4
3726 3642 deccc %o2
3727 3643 st %o4, [%o1 + %o3]
3728 3644 bg,pt %ncc, .didfbc
3729 3645 addcc %o3, 4, %o3
3730 3646 !
3731 3647 ! End of copy loop. Most 4 byte aligned copies end here.
3732 3648 !
3733 3649 bz,pt %ncc, .dcifh
3734 3650 nop
3735 3651 !
3736 3652 ! Something is left. Do it byte for byte.
3737 3653 !
3738 3654 ba,pt %ncc, .dcicl
3739 3655 lduba [%o0 + %o3]ASI_USER, %o4
3740 3656 !
3741 3657 ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
3742 3658 ! copy.
3743 3659 !
3744 3660 .align 32
3745 3661 .didtbc:
3746 3662 lduha [%o0 + %o3]ASI_USER, %o4
3747 3663 deccc %o2
3748 3664 sth %o4, [%o1 + %o3]
3749 3665 bg,pt %ncc, .didtbc
3750 3666 addcc %o3, 2, %o3
3751 3667 !
3752 3668 ! End of copy loop. Most 2 byte aligned copies end here.
3753 3669 !
3754 3670 bz,pt %ncc, .dcifh
3755 3671 nop
3756 3672 !
3757 3673 ! Deal with the last byte
3758 3674 !
3759 3675 lduba [%o0 + %o3]ASI_USER, %o4
3760 3676 stb %o4, [%o1 + %o3]
3761 3677 .dcifh:
3762 3678 membar #Sync
3763 3679 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
3764 3680 retl
3765 3681 clr %o0
3766 3682
3767 3683 .big_copyin:
3768 3684 !
3769 3685 ! Are we using the FP registers?
3770 3686 !
3771 3687 rd %fprs, %o3 ! check for unused fp
3772 3688 btst FPRS_FEF, %o3
3773 3689 bnz %ncc, .copyin_fpregs_inuse
3774 3690 nop
3775 3691 !
3776 3692 ! We're going off to do a block copy.
3777 3693 	! Switch fault handlers and grab a window. We
3778 3694 ! don't do a membar #Sync since we've done only
3779 3695 ! kernel data to this point.
3780 3696 !
3781 3697 stn %o4, [THREAD_REG + T_LOFAULT]
3782 3698 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3783 3699 !
3784 3700 ! %o3 is %i3 after the save...
3785 3701 !
3786 3702 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
3787 3703 ba,pt %ncc, .do_blockcopyin
3788 3704 wr %g0, FPRS_FEF, %fprs
3789 3705 .copyin_fpregs_inuse:
3790 3706 !
3791 3707 ! We're here if the FP regs are in use. Need to see if the request
3792 3708 ! exceeds our suddenly larger minimum.
3793 3709 !
3794 3710 cmp %i2, VIS_COPY_THRESHOLD+(64*4)
3795 3711 bl %ncc, .small_copyin
3796 3712 nop
3797 3713 !
3798 3714 	! We're going off to do a block copy.
3799 3715 ! Change to the heavy duty fault handler and grab a window first.
3800 3716 ! New handler is passed in
3801 3717 !
3802 3718 stn %o4, [THREAD_REG + T_LOFAULT]
3803 3719 save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3804 3720 !
3805 3721 ! %o3 is now %i3
3806 3722 !
3807 3723 st %i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
3808 3724
3809 3725 ! save in-use fpregs on stack
3810 3726 wr %g0, FPRS_FEF, %fprs
3811 3727 membar #Sync
3812 3728 add %fp, STACK_BIAS - 257, %o2
3813 3729 and %o2, -64, %o2
3814 3730 stda %d0, [%o2]ASI_BLK_P
3815 3731 add %o2, 64, %o2
3816 3732 stda %d16, [%o2]ASI_BLK_P
3817 3733 add %o2, 64, %o2
3818 3734 stda %d32, [%o2]ASI_BLK_P
3819 3735 add %o2, 64, %o2
3820 3736 stda %d48, [%o2]ASI_BLK_P
3821 3737 membar #Sync
3822 3738
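	! The stack scratch area above, in C-like terms (fpsave is an
	! illustrative name; HWCOPYFRAMESIZE is assumed to leave room for it):
	!
	!	fpsave = (%fp + STACK_BIAS - 257) & ~63;   /* 64-byte aligned */
	!	block-store %d0, %d16, %d32, %d48 at fpsave, +64, +128, +192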
3823 3739 .do_blockcopyin:
3824 3740 membar #StoreStore|#StoreLoad|#LoadStore
3825 3741
3826 3742 rd %gsr, %o2
3827 3743 st %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
3828 3744
3829 3745 ! Set the lower bit in the saved t_lofault to indicate
3830 3746 ! that we need to clear the %fprs register on the way
3831 3747 ! out
3832 3748 or SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
3833 3749
3834 3750 ! Swap src/dst since the code below is memcpy code
3835 3751 ! and memcpy/bcopy have different calling sequences
3836 3752 mov %i1, %i5
3837 3753 mov %i0, %i1
3838 3754 mov %i5, %i0
3839 3755
3840 3756 !!! This code is nearly identical to the version in the sun4u
3841 3757 !!! libc_psr. Most bugfixes made to that file should be
3842 3758 !!! merged into this routine.
3843 3759
3844 3760 andcc %i0, 7, %o3
3845 3761 bz copyin_blkcpy
3846 3762 sub %o3, 8, %o3
3847 3763 neg %o3
3848 3764 sub %i2, %o3, %i2
3849 3765
3850 3766 ! Align Destination on double-word boundary
3851 3767
3852 3768 2: lduba [%i1]ASI_USER, %o4
3853 3769 inc %i1
3854 3770 inc %i0
3855 3771 deccc %o3
3856 3772 bgu %ncc, 2b
3857 3773 stb %o4, [%i0-1]
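	! The destination-alignment prologue above, as C (dst/src/len stand
	! for %i0/%i1/%i2; illustrative only):
	!
	!	if (dst & 7) {
	!		head = 8 - (dst & 7);	/* bytes to 8-byte alignment */
	!		len -= head;
	!		while (head--)
	!			*dst++ = *src++;	/* lduba ASI_USER / stb  */
	!	}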
3858 3774 copyin_blkcpy:
3859 3775 andcc %i0, 63, %i3
3860 3776 bz,pn %ncc, copyin_blalign ! now block aligned
3861 3777 sub %i3, 64, %i3
3862 3778 neg %i3 ! bytes till block aligned
3863 3779 sub %i2, %i3, %i2 ! update %i2 with new count
3864 3780
3865 3781 	! Copy %i3 bytes till dst is block (64 byte) aligned. Use
3866 3782 ! double word copies.
3867 3783
3868 3784 alignaddr %i1, %g0, %g1
3869 3785 ldda [%g1]ASI_USER, %d0
3870 3786 add %g1, 8, %g1
3871 3787 6:
3872 3788 ldda [%g1]ASI_USER, %d2
3873 3789 add %g1, 8, %g1
3874 3790 subcc %i3, 8, %i3
3875 3791 faligndata %d0, %d2, %d8
3876 3792 std %d8, [%i0]
3877 3793 add %i1, 8, %i1
3878 3794 bz,pn %ncc, copyin_blalign
3879 3795 add %i0, 8, %i0
3880 3796 ldda [%g1]ASI_USER, %d0
3881 3797 add %g1, 8, %g1
3882 3798 subcc %i3, 8, %i3
3883 3799 faligndata %d2, %d0, %d8
3884 3800 std %d8, [%i0]
3885 3801 add %i1, 8, %i1
3886 3802 bgu,pn %ncc, 6b
3887 3803 add %i0, 8, %i0
3888 3804
3889 3805 copyin_blalign:
3890 3806 membar #StoreLoad
3891 3807 ! %i2 = total length
3892 3808 ! %i3 = blocks (length - 64) / 64
3893 3809 ! %i4 = doubles remaining (length - blocks)
3894 3810 sub %i2, 64, %i3
3895 3811 andn %i3, 63, %i3
3896 3812 sub %i2, %i3, %i4
3897 3813 andn %i4, 7, %i4
3898 3814 sub %i4, 16, %i4
3899 3815 sub %i2, %i4, %i2
3900 3816 sub %i2, %i3, %i2
3901 3817
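	! The length split above, as C (blk/dbl stand for %i3/%i4; len is %i2;
	! illustrative names only):
	!
	!	blk = (len - 64) & ~63;		/* bytes for the block loop   */
	!	dbl = ((len - blk) & ~7) - 16;	/* bytes for the 8-byte tails */
	!	len = len - blk - dbl;		/* leftover, copied per byte  */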
3902 3818 andn %i1, 0x3f, %l7 ! blk aligned address
3903 3819 alignaddr %i1, %g0, %g0 ! gen %gsr
3904 3820
3905 3821 srl %i1, 3, %l5 ! bits 3,4,5 are now least sig in %l5
3906 3822 	andcc	%l5, 7, %i5		! mask everything except bits 1, 2, 3
3907 3823 add %i1, %i4, %i1
3908 3824 add %i1, %i3, %i1
3909 3825
3910 3826 ldda [%l7]ASI_BLK_AIUS, %d0
3911 3827 add %l7, 64, %l7
3912 3828 ldda [%l7]ASI_BLK_AIUS, %d16
3913 3829 add %l7, 64, %l7
3914 3830 ldda [%l7]ASI_BLK_AIUS, %d32
3915 3831 add %l7, 64, %l7
3916 3832 sub %i3, 128, %i3
3917 3833
3918 3834 ! switch statement to get us to the right 8 byte blk within a
3919 3835 ! 64 byte block
3920 3836
3921 3837 cmp %i5, 4
3922 3838 bgeu,a copyin_hlf
3923 3839 cmp %i5, 6
3924 3840 cmp %i5, 2
3925 3841 bgeu,a copyin_sqtr
3926 3842 nop
3927 3843 cmp %i5, 1
3928 3844 be,a copyin_seg1
3929 3845 nop
3930 3846 ba,pt %ncc, copyin_seg0
3931 3847 nop
3932 3848 copyin_sqtr:
3933 3849 be,a copyin_seg2
3934 3850 nop
3935 3851 ba,pt %ncc, copyin_seg3
3936 3852 nop
3937 3853
3938 3854 copyin_hlf:
3939 3855 bgeu,a copyin_fqtr
3940 3856 nop
3941 3857 cmp %i5, 5
3942 3858 be,a copyin_seg5
3943 3859 nop
3944 3860 ba,pt %ncc, copyin_seg4
3945 3861 nop
3946 3862 copyin_fqtr:
3947 3863 be,a copyin_seg6
3948 3864 nop
3949 3865 ba,pt %ncc, copyin_seg7
3950 3866 nop
3951 3867
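	! The branch tree above is effectively a switch on which 8-byte word
	! of the 64-byte source block we start in (i stands for %i5;
	! illustrative only):
	!
	!	i = (src >> 3) & 7;
	!	switch (i) {
	!	case 0: goto copyin_seg0;
	!	/* ... */
	!	case 7: goto copyin_seg7;
	!	}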
3952 3868 copyin_seg0:
3953 3869 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3954 3870 FALIGN_D0
3955 3871 ldda [%l7]ASI_BLK_AIUS, %d0
3956 3872 stda %d48, [%i0]ASI_BLK_P
3957 3873 add %l7, 64, %l7
3958 3874 subcc %i3, 64, %i3
3959 3875 bz,pn %ncc, 0f
3960 3876 add %i0, 64, %i0
3961 3877 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
3962 3878 FALIGN_D16
3963 3879 ldda [%l7]ASI_BLK_AIUS, %d16
3964 3880 stda %d48, [%i0]ASI_BLK_P
3965 3881 add %l7, 64, %l7
3966 3882 subcc %i3, 64, %i3
3967 3883 bz,pn %ncc, 1f
3968 3884 add %i0, 64, %i0
3969 3885 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
3970 3886 FALIGN_D32
3971 3887 ldda [%l7]ASI_BLK_AIUS, %d32
3972 3888 stda %d48, [%i0]ASI_BLK_P
3973 3889 add %l7, 64, %l7
3974 3890 subcc %i3, 64, %i3
3975 3891 bz,pn %ncc, 2f
3976 3892 add %i0, 64, %i0
3977 3893 ba,a,pt %ncc, copyin_seg0
3978 3894
3979 3895 0:
3980 3896 FALIGN_D16
3981 3897 stda %d48, [%i0]ASI_BLK_P
3982 3898 add %i0, 64, %i0
3983 3899 membar #Sync
3984 3900 FALIGN_D32
3985 3901 stda %d48, [%i0]ASI_BLK_P
3986 3902 ba,pt %ncc, copyin_blkd0
3987 3903 add %i0, 64, %i0
3988 3904
3989 3905 1:
3990 3906 FALIGN_D32
3991 3907 stda %d48, [%i0]ASI_BLK_P
3992 3908 add %i0, 64, %i0
3993 3909 membar #Sync
3994 3910 FALIGN_D0
3995 3911 stda %d48, [%i0]ASI_BLK_P
3996 3912 ba,pt %ncc, copyin_blkd16
3997 3913 add %i0, 64, %i0
3998 3914
3999 3915 2:
4000 3916 FALIGN_D0
4001 3917 stda %d48, [%i0]ASI_BLK_P
4002 3918 add %i0, 64, %i0
4003 3919 membar #Sync
4004 3920 FALIGN_D16
4005 3921 stda %d48, [%i0]ASI_BLK_P
4006 3922 ba,pt %ncc, copyin_blkd32
4007 3923 add %i0, 64, %i0
4008 3924
4009 3925 copyin_seg1:
4010 3926 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4011 3927 FALIGN_D2
4012 3928 ldda [%l7]ASI_BLK_AIUS, %d0
4013 3929 stda %d48, [%i0]ASI_BLK_P
4014 3930 add %l7, 64, %l7
4015 3931 subcc %i3, 64, %i3
4016 3932 bz,pn %ncc, 0f
4017 3933 add %i0, 64, %i0
4018 3934 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4019 3935 FALIGN_D18
4020 3936 ldda [%l7]ASI_BLK_AIUS, %d16
4021 3937 stda %d48, [%i0]ASI_BLK_P
4022 3938 add %l7, 64, %l7
4023 3939 subcc %i3, 64, %i3
4024 3940 bz,pn %ncc, 1f
4025 3941 add %i0, 64, %i0
4026 3942 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4027 3943 FALIGN_D34
4028 3944 ldda [%l7]ASI_BLK_AIUS, %d32
4029 3945 stda %d48, [%i0]ASI_BLK_P
4030 3946 add %l7, 64, %l7
4031 3947 subcc %i3, 64, %i3
4032 3948 bz,pn %ncc, 2f
4033 3949 add %i0, 64, %i0
4034 3950 ba,a,pt %ncc, copyin_seg1
4035 3951 0:
4036 3952 FALIGN_D18
4037 3953 stda %d48, [%i0]ASI_BLK_P
4038 3954 add %i0, 64, %i0
4039 3955 membar #Sync
4040 3956 FALIGN_D34
4041 3957 stda %d48, [%i0]ASI_BLK_P
4042 3958 ba,pt %ncc, copyin_blkd2
4043 3959 add %i0, 64, %i0
4044 3960
4045 3961 1:
4046 3962 FALIGN_D34
4047 3963 stda %d48, [%i0]ASI_BLK_P
4048 3964 add %i0, 64, %i0
4049 3965 membar #Sync
4050 3966 FALIGN_D2
4051 3967 stda %d48, [%i0]ASI_BLK_P
4052 3968 ba,pt %ncc, copyin_blkd18
4053 3969 add %i0, 64, %i0
4054 3970
4055 3971 2:
4056 3972 FALIGN_D2
4057 3973 stda %d48, [%i0]ASI_BLK_P
4058 3974 add %i0, 64, %i0
4059 3975 membar #Sync
4060 3976 FALIGN_D18
4061 3977 stda %d48, [%i0]ASI_BLK_P
4062 3978 ba,pt %ncc, copyin_blkd34
4063 3979 add %i0, 64, %i0
4064 3980 copyin_seg2:
4065 3981 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4066 3982 FALIGN_D4
4067 3983 ldda [%l7]ASI_BLK_AIUS, %d0
4068 3984 stda %d48, [%i0]ASI_BLK_P
4069 3985 add %l7, 64, %l7
4070 3986 subcc %i3, 64, %i3
4071 3987 bz,pn %ncc, 0f
4072 3988 add %i0, 64, %i0
4073 3989 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4074 3990 FALIGN_D20
4075 3991 ldda [%l7]ASI_BLK_AIUS, %d16
4076 3992 stda %d48, [%i0]ASI_BLK_P
4077 3993 add %l7, 64, %l7
4078 3994 subcc %i3, 64, %i3
4079 3995 bz,pn %ncc, 1f
4080 3996 add %i0, 64, %i0
4081 3997 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4082 3998 FALIGN_D36
4083 3999 ldda [%l7]ASI_BLK_AIUS, %d32
4084 4000 stda %d48, [%i0]ASI_BLK_P
4085 4001 add %l7, 64, %l7
4086 4002 subcc %i3, 64, %i3
4087 4003 bz,pn %ncc, 2f
4088 4004 add %i0, 64, %i0
4089 4005 ba,a,pt %ncc, copyin_seg2
4090 4006
4091 4007 0:
4092 4008 FALIGN_D20
4093 4009 stda %d48, [%i0]ASI_BLK_P
4094 4010 add %i0, 64, %i0
4095 4011 membar #Sync
4096 4012 FALIGN_D36
4097 4013 stda %d48, [%i0]ASI_BLK_P
4098 4014 ba,pt %ncc, copyin_blkd4
4099 4015 add %i0, 64, %i0
4100 4016
4101 4017 1:
4102 4018 FALIGN_D36
4103 4019 stda %d48, [%i0]ASI_BLK_P
4104 4020 add %i0, 64, %i0
4105 4021 membar #Sync
4106 4022 FALIGN_D4
4107 4023 stda %d48, [%i0]ASI_BLK_P
4108 4024 ba,pt %ncc, copyin_blkd20
4109 4025 add %i0, 64, %i0
4110 4026
4111 4027 2:
4112 4028 FALIGN_D4
4113 4029 stda %d48, [%i0]ASI_BLK_P
4114 4030 add %i0, 64, %i0
4115 4031 membar #Sync
4116 4032 FALIGN_D20
4117 4033 stda %d48, [%i0]ASI_BLK_P
4118 4034 ba,pt %ncc, copyin_blkd36
4119 4035 add %i0, 64, %i0
4120 4036
4121 4037 copyin_seg3:
4122 4038 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4123 4039 FALIGN_D6
4124 4040 ldda [%l7]ASI_BLK_AIUS, %d0
4125 4041 stda %d48, [%i0]ASI_BLK_P
4126 4042 add %l7, 64, %l7
4127 4043 subcc %i3, 64, %i3
4128 4044 bz,pn %ncc, 0f
4129 4045 add %i0, 64, %i0
4130 4046 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4131 4047 FALIGN_D22
4132 4048 ldda [%l7]ASI_BLK_AIUS, %d16
4133 4049 stda %d48, [%i0]ASI_BLK_P
4134 4050 add %l7, 64, %l7
4135 4051 subcc %i3, 64, %i3
4136 4052 bz,pn %ncc, 1f
4137 4053 add %i0, 64, %i0
4138 4054 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4139 4055 FALIGN_D38
4140 4056 ldda [%l7]ASI_BLK_AIUS, %d32
4141 4057 stda %d48, [%i0]ASI_BLK_P
4142 4058 add %l7, 64, %l7
4143 4059 subcc %i3, 64, %i3
4144 4060 bz,pn %ncc, 2f
4145 4061 add %i0, 64, %i0
4146 4062 ba,a,pt %ncc, copyin_seg3
4147 4063
4148 4064 0:
4149 4065 FALIGN_D22
4150 4066 stda %d48, [%i0]ASI_BLK_P
4151 4067 add %i0, 64, %i0
4152 4068 membar #Sync
4153 4069 FALIGN_D38
4154 4070 stda %d48, [%i0]ASI_BLK_P
4155 4071 ba,pt %ncc, copyin_blkd6
4156 4072 add %i0, 64, %i0
4157 4073
4158 4074 1:
4159 4075 FALIGN_D38
4160 4076 stda %d48, [%i0]ASI_BLK_P
4161 4077 add %i0, 64, %i0
4162 4078 membar #Sync
4163 4079 FALIGN_D6
4164 4080 stda %d48, [%i0]ASI_BLK_P
4165 4081 ba,pt %ncc, copyin_blkd22
4166 4082 add %i0, 64, %i0
4167 4083
4168 4084 2:
4169 4085 FALIGN_D6
4170 4086 stda %d48, [%i0]ASI_BLK_P
4171 4087 add %i0, 64, %i0
4172 4088 membar #Sync
4173 4089 FALIGN_D22
4174 4090 stda %d48, [%i0]ASI_BLK_P
4175 4091 ba,pt %ncc, copyin_blkd38
4176 4092 add %i0, 64, %i0
4177 4093
4178 4094 copyin_seg4:
4179 4095 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4180 4096 FALIGN_D8
4181 4097 ldda [%l7]ASI_BLK_AIUS, %d0
4182 4098 stda %d48, [%i0]ASI_BLK_P
4183 4099 add %l7, 64, %l7
4184 4100 subcc %i3, 64, %i3
4185 4101 bz,pn %ncc, 0f
4186 4102 add %i0, 64, %i0
4187 4103 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4188 4104 FALIGN_D24
4189 4105 ldda [%l7]ASI_BLK_AIUS, %d16
4190 4106 stda %d48, [%i0]ASI_BLK_P
4191 4107 add %l7, 64, %l7
4192 4108 subcc %i3, 64, %i3
4193 4109 bz,pn %ncc, 1f
4194 4110 add %i0, 64, %i0
4195 4111 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4196 4112 FALIGN_D40
4197 4113 ldda [%l7]ASI_BLK_AIUS, %d32
4198 4114 stda %d48, [%i0]ASI_BLK_P
4199 4115 add %l7, 64, %l7
4200 4116 subcc %i3, 64, %i3
4201 4117 bz,pn %ncc, 2f
4202 4118 add %i0, 64, %i0
4203 4119 ba,a,pt %ncc, copyin_seg4
4204 4120
4205 4121 0:
4206 4122 FALIGN_D24
4207 4123 stda %d48, [%i0]ASI_BLK_P
4208 4124 add %i0, 64, %i0
4209 4125 membar #Sync
4210 4126 FALIGN_D40
4211 4127 stda %d48, [%i0]ASI_BLK_P
4212 4128 ba,pt %ncc, copyin_blkd8
4213 4129 add %i0, 64, %i0
4214 4130
4215 4131 1:
4216 4132 FALIGN_D40
4217 4133 stda %d48, [%i0]ASI_BLK_P
4218 4134 add %i0, 64, %i0
4219 4135 membar #Sync
4220 4136 FALIGN_D8
4221 4137 stda %d48, [%i0]ASI_BLK_P
4222 4138 ba,pt %ncc, copyin_blkd24
4223 4139 add %i0, 64, %i0
4224 4140
4225 4141 2:
4226 4142 FALIGN_D8
4227 4143 stda %d48, [%i0]ASI_BLK_P
4228 4144 add %i0, 64, %i0
4229 4145 membar #Sync
4230 4146 FALIGN_D24
4231 4147 stda %d48, [%i0]ASI_BLK_P
4232 4148 ba,pt %ncc, copyin_blkd40
4233 4149 add %i0, 64, %i0
4234 4150
4235 4151 copyin_seg5:
4236 4152 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4237 4153 FALIGN_D10
4238 4154 ldda [%l7]ASI_BLK_AIUS, %d0
4239 4155 stda %d48, [%i0]ASI_BLK_P
4240 4156 add %l7, 64, %l7
4241 4157 subcc %i3, 64, %i3
4242 4158 bz,pn %ncc, 0f
4243 4159 add %i0, 64, %i0
4244 4160 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4245 4161 FALIGN_D26
4246 4162 ldda [%l7]ASI_BLK_AIUS, %d16
4247 4163 stda %d48, [%i0]ASI_BLK_P
4248 4164 add %l7, 64, %l7
4249 4165 subcc %i3, 64, %i3
4250 4166 bz,pn %ncc, 1f
4251 4167 add %i0, 64, %i0
4252 4168 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4253 4169 FALIGN_D42
4254 4170 ldda [%l7]ASI_BLK_AIUS, %d32
4255 4171 stda %d48, [%i0]ASI_BLK_P
4256 4172 add %l7, 64, %l7
4257 4173 subcc %i3, 64, %i3
4258 4174 bz,pn %ncc, 2f
4259 4175 add %i0, 64, %i0
4260 4176 ba,a,pt %ncc, copyin_seg5
4261 4177
4262 4178 0:
4263 4179 FALIGN_D26
4264 4180 stda %d48, [%i0]ASI_BLK_P
4265 4181 add %i0, 64, %i0
4266 4182 membar #Sync
4267 4183 FALIGN_D42
4268 4184 stda %d48, [%i0]ASI_BLK_P
4269 4185 ba,pt %ncc, copyin_blkd10
4270 4186 add %i0, 64, %i0
4271 4187
4272 4188 1:
4273 4189 FALIGN_D42
4274 4190 stda %d48, [%i0]ASI_BLK_P
4275 4191 add %i0, 64, %i0
4276 4192 membar #Sync
4277 4193 FALIGN_D10
4278 4194 stda %d48, [%i0]ASI_BLK_P
4279 4195 ba,pt %ncc, copyin_blkd26
4280 4196 add %i0, 64, %i0
4281 4197
4282 4198 2:
4283 4199 FALIGN_D10
4284 4200 stda %d48, [%i0]ASI_BLK_P
4285 4201 add %i0, 64, %i0
4286 4202 membar #Sync
4287 4203 FALIGN_D26
4288 4204 stda %d48, [%i0]ASI_BLK_P
4289 4205 ba,pt %ncc, copyin_blkd42
4290 4206 add %i0, 64, %i0
4291 4207
4292 4208 copyin_seg6:
4293 4209 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4294 4210 FALIGN_D12
4295 4211 ldda [%l7]ASI_BLK_AIUS, %d0
4296 4212 stda %d48, [%i0]ASI_BLK_P
4297 4213 add %l7, 64, %l7
4298 4214 subcc %i3, 64, %i3
4299 4215 bz,pn %ncc, 0f
4300 4216 add %i0, 64, %i0
4301 4217 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4302 4218 FALIGN_D28
4303 4219 ldda [%l7]ASI_BLK_AIUS, %d16
4304 4220 stda %d48, [%i0]ASI_BLK_P
4305 4221 add %l7, 64, %l7
4306 4222 subcc %i3, 64, %i3
4307 4223 bz,pn %ncc, 1f
4308 4224 add %i0, 64, %i0
4309 4225 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4310 4226 FALIGN_D44
4311 4227 ldda [%l7]ASI_BLK_AIUS, %d32
4312 4228 stda %d48, [%i0]ASI_BLK_P
4313 4229 add %l7, 64, %l7
4314 4230 subcc %i3, 64, %i3
4315 4231 bz,pn %ncc, 2f
4316 4232 add %i0, 64, %i0
4317 4233 ba,a,pt %ncc, copyin_seg6
4318 4234
4319 4235 0:
4320 4236 FALIGN_D28
4321 4237 stda %d48, [%i0]ASI_BLK_P
4322 4238 add %i0, 64, %i0
4323 4239 membar #Sync
4324 4240 FALIGN_D44
4325 4241 stda %d48, [%i0]ASI_BLK_P
4326 4242 ba,pt %ncc, copyin_blkd12
4327 4243 add %i0, 64, %i0
4328 4244
4329 4245 1:
4330 4246 FALIGN_D44
4331 4247 stda %d48, [%i0]ASI_BLK_P
4332 4248 add %i0, 64, %i0
4333 4249 membar #Sync
4334 4250 FALIGN_D12
4335 4251 stda %d48, [%i0]ASI_BLK_P
4336 4252 ba,pt %ncc, copyin_blkd28
4337 4253 add %i0, 64, %i0
4338 4254
4339 4255 2:
4340 4256 FALIGN_D12
4341 4257 stda %d48, [%i0]ASI_BLK_P
4342 4258 add %i0, 64, %i0
4343 4259 membar #Sync
4344 4260 FALIGN_D28
4345 4261 stda %d48, [%i0]ASI_BLK_P
4346 4262 ba,pt %ncc, copyin_blkd44
4347 4263 add %i0, 64, %i0
4348 4264
4349 4265 copyin_seg7:
4350 4266 ! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4351 4267 FALIGN_D14
4352 4268 ldda [%l7]ASI_BLK_AIUS, %d0
4353 4269 stda %d48, [%i0]ASI_BLK_P
4354 4270 add %l7, 64, %l7
4355 4271 subcc %i3, 64, %i3
4356 4272 bz,pn %ncc, 0f
4357 4273 add %i0, 64, %i0
4358 4274 ! 2nd chunk - %d0 pre, %d16 low, %d32 high, %d48 dst
4359 4275 FALIGN_D30
4360 4276 ldda [%l7]ASI_BLK_AIUS, %d16
4361 4277 stda %d48, [%i0]ASI_BLK_P
4362 4278 add %l7, 64, %l7
4363 4279 subcc %i3, 64, %i3
4364 4280 bz,pn %ncc, 1f
4365 4281 add %i0, 64, %i0
4366 4282 ! 3rd chunk - %d0 high, %d16 pre, %d32 low, %d48 dst
4367 4283 FALIGN_D46
4368 4284 ldda [%l7]ASI_BLK_AIUS, %d32
4369 4285 stda %d48, [%i0]ASI_BLK_P
4370 4286 add %l7, 64, %l7
4371 4287 subcc %i3, 64, %i3
4372 4288 bz,pn %ncc, 2f
4373 4289 add %i0, 64, %i0
4374 4290 ba,a,pt %ncc, copyin_seg7
4375 4291
4376 4292 0:
4377 4293 FALIGN_D30
4378 4294 stda %d48, [%i0]ASI_BLK_P
4379 4295 add %i0, 64, %i0
4380 4296 membar #Sync
4381 4297 FALIGN_D46
4382 4298 stda %d48, [%i0]ASI_BLK_P
4383 4299 ba,pt %ncc, copyin_blkd14
4384 4300 add %i0, 64, %i0
4385 4301
4386 4302 1:
4387 4303 FALIGN_D46
4388 4304 stda %d48, [%i0]ASI_BLK_P
4389 4305 add %i0, 64, %i0
4390 4306 membar #Sync
4391 4307 FALIGN_D14
4392 4308 stda %d48, [%i0]ASI_BLK_P
4393 4309 ba,pt %ncc, copyin_blkd30
4394 4310 add %i0, 64, %i0
4395 4311
4396 4312 2:
4397 4313 FALIGN_D14
4398 4314 stda %d48, [%i0]ASI_BLK_P
4399 4315 add %i0, 64, %i0
4400 4316 membar #Sync
4401 4317 FALIGN_D30
4402 4318 stda %d48, [%i0]ASI_BLK_P
4403 4319 ba,pt %ncc, copyin_blkd46
4404 4320 add %i0, 64, %i0
4405 4321
4406 4322
4407 4323 !
4408 4324 ! dribble out the last partial block
4409 4325 !
4410 4326 copyin_blkd0:
4411 4327 subcc %i4, 8, %i4
4412 4328 blu,pn %ncc, copyin_blkdone
4413 4329 faligndata %d0, %d2, %d48
4414 4330 std %d48, [%i0]
4415 4331 add %i0, 8, %i0
4416 4332 copyin_blkd2:
4417 4333 subcc %i4, 8, %i4
4418 4334 blu,pn %ncc, copyin_blkdone
4419 4335 faligndata %d2, %d4, %d48
4420 4336 std %d48, [%i0]
4421 4337 add %i0, 8, %i0
4422 4338 copyin_blkd4:
4423 4339 subcc %i4, 8, %i4
4424 4340 blu,pn %ncc, copyin_blkdone
4425 4341 faligndata %d4, %d6, %d48
4426 4342 std %d48, [%i0]
4427 4343 add %i0, 8, %i0
4428 4344 copyin_blkd6:
4429 4345 subcc %i4, 8, %i4
4430 4346 blu,pn %ncc, copyin_blkdone
4431 4347 faligndata %d6, %d8, %d48
4432 4348 std %d48, [%i0]
4433 4349 add %i0, 8, %i0
4434 4350 copyin_blkd8:
4435 4351 subcc %i4, 8, %i4
4436 4352 blu,pn %ncc, copyin_blkdone
4437 4353 faligndata %d8, %d10, %d48
4438 4354 std %d48, [%i0]
4439 4355 add %i0, 8, %i0
4440 4356 copyin_blkd10:
4441 4357 subcc %i4, 8, %i4
4442 4358 blu,pn %ncc, copyin_blkdone
4443 4359 faligndata %d10, %d12, %d48
4444 4360 std %d48, [%i0]
4445 4361 add %i0, 8, %i0
4446 4362 copyin_blkd12:
4447 4363 subcc %i4, 8, %i4
4448 4364 blu,pn %ncc, copyin_blkdone
4449 4365 faligndata %d12, %d14, %d48
4450 4366 std %d48, [%i0]
4451 4367 add %i0, 8, %i0
4452 4368 copyin_blkd14:
4453 4369 subcc %i4, 8, %i4
4454 4370 blu,pn %ncc, copyin_blkdone
4455 4371 fsrc1 %d14, %d0
4456 4372 ba,a,pt %ncc, copyin_blkleft
4457 4373
4458 4374 copyin_blkd16:
4459 4375 subcc %i4, 8, %i4
4460 4376 blu,pn %ncc, copyin_blkdone
4461 4377 faligndata %d16, %d18, %d48
4462 4378 std %d48, [%i0]
4463 4379 add %i0, 8, %i0
4464 4380 copyin_blkd18:
4465 4381 subcc %i4, 8, %i4
4466 4382 blu,pn %ncc, copyin_blkdone
4467 4383 faligndata %d18, %d20, %d48
4468 4384 std %d48, [%i0]
4469 4385 add %i0, 8, %i0
4470 4386 copyin_blkd20:
4471 4387 subcc %i4, 8, %i4
4472 4388 blu,pn %ncc, copyin_blkdone
4473 4389 faligndata %d20, %d22, %d48
4474 4390 std %d48, [%i0]
4475 4391 add %i0, 8, %i0
4476 4392 copyin_blkd22:
4477 4393 subcc %i4, 8, %i4
4478 4394 blu,pn %ncc, copyin_blkdone
4479 4395 faligndata %d22, %d24, %d48
4480 4396 std %d48, [%i0]
4481 4397 add %i0, 8, %i0
4482 4398 copyin_blkd24:
4483 4399 subcc %i4, 8, %i4
4484 4400 blu,pn %ncc, copyin_blkdone
4485 4401 faligndata %d24, %d26, %d48
4486 4402 std %d48, [%i0]
4487 4403 add %i0, 8, %i0
4488 4404 copyin_blkd26:
4489 4405 subcc %i4, 8, %i4
4490 4406 blu,pn %ncc, copyin_blkdone
4491 4407 faligndata %d26, %d28, %d48
4492 4408 std %d48, [%i0]
4493 4409 add %i0, 8, %i0
4494 4410 copyin_blkd28:
4495 4411 subcc %i4, 8, %i4
4496 4412 blu,pn %ncc, copyin_blkdone
4497 4413 faligndata %d28, %d30, %d48
4498 4414 std %d48, [%i0]
4499 4415 add %i0, 8, %i0
4500 4416 copyin_blkd30:
4501 4417 subcc %i4, 8, %i4
4502 4418 blu,pn %ncc, copyin_blkdone
4503 4419 fsrc1 %d30, %d0
4504 4420 ba,a,pt %ncc, copyin_blkleft
4505 4421 copyin_blkd32:
4506 4422 subcc %i4, 8, %i4
4507 4423 blu,pn %ncc, copyin_blkdone
4508 4424 faligndata %d32, %d34, %d48
4509 4425 std %d48, [%i0]
4510 4426 add %i0, 8, %i0
4511 4427 copyin_blkd34:
4512 4428 subcc %i4, 8, %i4
4513 4429 blu,pn %ncc, copyin_blkdone
4514 4430 faligndata %d34, %d36, %d48
4515 4431 std %d48, [%i0]
4516 4432 add %i0, 8, %i0
4517 4433 copyin_blkd36:
4518 4434 subcc %i4, 8, %i4
4519 4435 blu,pn %ncc, copyin_blkdone
4520 4436 faligndata %d36, %d38, %d48
4521 4437 std %d48, [%i0]
4522 4438 add %i0, 8, %i0
4523 4439 copyin_blkd38:
4524 4440 subcc %i4, 8, %i4
4525 4441 blu,pn %ncc, copyin_blkdone
4526 4442 faligndata %d38, %d40, %d48
4527 4443 std %d48, [%i0]
4528 4444 add %i0, 8, %i0
4529 4445 copyin_blkd40:
4530 4446 subcc %i4, 8, %i4
4531 4447 blu,pn %ncc, copyin_blkdone
4532 4448 faligndata %d40, %d42, %d48
4533 4449 std %d48, [%i0]
4534 4450 add %i0, 8, %i0
4535 4451 copyin_blkd42:
4536 4452 subcc %i4, 8, %i4
4537 4453 blu,pn %ncc, copyin_blkdone
4538 4454 faligndata %d42, %d44, %d48
4539 4455 std %d48, [%i0]
4540 4456 add %i0, 8, %i0
4541 4457 copyin_blkd44:
4542 4458 subcc %i4, 8, %i4
4543 4459 blu,pn %ncc, copyin_blkdone
4544 4460 faligndata %d44, %d46, %d48
4545 4461 std %d48, [%i0]
4546 4462 add %i0, 8, %i0
4547 4463 copyin_blkd46:
4548 4464 subcc %i4, 8, %i4
4549 4465 blu,pn %ncc, copyin_blkdone
4550 4466 fsrc1 %d46, %d0
4551 4467
4552 4468 copyin_blkleft:
4553 4469 1:
4554 4470 ldda [%l7]ASI_USER, %d2
4555 4471 add %l7, 8, %l7
4556 4472 subcc %i4, 8, %i4
4557 4473 faligndata %d0, %d2, %d8
4558 4474 std %d8, [%i0]
4559 4475 blu,pn %ncc, copyin_blkdone
4560 4476 add %i0, 8, %i0
4561 4477 ldda [%l7]ASI_USER, %d0
4562 4478 add %l7, 8, %l7
4563 4479 subcc %i4, 8, %i4
4564 4480 faligndata %d2, %d0, %d8
4565 4481 std %d8, [%i0]
4566 4482 bgeu,pt %ncc, 1b
4567 4483 add %i0, 8, %i0
4568 4484
4569 4485 copyin_blkdone:
4570 4486 tst %i2
4571 4487 bz,pt %ncc, .copyin_exit
4572 4488 and %l3, 0x4, %l3 ! fprs.du = fprs.dl = 0
4573 4489
4574 4490 7: lduba [%i1]ASI_USER, %i4
4575 4491 inc %i1
4576 4492 inc %i0
4577 4493 deccc %i2
4578 4494 bgu %ncc, 7b
4579 4495 stb %i4, [%i0 - 1]
4580 4496
4581 4497 .copyin_exit:
4582 4498 membar #StoreLoad|#StoreStore
4583 4499 btst FPUSED_FLAG, SAVED_LOFAULT
4584 4500 bz %icc, 1f
4585 4501 nop
4586 4502
4587 4503 ld [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
4588 4504 wr %o2, 0, %gsr
4589 4505
4590 4506 ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
4591 4507 btst FPRS_FEF, %o3
4592 4508 bz %icc, 4f
4593 4509 nop
4594 4510
4595 4511 ! restore fpregs from stack
4596 4512 membar #Sync
4597 4513 add %fp, STACK_BIAS - 257, %o2
4598 4514 and %o2, -64, %o2
4599 4515 ldda [%o2]ASI_BLK_P, %d0
4600 4516 add %o2, 64, %o2
4601 4517 ldda [%o2]ASI_BLK_P, %d16
4602 4518 add %o2, 64, %o2
4603 4519 ldda [%o2]ASI_BLK_P, %d32
4604 4520 add %o2, 64, %o2
4605 4521 ldda [%o2]ASI_BLK_P, %d48
4606 4522 membar #Sync
4607 4523
4608 4524 ba,pt %ncc, 1f
4609 4525 wr %o3, 0, %fprs ! restore fprs
4610 4526
4611 4527 4:
4612 4528 FZERO ! zero all of the fpregs
4613 4529 wr %o3, 0, %fprs ! restore fprs
4614 4530
4615 4531 1:
4616 4532 andn SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
4617 4533 membar #Sync ! sync error barrier
4618 4534 stn SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
4619 4535 ret
4620 4536 restore %g0, 0, %o0
4621 4537 .copyin_err:
4622 4538 ldn [THREAD_REG + T_COPYOPS], %o4
4623 4539 brz %o4, 2f
4624 4540 nop
4625 4541 ldn [%o4 + CP_COPYIN], %g2
4626 4542 jmp %g2
4627 4543 nop
4628 4544 2:
4629 4545 retl
4630 4546 mov -1, %o0
4631 4547 SET_SIZE(copyin)
4632 4548
4633 -#endif /* lint */
4634 -
4635 -#ifdef lint
4636 -
4637 -/*ARGSUSED*/
4638 -int
4639 -xcopyin(const void *uaddr, void *kaddr, size_t count)
4640 -{ return (0); }
4641 -
4642 -#else /* lint */
4643 -
4644 4549 ENTRY(xcopyin)
4645 4550 sethi %hi(.xcopyin_err), REAL_LOFAULT
4646 4551 b .do_copyin
4647 4552 or REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
4648 4553 .xcopyin_err:
4649 4554 ldn [THREAD_REG + T_COPYOPS], %o4
4650 4555 brz %o4, 2f
4651 4556 nop
4652 4557 ldn [%o4 + CP_XCOPYIN], %g2
4653 4558 jmp %g2
4654 4559 nop
4655 4560 2:
4656 4561 retl
4657 4562 mov %g1, %o0
4658 4563 SET_SIZE(xcopyin)
4659 4564
4660 -#endif /* lint */
4661 -
4662 -#ifdef lint
4663 -
4664 -/*ARGSUSED*/
4665 -int
4666 -xcopyin_little(const void *uaddr, void *kaddr, size_t count)
4667 -{ return (0); }
4668 -
4669 -#else /* lint */
4670 -
4671 4565 ENTRY(xcopyin_little)
4672 4566 sethi %hi(.little_err), %o4
4673 4567 ldn [THREAD_REG + T_LOFAULT], %o5
4674 4568 or %o4, %lo(.little_err), %o4
4675 4569 membar #Sync ! sync error barrier
4676 4570 stn %o4, [THREAD_REG + T_LOFAULT]
4677 4571
4678 4572 subcc %g0, %o2, %o3
4679 4573 add %o0, %o2, %o0
4680 4574 bz,pn %ncc, 2f ! check for zero bytes
4681 4575 sub %o2, 1, %o4
4682 4576 add %o0, %o4, %o0 ! start w/last byte
4683 4577 add %o1, %o2, %o1
4684 4578 lduba [%o0+%o3]ASI_AIUSL, %o4
4685 4579
4686 4580 1: stb %o4, [%o1+%o3]
4687 4581 inccc %o3
4688 4582 sub %o0, 2, %o0 ! get next byte
4689 4583 bcc,a,pt %ncc, 1b
4690 4584 lduba [%o0+%o3]ASI_AIUSL, %o4
4691 4585
4692 4586 2: membar #Sync ! sync error barrier
4693 4587 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
4694 4588 retl
4695 4589 mov %g0, %o0 ! return (0)
4696 4590
4697 4591 .little_err:
4698 4592 membar #Sync ! sync error barrier
4699 4593 stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
4700 4594 retl
4701 4595 mov %g1, %o0
4702 4596 SET_SIZE(xcopyin_little)
4703 4597
4704 -#endif /* lint */
4705 4598
4706 -
4707 4599 /*
4708 4600 * Copy a block of storage - must not overlap (from + len <= to).
4709 4601 * No fault handler installed (to be called under on_fault())
4710 4602 */
4711 -#if defined(lint)
4712 4603
4713 -/* ARGSUSED */
4714 -void
4715 -copyin_noerr(const void *ufrom, void *kto, size_t count)
4716 -{}
4717 -
4718 -#else /* lint */
4719 -
4720 4604 ENTRY(copyin_noerr)
4721 4605 sethi %hi(.copyio_noerr), REAL_LOFAULT
4722 4606 b .do_copyin
4723 4607 or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
4724 4608 .copyio_noerr:
4725 4609 jmp SAVED_LOFAULT
4726 4610 nop
4727 4611 SET_SIZE(copyin_noerr)
4728 4612
4729 -#endif /* lint */
4730 -
4731 4613 /*
4732 4614 * Copy a block of storage - must not overlap (from + len <= to).
4733 4615 * No fault handler installed (to be called under on_fault())
4734 4616 */
4735 4617
4736 -#if defined(lint)
4737 -
4738 -/* ARGSUSED */
4739 -void
4740 -copyout_noerr(const void *kfrom, void *uto, size_t count)
4741 -{}
4742 -
4743 -#else /* lint */
4744 -
4745 4618 ENTRY(copyout_noerr)
4746 4619 sethi %hi(.copyio_noerr), REAL_LOFAULT
4747 4620 b .do_copyout
4748 4621 or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
4749 4622 SET_SIZE(copyout_noerr)
4750 4623
4751 -#endif /* lint */
4752 -
4753 -#if defined(lint)
4754 -
4755 -int use_hw_bcopy = 1;
4756 -int use_hw_copyio = 1;
4757 -int use_hw_bzero = 1;
4758 -uint_t hw_copy_limit_1 = 0;
4759 -uint_t hw_copy_limit_2 = 0;
4760 -uint_t hw_copy_limit_4 = 0;
4761 -uint_t hw_copy_limit_8 = 0;
4762 -
4763 -#else /* !lint */
4764 -
4765 4624 .align 4
4766 4625 DGDEF(use_hw_bcopy)
4767 4626 .word 1
4768 4627 DGDEF(use_hw_copyio)
4769 4628 .word 1
4770 4629 DGDEF(use_hw_bzero)
4771 4630 .word 1
4772 4631 DGDEF(hw_copy_limit_1)
4773 4632 .word 0
4774 4633 DGDEF(hw_copy_limit_2)
4775 4634 .word 0
4776 4635 DGDEF(hw_copy_limit_4)
4777 4636 .word 0
4778 4637 DGDEF(hw_copy_limit_8)
4779 4638 .word 0
4780 4639
4781 4640 .align 64
4782 4641 .section ".text"
4783 -#endif /* !lint */
4784 4642
4785 4643
4786 4644 /*
4787 4645 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
4788 4646 	 * at least 256 bytes in length using spitfire's block stores. If
4789 4647 * the criteria for using this routine are not met then it calls bzero
4790 4648 * and returns 1. Otherwise 0 is returned indicating success.
4791 4649 * Caller is responsible for ensuring use_hw_bzero is true and that
4792 4650 * kpreempt_disable() has been called.
4793 4651 	 * kpreempt_disable() has been called.
	 */
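	! Entry checks below, sketched in C (addr/len stand for %i0/%i1;
	! illustrative only):
	!
	!	if ((addr & 63) != 0 || len < 256 || (len & 63) != 0) {
	!		bzero(addr, len);
	!		return (1);		/* block ops not used */
	!	}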
4794 -#ifdef lint
4795 -/*ARGSUSED*/
4796 -int
4797 -hwblkclr(void *addr, size_t len)
4798 -{
4799 - return(0);
4800 -}
4801 -#else /* lint */
4802 4652 ! %i0 - start address
4803 4653 ! %i1 - length of region (multiple of 64)
4804 4654 ! %l0 - saved fprs
4805 4655 ! %l1 - pointer to saved %d0 block
4806 4656 ! %l2 - saved curthread->t_lwp
4807 4657
4808 4658 ENTRY(hwblkclr)
4809 4659 ! get another window w/space for one aligned block of saved fpregs
4810 4660 save %sp, -SA(MINFRAME + 2*64), %sp
4811 4661
4812 4662 ! Must be block-aligned
4813 4663 andcc %i0, (64-1), %g0
4814 4664 bnz,pn %ncc, 1f
4815 4665 nop
4816 4666
4817 4667 ! ... and must be 256 bytes or more
4818 4668 cmp %i1, 256
4819 4669 blu,pn %ncc, 1f
4820 4670 nop
4821 4671
4822 4672 ! ... and length must be a multiple of 64
4823 4673 andcc %i1, (64-1), %g0
4824 4674 bz,pn %ncc, 2f
4825 4675 nop
4826 4676
4827 4677 1: ! punt, call bzero but notify the caller that bzero was used
4828 4678 mov %i0, %o0
4829 4679 call bzero
4830 4680 mov %i1, %o1
4831 4681 ret
4832 4682 restore %g0, 1, %o0 ! return (1) - did not use block operations
4833 4683
4834 4684 2: rd %fprs, %l0 ! check for unused fp
4835 4685 btst FPRS_FEF, %l0
4836 4686 bz 1f
4837 4687 nop
4838 4688
4839 4689 ! save in-use fpregs on stack
4840 4690 membar #Sync
4841 4691 add %fp, STACK_BIAS - 65, %l1
4842 4692 and %l1, -64, %l1
4843 4693 stda %d0, [%l1]ASI_BLK_P
4844 4694
4845 4695 1: membar #StoreStore|#StoreLoad|#LoadStore
4846 4696 wr %g0, FPRS_FEF, %fprs
4847 4697 wr %g0, ASI_BLK_P, %asi
4848 4698
4849 4699 ! Clear block
4850 4700 fzero %d0
4851 4701 fzero %d2
4852 4702 fzero %d4
4853 4703 fzero %d6
4854 4704 fzero %d8
4855 4705 fzero %d10
4856 4706 fzero %d12
4857 4707 fzero %d14
4858 4708
4859 4709 mov 256, %i3
4860 4710 ba .pz_doblock
4861 4711 nop
4862 4712
4863 4713 .pz_blkstart:
4864 4714 ! stda %d0, [%i0+192]%asi ! in dly slot of branch that got us here
4865 4715 stda %d0, [%i0+128]%asi
4866 4716 stda %d0, [%i0+64]%asi
4867 4717 stda %d0, [%i0]%asi
4868 4718 .pz_zinst:
4869 4719 add %i0, %i3, %i0
4870 4720 sub %i1, %i3, %i1
4871 4721 .pz_doblock:
4872 4722 cmp %i1, 256
4873 4723 bgeu,a %ncc, .pz_blkstart
4874 4724 stda %d0, [%i0+192]%asi
4875 4725
4876 4726 cmp %i1, 64
4877 4727 blu %ncc, .pz_finish
4878 4728
4879 4729 andn %i1, (64-1), %i3
4880 4730 srl %i3, 4, %i2 ! using blocks, 1 instr / 16 words
4881 4731 set .pz_zinst, %i4
4882 4732 sub %i4, %i2, %i4
4883 4733 jmp %i4
4884 4734 nop
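	! The computed jump above: each stda before .pz_zinst is one 4-byte
	! instruction that clears 64 bytes, so backing up (%i3 / 64)
	! instructions, i.e. %i3 >> 4 bytes, from .pz_zinst executes exactly
	! the block stores still needed for the remaining %i3 (< 256) bytes.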
4885 4735
4886 4736 .pz_finish:
4887 4737 membar #Sync
4888 4738 btst FPRS_FEF, %l0
4889 4739 bz,a .pz_finished
4890 4740 wr %l0, 0, %fprs ! restore fprs
4891 4741
4892 4742 ! restore fpregs from stack
4893 4743 ldda [%l1]ASI_BLK_P, %d0
4894 4744 membar #Sync
4895 4745 wr %l0, 0, %fprs ! restore fprs
4896 4746
4897 4747 .pz_finished:
4898 4748 ret
4899 4749 restore %g0, 0, %o0 ! return (bzero or not)
4900 4750 SET_SIZE(hwblkclr)
4901 -#endif /* lint */
4902 4751
4903 -#ifdef lint
4904 -/* Copy 32 bytes of data from src to dst using physical addresses */
4905 -/*ARGSUSED*/
4906 -void
4907 -hw_pa_bcopy32(uint64_t src, uint64_t dst)
4908 -{}
4909 -#else /*!lint */
4910 -
4911 4752 /*
4912 4753 * Copy 32 bytes of data from src (%o0) to dst (%o1)
4913 4754 * using physical addresses.
4914 4755 */
4915 4756 ENTRY_NP(hw_pa_bcopy32)
4916 4757 rdpr %pstate, %g1
4917 4758 andn %g1, PSTATE_IE, %g2
4918 4759 wrpr %g0, %g2, %pstate
4919 4760
4920 4761 ldxa [%o0]ASI_MEM, %o2
4921 4762 add %o0, 8, %o0
4922 4763 ldxa [%o0]ASI_MEM, %o3
4923 4764 add %o0, 8, %o0
4924 4765 ldxa [%o0]ASI_MEM, %o4
4925 4766 add %o0, 8, %o0
4926 4767 ldxa [%o0]ASI_MEM, %o5
4927 4768 stxa %o2, [%o1]ASI_MEM
4928 4769 add %o1, 8, %o1
4929 4770 stxa %o3, [%o1]ASI_MEM
4930 4771 add %o1, 8, %o1
4931 4772 stxa %o4, [%o1]ASI_MEM
4932 4773 add %o1, 8, %o1
4933 4774 stxa %o5, [%o1]ASI_MEM
4934 4775
4935 4776 membar #Sync
4936 4777 retl
4937 4778 wrpr %g0, %g1, %pstate
4938 4779 SET_SIZE(hw_pa_bcopy32)
4939 -#endif /* lint */