Assorted cstyle updates
7127 remove -Wno-missing-braces from Makefile.uts
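Background for the diff below: fs_operation_def_t pairs an operation name with a union (a function pointer or an error code), and the old vnodeops table relied on C brace elision, omitting the outer braces around each array element. That is exactly what gcc's -Wmissing-braces flags, so fully bracing the initializers is what lets the -Wno-missing-braces suppression come out of Makefile.uts. A minimal standalone sketch of the pattern (op_def, dummy, table_old, and table_new are hypothetical illustrations, not the kernel's definitions):

#include <stdio.h>

/* Hypothetical stand-in for fs_operation_def_t: a name plus a union. */
struct op_def {
	const char *name;
	union {
		int (*func)(void);
		int error;
	} u;
};

static int
dummy(void)
{
	return (0);
}

/*
 * Brace elision: legal C, but "gcc -Wmissing-braces" warns here because
 * the outer braces around each array element are omitted.
 */
struct op_def table_old[] = {
	"first", { .func = dummy },
	"second", { .error = -1 },
	NULL, { NULL }
};

/* Fully braced, as in the hunk below: one brace pair per element. */
struct op_def table_new[] = {
	{ "first", { .func = dummy } },
	{ "second", { .error = -1 } },
	{ NULL, { NULL } }
};

int
main(void)
{
	printf("%s %s\n", table_old[0].name, table_new[0].name);
	return (0);
}

Both tables compile to identical data; only the fully bracketed form is quiet under -Wmissing-braces.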
--- old/usr/src/uts/common/fs/swapfs/swap_vnops.c
+++ new/usr/src/uts/common/fs/swapfs/swap_vnops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/param.h>
28 28 #include <sys/systm.h>
29 29 #include <sys/buf.h>
30 30 #include <sys/cred.h>
31 31 #include <sys/errno.h>
32 32 #include <sys/vnode.h>
33 33 #include <sys/vfs_opreg.h>
34 34 #include <sys/cmn_err.h>
35 35 #include <sys/swap.h>
36 36 #include <sys/mman.h>
37 37 #include <sys/vmsystm.h>
38 38 #include <sys/vtrace.h>
39 39 #include <sys/debug.h>
40 40 #include <sys/sysmacros.h>
41 41 #include <sys/vm.h>
42 42
43 43 #include <sys/fs/swapnode.h>
44 44
45 45 #include <vm/seg.h>
46 46 #include <vm/page.h>
47 47 #include <vm/pvn.h>
48 48 #include <fs/fs_subr.h>
49 49
50 50 #include <vm/seg_kp.h>
51 51
52 52 /*
53 53 * Define the routines within this file.
54 54 */
55 55 static int swap_getpage(struct vnode *vp, offset_t off, size_t len,
56 56 uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
57 57 caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
58 58 static int swap_putpage(struct vnode *vp, offset_t off, size_t len,
59 59 int flags, struct cred *cr, caller_context_t *ct);
60 60 static void swap_inactive(struct vnode *vp, struct cred *cr,
61 61 caller_context_t *ct);
62 62 static void swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
63 63 cred_t *cr, caller_context_t *ct);
64 64
65 65 static int swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
66 66 uint_t *protp, page_t **plarr, size_t plsz,
67 67 struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
68 68
69 69 int swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
70 70 uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
71 71 uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
72 72 enum seg_rw rw, struct cred *cr);
73 73
74 74 static int swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
75 75 size_t *lenp, int flags, struct cred *cr);
76 76
77 77 const fs_operation_def_t swap_vnodeops_template[] = {
78 - VOPNAME_INACTIVE, { .vop_inactive = swap_inactive },
79 - VOPNAME_GETPAGE, { .vop_getpage = swap_getpage },
80 - VOPNAME_PUTPAGE, { .vop_putpage = swap_putpage },
81 - VOPNAME_DISPOSE, { .vop_dispose = swap_dispose },
82 - VOPNAME_SETFL, { .error = fs_error },
83 - VOPNAME_POLL, { .error = fs_error },
84 - VOPNAME_PATHCONF, { .error = fs_error },
85 - VOPNAME_GETSECATTR, { .error = fs_error },
86 - VOPNAME_SHRLOCK, { .error = fs_error },
87 - NULL, NULL
78 + {VOPNAME_INACTIVE, { .vop_inactive = swap_inactive }},
79 + {VOPNAME_GETPAGE, { .vop_getpage = swap_getpage }},
80 + {VOPNAME_PUTPAGE, { .vop_putpage = swap_putpage }},
81 + {VOPNAME_DISPOSE, { .vop_dispose = swap_dispose }},
82 + {VOPNAME_SETFL, { .error = fs_error }},
83 + {VOPNAME_POLL, { .error = fs_error }},
84 + {VOPNAME_PATHCONF, { .error = fs_error }},
85 + {VOPNAME_GETSECATTR, { .error = fs_error }},
86 + {VOPNAME_SHRLOCK, { .error = fs_error }},
87 + {NULL, {NULL}}
88 88 };
89 89
90 90 vnodeops_t *swap_vnodeops;
91 91
92 92 /* ARGSUSED */
93 93 static void
94 94 swap_inactive(
95 95 struct vnode *vp,
96 96 struct cred *cr,
97 97 caller_context_t *ct)
98 98 {
99 99 SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
100 100 }
101 101
102 102 /*
103 103 * Return all the pages from [off..off+len] in the given file
104 104 */
105 105 /*ARGSUSED*/
106 106 static int
107 107 swap_getpage(
108 108 struct vnode *vp,
109 109 offset_t off,
110 110 size_t len,
111 111 uint_t *protp,
112 112 page_t *pl[],
113 113 size_t plsz,
114 114 struct seg *seg,
115 115 caddr_t addr,
116 116 enum seg_rw rw,
117 117 struct cred *cr,
118 118 caller_context_t *ct)
119 119 {
120 120 SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
121 121 (void *)vp, off, len, 0, 0);
122 122
123 123 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
124 124 "swapfs getpage:vp %p off %llx len %ld",
125 125 (void *)vp, off, len);
126 126
127 127 return (pvn_getpages(swap_getapage, vp, (u_offset_t)off, len, protp,
128 128 pl, plsz, seg, addr, rw, cr));
129 129 }
130 130
131 131 /*
132 132 * Called from pvn_getpages to get a particular page.
133 133 */
134 134 /*ARGSUSED*/
135 135 static int
136 136 swap_getapage(
137 137 struct vnode *vp,
138 138 u_offset_t off,
139 139 size_t len,
140 140 uint_t *protp,
141 141 page_t *pl[],
142 142 size_t plsz,
143 143 struct seg *seg,
144 144 caddr_t addr,
145 145 enum seg_rw rw,
146 146 struct cred *cr)
147 147 {
148 148 struct page *pp, *rpp;
149 149 int flags;
150 150 int err = 0;
151 151 struct vnode *pvp = NULL;
152 152 u_offset_t poff;
153 153 int flag_noreloc;
154 154 se_t lock;
155 155 extern int kcage_on;
156 156 int upgrade = 0;
157 157
158 158 SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
159 159 vp, off, len, 0, 0);
160 160
161 161 /*
162 162 * Until there is a call-back mechanism to cause SEGKP
163 163 * pages to be unlocked, make them non-relocatable.
164 164 */
165 165 if (SEG_IS_SEGKP(seg))
166 166 flag_noreloc = PG_NORELOC;
167 167 else
168 168 flag_noreloc = 0;
169 169
170 170 if (protp != NULL)
171 171 *protp = PROT_ALL;
172 172
173 173 lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
174 174
175 175 again:
176 176 if (pp = page_lookup(vp, off, lock)) {
177 177 /*
178 178 * In very rare instances, a segkp page may have been
179 179 * relocated outside of the kernel by the kernel cage
180 180 * due to the window between page_unlock() and
181 181 * VOP_PUTPAGE() in segkp_unlock(). Due to the
182 182 * rareness of these occurrences, the solution is to
183 183 * relocate the page to a P_NORELOC page.
184 184 */
185 185 if (flag_noreloc != 0) {
186 186 if (!PP_ISNORELOC(pp) && kcage_on) {
187 187 if (lock != SE_EXCL) {
188 188 upgrade = 1;
189 189 if (!page_tryupgrade(pp)) {
190 190 page_unlock(pp);
191 191 lock = SE_EXCL;
192 192 goto again;
193 193 }
194 194 }
195 195
196 196 if (page_relocate_cage(&pp, &rpp) != 0)
197 197 panic("swap_getapage: "
198 198 "page_relocate_cage failed");
199 199
200 200 pp = rpp;
201 201 }
202 202 }
203 203
204 204 if (pl) {
205 205 if (upgrade)
206 206 page_downgrade(pp);
207 207
208 208 pl[0] = pp;
209 209 pl[1] = NULL;
210 210 } else {
211 211 page_unlock(pp);
212 212 }
213 213 } else {
214 214 pp = page_create_va(vp, off, PAGESIZE,
215 215 PG_WAIT | PG_EXCL | flag_noreloc,
216 216 seg, addr);
217 217 /*
218 218 * Someone raced in and created the page after we did the
219 219 * lookup but before we did the create, so go back and
220 220 * try to look it up again.
221 221 */
222 222 if (pp == NULL)
223 223 goto again;
224 224 if (rw != S_CREATE) {
225 225 err = swap_getphysname(vp, off, &pvp, &poff);
226 226 if (pvp) {
227 227 struct anon *ap;
228 228 kmutex_t *ahm;
229 229
230 230 flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
231 231 err = VOP_PAGEIO(pvp, pp, poff,
232 232 PAGESIZE, flags, cr, NULL);
233 233
234 234 if (!err) {
235 235 ahm = AH_MUTEX(vp, off);
236 236 mutex_enter(ahm);
237 237
238 238 ap = swap_anon(vp, off);
239 239 if (ap == NULL) {
240 240 panic("swap_getapage:"
241 241 " null anon");
242 242 }
243 243
244 244 if (ap->an_pvp == pvp &&
245 245 ap->an_poff == poff) {
246 246 swap_phys_free(pvp, poff,
247 247 PAGESIZE);
248 248 ap->an_pvp = NULL;
249 249 ap->an_poff = NULL;
250 250 hat_setmod(pp);
251 251 }
252 252
253 253 mutex_exit(ahm);
254 254 }
255 255 } else {
256 256 if (!err)
257 257 pagezero(pp, 0, PAGESIZE);
258 258
259 259 /*
260 260 * If it's a fault ahead, release page_io_lock
261 261 * and SE_EXCL we grabbed in page_create_va
262 262 *
263 263 * If we are here, we haven't called VOP_PAGEIO
264 264 * and thus calling pvn_read_done(pp, B_READ)
265 265 * below may wrongly suggest we tried i/o. Besides,
266 266 * in case of async, pvn_read_done() should
267 267 * not be called by *getpage()
268 268 */
269 269 if (pl == NULL) {
270 270 /*
271 271 * swap_getphysname can return an error
272 272 * only when we are getting called from
273 273 * swapslot_free which passes non-NULL
274 274 * pl to VOP_GETPAGE.
275 275 */
276 276 ASSERT(err == 0);
277 277 page_io_unlock(pp);
278 278 page_unlock(pp);
279 279 }
280 280 }
281 281 }
282 282
283 283 ASSERT(pp != NULL);
284 284
285 285 if (err && pl)
286 286 pvn_read_done(pp, B_ERROR);
287 287
288 288 if (!err && pl)
289 289 pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
290 290 }
291 291 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
292 292 "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
293 293 return (err);
294 294 }
295 295
296 296 /*
297 297 * Called from large page anon routines only! This is an ugly hack where
298 298 * the anon layer directly calls into swapfs with a preallocated large page.
299 299 * Another method would have been to change to VOP and add an extra arg for
300 300 * the preallocated large page. This all could be cleaned up later when we
301 301 * solve the anonymous naming problem and no longer need to loop across of
302 302 * the VOP in PAGESIZE increments to fill in or initialize a large page as
303 303 * is done today. I think the latter is better since it avoid a change to
304 304 * the VOP interface that could later be avoided.
305 305 */
306 306 int
307 307 swap_getconpage(
308 308 struct vnode *vp,
309 309 u_offset_t off,
310 310 size_t len,
311 311 uint_t *protp,
312 312 page_t *pl[],
313 313 size_t plsz,
314 314 page_t *conpp,
315 315 uint_t *pszc,
316 316 spgcnt_t *nreloc,
317 317 struct seg *seg,
318 318 caddr_t addr,
319 319 enum seg_rw rw,
320 320 struct cred *cr)
321 321 {
322 322 struct page *pp;
323 323 int err = 0;
324 324 struct vnode *pvp = NULL;
325 325 u_offset_t poff;
326 326
327 327 ASSERT(len == PAGESIZE);
328 328 ASSERT(pl != NULL);
329 329 ASSERT(plsz == PAGESIZE);
330 330 ASSERT(protp == NULL);
331 331 ASSERT(nreloc != NULL);
332 332 ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
333 333 SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
334 334 vp, off, len, 0, 0);
335 335
336 336 /*
337 337 * If we are not using a preallocated page then we know one already
338 338 * exists. So just let the old code handle it.
339 339 */
340 340 if (conpp == NULL) {
341 341 err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
342 342 seg, addr, rw, cr);
343 343 return (err);
344 344 }
345 345 ASSERT(conpp->p_szc != 0);
346 346 ASSERT(PAGE_EXCL(conpp));
347 347
348 348
349 349 ASSERT(conpp->p_next == conpp);
350 350 ASSERT(conpp->p_prev == conpp);
351 351 ASSERT(!PP_ISAGED(conpp));
352 352 ASSERT(!PP_ISFREE(conpp));
353 353
354 354 *nreloc = 0;
355 355 pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);
356 356
357 357 /*
358 358 * If an existing page is found we may need to relocate.
359 359 */
360 360 if (pp != conpp) {
361 361 ASSERT(rw != S_CREATE);
362 362 ASSERT(pszc != NULL);
363 363 ASSERT(PAGE_SHARED(pp));
364 364 if (pp->p_szc < conpp->p_szc) {
365 365 *pszc = pp->p_szc;
366 366 page_unlock(pp);
367 367 err = -1;
368 368 } else if (pp->p_szc > conpp->p_szc &&
369 369 seg->s_szc > conpp->p_szc) {
370 370 *pszc = MIN(pp->p_szc, seg->s_szc);
371 371 page_unlock(pp);
372 372 err = -2;
373 373 } else {
374 374 pl[0] = pp;
375 375 pl[1] = NULL;
376 376 if (page_pptonum(pp) &
377 377 (page_get_pagecnt(conpp->p_szc) - 1))
378 378 cmn_err(CE_PANIC, "swap_getconpage: no root");
379 379 }
380 380 return (err);
381 381 }
382 382
383 383 ASSERT(PAGE_EXCL(pp));
384 384
385 385 if (*nreloc != 0) {
386 386 ASSERT(rw != S_CREATE);
387 387 pl[0] = pp;
388 388 pl[1] = NULL;
389 389 return (0);
390 390 }
391 391
392 392 *nreloc = 1;
393 393
394 394 /*
395 395 * If necessary, do the page i/o.
396 396 */
397 397 if (rw != S_CREATE) {
398 398 /*
399 399 * Since we are only called now on behalf of an
400 400 * address space operation it's impossible for
401 401 * us to fail, unlike swap_getapage() which
402 402 * also gets called from swapslot_free().
403 403 */
404 404 if (swap_getphysname(vp, off, &pvp, &poff)) {
405 405 cmn_err(CE_PANIC,
406 406 "swap_getconpage: swap_getphysname failed!");
407 407 }
408 408
409 409 if (pvp != NULL) {
410 410 err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
411 411 cr, NULL);
412 412 if (err == 0) {
413 413 struct anon *ap;
414 414 kmutex_t *ahm;
415 415
416 416 ahm = AH_MUTEX(vp, off);
417 417 mutex_enter(ahm);
418 418 ap = swap_anon(vp, off);
419 419 if (ap == NULL)
420 420 panic("swap_getconpage: null anon");
421 421 if (ap->an_pvp != pvp || ap->an_poff != poff)
422 422 panic("swap_getconpage: bad anon");
423 423
424 424 swap_phys_free(pvp, poff, PAGESIZE);
425 425 ap->an_pvp = NULL;
426 426 ap->an_poff = NULL;
427 427 hat_setmod(pp);
428 428 mutex_exit(ahm);
429 429 }
430 430 } else {
431 431 pagezero(pp, 0, PAGESIZE);
432 432 }
433 433 }
434 434
435 435 /*
436 436 * Normally we would let pvn_read_done() destroy
437 437 * the page on IO error. But since this is a preallocated
438 438 * page we'll let the anon layer handle it.
439 439 */
440 440 page_io_unlock(pp);
441 441 if (err != 0)
442 442 page_hashout(pp, NULL);
443 443 ASSERT(pp->p_next == pp);
444 444 ASSERT(pp->p_prev == pp);
445 445
446 446 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
447 447 "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);
448 448
449 449 pl[0] = pp;
450 450 pl[1] = NULL;
451 451 return (err);
452 452 }
453 453
454 454 /* Async putpage klustering stuff */
455 455 int sw_pending_size;
456 456 extern int klustsize;
457 457 extern struct async_reqs *sw_getreq();
458 458 extern void sw_putreq(struct async_reqs *);
459 459 extern void sw_putbackreq(struct async_reqs *);
460 460 extern struct async_reqs *sw_getfree();
461 461 extern void sw_putfree(struct async_reqs *);
462 462
463 463 static size_t swap_putpagecnt, swap_pagespushed;
464 464 static size_t swap_otherfail, swap_otherpages;
465 465 static size_t swap_klustfail, swap_klustpages;
466 466 static size_t swap_getiofail, swap_getiopages;
467 467
468 468 /*
469 469 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}.
470 470 * If len == 0, do from off to EOF.
471 471 */
472 472 static int swap_nopage = 0; /* Don't do swap_putpage's if set */
473 473
474 474 /* ARGSUSED */
475 475 static int
476 476 swap_putpage(
477 477 struct vnode *vp,
478 478 offset_t off,
479 479 size_t len,
480 480 int flags,
481 481 struct cred *cr,
482 482 caller_context_t *ct)
483 483 {
484 484 page_t *pp;
485 485 u_offset_t io_off;
486 486 size_t io_len = 0;
487 487 int err = 0;
488 488 int nowait;
489 489 struct async_reqs *arg;
490 490
491 491 if (swap_nopage)
492 492 return (0);
493 493
494 494 ASSERT(vp->v_count != 0);
495 495
496 496 nowait = flags & B_PAGE_NOWAIT;
497 497
498 498 /*
499 499 * Clear force flag so that p_lckcnt pages are not invalidated.
500 500 */
501 501 flags &= ~(B_FORCE | B_PAGE_NOWAIT);
502 502
503 503 SWAPFS_PRINT(SWAP_VOPS,
504 504 "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
505 505 (void *)vp, off, len, flags, 0);
506 506 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
507 507 "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);
508 508
509 509 if (vp->v_flag & VNOMAP)
510 510 return (ENOSYS);
511 511
512 512 if (!vn_has_cached_data(vp))
513 513 return (0);
514 514
515 515 if (len == 0) {
516 516 if (curproc == proc_pageout)
517 517 cmn_err(CE_PANIC, "swapfs: pageout can't block");
518 518
519 519 /* Search the entire vp list for pages >= off. */
520 520 err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
521 521 flags, cr);
522 522 } else {
523 523 u_offset_t eoff;
524 524
525 525 /*
526 526 * Loop over all offsets in the range [off...off + len]
527 527 * looking for pages to deal with.
528 528 */
529 529 eoff = off + len;
530 530 for (io_off = (u_offset_t)off; io_off < eoff;
531 531 io_off += io_len) {
532 532 /*
533 533 * If we run out of async req slots, put the page
534 534 * out now instead of queuing it.
535 535 */
536 536 if (flags == (B_ASYNC | B_FREE) &&
537 537 sw_pending_size < klustsize &&
538 538 (arg = sw_getfree())) {
539 539 /*
540 540 * If we are clustering, we should allow
541 541 * pageout to feed us more pages because # of
542 542 * pushes is limited by # of I/Os, and one
543 543 * cluster is considered to be one I/O.
544 544 */
545 545 if (pushes)
546 546 pushes--;
547 547
548 548 arg->a_vp = vp;
549 549 arg->a_off = io_off;
550 550 arg->a_len = PAGESIZE;
551 551 arg->a_flags = B_ASYNC | B_FREE;
552 552 arg->a_cred = kcred;
553 553 sw_putreq(arg);
554 554 io_len = PAGESIZE;
555 555 continue;
556 556 }
557 557 /*
558 558 * If we are not invalidating pages, use the
559 559 * routine page_lookup_nowait() to prevent
560 560 * reclaiming them from the free list.
561 561 */
562 562 if (!nowait && ((flags & B_INVAL) ||
563 563 (flags & (B_ASYNC | B_FREE)) == B_FREE))
564 564 pp = page_lookup(vp, io_off, SE_EXCL);
565 565 else
566 566 pp = page_lookup_nowait(vp, io_off,
567 567 (flags & (B_FREE | B_INVAL)) ?
568 568 SE_EXCL : SE_SHARED);
569 569
570 570 if (pp == NULL || pvn_getdirty(pp, flags) == 0)
571 571 io_len = PAGESIZE;
572 572 else {
573 573 err = swap_putapage(vp, pp, &io_off, &io_len,
574 574 flags, cr);
575 575 if (err != 0)
576 576 break;
577 577 }
578 578 }
579 579 }
580 580 /* If invalidating, verify all pages on vnode list are gone. */
581 581 if (err == 0 && off == 0 && len == 0 &&
582 582 (flags & B_INVAL) && vn_has_cached_data(vp)) {
583 583 cmn_err(CE_WARN,
584 584 "swap_putpage: B_INVAL, pages not gone");
585 585 }
586 586 return (err);
587 587 }
588 588
589 589 /*
590 590 * Write out a single page.
591 591 * For swapfs this means choose a physical swap slot and write the page
592 592 * out using VOP_PAGEIO.
593 593 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
594 594 * swapfs pages, a bunch of contiguous swap slots and then write them
595 595 * all out in one clustered i/o.
596 596 */
597 597 /*ARGSUSED*/
598 598 static int
599 599 swap_putapage(
600 600 struct vnode *vp,
601 601 page_t *pp,
602 602 u_offset_t *offp,
603 603 size_t *lenp,
604 604 int flags,
605 605 struct cred *cr)
606 606 {
607 607 int err;
608 608 struct vnode *pvp;
609 609 u_offset_t poff, off;
610 610 u_offset_t doff;
611 611 size_t dlen;
612 612 size_t klsz = 0;
613 613 u_offset_t klstart = 0;
614 614 struct vnode *klvp = NULL;
615 615 page_t *pplist;
616 616 se_t se;
617 617 struct async_reqs *arg;
618 618 size_t swap_klustsize;
619 619
620 620 /*
621 621 * This check handles callers that enter swap_putpage with len == 0.
622 622 * swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty,
623 623 * so the same queuing must be done here when the caller passes the
624 624 * same B_ASYNC|B_FREE flags.
625 625 */
626 626 if (flags == (B_ASYNC | B_FREE) &&
627 627 sw_pending_size < klustsize && (arg = sw_getfree())) {
628 628
629 629 hat_setmod(pp);
630 630 page_io_unlock(pp);
631 631 page_unlock(pp);
632 632
633 633 arg->a_vp = vp;
634 634 arg->a_off = pp->p_offset;
635 635 arg->a_len = PAGESIZE;
636 636 arg->a_flags = B_ASYNC | B_FREE;
637 637 arg->a_cred = kcred;
638 638 sw_putreq(arg);
639 639
640 640 return (0);
641 641 }
642 642
643 643 SWAPFS_PRINT(SWAP_PUTP,
644 644 "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
645 645 pp, vp, pp->p_offset, flags, 0);
646 646
647 647 ASSERT(PAGE_LOCKED(pp));
648 648
649 649 off = pp->p_offset;
650 650
651 651 doff = off;
652 652 dlen = PAGESIZE;
653 653
654 654 if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
655 655 err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
656 656 hat_setmod(pp);
657 657 page_io_unlock(pp);
658 658 page_unlock(pp);
659 659 goto out;
660 660 }
661 661
662 662 klvp = pvp;
663 663 klstart = poff;
664 664 pplist = pp;
665 665 /*
666 666 * If this is ASYNC | FREE and we've accumulated a bunch of such
667 667 * pending requests, kluster.
668 668 */
669 669 if (flags == (B_ASYNC | B_FREE))
670 670 swap_klustsize = klustsize;
671 671 else
672 672 swap_klustsize = PAGESIZE;
673 673 se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
674 674 klsz = PAGESIZE;
675 675 while (klsz < swap_klustsize) {
676 676 if ((arg = sw_getreq()) == NULL) {
677 677 swap_getiofail++;
678 678 swap_getiopages += btop(klsz);
679 679 break;
680 680 }
681 681 ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
682 682 vp = arg->a_vp;
683 683 off = arg->a_off;
684 684
685 685 if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
686 686 swap_otherfail++;
687 687 swap_otherpages += btop(klsz);
688 688 sw_putfree(arg);
689 689 break;
690 690 }
691 691 if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
692 692 sw_putfree(arg);
693 693 continue;
694 694 }
695 695 /* Get new physical backing store for the page */
696 696 doff = off;
697 697 dlen = PAGESIZE;
698 698 if (err = swap_newphysname(vp, off, &doff, &dlen,
699 699 &pvp, &poff)) {
700 700 swap_otherfail++;
701 701 swap_otherpages += btop(klsz);
702 702 hat_setmod(pp);
703 703 page_io_unlock(pp);
704 704 page_unlock(pp);
705 705 sw_putbackreq(arg);
706 706 break;
707 707 }
708 708 /* Try to cluster new physical name with previous ones */
709 709 if (klvp == pvp && poff == klstart + klsz) {
710 710 klsz += PAGESIZE;
711 711 page_add(&pplist, pp);
712 712 pplist = pplist->p_next;
713 713 sw_putfree(arg);
714 714 } else if (klvp == pvp && poff == klstart - PAGESIZE) {
715 715 klsz += PAGESIZE;
716 716 klstart -= PAGESIZE;
717 717 page_add(&pplist, pp);
718 718 sw_putfree(arg);
719 719 } else {
720 720 swap_klustfail++;
721 721 swap_klustpages += btop(klsz);
722 722 hat_setmod(pp);
723 723 page_io_unlock(pp);
724 724 page_unlock(pp);
725 725 sw_putbackreq(arg);
726 726 break;
727 727 }
728 728 }
729 729
730 730 err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
731 731 B_WRITE | flags, cr, NULL);
732 732
733 733 if ((flags & B_ASYNC) == 0)
734 734 pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
735 735
736 736 /* Statistics */
737 737 if (!err) {
738 738 swap_putpagecnt++;
739 739 swap_pagespushed += btop(klsz);
740 740 }
741 741 out:
742 742 TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
743 743 "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
744 744 vp, klvp, klstart, klsz);
745 745 if (err && err != ENOMEM)
746 746 cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
747 747 if (lenp)
748 748 *lenp = PAGESIZE;
749 749 return (err);
750 750 }
751 751
752 752 static void
753 753 swap_dispose(
754 754 vnode_t *vp,
755 755 page_t *pp,
756 756 int fl,
757 757 int dn,
758 758 cred_t *cr,
759 759 caller_context_t *ct)
760 760 {
761 761 int err;
762 762 u_offset_t off = pp->p_offset;
763 763 vnode_t *pvp;
764 764 u_offset_t poff;
765 765
766 766 ASSERT(PAGE_EXCL(pp));
767 767
768 768 /*
769 769 * The caller will free/invalidate a large page in one shot instead of
770 770 * one small page at a time.
771 771 */
772 772 if (pp->p_szc != 0) {
773 773 page_unlock(pp);
774 774 return;
775 775 }
776 776
777 777 err = swap_getphysname(vp, off, &pvp, &poff);
778 778 if (!err && pvp != NULL)
779 779 VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
780 780 else
781 781 fs_dispose(vp, pp, fl, dn, cr, ct);
782 782 }