Assorted cstyle updates
7127 remove -Wno-missing-braces from Makefile.uts
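Background for the diff below: fs_operation_def_t pairs an operation name with a union (a function pointer or an error code), and the old vnodeops table relied on C brace elision, omitting the outer braces around each array element. That is exactly what gcc's -Wmissing-braces flags, so fully bracing the initializers is what lets the -Wno-missing-braces suppression come out of Makefile.uts. A minimal standalone sketch of the pattern (op_def, dummy, table_old, and table_new are hypothetical illustrations, not the kernel's definitions):

#include <stdio.h>

/* Hypothetical stand-in for fs_operation_def_t: a name plus a union. */
struct op_def {
	const char *name;
	union {
		int (*func)(void);
		int error;
	} u;
};

static int
dummy(void)
{
	return (0);
}

/*
 * Brace elision: legal C, but "gcc -Wmissing-braces" warns here because
 * the outer braces around each array element are omitted.
 */
struct op_def table_old[] = {
	"first", { .func = dummy },
	"second", { .error = -1 },
	NULL, { NULL }
};

/* Fully braced, as in the hunk below: one brace pair per element. */
struct op_def table_new[] = {
	{ "first", { .func = dummy } },
	{ "second", { .error = -1 } },
	{ NULL, { NULL } }
};

int
main(void)
{
	printf("%s %s\n", table_old[0].name, table_new[0].name);
	return (0);
}

Both tables compile to identical data; only the fully bracketed form is quiet under -Wmissing-braces.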
--- old/usr/src/uts/common/fs/swapfs/swap_vnops.c
+++ new/usr/src/uts/common/fs/swapfs/swap_vnops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/param.h>
28 28 #include <sys/systm.h>
29 29 #include <sys/buf.h>
30 30 #include <sys/cred.h>
31 31 #include <sys/errno.h>
32 32 #include <sys/vnode.h>
33 33 #include <sys/vfs_opreg.h>
34 34 #include <sys/cmn_err.h>
35 35 #include <sys/swap.h>
36 36 #include <sys/mman.h>
37 37 #include <sys/vmsystm.h>
38 38 #include <sys/vtrace.h>
39 39 #include <sys/debug.h>
40 40 #include <sys/sysmacros.h>
41 41 #include <sys/vm.h>
42 42
43 43 #include <sys/fs/swapnode.h>
44 44
45 45 #include <vm/seg.h>
46 46 #include <vm/page.h>
47 47 #include <vm/pvn.h>
48 48 #include <fs/fs_subr.h>
49 49
50 50 #include <vm/seg_kp.h>
51 51
52 52 /*
53 53 * Define the routines within this file.
54 54 */
55 55 static int swap_getpage(struct vnode *vp, offset_t off, size_t len,
56 56 uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
57 57 caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
58 58 static int swap_putpage(struct vnode *vp, offset_t off, size_t len,
59 59 int flags, struct cred *cr, caller_context_t *ct);
60 60 static void swap_inactive(struct vnode *vp, struct cred *cr,
61 61 caller_context_t *ct);
62 62 static void swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
63 63 cred_t *cr, caller_context_t *ct);
64 64
65 65 static int swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
66 66 uint_t *protp, page_t **plarr, size_t plsz,
67 67 struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
68 68
69 69 int swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
70 70 uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
71 71 uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
72 72 enum seg_rw rw, struct cred *cr);
73 73
74 74 static int swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
75 75 size_t *lenp, int flags, struct cred *cr);
76 76
77 77 const fs_operation_def_t swap_vnodeops_template[] = {
78 - VOPNAME_INACTIVE, { .vop_inactive = swap_inactive },
79 - VOPNAME_GETPAGE, { .vop_getpage = swap_getpage },
80 - VOPNAME_PUTPAGE, { .vop_putpage = swap_putpage },
81 - VOPNAME_DISPOSE, { .vop_dispose = swap_dispose },
82 - VOPNAME_SETFL, { .error = fs_error },
83 - VOPNAME_POLL, { .error = fs_error },
84 - VOPNAME_PATHCONF, { .error = fs_error },
85 - VOPNAME_GETSECATTR, { .error = fs_error },
86 - VOPNAME_SHRLOCK, { .error = fs_error },
87 - NULL, NULL
78 + {VOPNAME_INACTIVE, { .vop_inactive = swap_inactive }},
79 + {VOPNAME_GETPAGE, { .vop_getpage = swap_getpage }},
80 + {VOPNAME_PUTPAGE, { .vop_putpage = swap_putpage }},
81 + {VOPNAME_DISPOSE, { .vop_dispose = swap_dispose }},
82 + {VOPNAME_SETFL, { .error = fs_error }},
83 + {VOPNAME_POLL, { .error = fs_error }},
84 + {VOPNAME_PATHCONF, { .error = fs_error }},
85 + {VOPNAME_GETSECATTR, { .error = fs_error }},
86 + {VOPNAME_SHRLOCK, { .error = fs_error }},
87 + {NULL, {NULL}}
88 88 };
89 89
90 90 vnodeops_t *swap_vnodeops;
91 91
92 92 /* ARGSUSED */
93 93 static void
94 94 swap_inactive(
95 95 struct vnode *vp,
96 96 struct cred *cr,
97 97 caller_context_t *ct)
98 98 {
99 99 SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
100 100 }
101 101
102 102 /*
103 103 * Return all the pages from [off..off+len] in the given file
104 104 */
105 105 /*ARGSUSED*/
106 106 static int
107 107 swap_getpage(
108 108 struct vnode *vp,
109 109 offset_t off,
110 110 size_t len,
111 111 uint_t *protp,
112 112 page_t *pl[],
113 113 size_t plsz,
114 114 struct seg *seg,
115 115 caddr_t addr,
116 116 enum seg_rw rw,
117 117 struct cred *cr,
118 118 caller_context_t *ct)
119 119 {
120 120 SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
121 121 (void *)vp, off, len, 0, 0);
122 122
123 123 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
124 124 "swapfs getpage:vp %p off %llx len %ld",
125 125 (void *)vp, off, len);
126 126
127 127 return (pvn_getpages(swap_getapage, vp, (u_offset_t)off, len, protp,
128 128 pl, plsz, seg, addr, rw, cr));
129 129 }
130 130
131 131 /*
132 132 * Called from pvn_getpages to get a particular page.
133 133 */
134 134 /*ARGSUSED*/
135 135 static int
136 136 swap_getapage(
137 137 struct vnode *vp,
138 138 u_offset_t off,
139 139 size_t len,
140 140 uint_t *protp,
141 141 page_t *pl[],
142 142 size_t plsz,
143 143 struct seg *seg,
144 144 caddr_t addr,
145 145 enum seg_rw rw,
146 146 struct cred *cr)
147 147 {
148 148 struct page *pp, *rpp;
149 149 int flags;
150 150 int err = 0;
151 151 struct vnode *pvp = NULL;
152 152 u_offset_t poff;
153 153 int flag_noreloc;
154 154 se_t lock;
155 155 extern int kcage_on;
156 156 int upgrade = 0;
157 157
158 158 SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
159 159 vp, off, len, 0, 0);
160 160
161 161 /*
162 162 * Until there is a call-back mechanism to cause SEGKP
163 163 * pages to be unlocked, make them non-relocatable.
164 164 */
165 165 if (SEG_IS_SEGKP(seg))
166 166 flag_noreloc = PG_NORELOC;
167 167 else
168 168 flag_noreloc = 0;
169 169
170 170 if (protp != NULL)
171 171 *protp = PROT_ALL;
172 172
173 173 lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
174 174
175 175 again:
176 176 if (pp = page_lookup(vp, off, lock)) {
177 177 /*
178 178 * In very rare instances, a segkp page may have been
179 179 * relocated outside of the kernel by the kernel cage
180 180 * due to the window between page_unlock() and
181 181 * VOP_PUTPAGE() in segkp_unlock(). Due to the
182 182 * rareness of these occurrences, the solution is to
183 183 * relocate the page to a P_NORELOC page.
184 184 */
185 185 if (flag_noreloc != 0) {
186 186 if (!PP_ISNORELOC(pp) && kcage_on) {
187 187 if (lock != SE_EXCL) {
188 188 upgrade = 1;
189 189 if (!page_tryupgrade(pp)) {
190 190 page_unlock(pp);
191 191 lock = SE_EXCL;
192 192 goto again;
193 193 }
194 194 }
195 195
196 196 if (page_relocate_cage(&pp, &rpp) != 0)
197 197 panic("swap_getapage: "
198 198 "page_relocate_cage failed");
199 199
200 200 pp = rpp;
201 201 }
202 202 }
203 203
204 204 if (pl) {
205 205 if (upgrade)
206 206 page_downgrade(pp);
207 207
208 208 pl[0] = pp;
209 209 pl[1] = NULL;
210 210 } else {
211 211 page_unlock(pp);
212 212 }
213 213 } else {
214 214 pp = page_create_va(vp, off, PAGESIZE,
215 215 PG_WAIT | PG_EXCL | flag_noreloc,
216 216 seg, addr);
217 217 /*
218 218 * Someone raced in and created the page after we did the
219 219 * lookup but before we did the create, so go back and
220 220 * try to look it up again.
221 221 */
222 222 if (pp == NULL)
223 223 goto again;
224 224 if (rw != S_CREATE) {
225 225 err = swap_getphysname(vp, off, &pvp, &poff);
226 226 if (pvp) {
227 227 struct anon *ap;
228 228 kmutex_t *ahm;
229 229
230 230 flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
231 231 err = VOP_PAGEIO(pvp, pp, poff,
232 232 PAGESIZE, flags, cr, NULL);
233 233
234 234 if (!err) {
235 235 ahm = AH_MUTEX(vp, off);
236 236 mutex_enter(ahm);
237 237
238 238 ap = swap_anon(vp, off);
239 239 if (ap == NULL) {
240 240 panic("swap_getapage:"
241 241 " null anon");
242 242 }
243 243
244 244 if (ap->an_pvp == pvp &&
245 245 ap->an_poff == poff) {
246 246 swap_phys_free(pvp, poff,
247 247 PAGESIZE);
248 248 ap->an_pvp = NULL;
249 249 ap->an_poff = NULL;
250 250 hat_setmod(pp);
251 251 }
252 252
253 253 mutex_exit(ahm);
254 254 }
255 255 } else {
256 256 if (!err)
257 257 pagezero(pp, 0, PAGESIZE);
258 258
259 259 /*
260 260 * If it's a fault ahead, release page_io_lock
261 261 * and SE_EXCL we grabbed in page_create_va
262 262 *
263 263 * If we are here, we haven't called VOP_PAGEIO
264 264 * and thus calling pvn_read_done(pp, B_READ)
265 265 * below may wrongly suggest we tried i/o. Besides,
266 266 * in case of async, pvn_read_done() should
267 267 * not be called by *getpage()
268 268 */
269 269 if (pl == NULL) {
270 270 /*
271 271 * swap_getphysname can return an error
272 272 * only when we are getting called from
273 273 * swapslot_free which passes non-NULL
274 274 * pl to VOP_GETPAGE.
275 275 */
276 276 ASSERT(err == 0);
277 277 page_io_unlock(pp);
278 278 page_unlock(pp);
279 279 }
280 280 }
281 281 }
282 282
283 283 ASSERT(pp != NULL);
284 284
285 285 if (err && pl)
286 286 pvn_read_done(pp, B_ERROR);
287 287
288 288 if (!err && pl)
289 289 pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
290 290 }
291 291 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
292 292 "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
293 293 return (err);
294 294 }
295 295
296 296 /*
297 297 * Called from large page anon routines only! This is an ugly hack where
298 298 * the anon layer directly calls into swapfs with a preallocated large page.
299 299 * Another method would have been to change to VOP and add an extra arg for
300 300 * the preallocated large page. This all could be cleaned up later when we
301 301 * solve the anonymous naming problem and no longer need to loop across of
302 302 * the VOP in PAGESIZE increments to fill in or initialize a large page as
303 303 * is done today. I think the latter is better since it avoid a change to
304 304 * the VOP interface that could later be avoided.
305 305 */
306 306 int
307 307 swap_getconpage(
308 308 struct vnode *vp,
309 309 u_offset_t off,
310 310 size_t len,
311 311 uint_t *protp,
312 312 page_t *pl[],
313 313 size_t plsz,
314 314 page_t *conpp,
315 315 uint_t *pszc,
316 316 spgcnt_t *nreloc,
317 317 struct seg *seg,
318 318 caddr_t addr,
319 319 enum seg_rw rw,
320 320 struct cred *cr)
321 321 {
322 322 struct page *pp;
323 323 int err = 0;
324 324 struct vnode *pvp = NULL;
325 325 u_offset_t poff;
326 326
327 327 ASSERT(len == PAGESIZE);
328 328 ASSERT(pl != NULL);
329 329 ASSERT(plsz == PAGESIZE);
330 330 ASSERT(protp == NULL);
331 331 ASSERT(nreloc != NULL);
332 332 ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
333 333 SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
334 334 vp, off, len, 0, 0);
335 335
336 336 /*
337 337 * If we are not using a preallocated page then we know one already
338 338 * exists. So just let the old code handle it.
339 339 */
340 340 if (conpp == NULL) {
341 341 err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
342 342 seg, addr, rw, cr);
343 343 return (err);
344 344 }
345 345 ASSERT(conpp->p_szc != 0);
346 346 ASSERT(PAGE_EXCL(conpp));
347 347
348 348
349 349 ASSERT(conpp->p_next == conpp);
350 350 ASSERT(conpp->p_prev == conpp);
351 351 ASSERT(!PP_ISAGED(conpp));
352 352 ASSERT(!PP_ISFREE(conpp));
353 353
354 354 *nreloc = 0;
355 355 pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);
356 356
357 357 /*
358 358 * If an existing page is found we may need to relocate.
359 359 */
360 360 if (pp != conpp) {
361 361 ASSERT(rw != S_CREATE);
362 362 ASSERT(pszc != NULL);
363 363 ASSERT(PAGE_SHARED(pp));
364 364 if (pp->p_szc < conpp->p_szc) {
365 365 *pszc = pp->p_szc;
366 366 page_unlock(pp);
367 367 err = -1;
368 368 } else if (pp->p_szc > conpp->p_szc &&
369 369 seg->s_szc > conpp->p_szc) {
370 370 *pszc = MIN(pp->p_szc, seg->s_szc);
371 371 page_unlock(pp);
372 372 err = -2;
373 373 } else {
374 374 pl[0] = pp;
375 375 pl[1] = NULL;
376 376 if (page_pptonum(pp) &
377 377 (page_get_pagecnt(conpp->p_szc) - 1))
378 378 cmn_err(CE_PANIC, "swap_getconpage: no root");
379 379 }
380 380 return (err);
381 381 }
382 382
383 383 ASSERT(PAGE_EXCL(pp));
384 384
385 385 if (*nreloc != 0) {
386 386 ASSERT(rw != S_CREATE);
387 387 pl[0] = pp;
388 388 pl[1] = NULL;
389 389 return (0);
390 390 }
391 391
392 392 *nreloc = 1;
393 393
394 394 /*
395 395 * If necessary, do the page i/o.
396 396 */
397 397 if (rw != S_CREATE) {
398 398 /*
399 399 * Since we are only called now on behalf of an
400 400 * address space operation it's impossible for
401 401 * us to fail, unlike swap_getapage() which
402 402 * also gets called from swapslot_free().
403 403 */
404 404 if (swap_getphysname(vp, off, &pvp, &poff)) {
405 405 cmn_err(CE_PANIC,
406 406 "swap_getconpage: swap_getphysname failed!");
407 407 }
408 408
409 409 if (pvp != NULL) {
410 410 err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
411 411 cr, NULL);
412 412 if (err == 0) {
413 413 struct anon *ap;
414 414 kmutex_t *ahm;
415 415
416 416 ahm = AH_MUTEX(vp, off);
417 417 mutex_enter(ahm);
418 418 ap = swap_anon(vp, off);
419 419 if (ap == NULL)
420 420 panic("swap_getconpage: null anon");
421 421 if (ap->an_pvp != pvp || ap->an_poff != poff)
422 422 panic("swap_getconpage: bad anon");
423 423
424 424 swap_phys_free(pvp, poff, PAGESIZE);
425 425 ap->an_pvp = NULL;
426 426 ap->an_poff = NULL;
427 427 hat_setmod(pp);
428 428 mutex_exit(ahm);
429 429 }
430 430 } else {
431 431 pagezero(pp, 0, PAGESIZE);
432 432 }
433 433 }
434 434
435 435 /*
436 436 * Normally we would let pvn_read_done() destroy
437 437 * the page on IO error. But since this is a preallocated
438 438 * page we'll let the anon layer handle it.
439 439 */
440 440 page_io_unlock(pp);
441 441 if (err != 0)
442 442 page_hashout(pp, NULL);
443 443 ASSERT(pp->p_next == pp);
444 444 ASSERT(pp->p_prev == pp);
445 445
446 446 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
447 447 "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);
448 448
449 449 pl[0] = pp;
450 450 pl[1] = NULL;
451 451 return (err);
452 452 }
453 453
454 454 /* Async putpage klustering stuff */
455 455 int sw_pending_size;
456 456 extern int klustsize;
457 457 extern struct async_reqs *sw_getreq();
458 458 extern void sw_putreq(struct async_reqs *);
459 459 extern void sw_putbackreq(struct async_reqs *);
460 460 extern struct async_reqs *sw_getfree();
461 461 extern void sw_putfree(struct async_reqs *);
462 462
463 463 static size_t swap_putpagecnt, swap_pagespushed;
464 464 static size_t swap_otherfail, swap_otherpages;
465 465 static size_t swap_klustfail, swap_klustpages;
466 466 static size_t swap_getiofail, swap_getiopages;
467 467
468 468 /*
469 469 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}.
470 470 * If len == 0, do from off to EOF.
471 471 */
472 472 static int swap_nopage = 0; /* Don't do swap_putpage's if set */
473 473
474 474 /* ARGSUSED */
475 475 static int
476 476 swap_putpage(
477 477 struct vnode *vp,
478 478 offset_t off,
479 479 size_t len,
480 480 int flags,
481 481 struct cred *cr,
482 482 caller_context_t *ct)
483 483 {
484 484 page_t *pp;
485 485 u_offset_t io_off;
486 486 size_t io_len = 0;
487 487 int err = 0;
488 488 int nowait;
489 489 struct async_reqs *arg;
490 490
491 491 if (swap_nopage)
492 492 return (0);
493 493
494 494 ASSERT(vp->v_count != 0);
495 495
496 496 nowait = flags & B_PAGE_NOWAIT;
497 497
498 498 /*
499 499 * Clear force flag so that p_lckcnt pages are not invalidated.
500 500 */
501 501 flags &= ~(B_FORCE | B_PAGE_NOWAIT);
502 502
503 503 SWAPFS_PRINT(SWAP_VOPS,
504 504 "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
505 505 (void *)vp, off, len, flags, 0);
506 506 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
507 507 "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);
508 508
509 509 if (vp->v_flag & VNOMAP)
510 510 return (ENOSYS);
511 511
512 512 if (!vn_has_cached_data(vp))
513 513 return (0);
514 514
515 515 if (len == 0) {
516 516 if (curproc == proc_pageout)
517 517 cmn_err(CE_PANIC, "swapfs: pageout can't block");
518 518
519 519 /* Search the entire vp list for pages >= off. */
520 520 err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
521 521 flags, cr);
522 522 } else {
523 523 u_offset_t eoff;
524 524
525 525 /*
526 526 * Loop over all offsets in the range [off...off + len]
527 527 * looking for pages to deal with.
528 528 */
529 529 eoff = off + len;
530 530 for (io_off = (u_offset_t)off; io_off < eoff;
531 531 io_off += io_len) {
532 532 /*
533 533 * If we run out of async req slots, put the page
534 534 * out now instead of queuing it.
535 535 */
536 536 if (flags == (B_ASYNC | B_FREE) &&
537 537 sw_pending_size < klustsize &&
538 538 (arg = sw_getfree())) {
539 539 /*
540 540 * If we are clustering, we should allow
541 541 * pageout to feed us more pages because # of
542 542 * pushes is limited by # of I/Os, and one
543 543 * cluster is considered to be one I/O.
544 544 */
545 545 if (pushes)
546 546 pushes--;
547 547
548 548 arg->a_vp = vp;
549 549 arg->a_off = io_off;
550 550 arg->a_len = PAGESIZE;
551 551 arg->a_flags = B_ASYNC | B_FREE;
552 552 arg->a_cred = kcred;
553 553 sw_putreq(arg);
554 554 io_len = PAGESIZE;
555 555 continue;
556 556 }
557 557 /*
558 558 * If we are not invalidating pages, use the
559 559 * routine page_lookup_nowait() to prevent
560 560 * reclaiming them from the free list.
561 561 */
562 562 if (!nowait && ((flags & B_INVAL) ||
563 563 (flags & (B_ASYNC | B_FREE)) == B_FREE))
564 564 pp = page_lookup(vp, io_off, SE_EXCL);
565 565 else
566 566 pp = page_lookup_nowait(vp, io_off,
567 567 (flags & (B_FREE | B_INVAL)) ?
568 568 SE_EXCL : SE_SHARED);
569 569
570 570 if (pp == NULL || pvn_getdirty(pp, flags) == 0)
571 571 io_len = PAGESIZE;
572 572 else {
573 573 err = swap_putapage(vp, pp, &io_off, &io_len,
574 574 flags, cr);
575 575 if (err != 0)
576 576 break;
577 577 }
578 578 }
579 579 }
580 580 /* If invalidating, verify all pages on vnode list are gone. */
581 581 if (err == 0 && off == 0 && len == 0 &&
582 582 (flags & B_INVAL) && vn_has_cached_data(vp)) {
583 583 cmn_err(CE_WARN,
584 584 "swap_putpage: B_INVAL, pages not gone");
585 585 }
586 586 return (err);
587 587 }
588 588
589 589 /*
590 590 * Write out a single page.
591 591 * For swapfs this means choose a physical swap slot and write the page
592 592 * out using VOP_PAGEIO.
593 593 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
594 594 * swapfs pages, a bunch of contiguous swap slots and then write them
595 595 * all out in one clustered i/o.
596 596 */
597 597 /*ARGSUSED*/
598 598 static int
599 599 swap_putapage(
600 600 struct vnode *vp,
601 601 page_t *pp,
602 602 u_offset_t *offp,
603 603 size_t *lenp,
604 604 int flags,
605 605 struct cred *cr)
606 606 {
607 607 int err;
608 608 struct vnode *pvp;
609 609 u_offset_t poff, off;
610 610 u_offset_t doff;
611 611 size_t dlen;
612 612 size_t klsz = 0;
613 613 u_offset_t klstart = 0;
614 614 struct vnode *klvp = NULL;
615 615 page_t *pplist;
616 616 se_t se;
617 617 struct async_reqs *arg;
618 618 size_t swap_klustsize;
619 619
620 620 /*
621 621 * This check handles callers that enter swap_putpage with len == 0.
622 622 * swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty,
623 623 * so the same queuing must be done here when the caller passes the
624 624 * same B_ASYNC|B_FREE flags.
625 625 */
626 626 if (flags == (B_ASYNC | B_FREE) &&
627 627 sw_pending_size < klustsize && (arg = sw_getfree())) {
628 628
629 629 hat_setmod(pp);
630 630 page_io_unlock(pp);
631 631 page_unlock(pp);
632 632
633 633 arg->a_vp = vp;
634 634 arg->a_off = pp->p_offset;
635 635 arg->a_len = PAGESIZE;
636 636 arg->a_flags = B_ASYNC | B_FREE;
637 637 arg->a_cred = kcred;
638 638 sw_putreq(arg);
639 639
640 640 return (0);
641 641 }
642 642
643 643 SWAPFS_PRINT(SWAP_PUTP,
644 644 "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
645 645 pp, vp, pp->p_offset, flags, 0);
646 646
647 647 ASSERT(PAGE_LOCKED(pp));
648 648
649 649 off = pp->p_offset;
650 650
651 651 doff = off;
652 652 dlen = PAGESIZE;
653 653
654 654 if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
655 655 err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
656 656 hat_setmod(pp);
657 657 page_io_unlock(pp);
658 658 page_unlock(pp);
659 659 goto out;
660 660 }
661 661
662 662 klvp = pvp;
663 663 klstart = poff;
664 664 pplist = pp;
665 665 /*
666 666 * If this is ASYNC | FREE and we've accumulated a bunch of such
667 667 * pending requests, kluster.
668 668 */
669 669 if (flags == (B_ASYNC | B_FREE))
670 670 swap_klustsize = klustsize;
671 671 else
672 672 swap_klustsize = PAGESIZE;
673 673 se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
674 674 klsz = PAGESIZE;
675 675 while (klsz < swap_klustsize) {
676 676 if ((arg = sw_getreq()) == NULL) {
677 677 swap_getiofail++;
678 678 swap_getiopages += btop(klsz);
679 679 break;
680 680 }
681 681 ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
682 682 vp = arg->a_vp;
683 683 off = arg->a_off;
684 684
685 685 if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
686 686 swap_otherfail++;
687 687 swap_otherpages += btop(klsz);
688 688 sw_putfree(arg);
689 689 break;
690 690 }
691 691 if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
692 692 sw_putfree(arg);
693 693 continue;
694 694 }
695 695 /* Get new physical backing store for the page */
696 696 doff = off;
697 697 dlen = PAGESIZE;
698 698 if (err = swap_newphysname(vp, off, &doff, &dlen,
699 699 &pvp, &poff)) {
700 700 swap_otherfail++;
701 701 swap_otherpages += btop(klsz);
702 702 hat_setmod(pp);
703 703 page_io_unlock(pp);
704 704 page_unlock(pp);
705 705 sw_putbackreq(arg);
706 706 break;
707 707 }
708 708 /* Try to cluster new physical name with previous ones */
709 709 if (klvp == pvp && poff == klstart + klsz) {
710 710 klsz += PAGESIZE;
711 711 page_add(&pplist, pp);
712 712 pplist = pplist->p_next;
713 713 sw_putfree(arg);
714 714 } else if (klvp == pvp && poff == klstart - PAGESIZE) {
715 715 klsz += PAGESIZE;
716 716 klstart -= PAGESIZE;
717 717 page_add(&pplist, pp);
718 718 sw_putfree(arg);
719 719 } else {
720 720 swap_klustfail++;
721 721 swap_klustpages += btop(klsz);
722 722 hat_setmod(pp);
723 723 page_io_unlock(pp);
724 724 page_unlock(pp);
725 725 sw_putbackreq(arg);
726 726 break;
727 727 }
728 728 }
729 729
730 730 err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
731 731 B_WRITE | flags, cr, NULL);
732 732
733 733 if ((flags & B_ASYNC) == 0)
734 734 pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
735 735
736 736 /* Statistics */
737 737 if (!err) {
738 738 swap_putpagecnt++;
739 739 swap_pagespushed += btop(klsz);
740 740 }
741 741 out:
742 742 TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
743 743 "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
744 744 vp, klvp, klstart, klsz);
745 745 if (err && err != ENOMEM)
746 746 cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
747 747 if (lenp)
748 748 *lenp = PAGESIZE;
749 749 return (err);
750 750 }
751 751
752 752 static void
753 753 swap_dispose(
754 754 vnode_t *vp,
755 755 page_t *pp,
756 756 int fl,
757 757 int dn,
758 758 cred_t *cr,
759 759 caller_context_t *ct)
760 760 {
761 761 int err;
762 762 u_offset_t off = pp->p_offset;
763 763 vnode_t *pvp;
764 764 u_offset_t poff;
765 765
766 766 ASSERT(PAGE_EXCL(pp));
767 767
768 768 /*
769 769 * The caller will free/invalidate a large page in one shot instead of
770 770 * one small page at a time.
771 771 */
772 772 if (pp->p_szc != 0) {
773 773 page_unlock(pp);
774 774 return;
775 775 }
776 776
777 777 err = swap_getphysname(vp, off, &pvp, &poff);
778 778 if (!err && pvp != NULL)
779 779 VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
780 780 else
781 781 fs_dispose(vp, pp, fl, dn, cr, ct);
782 782 }