Print this page
11909 THREAD_KPRI_RELEASE does nothing of the sort
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/ufs/ufs_directio.c
          +++ new/usr/src/uts/common/fs/ufs/ufs_directio.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
       24 + * Copyright 2019 Joyent, Inc.
  24   25   */
  25   26  
  26   27  /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  27   28  /* All Rights Reserved */
  28   29  
  29   30  /*
  30   31   * Portions of this source code were derived from Berkeley 4.3 BSD
  31   32   * under license from the Regents of the University of California.
  32   33   */
  33   34  
↓ open down ↓ 180 lines elided ↑ open up ↑
 214  215           */
 215  216          bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
 216  217          kmem_cache_free(directio_buf_cache, dbp);
 217  218          return (error);
 218  219  }
 219  220  
 220  221  /*
 221  222   * Wait for all of the direct IO operations to finish
 222  223   */
 223  224  
 224      -uint32_t        ufs_directio_drop_kpri = 0;     /* enable kpri hack */
 225      -
 226  225  static int
 227  226  directio_wait(struct directio_buf *tail, long *bytes_iop)
 228  227  {
 229  228          int     error = 0, newerror;
 230  229          struct directio_buf     *dbp;
 231      -        uint_t  kpri_req_save;
 232  230  
 233  231          /*
 234  232           * The linked list of directio buf structures is maintained
 235  233           * in reverse order (tail->last request->penultimate request->...)
 236  234           */
 237      -        /*
 238      -         * This is the k_pri_req hack. Large numbers of threads
 239      -         * sleeping with kernel priority will cause scheduler thrashing
 240      -         * on an MP machine. This can be seen running Oracle using
 241      -         * directio to ufs files. Sleep at normal priority here to
 242      -         * more closely mimic physio to a device partition. This
 243      -         * workaround is disabled by default as a niced thread could
 244      -         * be starved from running while holding i_rwlock and i_contents.
 245      -         */
 246      -        if (ufs_directio_drop_kpri) {
 247      -                kpri_req_save = curthread->t_kpri_req;
 248      -                curthread->t_kpri_req = 0;
 249      -        }
 250  235          while ((dbp = tail) != NULL) {
 251  236                  tail = dbp->next;
 252  237                  newerror = directio_wait_one(dbp, bytes_iop);
 253  238                  if (error == 0)
 254  239                          error = newerror;
 255  240          }
 256      -        if (ufs_directio_drop_kpri)
 257      -                curthread->t_kpri_req = kpri_req_save;
 258  241          return (error);
 259  242  }
 260  243  /*
 261  244   * Initiate direct IO request
 262  245   */
 263  246  static void
 264  247  directio_start(struct ufsvfs *ufsvfsp, struct inode *ip, size_t nbytes,
 265      -        offset_t offset, char *addr, enum seg_rw rw, struct proc *procp,
 266      -        struct directio_buf **tailp, page_t **pplist)
      248 +    offset_t offset, char *addr, enum seg_rw rw, struct proc *procp,
      249 +    struct directio_buf **tailp, page_t **pplist)
 267  250  {
 268  251          buf_t *bp;
 269  252          struct directio_buf *dbp;
 270  253  
 271  254          /*
 272  255           * Allocate a directio buf header
 273  256           *   Note - list is maintained in reverse order.
 274  257           *   directio_wait_one() depends on this fact when
 275  258           *   adjusting the ``bytes_io'' param. bytes_io
 276  259           *   is used to compute a residual in the case of error.
↓ open down ↓ 59 lines elided ↑ open up ↑
 336  319   * Force POSIX synchronous data integrity on all writes for testing.
 337  320   */
 338  321  uint32_t        ufs_force_posix_sdi = 0;
 339  322  
 340  323  /*
 341  324   * Direct Write
 342  325   */
 343  326  
 344  327  int
 345  328  ufs_directio_write(struct inode *ip, uio_t *arg_uio, int ioflag, int rewrite,
 346      -        cred_t *cr, int *statusp)
      329 +    cred_t *cr, int *statusp)
 347  330  {
 348  331          long            resid, bytes_written;
 349  332          u_offset_t      size, uoff;
 350  333          uio_t           *uio = arg_uio;
 351  334          rlim64_t        limit = uio->uio_llimit;
 352  335          int             on, n, error, newerror, len, has_holes;
 353  336          daddr_t         bn;
 354  337          size_t          nbytes;
 355  338          struct fs       *fs;
 356  339          vnode_t         *vp;
↓ open down ↓ 50 lines elided ↑ open up ↑
 407  390          has_holes = -1;
 408  391  
 409  392          /*
 410  393           * only on regular files; no metadata
 411  394           */
 412  395          if (((ip->i_mode & IFMT) != IFREG) || ip->i_ufsvfs->vfs_qinod == ip)
 413  396                  return (0);
 414  397  
 415  398          /*
 416  399           * Synchronous, allocating writes run very slowly in Direct-Mode
 417      -         *      XXX - can be fixed with bmap_write changes for large writes!!!
      400 +         *      XXX - can be fixed with bmap_write changes for large writes!!!
 418  401           *      XXX - can be fixed for updates to "almost-full" files
 419  402           *      XXX - WARNING - system hangs if bmap_write() has to
 420      -         *                      allocate lots of pages since pageout
 421      -         *                      suspends on locked inode
      403 +         *                      allocate lots of pages since pageout
      404 +         *                      suspends on locked inode
 422  405           */
 423  406          if (!rewrite && (ip->i_flag & ISYNC)) {
 424  407                  if ((uoff + resid) > size)
 425  408                          return (0);
 426  409                  has_holes = bmap_has_holes(ip);
 427  410                  if (has_holes)
 428  411                          return (0);
 429  412          }
 430  413  
 431  414          /*
↓ open down ↓ 619 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX