1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #ifndef _SYS_FS_CACHEFS_FS_H
  28 #define _SYS_FS_CACHEFS_FS_H
  29 
  30 #pragma ident   "%Z%%M% %I%     %E% SMI"
  31 
  32 #include <sys/vnode.h>
  33 #include <sys/vfs.h>
  34 #include <sys/types.h>
  35 #include <sys/types32.h>
  36 #include <sys/t_lock.h>
  37 #include <sys/thread.h>
  38 #include <sys/kmem.h>
  39 #include <sys/inttypes.h>
  40 #include <sys/time_impl.h>
  41 #include <sys/systm.h>
  42 
  43 #ifdef __cplusplus
  44 extern "C" {
  45 #endif
  46 
  47 #ifdef CFSDEBUG
     /*
      * Debug-trace selector masks, available only when CFSDEBUG is defined.
      * Apart from CFSDEBUG_ALL and CFSDEBUG_NONE each value is a single bit;
      * CFS_DEBUG() tests a mask against the global cachefsdebug word.
      */
  48 #define CFSDEBUG_ALL            0xffffffff
  49 #define CFSDEBUG_NONE           0x0
  50 #define CFSDEBUG_GENERAL        0x1
  51 #define CFSDEBUG_SUBR           0x2
  52 #define CFSDEBUG_CNODE          0x4
  53 #define CFSDEBUG_DIR            0x8
  54 #define CFSDEBUG_STRICT         0x10
  55 #define CFSDEBUG_VOPS           0x20
  56 #define CFSDEBUG_VFSOP          0x40
  57 #define CFSDEBUG_RESOURCE       0x80
  58 #define CFSDEBUG_CHEAT          0x100
  59 #define CFSDEBUG_INVALIDATE     0x200
  60 #define CFSDEBUG_DLOG           0x400
  61 #define CFSDEBUG_FILEGRP        0x800
  62 #define CFSDEBUG_IOCTL          0x1000
  63 #define CFSDEBUG_FRONT          0x2000
  64 #define CFSDEBUG_BACK           0x4000
  65 #define CFSDEBUG_ALLOCMAP       0x8000
  66 #define CFSDEBUG_ASYNCPOP       0x10000
  67 #define CFSDEBUG_VOPS_NFSV4     0x20000
  68
  69 #define CFSCLEANFLAG
  70
  71 extern int cachefsdebug;
  72
     /*
      * CFS_DEBUG(N) expands to a bare `if' with no body: the statement that
      * follows the macro use becomes the conditional body.  Brace any
      * enclosing if/else, otherwise a following `else' binds to this hidden
      * `if'.
      */
  73 #define CFS_DEBUG(N)    if (cachefsdebug & (N))
  74 #endif /* CFSDEBUG */
  75 
  76 #if 0
     /* Everything down to the matching #endif is compiled out. */
  77 #ifdef CFSDEBUG
  78         /*
  79          * Testing usage of cd_access and friends.
  80          * Note we steal an unused bit in t_flag.
  81          * This will certainly bite us later.
  82          */
  83 #define CFS_CD_DEBUG
  84 #define T_CD_HELD       0x01000
  85 #endif /* CFSDEBUG */
  86 #endif /* 0 */
  87 
  88 /*
  89  * Note: in an RL debugging kernel, CFSVERSION is augmented by 100
  90  *
  91  * Version History:
  92  *
  93  * Beginning -- Solaris 2.3 and 2.4: 1
  94  *
  95  * In Solaris 2.5 alpha, the size of fid_t changed: 2
  96  *
  97  * In 2.6: Chart, RL pointers/idents became rl_entry: 3
  98  *      added which RL list to attrcache header: 4
  99  *
 100  * Large Files support made version to 6.
 101  *
 102  * Sequence numbers made version to 7.
 103  *
 104  * 64-bit on-disk cache will make version 8. Not yet supported.
 105  */
 106 
 107 #if 0
     /* Left disabled; defining CFSRLDEBUG selects the RL-debugging versions. */
 108 #define CFSRLDEBUG
 109 #endif
 110
     /*
      * Cache format version numbers.
      * NOTE(review): the version-history comment says an RL debugging kernel
      * adds 100 to CFSVERSION, yet 110 is not 7 + 100 -- confirm which side
      * is stale before relying on either.
      */
 111 #ifdef CFSRLDEBUG
 112 #define CFSVERSION              110
 113 #define CFSVERSION64            111     /* 64-bit cache - not yet used */
 114 #else /* CFSRLDEBUG */
 115 #define CFSVERSION              7
 116 #define CFSVERSION64            8       /* 64-bit cache - not yet used */
 117 #endif /* CFSRLDEBUG */
 118 
 119 /* Default sizes and limits, plus the fixed file and path names cachefs uses */
 120 #define DEF_FILEGRP_SIZE        256
 121 #define DEF_POP_SIZE            0x10000         /* 64K */
 122 #define CACHELABEL_NAME         ".cfs_label"
 123 #define RESOURCE_NAME           ".cfs_resource"
 124 #define CACHEFS_FSINFO          ".cfs_fsinfo"
 125 #define ATTRCACHE_NAME          ".cfs_attrcache"
 126 #define CACHEFS_LOSTFOUND_NAME  "lost+found"
 127 #define BACKMNT_NAME            ".cfs_mnt_points"
 128 #define CACHEFS_LOCK_FILE       ".cfs_lock"
 129 #define CACHEFS_DLOG_FILE       ".cfs_dlog"
 130 #define CACHEFS_DMAP_FILE       ".cfs_dmap"
 131 #define CACHEFS_MNT_FILE        ".cfs_mnt"
 132 #define CACHEFS_UNMNT_FILE      ".cfs_unmnt"
 133 #define LOG_STATUS_NAME         ".cfs_logging"
 134 #define NOBACKUP_NAME           ".nsr"
 135 #define CACHEFS_PREFIX          ".cfs_"
 136 #define CACHEFS_PREFIX_LEN      5 /* length of CACHEFS_PREFIX, without NUL */
 137 #define ROOTLINK_NAME           "root"
 138 #define CFS_FRONTFILE_NAME_SIZE 18
 139 #define CACHEFS_BASETYPE        "cachefs" /* used in statvfs() */
 140 #define CFS_MAXFREECNODES       20
 141 #define CACHEFSTAB              "/etc/cachefstab"
 142 #define CACHEFS_ROOTRUN         "/var/run"
 143 #define CACHEFS_LOCKDIR_PRE     ".cachefs." /* used by mount(1M)/fsck(1M) */
 144 
 145 /*
 146  * Mount-time options.  The structure is passed in as part of the mount
 147  * arguments, is stored in the .options file, and is kept track of in the
 148  * fscache structure.
 149  */
 150 struct cachefsoptions {
 151         uint_t          opt_flags;              /* mount flags (CFS_* option bits) */
 152         int             opt_popsize;            /* cache population size */
 153         int             opt_fgsize;             /* filegrp size, default 256 */
 154 };
 155 
 156 typedef struct cachefscache cachefscache_t; /* struct is defined later in this header */
 157 
 158 /*
 159  * Everything needed to manage a queue of requests that are processed
 160  * by async threads.  wq_keepone and wq_logwork are one-bit flags.
 161  */
 162 struct cachefs_workq {
 163         struct cachefs_req      *wq_head;               /* head of work q */
 164         struct cachefs_req      *wq_tail;               /* tail of work q */
 165         int                     wq_length;              /* # of requests on q */
 166         int                     wq_thread_count;        /* # of threads */
 167         int                     wq_max_len;             /* longest queue */
 168         int                     wq_halt_request;        /* halt requested */
 169         unsigned int            wq_keepone:1;           /* keep one thread */
 170         unsigned int            wq_logwork:1;           /* write logfile */
 171         kcondvar_t              wq_req_cv;              /* wait on work to do */
 172         kcondvar_t              wq_halt_cv;             /* wait/signal halt */
 173         kmutex_t                wq_queue_lock;          /* protect queue */
 174         cachefscache_t          *wq_cachep;             /* sometimes NULL */
 175 };
 176 
 177 /*
 178  * cfs_cid is stored on disk, so its layout must be the same for 32-bit
 179  * and 64-bit builds.  Where long long is 8-byte aligned natively but
      * 4-byte aligned in the 32-bit model, 4-byte packing is forced around
      * the definition and restored afterwards.
 179  */
 180
 181 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
 182 #pragma pack(4)
 183 #endif
 184
 185 /* identifies a file in the cache */
 186 struct cfs_cid {
 187         ino64_t cid_fileno;             /* fileno */
 188         int     cid_flags;              /* flags */
 189 };
 190 typedef struct cfs_cid cfs_cid_t;
 191 #define CFS_CID_LOCAL   1       /* local file */
 192
 193 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
 194 #pragma pack()
 195 #endif
 196 
 197 /*
 198  * XX64 - for now, redefine all time_t fields that are used by both kernel
 199  * and user space apps as 32-bit quantities.
 200  */
 201 
 202 #if (defined(_SYSCALL32) && defined(_LP64))
 203 
 204 /*
 205  * The cfs_* types represent on-disk data, whose size is independent of
 206  * the kernel memory model.  In this branch (_SYSCALL32 and _LP64 both
      * defined) they map to the explicit 32-bit types rather than the native
      * ones.
 207  */
 208 typedef time32_t                cfs_time_t;
 209 typedef timestruc32_t           cfs_timestruc_t;
 210 typedef vattr32_t               cfs_vattr_t;
 211 typedef fid32_t                 cfs_fid_t;
 212
     /* Struct-tag aliases matching the typedefs above */
 213 #define cfs_timespec            timespec32
 214 #define cfs_vattr               vattr32
 215 #define cfs_fid                 fid32
 216 
 217 /*
 218  * CACHEFS_DEV_TO_DEV32_COPY and CACHEFS_DEV32_TO_DEV_COPY copy between a
 219  * dev_t and its 32-bit form, compressing (cmpldev) or expanding (expldev)
      * the value as needed.
      *
      * CACHEFS_DEV_TO_DEV32_COPY assigns EOVERFLOW to `error' when cmpldev()
      * returns 0; it never clears `error'.  Both expansions already end in
      * ';' and the first is an unbraced `if', so use them only as complete
      * statements inside a braced block, never directly before an `else'.
 220  */
 221 #define CACHEFS_DEV_TO_DEV32_COPY(in_dev, out_dev, error)               \
 222         if (cmpldev((dev32_t *)&(out_dev), in_dev) == 0)            \
 223                 error = EOVERFLOW;
 224
 225 #define CACHEFS_DEV32_TO_DEV_COPY(in_dev, out_dev)                      \
 226         out_dev = (dev_t)expldev(in_dev);
 227
     /* True when tval lies outside [TIME32_MIN, TIME32_MAX]; tval is evaluated twice */
 228 #define TIME_OVERFLOW(tval)                                             \
 229         ((tval) < TIME32_MIN || (tval) > TIME32_MAX)
 230
 231 /*
      * Store in_tval into out_tval, then set `error' to EOVERFLOW when
      * in_tval is out of 32-bit range.  The store happens even on overflow
      * and `error' is never cleared.  Expands to two statements (assignment,
      * then an unbraced `if'), so it must not be the sole body of an unbraced
      * if/else/loop.
      */
 232 #define CACHEFS_TIME_TO_TIME32_COPY(in_tval, out_tval, error)           \
 233         out_tval = (in_tval);                                           \
 234         if (TIME_OVERFLOW(in_tval))                                     \
 235                 error = EOVERFLOW;
 236
     /* Plain assignment in the widening direction; the expansion ends in ';' */
 237 #define CACHEFS_TIME32_TO_TIME_COPY(in_tval, out_tval)                  \
 238         out_tval = (in_tval);
 239 
 240 /*
      * Fill the cfs_timestruc_t *out_tsp from the timestruc_t *in_tsp.
      * tv_nsec is copied directly; tv_sec goes through
      * CACHEFS_TIME_TO_TIME32_COPY, which sets `error' on overflow.
      * Multi-statement expansion: use only inside a braced block.
      */
 241 #define CACHEFS_TS_TO_TS32_COPY(in_tsp, out_tsp, error)                 \
 242         (out_tsp)->tv_nsec = (in_tsp)->tv_nsec;                           \
 243         CACHEFS_TIME_TO_TIME32_COPY((in_tsp)->tv_sec, (out_tsp)->tv_sec, error)
 244
     /* Reverse direction of the macro above; nothing can overflow, so no `error' */
 245 #define CACHEFS_TS32_TO_TS_COPY(in_tsp, out_tsp)                        \
 246         (out_tsp)->tv_nsec = (in_tsp)->tv_nsec;                           \
 247         CACHEFS_TIME32_TO_TIME_COPY((in_tsp)->tv_sec, (out_tsp)->tv_sec)
 248
 249 /*
      * CACHEFS_FID_COPY copies between two fids: fid_len first, then fid_len
      * bytes of fid_data via bcopy().  The length is not checked against the
      * destination's fid_data capacity.  Two statements, so use only inside
      * a braced block.
      */
 250 #define CACHEFS_FID_COPY(in_fidp, out_fidp)                             \
 251         (out_fidp)->fid_len = (in_fidp)->fid_len;                 \
 252         bcopy((in_fidp)->fid_data, (out_fidp)->fid_data, (in_fidp)->fid_len)
 253
     /*
      * Field-by-field copy of *in_vattrp into the 32-bit *out_vattrp.
      * va_fsid and va_rdev go through CACHEFS_DEV_TO_DEV32_COPY and the three
      * timestamps through CACHEFS_TS_TO_TS32_COPY; any of those may set
      * `error' to EOVERFLOW, and the remaining fields are still copied.
      * va_seq is always set to 0 rather than copied.
      * Expands to many statements, some of them unbraced `if's: use only as a
      * complete statement inside a braced block.
      */
 254 #define CACHEFS_VATTR_TO_VATTR32_COPY(in_vattrp, out_vattrp, error)     \
 255         (out_vattrp)->va_mask = (in_vattrp)->va_mask;                     \
 256         (out_vattrp)->va_type = (in_vattrp)->va_type;                     \
 257         (out_vattrp)->va_mode = (in_vattrp)->va_mode;                     \
 258         (out_vattrp)->va_uid = (in_vattrp)->va_uid;                       \
 259         (out_vattrp)->va_gid = (in_vattrp)->va_gid;                       \
 260         CACHEFS_DEV_TO_DEV32_COPY((in_vattrp)->va_fsid,                      \
 261                 (out_vattrp)->va_fsid, error);                               \
 262         (out_vattrp)->va_nodeid = (in_vattrp)->va_nodeid;         \
 263         (out_vattrp)->va_nlink = (in_vattrp)->va_nlink;                   \
 264         (out_vattrp)->va_size = (in_vattrp)->va_size;                     \
 265         CACHEFS_TS_TO_TS32_COPY(&(in_vattrp)->va_atime,                  \
 266                 &(out_vattrp)->va_atime, error);                 \
 267         CACHEFS_TS_TO_TS32_COPY(&(in_vattrp)->va_mtime,                  \
 268                 &(out_vattrp)->va_mtime, error);                 \
 269         CACHEFS_TS_TO_TS32_COPY(&(in_vattrp)->va_ctime,          \
 270                 &(out_vattrp)->va_ctime, error);                 \
 271         CACHEFS_DEV_TO_DEV32_COPY((in_vattrp)->va_rdev,                      \
 272                 (out_vattrp)->va_rdev, error);                               \
 273         (out_vattrp)->va_blksize = (in_vattrp)->va_blksize;               \
 274         (out_vattrp)->va_nblocks = (in_vattrp)->va_nblocks;               \
 275         (out_vattrp)->va_seq = 0
 276
     /*
      * Field-by-field copy of the 32-bit *in_vattrp into the native
      * *out_vattrp, expanding va_fsid/va_rdev with CACHEFS_DEV32_TO_DEV_COPY
      * and the timestamps with CACHEFS_TS32_TO_TS_COPY.  There is no error
      * output.  va_seq is always set to 0 rather than copied.
      * Multi-statement expansion: use only inside a braced block.
      */
 277 #define CACHEFS_VATTR32_TO_VATTR_COPY(in_vattrp, out_vattrp)            \
 278         (out_vattrp)->va_mask = (in_vattrp)->va_mask;                     \
 279         (out_vattrp)->va_type = (in_vattrp)->va_type;                     \
 280         (out_vattrp)->va_mode = (in_vattrp)->va_mode;                     \
 281         (out_vattrp)->va_uid = (in_vattrp)->va_uid;                       \
 282         (out_vattrp)->va_gid = (in_vattrp)->va_gid;                       \
 283         CACHEFS_DEV32_TO_DEV_COPY((in_vattrp)->va_fsid,                      \
 284                 (out_vattrp)->va_fsid);                                      \
 285         (out_vattrp)->va_nodeid = (in_vattrp)->va_nodeid;         \
 286         (out_vattrp)->va_nlink = (in_vattrp)->va_nlink;                   \
 287         (out_vattrp)->va_size = (in_vattrp)->va_size;                     \
 288         CACHEFS_TS32_TO_TS_COPY(&(in_vattrp)->va_atime,                  \
 289                 &(out_vattrp)->va_atime);                                \
 290         CACHEFS_TS32_TO_TS_COPY(&(in_vattrp)->va_mtime,                  \
 291                 &(out_vattrp)->va_mtime);                                \
 292         CACHEFS_TS32_TO_TS_COPY(&(in_vattrp)->va_ctime,                  \
 293                 &(out_vattrp)->va_ctime);                                \
 294         CACHEFS_DEV32_TO_DEV_COPY((in_vattrp)->va_rdev,                      \
 295                 (out_vattrp)->va_rdev);                                      \
 296         (out_vattrp)->va_blksize = (in_vattrp)->va_blksize;               \
 297         (out_vattrp)->va_nblocks = (in_vattrp)->va_nblocks;               \
 298         (out_vattrp)->va_seq = 0
 299 
 300 #else /* not _SYSCALL32 && _LP64 */
 301 
 302 /*
 303  * The cfs_* types represent on-disk data.  In this branch (not both
 304  * _SYSCALL32 and _LP64) they are simply the native kernel types, so no
      * size conversion is involved.
 305  */
 306 typedef time_t                  cfs_time_t;
 307 typedef timestruc_t             cfs_timestruc_t;
 308 typedef vattr_t                 cfs_vattr_t;
 309 typedef fid_t                   cfs_fid_t;
 310
     /* Struct-tag aliases matching the typedefs above */
 311 #define cfs_timespec            timespec
 312 #define cfs_vattr               vattr
 313 #define cfs_fid                 fid
 314
     /*
      * Native-type versions of the copy macros.  Source and destination have
      * the same type here, so every copy is a plain assignment, overflow is
      * impossible (TIME_OVERFLOW is constant FALSE) and the `error' argument
      * is accepted but never written.
      *
      * Unlike the converting versions in the other branch, these expansions
      * carry no trailing ';'.  The two VATTR macros are still two statements
      * each (struct copy, then va_seq = 0), so use them only inside a braced
      * block.
      */
 315 #define TIME_OVERFLOW(tval)     FALSE
 316
 317 #define CACHEFS_DEV_TO_DEV32_COPY(in_dev, out_dev, error)               \
 318         out_dev = (in_dev)
 319
 320 #define CACHEFS_DEV32_TO_DEV_COPY(in_dev, out_dev)                      \
 321         out_dev = (in_dev)
 322
 323 #define CACHEFS_TIME_TO_TIME32_COPY(in_tval, out_tval, error)           \
 324         out_tval = (in_tval)
 325
 326 #define CACHEFS_TIME32_TO_TIME_COPY(in_tval, out_tval)                  \
 327         out_tval = (in_tval)
 328
 329 #define CACHEFS_TS_TO_TS32_COPY(in_tsp, out_tsp, error)                 \
 330         *(out_tsp) = *(in_tsp)
 331
 332 #define CACHEFS_TS32_TO_TS_COPY(in_tsp, out_tsp)                        \
 333         *(out_tsp) = *(in_tsp)
 334
 335 #define CACHEFS_FID_COPY(in_fidp, out_fidp)                             \
 336         *(out_fidp) = *(in_fidp)
 337
 338 #define CACHEFS_VATTR_TO_VATTR32_COPY(in_vattrp, out_vattrp, error)     \
 339         *(out_vattrp) = *(in_vattrp);                                   \
 340         (out_vattrp)->va_seq = 0
 341
 342 #define CACHEFS_VATTR32_TO_VATTR_COPY(in_vattrp, out_vattrp)            \
 343         *(out_vattrp) = *(in_vattrp);                                   \
 344         (out_vattrp)->va_seq = 0
 345 
 346 #endif /* _SYSCALL32 && _LP64 */
 347 
 348 /*
 349  * The "cfs_*" structs refer to the on-disk structures, which presently
 350  * are 32-bit based.  The CFS-named macros below are therefore straight
 351  * aliases for the *32 copy macros; if the on-disk format changes to
      * 64-bit, these aliases are what would have to be modified.  Each alias
      * inherits the statement shape and caveats of the macro it forwards to.
 352  */
 353 #define CACHEFS_DEV_TO_CFS_DEV_COPY(in_dev, out_dev, error)             \
 354         CACHEFS_DEV_TO_DEV32_COPY(in_dev, out_dev, error)
 355
 356 #define CACHEFS_CFS_DEV_TO_DEV_COPY(in_dev, out_dev)            \
 357         CACHEFS_DEV32_TO_DEV_COPY(in_dev, out_dev)
 358
 359 #define CACHEFS_TIME_TO_CFS_TIME_COPY(in_tval, out_tval, error)         \
 360         CACHEFS_TIME_TO_TIME32_COPY(in_tval, out_tval, error)
 361
 362 #define CACHEFS_CFS_TIME_TO_TIME_COPY(in_tval, out_tval)                \
 363         CACHEFS_TIME32_TO_TIME_COPY(in_tval, out_tval)
 364
 365 #define CACHEFS_TS_TO_CFS_TS_COPY(in_tsp, out_tsp, error)               \
 366         CACHEFS_TS_TO_TS32_COPY(in_tsp, out_tsp, error)
 367
 368 #define CACHEFS_CFS_TS_TO_TS_COPY(in_tsp, out_tsp)                      \
 369         CACHEFS_TS32_TO_TS_COPY(in_tsp, out_tsp)
 370
 371 #define CACHEFS_VATTR_TO_CFS_VATTR_COPY(in_vattrp, out_vattrp, error)   \
 372         CACHEFS_VATTR_TO_VATTR32_COPY(in_vattrp, out_vattrp, error)
 373
 374 #define CACHEFS_CFS_VATTR_TO_VATTR_COPY(in_vattrp, out_vattrp)          \
 375         CACHEFS_VATTR32_TO_VATTR_COPY(in_vattrp, out_vattrp)
 376 
 377 #include <sys/fs/cachefs_fscache.h>
 378 #include <sys/fs/cachefs_filegrp.h>
 379 
 380 /*
 381  * One cache_label structure per cache. Contains mainly user-defined or
 382  * default values for cache resource management. Its contents are static.
 383  * The value cl_maxfiles is not used anywhere in the cachefs code. If and
 384  * when it is really used, cl_maxfiles should be declared as a 64-bit value
 385  * for large file support.
 386  * The maxblks, blkhiwat, blklowat, blocktresh, blockmin, may need to be
 387  * 64bit values when we actually start supporting file systems of size
 388  * greater than 1 terabyte.
 389  */
 390 struct cache_label {
 391         int     cl_cfsversion;  /* cfs version number */
 392         int     cl_maxblks;     /* max blocks to be used by cache */
 393         int     cl_blkhiwat;    /* high water-mark for block usage */
 394         int     cl_blklowat;    /* low water-mark for block usage */
 395         int     cl_maxinodes;   /* max inodes to be used by cache */
 396         int     cl_filehiwat;   /* high water-mark for inode usage */
 397         int     cl_filelowat;   /* low water-mark for inode usage */
 398         int     cl_blocktresh;  /* block max usage threshold */
 399         int     cl_blockmin;    /* block min usage threshold */
 400         int     cl_filetresh;   /* inode max usage threshold */
 401         int     cl_filemin;     /* inode min usage threshold */
 402         int     cl_maxfiles;    /* max cache file size */
 403 };
 404 
 405 /*
 406  * One cache_usage structure per cache. Keeps track of cache usage figures.
 407  * Its contents are updated frequently.
 408  */
 409 struct cache_usage {
 410         int     cu_blksused;    /* actual number of blocks used */
 411         int     cu_filesused;   /* actual number of files used */
 412         uint_t  cu_flags;       /* Cache state flags */
 413         ushort_t cu_unique;     /* Fid persistent uniquifier */
 414 };
 415
     /* Cache state flag bits -- presumably the values kept in cu_flags; confirm */
 416 #define CUSAGE_ACTIVE   1       /* Cache is active */
 417 #define CUSAGE_NEED_ADJUST 2    /* Adjust uniquifier before assigning new fid */
 418 
 419 /*
 420  * RL list identifiers.
 421  */
 422 enum cachefs_rl_type {
 423         CACHEFS_RL_NONE = 0x101,
 424         CACHEFS_RL_FREE,
 425         CACHEFS_RL_GC,
 426         CACHEFS_RL_ACTIVE,
 427         CACHEFS_RL_ATTRFILE,
 428         CACHEFS_RL_MODIFIED,
 429         CACHEFS_RL_PACKED,
 430         CACHEFS_RL_PACKED_PENDING,
 431         CACHEFS_RL_MF
 432 };
 433 #define CACHEFS_RL_START CACHEFS_RL_NONE
 434 #define CACHEFS_RL_END CACHEFS_RL_MF
 435 #define CACHEFS_RL_CNT  (CACHEFS_RL_END - CACHEFS_RL_START + 1)
 436 #define CACHEFS_RL_INDEX(X)     (X - CACHEFS_RL_START)
 437
     /*
      * Head of one resource list: its two ends plus running totals.
      * NOTE(review): rli_front/rli_back look like rl entry numbers (compare
      * rl_fwd_idx/rl_bkwd_idx in rl_entry) -- confirm against the list code.
      */
 438 struct cachefs_rl_listhead {
 439         uint_t          rli_front;              /* front of list */
 440         uint_t          rli_back;               /* back of list */
 441         int             rli_blkcnt;             /* number of 8k blocks */
 442         int             rli_itemcnt;            /* number of items on list */
 443 };
 444 typedef struct cachefs_rl_listhead cachefs_rl_listhead_t;
 445 
 446 /*
 447  * Resource List information.  One per cache.  rl_items holds one list
      * head per cachefs_rl_type value (CACHEFS_RL_CNT of them).
 448  */
 449 struct cachefs_rl_info {
 450         uint_t          rl_entries;     /* number of entries allocated in rl */
 451         cfs_time_t      rl_gctime;      /* time of item on front of gc list */
 452
 453         /* heads of the various lists */
 454         cachefs_rl_listhead_t   rl_items[CACHEFS_RL_CNT];
 455 };
 456 typedef struct cachefs_rl_info cachefs_rl_info_t;
 457 
 458 /*
 459  * rl_debug and rl_entry are stored on disk, so their layout must be
 460  * the same for 32-bit and 64-bit builds.  The 4-byte packing requested
      * here stays in effect until the matching "#pragma pack()" that follows
      * the rl_entry definition.
 461  */
 462
 463 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
 464 #pragma pack(4)
 465 #endif
 466
 467 #ifdef CFSRLDEBUG
 468 /*
 469  * RL debugging record, built only when CFSRLDEBUG is defined.  Records
      * are chained through db_next, and each carries a captured stack of up
      * to CACHEFS_RLDB_STACKSIZE program counters.
 470  */
 471
 472 #define CACHEFS_RLDB_STACKSIZE  16
 473 #define CACHEFS_RLDB_DEF_MAXCOUNT 5
 474
 475 typedef struct rl_debug {
 476         hrtime_t db_hrtime;
 477
 478         uint_t db_attrc: 1;
 479         uint_t db_fsck: 1;
 480         ino64_t db_fsid;
 481         ino64_t db_fileno;
 482         enum cachefs_rl_type db_current;
 483
 484         int db_stackheight;
 485         pc_t db_stack[CACHEFS_RLDB_STACKSIZE];
 486
 487         struct rl_debug *db_next;
 488 } rl_debug_t;
 489
 490 extern time_t cachefs_dbvalid;
 491 extern struct kmem_cache *cachefs_rl_debug_cache;
 492 extern kmutex_t cachefs_rl_debug_mutex;
 493 #endif /* CFSRLDEBUG */
 494 
 495 /*
 496  * RL Entry type: one resource-list entry.  The two extra members that
      * exist only under CFSRLDEBUG change the size and layout of the entry.
 497  */
 498
 499 typedef struct rl_entry {
 500         uint_t rl_attrc: 1;
 501         uint_t rl_fsck: 1; /* used by fsck; true => rl_current is correct */
 502         uint_t rl_local: 1; /* 1 means a local file */
 503
 504 #ifdef CFSRLDEBUG
 505         cfs_time_t rl_dbvalid; /* this == cachefs_dbvalid => trust rl_debug */
 506         rl_debug_t *rl_debug;
 507 #endif /* CFSRLDEBUG */
 508
 509         ino64_t rl_fsid;
 510         ino64_t rl_fileno;
 511
 512         enum cachefs_rl_type rl_current;
 513         uint_t rl_fwd_idx;
 514         uint_t rl_bkwd_idx;
 515 } rl_entry_t;
 516
     /* End of the 4-byte packing opened before the rl_debug definition */
 517 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
 518 #pragma pack()
 519 #endif
 520 
 521 /*
 522  * Number of rl entries per MAXBSIZE chunk.  sizeof (rl_entry_t) need not
 523  * divide MAXBSIZE evenly: the integer division below rounds down, so any
 524  * remainder of a chunk is left unused (see cachefs_rl_entry_get()).
 525  */
 526
 527 #define CACHEFS_RLPMBS  (MAXBSIZE / (uint_t)sizeof (rl_entry_t))
 528 
 529 /*
 530  * struct cachefscache contains cache-wide information, and provides
 531  * access to lower level info. There is one such structure per cache.
 532  */
 533 struct cachefscache {
 534         struct cachefscache     *c_next;        /* list of caches */
 535         uint_t                  c_flags;        /* misc flags */
 536         struct cache_label      c_label;        /* cache resource info */
 537         struct cache_usage      c_usage;        /* cache usage info */
 538         struct cachefs_rl_info  c_rlinfo;       /* rl global pointers */
 539         struct vnode            *c_resfilevp;   /* resource file vp */
 540         uint_t                  c_rl_window;    /* window mapped in */
 541         rl_entry_t              *c_rl_entries;  /* mapping for rl entries */
 542         struct vnode            *c_dirvp;       /* cache directory vp */
 543         struct vnode            *c_lockvp;      /* lock file vp */
 544         struct vnode            *c_lostfoundvp; /* lost+found directory vp */
 545         int                     c_refcnt;       /* active fs ref count */
 546         struct fscache          *c_fslist;      /* fscache list head */
 547         struct cachefs_workq    c_workq;        /* async work */
 548         kmutex_t                c_contentslock; /* protect cache struct */
 549         kmutex_t                c_fslistlock;   /* protect fscache list */
 550         kmutex_t                c_mflock;       /* protect modified fixes */
 551         ushort_t                c_unique;       /* In core fid uniquifier */
 552         kcondvar_t              c_cwcv;         /* gc wait on work to do */
 553         kcondvar_t              c_cwhaltcv;     /* wait on gc thread exit */
 554         uint_t                  c_gc_count;     /* garbage collection count */
 555         time_t                  c_gc_time;      /* last garbage collection */
 556         time_t                  c_gc_before;    /* atime of front before gc */
 557         time_t                  c_gc_after;     /* atime of front after gc */
 558         uint_t                  c_apop_inqueue; /* # async pops queued */
 559         pid_t                   c_rootdaemonid; /* pid of root cachefsd */
 560         struct cachefs_log_cookie
 561                                 *c_log;         /* in-core logging stuff */
 562         struct cachefs_log_control
 563                                 *c_log_ctl;     /* on-disk logging stuff */
 564         kmutex_t                c_log_mutex;    /* protects c_log* */
 565 };
 566 
 567 extern struct kmem_cache *cachefs_cache_kmcache;
 568
 569 #define CACHEFS_MAX_APOP_INQUEUE        50      /* default for cachefs_max_apop_inqueue */
 570 extern uint_t cachefs_max_apop_inqueue;         /* max populations pending */
 571 
 572 /*
 573  * Various cache structure flags.  Each value is a single bit; 0x2 is
      * not assigned in this list.
      * NOTE(review): presumably these are the bits kept in
      * cachefscache.c_flags -- confirm against the users.
 574  */
 575 #define CACHE_NOCACHE           0x1     /* all cache refs go to back fs */
 576 #define CACHE_ALLOC_PENDING     0x4     /* Allocation pending */
 577 #define CACHE_NOFILL            0x8     /* No fill mode */
 578 #define CACHE_GARBAGE_COLLECT   0x10    /* Garbage collect in progress */
 579 #define CACHE_CACHEW_THREADRUN  0x20    /* Cachep worker thread is alive */
 580 #define CACHE_CACHEW_THREADEXIT 0x40    /* cachew thread should exit */
 581 #define CACHE_DIRTY             0x80
 582 #define CACHE_PACKED_PENDING    0x100   /* Packed pending work to do */
 583 #define CACHE_CHECK_RLTYPE      0x200   /* double-check with resource lists */
 584 
 585 /*
 586  * Values for the mount options flag, opt_flags.
 587  */
 588 /*
 589  * Mount options.  Each value is a single bit; 0x04, 0x20 and 0x40 are
      * not assigned in this list.
 590  */
 591 #define CFS_WRITE_AROUND        0x01    /* write-around */
 592 #define CFS_NONSHARED           0x02    /* write to cache and back file */
 593 #define CFS_NOCONST_MODE        0x08    /* no-op consistency mode */
 594 #define CFS_ACCESS_BACKFS       0x10    /* pass VOP_ACCESS to backfs */
 595 #define CFS_CODCONST_MODE       0x80    /* cod consistency mode */
 596 #define CFS_DISCONNECTABLE      0x100   /* server not responding option */
 597 #define CFS_SOFT                0x200   /* soft mounted */
 598 #define CFS_NOACL               0x400   /* ACLs are disabled in this fs */
 599 #define CFS_LLOCK               0x800   /* use local file/record locks */
 600 #define CFS_SLIDE               0x1000  /* slide backfs under cachefs */
 601 #define CFS_NOFILL              0x2000  /* start in nofill mode */
 602 #define CFS_BACKFS_NFSV4        0x4000  /* back filesystem is NFSv4 */
 603 
 604 #define MAXCOOKIE_SIZE  36      /* NOTE(review): unit and user not visible in this header */
 605
 606 #define C_BACK_CHECK    0x2     /* NOTE(review): flag word it belongs to is not visible here */
 607 
 608 /*
 609  * Macro to determine if this is a snr error where we should do a
 610  * state transition.
 611  */
 612 
 613 #define CFS_TIMEOUT(FSCP, ERROR) \
 614         (ERROR && CFS_ISFS_SNR(FSCP) && \
 615         (((ERROR) == ETIMEDOUT) || ((ERROR) == EIO)))
 616 
 617 /*
 618  * Macros to assert that cachefs fscache and cnode are in
 619  * sync with NFSv4. Note that NFSv4 always passes-through
 620  * the vnode calls directly to the backfilesystem. For
 621  * this to work:
 622  * (1) cachefs is always setup for connected operation,
 623  * (2) cachefs options (example disconnectable (snr), nonshared, etc)
 624  *     are disabled, and
 625  * (3) the back filesystem vnode pointer always exists
 626  *      (except after a remove operation)
 627  * (4) the front filesystem vnode pointer is always NULL.
 628  */
 629 #ifdef DEBUG
 630 #define CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp) \
 631         if (CFS_ISFS_BACKFS_NFSV4(fscp)) { \
 632                 ASSERT((fscp)->fs_info.fi_mntflags == CFS_BACKFS_NFSV4); \
 633                 ASSERT((fscp)->fs_cdconnected == CFS_CD_CONNECTED); \
 634         }
 635 #define CFS_BACKFS_NFSV4_ASSERT_CNODE(cp) \
 636         if (CFS_ISFS_BACKFS_NFSV4(fscp)) { \
 637                 if (MUTEX_HELD(&cp->c_statelock)) { \
 638                         ASSERT((cp)->c_backvp != NULL || \
 639                                 ((cp)->c_flags & CN_DESTROY) != 0); \
 640                         ASSERT((cp)->c_frontvp == NULL); \
 641                 } else { \
 642                         mutex_enter(&(cp)->c_statelock); \
 643                         ASSERT((cp)->c_backvp != NULL || \
 644                                 ((cp)->c_flags & CN_DESTROY) != 0); \
 645                         ASSERT((cp)->c_frontvp == NULL); \
 646                         mutex_exit(&cp->c_statelock); \
 647                 } \
 648         }
 649 #else
 650 #define CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp)
 651 #define CFS_BACKFS_NFSV4_ASSERT_CNODE(cp)
 652 #endif  /* DEBUG */
 653 
 654 #ifdef CFSDEBUG
     /*
      * When CFS_ISFS_BACKFS_NFSV4(fscp) is true and CFSDEBUG_VOPS_NFSV4 is
      * set in cachefsdebug, call printf with x.  x is pasted directly after
      * `printf', so it must be a complete parenthesized argument list, e.g.
      * ("fmt %d\n", val).  Expands to nothing when CFSDEBUG is not defined.
      */
 655 #define CFS_DPRINT_BACKFS_NFSV4(fscp, x) \
 656         if (CFS_ISFS_BACKFS_NFSV4(fscp)) { \
 657                 CFS_DEBUG(CFSDEBUG_VOPS_NFSV4) \
 658                         printf x; \
 659         }
 660 #else
 661 #define CFS_DPRINT_BACKFS_NFSV4(fscp, x)
 662 #endif /* CFSDEBUG */
 663 
 664 /*
 665  * cachefs_allocmap and cfs_cachefs_metadata are stored on disk,
 666  * so they need to be the same 32-bit vs. 64-bit.
 667  */
 668 
 669 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
 670 #pragma pack(4)
 671 #endif
 672 
 673 /*
 674  * Large file support. The start offset of the cached file can be
 675  * greater than 2GB, and by coalescing the different chunks we may
 676  * end up having a chunk of size > 2GB.
 677  */
 678
 679 struct cachefs_allocmap {
 680         u_offset_t              am_start_off;   /* Start offset of this chunk */
 681         u_offset_t              am_size;        /* size of this chunk */
 682 };
 683 
 684 #define C_MAX_ALLOCINFO_SLOTS   32      /* entries in a metadata allocmap array */
 685
 686 /*
 687  * CFS fastsymlinks. For symlink of size < C_FSL_SIZE, the symlink
 688  * is stored in the cnode allocmap array.  C_FSL_SIZE is therefore the
      * byte size of that whole array.
 689  */
 690 #define C_FSL_SIZE      (sizeof (struct cachefs_allocmap) * \
 691                         C_MAX_ALLOCINFO_SLOTS)
 692 
 693 /*
 694  * Structure representing a cached object in memory.  It uses the native
      * vattr, fid_t and timestruc_t types.
      * NOTE(review): per the fastsymlink comment, md_allocinfo may hold
      * symlink text instead of allocation chunks for short symlinks -- confirm
      * which md_flags bit selects that.
 695  */
 696 struct cachefs_metadata {
 697         struct vattr            md_vattr;       /* attributes */
 698         o_mode_t                md_aclclass;    /* CLASS_OBJ perm for ACL */
 699         ushort_t                md_pad1;        /* compiler padding */
 700         fid_t                   md_cookie;      /* back fid */
 701         int                     md_flags;       /* various flags */
 702         uint_t                  md_rlno;        /* rl entry */
 703         enum cachefs_rl_type    md_rltype;      /* rl type */
 704         int                     md_consttype;   /* type of consistency */
 705         fid_t                   md_fid;         /* fid of front file */
 706         uint_t                  md_frontblks;   /* # blks used in frontfs */
 707         uint_t                  md_gen;         /* fid uniquifier */
 708         struct cfs_cid          md_parent;      /* id of parent */
 709         timestruc_t             md_timestamp;   /* front file timestamp */
 710         timestruc_t             md_x_time;      /* see consistency routines */
 711         timestruc_t             md_localmtime;  /* persistent local mtime */
 712         timestruc_t             md_localctime;  /* persistent local ctime */
 713         uint_t                  md_resettimes;  /* when to reset local times */
 714         ino64_t                 md_localfileno; /* persistent local inum */
 715         uint_t                  md_resetfileno; /* when to reset local fileno */
 716         uint_t                  md_seq;         /* seq number for putpage */
 717         int                     md_allocents;   /* nbr of entries in allocmap */
 718         struct cachefs_allocmap md_allocinfo[C_MAX_ALLOCINFO_SLOTS];
 719 };
 720 typedef struct cachefs_metadata cachefs_metadata_t;
 721 
 722 #if (defined(_SYSCALL32) && defined(_LP64))
 723 
 724 /*
 725  * fid_t is long aligned, so user fid could be only 4 byte aligned.
 726  * Since vnode/vfs calls require fid_t (which would be 8 byte aligned in
 727  * _LP64), we would have to copy the user's value (and on-disk data) in/out.
 728  */
 729 /* on-disk metadata structure - fid aligned to int, time is 32-bit (cfs_vattr, cfs_fid_t and cfs_timestruc_t stand in for the native types) */
 730 
 731 struct cfs_cachefs_metadata {
 732         struct cfs_vattr        md_vattr;       /* attributes */
 733         o_mode_t                md_aclclass;    /* CLASS_OBJ perm for ACL; no md_pad1 member follows it here */
 734         cfs_fid_t               md_cookie;      /* back fid */
 735         int                     md_flags;       /* various flags (MD_*) */
 736         uint_t                  md_rlno;        /* rl entry */
 737         enum cachefs_rl_type    md_rltype;      /* rl type */
 738         int                     md_consttype;   /* type of consistency */
 739         cfs_fid_t               md_fid;         /* fid of front file */
 740         uint_t                  md_frontblks;   /* # blks used in frontfs */
 741         uint_t                  md_gen;         /* fid uniquifier */
 742         struct cfs_cid          md_parent;      /* id of parent */
 743         cfs_timestruc_t         md_timestamp;   /* front file timestamp */
 744         cfs_timestruc_t         md_x_time;      /* see consistency routines */
 745         cfs_timestruc_t         md_localmtime;  /* persistent local mtime */
 746         cfs_timestruc_t         md_localctime;  /* persistent local ctime */
 747         uint_t                  md_resettimes;  /* when to reset local times */
 748         ino64_t                 md_localfileno; /* persistent local inum */
 749         uint_t                  md_resetfileno; /* when to reset local fileno */
 750         uint_t                  md_seq;         /* seq number for putpage */
 751         int                     md_allocents;   /* nbr of entries in allocmap */
 752         struct cachefs_allocmap md_allocinfo[C_MAX_ALLOCINFO_SLOTS]; /* the allocmap array counted by md_allocents */
 753 };
 754 typedef struct cfs_cachefs_metadata cfs_cachefs_metadata_t;
 755 
 756 #else /* not _SYSCALL32 && _LP64 */
 757 
 758 typedef cachefs_metadata_t      cfs_cachefs_metadata_t; /* here the cfs_ names are plain aliases of the native structure */
 759 
 760 #define cfs_cachefs_metadata    cachefs_metadata
 761 
 762 #endif /* _SYSCALL32 && _LP64 */
 763 
 764 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
 765 #pragma pack()
 766 #endif /* _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 */
 767 
 768 /*
 769  * Various flags to be stored in md_flags field of the metadata (bits 0x20, 0x80 and 0x100 are not assigned below).
 770  */
 771 #define MD_CREATEDONE   0x1             /* create was done to backfs */
 772 #define MD_POPULATED    0x2             /* front file or dir is populated */
 773 #define MD_FILE         0x4             /* front file or dir exists */
 774 #define MD_FASTSYMLNK   0x8             /* fast symbolic link */
 775 #define MD_PACKED       0x10            /* file is packed */
 776 #define MD_INVALREADDIR 0x40            /* repopulate on readdir */
 777 #define MD_PUTPAGE      0x200           /* we have already logged a putpage */
 778 #define MD_FREE         0x400           /* not used */
 779 #define MD_PUSHDONE     0x800           /* set if file pushed to back fs */
 780 #define MD_MAPPING      0x1000          /* set if cid mapping space written */
 781 #define MD_ACL          0x2000          /* file has a cached acl */
 782 #define MD_ACLDIR       0x4000          /* front `dir' exists for holding acl */
 783 #define MD_LOCALMTIME   0x8000          /* do not overwrite md_localmtime */
 784 #define MD_LOCALCTIME   0x10000         /* do not overwrite md_localctime */
 785 #define MD_LOCALFILENO  0x20000         /* do not overwrite md_localfileno */
 786 #define MD_NEEDATTRS    0x40000         /* new attrs needed at next check */
 787 
 788 #define C_MAX_MOUNT_FSCDIRNAME          128 /* size of cfs_cacheid[] */
 789 /*
 790  * cachefs mount structure and related data
 791  */
 792 struct cachefs_mountargs {
 793         struct cachefsoptions   cfs_options;    /* consistency modes, etc. */
 794         char                    *cfs_fsid;      /* CFS ID for file system */
 795         char                    cfs_cacheid[C_MAX_MOUNT_FSCDIRNAME];
 796         /* CFS fscdir name */
 797         char                    *cfs_cachedir;  /* path for this cache dir */
 798         char                    *cfs_backfs;    /* back filesystem dir */
 799         uint_t                  cfs_acregmin;   /* same as nfs values */
 800         uint_t                  cfs_acregmax;
 801         uint_t                  cfs_acdirmin;
 802         uint_t                  cfs_acdirmax;
 803         char                    *cfs_hostname;  /* server name */
 804         char                    *cfs_backfsname; /* back filesystem name */
 805 };
 806 
 807 #ifdef _SYSCALL32
 808 struct cachefs_mountargs32 { /* same members as cachefs_mountargs, with char * as caddr32_t and uint_t as uint32_t */
 809         struct cachefsoptions   cfs_options;    /* consistency modes, etc. */
 810         caddr32_t               cfs_fsid;       /* CFS ID for file system */
 811         char                    cfs_cacheid[C_MAX_MOUNT_FSCDIRNAME];
 812         /* CFS fscdir name */
 813         caddr32_t               cfs_cachedir;   /* path for this cache dir */
 814         caddr32_t               cfs_backfs;     /* back filesystem dir */
 815         uint32_t                cfs_acregmin;   /* same as nfs values */
 816         uint32_t                cfs_acregmax;
 817         uint32_t                cfs_acdirmin;
 818         uint32_t                cfs_acdirmax;
 819         caddr32_t               cfs_hostname;  /* server name */
 820         caddr32_t               cfs_backfsname; /* back filesystem name */
 821 };
 822 #endif /* _SYSCALL32 */
 823 
 824 /*
 825  * struct cachefsops - consistency modules: a table of function pointers. The empty () parameter lists are old-style declarations, so call arguments are not type-checked.
 826  */
 827 struct cachefsops {
 828         int     (*co_init_cobject)();
 829         int     (*co_check_cobject)();
 830         void    (*co_modify_cobject)();
 831         void    (*co_invalidate_cobject)();
 832         void    (*co_convert_cobject)();
 833 };
 834 
 835 
 836 
 837 /*
 838  * The attrcache file consists of an attrcache_header structure and an
 839  * array of attrcache_slot structures (one per front file).
 840  */
 841 
 842 /*
 843  * Attrcache file format
 844  *
 845  *      Header
 846  *      Offset array (# of entries = file group size)
 847  *      alloc list      (1 bit per entry, 0 = free) Note that the
 848  *                      file will be extended as needed
 849  *      attrcache entries
 850  *
 851  */
 852 struct attrcache_header {
 853         uint_t          ach_count;              /* number of entries */
 854         int             ach_nffs;               /* number of front files */
 855         int             ach_nblks;              /* number of allocated blocks */
 856         uint_t          ach_rlno;               /* rl entry for this file */
 857         enum cachefs_rl_type ach_rl_current;    /* which list we're on */
 858 };
 859 
 860 /*
 861  * We assume that the seek offset to metadata will never be > 2GB.
 862  * The filegrp size is 256 and the current calculations of the sizes
 863  * of the data structures show that the ach_offset value here will not
 864  * be > 2GB; ach_offset is declared as a 31-bit field, so it cannot hold more.
 865  */
 866 
 867 struct attrcache_index {
 868         uint_t  ach_written:1;          /* 1 if metadata written */
 869         uint_t  ach_offset:31;          /* seek offset to metadata */
 870 };
 871 
 872 /*
 873  * cnode structure, one per file.  c_attr, c_cookie and c_fileno are shorthand for nested members.
 874  */
 875 #define c_attr                  c_metadata.md_vattr
 876 #define c_cookie                c_metadata.md_cookie
 877 #define c_fileno                c_id.cid_fileno
 878 
 879 /*
 880  * LOCKS:       c_rwlock        Read / Write serialization
 881  *              c_statelock     Protects most other fields in the cnode
 882  *              c_popcv         Condvar used to prevent routines from nuking
 883  *                              a cnode which is currently being populated.
 884  *                              Threads blocked on it will be woken when the
 885  *                              populate completes.
 886  *              c_iocv          broadcast, but never waited on - unused?
 887  *              c_iomutex       c_nio and c_ioflags
 888  *
 889  * Fields protected by other locks:
 890  *
 891  *              c_next          fg_cnodelock in the filegrp struct
 892  *              c_idleback      fs_idlelock in fscache struct
 893  *              c_idlefront     fs_idlelock in fscache struct
 894  *
 895  * Large File support: c_size goes to u_offset_t and the apopoff type
 896  * goes to offset_t.
 897  */
 898 struct cnode {
 899         int             c_flags;        /* CN_* flags, see below */
 900         struct cnode    *c_next;        /* next cnode in fgp list */
 901         struct cnode    *c_idleback;    /* idle list back ptr */
 902         struct cnode    *c_idlefront;   /* idle list front ptr */
 903         struct vnode    *c_frontvp;     /* front vnode pointer */
 904         struct vnode    *c_backvp;      /* back vnode pointer */
 905         struct vnode    *c_acldirvp;    /* dir for storing dflt ACL */
 906         u_offset_t      c_size;         /* client view of the size */
 907         struct filegrp  *c_filegrp;     /* back pointer to filegrp */
 908         struct cfs_cid  c_id;           /* unique file number */
 909         int             c_invals;       /* # of recent dir invals */
 910         int             c_usage;        /* Usefulness of cache */
 911         struct vnode    *c_vnode;       /* pointer to vnode */
 912         struct cachefs_metadata c_metadata;     /* cookie, ... */
 913         int             c_error;
 914         kmutex_t        c_statelock;    /* statelock */
 915         krwlock_t       c_rwlock;       /* serialize write/setattr requests */
 916         kcondvar_t      c_popcv;        /* cnode populate cond var. */
 917         kthread_id_t    c_popthrp;      /* threadp performing pop */
 918         vnode_t         *c_unldvp;      /* dir to unlink in */
 919         char            *c_unlname;     /* name to unlink */
 920         cred_t          *c_unlcred;     /* creds for unlink */
 921         int             c_nio;          /* Number of io's pending */
 922         uint_t          c_ioflags;      /* io flags (CIO_*) */
 923         kcondvar_t      c_iocv;         /* IO cond var. */
 924         kmutex_t        c_iomutex;      /* protects c_nio and c_ioflags */
 925         cred_t          *c_cred;
 926         int             c_ipending;     /* 1 if inactive is pending */
 927         int             c_mapcnt;       /* number of mapped blocks */
 928         offset_t        c_apopoffset;   /* offset for async pop */
 929         uint_t          c_apoplen;      /* length for async pop */
 930         u_offset_t      c_modaddr;      /* writepage offset */
 931         int             c_rdcnt;        /* # of read opens for backvp */
 932         int             c_wrcnt;        /* # of write opens for backvp */
 933 };
 934 typedef struct cnode cnode_t;
 935 
 936 extern struct kmem_cache *cachefs_cnode_cache;
 937 
 938 /*
 939  * Directory caching parameters - First cut...
 940  */
 941 #define CFS_DIRCACHE_COST       3
 942 #define CFS_DIRCACHE_INVAL      3
 943 #define CFS_DIRCACHE_ENABLE     (CFS_DIRCACHE_INVAL * CFS_DIRCACHE_COST) /* 3 * 3 = 9 */
 944 
 945 /*
 946  * Conversion macros
 947  */
 948 #define VTOC(VP)                ((struct cnode *)((void *)((VP)->v_data))) /* vnode -> cnode, via v_data */
 949 #define CTOV(CP)                ((CP)->c_vnode) /* cnode -> vnode */
 950 #define VFS_TO_FSCACHE(VFSP)    ((struct fscache *)((void *)((VFSP)->vfs_data))) /* vfs -> fscache, via vfs_data */
 951 #define C_TO_FSCACHE(CP)        (VFS_TO_FSCACHE(CTOV(CP)->v_vfsp)) /* cnode -> fscache of its vnode's vfs */
 952 
 953 /*
 954  * Various flags stored in the c_flags field of the cnode structure (0x20 and 0x10000 through 0x80000 are not assigned below).
 955  */
 956 #define CN_NOCACHE      0x1             /* no-cache mode */
 957 #define CN_DESTROY      0x2             /* destroy when inactive */
 958 #define CN_ROOT         0x4             /* root of the file system */
 959 #define CN_IDLE         0x8             /* file is idle */
 960 #define CN_NEEDOPEN     0x10            /* need to open backvp */
 961 #define CN_UPDATED      0x40            /* Metadata was updated - needs sync */
 962 #define CDIRTY          0x80            /* NOTE(review): only flag here without the CN_ prefix; meaning is not documented */
 963 #define CN_NEED_FRONT_SYNC      0x100   /* front file needs to be sync'd */
 964 #define CN_ALLOC_PENDING        0x200   /* Need to alloc attr cache entry */
 965 #define CN_STALE        0x400           /* cnode is stale */
 966 #define CN_MODIFIED     0x800           /* Object has been written to */
 967 #define CN_POPULATION_PENDING   0x1000  /* Population data needs to be sync'd */
 968 #define CN_ASYNC_POPULATE       0x2000  /* async population pending */
 969 #define CN_ASYNC_POP_WORKING    0x4000  /* async population in progress */
 970 #define CN_PENDRM       0x8000          /* hold off unlink until reconnected */
 971 #define CN_MAPWRITE     0x100000        /* mmapped file that is being written */
 972 #define CN_CMODINPROG   0x200000        /* writepage() in progress */
 973 
 974 /*
 975  * io flags (in c_ioflags)
 976  */
 977 #define CIO_PUTPAGES    0x1             /* putpage pending: off==0, len==0 */
 978 
 979 #define CFS_MAX_THREADS         5
 980 #define CFS_ASYNC_TIMEOUT       (60 * hz) /* presumably 60 seconds in clock ticks, if hz is ticks per second - confirm */
 981 
 982 enum cachefs_cmd { /* command codes; struct cachefs_req stores one in cfs_cmd */
 983         CFS_INVALID,
 984         CFS_CACHE_SYNC,
 985         CFS_PUTPAGE,
 986         CFS_IDLE,
 987         CFS_POPULATE,
 988         CFS_NOOP
 989 };
 990 
 991 struct cachefs_fs_sync_req { /* used as the cu_fs_sync member of cachefs_req.cfs_req_u */
 992         struct cachefscache *cf_cachep;
 993 };
 994 
 995 struct cachefs_idle_req { /* used as the cu_idle member of cachefs_req.cfs_req_u */
 996         vnode_t *ci_vp;
 997 };
 998 
 999 /*
1000  * Large File support: the offset in the vnode for a putpage request
1001  * can now be greater than 2GB, so cp_off is an offset_t.
1002  */
1003 
1004 struct cachefs_putpage_req { /* used as the cu_putpage member of cachefs_req.cfs_req_u */
1005         vnode_t *cp_vp;
1006         offset_t cp_off;
1007         int cp_len;
1008         int cp_flags;
1009 };
1010 
1011 /*
1012  * Large File support: the offset in the vnode for a populate request
1013  * can now be greater than 2GB, so cpop_off is an offset_t.
1014  */
1015 
1016 struct cachefs_populate_req { /* used as the cu_populate member of cachefs_req.cfs_req_u */
1017         vnode_t *cpop_vp;
1018         offset_t cpop_off;
1019         size_t cpop_size;
1020 };
1021 
1022 struct cachefs_req { /* one request: a command code plus a union of the per-request structures */
1023         struct cachefs_req      *cfs_next;      /* link to another request */
1024         enum cachefs_cmd        cfs_cmd;        /* Command to execute */
1025         cred_t *cfs_cr;
1026         union {
1027                 struct cachefs_fs_sync_req cu_fs_sync;
1028                 struct cachefs_idle_req cu_idle;
1029                 struct cachefs_putpage_req cu_putpage;
1030                 struct cachefs_populate_req cu_populate;
1031         } cfs_req_u; /* NOTE(review): presumably cfs_cmd selects which member is valid - confirm */
1032         kmutex_t cfs_req_lock;  /* Protects contents */
1033 };
1034 
1035 extern struct kmem_cache *cachefs_req_cache;
1036 
1037 /*
1038  * Large file support: We allow cachefs to understand the 64 bit inode type (cf_fileno is an ino64_t).
1039  */
1040 
1041 struct cachefs_fid {
1042         ushort_t        cf_len;
1043         ino64_t         cf_fileno;
1044         uint_t          cf_gen;
1045 };
1046 #define CFS_FID_SIZE    (sizeof (struct cachefs_fid) - sizeof (ushort_t)) /* whole struct size less the cf_len member */
1047 
1048 /*
1049  *
1050  * cachefs kstat stuff.  each time you mount a cachefs filesystem, it
1051  * gets a unique number.  it'll get that number again if you remount
1052  * the same thing.  the number is unique until reboot, but it doesn't
1053  * survive reboots.
1054  *
1055  * each cachefs kstat uses this per-filesystem identifier.  to get the
1056  * valid identifiers, the `cachefs.0.key' kstat has a mapping of all
1057  * the available filesystems.  its structure, cachefs_kstat_key, is
1058  * below.
1059  *
1060  */
1061 
1062 typedef struct cachefs_kstat_key {
1063         int ks_id; /* presumably the per-filesystem identifier described above - confirm */
1064         int ks_mounted;
1065         uint64_t ks_vfsp;
1066         uint64_t ks_mountpoint;
1067         uint64_t ks_backfs;
1068         uint64_t ks_cachedir;
1069         uint64_t ks_cacheid;
1070 } cachefs_kstat_key_t;
1071 extern cachefs_kstat_key_t *cachefs_kstat_key;
1072 extern int cachefs_kstat_key_n; /* presumably the number of entries behind cachefs_kstat_key - confirm */
1073 
1074 /*
1075  * cachefs debugging aid.  cachefs_debug_info_t is a cookie that we
1076  * can keep around to see what was happening at a certain time.
1077  *
1078  * for example, if we have a deadlock on the cnode's statelock
1079  * (i.e. someone is not letting go of it), we can add a
1080  * cachefs_debug_info_t * to the cnode structure, and call
1081  * cachefs_debug_save() whenever we grab the lock.  then, when we're
1082  * deadlocked, we can see what was going on when we grabbed the lock
1083  * in the first place, and (hopefully) why we didn't release it.
1084  */
1085 
1086 #define CACHEFS_DEBUG_DEPTH             (16) /* number of slots in cdb_stack[] */
1087 typedef struct cachefs_debug_info {
1088         char            *cdb_message;   /* arbitrary message */
1089         uint_t          cdb_flags;      /* arbitrary flags */
1090         int             cdb_int;        /* arbitrary int */
1091         void            *cdb_pointer;   /* arbitrary pointer */
1092         uint_t          cdb_count;      /* how many times called */
1093 
1094         cachefscache_t  *cdb_cachep;    /* relevant cachep (maybe undefined) */
1095         struct fscache  *cdb_fscp;      /* relevant fscache */
1096         struct cnode    *cdb_cnode;     /* relevant cnode */
1097         vnode_t         *cdb_frontvp;   /* relevant front vnode */
1098         vnode_t         *cdb_backvp;    /* relevant back vnode */
1099 
1100         kthread_id_t    cdb_thread;     /* thread who called */
1101         hrtime_t        cdb_timestamp;  /* when */
1102         int             cdb_depth;      /* depth of saved stack */
1103         pc_t            cdb_stack[CACHEFS_DEBUG_DEPTH]; /* stack trace */
1104         struct cachefs_debug_info *cdb_next; /* pointer to next */
1105 } cachefs_debug_info_t;
1106 
1107 /*
1108  * cachefs function prototypes (declared only when both _KERNEL and __STDC__ are defined)
1109  */
1110 #if defined(_KERNEL) && defined(__STDC__)
1111 extern int cachefs_getcookie(vnode_t *, struct fid *, struct vattr *,
1112                 cred_t *, uint32_t);
1113 cachefscache_t *cachefs_cache_create(void);
1114 void cachefs_cache_destroy(cachefscache_t *cachep);
1115 int cachefs_cache_activate_ro(cachefscache_t *cachep, vnode_t *cdvp);
1116 void cachefs_cache_activate_rw(cachefscache_t *cachep);
1117 void cachefs_cache_dirty(struct cachefscache *cachep, int lockit);
1118 int cachefs_cache_rssync(struct cachefscache *cachep);
1119 void cachefs_cache_sync(struct cachefscache *cachep);
1120 uint_t cachefs_cache_unique(cachefscache_t *cachep);
1121 void cachefs_do_req(struct cachefs_req *);
1122 
1123 /* cachefs_cnode.c */
1124 void cachefs_cnode_idle(struct vnode *vp, cred_t *cr);
1125 void cachefs_cnode_idleclean(fscache_t *fscp, int unmount);
1126 int cachefs_cnode_inactive(register struct vnode *vp, cred_t *cr);
1127 void cachefs_cnode_listadd(struct cnode *cp);
1128 void cachefs_cnode_listrem(struct cnode *cp);
1129 void cachefs_cnode_free(struct cnode *cp);
1130 void cachefs_cnode_cleanfreelist(); /* NOTE(review): empty () leaves the arguments unchecked; confirm the definition's parameters and spell them out */
1131 void cachefs_cnode_idleadd(struct cnode *cp);
1132 void cachefs_cnode_idlerem(struct cnode *cp);
1133 int cachefs_cnode_find(filegrp_t *fgp, cfs_cid_t *cidp, fid_t *cookiep,
1134     struct cnode **cpp, struct vnode *vp, vattr_t *vap);
1135 int cachefs_cnode_make(cfs_cid_t *cidp, fscache_t *fscp, fid_t *cookiep,
1136     vattr_t *vap, vnode_t *backvp, cred_t *cr, int flag, cnode_t **cpp);
1137 int cachefs_cid_inuse(filegrp_t *fgp, cfs_cid_t *cidp);
1138 int cachefs_fileno_inuse(fscache_t *fscp, ino64_t fileno);
1139 int cachefs_cnode_create(fscache_t *fscp, vattr_t *vap, int flag,
1140     cnode_t **cpp);
1141 void cachefs_cnode_move(cnode_t *cp);
1142 int cachefs_cnode_lostfound(cnode_t *cp, char *rname);
1143 void cachefs_cnode_sync(cnode_t *cp);
1144 void cachefs_cnode_traverse(fscache_t *fscp, void (*routinep)(cnode_t *));
1145 void cachefs_cnode_stale(cnode_t *cp);
1146 void cachefs_cnode_setlocalstats(cnode_t *cp);
1147 void cachefs_cnode_disable_caching(cnode_t *cp);
1148 
1149 void cachefs_enable_caching(struct fscache *);
1150 
1151 /* cachefs_fscache.c */
1152 void fscache_destroy(fscache_t *);
1153 
1154 /* cachefs_ioctl.h - NOTE(review): the sibling labels here all name .c files; confirm whether cachefs_ioctl.c was meant */
1155 int cachefs_pack_common(vnode_t *vp, cred_t *cr);
1156 void cachefs_inum_register(fscache_t *fscp, ino64_t real, ino64_t fake);
1157 ino64_t cachefs_inum_real2fake(fscache_t *fscp, ino64_t real);
1158 
1159 
1160 /* cachefs_subr.c */
1161 int cachefs_sync_metadata(cnode_t *);
1162 int cachefs_cnode_cnt(int);
1163 int cachefs_getbackvp(struct fscache *, struct cnode *);
1164 int cachefs_getfrontfile(cnode_t *);
1165 void cachefs_removefrontfile(cachefs_metadata_t *mdp, cfs_cid_t *cidp,
1166     filegrp_t *fgp);
1167 void cachefs_nocache(cnode_t *);
1168 void cachefs_inval_object(cnode_t *);
1169 void make_ascii_name(cfs_cid_t *cidp, char *strp);
1170 int cachefs_async_halt(struct cachefs_workq *, int);
1171 int cachefs_async_okay(void);
1172 int cachefs_check_allocmap(cnode_t *cp, u_offset_t off);
1173 void cachefs_update_allocmap(cnode_t *, u_offset_t, size_t);
1174 int cachefs_cachesymlink(struct cnode *cp, cred_t *cr);
1175 int cachefs_stuffsymlink(cnode_t *cp, caddr_t buf, int buflen);
1176 int cachefs_readlink_back(cnode_t *cp, cred_t *cr, caddr_t *bufp, int *buflenp);
1177 /* Commented-out variant that takes the cnode first; the active prototype below takes it last.
1178  * void cachefs_cluster_allocmap(struct cnode *, u_offset_t, u_offset_t *,
1179  *      size_t *, size_t);
1180  */
1181 void cachefs_cluster_allocmap(u_offset_t, u_offset_t *, size_t *, size_t,
1182                 struct cnode *);
1183 int cachefs_populate(cnode_t *, u_offset_t, size_t, vnode_t *, vnode_t *,
1184         u_offset_t, cred_t *);
1185 int cachefs_stats_kstat_snapshot(kstat_t *, void *, int);
1186 cachefs_debug_info_t *cachefs_debug_save(cachefs_debug_info_t *, int,
1187     char *, uint_t, int, void *, cachefscache_t *, struct fscache *,
1188     struct cnode *);
1189 void cachefs_debug_show(cachefs_debug_info_t *);
1190 uint32_t cachefs_cred_checksum(cred_t *cr);
1191 int cachefs_frontfile_size(cnode_t *cp, u_offset_t length);
1192 int cachefs_req_create(void *, void *, int);
1193 void cachefs_req_destroy(void *, void *);
1194 int cachefs_stop_cache(cnode_t *);
1195 
1196 
1197 /* cachefs_resource.c (the cachefs_rl_debug_* prototypes are declared only under CFSRLDEBUG) */
1198 void cachefs_rlent_moveto_nolock(cachefscache_t *cachep,
1199     enum cachefs_rl_type type, uint_t entno, size_t);
1200 void cachefs_rlent_moveto(cachefscache_t *, enum cachefs_rl_type, uint_t,
1201     size_t);
1202 void cachefs_rlent_verify(cachefscache_t *, enum cachefs_rl_type, uint_t);
1203 void cachefs_rl_changefileno(cachefscache_t *cachep, uint_t entno,
1204         ino64_t fileno);
1205 int cachefs_rlent_data(cachefscache_t *cachep, rl_entry_t *valp,
1206     uint_t *entnop);
1207 void cachefs_move_modified_to_mf(cachefscache_t *cachep, fscache_t *fscp);
1208 int cachefs_allocblocks(cachefscache_t *, size_t, enum cachefs_rl_type);
1209 void cachefs_freeblocks(cachefscache_t *, size_t, enum cachefs_rl_type);
1210 void cachefs_freefile(cachefscache_t *);
1211 int cachefs_allocfile(cachefscache_t *);
1212 int cachefs_rl_alloc(struct cachefscache *cachep, rl_entry_t *valp,
1213     uint_t *entnop);
1214 int cachefs_rl_attrc(struct cachefscache *, int, int);
1215 void cachefs_cachep_worker_thread(cachefscache_t *);
1216 void cachefs_rl_cleanup(cachefscache_t *);
1217 int cachefs_rl_entry_get(cachefscache_t *, uint_t, rl_entry_t **);
1218 #ifdef CFSRLDEBUG
1219 void cachefs_rl_debug_save(rl_entry_t *);
1220 void cachefs_rl_debug_show(rl_entry_t *);
1221 void cachefs_rl_debug_destroy(rl_entry_t *);
1222 #endif /* CFSRLDEBUG */
1223 
1224 /* cachefs_log.c - log setup/teardown first, then one cachefs_log_<operation>() prototype per logged operation */
1225 int cachefs_log_kstat_snapshot(kstat_t *, void *, int);
1226 void cachefs_log_process_queue(cachefscache_t *, int);
1227 int cachefs_log_logfile_open(cachefscache_t *, char *);
1228 struct cachefs_log_cookie
1229         *cachefs_log_create_cookie(struct cachefs_log_control *);
1230 void cachefs_log_error(cachefscache_t *, int, int);
1231 void cachefs_log_destroy_cookie(struct cachefs_log_cookie *);
1232 
1233 void cachefs_log_mount(cachefscache_t *, int, struct vfs *,
1234     fscache_t *, char *, enum uio_seg, char *);
1235 void cachefs_log_umount(cachefscache_t *, int, struct vfs *);
1236 void cachefs_log_getpage(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1237     uid_t, u_offset_t, size_t);
1238 void cachefs_log_readdir(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1239     uid_t, u_offset_t, int);
1240 void cachefs_log_readlink(cachefscache_t *, int, struct vfs *,
1241     fid_t *, ino64_t, uid_t, size_t);
1242 void cachefs_log_remove(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1243     uid_t);
1244 void cachefs_log_rmdir(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1245     uid_t);
1246 void cachefs_log_truncate(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1247     uid_t, u_offset_t);
1248 void cachefs_log_putpage(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1249     uid_t, u_offset_t, size_t);
1250 void cachefs_log_create(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1251     uid_t);
1252 void cachefs_log_mkdir(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1253     uid_t);
1254 void cachefs_log_rename(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1255     int, uid_t);
1256 void cachefs_log_symlink(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1257     uid_t, int);
1258 void cachefs_log_populate(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1259     u_offset_t, size_t);
1260 void cachefs_log_csymlink(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1261     int);
1262 void cachefs_log_filldir(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1263     u_offset_t);
1264 void cachefs_log_mdcreate(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1265     uint_t);
1266 void cachefs_log_gpfront(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1267     uid_t, u_offset_t, uint_t);
1268 void cachefs_log_rfdir(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1269     uid_t);
1270 void cachefs_log_ualloc(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1271     u_offset_t, size_t);
1272 void cachefs_log_calloc(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1273     u_offset_t, size_t);
1274 void cachefs_log_nocache(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t);
1275 
1276 /* cachefs_vnops.c - vnode-operation helpers; note cachefs_putpage_common takes a 64-bit offset_t offset */
1277 struct vnodeops *cachefs_getvnodeops(void);
1278 int cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
1279     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr);
1280 int cachefs_putpage_common(struct vnode *vp, offset_t off,
1281     size_t len, int flags, cred_t *cr);
1282 ino64_t cachefs_fileno_conflict(fscache_t *fscp, ino64_t old);
1283 int cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr,
1284     vnode_t *vp);
1285 int cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr,
1286     vnode_t *vp);
1287 int cachefs_cacheacl(cnode_t *, vsecattr_t *);
1288 void cachefs_purgeacl(cnode_t *);
1289 int cachefs_vtype_aclok(vnode_t *);
1290 
1291 /* cachefs_vfsops.c */
1292 int cachefs_init_vfsops(int);
1293 int cachefs_init_vnops(char *);
1294 void cachefs_kstat_mount(struct fscache *, char *, char *, char *, char *);
1295 void cachefs_kstat_umount(int);
1296 int cachefs_kstat_key_update(kstat_t *, int);
1297 int cachefs_kstat_key_snapshot(kstat_t *, void *, int);
1298 
1299 extern void cachefs_workq_init(struct cachefs_workq *);
1300 extern void cachefs_addqueue(struct cachefs_req *, struct cachefs_workq *);
1301 
1302 
1303 extern void *cachefs_kmem_alloc(size_t, int); /* presumably kmem_alloc()-style wrappers; the defining file is not named here - confirm */
1304 extern void *cachefs_kmem_zalloc(size_t, int);
1305 extern void cachefs_kmem_free(void *, size_t);
1306 extern char *cachefs_strdup(char *);
1307 
1308 #endif /* defined (_KERNEL) && defined (__STDC__) */
1309 
1310 
1311 
1312 #define C_RL_MAXENTS    0x4000          /* Whatever (0x4000 = 16384; NOTE(review): presumably a cap on rl entries - confirm) */
1313 
1314 /*
1315  * ioctls.  All are _IO('f', n) codes; n = 78 and 81 through 86 are used here.
1316  */
1317 #include <sys/ioccom.h> /* for _IO() */
1318 #define _FIOCOD         _IO('f', 78)            /* consistency on demand */
1319 #define _FIOSTOPCACHE   _IO('f', 86)            /* stop using cache */
1320 
1321 #define CACHEFSIO_PACK          _IO('f', 81)
1322 #define CACHEFSIO_UNPACK        _IO('f', 82)
1323 #define CACHEFSIO_UNPACKALL     _IO('f', 83)
1324 #define CACHEFSIO_PACKINFO      _IO('f', 84)
1325 #define CACHEFSIO_DCMD          _IO('f', 85)
1326 
1327 #ifdef __cplusplus
1328 }
1329 #endif
1330 
1331 #endif /* _SYS_FS_CACHEFS_FS_H */