illumos-gate New usr/src/uts/common/brand/lx/sys/lx

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #ifndef _LX_AUTOFS_H
  28 #define _LX_AUTOFS_H
  29 
  30 #pragma ident   "%Z%%M% %I%     %E% SMI"
  31 
  32 /*
  33  * The lx_autofs filesystem exists to emulate the Linux autofs filesystem
  34  * and provide support for the Linux "automount" automounter.
  35  *
  36  *
  37  *
  38  * +++ Linux automounter background.
  39  *
  40  * Linux has two automounters: "amd" and "automount"
  41  *
  42  * 1) "amd" is a userland NFS server.  It basically mounts an NFS filesystem
  43  * at an automount point, and it acts as the NFS server for the mount.  When
  44  * an access is done to that NFS filesystem, the access is redirected by the
  45  * kernel to the "amd" process via rpc.  "amd" then looks up any information
  46  * required to resolve the requests, mounts real NFS filesystems if
  47  * necessary, and returns.  "amd" has its own strange configuration
  48  * mechanism that doesn't seem to be very compatible with Solaris's network
  49  * based automounter map support.
  50  *
  51  * 2) "automount" is the other Linux automounter.  It utilizes a kernel
  52  * filesystem (autofs) to provide its functionality.  Basically, it mounts
  53  * the autofs filesystem at any automounter controlled mount point.  This
  54  * filesystem then intercepts and redirects lookup operations (and only
  55  * lookup ops) to the userland automounter process via a pipe.  (The
  56  * pipe to the automounter is established via mount options when the autofs
  57  * filesystem is mounted.)  When the automounter receives a request via this
  58  * pipe, it does lookups to whatever backing store it's configured to use,
  59  * does mkdir operations on the autofs filesystem, mounts remote NFS
  60  * filesystems on any leaf directories it just created, and signals the
  61  * autofs filesystem via an ioctl to let it know that the lookup can
  62  * continue.
  63  *
  64  *
  65  *
  66  * +++ Linux autofs (and automount daemon) notes
  67  *
  68  * Since we're mimicking the behavior of the Linux autofs filesystem it's
  69  * important to document some of its observed behavior here since there's
  70  * no doubt that in the future this behavior will change.  These comments
  71  * apply to the behavior of the automounter as observed on a system
  72  * running Linux v2.4.21 (autofs is bundled with the Linux kernel).
  73  *
  74  * A) Autofs allows root owned, non-automounter processes to create
  75  * directories in the autofs filesystem.  The autofs filesystem treats the
  76  * automounter's process group as special, but it doesn't prevent root
  77  * processes outside of the automounter's process group from creating new
  78  * directories in the autofs filesystem.
  79  *
  80  * B) Autofs doesn't allow creation of any non-directory entries in the
  81  * autofs filesystem.  No entity can create files (e.g. /bin/touch or
  82  * VOP_CREATE/VOP_SYMLINK/etc.)  The only entries that can exist within
  83  * the autofs filesystem are directories.
  84  *
  85  * C) Autofs only intercepts vop lookup operations.  Notably, it does _not_
  86  * intercept and re-direct vop readdir operations.  This means that the
  87  * observed behavior of the Linux automounter can be considerably different
  88  * from that of the Solaris automounter.  Specifically, on Solaris if an autofs
  89  * mount point is mounted _without_ the -nobrowse option then if a user does
  90  * an ls operation (which translates into a vop readdir operation) then the
  91  * automounter will intercept that operation and list all the possible
  92  * directories and mount points without actually mounting any filesystems.
  93  * Essentially, all automounter managed mount points on Linux will behave
  94  * like "-nobrowse" mount points on Solaris.  Here's an example to
  95  * illustrate this.  If /ws was mounted on Solaris without the -nobrowse
  96  * option and an auto_ws yp map was setup as the backing store for this
  97  * mount point, then an "ls /ws" would list all the keys in the map as
  98  * valid directories, but an "ls /ws" on Linux would list an empty
  99  * directory.
 100  *
 101  * D) NFS mounts are performed by the automount process.  When the automount
 102  * process gets a redirected lookup request, it determines _all_ the
 103  * possible remote mount points for that request, creates directory paths
 104  * via mkdir, and mounts the remote filesystems on the newly created paths.
 105  * So for example, if a machine called mcescher exported /var/crash and
 106  * /var/core, an "ls /net/mcescher" would result in the following actions
 107  * being done by the automounter:
 108  *      mkdir /net/mcescher
 109  *      mkdir /net/mcescher/var
 110  *      mkdir /net/mcescher/var/crash
 111  *      mkdir /net/mcescher/var/core
 112  *      mount mcescher:/var/crash /net/mcescher/var/crash
 113  *      mount mcescher:/var/core /net/mcescher/var/core
 114  * Once the automounter completed the work above it would signal the autofs
 115  * filesystem (via an ioctl) that the lookup could continue.
 116  *
 117  * E.1) Autofs only redirects vop lookup operations for path entries that
 118  * don't already exist in the autofs filesystem.  So for the example above,
 119  * an initial (after the start of the automounter) "ls /net/mcescher" would
 120  * result in a request to the automounter.  A subsequent "ls /net/mcescher"
 121  * would not result in a request to the automounter.  Even if
 122  * /net/mcescher/var/crash and /net/mcescher/var/core were manually unmounted
 123  * after the initial "ls /net/mcescher", a subsequent "ls /net/mcescher"
 124  * would not result in a new request to the automounter.
 125  *
 126  * E.2) Autofs lookup requests that are sent to the automounter only include
 127  * the root directory path component.  So for example, after starting up
 128  * the automounter if a user were to do a "ls /net/mcescher/var/crash", the
 129  * lookup request actually sent to the automounter would just be for
 130  * "mcescher".  (The same request as if the user had done "ls /net/mcescher".)
 131  *
 132  * E.3) The two statements above aren't entirely true.  The Linux
 133  * autofs filesystem will also redirect lookup operations for leaf
 134  * directories that don't have a filesystem mounted on them.  Using the
 135  * example above, if a user did a "ls /net/mcescher", then manually
 136  * unmounted /net/mcescher/var/crash, and then did an "ls
 137  * /net/mcescher/var/crash", this would result in a request for
 138  * "mcescher/var/crash" being sent to the automounter.  The strange thing
 139  * (a Linux bug perhaps) is that the automounter won't do anything with this
 140  * request and the lookup will fail.
 141  *
 142  * F) The autofs filesystem communication protocol (what ioctls it supports
 143  * and what data it passes to the automount process) are versioned.  The
 144  * source for the userland automount daemon (I looked at version v3.1.7)
 145  * seemed to support two versions of the Linux kernel autofs implementation.
 146  * Both versions supported communication via a pipe and the format of the
 147  * structure passed via this pipe was the same.  The difference between the
 148  * two versions was in the functionality supported.  (The v3 version has
 149  * additional ioctls to support automount timeouts.)
 150  *
 151  *
 152  *
 153  * +++ lx_autofs notes
 154  *
 155  * 1) In general, the lx_autofs filesystem tries to mimic the behavior of the
 156  * Linux autofs filesystem with the following exceptions:
 157  *
 158  *      1.1) We don't bother to implement the E.3 functionality listed above
 159  *      since it doesn't appear to be of any use.
 160  *
 161  *      1.2) We only implement v2 of the automounter protocol since
 162  *      implementing v3 would take a _lot_ more work.  If this proves to be a
 163  *      problem we can re-visit this decision later.  (More details about v3
 164  *      support are included in comments below.)
 165  *
 166  * 2) In general, the approach taken for lx_autofs is to keep it as simple
 167  * as possible and to minimize its memory usage.  To do this all information
 168  * about the contents of the lx_autofs filesystem are mirrored in the
 169  * underlying filesystem that lx_autofs is mounted on and most vop operations
 170  * are simply passed onto this underlying filesystem.  This means we don't
 171  * have to implement most of the complex operations that a full filesystem
 172  * normally has to implement.  It also means that most of our filesystem state
 173  * (wrt the contents of the filesystem) doesn't actually have to be stored
 174  * in memory, we can simply go to the underlying filesystem to get it when
 175  * it's requested.  For the purposes of discussion, we'll call the underlying
 176  * filesystem the "backing store."
 177  *
 178  * The backing store is a directory called ".lx_afs" which is created in
 179  * the directory where the lx_autofs filesystem is mounted.  When the lx_autofs
 180  * filesystem is unmounted this backing store directory is deleted.  If this
 181  * directory exists at mount time (perhaps the system crashed while a previous
 182  * lx_autofs instance was mounted at the same location) it will be deleted.
 183  * There are a few implications of using a backing store worth mentioning.
 184  *
 185  *      2.1) lx_autofs can't be mounted on a read only filesystem.  If this
 186  *      proves to be a problem we can probably move the location of the
 187  *      backing store.
 188  *
 189  *      2.2) If the backing store filesystem runs out of space then the
 190  *      automounter process won't be able to create more directories and mount
 191  *      new filesystems.  Of course, strange failures usually happen when
 192  *      filesystems run out of space.
 193  *
 194  * 3) Why aren't we using gfs?  gfs has two different usage models.
 195  *
 196  *      3.1) I'm my own filesystem but i'm using gfs to help with managing
 197  *      readdir operations.
 198  *
 199  *      3.2) I'm a gfs filesystem and gfs is managing all my vnodes
 200  *
 201  * We're not using the 3.1 interfaces because we don't implement readdir
 202  * ourselves.  We pass all readdir operations onto the backing store
 203  * filesystem and utilize its readdir implementation.
 204  *
 205  * We're not using the 3.2 interfaces because they are really designed for
 206  * in memory filesystems where all of the filesystem state is stored in
 207  * memory.  They don't lend themselves to filesystems where part of the
 208  * state is in memory and part of the state is on disk.
 209  *
 210  * For more information on gfs take a look at the block comments in the
 211  * top of gfs.c
 212  */
 213 
 214 #ifdef  __cplusplus
 215 extern "C" {
 216 #endif
 217 
 218 /*
 219  * Note that the name of the actual Solaris filesystem is lx_afs and not
 220  * lx_autofs.  This is because filesystem names are stupidly limited to 8
 221  * characters.
 222  */
  223 #define LX_AUTOFS_NAME                  "lx_afs"
 224 
 225 /*
 226  * Mount options supported.
 227  */
  228 #define LX_MNTOPT_FD                    "fd"
  229 #define LX_MNTOPT_PGRP                  "pgrp"
  230 #define LX_MNTOPT_MINPROTO              "minproto"
  231 #define LX_MNTOPT_MAXPROTO              "maxproto"
 232 
 233 /* Version of the Linux kernel automount protocol we support. */
  234 #define LX_AUTOFS_PROTO_VERSION         2
 235 
 236 /*
 237  * Command structure sent to the automount process from lx_autofs via a pipe.
 238  * This structure is the same for v2 and v3 of the automount protocol
 239  * (the communication pipe is established at mount time).
 240  */
  241 typedef struct lx_autofs_pkt {
  242         int     lap_protover;   /* protocol version number */
  243         int     lap_constant;   /* always set to 0 */
  244         int     lap_id;         /* every pkt must have a unique id */
  245         int     lap_name_len;   /* doesn't include newline or NUL */
  246         char    lap_name[256];  /* path component to look up */
  247 } lx_autofs_pkt_t;
 248 
 249 /*
 250  * Ioctls supported (v2 protocol).
 251  */
  252 #define LX_AUTOFS_IOC_READY             0x00009360 /* arg: int */
  253 #define LX_AUTOFS_IOC_FAIL              0x00009361 /* arg: int */
  254 #define LX_AUTOFS_IOC_CATATONIC         0x00009362 /* arg: <none> */
 255 
 256 /*
 257  * Ioctls not supported (v3 protocol).
 258  *
 259  * Initially we're only going to support v2 of the Linux kernel automount
 260  * protocol.  This means that we don't support the following ioctls.
 261  *
 262  * 1) The protocol version ioctl (by not supporting it the automounter
 263  * will assume version 2).
 264  *
 265  * 2) Automounter timeout ioctls.  For v3 and later the automounter can
 266  * be started with a timeout option.  It will notify the filesystem of
 267  * this timeout and, if any automounter filesystem root directory entry
 268  * is not in use, it will notify the automounter via the LX_AUTOFS_IOC_EXPIRE
 269  * ioctl.  For example, if the timeout is 60 seconds, the Linux
 270  * automounter will use the LX_AUTOFS_IOC_EXPIRE ioctl to query for
 271  * timeouts more often than that.  (v3.1.7 of the automount daemon would
 272  * perform this ioctl every <timeout>/4 seconds.)  The autofs
 273  * filesystem will then
 274  * report top level directories that aren't in use to the automounter
 275  * via this ioctl.  If /net was managed by the automounter and
 276  * there were the following mount points:
 277  *      /net/jurassic/var/crash
 278  *      /net/mcescher/var/crash
 279  * and no one was looking at any crash dumps on mcescher but someone
 280  * was analyzing a crash dump on jurassic, then after <timeout> seconds
 281  * had passed the autofs filesystem would let the automounter know that
 282  * "mcescher" could be unmounted.  (Note the granularity of notification
 283  * is directories in the root of the autofs filesystem.)  Here's two
 284  * ideas for how this functionality could be implemented on Solaris:
 285  *
 286  * 2.1) The easy incomplete way.  Don't do any in-use detection.  Simply
 287  * tell the automounter it can try to unmount the filesystem every time
 288  * the specified timeout passes.  If the filesystem is in use then the
 289  * unmount will fail.  This would break down for remote hosts with multiple
 290  * mounts.  For example, if the automounter had mounted the following
 291  * filesystems:
 292  *      /net/jurassic/var/crash
 293  *      /net/jurassic/var/core
 294  * and the user was looking at a core file, and the timeout expired, the
 295  * automounter would receive notification to unmount "jurassic".  Then
 296  * it would unmount crash (which would succeed) and then try to unmount
 297  * core (which would fail).  After that (since the automounter only
 298  * performs mounts for failed lookups in the root autofs directory)
 299  * future access to /net/jurassic/var/crash would result in access
 300  * to an empty autofs directory.  We might be able to work around
 301  * this by caching which root autofs directories we've timed out,
 302  * then any access to paths that contain those directories could be
 303  * stalled and we could resend another request to the automounter.
 304  * This could work if the automounter ignores mount failures.
 305  *
 306  * 2.2) The hard correct way.  The real difficulty here is detecting
 307  * files in use on other filesystems (say NFS) that have been mounted
 308  * on top of autofs.  (Detecting in use autofs vnodes should be easy.)
 309  * To do this we would probably have to create a new brand op to intercept
 310  * mount/umount filesystem operations.  Then using this entry point we
 311  * could detect mounts of other filesystems on top of lx_autofs.  When
 312  * a successful mount finishes we would use the FEM (file event
 313  * monitoring) framework to push a module onto that filesystem and
 314  * intercept VOP operations that allocate/free vnodes in that filesystem.
 315  * (We would also then have to track mount operations on top of that
 316  * filesystem, etc.)  This would allow us to properly detect any
 317  * usage of subdirectories of an autofs directory.
 318  */
  319 #define LX_AUTOFS_IOC_PROTOVER          0x80049363 /* arg: int */
  320 #define LX_AUTOFS_IOC_EXPIRE            0x81109365 /* arg: lx_autofs_expire * */
  321 #define LX_AUTOFS_IOC_SETTIMEOUT        0xc0049364 /* arg: ulong_t */
 322 /* NOTE(review): EXPIRE encodes size 0x110; lx_autofs_expire_t is 0x10c? */
  323 typedef struct lx_autofs_expire {
  324         int     lap_protover;   /* protocol version number */
  325         int     lap_constant;   /* always set to 1 */
  326         int     lap_name_len;   /* doesn't include newline or NUL */
  327         char    lap_name[256];  /* path component that has timed out */
  328 } lx_autofs_expire_t;
 329 
 330 #ifdef  __cplusplus
 331 }
 332 #endif
 333 
 334 #endif  /* _LX_AUTOFS_H */