1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #ifndef _LX_AUTOFS_H
28 #define _LX_AUTOFS_H
29
30 #pragma ident "%Z%%M% %I% %E% SMI"
31
32 /*
33 * The lx_autofs filesystem exists to emulate the Linux autofs filesystem
34 * and provide support for the Linux "automount" automounter.
35 *
36 *
37 *
38 * +++ Linux automounter background.
39 *
40 * Linux has two automounters: "amd" and "automount"
41 *
42 * 1) "amd" is a userland NFS server. It basically mounts an NFS filesystem
43 * at an automount point, and it acts as the NFS server for the mount. When
44 * an access is done to that NFS filesystem, the access is redirected by the
45 * kernel to the "amd" process via rpc. "amd" then looks up any information
46 * required to resolve the requests, mounts real NFS filesystems if
47 * necessary, and returns. "amd" has its own strange configuration
48 * mechanism that doesn't seem to be very compatible with Solaris's network
49 * based automounter map support.
50 *
51 * 2) "automount" is the other Linux automounter. It utilizes a kernel
52 * filesystem (autofs) to provide its functionality. Basically, it mounts
53 * the autofs filesystem at any automounter controlled mount point. This
54 * filesystem then intercepts and redirects lookup operations (and only
55 * lookup ops) to the userland automounter process via a pipe. (The
56 * pipe to the automounter is established via mount options when the autofs
57 * filesystem is mounted.) When the automounter receives a request via this
58 * pipe, it does lookups to whatever backing store it's configured to use,
59 * does mkdir operations on the autofs filesystem, mounts remote NFS
60 * filesystems on any leaf directories it just created, and signals the
61 * autofs filesystem via an ioctl to let it know that the lookup can
62 * continue.
63 *
64 *
65 *
66 * +++ Linux autofs (and automount daemon) notes
67 *
68 * Since we're mimicking the behavior of the Linux autofs filesystem it's
69 * important to document some of its observed behavior here since there's
70 * no doubt that in the future this behavior will change. These comments
71 * apply to the behavior of the automounter as observed on a system
72 * running Linux v2.4.21 (autofs is bundled with the Linux kernel).
73 *
74 * A) Autofs allows root owned, non-automounter processes to create
75 * directories in the autofs filesystem. The autofs filesystem treats the
76 * automounter's process group as special, but it doesn't prevent root
77 * processes outside of the automounter's process group from creating new
78 * directories in the autofs filesystem.
79 *
80 * B) Autofs doesn't allow creation of any non-directory entries in the
81 * autofs filesystem. No entity can create files (e.g. /bin/touch or
82 * VOP_CREATE/VOP_SYMLINK/etc.) The only entries that can exist within
83 * the autofs filesystem are directories.
84 *
85 * C) Autofs only intercepts vop lookup operations. Notably, it does _not_
86 * intercept and re-direct vop readdir operations. This means that the
87 * observed behavior of the Linux automounter can be considerably different
88 * from that of the Solaris automounter. Specifically, on Solaris if an autofs
89 * mount point is mounted _without_ the -nobrowse option then if a user does
90 * an ls operation (which translates into a vop readdir operation) then the
91 * automounter will intercept that operation and list all the possible
92 * directories and mount points without actually mounting any filesystems.
93 * Essentially, all automounter managed mount points on Linux will behave
94 * like "-nobrowse" mount points on Solaris. Here's an example to
95 * illustrate this. If /ws was mounted on Solaris without the -nobrowse
96 * option and an auto_ws yp map was setup as the backing store for this
97 * mount point, then an "ls /ws" would list all the keys in the map as
98 * valid directories, but an "ls /ws" on Linux would list an empty
99 * directory.
100 *
101 * D) NFS mounts are performed by the automount process. When the automount
102 * process gets a redirected lookup request, it determines _all_ the
103 * possible remote mount points for that request, creates directory paths
104 * via mkdir, and mounts the remote filesystems on the newly created paths.
105 * So for example, if a machine called mcescher exported /var/crash and
106 * /var/core, an "ls /net/mcescher" would result in the following actions
107 * being done by the automounter:
108 * mkdir /net/mcescher
109 * mkdir /net/mcescher/var
110 * mkdir /net/mcescher/var/crash
111 * mkdir /net/mcescher/var/core
112 * mount mcescher:/var/crash /net/mcescher/var/crash
113 * mount mcescher:/var/core /net/mcescher/var/core
114 * Once the automounter completed the work above it would signal the autofs
115 * filesystem (via an ioctl) that the lookup could continue.
116 *
117 * E.1) Autofs only redirects vop lookup operations for path entries that
118 * don't already exist in the autofs filesystem. So for the example above,
119 * an initial (after the start of the automounter) "ls /net/mcescher" would
120 * result in a request to the automounter. A subsequent "ls /net/mcescher"
121 * would not result in a request to the automounter. Even if
122 * /net/mcescher/var/crash and /net/mcescher/var/core were manually unmounted
123 * after the initial "ls /net/mcescher", a subsequent "ls /net/mcescher"
124 * would not result in a new request to the automounter.
125 *
126 * E.2) Autofs lookup requests that are sent to the automounter only include
127 * the root directory path component. So for example, after starting up
128 * the automounter if a user were to do a "ls /net/mcescher/var/crash", the
129 * lookup request actually sent to the automounter would just be for
130 * "mcescher". (The same request as if the user had done "ls /net/mcescher".)
131 *
132 * E.3) The two statements above aren't entirely true. The Linux
133 * autofs filesystem will also redirect lookup operations for leaf
134 * directories that don't have a filesystem mounted on them. Using the
135 * example above, if a user did a "ls /net/mcescher", then manually
136 * unmounted /net/mcescher/var/crash, and then did an "ls
137 * /net/mcescher/var/crash", this would result in a request for
138 * "mcescher/var/crash" being sent to the automounter. The strange thing
139 * (a Linux bug perhaps) is that the automounter won't do anything with this
140 * request and the lookup will fail.
141 *
142 * F) The autofs filesystem communication protocol (what ioctls it supports
143 * and what data it passes to the automount process) is versioned. The
144 * source for the userland automount daemon (I looked at version v3.1.7)
145 * seemed to support two versions of the Linux kernel autofs implementation.
146 * Both versions supported communication with a pipe and the format of the
147 * structure passed via this pipe was the same. The difference between the
148 * two versions was in the functionality supported. (The v3 version has
149 * additional ioctls to support automount timeouts.)
150 *
151 *
152 *
153 * +++ lx_autofs notes
154 *
155 * 1) In general, the lx_autofs filesystem tries to mimic the behavior of the
156 * Linux autofs filesystem with the following exceptions:
157 *
158 * 1.1) We don't bother to implement the E.3 functionality listed above
159 * since it doesn't appear to be of any use.
160 *
161 * 1.2) We only implement v2 of the automounter protocol since
162 * implementing v3 would take a _lot_ more work. If this proves to be a
163 * problem we can re-visit this decision later. (More details about v3
164 * support are included in comments below.)
165 *
166 * 2) In general, the approach taken for lx_autofs is to keep it as simple
167 * as possible and to minimize its memory usage. To do this all information
168 * about the contents of the lx_autofs filesystem is mirrored in the
169 * underlying filesystem that lx_autofs is mounted on and most vop operations
170 * are simply passed onto this underlying filesystem. This means we don't
171 * have to implement most of the complex operations that a full filesystem
172 * normally has to implement. It also means that most of our filesystem state
173 * (wrt the contents of the filesystem) doesn't actually have to be stored
174 * in memory, we can simply go to the underlying filesystem to get it when
175 * it's requested. For the purposes of discussion, we'll call the underlying
176 * filesystem the "backing store."
177 *
178 * The backing store is actually a directory called ".lx_afs" which is created in
179 * the directory where the lx_autofs filesystem is mounted. When the lx_autofs
180 * filesystem is unmounted this backing store directory is deleted. If this
181 * directory exists at mount time (perhaps the system crashed while a previous
182 * lx_autofs instance was mounted at the same location) it will be deleted.
183 * There are a few implications of using a backing store worth mentioning.
184 *
185 * 2.1) lx_autofs can't be mounted on a read only filesystem. If this
186 * proves to be a problem we can probably move the location of the
187 * backing store.
188 *
189 * 2.2) If the backing store filesystem runs out of space then the
190 * automounter process won't be able to create more directories and mount
191 * new filesystems. Of course, strange failures usually happen when
192 * filesystems run out of space.
193 *
194 * 3) Why aren't we using gfs? gfs has two different usage models.
195 *
196 * 3.1) I'm my own filesystem but I'm using gfs to help with managing
197 * readdir operations.
198 *
199 * 3.2) I'm a gfs filesystem and gfs is managing all my vnodes
200 *
201 * We're not using the 3.1 interfaces because we don't implement readdir
202 * ourselves. We pass all readdir operations onto the backing store
203 * filesystem and utilize its readdir implementation.
204 *
205 * We're not using the 3.2 interfaces because they are really designed for
206 * in memory filesystems where all of the filesystem state is stored in
207 * memory. They don't lend themselves to filesystems where part of the
208 * state is in memory and part of the state is on disk.
209 *
210 * For more information on gfs take a look at the block comments in the
211 * top of gfs.c
212 */
213
214 #ifdef __cplusplus
215 extern "C" {
216 #endif
217
218 /*
219 * Note that the name of the actual Solaris filesystem is lx_afs and not
220 * lx_autofs. This is because filesystem names are stupidly limited to 8
221 * characters.
222 */
223 #define LX_AUTOFS_NAME "lx_afs"
224
225 /*
226 * Mount options supported. Per-option notes are assumptions -- TODO confirm.
227 */
228 #define LX_MNTOPT_FD "fd" /* presumably the automounter pipe fd */
229 #define LX_MNTOPT_PGRP "pgrp" /* presumably the automounter pgrp */
230 #define LX_MNTOPT_MINPROTO "minproto" /* presumably min protocol ver */
231 #define LX_MNTOPT_MAXPROTO "maxproto" /* presumably max protocol ver */
232
233 /* Version of the Linux kernel automount protocol we support (v2 only). */
234 #define LX_AUTOFS_PROTO_VERSION 2
235
236 /*
237 * Command structure sent to the automount process from lx_autofs via a pipe.
238 * This structure is the same for v2 and v3 of the automount protocol
239 * (the communication pipe is established at mount time).
240 */
241 typedef struct lx_autofs_pkt {
242 int lap_protover; /* protocol version number */
243 int lap_constant; /* always set to 0 */
244 int lap_id; /* every pkt must have a unique id */
245 int lap_name_len; /* don't include newline or NUL */
246 char lap_name[256]; /* path component to lookup */
247 } lx_autofs_pkt_t;
248
249 /*
250 * Ioctls supported (v2 protocol).
251 */
252 #define LX_AUTOFS_IOC_READY 0x00009360 /* arg: int */
253 #define LX_AUTOFS_IOC_FAIL 0x00009361 /* arg: int */
254 #define LX_AUTOFS_IOC_CATATONIC 0x00009362 /* arg: <none> */
255
256 /*
257 * Ioctls not supported (v3 protocol).
258 *
259 * Initially we're only going to support v2 of the Linux kernel automount
260 * protocol. This means that we don't support the following ioctls.
261 *
262 * 1) The protocol version ioctl (by not supporting it the automounter
263 * will assume version 2).
264 *
265 * 2) Automounter timeout ioctls. For v3 and later the automounter can
266 * be started with a timeout option. It will notify the filesystem of
267 * this timeout and, if any automounter filesystem root directory entry
268 * is not in use, it will notify the automounter via the LX_AUTOFS_IOC_EXPIRE
269 * ioctl. For example, if the timeout is 60 seconds, the Linux
270 * automounter will use the LX_AUTOFS_IOC_EXPIRE ioctl to query for
271 * timeouts more often than that. (v3.1.7 of the automount daemon would
272 * perform this ioctl every <timeout>/4 seconds.) The autofs
273 * filesystem will then report top level directories that
274 * aren't in use to the automounter via this ioctl. If /net
275 * was managed by the automounter and there were the
276 * following mount points:
277 * /net/jurassic/var/crash
278 * /net/mcescher/var/crash
279 * and no one was looking at any crash dumps on mcescher but someone
280 * was analyzing a crash dump on jurassic, then after <timeout> seconds
281 * had passed the autofs filesystem would let the automounter know that
282 * "mcescher" could be unmounted. (Note the granularity of notification
283 * is directories in the root of the autofs filesystem.) Here are two
284 * ideas for how this functionality could be implemented on Solaris:
285 *
286 * 2.1) The easy incomplete way. Don't do any in-use detection. Simply
287 * tell the automounter it can try to unmount the filesystem every time
288 * the specified timeout passes. If the filesystem is in use then the
289 * unmount will fail. This would break down for remote hosts with multiple
290 * mounts. For example, if the automounter had mounted the following
291 * filesystems:
292 * /net/jurassic/var/crash
293 * /net/jurassic/var/core
294 * and the user was looking at a core file, and the timeout expired, the
295 * automounter would receive notification to unmount "jurassic". Then
296 * it would unmount crash (which would succeed) and then try to unmount
297 * core (which would fail). After that (since the automounter only
298 * performs mounts for failed lookups in the root autofs directory)
299 * future access to /net/jurassic/var/crash would result in access
300 * to an empty autofs directory. We might be able to work around
301 * this by caching which root autofs directories we've timed out,
302 * then any access to paths that contain those directories could be
303 * stalled and we could resend another request to the automounter.
304 * This could work if the automounter ignores mount failures.
305 *
306 * 2.2) The hard correct way. The real difficulty here is detecting
307 * files in use on other filesystems (say NFS) that have been mounted
308 * on top of autofs. (Detecting in use autofs vnodes should be easy.)
309 * To do this we would probably have to create a new brand op to intercept
310 * mount/umount filesystem operations. Then using this entry point we
311 * could detect mounts of other filesystems on top of lx_autofs. When
312 * a successful mount finishes we would use the FEM (file event
313 * monitoring) framework to push a module onto that filesystem and
314 * intercept VOP operations that allocate/free vnodes in that filesystem.
315 * (We would also then have to track mount operations on top of that
316 * filesystem, etc.) This would allow us to properly detect any
317 * usage of subdirectories of an autofs directory.
318 */
319 #define LX_AUTOFS_IOC_PROTOVER 0x80049363 /* arg: int */
320 #define LX_AUTOFS_IOC_EXPIRE 0x810c9365 /* arg: lx_autofs_expire_t * */
321 #define LX_AUTOFS_IOC_SETTIMEOUT 0xc0049364 /* arg: ulong_t */
322 /* Argument of LX_AUTOFS_IOC_EXPIRE; that ioctl number encodes its size (0x10c). */
323 typedef struct lx_autofs_expire {
324 int lap_protover; /* protocol version number */
325 int lap_constant; /* always set to 1 */
326 int lap_name_len; /* don't include newline or NUL */
327 char lap_name[256]; /* path component that has timed out */
328 } lx_autofs_expire_t;
329
330 #ifdef __cplusplus
331 }
332 #endif
333
334 #endif /* _LX_AUTOFS_H */