Print this page
8115 parallel zfs mount
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/sys/buf.h
+++ new/usr/src/uts/common/sys/buf.h
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 + *
25 + * Copyright 2017 RackTop Systems.
24 26 */
25 27
26 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 29 /* All Rights Reserved */
28 30
29 31 /*
30 32 * University Copyright- Copyright (c) 1982, 1986, 1988
31 33 * The Regents of the University of California
32 34 * All Rights Reserved
33 35 *
34 36 * University Acknowledgment- Portions of this document are derived from
35 37 * software developed by the University of California, Berkeley, and its
36 38 * contributors.
37 39 */
38 40
39 41 #ifndef _SYS_BUF_H
40 42 #define _SYS_BUF_H
41 43
42 44 #include <sys/types32.h>
43 45 #include <sys/t_lock.h>
44 46 #include <sys/kstat.h>
45 47
46 48 #ifdef __cplusplus
47 49 extern "C" {
48 50 #endif
49 51
50 52 /*
51 53 * Each buffer in the pool is usually doubly linked into 2 lists:
52 54 * the device with which it is currently associated (always)
53 55 * and also on a list of blocks available for allocation
54 56 * for other use (usually).
55 57 * The latter list is kept in last-used order, and the two
56 58 * lists are doubly linked to make it easy to remove
57 59 * a buffer from one list when it was found by
58 60 * looking through the other.
59 61 * A buffer is on the available list, and is liable
60 62 * to be reassigned to another disk block, if and only
61 63 * if it is not marked BUSY. When a buffer is busy, the
62 64 * available-list pointers can be used for other purposes.
63 65 * Most drivers use the forward ptr as a link in their I/O active queue.
64 66 * A buffer header contains all the information required to perform I/O.
65 67 * Most of the routines which manipulate these things are in bio.c.
66 68 *
67 69 * There are a number of locks associated with the buffer management
68 70 * system.
69 71 * hbuf.b_lock: protects hash chains, buffer hdr freelists
70 72 * and delayed write freelist
71 73 * bfree_lock: protects the bfreelist structure
72 74 * bhdr_lock: protects the free header list
73 75 * blist_lock: protects b_list fields
74 76 * buf.b_sem: protects all remaining members in the buf struct
75 77 * buf.b_io: I/O synchronization variable
76 78 *
77 79 * A buffer header is never "locked" (b_sem) when it is on
78 80 * a "freelist" (bhdrlist or bfreelist avail lists).
79 81 */
80 82 typedef struct buf {
81 83 int b_flags; /* see defines below */
82 84 struct buf *b_forw; /* headed by d_tab of conf.c */
83 85 struct buf *b_back; /* " */
84 86 struct buf *av_forw; /* position on free list, */
85 87 struct buf *av_back; /* if not BUSY */
86 88 o_dev_t b_dev; /* OLD major+minor device name */
87 89 size_t b_bcount; /* transfer count */
88 90 union {
89 91 caddr_t b_addr; /* low order core address */
90 92 struct fs *b_fs; /* superblocks */
91 93 struct cg *b_cg; /* UFS cylinder group block */
92 94 struct dinode *b_dino; /* UFS ilist */
93 95 daddr32_t *b_daddr; /* disk blocks */
94 96 } b_un;
95 97
96 98 lldaddr_t _b_blkno; /* block # on device (union) */
97 99 #define b_lblkno _b_blkno._f
98 100 #ifdef _LP64
99 101 #define b_blkno _b_blkno._f
100 102 #else
101 103 #define b_blkno _b_blkno._p._l
102 104 #endif /* _LP64 */
103 105
104 106 char b_obs1; /* obsolete */
105 107 size_t b_resid; /* words not transferred after error */
106 108 clock_t b_start; /* request start time */
107 109 struct proc *b_proc; /* process doing physical or swap I/O */
108 110 struct page *b_pages; /* page list for PAGEIO */
109 111 clock_t b_obs2; /* obsolete */
110 112 /* Begin new stuff */
111 113 #define b_actf av_forw
112 114 #define b_actl av_back
113 115 #define b_active b_bcount
114 116 #define b_errcnt b_resid
115 117 size_t b_bufsize; /* size of allocated buffer */
116 118 int (*b_iodone)(struct buf *); /* function called by iodone */
117 119 struct vnode *b_vp; /* vnode associated with block */
118 120 struct buf *b_chain; /* chain together all buffers here */
119 121 int b_obs3; /* obsolete */
120 122 int b_error; /* expanded error field */
121 123 void *b_private; /* "opaque" driver private area */
122 124 dev_t b_edev; /* expanded dev field */
123 125 ksema_t b_sem; /* Exclusive access to buf */
124 126 ksema_t b_io; /* I/O Synchronization */
125 127 struct buf *b_list; /* List of potential B_DELWRI bufs */
126 128 struct page **b_shadow; /* shadow page list */
127 129 void *b_dip; /* device info pointer */
128 130 struct vnode *b_file; /* file associated with this buffer */
129 131 offset_t b_offset; /* offset in file assoc. with buffer */
130 132 } buf_t;
131 133
132 134 /*
133 135 * Bufhd structures used at the head of the hashed buffer queues.
134 136 * We only need seven words for this, so this abbreviated
135 137 * definition saves some space.
136 138 */
137 139 struct diskhd {
138 140 int b_flags; /* not used, needed for consistency */
139 141 struct buf *b_forw, *b_back; /* queue of unit queues */
140 142 struct buf *av_forw, *av_back; /* queue of bufs for this unit */
141 143 o_dev_t b_dev; /* OLD major+minor device name */
142 144 size_t b_bcount; /* transfer count */
143 145 };
144 146
145 147
146 148 /*
147 149 * Statistics on the buffer cache
148 150 */
149 151 struct biostats {
150 152 kstat_named_t bio_lookup; /* requests to assign buffer */
151 153 kstat_named_t bio_hit; /* buffer already associated with blk */
152 154 kstat_named_t bio_bufwant; /* kmem_allocs NOSLEEP failed new buf */
153 155 kstat_named_t bio_bufwait; /* kmem_allocs with KM_SLEEP for buf */
154 156 kstat_named_t bio_bufbusy; /* buffer locked by someone else */
155 157 kstat_named_t bio_bufdup; /* duplicate buffer found for block */
156 158 };
157 159
158 160 /*
159 161 * These flags are kept in b_flags.
160 162 * The first group is part of the DDI
161 163 */
162 164 #define B_BUSY 0x0001 /* not on av_forw/back list */
163 165 #define B_DONE 0x0002 /* transaction finished */
164 166 #define B_ERROR 0x0004 /* transaction aborted */
165 167 #define B_PAGEIO 0x0010 /* do I/O to pages on bp->b_pages */
166 168 #define B_PHYS 0x0020 /* Physical IO potentially using UNIBUS map */
167 169 #define B_READ 0x0040 /* read when I/O occurs */
168 170 #define B_WRITE 0x0100 /* non-read pseudo-flag */
169 171
170 172 /* Not part of the DDI */
171 173 #define B_WANTED 0x0080 /* issue wakeup when BUSY goes off */
172 174 #define B_AGE 0x000200 /* delayed write for correct aging */
173 175 #define B_ASYNC 0x000400 /* don't wait for I/O completion */
174 176 #define B_DELWRI 0x000800 /* delayed write-wait til buf needed */
175 177 #define B_STALE 0x001000 /* on av_* list; invalid contents */
176 178 #define B_DONTNEED 0x002000 /* after write, need not be cached */
177 179 #define B_REMAPPED 0x004000 /* buffer is kernel addressable */
178 180 #define B_FREE 0x008000 /* free page when done */
179 181 #define B_INVAL 0x010000 /* destroy page when done */
180 182 #define B_FORCE 0x020000 /* semi-permanent removal from cache */
181 183 #define B_NOCACHE 0x080000 /* don't cache block when released */
182 184 #define B_TRUNC 0x100000 /* truncate page without I/O */
183 185 #define B_SHADOW 0x200000 /* is b_shadow field valid? */
184 186 #define B_RETRYWRI 0x400000 /* retry write til works or bfinval */
185 187 #define B_FAILFAST 0x1000000 /* Fail promptly if device goes away */
186 188 #define B_STARTED 0x2000000 /* io:::start probe called for buf */
187 189 #define B_ABRWRITE 0x4000000 /* Application based recovery active */
188 190 #define B_PAGE_NOWAIT 0x8000000 /* Skip the page if it is locked */
189 191
190 192 /*
191 193 * There is some confusion over the meaning of B_FREE and B_INVAL and what
192 194 * the use of one over the other implies.
193 195 *
194 196 * In both cases, when we are done with the page (buffer) we want to free
195 197 * up the page. In the case of B_FREE, the page will go to the cachelist.
196 198 * In the case of B_INVAL, the page will be destroyed (hashed out of its
197 199 * vnode) and placed on the freelist. Beyond this, there is no difference
198 200 * between the sole use of these two flags. In both cases, IO will be done
199 201 * if the page is not yet committed to storage.
200 202 *
201 203 * In order to discard pages without writing them back, (B_INVAL | B_TRUNC)
202 204 * should be used.
203 205 *
204 206 * Use (B_INVAL | B_FORCE) to force the page to be destroyed even if we
205 207 * could not successfully write out the page.
206 208 */
207 209
208 210 /*
209 211 * Insq/Remq for the buffer hash lists.
210 212 */
211 213 #define bremhash(bp) { \
212 214 ASSERT((bp)->b_forw != NULL); \
213 215 ASSERT((bp)->b_back != NULL); \
214 216 (bp)->b_back->b_forw = (bp)->b_forw; \
215 217 (bp)->b_forw->b_back = (bp)->b_back; \
216 218 (bp)->b_forw = (bp)->b_back = NULL; \
217 219 }
218 220 #define binshash(bp, dp) { \
219 221 ASSERT((bp)->b_forw == NULL); \
220 222 ASSERT((bp)->b_back == NULL); \
221 223 ASSERT((dp)->b_forw != NULL); \
222 224 ASSERT((dp)->b_back != NULL); \
223 225 (bp)->b_forw = (dp)->b_forw; \
224 226 (bp)->b_back = (dp); \
225 227 (dp)->b_forw->b_back = (bp); \
226 228 (dp)->b_forw = (bp); \
227 229 }
228 230
229 231
230 232 /*
231 233 * The hash structure maintains two lists:
232 234 *
233 235 * 1) The hash list of buffers (b_forw & b_back)
234 236 * 2) The LRU free list of buffers on this hash bucket (av_forw & av_back)
235 237 *
236 238 * The dwbuf structure keeps a list of delayed write buffers per hash bucket
237 239 * hence there are exactly the same number of dwbuf structures as there are
238 240 * hash buckets (hbuf structures) in the system.
239 241 *
240 242 * The number of buffers on the freelist may not be equal to the number of
241 243 * buffers on the hash list. That is because when buffers are busy they are
242 244 * taken off the freelist but not off the hash list. "b_length" field keeps
243 245 * track of the number of free buffers (including delayed write ones) on
244 246 * the hash bucket. The "b_lock" mutex protects the free list as well as
245 247 * the hash list. It also protects the counter "b_length".
246 248 *
247 249 * Entries b_forw, b_back, av_forw & av_back must be at the same offset
248 250 * as the ones in buf structure.
249 251 */
250 252 struct hbuf {
251 253 int b_flags;
252 254
253 255 struct buf *b_forw; /* hash list forw pointer */
254 256 struct buf *b_back; /* hash list back pointer */
255 257
256 258 struct buf *av_forw; /* free list forw pointer */
257 259 struct buf *av_back; /* free list back pointer */
258 260
259 261 int b_length; /* # of entries on free list */
260 262 kmutex_t b_lock; /* lock to protect this structure */
261 263 };
262 264
263 265
264 266 /*
265 267 * The delayed list pointer entries should match with the buf structure.
266 268 */
267 269 struct dwbuf {
268 270 int b_flags; /* not used */
269 271
270 272 struct buf *b_forw; /* not used */
271 273 struct buf *b_back; /* not used */
272 274
273 275 struct buf *av_forw; /* delayed write forw pointer */
274 276 struct buf *av_back; /* delayed write back pointer */
275 277 };
276 278
277 279
278 280 /*
279 281 * Unlink a buffer from the available (free or delayed write) list and mark
280 282 * it busy (internal interface). The caller must already hold (bp)->b_sem.
281 283 */
282 284 #define notavail(bp) \
283 285 {\
284 286 ASSERT(SEMA_HELD(&(bp)->b_sem)); \
↓ open down ↓ |
251 lines elided |
↑ open up ↑ |
285 287 ASSERT((bp)->av_forw != NULL); \
286 288 ASSERT((bp)->av_back != NULL); \
287 289 ASSERT((bp)->av_forw != (bp)); \
288 290 ASSERT((bp)->av_back != (bp)); \
289 291 (bp)->av_back->av_forw = (bp)->av_forw; \
290 292 (bp)->av_forw->av_back = (bp)->av_back; \
291 293 (bp)->b_flags |= B_BUSY; \
292 294 (bp)->av_forw = (bp)->av_back = NULL; \
293 295 }
294 296
295 -#if defined(_KERNEL)
297 +#if defined(_KERNEL) || defined(_FAKE_KERNEL)
296 298 /*
297 299 * Macros to avoid the extra function call needed for binary compat.
298 300 *
299 301 * B_RETRYWRI is not included in clear_flags for BWRITE(), BWRITE2(),
300 302 * or brwrite() so that the retry operation is persistent until the
301 303 * write either succeeds or the buffer is bfinval()'d.
302 304 *
303 305 */
304 306 #define BREAD(dev, blkno, bsize) \
305 307 bread_common(/* ufsvfsp */ NULL, dev, blkno, bsize)
306 308
307 309 #define BWRITE(bp) \
308 310 bwrite_common(/* ufsvfsp */ NULL, bp, /* force_wait */ 0, \
309 311 /* do_relse */ 1, \
310 312 /* clear_flags */ (B_READ | B_DONE | B_ERROR | B_DELWRI))
311 313
312 314 #define BWRITE2(bp) \
313 315 bwrite_common(/* ufsvfsp */ NULL, bp, /* force_wait */ 1, \
314 316 /* do_relse */ 0, \
315 317 /* clear_flags */ (B_READ | B_DONE | B_ERROR | B_DELWRI))
316 318
317 319 #define GETBLK(dev, blkno, bsize) \
318 320 getblk_common(/* ufsvfsp */ NULL, dev, blkno, bsize, /* errflg */ 0)
319 321
320 322
321 323 /*
322 324 * Macros for new retry write interfaces.
323 325 */
324 326
325 327 /*
326 328 * Same as bdwrite() except write failures are retried.
327 329 */
328 330 #define bdrwrite(bp) { \
329 331 (bp)->b_flags |= B_RETRYWRI; \
330 332 bdwrite((bp)); \
331 333 }
332 334
333 335 /*
334 336 * Same as BWRITE() except B_RETRYWRI is set first so write failures are retried.
335 337 */
336 338 #define brwrite(bp) { \
337 339 (bp)->b_flags |= B_RETRYWRI; \
338 340 bwrite_common(/* ufsvfsp */ NULL, (bp), /* force_wait */ 0, \
339 341 /* do_relse */ 1, /* clear_flags */ (B_READ | B_DONE | B_ERROR | B_DELWRI)); \
340 342 }
341 343
342 344 extern struct hbuf *hbuf; /* Hash table */
343 345 extern struct dwbuf *dwbuf; /* delayed write hash table */
344 346 extern struct buf *buf; /* The buffer pool itself */
345 347 extern struct buf bfreelist; /* head of available list */
346 348
347 349 extern void (*bio_lufs_strategy)(void *, buf_t *); /* UFS Logging */
348 350 extern void (*bio_snapshot_strategy)(void *, buf_t *); /* UFS snapshots */
349 351
350 352 int bcheck(dev_t, struct buf *);
351 353 int iowait(struct buf *);
352 354 int hash2ints(int x, int y);
353 355 int bio_busy(int);
354 356 int biowait(struct buf *);
355 357 int biomodified(struct buf *);
356 358 int geterror(struct buf *);
357 359 void minphys(struct buf *);
358 360 /*
359 361 * ufsvfsp is declared as a void * to avoid having everyone that uses
360 362 * this header file include sys/fs/ufs_inode.h.
361 363 */
362 364 void bwrite_common(void *ufsvfsp, struct buf *, int force_wait,
363 365 int do_relse, int clear_flags);
364 366 void bwrite(struct buf *);
365 367 void bwrite2(struct buf *);
366 368 void bdwrite(struct buf *);
367 369 void bawrite(struct buf *);
368 370 void brelse(struct buf *);
369 371 void iodone(struct buf *);
370 372 void clrbuf(struct buf *);
371 373 void bflush(dev_t);
372 374 void blkflush(dev_t, daddr_t);
373 375 void binval(dev_t);
374 376 int bfinval(dev_t, int);
375 377 void binit(void);
376 378 void biodone(struct buf *);
377 379 void bioinit(struct buf *);
378 380 void biofini(struct buf *);
379 381 void bp_mapin(struct buf *);
380 382 void *bp_mapin_common(struct buf *, int);
381 383 void bp_mapout(struct buf *);
382 384 int bp_copyin(struct buf *, void *, offset_t, size_t);
383 385 int bp_copyout(void *, struct buf *, offset_t, size_t);
384 386 void bp_init(size_t, uint_t);
385 387 int bp_color(struct buf *);
386 388 void pageio_done(struct buf *);
387 389 struct buf *bread(dev_t, daddr_t, long);
388 390 struct buf *bread_common(void *, dev_t, daddr_t, long);
389 391 struct buf *breada(dev_t, daddr_t, daddr_t, long);
↓ open down ↓ |
84 lines elided |
↑ open up ↑ |
390 392 struct buf *getblk(dev_t, daddr_t, long);
391 393 struct buf *getblk_common(void *, dev_t, daddr_t, long, int);
392 394 struct buf *ngeteblk(long);
393 395 struct buf *geteblk(void);
394 396 struct buf *pageio_setup(struct page *, size_t, struct vnode *, int);
395 397 void bioerror(struct buf *bp, int error);
396 398 void bioreset(struct buf *bp);
397 399 struct buf *bioclone(struct buf *, off_t, size_t, dev_t, daddr_t,
398 400 int (*)(struct buf *), struct buf *, int);
399 401 size_t biosize(void);
400 -#endif /* defined(_KERNEL) */
402 +#endif /* defined(_KERNEL) || defined(_FAKE_KERNEL) */
401 403
402 404 #ifdef __cplusplus
403 405 }
404 406 #endif
405 407
406 408 #endif /* _SYS_BUF_H */
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX