1 /**
2 * compress.c - Compressed attribute handling code. Part of the Linux-NTFS
3 * project.
4 *
5 * Copyright (c) 2004-2005 Anton Altaparmakov
6 * Copyright (c) 2005 Yura Pakhuchiy
7 *
8 * This program/include file is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as published
10 * by the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program/include file is distributed in the hope that it will be
14 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program (in the main directory of the Linux-NTFS
20 * distribution in the file COPYING); if not, write to the Free Software
21 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #endif
27
28 #ifdef HAVE_STDIO_H
29 #include <stdio.h>
30 #endif
31 #ifdef HAVE_STRING_H
32 #include <string.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_ERRNO_H
38 #include <errno.h>
39 #endif
40
41 #include "compat.h"
42 #include "attrib.h"
43 #include "debug.h"
44 #include "volume.h"
45 #include "types.h"
46 #include "layout.h"
47 #include "runlist.h"
48 #include "compress.h"
49 #include "logging.h"
50
51 /**
52 * enum ntfs_compression_constants - constants used in the compression code
53 */
54 typedef enum {
55 /* Token types and access mask. */
56 NTFS_SYMBOL_TOKEN = 0,
57 NTFS_PHRASE_TOKEN = 1,
58 NTFS_TOKEN_MASK = 1,
59
60 /* Compression sub-block constants. */
61 NTFS_SB_SIZE_MASK = 0x0fff,
62 NTFS_SB_SIZE = 0x1000,
63 NTFS_SB_IS_COMPRESSED = 0x8000,
64 } ntfs_compression_constants;
65
66 /**
67 * ntfs_decompress - decompress a compression block into an array of pages
68 * @dest: buffer to which to write the decompressed data
69 * @dest_size: size of buffer @dest in bytes
70 * @cb_start: compression block to decompress
71 * @cb_size: size of compression block @cb_start in bytes
72 *
73 * This decompresses the compression block @cb_start into the destination
74 * buffer @dest.
75 *
76 * @cb_start is a pointer to the compression block which needs decompressing
77 * and @cb_size is the size of @cb_start in bytes (8-64kiB).
78 *
79 * Return 0 if success or -EOVERFLOW on error in the compressed stream.
80 */
81 static int ntfs_decompress(u8 *dest, const u32 dest_size,
82 u8 *const cb_start, const u32 cb_size)
83 {
84 /*
85 * Pointers into the compressed data, i.e. the compression block (cb),
86 * and the therein contained sub-blocks (sb).
87 */
88 u8 *cb_end = cb_start + cb_size; /* End of cb. */
89 u8 *cb = cb_start; /* Current position in cb. */
90 u8 *cb_sb_start = cb; /* Beginning of the current sb in the cb. */
91 u8 *cb_sb_end; /* End of current sb / beginning of next sb. */
92 /* Variables for uncompressed data / destination. */
93 u8 *dest_end = dest + dest_size; /* End of dest buffer. */
94 u8 *dest_sb_start; /* Start of current sub-block in dest. */
95 u8 *dest_sb_end; /* End of current sb in dest. */
96 /* Variables for tag and token parsing. */
97 u8 tag; /* Current tag. */
98 int token; /* Loop counter for the eight tokens in tag. */
99
100 ntfs_log_trace("Entering, cb_size = 0x%x.\n", (unsigned)cb_size);
101 do_next_sb:
102 ntfs_log_debug("Beginning sub-block at offset = 0x%x in the cb.\n",
103 cb - cb_start);
104 /*
105 * Have we reached the end of the compression block or the end of the
106 * decompressed data? The latter can happen for example if the current
107 * position in the compression block is one byte before its end so the
108 * first two checks do not detect it.
109 */
110 if (cb == cb_end || !le16_to_cpup((u16*)cb) || dest == dest_end) {
111 ntfs_log_debug("Completed. Returning success (0).\n");
112 return 0;
113 }
114 /* Setup offset for the current sub-block destination. */
115 dest_sb_start = dest;
116 dest_sb_end = dest + NTFS_SB_SIZE;
117 /* Check that we are still within allowed boundaries. */
118 if (dest_sb_end > dest_end)
119 goto return_overflow;
120 /* Does the minimum size of a compressed sb overflow valid range? */
121 if (cb + 6 > cb_end)
122 goto return_overflow;
123 /* Setup the current sub-block source pointers and validate range. */
124 cb_sb_start = cb;
125 cb_sb_end = cb_sb_start + (le16_to_cpup((u16*)cb) & NTFS_SB_SIZE_MASK)
126 + 3;
127 if (cb_sb_end > cb_end)
128 goto return_overflow;
129 /* Now, we are ready to process the current sub-block (sb). */
130 if (!(le16_to_cpup((u16*)cb) & NTFS_SB_IS_COMPRESSED)) {
131 ntfs_log_debug("Found uncompressed sub-block.\n");
132 /* This sb is not compressed, just copy it into destination. */
133 /* Advance source position to first data byte. */
134 cb += 2;
135 /* An uncompressed sb must be full size. */
136 if (cb_sb_end - cb != NTFS_SB_SIZE)
137 goto return_overflow;
138 /* Copy the block and advance the source position. */
139 memcpy(dest, cb, NTFS_SB_SIZE);
140 cb += NTFS_SB_SIZE;
141 /* Advance destination position to next sub-block. */
142 dest += NTFS_SB_SIZE;
143 goto do_next_sb;
144 }
145 ntfs_log_debug("Found compressed sub-block.\n");
146 /* This sb is compressed, decompress it into destination. */
147 /* Forward to the first tag in the sub-block. */
148 cb += 2;
149 do_next_tag:
150 if (cb == cb_sb_end) {
151 /* Check if the decompressed sub-block was not full-length. */
152 if (dest < dest_sb_end) {
153 int nr_bytes = dest_sb_end - dest;
154
155 ntfs_log_debug("Filling incomplete sub-block with zeroes.\n");
156 /* Zero remainder and update destination position. */
157 memset(dest, 0, nr_bytes);
158 dest += nr_bytes;
159 }
160 /* We have finished the current sub-block. */
161 goto do_next_sb;
162 }
163 /* Check we are still in range. */
164 if (cb > cb_sb_end || dest > dest_sb_end)
165 goto return_overflow;
166 /* Get the next tag and advance to first token. */
167 tag = *cb++;
168 /* Parse the eight tokens described by the tag. */
169 for (token = 0; token < 8; token++, tag >>= 1) {
170 u16 lg, pt, length, max_non_overlap;
171 register u16 i;
172 u8 *dest_back_addr;
173
174 /* Check if we are done / still in range. */
175 if (cb >= cb_sb_end || dest > dest_sb_end)
176 break;
177 /* Determine token type and parse appropriately.*/
178 if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
179 /*
180 * We have a symbol token, copy the symbol across, and
181 * advance the source and destination positions.
182 */
183 *dest++ = *cb++;
184 /* Continue with the next token. */
185 continue;
186 }
187 /*
188 * We have a phrase token. Make sure it is not the first tag in
189 * the sb as this is illegal and would confuse the code below.
190 */
191 if (dest == dest_sb_start)
192 goto return_overflow;
193 /*
194 * Determine the number of bytes to go back (p) and the number
195 * of bytes to copy (l). We use an optimized algorithm in which
196 * we first calculate log2(current destination position in sb),
197 * which allows determination of l and p in O(1) rather than
198 * O(n). We just need an arch-optimized log2() function now.
199 */
200 lg = 0;
201 for (i = dest - dest_sb_start - 1; i >= 0x10; i >>= 1)
202 lg++;
203 /* Get the phrase token into i. */
204 pt = le16_to_cpup((u16*)cb);
205 /*
206 * Calculate starting position of the byte sequence in
207 * the destination using the fact that p = (pt >> (12 - lg)) + 1
208 * and make sure we don't go too far back.
209 */
210 dest_back_addr = dest - (pt >> (12 - lg)) - 1;
211 if (dest_back_addr < dest_sb_start)
212 goto return_overflow;
213 /* Now calculate the length of the byte sequence. */
214 length = (pt & (0xfff >> lg)) + 3;
215 /* Verify destination is in range. */
216 if (dest + length > dest_sb_end)
217 goto return_overflow;
218 /* The number of non-overlapping bytes. */
219 max_non_overlap = dest - dest_back_addr;
220 if (length <= max_non_overlap) {
221 /* The byte sequence doesn't overlap, just copy it. */
222 memcpy(dest, dest_back_addr, length);
223 /* Advance destination pointer. */
224 dest += length;
225 } else {
226 /*
227 * The byte sequence does overlap, copy non-overlapping
228 * part and then do a slow byte by byte copy for the
229 * overlapping part. Also, advance the destination
230 * pointer.
231 */
232 memcpy(dest, dest_back_addr, max_non_overlap);
233 dest += max_non_overlap;
234 dest_back_addr += max_non_overlap;
235 length -= max_non_overlap;
236 while (length--)
237 *dest++ = *dest_back_addr++;
238 }
239 /* Advance source position and continue with the next token. */
240 cb += 2;
241 }
242 /* No tokens left in the current tag. Continue with the next tag. */
243 goto do_next_tag;
244 return_overflow:
245 ntfs_log_debug("Failed. Returning -EOVERFLOW.\n");
246 errno = EOVERFLOW;
247 return -1;
248 }
249
250 /**
251 * ntfs_is_cb_compressed - internal function, do not use
252 *
253 * This is a very specialised function determining if a cb is compressed or
254 * uncompressed. It is assumed that checking for a sparse cb has already been
255 * performed and that the cb is not sparse. It makes all sorts of other
256 * assumptions as well and hence it is not useful anywhere other than where it
257 * is used at the moment. Please, do not make this function available for use
258 * outside of compress.c as it is bound to confuse people and not do what they
259 * want.
260 *
261 * Return TRUE on errors so that the error will be detected later on in the
262 * code. Might be a bit confusing to debug but there really should never be
263 * errors coming from here.
264 */
265 static BOOL ntfs_is_cb_compressed(ntfs_attr *na,
266 runlist_element *rl, VCN cb_start_vcn, int cb_clusters)
267 {
268 /*
269 * The simplest case: the run starting at @cb_start_vcn contains
270 * @cb_clusters clusters which are all not sparse, thus the cb is not
271 * compressed.
272 */
273 restart:
274 cb_clusters -= rl->length - (cb_start_vcn - rl->vcn);
275 while (cb_clusters > 0) {
276 /* Go to the next run. */
277 rl++;
278 /* Map the next runlist fragment if it is not mapped. */
279 if (rl->lcn < LCN_HOLE || !rl->length) {
280 cb_start_vcn = rl->vcn;
281 rl = ntfs_attr_find_vcn(na, rl->vcn);
282 if (!rl || rl->lcn < LCN_HOLE || !rl->length)
283 return TRUE;
284 /*
285 * If the runs were merged need to deal with the
286 * resulting partial run so simply restart.
287 */
288 if (rl->vcn < cb_start_vcn)
289 goto restart;
290 }
291 /* If the current run is sparse, the cb is compressed. */
292 if (rl->lcn == LCN_HOLE)
293 return TRUE;
294 /* If the whole cb is not sparse, it is not compressed. */
295 if (rl->length >= cb_clusters)
296 return FALSE;
297 cb_clusters -= rl->length;
298 };
299 /* All cb_clusters were not sparse thus the cb is not compressed. */
300 return FALSE;
301 }
302
303 /**
304 * ntfs_compressed_attr_pread - read from a compressed attribute
305 * @na: ntfs attribute to read from
306 * @pos: byte position in the attribute to begin reading from
307 * @count: number of bytes to read
308 * @b: output data buffer
309 *
310 * NOTE: You probably want to be using attrib.c::ntfs_attr_pread() instead.
311 *
312 * This function will read @count bytes starting at offset @pos from the
313 * compressed ntfs attribute @na into the data buffer @b.
314 *
315 * On success, return the number of successfully read bytes. If this number
316 * is lower than @count this means that the read reached end of file or that
317 * an error was encountered during the read so that the read is partial.
318 * 0 means end of file or nothing was read (also return 0 when @count is 0).
319 *
320 * On error and nothing has been read, return -1 with errno set appropriately
321 * to the return code of ntfs_pread(), or to EINVAL in case of invalid
322 * arguments.
323 */
324 s64 ntfs_compressed_attr_pread(ntfs_attr *na, s64 pos, s64 count, void *b)
325 {
326 s64 br, to_read, ofs, total, total2;
327 u64 cb_size_mask;
328 VCN start_vcn, vcn, end_vcn;
329 ntfs_volume *vol;
330 runlist_element *rl;
331 u8 *dest, *cb, *cb_pos, *cb_end;
332 u32 cb_size;
333 int err;
334 unsigned int nr_cbs, cb_clusters;
335
336 ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, pos 0x%llx, count 0x%llx.\n",
337 (unsigned long long)na->ni->mft_no, na->type,
338 (long long)pos, (long long)count);
339 if (!na || !NAttrCompressed(na) || !na->ni || !na->ni->vol || !b ||
340 pos < 0 || count < 0) {
341 errno = EINVAL;
342 return -1;
343 }
344 /*
345 * Encrypted attributes are not supported. We return access denied,
346 * which is what Windows NT4 does, too.
347 */
348 if (NAttrEncrypted(na)) {
349 errno = EACCES;
350 return -1;
351 }
352 if (!count)
353 return 0;
354 /* Truncate reads beyond end of attribute. */
355 if (pos + count > na->data_size) {
356 if (pos >= na->data_size) {
357 return 0;
358 }
359 count = na->data_size - pos;
360 }
361 /* If it is a resident attribute, simply use ntfs_attr_pread(). */
362 if (!NAttrNonResident(na))
363 return ntfs_attr_pread(na, pos, count, b);
364 total = total2 = 0;
365 /* Zero out reads beyond initialized size. */
366 if (pos + count > na->initialized_size) {
367 if (pos >= na->initialized_size) {
368 memset(b, 0, count);
369 return count;
370 }
371 total2 = pos + count - na->initialized_size;
372 count -= total2;
373 memset((u8*)b + count, 0, total2);
374 }
375 vol = na->ni->vol;
376 cb_size = na->compression_block_size;
377 cb_size_mask = cb_size - 1UL;
378 cb_clusters = na->compression_block_clusters;
379
380 /* Need a temporary buffer for each loaded compression block. */
381 cb = ntfs_malloc(cb_size);
382 if (!cb)
383 return -1;
384
385 /* Need a temporary buffer for each uncompressed block. */
386 dest = ntfs_malloc(cb_size);
387 if (!dest) {
388 err = errno;
389 free(cb);
390 errno = err;
391 return -1;
392 }
393 /*
394 * The first vcn in the first compression block (cb) which we need to
395 * decompress.
396 */
397 start_vcn = (pos & ~cb_size_mask) >> vol->cluster_size_bits;
398 /* Offset in the uncompressed cb at which to start reading data. */
399 ofs = pos & cb_size_mask;
400 /*
401 * The first vcn in the cb after the last cb which we need to
402 * decompress.
403 */
404 end_vcn = ((pos + count + cb_size - 1) & ~cb_size_mask) >>
405 vol->cluster_size_bits;
406 /* Number of compression blocks (cbs) in the wanted vcn range. */
407 nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits >>
408 na->compression_block_size_bits;
409 cb_end = cb + cb_size;
410 do_next_cb:
411 nr_cbs--;
412 cb_pos = cb;
413 vcn = start_vcn;
414 start_vcn += cb_clusters;
415
416 /* Check whether the compression block is sparse. */
417 rl = ntfs_attr_find_vcn(na, vcn);
418 if (!rl || rl->lcn < LCN_HOLE) {
419 free(cb);
420 free(dest);
421 if (total)
422 return total;
423 /* FIXME: Do we want EIO or the error code? (AIA) */
424 errno = EIO;
425 return -1;
426 }
427 if (rl->lcn == LCN_HOLE) {
428 /* Sparse cb, zero out destination range overlapping the cb. */
429 ntfs_log_debug("Found sparse compression block.\n");
430 to_read = min(count, cb_size - ofs);
431 memset(b, 0, to_read);
432 ofs = 0;
433 total += to_read;
434 count -= to_read;
435 b = (u8*)b + to_read;
436 } else if (!ntfs_is_cb_compressed(na, rl, vcn, cb_clusters)) {
437 s64 tdata_size, tinitialized_size;
438 /*
439 * Uncompressed cb, read it straight into the destination range
440 * overlapping the cb.
441 */
442 ntfs_log_debug("Found uncompressed compression block.\n");
443 /*
444 * Read the uncompressed data into the destination buffer.
445 * NOTE: We cheat a little bit here by marking the attribute as
446 * not compressed in the ntfs_attr structure so that we can
447 * read the data by simply using ntfs_attr_pread(). (-8
448 * NOTE: we have to modify data_size and initialized_size
449 * temporarily as well...
450 */
451 to_read = min(count, cb_size - ofs);
452 ofs += vcn << vol->cluster_size_bits;
453 NAttrClearCompressed(na);
454 tdata_size = na->data_size;
455 tinitialized_size = na->initialized_size;
456 na->data_size = na->initialized_size = na->allocated_size;
457 do {
458 br = ntfs_attr_pread(na, ofs, to_read, b);
459 if (br < 0) {
460 err = errno;
461 na->data_size = tdata_size;
462 na->initialized_size = tinitialized_size;
463 NAttrSetCompressed(na);
464 free(cb);
465 free(dest);
466 if (total)
467 return total;
468 errno = err;
469 return br;
470 }
471 total += br;
472 count -= br;
473 b = (u8*)b + br;
474 to_read -= br;
475 ofs += br;
476 } while (to_read > 0);
477 na->data_size = tdata_size;
478 na->initialized_size = tinitialized_size;
479 NAttrSetCompressed(na);
480 ofs = 0;
481 } else {
482 s64 tdata_size, tinitialized_size;
483
484 /*
485 * Compressed cb, decompress it into the temporary buffer, then
486 * copy the data to the destination range overlapping the cb.
487 */
488 ntfs_log_debug("Found compressed compression block.\n");
489 /*
490 * Read the compressed data into the temporary buffer.
491 * NOTE: We cheat a little bit here by marking the attribute as
492 * not compressed in the ntfs_attr structure so that we can
493 * read the raw, compressed data by simply using
494 * ntfs_attr_pread(). (-8
495 * NOTE: We have to modify data_size and initialized_size
496 * temporarily as well...
497 */
498 to_read = cb_size;
499 NAttrClearCompressed(na);
500 tdata_size = na->data_size;
501 tinitialized_size = na->initialized_size;
502 na->data_size = na->initialized_size = na->allocated_size;
503 do {
504 br = ntfs_attr_pread(na,
505 (vcn << vol->cluster_size_bits) +
506 (cb_pos - cb), to_read, cb_pos);
507 if (br < 0) {
508 err = errno;
509 na->data_size = tdata_size;
510 na->initialized_size = tinitialized_size;
511 NAttrSetCompressed(na);
512 free(cb);
513 free(dest);
514 if (total)
515 return total;
516 errno = err;
517 return br;
518 }
519 cb_pos += br;
520 to_read -= br;
521 } while (to_read > 0);
522 na->data_size = tdata_size;
523 na->initialized_size = tinitialized_size;
524 NAttrSetCompressed(na);
525 /* Just a precaution. */
526 if (cb_pos + 2 <= cb_end)
527 *(u16*)cb_pos = 0;
528 ntfs_log_debug("Successfully read the compression block.\n");
529 if (ntfs_decompress(dest, cb_size, cb, cb_size) < 0) {
530 err = errno;
531 free(cb);
532 free(dest);
533 if (total)
534 return total;
535 errno = err;
536 return -1;
537 }
538 to_read = min(count, cb_size - ofs);
539 memcpy(b, dest + ofs, to_read);
540 total += to_read;
541 count -= to_read;
542 b = (u8*)b + to_read;
543 ofs = 0;
544 }
545 /* Do we have more work to do? */
546 if (nr_cbs)
547 goto do_next_cb;
548 /* We no longer need the buffers. */
549 free(cb);
550 free(dest);
551 /* Return number of bytes read. */
552 return total + total2;
553 }