1 /** 2 * compress.c - Compressed attribute handling code. Part of the Linux-NTFS 3 * project. 4 * 5 * Copyright (c) 2004-2005 Anton Altaparmakov 6 * Copyright (c) 2005 Yura Pakhuchiy 7 * 8 * This program/include file is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License as published 10 * by the Free Software Foundation; either version 2 of the License, or 11 * (at your option) any later version. 12 * 13 * This program/include file is distributed in the hope that it will be 14 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 15 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program (in the main directory of the Linux-NTFS 20 * distribution in the file COPYING); if not, write to the Free Software 21 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 22 */ 23 24 #ifdef HAVE_CONFIG_H 25 #include "config.h" 26 #endif 27 28 #ifdef HAVE_STDIO_H 29 #include <stdio.h> 30 #endif 31 #ifdef HAVE_STRING_H 32 #include <string.h> 33 #endif 34 #ifdef HAVE_STDLIB_H 35 #include <stdlib.h> 36 #endif 37 #ifdef HAVE_ERRNO_H 38 #include <errno.h> 39 #endif 40 41 #include "compat.h" 42 #include "attrib.h" 43 #include "debug.h" 44 #include "volume.h" 45 #include "types.h" 46 #include "layout.h" 47 #include "runlist.h" 48 #include "compress.h" 49 #include "logging.h" 50 51 /** 52 * enum ntfs_compression_constants - constants used in the compression code 53 */ 54 typedef enum { 55 /* Token types and access mask. */ 56 NTFS_SYMBOL_TOKEN = 0, 57 NTFS_PHRASE_TOKEN = 1, 58 NTFS_TOKEN_MASK = 1, 59 60 /* Compression sub-block constants. */ 61 NTFS_SB_SIZE_MASK = 0x0fff, 62 NTFS_SB_SIZE = 0x1000, 63 NTFS_SB_IS_COMPRESSED = 0x8000, 64 } ntfs_compression_constants; 65 66 /** 67 * ntfs_decompress - decompress a compression block into an array of pages 68 * @dest: buffer to which to write the decompressed data 69 * @dest_size: size of buffer @dest in bytes 70 * @cb_start: compression block to decompress 71 * @cb_size: size of compression block @cb_start in bytes 72 * 73 * This decompresses the compression block @cb_start into the destination 74 * buffer @dest. 75 * 76 * @cb_start is a pointer to the compression block which needs decompressing 77 * and @cb_size is the size of @cb_start in bytes (8-64kiB). 78 * 79 * Return 0 if success or -EOVERFLOW on error in the compressed stream. 80 */ 81 static int ntfs_decompress(u8 *dest, const u32 dest_size, 82 u8 *const cb_start, const u32 cb_size) 83 { 84 /* 85 * Pointers into the compressed data, i.e. the compression block (cb), 86 * and the therein contained sub-blocks (sb). 87 */ 88 u8 *cb_end = cb_start + cb_size; /* End of cb. */ 89 u8 *cb = cb_start; /* Current position in cb. */ 90 u8 *cb_sb_start = cb; /* Beginning of the current sb in the cb. */ 91 u8 *cb_sb_end; /* End of current sb / beginning of next sb. */ 92 /* Variables for uncompressed data / destination. */ 93 u8 *dest_end = dest + dest_size; /* End of dest buffer. */ 94 u8 *dest_sb_start; /* Start of current sub-block in dest. */ 95 u8 *dest_sb_end; /* End of current sb in dest. */ 96 /* Variables for tag and token parsing. */ 97 u8 tag; /* Current tag. */ 98 int token; /* Loop counter for the eight tokens in tag. */ 99 100 ntfs_log_trace("Entering, cb_size = 0x%x.\n", (unsigned)cb_size); 101 do_next_sb: 102 ntfs_log_debug("Beginning sub-block at offset = 0x%x in the cb.\n", 103 cb - cb_start); 104 /* 105 * Have we reached the end of the compression block or the end of the 106 * decompressed data? The latter can happen for example if the current 107 * position in the compression block is one byte before its end so the 108 * first two checks do not detect it. 109 */ 110 if (cb == cb_end || !le16_to_cpup((u16*)cb) || dest == dest_end) { 111 ntfs_log_debug("Completed. Returning success (0).\n"); 112 return 0; 113 } 114 /* Setup offset for the current sub-block destination. */ 115 dest_sb_start = dest; 116 dest_sb_end = dest + NTFS_SB_SIZE; 117 /* Check that we are still within allowed boundaries. */ 118 if (dest_sb_end > dest_end) 119 goto return_overflow; 120 /* Does the minimum size of a compressed sb overflow valid range? */ 121 if (cb + 6 > cb_end) 122 goto return_overflow; 123 /* Setup the current sub-block source pointers and validate range. */ 124 cb_sb_start = cb; 125 cb_sb_end = cb_sb_start + (le16_to_cpup((u16*)cb) & NTFS_SB_SIZE_MASK) 126 + 3; 127 if (cb_sb_end > cb_end) 128 goto return_overflow; 129 /* Now, we are ready to process the current sub-block (sb). */ 130 if (!(le16_to_cpup((u16*)cb) & NTFS_SB_IS_COMPRESSED)) { 131 ntfs_log_debug("Found uncompressed sub-block.\n"); 132 /* This sb is not compressed, just copy it into destination. */ 133 /* Advance source position to first data byte. */ 134 cb += 2; 135 /* An uncompressed sb must be full size. */ 136 if (cb_sb_end - cb != NTFS_SB_SIZE) 137 goto return_overflow; 138 /* Copy the block and advance the source position. */ 139 memcpy(dest, cb, NTFS_SB_SIZE); 140 cb += NTFS_SB_SIZE; 141 /* Advance destination position to next sub-block. */ 142 dest += NTFS_SB_SIZE; 143 goto do_next_sb; 144 } 145 ntfs_log_debug("Found compressed sub-block.\n"); 146 /* This sb is compressed, decompress it into destination. */ 147 /* Forward to the first tag in the sub-block. */ 148 cb += 2; 149 do_next_tag: 150 if (cb == cb_sb_end) { 151 /* Check if the decompressed sub-block was not full-length. */ 152 if (dest < dest_sb_end) { 153 int nr_bytes = dest_sb_end - dest; 154 155 ntfs_log_debug("Filling incomplete sub-block with zeroes.\n"); 156 /* Zero remainder and update destination position. */ 157 memset(dest, 0, nr_bytes); 158 dest += nr_bytes; 159 } 160 /* We have finished the current sub-block. */ 161 goto do_next_sb; 162 } 163 /* Check we are still in range. */ 164 if (cb > cb_sb_end || dest > dest_sb_end) 165 goto return_overflow; 166 /* Get the next tag and advance to first token. */ 167 tag = *cb++; 168 /* Parse the eight tokens described by the tag. */ 169 for (token = 0; token < 8; token++, tag >>= 1) { 170 u16 lg, pt, length, max_non_overlap; 171 register u16 i; 172 u8 *dest_back_addr; 173 174 /* Check if we are done / still in range. */ 175 if (cb >= cb_sb_end || dest > dest_sb_end) 176 break; 177 /* Determine token type and parse appropriately.*/ 178 if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) { 179 /* 180 * We have a symbol token, copy the symbol across, and 181 * advance the source and destination positions. 182 */ 183 *dest++ = *cb++; 184 /* Continue with the next token. */ 185 continue; 186 } 187 /* 188 * We have a phrase token. Make sure it is not the first tag in 189 * the sb as this is illegal and would confuse the code below. 190 */ 191 if (dest == dest_sb_start) 192 goto return_overflow; 193 /* 194 * Determine the number of bytes to go back (p) and the number 195 * of bytes to copy (l). We use an optimized algorithm in which 196 * we first calculate log2(current destination position in sb), 197 * which allows determination of l and p in O(1) rather than 198 * O(n). We just need an arch-optimized log2() function now. 199 */ 200 lg = 0; 201 for (i = dest - dest_sb_start - 1; i >= 0x10; i >>= 1) 202 lg++; 203 /* Get the phrase token into i. */ 204 pt = le16_to_cpup((u16*)cb); 205 /* 206 * Calculate starting position of the byte sequence in 207 * the destination using the fact that p = (pt >> (12 - lg)) + 1 208 * and make sure we don't go too far back. 209 */ 210 dest_back_addr = dest - (pt >> (12 - lg)) - 1; 211 if (dest_back_addr < dest_sb_start) 212 goto return_overflow; 213 /* Now calculate the length of the byte sequence. */ 214 length = (pt & (0xfff >> lg)) + 3; 215 /* Verify destination is in range. */ 216 if (dest + length > dest_sb_end) 217 goto return_overflow; 218 /* The number of non-overlapping bytes. */ 219 max_non_overlap = dest - dest_back_addr; 220 if (length <= max_non_overlap) { 221 /* The byte sequence doesn't overlap, just copy it. */ 222 memcpy(dest, dest_back_addr, length); 223 /* Advance destination pointer. */ 224 dest += length; 225 } else { 226 /* 227 * The byte sequence does overlap, copy non-overlapping 228 * part and then do a slow byte by byte copy for the 229 * overlapping part. Also, advance the destination 230 * pointer. 231 */ 232 memcpy(dest, dest_back_addr, max_non_overlap); 233 dest += max_non_overlap; 234 dest_back_addr += max_non_overlap; 235 length -= max_non_overlap; 236 while (length--) 237 *dest++ = *dest_back_addr++; 238 } 239 /* Advance source position and continue with the next token. */ 240 cb += 2; 241 } 242 /* No tokens left in the current tag. Continue with the next tag. */ 243 goto do_next_tag; 244 return_overflow: 245 ntfs_log_debug("Failed. Returning -EOVERFLOW.\n"); 246 errno = EOVERFLOW; 247 return -1; 248 } 249 250 /** 251 * ntfs_is_cb_compressed - internal function, do not use 252 * 253 * This is a very specialised function determining if a cb is compressed or 254 * uncompressed. It is assumed that checking for a sparse cb has already been 255 * performed and that the cb is not sparse. It makes all sorts of other 256 * assumptions as well and hence it is not useful anywhere other than where it 257 * is used at the moment. Please, do not make this function available for use 258 * outside of compress.c as it is bound to confuse people and not do what they 259 * want. 260 * 261 * Return TRUE on errors so that the error will be detected later on in the 262 * code. Might be a bit confusing to debug but there really should never be 263 * errors coming from here. 264 */ 265 static BOOL ntfs_is_cb_compressed(ntfs_attr *na, 266 runlist_element *rl, VCN cb_start_vcn, int cb_clusters) 267 { 268 /* 269 * The simplest case: the run starting at @cb_start_vcn contains 270 * @cb_clusters clusters which are all not sparse, thus the cb is not 271 * compressed. 272 */ 273 restart: 274 cb_clusters -= rl->length - (cb_start_vcn - rl->vcn); 275 while (cb_clusters > 0) { 276 /* Go to the next run. */ 277 rl++; 278 /* Map the next runlist fragment if it is not mapped. */ 279 if (rl->lcn < LCN_HOLE || !rl->length) { 280 cb_start_vcn = rl->vcn; 281 rl = ntfs_attr_find_vcn(na, rl->vcn); 282 if (!rl || rl->lcn < LCN_HOLE || !rl->length) 283 return TRUE; 284 /* 285 * If the runs were merged need to deal with the 286 * resulting partial run so simply restart. 287 */ 288 if (rl->vcn < cb_start_vcn) 289 goto restart; 290 } 291 /* If the current run is sparse, the cb is compressed. */ 292 if (rl->lcn == LCN_HOLE) 293 return TRUE; 294 /* If the whole cb is not sparse, it is not compressed. */ 295 if (rl->length >= cb_clusters) 296 return FALSE; 297 cb_clusters -= rl->length; 298 }; 299 /* All cb_clusters were not sparse thus the cb is not compressed. */ 300 return FALSE; 301 } 302 303 /** 304 * ntfs_compressed_attr_pread - read from a compressed attribute 305 * @na: ntfs attribute to read from 306 * @pos: byte position in the attribute to begin reading from 307 * @count: number of bytes to read 308 * @b: output data buffer 309 * 310 * NOTE: You probably want to be using attrib.c::ntfs_attr_pread() instead. 311 * 312 * This function will read @count bytes starting at offset @pos from the 313 * compressed ntfs attribute @na into the data buffer @b. 314 * 315 * On success, return the number of successfully read bytes. If this number 316 * is lower than @count this means that the read reached end of file or that 317 * an error was encountered during the read so that the read is partial. 318 * 0 means end of file or nothing was read (also return 0 when @count is 0). 319 * 320 * On error and nothing has been read, return -1 with errno set appropriately 321 * to the return code of ntfs_pread(), or to EINVAL in case of invalid 322 * arguments. 323 */ 324 s64 ntfs_compressed_attr_pread(ntfs_attr *na, s64 pos, s64 count, void *b) 325 { 326 s64 br, to_read, ofs, total, total2; 327 u64 cb_size_mask; 328 VCN start_vcn, vcn, end_vcn; 329 ntfs_volume *vol; 330 runlist_element *rl; 331 u8 *dest, *cb, *cb_pos, *cb_end; 332 u32 cb_size; 333 int err; 334 unsigned int nr_cbs, cb_clusters; 335 336 ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, pos 0x%llx, count 0x%llx.\n", 337 (unsigned long long)na->ni->mft_no, na->type, 338 (long long)pos, (long long)count); 339 if (!na || !NAttrCompressed(na) || !na->ni || !na->ni->vol || !b || 340 pos < 0 || count < 0) { 341 errno = EINVAL; 342 return -1; 343 } 344 /* 345 * Encrypted attributes are not supported. We return access denied, 346 * which is what Windows NT4 does, too. 347 */ 348 if (NAttrEncrypted(na)) { 349 errno = EACCES; 350 return -1; 351 } 352 if (!count) 353 return 0; 354 /* Truncate reads beyond end of attribute. */ 355 if (pos + count > na->data_size) { 356 if (pos >= na->data_size) { 357 return 0; 358 } 359 count = na->data_size - pos; 360 } 361 /* If it is a resident attribute, simply use ntfs_attr_pread(). */ 362 if (!NAttrNonResident(na)) 363 return ntfs_attr_pread(na, pos, count, b); 364 total = total2 = 0; 365 /* Zero out reads beyond initialized size. */ 366 if (pos + count > na->initialized_size) { 367 if (pos >= na->initialized_size) { 368 memset(b, 0, count); 369 return count; 370 } 371 total2 = pos + count - na->initialized_size; 372 count -= total2; 373 memset((u8*)b + count, 0, total2); 374 } 375 vol = na->ni->vol; 376 cb_size = na->compression_block_size; 377 cb_size_mask = cb_size - 1UL; 378 cb_clusters = na->compression_block_clusters; 379 380 /* Need a temporary buffer for each loaded compression block. */ 381 cb = ntfs_malloc(cb_size); 382 if (!cb) 383 return -1; 384 385 /* Need a temporary buffer for each uncompressed block. */ 386 dest = ntfs_malloc(cb_size); 387 if (!dest) { 388 err = errno; 389 free(cb); 390 errno = err; 391 return -1; 392 } 393 /* 394 * The first vcn in the first compression block (cb) which we need to 395 * decompress. 396 */ 397 start_vcn = (pos & ~cb_size_mask) >> vol->cluster_size_bits; 398 /* Offset in the uncompressed cb at which to start reading data. */ 399 ofs = pos & cb_size_mask; 400 /* 401 * The first vcn in the cb after the last cb which we need to 402 * decompress. 403 */ 404 end_vcn = ((pos + count + cb_size - 1) & ~cb_size_mask) >> 405 vol->cluster_size_bits; 406 /* Number of compression blocks (cbs) in the wanted vcn range. */ 407 nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits >> 408 na->compression_block_size_bits; 409 cb_end = cb + cb_size; 410 do_next_cb: 411 nr_cbs--; 412 cb_pos = cb; 413 vcn = start_vcn; 414 start_vcn += cb_clusters; 415 416 /* Check whether the compression block is sparse. */ 417 rl = ntfs_attr_find_vcn(na, vcn); 418 if (!rl || rl->lcn < LCN_HOLE) { 419 free(cb); 420 free(dest); 421 if (total) 422 return total; 423 /* FIXME: Do we want EIO or the error code? (AIA) */ 424 errno = EIO; 425 return -1; 426 } 427 if (rl->lcn == LCN_HOLE) { 428 /* Sparse cb, zero out destination range overlapping the cb. */ 429 ntfs_log_debug("Found sparse compression block.\n"); 430 to_read = min(count, cb_size - ofs); 431 memset(b, 0, to_read); 432 ofs = 0; 433 total += to_read; 434 count -= to_read; 435 b = (u8*)b + to_read; 436 } else if (!ntfs_is_cb_compressed(na, rl, vcn, cb_clusters)) { 437 s64 tdata_size, tinitialized_size; 438 /* 439 * Uncompressed cb, read it straight into the destination range 440 * overlapping the cb. 441 */ 442 ntfs_log_debug("Found uncompressed compression block.\n"); 443 /* 444 * Read the uncompressed data into the destination buffer. 445 * NOTE: We cheat a little bit here by marking the attribute as 446 * not compressed in the ntfs_attr structure so that we can 447 * read the data by simply using ntfs_attr_pread(). (-8 448 * NOTE: we have to modify data_size and initialized_size 449 * temporarily as well... 450 */ 451 to_read = min(count, cb_size - ofs); 452 ofs += vcn << vol->cluster_size_bits; 453 NAttrClearCompressed(na); 454 tdata_size = na->data_size; 455 tinitialized_size = na->initialized_size; 456 na->data_size = na->initialized_size = na->allocated_size; 457 do { 458 br = ntfs_attr_pread(na, ofs, to_read, b); 459 if (br < 0) { 460 err = errno; 461 na->data_size = tdata_size; 462 na->initialized_size = tinitialized_size; 463 NAttrSetCompressed(na); 464 free(cb); 465 free(dest); 466 if (total) 467 return total; 468 errno = err; 469 return br; 470 } 471 total += br; 472 count -= br; 473 b = (u8*)b + br; 474 to_read -= br; 475 ofs += br; 476 } while (to_read > 0); 477 na->data_size = tdata_size; 478 na->initialized_size = tinitialized_size; 479 NAttrSetCompressed(na); 480 ofs = 0; 481 } else { 482 s64 tdata_size, tinitialized_size; 483 484 /* 485 * Compressed cb, decompress it into the temporary buffer, then 486 * copy the data to the destination range overlapping the cb. 487 */ 488 ntfs_log_debug("Found compressed compression block.\n"); 489 /* 490 * Read the compressed data into the temporary buffer. 491 * NOTE: We cheat a little bit here by marking the attribute as 492 * not compressed in the ntfs_attr structure so that we can 493 * read the raw, compressed data by simply using 494 * ntfs_attr_pread(). (-8 495 * NOTE: We have to modify data_size and initialized_size 496 * temporarily as well... 497 */ 498 to_read = cb_size; 499 NAttrClearCompressed(na); 500 tdata_size = na->data_size; 501 tinitialized_size = na->initialized_size; 502 na->data_size = na->initialized_size = na->allocated_size; 503 do { 504 br = ntfs_attr_pread(na, 505 (vcn << vol->cluster_size_bits) + 506 (cb_pos - cb), to_read, cb_pos); 507 if (br < 0) { 508 err = errno; 509 na->data_size = tdata_size; 510 na->initialized_size = tinitialized_size; 511 NAttrSetCompressed(na); 512 free(cb); 513 free(dest); 514 if (total) 515 return total; 516 errno = err; 517 return br; 518 } 519 cb_pos += br; 520 to_read -= br; 521 } while (to_read > 0); 522 na->data_size = tdata_size; 523 na->initialized_size = tinitialized_size; 524 NAttrSetCompressed(na); 525 /* Just a precaution. */ 526 if (cb_pos + 2 <= cb_end) 527 *(u16*)cb_pos = 0; 528 ntfs_log_debug("Successfully read the compression block.\n"); 529 if (ntfs_decompress(dest, cb_size, cb, cb_size) < 0) { 530 err = errno; 531 free(cb); 532 free(dest); 533 if (total) 534 return total; 535 errno = err; 536 return -1; 537 } 538 to_read = min(count, cb_size - ofs); 539 memcpy(b, dest + ofs, to_read); 540 total += to_read; 541 count -= to_read; 542 b = (u8*)b + to_read; 543 ofs = 0; 544 } 545 /* Do we have more work to do? */ 546 if (nr_cbs) 547 goto do_next_cb; 548 /* We no longer need the buffers. */ 549 free(cb); 550 free(dest); 551 /* Return number of bytes read. */ 552 return total + total2; 553 }