1 /**
   2  * compress.c - Compressed attribute handling code.  Part of the Linux-NTFS
   3  *              project.
   4  *
   5  * Copyright (c) 2004-2005 Anton Altaparmakov
   6  * Copyright (c)      2005 Yura Pakhuchiy
   7  *
   8  * This program/include file is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU General Public License as published
  10  * by the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * This program/include file is distributed in the hope that it will be
  14  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
  15  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program (in the main directory of the Linux-NTFS
  20  * distribution in the file COPYING); if not, write to the Free Software
  21  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23 
  24 #ifdef HAVE_CONFIG_H
  25 #include "config.h"
  26 #endif
  27 
  28 #ifdef HAVE_STDIO_H
  29 #include <stdio.h>
  30 #endif
  31 #ifdef HAVE_STRING_H
  32 #include <string.h>
  33 #endif
  34 #ifdef HAVE_STDLIB_H
  35 #include <stdlib.h>
  36 #endif
  37 #ifdef HAVE_ERRNO_H
  38 #include <errno.h>
  39 #endif
  40 
  41 #include "compat.h"
  42 #include "attrib.h"
  43 #include "debug.h"
  44 #include "volume.h"
  45 #include "types.h"
  46 #include "layout.h"
  47 #include "runlist.h"
  48 #include "compress.h"
  49 #include "logging.h"
  50 
  51 /**
  52  * enum ntfs_compression_constants - constants used in the compression code
  53  */
  54 typedef enum {
  55         /* Token types and access mask. */
  56         NTFS_SYMBOL_TOKEN       =       0,
  57         NTFS_PHRASE_TOKEN       =       1,
  58         NTFS_TOKEN_MASK         =       1,
  59 
  60         /* Compression sub-block constants. */
  61         NTFS_SB_SIZE_MASK       =       0x0fff,
  62         NTFS_SB_SIZE            =       0x1000,
  63         NTFS_SB_IS_COMPRESSED   =       0x8000,
  64 } ntfs_compression_constants;
  65 
  66 /**
  67  * ntfs_decompress - decompress a compression block into an array of pages
  68  * @dest:       buffer to which to write the decompressed data
  69  * @dest_size:  size of buffer @dest in bytes
  70  * @cb_start:   compression block to decompress
  71  * @cb_size:    size of compression block @cb_start in bytes
  72  *
  73  * This decompresses the compression block @cb_start into the destination
  74  * buffer @dest.
  75  *
  76  * @cb_start is a pointer to the compression block which needs decompressing
  77  * and @cb_size is the size of @cb_start in bytes (8-64kiB).
  78  *
  79  * Return 0 if success or -EOVERFLOW on error in the compressed stream.
  80  */
  81 static int ntfs_decompress(u8 *dest, const u32 dest_size,
  82                 u8 *const cb_start, const u32 cb_size)
  83 {
  84         /*
  85          * Pointers into the compressed data, i.e. the compression block (cb),
  86          * and the therein contained sub-blocks (sb).
  87          */
  88         u8 *cb_end = cb_start + cb_size; /* End of cb. */
  89         u8 *cb = cb_start;      /* Current position in cb. */
  90         u8 *cb_sb_start = cb;   /* Beginning of the current sb in the cb. */
  91         u8 *cb_sb_end;          /* End of current sb / beginning of next sb. */
  92         /* Variables for uncompressed data / destination. */
  93         u8 *dest_end = dest + dest_size;        /* End of dest buffer. */
  94         u8 *dest_sb_start;      /* Start of current sub-block in dest. */
  95         u8 *dest_sb_end;        /* End of current sb in dest. */
  96         /* Variables for tag and token parsing. */
  97         u8 tag;                 /* Current tag. */
  98         int token;              /* Loop counter for the eight tokens in tag. */
  99 
 100         ntfs_log_trace("Entering, cb_size = 0x%x.\n", (unsigned)cb_size);
 101 do_next_sb:
 102         ntfs_log_debug("Beginning sub-block at offset = 0x%x in the cb.\n",
 103                         cb - cb_start);
 104         /*
 105          * Have we reached the end of the compression block or the end of the
 106          * decompressed data?  The latter can happen for example if the current
 107          * position in the compression block is one byte before its end so the
 108          * first two checks do not detect it.
 109          */
 110         if (cb == cb_end || !le16_to_cpup((u16*)cb) || dest == dest_end) {
 111                 ntfs_log_debug("Completed. Returning success (0).\n");
 112                 return 0;
 113         }
 114         /* Setup offset for the current sub-block destination. */
 115         dest_sb_start = dest;
 116         dest_sb_end = dest + NTFS_SB_SIZE;
 117         /* Check that we are still within allowed boundaries. */
 118         if (dest_sb_end > dest_end)
 119                 goto return_overflow;
 120         /* Does the minimum size of a compressed sb overflow valid range? */
 121         if (cb + 6 > cb_end)
 122                 goto return_overflow;
 123         /* Setup the current sub-block source pointers and validate range. */
 124         cb_sb_start = cb;
 125         cb_sb_end = cb_sb_start + (le16_to_cpup((u16*)cb) & NTFS_SB_SIZE_MASK)
 126                         + 3;
 127         if (cb_sb_end > cb_end)
 128                 goto return_overflow;
 129         /* Now, we are ready to process the current sub-block (sb). */
 130         if (!(le16_to_cpup((u16*)cb) & NTFS_SB_IS_COMPRESSED)) {
 131                 ntfs_log_debug("Found uncompressed sub-block.\n");
 132                 /* This sb is not compressed, just copy it into destination. */
 133                 /* Advance source position to first data byte. */
 134                 cb += 2;
 135                 /* An uncompressed sb must be full size. */
 136                 if (cb_sb_end - cb != NTFS_SB_SIZE)
 137                         goto return_overflow;
 138                 /* Copy the block and advance the source position. */
 139                 memcpy(dest, cb, NTFS_SB_SIZE);
 140                 cb += NTFS_SB_SIZE;
 141                 /* Advance destination position to next sub-block. */
 142                 dest += NTFS_SB_SIZE;
 143                 goto do_next_sb;
 144         }
 145         ntfs_log_debug("Found compressed sub-block.\n");
 146         /* This sb is compressed, decompress it into destination. */
 147         /* Forward to the first tag in the sub-block. */
 148         cb += 2;
 149 do_next_tag:
 150         if (cb == cb_sb_end) {
 151                 /* Check if the decompressed sub-block was not full-length. */
 152                 if (dest < dest_sb_end) {
 153                         int nr_bytes = dest_sb_end - dest;
 154 
 155                         ntfs_log_debug("Filling incomplete sub-block with zeroes.\n");
 156                         /* Zero remainder and update destination position. */
 157                         memset(dest, 0, nr_bytes);
 158                         dest += nr_bytes;
 159                 }
 160                 /* We have finished the current sub-block. */
 161                 goto do_next_sb;
 162         }
 163         /* Check we are still in range. */
 164         if (cb > cb_sb_end || dest > dest_sb_end)
 165                 goto return_overflow;
 166         /* Get the next tag and advance to first token. */
 167         tag = *cb++;
 168         /* Parse the eight tokens described by the tag. */
 169         for (token = 0; token < 8; token++, tag >>= 1) {
 170                 u16 lg, pt, length, max_non_overlap;
 171                 register u16 i;
 172                 u8 *dest_back_addr;
 173 
 174                 /* Check if we are done / still in range. */
 175                 if (cb >= cb_sb_end || dest > dest_sb_end)
 176                         break;
 177                 /* Determine token type and parse appropriately.*/
 178                 if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
 179                         /*
 180                          * We have a symbol token, copy the symbol across, and
 181                          * advance the source and destination positions.
 182                          */
 183                         *dest++ = *cb++;
 184                         /* Continue with the next token. */
 185                         continue;
 186                 }
 187                 /*
 188                  * We have a phrase token. Make sure it is not the first tag in
 189                  * the sb as this is illegal and would confuse the code below.
 190                  */
 191                 if (dest == dest_sb_start)
 192                         goto return_overflow;
 193                 /*
 194                  * Determine the number of bytes to go back (p) and the number
 195                  * of bytes to copy (l). We use an optimized algorithm in which
 196                  * we first calculate log2(current destination position in sb),
 197                  * which allows determination of l and p in O(1) rather than
 198                  * O(n). We just need an arch-optimized log2() function now.
 199                  */
 200                 lg = 0;
 201                 for (i = dest - dest_sb_start - 1; i >= 0x10; i >>= 1)
 202                         lg++;
 203                 /* Get the phrase token into i. */
 204                 pt = le16_to_cpup((u16*)cb);
 205                 /*
 206                  * Calculate starting position of the byte sequence in
 207                  * the destination using the fact that p = (pt >> (12 - lg)) + 1
 208                  * and make sure we don't go too far back.
 209                  */
 210                 dest_back_addr = dest - (pt >> (12 - lg)) - 1;
 211                 if (dest_back_addr < dest_sb_start)
 212                         goto return_overflow;
 213                 /* Now calculate the length of the byte sequence. */
 214                 length = (pt & (0xfff >> lg)) + 3;
 215                 /* Verify destination is in range. */
 216                 if (dest + length > dest_sb_end)
 217                         goto return_overflow;
 218                 /* The number of non-overlapping bytes. */
 219                 max_non_overlap = dest - dest_back_addr;
 220                 if (length <= max_non_overlap) {
 221                         /* The byte sequence doesn't overlap, just copy it. */
 222                         memcpy(dest, dest_back_addr, length);
 223                         /* Advance destination pointer. */
 224                         dest += length;
 225                 } else {
 226                         /*
 227                          * The byte sequence does overlap, copy non-overlapping
 228                          * part and then do a slow byte by byte copy for the
 229                          * overlapping part. Also, advance the destination
 230                          * pointer.
 231                          */
 232                         memcpy(dest, dest_back_addr, max_non_overlap);
 233                         dest += max_non_overlap;
 234                         dest_back_addr += max_non_overlap;
 235                         length -= max_non_overlap;
 236                         while (length--)
 237                                 *dest++ = *dest_back_addr++;
 238                 }
 239                 /* Advance source position and continue with the next token. */
 240                 cb += 2;
 241         }
 242         /* No tokens left in the current tag. Continue with the next tag. */
 243         goto do_next_tag;
 244 return_overflow:
 245         ntfs_log_debug("Failed. Returning -EOVERFLOW.\n");
 246         errno = EOVERFLOW;
 247         return -1;
 248 }
 249 
 250 /**
 251  * ntfs_is_cb_compressed - internal function, do not use
 252  *
 253  * This is a very specialised function determining if a cb is compressed or
 254  * uncompressed.  It is assumed that checking for a sparse cb has already been
 255  * performed and that the cb is not sparse.  It makes all sorts of other
 256  * assumptions as well and hence it is not useful anywhere other than where it
 257  * is used at the moment.  Please, do not make this function available for use
 258  * outside of compress.c as it is bound to confuse people and not do what they
 259  * want.
 260  *
 261  * Return TRUE on errors so that the error will be detected later on in the
 262  * code.  Might be a bit confusing to debug but there really should never be
 263  * errors coming from here.
 264  */
 265 static BOOL ntfs_is_cb_compressed(ntfs_attr *na,
 266                 runlist_element *rl, VCN cb_start_vcn, int cb_clusters)
 267 {
 268         /*
 269          * The simplest case: the run starting at @cb_start_vcn contains
 270          * @cb_clusters clusters which are all not sparse, thus the cb is not
 271          * compressed.
 272          */
 273 restart:
 274         cb_clusters -= rl->length - (cb_start_vcn - rl->vcn);
 275         while (cb_clusters > 0) {
 276                 /* Go to the next run. */
 277                 rl++;
 278                 /* Map the next runlist fragment if it is not mapped. */
 279                 if (rl->lcn < LCN_HOLE || !rl->length) {
 280                         cb_start_vcn = rl->vcn;
 281                         rl = ntfs_attr_find_vcn(na, rl->vcn);
 282                         if (!rl || rl->lcn < LCN_HOLE || !rl->length)
 283                                 return TRUE;
 284                         /*
 285                          * If the runs were merged need to deal with the
 286                          * resulting partial run so simply restart.
 287                          */
 288                         if (rl->vcn < cb_start_vcn)
 289                                 goto restart;
 290                 }
 291                 /* If the current run is sparse, the cb is compressed. */
 292                 if (rl->lcn == LCN_HOLE)
 293                         return TRUE;
 294                 /* If the whole cb is not sparse, it is not compressed. */
 295                 if (rl->length >= cb_clusters)
 296                         return FALSE;
 297                 cb_clusters -= rl->length;
 298         };
 299         /* All cb_clusters were not sparse thus the cb is not compressed. */
 300         return FALSE;
 301 }
 302 
 303 /**
 304  * ntfs_compressed_attr_pread - read from a compressed attribute
 305  * @na:         ntfs attribute to read from
 306  * @pos:        byte position in the attribute to begin reading from
 307  * @count:      number of bytes to read
 308  * @b:          output data buffer
 309  *
 310  * NOTE:  You probably want to be using attrib.c::ntfs_attr_pread() instead.
 311  *
 312  * This function will read @count bytes starting at offset @pos from the
 313  * compressed ntfs attribute @na into the data buffer @b.
 314  *
 315  * On success, return the number of successfully read bytes.  If this number
 316  * is lower than @count this means that the read reached end of file or that
 317  * an error was encountered during the read so that the read is partial.
 318  * 0 means end of file or nothing was read (also return 0 when @count is 0).
 319  *
 320  * On error and nothing has been read, return -1 with errno set appropriately
 321  * to the return code of ntfs_pread(), or to EINVAL in case of invalid
 322  * arguments.
 323  */
 324 s64 ntfs_compressed_attr_pread(ntfs_attr *na, s64 pos, s64 count, void *b)
 325 {
 326         s64 br, to_read, ofs, total, total2;
 327         u64 cb_size_mask;
 328         VCN start_vcn, vcn, end_vcn;
 329         ntfs_volume *vol;
 330         runlist_element *rl;
 331         u8 *dest, *cb, *cb_pos, *cb_end;
 332         u32 cb_size;
 333         int err;
 334         unsigned int nr_cbs, cb_clusters;
 335 
 336         ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, pos 0x%llx, count 0x%llx.\n",
 337                         (unsigned long long)na->ni->mft_no, na->type,
 338                         (long long)pos, (long long)count);
 339         if (!na || !NAttrCompressed(na) || !na->ni || !na->ni->vol || !b ||
 340                         pos < 0 || count < 0) {
 341                 errno = EINVAL;
 342                 return -1;
 343         }
 344         /*
 345          * Encrypted attributes are not supported.  We return access denied,
 346          * which is what Windows NT4 does, too.
 347          */
 348         if (NAttrEncrypted(na)) {
 349                 errno = EACCES;
 350                 return -1;
 351         }
 352         if (!count)
 353                 return 0;
 354         /* Truncate reads beyond end of attribute. */
 355         if (pos + count > na->data_size) {
 356                 if (pos >= na->data_size) {
 357                         return 0;
 358                 }
 359                 count = na->data_size - pos;
 360         }
 361         /* If it is a resident attribute, simply use ntfs_attr_pread(). */
 362         if (!NAttrNonResident(na))
 363                 return ntfs_attr_pread(na, pos, count, b);
 364         total = total2 = 0;
 365         /* Zero out reads beyond initialized size. */
 366         if (pos + count > na->initialized_size) {
 367                 if (pos >= na->initialized_size) {
 368                         memset(b, 0, count);
 369                         return count;
 370                 }
 371                 total2 = pos + count - na->initialized_size;
 372                 count -= total2;
 373                 memset((u8*)b + count, 0, total2);
 374         }
 375         vol = na->ni->vol;
 376         cb_size = na->compression_block_size;
 377         cb_size_mask = cb_size - 1UL;
 378         cb_clusters = na->compression_block_clusters;
 379 
 380         /* Need a temporary buffer for each loaded compression block. */
 381         cb = ntfs_malloc(cb_size);
 382         if (!cb)
 383                 return -1;
 384 
 385         /* Need a temporary buffer for each uncompressed block. */
 386         dest = ntfs_malloc(cb_size);
 387         if (!dest) {
 388                 err = errno;
 389                 free(cb);
 390                 errno = err;
 391                 return -1;
 392         }
 393         /*
 394          * The first vcn in the first compression block (cb) which we need to
 395          * decompress.
 396          */
 397         start_vcn = (pos & ~cb_size_mask) >> vol->cluster_size_bits;
 398         /* Offset in the uncompressed cb at which to start reading data. */
 399         ofs = pos & cb_size_mask;
 400         /*
 401          * The first vcn in the cb after the last cb which we need to
 402          * decompress.
 403          */
 404         end_vcn = ((pos + count + cb_size - 1) & ~cb_size_mask) >>
 405                         vol->cluster_size_bits;
 406         /* Number of compression blocks (cbs) in the wanted vcn range. */
 407         nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits >>
 408                         na->compression_block_size_bits;
 409         cb_end = cb + cb_size;
 410 do_next_cb:
 411         nr_cbs--;
 412         cb_pos = cb;
 413         vcn = start_vcn;
 414         start_vcn += cb_clusters;
 415 
 416         /* Check whether the compression block is sparse. */
 417         rl = ntfs_attr_find_vcn(na, vcn);
 418         if (!rl || rl->lcn < LCN_HOLE) {
 419                 free(cb);
 420                 free(dest);
 421                 if (total)
 422                         return total;
 423                 /* FIXME: Do we want EIO or the error code? (AIA) */
 424                 errno = EIO;
 425                 return -1;
 426         }
 427         if (rl->lcn == LCN_HOLE) {
 428                 /* Sparse cb, zero out destination range overlapping the cb. */
 429                 ntfs_log_debug("Found sparse compression block.\n");
 430                 to_read = min(count, cb_size - ofs);
 431                 memset(b, 0, to_read);
 432                 ofs = 0;
 433                 total += to_read;
 434                 count -= to_read;
 435                 b = (u8*)b + to_read;
 436         } else if (!ntfs_is_cb_compressed(na, rl, vcn, cb_clusters)) {
 437                 s64 tdata_size, tinitialized_size;
 438                 /*
 439                  * Uncompressed cb, read it straight into the destination range
 440                  * overlapping the cb.
 441                  */
 442                 ntfs_log_debug("Found uncompressed compression block.\n");
 443                 /*
 444                  * Read the uncompressed data into the destination buffer.
 445                  * NOTE: We cheat a little bit here by marking the attribute as
 446                  * not compressed in the ntfs_attr structure so that we can
 447                  * read the data by simply using ntfs_attr_pread().  (-8
 448                  * NOTE: we have to modify data_size and initialized_size
 449                  * temporarily as well...
 450                  */
 451                 to_read = min(count, cb_size - ofs);
 452                 ofs += vcn << vol->cluster_size_bits;
 453                 NAttrClearCompressed(na);
 454                 tdata_size = na->data_size;
 455                 tinitialized_size = na->initialized_size;
 456                 na->data_size = na->initialized_size = na->allocated_size;
 457                 do {
 458                         br = ntfs_attr_pread(na, ofs, to_read, b);
 459                         if (br < 0) {
 460                                 err = errno;
 461                                 na->data_size = tdata_size;
 462                                 na->initialized_size = tinitialized_size;
 463                                 NAttrSetCompressed(na);
 464                                 free(cb);
 465                                 free(dest);
 466                                 if (total)
 467                                         return total;
 468                                 errno = err;
 469                                 return br;
 470                         }
 471                         total += br;
 472                         count -= br;
 473                         b = (u8*)b + br;
 474                         to_read -= br;
 475                         ofs += br;
 476                 } while (to_read > 0);
 477                 na->data_size = tdata_size;
 478                 na->initialized_size = tinitialized_size;
 479                 NAttrSetCompressed(na);
 480                 ofs = 0;
 481         } else {
 482                 s64 tdata_size, tinitialized_size;
 483 
 484                 /*
 485                  * Compressed cb, decompress it into the temporary buffer, then
 486                  * copy the data to the destination range overlapping the cb.
 487                  */
 488                 ntfs_log_debug("Found compressed compression block.\n");
 489                 /*
 490                  * Read the compressed data into the temporary buffer.
 491                  * NOTE: We cheat a little bit here by marking the attribute as
 492                  * not compressed in the ntfs_attr structure so that we can
 493                  * read the raw, compressed data by simply using
 494                  * ntfs_attr_pread().  (-8
 495                  * NOTE: We have to modify data_size and initialized_size
 496                  * temporarily as well...
 497                  */
 498                 to_read = cb_size;
 499                 NAttrClearCompressed(na);
 500                 tdata_size = na->data_size;
 501                 tinitialized_size = na->initialized_size;
 502                 na->data_size = na->initialized_size = na->allocated_size;
 503                 do {
 504                         br = ntfs_attr_pread(na,
 505                                         (vcn << vol->cluster_size_bits) +
 506                                         (cb_pos - cb), to_read, cb_pos);
 507                         if (br < 0) {
 508                                 err = errno;
 509                                 na->data_size = tdata_size;
 510                                 na->initialized_size = tinitialized_size;
 511                                 NAttrSetCompressed(na);
 512                                 free(cb);
 513                                 free(dest);
 514                                 if (total)
 515                                         return total;
 516                                 errno = err;
 517                                 return br;
 518                         }
 519                         cb_pos += br;
 520                         to_read -= br;
 521                 } while (to_read > 0);
 522                 na->data_size = tdata_size;
 523                 na->initialized_size = tinitialized_size;
 524                 NAttrSetCompressed(na);
 525                 /* Just a precaution. */
 526                 if (cb_pos + 2 <= cb_end)
 527                         *(u16*)cb_pos = 0;
 528                 ntfs_log_debug("Successfully read the compression block.\n");
 529                 if (ntfs_decompress(dest, cb_size, cb, cb_size) < 0) {
 530                         err = errno;
 531                         free(cb);
 532                         free(dest);
 533                         if (total)
 534                                 return total;
 535                         errno = err;
 536                         return -1;
 537                 }
 538                 to_read = min(count, cb_size - ofs);
 539                 memcpy(b, dest + ofs, to_read);
 540                 total += to_read;
 541                 count -= to_read;
 542                 b = (u8*)b + to_read;
 543                 ofs = 0;
 544         }
 545         /* Do we have more work to do? */
 546         if (nr_cbs)
 547                 goto do_next_cb;
 548         /* We no longer need the buffers. */
 549         free(cb);
 550         free(dest);
 551         /* Return number of bytes read. */
 552         return total + total2;
 553 }