1 /**
   2  * mft.c - Mft record handling code. Part of the Linux-NTFS project.
   3  *
   4  * Copyright (c) 2000-2004 Anton Altaparmakov
   5  * Copyright (c) 2005-2007 Yura Pakhuchiy
   6  * Copyright (c) 2004-2005 Richard Russon
   7  *
   8  * This program/include file is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU General Public License as published
  10  * by the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * This program/include file is distributed in the hope that it will be
  14  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
  15  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program (in the main directory of the Linux-NTFS
  20  * distribution in the file COPYING); if not, write to the Free Software
  21  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23 
  24 #ifdef HAVE_CONFIG_H
  25 #include "config.h"
  26 #endif
  27 
  28 #ifdef HAVE_STDLIB_H
  29 #include <stdlib.h>
  30 #endif
  31 #ifdef HAVE_STDIO_H
  32 #include <stdio.h>
  33 #endif
  34 #ifdef HAVE_ERRNO_H
  35 #include <errno.h>
  36 #endif
  37 #ifdef HAVE_STRING_H
  38 #include <string.h>
  39 #endif
  40 #include <time.h>
  41 
  42 #include "compat.h"
  43 #include "types.h"
  44 #include "device.h"
  45 #include "debug.h"
  46 #include "bitmap.h"
  47 #include "attrib.h"
  48 #include "inode.h"
  49 #include "volume.h"
  50 #include "layout.h"
  51 #include "lcnalloc.h"
  52 #include "mft.h"
  53 #include "logging.h"
  54 
  55 /**
  56  * ntfs_mft_records_read - read records from the mft from disk
  57  * @vol:        volume to read from
  58  * @mref:       starting mft record number to read
  59  * @count:      number of mft records to read
  60  * @b:          output data buffer
  61  *
  62  * Read @count mft records starting at @mref from volume @vol into buffer
  63  * @b. Return 0 on success or -1 on error, with errno set to the error
  64  * code.
  65  *
  66  * If any of the records exceed the initialized size of the $MFT/$DATA
  67  * attribute, i.e. they cannot possibly be allocated mft records, assume this
  68  * is a bug and return error code ESPIPE.
  69  *
  70  * The read mft records are mst deprotected and are hence ready to use. The
  71  * caller should check each record with is_baad_record() in case mst
  72  * deprotection failed.
  73  *
  74  * NOTE: @b has to be at least of size @count * vol->mft_record_size.
  75  */
  76 int ntfs_mft_records_read(const ntfs_volume *vol, const MFT_REF mref,
  77                 const s64 count, MFT_RECORD *b)
  78 {
  79         s64 br;
  80         VCN m;
  81 
  82         ntfs_log_trace("Entering for inode 0x%llx.\n", MREF(mref));
  83         if (!vol || !vol->mft_na || !b || count < 0) {
  84                 errno = EINVAL;
  85                 return -1;
  86         }
  87         m = MREF(mref);
  88         /* Refuse to read non-allocated mft records. */
  89         if (m + count > vol->mft_na->initialized_size >>
  90                         vol->mft_record_size_bits) {
  91                 errno = ESPIPE;
  92                 return -1;
  93         }
  94         br = ntfs_attr_mst_pread(vol->mft_na, m << vol->mft_record_size_bits,
  95                         count, vol->mft_record_size, b);
  96         if (br != count) {
  97                 if (br != -1)
  98                         errno = EIO;
  99                 if (br >= 0)
 100                         ntfs_log_debug("Error: partition is smaller than it should "
 101                                         "be!\n");
 102                 else
 103                         ntfs_log_perror("Error reading $Mft record(s)");
 104                 return -1;
 105         }
 106         return 0;
 107 }
 108 
 109 /**
 110  * ntfs_mft_records_write - write mft records to disk
 111  * @vol:        volume to write to
 112  * @mref:       starting mft record number to write
 113  * @count:      number of mft records to write
 114  * @b:          data buffer containing the mft records to write
 115  *
 116  * Write @count mft records starting at @mref from data buffer @b to volume
 117  * @vol. Return 0 on success or -1 on error, with errno set to the error code.
 118  *
 119  * If any of the records exceed the initialized size of the $MFT/$DATA
 120  * attribute, i.e. they cannot possibly be allocated mft records, assume this
 121  * is a bug and return error code ESPIPE.
 122  *
 123  * Before the mft records are written, they are mst protected. After the write,
 124  * they are deprotected again, thus resulting in an increase in the update
 125  * sequence number inside the data buffer @b.
 126  *
 127  * If any mft records are written which are also represented in the mft mirror
 128  * $MFTMirr, we make a copy of the relevant parts of the data buffer @b into a
 129  * temporary buffer before we do the actual write. Then if at least one mft
 130  * record was successfully written, we write the appropriate mft records from
 131  * the copied buffer to the mft mirror, too.
 132  */
 133 int ntfs_mft_records_write(const ntfs_volume *vol, const MFT_REF mref,
 134                 const s64 count, MFT_RECORD *b)
 135 {
 136         s64 bw;
 137         VCN m;
 138         void *bmirr = NULL;
 139         int cnt = 0, res = 0;
 140 
 141         ntfs_log_trace("Entering for inode 0x%llx.\n", MREF(mref));
 142         if (!vol || !vol->mft_na || vol->mftmirr_size <= 0 || !b || count < 0) {
 143                 errno = EINVAL;
 144                 return -1;
 145         }
 146         m = MREF(mref);
 147         /* Refuse to write non-allocated mft records. */
 148         if (m + count > vol->mft_na->initialized_size >>
 149                         vol->mft_record_size_bits) {
 150                 errno = ESPIPE;
 151                 return -1;
 152         }
 153         if (m < vol->mftmirr_size) {
 154                 if (!vol->mftmirr_na) {
 155                         errno = EINVAL;
 156                         return -1;
 157                 }
 158                 cnt = vol->mftmirr_size - m;
 159                 if (cnt > count)
 160                         cnt = count;
 161                 bmirr = ntfs_malloc(cnt * vol->mft_record_size);
 162                 if (!bmirr)
 163                         return -1;
 164                 memcpy(bmirr, b, cnt * vol->mft_record_size);
 165         }
 166         bw = ntfs_attr_mst_pwrite(vol->mft_na, m << vol->mft_record_size_bits,
 167                         count, vol->mft_record_size, b);
 168         if (bw != count) {
 169                 if (bw != -1)
 170                         errno = EIO;
 171                 if (bw >= 0)
 172                         ntfs_log_error("Partial write while writing $Mft "
 173                                         "record(s)!\n");
 174                 else
 175                         ntfs_log_perror("Error writing $Mft record(s)");
 176                 res = errno;
 177         }
 178         if (bmirr && bw > 0) {
 179                 if (bw < cnt)
 180                         cnt = bw;
 181                 bw = ntfs_attr_mst_pwrite(vol->mftmirr_na,
 182                                 m << vol->mft_record_size_bits, cnt,
 183                                 vol->mft_record_size, bmirr);
 184                 if (bw != cnt) {
 185                         if (bw != -1)
 186                                 errno = EIO;
 187                         ntfs_log_debug("Error: failed to sync $MFTMirr! Run "
 188                                         "chkdsk.\n");
 189                         res = errno;
 190                 }
 191         }
 192         free(bmirr);
 193         if (!res)
 194                 return res;
 195         errno = res;
 196         return -1;
 197 }
 198 
 199 /**
 200  * ntfs_file_record_read - read a FILE record from the mft from disk
 201  * @vol:        volume to read from
 202  * @mref:       mft reference specifying mft record to read
 203  * @mrec:       address of pointer in which to return the mft record
 204  * @attr:       address of pointer in which to return the first attribute
 205  *
 206  * Read a FILE record from the mft of @vol from the storage medium. @mref
 207  * specifies the mft record to read, including the sequence number, which can
 208  * be 0 if no sequence number checking is to be performed.
 209  *
 210  * The function allocates a buffer large enough to hold the mft record and
 211  * reads the record into the buffer (mst deprotecting it in the process).
 212  * *@mrec is then set to point to the buffer.
 213  *
 214  * If @attr is not NULL, *@attr is set to point to the first attribute in the
 215  * mft record, i.e. *@attr is a pointer into *@mrec.
 216  *
 217  * Return 0 on success, or -1 on error, with errno set to the error code.
 218  *
 219  * The read mft record is checked for having the magic FILE,
 220  * and for having a matching sequence number (if MSEQNO(*@mref) != 0).
 221  * If either of these fails, -1 is returned and errno is set to EIO. If you get
 222  * this, but you still want to read the mft record (e.g. in order to correct
 223  * it), use ntfs_mft_record_read() directly.
 224  *
 225  * Note: Caller has to free *@mrec when finished.
 226  *
 227  * Note: We do not check if the mft record is flagged in use. The caller can
 228  *       check if desired.
 229  */
 230 int ntfs_file_record_read(const ntfs_volume *vol, const MFT_REF mref,
 231                 MFT_RECORD **mrec, ATTR_RECORD **attr)
 232 {
 233         MFT_RECORD *m;
 234         ATTR_RECORD *a;
 235         int err;
 236 
 237         if (!vol || !mrec) {
 238                 errno = EINVAL;
 239                 return -1;
 240         }
 241         m = *mrec;
 242         if (!m) {
 243                 m = (MFT_RECORD*)ntfs_malloc(vol->mft_record_size);
 244                 if (!m)
 245                         return -1;
 246         }
 247         if (ntfs_mft_record_read(vol, mref, m)) {
 248                 err = errno;
 249                 goto read_failed;
 250         }
 251         if (!ntfs_is_file_record(m->magic))
 252                 goto file_corrupt;
 253         if (MSEQNO(mref) && MSEQNO(mref) != le16_to_cpu(m->sequence_number))
 254                 goto file_corrupt;
 255         a = (ATTR_RECORD*)((char*)m + le16_to_cpu(m->attrs_offset));
 256         if (p2n(a) < p2n(m) || (char*)a > (char*)m + vol->mft_record_size)
 257                 goto file_corrupt;
 258         *mrec = m;
 259         if (attr)
 260                 *attr = a;
 261         return 0;
 262 file_corrupt:
 263         ntfs_log_debug("ntfs_file_record_read(): file is corrupt.\n");
 264         err = EIO;
 265 read_failed:
 266         if (m != *mrec)
 267                 free(m);
 268         errno = err;
 269         return -1;
 270 }
 271 
 272 /**
 273  * ntfs_mft_record_layout - layout an mft record into a memory buffer
 274  * @vol:        volume to which the mft record will belong
 275  * @mref:       mft reference specifying the mft record number
 276  * @mrec:       destination buffer of size >= @vol->mft_record_size bytes
 277  *
 278  * Layout an empty, unused mft record with the mft reference @mref into the
 279  * buffer @m.  The volume @vol is needed because the mft record structure was
 280  * modified in NTFS 3.1 so we need to know which volume version this mft record
 281  * will be used on.
 282  *
 283  * On success return 0 and on error return -1 with errno set to the error code.
 284  */
 285 int ntfs_mft_record_layout(const ntfs_volume *vol, const MFT_REF mref,
 286                 MFT_RECORD *mrec)
 287 {
 288         ATTR_RECORD *a;
 289 
 290         if (!vol || !mrec) {
 291                 errno = EINVAL;
 292                 return -1;
 293         }
 294         /* Aligned to 2-byte boundary. */
 295         if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver))
 296                 mrec->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD_OLD) + 1) & ~1);
 297         else {
 298                 /* Abort if mref is > 32 bits. */
 299                 if (MREF(mref) & 0x0000ffff00000000ull) {
 300                         ntfs_log_debug("Mft reference exceeds 32 bits!\n");
 301                         errno = ERANGE;
 302                         return -1;
 303                 }
 304                 mrec->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD) + 1) & ~1);
 305                 /*
 306                  * Set the NTFS 3.1+ specific fields while we know that the
 307                  * volume version is 3.1+.
 308                  */
 309                 mrec->reserved = cpu_to_le16(0);
 310                 mrec->mft_record_number = cpu_to_le32(MREF(mref));
 311         }
 312         mrec->magic = magic_FILE;
 313         if (vol->mft_record_size >= NTFS_BLOCK_SIZE)
 314                 mrec->usa_count = cpu_to_le16(vol->mft_record_size /
 315                                 NTFS_BLOCK_SIZE + 1);
 316         else {
 317                 mrec->usa_count = cpu_to_le16(1);
 318                 ntfs_log_error("Sector size is bigger than MFT record size.  "
 319                                 "Setting usa_count to 1.  If Windows chkdsk "
 320                                 "reports this as corruption, please email %s "
 321                                 "stating that you saw this message and that "
 322                                 "the file system created was corrupt.  "
 323                                 "Thank you.\n", NTFS_DEV_LIST);
 324         }
 325         /* Set the update sequence number to 1. */
 326         *(le16*)((u8*)mrec + le16_to_cpu(mrec->usa_ofs)) = cpu_to_le16(1);
 327         mrec->lsn = 0;
 328         mrec->sequence_number = cpu_to_le16(1);
 329         mrec->link_count = cpu_to_le16(0);
 330         /* Aligned to 8-byte boundary. */
 331         mrec->attrs_offset = cpu_to_le16((le16_to_cpu(mrec->usa_ofs) +
 332                         (le16_to_cpu(mrec->usa_count) << 1) + 7) & ~7);
 333         mrec->flags = cpu_to_le16(0);
 334         /*
 335          * Using attrs_offset plus eight bytes (for the termination attribute),
 336          * aligned to 8-byte boundary.
 337          */
 338         mrec->bytes_in_use = cpu_to_le32((le16_to_cpu(mrec->attrs_offset) + 8 +
 339                         7) & ~7);
 340         mrec->bytes_allocated = cpu_to_le32(vol->mft_record_size);
 341         mrec->base_mft_record = cpu_to_le64((MFT_REF)0);
 342         mrec->next_attr_instance = cpu_to_le16(0);
 343         a = (ATTR_RECORD*)((u8*)mrec + le16_to_cpu(mrec->attrs_offset));
 344         a->type = AT_END;
 345         a->length = cpu_to_le32(0);
 346         /* Finally, clear the unused part of the mft record. */
 347         memset((u8*)a + 8, 0, vol->mft_record_size - ((u8*)a + 8 - (u8*)mrec));
 348         return 0;
 349 }
 350 
 351 /**
 352  * ntfs_mft_record_format - format an mft record on an ntfs volume
 353  * @vol:        volume on which to format the mft record
 354  * @mref:       mft reference specifying mft record to format
 355  *
 356  * Format the mft record with the mft reference @mref in $MFT/$DATA, i.e. lay
 357  * out an empty, unused mft record in memory and write it to the volume @vol.
 358  *
 359  * On success return 0 and on error return -1 with errno set to the error code.
 360  */
 361 int ntfs_mft_record_format(const ntfs_volume *vol, const MFT_REF mref)
 362 {
 363         MFT_RECORD *m;
 364         int err;
 365 
 366         if (!vol || !vol->mft_na) {
 367                 errno = EINVAL;
 368                 return -1;
 369         }
 370         m = ntfs_calloc(vol->mft_record_size);
 371         if (!m)
 372                 return -1;
 373         if (ntfs_mft_record_layout(vol, mref, m)) {
 374                 err = errno;
 375                 free(m);
 376                 errno = err;
 377                 return -1;
 378         }
 379         if (ntfs_mft_record_write(vol, mref, m)) {
 380                 err = errno;
 381                 free(m);
 382                 errno = err;
 383                 return -1;
 384         }
 385         free(m);
 386         return 0;
 387 }
 388 
 389 static const char *es = "  Leaving inconsistent metadata.  Run chkdsk.";
 390 
 391 /**
 392  * ntfs_ffz - Find the first unset (zero) bit in a word
 393  * @word:
 394  *
 395  * Description...
 396  *
 397  * Returns:
 398  */
 399 static inline unsigned int ntfs_ffz(unsigned int word)
 400 {
 401         return ffs(~word) - 1;
 402 }
 403 
 404 #ifndef PAGE_SIZE
 405 #define PAGE_SIZE 4096
 406 #endif
 407 
 408 /**
 409  * ntfs_mft_bitmap_find_free_rec - find a free mft record in the mft bitmap
 410  * @vol:        volume on which to search for a free mft record
 411  * @base_ni:    open base inode if allocating an extent mft record or NULL
 412  *
 413  * Search for a free mft record in the mft bitmap attribute on the ntfs volume
 414  * @vol.
 415  *
 416  * If @base_ni is NULL start the search at the default allocator position.
 417  *
 418  * If @base_ni is not NULL start the search at the mft record after the base
 419  * mft record @base_ni.
 420  *
 421  * Return the free mft record on success and -1 on error with errno set to the
 422  * error code.  An error code of ENOSPC means that there are no free mft
 423  * records in the currently initialized mft bitmap.
 424  */
 425 static int ntfs_mft_bitmap_find_free_rec(ntfs_volume *vol, ntfs_inode *base_ni)
 426 {
 427         s64 pass_end, ll, data_pos, pass_start, ofs, bit;
 428         ntfs_attr *mftbmp_na;
 429         u8 *buf, *byte;
 430         unsigned int size;
 431         u8 pass, b;
 432 
 433         mftbmp_na = vol->mftbmp_na;
 434         /*
 435          * Set the end of the pass making sure we do not overflow the mft
 436          * bitmap.
 437          */
 438         size = PAGE_SIZE;
 439         pass_end = vol->mft_na->allocated_size >> vol->mft_record_size_bits;
 440         ll = mftbmp_na->initialized_size << 3;
 441         if (pass_end > ll)
 442                 pass_end = ll;
 443         pass = 1;
 444         if (!base_ni)
 445                 data_pos = vol->mft_data_pos;
 446         else
 447                 data_pos = base_ni->mft_no + 1;
 448         if (data_pos < 24)
 449                 data_pos = 24;
 450         if (data_pos >= pass_end) {
 451                 data_pos = 24;
 452                 pass = 2;
 453                 /* This happens on a freshly formatted volume. */
 454                 if (data_pos >= pass_end) {
 455                         errno = ENOSPC;
 456                         return -1;
 457                 }
 458         }
 459         pass_start = data_pos;
 460         buf = (u8*)ntfs_malloc(PAGE_SIZE);
 461         if (!buf)
 462                 return -1;
 463 
 464         ntfs_log_debug("Starting bitmap search: pass %u, pass_start 0x%llx, "
 465                         "pass_end 0x%llx, data_pos 0x%llx.\n", pass,
 466                         (long long)pass_start, (long long)pass_end,
 467                         (long long)data_pos);
 468 #ifdef DEBUG
 469         byte = NULL;
 470         b = 0;
 471 #endif
 472         /* Loop until a free mft record is found. */
 473         for (; pass <= 2; size = PAGE_SIZE) {
 474                 /* Cap size to pass_end. */
 475                 ofs = data_pos >> 3;
 476                 ll = ((pass_end + 7) >> 3) - ofs;
 477                 if (size > ll)
 478                         size = ll;
 479                 ll = ntfs_attr_pread(mftbmp_na, ofs, size, buf);
 480                 if (ll < 0) {
 481                         ntfs_log_error("Failed to read mft bitmap "
 482                                         "attribute, aborting.\n");
 483                         free(buf);
 484                         return -1;
 485                 }
 486                 ntfs_log_debug("Read 0x%llx bytes.\n", (long long)ll);
 487                 /* If we read at least one byte, search @buf for a zero bit. */
 488                 if (ll) {
 489                         size = ll << 3;
 490                         bit = data_pos & 7;
 491                         data_pos &= ~7ull;
 492                         ntfs_log_debug("Before inner for loop: size 0x%x, "
 493                                         "data_pos 0x%llx, bit 0x%llx, "
 494                                         "*byte 0x%hhx, b %u.\n", size,
 495                                         (long long)data_pos, (long long)bit,
 496                                         byte ? *byte : -1, b);
 497                         for (; bit < size && data_pos + bit < pass_end;
 498                                         bit &= ~7ull, bit += 8) {
 499                                 byte = buf + (bit >> 3);
 500                                 if (*byte == 0xff)
 501                                         continue;
 502                                 /* Note: ffz() result must be zero based. */
 503                                 b = ntfs_ffz((unsigned long)*byte);
 504                                 if (b < 8 && b >= (bit & 7)) {
 505                                         free(buf);
 506                                         return data_pos + (bit & ~7ull) + b;
 507                                 }
 508                         }
 509                         ntfs_log_debug("After inner for loop: size 0x%x, "
 510                                         "data_pos 0x%llx, bit 0x%llx, "
 511                                         "*byte 0x%hhx, b %u.\n", size,
 512                                         (long long)data_pos, (long long)bit,
 513                                         byte ? *byte : -1, b);
 514                         data_pos += size;
 515                         /*
 516                          * If the end of the pass has not been reached yet,
 517                          * continue searching the mft bitmap for a zero bit.
 518                          */
 519                         if (data_pos < pass_end)
 520                                 continue;
 521                 }
 522                 /* Do the next pass. */
 523                 pass++;
 524                 if (pass == 2) {
 525                         /*
 526                          * Starting the second pass, in which we scan the first
 527                          * part of the zone which we omitted earlier.
 528                          */
 529                         pass_end = pass_start;
 530                         data_pos = pass_start = 24;
 531                         ntfs_log_debug("pass %i, pass_start 0x%llx, pass_end "
 532                                         "0x%llx.\n", pass, (long long)pass_start,
 533                                         (long long)pass_end);
 534                         if (data_pos >= pass_end)
 535                                 break;
 536                 }
 537         }
 538         /* No free mft records in currently initialized mft bitmap. */
 539         free(buf);
 540         errno = ENOSPC;
 541         return -1;
 542 }
 543 
 544 /**
 545  * ntfs_mft_bitmap_extend_allocation - extend mft bitmap attribute by a cluster
 546  * @vol:        volume on which to extend the mft bitmap attribute
 547  *
 548  * Extend the mft bitmap attribute on the ntfs volume @vol by one cluster.
 549  *
 550  * Note:  Only changes allocated_size, i.e. does not touch initialized_size or
 551  * data_size.
 552  *
 553  * Return 0 on success and -1 on error with errno set to the error code.
 554  */
 555 static int ntfs_mft_bitmap_extend_allocation(ntfs_volume *vol)
 556 {
 557         LCN lcn;
 558         s64 ll = 0; /* silence compiler warning */
 559         ntfs_attr *mftbmp_na, *lcnbmp_na;
 560         runlist_element *rl, *rl2 = NULL; /* silence compiler warning */
 561         ntfs_attr_search_ctx *ctx;
 562         MFT_RECORD *m = NULL; /* silence compiler warning */
 563         ATTR_RECORD *a = NULL; /* silence compiler warning */
 564         int ret, mp_size;
 565         u32 old_alen = 0; /* silence compiler warning */
 566         u8 b, tb;
 567         struct {
 568                 u8 added_cluster:1;
 569                 u8 added_run:1;
 570                 u8 mp_rebuilt:1;
 571         } status = { 0, 0, 0 };
 572 
 573         mftbmp_na = vol->mftbmp_na;
 574         lcnbmp_na = vol->lcnbmp_na;
 575         /*
 576          * Determine the last lcn of the mft bitmap.  The allocated size of the
 577          * mft bitmap cannot be zero so we are ok to do this.
 578          */
 579         rl = ntfs_attr_find_vcn(mftbmp_na, (mftbmp_na->allocated_size - 1) >>
 580                         vol->cluster_size_bits);
 581         if (!rl || !rl->length || rl->lcn < 0) {
 582                 ntfs_log_error("Failed to determine last allocated "
 583                                 "cluster of mft bitmap attribute.\n");
 584                 if (rl)
 585                         errno = EIO;
 586                 return -1;
 587         }
 588         lcn = rl->lcn + rl->length;
 589         /*
 590          * Attempt to get the cluster following the last allocated cluster by
 591          * hand as it may be in the MFT zone so the allocator would not give it
 592          * to us.
 593          */
 594         ret = (int)ntfs_attr_pread(lcnbmp_na, lcn >> 3, 1, &b);
 595         if (ret < 0) {
 596                 ntfs_log_error("Failed to read from lcn bitmap.\n");
 597                 return -1;
 598         }
 599         ntfs_log_debug("Read %i byte%s.\n", ret, ret == 1 ? "" : "s");
 600         tb = 1 << (lcn & 7ull);
 601         if (ret == 1 && b != 0xff && !(b & tb)) {
 602                 /* Next cluster is free, allocate it. */
 603                 b |= tb;
 604                 ret = (int)ntfs_attr_pwrite(lcnbmp_na, lcn >> 3, 1, &b);
 605                 if (ret < 1) {
 606                         ntfs_log_error("Failed to write to lcn "
 607                                         "bitmap.\n");
 608                         if (!ret)
 609                                 errno = EIO;
 610                         return -1;
 611                 }
 612                 vol->nr_free_clusters--;
 613                 /* Update the mft bitmap runlist. */
 614                 rl->length++;
 615                 rl[1].vcn++;
 616                 status.added_cluster = 1;
 617                 ntfs_log_debug("Appending one cluster to mft bitmap.\n");
 618         } else {
 619                 /* Allocate a cluster from the DATA_ZONE. */
 620                 rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE);
 621                 if (!rl2) {
 622                         ntfs_log_error("Failed to allocate a cluster for "
 623                                         "the mft bitmap.\n");
 624                         return -1;
 625                 }
 626                 rl = ntfs_runlists_merge(mftbmp_na->rl, rl2);
 627                 if (!rl) {
 628                         ret = errno;
 629                         ntfs_log_error("Failed to merge runlists for mft "
 630                                         "bitmap.\n");
 631                         if (ntfs_cluster_free_from_rl(vol, rl2))
 632                                 ntfs_log_error("Failed to deallocate "
 633                                                 "cluster.%s\n", es);
 634                         free(rl2);
 635                         errno = ret;
 636                         return -1;
 637                 }
 638                 mftbmp_na->rl = rl;
 639                 status.added_run = 1;
 640                 ntfs_log_debug("Adding one run to mft bitmap.\n");
 641                 /* Find the last run in the new runlist. */
 642                 for (; rl[1].length; rl++)
 643                         ;
 644         }
 645         /*
 646          * Update the attribute record as well.  Note: @rl is the last
 647          * (non-terminator) runlist element of mft bitmap.
 648          */
 649         ctx = ntfs_attr_get_search_ctx(mftbmp_na->ni, NULL);
 650         if (!ctx) {
 651                 ntfs_log_error("Failed to get search context.\n");
 652                 goto undo_alloc;
 653         }
 654         if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
 655                         mftbmp_na->name_len, 0, rl[1].vcn, NULL, 0, ctx)) {
 656                 ntfs_log_error("Failed to find last attribute extent of "
 657                                 "mft bitmap attribute.\n");
 658                 goto undo_alloc;
 659         }
 660         m = ctx->mrec;
 661         a = ctx->attr;
 662         ll = sle64_to_cpu(a->u.nonres.lowest_vcn);
 663         rl2 = ntfs_attr_find_vcn(mftbmp_na, ll);
 664         if (!rl2 || !rl2->length) {
 665                 ntfs_log_error("Failed to determine previous last "
 666                                 "allocated cluster of mft bitmap attribute.\n");
 667                 if (rl2)
 668                         errno = EIO;
 669                 goto undo_alloc;
 670         }
 671         /* Get the size for the new mapping pairs array for this extent. */
 672         mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll);
 673         if (mp_size <= 0) {
 674                 ntfs_log_error("Get size for mapping pairs failed for "
 675                                 "mft bitmap attribute extent.\n");
 676                 goto undo_alloc;
 677         }
 678         /* Expand the attribute record if necessary. */
 679         old_alen = le32_to_cpu(a->length);
 680         if (ntfs_attr_record_resize(m, a, mp_size +
 681                         le16_to_cpu(a->u.nonres.mapping_pairs_offset))) {
 682                 if (errno != ENOSPC) {
 683                         ntfs_log_error("Failed to resize attribute "
 684                                         "record for mft bitmap attribute.\n");
 685                         goto undo_alloc;
 686                 }
 687                 // TODO: Deal with this by moving this extent to a new mft
 688                 // record or by starting a new extent in a new mft record.
 689                 ntfs_log_error("Not enough space in this mft record to "
 690                                 "accommodate extended mft bitmap attribute "
 691                                 "extent.  Cannot handle this yet.\n");
 692                 errno = EOPNOTSUPP;
 693                 goto undo_alloc;
 694         }
 695         status.mp_rebuilt = 1;
 696         /* Generate the mapping pairs array directly into the attr record. */
 697         if (ntfs_mapping_pairs_build(vol, (u8*)a +
 698                         le16_to_cpu(a->u.nonres.mapping_pairs_offset), mp_size, rl2, ll,
 699                         NULL)) {
 700                 ntfs_log_error("Failed to build mapping pairs array for "
 701                                 "mft bitmap attribute.\n");
 702                 errno = EIO;
 703                 goto undo_alloc;
 704         }
 705         /* Update the highest_vcn. */
 706         a->u.nonres.highest_vcn = cpu_to_sle64(rl[1].vcn - 1);
 707         /*
 708          * We now have extended the mft bitmap allocated_size by one cluster.
 709          * Reflect this in the ntfs_attr structure and the attribute record.
 710          */
 711         if (a->u.nonres.lowest_vcn) {
 712                 /*
 713                  * We are not in the first attribute extent, switch to it, but
 714                  * first ensure the changes will make it to disk later.
 715                  */
 716                 ntfs_inode_mark_dirty(ctx->ntfs_ino);
 717                 ntfs_attr_reinit_search_ctx(ctx);
 718                 if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
 719                                 mftbmp_na->name_len, 0, 0, NULL, 0, ctx)) {
 720                         ntfs_log_error("Failed to find first attribute "
 721                                         "extent of mft bitmap attribute.\n");
 722                         goto restore_undo_alloc;
 723                 }
 724                 a = ctx->attr;
 725         }
 726         mftbmp_na->allocated_size += vol->cluster_size;
 727         a->u.nonres.allocated_size = cpu_to_sle64(mftbmp_na->allocated_size);
 728         /* Ensure the changes make it to disk. */
 729         ntfs_inode_mark_dirty(ctx->ntfs_ino);
 730         ntfs_attr_put_search_ctx(ctx);
 731         return 0;
 732 restore_undo_alloc:
 733         ret = errno;
 734         ntfs_attr_reinit_search_ctx(ctx);
 735         if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
 736                         mftbmp_na->name_len, 0, rl[1].vcn, NULL, 0, ctx)) {
 737                 ntfs_log_error("Failed to find last attribute extent of "
 738                                 "mft bitmap attribute.%s\n", es);
 739                 ntfs_attr_put_search_ctx(ctx);
 740                 mftbmp_na->allocated_size += vol->cluster_size;
 741                 /*
 742                  * The only thing that is now wrong is ->allocated_size of the
 743                  * base attribute extent which chkdsk should be able to fix.
 744                  */
 745                 errno = ret;
 746                 return -1;
 747         }
 748         m = ctx->mrec;
 749         a = ctx->attr;
 750         a->u.nonres.highest_vcn = cpu_to_sle64(rl[1].vcn - 2);
 751         errno = ret;
 752 undo_alloc:
 753         ret = errno;
 754         if (status.added_cluster) {
 755                 /* Truncate the last run in the runlist by one cluster. */
 756                 rl->length--;
 757                 rl[1].vcn--;
 758         } else if (status.added_run) {
 759                 lcn = rl->lcn;
 760                 /* Remove the last run from the runlist. */
 761                 rl->lcn = rl[1].lcn;
 762                 rl->length = 0;
 763         }
 764         /* Deallocate the cluster. */
 765         if (ntfs_bitmap_clear_bit(lcnbmp_na, lcn))
 766                 ntfs_log_error("Failed to free cluster.%s\n", es);
 767         if (status.mp_rebuilt) {
 768                 if (ntfs_mapping_pairs_build(vol, (u8*)a +
 769                                 le16_to_cpu(a->u.nonres.mapping_pairs_offset),
 770                                 old_alen - le16_to_cpu(a->u.nonres.mapping_pairs_offset),
 771                                 rl2, ll, NULL))
 772                         ntfs_log_error("Failed to restore mapping "
 773                                         "pairs array.%s\n", es);
 774                 if (ntfs_attr_record_resize(m, a, old_alen))
 775                         ntfs_log_error("Failed to restore attribute "
 776                                         "record.%s\n", es);
 777                 ntfs_inode_mark_dirty(ctx->ntfs_ino);
 778         }
 779         if (ctx)
 780                 ntfs_attr_put_search_ctx(ctx);
 781         errno = ret;
 782         return -1;
 783 }
 784 
 785 /**
 786  * ntfs_mft_bitmap_extend_initialized - extend mft bitmap initialized data
 787  * @vol:        volume on which to extend the mft bitmap attribute
 788  *
 789  * Extend the initialized portion of the mft bitmap attribute on the ntfs
 790  * volume @vol by 8 bytes.
 791  *
 792  * Note:  Only changes initialized_size and data_size, i.e. requires that
 793  * allocated_size is big enough to fit the new initialized_size.
 794  *
 795  * Return 0 on success and -1 on error with errno set to the error code.
 796  */
 797 static int ntfs_mft_bitmap_extend_initialized(ntfs_volume *vol)
 798 {
 799         s64 old_data_size, old_initialized_size, ll;
 800         ntfs_attr *mftbmp_na;
 801         ntfs_attr_search_ctx *ctx;
 802         ATTR_RECORD *a;
 803         int err;
 804 
 805         mftbmp_na = vol->mftbmp_na;
 806         ctx = ntfs_attr_get_search_ctx(mftbmp_na->ni, NULL);
 807         if (!ctx) {
 808                 ntfs_log_error("Failed to get search context.\n");
 809                 return -1;
 810         }
 811         if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
 812                         mftbmp_na->name_len, 0, 0, NULL, 0, ctx)) {
 813                 ntfs_log_error("Failed to find first attribute extent of "
 814                                 "mft bitmap attribute.\n");
 815                 err = errno;
 816                 goto put_err_out;
 817         }
 818         a = ctx->attr;
 819         old_data_size = mftbmp_na->data_size;
 820         old_initialized_size = mftbmp_na->initialized_size;
 821         mftbmp_na->initialized_size += 8;
 822         a->u.nonres.initialized_size = cpu_to_sle64(mftbmp_na->initialized_size);
 823         if (mftbmp_na->initialized_size > mftbmp_na->data_size) {
 824                 mftbmp_na->data_size = mftbmp_na->initialized_size;
 825                 a->u.nonres.data_size = cpu_to_sle64(mftbmp_na->data_size);
 826         }
 827         /* Ensure the changes make it to disk. */
 828         ntfs_inode_mark_dirty(ctx->ntfs_ino);
 829         ntfs_attr_put_search_ctx(ctx);
 830         /* Initialize the mft bitmap attribute value with zeroes. */
 831         ll = 0;
 832         ll = ntfs_attr_pwrite(mftbmp_na, old_initialized_size, 8, &ll);
 833         if (ll == 8) {
 834                 ntfs_log_debug("Wrote eight initialized bytes to mft bitmap.\n");
 835                 return 0;
 836         }
 837         vol->nr_free_mft_records += 64; /* 8 bytes x 8 bits each. */
 838         ntfs_log_error("Failed to write to mft bitmap.\n");
 839         err = errno;
 840         if (ll >= 0)
 841                 err = EIO;
 842         /* Try to recover from the error. */
 843         ctx = ntfs_attr_get_search_ctx(mftbmp_na->ni, NULL);
 844         if (!ctx) {
 845                 ntfs_log_error("Failed to get search context.%s\n", es);
 846                 goto err_out;
 847         }
 848         if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
 849                         mftbmp_na->name_len, 0, 0, NULL, 0, ctx)) {
 850                 ntfs_log_error("Failed to find first attribute extent of "
 851                                 "mft bitmap attribute.%s\n", es);
 852 put_err_out:
 853                 ntfs_attr_put_search_ctx(ctx);
 854                 goto err_out;
 855         }
 856         a = ctx->attr;
 857         mftbmp_na->initialized_size = old_initialized_size;
 858         a->u.nonres.initialized_size = cpu_to_sle64(old_initialized_size);
 859         if (mftbmp_na->data_size != old_data_size) {
 860                 mftbmp_na->data_size = old_data_size;
 861                 a->u.nonres.data_size = cpu_to_sle64(old_data_size);
 862         }
 863         ntfs_inode_mark_dirty(ctx->ntfs_ino);
 864         ntfs_attr_put_search_ctx(ctx);
 865         ntfs_log_debug("Restored status of mftbmp: allocated_size 0x%llx, "
 866                         "data_size 0x%llx, initialized_size 0x%llx.\n",
 867                         (long long)mftbmp_na->allocated_size,
 868                         (long long)mftbmp_na->data_size,
 869                         (long long)mftbmp_na->initialized_size);
 870 err_out:
 871         errno = err;
 872         return -1;
 873 }
 874 
 875 /**
 876  * ntfs_mft_data_extend_allocation - extend mft data attribute
 877  * @vol:        volume on which to extend the mft data attribute
 878  *
 879  * Extend the mft data attribute on the ntfs volume @vol by 16 mft records
 880  * worth of clusters or if not enough space for this by one mft record worth
 881  * of clusters.
 882  *
 883  * Note:  Only changes allocated_size, i.e. does not touch initialized_size or
 884  * data_size.
 885  *
 886  * Return 0 on success and -1 on error with errno set to the error code.
 887  */
 888 static int ntfs_mft_data_extend_allocation(ntfs_volume *vol)
 889 {
 890         LCN lcn;
 891         VCN old_last_vcn;
 892         s64 min_nr, nr, ll = 0; /* silence compiler warning */
 893         ntfs_attr *mft_na;
 894         runlist_element *rl, *rl2;
 895         ntfs_attr_search_ctx *ctx;
 896         MFT_RECORD *m = NULL; /* silence compiler warning */
 897         ATTR_RECORD *a = NULL; /* silence compiler warning */
 898         int err, mp_size;
 899         u32 old_alen = 0; /* silence compiler warning */
 900         BOOL mp_rebuilt = FALSE;
 901 
 902         ntfs_log_debug("Extending mft data allocation.\n");
 903         mft_na = vol->mft_na;
 904         /*
 905          * Determine the preferred allocation location, i.e. the last lcn of
 906          * the mft data attribute.  The allocated size of the mft data
 907          * attribute cannot be zero so we are ok to do this.
 908          */
 909         rl = ntfs_attr_find_vcn(mft_na,
 910                         (mft_na->allocated_size - 1) >> vol->cluster_size_bits);
 911         if (!rl || !rl->length || rl->lcn < 0) {
 912                 ntfs_log_error("Failed to determine last allocated "
 913                                 "cluster of mft data attribute.\n");
 914                 if (rl)
 915                         errno = EIO;
 916                 return -1;
 917         }
 918         lcn = rl->lcn + rl->length;
 919         ntfs_log_debug("Last lcn of mft data attribute is 0x%llx.\n", (long long)lcn);
 920         /* Minimum allocation is one mft record worth of clusters. */
 921         min_nr = vol->mft_record_size >> vol->cluster_size_bits;
 922         if (!min_nr)
 923                 min_nr = 1;
 924         /* Want to allocate 16 mft records worth of clusters. */
 925         nr = vol->mft_record_size << 4 >> vol->cluster_size_bits;
 926         if (!nr)
 927                 nr = min_nr;
 928         ntfs_log_debug("Trying mft data allocation with default cluster count "
 929                         "%lli.\n", (long long)nr);
 930         old_last_vcn = rl[1].vcn;
 931         do {
 932                 rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE);
 933                 if (rl2)
 934                         break;
 935                 if (errno != ENOSPC || nr == min_nr) {
 936                         ntfs_log_error("Failed to allocate the minimal "
 937                                         "number of clusters (%lli) for the "
 938                                         "mft data attribute.\n", (long long)nr);
 939                         return -1;
 940                 }
 941                 /*
 942                  * There is not enough space to do the allocation, but there
 943                  * might be enough space to do a minimal allocation so try that
 944                  * before failing.
 945                  */
 946                 nr = min_nr;
 947                 ntfs_log_debug("Retrying mft data allocation with minimal cluster "
 948                                 "count %lli.\n", (long long)nr);
 949         } while (1);
 950         rl = ntfs_runlists_merge(mft_na->rl, rl2);
 951         if (!rl) {
 952                 err = errno;
 953                 ntfs_log_error("Failed to merge runlists for mft data "
 954                                 "attribute.\n");
 955                 if (ntfs_cluster_free_from_rl(vol, rl2))
 956                         ntfs_log_error("Failed to deallocate clusters "
 957                                         "from the mft data attribute.%s\n", es);
 958                 free(rl2);
 959                 errno = err;
 960                 return -1;
 961         }
 962         mft_na->rl = rl;
 963         ntfs_log_debug("Allocated %lli clusters.\n", nr);
 964         /* Find the last run in the new runlist. */
 965         for (; rl[1].length; rl++)
 966                 ;
 967         /* Update the attribute record as well. */
 968         ctx = ntfs_attr_get_search_ctx(mft_na->ni, NULL);
 969         if (!ctx) {
 970                 ntfs_log_error("Failed to get search context.\n");
 971                 goto undo_alloc;
 972         }
 973         if (ntfs_attr_lookup(mft_na->type, mft_na->name, mft_na->name_len, 0,
 974                         rl[1].vcn, NULL, 0, ctx)) {
 975                 ntfs_log_error("Failed to find last attribute extent of "
 976                                 "mft data attribute.\n");
 977                 goto undo_alloc;
 978         }
 979         m = ctx->mrec;
 980         a = ctx->attr;
 981         ll = sle64_to_cpu(a->u.nonres.lowest_vcn);
 982         rl2 = ntfs_attr_find_vcn(mft_na, ll);
 983         if (!rl2 || !rl2->length) {
 984                 ntfs_log_error("Failed to determine previous last "
 985                                 "allocated cluster of mft data attribute.\n");
 986                 if (rl2)
 987                         errno = EIO;
 988                 goto undo_alloc;
 989         }
 990         /* Get the size for the new mapping pairs array for this extent. */
 991         mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll);
 992         if (mp_size <= 0) {
 993                 ntfs_log_error("Get size for mapping pairs failed for "
 994                                 "mft data attribute extent.\n");
 995                 goto undo_alloc;
 996         }
 997         /* Expand the attribute record if necessary. */
 998         old_alen = le32_to_cpu(a->length);
 999         if (ntfs_attr_record_resize(m, a,
1000                         mp_size + le16_to_cpu(a->u.nonres.mapping_pairs_offset))) {
1001                 if (errno != ENOSPC) {
1002                         ntfs_log_error("Failed to resize attribute "
1003                                         "record for mft data attribute.\n");
1004                         goto undo_alloc;
1005                 }
1006                 // TODO: Deal with this by moving this extent to a new mft
1007                 // record or by starting a new extent in a new mft record.
1008                 // Note: Use the special reserved mft records and ensure that
1009                 // this extent is not required to find the mft record in
1010                 // question.
1011                 ntfs_log_error("Not enough space in this mft record to "
1012                                 "accommodate extended mft data attribute "
1013                                 "extent.  Cannot handle this yet.\n");
1014                 errno = EOPNOTSUPP;
1015                 goto undo_alloc;
1016         }
1017         mp_rebuilt = TRUE;
1018         /*
1019          * Generate the mapping pairs array directly into the attribute record.
1020          */
1021         if (ntfs_mapping_pairs_build(vol,
1022                         (u8*)a + le16_to_cpu(a->u.nonres.mapping_pairs_offset), mp_size,
1023                         rl2, ll, NULL)) {
1024                 ntfs_log_error("Failed to build mapping pairs array of "
1025                                 "mft data attribute.\n");
1026                 errno = EIO;
1027                 goto undo_alloc;
1028         }
1029         /* Update the highest_vcn. */
1030         a->u.nonres.highest_vcn = cpu_to_sle64(rl[1].vcn - 1);
1031         /*
1032          * We now have extended the mft data allocated_size by nr clusters.
1033          * Reflect this in the ntfs_attr structure and the attribute record.
1034          * @rl is the last (non-terminator) runlist element of mft data
1035          * attribute.
1036          */
1037         if (a->u.nonres.lowest_vcn) {
1038                 /*
1039                  * We are not in the first attribute extent, switch to it, but
1040                  * first ensure the changes will make it to disk later.
1041                  */
1042                 ntfs_inode_mark_dirty(ctx->ntfs_ino);
1043                 ntfs_attr_reinit_search_ctx(ctx);
1044                 if (ntfs_attr_lookup(mft_na->type, mft_na->name,
1045                                 mft_na->name_len, 0, 0, NULL, 0, ctx)) {
1046                         ntfs_log_error("Failed to find first attribute "
1047                                         "extent of mft data attribute.\n");
1048                         goto restore_undo_alloc;
1049                 }
1050                 a = ctx->attr;
1051         }
1052         mft_na->allocated_size += nr << vol->cluster_size_bits;
1053         a->u.nonres.allocated_size = cpu_to_sle64(mft_na->allocated_size);
1054         /* Ensure the changes make it to disk. */
1055         ntfs_inode_mark_dirty(ctx->ntfs_ino);
1056         ntfs_attr_put_search_ctx(ctx);
1057         return 0;
1058 restore_undo_alloc:
1059         err = errno;
1060         ntfs_attr_reinit_search_ctx(ctx);
1061         if (ntfs_attr_lookup(mft_na->type, mft_na->name, mft_na->name_len, 0,
1062                         rl[1].vcn, NULL, 0, ctx)) {
1063                 ntfs_log_error("Failed to find last attribute extent of "
1064                                 "mft data attribute.%s\n", es);
1065                 ntfs_attr_put_search_ctx(ctx);
1066                 mft_na->allocated_size += nr << vol->cluster_size_bits;
1067                 /*
1068                  * The only thing that is now wrong is ->allocated_size of the
1069                  * base attribute extent which chkdsk should be able to fix.
1070                  */
1071                 errno = err;
1072                 return -1;
1073         }
1074         m = ctx->mrec;
1075         a = ctx->attr;
1076         a->u.nonres.highest_vcn = cpu_to_sle64(old_last_vcn - 1);
1077         errno = err;
1078 undo_alloc:
1079         err = errno;
1080         if (ntfs_cluster_free(vol, mft_na, old_last_vcn, -1) < 0)
1081                 ntfs_log_error("Failed to free clusters from mft data "
1082                                 "attribute.%s\n", es);
1083         if (ntfs_rl_truncate(&mft_na->rl, old_last_vcn))
1084                 ntfs_log_error("Failed to truncate mft data attribute "
1085                                 "runlist.%s\n", es);
1086         if (mp_rebuilt) {
1087                 if (ntfs_mapping_pairs_build(vol, (u8*)a +
1088                                 le16_to_cpu(a->u.nonres.mapping_pairs_offset),
1089                                 old_alen - le16_to_cpu(a->u.nonres.mapping_pairs_offset),
1090                                 rl2, ll, NULL))
1091                         ntfs_log_error("Failed to restore mapping pairs "
1092                                         "array.%s\n", es);
1093                 if (ntfs_attr_record_resize(m, a, old_alen))
1094                         ntfs_log_error("Failed to restore attribute "
1095                                         "record.%s\n", es);
1096                 ntfs_inode_mark_dirty(ctx->ntfs_ino);
1097         }
1098         if (ctx)
1099                 ntfs_attr_put_search_ctx(ctx);
1100         errno = err;
1101         return -1;
1102 }
1103 
1104 /**
1105  * ntfs_mft_record_alloc - allocate an mft record on an ntfs volume
1106  * @vol:        volume on which to allocate the mft record
1107  * @base_ni:    open base inode if allocating an extent mft record or NULL
1108  *
1109  * Allocate an mft record in $MFT/$DATA of an open ntfs volume @vol.
1110  *
1111  * If @base_ni is NULL make the mft record a base mft record and allocate it at
1112  * the default allocator position.
1113  *
1114  * If @base_ni is not NULL make the allocated mft record an extent record,
1115  * allocate it starting at the mft record after the base mft record and attach
1116  * the allocated and opened ntfs inode to the base inode @base_ni.
1117  *
1118  * On success return the now opened ntfs (extent) inode of the mft record.
1119  *
1120  * On error return NULL with errno set to the error code.
1121  *
1122  * To find a free mft record, we scan the mft bitmap for a zero bit.  To
1123  * optimize this we start scanning at the place specified by @base_ni or if
1124  * @base_ni is NULL we start where we last stopped and we perform wrap around
1125  * when we reach the end.  Note, we do not try to allocate mft records below
1126  * number 24 because numbers 0 to 15 are the defined system files anyway and 16
1127  * to 24 are special in that they are used for storing extension mft records
1128  * for the $DATA attribute of $MFT.  This is required to avoid the possibility
1129  * of creating a run list with a circular dependence which once written to disk
1130  * can never be read in again.  Windows will only use records 16 to 24 for
1131  * normal files if the volume is completely out of space.  We never use them
1132  * which means that when the volume is really out of space we cannot create any
1133  * more files while Windows can still create up to 8 small files.  We can start
1134  * doing this at some later time, it does not matter much for now.
1135  *
1136  * When scanning the mft bitmap, we only search up to the last allocated mft
1137  * record.  If there are no free records left in the range 24 to number of
1138  * allocated mft records, then we extend the $MFT/$DATA attribute in order to
1139  * create free mft records.  We extend the allocated size of $MFT/$DATA by 16
1140  * records at a time or one cluster, if cluster size is above 16kiB.  If there
1141  * is not sufficient space to do this, we try to extend by a single mft record
1142  * or one cluster, if cluster size is above the mft record size, but we only do
1143  * this if there is enough free space, which we know from the values returned
1144  * by the failed cluster allocation function when we tried to do the first
1145  * allocation.
1146  *
1147  * No matter how many mft records we allocate, we initialize only the first
1148  * allocated mft record, incrementing mft data size and initialized size
1149  * accordingly, open an ntfs_inode for it and return it to the caller, unless
1150  * there are less than 24 mft records, in which case we allocate and initialize
1151  * mft records until we reach record 24 which we consider as the first free mft
1152  * record for use by normal files.
1153  *
1154  * If during any stage we overflow the initialized data in the mft bitmap, we
1155  * extend the initialized size (and data size) by 8 bytes, allocating another
1156  * cluster if required.  The bitmap data size has to be at least equal to the
1157  * number of mft records in the mft, but it can be bigger, in which case the
1158  * superfluous bits are padded with zeroes.
1159  *
1160  * Thus, when we return successfully (return value non-zero), we will have:
1161  *      - initialized / extended the mft bitmap if necessary,
1162  *      - initialized / extended the mft data if necessary,
1163  *      - set the bit corresponding to the mft record being allocated in the
1164  *        mft bitmap,
1165  *      - open an ntfs_inode for the allocated mft record, and we will
1166  *      - return the ntfs_inode.
1167  *
1168  * On error (return value zero), nothing will have changed.  If we had changed
1169  * anything before the error occurred, we will have reverted back to the
1170  * starting state before returning to the caller.  Thus, except for bugs, we
1171  * should always leave the volume in a consistent state when returning from
1172  * this function.
1173  *
1174  * Note, this function cannot make use of most of the normal functions, like
1175  * for example for attribute resizing, etc, because when the run list overflows
1176  * the base mft record and an attribute list is used, it is very important that
1177  * the extension mft records used to store the $DATA attribute of $MFT can be
1178  * reached without having to read the information contained inside them, as
1179  * this would make it impossible to find them in the first place after the
1180  * volume is dismounted.  $MFT/$BITMAP probably does not need to follow this
1181  * rule because the bitmap is not essential for finding the mft records, but on
1182  * the other hand, handling the bitmap in this special way would make life
1183  * easier because otherwise there might be circular invocations of functions
1184  * when reading the bitmap but if we are careful, we should be able to avoid
1185  * all problems.
1186  */
1187 ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, ntfs_inode *base_ni)
1188 {
1189         s64 ll, bit, old_data_initialized, old_data_size;
1190         ntfs_attr *mft_na, *mftbmp_na;
1191         ntfs_attr_search_ctx *ctx;
1192         MFT_RECORD *m;
1193         ATTR_RECORD *a;
1194         ntfs_inode *ni;
1195         int err;
1196         le16 seq_no, usn;
1197 
1198         if (base_ni)
1199                 ntfs_log_trace("Entering (allocating an extent mft record for "
1200                                 "base mft record 0x%llx).\n",
1201                                 (long long)base_ni->mft_no);
1202         else
1203                 ntfs_log_trace("Entering (allocating a base mft record).\n");
1204         if (!vol || !vol->mft_na || !vol->mftbmp_na) {
1205                 errno = EINVAL;
1206                 return NULL;
1207         }
1208         mft_na = vol->mft_na;
1209         mftbmp_na = vol->mftbmp_na;
1210         bit = ntfs_mft_bitmap_find_free_rec(vol, base_ni);
1211         if (bit >= 0) {
1212                 ntfs_log_debug("Found free record (#1), bit 0x%llx.\n",
1213                                 (long long)bit);
1214                 goto found_free_rec;
1215         }
1216         if (errno != ENOSPC)
1217                 return NULL;
1218         /*
1219          * No free mft records left.  If the mft bitmap already covers more
1220          * than the currently used mft records, the next records are all free,
1221          * so we can simply allocate the first unused mft record.
1222          * Note: We also have to make sure that the mft bitmap at least covers
1223          * the first 24 mft records as they are special and whilst they may not
1224          * be in use, we do not allocate from them.
1225          */
1226         ll = mft_na->initialized_size >> vol->mft_record_size_bits;
1227         if (mftbmp_na->initialized_size << 3 > ll &&
1228                         mftbmp_na->initialized_size > 3) {
1229                 bit = ll;
1230                 if (bit < 24)
1231                         bit = 24;
1232                 ntfs_log_debug("Found free record (#2), bit 0x%llx.\n",
1233                                 (long long)bit);
1234                 goto found_free_rec;
1235         }
1236         /*
1237          * The mft bitmap needs to be expanded until it covers the first unused
1238          * mft record that we can allocate.
1239          * Note: The smallest mft record we allocate is mft record 24.
1240          */
1241         ntfs_log_debug("Status of mftbmp before extension: allocated_size 0x%llx, "
1242                         "data_size 0x%llx, initialized_size 0x%llx.\n",
1243                         (long long)mftbmp_na->allocated_size,
1244                         (long long)mftbmp_na->data_size,
1245                         (long long)mftbmp_na->initialized_size);
1246         if (mftbmp_na->initialized_size + 8 > mftbmp_na->allocated_size) {
1247                 /* Need to extend bitmap by one more cluster. */
1248                 ntfs_log_debug("mftbmp: initialized_size + 8 > allocated_size.\n");
1249                 if (ntfs_mft_bitmap_extend_allocation(vol))
1250                         goto err_out;
1251                 ntfs_log_debug("Status of mftbmp after allocation extension: "
1252                                 "allocated_size 0x%llx, data_size 0x%llx, "
1253                                 "initialized_size 0x%llx.\n",
1254                                 (long long)mftbmp_na->allocated_size,
1255                                 (long long)mftbmp_na->data_size,
1256                                 (long long)mftbmp_na->initialized_size);
1257         }
1258         /*
1259          * We now have sufficient allocated space, extend the initialized_size
1260          * as well as the data_size if necessary and fill the new space with
1261          * zeroes.
1262          */
1263         bit = mftbmp_na->initialized_size << 3;
1264         if (ntfs_mft_bitmap_extend_initialized(vol))
1265                 goto err_out;
1266         ntfs_log_debug("Status of mftbmp after initialized extension: "
1267                         "allocated_size 0x%llx, data_size 0x%llx, "
1268                         "initialized_size 0x%llx.\n",
1269                         (long long)mftbmp_na->allocated_size,
1270                         (long long)mftbmp_na->data_size,
1271                         (long long)mftbmp_na->initialized_size);
1272         ntfs_log_debug("Found free record (#3), bit 0x%llx.\n", (long long)bit);
1273 found_free_rec:
1274         /* @bit is the found free mft record, allocate it in the mft bitmap. */
1275         ntfs_log_debug("At found_free_rec.\n");
1276         if (ntfs_bitmap_set_bit(mftbmp_na, bit)) {
1277                 ntfs_log_error("Failed to allocate bit in mft bitmap.\n");
1278                 goto err_out;
1279         }
1280         ntfs_log_debug("Set bit 0x%llx in mft bitmap.\n", (long long)bit);
1281         /* The mft bitmap is now uptodate.  Deal with mft data attribute now. */
1282         ll = (bit + 1) << vol->mft_record_size_bits;
1283         if (ll <= mft_na->initialized_size) {
1284                 ntfs_log_debug("Allocated mft record already initialized.\n");
1285                 goto mft_rec_already_initialized;
1286         }
1287         ntfs_log_debug("Initializing allocated mft record.\n");
1288         /*
1289          * The mft record is outside the initialized data.  Extend the mft data
1290          * attribute until it covers the allocated record.  The loop is only
1291          * actually traversed more than once when a freshly formatted volume is
1292          * first written to so it optimizes away nicely in the common case.
1293          */
1294         ntfs_log_debug("Status of mft data before extension: "
1295                         "allocated_size 0x%llx, data_size 0x%llx, "
1296                         "initialized_size 0x%llx.\n",
1297                         (long long)mft_na->allocated_size,
1298                         (long long)mft_na->data_size,
1299                         (long long)mft_na->initialized_size);
1300         while (ll > mft_na->allocated_size) {
1301                 if (ntfs_mft_data_extend_allocation(vol))
1302                         goto undo_mftbmp_alloc;
1303                 ntfs_log_debug("Status of mft data after allocation extension: "
1304                                 "allocated_size 0x%llx, data_size 0x%llx, "
1305                                 "initialized_size 0x%llx.\n",
1306                                 (long long)mft_na->allocated_size,
1307                                 (long long)mft_na->data_size,
1308                                 (long long)mft_na->initialized_size);
1309         }
1310         old_data_initialized = mft_na->initialized_size;
1311         old_data_size = mft_na->data_size;
1312         /*
1313          * Extend mft data initialized size (and data size of course) to reach
1314          * the allocated mft record, formatting the mft records along the way.
1315          * Note: We only modify the ntfs_attr structure as that is all that is
1316          * needed by ntfs_mft_record_format().  We will update the attribute
1317          * record itself in one fell swoop later on.
1318          */
1319         while (ll > mft_na->initialized_size) {
1320                 s64 ll2 = mft_na->initialized_size >> vol->mft_record_size_bits;
1321                 mft_na->initialized_size += vol->mft_record_size;
1322                 if (mft_na->initialized_size > mft_na->data_size)
1323                         mft_na->data_size = mft_na->initialized_size;
1324                 ntfs_log_debug("Initializing mft record 0x%llx.\n", (long long)ll2);
1325                 err = ntfs_mft_record_format(vol, ll2);
1326                 if (err) {
1327                         ntfs_log_error("Failed to format mft record.\n");
1328                         goto undo_data_init;
1329                 }
1330         }
1331         /* Update the mft data attribute record to reflect the new sizes. */
1332         ctx = ntfs_attr_get_search_ctx(mft_na->ni, NULL);
1333         if (!ctx) {
1334                 ntfs_log_error("Failed to get search context.\n");
1335                 goto undo_data_init;
1336         }
1337         if (ntfs_attr_lookup(mft_na->type, mft_na->name, mft_na->name_len, 0,
1338                         0, NULL, 0, ctx)) {
1339                 ntfs_log_error("Failed to find first attribute extent of "
1340                                 "mft data attribute.\n");
1341                 ntfs_attr_put_search_ctx(ctx);
1342                 goto undo_data_init;
1343         }
1344         a = ctx->attr;
1345         a->u.nonres.initialized_size = cpu_to_sle64(mft_na->initialized_size);
1346         a->u.nonres.data_size = cpu_to_sle64(mft_na->data_size);
1347         /* Ensure the changes make it to disk. */
1348         ntfs_inode_mark_dirty(ctx->ntfs_ino);
1349         ntfs_attr_put_search_ctx(ctx);
1350         ntfs_log_debug("Status of mft data after mft record initialization: "
1351                         "allocated_size 0x%llx, data_size 0x%llx, "
1352                         "initialized_size 0x%llx.\n",
1353                         (long long)mft_na->allocated_size,
1354                         (long long)mft_na->data_size,
1355                         (long long)mft_na->initialized_size);
1356         /* Sanity checks. */
1357         if (mft_na->data_size > mft_na->allocated_size ||
1358                         mft_na->initialized_size > mft_na->data_size)
1359                 NTFS_BUG("mft_na sanity checks failed");
1360         /* Sync MFT to disk now in order to minimize data-loss. */
1361         if (ntfs_inode_sync(mft_na->ni)) {
1362                 ntfs_log_debug("mft sync after extension failed. rolling back.");
1363                 goto undo_data_init;
1364         }
1365 mft_rec_already_initialized:
1366         /*
1367          * We now have allocated and initialized the mft record.  Need to read
1368          * it from disk and re-format it, preserving the sequence number if it
1369          * is not zero as well as the update sequence number if it is not zero
1370          * or -1 (0xffff).
1371          */
1372         m = (MFT_RECORD*)ntfs_malloc(vol->mft_record_size);
1373         if (!m)
1374                 goto undo_mftbmp_alloc;
1375 
1376         if (ntfs_mft_record_read(vol, bit, m)) {
1377                 err = errno;
1378                 ntfs_log_error("Failed to read mft record.\n");
1379                 free(m);
1380                 errno = err;
1381                 goto undo_mftbmp_alloc;
1382         }
1383         /* Sanity check that the mft record is really not in use. */
1384         if (ntfs_is_file_record(m->magic) && (m->flags & MFT_RECORD_IN_USE)) {
1385                 ntfs_log_error("Mft record 0x%llx was marked unused in "
1386                                 "mft bitmap but is marked used itself.  "
1387                                 "Corrupt filesystem or library bug!  "
1388                                 "Run chkdsk immediately!\n", (long long)bit);
1389                 free(m);
1390                 errno = EIO;
1391                 goto undo_mftbmp_alloc;
1392         }
1393         seq_no = m->sequence_number;
1394         usn = *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs));
1395         if (ntfs_mft_record_layout(vol, bit, m)) {
1396                 err = errno;
1397                 ntfs_log_error("Failed to re-format mft record.\n");
1398                 free(m);
1399                 errno = err;
1400                 goto undo_mftbmp_alloc;
1401         }
1402         if (seq_no)
1403                 m->sequence_number = seq_no;
1404         if (usn && le16_to_cpu(usn) != 0xffff)
1405                 *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = usn;
1406         /* Set the mft record itself in use. */
1407         m->flags |= MFT_RECORD_IN_USE;
1408         /* Now need to open an ntfs inode for the mft record. */
1409         ni = ntfs_inode_allocate(vol);
1410         if (!ni) {
1411                 err = errno;
1412                 ntfs_log_error("Failed to allocate buffer for inode.\n");
1413                 free(m);
1414                 errno = err;
1415                 goto undo_mftbmp_alloc;
1416         }
1417         ni->mft_no = bit;
1418         ni->mrec = m;
1419         /*
1420          * If we are allocating an extent mft record, make the opened inode an
1421          * extent inode and attach it to the base inode.  Also, set the base
1422          * mft record reference in the extent inode.
1423          */
1424         if (base_ni) {
1425                 ni->nr_extents = -1;
1426                 ni->u.base_ni = base_ni;
1427                 m->base_mft_record = MK_LE_MREF(base_ni->mft_no,
1428                                 le16_to_cpu(base_ni->mrec->sequence_number));
1429                 /*
1430                  * Attach the extent inode to the base inode, reallocating
1431                  * memory if needed.
1432                  */
1433                 if (!(base_ni->nr_extents & 3)) {
1434                         ntfs_inode **extent_nis;
1435                         int i;
1436 
1437                         i = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
1438                         extent_nis = (ntfs_inode**)ntfs_malloc(i);
1439                         if (!extent_nis) {
1440                                 err = errno;
1441                                 free(m);
1442                                 free(ni);
1443                                 errno = err;
1444                                 goto undo_mftbmp_alloc;
1445                         }
1446                         if (base_ni->u.extent_nis) {
1447                                 memcpy(extent_nis, base_ni->u.extent_nis,
1448                                                 i - 4 * sizeof(ntfs_inode *));
1449                                 free(base_ni->u.extent_nis);
1450                         }
1451                         base_ni->u.extent_nis = extent_nis;
1452                 }
1453                 base_ni->u.extent_nis[base_ni->nr_extents++] = ni;
1454         }
1455         /* Make sure the allocated inode is written out to disk later. */
1456         ntfs_inode_mark_dirty(ni);
1457         /* Initialize time, allocated and data size in ntfs_inode struct. */
1458         ni->data_size = ni->allocated_size = 0;
1459         ni->flags = 0;
1460         ni->creation_time = ni->last_data_change_time =
1461                         ni->last_mft_change_time =
1462                         ni->last_access_time = time(NULL);
1463         if (!base_ni) {
1464                 /* Update the default mft allocation position if it was used. */
1465                 vol->mft_data_pos = bit + 1;
1466                 /* Add inode to cache. */
1467                 __ntfs_inode_add_to_cache(ni);
1468         }
1469         /* Return the opened, allocated inode of the allocated mft record. */
1470         ntfs_log_debug("Returning opened, allocated %sinode 0x%llx.\n",
1471                         base_ni ? "extent " : "", (long long)bit);
1472         return ni;
1473 undo_data_init:
1474         mft_na->initialized_size = old_data_initialized;
1475         mft_na->data_size = old_data_size;
1476 undo_mftbmp_alloc:
1477         err = errno;
1478         if (ntfs_bitmap_clear_bit(mftbmp_na, bit))
1479                 ntfs_log_error("Failed to clear bit in mft bitmap.%s\n", es);
1480         errno = err;
1481 err_out:
1482         if (!errno)
1483                 errno = EIO;
1484         return NULL;
1485 }
1486 
1487 /**
1488  * ntfs_mft_record_free - free an mft record on an ntfs volume
1489  * @vol:        volume on which to free the mft record
1490  * @ni:         open ntfs inode of the mft record to free
1491  *
1492  * Free the mft record of the open inode @ni on the mounted ntfs volume @vol.
1493  * Note that this function calls ntfs_inode_close() internally and hence you
1494  * cannot use the pointer @ni any more after this function returns success.
1495  *
1496  * On success return 0 and on error return -1 with errno set to the error code.
1497  */
1498 int ntfs_mft_record_free(ntfs_volume *vol, ntfs_inode *ni)
1499 {
1500         u64 mft_no;
1501         int err;
1502         u16 seq_no;
1503         le16 old_seq_no;
1504 
1505         ntfs_log_trace("Entering for inode 0x%llx.\n", (long long) ni->mft_no);
1506 
1507         if (!vol || !vol->mftbmp_na || !ni) {
1508                 errno = EINVAL;
1509                 return -1;
1510         }
1511 
1512         /* Cache the mft reference for later. */
1513         mft_no = ni->mft_no;
1514 
1515         /* Mark the mft record as not in use. */
1516         ni->mrec->flags &= ~MFT_RECORD_IN_USE;
1517 
1518         /* Increment the sequence number, skipping zero, if it is not zero. */
1519         old_seq_no = ni->mrec->sequence_number;
1520         seq_no = le16_to_cpu(old_seq_no);
1521         if (seq_no == 0xffff)
1522                 seq_no = 1;
1523         else if (seq_no)
1524                 seq_no++;
1525         ni->mrec->sequence_number = cpu_to_le16(seq_no);
1526 
1527         /* Set the inode dirty and write it out. */
1528         ntfs_inode_mark_dirty(ni);
1529         if (ntfs_inode_sync(ni)) {
1530                 err = errno;
1531                 goto sync_rollback;
1532         }
1533 
1534         /* Clear the bit in the $MFT/$BITMAP corresponding to this record. */
1535         if (ntfs_bitmap_clear_bit(vol->mftbmp_na, mft_no)) {
1536                 err = errno;
1537                 // FIXME: If ntfs_bitmap_clear_run() guarantees rollback on
1538                 //        error, this could be changed to goto sync_rollback;
1539                 goto bitmap_rollback;
1540         }
1541 
1542         /* Throw away the now freed inode. */
1543         if (!ntfs_inode_close(ni))
1544                 return 0;
1545         err = errno;
1546 
1547         /* Rollback what we did... */
1548 bitmap_rollback:
1549         if (ntfs_bitmap_set_bit(vol->mftbmp_na, mft_no))
1550                 ntfs_log_debug("Eeek! Rollback failed in ntfs_mft_record_free().  "
1551                                 "Leaving inconsistent metadata!\n");
1552 sync_rollback:
1553         ni->mrec->flags |= MFT_RECORD_IN_USE;
1554         ni->mrec->sequence_number = old_seq_no;
1555         ntfs_inode_mark_dirty(ni);
1556         errno = err;
1557         return -1;
1558 }
1559 
1560 /**
1561  * ntfs_mft_usn_dec - Decrement USN by one
1562  * @mrec:       pointer to an mft record
1563  *
1564  * On success return 0 and on error return -1 with errno set.
1565  */
1566 int ntfs_mft_usn_dec(MFT_RECORD *mrec)
1567 {
1568         u16 usn;
1569         le16 *usnp;
1570 
1571         if (!mrec) {
1572                 errno = EINVAL;
1573                 return -1;
1574         }
1575         usnp = (le16 *)((char *)mrec + le16_to_cpu(mrec->usa_ofs));
1576         usn = le16_to_cpup(usnp);
1577         if (usn-- <= 1)
1578                 usn = 0xfffe;
1579         *usnp = cpu_to_le16(usn);
1580 
1581         return 0;
1582 }
1583