Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>


 106  *      pushing cached pages (which acquires range locks) and syncing out
 107  *      cached atime changes.  Third, zfs_zinactive() may require a new tx,
 108  *      which could deadlock the system if you were already holding one.
 109  *      If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 110  *
 111  *  (3) All range locks must be grabbed before calling dmu_tx_assign(),
 112  *      as they can span dmu_tx_assign() calls.
 113  *
 114  *  (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign().
 115  *      This is critical because we don't want to block while holding locks.
 116  *      Note, in particular, that if a lock is sometimes acquired before
 117  *      the tx assigns, and sometimes after (e.g. z_lock), then failing to
 118  *      use a non-blocking assign can deadlock the system.  The scenario:
 119  *
 120  *      Thread A has grabbed a lock before calling dmu_tx_assign().
 121  *      Thread B is in an already-assigned tx, and blocks for this lock.
 122  *      Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 123  *      forever, because the previous txg can't quiesce until B's tx commits.
 124  *
 125  *      If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
 126  *      then drop all locks, call dmu_tx_wait(), and try again.




 127  *
 128  *  (5) If the operation succeeded, generate the intent log entry for it
 129  *      before dropping locks.  This ensures that the ordering of events
 130  *      in the intent log matches the order in which they actually occurred.
 131  *      During ZIL replay the zfs_log_* functions will update the sequence
 132  *      number to indicate the zil transaction has replayed.
 133  *
 134  *  (6) At the end of each vnode op, the DMU tx must always commit,
 135  *      regardless of whether there were any errors.
 136  *
 137  *  (7) After dropping all locks, invoke zil_commit(zilog, foid)
 138  *      to ensure that synchronous semantics are provided when necessary.
 139  *
 140  * In general, this is how things should be ordered in each vnode op:
 141  *
 142  *      ZFS_ENTER(zfsvfs);              // exit if unmounted
 143  * top:
 144  *      zfs_dirent_lock(&dl, ...)   // lock directory entry (may VN_HOLD())
 145  *      rw_enter(...);                  // grab any other locks you need
 146  *      tx = dmu_tx_create(...);        // get DMU tx
 147  *      dmu_tx_hold_*();                // hold each object you might modify
 148  *      error = dmu_tx_assign(tx, TXG_NOWAIT);  // try to assign
 149  *      if (error) {
 150  *              rw_exit(...);           // drop locks
 151  *              zfs_dirent_unlock(dl);  // unlock directory entry
 152  *              VN_RELE(...);           // release held vnodes
 153  *              if (error == ERESTART) {

 154  *                      dmu_tx_wait(tx);
 155  *                      dmu_tx_abort(tx);
 156  *                      goto top;
 157  *              }
 158  *              dmu_tx_abort(tx);       // abort DMU tx
 159  *              ZFS_EXIT(zfsvfs);       // finished in zfs
 160  *              return (error);         // really out of space
 161  *      }
 162  *      error = do_real_work();         // do whatever this VOP does
 163  *      if (error == 0)
 164  *              zfs_log_*(...);         // on success, make ZIL entry
 165  *      dmu_tx_commit(tx);              // commit DMU tx -- error or not
 166  *      rw_exit(...);                   // drop locks
 167  *      zfs_dirent_unlock(dl);          // unlock directory entry
 168  *      VN_RELE(...);                   // release held vnodes
 169  *      zil_commit(zilog, foid);        // synchronous when necessary
 170  *      ZFS_EXIT(zfsvfs);               // finished in zfs
 171  *      return (error);                 // done, report error
 172  */
 173 


1298 
1299 /* ARGSUSED */
1300 static int
1301 zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
1302     int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
1303     vsecattr_t *vsecp)
1304 {
1305         znode_t         *zp, *dzp = VTOZ(dvp);
1306         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
1307         zilog_t         *zilog;
1308         objset_t        *os;
1309         zfs_dirlock_t   *dl;
1310         dmu_tx_t        *tx;
1311         int             error;
1312         ksid_t          *ksid;
1313         uid_t           uid;
1314         gid_t           gid = crgetgid(cr);
1315         zfs_acl_ids_t   acl_ids;
1316         boolean_t       fuid_dirtied;
1317         boolean_t       have_acl = B_FALSE;

1318 
1319         /*
1320          * If we have an ephemeral id, ACL, or XVATTR then
1321          * make sure file system is at proper version
1322          */
1323 
1324         ksid = crgetsid(cr, KSID_OWNER);
1325         if (ksid)
1326                 uid = ksid_getid(ksid);
1327         else
1328                 uid = crgetuid(cr);
1329 
1330         if (zfsvfs->z_use_fuids == B_FALSE &&
1331             (vsecp || (vap->va_mask & AT_XVATTR) ||
1332             IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1333                 return (SET_ERROR(EINVAL));
1334 
1335         ZFS_ENTER(zfsvfs);
1336         ZFS_VERIFY_ZP(dzp);
1337         os = zfsvfs->z_os;


1418                         zfs_acl_ids_free(&acl_ids);
1419                         error = SET_ERROR(EDQUOT);
1420                         goto out;
1421                 }
1422 
1423                 tx = dmu_tx_create(os);
1424 
1425                 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1426                     ZFS_SA_BASE_ATTR_SIZE);
1427 
1428                 fuid_dirtied = zfsvfs->z_fuid_dirty;
1429                 if (fuid_dirtied)
1430                         zfs_fuid_txhold(zfsvfs, tx);
1431                 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1432                 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1433                 if (!zfsvfs->z_use_sa &&
1434                     acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1435                         dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1436                             0, acl_ids.z_aclp->z_acl_bytes);
1437                 }
1438                 error = dmu_tx_assign(tx, TXG_NOWAIT);
1439                 if (error) {
1440                         zfs_dirent_unlock(dl);
1441                         if (error == ERESTART) {

1442                                 dmu_tx_wait(tx);
1443                                 dmu_tx_abort(tx);
1444                                 goto top;
1445                         }
1446                         zfs_acl_ids_free(&acl_ids);
1447                         dmu_tx_abort(tx);
1448                         ZFS_EXIT(zfsvfs);
1449                         return (error);
1450                 }
1451                 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1452 
1453                 if (fuid_dirtied)
1454                         zfs_fuid_sync(zfsvfs, tx);
1455 
1456                 (void) zfs_link_create(dl, zp, tx, ZNEW);
1457                 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1458                 if (flag & FIGNORECASE)
1459                         txtype |= TX_CI;
1460                 zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1461                     vsecp, acl_ids.z_fuidp, vap);


1553 zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
1554     int flags)
1555 {
1556         znode_t         *zp, *dzp = VTOZ(dvp);
1557         znode_t         *xzp;
1558         vnode_t         *vp;
1559         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
1560         zilog_t         *zilog;
1561         uint64_t        acl_obj, xattr_obj;
1562         uint64_t        xattr_obj_unlinked = 0;
1563         uint64_t        obj = 0;
1564         zfs_dirlock_t   *dl;
1565         dmu_tx_t        *tx;
1566         boolean_t       may_delete_now, delete_now = FALSE;
1567         boolean_t       unlinked, toobig = FALSE;
1568         uint64_t        txtype;
1569         pathname_t      *realnmp = NULL;
1570         pathname_t      realnm;
1571         int             error;
1572         int             zflg = ZEXISTS;

1573 
1574         ZFS_ENTER(zfsvfs);
1575         ZFS_VERIFY_ZP(dzp);
1576         zilog = zfsvfs->z_log;
1577 
1578         if (flags & FIGNORECASE) {
1579                 zflg |= ZCILOOK;
1580                 pn_alloc(&realnm);
1581                 realnmp = &realnm;
1582         }
1583 
1584 top:
1585         xattr_obj = 0;
1586         xzp = NULL;
1587         /*
1588          * Attempt to lock directory; fail if entry doesn't exist.
1589          */
1590         if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
1591             NULL, realnmp)) {
1592                 if (realnmp)


1641         }
1642 
1643         /* are there any extended attributes? */
1644         error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1645             &xattr_obj, sizeof (xattr_obj));
1646         if (error == 0 && xattr_obj) {
1647                 error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1648                 ASSERT0(error);
1649                 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1650                 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1651         }
1652 
1653         mutex_enter(&zp->z_lock);
1654         if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
1655                 dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
1656         mutex_exit(&zp->z_lock);
1657 
1658         /* charge as an update -- would be nice not to charge at all */
1659         dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1660 
1661         error = dmu_tx_assign(tx, TXG_NOWAIT);
1662         if (error) {
1663                 zfs_dirent_unlock(dl);
1664                 VN_RELE(vp);
1665                 if (xzp)
1666                         VN_RELE(ZTOV(xzp));
1667                 if (error == ERESTART) {

1668                         dmu_tx_wait(tx);
1669                         dmu_tx_abort(tx);
1670                         goto top;
1671                 }
1672                 if (realnmp)
1673                         pn_free(realnmp);
1674                 dmu_tx_abort(tx);
1675                 ZFS_EXIT(zfsvfs);
1676                 return (error);
1677         }
1678 
1679         /*
1680          * Remove the directory entry.
1681          */
1682         error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);
1683 
1684         if (error) {
1685                 dmu_tx_commit(tx);
1686                 goto out;
1687         }


1781  *       vp - ctime|mtime|atime updated
1782  */
1783 /*ARGSUSED*/
1784 static int
1785 zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr,
1786     caller_context_t *ct, int flags, vsecattr_t *vsecp)
1787 {
1788         znode_t         *zp, *dzp = VTOZ(dvp);
1789         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
1790         zilog_t         *zilog;
1791         zfs_dirlock_t   *dl;
1792         uint64_t        txtype;
1793         dmu_tx_t        *tx;
1794         int             error;
1795         int             zf = ZNEW;
1796         ksid_t          *ksid;
1797         uid_t           uid;
1798         gid_t           gid = crgetgid(cr);
1799         zfs_acl_ids_t   acl_ids;
1800         boolean_t       fuid_dirtied;

1801 
1802         ASSERT(vap->va_type == VDIR);
1803 
1804         /*
1805          * If we have an ephemeral id, ACL, or XVATTR then
1806          * make sure file system is at proper version
1807          */
1808 
1809         ksid = crgetsid(cr, KSID_OWNER);
1810         if (ksid)
1811                 uid = ksid_getid(ksid);
1812         else
1813                 uid = crgetuid(cr);
1814         if (zfsvfs->z_use_fuids == B_FALSE &&
1815             (vsecp || (vap->va_mask & AT_XVATTR) ||
1816             IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1817                 return (SET_ERROR(EINVAL));
1818 
1819         ZFS_ENTER(zfsvfs);
1820         ZFS_VERIFY_ZP(dzp);


1877                 return (SET_ERROR(EDQUOT));
1878         }
1879 
1880         /*
1881          * Add a new entry to the directory.
1882          */
1883         tx = dmu_tx_create(zfsvfs->z_os);
1884         dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
1885         dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1886         fuid_dirtied = zfsvfs->z_fuid_dirty;
1887         if (fuid_dirtied)
1888                 zfs_fuid_txhold(zfsvfs, tx);
1889         if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1890                 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1891                     acl_ids.z_aclp->z_acl_bytes);
1892         }
1893 
1894         dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1895             ZFS_SA_BASE_ATTR_SIZE);
1896 
1897         error = dmu_tx_assign(tx, TXG_NOWAIT);
1898         if (error) {
1899                 zfs_dirent_unlock(dl);
1900                 if (error == ERESTART) {

1901                         dmu_tx_wait(tx);
1902                         dmu_tx_abort(tx);
1903                         goto top;
1904                 }
1905                 zfs_acl_ids_free(&acl_ids);
1906                 dmu_tx_abort(tx);
1907                 ZFS_EXIT(zfsvfs);
1908                 return (error);
1909         }
1910 
1911         /*
1912          * Create new node.
1913          */
1914         zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1915 
1916         if (fuid_dirtied)
1917                 zfs_fuid_sync(zfsvfs, tx);
1918 
1919         /*
1920          * Now put new name in parent dir.


1956  *
1957  *      RETURN: 0 on success, error code on failure.
1958  *
1959  * Timestamps:
1960  *      dvp - ctime|mtime updated
1961  */
1962 /*ARGSUSED*/
1963 static int
1964 zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
1965     caller_context_t *ct, int flags)
1966 {
1967         znode_t         *dzp = VTOZ(dvp);
1968         znode_t         *zp;
1969         vnode_t         *vp;
1970         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
1971         zilog_t         *zilog;
1972         zfs_dirlock_t   *dl;
1973         dmu_tx_t        *tx;
1974         int             error;
1975         int             zflg = ZEXISTS;

1976 
1977         ZFS_ENTER(zfsvfs);
1978         ZFS_VERIFY_ZP(dzp);
1979         zilog = zfsvfs->z_log;
1980 
1981         if (flags & FIGNORECASE)
1982                 zflg |= ZCILOOK;
1983 top:
1984         zp = NULL;
1985 
1986         /*
1987          * Attempt to lock directory; fail if entry doesn't exist.
1988          */
1989         if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
1990             NULL, NULL)) {
1991                 ZFS_EXIT(zfsvfs);
1992                 return (error);
1993         }
1994 
1995         vp = ZTOV(zp);


2011         vnevent_rmdir(vp, dvp, name, ct);
2012 
2013         /*
2014          * Grab a lock on the directory to make sure that no one is
2015          * trying to add (or lookup) entries while we are removing it.
2016          */
2017         rw_enter(&zp->z_name_lock, RW_WRITER);
2018 
2019         /*
2020          * Grab a lock on the parent pointer to make sure we play well
2021          * with the treewalk and directory rename code.
2022          */
2023         rw_enter(&zp->z_parent_lock, RW_WRITER);
2024 
2025         tx = dmu_tx_create(zfsvfs->z_os);
2026         dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
2027         dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2028         dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
2029         zfs_sa_upgrade_txholds(tx, zp);
2030         zfs_sa_upgrade_txholds(tx, dzp);
2031         error = dmu_tx_assign(tx, TXG_NOWAIT);
2032         if (error) {
2033                 rw_exit(&zp->z_parent_lock);
2034                 rw_exit(&zp->z_name_lock);
2035                 zfs_dirent_unlock(dl);
2036                 VN_RELE(vp);
2037                 if (error == ERESTART) {

2038                         dmu_tx_wait(tx);
2039                         dmu_tx_abort(tx);
2040                         goto top;
2041                 }
2042                 dmu_tx_abort(tx);
2043                 ZFS_EXIT(zfsvfs);
2044                 return (error);
2045         }
2046 
2047         error = zfs_link_destroy(dl, zp, tx, zflg, NULL);
2048 
2049         if (error == 0) {
2050                 uint64_t txtype = TX_RMDIR;
2051                 if (flags & FIGNORECASE)
2052                         txtype |= TX_CI;
2053                 zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT);
2054         }
2055 
2056         dmu_tx_commit(tx);
2057 


3345  *
3346  * Timestamps:
3347  *      sdvp,tdvp - ctime|mtime updated
3348  */
3349 /*ARGSUSED*/
3350 static int
3351 zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
3352     caller_context_t *ct, int flags)
3353 {
3354         znode_t         *tdzp, *szp, *tzp;
3355         znode_t         *sdzp = VTOZ(sdvp);
3356         zfsvfs_t        *zfsvfs = sdzp->z_zfsvfs;
3357         zilog_t         *zilog;
3358         vnode_t         *realvp;
3359         zfs_dirlock_t   *sdl, *tdl;
3360         dmu_tx_t        *tx;
3361         zfs_zlock_t     *zl;
3362         int             cmp, serr, terr;
3363         int             error = 0;
3364         int             zflg = 0;

3365 
3366         ZFS_ENTER(zfsvfs);
3367         ZFS_VERIFY_ZP(sdzp);
3368         zilog = zfsvfs->z_log;
3369 
3370         /*
3371          * Make sure we have the real vp for the target directory.
3372          */
3373         if (VOP_REALVP(tdvp, &realvp, ct) == 0)
3374                 tdvp = realvp;
3375 
3376         tdzp = VTOZ(tdvp);
3377         ZFS_VERIFY_ZP(tdzp);
3378 
3379         /*
3380          * We check z_zfsvfs rather than v_vfsp here, because snapshots and the
3381          * ctldir appear to have the same v_vfsp.
3382          */
3383         if (tdzp->z_zfsvfs != zfsvfs || zfsctl_is_node(tdvp)) {
3384                 ZFS_EXIT(zfsvfs);


3582         if (tdvp != sdvp) {
3583                 vnevent_rename_dest_dir(tdvp, ct);
3584         }
3585 
3586         tx = dmu_tx_create(zfsvfs->z_os);
3587         dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3588         dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3589         dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3590         dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3591         if (sdzp != tdzp) {
3592                 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3593                 zfs_sa_upgrade_txholds(tx, tdzp);
3594         }
3595         if (tzp) {
3596                 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3597                 zfs_sa_upgrade_txholds(tx, tzp);
3598         }
3599 
3600         zfs_sa_upgrade_txholds(tx, szp);
3601         dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3602         error = dmu_tx_assign(tx, TXG_NOWAIT);
3603         if (error) {
3604                 if (zl != NULL)
3605                         zfs_rename_unlock(&zl);
3606                 zfs_dirent_unlock(sdl);
3607                 zfs_dirent_unlock(tdl);
3608 
3609                 if (sdzp == tdzp)
3610                         rw_exit(&sdzp->z_name_lock);
3611 
3612                 VN_RELE(ZTOV(szp));
3613                 if (tzp)
3614                         VN_RELE(ZTOV(tzp));
3615                 if (error == ERESTART) {

3616                         dmu_tx_wait(tx);
3617                         dmu_tx_abort(tx);
3618                         goto top;
3619                 }
3620                 dmu_tx_abort(tx);
3621                 ZFS_EXIT(zfsvfs);
3622                 return (error);
3623         }
3624 
3625         if (tzp)        /* Attempt to remove the existing target */
3626                 error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
3627 
3628         if (error == 0) {
3629                 error = zfs_link_create(tdl, szp, tx, ZRENAMING);
3630                 if (error == 0) {
3631                         szp->z_pflags |= ZFS_AV_MODIFIED;
3632 
3633                         error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3634                             (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3635                         ASSERT0(error);


3701  *
3702  * Timestamps:
3703  *      dvp - ctime|mtime updated
3704  */
3705 /*ARGSUSED*/
3706 static int
3707 zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr,
3708     caller_context_t *ct, int flags)
3709 {
3710         znode_t         *zp, *dzp = VTOZ(dvp);
3711         zfs_dirlock_t   *dl;
3712         dmu_tx_t        *tx;
3713         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
3714         zilog_t         *zilog;
3715         uint64_t        len = strlen(link);
3716         int             error;
3717         int             zflg = ZNEW;
3718         zfs_acl_ids_t   acl_ids;
3719         boolean_t       fuid_dirtied;
3720         uint64_t        txtype = TX_SYMLINK;

3721 
3722         ASSERT(vap->va_type == VLNK);
3723 
3724         ZFS_ENTER(zfsvfs);
3725         ZFS_VERIFY_ZP(dzp);
3726         zilog = zfsvfs->z_log;
3727 
3728         if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
3729             NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3730                 ZFS_EXIT(zfsvfs);
3731                 return (SET_ERROR(EILSEQ));
3732         }
3733         if (flags & FIGNORECASE)
3734                 zflg |= ZCILOOK;
3735 
3736         if (len > MAXPATHLEN) {
3737                 ZFS_EXIT(zfsvfs);
3738                 return (SET_ERROR(ENAMETOOLONG));
3739         }
3740 


3763 
3764         if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
3765                 zfs_acl_ids_free(&acl_ids);
3766                 zfs_dirent_unlock(dl);
3767                 ZFS_EXIT(zfsvfs);
3768                 return (SET_ERROR(EDQUOT));
3769         }
3770         tx = dmu_tx_create(zfsvfs->z_os);
3771         fuid_dirtied = zfsvfs->z_fuid_dirty;
3772         dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
3773         dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
3774         dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
3775             ZFS_SA_BASE_ATTR_SIZE + len);
3776         dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
3777         if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3778                 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
3779                     acl_ids.z_aclp->z_acl_bytes);
3780         }
3781         if (fuid_dirtied)
3782                 zfs_fuid_txhold(zfsvfs, tx);
3783         error = dmu_tx_assign(tx, TXG_NOWAIT);
3784         if (error) {
3785                 zfs_dirent_unlock(dl);
3786                 if (error == ERESTART) {

3787                         dmu_tx_wait(tx);
3788                         dmu_tx_abort(tx);
3789                         goto top;
3790                 }
3791                 zfs_acl_ids_free(&acl_ids);
3792                 dmu_tx_abort(tx);
3793                 ZFS_EXIT(zfsvfs);
3794                 return (error);
3795         }
3796 
3797         /*
3798          * Create a new object for the symlink.
3799          * for version 4 ZPL datasets the symlink will be an SA attribute
3800          */
3801         zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
3802 
3803         if (fuid_dirtied)
3804                 zfs_fuid_sync(zfsvfs, tx);
3805 
3806         mutex_enter(&zp->z_lock);


3893  * Timestamps:
3894  *      tdvp - ctime|mtime updated
3895  *       svp - ctime updated
3896  */
3897 /* ARGSUSED */
3898 static int
3899 zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
3900     caller_context_t *ct, int flags)
3901 {
3902         znode_t         *dzp = VTOZ(tdvp);
3903         znode_t         *tzp, *szp;
3904         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
3905         zilog_t         *zilog;
3906         zfs_dirlock_t   *dl;
3907         dmu_tx_t        *tx;
3908         vnode_t         *realvp;
3909         int             error;
3910         int             zf = ZNEW;
3911         uint64_t        parent;
3912         uid_t           owner;

3913 
3914         ASSERT(tdvp->v_type == VDIR);
3915 
3916         ZFS_ENTER(zfsvfs);
3917         ZFS_VERIFY_ZP(dzp);
3918         zilog = zfsvfs->z_log;
3919 
3920         if (VOP_REALVP(svp, &realvp, ct) == 0)
3921                 svp = realvp;
3922 
3923         /*
3924          * POSIX dictates that we return EPERM here.
3925          * Better choices include ENOTSUP or EISDIR.
3926          */
3927         if (svp->v_type == VDIR) {
3928                 ZFS_EXIT(zfsvfs);
3929                 return (SET_ERROR(EPERM));
3930         }
3931 
3932         szp = VTOZ(svp);


3982         if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
3983                 ZFS_EXIT(zfsvfs);
3984                 return (error);
3985         }
3986 
3987 top:
3988         /*
3989          * Attempt to lock directory; fail if entry already exists.
3990          */
3991         error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL);
3992         if (error) {
3993                 ZFS_EXIT(zfsvfs);
3994                 return (error);
3995         }
3996 
3997         tx = dmu_tx_create(zfsvfs->z_os);
3998         dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3999         dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
4000         zfs_sa_upgrade_txholds(tx, szp);
4001         zfs_sa_upgrade_txholds(tx, dzp);
4002         error = dmu_tx_assign(tx, TXG_NOWAIT);
4003         if (error) {
4004                 zfs_dirent_unlock(dl);
4005                 if (error == ERESTART) {

4006                         dmu_tx_wait(tx);
4007                         dmu_tx_abort(tx);
4008                         goto top;
4009                 }
4010                 dmu_tx_abort(tx);
4011                 ZFS_EXIT(zfsvfs);
4012                 return (error);
4013         }
4014 
4015         error = zfs_link_create(dl, szp, tx, 0);
4016 
4017         if (error == 0) {
4018                 uint64_t txtype = TX_LINK;
4019                 if (flags & FIGNORECASE)
4020                         txtype |= TX_CI;
4021                 zfs_log_link(zilog, tx, txtype, dzp, szp, name);
4022         }
4023 
4024         dmu_tx_commit(tx);
4025 




 106  *      pushing cached pages (which acquires range locks) and syncing out
 107  *      cached atime changes.  Third, zfs_zinactive() may require a new tx,
 108  *      which could deadlock the system if you were already holding one.
 109  *      If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 110  *
 111  *  (3) All range locks must be grabbed before calling dmu_tx_assign(),
 112  *      as they can span dmu_tx_assign() calls.
 113  *
 114  *  (4) Always pass TXG_NOWAIT (or TXG_WAITED on a retry) as the second
 115  *      argument to dmu_tx_assign(): we must never block while holding locks.
 116  *      Note, in particular, that if a lock is sometimes acquired before
 117  *      the tx assigns, and sometimes after (e.g. z_lock), then failing to
 118  *      use a non-blocking assign can deadlock the system.  The scenario:
 119  *
 120  *      Thread A has grabbed a lock before calling dmu_tx_assign().
 121  *      Thread B is in an already-assigned tx, and blocks for this lock.
 122  *      Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 123  *      forever, because the previous txg can't quiesce until B's tx commits.
 124  *
 125  *      If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
 126  *      then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
 127  *      calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
 128  *      to indicate that this operation has already called dmu_tx_wait().
 129  *      This will ensure that we don't retry forever, waiting a short bit
 130  *      each time.
 131  *
 132  *  (5) If the operation succeeded, generate the intent log entry for it
 133  *      before dropping locks.  This ensures that the ordering of events
 134  *      in the intent log matches the order in which they actually occurred.
 135  *      During ZIL replay the zfs_log_* functions will update the sequence
 136  *      number to indicate the zil transaction has replayed.
 137  *
 138  *  (6) At the end of each vnode op, the DMU tx must always commit,
 139  *      regardless of whether there were any errors.
 140  *
 141  *  (7) After dropping all locks, invoke zil_commit(zilog, foid)
 142  *      to ensure that synchronous semantics are provided when necessary.
 143  *
 144  * In general, this is how things should be ordered in each vnode op:
 145  *
 146  *      ZFS_ENTER(zfsvfs);              // exit if unmounted
 147  * top:
 148  *      zfs_dirent_lock(&dl, ...)   // lock directory entry (may VN_HOLD())
 149  *      rw_enter(...);                  // grab any other locks you need
 150  *      tx = dmu_tx_create(...);        // get DMU tx
 151  *      dmu_tx_hold_*();                // hold each object you might modify
 152  *      error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
 153  *      if (error) {
 154  *              rw_exit(...);           // drop locks
 155  *              zfs_dirent_unlock(dl);  // unlock directory entry
 156  *              VN_RELE(...);           // release held vnodes
 157  *              if (error == ERESTART) {
 158  *                      waited = B_TRUE;
 159  *                      dmu_tx_wait(tx);
 160  *                      dmu_tx_abort(tx);
 161  *                      goto top;
 162  *              }
 163  *              dmu_tx_abort(tx);       // abort DMU tx
 164  *              ZFS_EXIT(zfsvfs);       // finished in zfs
 165  *              return (error);         // really out of space
 166  *      }
 167  *      error = do_real_work();         // do whatever this VOP does
 168  *      if (error == 0)
 169  *              zfs_log_*(...);         // on success, make ZIL entry
 170  *      dmu_tx_commit(tx);              // commit DMU tx -- error or not
 171  *      rw_exit(...);                   // drop locks
 172  *      zfs_dirent_unlock(dl);          // unlock directory entry
 173  *      VN_RELE(...);                   // release held vnodes
 174  *      zil_commit(zilog, foid);        // synchronous when necessary
 175  *      ZFS_EXIT(zfsvfs);               // finished in zfs
 176  *      return (error);                 // done, report error
 177  */
 178 


1303 
1304 /* ARGSUSED */
1305 static int
1306 zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
1307     int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
1308     vsecattr_t *vsecp)
1309 {
1310         znode_t         *zp, *dzp = VTOZ(dvp);
1311         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
1312         zilog_t         *zilog;
1313         objset_t        *os;
1314         zfs_dirlock_t   *dl;
1315         dmu_tx_t        *tx;
1316         int             error;
1317         ksid_t          *ksid;
1318         uid_t           uid;
1319         gid_t           gid = crgetgid(cr);
1320         zfs_acl_ids_t   acl_ids;
1321         boolean_t       fuid_dirtied;
1322         boolean_t       have_acl = B_FALSE;
1323         boolean_t       waited = B_FALSE;
1324 
1325         /*
1326          * If we have an ephemeral id, ACL, or XVATTR then
1327          * make sure file system is at proper version
1328          */
1329 
1330         ksid = crgetsid(cr, KSID_OWNER);
1331         if (ksid)
1332                 uid = ksid_getid(ksid);
1333         else
1334                 uid = crgetuid(cr);
1335 
1336         if (zfsvfs->z_use_fuids == B_FALSE &&
1337             (vsecp || (vap->va_mask & AT_XVATTR) ||
1338             IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1339                 return (SET_ERROR(EINVAL));
1340 
1341         ZFS_ENTER(zfsvfs);
1342         ZFS_VERIFY_ZP(dzp);
1343         os = zfsvfs->z_os;


1424                         zfs_acl_ids_free(&acl_ids);
1425                         error = SET_ERROR(EDQUOT);
1426                         goto out;
1427                 }
1428 
1429                 tx = dmu_tx_create(os);
1430 
1431                 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1432                     ZFS_SA_BASE_ATTR_SIZE);
1433 
1434                 fuid_dirtied = zfsvfs->z_fuid_dirty;
1435                 if (fuid_dirtied)
1436                         zfs_fuid_txhold(zfsvfs, tx);
1437                 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1438                 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1439                 if (!zfsvfs->z_use_sa &&
1440                     acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1441                         dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1442                             0, acl_ids.z_aclp->z_acl_bytes);
1443                 }
1444                 error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
1445                 if (error) {
1446                         zfs_dirent_unlock(dl);
1447                         if (error == ERESTART) {
1448                                 waited = B_TRUE;
1449                                 dmu_tx_wait(tx);
1450                                 dmu_tx_abort(tx);
1451                                 goto top;
1452                         }
1453                         zfs_acl_ids_free(&acl_ids);
1454                         dmu_tx_abort(tx);
1455                         ZFS_EXIT(zfsvfs);
1456                         return (error);
1457                 }
1458                 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1459 
1460                 if (fuid_dirtied)
1461                         zfs_fuid_sync(zfsvfs, tx);
1462 
1463                 (void) zfs_link_create(dl, zp, tx, ZNEW);
1464                 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1465                 if (flag & FIGNORECASE)
1466                         txtype |= TX_CI;
1467                 zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1468                     vsecp, acl_ids.z_fuidp, vap);


1560 zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
1561     int flags)
1562 {
1563         znode_t         *zp, *dzp = VTOZ(dvp);
1564         znode_t         *xzp;
1565         vnode_t         *vp;
1566         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
1567         zilog_t         *zilog;
1568         uint64_t        acl_obj, xattr_obj;
1569         uint64_t        xattr_obj_unlinked = 0;
1570         uint64_t        obj = 0;
1571         zfs_dirlock_t   *dl;
1572         dmu_tx_t        *tx;
1573         boolean_t       may_delete_now, delete_now = FALSE;
1574         boolean_t       unlinked, toobig = FALSE;
1575         uint64_t        txtype;
1576         pathname_t      *realnmp = NULL;
1577         pathname_t      realnm;
1578         int             error;
1579         int             zflg = ZEXISTS;
1580         boolean_t       waited = B_FALSE;
1581 
1582         ZFS_ENTER(zfsvfs);
1583         ZFS_VERIFY_ZP(dzp);
1584         zilog = zfsvfs->z_log;
1585 
1586         if (flags & FIGNORECASE) {
1587                 zflg |= ZCILOOK;
1588                 pn_alloc(&realnm);
1589                 realnmp = &realnm;
1590         }
1591 
1592 top:
1593         xattr_obj = 0;
1594         xzp = NULL;
1595         /*
1596          * Attempt to lock directory; fail if entry doesn't exist.
1597          */
1598         if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
1599             NULL, realnmp)) {
1600                 if (realnmp)


1649         }
1650 
1651         /* are there any extended attributes? */
1652         error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1653             &xattr_obj, sizeof (xattr_obj));
1654         if (error == 0 && xattr_obj) {
1655                 error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1656                 ASSERT0(error);
1657                 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1658                 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1659         }
1660 
1661         mutex_enter(&zp->z_lock);
1662         if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
1663                 dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
1664         mutex_exit(&zp->z_lock);
1665 
1666         /* charge as an update -- would be nice not to charge at all */
1667         dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1668 
1669         error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
1670         if (error) {
1671                 zfs_dirent_unlock(dl);
1672                 VN_RELE(vp);
1673                 if (xzp)
1674                         VN_RELE(ZTOV(xzp));
1675                 if (error == ERESTART) {
1676                         waited = B_TRUE;
1677                         dmu_tx_wait(tx);
1678                         dmu_tx_abort(tx);
1679                         goto top;
1680                 }
1681                 if (realnmp)
1682                         pn_free(realnmp);
1683                 dmu_tx_abort(tx);
1684                 ZFS_EXIT(zfsvfs);
1685                 return (error);
1686         }
1687 
1688         /*
1689          * Remove the directory entry.
1690          */
1691         error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);
1692 
1693         if (error) {
1694                 dmu_tx_commit(tx);
1695                 goto out;
1696         }


1790  *       vp - ctime|mtime|atime updated
1791  */
1792 /*ARGSUSED*/
1793 static int
1794 zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr,
1795     caller_context_t *ct, int flags, vsecattr_t *vsecp)
1796 {
1797         znode_t         *zp, *dzp = VTOZ(dvp);
1798         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
1799         zilog_t         *zilog;
1800         zfs_dirlock_t   *dl;
1801         uint64_t        txtype;
1802         dmu_tx_t        *tx;
1803         int             error;
1804         int             zf = ZNEW;
1805         ksid_t          *ksid;
1806         uid_t           uid;
1807         gid_t           gid = crgetgid(cr);
1808         zfs_acl_ids_t   acl_ids;
1809         boolean_t       fuid_dirtied;
1810         boolean_t       waited = B_FALSE;
1811 
1812         ASSERT(vap->va_type == VDIR);
1813 
1814         /*
1815          * If we have an ephemeral id, ACL, or XVATTR then
1816          * make sure file system is at proper version
1817          */
1818 
1819         ksid = crgetsid(cr, KSID_OWNER);
1820         if (ksid)
1821                 uid = ksid_getid(ksid);
1822         else
1823                 uid = crgetuid(cr);
1824         if (zfsvfs->z_use_fuids == B_FALSE &&
1825             (vsecp || (vap->va_mask & AT_XVATTR) ||
1826             IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1827                 return (SET_ERROR(EINVAL));
1828 
1829         ZFS_ENTER(zfsvfs);
1830         ZFS_VERIFY_ZP(dzp);


1887                 return (SET_ERROR(EDQUOT));
1888         }
1889 
1890         /*
1891          * Add a new entry to the directory.
1892          */
1893         tx = dmu_tx_create(zfsvfs->z_os);
1894         dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
1895         dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1896         fuid_dirtied = zfsvfs->z_fuid_dirty;
1897         if (fuid_dirtied)
1898                 zfs_fuid_txhold(zfsvfs, tx);
1899         if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1900                 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1901                     acl_ids.z_aclp->z_acl_bytes);
1902         }
1903 
1904         dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1905             ZFS_SA_BASE_ATTR_SIZE);
1906 
1907         error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
1908         if (error) {
1909                 zfs_dirent_unlock(dl);
1910                 if (error == ERESTART) {
1911                         waited = B_TRUE;
1912                         dmu_tx_wait(tx);
1913                         dmu_tx_abort(tx);
1914                         goto top;
1915                 }
1916                 zfs_acl_ids_free(&acl_ids);
1917                 dmu_tx_abort(tx);
1918                 ZFS_EXIT(zfsvfs);
1919                 return (error);
1920         }
1921 
1922         /*
1923          * Create new node.
1924          */
1925         zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1926 
1927         if (fuid_dirtied)
1928                 zfs_fuid_sync(zfsvfs, tx);
1929 
1930         /*
1931          * Now put new name in parent dir.


1967  *
1968  *      RETURN: 0 on success, error code on failure.
1969  *
1970  * Timestamps:
1971  *      dvp - ctime|mtime updated
1972  */
1973 /*ARGSUSED*/
1974 static int
1975 zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
1976     caller_context_t *ct, int flags)
1977 {
1978         znode_t         *dzp = VTOZ(dvp);
1979         znode_t         *zp;
1980         vnode_t         *vp;
1981         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
1982         zilog_t         *zilog;
1983         zfs_dirlock_t   *dl;
1984         dmu_tx_t        *tx;
1985         int             error;
1986         int             zflg = ZEXISTS;
1987         boolean_t       waited = B_FALSE;
1988 
1989         ZFS_ENTER(zfsvfs);
1990         ZFS_VERIFY_ZP(dzp);
1991         zilog = zfsvfs->z_log;
1992 
1993         if (flags & FIGNORECASE)
1994                 zflg |= ZCILOOK;
1995 top:
1996         zp = NULL;
1997 
1998         /*
1999          * Attempt to lock directory; fail if entry doesn't exist.
2000          */
2001         if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
2002             NULL, NULL)) {
2003                 ZFS_EXIT(zfsvfs);
2004                 return (error);
2005         }
2006 
2007         vp = ZTOV(zp);


2023         vnevent_rmdir(vp, dvp, name, ct);
2024 
2025         /*
2026          * Grab a lock on the directory to make sure that no one is
2027          * trying to add (or lookup) entries while we are removing it.
2028          */
2029         rw_enter(&zp->z_name_lock, RW_WRITER);
2030 
2031         /*
2032          * Grab a lock on the parent pointer to make sure we play well
2033          * with the treewalk and directory rename code.
2034          */
2035         rw_enter(&zp->z_parent_lock, RW_WRITER);
2036 
2037         tx = dmu_tx_create(zfsvfs->z_os);
2038         dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
2039         dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2040         dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
2041         zfs_sa_upgrade_txholds(tx, zp);
2042         zfs_sa_upgrade_txholds(tx, dzp);
2043         error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
2044         if (error) {
2045                 rw_exit(&zp->z_parent_lock);
2046                 rw_exit(&zp->z_name_lock);
2047                 zfs_dirent_unlock(dl);
2048                 VN_RELE(vp);
2049                 if (error == ERESTART) {
2050                         waited = B_TRUE;
2051                         dmu_tx_wait(tx);
2052                         dmu_tx_abort(tx);
2053                         goto top;
2054                 }
2055                 dmu_tx_abort(tx);
2056                 ZFS_EXIT(zfsvfs);
2057                 return (error);
2058         }
2059 
2060         error = zfs_link_destroy(dl, zp, tx, zflg, NULL);
2061 
2062         if (error == 0) {
2063                 uint64_t txtype = TX_RMDIR;
2064                 if (flags & FIGNORECASE)
2065                         txtype |= TX_CI;
2066                 zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT);
2067         }
2068 
2069         dmu_tx_commit(tx);
2070 


3358  *
3359  * Timestamps:
3360  *      sdvp,tdvp - ctime|mtime updated
3361  */
3362 /*ARGSUSED*/
3363 static int
3364 zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
3365     caller_context_t *ct, int flags)
3366 {
3367         znode_t         *tdzp, *szp, *tzp;
3368         znode_t         *sdzp = VTOZ(sdvp);
3369         zfsvfs_t        *zfsvfs = sdzp->z_zfsvfs;
3370         zilog_t         *zilog;
3371         vnode_t         *realvp;
3372         zfs_dirlock_t   *sdl, *tdl;
3373         dmu_tx_t        *tx;
3374         zfs_zlock_t     *zl;
3375         int             cmp, serr, terr;
3376         int             error = 0;
3377         int             zflg = 0;
3378         boolean_t       waited = B_FALSE;
3379 
3380         ZFS_ENTER(zfsvfs);
3381         ZFS_VERIFY_ZP(sdzp);
3382         zilog = zfsvfs->z_log;
3383 
3384         /*
3385          * Make sure we have the real vp for the target directory.
3386          */
3387         if (VOP_REALVP(tdvp, &realvp, ct) == 0)
3388                 tdvp = realvp;
3389 
3390         tdzp = VTOZ(tdvp);
3391         ZFS_VERIFY_ZP(tdzp);
3392 
3393         /*
3394          * We check z_zfsvfs rather than v_vfsp here, because snapshots and the
3395          * ctldir appear to have the same v_vfsp.
3396          */
3397         if (tdzp->z_zfsvfs != zfsvfs || zfsctl_is_node(tdvp)) {
3398                 ZFS_EXIT(zfsvfs);


3596         if (tdvp != sdvp) {
3597                 vnevent_rename_dest_dir(tdvp, ct);
3598         }
3599 
3600         tx = dmu_tx_create(zfsvfs->z_os);
3601         dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3602         dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3603         dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3604         dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3605         if (sdzp != tdzp) {
3606                 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3607                 zfs_sa_upgrade_txholds(tx, tdzp);
3608         }
3609         if (tzp) {
3610                 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3611                 zfs_sa_upgrade_txholds(tx, tzp);
3612         }
3613 
3614         zfs_sa_upgrade_txholds(tx, szp);
3615         dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3616         error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
3617         if (error) {
3618                 if (zl != NULL)
3619                         zfs_rename_unlock(&zl);
3620                 zfs_dirent_unlock(sdl);
3621                 zfs_dirent_unlock(tdl);
3622 
3623                 if (sdzp == tdzp)
3624                         rw_exit(&sdzp->z_name_lock);
3625 
3626                 VN_RELE(ZTOV(szp));
3627                 if (tzp)
3628                         VN_RELE(ZTOV(tzp));
3629                 if (error == ERESTART) {
3630                         waited = B_TRUE;
3631                         dmu_tx_wait(tx);
3632                         dmu_tx_abort(tx);
3633                         goto top;
3634                 }
3635                 dmu_tx_abort(tx);
3636                 ZFS_EXIT(zfsvfs);
3637                 return (error);
3638         }
3639 
3640         if (tzp)        /* Attempt to remove the existing target */
3641                 error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
3642 
3643         if (error == 0) {
3644                 error = zfs_link_create(tdl, szp, tx, ZRENAMING);
3645                 if (error == 0) {
3646                         szp->z_pflags |= ZFS_AV_MODIFIED;
3647 
3648                         error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3649                             (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3650                         ASSERT0(error);


3716  *
3717  * Timestamps:
3718  *      dvp - ctime|mtime updated
3719  */
3720 /*ARGSUSED*/
3721 static int
3722 zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr,
3723     caller_context_t *ct, int flags)
3724 {
3725         znode_t         *zp, *dzp = VTOZ(dvp);
3726         zfs_dirlock_t   *dl;
3727         dmu_tx_t        *tx;
3728         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
3729         zilog_t         *zilog;
3730         uint64_t        len = strlen(link);
3731         int             error;
3732         int             zflg = ZNEW;
3733         zfs_acl_ids_t   acl_ids;
3734         boolean_t       fuid_dirtied;
3735         uint64_t        txtype = TX_SYMLINK;
3736         boolean_t       waited = B_FALSE;
3737 
3738         ASSERT(vap->va_type == VLNK);
3739 
3740         ZFS_ENTER(zfsvfs);
3741         ZFS_VERIFY_ZP(dzp);
3742         zilog = zfsvfs->z_log;
3743 
3744         if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
3745             NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3746                 ZFS_EXIT(zfsvfs);
3747                 return (SET_ERROR(EILSEQ));
3748         }
3749         if (flags & FIGNORECASE)
3750                 zflg |= ZCILOOK;
3751 
3752         if (len > MAXPATHLEN) {
3753                 ZFS_EXIT(zfsvfs);
3754                 return (SET_ERROR(ENAMETOOLONG));
3755         }
3756 


3779 
3780         if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
3781                 zfs_acl_ids_free(&acl_ids);
3782                 zfs_dirent_unlock(dl);
3783                 ZFS_EXIT(zfsvfs);
3784                 return (SET_ERROR(EDQUOT));
3785         }
3786         tx = dmu_tx_create(zfsvfs->z_os);
3787         fuid_dirtied = zfsvfs->z_fuid_dirty;
3788         dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
3789         dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
3790         dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
3791             ZFS_SA_BASE_ATTR_SIZE + len);
3792         dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
3793         if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3794                 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
3795                     acl_ids.z_aclp->z_acl_bytes);
3796         }
3797         if (fuid_dirtied)
3798                 zfs_fuid_txhold(zfsvfs, tx);
3799         error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
3800         if (error) {
3801                 zfs_dirent_unlock(dl);
3802                 if (error == ERESTART) {
3803                         waited = B_TRUE;
3804                         dmu_tx_wait(tx);
3805                         dmu_tx_abort(tx);
3806                         goto top;
3807                 }
3808                 zfs_acl_ids_free(&acl_ids);
3809                 dmu_tx_abort(tx);
3810                 ZFS_EXIT(zfsvfs);
3811                 return (error);
3812         }
3813 
3814         /*
3815          * Create a new object for the symlink.
3816          * For version 4 ZPL datasets the symlink will be an SA attribute.
3817          */
3818         zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
3819 
3820         if (fuid_dirtied)
3821                 zfs_fuid_sync(zfsvfs, tx);
3822 
3823         mutex_enter(&zp->z_lock);


3910  * Timestamps:
3911  *      tdvp - ctime|mtime updated
3912  *       svp - ctime updated
3913  */
3914 /* ARGSUSED */
3915 static int
3916 zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
3917     caller_context_t *ct, int flags)
3918 {
3919         znode_t         *dzp = VTOZ(tdvp);
3920         znode_t         *tzp, *szp;
3921         zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
3922         zilog_t         *zilog;
3923         zfs_dirlock_t   *dl;
3924         dmu_tx_t        *tx;
3925         vnode_t         *realvp;
3926         int             error;
3927         int             zf = ZNEW;
3928         uint64_t        parent;
3929         uid_t           owner;
3930         boolean_t       waited = B_FALSE;
3931 
3932         ASSERT(tdvp->v_type == VDIR);
3933 
3934         ZFS_ENTER(zfsvfs);
3935         ZFS_VERIFY_ZP(dzp);
3936         zilog = zfsvfs->z_log;
3937 
3938         if (VOP_REALVP(svp, &realvp, ct) == 0)
3939                 svp = realvp;
3940 
3941         /*
3942          * POSIX dictates that we return EPERM here.
3943          * Better choices include ENOTSUP or EISDIR.
3944          */
3945         if (svp->v_type == VDIR) {
3946                 ZFS_EXIT(zfsvfs);
3947                 return (SET_ERROR(EPERM));
3948         }
3949 
3950         szp = VTOZ(svp);


4000         if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
4001                 ZFS_EXIT(zfsvfs);
4002                 return (error);
4003         }
4004 
4005 top:
4006         /*
4007          * Attempt to lock directory; fail if entry already exists.
4008          */
4009         error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL);
4010         if (error) {
4011                 ZFS_EXIT(zfsvfs);
4012                 return (error);
4013         }
4014 
4015         tx = dmu_tx_create(zfsvfs->z_os);
4016         dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
4017         dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
4018         zfs_sa_upgrade_txholds(tx, szp);
4019         zfs_sa_upgrade_txholds(tx, dzp);
4020         error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
4021         if (error) {
4022                 zfs_dirent_unlock(dl);
4023                 if (error == ERESTART) {
4024                         waited = B_TRUE;
4025                         dmu_tx_wait(tx);
4026                         dmu_tx_abort(tx);
4027                         goto top;
4028                 }
4029                 dmu_tx_abort(tx);
4030                 ZFS_EXIT(zfsvfs);
4031                 return (error);
4032         }
4033 
4034         error = zfs_link_create(dl, szp, tx, 0);
4035 
4036         if (error == 0) {
4037                 uint64_t txtype = TX_LINK;
4038                 if (flags & FIGNORECASE)
4039                         txtype |= TX_CI;
4040                 zfs_log_link(zilog, tx, txtype, dzp, szp, name);
4041         }
4042 
4043         dmu_tx_commit(tx);
4044