Print this page
NFS4 data corruption (#3508)
If async calls are disabled, nfs4_async_putapage is supposed to do its
work synchronously. Due to a bug, it sometimes just does nothing, leaving
the page for later.
Unfortunately, the caller has already cleared the R4DIRTY flag.
Without R4DIRTY, nfs4_attrcache_va can't see that there are still
outstanding writes and accepts the file size from the server, which is
too low.
When the dirty page finally gets written back, the write is truncated to
that too-low file size, leaving some bytes of the page unwritten.
Reviewed by: Marcel Telka <marcel@telka.sk>
Reviewed by: Robert Gordon <rbg@openrbg.com>


1766 
1767         mutex_enter(&rp->r_statelock);
1768         rp->r_count++;
1769         rp->r_awcount++;
1770         mutex_exit(&rp->r_statelock);
1771 
1772         if (mi->mi_io_kstats) {
1773                 mutex_enter(&mi->mi_lock);
1774                 kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
1775                 mutex_exit(&mi->mi_lock);
1776         }
1777 
1778         mi->mi_async_req_count++;
1779         ASSERT(mi->mi_async_req_count != 0);
1780         cv_signal(&mi->mi_async_reqs_cv);
1781         mutex_exit(&mi->mi_async_lock);
1782         return (0);
1783 
1784 noasync:
1785 
1786         if (curproc == proc_pageout || curproc == proc_fsflush ||
1787             nfs_zone() == mi->mi_zone) {
1788                 /*
1789                  * If we get here in the context of the pageout/fsflush,
1790                  * or we have run out of memory or we're attempting to
1791                  * unmount we refuse to do a sync write, because this may
1792                  * hang pageout/fsflush and the machine. In this case,
1793                  * we just re-mark the page as dirty and punt on the page.
1794                  *
1795                  * Make sure B_FORCE isn't set.  We can re-mark the
1796                  * pages as dirty and unlock the pages in one swoop by
1797                  * passing in B_ERROR to pvn_write_done().  However,
1798                  * we should make sure B_FORCE isn't set - we don't
1799                  * want the page tossed before it gets written out.
1800                  */
1801                 if (flags & B_FORCE)
1802                         flags &= ~(B_INVAL | B_FORCE);
1803                 pvn_write_done(pp, flags | B_ERROR);
1804                 return (0);
1805         }
1806 

1807         /*
1808          * We'll get here only if (nfs_zone() != mi->mi_zone)
1809          * which means that this was a cross-zone sync putpage.
1810          *
1811          * We pass in B_ERROR to pvn_write_done() to re-mark the pages
1812          * as dirty and unlock them.
1813          *
1814          * We don't want to clear B_FORCE here as the caller presumably
1815          * knows what they're doing if they set it.
1816          */
1817         pvn_write_done(pp, flags | B_ERROR);
1818         return (EPERM);


1819 }
1820 
1821 int
1822 nfs4_async_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len,
1823     int flags, cred_t *cr, int (*pageio)(vnode_t *, page_t *, u_offset_t,
1824     size_t, int, cred_t *))
1825 {
1826         rnode4_t *rp;
1827         mntinfo4_t *mi;
1828         struct nfs4_async_reqs *args;
1829 
1830         ASSERT(flags & B_ASYNC);
1831         ASSERT(vp->v_vfsp != NULL);
1832 
1833         rp = VTOR4(vp);
1834         ASSERT(rp->r_count > 0);
1835 
1836         mi = VTOMI4(vp);
1837 
1838         /*




1766 
1767         mutex_enter(&rp->r_statelock);
1768         rp->r_count++;
1769         rp->r_awcount++;
1770         mutex_exit(&rp->r_statelock);
1771 
1772         if (mi->mi_io_kstats) {
1773                 mutex_enter(&mi->mi_lock);
1774                 kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
1775                 mutex_exit(&mi->mi_lock);
1776         }
1777 
1778         mi->mi_async_req_count++;
1779         ASSERT(mi->mi_async_req_count != 0);
1780         cv_signal(&mi->mi_async_reqs_cv);
1781         mutex_exit(&mi->mi_async_lock);
1782         return (0);
1783 
1784 noasync:
1785 
1786         if (curproc == proc_pageout || curproc == proc_fsflush) {

1787                 /*
1788                  * If we get here in the context of the pageout/fsflush,
1789                  * or we have run out of memory or we're attempting to
1790                  * unmount we refuse to do a sync write, because this may
1791                  * hang pageout/fsflush and the machine. In this case,
1792                  * we just re-mark the page as dirty and punt on the page.
1793                  *
1794                  * Make sure B_FORCE isn't set.  We can re-mark the
1795                  * pages as dirty and unlock the pages in one swoop by
1796                  * passing in B_ERROR to pvn_write_done().  However,
1797                  * we should make sure B_FORCE isn't set - we don't
1798                  * want the page tossed before it gets written out.
1799                  */
1800                 if (flags & B_FORCE)
1801                         flags &= ~(B_INVAL | B_FORCE);
1802                 pvn_write_done(pp, flags | B_ERROR);
1803                 return (0);
1804         }
1805 
1806         if (nfs_zone() != mi->mi_zone) {
1807                 /*
1808                  * So this was a cross-zone sync putpage.

1809                  *
1810                  * We pass in B_ERROR to pvn_write_done() to re-mark the pages
1811                  * as dirty and unlock them.
1812                  *
1813                  * We don't want to clear B_FORCE here as the caller presumably
1814                  * knows what they're doing if they set it.
1815                  */
1816                 pvn_write_done(pp, flags | B_ERROR);
1817                 return (EPERM);
1818         }
1819         return ((*putapage)(vp, pp, off, len, flags, cr));
1820 }
1821 
1822 int
1823 nfs4_async_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len,
1824     int flags, cred_t *cr, int (*pageio)(vnode_t *, page_t *, u_offset_t,
1825     size_t, int, cred_t *))
1826 {
1827         rnode4_t *rp;
1828         mntinfo4_t *mi;
1829         struct nfs4_async_reqs *args;
1830 
1831         ASSERT(flags & B_ASYNC);
1832         ASSERT(vp->v_vfsp != NULL);
1833 
1834         rp = VTOR4(vp);
1835         ASSERT(rp->r_count > 0);
1836 
1837         mi = VTOMI4(vp);
1838 
1839         /*