1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_trans.h"
15 #include "xfs_trans_priv.h"
16 #include "xfs_inode_item.h"
17
18 #include <linux/iversion.h>
19
20 /*
21 * Add a locked inode to the transaction.
22 *
23 * The inode must be locked, and it cannot be associated with any transaction.
24 * If lock_flags is non-zero the inode will be unlocked on transaction commit.
25 */
26 void
xfs_trans_ijoin(struct xfs_trans * tp,struct xfs_inode * ip,uint lock_flags)27 xfs_trans_ijoin(
28 struct xfs_trans *tp,
29 struct xfs_inode *ip,
30 uint lock_flags)
31 {
32 struct xfs_inode_log_item *iip;
33
34 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
35 if (ip->i_itemp == NULL)
36 xfs_inode_item_init(ip, ip->i_mount);
37 iip = ip->i_itemp;
38
39 ASSERT(iip->ili_lock_flags == 0);
40 iip->ili_lock_flags = lock_flags;
41 ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
42
43 /*
44 * Get a log_item_desc to point at the new item.
45 */
46 xfs_trans_add_item(tp, &iip->ili_item);
47 }
48
49 /*
50 * Transactional inode timestamp update. Requires the inode to be locked and
51 * joined to the transaction supplied. Relies on the transaction subsystem to
52 * track dirty state and update/writeback the inode accordingly.
53 */
54 void
xfs_trans_ichgtime(struct xfs_trans * tp,struct xfs_inode * ip,int flags)55 xfs_trans_ichgtime(
56 struct xfs_trans *tp,
57 struct xfs_inode *ip,
58 int flags)
59 {
60 struct inode *inode = VFS_I(ip);
61 struct timespec64 tv;
62
63 ASSERT(tp);
64 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
65
66 tv = current_time(inode);
67
68 if (flags & XFS_ICHGTIME_MOD)
69 inode->i_mtime = tv;
70 if (flags & XFS_ICHGTIME_CHG)
71 inode->i_ctime = tv;
72 if (flags & XFS_ICHGTIME_CREATE)
73 ip->i_crtime = tv;
74 }
75
76 /*
77 * This is called to mark the fields indicated in fieldmask as needing to be
78 * logged when the transaction is committed. The inode must already be
79 * associated with the given transaction.
80 *
81 * The values for fieldmask are defined in xfs_inode_item.h. We always log all
82 * of the core inode if any of it has changed, and we always log all of the
83 * inline data/extents/b-tree root if any of them has changed.
84 *
85 * Grab and pin the cluster buffer associated with this inode to avoid RMW
86 * cycles at inode writeback time. Avoid the need to add error handling to every
87 * xfs_trans_log_inode() call by shutting down on read error. This will cause
88 * transactions to fail and everything to error out, just like if we return a
89 * read error in a dirty transaction and cancel it.
90 */
91 void
xfs_trans_log_inode(struct xfs_trans * tp,struct xfs_inode * ip,uint flags)92 xfs_trans_log_inode(
93 struct xfs_trans *tp,
94 struct xfs_inode *ip,
95 uint flags)
96 {
97 struct xfs_inode_log_item *iip = ip->i_itemp;
98 struct inode *inode = VFS_I(ip);
99 uint iversion_flags = 0;
100
101 ASSERT(iip);
102 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
103 ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
104
105 tp->t_flags |= XFS_TRANS_DIRTY;
106
107 /*
108 * Don't bother with i_lock for the I_DIRTY_TIME check here, as races
109 * don't matter - we either will need an extra transaction in 24 hours
110 * to log the timestamps, or will clear already cleared fields in the
111 * worst case.
112 */
113 if (inode->i_state & I_DIRTY_TIME) {
114 spin_lock(&inode->i_lock);
115 inode->i_state &= ~I_DIRTY_TIME;
116 spin_unlock(&inode->i_lock);
117 }
118
119 /*
120 * First time we log the inode in a transaction, bump the inode change
121 * counter if it is configured for this to occur. While we have the
122 * inode locked exclusively for metadata modification, we can usually
123 * avoid setting XFS_ILOG_CORE if no one has queried the value since
124 * the last time it was incremented. If we have XFS_ILOG_CORE already
125 * set however, then go ahead and bump the i_version counter
126 * unconditionally.
127 */
128 if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags)) {
129 if (IS_I_VERSION(inode) &&
130 inode_maybe_inc_iversion(inode, flags & XFS_ILOG_CORE))
131 iversion_flags = XFS_ILOG_CORE;
132 }
133
134 /*
135 * If we're updating the inode core or the timestamps and it's possible
136 * to upgrade this inode to bigtime format, do so now.
137 */
138 if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) &&
139 xfs_has_bigtime(ip->i_mount) &&
140 !xfs_inode_has_bigtime(ip)) {
141 ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME;
142 flags |= XFS_ILOG_CORE;
143 }
144
145 /*
146 * Inode verifiers do not check that the extent size hint is an integer
147 * multiple of the rt extent size on a directory with both rtinherit
148 * and extszinherit flags set. If we're logging a directory that is
149 * misconfigured in this way, clear the hint.
150 */
151 if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
152 (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
153 (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
154 ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
155 XFS_DIFLAG_EXTSZINHERIT);
156 ip->i_extsize = 0;
157 flags |= XFS_ILOG_CORE;
158 }
159
160 /*
161 * Record the specific change for fdatasync optimisation. This allows
162 * fdatasync to skip log forces for inodes that are only timestamp
163 * dirty.
164 */
165 spin_lock(&iip->ili_lock);
166 iip->ili_fsync_fields |= flags;
167
168 if (!iip->ili_item.li_buf) {
169 struct xfs_buf *bp;
170 int error;
171
172 /*
173 * We hold the ILOCK here, so this inode is not going to be
174 * flushed while we are here. Further, because there is no
175 * buffer attached to the item, we know that there is no IO in
176 * progress, so nothing will clear the ili_fields while we read
177 * in the buffer. Hence we can safely drop the spin lock and
178 * read the buffer knowing that the state will not change from
179 * here.
180 */
181 spin_unlock(&iip->ili_lock);
182 error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp);
183 if (error) {
184 xfs_force_shutdown(ip->i_mount, SHUTDOWN_META_IO_ERROR);
185 return;
186 }
187
188 /*
189 * We need an explicit buffer reference for the log item but
190 * don't want the buffer to remain attached to the transaction.
191 * Hold the buffer but release the transaction reference once
192 * we've attached the inode log item to the buffer log item
193 * list.
194 */
195 xfs_buf_hold(bp);
196 spin_lock(&iip->ili_lock);
197 iip->ili_item.li_buf = bp;
198 bp->b_flags |= _XBF_INODES;
199 list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list);
200 xfs_trans_brelse(tp, bp);
201 }
202
203 /*
204 * Always OR in the bits from the ili_last_fields field. This is to
205 * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
206 * in the eventual clearing of the ili_fields bits. See the big comment
207 * in xfs_iflush() for an explanation of this coordination mechanism.
208 */
209 iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags);
210 spin_unlock(&iip->ili_lock);
211 }
212
213 int
xfs_trans_roll_inode(struct xfs_trans ** tpp,struct xfs_inode * ip)214 xfs_trans_roll_inode(
215 struct xfs_trans **tpp,
216 struct xfs_inode *ip)
217 {
218 int error;
219
220 xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
221 error = xfs_trans_roll(tpp);
222 if (!error)
223 xfs_trans_ijoin(*tpp, ip, 0);
224 return error;
225 }
226