1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * This file is part of UBIFS.
4   *
5   * Copyright (C) 2006-2008 Nokia Corporation.
6   *
7   * Authors: Artem Bityutskiy (Битюцкий Артём)
8   *          Adrian Hunter
9   */
10  
11  /*
12   * This file implements UBIFS shrinker which evicts clean znodes from the TNC
13   * tree when Linux VM needs more RAM.
14   *
15   * We do not implement any LRU lists to find oldest znodes to free because it
16   * would add additional overhead to the file system fast paths. So the shrinker
17   * just walks the TNC tree when searching for znodes to free.
18   *
19   * If the root of a TNC sub-tree is clean and old enough, then the children are
20   * also clean and old enough. So the shrinker walks the TNC in level order and
21   * dumps entire sub-trees.
22   *
23   * The age of znodes is just the time-stamp when they were last looked at.
24   * The current shrinker first tries to evict old znodes, then young ones.
25   *
26   * Since the shrinker is global, it has to protect against races with FS
27   * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
28   */
29  
30  #include "ubifs.h"
31  
32  /* List of all UBIFS file-system instances */
33  LIST_HEAD(ubifs_infos);
34  
35  /*
36   * We number each shrinker run and record the number on the ubifs_info structure
37   * so that we can easily work out which ubifs_info structures have already been
38   * done by the current run.
39   */
40  static unsigned int shrinker_run_no;
41  
42  /* Protects 'ubifs_infos' list */
43  DEFINE_SPINLOCK(ubifs_infos_lock);
44  
45  /* Global clean znode counter (for all mounted UBIFS instances) */
46  atomic_long_t ubifs_clean_zn_cnt;
47  
48  /**
49   * shrink_tnc - shrink TNC tree.
50   * @c: UBIFS file-system description object
51   * @nr: number of znodes to free
52   * @age: the age of znodes to free
53   * @contention: if any contention, this is set to %1
54   *
55   * This function traverses TNC tree and frees clean znodes. It does not free
56   * clean znodes which younger then @age. Returns number of freed znodes.
57   */
shrink_tnc(struct ubifs_info * c,int nr,int age,int * contention)58  static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
59  {
60  	int total_freed = 0;
61  	struct ubifs_znode *znode, *zprev;
62  	time64_t time = ktime_get_seconds();
63  
64  	ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
65  	ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
66  
67  	if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
68  		return 0;
69  
70  	/*
71  	 * Traverse the TNC tree in levelorder manner, so that it is possible
72  	 * to destroy large sub-trees. Indeed, if a znode is old, then all its
73  	 * children are older or of the same age.
74  	 *
75  	 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
76  	 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
77  	 * changed only when the 'c->tnc_mutex' is held.
78  	 */
79  	zprev = NULL;
80  	znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
81  	while (znode && total_freed < nr &&
82  	       atomic_long_read(&c->clean_zn_cnt) > 0) {
83  		int freed;
84  
85  		/*
86  		 * If the znode is clean, but it is in the 'c->cnext' list, this
87  		 * means that this znode has just been written to flash as a
88  		 * part of commit and was marked clean. They will be removed
89  		 * from the list at end commit. We cannot change the list,
90  		 * because it is not protected by any mutex (design decision to
91  		 * make commit really independent and parallel to main I/O). So
92  		 * we just skip these znodes.
93  		 *
94  		 * Note, the 'clean_zn_cnt' counters are not updated until
95  		 * after the commit, so the UBIFS shrinker does not report
96  		 * the znodes which are in the 'c->cnext' list as freeable.
97  		 *
98  		 * Also note, if the root of a sub-tree is not in 'c->cnext',
99  		 * then the whole sub-tree is not in 'c->cnext' as well, so it
100  		 * is safe to dump whole sub-tree.
101  		 */
102  
103  		if (znode->cnext) {
104  			/*
105  			 * Very soon these znodes will be removed from the list
106  			 * and become freeable.
107  			 */
108  			*contention = 1;
109  		} else if (!ubifs_zn_dirty(znode) &&
110  			   abs(time - znode->time) >= age) {
111  			if (znode->parent)
112  				znode->parent->zbranch[znode->iip].znode = NULL;
113  			else
114  				c->zroot.znode = NULL;
115  
116  			freed = ubifs_destroy_tnc_subtree(c, znode);
117  			atomic_long_sub(freed, &ubifs_clean_zn_cnt);
118  			atomic_long_sub(freed, &c->clean_zn_cnt);
119  			total_freed += freed;
120  			znode = zprev;
121  		}
122  
123  		if (unlikely(!c->zroot.znode))
124  			break;
125  
126  		zprev = znode;
127  		znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
128  		cond_resched();
129  	}
130  
131  	return total_freed;
132  }
133  
134  /**
135   * shrink_tnc_trees - shrink UBIFS TNC trees.
136   * @nr: number of znodes to free
137   * @age: the age of znodes to free
138   * @contention: if any contention, this is set to %1
139   *
140   * This function walks the list of mounted UBIFS file-systems and frees clean
141   * znodes which are older than @age, until at least @nr znodes are freed.
142   * Returns the number of freed znodes.
143   */
shrink_tnc_trees(int nr,int age,int * contention)144  static int shrink_tnc_trees(int nr, int age, int *contention)
145  {
146  	struct ubifs_info *c;
147  	struct list_head *p;
148  	unsigned int run_no;
149  	int freed = 0;
150  
151  	spin_lock(&ubifs_infos_lock);
152  	do {
153  		run_no = ++shrinker_run_no;
154  	} while (run_no == 0);
155  	/* Iterate over all mounted UBIFS file-systems and try to shrink them */
156  	p = ubifs_infos.next;
157  	while (p != &ubifs_infos) {
158  		c = list_entry(p, struct ubifs_info, infos_list);
159  		/*
160  		 * We move the ones we do to the end of the list, so we stop
161  		 * when we see one we have already done.
162  		 */
163  		if (c->shrinker_run_no == run_no)
164  			break;
165  		if (!mutex_trylock(&c->umount_mutex)) {
166  			/* Some un-mount is in progress, try next FS */
167  			*contention = 1;
168  			p = p->next;
169  			continue;
170  		}
171  		/*
172  		 * We're holding 'c->umount_mutex', so the file-system won't go
173  		 * away.
174  		 */
175  		if (!mutex_trylock(&c->tnc_mutex)) {
176  			mutex_unlock(&c->umount_mutex);
177  			*contention = 1;
178  			p = p->next;
179  			continue;
180  		}
181  		spin_unlock(&ubifs_infos_lock);
182  		/*
183  		 * OK, now we have TNC locked, the file-system cannot go away -
184  		 * it is safe to reap the cache.
185  		 */
186  		c->shrinker_run_no = run_no;
187  		freed += shrink_tnc(c, nr, age, contention);
188  		mutex_unlock(&c->tnc_mutex);
189  		spin_lock(&ubifs_infos_lock);
190  		/* Get the next list element before we move this one */
191  		p = p->next;
192  		/*
193  		 * Move this one to the end of the list to provide some
194  		 * fairness.
195  		 */
196  		list_move_tail(&c->infos_list, &ubifs_infos);
197  		mutex_unlock(&c->umount_mutex);
198  		if (freed >= nr)
199  			break;
200  	}
201  	spin_unlock(&ubifs_infos_lock);
202  	return freed;
203  }
204  
205  /**
206   * kick_a_thread - kick a background thread to start commit.
207   *
208   * This function kicks a background thread to start background commit. Returns
209   * %-1 if a thread was kicked or there is another reason to assume the memory
210   * will soon be freed or become freeable. If there are no dirty znodes, returns
211   * %0.
212   */
kick_a_thread(void)213  static int kick_a_thread(void)
214  {
215  	int i;
216  	struct ubifs_info *c;
217  
218  	/*
219  	 * Iterate over all mounted UBIFS file-systems and find out if there is
220  	 * already an ongoing commit operation there. If no, then iterate for
221  	 * the second time and initiate background commit.
222  	 */
223  	spin_lock(&ubifs_infos_lock);
224  	for (i = 0; i < 2; i++) {
225  		list_for_each_entry(c, &ubifs_infos, infos_list) {
226  			long dirty_zn_cnt;
227  
228  			if (!mutex_trylock(&c->umount_mutex)) {
229  				/*
230  				 * Some un-mount is in progress, it will
231  				 * certainly free memory, so just return.
232  				 */
233  				spin_unlock(&ubifs_infos_lock);
234  				return -1;
235  			}
236  
237  			dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
238  
239  			if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
240  			    c->ro_mount || c->ro_error) {
241  				mutex_unlock(&c->umount_mutex);
242  				continue;
243  			}
244  
245  			if (c->cmt_state != COMMIT_RESTING) {
246  				spin_unlock(&ubifs_infos_lock);
247  				mutex_unlock(&c->umount_mutex);
248  				return -1;
249  			}
250  
251  			if (i == 1) {
252  				list_move_tail(&c->infos_list, &ubifs_infos);
253  				spin_unlock(&ubifs_infos_lock);
254  
255  				ubifs_request_bg_commit(c);
256  				mutex_unlock(&c->umount_mutex);
257  				return -1;
258  			}
259  			mutex_unlock(&c->umount_mutex);
260  		}
261  	}
262  	spin_unlock(&ubifs_infos_lock);
263  
264  	return 0;
265  }
266  
ubifs_shrink_count(struct shrinker * shrink,struct shrink_control * sc)267  unsigned long ubifs_shrink_count(struct shrinker *shrink,
268  				 struct shrink_control *sc)
269  {
270  	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
271  
272  	/*
273  	 * Due to the way UBIFS updates the clean znode counter it may
274  	 * temporarily be negative.
275  	 */
276  	return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
277  }
278  
ubifs_shrink_scan(struct shrinker * shrink,struct shrink_control * sc)279  unsigned long ubifs_shrink_scan(struct shrinker *shrink,
280  				struct shrink_control *sc)
281  {
282  	unsigned long nr = sc->nr_to_scan;
283  	int contention = 0;
284  	unsigned long freed;
285  	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
286  
287  	if (!clean_zn_cnt) {
288  		/*
289  		 * No clean znodes, nothing to reap. All we can do in this case
290  		 * is to kick background threads to start commit, which will
291  		 * probably make clean znodes which, in turn, will be freeable.
292  		 * And we return -1 which means will make VM call us again
293  		 * later.
294  		 */
295  		dbg_tnc("no clean znodes, kick a thread");
296  		return kick_a_thread();
297  	}
298  
299  	freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
300  	if (freed >= nr)
301  		goto out;
302  
303  	dbg_tnc("not enough old znodes, try to free young ones");
304  	freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
305  	if (freed >= nr)
306  		goto out;
307  
308  	dbg_tnc("not enough young znodes, free all");
309  	freed += shrink_tnc_trees(nr - freed, 0, &contention);
310  
311  	if (!freed && contention) {
312  		dbg_tnc("freed nothing, but contention");
313  		return SHRINK_STOP;
314  	}
315  
316  out:
317  	dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
318  	return freed;
319  }
320