From debf1dee538927b033b3c106d40294ce9287bdb3 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf
Date: Sat, 26 Dec 2020 13:19:35 -0800
Subject: [PATCH] mbcache: Speed up cache entry creation

In order to prevent redundant entry creation by racing against itself,
mb_cache_entry_create scans through a large hash-list of all current
entries in order to see if another allocation for the requested new
entry has been made. Furthermore, it allocates memory for a new entry
before scanning through this hash-list, which results in that allocated
memory being discarded when the requested new entry is already present.
This happens more than half the time.

Speed up cache entry creation by keeping a small linked list of
requested new entries in progress, and scanning through that first
instead of the large hash-list. Additionally, don't bother allocating
memory for a new entry until it's known that the allocated memory will
be used.

Signed-off-by: Sultan Alsawaf
---
 fs/mbcache.c | 90 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 62 insertions(+), 28 deletions(-)

diff --git a/fs/mbcache.c b/fs/mbcache.c
index 95b047256..b01bcc849 100755
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -27,7 +27,7 @@ struct mb_cache {
 	/* Hash table of entries */
-	struct hlist_bl_head	*c_hash;
+	struct mb_bucket	*c_bucket;
 	/* log2 of hash table size */
 	int			c_bucket_bits;
 	/* Maximum entries in cache to avoid degrading hash too much */
@@ -42,6 +42,17 @@ struct mb_cache {
 	struct work_struct	c_shrink_work;
 };
 
+struct mb_bucket {
+	struct hlist_bl_head hash;
+	struct list_head req_list;
+};
+
+struct mb_cache_req {
+	struct list_head lnode;
+	u32 e_key;
+	u64 e_value;
+};
+
 static struct kmem_cache *mb_entry_cache;
 
 static unsigned long mb_cache_shrink(struct mb_cache *cache,
@@ -50,7 +61,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
 static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
 							u32 key)
 {
-	return &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
+	return &cache->c_bucket[hash_32(key, cache->c_bucket_bits)].hash;
 }
 
 /*
@@ -77,6 +88,11 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
 	struct mb_cache_entry *entry, *dup;
 	struct hlist_bl_node *dup_node;
 	struct hlist_bl_head *head;
+	struct mb_cache_req *tmp_req, req = {
+		.e_key = key,
+		.e_value = value
+	};
+	struct mb_bucket *bucket;
 
 	/* Schedule background reclaim if there are too many entries */
 	if (cache->c_entry_count >= cache->c_max_entries)
@@ -85,11 +101,32 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
 	if (cache->c_entry_count >= 2*cache->c_max_entries)
 		mb_cache_shrink(cache, SYNC_SHRINK_BATCH);
 
-	entry = kmem_cache_alloc(mb_entry_cache, mask);
-	if (!entry)
-		return -ENOMEM;
+	bucket = &cache->c_bucket[hash_32(key, cache->c_bucket_bits)];
+	head = &bucket->hash;
+	hlist_bl_lock(head);
+	list_for_each_entry(tmp_req, &bucket->req_list, lnode) {
+		if (tmp_req->e_key == key && tmp_req->e_value == value) {
+			hlist_bl_unlock(head);
+			return -EBUSY;
+		}
+	}
+	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
+		if (dup->e_key == key && dup->e_value == value) {
+			hlist_bl_unlock(head);
+			return -EBUSY;
+		}
+	}
+	list_add(&req.lnode, &bucket->req_list);
+	hlist_bl_unlock(head);
+
+	entry = kmem_cache_alloc(mb_entry_cache, mask);
+	if (!entry) {
+		hlist_bl_lock(head);
+		list_del(&req.lnode);
+		hlist_bl_unlock(head);
+		return -ENOMEM;
+	}
 
-	INIT_LIST_HEAD(&entry->e_list);
 	/*
 	 * We create entry with two references. One reference is kept by the
 	 * hash table, the other reference is used to protect us from
@@ -97,21 +134,16 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
 	 * avoids nesting of cache->c_list_lock into hash table bit locks which
 	 * is problematic for RT.
 	 */
-	atomic_set(&entry->e_refcnt, 2);
-	entry->e_key = key;
-	entry->e_value = value;
-	entry->e_flags = 0;
-	if (reusable)
-		set_bit(MBE_REUSABLE_B, &entry->e_flags);
-	head = mb_cache_entry_head(cache, key);
+	*entry = (typeof(*entry)){
+		.e_list = LIST_HEAD_INIT(entry->e_list),
+		.e_refcnt = ATOMIC_INIT(2),
+		.e_key = key,
+		.e_value = value,
+		.e_flags = reusable ? MBE_REUSABLE_B : 0
+	};
+
 	hlist_bl_lock(head);
-	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
-		if (dup->e_key == key && dup->e_value == value) {
-			hlist_bl_unlock(head);
-			kmem_cache_free(mb_entry_cache, entry);
-			return -EBUSY;
-		}
-	}
+	list_del(&req.lnode);
 	hlist_bl_add_head(&entry->e_hash_list, head);
 	hlist_bl_unlock(head);
 	spin_lock(&cache->c_list_lock);
@@ -404,21 +436,23 @@ struct mb_cache *mb_cache_create(int bucket_bits)
 	cache->c_max_entries = bucket_count << 4;
 	INIT_LIST_HEAD(&cache->c_list);
 	spin_lock_init(&cache->c_list_lock);
-	cache->c_hash = kmalloc_array(bucket_count,
-				      sizeof(struct hlist_bl_head),
-				      GFP_KERNEL);
-	if (!cache->c_hash) {
+	cache->c_bucket = kmalloc_array(bucket_count,
+					sizeof(*cache->c_bucket),
+					GFP_KERNEL);
+	if (!cache->c_bucket) {
 		kfree(cache);
 		goto err_out;
 	}
-	for (i = 0; i < bucket_count; i++)
-		INIT_HLIST_BL_HEAD(&cache->c_hash[i]);
+	for (i = 0; i < bucket_count; i++) {
+		INIT_HLIST_BL_HEAD(&cache->c_bucket[i].hash);
+		INIT_LIST_HEAD(&cache->c_bucket[i].req_list);
+	}
 
 	cache->c_shrink.count_objects = mb_cache_count;
 	cache->c_shrink.scan_objects = mb_cache_scan;
 	cache->c_shrink.seeks = DEFAULT_SEEKS;
 	if (register_shrinker(&cache->c_shrink)) {
-		kfree(cache->c_hash);
+		kfree(cache->c_bucket);
 		kfree(cache);
 		goto err_out;
 	}
@@ -454,7 +488,7 @@ void mb_cache_destroy(struct mb_cache *cache)
 		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
 		mb_cache_entry_put(cache, entry);
 	}
-	kfree(cache->c_hash);
+	kfree(cache->c_bucket);
 	kfree(cache);
 }
 EXPORT_SYMBOL(mb_cache_destroy);
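
For review purposes only, not part of the patch to apply: below is a minimal
userspace sketch of the pending-request idea the commit message describes. The
names (struct bucket, struct pending_req, entry_create) are made up for the
example, and a pthread mutex stands in for the hlist_bl bit lock. A creator
publishes its (key, value) intent on a short per-bucket list before allocating,
so a concurrent creator of the same entry returns -EBUSY early instead of
allocating memory it will only throw away.

	/* Illustrative sketch of the pending-request technique; assumed names. */
	#include <errno.h>
	#include <pthread.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct pending_req {		/* lives on the creator's stack */
		struct pending_req *next;
		uint32_t key;
		uint64_t value;
	};

	struct cache_entry {
		struct cache_entry *next;
		uint32_t key;
		uint64_t value;
	};

	struct bucket {
		pthread_mutex_t lock;	/* stands in for the hlist_bl bit lock */
		struct cache_entry *entries;
		struct pending_req *reqs;
	};

	static int entry_create(struct bucket *b, uint32_t key, uint64_t value)
	{
		struct pending_req req = { .key = key, .value = value };
		struct pending_req *r, **pp;
		struct cache_entry *e;

		pthread_mutex_lock(&b->lock);
		/* 1. Is another creator already working on this (key, value)? */
		for (r = b->reqs; r; r = r->next) {
			if (r->key == key && r->value == value) {
				pthread_mutex_unlock(&b->lock);
				return -EBUSY;
			}
		}
		/* 2. Is the entry already present in the bucket? */
		for (e = b->entries; e; e = e->next) {
			if (e->key == key && e->value == value) {
				pthread_mutex_unlock(&b->lock);
				return -EBUSY;
			}
		}
		/* 3. Publish our intent, then allocate outside the lock. */
		req.next = b->reqs;
		b->reqs = &req;
		pthread_mutex_unlock(&b->lock);

		e = malloc(sizeof(*e));

		pthread_mutex_lock(&b->lock);
		/* 4. Retire the request; insert the entry if allocation worked. */
		for (pp = &b->reqs; *pp != &req; pp = &(*pp)->next)
			;
		*pp = req.next;
		if (!e) {
			pthread_mutex_unlock(&b->lock);
			return -ENOMEM;
		}
		e->key = key;
		e->value = value;
		e->next = b->entries;
		b->entries = e;
		pthread_mutex_unlock(&b->lock);
		return 0;
	}

	int main(void)
	{
		struct bucket b = { .lock = PTHREAD_MUTEX_INITIALIZER };

		printf("first create:  %d\n", entry_create(&b, 1, 100)); /* 0 */
		printf("second create: %d\n", entry_create(&b, 1, 100)); /* -EBUSY */
		return 0;
	}

As in the patch, the request node lives on the caller's stack, so publishing
the in-progress creation costs no allocation at all; memory is only allocated
once it is known that no duplicate entry or duplicate request exists.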