kernel_samsung_a53x/drivers/md/bcache
Mingzhe Zou 3ebbf81da0 bcache: fixup lock c->root error
commit e34820f984512b433ee1fc291417e60c47d56727 upstream.

We hit a problem where I/O hung because it was waiting for the lock on
c->root to be released.

crash> cache_set.root -l cache_set.list ffffa03fde4c0050
  root = 0xffff802ef454c800
crash> btree -o 0xffff802ef454c800 | grep rw_semaphore
  [ffff802ef454c858] struct rw_semaphore lock;
crash> struct rw_semaphore ffff802ef454c858
struct rw_semaphore {
  count = {
    counter = -4294967297
  },
  wait_list = {
    next = 0xffff00006786fc28,
    prev = 0xffff00005d0efac8
  },
  wait_lock = {
    raw_lock = {
      {
        val = {
          counter = 0
        },
        {
          locked = 0 '\000',
          pending = 0 '\000'
        },
        {
          locked_pending = 0,
          tail = 0
        }
      }
    }
  },
  osq = {
    tail = {
      counter = 0
    }
  },
  owner = 0xffffa03fdc586603
}

The "counter = -4294967297" means that the lock count is -1 and a write
lock is being attempted. We then found that there is a btree node with a
lock counter of 1 in btree_cache_freeable.

crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache
  [ffffa03fde4c1140] struct list_head btree_cache;
  [ffffa03fde4c1150] struct list_head btree_cache_freeable;
  [ffffa03fde4c1160] struct list_head btree_cache_freed;
  [ffffa03fde4c1170] unsigned int btree_cache_used;
  [ffffa03fde4c1178] wait_queue_head_t btree_cache_wait;
  [ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock;
crash> list -H ffffa03fde4c1140|wc -l
973
crash> list -H ffffa03fde4c1150|wc -l
1123
crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050
  btree_cache_used = 2097
crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^  lock = {" > btree_cache.txt
crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^  lock = {" > btree_cache_freeable.txt
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd
/var/crash/127.0.0.1-2023-08-04-16:40:28
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0"
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0"
      counter = 1

We found that this is a bug in bch_sectors_dirty_init() when locking c->root:
    (1). Thread X holds the write lock on c->root (A).
    (2). Thread Y fails to lock c->root (A) and waits for that lock.
    (3). Thread X calls bch_btree_set_root(), changing c->root from A to B.
    (4). Thread X releases the lock on A (the old c->root).
    (5). Thread Y successfully locks A, which is no longer c->root.
    (6). Thread Y releases the lock on c->root, which is now B, a lock it
         never acquired, while the lock on A is never released.

        down_write locked ---(1)----------------------┐
                |                                     |
                |   down_read waiting ---(2)----┐     |
                |           |               ┌-------------┐ ┌-------------┐
        bch_btree_set_root ===(3)========>> | c->root   A | | c->root   B |
                |           |               └-------------┘ └-------------┘
            up_write ---(4)---------------------┘     |            |
                            |                         |            |
                    down_read locked ---(5)-----------┘            |
                            |                                      |
                        up_read ---(6)-----------------------------┘

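Since c->root may change while a thread is waiting for the lock, the correct
way to lock c->root is the one used by bch_root_usage(): after acquiring the
lock, compare the locked node with c->root again and retry if it has changed.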

static unsigned int bch_root_usage(struct cache_set *c)
{
        unsigned int bytes = 0;
        struct bkey *k;
        struct btree *b;
        struct btree_iter iter;

        goto lock_root;

        do {
                rw_unlock(false, b);
lock_root:
                b = c->root;
                rw_lock(false, b, b->level);
        } while (b != c->root);

        for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
                bytes += bkey_bytes(k);

        rw_unlock(false, b);

        return (bytes * 100) / btree_bytes(c);
}

Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded")
Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
Cc:  <stable@vger.kernel.org>
Signed-off-by: Coly Li <colyli@suse.de>
Link: https://lore.kernel.org/r/20231120052503.6122-7-colyli@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-11-18 12:10:57 +01:00
..
alloc.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
bcache.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
bset.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
bset.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
btree.c bcache: check return value from btree_node_alloc_replacement() 2024-11-18 12:10:57 +01:00
btree.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
closure.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
closure.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
debug.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
debug.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
extents.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
extents.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
features.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
features.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
io.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
journal.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
journal.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
Kconfig Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
Makefile Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
movinggc.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
request.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
request.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
stats.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
stats.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
super.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
sysfs.c bcache: prevent potential division by zero error 2024-11-18 12:10:57 +01:00
sysfs.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
trace.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
util.c Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
util.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00
writeback.c bcache: fixup lock c->root error 2024-11-18 12:10:57 +01:00
writeback.h Import A536BXXU9EXDC 2024-06-15 16:02:09 -03:00