• Mingzhe Zou's avatar
    bcache: fixup lock c->root error · e34820f9
    Mingzhe Zou authored
    We hit a problem where I/O hung because it was waiting for the lock on
    c->root to be released.
    
    crash> cache_set.root -l cache_set.list ffffa03fde4c0050
      root = 0xffff802ef454c800
    crash> btree -o 0xffff802ef454c800 | grep rw_semaphore
      [ffff802ef454c858] struct rw_semaphore lock;
    crash> struct rw_semaphore ffff802ef454c858
    struct rw_semaphore {
      count = {
        counter = -4294967297
      },
      wait_list = {
        next = 0xffff00006786fc28,
        prev = 0xffff00005d0efac8
      },
      wait_lock = {
        raw_lock = {
          {
            val = {
              counter = 0
            },
            {
              locked = 0 '\000',
              pending = 0 '\000'
            },
            {
              locked_pending = 0,
              tail = 0
            }
          }
        }
      },
      osq = {
        tail = {
          counter = 0
        }
      },
      owner = 0xffffa03fdc586603
    }
    
    The "counter = -4294967297" means that lock count is -1 and a write lock
    is being attempted. Then, we found that there is a btree with a counter
    of 1 in btree_cache_freeable.
    
    crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache
      [ffffa03fde4c1140] struct list_head btree_cache;
      [ffffa03fde4c1150] struct list_head btree_cache_freeable;
      [ffffa03fde4c1160] struct list_head btree_cache_freed;
      [ffffa03fde4c1170] unsigned int btree_cache_used;
      [ffffa03fde4c1178] wait_queue_head_t btree_cache_wait;
      [ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock;
    crash> list -H ffffa03fde4c1140|wc -l
    973
    crash> list -H ffffa03fde4c1150|wc -l
    1123
    crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050
      btree_cache_used = 2097
    crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^  lock = {" > btree_cache.txt
    crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^  lock = {" > btree_cache_freeable.txt
    [root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd
    /var/crash/127.0.0.1-2023-08-04-16:40:28
    [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0"
    [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0"
          counter = 1
    
    We found that this is caused by a race in bch_sectors_dirty_init() when
    locking c->root:
        (1). Thread X holds the write lock on c->root (A).
        (2). Thread Y fails to lock c->root (A) and waits for the lock on A.
        (3). Thread X calls bch_btree_set_root(), changing c->root from A to B.
        (4). Thread X releases the lock on A.
        (5). Thread Y successfully locks A.
        (6). Thread Y unlocks c->root, which is now B, so the lock it took
             on A is never released.
    
            down_write locked ---(1)----------------------┐
                    |                                     |
                    |   down_read waiting ---(2)----┐     |
                    |           |               ┌-------------┐ ┌-------------┐
            bch_btree_set_root ===(3)========>> | c->root   A | | c->root   B |
                    |           |               └-------------┘ └-------------┘
                up_write ---(4)---------------------┘     |            |
                                |                         |            |
                        down_read locked ---(5)-----------┘            |
                                |                                      |
                            up_read ---(6)-----------------------------┘
    
    Since c->root may change, the correct steps to lock c->root should be
    the same as bch_root_usage(), compare after locking.
    
    /*
     * Lock the current root node, sum bkey_bytes() over the keys selected by
     * for_each_key_filter(..., bch_ptr_bad), and return that sum as
     * (bytes * 100) / btree_bytes(c).
     *
     * c->root may be replaced while we are waiting for the lock, so the root
     * pointer is re-checked after the lock has been taken.
     */
    static unsigned int bch_root_usage(struct cache_set *c)
    {
            unsigned int bytes = 0;
            struct bkey *k;
            struct btree *b;
            struct btree_iter iter;
    
            /* Nothing is locked yet: skip the unlock on the first pass. */
            goto lock_root;
    
            do {
                    /* The node we locked is no longer the root; drop it and retry. */
                    rw_unlock(false, b);
    lock_root:
                    /* Snapshot the root pointer, then lock that exact node. */
                    b = c->root;
                    rw_lock(false, b, b->level);
                    /* Compare after locking: loop again if c->root changed meanwhile. */
            } while (b != c->root);
    
            for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
                    bytes += bkey_bytes(k);
    
            /* Unlock the node we actually locked (b), not a re-read of c->root. */
            rw_unlock(false, b);
    
            return (bytes * 100) / btree_bytes(c);
    }
    
    Fixes: b144e45f ("bcache: make bch_sectors_dirty_init() to be multithreaded")
    Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
    Cc: <stable@vger.kernel.org>
    Signed-off-by: Coly Li <colyli@suse.de>
    Link: https://lore.kernel.org/r/20231120052503.6122-7-colyli@suse.de
    Signed-off-by: Jens Axboe <axboe@kernel.dk>
    e34820f9
writeback.c 28.4 KB