1. 21 Aug, 2020 31 commits
    • Ido Schimmel's avatar
      mlxsw: core: Free EMAD transactions using kfree_rcu() · 38823915
      Ido Schimmel authored
      [ Upstream commit 3c8ce24b ]
      
      The lifetime of EMAD transactions (i.e., 'struct mlxsw_reg_trans') is
      managed using RCU. They are freed using kfree_rcu() once the transaction
      ends.
      
      However, in case the transaction failed it is freed immediately after being
      removed from the active transactions list. This is problematic because it is
      still possible for a different CPU to dereference the transaction from an RCU
      read-side critical section while traversing the active transaction list in
      mlxsw_emad_rx_listener_func(). In which case, a use-after-free is triggered
      [1].
      
      Fix this by freeing the transaction after a grace period by calling
      kfree_rcu().
      
      [1]
      BUG: KASAN: use-after-free in mlxsw_emad_rx_listener_func+0x969/0xac0 drivers/net/ethernet/mellanox/mlxsw/core.c:671
      Read of size 8 at addr ffff88800b7964e8 by task syz-executor.2/2881
      
      CPU: 0 PID: 2881 Comm: syz-executor.2 Not tainted 5.8.0-rc4+ #44
      Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
      Call Trace:
       <IRQ>
       __dump_stack lib/dump_stack.c:77 [inline]
       dump_stack+0xf6/0x16e lib/dump_stack.c:118
       print_address_description.constprop.0+0x1c/0x250 mm/kasan/report.c:383
       __kasan_report mm/kasan/report.c:513 [inline]
       kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530
       mlxsw_emad_rx_listener_func+0x969/0xac0 drivers/net/ethernet/mellanox/mlxsw/core.c:671
       mlxsw_core_skb_receive+0x571/0x700 drivers/net/ethernet/mellanox/mlxsw/core.c:2061
       mlxsw_pci_cqe_rdq_handle drivers/net/ethernet/mellanox/mlxsw/pci.c:595 [inline]
       mlxsw_pci_cq_tasklet+0x12a6/0x2520 drivers/net/ethernet/mellanox/mlxsw/pci.c:651
       tasklet_action_common.isra.0+0x13f/0x3e0 kernel/softirq.c:550
       __do_softirq+0x223/0x964 kernel/softirq.c:292
       asm_call_on_stack+0x12/0x20 arch/x86/entry/entry_64.S:711
       </IRQ>
       __run_on_irqstack arch/x86/include/asm/irq_stack.h:22 [inline]
       run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:48 [inline]
       do_softirq_own_stack+0x109/0x140 arch/x86/kernel/irq_64.c:77
       invoke_softirq kernel/softirq.c:387 [inline]
       __irq_exit_rcu kernel/softirq.c:417 [inline]
       irq_exit_rcu+0x16f/0x1a0 kernel/softirq.c:429
       sysvec_apic_timer_interrupt+0x4e/0xd0 arch/x86/kernel/apic/apic.c:1091
       asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:587
      RIP: 0010:arch_local_irq_restore arch/x86/include/asm/irqflags.h:85 [inline]
      RIP: 0010:__raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:160 [inline]
      RIP: 0010:_raw_spin_unlock_irqrestore+0x3b/0x40 kernel/locking/spinlock.c:191
      Code: e8 2a c3 f4 fc 48 89 ef e8 12 96 f5 fc f6 c7 02 75 11 53 9d e8 d6 db 11 fd 65 ff 0d 1f 21 b3 56 5b 5d c3 e8 a7 d7 11 fd 53 9d <eb> ed 0f 1f 00 55 48 89 fd 65 ff 05 05 21 b3 56 ff 74 24 08 48 8d
      RSP: 0018:ffff8880446ffd80 EFLAGS: 00000286
      RAX: 0000000000000006 RBX: 0000000000000286 RCX: 0000000000000006
      RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffa94ecea9
      RBP: ffff888012934408 R08: 0000000000000000 R09: 0000000000000000
      R10: 0000000000000001 R11: fffffbfff57be301 R12: 1ffff110088dffc1
      R13: ffff888037b817c0 R14: ffff88802442415a R15: ffff888024424000
       __do_sys_perf_event_open+0x1b5d/0x2bd0 kernel/events/core.c:11874
       do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:384
       entry_SYSCALL_64_after_hwframe+0x44/0xa9
      RIP: 0033:0x473dbd
      Code: Bad RIP value.
      RSP: 002b:00007f21e5e9cc28 EFLAGS: 00000246 ORIG_RAX: 000000000000012a
      RAX: ffffffffffffffda RBX: 000000000057bf00 RCX: 0000000000473dbd
      RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000040
      RBP: 000000000057bf00 R08: 0000000000000000 R09: 0000000000000000
      R10: 0000000000000003 R11: 0000000000000246 R12: 000000000057bf0c
      R13: 00007ffd0493503f R14: 00000000004d0f46 R15: 00007f21e5e9cd80
      
      Allocated by task 871:
       save_stack+0x1b/0x40 mm/kasan/common.c:48
       set_track mm/kasan/common.c:56 [inline]
       __kasan_kmalloc mm/kasan/common.c:494 [inline]
       __kasan_kmalloc.constprop.0+0xc2/0xd0 mm/kasan/common.c:467
       kmalloc include/linux/slab.h:555 [inline]
       kzalloc include/linux/slab.h:669 [inline]
       mlxsw_core_reg_access_emad+0x70/0x1410 drivers/net/ethernet/mellanox/mlxsw/core.c:1812
       mlxsw_core_reg_access+0xeb/0x540 drivers/net/ethernet/mellanox/mlxsw/core.c:1991
       mlxsw_sp_port_get_hw_xstats+0x335/0x7e0 drivers/net/ethernet/mellanox/mlxsw/spectrum.c:1130
       update_stats_cache+0xf4/0x140 drivers/net/ethernet/mellanox/mlxsw/spectrum.c:1173
       process_one_work+0xa3e/0x17a0 kernel/workqueue.c:2269
       worker_thread+0x9e/0x1050 kernel/workqueue.c:2415
       kthread+0x355/0x470 kernel/kthread.c:291
       ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:293
      
      Freed by task 871:
       save_stack+0x1b/0x40 mm/kasan/common.c:48
       set_track mm/kasan/common.c:56 [inline]
       kasan_set_free_info mm/kasan/common.c:316 [inline]
       __kasan_slab_free+0x12c/0x170 mm/kasan/common.c:455
       slab_free_hook mm/slub.c:1474 [inline]
       slab_free_freelist_hook mm/slub.c:1507 [inline]
       slab_free mm/slub.c:3072 [inline]
       kfree+0xe6/0x320 mm/slub.c:4052
       mlxsw_core_reg_access_emad+0xd45/0x1410 drivers/net/ethernet/mellanox/mlxsw/core.c:1819
       mlxsw_core_reg_access+0xeb/0x540 drivers/net/ethernet/mellanox/mlxsw/core.c:1991
       mlxsw_sp_port_get_hw_xstats+0x335/0x7e0 drivers/net/ethernet/mellanox/mlxsw/spectrum.c:1130
       update_stats_cache+0xf4/0x140 drivers/net/ethernet/mellanox/mlxsw/spectrum.c:1173
       process_one_work+0xa3e/0x17a0 kernel/workqueue.c:2269
       worker_thread+0x9e/0x1050 kernel/workqueue.c:2415
       kthread+0x355/0x470 kernel/kthread.c:291
       ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:293
      
      The buggy address belongs to the object at ffff88800b796400
       which belongs to the cache kmalloc-512 of size 512
      The buggy address is located 232 bytes inside of
       512-byte region [ffff88800b796400, ffff88800b796600)
      The buggy address belongs to the page:
      page:ffffea00002de500 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 head:ffffea00002de500 order:2 compound_mapcount:0 compound_pincount:0
      flags: 0x100000000010200(slab|head)
      raw: 0100000000010200 dead000000000100 dead000000000122 ffff88806c402500
      raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
      page dumped because: kasan: bad access detected
      
      Memory state around the buggy address:
       ffff88800b796380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
       ffff88800b796400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
      >ffff88800b796480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                                                ^
       ffff88800b796500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
       ffff88800b796580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
      
      Fixes: caf7297e ("mlxsw: core: Introduce support for asynchronous EMAD register access")
      Signed-off-by: default avatarIdo Schimmel <idosch@mellanox.com>
      Reviewed-by: default avatarJiri Pirko <jiri@mellanox.com>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      38823915
    • Ido Schimmel's avatar
      mlxsw: core: Increase scope of RCU read-side critical section · 26ac4d0d
      Ido Schimmel authored
      [ Upstream commit 7d8e8f34 ]
      
      The lifetime of the Rx listener item ('rxl_item') is managed using RCU,
      but is dereferenced outside of RCU read-side critical section, which can
      lead to a use-after-free.
      
      Fix this by increasing the scope of the RCU read-side critical section.
      
      Fixes: 93c1edb2 ("mlxsw: Introduce Mellanox switch driver core")
      Signed-off-by: default avatarIdo Schimmel <idosch@mellanox.com>
      Reviewed-by: default avatarJiri Pirko <jiri@mellanox.com>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      26ac4d0d
    • Jakub Kicinski's avatar
      mlx4: disable device on shutdown · 7a23750f
      Jakub Kicinski authored
      [ Upstream commit 3cab8c65 ]
      
      It appears that not disabling a PCI device on .shutdown may lead to
      a Hardware Error with particular (perhaps buggy) BIOS versions:
      
          mlx4_en: eth0: Close port called
          mlx4_en 0000:04:00.0: removed PHC
          reboot: Restarting system
          {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
          {1}[Hardware Error]: event severity: fatal
          {1}[Hardware Error]:  Error 0, type: fatal
          {1}[Hardware Error]:   section_type: PCIe error
          {1}[Hardware Error]:   port_type: 4, root port
          {1}[Hardware Error]:   version: 1.16
          {1}[Hardware Error]:   command: 0x4010, status: 0x0143
          {1}[Hardware Error]:   device_id: 0000:00:02.2
          {1}[Hardware Error]:   slot: 0
          {1}[Hardware Error]:   secondary_bus: 0x04
          {1}[Hardware Error]:   vendor_id: 0x8086, device_id: 0x2f06
          {1}[Hardware Error]:   class_code: 000604
          {1}[Hardware Error]:   bridge: secondary_status: 0x2000, control: 0x0003
          {1}[Hardware Error]:   aer_uncor_status: 0x00100000, aer_uncor_mask: 0x00000000
          {1}[Hardware Error]:   aer_uncor_severity: 0x00062030
          {1}[Hardware Error]:   TLP Header: 40000018 040000ff 791f4080 00000000
      [hw error repeats]
          Kernel panic - not syncing: Fatal hardware error!
          CPU: 0 PID: 2189 Comm: reboot Kdump: loaded Not tainted 5.6.x-blabla #1
          Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 05/05/2017
      
      Fix the mlx4 driver.
      
      This is a very similar problem to what had been fixed in:
      commit 0d98ba8d ("scsi: hpsa: disable device during shutdown")
      to address https://bugzilla.kernel.org/show_bug.cgi?id=199779.
      
      Fixes: 2ba5fbd6 ("net/mlx4_core: Handle AER flow properly")
      Reported-by: default avatarJake Lawrence <lawja@fb.com>
      Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
      Reviewed-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      7a23750f
    • Johan Hovold's avatar
      net: lan78xx: fix transfer-buffer memory leak · 1c7c7eb7
      Johan Hovold authored
      [ Upstream commit 63634aa6 ]
      
      The interrupt URB transfer-buffer was never freed on disconnect or after
      probe errors.
      
      Fixes: 55d7de9d ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
      Cc: Woojung.Huh@microchip.com <Woojung.Huh@microchip.com>
      Signed-off-by: default avatarJohan Hovold <johan@kernel.org>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      1c7c7eb7
    • Johan Hovold's avatar
      net: lan78xx: add missing endpoint sanity check · 0e87ce58
      Johan Hovold authored
      [ Upstream commit 8d8e95fd ]
      
      Add the missing endpoint sanity check to prevent a NULL-pointer
      dereference should a malicious device lack the expected endpoints.
      
      Note that the driver has a broken endpoint-lookup helper,
      lan78xx_get_endpoints(), which can end up accepting interfaces in an
      altsetting without endpoints as long as *some* altsetting has a bulk-in
      and a bulk-out endpoint.
      
      Fixes: 55d7de9d ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
      Cc: Woojung.Huh@microchip.com <Woojung.Huh@microchip.com>
      Signed-off-by: default avatarJohan Hovold <johan@kernel.org>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      0e87ce58
    • Michael Karcher's avatar
      sh: Fix validation of system call number · 795a4314
      Michael Karcher authored
      [ Upstream commit 04a8a3d0 ]
      
      The slow path for traced system call entries accessed a wrong memory
      location to get the number of the maximum allowed system call number.
      Renumber the numbered "local" label for the correct location to avoid
      collisions with actual local labels.
      Signed-off-by: default avatarMichael Karcher <kernel@mkarcher.dialup.fu-berlin.de>
      Tested-by: default avatarJohn Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
      Fixes: f3a83088 ("sh: Add a few missing irqflags tracing markers.")
      Signed-off-by: default avatarRich Felker <dalias@libc.org>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      795a4314
    • YueHaibing's avatar
      net/x25: Fix null-ptr-deref in x25_disconnect · 10ceab64
      YueHaibing authored
      commit 8999dc89 upstream.
      
      We should check null before do x25_neigh_put in x25_disconnect,
      otherwise may cause null-ptr-deref like this:
      
       #include <sys/socket.h>
       #include <linux/x25.h>
      
       int main() {
          int sck_x25;
          sck_x25 = socket(AF_X25, SOCK_SEQPACKET, 0);
          close(sck_x25);
          return 0;
       }
      
      BUG: kernel NULL pointer dereference, address: 00000000000000d8
      CPU: 0 PID: 4817 Comm: t2 Not tainted 5.7.0-rc3+ #159
      Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-
      RIP: 0010:x25_disconnect+0x91/0xe0
      Call Trace:
       x25_release+0x18a/0x1b0
       __sock_release+0x3d/0xc0
       sock_close+0x13/0x20
       __fput+0x107/0x270
       ____fput+0x9/0x10
       task_work_run+0x6d/0xb0
       exit_to_usermode_loop+0x102/0x110
       do_syscall_64+0x23c/0x260
       entry_SYSCALL_64_after_hwframe+0x49/0xb3
      
      Reported-by: syzbot+6db548b615e5aeefdce2@syzkaller.appspotmail.com
      Fixes: 4becb7ee ("net/x25: Fix x25_neigh refcnt leak when x25 disconnect")
      Signed-off-by: default avatarYueHaibing <yuehaibing@huawei.com>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      10ceab64
    • Xiyu Yang's avatar
      net/x25: Fix x25_neigh refcnt leak when x25 disconnect · 08560e93
      Xiyu Yang authored
      commit 4becb7ee upstream.
      
      x25_connect() invokes x25_get_neigh(), which returns a reference of the
      specified x25_neigh object to "x25->neighbour" with increased refcnt.
      
      When x25 connect success and returns, the reference still be hold by
      "x25->neighbour", so the refcount should be decreased in
      x25_disconnect() to keep refcount balanced.
      
      The reference counting issue happens in x25_disconnect(), which forgets
      to decrease the refcnt increased by x25_get_neigh() in x25_connect(),
      causing a refcnt leak.
      
      Fix this issue by calling x25_neigh_put() before x25_disconnect()
      returns.
      Signed-off-by: default avatarXiyu Yang <xiyuyang19@fudan.edu.cn>
      Signed-off-by: default avatarXin Tan <tanxin.ctf@gmail.com>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      08560e93
    • Rolf Eike Beer's avatar
      install several missing uapi headers · d472b7e2
      Rolf Eike Beer authored
      Commit fcc8487d ("uapi: export all headers
      under uapi directories") changed the default to install all headers not marked
      to be conditional. This takes the list of headers listed in the commit message
      and manually adds an export for those that are already present in this kernel
      version.
      
      Found during an attempt to build mtd-utils 2.1.2 as it wants hash_info.h, which
      exists since 3.13 but has not been installed until the above mentioned commit,
      which ended up in 4.12.
      Signed-off-by: default avatarRolf Eike Beer <eb@emlix.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      d472b7e2
    • Nicolas Dichtel's avatar
      uapi: includes linux/types.h before exporting files · 5a41bdbb
      Nicolas Dichtel authored
      commit 9078b4ee upstream.
      
      Some files will be exported after a following patch. 0-day tests report the
      following warning/error:
      ./usr/include/linux/bcache.h:8: include of <linux/types.h> is preferred over <asm/types.h>
      ./usr/include/linux/bcache.h:11: found __[us]{8,16,32,64} type without #include <linux/types.h>
      ./usr/include/linux/qrtr.h:8: found __[us]{8,16,32,64} type without #include <linux/types.h>
      ./usr/include/linux/cryptouser.h:39: found __[us]{8,16,32,64} type without #include <linux/types.h>
      ./usr/include/linux/pr.h:14: found __[us]{8,16,32,64} type without #include <linux/types.h>
      ./usr/include/linux/btrfs_tree.h:337: found __[us]{8,16,32,64} type without #include <linux/types.h>
      ./usr/include/rdma/bnxt_re-abi.h:45: found __[us]{8,16,32,64} type without #include <linux/types.h>
      Signed-off-by: default avatarNicolas Dichtel <nicolas.dichtel@6wind.com>
      Signed-off-by: default avatarMasahiro Yamada <yamada.masahiro@socionext.com>
      reb: left out include/uapi/rdma/bnxt_re-abi.h as it's not in this kernel version
      Signed-off-by: default avatarRolf Eike Beer <eb@emlix.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      5a41bdbb
    • Rik van Riel's avatar
      xfs: fix missed wakeup on l_flush_wait · b1835045
      Rik van Riel authored
      commit cdea5459 upstream.
      
      The code in xlog_wait uses the spinlock to make adding the task to
      the wait queue, and setting the task state to UNINTERRUPTIBLE atomic
      with respect to the waker.
      
      Doing the wakeup after releasing the spinlock opens up the following
      race condition:
      
      Task 1					task 2
      add task to wait queue
      					wake up task
      set task state to UNINTERRUPTIBLE
      
      This issue was found through code inspection as a result of kworkers
      being observed stuck in UNINTERRUPTIBLE state with an empty
      wait queue. It is rare and largely unreproducable.
      
      Simply moving the spin_unlock to after the wake_up_all results
      in the waker not being able to see a task on the waitqueue before
      it has set its state to UNINTERRUPTIBLE.
      
      This bug dates back to the conversion of this code to generic
      waitqueue infrastructure from a counting semaphore back in 2008
      which didn't place the wakeups consistently w.r.t. to the relevant
      spin locks.
      
      [dchinner: Also fix a similar issue in the shutdown path on
      xc_commit_wait. Update commit log with more details of the issue.]
      
      Fixes: d748c623 ("[XFS] Convert l_flushsema to a sv_t")
      Reported-by: default avatarChris Mason <clm@fb.com>
      Signed-off-by: default avatarRik van Riel <riel@surriel.com>
      Signed-off-by: default avatarDave Chinner <dchinner@redhat.com>
      Reviewed-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
      Signed-off-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
      Cc: stable@vger.kernel.org # 4.9.x-4.19.x
      [modified for contextual change near xlog_state_do_callback()]
      Signed-off-by: default avatarSamuel Mendoza-Jonas <samjonas@amazon.com>
      Reviewed-by: default avatarFrank van der Linden <fllinden@amazon.com>
      Reviewed-by: default avatarSuraj Jitindar Singh <surajjs@amazon.com>
      Reviewed-by: default avatarBenjamin Herrenschmidt <benh@amazon.com>
      Reviewed-by: default avatarAnchal Agarwal <anchalag@amazon.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      b1835045
    • Peilin Ye's avatar
      rds: Prevent kernel-infoleak in rds_notify_queue_get() · 2b9c45b6
      Peilin Ye authored
      commit bbc8a99e upstream.
      
      rds_notify_queue_get() is potentially copying uninitialized kernel stack
      memory to userspace since the compiler may leave a 4-byte hole at the end
      of `cmsg`.
      
      In 2016 we tried to fix this issue by doing `= { 0 };` on `cmsg`, which
      unfortunately does not always initialize that 4-byte hole. Fix it by using
      memset() instead.
      
      Cc: stable@vger.kernel.org
      Fixes: f037590f ("rds: fix a leak of kernel memory")
      Fixes: bdbe6fbc ("RDS: recv.c")
      Suggested-by: default avatarDan Carpenter <dan.carpenter@oracle.com>
      Signed-off-by: default avatarPeilin Ye <yepeilin.cs@gmail.com>
      Acked-by: default avatarSantosh Shilimkar <santosh.shilimkar@oracle.com>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      2b9c45b6
    • Tetsuo Handa's avatar
      fbdev: Detect integer underflow at "struct fbcon_ops"->clear_margins. · 1231b93e
      Tetsuo Handa authored
      [ Upstream commit 033724d6 ]
      
      syzbot is reporting general protection fault in bitfill_aligned() [1]
      caused by integer underflow in bit_clear_margins(). The cause of this
      problem is when and how do_vc_resize() updates vc->vc_{cols,rows}.
      
      If vc_do_resize() fails (e.g. kzalloc() fails) when var.xres or var.yres
      is going to shrink, vc->vc_{cols,rows} will not be updated. This allows
      bit_clear_margins() to see info->var.xres < (vc->vc_cols * cw) or
      info->var.yres < (vc->vc_rows * ch). Unexpectedly large rw or bh will
      try to overrun the __iomem region and causes general protection fault.
      
      Also, vc_resize(vc, 0, 0) does not set vc->vc_{cols,rows} = 0 due to
      
        new_cols = (cols ? cols : vc->vc_cols);
        new_rows = (lines ? lines : vc->vc_rows);
      
      exception. Since cols and lines are calculated as
      
        cols = FBCON_SWAP(ops->rotate, info->var.xres, info->var.yres);
        rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
        cols /= vc->vc_font.width;
        rows /= vc->vc_font.height;
        vc_resize(vc, cols, rows);
      
      in fbcon_modechanged(), var.xres < vc->vc_font.width makes cols = 0
      and var.yres < vc->vc_font.height makes rows = 0. This means that
      
        const int fd = open("/dev/fb0", O_ACCMODE);
        struct fb_var_screeninfo var = { };
        ioctl(fd, FBIOGET_VSCREENINFO, &var);
        var.xres = var.yres = 1;
        ioctl(fd, FBIOPUT_VSCREENINFO, &var);
      
      easily reproduces integer underflow bug explained above.
      
      Of course, callers of vc_resize() are not handling vc_do_resize() failure
      is bad. But we can't avoid vc_resize(vc, 0, 0) which returns 0. Therefore,
      as a band-aid workaround, this patch checks integer underflow in
      "struct fbcon_ops"->clear_margins call, assuming that
      vc->vc_cols * vc->vc_font.width and vc->vc_rows * vc->vc_font.heigh do not
      cause integer overflow.
      
      [1] https://syzkaller.appspot.com/bug?id=a565882df74fa76f10d3a6fec4be31098dbb37c6Reported-and-tested-by: default avatarsyzbot <syzbot+e5fd3e65515b48c02a30@syzkaller.appspotmail.com>
      Signed-off-by: default avatarTetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
      Acked-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
      Cc: stable <stable@vger.kernel.org>
      Link: https://lore.kernel.org/r/20200715015102.3814-1-penguin-kernel@I-love.SAKURA.ne.jpSigned-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      1231b93e
    • Joerg Roedel's avatar
      x86, vmlinux.lds: Page-align end of ..page_aligned sections · ce93e016
      Joerg Roedel authored
      [ Upstream commit de2b41be ]
      
      On x86-32 the idt_table with 256 entries needs only 2048 bytes. It is
      page-aligned, but the end of the .bss..page_aligned section is not
      guaranteed to be page-aligned.
      
      As a result, objects from other .bss sections may end up on the same 4k
      page as the idt_table, and will accidentially get mapped read-only during
      boot, causing unexpected page-faults when the kernel writes to them.
      
      This could be worked around by making the objects in the page aligned
      sections page sized, but that's wrong.
      
      Explicit sections which store only page aligned objects have an implicit
      guarantee that the object is alone in the page in which it is placed. That
      works for all objects except the last one. That's inconsistent.
      
      Enforcing page sized objects for these sections would wreckage memory
      sanitizers, because the object becomes artificially larger than it should
      be and out of bound access becomes legit.
      
      Align the end of the .bss..page_aligned and .data..page_aligned section on
      page-size so all objects places in these sections are guaranteed to have
      their own page.
      
      [ tglx: Amended changelog ]
      Signed-off-by: default avatarJoerg Roedel <jroedel@suse.de>
      Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
      Reviewed-by: default avatarKees Cook <keescook@chromium.org>
      Cc: stable@vger.kernel.org
      Link: https://lkml.kernel.org/r/20200721093448.10417-1-joro@8bytes.orgSigned-off-by: default avatarSasha Levin <sashal@kernel.org>
      ce93e016
    • Sami Tolvanen's avatar
      x86/build/lto: Fix truncated .bss with -fdata-sections · 70061298
      Sami Tolvanen authored
      [ Upstream commit 6a03469a ]
      
      With CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y, we compile the kernel with
      -fdata-sections, which also splits the .bss section.
      
      The new section, with a new .bss.* name, which pattern gets missed by the
      main x86 linker script which only expects the '.bss' name. This results
      in the discarding of the second part and a too small, truncated .bss
      section and an unhappy, non-working kernel.
      
      Use the common BSS_MAIN macro in the linker script to properly capture
      and merge all the generated BSS sections.
      Signed-off-by: default avatarSami Tolvanen <samitolvanen@google.com>
      Reviewed-by: default avatarNick Desaulniers <ndesaulniers@google.com>
      Reviewed-by: default avatarKees Cook <keescook@chromium.org>
      Cc: Borislav Petkov <bp@alien8.de>
      Cc: Kees Cook <keescook@chromium.org>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Nicholas Piggin <npiggin@gmail.com>
      Cc: Nick Desaulniers <ndesaulniers@google.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Link: http://lkml.kernel.org/r/20190415164956.124067-1-samitolvanen@google.com
      [ Extended the changelog. ]
      Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      70061298
    • Wang Hai's avatar
      9p/trans_fd: Fix concurrency del of req_list in p9_fd_cancelled/p9_read_work · 294711f9
      Wang Hai authored
      [ Upstream commit 74d6a5d5 ]
      
      p9_read_work and p9_fd_cancelled may be called concurrently.
      In some cases, req->req_list may be deleted by both p9_read_work
      and p9_fd_cancelled.
      
      We can fix it by ignoring replies associated with a cancelled
      request and ignoring cancelled request if message has been received
      before lock.
      
      Link: http://lkml.kernel.org/r/20200612090833.36149-1-wanghai38@huawei.com
      Fixes: 60ff779c ("9p: client: remove unused code and any reference to "cancelled" function")
      Cc: <stable@vger.kernel.org> # v3.12+
      Reported-by: syzbot+77a25acfa0382e06ab23@syzkaller.appspotmail.com
      Signed-off-by: default avatarWang Hai <wanghai38@huawei.com>
      Signed-off-by: default avatarDominique Martinet <asmadeus@codewreck.org>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      294711f9
    • Dominique Martinet's avatar
      9p/trans_fd: abort p9_read_work if req status changed · e73153ef
      Dominique Martinet authored
      [ Upstream commit e4ca13f7 ]
      
      p9_read_work would try to handle an errored req even if it got put to
      error state by another thread between the lookup (that worked) and the
      time it had been fully read.
      The request itself is safe to use because we hold a ref to it from the
      lookup (for m->rreq, so it was safe to read into the request data buffer
      until this point), but the req_list has been deleted at the same time
      status changed, and client_cb already has been called as well, so we
      should not do either.
      
      Link: http://lkml.kernel.org/r/1539057956-23741-1-git-send-email-asmadeus@codewreck.orgSigned-off-by: default avatarDominique Martinet <dominique.martinet@cea.fr>
      Reported-by: syzbot+2222c34dc40b515f30dc@syzkaller.appspotmail.com
      Cc: Eric Van Hensbergen <ericvh@gmail.com>
      Cc: Latchesar Ionkov <lucho@ionkov.net>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      e73153ef
    • Sheng Yong's avatar
      f2fs: check if file namelen exceeds max value · 7745e3c6
      Sheng Yong authored
      [ Upstream commit 720db068 ]
      
      Dentry bitmap is not enough to detect incorrect dentries. So this patch
      also checks the namelen value of a dentry.
      Signed-off-by: default avatarGong Chen <gongchen4@huawei.com>
      Signed-off-by: default avatarSheng Yong <shengyong1@huawei.com>
      Reviewed-by: default avatarChao Yu <yuchao0@huawei.com>
      Signed-off-by: default avatarJaegeuk Kim <jaegeuk@kernel.org>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      7745e3c6
    • Jaegeuk Kim's avatar
      f2fs: check memory boundary by insane namelen · f46a09b7
      Jaegeuk Kim authored
      [ Upstream commit 4e240d1b ]
      
      If namelen is corrupted to have very long value, fill_dentries can copy
      wrong memory area.
      Reviewed-by: default avatarChao Yu <yuchao0@huawei.com>
      Signed-off-by: default avatarJaegeuk Kim <jaegeuk@kernel.org>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      f46a09b7
    • Steve Cohen's avatar
      drm: hold gem reference until object is no longer accessed · 16d15f97
      Steve Cohen authored
      commit 8490d6a7 upstream.
      
      A use-after-free in drm_gem_open_ioctl can happen if the
      GEM object handle is closed between the idr lookup and
      retrieving the size from said object since a local reference
      is not being held at that point. Hold the local reference
      while the object can still be accessed to fix this and
      plug the potential security hole.
      Signed-off-by: default avatarSteve Cohen <cohens@codeaurora.org>
      Cc: stable@vger.kernel.org
      Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
      Link: https://patchwork.freedesktop.org/patch/msgid/1595284250-31580-1-git-send-email-cohens@codeaurora.orgSigned-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      16d15f97
    • Peilin Ye's avatar
      drm/amdgpu: Prevent kernel-infoleak in amdgpu_info_ioctl() · 59395c4e
      Peilin Ye authored
      commit 543e8669 upstream.
      
      Compiler leaves a 4-byte hole near the end of `dev_info`, causing
      amdgpu_info_ioctl() to copy uninitialized kernel stack memory to userspace
      when `size` is greater than 356.
      
      In 2015 we tried to fix this issue by doing `= {};` on `dev_info`, which
      unfortunately does not initialize that 4-byte hole. Fix it by using
      memset() instead.
      
      Cc: stable@vger.kernel.org
      Fixes: c193fa91 ("drm/amdgpu: information leak in amdgpu_info_ioctl()")
      Fixes: d38ceaf9 ("drm/amdgpu: add core driver (v4)")
      Suggested-by: default avatarDan Carpenter <dan.carpenter@oracle.com>
      Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
      Signed-off-by: default avatarPeilin Ye <yepeilin.cs@gmail.com>
      Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      59395c4e
    • Will Deacon's avatar
      ARM: 8986/1: hw_breakpoint: Don't invoke overflow handler on uaccess watchpoints · 544e9900
      Will Deacon authored
      commit eec13b42 upstream.
      
      Unprivileged memory accesses generated by the so-called "translated"
      instructions (e.g. LDRT) in kernel mode can cause user watchpoints to fire
      unexpectedly. In such cases, the hw_breakpoint logic will invoke the user
      overflow handler which will typically raise a SIGTRAP back to the current
      task. This is futile when returning back to the kernel because (a) the
      signal won't have been delivered and (b) userspace can't handle the thing
      anyway.
      
      Avoid invoking the user overflow handler for watchpoints triggered by
      kernel uaccess routines, and instead single-step over the faulting
      instruction as we would if no overflow handler had been installed.
      
      Cc: <stable@vger.kernel.org>
      Fixes: f81ef4a9 ("ARM: 6356/1: hw-breakpoint: add ARM backend for the hw-breakpoint framework")
      Reported-by: default avatarLuis Machado <luis.machado@linaro.org>
      Tested-by: default avatarLuis Machado <luis.machado@linaro.org>
      Signed-off-by: default avatarWill Deacon <will@kernel.org>
      Signed-off-by: default avatarRussell King <rmk+kernel@armlinux.org.uk>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      544e9900
    • Robert Hancock's avatar
      PCI/ASPM: Disable ASPM on ASMedia ASM1083/1085 PCIe-to-PCI bridge · 2d9dacf2
      Robert Hancock authored
      commit b361663c upstream.
      
      Recently ASPM handling was changed to allow ASPM on PCIe-to-PCI/PCI-X
      bridges.  Unfortunately the ASMedia ASM1083/1085 PCIe to PCI bridge device
      doesn't seem to function properly with ASPM enabled.  On an Asus PRIME
      H270-PRO motherboard, it causes errors like these:
      
        pcieport 0000:00:1c.0: AER: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Transmitter ID)
        pcieport 0000:00:1c.0: AER:   device [8086:a292] error status/mask=00003000/00002000
        pcieport 0000:00:1c.0: AER:    [12] Timeout
        pcieport 0000:00:1c.0: AER: Corrected error received: 0000:00:1c.0
        pcieport 0000:00:1c.0: AER: can't find device of ID00e0
      
      In addition to flooding the kernel log, this also causes the machine to
      wake up immediately after suspend is initiated.
      
      The device advertises ASPM L0s and L1 support in the Link Capabilities
      register, but the ASMedia web page for ASM1083 [1] claims "No PCIe ASPM
      support".
      
      Windows 10 (build 2004) enables L0s, but it also logs correctable PCIe
      errors.
      
      Add a quirk to disable ASPM for this device.
      
      [1] https://www.asmedia.com.tw/eng/e_show_products.php?cate_index=169&item=114
      
      [bhelgaas: commit log]
      Fixes: 66ff14e5 ("PCI/ASPM: Allow ASPM on links to PCIe-to-PCI/PCI-X Bridges")
      Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=208667
      Link: https://lore.kernel.org/r/20200722021803.17958-1-hancockrwd@gmail.comSigned-off-by: default avatarRobert Hancock <hancockrwd@gmail.com>
      Signed-off-by: default avatarBjorn Helgaas <bhelgaas@google.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      2d9dacf2
    • Navid Emamdoost's avatar
      ath9k: release allocated buffer if timed out · f0b65fee
      Navid Emamdoost authored
      [ Upstream commit 728c1e2a ]
      
      In ath9k_wmi_cmd, the allocated network buffer needs to be released
      if timeout happens. Otherwise memory will be leaked.
      Signed-off-by: default avatarNavid Emamdoost <navid.emamdoost@gmail.com>
      Signed-off-by: default avatarKalle Valo <kvalo@codeaurora.org>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      f0b65fee
    • Navid Emamdoost's avatar
      ath9k_htc: release allocated buffer if timed out · 86c3e3e5
      Navid Emamdoost authored
      [ Upstream commit 853acf7c ]
      
      In htc_config_pipe_credits, htc_setup_complete, and htc_connect_service
      if time out happens, the allocated buffer needs to be released.
      Otherwise there will be memory leak.
      Signed-off-by: default avatarNavid Emamdoost <navid.emamdoost@gmail.com>
      Signed-off-by: default avatarKalle Valo <kvalo@codeaurora.org>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      86c3e3e5
    • Navid Emamdoost's avatar
      media: rc: prevent memory leak in cx23888_ir_probe · fa5ac051
      Navid Emamdoost authored
      [ Upstream commit a7b2df76 ]
      
      In cx23888_ir_probe if kfifo_alloc fails the allocated memory for state
      should be released.
      Signed-off-by: default avatarNavid Emamdoost <navid.emamdoost@gmail.com>
      Signed-off-by: default avatarSean Young <sean@mess.org>
      Signed-off-by: default avatarMauro Carvalho Chehab <mchehab+samsung@kernel.org>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      fa5ac051
    • Navid Emamdoost's avatar
      crypto: ccp - Release all allocated memory if sha type is invalid · 65d77c87
      Navid Emamdoost authored
      [ Upstream commit 128c6642 ]
      
      Release all allocated memory if sha type is invalid:
      In ccp_run_sha_cmd, if the type of sha is invalid, the allocated
      hmac_buf should be released.
      
      v2: fix the goto.
      Signed-off-by: default avatarNavid Emamdoost <navid.emamdoost@gmail.com>
      Acked-by: default avatarGary R Hook <gary.hook@amd.com>
      Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      65d77c87
    • Wei Yongjun's avatar
      net: phy: mdio-bcm-unimac: fix potential NULL dereference in unimac_mdio_probe() · a7efa804
      Wei Yongjun authored
      [ Upstream commit 297a6961 ]
      
      platform_get_resource() may fail and return NULL, so we should
      better check it's return value to avoid a NULL pointer dereference
      a bit later in the code.
      
      This is detected by Coccinelle semantic patch.
      
      @@
      expression pdev, res, n, t, e, e1, e2;
      @@
      
      res = platform_get_resource(pdev, t, n);
      + if (!res)
      +   return -EINVAL;
      ... when != res == NULL
      e = devm_ioremap(e1, res->start, e2);
      Signed-off-by: default avatarWei Yongjun <weiyongjun1@huawei.com>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      a7efa804
    • Eric Sandeen's avatar
      xfs: don't call xfs_da_shrink_inode with NULL bp · beff051f
      Eric Sandeen authored
      [ Upstream commit bb3d48dc ]
      
      xfs_attr3_leaf_create may have errored out before instantiating a buffer,
      for example if the blkno is out of range.  In that case there is no work
      to do to remove it, and in fact xfs_da_shrink_inode will lead to an oops
      if we try.
      
      This also seems to fix a flaw where the original error from
      xfs_attr3_leaf_create gets overwritten in the cleanup case, and it
      removes a pointless assignment to bp which isn't used after this.
      
      Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199969Reported-by: default avatarXu, Wen <wen.xu@gatech.edu>
      Tested-by: default avatarXu, Wen <wen.xu@gatech.edu>
      Signed-off-by: default avatarEric Sandeen <sandeen@redhat.com>
      Reviewed-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
      Signed-off-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      beff051f
    • Dave Chinner's avatar
      xfs: validate cached inodes are free when allocated · 42c59d54
      Dave Chinner authored
      [ Upstream commit afca6c5b ]
      
      A recent fuzzed filesystem image cached random dcache corruption
      when the reproducer was run. This often showed up as panics in
      lookup_slow() on a null inode->i_ops pointer when doing pathwalks.
      
      BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
      ....
      Call Trace:
       lookup_slow+0x44/0x60
       walk_component+0x3dd/0x9f0
       link_path_walk+0x4a7/0x830
       path_lookupat+0xc1/0x470
       filename_lookup+0x129/0x270
       user_path_at_empty+0x36/0x40
       path_listxattr+0x98/0x110
       SyS_listxattr+0x13/0x20
       do_syscall_64+0xf5/0x280
       entry_SYSCALL_64_after_hwframe+0x42/0xb7
      
      but had many different failure modes including deadlocks trying to
      lock the inode that was just allocated or KASAN reports of
      use-after-free violations.
      
      The cause of the problem was a corrupt INOBT on a v4 fs where the
      root inode was marked as free in the inobt record. Hence when we
      allocated an inode, it chose the root inode to allocate, found it in
      the cache and re-initialised it.
      
      We recently fixed a similar inode allocation issue caused by inobt
      record corruption problem in xfs_iget_cache_miss() in commit
      ee457001 ("xfs: catch inode allocation state mismatch
      corruption"). This change adds similar checks to the cache-hit path
      to catch it, and turns the reproducer into a corruption shutdown
      situation.
      Reported-by: default avatarWen Xu <wen.xu@gatech.edu>
      Signed-Off-By: default avatarDave Chinner <dchinner@redhat.com>
      Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
      Reviewed-by: default avatarCarlos Maiolino <cmaiolino@redhat.com>
      Reviewed-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
      [darrick: fix typos in comment]
      Signed-off-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      42c59d54
    • Dave Chinner's avatar
      xfs: catch inode allocation state mismatch corruption · 6de46e2c
      Dave Chinner authored
      [ Upstream commit ee457001 ]
      
      We recently came across a V4 filesystem causing memory corruption
      due to a newly allocated inode being setup twice and being added to
      the superblock inode list twice. From code inspection, the only way
      this could happen is if a newly allocated inode was not marked as
      free on disk (i.e. di_mode wasn't zero).
      
      Running the metadump on an upstream debug kernel fails during inode
      allocation like so:
      
      XFS: Assertion failed: ip->i_d.di_nblocks == 0, file: fs/xfs/xfs_inod=
      e.c, line: 838
       ------------[ cut here ]------------
      kernel BUG at fs/xfs/xfs_message.c:114!
      invalid opcode: 0000 [#1] PREEMPT SMP
      CPU: 11 PID: 3496 Comm: mkdir Not tainted 4.16.0-rc5-dgc #442
      Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/0=
      1/2014
      RIP: 0010:assfail+0x28/0x30
      RSP: 0018:ffffc9000236fc80 EFLAGS: 00010202
      RAX: 00000000ffffffea RBX: 0000000000004000 RCX: 0000000000000000
      RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffff8227211b
      RBP: ffffc9000236fce8 R08: 0000000000000000 R09: 0000000000000000
      R10: 0000000000000bec R11: f000000000000000 R12: ffffc9000236fd30
      R13: ffff8805c76bab80 R14: ffff8805c77ac800 R15: ffff88083fb12e10
      FS:  00007fac8cbff040(0000) GS:ffff88083fd00000(0000) knlGS:0000000000000=
      000
      CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
      CR2: 00007fffa6783ff8 CR3: 00000005c6e2b003 CR4: 00000000000606e0
      Call Trace:
       xfs_ialloc+0x383/0x570
       xfs_dir_ialloc+0x6a/0x2a0
       xfs_create+0x412/0x670
       xfs_generic_create+0x1f7/0x2c0
       ? capable_wrt_inode_uidgid+0x3f/0x50
       vfs_mkdir+0xfb/0x1b0
       SyS_mkdir+0xcf/0xf0
       do_syscall_64+0x73/0x1a0
       entry_SYSCALL_64_after_hwframe+0x42/0xb7
      
      Extracting the inode number we crashed on from an event trace and
      looking at it with xfs_db:
      
      xfs_db> inode 184452204
      xfs_db> p
      core.magic = 0x494e
      core.mode = 0100644
      core.version = 2
      core.format = 2 (extents)
      core.nlinkv2 = 1
      core.onlink = 0
      .....
      
      Confirms that it is not a free inode on disk. xfs_repair
      also trips over this inode:
      
      .....
      zero length extent (off = 0, fsbno = 0) in ino 184452204
      correcting nextents for inode 184452204
      bad attribute fork in inode 184452204, would clear attr fork
      bad nblocks 1 for inode 184452204, would reset to 0
      bad anextents 1 for inode 184452204, would reset to 0
      imap claims in-use inode 184452204 is free, would correct imap
      would have cleared inode 184452204
      .....
      disconnected inode 184452204, would move to lost+found
      
      And so we have a situation where the directory structure and the
      inobt thinks the inode is free, but the inode on disk thinks it is
      still in use. Where this corruption came from is not possible to
      diagnose, but we can detect it and prevent the kernel from oopsing
      on lookup. The reproducer now results in:
      
      $ sudo mkdir /mnt/scratch/{0,1,2,3,4,5}{0,1,2,3,4,5}
      mkdir: cannot create directory =E2=80=98/mnt/scratch/00=E2=80=99: File ex=
      ists
      mkdir: cannot create directory =E2=80=98/mnt/scratch/01=E2=80=99: File ex=
      ists
      mkdir: cannot create directory =E2=80=98/mnt/scratch/03=E2=80=99: Structu=
      re needs cleaning
      mkdir: cannot create directory =E2=80=98/mnt/scratch/04=E2=80=99: Input/o=
      utput error
      mkdir: cannot create directory =E2=80=98/mnt/scratch/05=E2=80=99: Input/o=
      utput error
      ....
      
      And this corruption shutdown:
      
      [   54.843517] XFS (loop0): Corruption detected! Free inode 0xafe846c not=
       marked free on disk
      [   54.845885] XFS (loop0): Internal error xfs_trans_cancel at line 1023 =
      of file fs/xfs/xfs_trans.c.  Caller xfs_create+0x425/0x670
      [   54.848994] CPU: 10 PID: 3541 Comm: mkdir Not tainted 4.16.0-rc5-dgc #=
      443
      [   54.850753] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIO=
      S 1.10.2-1 04/01/2014
      [   54.852859] Call Trace:
      [   54.853531]  dump_stack+0x85/0xc5
      [   54.854385]  xfs_trans_cancel+0x197/0x1c0
      [   54.855421]  xfs_create+0x425/0x670
      [   54.856314]  xfs_generic_create+0x1f7/0x2c0
      [   54.857390]  ? capable_wrt_inode_uidgid+0x3f/0x50
      [   54.858586]  vfs_mkdir+0xfb/0x1b0
      [   54.859458]  SyS_mkdir+0xcf/0xf0
      [   54.860254]  do_syscall_64+0x73/0x1a0
      [   54.861193]  entry_SYSCALL_64_after_hwframe+0x42/0xb7
      [   54.862492] RIP: 0033:0x7fb73bddf547
      [   54.863358] RSP: 002b:00007ffdaa553338 EFLAGS: 00000246 ORIG_RAX: 0000=
      000000000053
      [   54.865133] RAX: ffffffffffffffda RBX: 00007ffdaa55449a RCX: 00007fb73=
      bddf547
      [   54.866766] RDX: 0000000000000001 RSI: 00000000000001ff RDI: 00007ffda=
      a55449a
      [   54.868432] RBP: 00007ffdaa55449a R08: 00000000000001ff R09: 00005623a=
      8670dd0
      [   54.870110] R10: 00007fb73be72d5b R11: 0000000000000246 R12: 000000000=
      00001ff
      [   54.871752] R13: 00007ffdaa5534b0 R14: 0000000000000000 R15: 00007ffda=
      a553500
      [   54.873429] XFS (loop0): xfs_do_force_shutdown(0x8) called from line 1=
      024 of file fs/xfs/xfs_trans.c.  Return address = ffffffff814cd050
      [   54.882790] XFS (loop0): Corruption of in-memory data detected.  Shutt=
      ing down filesystem
      [   54.884597] XFS (loop0): Please umount the filesystem and rectify the =
      problem(s)
      
      Note that this crash is only possible on v4 filesystemsi or v5
      filesystems mounted with the ikeep mount option. For all other V5
      filesystems, this problem cannot occur because we don't read inodes
      we are allocating from disk - we simply overwrite them with the new
      inode information.
      Signed-Off-By: default avatarDave Chinner <dchinner@redhat.com>
      Reviewed-by: default avatarCarlos Maiolino <cmaiolino@redhat.com>
      Tested-by: default avatarCarlos Maiolino <cmaiolino@redhat.com>
      Reviewed-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
      Signed-off-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
      Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
      6de46e2c
  2. 31 Jul, 2020 9 commits