1. 03 Mar, 2016 40 commits
    • Paolo Bonzini's avatar
      KVM: x86: fix missed hardware breakpoints · fc90441e
      Paolo Bonzini authored
      commit 172b2386 upstream.
      
      Sometimes when setting a breakpoint a process doesn't stop on it.
      This is because the debug registers are not loaded correctly on
      VCPU load.
      
      The following simple reproducer from Oleg Nesterov tries using debug
      registers in two threads.  To see the bug, run a 2-VCPU guest with
      "taskset -c 0" and run "./bp 0 1" inside the guest.
      
          #include <unistd.h>
          #include <signal.h>
          #include <stdlib.h>
          #include <stdio.h>
          #include <sys/wait.h>
          #include <sys/ptrace.h>
          #include <sys/user.h>
          #include <asm/debugreg.h>
          #include <assert.h>
      
          #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
      
          unsigned long encode_dr7(int drnum, int enable, unsigned int type, unsigned int len)
          {
              unsigned long dr7;
      
              dr7 = ((len | type) & 0xf)
                  << (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
              if (enable)
                  dr7 |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));
      
              return dr7;
          }
      
          int write_dr(int pid, int dr, unsigned long val)
          {
              return ptrace(PTRACE_POKEUSER, pid,
                      offsetof (struct user, u_debugreg[dr]),
                      val);
          }
      
          void set_bp(pid_t pid, void *addr)
          {
              unsigned long dr7;
              assert(write_dr(pid, 0, (long)addr) == 0);
              dr7 = encode_dr7(0, 1, DR_RW_EXECUTE, DR_LEN_1);
              assert(write_dr(pid, 7, dr7) == 0);
          }
      
          void *get_rip(int pid)
          {
              return (void*)ptrace(PTRACE_PEEKUSER, pid,
                      offsetof(struct user, regs.rip), 0);
          }
      
          void test(int nr)
          {
              void *bp_addr = &&label + nr, *bp_hit;
              int pid;
      
              printf("test bp %d\n", nr);
              assert(nr < 16); // see 16 asm nops below
      
              pid = fork();
              if (!pid) {
                  assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0);
                  kill(getpid(), SIGSTOP);
                  for (;;) {
                      label: asm (
                          "nop; nop; nop; nop;"
                          "nop; nop; nop; nop;"
                          "nop; nop; nop; nop;"
                          "nop; nop; nop; nop;"
                      );
                  }
              }
      
              assert(pid == wait(NULL));
              set_bp(pid, bp_addr);
      
              for (;;) {
                  assert(ptrace(PTRACE_CONT, pid, 0, 0) == 0);
                  assert(pid == wait(NULL));
      
                  bp_hit = get_rip(pid);
                  if (bp_hit != bp_addr)
                      fprintf(stderr, "ERR!! hit wrong bp %ld != %d\n",
                          bp_hit - &&label, nr);
              }
          }
      
          int main(int argc, const char *argv[])
          {
              while (--argc) {
                  int nr = atoi(*++argv);
                  if (!fork())
                      test(nr);
              }
      
              while (wait(NULL) > 0)
                  ;
              return 0;
          }
      Suggested-by: default avatarNadav Amit <namit@cs.technion.ac.il>
      Reported-by: default avatarAndrey Wagin <avagin@gmail.com>
      Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      fc90441e
    • Mark Rutland's avatar
      KVM: arm/arm64: vgic: Ensure bitmaps are long enough · d62cca11
      Mark Rutland authored
      commit 236cf17c upstream.
      
      When we allocate bitmaps in vgic_vcpu_init_maps, we divide the number of
      bits we need by 8 to figure out how many bytes to allocate. However,
      bitmap elements are always accessed as unsigned longs, and if we didn't
      happen to allocate a size such that size % sizeof(unsigned long) == 0,
      bitmap accesses may go past the end of the allocation.
      
      When using KASAN (which does byte-granular access checks), this results
      in a continuous stream of BUGs whenever these bitmaps are accessed:
      
      =============================================================================
      BUG kmalloc-128 (Tainted: G    B          ): kasan: bad access detected
      -----------------------------------------------------------------------------
      
      INFO: Allocated in vgic_init.part.25+0x55c/0x990 age=7493 cpu=3 pid=1730
      INFO: Slab 0xffffffbde6d5da40 objects=16 used=15 fp=0xffffffc935769700 flags=0x4000000000000080
      INFO: Object 0xffffffc935769500 @offset=1280 fp=0x          (null)
      
      Bytes b4 ffffffc9357694f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Object ffffffc935769500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Object ffffffc935769510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Object ffffffc935769520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Object ffffffc935769530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Object ffffffc935769540: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Object ffffffc935769550: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Object ffffffc935769560: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Object ffffffc935769570: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Padding ffffffc9357695b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Padding ffffffc9357695c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Padding ffffffc9357695d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Padding ffffffc9357695e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      Padding ffffffc9357695f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      CPU: 3 PID: 1740 Comm: kvm-vcpu-0 Tainted: G    B           4.4.0+ #17
      Hardware name: ARM Juno development board (r1) (DT)
      Call trace:
      [<ffffffc00008e770>] dump_backtrace+0x0/0x280
      [<ffffffc00008ea04>] show_stack+0x14/0x20
      [<ffffffc000726360>] dump_stack+0x100/0x188
      [<ffffffc00030d324>] print_trailer+0xfc/0x168
      [<ffffffc000312294>] object_err+0x3c/0x50
      [<ffffffc0003140fc>] kasan_report_error+0x244/0x558
      [<ffffffc000314548>] __asan_report_load8_noabort+0x48/0x50
      [<ffffffc000745688>] __bitmap_or+0xc0/0xc8
      [<ffffffc0000d9e44>] kvm_vgic_flush_hwstate+0x1bc/0x650
      [<ffffffc0000c514c>] kvm_arch_vcpu_ioctl_run+0x2ec/0xa60
      [<ffffffc0000b9a6c>] kvm_vcpu_ioctl+0x474/0xa68
      [<ffffffc00036b7b0>] do_vfs_ioctl+0x5b8/0xcb0
      [<ffffffc00036bf34>] SyS_ioctl+0x8c/0xa0
      [<ffffffc000086cb0>] el0_svc_naked+0x24/0x28
      Memory state around the buggy address:
       ffffffc935769400: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc
       ffffffc935769480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
      >ffffffc935769500: 04 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                         ^
       ffffffc935769580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
       ffffffc935769600: 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc
      ==================================================================
      
      Fix the issue by always allocating a multiple of sizeof(unsigned long),
      as we do elsewhere in the vgic code.
      
      Fixes: c1bfb577 ("arm/arm64: KVM: vgic: switch to dynamic allocation")
      Acked-by: default avatarMarc Zyngier <marc.zyngier@arm.com>
      Acked-by: default avatarChristoffer Dall <christoffer.dall@linaro.org>
      Signed-off-by: default avatarMark Rutland <mark.rutland@arm.com>
      Signed-off-by: default avatarMarc Zyngier <marc.zyngier@arm.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      d62cca11
    • Christian Borntraeger's avatar
      KVM: async_pf: do not warn on page allocation failures · 85212a36
      Christian Borntraeger authored
      commit d7444794 upstream.
      
      In async_pf we try to allocate with NOWAIT to get an element quickly
      or fail. This code also handle failures gracefully. Lets silence
      potential page allocation failures under load.
      
      qemu-system-s39: page allocation failure: order:0,mode:0x2200000
      [...]
      Call Trace:
      ([<00000000001146b8>] show_trace+0xf8/0x148)
      [<000000000011476a>] show_stack+0x62/0xe8
      [<00000000004a36b8>] dump_stack+0x70/0x98
      [<0000000000272c3a>] warn_alloc_failed+0xd2/0x148
      [<000000000027709e>] __alloc_pages_nodemask+0x94e/0xb38
      [<00000000002cd36a>] new_slab+0x382/0x400
      [<00000000002cf7ac>] ___slab_alloc.constprop.30+0x2dc/0x378
      [<00000000002d03d0>] kmem_cache_alloc+0x160/0x1d0
      [<0000000000133db4>] kvm_setup_async_pf+0x6c/0x198
      [<000000000013dee8>] kvm_arch_vcpu_ioctl_run+0xd48/0xd58
      [<000000000012fcaa>] kvm_vcpu_ioctl+0x372/0x690
      [<00000000002f66f6>] do_vfs_ioctl+0x3be/0x510
      [<00000000002f68ec>] SyS_ioctl+0xa4/0xb8
      [<0000000000781c5e>] system_call+0xd6/0x264
      [<000003ffa24fa06a>] 0x3ffa24fa06a
      Signed-off-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
      Reviewed-by: default avatarDominik Dingel <dingel@linux.vnet.ibm.com>
      Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      85212a36
    • Robin Murphy's avatar
      of/irq: Fix msi-map calculation for nonzero rid-base · e1d61091
      Robin Murphy authored
      commit 5d589d81 upstream.
      
      The existing msi-map code is fine for shifting the entire RID space
      upwards, but attempting finer-grained remapping reveals a bug. It turns
      out that we are mistakenly treating the msi-base part as an offset, not
      as a new base to remap onto, so things get squiffy when rid-base is
      nonzero. Fix this, and at the same time add a sanity check against
      having msi-map-mask clash with a nonzero rid-base, as that's another
      thing one can easily get wrong.
      Signed-off-by: default avatarRobin Murphy <robin.murphy@arm.com>
      Reviewed-by: default avatarMarc Zyngier <marc.zyngier@arm.com>
      Tested-by: default avatarStuart Yoder <stuart.yoder@nxp.com>
      Acked-by: default avatarMark Rutland <mark.rutland@arm.com>
      Acked-by: default avatarDavid Daney <david.daney@cavium.com>
      Signed-off-by: default avatarRob Herring <robh@kernel.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      e1d61091
    • Benjamin Coddington's avatar
      NFSv4: Fix a dentry leak on alias use · b8558ada
      Benjamin Coddington authored
      commit d9dfd8d7 upstream.
      
      In the case where d_add_unique() finds an appropriate alias to use it will
      have already incremented the reference count.  An additional dget() to swap
      the open context's dentry is unnecessary and will leak a reference.
      Signed-off-by: default avatarBenjamin Coddington <bcodding@redhat.com>
      Fixes: 275bb307 ("NFSv4: Move dentry instantiation into the NFSv4-...")
      Signed-off-by: default avatarTrond Myklebust <trond.myklebust@primarydata.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      b8558ada
    • Christoph Hellwig's avatar
      nfs: fix nfs_size_to_loff_t · fd921e57
      Christoph Hellwig authored
      commit 50ab8ec7 upstream.
      
      See http: //www.infradead.org/rpr.html
      X-Evolution-Source: 1451162204.2173.11@leira.trondhjem.org
      Content-Transfer-Encoding: 8bit
      Mime-Version: 1.0
      
      We support OFFSET_MAX just fine, so don't round down below it.  Also
      switch to using min_t to make the helper more readable.
      Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
      Fixes: 433c9237 ("NFS: Clean up nfs_size_to_loff_t()")
      Signed-off-by: default avatarTrond Myklebust <trond.myklebust@primarydata.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      fd921e57
    • Mike Krinkin's avatar
      block: fix use-after-free in dio_bio_complete · 68a91855
      Mike Krinkin authored
      commit 7ddc971f upstream.
      
      kasan reported the following error when i ran xfstest:
      
      [  701.826854] ==================================================================
      [  701.826864] BUG: KASAN: use-after-free in dio_bio_complete+0x41a/0x600 at addr ffff880080b95f94
      [  701.826870] Read of size 4 by task loop2/3874
      [  701.826879] page:ffffea000202e540 count:0 mapcount:0 mapping:          (null) index:0x0
      [  701.826890] flags: 0x100000000000000()
      [  701.826895] page dumped because: kasan: bad access detected
      [  701.826904] CPU: 3 PID: 3874 Comm: loop2 Tainted: G    B   W    L  4.5.0-rc1-next-20160129 #83
      [  701.826910] Hardware name: LENOVO 23205NG/23205NG, BIOS G2ET95WW (2.55 ) 07/09/2013
      [  701.826917]  ffff88008fadf800 ffff88008fadf758 ffffffff81ca67bb 0000000041b58ab3
      [  701.826941]  ffffffff830d1e74 ffffffff81ca6724 ffff88008fadf748 ffffffff8161c05c
      [  701.826963]  0000000000000282 ffff88008fadf800 ffffed0010172bf2 ffffea000202e540
      [  701.826987] Call Trace:
      [  701.826997]  [<ffffffff81ca67bb>] dump_stack+0x97/0xdc
      [  701.827005]  [<ffffffff81ca6724>] ? _atomic_dec_and_lock+0xc4/0xc4
      [  701.827014]  [<ffffffff8161c05c>] ? __dump_page+0x32c/0x490
      [  701.827023]  [<ffffffff816b0d03>] kasan_report_error+0x5f3/0x8b0
      [  701.827033]  [<ffffffff817c302a>] ? dio_bio_complete+0x41a/0x600
      [  701.827040]  [<ffffffff816b1119>] __asan_report_load4_noabort+0x59/0x80
      [  701.827048]  [<ffffffff817c302a>] ? dio_bio_complete+0x41a/0x600
      [  701.827053]  [<ffffffff817c302a>] dio_bio_complete+0x41a/0x600
      [  701.827057]  [<ffffffff81bd19c8>] ? blk_queue_exit+0x108/0x270
      [  701.827060]  [<ffffffff817c32b0>] dio_bio_end_aio+0xa0/0x4d0
      [  701.827063]  [<ffffffff817c3210>] ? dio_bio_complete+0x600/0x600
      [  701.827067]  [<ffffffff81bd2806>] ? blk_account_io_completion+0x316/0x5d0
      [  701.827070]  [<ffffffff81bafe89>] bio_endio+0x79/0x200
      [  701.827074]  [<ffffffff81bd2c9f>] blk_update_request+0x1df/0xc50
      [  701.827078]  [<ffffffff81c02c27>] blk_mq_end_request+0x57/0x120
      [  701.827081]  [<ffffffff81c03670>] __blk_mq_complete_request+0x310/0x590
      [  701.827084]  [<ffffffff812348d8>] ? set_next_entity+0x2f8/0x2ed0
      [  701.827088]  [<ffffffff8124b34d>] ? put_prev_entity+0x22d/0x2a70
      [  701.827091]  [<ffffffff81c0394b>] blk_mq_complete_request+0x5b/0x80
      [  701.827094]  [<ffffffff821e2a33>] loop_queue_work+0x273/0x19d0
      [  701.827098]  [<ffffffff811f6578>] ? finish_task_switch+0x1c8/0x8e0
      [  701.827101]  [<ffffffff8129d058>] ? trace_hardirqs_on_caller+0x18/0x6c0
      [  701.827104]  [<ffffffff821e27c0>] ? lo_read_simple+0x890/0x890
      [  701.827108]  [<ffffffff8129dd60>] ? debug_check_no_locks_freed+0x350/0x350
      [  701.827111]  [<ffffffff811f63b0>] ? __hrtick_start+0x130/0x130
      [  701.827115]  [<ffffffff82a0c8f6>] ? __schedule+0x936/0x20b0
      [  701.827118]  [<ffffffff811dd6bd>] ? kthread_worker_fn+0x3ed/0x8d0
      [  701.827121]  [<ffffffff811dd4ed>] ? kthread_worker_fn+0x21d/0x8d0
      [  701.827125]  [<ffffffff8129d058>] ? trace_hardirqs_on_caller+0x18/0x6c0
      [  701.827128]  [<ffffffff811dd57f>] kthread_worker_fn+0x2af/0x8d0
      [  701.827132]  [<ffffffff811dd2d0>] ? __init_kthread_worker+0x170/0x170
      [  701.827135]  [<ffffffff82a1ea46>] ? _raw_spin_unlock_irqrestore+0x36/0x60
      [  701.827138]  [<ffffffff811dd2d0>] ? __init_kthread_worker+0x170/0x170
      [  701.827141]  [<ffffffff811dd2d0>] ? __init_kthread_worker+0x170/0x170
      [  701.827144]  [<ffffffff811dd00b>] kthread+0x24b/0x3a0
      [  701.827148]  [<ffffffff811dcdc0>] ? kthread_create_on_node+0x4c0/0x4c0
      [  701.827151]  [<ffffffff8129d70d>] ? trace_hardirqs_on+0xd/0x10
      [  701.827155]  [<ffffffff8116d41d>] ? do_group_exit+0xdd/0x350
      [  701.827158]  [<ffffffff811dcdc0>] ? kthread_create_on_node+0x4c0/0x4c0
      [  701.827161]  [<ffffffff82a1f52f>] ret_from_fork+0x3f/0x70
      [  701.827165]  [<ffffffff811dcdc0>] ? kthread_create_on_node+0x4c0/0x4c0
      [  701.827167] Memory state around the buggy address:
      [  701.827170]  ffff880080b95e80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
      [  701.827172]  ffff880080b95f00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
      [  701.827175] >ffff880080b95f80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
      [  701.827177]                          ^
      [  701.827179]  ffff880080b96000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
      [  701.827182]  ffff880080b96080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
      [  701.827183] ==================================================================
      
      The problem is that bio_check_pages_dirty calls bio_put, so we must
      not access bio fields after bio_check_pages_dirty.
      
      Fixes: 9b81c842 ("block: don't access bio->bi_error after bio_put()").
      Signed-off-by: default avatarMike Krinkin <krinkin.m.u@gmail.com>
      Signed-off-by: default avatarJens Axboe <axboe@fb.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      68a91855
    • Hannes Reinecke's avatar
      bio: return EINTR if copying to user space got interrupted · 3e643b5c
      Hannes Reinecke authored
      commit 2d99b55d upstream.
      
      Commit 35dc2483 introduced a check for
      current->mm to see if we have a user space context and only copies data
      if we do. Now if an IO gets interrupted by a signal data isn't copied
      into user space any more (as we don't have a user space context) but
      user space isn't notified about it.
      
      This patch modifies the behaviour to return -EINTR from bio_uncopy_user()
      to notify userland that a signal has interrupted the syscall, otherwise
      it could lead to a situation where the caller may get a buffer with
      no data returned.
      
      This can be reproduced by issuing SG_IO ioctl()s in one thread while
      constantly sending signals to it.
      
      Fixes: 35dc2483 [SCSI] sg: Fix user memory corruption when SG_IO is interrupted by a signal
      Signed-off-by: default avatarJohannes Thumshirn <jthumshirn@suse.de>
      Signed-off-by: default avatarHannes Reinecke <hare@suse.de>
      Signed-off-by: default avatarJens Axboe <axboe@fb.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      3e643b5c
    • Alexandra Yates's avatar
      i2c: i801: Adding Intel Lewisburg support for iTCO · 023e29e4
      Alexandra Yates authored
      commit 1a1503c5 upstream.
      
      Starting from Intel Sunrisepoint (Skylake PCH) the iTCO watchdog
      resources have been moved to reside under the i801 SMBus host
      controller whereas previously they were under the LPC device.
      
      This patch adds Intel lewisburg SMBus support for iTCO device.
      It allows to load watchdog dynamically when the hardware is
      present.
      
      Fixes: cdc5a311 ("i2c: i801: add Intel Lewisburg device IDs")
      Reviewed-by: default avatarJean Delvare <jdelvare@suse.de>
      Signed-off-by: default avatarAlexandra Yates <alexandra.yates@linux.intel.com>
      Signed-off-by: default avatarWolfram Sang <wsa@the-dreams.de>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      023e29e4
    • Shawn Lin's avatar
      phy: core: fix wrong err handle for phy_power_on · be683dfd
      Shawn Lin authored
      commit b82fcabe upstream.
      
      If phy_pm_runtime_get_sync failed but we already
      enable regulator, current code return directly without
      doing regulator_disable. This patch fix this problem
      and cleanup err handle of phy_power_on to be more readable.
      
      Fixes: 3be88125 ("phy: core: Support regulator ...")
      Cc: Roger Quadros <rogerq@ti.com>
      Cc: Axel Lin <axel.lin@ingics.com>
      Signed-off-by: default avatarShawn Lin <shawn.lin@rock-chips.com>
      Signed-off-by: default avatarKishon Vijay Abraham I <kishon@ti.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      be683dfd
    • Tejun Heo's avatar
      writeback: keep superblock pinned during cgroup writeback association switches · 7c465723
      Tejun Heo authored
      commit 5ff8eaac upstream.
      
      If cgroup writeback is in use, an inode is associated with a cgroup
      for writeback.  If the inode's main dirtier changes to another cgroup,
      the association gets updated asynchronously.  Nothing was pinning the
      superblock while such switches are in progress and superblock could go
      away while async switching is pending or in progress leading to
      crashes like the following.
      
       kernel BUG at fs/jbd2/transaction.c:319!
       invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC
       CPU: 1 PID: 29158 Comm: kworker/1:10 Not tainted 4.5.0-rc3 #51
       Hardware name: Google Google, BIOS Google 01/01/2011
       Workqueue: events inode_switch_wbs_work_fn
       task: ffff880213dbbd40 ti: ffff880209264000 task.ti: ffff880209264000
       RIP: 0010:[<ffffffff803e6922>]  [<ffffffff803e6922>] start_this_handle+0x382/0x3e0
       RSP: 0018:ffff880209267c30  EFLAGS: 00010202
       ...
       Call Trace:
        [<ffffffff803e6be4>] jbd2__journal_start+0xf4/0x190
        [<ffffffff803cfc7e>] __ext4_journal_start_sb+0x4e/0x70
        [<ffffffff803b31ec>] ext4_evict_inode+0x12c/0x3d0
        [<ffffffff8035338b>] evict+0xbb/0x190
        [<ffffffff80354190>] iput+0x130/0x190
        [<ffffffff80360223>] inode_switch_wbs_work_fn+0x343/0x4c0
        [<ffffffff80279819>] process_one_work+0x129/0x300
        [<ffffffff80279b16>] worker_thread+0x126/0x480
        [<ffffffff8027ed14>] kthread+0xc4/0xe0
        [<ffffffff809771df>] ret_from_fork+0x3f/0x70
      
      Fix it by bumping s_active while cgroup association switching is in
      flight.
      Signed-off-by: default avatarTejun Heo <tj@kernel.org>
      Reported-and-tested-by: default avatarTahsin Erdogan <tahsin@google.com>
      Link: http://lkml.kernel.org/g/CAAeU0aNCq7LGODvVGRU-oU_o-6enii5ey0p1c26D1ZzYwkDc5A@mail.gmail.com
      Fixes: d10c8095 ("writeback: implement foreign cgroup inode bdi_writeback switching")
      Signed-off-by: default avatarJens Axboe <axboe@fb.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      7c465723
    • Tejun Heo's avatar
      cgroup: make sure a parent css isn't offlined before its children · 4cbd1963
      Tejun Heo authored
      commit aa226ff4 upstream.
      
      There are three subsystem callbacks in css shutdown path -
      css_offline(), css_released() and css_free().  Except for
      css_released(), cgroup core didn't guarantee the order of invocation.
      css_offline() or css_free() could be called on a parent css before its
      children.  This behavior is unexpected and led to bugs in cpu and
      memory controller.
      
      This patch updates offline path so that a parent css is never offlined
      before its children.  Each css keeps online_cnt which reaches zero iff
      itself and all its children are offline and offline_css() is invoked
      only after online_cnt reaches zero.
      
      This fixes the memory controller bug and allows the fix for cpu
      controller.
      Signed-off-by: default avatarTejun Heo <tj@kernel.org>
      Reported-and-tested-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
      Reported-by: default avatarBrian Christiansen <brian.o.christiansen@gmail.com>
      Link: http://lkml.kernel.org/g/5698A023.9070703@de.ibm.com
      Link: http://lkml.kernel.org/g/CAKB58ikDkzc8REt31WBkD99+hxNzjK4+FBmhkgS+NVrC9vjMSg@mail.gmail.com
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      4cbd1963
    • Tejun Heo's avatar
      cpuset: make mm migration asynchronous · fff4dc84
      Tejun Heo authored
      commit e93ad19d upstream.
      
      If "cpuset.memory_migrate" is set, when a process is moved from one
      cpuset to another with a different memory node mask, pages in used by
      the process are migrated to the new set of nodes.  This was performed
      synchronously in the ->attach() callback, which is synchronized
      against process management.  Recently, the synchronization was changed
      from per-process rwsem to global percpu rwsem for simplicity and
      optimization.
      
      Combined with the synchronous mm migration, this led to deadlocks
      because mm migration could schedule a work item which may in turn try
      to create a new worker blocking on the process management lock held
      from cgroup process migration path.
      
      This heavy an operation shouldn't be performed synchronously from that
      deep inside cgroup migration in the first place.  This patch punts the
      actual migration to an ordered workqueue and updates cgroup process
      migration and cpuset config update paths to flush the workqueue after
      all locks are released.  This way, the operations still seem
      synchronous to userland without entangling mm migration with process
      management synchronization.  CPU hotplug can also invoke mm migration
      but there's no reason for it to wait for mm migrations and thus
      doesn't synchronize against their completions.
      Signed-off-by: default avatarTejun Heo <tj@kernel.org>
      Reported-and-tested-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      fff4dc84
    • Sebastian Andrzej Siewior's avatar
      PCI/AER: Flush workqueue on device remove to avoid use-after-free · a2d25804
      Sebastian Andrzej Siewior authored
      commit 4ae2182b upstream.
      
      A Root Port's AER structure (rpc) contains a queue of events.  aer_irq()
      enqueues AER status information and schedules aer_isr() to dequeue and
      process it.  When we remove a device, aer_remove() waits for the queue to
      be empty, then frees the rpc struct.
      
      But aer_isr() references the rpc struct after dequeueing and possibly
      emptying the queue, which can cause a use-after-free error as in the
      following scenario with two threads, aer_isr() on the left and a
      concurrent aer_remove() on the right:
      
        Thread A                      Thread B
        --------                      --------
        aer_irq():
          rpc->prod_idx++
                                      aer_remove():
                                        wait_event(rpc->prod_idx == rpc->cons_idx)
                                        # now blocked until queue becomes empty
        aer_isr():                      # ...
          rpc->cons_idx++               # unblocked because queue is now empty
          ...                           kfree(rpc)
          mutex_unlock(&rpc->rpc_mutex)
      
      To prevent this problem, use flush_work() to wait until the last scheduled
      instance of aer_isr() has completed before freeing the rpc struct in
      aer_remove().
      
      I reproduced this use-after-free by flashing a device FPGA and
      re-enumerating the bus to find the new device.  With SLUB debug, this
      crashes with 0x6b bytes (POISON_FREE, the use-after-free magic number) in
      GPR25:
      
        pcieport 0000:00:00.0: AER: Multiple Corrected error received: id=0000
        Unable to handle kernel paging request for data at address 0x27ef9e3e
        Workqueue: events aer_isr
        GPR24: dd6aa000 6b6b6b6b 605f8378 605f8360 d99b12c0 604fc674 606b1704 d99b12c0
        NIP [602f5328] pci_walk_bus+0xd4/0x104
      
      [bhelgaas: changelog, stable tag]
      Signed-off-by: default avatarSebastian Andrzej Siewior <bigeasy@linutronix.de>
      Signed-off-by: default avatarBjorn Helgaas <bhelgaas@google.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      a2d25804
    • Vineet Gupta's avatar
      ARCv2: SMP: Emulate IPI to self using software triggered interrupt · 1de8f1bc
      Vineet Gupta authored
      commit bb143f81 upstream.
      
      ARConnect/MCIP Inter-Core-Interrupt module can't send interrupt to
      local core. So use core intc capability to trigger software
      interrupt to self, using an unsued IRQ #21.
      
      This showed up as csd deadlock with LTP trace_sched on a dual core
      system. This test acts as scheduler fuzzer, triggering all sorts of
      schedulting activity. Trouble starts with IPI to self, which doesn't get
      delivered (effectively lost due to H/w capability), but the msg intended
      to be sent remain enqueued in per-cpu @ipi_data.
      
      All subsequent IPIs to this core from other cores get elided due to the
      IPI coalescing optimization in ipi_send_msg_one() where a pending msg
      implies an IPI already sent and assumes other core is yet to ack it.
      After the elided IPI, other core simply goes into csd_lock_wait()
      but never comes out as this core never sees the interrupt.
      
      Fixes STAR 9001008624
      
      Cc: Peter Zijlstra <peterz@infradead.org>
      Signed-off-by: default avatarVineet Gupta <vgupta@synopsys.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      1de8f1bc
    • Vineet Gupta's avatar
      ARCv2: STAR 9000950267: Handle return from intr to Delay Slot #2 · 0bdce40c
      Vineet Gupta authored
      commit cbfe74a7 upstream.
      
      Returning to delay slot, riding an interrupti, had one loose end.
      AUX_USER_SP used for restoring user mode SP upon RTIE was not being
      setup from orig task's saved value, causing task to use wrong SP,
      leading to ProtV errors.
      
      The reason being:
       - INTERRUPT_EPILOGUE returns to a kernel trampoline, thus not expected to restore it
       - EXCEPTION_EPILOGUE is not used at all
      
      Fix that by restoring AUX_USER_SP explicitly in the trampoline.
      
      This was broken in the original workaround, but the error scenarios got
      reduced considerably since v3.14 due to following:
      
       1. The Linuxthreads.old based userspace at the time caused many more
          exceptions in delay slot than the current NPTL based one.
          Infact with current userspace the error doesn't happen at all.
      
       2. Return from interrupt (delay slot or otherwise) doesn't get exercised much
          after commit 4de0e528 ("Really Re-enable interrupts to avoid deadlocks")
          since IRQ_ACTIVE.active being clear means most returns are as if from pure
          kernel (even for active interrupts)
      
      Infact the issue only happened in an experimental branch where I was tinkering with
      reverted 4de0e528
      
      Fixes: 4255b07f ("ARCv2: STAR 9000793984: Handle return from intr to Delay Slot")
      Signed-off-by: default avatarVineet Gupta <vgupta@synopsys.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      0bdce40c
    • Tejun Heo's avatar
      libata: fix sff host state machine locking while polling · aff45148
      Tejun Heo authored
      commit 8eee1d3e upstream.
      
      The bulk of ATA host state machine is implemented by
      ata_sff_hsm_move().  The function is called from either the interrupt
      handler or, if polling, a work item.  Unlike from the interrupt path,
      the polling path calls the function without holding the host lock and
      ata_sff_hsm_move() selectively grabs the lock.
      
      This is completely broken.  If an IRQ triggers while polling is in
      progress, the two can easily race and end up accessing the hardware
      and updating state machine state at the same time.  This can put the
      state machine in an illegal state and lead to a crash like the
      following.
      
        kernel BUG at drivers/ata/libata-sff.c:1302!
        invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN
        Modules linked in:
        CPU: 1 PID: 10679 Comm: syz-executor Not tainted 4.5.0-rc1+ #300
        Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
        task: ffff88002bd00000 ti: ffff88002e048000 task.ti: ffff88002e048000
        RIP: 0010:[<ffffffff83a83409>]  [<ffffffff83a83409>] ata_sff_hsm_move+0x619/0x1c60
        ...
        Call Trace:
         <IRQ>
         [<ffffffff83a84c31>] __ata_sff_port_intr+0x1e1/0x3a0 drivers/ata/libata-sff.c:1584
         [<ffffffff83a85611>] ata_bmdma_port_intr+0x71/0x400 drivers/ata/libata-sff.c:2877
         [<     inline     >] __ata_sff_interrupt drivers/ata/libata-sff.c:1629
         [<ffffffff83a85bf3>] ata_bmdma_interrupt+0x253/0x580 drivers/ata/libata-sff.c:2902
         [<ffffffff81479f98>] handle_irq_event_percpu+0x108/0x7e0 kernel/irq/handle.c:157
         [<ffffffff8147a717>] handle_irq_event+0xa7/0x140 kernel/irq/handle.c:205
         [<ffffffff81484573>] handle_edge_irq+0x1e3/0x8d0 kernel/irq/chip.c:623
         [<     inline     >] generic_handle_irq_desc include/linux/irqdesc.h:146
         [<ffffffff811a92bc>] handle_irq+0x10c/0x2a0 arch/x86/kernel/irq_64.c:78
         [<ffffffff811a7e4d>] do_IRQ+0x7d/0x1a0 arch/x86/kernel/irq.c:240
         [<ffffffff86653d4c>] common_interrupt+0x8c/0x8c arch/x86/entry/entry_64.S:520
         <EOI>
         [<     inline     >] rcu_lock_acquire include/linux/rcupdate.h:490
         [<     inline     >] rcu_read_lock include/linux/rcupdate.h:874
         [<ffffffff8164b4a1>] filemap_map_pages+0x131/0xba0 mm/filemap.c:2145
         [<     inline     >] do_fault_around mm/memory.c:2943
         [<     inline     >] do_read_fault mm/memory.c:2962
         [<     inline     >] do_fault mm/memory.c:3133
         [<     inline     >] handle_pte_fault mm/memory.c:3308
         [<     inline     >] __handle_mm_fault mm/memory.c:3418
         [<ffffffff816efb16>] handle_mm_fault+0x2516/0x49a0 mm/memory.c:3447
         [<ffffffff8127dc16>] __do_page_fault+0x376/0x960 arch/x86/mm/fault.c:1238
         [<ffffffff8127e358>] trace_do_page_fault+0xe8/0x420 arch/x86/mm/fault.c:1331
         [<ffffffff8126f514>] do_async_page_fault+0x14/0xd0 arch/x86/kernel/kvm.c:264
         [<ffffffff86655578>] async_page_fault+0x28/0x30 arch/x86/entry/entry_64.S:986
      
      Fix it by ensuring that the polling path is holding the host lock
      before entering ata_sff_hsm_move() so that all hardware accesses and
      state updates are performed under the host lock.
      Signed-off-by: default avatarTejun Heo <tj@kernel.org>
      Reported-and-tested-by: default avatarDmitry Vyukov <dvyukov@google.com>
      Link: http://lkml.kernel.org/g/CACT4Y+b_JsOxJu2EZyEf+mOXORc_zid5V1-pLZSroJVxyWdSpw@mail.gmail.comSigned-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      aff45148
    • Quinn Tran's avatar
      qla2xxx: Fix stale pointer access. · 921d439c
      Quinn Tran authored
      commit cb43285f upstream.
      
      [ Upstream Commit 84e32a06 ]
      
      Commit 84e32a06 ("qla2xxx: Use pci_enable_msix_range() instead of
      pci_enable_msix()") introduced a regression when target mode is enabled.
      In qla24xx_enable_msix(), ha->max_rsp_queues was incorrectly set
      to a value higher than the number of response queues allocated causing
      an invalid dereference. Specifically here in qla2x00_init_rings():
          *rsp->in_ptr = 0;
      
      Add additional check to make sure the pointer is valid. following
      call stack will be seen
      
      ---- 8< ----
      RIP: 0010:[<ffffffffa02ccadc>]  [<ffffffffa02ccadc>] qla2x00_init_rings+0xdc/0x320 [qla2xxx]
      RSP: 0018:ffff880429447dd8  EFLAGS: 00010082
      ....
      Call Trace:
      [<ffffffffa02ceb40>] qla2x00_abort_isp+0x170/0x6b0 [qla2xxx]
      [<ffffffffa02c6f77>] qla2x00_do_dpc+0x357/0x7f0 [qla2xxx]
      [<ffffffffa02c6c20>] ? qla2x00_relogin+0x260/0x260 [qla2xxx]
      [<ffffffff8107d2c9>] kthread+0xc9/0xe0
      [<ffffffff8107d200>] ? flush_kthread_worker+0x90/0x90
      [<ffffffff8172cc6f>] ret_from_fork+0x3f/0x70
      [<ffffffff8107d200>] ? flush_kthread_worker+0x90/0x90
      ---- 8< ----
      Signed-off-by: default avatarQuinn Tran <quinn.tran@qlogic.com>
      Signed-off-by: default avatarHimanshu Madhani <himanshu.madhani@qlogic.com>
      Signed-off-by: default avatarNicholas Bellinger <nab@linux-iscsi.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      921d439c
    • Cyrille Pitchen's avatar
      spi: atmel: fix gpio chip-select in case of non-DT platform · 44c7d762
      Cyrille Pitchen authored
      commit 70f340df upstream.
      
      The non-DT platform that uses this driver (actually the AVR32) was taking a bad
      branch for determining if the IP would use gpio for CS.
      Adding the presence of DT as a condition fixes this issue.
      
      Fixes: 48203034 ("spi: atmel: add support for the internal chip-select of the spi controller")
      Reported-by: default avatarMans Rullgard <mans@mansr.com>
      Signed-off-by: default avatarCyrille Pitchen <cyrille.pitchen@atmel.com>
      [nicolas.ferre@atmel.com: extract from ml discussion]
      Signed-off-by: default avatarNicolas Ferre <nicolas.ferre@atmel.com>
      Tested-by: default avatarMans Rullgard <mans@mansr.com>
      Signed-off-by: default avatarMark Brown <broonie@kernel.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      44c7d762
    • Nicholas Bellinger's avatar
      target: Fix race with SCF_SEND_DELAYED_TAS handling · b9d99202
      Nicholas Bellinger authored
      commit 310d3d31 upstream.
      
      This patch fixes a race between setting of SCF_SEND_DELAYED_TAS
      in transport_send_task_abort(), and check of the same bit in
      transport_check_aborted_status().
      
      It adds a __transport_check_aborted_status() version that is
      used by target_execute_cmd() when se_cmd->t_state_lock is
      held, and a transport_check_aborted_status() wrapper for
      all other existing callers.
      
      Also, it handles the case where the check happens before
      transport_send_task_abort() gets called.  For this, go
      ahead and set SCF_SEND_DELAYED_TAS early when necessary,
      and have transport_send_task_abort() send the abort.
      
      Cc: Quinn Tran <quinn.tran@qlogic.com>
      Cc: Himanshu Madhani <himanshu.madhani@qlogic.com>
      Cc: Sagi Grimberg <sagig@mellanox.com>
      Cc: Christoph Hellwig <hch@lst.de>
      Cc: Hannes Reinecke <hare@suse.de>
      Cc: Andy Grover <agrover@redhat.com>
      Cc: Mike Christie <mchristi@redhat.com>
      Signed-off-by: default avatarNicholas Bellinger <nab@linux-iscsi.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      b9d99202
    • Nicholas Bellinger's avatar
      target: Fix remote-port TMR ABORT + se_cmd fabric stop · fb6a326e
      Nicholas Bellinger authored
      commit 0f4a9431 upstream.
      
      To address the bug where fabric driver level shutdown
      of se_cmd occurs at the same time when TMR CMD_T_ABORTED
      is happening resulting in a -1 ->cmd_kref, this patch
      adds a CMD_T_FABRIC_STOP bit that is used to determine
      when TMR + driver I_T nexus shutdown is happening
      concurrently.
      
      It changes target_sess_cmd_list_set_waiting() to obtain
      se_cmd->cmd_kref + set CMD_T_FABRIC_STOP, and drop local
      reference in target_wait_for_sess_cmds() and invoke extra
      target_put_sess_cmd() during Task Aborted Status (TAS)
      when necessary.
      
      Also, it adds a new target_wait_free_cmd() wrapper around
      transport_wait_for_tasks() for the special case within
      transport_generic_free_cmd() to set CMD_T_FABRIC_STOP,
      and is now aware of CMD_T_ABORTED + CMD_T_TAS status
      bits to know when an extra transport_put_cmd() during
      TAS is required.
      
      Note transport_generic_free_cmd() is expected to block on
      cmd->cmd_wait_comp in order to follow what iscsi-target
      expects during iscsi_conn context se_cmd shutdown.
      
      Cc: Quinn Tran <quinn.tran@qlogic.com>
      Cc: Himanshu Madhani <himanshu.madhani@qlogic.com>
      Cc: Sagi Grimberg <sagig@mellanox.com>
      Cc: Christoph Hellwig <hch@lst.de>
      Cc: Hannes Reinecke <hare@suse.de>
      Cc: Andy Grover <agrover@redhat.com>
      Cc: Mike Christie <mchristi@redhat.com>
      Signed-off-by: default avatarNicholas Bellinger <nab@daterainc.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      fb6a326e
    • Nicholas Bellinger's avatar
      target: Fix TAS handling for multi-session se_node_acls · 547551e5
      Nicholas Bellinger authored
      commit ebde1ca5 upstream.
      
      This patch fixes a bug in TMR task aborted status (TAS)
      handling when multiple sessions are connected to the
      same target WWPN endpoint and se_node_acl descriptor,
      resulting in TASK_ABORTED status to not be generated
      for aborted se_cmds on the remote port.
      
      This is due to core_tmr_handle_tas_abort() incorrectly
      comparing se_node_acl instead of se_session, for which
      the multi-session case is expected to be sharing the
      same se_node_acl.
      
      Instead, go ahead and update core_tmr_handle_tas_abort()
      to compare tmr_sess + cmd->se_sess in order to determine
      if the LUN_RESET was received on a different I_T nexus,
      and TASK_ABORTED status response needs to be generated.
      Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
      Cc: Quinn Tran <quinn.tran@qlogic.com>
      Cc: Himanshu Madhani <himanshu.madhani@qlogic.com>
      Cc: Sagi Grimberg <sagig@mellanox.com>
      Cc: Hannes Reinecke <hare@suse.de>
      Cc: Andy Grover <agrover@redhat.com>
      Cc: Mike Christie <mchristi@redhat.com>
      Signed-off-by: default avatarNicholas Bellinger <nab@linux-iscsi.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      547551e5
    • Nicholas Bellinger's avatar
      target: Fix LUN_RESET active TMR descriptor handling · 91583a21
      Nicholas Bellinger authored
      commit a6d9bb1c upstream.
      
      This patch fixes a NULL pointer se_cmd->cmd_kref < 0
      refcount bug during TMR LUN_RESET with active TMRs,
      triggered during se_cmd + se_tmr_req descriptor
      shutdown + release via core_tmr_drain_tmr_list().
      
      To address this bug, go ahead and obtain a local
      kref_get_unless_zero(&se_cmd->cmd_kref) for active I/O
      to set CMD_T_ABORTED, and transport_wait_for_tasks()
      followed by the final target_put_sess_cmd() to drop
      the local ->cmd_kref.
      
      Also add two new checks within target_tmr_work() to
      avoid CMD_T_ABORTED -> TFO->queue_tm_rsp() callbacks
      ahead of invoking the backend -> fabric put in
      transport_cmd_check_stop_to_fabric().
      
      For good measure, also change core_tmr_release_req()
      to use list_del_init() ahead of se_tmr_req memory
      free.
      Reviewed-by: default avatarQuinn Tran <quinn.tran@qlogic.com>
      Cc: Himanshu Madhani <himanshu.madhani@qlogic.com>
      Cc: Sagi Grimberg <sagig@mellanox.com>
      Cc: Christoph Hellwig <hch@lst.de>
      Cc: Hannes Reinecke <hare@suse.de>
      Cc: Andy Grover <agrover@redhat.com>
      Cc: Mike Christie <mchristi@redhat.com>
      Signed-off-by: default avatarNicholas Bellinger <nab@linux-iscsi.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      91583a21
    • Nicholas Bellinger's avatar
      target: Fix LUN_RESET active I/O handling for ACK_KREF · 3b493f9f
      Nicholas Bellinger authored
      commit febe562c upstream.
      
      This patch fixes a NULL pointer se_cmd->cmd_kref < 0
      refcount bug during TMR LUN_RESET with active se_cmd
      I/O, that can be triggered during se_cmd descriptor
      shutdown + release via core_tmr_drain_state_list() code.
      
      To address this bug, add common __target_check_io_state()
      helper for ABORT_TASK + LUN_RESET w/ CMD_T_COMPLETE
      checking, and set CMD_T_ABORTED + obtain ->cmd_kref for
      both cases ahead of last target_put_sess_cmd() after
      TFO->aborted_task() -> transport_cmd_finish_abort()
      callback has completed.
      
      It also introduces SCF_ACK_KREF to determine when
      transport_cmd_finish_abort() needs to drop the second
      extra reference, ahead of calling target_put_sess_cmd()
      for the final kref_put(&se_cmd->cmd_kref).
      
      It also updates transport_cmd_check_stop() to avoid
      holding se_cmd->t_state_lock while dropping se_cmd
      device state via target_remove_from_state_list(), now
      that core_tmr_drain_state_list() is holding the
      se_device lock while checking se_cmd state from
      within TMR logic.
      
      Finally, move transport_put_cmd() release of SGL +
      TMR + extended CDB memory into target_free_cmd_mem()
      in order to avoid potential resource leaks in TMR
      ABORT_TASK + LUN_RESET code-paths.  Also update
      target_release_cmd_kref() accordingly.
      Reviewed-by: default avatarQuinn Tran <quinn.tran@qlogic.com>
      Cc: Himanshu Madhani <himanshu.madhani@qlogic.com>
      Cc: Sagi Grimberg <sagig@mellanox.com>
      Cc: Christoph Hellwig <hch@lst.de>
      Cc: Hannes Reinecke <hare@suse.de>
      Cc: Andy Grover <agrover@redhat.com>
      Cc: Mike Christie <mchristi@redhat.com>
      Signed-off-by: default avatarNicholas Bellinger <nab@linux-iscsi.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      3b493f9f
    • Kai-Heng Feng's avatar
      ALSA: hda - Fixing background noise on Dell Inspiron 3162 · 7cb32ae0
      Kai-Heng Feng authored
      commit 3b43b71f upstream.
      
      After login to the desktop on Dell Inspiron 3162,
      there's a very loud background noise comes from the builtin speaker.
      The noise does not go away even if the speaker is muted.
      
      The noise disappears after using the aamix fixup.
      
      Codec: Realtek ALC3234
      Address: 0
      AFG Function Id: 0x1 (unsol 1)
          Vendor Id: 0x10ec0255
          Subsystem Id: 0x10280725
          Revision Id: 0x100002
          No Modem Function Group found
      
      BugLink: http://bugs.launchpad.net/bugs/1549620Signed-off-by: default avatarKai-Heng Feng <kai.heng.feng@canonical.com>
      Signed-off-by: default avatarTakashi Iwai <tiwai@suse.de>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      7cb32ae0
    • Takashi Iwai's avatar
      ALSA: hda - Apply clock gate workaround to Skylake, too · 1436e689
      Takashi Iwai authored
      commit 7e31a015 upstream.
      
      Some Skylake machines show the codec probe errors in certain
      situations, e.g. HP Z240 desktop fails to probe the onboard Realtek
      codec at reloading the snd-hda-intel module like:
        snd_hda_intel 0000:00:1f.3: spurious response 0x200:0x2, last cmd=0x000000
        snd_hda_intel 0000:00:1f.3: azx_get_response timeout, switching to polling mode: lastcmd=0x000f0000
        snd_hda_intel 0000:00:1f.3: No response from codec, disabling MSI: last cmd=0x000f0000
        snd_hda_intel 0000:00:1f.3: Codec #0 probe error; disabling it...
        hdaudio hdaudioC0D2: no AFG or MFG node found
        snd_hda_intel 0000:00:1f.3: no codecs initialized
      
      Also, HP G470 G3 suffers from the similar problem, as reported in
      bugzilla below.  On this machine, the codec probe error appears even
      at a fresh boot.
      
      As Libin suggested, the same workaround used for Broxton in the commit
      [6639484d: ALSA: hda - disable dynamic clock gating on Broxton
       before reset] can be applied for Skylake in order to fix this problem.
      The Intel HW team also confirmed that this is needed for SKL.
      
      This patch makes the workaround applied to both SKL and BXT
      platforms.  The referred macros are moved and one superfluous macro
      (IS_BROXTON()) is another one (IS_BXT()) as well.
      
      Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=112731Suggested-by: default avatarLibin Yang <libin.yang@linux.intel.com>
      Signed-off-by: default avatarTakashi Iwai <tiwai@suse.de>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      1436e689
    • Tejun Heo's avatar
      Revert "workqueue: make sure delayed work run in local cpu" · 66847104
      Tejun Heo authored
      commit 041bd12e upstream.
      
      This reverts commit 874bbfe6.
      
      Workqueue used to implicity guarantee that work items queued without
      explicit CPU specified are put on the local CPU.  Recent changes in
      timer broke the guarantee and led to vmstat breakage which was fixed
      by 176bed1d ("vmstat: explicitly schedule per-cpu work on the CPU
      we need it to run on").
      
      vmstat is the most likely to expose the issue and it's quite possible
      that there are other similar problems which are a lot more difficult
      to trigger.  As a preventive measure, 874bbfe6 ("workqueue: make
      sure delayed work run in local cpu") was applied to restore the local
      CPU guarnatee.  Unfortunately, the change exposed a bug in timer code
      which got fixed by 22b886dd ("timers: Use proper base migration in
      add_timer_on()").  Due to code restructuring, the commit couldn't be
      backported beyond certain point and stable kernels which only had
      874bbfe6 started crashing.
      
      The local CPU guarantee was accidental more than anything else and we
      want to get rid of it anyway.  As, with the vmstat case fixed,
      874bbfe6 is causing more problems than it's fixing, it has been
      decided to take the chance and officially break the guarantee by
      reverting the commit.  A debug feature will be added to force foreign
      CPU assignment to expose cases relying on the guarantee and fixes for
      the individual cases will be backported to stable as necessary.
      Signed-off-by: default avatarTejun Heo <tj@kernel.org>
      Fixes: 874bbfe6 ("workqueue: make sure delayed work run in local cpu")
      Link: http://lkml.kernel.org/g/20160120211926.GJ10810@quack.suse.cz
      Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
      Cc: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
      Cc: Daniel Bilik <daniel.bilik@neosystem.cz>
      Cc: Jan Kara <jack@suse.cz>
      Cc: Shaohua Li <shli@fb.com>
      Cc: Sasha Levin <sasha.levin@oracle.com>
      Cc: Ben Hutchings <ben@decadent.org.uk>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Daniel Bilik <daniel.bilik@neosystem.cz>
      Cc: Jiri Slaby <jslaby@suse.cz>
      Cc: Michal Hocko <mhocko@kernel.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      66847104
    • Tejun Heo's avatar
      workqueue: handle NUMA_NO_NODE for unbound pool_workqueue lookup · 21b34b45
      Tejun Heo authored
      commit d6e022f1 upstream.
      
      When looking up the pool_workqueue to use for an unbound workqueue,
      workqueue assumes that the target CPU is always bound to a valid NUMA
      node.  However, currently, when a CPU goes offline, the mapping is
      destroyed and cpu_to_node() returns NUMA_NO_NODE.
      
      This has always been broken but hasn't triggered often enough before
      874bbfe6 ("workqueue: make sure delayed work run in local cpu").
      After the commit, workqueue forcifully assigns the local CPU for
      delayed work items without explicit target CPU to fix a different
      issue.  This widens the window where CPU can go offline while a
      delayed work item is pending causing delayed work items dispatched
      with target CPU set to an already offlined CPU.  The resulting
      NUMA_NO_NODE mapping makes workqueue try to queue the work item on a
      NULL pool_workqueue and thus crash.
      
      While 874bbfe6 has been reverted for a different reason making the
      bug less visible again, it can still happen.  Fix it by mapping
      NUMA_NO_NODE to the default pool_workqueue from unbound_pwq_by_node().
      This is a temporary workaround.  The long term solution is keeping CPU
      -> NODE mapping stable across CPU off/online cycles which is being
      worked on.
      Signed-off-by: default avatarTejun Heo <tj@kernel.org>
      Reported-by: default avatarMike Galbraith <umgwanakikbuti@gmail.com>
      Cc: Tang Chen <tangchen@cn.fujitsu.com>
      Cc: Rafael J. Wysocki <rafael@kernel.org>
      Cc: Len Brown <len.brown@intel.com>
      Link: http://lkml.kernel.org/g/1454424264.11183.46.camel@gmail.com
      Link: http://lkml.kernel.org/g/1453702100-2597-1-git-send-email-tangchen@cn.fujitsu.comSigned-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      21b34b45
    • Sachin Kulkarni's avatar
      mac80211: Requeue work after scan complete for all VIF types. · 81bb6553
      Sachin Kulkarni authored
      commit 4fa11ec7 upstream.
      
      During a sw scan ieee80211_iface_work ignores work items for all vifs.
      However after the scan complete work is requeued only for STA, ADHOC
      and MESH iftypes.
      
      This occasionally results in event processing getting delayed/not
      processed for iftype AP when it coexists with a STA. This can result
      in data halt and eventually disconnection on the AP interface.
      Signed-off-by: default avatarSachin Kulkarni <Sachin.Kulkarni@imgtec.com>
      Signed-off-by: default avatarJohannes Berg <johannes.berg@intel.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      81bb6553
    • Johannes Berg's avatar
      rfkill: fix rfkill_fop_read wait_event usage · 33bf18e4
      Johannes Berg authored
      commit 6736fde9 upstream.
      
      The code within wait_event_interruptible() is called with
      !TASK_RUNNING, so mustn't call any functions that can sleep,
      like mutex_lock().
      
      Since we re-check the list_empty() in a loop after the wait,
      it's safe to simply use list_empty() without locking.
      
      This bug has existed forever, but was only discovered now
      because all userspace implementations, including the default
      'rfkill' tool, use poll() or select() to get a readable fd
      before attempting to read.
      
      Fixes: c64fb016 ("rfkill: create useful userspace interface")
      Reported-by: default avatarDmitry Vyukov <dvyukov@google.com>
      Signed-off-by: default avatarJohannes Berg <johannes.berg@intel.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      33bf18e4
    • Wanpeng Li's avatar
      tick/nohz: Set the correct expiry when switching to nohz/lowres mode · d024d46e
      Wanpeng Li authored
      commit 1ca8ec53 upstream.
      
      commit 0ff53d09 sets the next tick interrupt to the last jiffies update,
      i.e. in the past, because the forward operation is invoked before the set
      operation. There is no resulting damage (yet), but we get an extra pointless
      tick interrupt.
      
      Revert the order so we get the next tick interrupt in the future.
      
      Fixes: commit 0ff53d09 "tick: sched: Force tick interrupt and get rid of softirq magic"
      Signed-off-by: default avatarWanpeng Li <wanpeng.li@hotmail.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Frederic Weisbecker <fweisbec@gmail.com>
      Link: http://lkml.kernel.org/r/1453893967-3458-1-git-send-email-wanpeng.li@hotmail.comSigned-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      d024d46e
    • Jiri Olsa's avatar
      perf stat: Do not clean event's private stats · e4319361
      Jiri Olsa authored
      commit 3f416f22 upstream.
      
      Mel reported stddev reporting was broken due to following commit:
      
      	106a94a0 ("perf stat: Introduce read_counters function")
      
      This commit merged interval and overall counters reading into single
      read_counters function.
      
      The old interval code cleaned the stddev data for some reason (it's
      never displayed in interval mode) and the mentioned commit kept on
      cleaning the stddev data in merged function, which resulted in the
      stddev not being displayed.
      
      Removing the wrong stddev data cleanup init_stats call.
      Reported-and-Tested-by: default avatarMel Gorman <mgorman@techsingularity.net>
      Signed-off-by: default avatarJiri Olsa <jolsa@kernel.org>
      Cc: David Ahern <dsahern@gmail.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
      Fixes: 106a94a0 ("perf stat: Introduce read_counters function")
      Link: http://lkml.kernel.org/r/1453290995-18485-4-git-send-email-jolsa@kernel.orgSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      e4319361
    • Oliver Neukum's avatar
      cdc-acm:exclude Samsung phone 04e8:685d · 19964740
      Oliver Neukum authored
      commit e912e685 upstream.
      
      This phone needs to be handled by a specialised firmware tool
      and is reported to crash irrevocably if cdc-acm takes it.
      Signed-off-by: default avatarOliver Neukum <oneukum@suse.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      19964740
    • Sudip Mukherjee's avatar
      Revert "Staging: panel: usleep_range is preferred over udelay" · f021f05c
      Sudip Mukherjee authored
      commit b64a1cbe upstream.
      
      This reverts commit ebd43516.
      
      We should not be sleeping inside spin_lock.
      
      Fixes: ebd43516 ("Staging: panel: usleep_range is preferred over udelay")
      Cc: Sirnam Swetha <theonly.ultimate@gmail.com>
      Signed-off-by: default avatarSudip Mukherjee <sudip@vectorindia.org>
      Reported-by: default avatarHuang, Ying <ying.huang@intel.com>
      Tested-by: default avatarHuang, Ying <ying.huang@intel.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      f021f05c
    • Samuel Thibault's avatar
      Staging: speakup: Fix getting port information · 16d9f522
      Samuel Thibault authored
      commit 327b882d upstream.
      
      Commit f79b0d9c ("staging: speakup: Fixed warning <linux/serial.h>
      instead of <asm/serial.h>") broke the port information in the speakup
      driver: SERIAL_PORT_DFNS only gets defined if asm/serial.h is included,
      and no other header includes asm/serial.h.
      
      We here make sure serialio.c does get the arch-specific definition of
      SERIAL_PORT_DFNS from asm/serial.h, if any.
      
      Along the way, this makes sure that we do have information for the
      requested serial port number (index)
      
      Fixes: f79b0d9c ("staging: speakup: Fixed warning <linux/serial.h> instead of <asm/serial.h>")
      Signed-off-by: default avatarSamuel Thibault <samuel.thibault@ens-lyon.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      16d9f522
    • Martin K. Petersen's avatar
      sd: Optimal I/O size is in bytes, not sectors · f313f1d8
      Martin K. Petersen authored
      commit d0eb20a8 upstream.
      
      Commit ca369d51 ("block/sd: Fix device-imposed transfer length
      limits") accidentally switched optimal I/O size reporting from bytes to
      block layer sectors.
      Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
      Reported-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
      Tested-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
      Fixes: ca369d51Reviewed-by: default avatarJames E.J. Bottomley <James.Bottomley@HansenPartnership.com>
      Reviewed-by: default avatarEwan D. Milne <emilne@redhat.com>
      Reviewed-by: default avatarMatthew R. Ochs <mrochs@linux.vnet.ibm.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      f313f1d8
    • Ilya Dryomov's avatar
      libceph: don't spam dmesg with stray reply warnings · 01c3c0f9
      Ilya Dryomov authored
      commit cd8140c6 upstream.
      
      Commit d15f9d69 ("libceph: check data_len in ->alloc_msg()")
      mistakenly bumped the log level on the "tid %llu unknown, skipping"
      message.  Turn it back into a dout() - stray replies are perfectly
      normal when OSDs flap, crash, get killed for testing purposes, etc.
      Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
      Reviewed-by: default avatarAlex Elder <elder@linaro.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      01c3c0f9
    • Ilya Dryomov's avatar
      libceph: use the right footer size when skipping a message · 10dada9d
      Ilya Dryomov authored
      commit dbc0d3ca upstream.
      
      ceph_msg_footer is 21 bytes long, while ceph_msg_footer_old is only 13.
      Don't skip too much when CEPH_FEATURE_MSG_AUTH isn't negotiated.
      Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
      Reviewed-by: default avatarAlex Elder <elder@linaro.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      10dada9d
    • Ilya Dryomov's avatar
      libceph: don't bail early from try_read() when skipping a message · 50c6a283
      Ilya Dryomov authored
      commit e7a88e82 upstream.
      
      The contract between try_read() and try_write() is that when called
      each processes as much data as possible.  When instructed by osd_client
      to skip a message, try_read() is violating this contract by returning
      after receiving and discarding a single message instead of checking for
      more.  try_write() then gets a chance to write out more requests,
      generating more replies/skips for try_read() to handle, forcing the
      messenger into a starvation loop.
      Reported-by: default avatarVarada Kari <Varada.Kari@sandisk.com>
      Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
      Tested-by: default avatarVarada Kari <Varada.Kari@sandisk.com>
      Reviewed-by: default avatarAlex Elder <elder@linaro.org>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      50c6a283
    • Ilya Dryomov's avatar
      libceph: fix ceph_msg_revoke() · ee834473
      Ilya Dryomov authored
      commit 67645d76 upstream.
      
      There are a number of problems with revoking a "was sending" message:
      
      (1) We never make any attempt to revoke data - only kvecs contibute to
      con->out_skip.  However, once the header (envelope) is written to the
      socket, our peer learns data_len and sets itself to expect at least
      data_len bytes to follow front or front+middle.  If ceph_msg_revoke()
      is called while the messenger is sending message's data portion,
      anything we send after that call is counted by the OSD towards the now
      revoked message's data portion.  The effects vary, the most common one
      is the eventual hang - higher layers get stuck waiting for the reply to
      the message that was sent out after ceph_msg_revoke() returned and
      treated by the OSD as a bunch of data bytes.  This is what Matt ran
      into.
      
      (2) Flat out zeroing con->out_kvec_bytes worth of bytes to handle kvecs
      is wrong.  If ceph_msg_revoke() is called before the tag is sent out or
      while the messenger is sending the header, we will get a connection
      reset, either due to a bad tag (0 is not a valid tag) or a bad header
      CRC, which kind of defeats the purpose of revoke.  Currently the kernel
      client refuses to work with header CRCs disabled, but that will likely
      change in the future, making this even worse.
      
      (3) con->out_skip is not reset on connection reset, leading to one or
      more spurious connection resets if we happen to get a real one between
      con->out_skip is set in ceph_msg_revoke() and before it's cleared in
      write_partial_skip().
      
      Fixing (1) and (3) is trivial.  The idea behind fixing (2) is to never
      zero the tag or the header, i.e. send out tag+header regardless of when
      ceph_msg_revoke() is called.  That way the header is always correct, no
      unnecessary resets are induced and revoke stands ready for disabled
      CRCs.  Since ceph_msg_revoke() rips out con->out_msg, introduce a new
      "message out temp" and copy the header into it before sending.
      Reported-by: default avatarMatt Conner <matt.conner@keepertech.com>
      Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
      Tested-by: default avatarMatt Conner <matt.conner@keepertech.com>
      Reviewed-by: default avatarSage Weil <sage@redhat.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      ee834473