1. 02 Mar, 2018 1 commit
    • himanshu.madhani@cavium.com's avatar
      scsi: qla2xxx: Fix NULL pointer crash due to active timer for ABTS · 1514839b
      himanshu.madhani@cavium.com authored
      This patch fixes NULL pointer crash due to active timer running for abort
      IOCB.
      
      From crash dump analysis it was discoverd that get_next_timer_interrupt()
      encountered a corrupted entry on the timer list.
      
       #9 [ffff95e1f6f0fd40] page_fault at ffffffff914fe8f8
          [exception RIP: get_next_timer_interrupt+440]
          RIP: ffffffff90ea3088  RSP: ffff95e1f6f0fdf0  RFLAGS: 00010013
          RAX: ffff95e1f6451028  RBX: 000218e2389e5f40  RCX: 00000001232ad600
          RDX: 0000000000000001  RSI: ffff95e1f6f0fdf0  RDI: 0000000001232ad6
          RBP: ffff95e1f6f0fe40   R8: ffff95e1f6451188   R9: 0000000000000001
          R10: 0000000000000016  R11: 0000000000000016  R12: 00000001232ad5f6
          R13: ffff95e1f6450000  R14: ffff95e1f6f0fdf8  R15: ffff95e1f6f0fe10
          ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
      
      Looking at the assembly of get_next_timer_interrupt(), address came
      from %r8 (ffff95e1f6451188) which is pointing to list_head with single
      entry at ffff95e5ff621178.
      
       0xffffffff90ea307a <get_next_timer_interrupt+426>:      mov    (%r8),%rdx
       0xffffffff90ea307d <get_next_timer_interrupt+429>:      cmp    %r8,%rdx
       0xffffffff90ea3080 <get_next_timer_interrupt+432>:      je     0xffffffff90ea30a7 <get_next_timer_interrupt+471>
       0xffffffff90ea3082 <get_next_timer_interrupt+434>:      nopw   0x0(%rax,%rax,1)
       0xffffffff90ea3088 <get_next_timer_interrupt+440>:      testb  $0x1,0x18(%rdx)
      
       crash> rd ffff95e1f6451188 10
       ffff95e1f6451188:  ffff95e5ff621178 ffff95e5ff621178   x.b.....x.b.....
       ffff95e1f6451198:  ffff95e1f6451198 ffff95e1f6451198   ..E.......E.....
       ffff95e1f64511a8:  ffff95e1f64511a8 ffff95e1f64511a8   ..E.......E.....
       ffff95e1f64511b8:  ffff95e77cf509a0 ffff95e77cf509a0   ...|.......|....
       ffff95e1f64511c8:  ffff95e1f64511c8 ffff95e1f64511c8   ..E.......E.....
      
       crash> rd ffff95e5ff621178 10
       ffff95e5ff621178:  0000000000000001 ffff95e15936aa00   ..........6Y....
       ffff95e5ff621188:  0000000000000000 00000000ffffffff   ................
       ffff95e5ff621198:  00000000000000a0 0000000000000010   ................
       ffff95e5ff6211a8:  ffff95e5ff621198 000000000000000c   ..b.............
       ffff95e5ff6211b8:  00000f5800000000 ffff95e751f8d720   ....X... ..Q....
      
       ffff95e5ff621178 belongs to freed mempool object at ffff95e5ff621080.
      
       CACHE            NAME                 OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE
       ffff95dc7fd74d00 mnt_cache                384      19785     24948    594    16k
         SLAB              MEMORY            NODE  TOTAL  ALLOCATED  FREE
         ffffdc5dabfd8800  ffff95e5ff620000     1     42         29    13
         FREE / [ALLOCATED]
          ffff95e5ff621080  (cpu 6 cache)
      
      Examining the contents of that memory reveals a pointer to a constant string
      in the driver, "abort\0", which is set by qla24xx_async_abort_cmd().
      
       crash> rd ffffffffc059277c 20
       ffffffffc059277c:  6e490074726f6261 0074707572726574   abort.Interrupt.
       ffffffffc059278c:  00676e696c6c6f50 6920726576697244   Polling.Driver i
       ffffffffc059279c:  646f6d207325206e 6974736554000a65   n %s mode..Testi
       ffffffffc05927ac:  636976656420676e 786c252074612065   ng device at %lx
       ffffffffc05927bc:  6b63656843000a2e 646f727020676e69   ...Checking prod
       ffffffffc05927cc:  6f20444920746375 0a2e706968632066   uct ID of chip..
       ffffffffc05927dc:  5120646e756f4600 204130303232414c   .Found QLA2200A
       ffffffffc05927ec:  43000a2e70696843 20676e696b636568   Chip...Checking
       ffffffffc05927fc:  65786f626c69616d 6c636e69000a2e73   mailboxes...incl
       ffffffffc059280c:  756e696c2f656475 616d2d616d642f78   ude/linux/dma-ma
      
       crash> struct -ox srb_iocb
       struct srb_iocb {
                 union {
                     struct {...} logio;
                     struct {...} els_logo;
                     struct {...} tmf;
                     struct {...} fxiocb;
                     struct {...} abt;
                     struct ct_arg ctarg;
                     struct {...} mbx;
                     struct {...} nack;
          [0x0 ] } u;
          [0xb8] struct timer_list timer;
          [0x108] void (*timeout)(void *);
       }
       SIZE: 0x110
      
       crash> ! bc
       ibase=16
       obase=10
       B8+40
       F8
      
      The object is a srb_t, and at offset 0xf8 within that structure
      (i.e. ffff95e5ff621080 + f8 -> ffff95e5ff621178) is a struct timer_list.
      
      Cc: <stable@vger.kernel.org> #4.4+
      Fixes: 4440e46d ("[SCSI] qla2xxx: Add IOCB Abort command asynchronous handling.")
      Signed-off-by: default avatarHimanshu Madhani <himanshu.madhani@cavium.com>
      Reviewed-by: default avatarJohannes Thumshirn <jthumshirn@suse.de>
      Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
      1514839b
  2. 22 Feb, 2018 3 commits
  3. 15 Feb, 2018 1 commit
  4. 14 Feb, 2018 7 commits
  5. 06 Feb, 2018 1 commit
  6. 31 Jan, 2018 12 commits
  7. 23 Jan, 2018 15 commits