• Christophe Leroy's avatar
    powerpc: Implement csum_ipv6_magic in assembly · e9c4943a
    Christophe Leroy authored
    The generic C csum_ipv6_magic() compiles to rather poor code:
    
    00000000 <csum_ipv6_magic>: (PPC32)
       0:	81 23 00 00 	lwz     r9,0(r3)
       4:	81 03 00 04 	lwz     r8,4(r3)
       8:	7c e7 4a 14 	add     r7,r7,r9
       c:	7d 29 38 10 	subfc   r9,r9,r7
      10:	7d 4a 51 10 	subfe   r10,r10,r10
      14:	7d 27 42 14 	add     r9,r7,r8
      18:	7d 2a 48 50 	subf    r9,r10,r9
      1c:	80 e3 00 08 	lwz     r7,8(r3)
      20:	7d 08 48 10 	subfc   r8,r8,r9
      24:	7d 4a 51 10 	subfe   r10,r10,r10
      28:	7d 29 3a 14 	add     r9,r9,r7
      2c:	81 03 00 0c 	lwz     r8,12(r3)
      30:	7d 2a 48 50 	subf    r9,r10,r9
      34:	7c e7 48 10 	subfc   r7,r7,r9
      38:	7d 4a 51 10 	subfe   r10,r10,r10
      3c:	7d 29 42 14 	add     r9,r9,r8
      40:	7d 2a 48 50 	subf    r9,r10,r9
      44:	80 e4 00 00 	lwz     r7,0(r4)
      48:	7d 08 48 10 	subfc   r8,r8,r9
      4c:	7d 4a 51 10 	subfe   r10,r10,r10
      50:	7d 29 3a 14 	add     r9,r9,r7
      54:	7d 2a 48 50 	subf    r9,r10,r9
      58:	81 04 00 04 	lwz     r8,4(r4)
      5c:	7c e7 48 10 	subfc   r7,r7,r9
      60:	7d 4a 51 10 	subfe   r10,r10,r10
      64:	7d 29 42 14 	add     r9,r9,r8
      68:	7d 2a 48 50 	subf    r9,r10,r9
      6c:	80 e4 00 08 	lwz     r7,8(r4)
      70:	7d 08 48 10 	subfc   r8,r8,r9
      74:	7d 4a 51 10 	subfe   r10,r10,r10
      78:	7d 29 3a 14 	add     r9,r9,r7
      7c:	7d 2a 48 50 	subf    r9,r10,r9
      80:	81 04 00 0c 	lwz     r8,12(r4)
      84:	7c e7 48 10 	subfc   r7,r7,r9
      88:	7d 4a 51 10 	subfe   r10,r10,r10
      8c:	7d 29 42 14 	add     r9,r9,r8
      90:	7d 2a 48 50 	subf    r9,r10,r9
      94:	7d 08 48 10 	subfc   r8,r8,r9
      98:	7d 4a 51 10 	subfe   r10,r10,r10
      9c:	7d 29 2a 14 	add     r9,r9,r5
      a0:	7d 2a 48 50 	subf    r9,r10,r9
      a4:	7c a5 48 10 	subfc   r5,r5,r9
      a8:	7c 63 19 10 	subfe   r3,r3,r3
      ac:	7d 29 32 14 	add     r9,r9,r6
      b0:	7d 23 48 50 	subf    r9,r3,r9
      b4:	7c c6 48 10 	subfc   r6,r6,r9
      b8:	7c 63 19 10 	subfe   r3,r3,r3
      bc:	7c 63 48 50 	subf    r3,r3,r9
      c0:	54 6a 80 3e 	rotlwi  r10,r3,16
      c4:	7c 63 52 14 	add     r3,r3,r10
      c8:	7c 63 18 f8 	not     r3,r3
      cc:	54 63 84 3e 	rlwinm  r3,r3,16,16,31
      d0:	4e 80 00 20 	blr
    
    0000000000000000 <.csum_ipv6_magic>: (PPC64)
       0:	81 23 00 00 	lwz     r9,0(r3)
       4:	80 03 00 04 	lwz     r0,4(r3)
       8:	81 63 00 08 	lwz     r11,8(r3)
       c:	7c e7 4a 14 	add     r7,r7,r9
      10:	7f 89 38 40 	cmplw   cr7,r9,r7
      14:	7d 47 02 14 	add     r10,r7,r0
      18:	7d 30 10 26 	mfocrf  r9,1
      1c:	55 29 f7 fe 	rlwinm  r9,r9,30,31,31
      20:	7d 4a 4a 14 	add     r10,r10,r9
      24:	7f 80 50 40 	cmplw   cr7,r0,r10
      28:	7d 2a 5a 14 	add     r9,r10,r11
      2c:	80 03 00 0c 	lwz     r0,12(r3)
      30:	81 44 00 00 	lwz     r10,0(r4)
      34:	7d 10 10 26 	mfocrf  r8,1
      38:	55 08 f7 fe 	rlwinm  r8,r8,30,31,31
      3c:	7d 29 42 14 	add     r9,r9,r8
      40:	81 04 00 04 	lwz     r8,4(r4)
      44:	7f 8b 48 40 	cmplw   cr7,r11,r9
      48:	7d 29 02 14 	add     r9,r9,r0
      4c:	7d 70 10 26 	mfocrf  r11,1
      50:	55 6b f7 fe 	rlwinm  r11,r11,30,31,31
      54:	7d 29 5a 14 	add     r9,r9,r11
      58:	7f 80 48 40 	cmplw   cr7,r0,r9
      5c:	7d 29 52 14 	add     r9,r9,r10
      60:	7c 10 10 26 	mfocrf  r0,1
      64:	54 00 f7 fe 	rlwinm  r0,r0,30,31,31
      68:	7d 69 02 14 	add     r11,r9,r0
      6c:	7f 8a 58 40 	cmplw   cr7,r10,r11
      70:	7c 0b 42 14 	add     r0,r11,r8
      74:	81 44 00 08 	lwz     r10,8(r4)
      78:	7c f0 10 26 	mfocrf  r7,1
      7c:	54 e7 f7 fe 	rlwinm  r7,r7,30,31,31
      80:	7c 00 3a 14 	add     r0,r0,r7
      84:	7f 88 00 40 	cmplw   cr7,r8,r0
      88:	7d 20 52 14 	add     r9,r0,r10
      8c:	80 04 00 0c 	lwz     r0,12(r4)
      90:	7d 70 10 26 	mfocrf  r11,1
      94:	55 6b f7 fe 	rlwinm  r11,r11,30,31,31
      98:	7d 29 5a 14 	add     r9,r9,r11
      9c:	7f 8a 48 40 	cmplw   cr7,r10,r9
      a0:	7d 29 02 14 	add     r9,r9,r0
      a4:	7d 70 10 26 	mfocrf  r11,1
      a8:	55 6b f7 fe 	rlwinm  r11,r11,30,31,31
      ac:	7d 29 5a 14 	add     r9,r9,r11
      b0:	7f 80 48 40 	cmplw   cr7,r0,r9
      b4:	7d 29 2a 14 	add     r9,r9,r5
      b8:	7c 10 10 26 	mfocrf  r0,1
      bc:	54 00 f7 fe 	rlwinm  r0,r0,30,31,31
      c0:	7d 29 02 14 	add     r9,r9,r0
      c4:	7f 85 48 40 	cmplw   cr7,r5,r9
      c8:	7c 09 32 14 	add     r0,r9,r6
      cc:	7d 50 10 26 	mfocrf  r10,1
      d0:	55 4a f7 fe 	rlwinm  r10,r10,30,31,31
      d4:	7c 00 52 14 	add     r0,r0,r10
      d8:	7f 80 30 40 	cmplw   cr7,r0,r6
      dc:	7d 30 10 26 	mfocrf  r9,1
      e0:	55 29 ef fe 	rlwinm  r9,r9,29,31,31
      e4:	7c 09 02 14 	add     r0,r9,r0
      e8:	54 03 80 3e 	rotlwi  r3,r0,16
      ec:	7c 03 02 14 	add     r0,r3,r0
      f0:	7c 03 00 f8 	not     r3,r0
      f4:	78 63 84 22 	rldicl  r3,r3,48,48
      f8:	4e 80 00 20 	blr
    
    This patch implements it in assembly for both PPC32 and PPC64.
    
    Link: https://github.com/linuxppc/linux/issues/9
    Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
    Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
    Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
    e9c4943a
checksum_32.S 7 KB