Commit d7866e50 authored by Uros Bizjak, committed by Herbert Xu

crypto: x86 - Remove include/asm/inst.h

The current minimum required version of binutils is 2.23,
which supports the PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.

Substitute the macros from include/asm/inst.h with the proper
instruction mnemonics in the various assembly files in the
x86/crypto directory, and remove the now-unneeded file.
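
A representative example, taken verbatim from the hunks below, shows the
one-for-one substitution; note that the macro forms took the pclmulqdq
immediate without the '$' prefix and the aeskeygenassist operands without
commas, so those lines also gain the usual AT&T punctuation:

	-	PSHUFB_XMM %xmm10, %xmm0
	+	pshufb %xmm10, %xmm0
	-	PCLMULQDQ 0x11, \TMP5, \TMP1	# TMP1 = a1*b1
	+	pclmulqdq $0x11, \TMP5, \TMP1	# TMP1 = a1*b1
	-	AESKEYGENASSIST 0x1 %xmm2 %xmm1	# round 1
	+	aeskeygenassist $0x1, %xmm2, %xmm1	# round 1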

The patch was tested by calculating and comparing sha256sum
hashes of the stripped object files before and after the patch,
to verify that the executable code did not change.
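
As a sketch of why the object code is expected to stay byte-identical
(the operand pair %xmm2/%xmm3 is chosen only for illustration), the removed
PSHUFB_XMM macro hand-assembled exactly the bytes that the assembler now
emits for the pshufb mnemonic:

	# PSHUFB_XMM %xmm2, %xmm3 expanded via include/asm/inst.h to:
	.byte 0x66				# operand-size prefix (PFX_OPD_SIZE)
						# PFX_REX emits nothing: both regs < 8, W=0
	.byte 0x0f, 0x38, 0x00			# PSHUFB opcode
	.byte 0xc0 | (2 & 7) | ((3 & 7) << 3)	# ModRM 0xda: dst %xmm3, src %xmm2
	# ... which is the same encoding gas produces for:
	pshufb %xmm2, %xmm3
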
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 2c2e1836
...@@ -63,7 +63,6 @@ ...@@ -63,7 +63,6 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/inst.h>
#define VMOVDQ vmovdqu #define VMOVDQ vmovdqu
......
...@@ -26,7 +26,6 @@ ...@@ -26,7 +26,6 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/inst.h>
#include <asm/frame.h> #include <asm/frame.h>
#include <asm/nospec-branch.h> #include <asm/nospec-branch.h>
...@@ -201,7 +200,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff ...@@ -201,7 +200,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
mov \SUBKEY, %r12 mov \SUBKEY, %r12
movdqu (%r12), \TMP3 movdqu (%r12), \TMP3
movdqa SHUF_MASK(%rip), \TMP2 movdqa SHUF_MASK(%rip), \TMP2
PSHUFB_XMM \TMP2, \TMP3 pshufb \TMP2, \TMP3
# precompute HashKey<<1 mod poly from the HashKey (required for GHASH) # precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
...@@ -263,7 +262,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff ...@@ -263,7 +262,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv
movdqa SHUF_MASK(%rip), %xmm2 movdqa SHUF_MASK(%rip), %xmm2
PSHUFB_XMM %xmm2, %xmm0 pshufb %xmm2, %xmm0
movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7 PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7
...@@ -347,7 +346,7 @@ _zero_cipher_left_\@: ...@@ -347,7 +346,7 @@ _zero_cipher_left_\@:
paddd ONE(%rip), %xmm0 # INCR CNT to get Yn paddd ONE(%rip), %xmm0 # INCR CNT to get Yn
movdqu %xmm0, CurCount(%arg2) movdqu %xmm0, CurCount(%arg2)
movdqa SHUF_MASK(%rip), %xmm10 movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10, %xmm0 pshufb %xmm10, %xmm0
ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
movdqu %xmm0, PBlockEncKey(%arg2) movdqu %xmm0, PBlockEncKey(%arg2)
...@@ -377,7 +376,7 @@ _large_enough_update_\@: ...@@ -377,7 +376,7 @@ _large_enough_update_\@:
# get the appropriate shuffle mask # get the appropriate shuffle mask
movdqu (%r12), %xmm2 movdqu (%r12), %xmm2
# shift right 16-r13 bytes # shift right 16-r13 bytes
PSHUFB_XMM %xmm2, %xmm1 pshufb %xmm2, %xmm1
_data_read_\@: _data_read_\@:
lea ALL_F+16(%rip), %r12 lea ALL_F+16(%rip), %r12
...@@ -393,12 +392,12 @@ _data_read_\@: ...@@ -393,12 +392,12 @@ _data_read_\@:
.ifc \operation, dec .ifc \operation, dec
pand %xmm1, %xmm2 pand %xmm1, %xmm2
movdqa SHUF_MASK(%rip), %xmm10 movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10 ,%xmm2 pshufb %xmm10 ,%xmm2
pxor %xmm2, %xmm8 pxor %xmm2, %xmm8
.else .else
movdqa SHUF_MASK(%rip), %xmm10 movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10,%xmm0 pshufb %xmm10,%xmm0
pxor %xmm0, %xmm8 pxor %xmm0, %xmm8
.endif .endif
...@@ -408,17 +407,17 @@ _data_read_\@: ...@@ -408,17 +407,17 @@ _data_read_\@:
# GHASH computation for the last <16 byte block # GHASH computation for the last <16 byte block
movdqa SHUF_MASK(%rip), %xmm10 movdqa SHUF_MASK(%rip), %xmm10
# shuffle xmm0 back to output as ciphertext # shuffle xmm0 back to output as ciphertext
PSHUFB_XMM %xmm10, %xmm0 pshufb %xmm10, %xmm0
.endif .endif
# Output %r13 bytes # Output %r13 bytes
MOVQ_R64_XMM %xmm0, %rax movq %xmm0, %rax
cmp $8, %r13 cmp $8, %r13
jle _less_than_8_bytes_left_\@ jle _less_than_8_bytes_left_\@
mov %rax, (%arg3 , %r11, 1) mov %rax, (%arg3 , %r11, 1)
add $8, %r11 add $8, %r11
psrldq $8, %xmm0 psrldq $8, %xmm0
MOVQ_R64_XMM %xmm0, %rax movq %xmm0, %rax
sub $8, %r13 sub $8, %r13
_less_than_8_bytes_left_\@: _less_than_8_bytes_left_\@:
mov %al, (%arg3, %r11, 1) mov %al, (%arg3, %r11, 1)
...@@ -449,7 +448,7 @@ _partial_done\@: ...@@ -449,7 +448,7 @@ _partial_done\@:
movd %r12d, %xmm15 # len(A) in %xmm15 movd %r12d, %xmm15 # len(A) in %xmm15
mov InLen(%arg2), %r12 mov InLen(%arg2), %r12
shl $3, %r12 # len(C) in bits (*128) shl $3, %r12 # len(C) in bits (*128)
MOVQ_R64_XMM %r12, %xmm1 movq %r12, %xmm1
pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
...@@ -457,7 +456,7 @@ _partial_done\@: ...@@ -457,7 +456,7 @@ _partial_done\@:
GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
# final GHASH computation # final GHASH computation
movdqa SHUF_MASK(%rip), %xmm10 movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10, %xmm8 pshufb %xmm10, %xmm8
movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0 movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0
ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0)
...@@ -470,7 +469,7 @@ _return_T_\@: ...@@ -470,7 +469,7 @@ _return_T_\@:
cmp $8, %r11 cmp $8, %r11
jl _T_4_\@ jl _T_4_\@
_T_8_\@: _T_8_\@:
MOVQ_R64_XMM %xmm0, %rax movq %xmm0, %rax
mov %rax, (%r10) mov %rax, (%r10)
add $8, %r10 add $8, %r10
sub $8, %r11 sub $8, %r11
...@@ -518,9 +517,9 @@ _return_T_done_\@: ...@@ -518,9 +517,9 @@ _return_T_done_\@:
pshufd $78, \HK, \TMP3 pshufd $78, \HK, \TMP3
pxor \GH, \TMP2 # TMP2 = a1+a0 pxor \GH, \TMP2 # TMP2 = a1+a0
pxor \HK, \TMP3 # TMP3 = b1+b0 pxor \HK, \TMP3 # TMP3 = b1+b0
PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1 pclmulqdq $0x11, \HK, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0 pclmulqdq $0x00, \HK, \GH # GH = a0*b0
PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0) pclmulqdq $0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0)
pxor \GH, \TMP2 pxor \GH, \TMP2
pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0) pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0)
movdqa \TMP2, \TMP3 movdqa \TMP2, \TMP3
...@@ -570,7 +569,7 @@ _return_T_done_\@: ...@@ -570,7 +569,7 @@ _return_T_done_\@:
cmp $8, \DLEN cmp $8, \DLEN
jl _read_lt8_\@ jl _read_lt8_\@
mov (\DPTR), %rax mov (\DPTR), %rax
MOVQ_R64_XMM %rax, \XMMDst movq %rax, \XMMDst
sub $8, \DLEN sub $8, \DLEN
jz _done_read_partial_block_\@ jz _done_read_partial_block_\@
xor %eax, %eax xor %eax, %eax
...@@ -579,7 +578,7 @@ _read_next_byte_\@: ...@@ -579,7 +578,7 @@ _read_next_byte_\@:
mov 7(\DPTR, \DLEN, 1), %al mov 7(\DPTR, \DLEN, 1), %al
dec \DLEN dec \DLEN
jnz _read_next_byte_\@ jnz _read_next_byte_\@
MOVQ_R64_XMM %rax, \XMM1 movq %rax, \XMM1
pslldq $8, \XMM1 pslldq $8, \XMM1
por \XMM1, \XMMDst por \XMM1, \XMMDst
jmp _done_read_partial_block_\@ jmp _done_read_partial_block_\@
...@@ -590,7 +589,7 @@ _read_next_byte_lt8_\@: ...@@ -590,7 +589,7 @@ _read_next_byte_lt8_\@:
mov -1(\DPTR, \DLEN, 1), %al mov -1(\DPTR, \DLEN, 1), %al
dec \DLEN dec \DLEN
jnz _read_next_byte_lt8_\@ jnz _read_next_byte_lt8_\@
MOVQ_R64_XMM %rax, \XMMDst movq %rax, \XMMDst
_done_read_partial_block_\@: _done_read_partial_block_\@:
.endm .endm
...@@ -608,7 +607,7 @@ _done_read_partial_block_\@: ...@@ -608,7 +607,7 @@ _done_read_partial_block_\@:
jl _get_AAD_rest\@ jl _get_AAD_rest\@
_get_AAD_blocks\@: _get_AAD_blocks\@:
movdqu (%r10), \TMP7 movdqu (%r10), \TMP7
PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data pshufb %xmm14, \TMP7 # byte-reflect the AAD data
pxor \TMP7, \TMP6 pxor \TMP7, \TMP6
GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
add $16, %r10 add $16, %r10
...@@ -624,7 +623,7 @@ _get_AAD_rest\@: ...@@ -624,7 +623,7 @@ _get_AAD_rest\@:
je _get_AAD_done\@ je _get_AAD_done\@
READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data pshufb %xmm14, \TMP7 # byte-reflect the AAD data
pxor \TMP6, \TMP7 pxor \TMP6, \TMP7
GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
movdqu \TMP7, \TMP6 movdqu \TMP7, \TMP6
...@@ -667,7 +666,7 @@ _data_read_\@: # Finished reading in data ...@@ -667,7 +666,7 @@ _data_read_\@: # Finished reading in data
# r16-r13 is the number of bytes in plaintext mod 16) # r16-r13 is the number of bytes in plaintext mod 16)
add %r13, %r12 add %r13, %r12
movdqu (%r12), %xmm2 # get the appropriate shuffle mask movdqu (%r12), %xmm2 # get the appropriate shuffle mask
PSHUFB_XMM %xmm2, %xmm9 # shift right r13 bytes pshufb %xmm2, %xmm9 # shift right r13 bytes
.ifc \operation, dec .ifc \operation, dec
movdqa %xmm1, %xmm3 movdqa %xmm1, %xmm3
...@@ -689,8 +688,8 @@ _no_extra_mask_1_\@: ...@@ -689,8 +688,8 @@ _no_extra_mask_1_\@:
pand %xmm1, %xmm3 pand %xmm1, %xmm3
movdqa SHUF_MASK(%rip), %xmm10 movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10, %xmm3 pshufb %xmm10, %xmm3
PSHUFB_XMM %xmm2, %xmm3 pshufb %xmm2, %xmm3
pxor %xmm3, \AAD_HASH pxor %xmm3, \AAD_HASH
cmp $0, %r10 cmp $0, %r10
...@@ -724,8 +723,8 @@ _no_extra_mask_2_\@: ...@@ -724,8 +723,8 @@ _no_extra_mask_2_\@:
pand %xmm1, %xmm9 pand %xmm1, %xmm9
movdqa SHUF_MASK(%rip), %xmm1 movdqa SHUF_MASK(%rip), %xmm1
PSHUFB_XMM %xmm1, %xmm9 pshufb %xmm1, %xmm9
PSHUFB_XMM %xmm2, %xmm9 pshufb %xmm2, %xmm9
pxor %xmm9, \AAD_HASH pxor %xmm9, \AAD_HASH
cmp $0, %r10 cmp $0, %r10
...@@ -744,8 +743,8 @@ _encode_done_\@: ...@@ -744,8 +743,8 @@ _encode_done_\@:
movdqa SHUF_MASK(%rip), %xmm10 movdqa SHUF_MASK(%rip), %xmm10
# shuffle xmm9 back to output as ciphertext # shuffle xmm9 back to output as ciphertext
PSHUFB_XMM %xmm10, %xmm9 pshufb %xmm10, %xmm9
PSHUFB_XMM %xmm2, %xmm9 pshufb %xmm2, %xmm9
.endif .endif
# output encrypted Bytes # output encrypted Bytes
cmp $0, %r10 cmp $0, %r10
...@@ -759,14 +758,14 @@ _partial_fill_\@: ...@@ -759,14 +758,14 @@ _partial_fill_\@:
mov \PLAIN_CYPH_LEN, %r13 mov \PLAIN_CYPH_LEN, %r13
_count_set_\@: _count_set_\@:
movdqa %xmm9, %xmm0 movdqa %xmm9, %xmm0
MOVQ_R64_XMM %xmm0, %rax movq %xmm0, %rax
cmp $8, %r13 cmp $8, %r13
jle _less_than_8_bytes_left_\@ jle _less_than_8_bytes_left_\@
mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
add $8, \DATA_OFFSET add $8, \DATA_OFFSET
psrldq $8, %xmm0 psrldq $8, %xmm0
MOVQ_R64_XMM %xmm0, %rax movq %xmm0, %rax
sub $8, %r13 sub $8, %r13
_less_than_8_bytes_left_\@: _less_than_8_bytes_left_\@:
movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
...@@ -810,7 +809,7 @@ _partial_block_done_\@: ...@@ -810,7 +809,7 @@ _partial_block_done_\@:
.else .else
MOVADQ \XMM0, %xmm\index MOVADQ \XMM0, %xmm\index
.endif .endif
PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap pshufb %xmm14, %xmm\index # perform a 16 byte swap
pxor \TMP2, %xmm\index pxor \TMP2, %xmm\index
.endr .endr
lea 0x10(%arg1),%r10 lea 0x10(%arg1),%r10
...@@ -821,7 +820,7 @@ _partial_block_done_\@: ...@@ -821,7 +820,7 @@ _partial_block_done_\@:
aes_loop_initial_\@: aes_loop_initial_\@:
MOVADQ (%r10),\TMP1 MOVADQ (%r10),\TMP1
.irpc index, \i_seq .irpc index, \i_seq
AESENC \TMP1, %xmm\index aesenc \TMP1, %xmm\index
.endr .endr
add $16,%r10 add $16,%r10
sub $1,%eax sub $1,%eax
...@@ -829,7 +828,7 @@ aes_loop_initial_\@: ...@@ -829,7 +828,7 @@ aes_loop_initial_\@:
MOVADQ (%r10), \TMP1 MOVADQ (%r10), \TMP1
.irpc index, \i_seq .irpc index, \i_seq
AESENCLAST \TMP1, %xmm\index # Last Round aesenclast \TMP1, %xmm\index # Last Round
.endr .endr
.irpc index, \i_seq .irpc index, \i_seq
movdqu (%arg4 , %r11, 1), \TMP1 movdqu (%arg4 , %r11, 1), \TMP1
...@@ -841,7 +840,7 @@ aes_loop_initial_\@: ...@@ -841,7 +840,7 @@ aes_loop_initial_\@:
.ifc \operation, dec .ifc \operation, dec
movdqa \TMP1, %xmm\index movdqa \TMP1, %xmm\index
.endif .endif
PSHUFB_XMM %xmm14, %xmm\index pshufb %xmm14, %xmm\index
# prepare plaintext/ciphertext for GHASH computation # prepare plaintext/ciphertext for GHASH computation
.endr .endr
...@@ -876,19 +875,19 @@ aes_loop_initial_\@: ...@@ -876,19 +875,19 @@ aes_loop_initial_\@:
MOVADQ ONE(%RIP),\TMP1 MOVADQ ONE(%RIP),\TMP1
paddd \TMP1, \XMM0 # INCR Y0 paddd \TMP1, \XMM0 # INCR Y0
MOVADQ \XMM0, \XMM1 MOVADQ \XMM0, \XMM1
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap pshufb %xmm14, \XMM1 # perform a 16 byte swap
paddd \TMP1, \XMM0 # INCR Y0 paddd \TMP1, \XMM0 # INCR Y0
MOVADQ \XMM0, \XMM2 MOVADQ \XMM0, \XMM2
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap pshufb %xmm14, \XMM2 # perform a 16 byte swap
paddd \TMP1, \XMM0 # INCR Y0 paddd \TMP1, \XMM0 # INCR Y0
MOVADQ \XMM0, \XMM3 MOVADQ \XMM0, \XMM3
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap pshufb %xmm14, \XMM3 # perform a 16 byte swap
paddd \TMP1, \XMM0 # INCR Y0 paddd \TMP1, \XMM0 # INCR Y0
MOVADQ \XMM0, \XMM4 MOVADQ \XMM0, \XMM4
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap pshufb %xmm14, \XMM4 # perform a 16 byte swap
MOVADQ 0(%arg1),\TMP1 MOVADQ 0(%arg1),\TMP1
pxor \TMP1, \XMM1 pxor \TMP1, \XMM1
...@@ -897,17 +896,17 @@ aes_loop_initial_\@: ...@@ -897,17 +896,17 @@ aes_loop_initial_\@:
pxor \TMP1, \XMM4 pxor \TMP1, \XMM4
.irpc index, 1234 # do 4 rounds .irpc index, 1234 # do 4 rounds
movaps 0x10*\index(%arg1), \TMP1 movaps 0x10*\index(%arg1), \TMP1
AESENC \TMP1, \XMM1 aesenc \TMP1, \XMM1
AESENC \TMP1, \XMM2 aesenc \TMP1, \XMM2
AESENC \TMP1, \XMM3 aesenc \TMP1, \XMM3
AESENC \TMP1, \XMM4 aesenc \TMP1, \XMM4
.endr .endr
.irpc index, 56789 # do next 5 rounds .irpc index, 56789 # do next 5 rounds
movaps 0x10*\index(%arg1), \TMP1 movaps 0x10*\index(%arg1), \TMP1
AESENC \TMP1, \XMM1 aesenc \TMP1, \XMM1
AESENC \TMP1, \XMM2 aesenc \TMP1, \XMM2
AESENC \TMP1, \XMM3 aesenc \TMP1, \XMM3
AESENC \TMP1, \XMM4 aesenc \TMP1, \XMM4
.endr .endr
lea 0xa0(%arg1),%r10 lea 0xa0(%arg1),%r10
mov keysize,%eax mov keysize,%eax
...@@ -918,7 +917,7 @@ aes_loop_initial_\@: ...@@ -918,7 +917,7 @@ aes_loop_initial_\@:
aes_loop_pre_\@: aes_loop_pre_\@:
MOVADQ (%r10),\TMP2 MOVADQ (%r10),\TMP2
.irpc index, 1234 .irpc index, 1234
AESENC \TMP2, %xmm\index aesenc \TMP2, %xmm\index
.endr .endr
add $16,%r10 add $16,%r10
sub $1,%eax sub $1,%eax
...@@ -926,10 +925,10 @@ aes_loop_pre_\@: ...@@ -926,10 +925,10 @@ aes_loop_pre_\@:
aes_loop_pre_done\@: aes_loop_pre_done\@:
MOVADQ (%r10), \TMP2 MOVADQ (%r10), \TMP2
AESENCLAST \TMP2, \XMM1 aesenclast \TMP2, \XMM1
AESENCLAST \TMP2, \XMM2 aesenclast \TMP2, \XMM2
AESENCLAST \TMP2, \XMM3 aesenclast \TMP2, \XMM3
AESENCLAST \TMP2, \XMM4 aesenclast \TMP2, \XMM4
movdqu 16*0(%arg4 , %r11 , 1), \TMP1 movdqu 16*0(%arg4 , %r11 , 1), \TMP1
pxor \TMP1, \XMM1 pxor \TMP1, \XMM1
.ifc \operation, dec .ifc \operation, dec
...@@ -961,12 +960,12 @@ aes_loop_pre_done\@: ...@@ -961,12 +960,12 @@ aes_loop_pre_done\@:
.endif .endif
add $64, %r11 add $64, %r11
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap pshufb %xmm14, \XMM1 # perform a 16 byte swap
pxor \XMMDst, \XMM1 pxor \XMMDst, \XMM1
# combine GHASHed value with the corresponding ciphertext # combine GHASHed value with the corresponding ciphertext
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap pshufb %xmm14, \XMM2 # perform a 16 byte swap
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap pshufb %xmm14, \XMM3 # perform a 16 byte swap
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap pshufb %xmm14, \XMM4 # perform a 16 byte swap
_initial_blocks_done\@: _initial_blocks_done\@:
...@@ -994,7 +993,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation ...@@ -994,7 +993,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pxor \XMM5, \TMP6 pxor \XMM5, \TMP6
paddd ONE(%rip), \XMM0 # INCR CNT paddd ONE(%rip), \XMM0 # INCR CNT
movdqu HashKey_4(%arg2), \TMP5 movdqu HashKey_4(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1
movdqa \XMM0, \XMM1 movdqa \XMM0, \XMM1
paddd ONE(%rip), \XMM0 # INCR CNT paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM2 movdqa \XMM0, \XMM2
...@@ -1002,51 +1001,51 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation ...@@ -1002,51 +1001,51 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM0, \XMM3 movdqa \XMM0, \XMM3
paddd ONE(%rip), \XMM0 # INCR CNT paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM4 movdqa \XMM0, \XMM4
PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap pshufb %xmm15, \XMM1 # perform a 16 byte swap
PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0
PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap pshufb %xmm15, \XMM2 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap pshufb %xmm15, \XMM3 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap pshufb %xmm15, \XMM4 # perform a 16 byte swap
pxor (%arg1), \XMM1 pxor (%arg1), \XMM1
pxor (%arg1), \XMM2 pxor (%arg1), \XMM2
pxor (%arg1), \XMM3 pxor (%arg1), \XMM3
pxor (%arg1), \XMM4 pxor (%arg1), \XMM4
movdqu HashKey_4_k(%arg2), \TMP5 movdqu HashKey_4_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
movaps 0x10(%arg1), \TMP1 movaps 0x10(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 1 aesenc \TMP1, \XMM1 # Round 1
AESENC \TMP1, \XMM2 aesenc \TMP1, \XMM2
AESENC \TMP1, \XMM3 aesenc \TMP1, \XMM3
AESENC \TMP1, \XMM4 aesenc \TMP1, \XMM4
movaps 0x20(%arg1), \TMP1 movaps 0x20(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 2 aesenc \TMP1, \XMM1 # Round 2
AESENC \TMP1, \XMM2 aesenc \TMP1, \XMM2
AESENC \TMP1, \XMM3 aesenc \TMP1, \XMM3
AESENC \TMP1, \XMM4 aesenc \TMP1, \XMM4
movdqa \XMM6, \TMP1 movdqa \XMM6, \TMP1
pshufd $78, \XMM6, \TMP2 pshufd $78, \XMM6, \TMP2
pxor \XMM6, \TMP2 pxor \XMM6, \TMP2
movdqu HashKey_3(%arg2), \TMP5 movdqu HashKey_3(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
movaps 0x30(%arg1), \TMP3 movaps 0x30(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 3 aesenc \TMP3, \XMM1 # Round 3
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0
movaps 0x40(%arg1), \TMP3 movaps 0x40(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 4 aesenc \TMP3, \XMM1 # Round 4
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
movdqu HashKey_3_k(%arg2), \TMP5 movdqu HashKey_3_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x50(%arg1), \TMP3 movaps 0x50(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 5 aesenc \TMP3, \XMM1 # Round 5
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
pxor \TMP1, \TMP4 pxor \TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
pxor \XMM6, \XMM5 pxor \XMM6, \XMM5
...@@ -1058,25 +1057,25 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation ...@@ -1058,25 +1057,25 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
# Multiply TMP5 * HashKey using karatsuba # Multiply TMP5 * HashKey using karatsuba
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x60(%arg1), \TMP3 movaps 0x60(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 6 aesenc \TMP3, \XMM1 # Round 6
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0
movaps 0x70(%arg1), \TMP3 movaps 0x70(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 7 aesenc \TMP3, \XMM1 # Round 7
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
movdqu HashKey_2_k(%arg2), \TMP5 movdqu HashKey_2_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x80(%arg1), \TMP3 movaps 0x80(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 8 aesenc \TMP3, \XMM1 # Round 8
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
pxor \TMP1, \TMP4 pxor \TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
pxor \XMM7, \XMM5 pxor \XMM7, \XMM5
...@@ -1089,13 +1088,13 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation ...@@ -1089,13 +1088,13 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pshufd $78, \XMM8, \TMP2 pshufd $78, \XMM8, \TMP2
pxor \XMM8, \TMP2 pxor \XMM8, \TMP2
movdqu HashKey(%arg2), \TMP5 movdqu HashKey(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x90(%arg1), \TMP3 movaps 0x90(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 9 aesenc \TMP3, \XMM1 # Round 9
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0
lea 0xa0(%arg1),%r10 lea 0xa0(%arg1),%r10
mov keysize,%eax mov keysize,%eax
shr $2,%eax # 128->4, 192->6, 256->8 shr $2,%eax # 128->4, 192->6, 256->8
...@@ -1105,7 +1104,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation ...@@ -1105,7 +1104,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
aes_loop_par_enc\@: aes_loop_par_enc\@:
MOVADQ (%r10),\TMP3 MOVADQ (%r10),\TMP3
.irpc index, 1234 .irpc index, 1234
AESENC \TMP3, %xmm\index aesenc \TMP3, %xmm\index
.endr .endr
add $16,%r10 add $16,%r10
sub $1,%eax sub $1,%eax
...@@ -1113,12 +1112,12 @@ aes_loop_par_enc\@: ...@@ -1113,12 +1112,12 @@ aes_loop_par_enc\@:
aes_loop_par_enc_done\@: aes_loop_par_enc_done\@:
MOVADQ (%r10), \TMP3 MOVADQ (%r10), \TMP3
AESENCLAST \TMP3, \XMM1 # Round 10 aesenclast \TMP3, \XMM1 # Round 10
AESENCLAST \TMP3, \XMM2 aesenclast \TMP3, \XMM2
AESENCLAST \TMP3, \XMM3 aesenclast \TMP3, \XMM3
AESENCLAST \TMP3, \XMM4 aesenclast \TMP3, \XMM4
movdqu HashKey_k(%arg2), \TMP5 movdqu HashKey_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqu (%arg4,%r11,1), \TMP3 movdqu (%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
movdqu 16(%arg4,%r11,1), \TMP3 movdqu 16(%arg4,%r11,1), \TMP3
...@@ -1131,10 +1130,10 @@ aes_loop_par_enc_done\@: ...@@ -1131,10 +1130,10 @@ aes_loop_par_enc_done\@:
movdqu \XMM2, 16(%arg3,%r11,1) # Write to the ciphertext buffer movdqu \XMM2, 16(%arg3,%r11,1) # Write to the ciphertext buffer
movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer
movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer
PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap pshufb %xmm15, \XMM1 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap pshufb %xmm15, \XMM2 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap pshufb %xmm15, \XMM3 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap pshufb %xmm15, \XMM4 # perform a 16 byte swap
pxor \TMP4, \TMP1 pxor \TMP4, \TMP1
pxor \XMM8, \XMM5 pxor \XMM8, \XMM5
...@@ -1202,7 +1201,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation ...@@ -1202,7 +1201,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pxor \XMM5, \TMP6 pxor \XMM5, \TMP6
paddd ONE(%rip), \XMM0 # INCR CNT paddd ONE(%rip), \XMM0 # INCR CNT
movdqu HashKey_4(%arg2), \TMP5 movdqu HashKey_4(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1
movdqa \XMM0, \XMM1 movdqa \XMM0, \XMM1
paddd ONE(%rip), \XMM0 # INCR CNT paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM2 movdqa \XMM0, \XMM2
...@@ -1210,51 +1209,51 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation ...@@ -1210,51 +1209,51 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM0, \XMM3 movdqa \XMM0, \XMM3
paddd ONE(%rip), \XMM0 # INCR CNT paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM4 movdqa \XMM0, \XMM4
PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap pshufb %xmm15, \XMM1 # perform a 16 byte swap
PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0
PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap pshufb %xmm15, \XMM2 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap pshufb %xmm15, \XMM3 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap pshufb %xmm15, \XMM4 # perform a 16 byte swap
pxor (%arg1), \XMM1 pxor (%arg1), \XMM1
pxor (%arg1), \XMM2 pxor (%arg1), \XMM2
pxor (%arg1), \XMM3 pxor (%arg1), \XMM3
pxor (%arg1), \XMM4 pxor (%arg1), \XMM4
movdqu HashKey_4_k(%arg2), \TMP5 movdqu HashKey_4_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
movaps 0x10(%arg1), \TMP1 movaps 0x10(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 1 aesenc \TMP1, \XMM1 # Round 1
AESENC \TMP1, \XMM2 aesenc \TMP1, \XMM2
AESENC \TMP1, \XMM3 aesenc \TMP1, \XMM3
AESENC \TMP1, \XMM4 aesenc \TMP1, \XMM4
movaps 0x20(%arg1), \TMP1 movaps 0x20(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 2 aesenc \TMP1, \XMM1 # Round 2
AESENC \TMP1, \XMM2 aesenc \TMP1, \XMM2
AESENC \TMP1, \XMM3 aesenc \TMP1, \XMM3
AESENC \TMP1, \XMM4 aesenc \TMP1, \XMM4
movdqa \XMM6, \TMP1 movdqa \XMM6, \TMP1
pshufd $78, \XMM6, \TMP2 pshufd $78, \XMM6, \TMP2
pxor \XMM6, \TMP2 pxor \XMM6, \TMP2
movdqu HashKey_3(%arg2), \TMP5 movdqu HashKey_3(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
movaps 0x30(%arg1), \TMP3 movaps 0x30(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 3 aesenc \TMP3, \XMM1 # Round 3
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0
movaps 0x40(%arg1), \TMP3 movaps 0x40(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 4 aesenc \TMP3, \XMM1 # Round 4
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
movdqu HashKey_3_k(%arg2), \TMP5 movdqu HashKey_3_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x50(%arg1), \TMP3 movaps 0x50(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 5 aesenc \TMP3, \XMM1 # Round 5
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
pxor \TMP1, \TMP4 pxor \TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
pxor \XMM6, \XMM5 pxor \XMM6, \XMM5
...@@ -1266,25 +1265,25 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation ...@@ -1266,25 +1265,25 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
# Multiply TMP5 * HashKey using karatsuba # Multiply TMP5 * HashKey using karatsuba
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x60(%arg1), \TMP3 movaps 0x60(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 6 aesenc \TMP3, \XMM1 # Round 6
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0
movaps 0x70(%arg1), \TMP3 movaps 0x70(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 7 aesenc \TMP3, \XMM1 # Round 7
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
movdqu HashKey_2_k(%arg2), \TMP5 movdqu HashKey_2_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x80(%arg1), \TMP3 movaps 0x80(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 8 aesenc \TMP3, \XMM1 # Round 8
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
pxor \TMP1, \TMP4 pxor \TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
pxor \XMM7, \XMM5 pxor \XMM7, \XMM5
...@@ -1297,13 +1296,13 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation ...@@ -1297,13 +1296,13 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pshufd $78, \XMM8, \TMP2 pshufd $78, \XMM8, \TMP2
pxor \XMM8, \TMP2 pxor \XMM8, \TMP2
movdqu HashKey(%arg2), \TMP5 movdqu HashKey(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x90(%arg1), \TMP3 movaps 0x90(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 9 aesenc \TMP3, \XMM1 # Round 9
AESENC \TMP3, \XMM2 aesenc \TMP3, \XMM2
AESENC \TMP3, \XMM3 aesenc \TMP3, \XMM3
AESENC \TMP3, \XMM4 aesenc \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0
lea 0xa0(%arg1),%r10 lea 0xa0(%arg1),%r10
mov keysize,%eax mov keysize,%eax
shr $2,%eax # 128->4, 192->6, 256->8 shr $2,%eax # 128->4, 192->6, 256->8
...@@ -1313,7 +1312,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation ...@@ -1313,7 +1312,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
aes_loop_par_dec\@: aes_loop_par_dec\@:
MOVADQ (%r10),\TMP3 MOVADQ (%r10),\TMP3
.irpc index, 1234 .irpc index, 1234
AESENC \TMP3, %xmm\index aesenc \TMP3, %xmm\index
.endr .endr
add $16,%r10 add $16,%r10
sub $1,%eax sub $1,%eax
...@@ -1321,12 +1320,12 @@ aes_loop_par_dec\@: ...@@ -1321,12 +1320,12 @@ aes_loop_par_dec\@:
aes_loop_par_dec_done\@: aes_loop_par_dec_done\@:
MOVADQ (%r10), \TMP3 MOVADQ (%r10), \TMP3
AESENCLAST \TMP3, \XMM1 # last round aesenclast \TMP3, \XMM1 # last round
AESENCLAST \TMP3, \XMM2 aesenclast \TMP3, \XMM2
AESENCLAST \TMP3, \XMM3 aesenclast \TMP3, \XMM3
AESENCLAST \TMP3, \XMM4 aesenclast \TMP3, \XMM4
movdqu HashKey_k(%arg2), \TMP5 movdqu HashKey_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqu (%arg4,%r11,1), \TMP3 movdqu (%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer
...@@ -1343,10 +1342,10 @@ aes_loop_par_dec_done\@: ...@@ -1343,10 +1342,10 @@ aes_loop_par_dec_done\@:
pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer
movdqa \TMP3, \XMM4 movdqa \TMP3, \XMM4
PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap pshufb %xmm15, \XMM1 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap pshufb %xmm15, \XMM2 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap pshufb %xmm15, \XMM3 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap pshufb %xmm15, \XMM4 # perform a 16 byte swap
pxor \TMP4, \TMP1 pxor \TMP4, \TMP1
pxor \XMM8, \XMM5 pxor \XMM8, \XMM5
...@@ -1402,10 +1401,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst ...@@ -1402,10 +1401,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
pshufd $78, \XMM1, \TMP2 pshufd $78, \XMM1, \TMP2
pxor \XMM1, \TMP2 pxor \XMM1, \TMP2
movdqu HashKey_4(%arg2), \TMP5 movdqu HashKey_4(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1 pclmulqdq $0x11, \TMP5, \TMP6 # TMP6 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0 pclmulqdq $0x00, \TMP5, \XMM1 # XMM1 = a0*b0
movdqu HashKey_4_k(%arg2), \TMP4 movdqu HashKey_4_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqa \XMM1, \XMMDst movdqa \XMM1, \XMMDst
movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1
...@@ -1415,10 +1414,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst ...@@ -1415,10 +1414,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
pshufd $78, \XMM2, \TMP2 pshufd $78, \XMM2, \TMP2
pxor \XMM2, \TMP2 pxor \XMM2, \TMP2
movdqu HashKey_3(%arg2), \TMP5 movdqu HashKey_3(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0 pclmulqdq $0x00, \TMP5, \XMM2 # XMM2 = a0*b0
movdqu HashKey_3_k(%arg2), \TMP4 movdqu HashKey_3_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6 pxor \TMP1, \TMP6
pxor \XMM2, \XMMDst pxor \XMM2, \XMMDst
pxor \TMP2, \XMM1 pxor \TMP2, \XMM1
...@@ -1430,10 +1429,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst ...@@ -1430,10 +1429,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
pshufd $78, \XMM3, \TMP2 pshufd $78, \XMM3, \TMP2
pxor \XMM3, \TMP2 pxor \XMM3, \TMP2
movdqu HashKey_2(%arg2), \TMP5 movdqu HashKey_2(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0 pclmulqdq $0x00, \TMP5, \XMM3 # XMM3 = a0*b0
movdqu HashKey_2_k(%arg2), \TMP4 movdqu HashKey_2_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6 pxor \TMP1, \TMP6
pxor \XMM3, \XMMDst pxor \XMM3, \XMMDst
pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1 pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1
...@@ -1443,10 +1442,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst ...@@ -1443,10 +1442,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
pshufd $78, \XMM4, \TMP2 pshufd $78, \XMM4, \TMP2
pxor \XMM4, \TMP2 pxor \XMM4, \TMP2
movdqu HashKey(%arg2), \TMP5 movdqu HashKey(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0 pclmulqdq $0x00, \TMP5, \XMM4 # XMM4 = a0*b0
movdqu HashKey_k(%arg2), \TMP4 movdqu HashKey_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6 pxor \TMP1, \TMP6
pxor \XMM4, \XMMDst pxor \XMM4, \XMMDst
pxor \XMM1, \TMP2 pxor \XMM1, \TMP2
...@@ -1504,13 +1503,13 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst ...@@ -1504,13 +1503,13 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
_esb_loop_\@: _esb_loop_\@:
MOVADQ (%r10),\TMP1 MOVADQ (%r10),\TMP1
AESENC \TMP1,\XMM0 aesenc \TMP1,\XMM0
add $16,%r10 add $16,%r10
sub $1,%eax sub $1,%eax
jnz _esb_loop_\@ jnz _esb_loop_\@
MOVADQ (%r10),\TMP1 MOVADQ (%r10),\TMP1
AESENCLAST \TMP1,\XMM0 aesenclast \TMP1,\XMM0
.endm .endm
/***************************************************************************** /*****************************************************************************
* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
...@@ -1849,72 +1848,72 @@ SYM_FUNC_START(aesni_set_key) ...@@ -1849,72 +1848,72 @@ SYM_FUNC_START(aesni_set_key)
movups 0x10(UKEYP), %xmm2 # other user key movups 0x10(UKEYP), %xmm2 # other user key
movaps %xmm2, (TKEYP) movaps %xmm2, (TKEYP)
add $0x10, TKEYP add $0x10, TKEYP
AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
call _key_expansion_256a call _key_expansion_256a
AESKEYGENASSIST 0x1 %xmm0 %xmm1 aeskeygenassist $0x1, %xmm0, %xmm1
call _key_expansion_256b call _key_expansion_256b
AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
call _key_expansion_256a call _key_expansion_256a
AESKEYGENASSIST 0x2 %xmm0 %xmm1 aeskeygenassist $0x2, %xmm0, %xmm1
call _key_expansion_256b call _key_expansion_256b
AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
call _key_expansion_256a call _key_expansion_256a
AESKEYGENASSIST 0x4 %xmm0 %xmm1 aeskeygenassist $0x4, %xmm0, %xmm1
call _key_expansion_256b call _key_expansion_256b
AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
call _key_expansion_256a call _key_expansion_256a
AESKEYGENASSIST 0x8 %xmm0 %xmm1 aeskeygenassist $0x8, %xmm0, %xmm1
call _key_expansion_256b call _key_expansion_256b
AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
call _key_expansion_256a call _key_expansion_256a
AESKEYGENASSIST 0x10 %xmm0 %xmm1 aeskeygenassist $0x10, %xmm0, %xmm1
call _key_expansion_256b call _key_expansion_256b
AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
call _key_expansion_256a call _key_expansion_256a
AESKEYGENASSIST 0x20 %xmm0 %xmm1 aeskeygenassist $0x20, %xmm0, %xmm1
call _key_expansion_256b call _key_expansion_256b
AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
call _key_expansion_256a call _key_expansion_256a
jmp .Ldec_key jmp .Ldec_key
.Lenc_key192: .Lenc_key192:
movq 0x10(UKEYP), %xmm2 # other user key movq 0x10(UKEYP), %xmm2 # other user key
AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
call _key_expansion_192a call _key_expansion_192a
AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
call _key_expansion_192b call _key_expansion_192b
AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
call _key_expansion_192a call _key_expansion_192a
AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
call _key_expansion_192b call _key_expansion_192b
AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
call _key_expansion_192a call _key_expansion_192a
AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
call _key_expansion_192b call _key_expansion_192b
AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
call _key_expansion_192a call _key_expansion_192a
AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8 aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
call _key_expansion_192b call _key_expansion_192b
jmp .Ldec_key jmp .Ldec_key
.Lenc_key128: .Lenc_key128:
AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1 aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
call _key_expansion_128 call _key_expansion_128
AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2 aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
call _key_expansion_128 call _key_expansion_128
AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3 aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
call _key_expansion_128 call _key_expansion_128
AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4 aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
call _key_expansion_128 call _key_expansion_128
AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5 aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
call _key_expansion_128 call _key_expansion_128
AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6 aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
call _key_expansion_128 call _key_expansion_128
AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7 aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
call _key_expansion_128 call _key_expansion_128
AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8 aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
call _key_expansion_128 call _key_expansion_128
AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9 aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
call _key_expansion_128 call _key_expansion_128
AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
call _key_expansion_128 call _key_expansion_128
.Ldec_key: .Ldec_key:
sub $0x10, TKEYP sub $0x10, TKEYP
...@@ -1927,7 +1926,7 @@ SYM_FUNC_START(aesni_set_key) ...@@ -1927,7 +1926,7 @@ SYM_FUNC_START(aesni_set_key)
.align 4 .align 4
.Ldec_key_loop: .Ldec_key_loop:
movaps (KEYP), %xmm0 movaps (KEYP), %xmm0
AESIMC %xmm0 %xmm1 aesimc %xmm0, %xmm1
movaps %xmm1, (UKEYP) movaps %xmm1, (UKEYP)
add $0x10, KEYP add $0x10, KEYP
sub $0x10, UKEYP sub $0x10, UKEYP
...@@ -1988,37 +1987,37 @@ SYM_FUNC_START_LOCAL(_aesni_enc1) ...@@ -1988,37 +1987,37 @@ SYM_FUNC_START_LOCAL(_aesni_enc1)
je .Lenc192 je .Lenc192
add $0x20, TKEYP add $0x20, TKEYP
movaps -0x60(TKEYP), KEY movaps -0x60(TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
movaps -0x50(TKEYP), KEY movaps -0x50(TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
.align 4 .align 4
.Lenc192: .Lenc192:
movaps -0x40(TKEYP), KEY movaps -0x40(TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
movaps -0x30(TKEYP), KEY movaps -0x30(TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
.align 4 .align 4
.Lenc128: .Lenc128:
movaps -0x20(TKEYP), KEY movaps -0x20(TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
movaps -0x10(TKEYP), KEY movaps -0x10(TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
movaps (TKEYP), KEY movaps (TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
movaps 0x10(TKEYP), KEY movaps 0x10(TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
movaps 0x20(TKEYP), KEY movaps 0x20(TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
movaps 0x30(TKEYP), KEY movaps 0x30(TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
movaps 0x40(TKEYP), KEY movaps 0x40(TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
movaps 0x50(TKEYP), KEY movaps 0x50(TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
movaps 0x60(TKEYP), KEY movaps 0x60(TKEYP), KEY
AESENC KEY STATE aesenc KEY, STATE
movaps 0x70(TKEYP), KEY movaps 0x70(TKEYP), KEY
AESENCLAST KEY STATE aesenclast KEY, STATE
ret ret
SYM_FUNC_END(_aesni_enc1) SYM_FUNC_END(_aesni_enc1)
...@@ -2054,79 +2053,79 @@ SYM_FUNC_START_LOCAL(_aesni_enc4) ...@@ -2054,79 +2053,79 @@ SYM_FUNC_START_LOCAL(_aesni_enc4)
je .L4enc192 je .L4enc192
add $0x20, TKEYP add $0x20, TKEYP
movaps -0x60(TKEYP), KEY movaps -0x60(TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
movaps -0x50(TKEYP), KEY movaps -0x50(TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
#.align 4 #.align 4
.L4enc192: .L4enc192:
movaps -0x40(TKEYP), KEY movaps -0x40(TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
movaps -0x30(TKEYP), KEY movaps -0x30(TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
#.align 4 #.align 4
.L4enc128: .L4enc128:
movaps -0x20(TKEYP), KEY movaps -0x20(TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
movaps -0x10(TKEYP), KEY movaps -0x10(TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
movaps (TKEYP), KEY movaps (TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
movaps 0x10(TKEYP), KEY movaps 0x10(TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
movaps 0x20(TKEYP), KEY movaps 0x20(TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
movaps 0x30(TKEYP), KEY movaps 0x30(TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
movaps 0x40(TKEYP), KEY movaps 0x40(TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
movaps 0x50(TKEYP), KEY movaps 0x50(TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
movaps 0x60(TKEYP), KEY movaps 0x60(TKEYP), KEY
AESENC KEY STATE1 aesenc KEY, STATE1
AESENC KEY STATE2 aesenc KEY, STATE2
AESENC KEY STATE3 aesenc KEY, STATE3
AESENC KEY STATE4 aesenc KEY, STATE4
movaps 0x70(TKEYP), KEY movaps 0x70(TKEYP), KEY
AESENCLAST KEY STATE1 # last round aesenclast KEY, STATE1 # last round
AESENCLAST KEY STATE2 aesenclast KEY, STATE2
AESENCLAST KEY STATE3 aesenclast KEY, STATE3
AESENCLAST KEY STATE4 aesenclast KEY, STATE4
ret ret
SYM_FUNC_END(_aesni_enc4) SYM_FUNC_END(_aesni_enc4)
...@@ -2178,37 +2177,37 @@ SYM_FUNC_START_LOCAL(_aesni_dec1) ...@@ -2178,37 +2177,37 @@ SYM_FUNC_START_LOCAL(_aesni_dec1)
je .Ldec192 je .Ldec192
add $0x20, TKEYP add $0x20, TKEYP
movaps -0x60(TKEYP), KEY movaps -0x60(TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
movaps -0x50(TKEYP), KEY movaps -0x50(TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
.align 4 .align 4
.Ldec192: .Ldec192:
movaps -0x40(TKEYP), KEY movaps -0x40(TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
movaps -0x30(TKEYP), KEY movaps -0x30(TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
.align 4 .align 4
.Ldec128: .Ldec128:
movaps -0x20(TKEYP), KEY movaps -0x20(TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
movaps -0x10(TKEYP), KEY movaps -0x10(TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
movaps (TKEYP), KEY movaps (TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
movaps 0x10(TKEYP), KEY movaps 0x10(TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
movaps 0x20(TKEYP), KEY movaps 0x20(TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
movaps 0x30(TKEYP), KEY movaps 0x30(TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
movaps 0x40(TKEYP), KEY movaps 0x40(TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
movaps 0x50(TKEYP), KEY movaps 0x50(TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
movaps 0x60(TKEYP), KEY movaps 0x60(TKEYP), KEY
AESDEC KEY STATE aesdec KEY, STATE
movaps 0x70(TKEYP), KEY movaps 0x70(TKEYP), KEY
AESDECLAST KEY STATE aesdeclast KEY, STATE
ret ret
SYM_FUNC_END(_aesni_dec1) SYM_FUNC_END(_aesni_dec1)
...@@ -2244,79 +2243,79 @@ SYM_FUNC_START_LOCAL(_aesni_dec4) ...@@ -2244,79 +2243,79 @@ SYM_FUNC_START_LOCAL(_aesni_dec4)
je .L4dec192 je .L4dec192
add $0x20, TKEYP add $0x20, TKEYP
movaps -0x60(TKEYP), KEY movaps -0x60(TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
movaps -0x50(TKEYP), KEY movaps -0x50(TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
.align 4 .align 4
.L4dec192: .L4dec192:
movaps -0x40(TKEYP), KEY movaps -0x40(TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
movaps -0x30(TKEYP), KEY movaps -0x30(TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
.align 4 .align 4
.L4dec128: .L4dec128:
movaps -0x20(TKEYP), KEY movaps -0x20(TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
movaps -0x10(TKEYP), KEY movaps -0x10(TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
movaps (TKEYP), KEY movaps (TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
movaps 0x10(TKEYP), KEY movaps 0x10(TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
movaps 0x20(TKEYP), KEY movaps 0x20(TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
movaps 0x30(TKEYP), KEY movaps 0x30(TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
movaps 0x40(TKEYP), KEY movaps 0x40(TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
movaps 0x50(TKEYP), KEY movaps 0x50(TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
movaps 0x60(TKEYP), KEY movaps 0x60(TKEYP), KEY
AESDEC KEY STATE1 aesdec KEY, STATE1
AESDEC KEY STATE2 aesdec KEY, STATE2
AESDEC KEY STATE3 aesdec KEY, STATE3
AESDEC KEY STATE4 aesdec KEY, STATE4
movaps 0x70(TKEYP), KEY movaps 0x70(TKEYP), KEY
AESDECLAST KEY STATE1 # last round aesdeclast KEY, STATE1 # last round
AESDECLAST KEY STATE2 aesdeclast KEY, STATE2
AESDECLAST KEY STATE3 aesdeclast KEY, STATE3
AESDECLAST KEY STATE4 aesdeclast KEY, STATE4
ret ret
SYM_FUNC_END(_aesni_dec4) SYM_FUNC_END(_aesni_dec4)
...@@ -2599,10 +2598,10 @@ SYM_FUNC_END(aesni_cbc_dec) ...@@ -2599,10 +2598,10 @@ SYM_FUNC_END(aesni_cbc_dec)
SYM_FUNC_START_LOCAL(_aesni_inc_init) SYM_FUNC_START_LOCAL(_aesni_inc_init)
movaps .Lbswap_mask, BSWAP_MASK movaps .Lbswap_mask, BSWAP_MASK
movaps IV, CTR movaps IV, CTR
PSHUFB_XMM BSWAP_MASK CTR pshufb BSWAP_MASK, CTR
mov $1, TCTR_LOW mov $1, TCTR_LOW
MOVQ_R64_XMM TCTR_LOW INC movq TCTR_LOW, INC
MOVQ_R64_XMM CTR TCTR_LOW movq CTR, TCTR_LOW
ret ret
SYM_FUNC_END(_aesni_inc_init) SYM_FUNC_END(_aesni_inc_init)
...@@ -2630,7 +2629,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc) ...@@ -2630,7 +2629,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc)
psrldq $8, INC psrldq $8, INC
.Linc_low: .Linc_low:
movaps CTR, IV movaps CTR, IV
PSHUFB_XMM BSWAP_MASK IV pshufb BSWAP_MASK, IV
ret ret
SYM_FUNC_END(_aesni_inc) SYM_FUNC_END(_aesni_inc)
......
...@@ -120,7 +120,6 @@ ...@@ -120,7 +120,6 @@
## ##
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/inst.h>
# constants in mergeable sections, linker can reorder and merge # constants in mergeable sections, linker can reorder and merge
.section .rodata.cst16.POLY, "aM", @progbits, 16 .section .rodata.cst16.POLY, "aM", @progbits, 16
......
...@@ -38,7 +38,6 @@ ...@@ -38,7 +38,6 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/inst.h>
.section .rodata .section .rodata
...@@ -129,17 +128,17 @@ loop_64:/* 64 bytes Full cache line folding */ ...@@ -129,17 +128,17 @@ loop_64:/* 64 bytes Full cache line folding */
#ifdef __x86_64__ #ifdef __x86_64__
movdqa %xmm4, %xmm8 movdqa %xmm4, %xmm8
#endif #endif
PCLMULQDQ 00, CONSTANT, %xmm1 pclmulqdq $0x00, CONSTANT, %xmm1
PCLMULQDQ 00, CONSTANT, %xmm2 pclmulqdq $0x00, CONSTANT, %xmm2
PCLMULQDQ 00, CONSTANT, %xmm3 pclmulqdq $0x00, CONSTANT, %xmm3
#ifdef __x86_64__ #ifdef __x86_64__
PCLMULQDQ 00, CONSTANT, %xmm4 pclmulqdq $0x00, CONSTANT, %xmm4
#endif #endif
PCLMULQDQ 0x11, CONSTANT, %xmm5 pclmulqdq $0x11, CONSTANT, %xmm5
PCLMULQDQ 0x11, CONSTANT, %xmm6 pclmulqdq $0x11, CONSTANT, %xmm6
PCLMULQDQ 0x11, CONSTANT, %xmm7 pclmulqdq $0x11, CONSTANT, %xmm7
#ifdef __x86_64__ #ifdef __x86_64__
PCLMULQDQ 0x11, CONSTANT, %xmm8 pclmulqdq $0x11, CONSTANT, %xmm8
#endif #endif
pxor %xmm5, %xmm1 pxor %xmm5, %xmm1
pxor %xmm6, %xmm2 pxor %xmm6, %xmm2
...@@ -149,8 +148,8 @@ loop_64:/* 64 bytes Full cache line folding */ ...@@ -149,8 +148,8 @@ loop_64:/* 64 bytes Full cache line folding */
#else #else
/* xmm8 unsupported for x32 */ /* xmm8 unsupported for x32 */
movdqa %xmm4, %xmm5 movdqa %xmm4, %xmm5
PCLMULQDQ 00, CONSTANT, %xmm4 pclmulqdq $0x00, CONSTANT, %xmm4
PCLMULQDQ 0x11, CONSTANT, %xmm5 pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm4 pxor %xmm5, %xmm4
#endif #endif
...@@ -172,20 +171,20 @@ less_64:/* Folding cache line into 128bit */ ...@@ -172,20 +171,20 @@ less_64:/* Folding cache line into 128bit */
prefetchnta (BUF) prefetchnta (BUF)
movdqa %xmm1, %xmm5 movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1 pclmulqdq $0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5 pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1 pxor %xmm5, %xmm1
pxor %xmm2, %xmm1 pxor %xmm2, %xmm1
movdqa %xmm1, %xmm5 movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1 pclmulqdq $0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5 pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1 pxor %xmm5, %xmm1
pxor %xmm3, %xmm1 pxor %xmm3, %xmm1
movdqa %xmm1, %xmm5 movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1 pclmulqdq $0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5 pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1 pxor %xmm5, %xmm1
pxor %xmm4, %xmm1 pxor %xmm4, %xmm1
...@@ -193,8 +192,8 @@ less_64:/* Folding cache line into 128bit */ ...@@ -193,8 +192,8 @@ less_64:/* Folding cache line into 128bit */
jb fold_64 jb fold_64
loop_16:/* Folding rest buffer into 128bit */ loop_16:/* Folding rest buffer into 128bit */
movdqa %xmm1, %xmm5 movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1 pclmulqdq $0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5 pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1 pxor %xmm5, %xmm1
pxor (BUF), %xmm1 pxor (BUF), %xmm1
sub $0x10, LEN sub $0x10, LEN
...@@ -205,7 +204,7 @@ loop_16:/* Folding rest buffer into 128bit */ ...@@ -205,7 +204,7 @@ loop_16:/* Folding rest buffer into 128bit */
fold_64: fold_64:
/* perform the last 64 bit fold, also adds 32 zeroes /* perform the last 64 bit fold, also adds 32 zeroes
* to the input stream */ * to the input stream */
PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */ pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
psrldq $0x08, %xmm1 psrldq $0x08, %xmm1
pxor CONSTANT, %xmm1 pxor CONSTANT, %xmm1
...@@ -220,7 +219,7 @@ fold_64: ...@@ -220,7 +219,7 @@ fold_64:
#endif #endif
psrldq $0x04, %xmm2 psrldq $0x04, %xmm2
pand %xmm3, %xmm1 pand %xmm3, %xmm1
PCLMULQDQ 0x00, CONSTANT, %xmm1 pclmulqdq $0x00, CONSTANT, %xmm1
pxor %xmm2, %xmm1 pxor %xmm2, %xmm1
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
...@@ -231,11 +230,11 @@ fold_64: ...@@ -231,11 +230,11 @@ fold_64:
#endif #endif
movdqa %xmm1, %xmm2 movdqa %xmm1, %xmm2
pand %xmm3, %xmm1 pand %xmm3, %xmm1
PCLMULQDQ 0x10, CONSTANT, %xmm1 pclmulqdq $0x10, CONSTANT, %xmm1
pand %xmm3, %xmm1 pand %xmm3, %xmm1
PCLMULQDQ 0x00, CONSTANT, %xmm1 pclmulqdq $0x00, CONSTANT, %xmm1
pxor %xmm2, %xmm1 pxor %xmm2, %xmm1
PEXTRD 0x01, %xmm1, %eax pextrd $0x01, %xmm1, %eax
ret ret
SYM_FUNC_END(crc32_pclmul_le_16) SYM_FUNC_END(crc32_pclmul_le_16)
...@@ -43,7 +43,6 @@ ...@@ -43,7 +43,6 @@
* SOFTWARE. * SOFTWARE.
*/ */
#include <asm/inst.h>
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/nospec-branch.h> #include <asm/nospec-branch.h>
...@@ -225,10 +224,10 @@ LABEL crc_ %i ...@@ -225,10 +224,10 @@ LABEL crc_ %i
subq %rax, tmp # tmp -= rax*24 subq %rax, tmp # tmp -= rax*24
movq crc_init, %xmm1 # CRC for block 1 movq crc_init, %xmm1 # CRC for block 1
PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2 pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2
movq crc1, %xmm2 # CRC for block 2 movq crc1, %xmm2 # CRC for block 2
PCLMULQDQ 0x10, %xmm0, %xmm2 # Multiply by K1 pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1
pxor %xmm2,%xmm1 pxor %xmm2,%xmm1
movq %xmm1, %rax movq %xmm1, %rax
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/inst.h>
#include <asm/frame.h> #include <asm/frame.h>
.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 .section .rodata.cst16.bswap_mask, "aM", @progbits, 16
...@@ -51,9 +50,9 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble) ...@@ -51,9 +50,9 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
pxor DATA, T2 pxor DATA, T2
pxor SHASH, T3 pxor SHASH, T3
PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0 pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0
PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1 pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1
PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0) pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0)
pxor DATA, T2 pxor DATA, T2
pxor T1, T2 # T2 = a0 * b1 + a1 * b0 pxor T1, T2 # T2 = a0 * b1 + a1 * b0
...@@ -95,9 +94,9 @@ SYM_FUNC_START(clmul_ghash_mul) ...@@ -95,9 +94,9 @@ SYM_FUNC_START(clmul_ghash_mul)
movups (%rdi), DATA movups (%rdi), DATA
movups (%rsi), SHASH movups (%rsi), SHASH
movaps .Lbswap_mask, BSWAP movaps .Lbswap_mask, BSWAP
PSHUFB_XMM BSWAP DATA pshufb BSWAP, DATA
call __clmul_gf128mul_ble call __clmul_gf128mul_ble
PSHUFB_XMM BSWAP DATA pshufb BSWAP, DATA
movups DATA, (%rdi) movups DATA, (%rdi)
FRAME_END FRAME_END
ret ret
...@@ -114,18 +113,18 @@ SYM_FUNC_START(clmul_ghash_update) ...@@ -114,18 +113,18 @@ SYM_FUNC_START(clmul_ghash_update)
movaps .Lbswap_mask, BSWAP movaps .Lbswap_mask, BSWAP
movups (%rdi), DATA movups (%rdi), DATA
movups (%rcx), SHASH movups (%rcx), SHASH
PSHUFB_XMM BSWAP DATA pshufb BSWAP, DATA
.align 4 .align 4
.Lupdate_loop: .Lupdate_loop:
movups (%rsi), IN1 movups (%rsi), IN1
PSHUFB_XMM BSWAP IN1 pshufb BSWAP, IN1
pxor IN1, DATA pxor IN1, DATA
call __clmul_gf128mul_ble call __clmul_gf128mul_ble
sub $16, %rdx sub $16, %rdx
add $16, %rsi add $16, %rsi
cmp $16, %rdx cmp $16, %rdx
jge .Lupdate_loop jge .Lupdate_loop
PSHUFB_XMM BSWAP DATA pshufb BSWAP, DATA
movups DATA, (%rdi) movups DATA, (%rdi)
.Lupdate_just_ret: .Lupdate_just_ret:
FRAME_END FRAME_END
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Generate .byte code for some instructions not supported by old
* binutils.
*/
#ifndef X86_ASM_INST_H
#define X86_ASM_INST_H
#ifdef __ASSEMBLY__
#define REG_NUM_INVALID 100
#define REG_TYPE_R32 0
#define REG_TYPE_R64 1
#define REG_TYPE_XMM 2
#define REG_TYPE_INVALID 100
.macro R32_NUM opd r32
\opd = REG_NUM_INVALID
.ifc \r32,%eax
\opd = 0
.endif
.ifc \r32,%ecx
\opd = 1
.endif
.ifc \r32,%edx
\opd = 2
.endif
.ifc \r32,%ebx
\opd = 3
.endif
.ifc \r32,%esp
\opd = 4
.endif
.ifc \r32,%ebp
\opd = 5
.endif
.ifc \r32,%esi
\opd = 6
.endif
.ifc \r32,%edi
\opd = 7
.endif
#ifdef CONFIG_X86_64
.ifc \r32,%r8d
\opd = 8
.endif
.ifc \r32,%r9d
\opd = 9
.endif
.ifc \r32,%r10d
\opd = 10
.endif
.ifc \r32,%r11d
\opd = 11
.endif
.ifc \r32,%r12d
\opd = 12
.endif
.ifc \r32,%r13d
\opd = 13
.endif
.ifc \r32,%r14d
\opd = 14
.endif
.ifc \r32,%r15d
\opd = 15
.endif
#endif
.endm
.macro R64_NUM opd r64
\opd = REG_NUM_INVALID
#ifdef CONFIG_X86_64
.ifc \r64,%rax
\opd = 0
.endif
.ifc \r64,%rcx
\opd = 1
.endif
.ifc \r64,%rdx
\opd = 2
.endif
.ifc \r64,%rbx
\opd = 3
.endif
.ifc \r64,%rsp
\opd = 4
.endif
.ifc \r64,%rbp
\opd = 5
.endif
.ifc \r64,%rsi
\opd = 6
.endif
.ifc \r64,%rdi
\opd = 7
.endif
.ifc \r64,%r8
\opd = 8
.endif
.ifc \r64,%r9
\opd = 9
.endif
.ifc \r64,%r10
\opd = 10
.endif
.ifc \r64,%r11
\opd = 11
.endif
.ifc \r64,%r12
\opd = 12
.endif
.ifc \r64,%r13
\opd = 13
.endif
.ifc \r64,%r14
\opd = 14
.endif
.ifc \r64,%r15
\opd = 15
.endif
#endif
.endm
.macro XMM_NUM opd xmm
\opd = REG_NUM_INVALID
.ifc \xmm,%xmm0
\opd = 0
.endif
.ifc \xmm,%xmm1
\opd = 1
.endif
.ifc \xmm,%xmm2
\opd = 2
.endif
.ifc \xmm,%xmm3
\opd = 3
.endif
.ifc \xmm,%xmm4
\opd = 4
.endif
.ifc \xmm,%xmm5
\opd = 5
.endif
.ifc \xmm,%xmm6
\opd = 6
.endif
.ifc \xmm,%xmm7
\opd = 7
.endif
.ifc \xmm,%xmm8
\opd = 8
.endif
.ifc \xmm,%xmm9
\opd = 9
.endif
.ifc \xmm,%xmm10
\opd = 10
.endif
.ifc \xmm,%xmm11
\opd = 11
.endif
.ifc \xmm,%xmm12
\opd = 12
.endif
.ifc \xmm,%xmm13
\opd = 13
.endif
.ifc \xmm,%xmm14
\opd = 14
.endif
.ifc \xmm,%xmm15
\opd = 15
.endif
.endm
.macro REG_TYPE type reg
R32_NUM reg_type_r32 \reg
R64_NUM reg_type_r64 \reg
XMM_NUM reg_type_xmm \reg
.if reg_type_r64 <> REG_NUM_INVALID
\type = REG_TYPE_R64
.elseif reg_type_r32 <> REG_NUM_INVALID
\type = REG_TYPE_R32
.elseif reg_type_xmm <> REG_NUM_INVALID
\type = REG_TYPE_XMM
.else
\type = REG_TYPE_INVALID
.endif
.endm
.macro PFX_OPD_SIZE
.byte 0x66
.endm
.macro PFX_REX opd1 opd2 W=0
.if ((\opd1 | \opd2) & 8) || \W
.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
.endif
.endm
.macro MODRM mod opd1 opd2
.byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
.endm
.macro PSHUFB_XMM xmm1 xmm2
XMM_NUM pshufb_opd1 \xmm1
XMM_NUM pshufb_opd2 \xmm2
PFX_OPD_SIZE
PFX_REX pshufb_opd1 pshufb_opd2
.byte 0x0f, 0x38, 0x00
MODRM 0xc0 pshufb_opd1 pshufb_opd2
.endm
.macro PCLMULQDQ imm8 xmm1 xmm2
XMM_NUM clmul_opd1 \xmm1
XMM_NUM clmul_opd2 \xmm2
PFX_OPD_SIZE
PFX_REX clmul_opd1 clmul_opd2
.byte 0x0f, 0x3a, 0x44
MODRM 0xc0 clmul_opd1 clmul_opd2
.byte \imm8
.endm
.macro PEXTRD imm8 xmm gpr
R32_NUM extrd_opd1 \gpr
XMM_NUM extrd_opd2 \xmm
PFX_OPD_SIZE
PFX_REX extrd_opd1 extrd_opd2
.byte 0x0f, 0x3a, 0x16
MODRM 0xc0 extrd_opd1 extrd_opd2
.byte \imm8
.endm
.macro AESKEYGENASSIST rcon xmm1 xmm2
XMM_NUM aeskeygen_opd1 \xmm1
XMM_NUM aeskeygen_opd2 \xmm2
PFX_OPD_SIZE
PFX_REX aeskeygen_opd1 aeskeygen_opd2
.byte 0x0f, 0x3a, 0xdf
MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2
.byte \rcon
.endm
.macro AESIMC xmm1 xmm2
XMM_NUM aesimc_opd1 \xmm1
XMM_NUM aesimc_opd2 \xmm2
PFX_OPD_SIZE
PFX_REX aesimc_opd1 aesimc_opd2
.byte 0x0f, 0x38, 0xdb
MODRM 0xc0 aesimc_opd1 aesimc_opd2
.endm
.macro AESENC xmm1 xmm2
XMM_NUM aesenc_opd1 \xmm1
XMM_NUM aesenc_opd2 \xmm2
PFX_OPD_SIZE
PFX_REX aesenc_opd1 aesenc_opd2
.byte 0x0f, 0x38, 0xdc
MODRM 0xc0 aesenc_opd1 aesenc_opd2
.endm
.macro AESENCLAST xmm1 xmm2
XMM_NUM aesenclast_opd1 \xmm1
XMM_NUM aesenclast_opd2 \xmm2
PFX_OPD_SIZE
PFX_REX aesenclast_opd1 aesenclast_opd2
.byte 0x0f, 0x38, 0xdd
MODRM 0xc0 aesenclast_opd1 aesenclast_opd2
.endm
.macro AESDEC xmm1 xmm2
XMM_NUM aesdec_opd1 \xmm1
XMM_NUM aesdec_opd2 \xmm2
PFX_OPD_SIZE
PFX_REX aesdec_opd1 aesdec_opd2
.byte 0x0f, 0x38, 0xde
MODRM 0xc0 aesdec_opd1 aesdec_opd2
.endm
.macro AESDECLAST xmm1 xmm2
XMM_NUM aesdeclast_opd1 \xmm1
XMM_NUM aesdeclast_opd2 \xmm2
PFX_OPD_SIZE
PFX_REX aesdeclast_opd1 aesdeclast_opd2
.byte 0x0f, 0x38, 0xdf
MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2
.endm
.macro MOVQ_R64_XMM opd1 opd2
REG_TYPE movq_r64_xmm_opd1_type \opd1
.if movq_r64_xmm_opd1_type == REG_TYPE_XMM
XMM_NUM movq_r64_xmm_opd1 \opd1
R64_NUM movq_r64_xmm_opd2 \opd2
.else
R64_NUM movq_r64_xmm_opd1 \opd1
XMM_NUM movq_r64_xmm_opd2 \opd2
.endif
PFX_OPD_SIZE
PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1
.if movq_r64_xmm_opd1_type == REG_TYPE_XMM
.byte 0x0f, 0x7e
.else
.byte 0x0f, 0x6e
.endif
MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2
.endm
#endif
#endif