Commit 1d4b0ff3 authored by Ard Biesheuvel, committed by Herbert Xu

crypto: x86/aesni - Use local .L symbols for code

Avoid cluttering up the kallsyms symbol table with entries that should
not end up in things like backtraces, as they have undescriptive and
generated identifiers.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent e4ab7680
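For background: the GNU assembler treats any symbol whose name begins with .L as a local label. Such labels are resolved at assembly time and never emitted into the object file's symbol table, so they cannot reach kallsyms or show up in a stack trace. A minimal sketch of the convention (hypothetical function and label names, not taken from this patch):

	.text
	.globl	my_func
my_func:			# global symbol: listed in kallsyms
	mov	$16, %ecx
.Lloop:				# .L-prefixed: assembler-local, dropped from
	dec	%ecx		# the symbol table, so it can never clutter
	jnz	.Lloop		# a backtrace
	ret

Before this patch, each expansion of the GCM macros below emitted its labels (e.g. _initial_num_blocks_is_0_ plus a per-expansion number) as real symbols; adding the .L prefix makes the assembler discard them.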
@@ -288,53 +288,53 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
 	# Encrypt/Decrypt first few blocks
 	and	$(3<<4), %r12
-	jz	_initial_num_blocks_is_0_\@
+	jz	.L_initial_num_blocks_is_0_\@
 	cmp	$(2<<4), %r12
-	jb	_initial_num_blocks_is_1_\@
-	je	_initial_num_blocks_is_2_\@
-_initial_num_blocks_is_3_\@:
+	jb	.L_initial_num_blocks_is_1_\@
+	je	.L_initial_num_blocks_is_2_\@
+.L_initial_num_blocks_is_3_\@:
 	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
 %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation
 	sub	$48, %r13
-	jmp	_initial_blocks_\@
-_initial_num_blocks_is_2_\@:
+	jmp	.L_initial_blocks_\@
+.L_initial_num_blocks_is_2_\@:
 	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
 %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation
 	sub	$32, %r13
-	jmp	_initial_blocks_\@
-_initial_num_blocks_is_1_\@:
+	jmp	.L_initial_blocks_\@
+.L_initial_num_blocks_is_1_\@:
 	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
 %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation
 	sub	$16, %r13
-	jmp	_initial_blocks_\@
-_initial_num_blocks_is_0_\@:
+	jmp	.L_initial_blocks_\@
+.L_initial_num_blocks_is_0_\@:
 	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
 %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation
-_initial_blocks_\@:
+.L_initial_blocks_\@:
 	# Main loop - Encrypt/Decrypt remaining blocks
 	test	%r13, %r13
-	je	_zero_cipher_left_\@
+	je	.L_zero_cipher_left_\@
 	sub	$64, %r13
-	je	_four_cipher_left_\@
-_crypt_by_4_\@:
+	je	.L_four_cipher_left_\@
+.L_crypt_by_4_\@:
 	GHASH_4_ENCRYPT_4_PARALLEL_\operation	%xmm9, %xmm10, %xmm11, %xmm12, \
 	%xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \
 	%xmm7, %xmm8, enc
 	add	$64, %r11
 	sub	$64, %r13
-	jne	_crypt_by_4_\@
-_four_cipher_left_\@:
+	jne	.L_crypt_by_4_\@
+.L_four_cipher_left_\@:
 	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
 %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_\@:
+.L_zero_cipher_left_\@:
 	movdqu	%xmm8, AadHash(%arg2)
 	movdqu	%xmm0, CurCount(%arg2)
 	mov	%arg5, %r13
 	and	$15, %r13		# %r13 = arg5 (mod 16)
-	je	_multiple_of_16_bytes_\@
+	je	.L_multiple_of_16_bytes_\@
 	mov	%r13, PBlockLen(%arg2)
@@ -348,14 +348,14 @@ _zero_cipher_left_\@:
 	movdqu	%xmm0, PBlockEncKey(%arg2)
 	cmp	$16, %arg5
-	jge	_large_enough_update_\@
+	jge	.L_large_enough_update_\@
 	lea	(%arg4,%r11,1), %r10
 	mov	%r13, %r12
 	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-	jmp	_data_read_\@
-_large_enough_update_\@:
+	jmp	.L_data_read_\@
+.L_large_enough_update_\@:
 	sub	$16, %r11
 	add	%r13, %r11
@@ -374,7 +374,7 @@ _large_enough_update_\@:
 	# shift right 16-r13 bytes
 	pshufb	%xmm2, %xmm1
-_data_read_\@:
+.L_data_read_\@:
 	lea	ALL_F+16(%rip), %r12
 	sub	%r13, %r12
@@ -409,19 +409,19 @@ _data_read_\@:
 	# Output %r13 bytes
 	movq	%xmm0, %rax
 	cmp	$8, %r13
-	jle	_less_than_8_bytes_left_\@
+	jle	.L_less_than_8_bytes_left_\@
 	mov	%rax, (%arg3 , %r11, 1)
 	add	$8, %r11
 	psrldq	$8, %xmm0
 	movq	%xmm0, %rax
 	sub	$8, %r13
-_less_than_8_bytes_left_\@:
+.L_less_than_8_bytes_left_\@:
 	mov	%al, (%arg3, %r11, 1)
 	add	$1, %r11
 	shr	$8, %rax
 	sub	$1, %r13
-	jne	_less_than_8_bytes_left_\@
-_multiple_of_16_bytes_\@:
+	jne	.L_less_than_8_bytes_left_\@
+.L_multiple_of_16_bytes_\@:
 .endm
 # GCM_COMPLETE Finishes update of tag of last partial block
@@ -434,11 +434,11 @@ _multiple_of_16_bytes_\@:
 	mov	PBlockLen(%arg2), %r12
 	test	%r12, %r12
-	je	_partial_done\@
+	je	.L_partial_done\@
 	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-_partial_done\@:
+.L_partial_done\@:
 	mov	AadLen(%arg2), %r12	# %r13 = aadLen (number of bytes)
 	shl	$3, %r12		# convert into number of bits
 	movd	%r12d, %xmm15		# len(A) in %xmm15
@@ -457,44 +457,44 @@ _partial_done\@:
 	movdqu	OrigIV(%arg2), %xmm0	# %xmm0 = Y0
 	ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1	# E(K, Y0)
 	pxor	%xmm8, %xmm0
-_return_T_\@:
+.L_return_T_\@:
 	mov	\AUTHTAG, %r10		# %r10 = authTag
 	mov	\AUTHTAGLEN, %r11	# %r11 = auth_tag_len
 	cmp	$16, %r11
-	je	_T_16_\@
+	je	.L_T_16_\@
 	cmp	$8, %r11
-	jl	_T_4_\@
-_T_8_\@:
+	jl	.L_T_4_\@
+.L_T_8_\@:
 	movq	%xmm0, %rax
 	mov	%rax, (%r10)
 	add	$8, %r10
 	sub	$8, %r11
 	psrldq	$8, %xmm0
 	test	%r11, %r11
-	je	_return_T_done_\@
-_T_4_\@:
+	je	.L_return_T_done_\@
+.L_T_4_\@:
 	movd	%xmm0, %eax
 	mov	%eax, (%r10)
 	add	$4, %r10
 	sub	$4, %r11
 	psrldq	$4, %xmm0
 	test	%r11, %r11
-	je	_return_T_done_\@
-_T_123_\@:
+	je	.L_return_T_done_\@
+.L_T_123_\@:
 	movd	%xmm0, %eax
 	cmp	$2, %r11
-	jl	_T_1_\@
+	jl	.L_T_1_\@
 	mov	%ax, (%r10)
 	cmp	$2, %r11
-	je	_return_T_done_\@
+	je	.L_return_T_done_\@
 	add	$2, %r10
 	sar	$16, %eax
-_T_1_\@:
+.L_T_1_\@:
 	mov	%al, (%r10)
-	jmp	_return_T_done_\@
-_T_16_\@:
+	jmp	.L_return_T_done_\@
+.L_T_16_\@:
 	movdqu	%xmm0, (%r10)
-_return_T_done_\@:
+.L_return_T_done_\@:
 .endm
 #ifdef __x86_64__
@@ -563,30 +563,30 @@ _return_T_done_\@:
 # Clobbers %rax, DLEN and XMM1
 .macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst
 	cmp	$8, \DLEN
-	jl	_read_lt8_\@
+	jl	.L_read_lt8_\@
 	mov	(\DPTR), %rax
 	movq	%rax, \XMMDst
 	sub	$8, \DLEN
-	jz	_done_read_partial_block_\@
+	jz	.L_done_read_partial_block_\@
 	xor	%eax, %eax
-_read_next_byte_\@:
+.L_read_next_byte_\@:
 	shl	$8, %rax
 	mov	7(\DPTR, \DLEN, 1), %al
 	dec	\DLEN
-	jnz	_read_next_byte_\@
+	jnz	.L_read_next_byte_\@
 	movq	%rax, \XMM1
 	pslldq	$8, \XMM1
 	por	\XMM1, \XMMDst
-	jmp	_done_read_partial_block_\@
-_read_lt8_\@:
+	jmp	.L_done_read_partial_block_\@
+.L_read_lt8_\@:
 	xor	%eax, %eax
-_read_next_byte_lt8_\@:
+.L_read_next_byte_lt8_\@:
 	shl	$8, %rax
 	mov	-1(\DPTR, \DLEN, 1), %al
 	dec	\DLEN
-	jnz	_read_next_byte_lt8_\@
+	jnz	.L_read_next_byte_lt8_\@
 	movq	%rax, \XMMDst
-_done_read_partial_block_\@:
+.L_done_read_partial_block_\@:
 .endm
 # CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted.
@@ -600,8 +600,8 @@ _done_read_partial_block_\@:
 	pxor	\TMP6, \TMP6
 	cmp	$16, %r11
-	jl	_get_AAD_rest\@
-_get_AAD_blocks\@:
+	jl	.L_get_AAD_rest\@
+.L_get_AAD_blocks\@:
 	movdqu	(%r10), \TMP7
 	pshufb	%xmm14, \TMP7		# byte-reflect the AAD data
 	pxor	\TMP7, \TMP6
@@ -609,14 +609,14 @@ _get_AAD_blocks\@:
 	add	$16, %r10
 	sub	$16, %r11
 	cmp	$16, %r11
-	jge	_get_AAD_blocks\@
+	jge	.L_get_AAD_blocks\@
 	movdqu	\TMP6, \TMP7
 	/* read the last <16B of AAD */
-_get_AAD_rest\@:
+.L_get_AAD_rest\@:
 	test	%r11, %r11
-	je	_get_AAD_done\@
+	je	.L_get_AAD_done\@
 	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
 	pshufb	%xmm14, \TMP7		# byte-reflect the AAD data
@@ -624,7 +624,7 @@ _get_AAD_rest\@:
 	GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
 	movdqu	\TMP7, \TMP6
-_get_AAD_done\@:
+.L_get_AAD_done\@:
 	movdqu	\TMP6, AadHash(%arg2)
 .endm
@@ -637,21 +637,21 @@ _get_AAD_done\@:
 	AAD_HASH operation
 	mov	PBlockLen(%arg2), %r13
 	test	%r13, %r13
-	je	_partial_block_done_\@	# Leave Macro if no partial blocks
+	je	.L_partial_block_done_\@	# Leave Macro if no partial blocks
 	# Read in input data without over reading
 	cmp	$16, \PLAIN_CYPH_LEN
-	jl	_fewer_than_16_bytes_\@
+	jl	.L_fewer_than_16_bytes_\@
 	movups	(\PLAIN_CYPH_IN), %xmm1	# If more than 16 bytes, just fill xmm
-	jmp	_data_read_\@
-_fewer_than_16_bytes_\@:
+	jmp	.L_data_read_\@
+.L_fewer_than_16_bytes_\@:
 	lea	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
 	mov	\PLAIN_CYPH_LEN, %r12
 	READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1
 	mov	PBlockLen(%arg2), %r13
-_data_read_\@:				# Finished reading in data
+.L_data_read_\@:			# Finished reading in data
 	movdqu	PBlockEncKey(%arg2), %xmm9
 	movdqu	HashKey(%arg2), %xmm13
@@ -674,9 +674,9 @@ _data_read_\@:				# Finished reading in data
 	sub	$16, %r10
 	# Determine if if partial block is not being filled and
 	# shift mask accordingly
-	jge	_no_extra_mask_1_\@
+	jge	.L_no_extra_mask_1_\@
 	sub	%r10, %r12
-_no_extra_mask_1_\@:
+.L_no_extra_mask_1_\@:
 	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
 	# get the appropriate mask to mask out bottom r13 bytes of xmm9
@@ -689,17 +689,17 @@ _no_extra_mask_1_\@:
 	pxor	%xmm3, \AAD_HASH
 	test	%r10, %r10
-	jl	_partial_incomplete_1_\@
+	jl	.L_partial_incomplete_1_\@
 	# GHASH computation for the last <16 Byte block
 	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
 	xor	%eax, %eax
 	mov	%rax, PBlockLen(%arg2)
-	jmp	_dec_done_\@
-_partial_incomplete_1_\@:
+	jmp	.L_dec_done_\@
+.L_partial_incomplete_1_\@:
 	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
-_dec_done_\@:
+.L_dec_done_\@:
 	movdqu	\AAD_HASH, AadHash(%arg2)
 .else
 	pxor	%xmm1, %xmm9		# Plaintext XOR E(K, Yn)
@@ -710,9 +710,9 @@ _dec_done_\@:
 	sub	$16, %r10
 	# Determine if if partial block is not being filled and
 	# shift mask accordingly
-	jge	_no_extra_mask_2_\@
+	jge	.L_no_extra_mask_2_\@
 	sub	%r10, %r12
-_no_extra_mask_2_\@:
+.L_no_extra_mask_2_\@:
 	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
 	# get the appropriate mask to mask out bottom r13 bytes of xmm9
@@ -724,17 +724,17 @@ _no_extra_mask_2_\@:
 	pxor	%xmm9, \AAD_HASH
 	test	%r10, %r10
-	jl	_partial_incomplete_2_\@
+	jl	.L_partial_incomplete_2_\@
 	# GHASH computation for the last <16 Byte block
 	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
 	xor	%eax, %eax
 	mov	%rax, PBlockLen(%arg2)
-	jmp	_encode_done_\@
-_partial_incomplete_2_\@:
+	jmp	.L_encode_done_\@
+.L_partial_incomplete_2_\@:
 	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
-_encode_done_\@:
+.L_encode_done_\@:
 	movdqu	\AAD_HASH, AadHash(%arg2)
 	movdqa	SHUF_MASK(%rip), %xmm10
@@ -744,32 +744,32 @@ _encode_done_\@:
 .endif
 	# output encrypted Bytes
 	test	%r10, %r10
-	jl	_partial_fill_\@
+	jl	.L_partial_fill_\@
 	mov	%r13, %r12
 	mov	$16, %r13
 	# Set r13 to be the number of bytes to write out
 	sub	%r12, %r13
-	jmp	_count_set_\@
-_partial_fill_\@:
+	jmp	.L_count_set_\@
+.L_partial_fill_\@:
 	mov	\PLAIN_CYPH_LEN, %r13
-_count_set_\@:
+.L_count_set_\@:
 	movdqa	%xmm9, %xmm0
 	movq	%xmm0, %rax
 	cmp	$8, %r13
-	jle	_less_than_8_bytes_left_\@
+	jle	.L_less_than_8_bytes_left_\@
 	mov	%rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
 	add	$8, \DATA_OFFSET
 	psrldq	$8, %xmm0
 	movq	%xmm0, %rax
 	sub	$8, %r13
-_less_than_8_bytes_left_\@:
+.L_less_than_8_bytes_left_\@:
 	movb	%al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
 	add	$1, \DATA_OFFSET
 	shr	$8, %rax
 	sub	$1, %r13
-	jne	_less_than_8_bytes_left_\@
-_partial_block_done_\@:
+	jne	.L_less_than_8_bytes_left_\@
+.L_partial_block_done_\@:
 .endm # PARTIAL_BLOCK
 /*
@@ -813,14 +813,14 @@ _partial_block_done_\@:
 	shr	$2,%eax			# 128->4, 192->6, 256->8
 	add	$5,%eax			# 128->9, 192->11, 256->13
-aes_loop_initial_\@:
+.Laes_loop_initial_\@:
 	MOVADQ	(%r10),\TMP1
 .irpc	index, \i_seq
 	aesenc	\TMP1, %xmm\index
 .endr
 	add	$16,%r10
 	sub	$1,%eax
-	jnz	aes_loop_initial_\@
+	jnz	.Laes_loop_initial_\@
 	MOVADQ	(%r10), \TMP1
 .irpc	index, \i_seq
@@ -861,7 +861,7 @@ aes_loop_initial_\@:
 	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
 .endif
 	cmp	$64, %r13
-	jl	_initial_blocks_done\@
+	jl	.L_initial_blocks_done\@
 	# no need for precomputed values
 /*
 *
@@ -908,18 +908,18 @@ aes_loop_initial_\@:
 	mov	keysize,%eax
 	shr	$2,%eax			# 128->4, 192->6, 256->8
 	sub	$4,%eax			# 128->0, 192->2, 256->4
-	jz	aes_loop_pre_done\@
-aes_loop_pre_\@:
+	jz	.Laes_loop_pre_done\@
+.Laes_loop_pre_\@:
 	MOVADQ	(%r10),\TMP2
 .irpc	index, 1234
 	aesenc	\TMP2, %xmm\index
 .endr
 	add	$16,%r10
 	sub	$1,%eax
-	jnz	aes_loop_pre_\@
-aes_loop_pre_done\@:
+	jnz	.Laes_loop_pre_\@
+.Laes_loop_pre_done\@:
 	MOVADQ	(%r10), \TMP2
 	aesenclast \TMP2, \XMM1
 	aesenclast \TMP2, \XMM2
@@ -963,7 +963,7 @@ aes_loop_pre_done\@:
 	pshufb	%xmm14, \XMM3		# perform a 16 byte swap
 	pshufb	%xmm14, \XMM4		# perform a 16 byte swap
-_initial_blocks_done\@:
+.L_initial_blocks_done\@:
 .endm
@@ -1095,18 +1095,18 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	mov	keysize,%eax
 	shr	$2,%eax			# 128->4, 192->6, 256->8
 	sub	$4,%eax			# 128->0, 192->2, 256->4
-	jz	aes_loop_par_enc_done\@
-aes_loop_par_enc\@:
+	jz	.Laes_loop_par_enc_done\@
+.Laes_loop_par_enc\@:
 	MOVADQ	(%r10),\TMP3
 .irpc	index, 1234
 	aesenc	\TMP3, %xmm\index
 .endr
 	add	$16,%r10
 	sub	$1,%eax
-	jnz	aes_loop_par_enc\@
-aes_loop_par_enc_done\@:
+	jnz	.Laes_loop_par_enc\@
+.Laes_loop_par_enc_done\@:
 	MOVADQ	(%r10), \TMP3
 	aesenclast \TMP3, \XMM1		# Round 10
 	aesenclast \TMP3, \XMM2
@@ -1303,18 +1303,18 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	mov	keysize,%eax
 	shr	$2,%eax			# 128->4, 192->6, 256->8
 	sub	$4,%eax			# 128->0, 192->2, 256->4
-	jz	aes_loop_par_dec_done\@
-aes_loop_par_dec\@:
+	jz	.Laes_loop_par_dec_done\@
+.Laes_loop_par_dec\@:
 	MOVADQ	(%r10),\TMP3
 .irpc	index, 1234
 	aesenc	\TMP3, %xmm\index
 .endr
 	add	$16,%r10
 	sub	$1,%eax
-	jnz	aes_loop_par_dec\@
-aes_loop_par_dec_done\@:
+	jnz	.Laes_loop_par_dec\@
+.Laes_loop_par_dec_done\@:
 	MOVADQ	(%r10), \TMP3
 	aesenclast \TMP3, \XMM1		# last round
 	aesenclast \TMP3, \XMM2
...
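An aside on the \@ suffix that appears on every label in these hunks: inside a .macro body, the GNU assembler expands \@ to its macro-expansion counter, a number unique to each invocation, so every expansion gets a private copy of each label. A short sketch of the mechanism (hypothetical macro, not from this patch; the label numbers assume these are the first two expansions in the file):

.macro	shift_out_bits reg
.Lagain\@:			# \@ expands to the expansion counter, so the
	add	\reg, \reg	# two uses below produce distinct labels,
	jnz	.Lagain\@	# .Lagain0 and .Lagain1
.endm

	shift_out_bits %eax
	shift_out_bits %ebx

Without the .L prefix, those per-expansion labels would each become a symbol table entry — one kallsyms entry per macro expansion, which is exactly the clutter this commit removes.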
@@ -278,68 +278,68 @@ VARIABLE_OFFSET = 16*8
 	mov	%r13, %r12
 	shr	$4, %r12
 	and	$7, %r12
-	jz	_initial_num_blocks_is_0\@
+	jz	.L_initial_num_blocks_is_0\@
 	cmp	$7, %r12
-	je	_initial_num_blocks_is_7\@
+	je	.L_initial_num_blocks_is_7\@
 	cmp	$6, %r12
-	je	_initial_num_blocks_is_6\@
+	je	.L_initial_num_blocks_is_6\@
 	cmp	$5, %r12
-	je	_initial_num_blocks_is_5\@
+	je	.L_initial_num_blocks_is_5\@
 	cmp	$4, %r12
-	je	_initial_num_blocks_is_4\@
+	je	.L_initial_num_blocks_is_4\@
 	cmp	$3, %r12
-	je	_initial_num_blocks_is_3\@
+	je	.L_initial_num_blocks_is_3\@
 	cmp	$2, %r12
-	je	_initial_num_blocks_is_2\@
-	jmp	_initial_num_blocks_is_1\@
-_initial_num_blocks_is_7\@:
+	je	.L_initial_num_blocks_is_2\@
+	jmp	.L_initial_num_blocks_is_1\@
+.L_initial_num_blocks_is_7\@:
 	\INITIAL_BLOCKS \REP, 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
 	sub	$16*7, %r13
-	jmp	_initial_blocks_encrypted\@
-_initial_num_blocks_is_6\@:
+	jmp	.L_initial_blocks_encrypted\@
+.L_initial_num_blocks_is_6\@:
 	\INITIAL_BLOCKS \REP, 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
 	sub	$16*6, %r13
-	jmp	_initial_blocks_encrypted\@
-_initial_num_blocks_is_5\@:
+	jmp	.L_initial_blocks_encrypted\@
+.L_initial_num_blocks_is_5\@:
 	\INITIAL_BLOCKS \REP, 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
 	sub	$16*5, %r13
-	jmp	_initial_blocks_encrypted\@
-_initial_num_blocks_is_4\@:
+	jmp	.L_initial_blocks_encrypted\@
+.L_initial_num_blocks_is_4\@:
 	\INITIAL_BLOCKS \REP, 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
 	sub	$16*4, %r13
-	jmp	_initial_blocks_encrypted\@
-_initial_num_blocks_is_3\@:
+	jmp	.L_initial_blocks_encrypted\@
+.L_initial_num_blocks_is_3\@:
 	\INITIAL_BLOCKS \REP, 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
 	sub	$16*3, %r13
-	jmp	_initial_blocks_encrypted\@
-_initial_num_blocks_is_2\@:
+	jmp	.L_initial_blocks_encrypted\@
+.L_initial_num_blocks_is_2\@:
 	\INITIAL_BLOCKS \REP, 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
 	sub	$16*2, %r13
-	jmp	_initial_blocks_encrypted\@
-_initial_num_blocks_is_1\@:
+	jmp	.L_initial_blocks_encrypted\@
+.L_initial_num_blocks_is_1\@:
 	\INITIAL_BLOCKS \REP, 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
 	sub	$16*1, %r13
-	jmp	_initial_blocks_encrypted\@
-_initial_num_blocks_is_0\@:
+	jmp	.L_initial_blocks_encrypted\@
+.L_initial_num_blocks_is_0\@:
 	\INITIAL_BLOCKS \REP, 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
-_initial_blocks_encrypted\@:
+.L_initial_blocks_encrypted\@:
 	test	%r13, %r13
-	je	_zero_cipher_left\@
+	je	.L_zero_cipher_left\@
 	sub	$128, %r13
-	je	_eight_cipher_left\@
+	je	.L_eight_cipher_left\@
@@ -349,9 +349,9 @@ _initial_blocks_encrypted\@:
 	vpshufb	SHUF_MASK(%rip), %xmm9, %xmm9
-_encrypt_by_8_new\@:
+.L_encrypt_by_8_new\@:
 	cmp	$(255-8), %r15d
-	jg	_encrypt_by_8\@
+	jg	.L_encrypt_by_8\@
@@ -359,30 +359,30 @@ _encrypt_by_8_new\@:
 	\GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
 	add	$128, %r11
 	sub	$128, %r13
-	jne	_encrypt_by_8_new\@
+	jne	.L_encrypt_by_8_new\@
 	vpshufb	SHUF_MASK(%rip), %xmm9, %xmm9
-	jmp	_eight_cipher_left\@
-_encrypt_by_8\@:
+	jmp	.L_eight_cipher_left\@
+.L_encrypt_by_8\@:
 	vpshufb	SHUF_MASK(%rip), %xmm9, %xmm9
 	add	$8, %r15b
 	\GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
 	vpshufb	SHUF_MASK(%rip), %xmm9, %xmm9
 	add	$128, %r11
 	sub	$128, %r13
-	jne	_encrypt_by_8_new\@
+	jne	.L_encrypt_by_8_new\@
 	vpshufb	SHUF_MASK(%rip), %xmm9, %xmm9
-_eight_cipher_left\@:
+.L_eight_cipher_left\@:
 	\GHASH_LAST_8 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8
-_zero_cipher_left\@:
+.L_zero_cipher_left\@:
 	vmovdqu	%xmm14, AadHash(arg2)
 	vmovdqu	%xmm9, CurCount(arg2)
@@ -390,7 +390,7 @@ _zero_cipher_left\@:
 	mov	arg5, %r13
 	and	$15, %r13		# r13 = (arg5 mod 16)
-	je	_multiple_of_16_bytes\@
+	je	.L_multiple_of_16_bytes\@
 	# handle the last <16 Byte block separately
@@ -404,7 +404,7 @@ _zero_cipher_left\@:
 	vmovdqu	%xmm9, PBlockEncKey(arg2)
 	cmp	$16, arg5
-	jge	_large_enough_update\@
+	jge	.L_large_enough_update\@
 	lea	(arg4,%r11,1), %r10
 	mov	%r13, %r12
@@ -416,9 +416,9 @@ _zero_cipher_left\@:
 	# able to shift 16-r13 bytes (r13 is the
 	# number of bytes in plaintext mod 16)
-	jmp	_final_ghash_mul\@
-_large_enough_update\@:
+	jmp	.L_final_ghash_mul\@
+.L_large_enough_update\@:
 	sub	$16, %r11
 	add	%r13, %r11
@@ -437,7 +437,7 @@ _large_enough_update\@:
 	# shift right 16-r13 bytes
 	vpshufb	%xmm2, %xmm1, %xmm1
-_final_ghash_mul\@:
+.L_final_ghash_mul\@:
 .if \ENC_DEC == DEC
 	vmovdqa	%xmm1, %xmm2
 	vpxor	%xmm1, %xmm9, %xmm9	# Plaintext XOR E(K, Yn)
@@ -466,7 +466,7 @@ _final_ghash_mul\@:
 	# output r13 Bytes
 	vmovq	%xmm9, %rax
 	cmp	$8, %r13
-	jle	_less_than_8_bytes_left\@
+	jle	.L_less_than_8_bytes_left\@
 	mov	%rax, (arg3 , %r11)
 	add	$8, %r11
@@ -474,15 +474,15 @@ _final_ghash_mul\@:
 	vmovq	%xmm9, %rax
 	sub	$8, %r13
-_less_than_8_bytes_left\@:
+.L_less_than_8_bytes_left\@:
 	movb	%al, (arg3 , %r11)
 	add	$1, %r11
 	shr	$8, %rax
 	sub	$1, %r13
-	jne	_less_than_8_bytes_left\@
+	jne	.L_less_than_8_bytes_left\@
 	#############################
-_multiple_of_16_bytes\@:
+.L_multiple_of_16_bytes\@:
 .endm
@@ -495,12 +495,12 @@ _multiple_of_16_bytes\@:
 	mov	PBlockLen(arg2), %r12
 	test	%r12, %r12
-	je	_partial_done\@
+	je	.L_partial_done\@
 	#GHASH computation for the last <16 Byte block
 	\GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
-_partial_done\@:
+.L_partial_done\@:
 	mov	AadLen(arg2), %r12	# r12 = aadLen (number of bytes)
 	shl	$3, %r12		# convert into number of bits
 	vmovd	%r12d, %xmm15		# len(A) in xmm15
@@ -523,49 +523,49 @@ _partial_done\@:
-_return_T\@:
+.L_return_T\@:
 	mov	\AUTH_TAG, %r10		# r10 = authTag
 	mov	\AUTH_TAG_LEN, %r11	# r11 = auth_tag_len
 	cmp	$16, %r11
-	je	_T_16\@
+	je	.L_T_16\@
 	cmp	$8, %r11
-	jl	_T_4\@
-_T_8\@:
+	jl	.L_T_4\@
+.L_T_8\@:
 	vmovq	%xmm9, %rax
 	mov	%rax, (%r10)
 	add	$8, %r10
 	sub	$8, %r11
 	vpsrldq	$8, %xmm9, %xmm9
 	test	%r11, %r11
-	je	_return_T_done\@
-_T_4\@:
+	je	.L_return_T_done\@
+.L_T_4\@:
 	vmovd	%xmm9, %eax
 	mov	%eax, (%r10)
 	add	$4, %r10
 	sub	$4, %r11
 	vpsrldq	$4, %xmm9, %xmm9
 	test	%r11, %r11
-	je	_return_T_done\@
-_T_123\@:
+	je	.L_return_T_done\@
+.L_T_123\@:
 	vmovd	%xmm9, %eax
 	cmp	$2, %r11
-	jl	_T_1\@
+	jl	.L_T_1\@
 	mov	%ax, (%r10)
 	cmp	$2, %r11
-	je	_return_T_done\@
+	je	.L_return_T_done\@
 	add	$2, %r10
 	sar	$16, %eax
-_T_1\@:
+.L_T_1\@:
 	mov	%al, (%r10)
-	jmp	_return_T_done\@
-_T_16\@:
+	jmp	.L_return_T_done\@
+.L_T_16\@:
 	vmovdqu	%xmm9, (%r10)
-_return_T_done\@:
+.L_return_T_done\@:
 .endm
 .macro CALC_AAD_HASH GHASH_MUL AAD AADLEN T1 T2 T3 T4 T5 T6 T7 T8
@@ -579,8 +579,8 @@ _return_T_done\@:
 	vpxor	\T8, \T8, \T8
 	vpxor	\T7, \T7, \T7
 	cmp	$16, %r11
-	jl	_get_AAD_rest8\@
-_get_AAD_blocks\@:
+	jl	.L_get_AAD_rest8\@
+.L_get_AAD_blocks\@:
 	vmovdqu	(%r10), \T7
 	vpshufb	SHUF_MASK(%rip), \T7, \T7
 	vpxor	\T7, \T8, \T8
@@ -589,29 +589,29 @@ _get_AAD_blocks\@:
 	sub	$16, %r12
 	sub	$16, %r11
 	cmp	$16, %r11
-	jge	_get_AAD_blocks\@
+	jge	.L_get_AAD_blocks\@
 	vmovdqu	\T8, \T7
 	test	%r11, %r11
-	je	_get_AAD_done\@
+	je	.L_get_AAD_done\@
 	vpxor	\T7, \T7, \T7
 	/* read the last <16B of AAD. since we have at least 4B of
 	data right after the AAD (the ICV, and maybe some CT), we can
 	read 4B/8B blocks safely, and then get rid of the extra stuff */
-_get_AAD_rest8\@:
+.L_get_AAD_rest8\@:
 	cmp	$4, %r11
-	jle	_get_AAD_rest4\@
+	jle	.L_get_AAD_rest4\@
 	movq	(%r10), \T1
 	add	$8, %r10
 	sub	$8, %r11
 	vpslldq	$8, \T1, \T1
 	vpsrldq	$8, \T7, \T7
 	vpxor	\T1, \T7, \T7
-	jmp	_get_AAD_rest8\@
-_get_AAD_rest4\@:
+	jmp	.L_get_AAD_rest8\@
+.L_get_AAD_rest4\@:
 	test	%r11, %r11
-	jle	_get_AAD_rest0\@
+	jle	.L_get_AAD_rest0\@
 	mov	(%r10), %eax
 	movq	%rax, \T1
 	add	$4, %r10
@@ -619,7 +619,7 @@ _get_AAD_rest4\@:
 	vpslldq	$12, \T1, \T1
 	vpsrldq	$4, \T7, \T7
 	vpxor	\T1, \T7, \T7
-_get_AAD_rest0\@:
+.L_get_AAD_rest0\@:
 	/* finalize: shift out the extra bytes we read, and align
 	left. since pslldq can only shift by an immediate, we use
 	vpshufb and a pair of shuffle masks */
@@ -629,12 +629,12 @@ _get_AAD_rest0\@:
 	andq	$~3, %r11
 	vpshufb	(%r11), \T7, \T7
 	vpand	\T1, \T7, \T7
-_get_AAD_rest_final\@:
+.L_get_AAD_rest_final\@:
 	vpshufb	SHUF_MASK(%rip), \T7, \T7
 	vpxor	\T8, \T7, \T7
 	\GHASH_MUL \T7, \T2, \T1, \T3, \T4, \T5, \T6
-_get_AAD_done\@:
+.L_get_AAD_done\@:
 	vmovdqu	\T7, AadHash(arg2)
 .endm
@@ -685,28 +685,28 @@ _get_AAD_done\@:
 	vpxor	\XMMDst, \XMMDst, \XMMDst
 	cmp	$8, \DLEN
-	jl	_read_lt8_\@
+	jl	.L_read_lt8_\@
 	mov	(\DPTR), %rax
 	vpinsrq	$0, %rax, \XMMDst, \XMMDst
 	sub	$8, \DLEN
-	jz	_done_read_partial_block_\@
+	jz	.L_done_read_partial_block_\@
 	xor	%eax, %eax
-_read_next_byte_\@:
+.L_read_next_byte_\@:
 	shl	$8, %rax
 	mov	7(\DPTR, \DLEN, 1), %al
 	dec	\DLEN
-	jnz	_read_next_byte_\@
+	jnz	.L_read_next_byte_\@
 	vpinsrq	$1, %rax, \XMMDst, \XMMDst
-	jmp	_done_read_partial_block_\@
-_read_lt8_\@:
+	jmp	.L_done_read_partial_block_\@
+.L_read_lt8_\@:
 	xor	%eax, %eax
-_read_next_byte_lt8_\@:
+.L_read_next_byte_lt8_\@:
 	shl	$8, %rax
 	mov	-1(\DPTR, \DLEN, 1), %al
 	dec	\DLEN
-	jnz	_read_next_byte_lt8_\@
+	jnz	.L_read_next_byte_lt8_\@
 	vpinsrq	$0, %rax, \XMMDst, \XMMDst
-_done_read_partial_block_\@:
+.L_done_read_partial_block_\@:
 .endm
 # PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks
@@ -718,21 +718,21 @@ _done_read_partial_block_\@:
 	AAD_HASH ENC_DEC
 	mov	PBlockLen(arg2), %r13
 	test	%r13, %r13
-	je	_partial_block_done_\@	# Leave Macro if no partial blocks
+	je	.L_partial_block_done_\@	# Leave Macro if no partial blocks
 	# Read in input data without over reading
 	cmp	$16, \PLAIN_CYPH_LEN
-	jl	_fewer_than_16_bytes_\@
+	jl	.L_fewer_than_16_bytes_\@
 	vmovdqu	(\PLAIN_CYPH_IN), %xmm1	# If more than 16 bytes, just fill xmm
-	jmp	_data_read_\@
-_fewer_than_16_bytes_\@:
+	jmp	.L_data_read_\@
+.L_fewer_than_16_bytes_\@:
 	lea	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
 	mov	\PLAIN_CYPH_LEN, %r12
 	READ_PARTIAL_BLOCK %r10 %r12 %xmm1
 	mov	PBlockLen(arg2), %r13
-_data_read_\@:				# Finished reading in data
+.L_data_read_\@:			# Finished reading in data
 	vmovdqu	PBlockEncKey(arg2), %xmm9
 	vmovdqu	HashKey(arg2), %xmm13
@@ -755,9 +755,9 @@ _data_read_\@:				# Finished reading in data
 	sub	$16, %r10
 	# Determine if if partial block is not being filled and
 	# shift mask accordingly
-	jge	_no_extra_mask_1_\@
+	jge	.L_no_extra_mask_1_\@
 	sub	%r10, %r12
-_no_extra_mask_1_\@:
+.L_no_extra_mask_1_\@:
 	vmovdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
 	# get the appropriate mask to mask out bottom r13 bytes of xmm9
@@ -770,17 +770,17 @@ _no_extra_mask_1_\@:
 	vpxor	%xmm3, \AAD_HASH, \AAD_HASH
 	test	%r10, %r10
-	jl	_partial_incomplete_1_\@
+	jl	.L_partial_incomplete_1_\@
 	# GHASH computation for the last <16 Byte block
 	\GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
 	xor	%eax,%eax
 	mov	%rax, PBlockLen(arg2)
-	jmp	_dec_done_\@
-_partial_incomplete_1_\@:
+	jmp	.L_dec_done_\@
+.L_partial_incomplete_1_\@:
 	add	\PLAIN_CYPH_LEN, PBlockLen(arg2)
-_dec_done_\@:
+.L_dec_done_\@:
 	vmovdqu	\AAD_HASH, AadHash(arg2)
 .else
 	vpxor	%xmm1, %xmm9, %xmm9	# Plaintext XOR E(K, Yn)
@@ -791,9 +791,9 @@ _dec_done_\@:
 	sub	$16, %r10
 	# Determine if if partial block is not being filled and
 	# shift mask accordingly
-	jge	_no_extra_mask_2_\@
+	jge	.L_no_extra_mask_2_\@
 	sub	%r10, %r12
-_no_extra_mask_2_\@:
+.L_no_extra_mask_2_\@:
 	vmovdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
 	# get the appropriate mask to mask out bottom r13 bytes of xmm9
@@ -805,17 +805,17 @@ _no_extra_mask_2_\@:
 	vpxor	%xmm9, \AAD_HASH, \AAD_HASH
 	test	%r10, %r10
-	jl	_partial_incomplete_2_\@
+	jl	.L_partial_incomplete_2_\@
 	# GHASH computation for the last <16 Byte block
 	\GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
 	xor	%eax,%eax
 	mov	%rax, PBlockLen(arg2)
-	jmp	_encode_done_\@
-_partial_incomplete_2_\@:
+	jmp	.L_encode_done_\@
+.L_partial_incomplete_2_\@:
 	add	\PLAIN_CYPH_LEN, PBlockLen(arg2)
-_encode_done_\@:
+.L_encode_done_\@:
 	vmovdqu	\AAD_HASH, AadHash(arg2)
 	vmovdqa	SHUF_MASK(%rip), %xmm10
@@ -825,32 +825,32 @@ _encode_done_\@:
 .endif
 	# output encrypted Bytes
 	test	%r10, %r10
-	jl	_partial_fill_\@
+	jl	.L_partial_fill_\@
 	mov	%r13, %r12
 	mov	$16, %r13
 	# Set r13 to be the number of bytes to write out
 	sub	%r12, %r13
-	jmp	_count_set_\@
-_partial_fill_\@:
+	jmp	.L_count_set_\@
+.L_partial_fill_\@:
 	mov	\PLAIN_CYPH_LEN, %r13
-_count_set_\@:
+.L_count_set_\@:
 	vmovdqa	%xmm9, %xmm0
 	vmovq	%xmm0, %rax
 	cmp	$8, %r13
-	jle	_less_than_8_bytes_left_\@
+	jle	.L_less_than_8_bytes_left_\@
 	mov	%rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
 	add	$8, \DATA_OFFSET
 	psrldq	$8, %xmm0
 	vmovq	%xmm0, %rax
 	sub	$8, %r13
-_less_than_8_bytes_left_\@:
+.L_less_than_8_bytes_left_\@:
 	movb	%al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
 	add	$1, \DATA_OFFSET
 	shr	$8, %rax
 	sub	$1, %r13
-	jne	_less_than_8_bytes_left_\@
-_partial_block_done_\@:
+	jne	.L_less_than_8_bytes_left_\@
+.L_partial_block_done_\@:
 .endm # PARTIAL_BLOCK
 ###############################################################################
@@ -1051,7 +1051,7 @@ _partial_block_done_\@:
 	vmovdqa	\XMM8, \T3
 	cmp	$128, %r13
-	jl	_initial_blocks_done\@	# no need for precomputed constants
+	jl	.L_initial_blocks_done\@	# no need for precomputed constants
 ###############################################################################
 # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
@@ -1193,7 +1193,7 @@ _partial_block_done_\@:
 ###############################################################################
-_initial_blocks_done\@:
+.L_initial_blocks_done\@:
 .endm
@@ -2001,7 +2001,7 @@ SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)
 	vmovdqa	\XMM8, \T3
 	cmp	$128, %r13
-	jl	_initial_blocks_done\@	# no need for precomputed constants
+	jl	.L_initial_blocks_done\@	# no need for precomputed constants
 ###############################################################################
 # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
@@ -2145,7 +2145,7 @@ SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)
 ###############################################################################
-_initial_blocks_done\@:
+.L_initial_blocks_done\@:
 .endm
...