summary refs log tree commit diff
path: root/arch/sparc/crypto/aes_asm.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc/crypto/aes_asm.S')
-rw-r--r--  arch/sparc/crypto/aes_asm.S  879
1 files changed, 591 insertions, 288 deletions
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S
index f656dc7a173e..50faae03c592 100644
--- a/arch/sparc/crypto/aes_asm.S
+++ b/arch/sparc/crypto/aes_asm.S
@@ -44,8 +44,8 @@
 	.word	0x85b02307;
 #define MOVXTOD_O0_F0		\
 	.word	0x81b02308;
-#define MOVXTOD_O1_F2		\
-	.word	0x85b02309;
+#define MOVXTOD_O5_F2		\
+	.word	0x85b0230d;
 
 #define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
 	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
@@ -86,45 +86,46 @@
 	ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
 	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
 
-#define DECRYPT_TWO_ROUNDS(KEY_TOP, I0, I1, T0, T1) \
-	AES_DROUND23(KEY_TOP -  2, I0, I1, T1) \
-	AES_DROUND01(KEY_TOP -  4, I0, I1, T0) \
-	AES_DROUND23(KEY_TOP -  6, T0, T1, I1) \
-	AES_DROUND01(KEY_TOP -  8, T0, T1, I0)
+#define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
+	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
+	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
+	AES_DROUND23(KEY_BASE +  4, T0, T1, I1) \
+	AES_DROUND01(KEY_BASE +  6, T0, T1, I0)
 
-#define DECRYPT_TWO_ROUNDS_LAST(KEY_TOP, I0, I1, T0, T1) \
-	AES_DROUND23(KEY_TOP -  2, I0, I1, T1) \
-	AES_DROUND01(KEY_TOP -  4, I0, I1, T0) \
-	AES_DROUND23_L(KEY_TOP -  6, T0, T1, I1) \
-	AES_DROUND01_L(KEY_TOP -  8, T0, T1, I0)
+#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
+	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
+	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
+	AES_DROUND23_L(KEY_BASE +  4, T0, T1, I1) \
+	AES_DROUND01_L(KEY_BASE +  6, T0, T1, I0)
 
 	/* 10 rounds */
-#define DECRYPT_128(KEY_TOP, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP -  0, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP -  8, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP - 16, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP - 24, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS_LAST(KEY_TOP - 32, I0, I1, T0, T1)
+#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
 
 	/* 12 rounds */
-#define DECRYPT_192(KEY_TOP, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP -  0, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP -  8, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP - 16, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP - 24, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP - 32, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS_LAST(KEY_TOP - 40, I0, I1, T0, T1)
+#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
 
 	/* 14 rounds */
-#define DECRYPT_256(KEY_TOP, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP -  0, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP -  8, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP - 16, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP - 24, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP - 32, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS(KEY_TOP - 40, I0, I1, T0, T1) \
-	DECRYPT_TWO_ROUNDS_LAST(KEY_TOP - 48, I0, I1, T0, T1)
-
+#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
+
+	.align	32
 ENTRY(aes_sparc64_key_expand)
 	/* %o0=input_key, %o1=output_key, %o2=key_len */
 	VISEntry
@@ -314,34 +315,63 @@ ENTRY(aes_sparc64_key_expand)
 	 VISExit
 ENDPROC(aes_sparc64_key_expand)
 
-ENTRY(aes_sparc64_encrypt)
-	/* %o0=key, %o1=input, %o2=output, %o3=key_len */
+	.align		32
+ENTRY(aes_sparc64_encrypt_128)
+	/* %o0=key, %o1=input, %o2=output */
 	VISEntry
 	ld		[%o1 + 0x00], %f4
 	ld		[%o1 + 0x04], %f5
 	ld		[%o1 + 0x08], %f6
 	ld		[%o1 + 0x0c], %f7
-
 	ldd		[%o0 + 0x00], %f8
 	ldd		[%o0 + 0x08], %f10
-	cmp		%o3, 24
+	ldd		[%o0 + 0x10], %f12
+	ldd		[%o0 + 0x18], %f14
+	ldd		[%o0 + 0x20], %f16
+	ldd		[%o0 + 0x28], %f18
+	ldd		[%o0 + 0x30], %f20
+	ldd		[%o0 + 0x38], %f22
+	ldd		[%o0 + 0x40], %f24
+	ldd		[%o0 + 0x48], %f26
+	ldd		[%o0 + 0x50], %f28
+	ldd		[%o0 + 0x58], %f30
+	ldd		[%o0 + 0x60], %f32
+	ldd		[%o0 + 0x68], %f34
+	ldd		[%o0 + 0x70], %f36
+	ldd		[%o0 + 0x78], %f38
+	ldd		[%o0 + 0x80], %f40
+	ldd		[%o0 + 0x88], %f42
+	ldd		[%o0 + 0x90], %f44
+	ldd		[%o0 + 0x98], %f46
+	ldd		[%o0 + 0xa0], %f48
+	ldd		[%o0 + 0xa8], %f50
 	fxor		%f8, %f4, %f4
-	bl		2f
-	 fxor		%f10, %f6, %f6
+	fxor		%f10, %f6, %f6
+	ENCRYPT_128(12, 4, 6, 0, 2)
+	st		%f4, [%o2 + 0x00]
+	st		%f5, [%o2 + 0x04]
+	st		%f6, [%o2 + 0x08]
+	st		%f7, [%o2 + 0x0c]
+	retl
+	 VISExit
+ENDPROC(aes_sparc64_encrypt_128)
 
-	be		1f
-	 ldd		[%o0 + 0x10], %f8
+	.align		32
+ENTRY(aes_sparc64_encrypt_192)
+	/* %o0=key, %o1=input, %o2=output */
+	VISEntry
+	ld		[%o1 + 0x00], %f4
+	ld		[%o1 + 0x04], %f5
+	ld		[%o1 + 0x08], %f6
+	ld		[%o1 + 0x0c], %f7
 
-	ldd		[%o0 + 0x18], %f10
-	ldd		[%o0 + 0x20], %f12
-	ldd		[%o0 + 0x28], %f14
-	add		%o0, 0x20, %o0
+	ldd		[%o0 + 0x00], %f8
+	ldd		[%o0 + 0x08], %f10
 
-	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
+	fxor		%f8, %f4, %f4
+	fxor		%f10, %f6, %f6
 
 	ldd		[%o0 + 0x10], %f8
-
-1:
 	ldd		[%o0 + 0x18], %f10
 	ldd		[%o0 + 0x20], %f12
 	ldd		[%o0 + 0x28], %f14
@@ -349,7 +379,6 @@ ENTRY(aes_sparc64_encrypt)
 
 	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
 
-2:
 	ldd		[%o0 + 0x10], %f12
 	ldd		[%o0 + 0x18], %f14
 	ldd		[%o0 + 0x20], %f16
@@ -381,66 +410,63 @@ ENTRY(aes_sparc64_encrypt)
 
 	retl
 	 VISExit
-ENDPROC(aes_sparc64_encrypt)
+ENDPROC(aes_sparc64_encrypt_192)
 
-ENTRY(aes_sparc64_decrypt)
-	/* %o0=key, %o1=input, %o2=output, %o3=key_len, %o4=exp_key_len */
+	.align		32
+ENTRY(aes_sparc64_encrypt_256)
+	/* %o0=key, %o1=input, %o2=output */
 	VISEntry
 	ld		[%o1 + 0x00], %f4
-	add		%o0, %o4, %o0
 	ld		[%o1 + 0x04], %f5
 	ld		[%o1 + 0x08], %f6
 	ld		[%o1 + 0x0c], %f7
-	
-	ldd		[%o0 - 0x08], %f8
-	ldd		[%o0 - 0x10], %f10
 
-	cmp		%o3, 24
-	fxor		%f10, %f4, %f4
-	bl		2f
-	 fxor		%f8, %f6, %f6
+	ldd		[%o0 + 0x00], %f8
+	ldd		[%o0 + 0x08], %f10
 
-	be		1f
-	 ldd		[%o0 - 0x30], %f8
+	fxor		%f8, %f4, %f4
+	fxor		%f10, %f6, %f6
 
-	ldd		[%o0 - 0x28], %f10
-	ldd		[%o0 - 0x20], %f12
-	ldd		[%o0 - 0x18], %f14
-	sub		%o0, 0x20, %o0
+	ldd		[%o0 + 0x10], %f8
 
-	DECRYPT_TWO_ROUNDS(16, 4, 6, 0, 2)
+	ldd		[%o0 + 0x18], %f10
+	ldd		[%o0 + 0x20], %f12
+	ldd		[%o0 + 0x28], %f14
+	add		%o0, 0x20, %o0
 
-	ldd		[%o0 - 0x30], %f8
-1:
-	ldd		[%o0 - 0x28], %f10
-	ldd		[%o0 - 0x20], %f12
-	ldd		[%o0 - 0x18], %f14
-	sub		%o0, 0x20, %o0
+	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
 
-	DECRYPT_TWO_ROUNDS(16, 4, 6, 0, 2)
-2:
-	ldd		[%o0 - 0xb0], %f12
-	ldd		[%o0 - 0xa8], %f14
-	ldd		[%o0 - 0xa0], %f16
-	ldd		[%o0 - 0x98], %f18
-	ldd		[%o0 - 0x90], %f20
-	ldd		[%o0 - 0x88], %f22
-	ldd		[%o0 - 0x80], %f24
-	ldd		[%o0 - 0x78], %f26
-	ldd		[%o0 - 0x70], %f28
-	ldd		[%o0 - 0x68], %f30
-	ldd		[%o0 - 0x60], %f32
-	ldd		[%o0 - 0x58], %f34
-	ldd		[%o0 - 0x50], %f36
-	ldd		[%o0 - 0x48], %f38
-	ldd		[%o0 - 0x40], %f40
-	ldd		[%o0 - 0x38], %f42
-	ldd		[%o0 - 0x30], %f44
-	ldd		[%o0 - 0x28], %f46
-	ldd		[%o0 - 0x20], %f48
-	ldd		[%o0 - 0x18], %f50
-
-	DECRYPT_128(52, 4, 6, 0, 2)
+	ldd		[%o0 + 0x10], %f8
+
+	ldd		[%o0 + 0x18], %f10
+	ldd		[%o0 + 0x20], %f12
+	ldd		[%o0 + 0x28], %f14
+	add		%o0, 0x20, %o0
+
+	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
+
+	ldd		[%o0 + 0x10], %f12
+	ldd		[%o0 + 0x18], %f14
+	ldd		[%o0 + 0x20], %f16
+	ldd		[%o0 + 0x28], %f18
+	ldd		[%o0 + 0x30], %f20
+	ldd		[%o0 + 0x38], %f22
+	ldd		[%o0 + 0x40], %f24
+	ldd		[%o0 + 0x48], %f26
+	ldd		[%o0 + 0x50], %f28
+	ldd		[%o0 + 0x58], %f30
+	ldd		[%o0 + 0x60], %f32
+	ldd		[%o0 + 0x68], %f34
+	ldd		[%o0 + 0x70], %f36
+	ldd		[%o0 + 0x78], %f38
+	ldd		[%o0 + 0x80], %f40
+	ldd		[%o0 + 0x88], %f42
+	ldd		[%o0 + 0x90], %f44
+	ldd		[%o0 + 0x98], %f46
+	ldd		[%o0 + 0xa0], %f48
+	ldd		[%o0 + 0xa8], %f50
+
+	ENCRYPT_128(12, 4, 6, 0, 2)
 
 	st		%f4, [%o2 + 0x00]
 	st		%f5, [%o2 + 0x04]
@@ -449,15 +475,231 @@ ENTRY(aes_sparc64_decrypt)
 
 	retl
 	 VISExit
-ENDPROC(aes_sparc64_decrypt)
+ENDPROC(aes_sparc64_encrypt_256)
 
-ENTRY(aes_sparc64_load_decrypt_keys)
+	.align		32
+ENTRY(aes_sparc64_decrypt_128)
+	/* %o0=key, %o1=input, %o2=output */
+	VISEntry
+	ld		[%o1 + 0x00], %f4
+	ld		[%o1 + 0x04], %f5
+	ld		[%o1 + 0x08], %f6
+	ld		[%o1 + 0x0c], %f7
+	ldd		[%o0 + 0xa0], %f8
+	ldd		[%o0 + 0xa8], %f10
+	ldd		[%o0 + 0x98], %f12
+	ldd		[%o0 + 0x90], %f14
+	ldd		[%o0 + 0x88], %f16
+	ldd		[%o0 + 0x80], %f18
+	ldd		[%o0 + 0x78], %f20
+	ldd		[%o0 + 0x70], %f22
+	ldd		[%o0 + 0x68], %f24
+	ldd		[%o0 + 0x60], %f26
+	ldd		[%o0 + 0x58], %f28
+	ldd		[%o0 + 0x50], %f30
+	ldd		[%o0 + 0x48], %f32
+	ldd		[%o0 + 0x40], %f34
+	ldd		[%o0 + 0x38], %f36
+	ldd		[%o0 + 0x30], %f38
+	ldd		[%o0 + 0x28], %f40
+	ldd		[%o0 + 0x20], %f42
+	ldd		[%o0 + 0x18], %f44
+	ldd		[%o0 + 0x10], %f46
+	ldd		[%o0 + 0x08], %f48
+	ldd		[%o0 + 0x00], %f50
+	fxor		%f8, %f4, %f4
+	fxor		%f10, %f6, %f6
+	DECRYPT_128(12, 4, 6, 0, 2)
+	st		%f4, [%o2 + 0x00]
+	st		%f5, [%o2 + 0x04]
+	st		%f6, [%o2 + 0x08]
+	st		%f7, [%o2 + 0x0c]
+	retl
+	 VISExit
+ENDPROC(aes_sparc64_decrypt_128)
+
+	.align		32
+ENTRY(aes_sparc64_decrypt_192)
+	/* %o0=key, %o1=input, %o2=output */
+	VISEntry
+	ld		[%o1 + 0x00], %f4
+	ld		[%o1 + 0x04], %f5
+	ld		[%o1 + 0x08], %f6
+	ld		[%o1 + 0x0c], %f7
+	ldd		[%o0 + 0xc0], %f8
+	ldd		[%o0 + 0xc8], %f10
+	ldd		[%o0 + 0xb8], %f12
+	ldd		[%o0 + 0xb0], %f14
+	ldd		[%o0 + 0xa8], %f16
+	ldd		[%o0 + 0xa0], %f18
+	fxor		%f8, %f4, %f4
+	fxor		%f10, %f6, %f6
+	ldd		[%o0 + 0x98], %f20
+	ldd		[%o0 + 0x90], %f22
+	ldd		[%o0 + 0x88], %f24
+	ldd		[%o0 + 0x80], %f26
+	DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2)
+	ldd		[%o0 + 0x78], %f28
+	ldd		[%o0 + 0x70], %f30
+	ldd		[%o0 + 0x68], %f32
+	ldd		[%o0 + 0x60], %f34
+	ldd		[%o0 + 0x58], %f36
+	ldd		[%o0 + 0x50], %f38
+	ldd		[%o0 + 0x48], %f40
+	ldd		[%o0 + 0x40], %f42
+	ldd		[%o0 + 0x38], %f44
+	ldd		[%o0 + 0x30], %f46
+	ldd		[%o0 + 0x28], %f48
+	ldd		[%o0 + 0x20], %f50
+	ldd		[%o0 + 0x18], %f52
+	ldd		[%o0 + 0x10], %f54
+	ldd		[%o0 + 0x08], %f56
+	ldd		[%o0 + 0x00], %f58
+	DECRYPT_128(20, 4, 6, 0, 2)
+	st		%f4, [%o2 + 0x00]
+	st		%f5, [%o2 + 0x04]
+	st		%f6, [%o2 + 0x08]
+	st		%f7, [%o2 + 0x0c]
+	retl
+	 VISExit
+ENDPROC(aes_sparc64_decrypt_192)
+
+	.align		32
+ENTRY(aes_sparc64_decrypt_256)
+	/* %o0=key, %o1=input, %o2=output */
+	VISEntry
+	ld		[%o1 + 0x00], %f4
+	ld		[%o1 + 0x04], %f5
+	ld		[%o1 + 0x08], %f6
+	ld		[%o1 + 0x0c], %f7
+	ldd		[%o0 + 0xe0], %f8
+	ldd		[%o0 + 0xe8], %f10
+	ldd		[%o0 + 0xd8], %f12
+	ldd		[%o0 + 0xd0], %f14
+	ldd		[%o0 + 0xc8], %f16
+	fxor		%f8, %f4, %f4
+	ldd		[%o0 + 0xc0], %f18
+	fxor		%f10, %f6, %f6
+	ldd		[%o0 + 0xb8], %f20
+	AES_DROUND23(12, 4, 6, 2)
+	ldd		[%o0 + 0xb0], %f22
+	AES_DROUND01(14, 4, 6, 0)
+	ldd		[%o0 + 0xa8], %f24
+	AES_DROUND23(16, 0, 2, 6)
+	ldd		[%o0 + 0xa0], %f26
+	AES_DROUND01(18, 0, 2, 4)
+	ldd		[%o0 + 0x98], %f12
+	AES_DROUND23(20, 4, 6, 2)
+	ldd		[%o0 + 0x90], %f14
+	AES_DROUND01(22, 4, 6, 0)
+	ldd		[%o0 + 0x88], %f16
+	AES_DROUND23(24, 0, 2, 6)
+	ldd		[%o0 + 0x80], %f18
+	AES_DROUND01(26, 0, 2, 4)
+	ldd		[%o0 + 0x78], %f20
+	AES_DROUND23(12, 4, 6, 2)
+	ldd		[%o0 + 0x70], %f22
+	AES_DROUND01(14, 4, 6, 0)
+	ldd		[%o0 + 0x68], %f24
+	AES_DROUND23(16, 0, 2, 6)
+	ldd		[%o0 + 0x60], %f26
+	AES_DROUND01(18, 0, 2, 4)
+	ldd		[%o0 + 0x58], %f28
+	AES_DROUND23(20, 4, 6, 2)
+	ldd		[%o0 + 0x50], %f30
+	AES_DROUND01(22, 4, 6, 0)
+	ldd		[%o0 + 0x48], %f32
+	AES_DROUND23(24, 0, 2, 6)
+	ldd		[%o0 + 0x40], %f34
+	AES_DROUND01(26, 0, 2, 4)
+	ldd		[%o0 + 0x38], %f36
+	AES_DROUND23(28, 4, 6, 2)
+	ldd		[%o0 + 0x30], %f38
+	AES_DROUND01(30, 4, 6, 0)
+	ldd		[%o0 + 0x28], %f40
+	AES_DROUND23(32, 0, 2, 6)
+	ldd		[%o0 + 0x20], %f42
+	AES_DROUND01(34, 0, 2, 4)
+	ldd		[%o0 + 0x18], %f44
+	AES_DROUND23(36, 4, 6, 2)
+	ldd		[%o0 + 0x10], %f46
+	AES_DROUND01(38, 4, 6, 0)
+	ldd		[%o0 + 0x08], %f48
+	AES_DROUND23(40, 0, 2, 6)
+	ldd		[%o0 + 0x00], %f50
+	AES_DROUND01(42, 0, 2, 4)
+	AES_DROUND23(44, 4, 6, 2)
+	AES_DROUND01(46, 4, 6, 0)
+	AES_DROUND23_L(48, 0, 2, 6)
+	AES_DROUND01_L(50, 0, 2, 4)
+	st		%f4, [%o2 + 0x00]
+	st		%f5, [%o2 + 0x04]
+	st		%f6, [%o2 + 0x08]
+	st		%f7, [%o2 + 0x0c]
+	retl
+	 VISExit
+ENDPROC(aes_sparc64_decrypt_256)
+
+	.align		32
+ENTRY(aes_sparc64_load_encrypt_keys_128)
 	/* %o0=key */
-	ba,pt		%xcc, aes_sparc64_load_encrypt_keys
-	 sub		%o0, 0x10, %o0
-ENDPROC(aes_sparc64_load_decrypt_keys)
+	VISEntry
+	ldd		[%o0 + 0x10], %f8
+	ldd		[%o0 + 0x18], %f10
+	ldd		[%o0 + 0x20], %f12
+	ldd		[%o0 + 0x28], %f14
+	ldd		[%o0 + 0x30], %f16
+	ldd		[%o0 + 0x38], %f18
+	ldd		[%o0 + 0x40], %f20
+	ldd		[%o0 + 0x48], %f22
+	ldd		[%o0 + 0x50], %f24
+	ldd		[%o0 + 0x58], %f26
+	ldd		[%o0 + 0x60], %f28
+	ldd		[%o0 + 0x68], %f30
+	ldd		[%o0 + 0x70], %f32
+	ldd		[%o0 + 0x78], %f34
+	ldd		[%o0 + 0x80], %f36
+	ldd		[%o0 + 0x88], %f38
+	ldd		[%o0 + 0x90], %f40
+	ldd		[%o0 + 0x98], %f42
+	ldd		[%o0 + 0xa0], %f44
+	retl
+	 ldd		[%o0 + 0xa8], %f46
+ENDPROC(aes_sparc64_load_encrypt_keys_128)
 
-ENTRY(aes_sparc64_load_encrypt_keys)
+	.align		32
+ENTRY(aes_sparc64_load_encrypt_keys_192)
+	/* %o0=key */
+	VISEntry
+	ldd		[%o0 + 0x10], %f8
+	ldd		[%o0 + 0x18], %f10
+	ldd		[%o0 + 0x20], %f12
+	ldd		[%o0 + 0x28], %f14
+	ldd		[%o0 + 0x30], %f16
+	ldd		[%o0 + 0x38], %f18
+	ldd		[%o0 + 0x40], %f20
+	ldd		[%o0 + 0x48], %f22
+	ldd		[%o0 + 0x50], %f24
+	ldd		[%o0 + 0x58], %f26
+	ldd		[%o0 + 0x60], %f28
+	ldd		[%o0 + 0x68], %f30
+	ldd		[%o0 + 0x70], %f32
+	ldd		[%o0 + 0x78], %f34
+	ldd		[%o0 + 0x80], %f36
+	ldd		[%o0 + 0x88], %f38
+	ldd		[%o0 + 0x90], %f40
+	ldd		[%o0 + 0x98], %f42
+	ldd		[%o0 + 0xa0], %f44
+	ldd		[%o0 + 0xa8], %f46
+	ldd		[%o0 + 0xb0], %f48
+	ldd		[%o0 + 0xb8], %f50
+	ldd		[%o0 + 0xc0], %f52
+	retl
+	 ldd		[%o0 + 0xc8], %f54
+ENDPROC(aes_sparc64_load_encrypt_keys_192)
+
+	.align		32
+ENTRY(aes_sparc64_load_encrypt_keys_256)
 	/* %o0=key */
 	VISEntry
 	ldd		[%o0 + 0x10], %f8
@@ -489,171 +731,241 @@ ENTRY(aes_sparc64_load_encrypt_keys)
 	ldd		[%o0 + 0xe0], %f60
 	retl
 	 ldd		[%o0 + 0xe8], %f62
-ENDPROC(aes_sparc64_load_encrypt_keys)
+ENDPROC(aes_sparc64_load_encrypt_keys_256)
+
+	.align		32
+ENTRY(aes_sparc64_load_decrypt_keys_128)
+	/* %o0=key */
+	VISEntry
+	ldd		[%o0 + 0x98], %f8
+	ldd		[%o0 + 0x90], %f10
+	ldd		[%o0 + 0x88], %f12
+	ldd		[%o0 + 0x80], %f14
+	ldd		[%o0 + 0x78], %f16
+	ldd		[%o0 + 0x70], %f18
+	ldd		[%o0 + 0x68], %f20
+	ldd		[%o0 + 0x60], %f22
+	ldd		[%o0 + 0x58], %f24
+	ldd		[%o0 + 0x50], %f26
+	ldd		[%o0 + 0x48], %f28
+	ldd		[%o0 + 0x40], %f30
+	ldd		[%o0 + 0x38], %f32
+	ldd		[%o0 + 0x30], %f34
+	ldd		[%o0 + 0x28], %f36
+	ldd		[%o0 + 0x20], %f38
+	ldd		[%o0 + 0x18], %f40
+	ldd		[%o0 + 0x10], %f42
+	ldd		[%o0 + 0x08], %f44
+	retl
+	 ldd		[%o0 + 0x00], %f46
+ENDPROC(aes_sparc64_load_decrypt_keys_128)
 
-ENTRY(aes_sparc64_ecb_encrypt)
-	/* %o0=key, %o1=input, %o2=output, %o3=key_len, %o4=len */
+	.align		32
+ENTRY(aes_sparc64_load_decrypt_keys_192)
+	/* %o0=key */
+	VISEntry
+	ldd		[%o0 + 0xb8], %f8
+	ldd		[%o0 + 0xb0], %f10
+	ldd		[%o0 + 0xa8], %f12
+	ldd		[%o0 + 0xa0], %f14
+	ldd		[%o0 + 0x98], %f16
+	ldd		[%o0 + 0x90], %f18
+	ldd		[%o0 + 0x88], %f20
+	ldd		[%o0 + 0x80], %f22
+	ldd		[%o0 + 0x78], %f24
+	ldd		[%o0 + 0x70], %f26
+	ldd		[%o0 + 0x68], %f28
+	ldd		[%o0 + 0x60], %f30
+	ldd		[%o0 + 0x58], %f32
+	ldd		[%o0 + 0x50], %f34
+	ldd		[%o0 + 0x48], %f36
+	ldd		[%o0 + 0x40], %f38
+	ldd		[%o0 + 0x38], %f40
+	ldd		[%o0 + 0x30], %f42
+	ldd		[%o0 + 0x28], %f44
+	ldd		[%o0 + 0x20], %f46
+	ldd		[%o0 + 0x18], %f48
+	ldd		[%o0 + 0x10], %f50
+	ldd		[%o0 + 0x08], %f52
+	retl
+	 ldd		[%o0 + 0x00], %f54
+ENDPROC(aes_sparc64_load_decrypt_keys_192)
+
+	.align		32
+ENTRY(aes_sparc64_load_decrypt_keys_256)
+	/* %o0=key */
+	VISEntry
+	ldd		[%o0 + 0xd8], %f8
+	ldd		[%o0 + 0xd0], %f10
+	ldd		[%o0 + 0xc8], %f12
+	ldd		[%o0 + 0xc0], %f14
+	ldd		[%o0 + 0xb8], %f16
+	ldd		[%o0 + 0xb0], %f18
+	ldd		[%o0 + 0xa8], %f20
+	ldd		[%o0 + 0xa0], %f22
+	ldd		[%o0 + 0x98], %f24
+	ldd		[%o0 + 0x90], %f26
+	ldd		[%o0 + 0x88], %f28
+	ldd		[%o0 + 0x80], %f30
+	ldd		[%o0 + 0x78], %f32
+	ldd		[%o0 + 0x70], %f34
+	ldd		[%o0 + 0x68], %f36
+	ldd		[%o0 + 0x60], %f38
+	ldd		[%o0 + 0x58], %f40
+	ldd		[%o0 + 0x50], %f42
+	ldd		[%o0 + 0x48], %f44
+	ldd		[%o0 + 0x40], %f46
+	ldd		[%o0 + 0x38], %f48
+	ldd		[%o0 + 0x30], %f50
+	ldd		[%o0 + 0x28], %f52
+	ldd		[%o0 + 0x20], %f54
+	ldd		[%o0 + 0x18], %f56
+	ldd		[%o0 + 0x10], %f58
+	ldd		[%o0 + 0x08], %f60
+	retl
+	 ldd		[%o0 + 0x00], %f62
+ENDPROC(aes_sparc64_load_decrypt_keys_256)
+
+	.align		32
+ENTRY(aes_sparc64_ecb_encrypt_128)
+	/* %o0=key, %o1=input, %o2=output, %o3=len */
 	ldx		[%o0 + 0x00], %g1
 	ldx		[%o0 + 0x08], %g2
-	cmp		%o3, 24
-	bl		2f
-	 nop
-	be		1f
-	 nop
-
-0:
-	/* 256-bit key */
-	ldx		[%o1 + 0x00], %g3
+1:	ldx		[%o1 + 0x00], %g3
 	ldx		[%o1 + 0x08], %g7
 	add		%o1, 0x10, %o1
 	xor		%g1, %g3, %g3
 	xor		%g2, %g7, %g7
 	MOVXTOD_G3_F4
 	MOVXTOD_G7_F6
-
-	ENCRYPT_256(8, 4, 6, 0, 2)
-
+	ENCRYPT_128(8, 4, 6, 0, 2)
 	std		%f4, [%o2 + 0x00]
 	std		%f6, [%o2 + 0x08]
-	subcc		%o4, 0x10, %o4
-	bne,pt		%xcc, 0b
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
 	 add		%o2, 0x10, %o2
-
 	retl
 	 nop
+ENDPROC(aes_sparc64_ecb_encrypt_128)
 
-1:
-	/* 192-bit key */
-	ldx		[%o1 + 0x00], %g3
+	.align		32
+ENTRY(aes_sparc64_ecb_encrypt_192)
+	/* %o0=key, %o1=input, %o2=output, %o3=len */
+	ldx		[%o0 + 0x00], %g1
+	ldx		[%o0 + 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
 	ldx		[%o1 + 0x08], %g7
 	add		%o1, 0x10, %o1
 	xor		%g1, %g3, %g3
 	xor		%g2, %g7, %g7
 	MOVXTOD_G3_F4
 	MOVXTOD_G7_F6
-
 	ENCRYPT_192(8, 4, 6, 0, 2)
-
 	std		%f4, [%o2 + 0x00]
 	std		%f6, [%o2 + 0x08]
-	subcc		%o4, 0x10, %o4
+	subcc		%o3, 0x10, %o3
 	bne,pt		%xcc, 1b
 	 add		%o2, 0x10, %o2
-
 	retl
 	 nop
+ENDPROC(aes_sparc64_ecb_encrypt_192)
 
-2:
-	/* 128-bit key */
-	ldx		[%o1 + 0x00], %g3
+	.align		32
+ENTRY(aes_sparc64_ecb_encrypt_256)
+	/* %o0=key, %o1=input, %o2=output, %o3=len */
+	ldx		[%o0 + 0x00], %g1
+	ldx		[%o0 + 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
 	ldx		[%o1 + 0x08], %g7
 	add		%o1, 0x10, %o1
 	xor		%g1, %g3, %g3
 	xor		%g2, %g7, %g7
 	MOVXTOD_G3_F4
 	MOVXTOD_G7_F6
-
-	ENCRYPT_128(8, 4, 6, 0, 2)
-
+	ENCRYPT_256(8, 4, 6, 0, 2)
 	std		%f4, [%o2 + 0x00]
 	std		%f6, [%o2 + 0x08]
-	subcc		%o4, 0x10, %o4
-	bne,pt		%xcc, 2b
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
 	 add		%o2, 0x10, %o2
-
 	retl
 	 nop
-ENDPROC(aes_sparc64_ecb_encrypt)
+ENDPROC(aes_sparc64_ecb_encrypt_256)
 
-ENTRY(aes_sparc64_ecb_decrypt)
-	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=key_len, %o4=len, %o5=iv */
+	.align		32
+ENTRY(aes_sparc64_ecb_decrypt_128)
+	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
 	ldx		[%o0 - 0x10], %g1
 	ldx		[%o0 - 0x08], %g2
-	cmp		%o3, 24
-	bl		2f
-	 nop
-	be		1f
-	 nop
-
-0:
-	/* 256-bit key */
-	ldx		[%o1 + 0x00], %g3
+1:	ldx		[%o1 + 0x00], %g3
 	ldx		[%o1 + 0x08], %g7
 	add		%o1, 0x10, %o1
 	xor		%g1, %g3, %g3
 	xor		%g2, %g7, %g7
 	MOVXTOD_G3_F4
 	MOVXTOD_G7_F6
-
-	DECRYPT_256(64, 4, 6, 0, 2)
-
+	DECRYPT_128(8, 4, 6, 0, 2)
 	std		%f4, [%o2 + 0x00]
 	std		%f6, [%o2 + 0x08]
-	subcc		%o4, 0x10, %o4
-	bne,pt		%xcc, 0b
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
 	 add		%o2, 0x10, %o2
-
 	retl
 	 nop
+ENDPROC(aes_sparc64_ecb_decrypt_128)
 
-1:
-	/* 192-bit key */
-	ldx		[%o1 + 0x00], %g3
+	.align		32
+ENTRY(aes_sparc64_ecb_decrypt_192)
+	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
+	ldx		[%o0 - 0x10], %g1
+	ldx		[%o0 - 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
 	ldx		[%o1 + 0x08], %g7
 	add		%o1, 0x10, %o1
 	xor		%g1, %g3, %g3
 	xor		%g2, %g7, %g7
 	MOVXTOD_G3_F4
 	MOVXTOD_G7_F6
-
-	DECRYPT_192(56, 4, 6, 0, 2)
-
+	DECRYPT_192(8, 4, 6, 0, 2)
 	std		%f4, [%o2 + 0x00]
 	std		%f6, [%o2 + 0x08]
-	subcc		%o4, 0x10, %o4
+	subcc		%o3, 0x10, %o3
 	bne,pt		%xcc, 1b
 	 add		%o2, 0x10, %o2
-
 	retl
 	 nop
+ENDPROC(aes_sparc64_ecb_decrypt_192)
 
-2:
-	/* 128-bit key */
-	ldx		[%o1 + 0x00], %g3
+	.align		32
+ENTRY(aes_sparc64_ecb_decrypt_256)
+	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
+	ldx		[%o0 - 0x10], %g1
+	ldx		[%o0 - 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
 	ldx		[%o1 + 0x08], %g7
 	add		%o1, 0x10, %o1
 	xor		%g1, %g3, %g3
 	xor		%g2, %g7, %g7
 	MOVXTOD_G3_F4
 	MOVXTOD_G7_F6
-
-	DECRYPT_128(48, 4, 6, 0, 2)
-
+	DECRYPT_256(8, 4, 6, 0, 2)
 	std		%f4, [%o2 + 0x00]
 	std		%f6, [%o2 + 0x08]
-	subcc		%o4, 0x10, %o4
-	bne,pt		%xcc, 2b
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
 	 add		%o2, 0x10, %o2
-
 	retl
 	 nop
-ENDPROC(aes_sparc64_ecb_decrypt)
+ENDPROC(aes_sparc64_ecb_decrypt_256)
 
-ENTRY(aes_sparc64_cbc_encrypt)
-	/* %o0=key, %o1=input, %o2=output, %o3=key_len, %o4=len */
-	ldd		[%o5 + 0x00], %f4
-	ldd		[%o5 + 0x08], %f6
+	.align		32
+ENTRY(aes_sparc64_cbc_encrypt_128)
+	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldd		[%o4 + 0x00], %f4
+	ldd		[%o4 + 0x08], %f6
 	ldx		[%o0 + 0x00], %g1
 	ldx		[%o0 + 0x08], %g2
-	cmp		%o3, 24
-	bl		2f
-	 nop
-	be		1f
-	 nop
-
-0:
-	/* 256-bit key */
-	ldx		[%o1 + 0x00], %g3
+1:	ldx		[%o1 + 0x00], %g3
 	ldx		[%o1 + 0x08], %g7
 	add		%o1, 0x10, %o1
 	xor		%g1, %g3, %g3
@@ -662,24 +974,26 @@ ENTRY(aes_sparc64_cbc_encrypt)
 	MOVXTOD_G7_F2
 	fxor		%f4, %f0, %f4
 	fxor		%f6, %f2, %f6
-
-	ENCRYPT_256(8, 4, 6, 0, 2)
-
+	ENCRYPT_128(8, 4, 6, 0, 2)
 	std		%f4, [%o2 + 0x00]
 	std		%f6, [%o2 + 0x08]
-	subcc		%o4, 0x10, %o4
-	bne,pt		%xcc, 0b
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
 	 add		%o2, 0x10, %o2
-
-	std		%f4, [%o5 + 0x00]
-	std		%f6, [%o5 + 0x08]
-
+	std		%f4, [%o4 + 0x00]
+	std		%f6, [%o4 + 0x08]
 	retl
 	 nop
+ENDPROC(aes_sparc64_cbc_encrypt_128)
 
-1:
-	/* 192-bit key */
-	ldx		[%o1 + 0x00], %g3
+	.align		32
+ENTRY(aes_sparc64_cbc_encrypt_192)
+	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldd		[%o4 + 0x00], %f4
+	ldd		[%o4 + 0x08], %f6
+	ldx		[%o0 + 0x00], %g1
+	ldx		[%o0 + 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
 	ldx		[%o1 + 0x08], %g7
 	add		%o1, 0x10, %o1
 	xor		%g1, %g3, %g3
@@ -688,24 +1002,26 @@ ENTRY(aes_sparc64_cbc_encrypt)
 	MOVXTOD_G7_F2
 	fxor		%f4, %f0, %f4
 	fxor		%f6, %f2, %f6
-
 	ENCRYPT_192(8, 4, 6, 0, 2)
-
 	std		%f4, [%o2 + 0x00]
 	std		%f6, [%o2 + 0x08]
-	subcc		%o4, 0x10, %o4
+	subcc		%o3, 0x10, %o3
 	bne,pt		%xcc, 1b
 	 add		%o2, 0x10, %o2
-
-	std		%f4, [%o5 + 0x00]
-	std		%f6, [%o5 + 0x08]
-
+	std		%f4, [%o4 + 0x00]
+	std		%f6, [%o4 + 0x08]
 	retl
 	 nop
+ENDPROC(aes_sparc64_cbc_encrypt_192)
 
-2:
-	/* 128-bit key */
-	ldx		[%o1 + 0x00], %g3
+	.align		32
+ENTRY(aes_sparc64_cbc_encrypt_256)
+	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldd		[%o4 + 0x00], %f4
+	ldd		[%o4 + 0x08], %f6
+	ldx		[%o0 + 0x00], %g1
+	ldx		[%o0 + 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
 	ldx		[%o1 + 0x08], %g7
 	add		%o1, 0x10, %o1
 	xor		%g1, %g3, %g3
@@ -714,123 +1030,110 @@ ENTRY(aes_sparc64_cbc_encrypt)
 	MOVXTOD_G7_F2
 	fxor		%f4, %f0, %f4
 	fxor		%f6, %f2, %f6
-
-	ENCRYPT_128(8, 4, 6, 0, 2)
-
+	ENCRYPT_256(8, 4, 6, 0, 2)
 	std		%f4, [%o2 + 0x00]
 	std		%f6, [%o2 + 0x08]
-	subcc		%o4, 0x10, %o4
-	bne,pt		%xcc, 2b
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
 	 add		%o2, 0x10, %o2
-
-	std		%f4, [%o5 + 0x00]
-	std		%f6, [%o5 + 0x08]
-
+	std		%f4, [%o4 + 0x00]
+	std		%f6, [%o4 + 0x08]
 	retl
 	 nop
-ENDPROC(aes_sparc64_cbc_encrypt)
+ENDPROC(aes_sparc64_cbc_encrypt_256)
 
-ENTRY(aes_sparc64_cbc_decrypt)
-	/* %o0=&key[key_len], %o1=key_len, %o2=input, %o3=output, %o4=len, %o5=iv */
+	.align		32
+ENTRY(aes_sparc64_cbc_decrypt_128)
+	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
 	ldx		[%o0 - 0x10], %g1
 	ldx		[%o0 - 0x08], %g2
-	cmp		%o1, 24
-	ldx		[%o5 + 0x00], %o0
-	bl		2f
-	 ldx		[%o5 + 0x08], %o1
-	be		1f
-	 nop
-
-0:
-	/* 256-bit key */
-	ldx		[%o2 + 0x00], %g3
-	ldx		[%o2 + 0x08], %g7
-	add		%o2, 0x10, %o2
+	ldx		[%o4 + 0x00], %o0
+	ldx		[%o4 + 0x08], %o5
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	add		%o1, 0x10, %o1
 	xor		%g1, %g3, %g3
 	xor		%g2, %g7, %g7
 	MOVXTOD_G3_F4
 	MOVXTOD_G7_F6
-
-	DECRYPT_256(64, 4, 6, 0, 2)
-
+	DECRYPT_128(8, 4, 6, 0, 2)
 	MOVXTOD_O0_F0
-	MOVXTOD_O1_F2
+	MOVXTOD_O5_F2
 	xor		%g1, %g3, %o0
-	xor		%g2, %g7, %o1
+	xor		%g2, %g7, %o5
 	fxor		%f4, %f0, %f4
 	fxor		%f6, %f2, %f6
-
-	std		%f4, [%o3 + 0x00]
-	std		%f6, [%o3 + 0x08]
-	subcc		%o4, 0x10, %o4
-	bne,pt		%xcc, 0b
-	 add		%o3, 0x10, %o3
-
-	stx		%o0, [%o5 + 0x00]
-	stx		%o1, [%o5 + 0x08]
-
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
+	 add		%o2, 0x10, %o2
+	stx		%o0, [%o4 + 0x00]
+	stx		%o5, [%o4 + 0x08]
 	retl
 	 nop
+ENDPROC(aes_sparc64_cbc_decrypt_128)
 
-1:
-	/* 192-bit key */
-	ldx		[%o2 + 0x00], %g3
-	ldx		[%o2 + 0x08], %g7
-	add		%o2, 0x10, %o2
+	.align		32
+ENTRY(aes_sparc64_cbc_decrypt_192)
+	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
+	ldx		[%o0 - 0x10], %g1
+	ldx		[%o0 - 0x08], %g2
+	ldx		[%o4 + 0x00], %o0
+	ldx		[%o4 + 0x08], %o5
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	add		%o1, 0x10, %o1
 	xor		%g1, %g3, %g3
 	xor		%g2, %g7, %g7
 	MOVXTOD_G3_F4
 	MOVXTOD_G7_F6
-
-	DECRYPT_192(56, 4, 6, 0, 2)
-
+	DECRYPT_192(8, 4, 6, 0, 2)
 	MOVXTOD_O0_F0
-	MOVXTOD_O1_F2
+	MOVXTOD_O5_F2
 	xor		%g1, %g3, %o0
-	xor		%g2, %g7, %o1
+	xor		%g2, %g7, %o5
 	fxor		%f4, %f0, %f4
 	fxor		%f6, %f2, %f6
-
-	std		%f4, [%o3 + 0x00]
-	std		%f6, [%o3 + 0x08]
-	subcc		%o4, 0x10, %o4
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	subcc		%o3, 0x10, %o3
 	bne,pt		%xcc, 1b
-	 add		%o3, 0x10, %o3
-
-	stx		%o0, [%o5 + 0x00]
-	stx		%o1, [%o5 + 0x08]
-
+	 add		%o2, 0x10, %o2
+	stx		%o0, [%o4 + 0x00]
+	stx		%o5, [%o4 + 0x08]
 	retl
 	 nop
+ENDPROC(aes_sparc64_cbc_decrypt_192)
 
-2:
-	/* 128-bit key */
-	ldx		[%o2 + 0x00], %g3
-	ldx		[%o2 + 0x08], %g7
-	add		%o2, 0x10, %o2
+	.align		32
+ENTRY(aes_sparc64_cbc_decrypt_256)
+	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */
+	ldx		[%o0 - 0x10], %g1
+	ldx		[%o0 - 0x08], %g2
+	ldx		[%o4 + 0x00], %o0
+	ldx		[%o4 + 0x08], %o5
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	add		%o1, 0x10, %o1
 	xor		%g1, %g3, %g3
 	xor		%g2, %g7, %g7
 	MOVXTOD_G3_F4
 	MOVXTOD_G7_F6
-
-	DECRYPT_128(48, 4, 6, 0, 2)
-
+	DECRYPT_256(8, 4, 6, 0, 2)
 	MOVXTOD_O0_F0
-	MOVXTOD_O1_F2
+	MOVXTOD_O5_F2
 	xor		%g1, %g3, %o0
-	xor		%g2, %g7, %o1
+	xor		%g2, %g7, %o5
 	fxor		%f4, %f0, %f4
 	fxor		%f6, %f2, %f6
-
-	std		%f4, [%o3 + 0x00]
-	std		%f6, [%o3 + 0x08]
-	subcc		%o4, 0x10, %o4
-	bne,pt		%xcc, 2b
-	 add		%o3, 0x10, %o3
-
-	stx		%o0, [%o5 + 0x00]
-	stx		%o1, [%o5 + 0x08]
-
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
+	 add		%o2, 0x10, %o2
+	stx		%o0, [%o4 + 0x00]
+	stx		%o5, [%o4 + 0x08]
 	retl
 	 nop
-ENDPROC(aes_sparc64_cbc_decrypt)
+ENDPROC(aes_sparc64_cbc_decrypt_256)