summary refs log tree commit diff
path: root/arch
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2012-03-21 13:20:43 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-03-21 13:20:43 -0700
commit b8716614a7cc2fc15ea2a518edd04755fb08d922 (patch)
tree 2a8a5d04066b2bd589ba2ebbeb228e2a6a178ec9 /arch
parent 31f6765266417c0d99f0e922fe82848a7c9c2ae9 (diff)
parent 2dc9b5dbdef09840de852a4f0cc6a9c9eece7220 (diff)
download linux-b8716614a7cc2fc15ea2a518edd04755fb08d922.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
 "* sha512 bug fixes (already in your tree).
  * SHA224/SHA384 AEAD support in caam.
  * X86-64 optimised version of Camellia.
  * Tegra AES support.
  * Bulk algorithm registration interface to make driver registration easier.
  * padata race fixes.
  * Misc fixes."

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (31 commits)
  padata: Fix race on sequence number wrap
  padata: Fix race in the serialization path
  crypto: camellia - add assembler implementation for x86_64
  crypto: camellia - rename camellia.c to camellia_generic.c
  crypto: camellia - fix checkpatch warnings
  crypto: camellia - rename camellia module to camellia_generic
  crypto: tcrypt - add more camellia tests
  crypto: testmgr - add more camellia test vectors
  crypto: camellia - simplify key setup and CAMELLIA_ROUNDSM macro
  crypto: twofish-x86_64/i586 - set alignmask to zero
  crypto: blowfish-x86_64 - set alignmask to zero
  crypto: serpent-sse2 - combine ablk_*_init functions
  crypto: blowfish-x86_64 - use crypto_[un]register_algs
  crypto: twofish-x86_64-3way - use crypto_[un]register_algs
  crypto: serpent-sse2 - use crypto_[un]register_algs
  crypto: serpent-sse2 - remove dead code from serpent_sse2_glue.c::serpent_sse2_init()
  crypto: twofish-x86 - Remove dead code from twofish_glue_3way.c::init()
  crypto: In crypto_add_alg(), 'exact' wants to be initialized to 0
  crypto: caam - fix gcc 4.6 warning
  crypto: Add bulk algorithm registration interface
  ...
Diffstat (limited to 'arch')
-rw-r--r-- arch/arm/mach-tegra/fuse.c 2
-rw-r--r-- arch/x86/crypto/Makefile 2
-rw-r--r-- arch/x86/crypto/blowfish_glue.c 191
-rw-r--r-- arch/x86/crypto/camellia-x86_64-asm_64.S 520
-rw-r--r-- arch/x86/crypto/camellia_glue.c 1952
-rw-r--r-- arch/x86/crypto/serpent-sse2-i586-asm_32.S 29
-rw-r--r-- arch/x86/crypto/serpent-sse2-x86_64-asm_64.S 29
-rw-r--r-- arch/x86/crypto/serpent_sse2_glue.c 394
-rw-r--r-- arch/x86/crypto/twofish_glue.c 2
-rw-r--r-- arch/x86/crypto/twofish_glue_3way.c 265
10 files changed, 2866 insertions, 520 deletions
diff --git a/arch/arm/mach-tegra/fuse.c b/arch/arm/mach-tegra/fuse.c
index 1fa26d9a1a68..ea49bd93c6b9 100644
--- a/arch/arm/mach-tegra/fuse.c
+++ b/arch/arm/mach-tegra/fuse.c
@@ -19,6 +19,7 @@
 
 #include <linux/kernel.h>
 #include <linux/io.h>
+#include <linux/module.h>
 
 #include <mach/iomap.h>
 
@@ -58,6 +59,7 @@ unsigned long long tegra_chip_uid(void)
 	hi = fuse_readl(FUSE_UID_HIGH);
 	return (hi << 32ull) | lo;
 }
+EXPORT_SYMBOL(tegra_chip_uid);
 
 int tegra_sku_id(void)
 {
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 2b0b9631474b..e191ac048b59 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
+obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
@@ -25,6 +26,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
+camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index b05aa163d55a..7967474de8f7 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -25,6 +25,7 @@
  *
  */
 
+#include <asm/processor.h>
 #include <crypto/blowfish.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
@@ -76,27 +77,6 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 
-static struct crypto_alg bf_alg = {
-	.cra_name		=	"blowfish",
-	.cra_driver_name	=	"blowfish-asm",
-	.cra_priority		=	200,
-	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize		=	BF_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct bf_ctx),
-	.cra_alignmask		=	3,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(bf_alg.cra_list),
-	.cra_u			=	{
-		.cipher = {
-			.cia_min_keysize	=	BF_MIN_KEY_SIZE,
-			.cia_max_keysize	=	BF_MAX_KEY_SIZE,
-			.cia_setkey		=	blowfish_setkey,
-			.cia_encrypt		=	blowfish_encrypt,
-			.cia_decrypt		=	blowfish_decrypt,
-		}
-	}
-};
-
 static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
 		     void (*fn)(struct bf_ctx *, u8 *, const u8 *),
 		     void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
@@ -160,28 +140,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
 }
 
-static struct crypto_alg blk_ecb_alg = {
-	.cra_name		= "ecb(blowfish)",
-	.cra_driver_name	= "ecb-blowfish-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= BF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct bf_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_ecb_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= BF_MIN_KEY_SIZE,
-			.max_keysize	= BF_MAX_KEY_SIZE,
-			.setkey		= blowfish_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-};
-
 static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 				  struct blkcipher_walk *walk)
 {
@@ -307,29 +265,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return err;
 }
 
-static struct crypto_alg blk_cbc_alg = {
-	.cra_name		= "cbc(blowfish)",
-	.cra_driver_name	= "cbc-blowfish-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= BF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct bf_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_cbc_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= BF_MIN_KEY_SIZE,
-			.max_keysize	= BF_MAX_KEY_SIZE,
-			.ivsize		= BF_BLOCK_SIZE,
-			.setkey		= blowfish_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-};
-
 static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
 {
 	u8 *ctrblk = walk->iv;
@@ -423,7 +358,67 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return err;
 }
 
-static struct crypto_alg blk_ctr_alg = {
+static struct crypto_alg bf_algs[4] = { {
+	.cra_name		= "blowfish",
+	.cra_driver_name	= "blowfish-asm",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= BF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct bf_ctx),
+	.cra_alignmask		= 0,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(bf_algs[0].cra_list),
+	.cra_u = {
+		.cipher = {
+			.cia_min_keysize	= BF_MIN_KEY_SIZE,
+			.cia_max_keysize	= BF_MAX_KEY_SIZE,
+			.cia_setkey		= blowfish_setkey,
+			.cia_encrypt		= blowfish_encrypt,
+			.cia_decrypt		= blowfish_decrypt,
+		}
+	}
+}, {
+	.cra_name		= "ecb(blowfish)",
+	.cra_driver_name	= "ecb-blowfish-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= BF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct bf_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(bf_algs[1].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= BF_MIN_KEY_SIZE,
+			.max_keysize	= BF_MAX_KEY_SIZE,
+			.setkey		= blowfish_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(blowfish)",
+	.cra_driver_name	= "cbc-blowfish-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= BF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct bf_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(bf_algs[2].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= BF_MIN_KEY_SIZE,
+			.max_keysize	= BF_MAX_KEY_SIZE,
+			.ivsize		= BF_BLOCK_SIZE,
+			.setkey		= blowfish_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
 	.cra_name		= "ctr(blowfish)",
 	.cra_driver_name	= "ctr-blowfish-asm",
 	.cra_priority		= 300,
@@ -433,7 +428,7 @@ static struct crypto_alg blk_ctr_alg = {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_ctr_alg.cra_list),
+	.cra_list		= LIST_HEAD_INIT(bf_algs[3].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= BF_MIN_KEY_SIZE,
@@ -444,43 +439,45 @@ static struct crypto_alg blk_ctr_alg = {
 			.decrypt	= ctr_crypt,
 		},
 	},
-};
+} };
+
+static bool is_blacklisted_cpu(void)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return false;
+
+	if (boot_cpu_data.x86 == 0x0f) {
+		/*
+		 * On Pentium 4, blowfish-x86_64 is slower than the generic C
+		 * implementation because of its use of 64-bit rotates (which
+		 * are really slow on P4). Therefore blacklist P4s.
+		 */
+		return true;
+	}
+
+	return false;
+}
+
+static int force;
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
 static int __init init(void)
 {
-	int err;
+	if (!force && is_blacklisted_cpu()) {
+		printk(KERN_INFO
+			"blowfish-x86_64: performance on this CPU "
+			"would be suboptimal: disabling "
+			"blowfish-x86_64.\n");
+		return -ENODEV;
+	}
 
-	err = crypto_register_alg(&bf_alg);
-	if (err)
-		goto bf_err;
-	err = crypto_register_alg(&blk_ecb_alg);
-	if (err)
-		goto ecb_err;
-	err = crypto_register_alg(&blk_cbc_alg);
-	if (err)
-		goto cbc_err;
-	err = crypto_register_alg(&blk_ctr_alg);
-	if (err)
-		goto ctr_err;
-
-	return 0;
-
-ctr_err:
-	crypto_unregister_alg(&blk_cbc_alg);
-cbc_err:
-	crypto_unregister_alg(&blk_ecb_alg);
-ecb_err:
-	crypto_unregister_alg(&bf_alg);
-bf_err:
-	return err;
+	return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
 }
 
 static void __exit fini(void)
 {
-	crypto_unregister_alg(&blk_ctr_alg);
-	crypto_unregister_alg(&blk_cbc_alg);
-	crypto_unregister_alg(&blk_ecb_alg);
-	crypto_unregister_alg(&bf_alg);
+	crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
 }
 
 module_init(init);
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
new file mode 100644
index 000000000000..0b3374335fdc
--- /dev/null
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -0,0 +1,520 @@
+/*
+ * Camellia Cipher Algorithm (x86_64)
+ *
+ * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "camellia-x86_64-asm_64.S"
+.text
+
+.extern camellia_sp10011110;
+.extern camellia_sp22000222;
+.extern camellia_sp03303033;
+.extern camellia_sp00444404;
+.extern camellia_sp02220222;
+.extern camellia_sp30333033;
+.extern camellia_sp44044404;
+.extern camellia_sp11101110;
+
+#define sp10011110 camellia_sp10011110
+#define sp22000222 camellia_sp22000222
+#define sp03303033 camellia_sp03303033
+#define sp00444404 camellia_sp00444404
+#define sp02220222 camellia_sp02220222
+#define sp30333033 camellia_sp30333033
+#define sp44044404 camellia_sp44044404
+#define sp11101110 camellia_sp11101110
+
+#define CAMELLIA_TABLE_BYTE_LEN 272
+
+/* struct camellia_ctx: */
+#define key_table 0
+#define key_length CAMELLIA_TABLE_BYTE_LEN
+
+/* register macros */
+#define CTX %rdi
+#define RIO %rsi
+#define RIOd %esi
+
+#define RAB0 %rax
+#define RCD0 %rcx
+#define RAB1 %rbx
+#define RCD1 %rdx
+
+#define RAB0d %eax
+#define RCD0d %ecx
+#define RAB1d %ebx
+#define RCD1d %edx
+
+#define RAB0bl %al
+#define RCD0bl %cl
+#define RAB1bl %bl
+#define RCD1bl %dl
+
+#define RAB0bh %ah
+#define RCD0bh %ch
+#define RAB1bh %bh
+#define RCD1bh %dh
+
+#define RT0 %rsi
+#define RT1 %rbp
+#define RT2 %r8
+
+#define RT0d %esi
+#define RT1d %ebp
+#define RT2d %r8d
+
+#define RT2bl %r8b
+
+#define RXOR %r9
+#define RRBP %r10
+#define RDST %r11
+
+#define RXORd %r9d
+#define RXORbl %r9b
+
+#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
+	movzbl ab ## bl,		tmp2 ## d; \
+	movzbl ab ## bh,		tmp1 ## d; \
+	rorq $16,			ab; \
+	xorq T0(, tmp2, 8),		dst; \
+	xorq T1(, tmp1, 8),		dst;
+
+/**********************************************************************
+  1-way camellia
+ **********************************************************************/
+#define roundsm(ab, subkey, cd) \
+	movq (key_table + ((subkey) * 2) * 4)(CTX),	RT2; \
+	\
+	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
+	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
+	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
+	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
+	\
+	xorq RT2,					cd ## 0;
+
+#define fls(l, r, kl, kr) \
+	movl (key_table + ((kl) * 2) * 4)(CTX),		RT0d; \
+	andl l ## 0d,					RT0d; \
+	roll $1,					RT0d; \
+	shlq $32,					RT0; \
+	xorq RT0,					l ## 0; \
+	movq (key_table + ((kr) * 2) * 4)(CTX),		RT1; \
+	orq r ## 0,					RT1; \
+	shrq $32,					RT1; \
+	xorq RT1,					r ## 0; \
+	\
+	movq (key_table + ((kl) * 2) * 4)(CTX),		RT2; \
+	orq l ## 0,					RT2; \
+	shrq $32,					RT2; \
+	xorq RT2,					l ## 0; \
+	movl (key_table + ((kr) * 2) * 4)(CTX),		RT0d; \
+	andl r ## 0d,					RT0d; \
+	roll $1,					RT0d; \
+	shlq $32,					RT0; \
+	xorq RT0,					r ## 0;
+
+#define enc_rounds(i) \
+	roundsm(RAB, i + 2, RCD); \
+	roundsm(RCD, i + 3, RAB); \
+	roundsm(RAB, i + 4, RCD); \
+	roundsm(RCD, i + 5, RAB); \
+	roundsm(RAB, i + 6, RCD); \
+	roundsm(RCD, i + 7, RAB);
+
+#define enc_fls(i) \
+	fls(RAB, RCD, i + 0, i + 1);
+
+#define enc_inpack() \
+	movq (RIO),			RAB0; \
+	bswapq				RAB0; \
+	rolq $32,			RAB0; \
+	movq 4*2(RIO),			RCD0; \
+	bswapq				RCD0; \
+	rorq $32,			RCD0; \
+	xorq key_table(CTX),		RAB0;
+
+#define enc_outunpack(op, max) \
+	xorq key_table(CTX, max, 8),	RCD0; \
+	rorq $32,			RCD0; \
+	bswapq				RCD0; \
+	op ## q RCD0,			(RIO); \
+	rolq $32,			RAB0; \
+	bswapq				RAB0; \
+	op ## q RAB0,			4*2(RIO);
+
+#define dec_rounds(i) \
+	roundsm(RAB, i + 7, RCD); \
+	roundsm(RCD, i + 6, RAB); \
+	roundsm(RAB, i + 5, RCD); \
+	roundsm(RCD, i + 4, RAB); \
+	roundsm(RAB, i + 3, RCD); \
+	roundsm(RCD, i + 2, RAB);
+
+#define dec_fls(i) \
+	fls(RAB, RCD, i + 1, i + 0);
+
+#define dec_inpack(max) \
+	movq (RIO),			RAB0; \
+	bswapq				RAB0; \
+	rolq $32,			RAB0; \
+	movq 4*2(RIO),			RCD0; \
+	bswapq				RCD0; \
+	rorq $32,			RCD0; \
+	xorq key_table(CTX, max, 8),	RAB0;
+
+#define dec_outunpack() \
+	xorq key_table(CTX),		RCD0; \
+	rorq $32,			RCD0; \
+	bswapq				RCD0; \
+	movq RCD0,			(RIO); \
+	rolq $32,			RAB0; \
+	bswapq				RAB0; \
+	movq RAB0,			4*2(RIO);
+
+.global __camellia_enc_blk;
+.type   __camellia_enc_blk,@function;
+
+__camellia_enc_blk:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool xor
+	 */
+	movq %rbp, RRBP;
+
+	movq %rcx, RXOR;
+	movq %rsi, RDST;
+	movq %rdx, RIO;
+
+	enc_inpack();
+
+	enc_rounds(0);
+	enc_fls(8);
+	enc_rounds(8);
+	enc_fls(16);
+	enc_rounds(16);
+	movl $24, RT1d; /* max */
+
+	cmpb $16, key_length(CTX);
+	je __enc_done;
+
+	enc_fls(24);
+	enc_rounds(24);
+	movl $32, RT1d; /* max */
+
+__enc_done:
+	testb RXORbl, RXORbl;
+	movq RDST, RIO;
+
+	jnz __enc_xor;
+
+	enc_outunpack(mov, RT1);
+
+	movq RRBP, %rbp;
+	ret;
+
+__enc_xor:
+	enc_outunpack(xor, RT1);
+
+	movq RRBP, %rbp;
+	ret;
+
+.global camellia_dec_blk;
+.type   camellia_dec_blk,@function;
+
+camellia_dec_blk:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+	cmpl $16, key_length(CTX);
+	movl $32, RT2d;
+	movl $24, RXORd;
+	cmovel RXORd, RT2d; /* max */
+
+	movq %rbp, RRBP;
+	movq %rsi, RDST;
+	movq %rdx, RIO;
+
+	dec_inpack(RT2);
+
+	cmpb $24, RT2bl;
+	je __dec_rounds16;
+
+	dec_rounds(24);
+	dec_fls(24);
+
+__dec_rounds16:
+	dec_rounds(16);
+	dec_fls(16);
+	dec_rounds(8);
+	dec_fls(8);
+	dec_rounds(0);
+
+	movq RDST, RIO;
+
+	dec_outunpack();
+
+	movq RRBP, %rbp;
+	ret;
+
+/**********************************************************************
+  2-way camellia
+ **********************************************************************/
+#define roundsm2(ab, subkey, cd) \
+	movq (key_table + ((subkey) * 2) * 4)(CTX),	RT2; \
+	xorq RT2,					cd ## 1; \
+	\
+	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
+	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
+	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
+	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
+	\
+		xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
+		xorq RT2,					cd ## 0; \
+		xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
+		xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
+		xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
+
+#define fls2(l, r, kl, kr) \
+	movl (key_table + ((kl) * 2) * 4)(CTX),		RT0d; \
+	andl l ## 0d,					RT0d; \
+	roll $1,					RT0d; \
+	shlq $32,					RT0; \
+	xorq RT0,					l ## 0; \
+	movq (key_table + ((kr) * 2) * 4)(CTX),		RT1; \
+	orq r ## 0,					RT1; \
+	shrq $32,					RT1; \
+	xorq RT1,					r ## 0; \
+	\
+		movl (key_table + ((kl) * 2) * 4)(CTX),		RT2d; \
+		andl l ## 1d,					RT2d; \
+		roll $1,					RT2d; \
+		shlq $32,					RT2; \
+		xorq RT2,					l ## 1; \
+		movq (key_table + ((kr) * 2) * 4)(CTX),		RT0; \
+		orq r ## 1,					RT0; \
+		shrq $32,					RT0; \
+		xorq RT0,					r ## 1; \
+	\
+	movq (key_table + ((kl) * 2) * 4)(CTX),		RT1; \
+	orq l ## 0,					RT1; \
+	shrq $32,					RT1; \
+	xorq RT1,					l ## 0; \
+	movl (key_table + ((kr) * 2) * 4)(CTX),		RT2d; \
+	andl r ## 0d,					RT2d; \
+	roll $1,					RT2d; \
+	shlq $32,					RT2; \
+	xorq RT2,					r ## 0; \
+	\
+		movq (key_table + ((kl) * 2) * 4)(CTX),		RT0; \
+		orq l ## 1,					RT0; \
+		shrq $32,					RT0; \
+		xorq RT0,					l ## 1; \
+		movl (key_table + ((kr) * 2) * 4)(CTX),		RT1d; \
+		andl r ## 1d,					RT1d; \
+		roll $1,					RT1d; \
+		shlq $32,					RT1; \
+		xorq RT1,					r ## 1;
+
+#define enc_rounds2(i) \
+	roundsm2(RAB, i + 2, RCD); \
+	roundsm2(RCD, i + 3, RAB); \
+	roundsm2(RAB, i + 4, RCD); \
+	roundsm2(RCD, i + 5, RAB); \
+	roundsm2(RAB, i + 6, RCD); \
+	roundsm2(RCD, i + 7, RAB);
+
+#define enc_fls2(i) \
+	fls2(RAB, RCD, i + 0, i + 1);
+
+#define enc_inpack2() \
+	movq (RIO),			RAB0; \
+	bswapq				RAB0; \
+	rorq $32,			RAB0; \
+	movq 4*2(RIO),			RCD0; \
+	bswapq				RCD0; \
+	rolq $32,			RCD0; \
+	xorq key_table(CTX),		RAB0; \
+	\
+		movq 8*2(RIO),			RAB1; \
+		bswapq				RAB1; \
+		rorq $32,			RAB1; \
+		movq 12*2(RIO),			RCD1; \
+		bswapq				RCD1; \
+		rolq $32,			RCD1; \
+		xorq key_table(CTX),		RAB1;
+
+#define enc_outunpack2(op, max) \
+	xorq key_table(CTX, max, 8),	RCD0; \
+	rolq $32,			RCD0; \
+	bswapq				RCD0; \
+	op ## q RCD0,			(RIO); \
+	rorq $32,			RAB0; \
+	bswapq				RAB0; \
+	op ## q RAB0,			4*2(RIO); \
+	\
+		xorq key_table(CTX, max, 8),	RCD1; \
+		rolq $32,			RCD1; \
+		bswapq				RCD1; \
+		op ## q RCD1,			8*2(RIO); \
+		rorq $32,			RAB1; \
+		bswapq				RAB1; \
+		op ## q RAB1,			12*2(RIO);
+
+#define dec_rounds2(i) \
+	roundsm2(RAB, i + 7, RCD); \
+	roundsm2(RCD, i + 6, RAB); \
+	roundsm2(RAB, i + 5, RCD); \
+	roundsm2(RCD, i + 4, RAB); \
+	roundsm2(RAB, i + 3, RCD); \
+	roundsm2(RCD, i + 2, RAB);
+
+#define dec_fls2(i) \
+	fls2(RAB, RCD, i + 1, i + 0);
+
+#define dec_inpack2(max) \
+	movq (RIO),			RAB0; \
+	bswapq				RAB0; \
+	rorq $32,			RAB0; \
+	movq 4*2(RIO),			RCD0; \
+	bswapq				RCD0; \
+	rolq $32,			RCD0; \
+	xorq key_table(CTX, max, 8),	RAB0; \
+	\
+		movq 8*2(RIO),			RAB1; \
+		bswapq				RAB1; \
+		rorq $32,			RAB1; \
+		movq 12*2(RIO),			RCD1; \
+		bswapq				RCD1; \
+		rolq $32,			RCD1; \
+		xorq key_table(CTX, max, 8),	RAB1;
+
+#define dec_outunpack2() \
+	xorq key_table(CTX),		RCD0; \
+	rolq $32,			RCD0; \
+	bswapq				RCD0; \
+	movq RCD0,			(RIO); \
+	rorq $32,			RAB0; \
+	bswapq				RAB0; \
+	movq RAB0,			4*2(RIO); \
+	\
+		xorq key_table(CTX),		RCD1; \
+		rolq $32,			RCD1; \
+		bswapq				RCD1; \
+		movq RCD1,			8*2(RIO); \
+		rorq $32,			RAB1; \
+		bswapq				RAB1; \
+		movq RAB1,			12*2(RIO);
+
+.global __camellia_enc_blk_2way;
+.type   __camellia_enc_blk_2way,@function;
+
+__camellia_enc_blk_2way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool xor
+	 */
+	pushq %rbx;
+
+	movq %rbp, RRBP;
+	movq %rcx, RXOR;
+	movq %rsi, RDST;
+	movq %rdx, RIO;
+
+	enc_inpack2();
+
+	enc_rounds2(0);
+	enc_fls2(8);
+	enc_rounds2(8);
+	enc_fls2(16);
+	enc_rounds2(16);
+	movl $24, RT2d; /* max */
+
+	cmpb $16, key_length(CTX);
+	je __enc2_done;
+
+	enc_fls2(24);
+	enc_rounds2(24);
+	movl $32, RT2d; /* max */
+
+__enc2_done:
+	test RXORbl, RXORbl;
+	movq RDST, RIO;
+	jnz __enc2_xor;
+
+	enc_outunpack2(mov, RT2);
+
+	movq RRBP, %rbp;
+	popq %rbx;
+	ret;
+
+__enc2_xor:
+	enc_outunpack2(xor, RT2);
+
+	movq RRBP, %rbp;
+	popq %rbx;
+	ret;
+
+.global camellia_dec_blk_2way;
+.type   camellia_dec_blk_2way,@function;
+
+camellia_dec_blk_2way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+	cmpl $16, key_length(CTX);
+	movl $32, RT2d;
+	movl $24, RXORd;
+	cmovel RXORd, RT2d; /* max */
+
+	movq %rbx, RXOR;
+	movq %rbp, RRBP;
+	movq %rsi, RDST;
+	movq %rdx, RIO;
+
+	dec_inpack2(RT2);
+
+	cmpb $24, RT2bl;
+	je __dec2_rounds16;
+
+	dec_rounds2(24);
+	dec_fls2(24);
+
+__dec2_rounds16:
+	dec_rounds2(16);
+	dec_fls2(16);
+	dec_rounds2(8);
+	dec_fls2(8);
+	dec_rounds2(0);
+
+	movq RDST, RIO;
+
+	dec_outunpack2();
+
+	movq RRBP, %rbp;
+	movq RXOR, %rbx;
+	ret;
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
new file mode 100644
index 000000000000..1ca36a93fd2f
--- /dev/null
+++ b/arch/x86/crypto/camellia_glue.c
@@ -0,0 +1,1952 @@
+/*
+ * Glue Code for assembler optimized version of Camellia
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * Camellia parts based on code by:
+ *  Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
+ * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
+ *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ * CTR part based on code (crypto/ctr.c) by:
+ *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/unaligned.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+
+#define CAMELLIA_MIN_KEY_SIZE	16 /* NOTE(review): presumably bytes; the 2-way assembler special-cases key_length == 16 */
+#define CAMELLIA_MAX_KEY_SIZE	32
+#define CAMELLIA_BLOCK_SIZE	16 /* the 2-way decrypt output macro stores 2 * 16 bytes to dst */
+#define CAMELLIA_TABLE_BYTE_LEN	272 /* size in bytes of camellia_ctx.key_table */
+
+struct camellia_ctx { /* context handed to the assembler routines as their first argument */
+	u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; /* 272 / 8 = 34 entries */
+	u32 key_length; /* compared against 16 by the 2-way assembler to choose the round count; presumably in bytes */
+};
+
+/* regular block cipher functions (assembler routines; xor is fixed to false/true by the inline wrappers below) */
+asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+				   const u8 *src, bool xor);
+asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
+				 const u8 *src);
+
+/* 2-way parallel cipher functions (assembler routines); camellia_dec_blk_2way stores 32 bytes to dst */
+asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+					const u8 *src, bool xor);
+asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+				      const u8 *src);
+
+static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+				    const u8 *src)
+{
+	__camellia_enc_blk(ctx, dst, src, false); /* xor = false; camellia_enc_blk_xor() is the xor = true twin */
+}
+
+static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
+					const u8 *src)
+{
+	__camellia_enc_blk(ctx, dst, src, true); /* xor = true; presumably the result is xored into dst - confirm in the assembler */
+}
+
+static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+					 const u8 *src)
+{
+	__camellia_enc_blk_2way(ctx, dst, src, false); /* xor = false: the assembler takes its enc_outunpack2(mov, ...) path */
+}
+
+static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
+					     const u8 *src)
+{
+	__camellia_enc_blk_2way(ctx, dst, src, true); /* xor = true: the assembler takes its enc_outunpack2(xor, ...) path */
+}
+
+static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src); /* the tfm context is passed as the struct camellia_ctx */
+}
+
+static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src); /* the tfm context is passed as the struct camellia_ctx */
+}
+
+/* camellia sboxes: table name digits give the s-box (1-4, 0 = zero byte) held in each byte, MSB first */
+const u64 camellia_sp10011110[256] = {
+	0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00,
+	0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700,
+	0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400,
+	0x8500008585858500, 0x5700005757575700, 0x3500003535353500,
+	0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00,
+	0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00,
+	0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500,
+	0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100,
+	0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00,
+	0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500,
+	0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600,
+	0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00,
+	0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00,
+	0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000,
+	0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00,
+	0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00,
+	0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900,
+	0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700,
+	0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900,
+	0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600,
+	0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00,
+	0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00,
+	0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00,
+	0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00,
+	0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00,
+	0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100,
+	0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00,
+	0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200,
+	0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600,
+	0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700,
+	0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100,
+	0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700,
+	0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00,
+	0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00,
+	0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200,
+	0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600,
+	0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200,
+	0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400,
+	0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300,
+	0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100,
+	0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800,
+	0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00,
+	0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00,
+	0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00,
+	0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200,
+	0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00,
+	0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000,
+	0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200,
+	0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000,
+	0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700,
+	0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00,
+	0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00,
+	0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00,
+	0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00,
+	0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00,
+	0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300,
+	0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600,
+	0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00,
+	0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200,
+	0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600,
+	0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600,
+	0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600,
+	0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300,
+	0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00,
+	0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700,
+	0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00,
+	0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900,
+	0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600,
+	0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400,
+	0x5900005959595900, 0x7800007878787800, 0x9800009898989800,
+	0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700,
+	0x4600004646464600, 0x7100007171717100, 0xba0000babababa00,
+	0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00,
+	0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200,
+	0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200,
+	0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500,
+	0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00,
+	0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900,
+	0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00,
+	0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400,
+	0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300,
+	0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900,
+	0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500,
+	0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400,
+	0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000,
+	0x9e00009e9e9e9e00,
+};
+
+const u64 camellia_sp22000222[256] = {
+	0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858,
+	0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e,
+	0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9,
+	0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a,
+	0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d,
+	0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf,
+	0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a,
+	0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242,
+	0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e,
+	0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca,
+	0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d,
+	0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f,
+	0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e,
+	0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060,
+	0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc,
+	0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434,
+	0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272,
+	0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e,
+	0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3,
+	0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad,
+	0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8,
+	0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a,
+	0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7,
+	0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a,
+	0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656,
+	0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363,
+	0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf,
+	0x9898000000989898, 0x9797000000979797, 0x8585000000858585,
+	0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec,
+	0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f,
+	0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3,
+	0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf,
+	0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474,
+	0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636,
+	0x2222000000222222, 0x3838000000383838, 0x6464000000646464,
+	0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c,
+	0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5,
+	0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888,
+	0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787,
+	0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323,
+	0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1,
+	0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9,
+	0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555,
+	0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa,
+	0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4,
+	0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6,
+	0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1,
+	0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5,
+	0x2020000000202020, 0x8989000000898989, 0x0000000000000000,
+	0x9090000000909090, 0x4747000000474747, 0xefef000000efefef,
+	0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515,
+	0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5,
+	0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb,
+	0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8,
+	0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b,
+	0x9494000000949494, 0x2121000000212121, 0x6666000000666666,
+	0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed,
+	0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe,
+	0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4,
+	0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c,
+	0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d,
+	0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d,
+	0xdede000000dedede, 0x9696000000969696, 0x2626000000262626,
+	0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c,
+	0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f,
+	0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc,
+	0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252,
+	0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d,
+	0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969,
+	0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131,
+	0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf,
+	0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575,
+	0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757,
+	0x8484000000848484, 0x1111000000111111, 0x4545000000454545,
+	0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4,
+	0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa,
+	0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959,
+	0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292,
+	0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878,
+	0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949,
+	0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7,
+	0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393,
+	0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a,
+	0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9,
+	0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101,
+	0x3d3d0000003d3d3d,
+};
+
+const u64 camellia_sp03303033[256] = {
+	0x0038380038003838, 0x0041410041004141, 0x0016160016001616,
+	0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393,
+	0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272,
+	0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a,
+	0x0075750075007575, 0x0006060006000606, 0x0057570057005757,
+	0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7,
+	0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2,
+	0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090,
+	0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7,
+	0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2,
+	0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343,
+	0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7,
+	0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f,
+	0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818,
+	0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f,
+	0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d,
+	0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c,
+	0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3,
+	0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec,
+	0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b,
+	0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636,
+	0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686,
+	0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd,
+	0x0066660066006666, 0x0058580058005858, 0x0096960096009696,
+	0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595,
+	0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8,
+	0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef,
+	0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161,
+	0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b,
+	0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb,
+	0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8,
+	0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb,
+	0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d,
+	0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d,
+	0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919,
+	0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b,
+	0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979,
+	0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222,
+	0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1,
+	0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8,
+	0x0012120012001212, 0x0004040004000404, 0x0074740074007474,
+	0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e,
+	0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555,
+	0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe,
+	0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131,
+	0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad,
+	0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070,
+	0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969,
+	0x0008080008000808, 0x0062620062006262, 0x0000000000000000,
+	0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb,
+	0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545,
+	0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d,
+	0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee,
+	0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e,
+	0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6,
+	0x0025250025002525, 0x0048480048004848, 0x0099990099009999,
+	0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b,
+	0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf,
+	0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929,
+	0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313,
+	0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363,
+	0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b,
+	0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989,
+	0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717,
+	0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3,
+	0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737,
+	0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494,
+	0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b,
+	0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a,
+	0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c,
+	0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3,
+	0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d,
+	0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5,
+	0x0021210021002121, 0x0044440044004444, 0x0051510051005151,
+	0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939,
+	0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa,
+	0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656,
+	0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4,
+	0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e,
+	0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252,
+	0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9,
+	0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4,
+	0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a,
+	0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a,
+	0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040,
+	0x004f4f004f004f4f,
+};
+
+const u64 camellia_sp00444404[256] = {
+	0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3,
+	0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057,
+	0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023,
+	0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5,
+	0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d,
+	0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af,
+	0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e,
+	0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b,
+	0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5,
+	0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a,
+	0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b,
+	0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0,
+	0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0,
+	0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb,
+	0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d,
+	0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004,
+	0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de,
+	0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c,
+	0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe,
+	0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a,
+	0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060,
+	0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0,
+	0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054,
+	0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064,
+	0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3,
+	0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6,
+	0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087,
+	0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090,
+	0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d,
+	0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8,
+	0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081,
+	0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063,
+	0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f,
+	0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9,
+	0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078,
+	0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071,
+	0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088,
+	0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9,
+	0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036,
+	0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1,
+	0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb,
+	0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad,
+	0x0000777777770077, 0x0000808080800080, 0x0000828282820082,
+	0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5,
+	0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c,
+	0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093,
+	0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e,
+	0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd,
+	0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb,
+	0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f,
+	0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1,
+	0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d,
+	0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056,
+	0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066,
+	0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012,
+	0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099,
+	0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e,
+	0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031,
+	0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058,
+	0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c,
+	0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018,
+	0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2,
+	0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008,
+	0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050,
+	0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089,
+	0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095,
+	0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4,
+	0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db,
+	0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f,
+	0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002,
+	0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067,
+	0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2,
+	0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037,
+	0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b,
+	0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079,
+	0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e,
+	0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd,
+	0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a,
+	0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025,
+	0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa,
+	0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee,
+	0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068,
+	0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028,
+	0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1,
+	0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7,
+	0x00009e9e9e9e009e,
+};
+
+const u64 camellia_sp02220222[256] = {
+	0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858,
+	0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e,
+	0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9,
+	0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a,
+	0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d,
+	0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf,
+	0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a,
+	0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242,
+	0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e,
+	0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca,
+	0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d,
+	0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f,
+	0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e,
+	0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060,
+	0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc,
+	0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434,
+	0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272,
+	0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e,
+	0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3,
+	0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad,
+	0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8,
+	0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a,
+	0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7,
+	0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a,
+	0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656,
+	0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363,
+	0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf,
+	0x0098989800989898, 0x0097979700979797, 0x0085858500858585,
+	0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec,
+	0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f,
+	0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3,
+	0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf,
+	0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474,
+	0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636,
+	0x0022222200222222, 0x0038383800383838, 0x0064646400646464,
+	0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c,
+	0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5,
+	0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888,
+	0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787,
+	0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323,
+	0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1,
+	0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9,
+	0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555,
+	0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa,
+	0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4,
+	0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6,
+	0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1,
+	0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5,
+	0x0020202000202020, 0x0089898900898989, 0x0000000000000000,
+	0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef,
+	0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515,
+	0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5,
+	0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb,
+	0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8,
+	0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b,
+	0x0094949400949494, 0x0021212100212121, 0x0066666600666666,
+	0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed,
+	0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe,
+	0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4,
+	0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c,
+	0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d,
+	0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d,
+	0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626,
+	0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c,
+	0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f,
+	0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc,
+	0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252,
+	0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d,
+	0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969,
+	0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131,
+	0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf,
+	0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575,
+	0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757,
+	0x0084848400848484, 0x0011111100111111, 0x0045454500454545,
+	0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4,
+	0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa,
+	0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959,
+	0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292,
+	0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878,
+	0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949,
+	0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7,
+	0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393,
+	0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a,
+	0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9,
+	0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101,
+	0x003d3d3d003d3d3d,
+};
+
+const u64 camellia_sp30333033[256] = {
+	0x3800383838003838, 0x4100414141004141, 0x1600161616001616,
+	0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393,
+	0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272,
+	0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a,
+	0x7500757575007575, 0x0600060606000606, 0x5700575757005757,
+	0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7,
+	0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2,
+	0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090,
+	0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7,
+	0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2,
+	0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343,
+	0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7,
+	0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f,
+	0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818,
+	0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f,
+	0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d,
+	0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c,
+	0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3,
+	0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec,
+	0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b,
+	0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636,
+	0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686,
+	0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd,
+	0x6600666666006666, 0x5800585858005858, 0x9600969696009696,
+	0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595,
+	0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8,
+	0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef,
+	0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161,
+	0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b,
+	0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb,
+	0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8,
+	0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb,
+	0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d,
+	0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d,
+	0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919,
+	0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b,
+	0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979,
+	0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222,
+	0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1,
+	0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8,
+	0x1200121212001212, 0x0400040404000404, 0x7400747474007474,
+	0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e,
+	0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555,
+	0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe,
+	0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131,
+	0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad,
+	0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070,
+	0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969,
+	0x0800080808000808, 0x6200626262006262, 0x0000000000000000,
+	0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb,
+	0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545,
+	0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d,
+	0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee,
+	0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e,
+	0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6,
+	0x2500252525002525, 0x4800484848004848, 0x9900999999009999,
+	0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b,
+	0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf,
+	0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929,
+	0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313,
+	0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363,
+	0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b,
+	0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989,
+	0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717,
+	0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3,
+	0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737,
+	0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494,
+	0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b,
+	0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a,
+	0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c,
+	0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3,
+	0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d,
+	0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5,
+	0x2100212121002121, 0x4400444444004444, 0x5100515151005151,
+	0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939,
+	0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa,
+	0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656,
+	0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4,
+	0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e,
+	0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252,
+	0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9,
+	0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4,
+	0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a,
+	0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a,
+	0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040,
+	0x4f004f4f4f004f4f,
+};
+
+const u64 camellia_sp44044404[256] = { /*
+	 * Byte-indexed lookup table; CAMELLIA_F reads it with bits 8..15 of
+	 * its 64-bit input.  Every entry holds one byte value copied into six
+	 * of its eight byte lanes; the two lanes written as '0' in the table
+	 * name (the third and seventh byte counting from the most significant
+	 * end) are zero.
+	 */
+	0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3,
+	0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057,
+	0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023,
+	0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5,
+	0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d,
+	0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af,
+	0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e,
+	0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b,
+	0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5,
+	0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a,
+	0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b,
+	0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0,
+	0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0,
+	0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb,
+	0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d,
+	0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004,
+	0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de,
+	0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c,
+	0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe,
+	0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a,
+	0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060,
+	0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0,
+	0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054,
+	0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064,
+	0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3,
+	0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6,
+	0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087,
+	0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090,
+	0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d,
+	0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8,
+	0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081,
+	0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063,
+	0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f,
+	0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9,
+	0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078,
+	0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071,
+	0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088,
+	0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9,
+	0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036,
+	0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1,
+	0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb,
+	0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad,
+	0x7777007777770077, 0x8080008080800080, 0x8282008282820082,
+	0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5,
+	0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c,
+	0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093,
+	0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e,
+	0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd,
+	0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb,
+	0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f,
+	0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1,
+	0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d,
+	0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056,
+	0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066,
+	0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012,
+	0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099,
+	0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e,
+	0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031,
+	0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058,
+	0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c,
+	0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018,
+	0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2,
+	0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008,
+	0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050,
+	0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089,
+	0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095,
+	0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4,
+	0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db,
+	0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f,
+	0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002,
+	0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067,
+	0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2,
+	0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037,
+	0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b,
+	0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079,
+	0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e,
+	0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd,
+	0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a,
+	0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025,
+	0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa,
+	0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee,
+	0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068,
+	0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028,
+	0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1,
+	0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7,
+	0x9e9e009e9e9e009e,
+};
+
+const u64 camellia_sp11101110[256] = { /*
+	 * Byte-indexed lookup table; CAMELLIA_F reads it with bits 0..7 of
+	 * its 64-bit input.  Every entry holds one byte value copied into six
+	 * of its eight byte lanes; the two lanes written as '0' in the table
+	 * name (the fourth and eighth byte counting from the most significant
+	 * end) are zero.
+	 */
+	0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00,
+	0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700,
+	0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400,
+	0x8585850085858500, 0x5757570057575700, 0x3535350035353500,
+	0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00,
+	0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00,
+	0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500,
+	0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100,
+	0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00,
+	0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500,
+	0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600,
+	0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00,
+	0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00,
+	0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000,
+	0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00,
+	0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00,
+	0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900,
+	0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700,
+	0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900,
+	0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600,
+	0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00,
+	0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00,
+	0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00,
+	0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00,
+	0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00,
+	0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100,
+	0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00,
+	0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200,
+	0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600,
+	0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700,
+	0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100,
+	0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700,
+	0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00,
+	0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00,
+	0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200,
+	0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600,
+	0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200,
+	0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400,
+	0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300,
+	0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100,
+	0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800,
+	0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00,
+	0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00,
+	0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00,
+	0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200,
+	0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00,
+	0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000,
+	0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200,
+	0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000,
+	0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700,
+	0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00,
+	0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00,
+	0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00,
+	0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00,
+	0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00,
+	0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300,
+	0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600,
+	0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00,
+	0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200,
+	0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600,
+	0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600,
+	0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600,
+	0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300,
+	0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00,
+	0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700,
+	0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00,
+	0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900,
+	0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600,
+	0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400,
+	0x5959590059595900, 0x7878780078787800, 0x9898980098989800,
+	0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700,
+	0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00,
+	0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00,
+	0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200,
+	0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200,
+	0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500,
+	0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00,
+	0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900,
+	0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00,
+	0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400,
+	0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300,
+	0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900,
+	0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500,
+	0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400,
+	0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000,
+	0x9e9e9e009e9e9e00,
+};
+
+/*
+ * key constants
+ *
+ * High (L) and low (R) 32-bit halves of six 64-bit constants.  The key
+ * setup routines hand each pair to CAMELLIA_F, which joins it as
+ * ((u64)L << 32) | R.  Pairs 1-4 are used for the KA derivation of both
+ * camellia_setup128 and camellia_setup256; pairs 5 and 6 are only used by
+ * camellia_setup256 for KB.
+ */
+#define CAMELLIA_SIGMA1L (0xA09E667FL)
+#define CAMELLIA_SIGMA1R (0x3BCC908BL)
+#define CAMELLIA_SIGMA2L (0xB67AE858L)
+#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
+#define CAMELLIA_SIGMA3L (0xC6EF372FL)
+#define CAMELLIA_SIGMA3R (0xE94F82BEL)
+#define CAMELLIA_SIGMA4L (0x54FF53A5L)
+#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
+#define CAMELLIA_SIGMA5L (0x10E527FAL)
+#define CAMELLIA_SIGMA5R (0xDE682D1DL)
+#define CAMELLIA_SIGMA6L (0xB05688C2L)
+#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
+
+/* macros */
+/*
+ * ROLDQ - rotate the 128-bit value held in the u64 pair (l, r) left by
+ * 'bits' and store the result back into l and r.
+ *
+ * l is the more significant half.  'bits' must be in the range 1..63; 0 or
+ * anything from 64 up would shift a u64 by its full width or more.  l and r
+ * must be lvalues and are evaluated more than once.  Every argument is
+ * parenthesised so that an expression such as 'a + b' can be passed for
+ * 'bits', and the temporary has a macro-specific name so that it cannot
+ * capture a caller variable.
+ */
+#define ROLDQ(l, r, bits) ({ \
+	u64 roldq_old_l = (l);						\
+	(l) = ((l) << (bits)) | ((r) >> (64 - (bits)));			\
+	(r) = ((r) << (bits)) | (roldq_old_l >> (64 - (bits)));		\
+})
+
+/*
+ * CAMELLIA_F - table-driven F-function used by the key setup routines.
+ *
+ * XORs x with the 64-bit constant ((u64)kl << 32) | kr, looks each of the
+ * eight resulting bytes up in its own camellia_sp* table (least significant
+ * byte first), XORs the eight table words together and stores the result,
+ * with its two 32-bit halves swapped, in y.
+ *
+ * x is read once, before y is first written.  y must be a u64 lvalue and is
+ * evaluated several times.  All arguments are parenthesised so that
+ * expressions can be passed safely.
+ */
+#define CAMELLIA_F(x, kl, kr, y) ({ \
+	u64 camellia_f_in = (x) ^ (((u64)(kl) << 32) | (kr));		\
+	(y) = camellia_sp11101110[(uint8_t)camellia_f_in];		\
+	(y) ^= camellia_sp44044404[(uint8_t)(camellia_f_in >> 8)];	\
+	camellia_f_in >>= 16;						\
+	(y) ^= camellia_sp30333033[(uint8_t)camellia_f_in];		\
+	(y) ^= camellia_sp02220222[(uint8_t)(camellia_f_in >> 8)];	\
+	camellia_f_in >>= 16;						\
+	(y) ^= camellia_sp00444404[(uint8_t)camellia_f_in];		\
+	(y) ^= camellia_sp03303033[(uint8_t)(camellia_f_in >> 8)];	\
+	camellia_f_in >>= 16;						\
+	(y) ^= camellia_sp22000222[(uint8_t)camellia_f_in];		\
+	(y) ^= camellia_sp10011110[(uint8_t)(camellia_f_in >> 8)];	\
+	(y) = ror64((y), 32);						\
+})
+
+#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32)) /*
+ * Stores sRL, with its two 32-bit halves swapped, in subkey[INDEX].  The
+ * macro relies on an array or pointer named 'subkey' being in scope where
+ * it is expanded.
+ */
+
+static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
+{
+	u64 kw4, tt;
+	u32 dw, tl, tr;
+
+	/*
+	 * Post-process the raw subkeys in subRL[] and write the final key
+	 * table to subkey[].  camellia_setup128 calls this with max == 24
+	 * (subRL[0..25] in use), camellia_setup256 with max == 32
+	 * (subRL[0..33] in use).  subRL[] is modified in place; subkey[1] is
+	 * never written.
+	 *
+	 * absorb kw2 to other subkeys
+	 */
+	/* round 2 */
+	subRL[3] ^= subRL[1];
+	/* round 4 */
+	subRL[5] ^= subRL[1];
+	/* round 6 */
+	subRL[7] ^= subRL[1];
+
+	subRL[1] ^= (subRL[1] & ~subRL[9]) << 32;
+	/*
+	 * modified for FLinv(kl2)
+	 *
+	 * Note the comma operator here and in the similar pairs below: the
+	 * two assignments form a single statement.  dw is u32, so it keeps
+	 * only the low 32 bits of the shifted value.
+	 */
+	dw = (subRL[1] & subRL[9]) >> 32,
+		subRL[1] ^= rol32(dw, 1);
+
+	/* round 8 */
+	subRL[11] ^= subRL[1];
+	/* round 10 */
+	subRL[13] ^= subRL[1];
+	/* round 12 */
+	subRL[15] ^= subRL[1];
+
+	subRL[1] ^= (subRL[1] & ~subRL[17]) << 32;
+	/* modified for FLinv(kl4) */
+	dw = (subRL[1] & subRL[17]) >> 32,
+		subRL[1] ^= rol32(dw, 1);
+
+	/* round 14 */
+	subRL[19] ^= subRL[1];
+	/* round 16 */
+	subRL[21] ^= subRL[1];
+	/* round 18 */
+	subRL[23] ^= subRL[1];
+
+	if (max == 24) {
+		/* kw3 */
+		subRL[24] ^= subRL[1];
+
+		/* absorb kw4 to other subkeys */
+		kw4 = subRL[25];
+	} else {
+		subRL[1] ^= (subRL[1] & ~subRL[25]) << 32;
+		/* modified for FLinv(kl6) */
+		dw = (subRL[1] & subRL[25]) >> 32,
+			subRL[1] ^= rol32(dw, 1);
+
+		/* round 20 */
+		subRL[27] ^= subRL[1];
+		/* round 22 */
+		subRL[29] ^= subRL[1];
+		/* round 24 */
+		subRL[31] ^= subRL[1];
+		/* kw3 */
+		subRL[32] ^= subRL[1];
+
+		/* absorb kw4 to other subkeys */
+		kw4 = subRL[33];
+		/* round 23 */
+		subRL[30] ^= kw4;
+		/* round 21 */
+		subRL[28] ^= kw4;
+		/* round 19 */
+		subRL[26] ^= kw4;
+
+		kw4 ^= (kw4 & ~subRL[24]) << 32;
+		/* modified for FL(kl5) */
+		dw = (kw4 & subRL[24]) >> 32,
+			kw4 ^= rol32(dw, 1);
+	}
+
+	/* round 17 (from here on the code is shared by both values of max) */
+	subRL[22] ^= kw4;
+	/* round 15 */
+	subRL[20] ^= kw4;
+	/* round 13 */
+	subRL[18] ^= kw4;
+
+	kw4 ^= (kw4 & ~subRL[16]) << 32;
+	/* modified for FL(kl3) */
+	dw = (kw4 & subRL[16]) >> 32,
+		kw4 ^= rol32(dw, 1);
+
+	/* round 11 */
+	subRL[14] ^= kw4;
+	/* round 9 */
+	subRL[12] ^= kw4;
+	/* round 7 */
+	subRL[10] ^= kw4;
+
+	kw4 ^= (kw4 & ~subRL[8]) << 32;
+	/* modified for FL(kl1) */
+	dw = (kw4 & subRL[8]) >> 32,
+		kw4 ^= rol32(dw, 1);
+
+	/* round 5 */
+	subRL[6] ^= kw4;
+	/* round 3 */
+	subRL[4] ^= kw4;
+	/* round 1 */
+	subRL[2] ^= kw4;
+	/* kw1 */
+	subRL[0] ^= kw4;
+
+	/*
+	 * key XOR is end of F-function
+	 *
+	 * From here on the final table is written; SET_SUBKEY_LR stores each
+	 * value with its 32-bit halves swapped.  tl, dw and tr are u32, so
+	 * every assignment to them from a u64 expression keeps only the low
+	 * 32 bits; tt is the pair recombined as (tl << 32) | tr.
+	 */
+	SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]);			/* kw1 */
+	SET_SUBKEY_LR(2, subRL[3]);				/* round 1 */
+	SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]);			/* round 2 */
+	SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]);			/* round 3 */
+	SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]);			/* round 4 */
+	SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]);			/* round 5 */
+
+	tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]);
+	dw = tl & (subRL[8] >> 32),				/* FL(kl1) */
+		tr = subRL[10] ^ rol32(dw, 1);
+	tt = (tr | ((u64)tl << 32));
+
+	SET_SUBKEY_LR(7, subRL[6] ^ tt);			/* round 6 */
+	SET_SUBKEY_LR(8, subRL[8]);				/* FL(kl1) */
+	SET_SUBKEY_LR(9, subRL[9]);				/* FLinv(kl2) */
+
+	tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]);
+	dw = tl & (subRL[9] >> 32),				/* FLinv(kl2) */
+		tr = subRL[7] ^ rol32(dw, 1);
+	tt = (tr | ((u64)tl << 32));
+
+	SET_SUBKEY_LR(10, subRL[11] ^ tt);			/* round 7 */
+	SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]);		/* round 8 */
+	SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]);		/* round 9 */
+	SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]);		/* round 10 */
+	SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]);		/* round 11 */
+
+	tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]);
+	dw = tl & (subRL[16] >> 32),				/* FL(kl3) */
+		tr = subRL[18] ^ rol32(dw, 1);
+	tt = (tr | ((u64)tl << 32));
+
+	SET_SUBKEY_LR(15, subRL[14] ^ tt);			/* round 12 */
+	SET_SUBKEY_LR(16, subRL[16]);				/* FL(kl3) */
+	SET_SUBKEY_LR(17, subRL[17]);				/* FLinv(kl4) */
+
+	tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]);
+	dw = tl & (subRL[17] >> 32),				/* FLinv(kl4) */
+		tr = subRL[15] ^ rol32(dw, 1);
+	tt = (tr | ((u64)tl << 32));
+
+	SET_SUBKEY_LR(18, subRL[19] ^ tt);			/* round 13 */
+	SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]);		/* round 14 */
+	SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]);		/* round 15 */
+	SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]);		/* round 16 */
+	SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]);		/* round 17 */
+
+	if (max == 24) {
+		SET_SUBKEY_LR(23, subRL[22]);			/* round 18 */
+		SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]);	/* kw3 */
+	} else {
+		tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]);
+		dw = tl & (subRL[24] >> 32),			/* FL(kl5) */
+			tr = subRL[26] ^ rol32(dw, 1);
+		tt = (tr | ((u64)tl << 32));
+
+		SET_SUBKEY_LR(23, subRL[22] ^ tt);		/* round 18 */
+		SET_SUBKEY_LR(24, subRL[24]);			/* FL(kl5) */
+		SET_SUBKEY_LR(25, subRL[25]);			/* FLinv(kl6) */
+
+		tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]);
+		dw = tl & (subRL[25] >> 32),			/* FLinv(kl6) */
+			tr = subRL[23] ^ rol32(dw, 1);
+		tt = (tr | ((u64)tl << 32));
+
+		SET_SUBKEY_LR(26, subRL[27] ^ tt);		/* round 19 */
+		SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]);	/* round 20 */
+		SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]);	/* round 21 */
+		SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]);	/* round 22 */
+		SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]);	/* round 23 */
+		SET_SUBKEY_LR(31, subRL[30]);			/* round 24 */
+		SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]);	/* kw3 */
+	}
+}
+
+static void camellia_setup128(const unsigned char *key, u64 *subkey)
+{
+	u64 kl, kr, ww;
+	u64 subRL[26];
+
+	/*
+	 * Expand the 16 key bytes at 'key' into the table at 'subkey'.
+	 * The raw subkeys are collected in subRL[0..25] and then finished by
+	 * camellia_setup_tail(..., 24).
+	 *
+	 *  k == kl || kr (|| is concatenation); both halves are read as
+	 *  big-endian 64-bit words and need no particular alignment.
+	 */
+	kl = get_unaligned_be64(key);
+	kr = get_unaligned_be64(key + 8);
+
+	/* generate KL dependent subkeys */
+	/* kw1 */
+	subRL[0] = kl;
+	/* kw2 */
+	subRL[1] = kr;
+
+	/* rotate left by 15 bits */
+	ROLDQ(kl, kr, 15);
+
+	/* k3 */
+	subRL[4] = kl;
+	/* k4 */
+	subRL[5] = kr;
+
+	/* rotate left by a further 30 bits (15+30 in total) */
+	ROLDQ(kl, kr, 30);
+
+	/* k7 */
+	subRL[10] = kl;
+	/* k8 */
+	subRL[11] = kr;
+
+	/* rotate left by a further 15 bits (15+30+15 in total) */
+	ROLDQ(kl, kr, 15);
+
+	/* k10 (only the low half of this rotation is stored) */
+	subRL[13] = kr;
+	/* rotate left by a further 17 bits (15+30+15+17 in total) */
+	ROLDQ(kl, kr, 17);
+
+	/* kl3 */
+	subRL[16] = kl;
+	/* kl4 */
+	subRL[17] = kr;
+
+	/* rotate left by a further 17 bits (15+30+15+17+17 in total) */
+	ROLDQ(kl, kr, 17);
+
+	/* k13 */
+	subRL[18] = kl;
+	/* k14 */
+	subRL[19] = kr;
+
+	/* rotate left by a further 17 bits (15+30+15+17+17+17 in total) */
+	ROLDQ(kl, kr, 17);
+
+	/* k17 */
+	subRL[22] = kl;
+	/* k18 */
+	subRL[23] = kr;
+
+	/* generate KA, starting again from the unrotated key halves */
+	kl = subRL[0];
+	kr = subRL[1];
+	CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
+	kr ^= ww;
+	CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
+
+	/* current status == (kll, klr, w0, w1) */
+	CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
+	kr ^= ww;
+	CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
+	kl ^= ww;
+
+	/* generate KA dependent subkeys */
+	/* k1, k2 */
+	subRL[2] = kl;
+	subRL[3] = kr;
+	ROLDQ(kl, kr, 15);
+	/* k5,k6 */
+	subRL[6] = kl;
+	subRL[7] = kr;
+	ROLDQ(kl, kr, 15);
+	/* kl1, kl2 */
+	subRL[8] = kl;
+	subRL[9] = kr;
+	ROLDQ(kl, kr, 15);
+	/* k9 (only the high half of this rotation is stored) */
+	subRL[12] = kl;
+	ROLDQ(kl, kr, 15);
+	/* k11, k12 */
+	subRL[14] = kl;
+	subRL[15] = kr;
+	ROLDQ(kl, kr, 34);
+	/* k15, k16 */
+	subRL[20] = kl;
+	subRL[21] = kr;
+	ROLDQ(kl, kr, 17);
+	/* kw3, kw4 */
+	subRL[24] = kl;
+	subRL[25] = kr;
+
+	camellia_setup_tail(subkey, subRL, 24);
+}
+
+static void camellia_setup256(const unsigned char *key, u64 *subkey)
+{
+	u64 kl, kr;			/* left half of key */
+	u64 krl, krr;			/* right half of key */
+	u64 ww;				/* temporary variables */
+	u64 subRL[34];
+
+	/*
+	 * Expand the 32 key bytes at 'key' into the table at 'subkey'.
+	 * The raw subkeys are collected in subRL[0..33] and then finished by
+	 * camellia_setup_tail(..., 32).  camellia_setup192 also ends up here
+	 * after padding its key to 32 bytes.
+	 *
+	 *  key = (kl || kr || krl || krr) (|| is concatenation); each part
+	 *  is read as a big-endian 64-bit word and needs no particular
+	 *  alignment.
+	 */
+	kl = get_unaligned_be64(key);
+	kr = get_unaligned_be64(key + 8);
+	krl = get_unaligned_be64(key + 16);
+	krr = get_unaligned_be64(key + 24);
+
+	/* generate KL dependent subkeys */
+	/* kw1 */
+	subRL[0] = kl;
+	/* kw2 */
+	subRL[1] = kr;
+	ROLDQ(kl, kr, 45);
+	/* k9 */
+	subRL[12] = kl;
+	/* k10 */
+	subRL[13] = kr;
+	ROLDQ(kl, kr, 15);
+	/* kl3 */
+	subRL[16] = kl;
+	/* kl4 */
+	subRL[17] = kr;
+	ROLDQ(kl, kr, 17);
+	/* k17 */
+	subRL[22] = kl;
+	/* k18 */
+	subRL[23] = kr;
+	ROLDQ(kl, kr, 34);
+	/* k23 */
+	subRL[30] = kl;
+	/* k24 */
+	subRL[31] = kr;
+
+	/* generate KR dependent subkeys */
+	ROLDQ(krl, krr, 15);
+	/* k3 */
+	subRL[4] = krl;
+	/* k4 */
+	subRL[5] = krr;
+	ROLDQ(krl, krr, 15);
+	/* kl1 */
+	subRL[8] = krl;
+	/* kl2 */
+	subRL[9] = krr;
+	ROLDQ(krl, krr, 30);
+	/* k13 */
+	subRL[18] = krl;
+	/* k14 */
+	subRL[19] = krr;
+	ROLDQ(krl, krr, 34);
+	/* k19 */
+	subRL[26] = krl;
+	/* k20 */
+	subRL[27] = krr;
+	ROLDQ(krl, krr, 34);	/* 15+15+30+34+34 == 128: krl/krr hold the unrotated right half again */
+
+	/* generate KA */
+	kl = subRL[0] ^ krl;
+	kr = subRL[1] ^ krr;
+
+	CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
+	kr ^= ww;
+	CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
+	kl ^= krl;
+	CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
+	kr ^= ww ^ krr;
+	CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
+	kl ^= ww;
+
+	/* generate KB (kl/kr keep KA; krl/krr become KB) */
+	krl ^= kl;
+	krr ^= kr;
+	CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww);
+	krr ^= ww;
+	CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww);
+	krl ^= ww;
+
+	/* generate KA dependent subkeys */
+	ROLDQ(kl, kr, 15);
+	/* k5 */
+	subRL[6] = kl;
+	/* k6 */
+	subRL[7] = kr;
+	ROLDQ(kl, kr, 30);
+	/* k11 */
+	subRL[14] = kl;
+	/* k12 */
+	subRL[15] = kr;
+	/* rotate left by a further 32 bits */
+	ROLDQ(kl, kr, 32);
+	/* kl5 */
+	subRL[24] = kl;
+	/* kl6 */
+	subRL[25] = kr;
+	/* rotate left by a further 17 bits (49 bits past k11,k12) -> k21,k22 */
+	ROLDQ(kl, kr, 17);
+	/* k21 */
+	subRL[28] = kl;
+	/* k22 */
+	subRL[29] = kr;
+
+	/* generate KB dependent subkeys */
+	/* k1 */
+	subRL[2] = krl;
+	/* k2 */
+	subRL[3] = krr;
+	ROLDQ(krl, krr, 30);
+	/* k7 */
+	subRL[10] = krl;
+	/* k8 */
+	subRL[11] = krr;
+	ROLDQ(krl, krr, 30);
+	/* k15 */
+	subRL[20] = krl;
+	/* k16 */
+	subRL[21] = krr;
+	ROLDQ(krl, krr, 51);
+	/* kw3 */
+	subRL[32] = krl;
+	/* kw4 */
+	subRL[33] = krr;
+
+	camellia_setup_tail(subkey, subRL, 32);
+}
+
+/*
+ * Key setup for 24-byte keys.
+ *
+ * The key is padded to 32 bytes by appending the bitwise complement of its
+ * last eight bytes (bytes 16..23), and the padded key is then expanded by
+ * camellia_setup256().
+ */
+static void camellia_setup192(const unsigned char *key, u64 *subkey)
+{
+	unsigned char padded[32];
+	unsigned int i;
+
+	memcpy(padded, key, 24);
+
+	/* bytes 24..31 are the complement of bytes 16..23 */
+	for (i = 0; i < 8; i++)
+		padded[24 + i] = ~key[16 + i];
+
+	camellia_setup256(padded, subkey);
+}
+
+/*
+ * Validate the key length and expand the key into cctx->key_table.
+ *
+ * Accepted lengths are 16, 24 and 32 bytes.  For any other length
+ * CRYPTO_TFM_RES_BAD_KEY_LEN is ORed into *flags, cctx is left untouched
+ * and -EINVAL is returned.  On success cctx->key_length is set to key_len
+ * and 0 is returned.
+ */
+static int __camellia_setkey(struct camellia_ctx *cctx,
+			     const unsigned char *key,
+			     unsigned int key_len, u32 *flags)
+{
+	void (*expand)(const unsigned char *key, u64 *subkey);
+
+	switch (key_len) {
+	case 16:
+		expand = camellia_setup128;
+		break;
+	case 24:
+		expand = camellia_setup192;
+		break;
+	case 32:
+		expand = camellia_setup256;
+		break;
+	default:
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	cctx->key_length = key_len;
+	expand(key, cctx->key_table);
+
+	return 0;
+}
+
+/*
+ * setkey entry point taking a struct crypto_tfm: forwards to
+ * __camellia_setkey() with the transform's context and its crt_flags word.
+ */
+static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct camellia_ctx *cctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+
+	return __camellia_setkey(cctx, in_key, key_len, flags);
+}
+
+/*
+ * Common ECB walker.
+ *
+ * Steps through the walk chunk by chunk.  Within a chunk, fn_2way is called
+ * for every pair of blocks while at least two blocks remain, then fn is
+ * called for single blocks.  Whatever is left of the chunk (less than one
+ * block) is reported back through blkcipher_walk_done().
+ *
+ * Returns the value of the last blkcipher_walk_done() call, or that of
+ * blkcipher_walk_virt() when the walk yields no data.
+ */
+static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+		     void (*fn)(struct camellia_ctx *, u8 *, const u8 *),
+		     void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *))
+{
+	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+	const unsigned int pair = bsize * 2;
+	unsigned int remaining;
+	int err;
+
+	err = blkcipher_walk_virt(desc, walk);
+
+	while ((remaining = walk->nbytes) != 0) {
+		const u8 *in = walk->src.virt.addr;
+		u8 *out = walk->dst.virt.addr;
+		const bool had_pair = remaining >= pair;
+
+		/* Two blocks per call while at least two remain. */
+		while (remaining >= pair) {
+			fn_2way(ctx, out, in);
+
+			in += pair;
+			out += pair;
+			remaining -= pair;
+		}
+
+		/*
+		 * Single blocks.  After the paired loop the tail may be
+		 * shorter than one block and is then skipped; when no pair
+		 * was processed the loop body runs at least once.
+		 */
+		if (!had_pair || remaining >= bsize) {
+			do {
+				fn(ctx, out, in);
+
+				in += bsize;
+				out += bsize;
+				remaining -= bsize;
+			} while (remaining >= bsize);
+		}
+
+		err = blkcipher_walk_done(desc, walk, remaining);
+	}
+
+	return err;
+}
+
+/* ECB encryption: run ecb_crypt() with the one- and two-block encrypt calls. */
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk state;
+
+	blkcipher_walk_init(&state, dst, src, nbytes);
+
+	return ecb_crypt(desc, &state, camellia_enc_blk,
+			 camellia_enc_blk_2way);
+}
+
+/* ECB decryption: run ecb_crypt() with the one- and two-block decrypt calls. */
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk state;
+
+	blkcipher_walk_init(&state, dst, src, nbytes);
+
+	return ecb_crypt(desc, &state, camellia_dec_blk,
+			 camellia_dec_blk_2way);
+}
+
+/*
+ * CBC-encrypt every whole block of the current walk chunk.
+ *
+ * Each source block is XORed with the previous ciphertext block (the IV in
+ * walk->iv for the first block), written to the destination and encrypted
+ * there in place.  On return walk->iv holds the last ciphertext block, so
+ * that the next chunk, or the next request on the same IV buffer, carries
+ * on the chain.
+ *
+ * The loop body always runs once, so the caller must only call this with
+ * walk->nbytes of at least one block.
+ *
+ * Returns the number of trailing bytes (less than one block) that were not
+ * processed.
+ */
+static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 *iv = (u128 *)walk->iv;
+
+	do {
+		u128_xor(dst, src, iv);
+		camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
+		/* this ciphertext block is the chaining value for the next */
+		iv = dst;
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+	/*
+	 * Hand the chaining value on by copying it.  XORing it into the old
+	 * contents of walk->iv would leave (IV ^ last ciphertext block)
+	 * behind, and every later chunk would be chained from a wrong value.
+	 */
+	*(u128 *)walk->iv = *iv;
+	return nbytes;
+}
+
+/*
+ * CBC encryption entry point: walks the scatterlists and lets
+ * __cbc_encrypt() handle each chunk.  Returns the result of the last
+ * blkcipher_walk_done() call, or of blkcipher_walk_virt() when the walk
+ * yields no data.
+ */
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk state;
+	int ret;
+
+	blkcipher_walk_init(&state, dst, src, nbytes);
+	ret = blkcipher_walk_virt(desc, &state);
+
+	while (state.nbytes != 0) {
+		const unsigned int tail = __cbc_encrypt(desc, &state);
+
+		ret = blkcipher_walk_done(desc, &state, tail);
+	}
+
+	return ret;
+}
+
+/*
+ * CBC-decrypt every whole block of the current walk chunk, working from the
+ * last block towards the first.
+ *
+ * Going backwards means the ciphertext block in front of the one being
+ * decrypted has not been touched yet when it is needed as the XOR value.
+ * Blocks are decrypted two at a time while at least two remain, then one at
+ * a time.  The first block of the chunk is XORed with walk->iv, and walk->iv
+ * is then replaced by the chunk's last ciphertext block, which is saved
+ * before anything is decrypted.
+ *
+ * Returns the number of trailing bytes (less than one block) that were not
+ * processed.
+ */
+static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+	unsigned int left = walk->nbytes;
+	/* both cursors start on the last whole block of the chunk */
+	u128 *in = (u128 *)walk->src.virt.addr + (left / bsize - 1);
+	u128 *out = (u128 *)walk->dst.virt.addr + (left / bsize - 1);
+	const u128 next_iv = *in;
+	u128 first_of_pair;
+
+	/* Two blocks per call while at least two remain. */
+	if (left >= bsize * 2) {
+		do {
+			/* step back onto the first block of the pair */
+			left -= bsize;
+			in--;
+			out--;
+
+			/*
+			 * Copy of the pair's first ciphertext block, taken
+			 * before the decrypt call writes to out[]
+			 * (presumably because out may be the same buffer as
+			 * in).
+			 */
+			first_of_pair = in[0];
+
+			camellia_dec_blk_2way(ctx, (u8 *)out, (u8 *)in);
+
+			u128_xor(&out[1], &out[1], &first_of_pair);
+
+			left -= bsize;
+			if (left < bsize)
+				goto done;
+
+			u128_xor(out, out, in - 1);
+			in--;
+			out--;
+		} while (left >= bsize * 2);
+
+		/*
+		 * The loop is only left through its condition, i.e. right
+		 * after the "left < bsize" test above failed, so at least
+		 * one whole block remains here.
+		 */
+	}
+
+	/* One block per call for the rest. */
+	for (;;) {
+		camellia_dec_blk(ctx, (u8 *)out, (u8 *)in);
+
+		left -= bsize;
+		if (left < bsize)
+			break;
+
+		u128_xor(out, out, in - 1);
+		in--;
+		out--;
+	}
+
+done:
+	/* out now points at the first block of the chunk */
+	u128_xor(out, out, (u128 *)walk->iv);
+	*(u128 *)walk->iv = next_iv;
+
+	return left;
+}
+
+/*
+ * CBC decryption entry point: walks the scatterlists and lets
+ * __cbc_decrypt() handle each chunk.  Returns the result of the last
+ * blkcipher_walk_done() call, or of blkcipher_walk_virt() when the walk
+ * yields no data.
+ */
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk state;
+	int ret;
+
+	blkcipher_walk_init(&state, dst, src, nbytes);
+	ret = blkcipher_walk_virt(desc, &state);
+
+	while (state.nbytes != 0) {
+		const unsigned int tail = __cbc_decrypt(desc, &state);
+
+		ret = blkcipher_walk_done(desc, &state, tail);
+	}
+
+	return ret;
+}
+
+/* Convert both 64-bit halves of *src from CPU to big-endian order into *dst. */
+static inline void u128_to_be128(be128 *dst, const u128 *src)
+{
+	const u64 hi = src->a;
+	const u64 lo = src->b;
+
+	dst->a = cpu_to_be64(hi);
+	dst->b = cpu_to_be64(lo);
+}
+
+/* Convert both 64-bit halves of *src from big-endian to CPU order into *dst. */
+static inline void be128_to_u128(u128 *dst, const be128 *src)
+{
+	const __be64 hi = src->a;
+	const __be64 lo = src->b;
+
+	dst->a = be64_to_cpu(hi);
+	dst->b = be64_to_cpu(lo);
+}
+
+/*
+ * Add one to the 128-bit value *i, where i->b is the low and i->a the high
+ * 64-bit half; a wrap of the low half carries into the high half.
+ */
+static inline void u128_inc(u128 *i)
+{
+	if (++i->b == 0)
+		i->a++;
+}
+
+/*
+ * Handle the final, partial block of a CTR request.
+ *
+ * The walk->nbytes remaining source bytes are copied into a block-sized
+ * stack buffer, camellia_enc_blk_xor() is applied to that buffer with the
+ * counter block in walk->iv as its input (presumably XORing the encrypted
+ * counter into the buffer), and the first walk->nbytes bytes are copied to
+ * the destination.  The big-endian counter in walk->iv is then incremented
+ * by one.
+ */
+static void ctr_crypt_final(struct blkcipher_desc *desc,
+			    struct blkcipher_walk *walk)
+{
+	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int tail_len = walk->nbytes;
+	const u8 *in = walk->src.virt.addr;
+	u8 *out = walk->dst.virt.addr;
+	u8 block[CAMELLIA_BLOCK_SIZE];
+	u128 counter;
+
+	/* only the first tail_len bytes of block[] carry data */
+	memcpy(block, in, tail_len);
+	camellia_enc_blk_xor(ctx, block, walk->iv);
+	memcpy(out, block, tail_len);
+
+	be128_to_u128(&counter, (be128 *)walk->iv);
+	u128_inc(&counter);
+	u128_to_be128((be128 *)walk->iv, &counter);
+}
+
+/*
+ * CTR-process every whole block of the current walk chunk.
+ *
+ * The counter is read from walk->iv (big endian), kept in CPU order while
+ * the chunk is processed and written back on return, advanced by one for
+ * every block handled.  When source and destination differ the source
+ * blocks are first copied to the destination; camellia_enc_blk_xor() or its
+ * two-block variant is then called on the destination with the big-endian
+ * counter block(s) as input.
+ *
+ * Returns the number of trailing bytes (less than one block) that were not
+ * processed.
+ */
+static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
+				struct blkcipher_walk *walk)
+{
+	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+	unsigned int left = walk->nbytes;
+	const bool had_pair = left >= bsize * 2;
+	u128 *in = (u128 *)walk->src.virt.addr;
+	u128 *out = (u128 *)walk->dst.virt.addr;
+	be128 ctr_be[2];
+	u128 counter;
+
+	be128_to_u128(&counter, (be128 *)walk->iv);
+
+	/* Two blocks per call while at least two remain. */
+	while (left >= bsize * 2) {
+		if (out != in) {
+			out[0] = in[0];
+			out[1] = in[1];
+		}
+
+		/* two consecutive counter blocks for the two-block call */
+		u128_to_be128(&ctr_be[0], &counter);
+		u128_inc(&counter);
+		u128_to_be128(&ctr_be[1], &counter);
+		u128_inc(&counter);
+
+		camellia_enc_blk_xor_2way(ctx, (u8 *)out, (u8 *)ctr_be);
+
+		in += 2;
+		out += 2;
+		left -= bsize * 2;
+	}
+
+	/*
+	 * Single blocks.  After the paired loop the tail may be shorter than
+	 * one block and is then skipped; when no pair was processed the loop
+	 * body runs at least once.
+	 */
+	if (!had_pair || left >= bsize) {
+		do {
+			if (out != in)
+				*out = *in;
+
+			u128_to_be128(&ctr_be[0], &counter);
+			u128_inc(&counter);
+
+			camellia_enc_blk_xor(ctx, (u8 *)out, (u8 *)ctr_be);
+
+			in += 1;
+			out += 1;
+			left -= bsize;
+		} while (left >= bsize);
+	}
+
+	u128_to_be128((be128 *)walk->iv, &counter);
+	return left;
+}
+
+/*
+ * CTR mode entry point; installed as both .encrypt and .decrypt of the
+ * "ctr(camellia)" algorithm.
+ *
+ * Whole blocks are handled by __ctr_crypt() one walk step at a time; a final
+ * chunk shorter than CAMELLIA_BLOCK_SIZE is handled by ctr_crypt_final().
+ * Returns the status of the last blkcipher_walk_*() call.
+ */
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE);
+
+	/* __ctr_crypt() returns the bytes it left over; report them as not done. */
+	while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) {
+		nbytes = __ctr_crypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	/* Fewer than CAMELLIA_BLOCK_SIZE bytes remain: finish the tail. */
+	if (walk.nbytes) {
+		ctr_crypt_final(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	return err;
+}
+
+/*
+ * Encrypt @nbytes bytes at @srcdst in place with the camellia_ctx passed in
+ * @priv.  Used as the crypt_fn of the LRW and XTS requests.  Pairs of blocks
+ * go through the 2-way routine; one remaining whole block, if any, goes
+ * through the single-block routine.  A trailing partial block is ignored.
+ */
+static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+	struct camellia_ctx *ctx = priv;
+
+	for (; nbytes >= 2 * bsize; nbytes -= 2 * bsize, srcdst += 2 * bsize)
+		camellia_enc_blk_2way(ctx, srcdst, srcdst);
+
+	/* Fewer than two blocks remain here, so at most one is left to do. */
+	if (nbytes >= bsize)
+		camellia_enc_blk(ctx, srcdst, srcdst);
+}
+
+/*
+ * Decrypt @nbytes bytes at @srcdst in place with the camellia_ctx passed in
+ * @priv.  Used as the crypt_fn of the LRW and XTS requests.  Pairs of blocks
+ * go through the 2-way routine; one remaining whole block, if any, goes
+ * through the single-block routine.  A trailing partial block is ignored.
+ */
+static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+	struct camellia_ctx *ctx = priv;
+
+	for (; nbytes >= 2 * bsize; nbytes -= 2 * bsize, srcdst += 2 * bsize)
+		camellia_dec_blk_2way(ctx, srcdst, srcdst);
+
+	/* Fewer than two blocks remain here, so at most one is left to do. */
+	if (nbytes >= bsize)
+		camellia_dec_blk(ctx, srcdst, srcdst);
+}
+
+/* Per-tfm context of "lrw(camellia)": LRW table state plus the cipher key. */
+struct camellia_lrw_ctx {
+	struct lrw_table_ctx lrw_table;
+	struct camellia_ctx camellia_ctx;
+};
+
+/*
+ * Set the key for "lrw(camellia)".  The last CAMELLIA_BLOCK_SIZE bytes of
+ * @key are handed to lrw_init_table(); everything before them is the
+ * Camellia cipher key.  Returns 0 or the error from either helper.
+ */
+static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct camellia_lrw_ctx *lrw = crypto_tfm_ctx(tfm);
+	int err;
+
+	err = __camellia_setkey(&lrw->camellia_ctx, key,
+				keylen - CAMELLIA_BLOCK_SIZE,
+				&tfm->crt_flags);
+	if (!err)
+		err = lrw_init_table(&lrw->lrw_table,
+				     key + keylen - CAMELLIA_BLOCK_SIZE);
+
+	return err;
+}
+
+/* Encrypt @nbytes from @src to @dst in LRW mode via the generic lrw_crypt(). */
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct camellia_lrw_ctx *lrw = crypto_blkcipher_ctx(desc->tfm);
+	be128 scratch[2 * 4];
+	struct lrw_crypt_req lrw_req = {
+		.tbuf		= scratch,
+		.tbuflen	= sizeof(scratch),
+		.table_ctx	= &lrw->lrw_table,
+		.crypt_ctx	= &lrw->camellia_ctx,
+		.crypt_fn	= encrypt_callback,
+	};
+
+	return lrw_crypt(desc, dst, src, nbytes, &lrw_req);
+}
+
+/* Decrypt @nbytes from @src to @dst in LRW mode via the generic lrw_crypt(). */
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct camellia_lrw_ctx *lrw = crypto_blkcipher_ctx(desc->tfm);
+	be128 scratch[2 * 4];
+	struct lrw_crypt_req lrw_req = {
+		.tbuf		= scratch,
+		.tbuflen	= sizeof(scratch),
+		.table_ctx	= &lrw->lrw_table,
+		.crypt_ctx	= &lrw->camellia_ctx,
+		.crypt_fn	= decrypt_callback,
+	};
+
+	return lrw_crypt(desc, dst, src, nbytes, &lrw_req);
+}
+
+/* tfm destructor (.cra_exit of "lrw(camellia)"): release the LRW table. */
+static void lrw_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct camellia_lrw_ctx *lrw = crypto_tfm_ctx(tfm);
+
+	lrw_free_table(&lrw->lrw_table);
+}
+
+/* Per-tfm context of "xts(camellia)": one key for the tweak, one for the data. */
+struct camellia_xts_ctx {
+	struct camellia_ctx tweak_ctx;
+	struct camellia_ctx crypt_ctx;
+};
+
+/*
+ * Set the key for "xts(camellia)".  @key is two equal-sized Camellia keys
+ * back to back: the first half keys the data cipher, the second half keys
+ * the tweak cipher.  An odd @keylen is rejected with -EINVAL and
+ * CRYPTO_TFM_RES_BAD_KEY_LEN set in the tfm flags.
+ */
+static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	const unsigned int half = keylen / 2;
+	int err;
+
+	/* two concatenated keys of equal size cannot have an odd total */
+	if (keylen % 2) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	/* first half of xts-key is for crypt */
+	err = __camellia_setkey(&ctx->crypt_ctx, key, half, &tfm->crt_flags);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __camellia_setkey(&ctx->tweak_ctx, key + half, half,
+				&tfm->crt_flags);
+}
+
+/* Encrypt @nbytes from @src to @dst in XTS mode via the generic xts_crypt(). */
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct camellia_xts_ctx *xts = crypto_blkcipher_ctx(desc->tfm);
+	be128 scratch[2 * 4];
+	struct xts_crypt_req xts_req = {
+		.tbuf		= scratch,
+		.tbuflen	= sizeof(scratch),
+		.tweak_ctx	= &xts->tweak_ctx,
+		.tweak_fn	= XTS_TWEAK_CAST(camellia_enc_blk),
+		.crypt_ctx	= &xts->crypt_ctx,
+		.crypt_fn	= encrypt_callback,
+	};
+
+	return xts_crypt(desc, dst, src, nbytes, &xts_req);
+}
+
+/*
+ * Decrypt @nbytes from @src to @dst in XTS mode via the generic xts_crypt().
+ * The tweak function is the encryption routine here too, exactly as in
+ * xts_encrypt(); only the data callback differs.
+ */
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct camellia_xts_ctx *xts = crypto_blkcipher_ctx(desc->tfm);
+	be128 scratch[2 * 4];
+	struct xts_crypt_req xts_req = {
+		.tbuf		= scratch,
+		.tbuflen	= sizeof(scratch),
+		.tweak_ctx	= &xts->tweak_ctx,
+		.tweak_fn	= XTS_TWEAK_CAST(camellia_enc_blk),
+		.crypt_ctx	= &xts->crypt_ctx,
+		.crypt_fn	= decrypt_callback,
+	};
+
+	return xts_crypt(desc, dst, src, nbytes, &xts_req);
+}
+
+/*
+ * Algorithms registered by this module: the bare "camellia" cipher
+ * (priority 200) followed by its ECB, CBC, CTR, LRW and XTS blkcipher modes
+ * (priority 300 each).  Entry N's .cra_list must name camellia_algs[N].
+ */
+static struct crypto_alg camellia_algs[6] = { {
+	/* [0] single-block cipher */
+	.cra_name		= "camellia",
+	.cra_driver_name	= "camellia-asm",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct camellia_ctx),
+	.cra_alignmask		= 0,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(camellia_algs[0].cra_list),
+	.cra_u			= {
+		.cipher = {
+			.cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
+			.cia_max_keysize = CAMELLIA_MAX_KEY_SIZE,
+			.cia_setkey	 = camellia_setkey,
+			.cia_encrypt	 = camellia_encrypt,
+			.cia_decrypt	 = camellia_decrypt
+		}
+	}
+}, {
+	/* [1] ECB: no IV */
+	.cra_name		= "ecb(camellia)",
+	.cra_driver_name	= "ecb-camellia-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct camellia_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(camellia_algs[1].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
+			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
+			.setkey		= camellia_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	/* [2] CBC */
+	.cra_name		= "cbc(camellia)",
+	.cra_driver_name	= "cbc-camellia-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct camellia_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(camellia_algs[2].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
+			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
+			.ivsize		= CAMELLIA_BLOCK_SIZE,
+			.setkey		= camellia_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	/* [3] CTR: block size 1; one routine serves both directions */
+	.cra_name		= "ctr(camellia)",
+	.cra_driver_name	= "ctr-camellia-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct camellia_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(camellia_algs[3].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
+			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
+			.ivsize		= CAMELLIA_BLOCK_SIZE,
+			.setkey		= camellia_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+}, {
+	/* [4] LRW: key is the cipher key plus one extra block */
+	.cra_name		= "lrw(camellia)",
+	.cra_driver_name	= "lrw-camellia-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(camellia_algs[4].cra_list),
+	.cra_exit		= lrw_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
+						CAMELLIA_BLOCK_SIZE,
+			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
+						CAMELLIA_BLOCK_SIZE,
+			.ivsize		= CAMELLIA_BLOCK_SIZE,
+			.setkey		= lrw_camellia_setkey,
+			.encrypt	= lrw_encrypt,
+			.decrypt	= lrw_decrypt,
+		},
+	},
+}, {
+	/* [5] XTS: key is two cipher keys back to back */
+	.cra_name		= "xts(camellia)",
+	.cra_driver_name	= "xts-camellia-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(camellia_algs[5].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
+			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
+			.ivsize		= CAMELLIA_BLOCK_SIZE,
+			.setkey		= xts_camellia_setkey,
+			.encrypt	= xts_encrypt,
+			.decrypt	= xts_decrypt,
+		},
+	},
+} };
+
+/* Report whether the boot CPU is one this implementation should not run on. */
+static bool is_blacklisted_cpu(void)
+{
+	/*
+	 * On Pentium 4, camellia-asm is slower than the original assembler
+	 * implementation because of its heavy use of 64-bit rotates and
+	 * left-shifts (which are really slow on P4), needed to store and
+	 * handle a 128-bit block in two 64-bit registers.
+	 */
+	return boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+	       boot_cpu_data.x86 == 0x0f;
+}
+
+/* Non-zero makes init() skip the is_blacklisted_cpu() check. */
+static int force;
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
+
+/*
+ * Module entry point: register every algorithm in camellia_algs[].
+ *
+ * Refuses to load with -ENODEV on a blacklisted CPU unless the "force"
+ * module parameter is non-zero.  Kept static: the symbol is only referenced
+ * by module_init() in this file, and a global called "init" can collide with
+ * other objects when the driver is built into the kernel image.
+ */
+static int __init init(void)
+{
+	if (!force && is_blacklisted_cpu()) {
+		printk(KERN_INFO
+			"camellia-x86_64: performance on this CPU "
+			"would be suboptimal: disabling "
+			"camellia-x86_64.\n");
+		return -ENODEV;
+	}
+
+	return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+}
+
+/*
+ * Module exit point: unregister every algorithm in camellia_algs[].
+ * Kept static: the symbol is only referenced by module_exit() in this file,
+ * so there is no reason to export a global called "fini".
+ */
+static void __exit fini(void)
+{
+	crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+}
+
+/* Hook init()/fini() up as the module's load and unload handlers. */
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized");
+MODULE_ALIAS("camellia");
+MODULE_ALIAS("camellia-asm");
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index 4e37677ca851..c00053d42f99 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -463,23 +463,20 @@
 	pand x0,		x4; \
 	pxor x2,		x4;
 
-#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
-	movdqa x2,		t3; \
-	movdqa x0,		t1; \
-	unpcklps x3,		t3; \
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	movdqa x0,		t2; \
-	unpcklps x1,		t1; \
-	unpckhps x1,		t2; \
-	movdqa t3,		x1; \
-	unpckhps x3,		x2; \
-	movdqa t1,		x0; \
-	movhlps t1,		x1; \
-	movdqa t2,		t1; \
-	movlhps t3,		x0; \
-	movlhps x2,		t1; \
-	movhlps t2,		x2; \
-	movdqa x2,		x3; \
-	movdqa t1,		x2;
+	punpckldq x1,		x0; \
+	punpckhdq x1,		t2; \
+	movdqa x2,		t1; \
+	punpckhdq x3,		x2; \
+	punpckldq x3,		t1; \
+	movdqa x0,		x1; \
+	punpcklqdq t1,		x0; \
+	punpckhqdq t1,		x1; \
+	movdqa t2,		x3; \
+	punpcklqdq x2,		t2; \
+	punpckhqdq x2,		x3; \
+	movdqa t2,		x2;
 
 #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
 	movdqu (0*4*4)(in),	x0; \
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index 7f24a1540821..3ee1ff04d3e9 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -585,23 +585,20 @@
 	get_key(i, 1, RK1); \
 	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
 
-#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
-	movdqa x2,		t3; \
-	movdqa x0,		t1; \
-	unpcklps x3,		t3; \
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	movdqa x0,		t2; \
-	unpcklps x1,		t1; \
-	unpckhps x1,		t2; \
-	movdqa t3,		x1; \
-	unpckhps x3,		x2; \
-	movdqa t1,		x0; \
-	movhlps t1,		x1; \
-	movdqa t2,		t1; \
-	movlhps t3,		x0; \
-	movlhps x2,		t1; \
-	movhlps t2,		x2; \
-	movdqa x2,		x3; \
-	movdqa t1,		x2;
+	punpckldq x1,		x0; \
+	punpckhdq x1,		t2; \
+	movdqa x2,		t1; \
+	punpckhdq x3,		x2; \
+	punpckldq x3,		t1; \
+	movdqa x0,		x1; \
+	punpcklqdq t1,		x0; \
+	punpckhqdq t1,		x1; \
+	movdqa t2,		x3; \
+	punpcklqdq x2,		t2; \
+	punpckhqdq x2,		x3; \
+	movdqa t2,		x2;
 
 #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
 	movdqu (0*4*4)(in),	x0; \
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 7955a9b76b91..4b21be85e0a1 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -145,28 +145,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return ecb_crypt(desc, &walk, false);
 }
 
-static struct crypto_alg blk_ecb_alg = {
-	.cra_name		= "__ecb-serpent-sse2",
-	.cra_driver_name	= "__driver-ecb-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_ecb_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-};
-
 static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 				  struct blkcipher_walk *walk)
 {
@@ -295,28 +273,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return err;
 }
 
-static struct crypto_alg blk_cbc_alg = {
-	.cra_name		= "__cbc-serpent-sse2",
-	.cra_driver_name	= "__driver-cbc-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_cbc_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-};
-
 static inline void u128_to_be128(be128 *dst, const u128 *src)
 {
 	dst->a = cpu_to_be64(src->a);
@@ -439,29 +395,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return err;
 }
 
-static struct crypto_alg blk_ctr_alg = {
-	.cra_name		= "__ctr-serpent-sse2",
-	.cra_driver_name	= "__driver-ctr-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_ctr_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-};
-
 struct crypt_priv {
 	struct serpent_ctx *ctx;
 	bool fpu_enabled;
@@ -580,32 +513,6 @@ static void lrw_exit_tfm(struct crypto_tfm *tfm)
 	lrw_free_table(&ctx->lrw_table);
 }
 
-static struct crypto_alg blk_lrw_alg = {
-	.cra_name		= "__lrw-serpent-sse2",
-	.cra_driver_name	= "__driver-lrw-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_lrw_alg.cra_list),
-	.cra_exit		= lrw_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= lrw_serpent_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-};
-
 struct serpent_xts_ctx {
 	struct serpent_ctx tweak_ctx;
 	struct serpent_ctx crypt_ctx;
@@ -689,29 +596,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return ret;
 }
 
-static struct crypto_alg blk_xts_alg = {
-	.cra_name		= "__xts-serpent-sse2",
-	.cra_driver_name	= "__driver-xts-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_xts_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= xts_serpent_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-};
-
 static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
 			unsigned int key_len)
 {
@@ -792,28 +676,133 @@ static void ablk_exit(struct crypto_tfm *tfm)
 	cryptd_free_ablkcipher(ctx->cryptd_tfm);
 }
 
-static void ablk_init_common(struct crypto_tfm *tfm,
-			     struct cryptd_ablkcipher *cryptd_tfm)
+static int ablk_init(struct crypto_tfm *tfm)
 {
 	struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cryptd_ablkcipher *cryptd_tfm;
+	char drv_name[CRYPTO_MAX_ALG_NAME];
+
+	snprintf(drv_name, sizeof(drv_name), "__driver-%s",
+					crypto_tfm_alg_driver_name(tfm));
+
+	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
 
 	ctx->cryptd_tfm = cryptd_tfm;
 	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
 		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-}
-
-static int ablk_ecb_init(struct crypto_tfm *tfm)
-{
-	struct cryptd_ablkcipher *cryptd_tfm;
 
-	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-serpent-sse2", 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-	ablk_init_common(tfm, cryptd_tfm);
 	return 0;
 }
 
-static struct crypto_alg ablk_ecb_alg = {
+static struct crypto_alg serpent_algs[10] = { {
+	.cra_name		= "__ecb-serpent-sse2",
+	.cra_driver_name	= "__driver-ecb-serpent-sse2",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct serpent_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[0].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE,
+			.setkey		= serpent_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__cbc-serpent-sse2",
+	.cra_driver_name	= "__driver-cbc-serpent-sse2",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct serpent_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[1].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE,
+			.setkey		= serpent_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__ctr-serpent-sse2",
+	.cra_driver_name	= "__driver-ctr-serpent-sse2",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct serpent_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[2].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= serpent_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+}, {
+	.cra_name		= "__lrw-serpent-sse2",
+	.cra_driver_name	= "__driver-lrw-serpent-sse2",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[3].cra_list),
+	.cra_exit		= lrw_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE +
+					  SERPENT_BLOCK_SIZE,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE +
+					  SERPENT_BLOCK_SIZE,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= lrw_serpent_setkey,
+			.encrypt	= lrw_encrypt,
+			.decrypt	= lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__xts-serpent-sse2",
+	.cra_driver_name	= "__driver-xts-serpent-sse2",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[4].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
+			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
+			.ivsize		= SERPENT_BLOCK_SIZE,
+			.setkey		= xts_serpent_setkey,
+			.encrypt	= xts_encrypt,
+			.decrypt	= xts_decrypt,
+		},
+	},
+}, {
 	.cra_name		= "ecb(serpent)",
 	.cra_driver_name	= "ecb-serpent-sse2",
 	.cra_priority		= 400,
@@ -823,8 +812,8 @@ static struct crypto_alg ablk_ecb_alg = {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
-	.cra_init		= ablk_ecb_init,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[5].cra_list),
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -835,20 +824,7 @@ static struct crypto_alg ablk_ecb_alg = {
 			.decrypt	= ablk_decrypt,
 		},
 	},
-};
-
-static int ablk_cbc_init(struct crypto_tfm *tfm)
-{
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-serpent-sse2", 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-	ablk_init_common(tfm, cryptd_tfm);
-	return 0;
-}
-
-static struct crypto_alg ablk_cbc_alg = {
+}, {
 	.cra_name		= "cbc(serpent)",
 	.cra_driver_name	= "cbc-serpent-sse2",
 	.cra_priority		= 400,
@@ -858,8 +834,8 @@ static struct crypto_alg ablk_cbc_alg = {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
-	.cra_init		= ablk_cbc_init,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[6].cra_list),
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -871,20 +847,7 @@ static struct crypto_alg ablk_cbc_alg = {
 			.decrypt	= ablk_decrypt,
 		},
 	},
-};
-
-static int ablk_ctr_init(struct crypto_tfm *tfm)
-{
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-serpent-sse2", 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-	ablk_init_common(tfm, cryptd_tfm);
-	return 0;
-}
-
-static struct crypto_alg ablk_ctr_alg = {
+}, {
 	.cra_name		= "ctr(serpent)",
 	.cra_driver_name	= "ctr-serpent-sse2",
 	.cra_priority		= 400,
@@ -894,8 +857,8 @@ static struct crypto_alg ablk_ctr_alg = {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(ablk_ctr_alg.cra_list),
-	.cra_init		= ablk_ctr_init,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[7].cra_list),
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -908,20 +871,7 @@ static struct crypto_alg ablk_ctr_alg = {
 			.geniv		= "chainiv",
 		},
 	},
-};
-
-static int ablk_lrw_init(struct crypto_tfm *tfm)
-{
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-lrw-serpent-sse2", 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-	ablk_init_common(tfm, cryptd_tfm);
-	return 0;
-}
-
-static struct crypto_alg ablk_lrw_alg = {
+}, {
 	.cra_name		= "lrw(serpent)",
 	.cra_driver_name	= "lrw-serpent-sse2",
 	.cra_priority		= 400,
@@ -931,8 +881,8 @@ static struct crypto_alg ablk_lrw_alg = {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(ablk_lrw_alg.cra_list),
-	.cra_init		= ablk_lrw_init,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[8].cra_list),
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -946,20 +896,7 @@ static struct crypto_alg ablk_lrw_alg = {
 			.decrypt	= ablk_decrypt,
 		},
 	},
-};
-
-static int ablk_xts_init(struct crypto_tfm *tfm)
-{
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-xts-serpent-sse2", 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-	ablk_init_common(tfm, cryptd_tfm);
-	return 0;
-}
-
-static struct crypto_alg ablk_xts_alg = {
+}, {
 	.cra_name		= "xts(serpent)",
 	.cra_driver_name	= "xts-serpent-sse2",
 	.cra_priority		= 400,
@@ -969,8 +906,8 @@ static struct crypto_alg ablk_xts_alg = {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(ablk_xts_alg.cra_list),
-	.cra_init		= ablk_xts_init,
+	.cra_list		= LIST_HEAD_INIT(serpent_algs[9].cra_list),
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -982,84 +919,21 @@ static struct crypto_alg ablk_xts_alg = {
 			.decrypt	= ablk_decrypt,
 		},
 	},
-};
+} };
 
 static int __init serpent_sse2_init(void)
 {
-	int err;
-
 	if (!cpu_has_xmm2) {
 		printk(KERN_INFO "SSE2 instructions are not detected.\n");
 		return -ENODEV;
 	}
 
-	err = crypto_register_alg(&blk_ecb_alg);
-	if (err)
-		goto blk_ecb_err;
-	err = crypto_register_alg(&blk_cbc_alg);
-	if (err)
-		goto blk_cbc_err;
-	err = crypto_register_alg(&blk_ctr_alg);
-	if (err)
-		goto blk_ctr_err;
-	err = crypto_register_alg(&ablk_ecb_alg);
-	if (err)
-		goto ablk_ecb_err;
-	err = crypto_register_alg(&ablk_cbc_alg);
-	if (err)
-		goto ablk_cbc_err;
-	err = crypto_register_alg(&ablk_ctr_alg);
-	if (err)
-		goto ablk_ctr_err;
-	err = crypto_register_alg(&blk_lrw_alg);
-	if (err)
-		goto blk_lrw_err;
-	err = crypto_register_alg(&ablk_lrw_alg);
-	if (err)
-		goto ablk_lrw_err;
-	err = crypto_register_alg(&blk_xts_alg);
-	if (err)
-		goto blk_xts_err;
-	err = crypto_register_alg(&ablk_xts_alg);
-	if (err)
-		goto ablk_xts_err;
-	return err;
-
-	crypto_unregister_alg(&ablk_xts_alg);
-ablk_xts_err:
-	crypto_unregister_alg(&blk_xts_alg);
-blk_xts_err:
-	crypto_unregister_alg(&ablk_lrw_alg);
-ablk_lrw_err:
-	crypto_unregister_alg(&blk_lrw_alg);
-blk_lrw_err:
-	crypto_unregister_alg(&ablk_ctr_alg);
-ablk_ctr_err:
-	crypto_unregister_alg(&ablk_cbc_alg);
-ablk_cbc_err:
-	crypto_unregister_alg(&ablk_ecb_alg);
-ablk_ecb_err:
-	crypto_unregister_alg(&blk_ctr_alg);
-blk_ctr_err:
-	crypto_unregister_alg(&blk_cbc_alg);
-blk_cbc_err:
-	crypto_unregister_alg(&blk_ecb_alg);
-blk_ecb_err:
-	return err;
+	return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
 }
 
 static void __exit serpent_sse2_exit(void)
 {
-	crypto_unregister_alg(&ablk_xts_alg);
-	crypto_unregister_alg(&blk_xts_alg);
-	crypto_unregister_alg(&ablk_lrw_alg);
-	crypto_unregister_alg(&blk_lrw_alg);
-	crypto_unregister_alg(&ablk_ctr_alg);
-	crypto_unregister_alg(&ablk_cbc_alg);
-	crypto_unregister_alg(&ablk_ecb_alg);
-	crypto_unregister_alg(&blk_ctr_alg);
-	crypto_unregister_alg(&blk_cbc_alg);
-	crypto_unregister_alg(&blk_ecb_alg);
+	crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
 }
 
 module_init(serpent_sse2_init);
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c
index dc6b3fb817fc..359ae084275c 100644
--- a/arch/x86/crypto/twofish_glue.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -68,7 +68,7 @@ static struct crypto_alg alg = {
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	TF_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct twofish_ctx),
-	.cra_alignmask		=	3,
+	.cra_alignmask		=	0,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
 	.cra_u			=	{
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 7fee8c152f93..408fc0c5814e 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -25,6 +25,7 @@
  *
  */
 
+#include <asm/processor.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -122,28 +123,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way);
 }
 
-static struct crypto_alg blk_ecb_alg = {
-	.cra_name		= "ecb(twofish)",
-	.cra_driver_name	= "ecb-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_ecb_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-};
-
 static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 				  struct blkcipher_walk *walk)
 {
@@ -267,29 +246,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return err;
 }
 
-static struct crypto_alg blk_cbc_alg = {
-	.cra_name		= "cbc(twofish)",
-	.cra_driver_name	= "cbc-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_cbc_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-};
-
 static inline void u128_to_be128(be128 *dst, const u128 *src)
 {
 	dst->a = cpu_to_be64(src->a);
@@ -411,29 +367,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return err;
 }
 
-static struct crypto_alg blk_ctr_alg = {
-	.cra_name		= "ctr(twofish)",
-	.cra_driver_name	= "ctr-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_ctr_alg.cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-};
-
 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 {
 	const unsigned int bsize = TF_BLOCK_SIZE;
@@ -524,30 +457,6 @@ static void lrw_exit_tfm(struct crypto_tfm *tfm)
 	lrw_free_table(&ctx->lrw_table);
 }
 
-static struct crypto_alg blk_lrw_alg = {
-	.cra_name		= "lrw(twofish)",
-	.cra_driver_name	= "lrw-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_lrw_alg.cra_list),
-	.cra_exit		= lrw_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= lrw_twofish_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-};
-
 struct twofish_xts_ctx {
 	struct twofish_ctx tweak_ctx;
 	struct twofish_ctx crypt_ctx;
@@ -614,7 +523,91 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 	return xts_crypt(desc, dst, src, nbytes, &req);
 }
 
-static struct crypto_alg blk_xts_alg = {
+static struct crypto_alg tf_algs[5] = { {
+	.cra_name		= "ecb(twofish)",
+	.cra_driver_name	= "ecb-twofish-3way",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct twofish_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(tf_algs[0].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE,
+			.setkey		= twofish_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(twofish)",
+	.cra_driver_name	= "cbc-twofish-3way",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct twofish_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(tf_algs[1].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE,
+			.ivsize		= TF_BLOCK_SIZE,
+			.setkey		= twofish_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ctr(twofish)",
+	.cra_driver_name	= "ctr-twofish-3way",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct twofish_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(tf_algs[2].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE,
+			.ivsize		= TF_BLOCK_SIZE,
+			.setkey		= twofish_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+}, {
+	.cra_name		= "lrw(twofish)",
+	.cra_driver_name	= "lrw-twofish-3way",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(tf_algs[3].cra_list),
+	.cra_exit		= lrw_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
+			.ivsize		= TF_BLOCK_SIZE,
+			.setkey		= lrw_twofish_setkey,
+			.encrypt	= lrw_encrypt,
+			.decrypt	= lrw_decrypt,
+		},
+	},
+}, {
 	.cra_name		= "xts(twofish)",
 	.cra_driver_name	= "xts-twofish-3way",
 	.cra_priority		= 300,
@@ -624,7 +617,7 @@ static struct crypto_alg blk_xts_alg = {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(blk_xts_alg.cra_list),
+	.cra_list		= LIST_HEAD_INIT(tf_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE * 2,
@@ -635,50 +628,62 @@ static struct crypto_alg blk_xts_alg = {
 			.decrypt	= xts_decrypt,
 		},
 	},
-};
+} };
+
+static bool is_blacklisted_cpu(void)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return false;
+
+	if (boot_cpu_data.x86 == 0x06 &&
+		(boot_cpu_data.x86_model == 0x1c ||
+		 boot_cpu_data.x86_model == 0x26 ||
+		 boot_cpu_data.x86_model == 0x36)) {
+		/*
+		 * On Atom, twofish-3way is slower than original assembler
+		 * implementation. Twofish-3way trades off some performance in
+		 * storing blocks in 64bit registers to allow three blocks to be
+		 * processed in parallel. Parallel operation then allows gaining
+		 * more performance than was traded off, on out-of-order CPUs.
+		 * However, Atom does not benefit from this parallelism and
+		 * should be blacklisted.
+		 */
+		return true;
+	}
+
+	if (boot_cpu_data.x86 == 0x0f) {
+		/*
+		 * On Pentium 4, twofish-3way is slower than original assembler
+		 * implementation because of excessive use of 64bit rotates and
+		 * left-shifts (which are really slow on P4) needed to store and
+		 * handle a 128bit block in two 64bit registers.
+		 */
+		return true;
+	}
+
+	return false;
+}
+
+static int force;
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
 int __init init(void)
 {
-	int err;
+	if (!force && is_blacklisted_cpu()) {
+		printk(KERN_INFO
+			"twofish-x86_64-3way: performance on this CPU "
+			"would be suboptimal: disabling "
+			"twofish-x86_64-3way.\n");
+		return -ENODEV;
+	}
 
-	err = crypto_register_alg(&blk_ecb_alg);
-	if (err)
-		goto ecb_err;
-	err = crypto_register_alg(&blk_cbc_alg);
-	if (err)
-		goto cbc_err;
-	err = crypto_register_alg(&blk_ctr_alg);
-	if (err)
-		goto ctr_err;
-	err = crypto_register_alg(&blk_lrw_alg);
-	if (err)
-		goto blk_lrw_err;
-	err = crypto_register_alg(&blk_xts_alg);
-	if (err)
-		goto blk_xts_err;
-
-	return 0;
-
-	crypto_unregister_alg(&blk_xts_alg);
-blk_xts_err:
-	crypto_unregister_alg(&blk_lrw_alg);
-blk_lrw_err:
-	crypto_unregister_alg(&blk_ctr_alg);
-ctr_err:
-	crypto_unregister_alg(&blk_cbc_alg);
-cbc_err:
-	crypto_unregister_alg(&blk_ecb_alg);
-ecb_err:
-	return err;
+	return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
 }
 
 void __exit fini(void)
 {
-	crypto_unregister_alg(&blk_xts_alg);
-	crypto_unregister_alg(&blk_lrw_alg);
-	crypto_unregister_alg(&blk_ctr_alg);
-	crypto_unregister_alg(&blk_cbc_alg);
-	crypto_unregister_alg(&blk_ecb_alg);
+	crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
 }
 
 module_init(init);