-rw-r--r--  Documentation/devicetree/bindings/crypto/arm-cryptocell.txt | 22
-rw-r--r--  Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt | 3
-rw-r--r--  Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt | 4
-rw-r--r--  Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt | 19
-rw-r--r--  Documentation/devicetree/bindings/rng/brcm,bcm2835.txt | 22
-rw-r--r--  Documentation/devicetree/bindings/rng/brcm,bcm6368.txt | 17
-rw-r--r--  MAINTAINERS | 8
-rw-r--r--  arch/arm/crypto/aes-neonbs-glue.c | 10
-rw-r--r--  arch/arm/crypto/crc32-ce-glue.c | 2
-rw-r--r--  arch/arm64/crypto/Kconfig | 18
-rw-r--r--  arch/arm64/crypto/Makefile | 11
-rw-r--r--  arch/arm64/crypto/aes-ce-core.S | 87
-rw-r--r--  arch/arm64/crypto/aes-ce-glue.c (renamed from arch/arm64/crypto/aes-ce-cipher.c) | 115
-rw-r--r--  arch/arm64/crypto/aes-cipher-core.S | 19
-rw-r--r--  arch/arm64/crypto/aes-glue.c | 1
-rw-r--r--  arch/arm64/crypto/aes-neon.S | 8
-rw-r--r--  arch/arm64/crypto/crc32-ce-core.S | 7
-rw-r--r--  arch/arm64/crypto/crc32-ce-glue.c | 2
-rw-r--r--  arch/arm64/crypto/crct10dif-ce-core.S | 17
-rw-r--r--  arch/arm64/crypto/sha1-ce-core.S | 20
-rw-r--r--  arch/arm64/crypto/sha2-ce-core.S | 4
-rw-r--r--  arch/arm64/crypto/sha3-ce-core.S | 210
-rw-r--r--  arch/arm64/crypto/sha3-ce-glue.c | 161
-rw-r--r--  arch/arm64/crypto/sha512-ce-core.S | 204
-rw-r--r--  arch/arm64/crypto/sha512-ce-glue.c | 119
-rw-r--r--  arch/arm64/crypto/sha512-glue.c | 1
-rw-r--r--  arch/arm64/crypto/sm3-ce-core.S | 141
-rw-r--r--  arch/arm64/crypto/sm3-ce-glue.c | 92
-rw-r--r--  arch/powerpc/crypto/crc32c-vpmsum_glue.c | 1
-rw-r--r--  arch/s390/crypto/crc32-vx.c | 3
-rw-r--r--  arch/sparc/crypto/crc32c_glue.c | 1
-rw-r--r--  arch/x86/crypto/aesni-intel_asm.S | 199
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 70
-rw-r--r--  arch/x86/crypto/chacha20_glue.c | 1
-rw-r--r--  arch/x86/crypto/crc32-pclmul_glue.c | 1
-rw-r--r--  arch/x86/crypto/crc32c-intel_glue.c | 1
-rw-r--r--  arch/x86/crypto/poly1305_glue.c | 2
-rw-r--r--  arch/x86/crypto/salsa20-i586-asm_32.S | 184
-rw-r--r--  arch/x86/crypto/salsa20-x86_64-asm_64.S | 114
-rw-r--r--  arch/x86/crypto/salsa20_glue.c | 105
-rw-r--r--  arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 112
-rw-r--r--  crypto/Kconfig | 4
-rw-r--r--  crypto/Makefile | 1
-rw-r--r--  crypto/ablk_helper.c | 5
-rw-r--r--  crypto/aead.c | 19
-rw-r--r--  crypto/af_alg.c | 10
-rw-r--r--  crypto/ahash.c | 33
-rw-r--r--  crypto/algapi.c | 13
-rw-r--r--  crypto/algif_aead.c | 15
-rw-r--r--  crypto/algif_hash.c | 52
-rw-r--r--  crypto/algif_skcipher.c | 59
-rw-r--r--  crypto/api.c | 6
-rw-r--r--  crypto/authenc.c | 4
-rw-r--r--  crypto/authencesn.c | 4
-rw-r--r--  crypto/blkcipher.c | 1
-rw-r--r--  crypto/camellia_generic.c | 3
-rw-r--r--  crypto/cast5_generic.c | 3
-rw-r--r--  crypto/cast6_generic.c | 3
-rw-r--r--  crypto/chacha20_generic.c | 33
-rw-r--r--  crypto/crc32_generic.c | 1
-rw-r--r--  crypto/crc32c_generic.c | 1
-rw-r--r--  crypto/cryptd.c | 17
-rw-r--r--  crypto/crypto_user.c | 4
-rw-r--r--  crypto/ecc.c | 2
-rw-r--r--  crypto/echainiv.c | 5
-rw-r--r--  crypto/gcm.c | 4
-rw-r--r--  crypto/gf128mul.c | 2
-rw-r--r--  crypto/ghash-generic.c | 6
-rw-r--r--  crypto/internal.h | 8
-rw-r--r--  crypto/keywrap.c | 4
-rw-r--r--  crypto/mcryptd.c | 11
-rw-r--r--  crypto/poly1305_generic.c | 27
-rw-r--r--  crypto/proc.c | 2
-rw-r--r--  crypto/salsa20_generic.c | 240
-rw-r--r--  crypto/seqiv.c | 5
-rw-r--r--  crypto/sha3_generic.c | 332
-rw-r--r--  crypto/shash.c | 25
-rw-r--r--  crypto/simd.c | 4
-rw-r--r--  crypto/skcipher.c | 30
-rw-r--r--  crypto/tcrypt.c | 1083
-rw-r--r--  crypto/testmgr.c | 41
-rw-r--r--  crypto/testmgr.h | 550
-rw-r--r--  crypto/twofish_common.c | 5
-rw-r--r--  crypto/twofish_generic.c | 5
-rw-r--r--  crypto/xcbc.c | 3
-rw-r--r--  drivers/char/hw_random/Kconfig | 32
-rw-r--r--  drivers/char/hw_random/Makefile | 2
-rw-r--r--  drivers/char/hw_random/bcm2835-rng.c | 169
-rw-r--r--  drivers/char/hw_random/bcm63xx-rng.c | 154
-rw-r--r--  drivers/char/hw_random/core.c | 4
-rw-r--r--  drivers/char/hw_random/exynos-trng.c | 235
-rw-r--r--  drivers/char/hw_random/imx-rngc.c | 13
-rw-r--r--  drivers/char/hw_random/mtk-rng.c | 1
-rw-r--r--  drivers/char/random.c | 24
-rw-r--r--  drivers/crypto/Kconfig | 1
-rw-r--r--  drivers/crypto/amcc/crypto4xx_alg.c | 6
-rw-r--r--  drivers/crypto/amcc/crypto4xx_core.c | 131
-rw-r--r--  drivers/crypto/amcc/crypto4xx_core.h | 4
-rw-r--r--  drivers/crypto/amcc/crypto4xx_reg_def.h | 4
-rw-r--r--  drivers/crypto/amcc/crypto4xx_trng.c | 2
-rw-r--r--  drivers/crypto/axis/artpec6_crypto.c | 8
-rw-r--r--  drivers/crypto/bcm/cipher.c | 1
-rw-r--r--  drivers/crypto/bfin_crc.c | 3
-rw-r--r--  drivers/crypto/caam/caamalg.c | 120
-rw-r--r--  drivers/crypto/caam/caamalg_desc.c | 182
-rw-r--r--  drivers/crypto/caam/caamalg_desc.h | 10
-rw-r--r--  drivers/crypto/caam/caamalg_qi.c | 68
-rw-r--r--  drivers/crypto/caam/caamhash.c | 73
-rw-r--r--  drivers/crypto/caam/ctrl.c | 4
-rw-r--r--  drivers/crypto/caam/desc.h | 29
-rw-r--r--  drivers/crypto/caam/desc_constr.h | 51
-rw-r--r--  drivers/crypto/caam/intern.h | 1
-rw-r--r--  drivers/crypto/caam/key_gen.c | 30
-rw-r--r--  drivers/crypto/caam/key_gen.h | 30
-rw-r--r--  drivers/crypto/cavium/cpt/cptvf_reqmanager.c | 3
-rw-r--r--  drivers/crypto/cavium/nitrox/nitrox_reqmgr.c | 1
-rw-r--r--  drivers/crypto/ccp/ccp-crypto-aes-galois.c | 1
-rw-r--r--  drivers/crypto/chelsio/Kconfig | 10
-rw-r--r--  drivers/crypto/chelsio/Makefile | 1
-rw-r--r--  drivers/crypto/chelsio/chcr_algo.c | 540
-rw-r--r--  drivers/crypto/chelsio/chcr_algo.h | 15
-rw-r--r--  drivers/crypto/chelsio/chcr_core.c | 14
-rw-r--r--  drivers/crypto/chelsio/chcr_core.h | 38
-rw-r--r--  drivers/crypto/chelsio/chcr_crypto.h | 76
-rw-r--r--  drivers/crypto/chelsio/chcr_ipsec.c | 654
-rw-r--r--  drivers/crypto/exynos-rng.c | 108
-rw-r--r--  drivers/crypto/hifn_795x.c | 1
-rw-r--r--  drivers/crypto/inside-secure/safexcel.c | 370
-rw-r--r--  drivers/crypto/inside-secure/safexcel.h | 173
-rw-r--r--  drivers/crypto/inside-secure/safexcel_cipher.c | 53
-rw-r--r--  drivers/crypto/inside-secure/safexcel_hash.c | 125
-rw-r--r--  drivers/crypto/ixp4xx_crypto.c | 7
-rw-r--r--  drivers/crypto/marvell/cesa.c | 19
-rw-r--r--  drivers/crypto/nx/nx-842-powernv.c | 4
-rw-r--r--  drivers/crypto/picoxcell_crypto.c | 27
-rw-r--r--  drivers/crypto/qat/qat_common/qat_hal.c | 133
-rw-r--r--  drivers/crypto/s5p-sss.c | 26
-rw-r--r--  drivers/crypto/stm32/Kconfig | 13
-rw-r--r--  drivers/crypto/stm32/Makefile | 5
-rw-r--r--  drivers/crypto/stm32/stm32-cryp.c | 1170
-rw-r--r--  drivers/crypto/stm32/stm32_crc32.c | 2
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 23
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 2
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 1
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 3
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/sge.c | 102
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 7
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c | 1
-rw-r--r--  include/crypto/aead.h | 10
-rw-r--r--  include/crypto/chacha20.h | 3
-rw-r--r--  include/crypto/hash.h | 46
-rw-r--r--  include/crypto/internal/hash.h | 2
-rw-r--r--  include/crypto/internal/scompress.h | 11
-rw-r--r--  include/crypto/null.h | 10
-rw-r--r--  include/crypto/poly1305.h | 2
-rw-r--r--  include/crypto/salsa20.h | 27
-rw-r--r--  include/crypto/sha3.h | 6
-rw-r--r--  include/crypto/skcipher.h | 11
-rw-r--r--  include/linux/crypto.h | 10
-rw-r--r--  kernel/padata.c | 1
-rw-r--r--  lib/chacha20.c | 71
162 files changed, 7472 insertions, 2683 deletions
diff --git a/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
new file mode 100644
index 000000000000..cec8d5d74e26
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
@@ -0,0 +1,22 @@
+Arm TrustZone CryptoCell cryptographic engine
+
+Required properties:
+- compatible: Should be "arm,cryptocell-712-ree".
+- reg: Base physical address of the engine and length of memory mapped region.
+- interrupts: Interrupt number for the device.
+
+Optional properties:
+- interrupt-parent: The phandle for the interrupt controller that services
+  interrupts for this device.
+- clocks: Reference to the crypto engine clock.
+- dma-coherent: Present if dma operations are coherent.
+
+Examples:
+
+       arm_cc712: crypto@80000000 {
+               compatible = "arm,cryptocell-712-ree";
+               interrupt-parent = <&intc>;
+               interrupts = < 0 30 4 >;
+               reg = < 0x80000000 0x10000 >;
+
+       };
diff --git a/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt b/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
index fbc07d12322f..30c3ce6b502e 100644
--- a/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
+++ b/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
@@ -1,7 +1,8 @@
 Inside Secure SafeXcel cryptographic engine
 
 Required properties:
-- compatible: Should be "inside-secure,safexcel-eip197".
+- compatible: Should be "inside-secure,safexcel-eip197" or
+              "inside-secure,safexcel-eip97".
 - reg: Base physical address of the engine and length of memory mapped region.
 - interrupts: Interrupt numbers for the rings and engine.
 - interrupt-names: Should be "ring0", "ring1", "ring2", "ring3", "eip", "mem".
diff --git a/Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt b/Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
index 4ca8dd4d7e66..a13fbdb4bd88 100644
--- a/Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
+++ b/Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
@@ -2,7 +2,9 @@ Exynos Pseudo Random Number Generator
 
 Required properties:
 
-- compatible  : Should be "samsung,exynos4-rng".
+- compatible  : One of:
+                - "samsung,exynos4-rng" for Exynos4210 and Exynos4412
+                - "samsung,exynos5250-prng" for Exynos5250+
 - reg         : Specifies base physical address and size of the registers map.
 - clocks      : Phandle to clock-controller plus clock-specifier pair.
 - clock-names : "secss" as a clock name.
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt
new file mode 100644
index 000000000000..970487fa40b8
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt
@@ -0,0 +1,19 @@
+* STMicroelectronics STM32 CRYP
+
+Required properties:
+- compatible: Should be "st,stm32f756-cryp".
+- reg: The address and length of the peripheral registers space
+- clocks: The input clock of the CRYP instance
+- interrupts: The CRYP interrupt
+
+Optional properties:
+- resets: The input reset of the CRYP instance
+
+Example:
+crypto@50060000 {
+	compatible = "st,stm32f756-cryp";
+	reg = <0x50060000 0x400>;
+	interrupts = <79>;
+	clocks = <&rcc 0 STM32F7_AHB2_CLOCK(CRYP)>;
+	resets = <&rcc STM32F7_AHB2_RESET(CRYP)>;
+};
diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
index 26542690b578..627b29531a32 100644
--- a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
+++ b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
@@ -1,11 +1,19 @@
-BCM2835 Random number generator
+BCM2835/6368 Random number generator
 
 Required properties:
 
-- compatible : should be "brcm,bcm2835-rng"  or "brcm,bcm-nsp-rng" or
-  "brcm,bcm5301x-rng"
+- compatible : should be one of
+	"brcm,bcm2835-rng"
+	"brcm,bcm-nsp-rng"
+	"brcm,bcm5301x-rng" or
+	"brcm,bcm6368-rng"
 - reg : Specifies base physical address and size of the registers.
 
+Optional properties:
+
+- clocks : phandle to clock-controller plus clock-specifier pair
+- clock-names : "ipsec" as a clock name
+
 Example:
 
 rng {
@@ -17,3 +25,11 @@ rng@18033000 {
 	compatible = "brcm,bcm-nsp-rng";
 	reg = <0x18033000 0x14>;
 };
+
+random: rng@10004180 {
+	compatible = "brcm,bcm6368-rng";
+	reg = <0x10004180 0x14>;
+
+	clocks = <&periph_clk 18>;
+	clock-names = "ipsec";
+};
diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm6368.txt b/Documentation/devicetree/bindings/rng/brcm,bcm6368.txt
deleted file mode 100644
index 4b5ac600bfbd..000000000000
--- a/Documentation/devicetree/bindings/rng/brcm,bcm6368.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-BCM6368 Random number generator
-
-Required properties:
-
-- compatible : should be "brcm,bcm6368-rng"
-- reg : Specifies base physical address and size of the registers
-- clocks : phandle to clock-controller plus clock-specifier pair
-- clock-names : "ipsec" as a clock name
-
-Example:
-	random: rng@10004180 {
-		compatible = "brcm,bcm6368-rng";
-		reg = <0x10004180 0x14>;
-
-		clocks = <&periph_clk 18>;
-		clock-names = "ipsec";
-	};
diff --git a/MAINTAINERS b/MAINTAINERS
index 7872d430e7b1..e6aa3922a32b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11964,6 +11964,13 @@ S:	Maintained
 F:	drivers/crypto/exynos-rng.c
 F:	Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
 
+SAMSUNG EXYNOS TRUE RANDOM NUMBER GENERATOR (TRNG) DRIVER
+M:	Łukasz Stelmach <l.stelmach@samsung.com>
+L:	linux-samsung-soc@vger.kernel.org
+S:	Maintained
+F:	drivers/char/hw_random/exynos-trng.c
+F:	Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.txt
+
 SAMSUNG FRAMEBUFFER DRIVER
 M:	Jingoo Han <jingoohan1@gmail.com>
 L:	linux-fbdev@vger.kernel.org
@@ -12026,6 +12033,7 @@ F:	drivers/media/i2c/s5k5baf.c
 SAMSUNG S5P Security SubSystem (SSS) DRIVER
 M:	Krzysztof Kozlowski <krzk@kernel.org>
 M:	Vladimir Zapolskiy <vz@mleia.com>
+M:	Kamil Konieczny <k.konieczny@partner.samsung.com>
 L:	linux-crypto@vger.kernel.org
 L:	linux-samsung-soc@vger.kernel.org
 S:	Maintained
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index 18768f330449..07e31941dc67 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -181,9 +181,8 @@ static int cbc_init(struct crypto_tfm *tfm)
 	struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	ctx->enc_tfm = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(ctx->enc_tfm))
-		return PTR_ERR(ctx->enc_tfm);
-	return 0;
+
+	return PTR_ERR_OR_ZERO(ctx->enc_tfm);
 }
 
 static void cbc_exit(struct crypto_tfm *tfm)
@@ -258,9 +257,8 @@ static int xts_init(struct crypto_tfm *tfm)
 	struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	ctx->tweak_tfm = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(ctx->tweak_tfm))
-		return PTR_ERR(ctx->tweak_tfm);
-	return 0;
+
+	return PTR_ERR_OR_ZERO(ctx->tweak_tfm);
 }
 
 static void xts_exit(struct crypto_tfm *tfm)
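The two hunks above replace the open-coded IS_ERR()/PTR_ERR() tail with the PTR_ERR_OR_ZERO() helper from <linux/err.h>. A minimal sketch of the idiom, with demo_ctx/demo_init as hypothetical names standing in for the real cbc/xts contexts:

#include <linux/crypto.h>
#include <linux/err.h>

struct demo_ctx {
	struct crypto_cipher *tfm;
};

static int demo_init(struct demo_ctx *ctx)
{
	ctx->tfm = crypto_alloc_cipher("aes", 0, 0);

	/* same as: if (IS_ERR(ctx->tfm)) return PTR_ERR(ctx->tfm); return 0; */
	return PTR_ERR_OR_ZERO(ctx->tfm);
}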
diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c
index 1b0e0e86ee9c..96e62ec105d0 100644
--- a/arch/arm/crypto/crc32-ce-glue.c
+++ b/arch/arm/crypto/crc32-ce-glue.c
@@ -188,6 +188,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
 	.base.cra_name		= "crc32",
 	.base.cra_driver_name	= "crc32-arm-ce",
 	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 	.base.cra_blocksize	= 1,
 	.base.cra_module	= THIS_MODULE,
 }, {
@@ -203,6 +204,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
 	.base.cra_name		= "crc32c",
 	.base.cra_driver_name	= "crc32c-arm-ce",
 	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 	.base.cra_blocksize	= 1,
 	.base.cra_module	= THIS_MODULE,
 } };
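The hunks above add CRYPTO_ALG_OPTIONAL_KEY, which marks a hash whose ->setkey() may be skipped; for crc32/crc32c the "key" is only the initial seed, so callers can digest data without keying the tfm first. A hedged usage sketch against the shash API of this era (demo_crc32c is a hypothetical helper, not part of the patch):

#include <crypto/hash.h>
#include <linux/err.h>

static int demo_crc32c(const u8 *buf, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("crc32c", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		/* no crypto_shash_setkey(): the default seed is used */
		err = crypto_shash_digest(desc, buf, len, out);
	}

	crypto_free_shash(tfm);
	return err;
}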
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 70c517aa4501..285c36c7b408 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -29,6 +29,24 @@ config CRYPTO_SHA2_ARM64_CE
 	select CRYPTO_HASH
 	select CRYPTO_SHA256_ARM64
 
+config CRYPTO_SHA512_ARM64_CE
+	tristate "SHA-384/SHA-512 digest algorithm (ARMv8 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_HASH
+	select CRYPTO_SHA512_ARM64
+
+config CRYPTO_SHA3_ARM64
+	tristate "SHA3 digest algorithm (ARMv8.2 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_HASH
+	select CRYPTO_SHA3
+
+config CRYPTO_SM3_ARM64_CE
+	tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_HASH
+	select CRYPTO_SM3
+
 config CRYPTO_GHASH_ARM64_CE
 	tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index b5edc5918c28..cee9b8d9830b 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -14,6 +14,15 @@ sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
 obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
 sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
 
+obj-$(CONFIG_CRYPTO_SHA512_ARM64_CE) += sha512-ce.o
+sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o
+
+obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
+sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
+
+obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
+sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
+
 obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
 ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 
@@ -24,7 +33,7 @@ obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o
 crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
-CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
+aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
 aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
diff --git a/arch/arm64/crypto/aes-ce-core.S b/arch/arm64/crypto/aes-ce-core.S
new file mode 100644
index 000000000000..8efdfdade393
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-core.S
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.arch		armv8-a+crypto
+
+ENTRY(__aes_ce_encrypt)
+	sub		w3, w3, #2
+	ld1		{v0.16b}, [x2]
+	ld1		{v1.4s}, [x0], #16
+	cmp		w3, #10
+	bmi		0f
+	bne		3f
+	mov		v3.16b, v1.16b
+	b		2f
+0:	mov		v2.16b, v1.16b
+	ld1		{v3.4s}, [x0], #16
+1:	aese		v0.16b, v2.16b
+	aesmc		v0.16b, v0.16b
+2:	ld1		{v1.4s}, [x0], #16
+	aese		v0.16b, v3.16b
+	aesmc		v0.16b, v0.16b
+3:	ld1		{v2.4s}, [x0], #16
+	subs		w3, w3, #3
+	aese		v0.16b, v1.16b
+	aesmc		v0.16b, v0.16b
+	ld1		{v3.4s}, [x0], #16
+	bpl		1b
+	aese		v0.16b, v2.16b
+	eor		v0.16b, v0.16b, v3.16b
+	st1		{v0.16b}, [x1]
+	ret
+ENDPROC(__aes_ce_encrypt)
+
+ENTRY(__aes_ce_decrypt)
+	sub		w3, w3, #2
+	ld1		{v0.16b}, [x2]
+	ld1		{v1.4s}, [x0], #16
+	cmp		w3, #10
+	bmi		0f
+	bne		3f
+	mov		v3.16b, v1.16b
+	b		2f
+0:	mov		v2.16b, v1.16b
+	ld1		{v3.4s}, [x0], #16
+1:	aesd		v0.16b, v2.16b
+	aesimc		v0.16b, v0.16b
+2:	ld1		{v1.4s}, [x0], #16
+	aesd		v0.16b, v3.16b
+	aesimc		v0.16b, v0.16b
+3:	ld1		{v2.4s}, [x0], #16
+	subs		w3, w3, #3
+	aesd		v0.16b, v1.16b
+	aesimc		v0.16b, v0.16b
+	ld1		{v3.4s}, [x0], #16
+	bpl		1b
+	aesd		v0.16b, v2.16b
+	eor		v0.16b, v0.16b, v3.16b
+	st1		{v0.16b}, [x1]
+	ret
+ENDPROC(__aes_ce_decrypt)
+
+/*
+ * __aes_ce_sub() - use the aese instruction to perform the AES sbox
+ *                  substitution on each byte in 'input'
+ */
+ENTRY(__aes_ce_sub)
+	dup		v1.4s, w0
+	movi		v0.16b, #0
+	aese		v0.16b, v1.16b
+	umov		w0, v0.s[0]
+	ret
+ENDPROC(__aes_ce_sub)
+
+ENTRY(__aes_ce_invert)
+	ld1		{v0.4s}, [x1]
+	aesimc		v1.16b, v0.16b
+	st1		{v1.4s}, [x0]
+	ret
+ENDPROC(__aes_ce_invert)
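As the comment on __aes_ce_sub notes, an aese applied to a zeroed accumulator reduces to pure SubBytes, so the routine computes the FIPS-197 SubWord step on a 32-bit word. A portable C rendering for reference (aes_sbox[] is the standard AES S-box, assumed here as a parameter rather than named after any kernel table):

#include <linux/types.h>

static u32 demo_subword(u32 in, const u8 aes_sbox[256])
{
	return aes_sbox[in & 0xff] |
	       (aes_sbox[(in >> 8) & 0xff] << 8) |
	       (aes_sbox[(in >> 16) & 0xff] << 16) |
	       ((u32)aes_sbox[(in >> 24) & 0xff] << 24);
}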
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-glue.c
index 6a75cd75ed11..e6b3227bbf57 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-glue.c
@@ -29,6 +29,13 @@ struct aes_block {
 	u8 b[AES_BLOCK_SIZE];
 };
 
+asmlinkage void __aes_ce_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+asmlinkage void __aes_ce_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
+asmlinkage u32 __aes_ce_sub(u32 l);
+asmlinkage void __aes_ce_invert(struct aes_block *out,
+				const struct aes_block *in);
+
 static int num_rounds(struct crypto_aes_ctx *ctx)
 {
 	/*
@@ -44,10 +51,6 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
 static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct aes_block *out = (struct aes_block *)dst;
-	struct aes_block const *in = (struct aes_block *)src;
-	void *dummy0;
-	int dummy1;
 
 	if (!may_use_simd()) {
 		__aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
@@ -55,49 +58,13 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	}
 
 	kernel_neon_begin();
-
-	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.4s}, [%[key]], #16		;"
-		"	cmp	%w[rounds], #10			;"
-		"	bmi	0f				;"
-		"	bne	3f				;"
-		"	mov	v3.16b, v1.16b			;"
-		"	b	2f				;"
-		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.4s}, [%[key]], #16		;"
-		"1:	aese	v0.16b, v2.16b			;"
-		"	aesmc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.4s}, [%[key]], #16		;"
-		"	aese	v0.16b, v3.16b			;"
-		"	aesmc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.4s}, [%[key]], #16		;"
-		"	subs	%w[rounds], %w[rounds], #3	;"
-		"	aese	v0.16b, v1.16b			;"
-		"	aesmc	v0.16b, v0.16b			;"
-		"	ld1	{v3.4s}, [%[key]], #16		;"
-		"	bpl	1b				;"
-		"	aese	v0.16b, v2.16b			;"
-		"	eor	v0.16b, v0.16b, v3.16b		;"
-		"	st1	{v0.16b}, %[out]		;"
-
-	:	[out]		"=Q"(*out),
-		[key]		"=r"(dummy0),
-		[rounds]	"=r"(dummy1)
-	:	[in]		"Q"(*in),
-				"1"(ctx->key_enc),
-				"2"(num_rounds(ctx) - 2)
-	:	"cc");
-
+	__aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
 	kernel_neon_end();
 }
 
 static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct aes_block *out = (struct aes_block *)dst;
-	struct aes_block const *in = (struct aes_block *)src;
-	void *dummy0;
-	int dummy1;
 
 	if (!may_use_simd()) {
 		__aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
@@ -105,62 +72,10 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	}
 
 	kernel_neon_begin();
-
-	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.4s}, [%[key]], #16		;"
-		"	cmp	%w[rounds], #10			;"
-		"	bmi	0f				;"
-		"	bne	3f				;"
-		"	mov	v3.16b, v1.16b			;"
-		"	b	2f				;"
-		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.4s}, [%[key]], #16		;"
-		"1:	aesd	v0.16b, v2.16b			;"
-		"	aesimc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.4s}, [%[key]], #16		;"
-		"	aesd	v0.16b, v3.16b			;"
-		"	aesimc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.4s}, [%[key]], #16		;"
-		"	subs	%w[rounds], %w[rounds], #3	;"
-		"	aesd	v0.16b, v1.16b			;"
-		"	aesimc	v0.16b, v0.16b			;"
-		"	ld1	{v3.4s}, [%[key]], #16		;"
-		"	bpl	1b				;"
-		"	aesd	v0.16b, v2.16b			;"
-		"	eor	v0.16b, v0.16b, v3.16b		;"
-		"	st1	{v0.16b}, %[out]		;"
-
-	:	[out]		"=Q"(*out),
-		[key]		"=r"(dummy0),
-		[rounds]	"=r"(dummy1)
-	:	[in]		"Q"(*in),
-				"1"(ctx->key_dec),
-				"2"(num_rounds(ctx) - 2)
-	:	"cc");
-
+	__aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
 	kernel_neon_end();
 }
 
-/*
- * aes_sub() - use the aese instruction to perform the AES sbox substitution
- *             on each byte in 'input'
- */
-static u32 aes_sub(u32 input)
-{
-	u32 ret;
-
-	__asm__("dup	v1.4s, %w[in]		;"
-		"movi	v0.16b, #0		;"
-		"aese	v0.16b, v1.16b		;"
-		"umov	%w[out], v0.4s[0]	;"
-
-	:	[out]	"=r"(ret)
-	:	[in]	"r"(input)
-	:		"v0","v1");
-
-	return ret;
-}
-
 int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 		     unsigned int key_len)
 {
@@ -189,7 +104,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
-		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
+		rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
@@ -202,7 +117,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 		} else if (key_len == AES_KEYSIZE_256) {
 			if (i >= 6)
 				break;
-			rko[4] = aes_sub(rko[3]) ^ rki[4];
+			rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
 			rko[5] = rko[4] ^ rki[5];
 			rko[6] = rko[5] ^ rki[6];
 			rko[7] = rko[6] ^ rki[7];
@@ -221,13 +136,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 
 	key_dec[0] = key_enc[j];
 	for (i = 1, j--; j > 0; i++, j--)
-		__asm__("ld1	{v0.4s}, %[in]		;"
-			"aesimc	v1.16b, v0.16b		;"
-			"st1	{v1.4s}, %[out]	;"
-
-		:	[out]	"=Q"(key_dec[i])
-		:	[in]	"Q"(key_enc[j])
-		:		"v0","v1");
+		__aes_ce_invert(key_dec + i, key_enc + j);
 	key_dec[i] = key_enc[0];
 
 	kernel_neon_end();
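With aes_sub() replaced by the out-of-line __aes_ce_sub(), ce_aes_expandkey() still follows the FIPS-197 key schedule. One AES-128 expansion round, sketched in C using demo_subword() from the sketch above (rcon_i is the round constant for round i):

#include <linux/bitops.h>	/* ror32() */

static void demo_expand_round_128(const u32 rki[4], u32 rko[4], u32 rcon_i,
				  const u8 aes_sbox[256])
{
	/* RotWord + SubWord + Rcon on the previous round key's last word */
	rko[0] = ror32(demo_subword(rki[3], aes_sbox), 8) ^ rcon_i ^ rki[0];
	rko[1] = rko[0] ^ rki[1];
	rko[2] = rko[1] ^ rki[2];
	rko[3] = rko[2] ^ rki[3];
}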
diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S
index 6d2445d603cc..3a44eada2347 100644
--- a/arch/arm64/crypto/aes-cipher-core.S
+++ b/arch/arm64/crypto/aes-cipher-core.S
@@ -125,6 +125,16 @@ CPU_BE(	rev		w7, w7		)
 	ret
 	.endm
 
+ENTRY(__aes_arm64_encrypt)
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
+ENDPROC(__aes_arm64_encrypt)
+
+	.align		5
+ENTRY(__aes_arm64_decrypt)
+	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
+ENDPROC(__aes_arm64_decrypt)
+
+	.section	".rodata", "a"
 	.align		L1_CACHE_SHIFT
 	.type		__aes_arm64_inverse_sbox, %object
 __aes_arm64_inverse_sbox:
@@ -161,12 +171,3 @@ __aes_arm64_inverse_sbox:
 	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
 	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
 	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox
-
-ENTRY(__aes_arm64_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
-ENDPROC(__aes_arm64_encrypt)
-
-	.align		5
-ENTRY(__aes_arm64_decrypt)
-	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
-ENDPROC(__aes_arm64_decrypt)
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 998ba519a026..2fa850e86aa8 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -665,6 +665,7 @@ static int __init aes_init(void)
 
 unregister_simds:
 	aes_exit();
+	return err;
 unregister_ciphers:
 	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
 	return err;
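The one-line fix above matters because the labels fall through: without the added return, a failure handled at unregister_simds would continue into unregister_ciphers and unregister the skciphers a second time. The shape of the unwind ladder, sketched with hypothetical stand-ins for the real registration steps:

static int demo_step1(void) { return 0; }	/* e.g. register skciphers */
static int demo_step2(void) { return 0; }	/* e.g. register simd wrappers */
static int demo_step3(void) { return 0; }	/* e.g. register a mac */
static void demo_exit(void) { }			/* undoes steps 1 and 2 */
static void demo_undo_step1(void) { }

static int demo_init(void)
{
	int err;

	err = demo_step1();
	if (err)
		return err;

	err = demo_step2();
	if (err)
		goto unregister_ciphers;

	err = demo_step3();
	if (err)
		goto unregister_simds;

	return 0;

unregister_simds:
	demo_exit();
	return err;		/* the fix: don't fall through */
unregister_ciphers:
	demo_undo_step1();
	return err;
}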
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index f1e3aa2732f9..1c7b45b7268e 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -32,10 +32,10 @@
 
 	/* preload the entire Sbox */
 	.macro		prepare, sbox, shiftrows, temp
-	adr		\temp, \sbox
 	movi		v12.16b, #0x1b
-	ldr		q13, \shiftrows
-	ldr		q14, .Lror32by8
+	ldr_l		q13, \shiftrows, \temp
+	ldr_l		q14, .Lror32by8, \temp
+	adr_l		\temp, \sbox
 	ld1		{v16.16b-v19.16b}, [\temp], #64
 	ld1		{v20.16b-v23.16b}, [\temp], #64
 	ld1		{v24.16b-v27.16b}, [\temp], #64
@@ -272,7 +272,7 @@
 
 #include "aes-modes.S"
 
-	.text
+	.section	".rodata", "a"
 	.align		6
 .LForward_Sbox:
 	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
diff --git a/arch/arm64/crypto/crc32-ce-core.S b/arch/arm64/crypto/crc32-ce-core.S
index 18f5a8442276..16ed3c7ebd37 100644
--- a/arch/arm64/crypto/crc32-ce-core.S
+++ b/arch/arm64/crypto/crc32-ce-core.S
@@ -50,7 +50,7 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-	.text
+	.section	".rodata", "a"
 	.align		6
 	.cpu		generic+crypto+crc
 
@@ -115,12 +115,13 @@
 	 * uint crc32_pmull_le(unsigned char const *buffer,
 	 *                     size_t len, uint crc32)
 	 */
+	.text
 ENTRY(crc32_pmull_le)
-	adr		x3, .Lcrc32_constants
+	adr_l		x3, .Lcrc32_constants
 	b		0f
 
 ENTRY(crc32c_pmull_le)
-	adr		x3, .Lcrc32c_constants
+	adr_l		x3, .Lcrc32c_constants
 
 0:	bic		LEN, LEN, #15
 	ld1		{v1.16b-v4.16b}, [BUF], #0x40
diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
index 624f4137918c..34b4e3d46aab 100644
--- a/arch/arm64/crypto/crc32-ce-glue.c
+++ b/arch/arm64/crypto/crc32-ce-glue.c
@@ -185,6 +185,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
 	.base.cra_name		= "crc32",
 	.base.cra_driver_name	= "crc32-arm64-ce",
 	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 	.base.cra_blocksize	= 1,
 	.base.cra_module	= THIS_MODULE,
 }, {
@@ -200,6 +201,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
 	.base.cra_name		= "crc32c",
 	.base.cra_driver_name	= "crc32c-arm64-ce",
 	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 	.base.cra_blocksize	= 1,
 	.base.cra_module	= THIS_MODULE,
 } };
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index d5b5a8c038c8..f179c01bd55c 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -128,7 +128,7 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 	// XOR the initial_crc value
 	eor		v0.16b, v0.16b, v10.16b
 
-	ldr		q10, rk3	// xmm10 has rk3 and rk4
+	ldr_l		q10, rk3, x8	// xmm10 has rk3 and rk4
 					// type of pmull instruction
 					// will determine which constant to use
 
@@ -184,13 +184,13 @@ CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
 	// fold the 8 vector registers to 1 vector register with different
 	// constants
 
-	ldr		q10, rk9
+	ldr_l		q10, rk9, x8
 
 	.macro		fold16, reg, rk
 	pmull		v8.1q, \reg\().1d, v10.1d
 	pmull2		\reg\().1q, \reg\().2d, v10.2d
 	.ifnb		\rk
-	ldr		q10, \rk
+	ldr_l		q10, \rk, x8
 	.endif
 	eor		v7.16b, v7.16b, v8.16b
 	eor		v7.16b, v7.16b, \reg\().16b
@@ -251,7 +251,7 @@ CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
 
 	// get rid of the extra data that was loaded before
 	// load the shift constant
-	adr		x4, tbl_shf_table + 16
+	adr_l		x4, tbl_shf_table + 16
 	sub		x4, x4, arg3
 	ld1		{v0.16b}, [x4]
 
@@ -275,7 +275,7 @@ CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
 
 _128_done:
 	// compute crc of a 128-bit value
-	ldr		q10, rk5		// rk5 and rk6 in xmm10
+	ldr_l		q10, rk5, x8		// rk5 and rk6 in xmm10
 
 	// 64b fold
 	ext		v0.16b, vzr.16b, v7.16b, #8
@@ -291,7 +291,7 @@ _128_done:
 
 	// barrett reduction
 _barrett:
-	ldr		q10, rk7
+	ldr_l		q10, rk7, x8
 	mov		v0.d[0], v7.d[1]
 
 	pmull		v0.1q, v0.1d, v10.1d
@@ -321,7 +321,7 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 	b.eq		_128_done		// exactly 16 left
 	b.lt		_less_than_16_left
 
-	ldr		q10, rk1		// rk1 and rk2 in xmm10
+	ldr_l		q10, rk1, x8		// rk1 and rk2 in xmm10
 
 	// update the counter. subtract 32 instead of 16 to save one
 	// instruction from the loop
@@ -333,7 +333,7 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 
 _less_than_16_left:
 	// shl r9, 4
-	adr		x0, tbl_shf_table + 16
+	adr_l		x0, tbl_shf_table + 16
 	sub		x0, x0, arg3
 	ld1		{v0.16b}, [x0]
 	movi		v9.16b, #0x80
@@ -345,6 +345,7 @@ ENDPROC(crc_t10dif_pmull)
 // precomputed constants
 // these constants are precomputed from the poly:
 // 0x8bb70000 (0x8bb7 scaled to 32 bits)
+	.section	".rodata", "a"
 	.align		4
 // Q = 0x18BB70000
 // rk1 = 2^(32*3) mod Q << 32
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 8550408735a0..46049850727d 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -58,12 +58,11 @@
 	sha1su1		v\s0\().4s, v\s3\().4s
 	.endm
 
-	/*
-	 * The SHA1 round constants
-	 */
-	.align		4
-.Lsha1_rcon:
-	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
+	.macro		loadrc, k, val, tmp
+	movz		\tmp, :abs_g0_nc:\val
+	movk		\tmp, :abs_g1:\val
+	dup		\k, \tmp
+	.endm
 
 	/*
 	 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
@@ -71,11 +70,10 @@
 	 */
 ENTRY(sha1_ce_transform)
 	/* load round constants */
-	adr		x6, .Lsha1_rcon
-	ld1r		{k0.4s}, [x6], #4
-	ld1r		{k1.4s}, [x6], #4
-	ld1r		{k2.4s}, [x6], #4
-	ld1r		{k3.4s}, [x6]
+	loadrc		k0.4s, 0x5a827999, w6
+	loadrc		k1.4s, 0x6ed9eba1, w6
+	loadrc		k2.4s, 0x8f1bbcdc, w6
+	loadrc		k3.4s, 0xca62c1d6, w6
 
 	/* load state */
 	ld1		{dgav.4s}, [x0]
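The loadrc macro above replaces the literal-pool loads with immediate moves: movz writes bits 0-15 of the constant (the :abs_g0_nc: relocation) and movk inserts bits 16-31 (:abs_g1:), after which dup broadcasts the word to all four vector lanes. The arithmetic, sketched in C:

#include <linux/types.h>

static u32 demo_movz_movk(u16 g0, u16 g1)
{
	u32 v = g0;		/* movz: set bits 0-15, clear the rest */

	v |= (u32)g1 << 16;	/* movk: insert bits 16-31, keep bits 0-15 */
	return v;		/* dup then splats v across k0.4s etc. */
}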
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 679c6c002f4f..4c3c89b812ce 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -53,6 +53,7 @@
 	/*
 	 * The SHA-256 round constants
 	 */
+	.section	".rodata", "a"
 	.align		4
 .Lsha2_rcon:
 	.word		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
@@ -76,9 +77,10 @@
 	 * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
 	 *			  int blocks)
 	 */
+	.text
 ENTRY(sha2_ce_transform)
 	/* load round constants */
-	adr		x8, .Lsha2_rcon
+	adr_l		x8, .Lsha2_rcon
 	ld1		{ v0.4s- v3.4s}, [x8], #64
 	ld1		{ v4.4s- v7.4s}, [x8], #64
 	ld1		{ v8.4s-v11.4s}, [x8], #64
diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S
new file mode 100644
index 000000000000..332ad7530690
--- /dev/null
+++ b/arch/arm64/crypto/sha3-ce-core.S
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+	.set	.Lv\b\().2d, \b
+	.set	.Lv\b\().16b, \b
+	.endr
+
+	/*
+	 * ARMv8.2 Crypto Extensions instructions
+	 */
+	.macro	eor3, rd, rn, rm, ra
+	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
+	.endm
+
+	.macro	rax1, rd, rn, rm
+	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+	.endm
+
+	.macro	bcax, rd, rn, rm, ra
+	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
+	.endm
+
+	.macro	xar, rd, rn, rm, imm6
+	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
+	.endm
+
+	/*
+	 * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
+	 */
+	.text
+ENTRY(sha3_ce_transform)
+	/* load state */
+	add	x8, x0, #32
+	ld1	{ v0.1d- v3.1d}, [x0]
+	ld1	{ v4.1d- v7.1d}, [x8], #32
+	ld1	{ v8.1d-v11.1d}, [x8], #32
+	ld1	{v12.1d-v15.1d}, [x8], #32
+	ld1	{v16.1d-v19.1d}, [x8], #32
+	ld1	{v20.1d-v23.1d}, [x8], #32
+	ld1	{v24.1d}, [x8]
+
+0:	sub	w2, w2, #1
+	mov	w8, #24
+	adr_l	x9, .Lsha3_rcon
+
+	/* load input */
+	ld1	{v25.8b-v28.8b}, [x1], #32
+	ld1	{v29.8b-v31.8b}, [x1], #24
+	eor	v0.8b, v0.8b, v25.8b
+	eor	v1.8b, v1.8b, v26.8b
+	eor	v2.8b, v2.8b, v27.8b
+	eor	v3.8b, v3.8b, v28.8b
+	eor	v4.8b, v4.8b, v29.8b
+	eor	v5.8b, v5.8b, v30.8b
+	eor	v6.8b, v6.8b, v31.8b
+
+	tbnz	x3, #6, 2f		// SHA3-512
+
+	ld1	{v25.8b-v28.8b}, [x1], #32
+	ld1	{v29.8b-v30.8b}, [x1], #16
+	eor	 v7.8b,  v7.8b, v25.8b
+	eor	 v8.8b,  v8.8b, v26.8b
+	eor	 v9.8b,  v9.8b, v27.8b
+	eor	v10.8b, v10.8b, v28.8b
+	eor	v11.8b, v11.8b, v29.8b
+	eor	v12.8b, v12.8b, v30.8b
+
+	tbnz	x3, #4, 1f		// SHA3-384 or SHA3-224
+
+	// SHA3-256
+	ld1	{v25.8b-v28.8b}, [x1], #32
+	eor	v13.8b, v13.8b, v25.8b
+	eor	v14.8b, v14.8b, v26.8b
+	eor	v15.8b, v15.8b, v27.8b
+	eor	v16.8b, v16.8b, v28.8b
+	b	3f
+
+1:	tbz	x3, #2, 3f		// bit 2 cleared? SHA-384
+
+	// SHA3-224
+	ld1	{v25.8b-v28.8b}, [x1], #32
+	ld1	{v29.8b}, [x1], #8
+	eor	v13.8b, v13.8b, v25.8b
+	eor	v14.8b, v14.8b, v26.8b
+	eor	v15.8b, v15.8b, v27.8b
+	eor	v16.8b, v16.8b, v28.8b
+	eor	v17.8b, v17.8b, v29.8b
+	b	3f
+
+	// SHA3-512
+2:	ld1	{v25.8b-v26.8b}, [x1], #16
+	eor	 v7.8b,  v7.8b, v25.8b
+	eor	 v8.8b,  v8.8b, v26.8b
+
+3:	sub	w8, w8, #1
+
+	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
+	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
+	eor3	v28.16b,  v3.16b,  v8.16b, v13.16b
+	eor3	v25.16b,  v0.16b,  v5.16b, v10.16b
+	eor3	v27.16b,  v2.16b,  v7.16b, v12.16b
+	eor3	v29.16b, v29.16b, v19.16b, v24.16b
+	eor3	v26.16b, v26.16b, v16.16b, v21.16b
+	eor3	v28.16b, v28.16b, v18.16b, v23.16b
+	eor3	v25.16b, v25.16b, v15.16b, v20.16b
+	eor3	v27.16b, v27.16b, v17.16b, v22.16b
+
+	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
+	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
+	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
+	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
+	rax1	v27.2d, v27.2d, v29.2d	// bc[3]
+
+	eor	 v0.16b,  v0.16b, v30.16b
+	xar	 v29.2d,   v1.2d,  v25.2d, (64 - 1)
+	xar	  v1.2d,   v6.2d,  v25.2d, (64 - 44)
+	xar	  v6.2d,   v9.2d,  v28.2d, (64 - 20)
+	xar	  v9.2d,  v22.2d,  v26.2d, (64 - 61)
+	xar	 v22.2d,  v14.2d,  v28.2d, (64 - 39)
+	xar	 v14.2d,  v20.2d,  v30.2d, (64 - 18)
+	xar	 v31.2d,   v2.2d,  v26.2d, (64 - 62)
+	xar	  v2.2d,  v12.2d,  v26.2d, (64 - 43)
+	xar	 v12.2d,  v13.2d,  v27.2d, (64 - 25)
+	xar	 v13.2d,  v19.2d,  v28.2d, (64 - 8)
+	xar	 v19.2d,  v23.2d,  v27.2d, (64 - 56)
+	xar	 v23.2d,  v15.2d,  v30.2d, (64 - 41)
+	xar	 v15.2d,   v4.2d,  v28.2d, (64 - 27)
+	xar	 v28.2d,  v24.2d,  v28.2d, (64 - 14)
+	xar	 v24.2d,  v21.2d,  v25.2d, (64 - 2)
+	xar	  v8.2d,   v8.2d,  v27.2d, (64 - 55)
+	xar	  v4.2d,  v16.2d,  v25.2d, (64 - 45)
+	xar	 v16.2d,   v5.2d,  v30.2d, (64 - 36)
+	xar	  v5.2d,   v3.2d,  v27.2d, (64 - 28)
+	xar	 v27.2d,  v18.2d,  v27.2d, (64 - 21)
+	xar	  v3.2d,  v17.2d,  v26.2d, (64 - 15)
+	xar	 v25.2d,  v11.2d,  v25.2d, (64 - 10)
+	xar	 v26.2d,   v7.2d,  v26.2d, (64 - 6)
+	xar	 v30.2d,  v10.2d,  v30.2d, (64 - 3)
+
+	bcax	v20.16b, v31.16b, v22.16b,  v8.16b
+	bcax	v21.16b,  v8.16b, v23.16b, v22.16b
+	bcax	v22.16b, v22.16b, v24.16b, v23.16b
+	bcax	v23.16b, v23.16b, v31.16b, v24.16b
+	bcax	v24.16b, v24.16b,  v8.16b, v31.16b
+
+	ld1r	{v31.2d}, [x9], #8
+
+	bcax	v17.16b, v25.16b, v19.16b,  v3.16b
+	bcax	v18.16b,  v3.16b, v15.16b, v19.16b
+	bcax	v19.16b, v19.16b, v16.16b, v15.16b
+	bcax	v15.16b, v15.16b, v25.16b, v16.16b
+	bcax	v16.16b, v16.16b,  v3.16b, v25.16b
+
+	bcax	v10.16b, v29.16b, v12.16b, v26.16b
+	bcax	v11.16b, v26.16b, v13.16b, v12.16b
+	bcax	v12.16b, v12.16b, v14.16b, v13.16b
+	bcax	v13.16b, v13.16b, v29.16b, v14.16b
+	bcax	v14.16b, v14.16b, v26.16b, v29.16b
+
+	bcax	 v7.16b, v30.16b,  v9.16b,  v4.16b
+	bcax	 v8.16b,  v4.16b,  v5.16b,  v9.16b
+	bcax	 v9.16b,  v9.16b,  v6.16b,  v5.16b
+	bcax	 v5.16b,  v5.16b, v30.16b,  v6.16b
+	bcax	 v6.16b,  v6.16b,  v4.16b, v30.16b
+
+	bcax	 v3.16b, v27.16b,  v0.16b, v28.16b
+	bcax	 v4.16b, v28.16b,  v1.16b,  v0.16b
+	bcax	 v0.16b,  v0.16b,  v2.16b,  v1.16b
+	bcax	 v1.16b,  v1.16b, v27.16b,  v2.16b
+	bcax	 v2.16b,  v2.16b, v28.16b, v27.16b
+
+	eor	 v0.16b,  v0.16b, v31.16b
+
+	cbnz	w8, 3b
+	cbnz	w2, 0b
+
+	/* save state */
+	st1	{ v0.1d- v3.1d}, [x0], #32
+	st1	{ v4.1d- v7.1d}, [x0], #32
+	st1	{ v8.1d-v11.1d}, [x0], #32
+	st1	{v12.1d-v15.1d}, [x0], #32
+	st1	{v16.1d-v19.1d}, [x0], #32
+	st1	{v20.1d-v23.1d}, [x0], #32
+	st1	{v24.1d}, [x0]
+	ret
+ENDPROC(sha3_ce_transform)
+
+	.section	".rodata", "a"
+	.align		8
+.Lsha3_rcon:
+	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
+	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
+	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
+	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
+	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
+	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
+	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
+	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008
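The bcax groups in the transform above implement Keccak's chi step: the instruction computes rn ^ (rm & ~ra) across a 128-bit vector, i.e. two lanes of a[x] ^ (~a[x+1] & a[x+2]) at once. One row of chi in plain C, for reference:

#include <linux/types.h>

static void demo_keccak_chi_row(u64 a[5])
{
	u64 t[5];
	int x;

	for (x = 0; x < 5; x++)
		t[x] = a[x] ^ (~a[(x + 1) % 5] & a[(x + 2) % 5]);
	for (x = 0; x < 5; x++)
		a[x] = t[x];
}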
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
new file mode 100644
index 000000000000..da8222e528bd
--- /dev/null
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha3.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha3_ce_transform(u64 *st, const u8 *data, int blocks,
+				  int md_len);
+
+static int sha3_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len)
+{
+	struct sha3_state *sctx = shash_desc_ctx(desc);
+	unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
+
+	if (!may_use_simd())
+		return crypto_sha3_update(desc, data, len);
+
+	if ((sctx->partial + len) >= sctx->rsiz) {
+		int blocks;
+
+		if (sctx->partial) {
+			int p = sctx->rsiz - sctx->partial;
+
+			memcpy(sctx->buf + sctx->partial, data, p);
+			kernel_neon_begin();
+			sha3_ce_transform(sctx->st, sctx->buf, 1, digest_size);
+			kernel_neon_end();
+
+			data += p;
+			len -= p;
+			sctx->partial = 0;
+		}
+
+		blocks = len / sctx->rsiz;
+		len %= sctx->rsiz;
+
+		if (blocks) {
+			kernel_neon_begin();
+			sha3_ce_transform(sctx->st, data, blocks, digest_size);
+			kernel_neon_end();
+			data += blocks * sctx->rsiz;
+		}
+	}
+
+	if (len) {
+		memcpy(sctx->buf + sctx->partial, data, len);
+		sctx->partial += len;
+	}
+	return 0;
+}
+
+static int sha3_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha3_state *sctx = shash_desc_ctx(desc);
+	unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
+	__le64 *digest = (__le64 *)out;
+	int i;
+
+	if (!may_use_simd())
+		return crypto_sha3_final(desc, out);
+
+	sctx->buf[sctx->partial++] = 0x06;
+	memset(sctx->buf + sctx->partial, 0, sctx->rsiz - sctx->partial);
+	sctx->buf[sctx->rsiz - 1] |= 0x80;
+
+	kernel_neon_begin();
+	sha3_ce_transform(sctx->st, sctx->buf, 1, digest_size);
+	kernel_neon_end();
+
+	for (i = 0; i < digest_size / 8; i++)
+		put_unaligned_le64(sctx->st[i], digest++);
+
+	if (digest_size & 4)
+		put_unaligned_le32(sctx->st[i], (__le32 *)digest);
+
+	*sctx = (struct sha3_state){};
+	return 0;
+}
+
+static struct shash_alg algs[] = { {
+	.digestsize		= SHA3_224_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= sha3_update,
+	.final			= sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-224",
+	.base.cra_driver_name	= "sha3-224-ce",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_224_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_priority	= 200,
+}, {
+	.digestsize		= SHA3_256_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= sha3_update,
+	.final			= sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-256",
+	.base.cra_driver_name	= "sha3-256-ce",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_256_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_priority	= 200,
+}, {
+	.digestsize		= SHA3_384_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= sha3_update,
+	.final			= sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-384",
+	.base.cra_driver_name	= "sha3-384-ce",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_384_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_priority	= 200,
+}, {
+	.digestsize		= SHA3_512_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= sha3_update,
+	.final			= sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-512",
+	.base.cra_driver_name	= "sha3-512-ce",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_512_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_priority	= 200,
+} };
+
+static int __init sha3_neon_mod_init(void)
+{
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha3_neon_mod_fini(void)
+{
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(SHA3, sha3_neon_mod_init);
+module_exit(sha3_neon_mod_fini);
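The dg_size argument passed to sha3_ce_transform() doubles as a mode selector: SHA-3 absorbs rate = 200 - 2 * digest_size bytes per block (144/136/104/72 for SHA3-224/256/384/512), which is why the core tests individual bits of the digest size to decide how many extra 8-byte lanes to XOR into the state. In C:

#include <linux/types.h>

static unsigned int demo_sha3_rate(unsigned int digest_size)
{
	/* the Keccak-f[1600] state is 200 bytes; the rest is capacity */
	return 200 - 2 * digest_size;
}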
diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S
new file mode 100644
index 000000000000..7f3bca5c59a2
--- /dev/null
+++ b/arch/arm64/crypto/sha512-ce-core.S
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sha512-ce-core.S - core SHA-384/SHA-512 transform using v8 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.irp		b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
+	.set		.Lq\b, \b
+	.set		.Lv\b\().2d, \b
+	.endr
+
+	.macro		sha512h, rd, rn, rm
+	.inst		0xce608000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+	.endm
+
+	.macro		sha512h2, rd, rn, rm
+	.inst		0xce608400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+	.endm
+
+	.macro		sha512su0, rd, rn
+	.inst		0xcec08000 | .L\rd | (.L\rn << 5)
+	.endm
+
+	.macro		sha512su1, rd, rn, rm
+	.inst		0xce608800 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+	.endm
+
+	/*
+	 * The SHA-512 round constants
+	 */
+	.section	".rodata", "a"
+	.align		4
+.Lsha512_rcon:
+	.quad		0x428a2f98d728ae22, 0x7137449123ef65cd
+	.quad		0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
+	.quad		0x3956c25bf348b538, 0x59f111f1b605d019
+	.quad		0x923f82a4af194f9b, 0xab1c5ed5da6d8118
+	.quad		0xd807aa98a3030242, 0x12835b0145706fbe
+	.quad		0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
+	.quad		0x72be5d74f27b896f, 0x80deb1fe3b1696b1
+	.quad		0x9bdc06a725c71235, 0xc19bf174cf692694
+	.quad		0xe49b69c19ef14ad2, 0xefbe4786384f25e3
+	.quad		0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
+	.quad		0x2de92c6f592b0275, 0x4a7484aa6ea6e483
+	.quad		0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
+	.quad		0x983e5152ee66dfab, 0xa831c66d2db43210
+	.quad		0xb00327c898fb213f, 0xbf597fc7beef0ee4
+	.quad		0xc6e00bf33da88fc2, 0xd5a79147930aa725
+	.quad		0x06ca6351e003826f, 0x142929670a0e6e70
+	.quad		0x27b70a8546d22ffc, 0x2e1b21385c26c926
+	.quad		0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
+	.quad		0x650a73548baf63de, 0x766a0abb3c77b2a8
+	.quad		0x81c2c92e47edaee6, 0x92722c851482353b
+	.quad		0xa2bfe8a14cf10364, 0xa81a664bbc423001
+	.quad		0xc24b8b70d0f89791, 0xc76c51a30654be30
+	.quad		0xd192e819d6ef5218, 0xd69906245565a910
+	.quad		0xf40e35855771202a, 0x106aa07032bbd1b8
+	.quad		0x19a4c116b8d2d0c8, 0x1e376c085141ab53
+	.quad		0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
+	.quad		0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
+	.quad		0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
+	.quad		0x748f82ee5defb2fc, 0x78a5636f43172f60
+	.quad		0x84c87814a1f0ab72, 0x8cc702081a6439ec
+	.quad		0x90befffa23631e28, 0xa4506cebde82bde9
+	.quad		0xbef9a3f7b2c67915, 0xc67178f2e372532b
+	.quad		0xca273eceea26619c, 0xd186b8c721c0c207
+	.quad		0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
+	.quad		0x06f067aa72176fba, 0x0a637dc5a2c898a6
+	.quad		0x113f9804bef90dae, 0x1b710b35131c471b
+	.quad		0x28db77f523047d84, 0x32caab7b40c72493
+	.quad		0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
+	.quad		0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
+	.quad		0x5fcb6fab3ad6faec, 0x6c44198c4a475817
+
+	.macro		dround, i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4
+	.ifnb		\rc1
+	ld1		{v\rc1\().2d}, [x4], #16
+	.endif
+	add		v5.2d, v\rc0\().2d, v\in0\().2d
+	ext		v6.16b, v\i2\().16b, v\i3\().16b, #8
+	ext		v5.16b, v5.16b, v5.16b, #8
+	ext		v7.16b, v\i1\().16b, v\i2\().16b, #8
+	add		v\i3\().2d, v\i3\().2d, v5.2d
+	.ifnb		\in1
+	ext		v5.16b, v\in3\().16b, v\in4\().16b, #8
+	sha512su0	v\in0\().2d, v\in1\().2d
+	.endif
+	sha512h		q\i3, q6, v7.2d
+	.ifnb		\in1
+	sha512su1	v\in0\().2d, v\in2\().2d, v5.2d
+	.endif
+	add		v\i4\().2d, v\i1\().2d, v\i3\().2d
+	sha512h2	q\i3, q\i1, v\i0\().2d
+	.endm
+
+	/*
+	 * void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
+	 *			  int blocks)
+	 */
+	.text
+ENTRY(sha512_ce_transform)
+	/* load state */
+	ld1		{v8.2d-v11.2d}, [x0]
+
+	/* load first 4 round constants */
+	adr_l		x3, .Lsha512_rcon
+	ld1		{v20.2d-v23.2d}, [x3], #64
+
+	/* load input */
+0:	ld1		{v12.2d-v15.2d}, [x1], #64
+	ld1		{v16.2d-v19.2d}, [x1], #64
+	sub		w2, w2, #1
+
+CPU_LE(	rev64		v12.16b, v12.16b	)
+CPU_LE(	rev64		v13.16b, v13.16b	)
+CPU_LE(	rev64		v14.16b, v14.16b	)
+CPU_LE(	rev64		v15.16b, v15.16b	)
+CPU_LE(	rev64		v16.16b, v16.16b	)
+CPU_LE(	rev64		v17.16b, v17.16b	)
+CPU_LE(	rev64		v18.16b, v18.16b	)
+CPU_LE(	rev64		v19.16b, v19.16b	)
+
+	mov		x4, x3				// rc pointer
+
+	mov		v0.16b, v8.16b
+	mov		v1.16b, v9.16b
+	mov		v2.16b, v10.16b
+	mov		v3.16b, v11.16b
+
+	// v0  ab  cd  --  ef  gh  ab
+	// v1  cd  --  ef  gh  ab  cd
+	// v2  ef  gh  ab  cd  --  ef
+	// v3  gh  ab  cd  --  ef  gh
+	// v4  --  ef  gh  ab  cd  --
+
+	dround		0, 1, 2, 3, 4, 20, 24, 12, 13, 19, 16, 17
+	dround		3, 0, 4, 2, 1, 21, 25, 13, 14, 12, 17, 18
+	dround		2, 3, 1, 4, 0, 22, 26, 14, 15, 13, 18, 19
+	dround		4, 2, 0, 1, 3, 23, 27, 15, 16, 14, 19, 12
+	dround		1, 4, 3, 0, 2, 24, 28, 16, 17, 15, 12, 13
+
+	dround		0, 1, 2, 3, 4, 25, 29, 17, 18, 16, 13, 14
+	dround		3, 0, 4, 2, 1, 26, 30, 18, 19, 17, 14, 15
+	dround		2, 3, 1, 4, 0, 27, 31, 19, 12, 18, 15, 16
+	dround		4, 2, 0, 1, 3, 28, 24, 12, 13, 19, 16, 17
+	dround		1, 4, 3, 0, 2, 29, 25, 13, 14, 12, 17, 18
+
+	dround		0, 1, 2, 3, 4, 30, 26, 14, 15, 13, 18, 19
+	dround		3, 0, 4, 2, 1, 31, 27, 15, 16, 14, 19, 12
+	dround		2, 3, 1, 4, 0, 24, 28, 16, 17, 15, 12, 13
+	dround		4, 2, 0, 1, 3, 25, 29, 17, 18, 16, 13, 14
+	dround		1, 4, 3, 0, 2, 26, 30, 18, 19, 17, 14, 15
+
+	dround		0, 1, 2, 3, 4, 27, 31, 19, 12, 18, 15, 16
+	dround		3, 0, 4, 2, 1, 28, 24, 12, 13, 19, 16, 17
+	dround		2, 3, 1, 4, 0, 29, 25, 13, 14, 12, 17, 18
+	dround		4, 2, 0, 1, 3, 30, 26, 14, 15, 13, 18, 19
+	dround		1, 4, 3, 0, 2, 31, 27, 15, 16, 14, 19, 12
+
+	dround		0, 1, 2, 3, 4, 24, 28, 16, 17, 15, 12, 13
+	dround		3, 0, 4, 2, 1, 25, 29, 17, 18, 16, 13, 14
+	dround		2, 3, 1, 4, 0, 26, 30, 18, 19, 17, 14, 15
+	dround		4, 2, 0, 1, 3, 27, 31, 19, 12, 18, 15, 16
+	dround		1, 4, 3, 0, 2, 28, 24, 12, 13, 19, 16, 17
+
+	dround		0, 1, 2, 3, 4, 29, 25, 13, 14, 12, 17, 18
+	dround		3, 0, 4, 2, 1, 30, 26, 14, 15, 13, 18, 19
+	dround		2, 3, 1, 4, 0, 31, 27, 15, 16, 14, 19, 12
+	dround		4, 2, 0, 1, 3, 24, 28, 16, 17, 15, 12, 13
+	dround		1, 4, 3, 0, 2, 25, 29, 17, 18, 16, 13, 14
+
+	dround		0, 1, 2, 3, 4, 26, 30, 18, 19, 17, 14, 15
+	dround		3, 0, 4, 2, 1, 27, 31, 19, 12, 18, 15, 16
+	dround		2, 3, 1, 4, 0, 28, 24, 12
+	dround		4, 2, 0, 1, 3, 29, 25, 13
+	dround		1, 4, 3, 0, 2, 30, 26, 14
+
+	dround		0, 1, 2, 3, 4, 31, 27, 15
+	dround		3, 0, 4, 2, 1, 24,   , 16
+	dround		2, 3, 1, 4, 0, 25,   , 17
+	dround		4, 2, 0, 1, 3, 26,   , 18
+	dround		1, 4, 3, 0, 2, 27,   , 19
+
+	/* update state */
+	add		v8.2d, v8.2d, v0.2d
+	add		v9.2d, v9.2d, v1.2d
+	add		v10.2d, v10.2d, v2.2d
+	add		v11.2d, v11.2d, v3.2d
+
+	/* handled all input blocks? */
+	cbnz		w2, 0b
+
+	/* store new state */
+3:	st1		{v8.2d-v11.2d}, [x0]
+	ret
+ENDPROC(sha512_ce_transform)
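The sha512h and sha512h2 instructions used in dround fuse the per-round boolean and rotate operations of FIPS-180-4. Their scalar equivalents, sketched for orientation:

#include <linux/bitops.h>	/* ror64() */
#include <linux/types.h>

static u64 demo_big_sigma0(u64 a) { return ror64(a, 28) ^ ror64(a, 34) ^ ror64(a, 39); }
static u64 demo_big_sigma1(u64 e) { return ror64(e, 14) ^ ror64(e, 18) ^ ror64(e, 41); }
static u64 demo_ch(u64 e, u64 f, u64 g)  { return (e & f) ^ (~e & g); }
static u64 demo_maj(u64 a, u64 b, u64 c) { return (a & b) ^ (a & c) ^ (b & c); }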
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
new file mode 100644
index 000000000000..a77c8632a589
--- /dev/null
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sha512-ce-glue.c - SHA-384/SHA-512 using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha512_base.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
+				    int blocks);
+
+asmlinkage void sha512_block_data_order(u64 *digest, u8 const *src, int blocks);
+
+static int sha512_ce_update(struct shash_desc *desc, const u8 *data,
+			    unsigned int len)
+{
+	if (!may_use_simd())
+		return sha512_base_do_update(desc, data, len,
+				(sha512_block_fn *)sha512_block_data_order);
+
+	kernel_neon_begin();
+	sha512_base_do_update(desc, data, len,
+			      (sha512_block_fn *)sha512_ce_transform);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sha512_ce_finup(struct shash_desc *desc, const u8 *data,
+			   unsigned int len, u8 *out)
+{
+	if (!may_use_simd()) {
+		if (len)
+			sha512_base_do_update(desc, data, len,
+				(sha512_block_fn *)sha512_block_data_order);
+		sha512_base_do_finalize(desc,
+				(sha512_block_fn *)sha512_block_data_order);
+		return sha512_base_finish(desc, out);
+	}
+
+	kernel_neon_begin();
+	sha512_base_do_update(desc, data, len,
+			      (sha512_block_fn *)sha512_ce_transform);
+	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_ce_transform);
+	kernel_neon_end();
+	return sha512_base_finish(desc, out);
+}
+
+static int sha512_ce_final(struct shash_desc *desc, u8 *out)
+{
+	if (!may_use_simd()) {
+		sha512_base_do_finalize(desc,
+				(sha512_block_fn *)sha512_block_data_order);
+		return sha512_base_finish(desc, out);
+	}
+
+	kernel_neon_begin();
+	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_ce_transform);
+	kernel_neon_end();
+	return sha512_base_finish(desc, out);
+}
+
+static struct shash_alg algs[] = { {
+	.init			= sha384_base_init,
+	.update			= sha512_ce_update,
+	.final			= sha512_ce_final,
+	.finup			= sha512_ce_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.digestsize		= SHA384_DIGEST_SIZE,
+	.base.cra_name		= "sha384",
+	.base.cra_driver_name	= "sha384-ce",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA512_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.init			= sha512_base_init,
+	.update			= sha512_ce_update,
+	.final			= sha512_ce_final,
+	.finup			= sha512_ce_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.digestsize		= SHA512_DIGEST_SIZE,
+	.base.cra_name		= "sha512",
+	.base.cra_driver_name	= "sha512-ce",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA512_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+} };
+
+static int __init sha512_ce_mod_init(void)
+{
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha512_ce_mod_fini(void)
+{
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(SHA512, sha512_ce_mod_init);
+module_exit(sha512_ce_mod_fini);
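All three entry points above follow the same dispatch pattern used throughout this series: when may_use_simd() reports that NEON is unusable in the current context (e.g. hard IRQ), fall back to the scalar implementation; otherwise bracket the vector transform with kernel_neon_begin()/kernel_neon_end(). A generic sketch, with demo_* as hypothetical stand-ins:

#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/types.h>

static void demo_scalar(u8 *dst, const u8 *src) { /* portable C path */ }
static void demo_neon(u8 *dst, const u8 *src)   { /* NEON path */ }

static void demo_transform(u8 *dst, const u8 *src)
{
	if (!may_use_simd()) {
		demo_scalar(dst, src);
		return;
	}

	kernel_neon_begin();
	demo_neon(dst, src);
	kernel_neon_end();
}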
diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c
index aff35c9992a4..27db4851e380 100644
--- a/arch/arm64/crypto/sha512-glue.c
+++ b/arch/arm64/crypto/sha512-glue.c
@@ -27,6 +27,7 @@ MODULE_ALIAS_CRYPTO("sha512");
 
 asmlinkage void sha512_block_data_order(u32 *digest, const void *data,
 					unsigned int num_blks);
+EXPORT_SYMBOL(sha512_block_data_order);
 
 static int sha512_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int len)
diff --git a/arch/arm64/crypto/sm3-ce-core.S b/arch/arm64/crypto/sm3-ce-core.S
new file mode 100644
index 000000000000..27169fe07a68
--- /dev/null
+++ b/arch/arm64/crypto/sm3-ce-core.S
@@ -0,0 +1,141 @@
+/*
+ * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
+	.set		.Lv\b\().4s, \b
+	.endr
+
+	.macro		sm3partw1, rd, rn, rm
+	.inst		0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+	.endm
+
+	.macro		sm3partw2, rd, rn, rm
+	.inst		0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+	.endm
+
+	.macro		sm3ss1, rd, rn, rm, ra
+	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
+	.endm
+
+	.macro		sm3tt1a, rd, rn, rm, imm2
+	.inst		0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
+	.endm
+
+	.macro		sm3tt1b, rd, rn, rm, imm2
+	.inst		0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
+	.endm
+
+	.macro		sm3tt2a, rd, rn, rm, imm2
+	.inst		0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
+	.endm
+
+	.macro		sm3tt2b, rd, rn, rm, imm2
+	.inst		0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
+	.endm
+
+	.macro		round, ab, s0, t0, t1, i
+	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
+	shl		\t1\().4s, \t0\().4s, #1
+	sri		\t1\().4s, \t0\().4s, #31
+	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
+	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
+	.endm
+
+	.macro		qround, ab, s0, s1, s2, s3, s4
+	.ifnb		\s4
+	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
+	ext		v6.16b, \s0\().16b, \s1\().16b, #12
+	ext		v7.16b, \s2\().16b, \s3\().16b, #8
+	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
+	.endif
+
+	eor		v10.16b, \s0\().16b, \s1\().16b
+
+	round		\ab, \s0, v11, v12, 0
+	round		\ab, \s0, v12, v11, 1
+	round		\ab, \s0, v11, v12, 2
+	round		\ab, \s0, v12, v11, 3
+
+	.ifnb		\s4
+	sm3partw2	\s4\().4s, v7.4s, v6.4s
+	.endif
+	.endm
+
+	/*
+	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
+	 *                       int blocks)
+	 */
+	.text
+ENTRY(sm3_ce_transform)
+	/* load state */
+	ld1		{v8.4s-v9.4s}, [x0]
+	rev64		v8.4s, v8.4s
+	rev64		v9.4s, v9.4s
+	ext		v8.16b, v8.16b, v8.16b, #8
+	ext		v9.16b, v9.16b, v9.16b, #8
+
+	adr_l		x8, .Lt
+	ldp		s13, s14, [x8]
+
+	/* load input */
+0:	ld1		{v0.16b-v3.16b}, [x1], #64
+	sub		w2, w2, #1
+
+	mov		v15.16b, v8.16b
+	mov		v16.16b, v9.16b
+
+CPU_LE(	rev32		v0.16b, v0.16b		)
+CPU_LE(	rev32		v1.16b, v1.16b		)
+CPU_LE(	rev32		v2.16b, v2.16b		)
+CPU_LE(	rev32		v3.16b, v3.16b		)
+
+	ext		v11.16b, v13.16b, v13.16b, #4
+
+	qround		a, v0, v1, v2, v3, v4
+	qround		a, v1, v2, v3, v4, v0
+	qround		a, v2, v3, v4, v0, v1
+	qround		a, v3, v4, v0, v1, v2
+
+	ext		v11.16b, v14.16b, v14.16b, #4
+
+	qround		b, v4, v0, v1, v2, v3
+	qround		b, v0, v1, v2, v3, v4
+	qround		b, v1, v2, v3, v4, v0
+	qround		b, v2, v3, v4, v0, v1
+	qround		b, v3, v4, v0, v1, v2
+	qround		b, v4, v0, v1, v2, v3
+	qround		b, v0, v1, v2, v3, v4
+	qround		b, v1, v2, v3, v4, v0
+	qround		b, v2, v3, v4, v0, v1
+	qround		b, v3, v4
+	qround		b, v4, v0
+	qround		b, v0, v1
+
+	eor		v8.16b, v8.16b, v15.16b
+	eor		v9.16b, v9.16b, v16.16b
+
+	/* handled all input blocks? */
+	cbnz		w2, 0b
+
+	/* save state */
+	rev64		v8.4s, v8.4s
+	rev64		v9.4s, v9.4s
+	ext		v8.16b, v8.16b, v8.16b, #8
+	ext		v9.16b, v9.16b, v9.16b, #8
+	st1		{v8.4s-v9.4s}, [x0]
+	ret
+ENDPROC(sm3_ce_transform)
+
+	.section	".rodata", "a"
+	.align		3
+.Lt:	.word		0x79cc4519, 0x9d8a7a87
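
The SM3 instructions are emitted as raw .inst words because assemblers of this vintage do not know the ARMv8.2 SM3 mnemonics; the .irp block at the top maps the v0-v12 register names onto the numeric values the encodings need. As a sanity check, the bit layout each macro composes can be modelled in C (illustrative only, constants copied from the macros above):

	#include <stdint.h>

	/* sm3partw1: base opcode | Rd | Rn << 5 | Rm << 16 */
	static uint32_t sm3partw1_encode(uint32_t rd, uint32_t rn, uint32_t rm)
	{
		return 0xce60c000u | rd | (rn << 5) | (rm << 16);
	}

	/* sm3tt1a additionally carries a 2-bit lane index at bit 12 */
	static uint32_t sm3tt1a_encode(uint32_t rd, uint32_t rn, uint32_t rm,
				       uint32_t imm2)
	{
		return 0xce408000u | rd | (rn << 5) | (imm2 << 12) | (rm << 16);
	}
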
diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c
new file mode 100644
index 000000000000..3b4948f7e26f
--- /dev/null
+++ b/arch/arm64/crypto/sm3-ce-glue.c
@@ -0,0 +1,92 @@
+/*
+ * sm3-ce-glue.c - SM3 secure hash using ARMv8.2 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sm3.h>
+#include <crypto/sm3_base.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
+				 int blocks);
+
+static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	if (!may_use_simd())
+		return crypto_sm3_update(desc, data, len);
+
+	kernel_neon_begin();
+	sm3_base_do_update(desc, data, len, sm3_ce_transform);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sm3_ce_final(struct shash_desc *desc, u8 *out)
+{
+	if (!may_use_simd())
+		return crypto_sm3_finup(desc, NULL, 0, out);
+
+	kernel_neon_begin();
+	sm3_base_do_finalize(desc, sm3_ce_transform);
+	kernel_neon_end();
+
+	return sm3_base_finish(desc, out);
+}
+
+static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	if (!may_use_simd())
+		return crypto_sm3_finup(desc, data, len, out);
+
+	kernel_neon_begin();
+	sm3_base_do_update(desc, data, len, sm3_ce_transform);
+	kernel_neon_end();
+
+	return sm3_ce_final(desc, out);
+}
+
+static struct shash_alg sm3_alg = {
+	.digestsize		= SM3_DIGEST_SIZE,
+	.init			= sm3_base_init,
+	.update			= sm3_ce_update,
+	.final			= sm3_ce_final,
+	.finup			= sm3_ce_finup,
+	.descsize		= sizeof(struct sm3_state),
+	.base.cra_name		= "sm3",
+	.base.cra_driver_name	= "sm3-ce",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SM3_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_priority	= 200,
+};
+
+static int __init sm3_ce_mod_init(void)
+{
+	return crypto_register_shash(&sm3_alg);
+}
+
+static void __exit sm3_ce_mod_fini(void)
+{
+	crypto_unregister_shash(&sm3_alg);
+}
+
+module_cpu_feature_match(SM3, sm3_ce_mod_init);
+module_exit(sm3_ce_mod_fini);
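
Nothing special is required of callers: the driver registers cra_name "sm3" at priority 200, so the API resolves "sm3" to sm3-ce whenever the CPU feature bit is present and falls back to the generic C implementation otherwise. A minimal kernel-side usage sketch (the function name is hypothetical):

	#include <crypto/hash.h>

	static int sm3_digest_example(const u8 *data, unsigned int len,
				      u8 out[32])
	{
		struct crypto_shash *tfm;
		int err;

		tfm = crypto_alloc_shash("sm3", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		{
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;	/* no special request flags */
			err = crypto_shash_digest(desc, data, len, out);
		}

		crypto_free_shash(tfm);
		return err;
	}
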
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
index f058e0c3e4d4..fd1d6c83f0c0 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -141,6 +141,7 @@ static struct shash_alg alg = {
 		.cra_name		= "crc32c",
 		.cra_driver_name	= "crc32c-vpmsum",
 		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(u32),
 		.cra_module		= THIS_MODULE,
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 436865926c26..423ee05887e6 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -239,6 +239,7 @@ static struct shash_alg crc32_vx_algs[] = {
 			.cra_name	 = "crc32",
 			.cra_driver_name = "crc32-vx",
 			.cra_priority	 = 200,
+			.cra_flags	 = CRYPTO_ALG_OPTIONAL_KEY,
 			.cra_blocksize	 = CRC32_BLOCK_SIZE,
 			.cra_ctxsize	 = sizeof(struct crc_ctx),
 			.cra_module	 = THIS_MODULE,
@@ -259,6 +260,7 @@ static struct shash_alg crc32_vx_algs[] = {
 			.cra_name	 = "crc32be",
 			.cra_driver_name = "crc32be-vx",
 			.cra_priority	 = 200,
+			.cra_flags	 = CRYPTO_ALG_OPTIONAL_KEY,
 			.cra_blocksize	 = CRC32_BLOCK_SIZE,
 			.cra_ctxsize	 = sizeof(struct crc_ctx),
 			.cra_module	 = THIS_MODULE,
@@ -279,6 +281,7 @@ static struct shash_alg crc32_vx_algs[] = {
 			.cra_name	 = "crc32c",
 			.cra_driver_name = "crc32c-vx",
 			.cra_priority	 = 200,
+			.cra_flags	 = CRYPTO_ALG_OPTIONAL_KEY,
 			.cra_blocksize	 = CRC32_BLOCK_SIZE,
 			.cra_ctxsize	 = sizeof(struct crc_ctx),
 			.cra_module	 = THIS_MODULE,
diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c
index d1064e46efe8..8aa664638c3c 100644
--- a/arch/sparc/crypto/crc32c_glue.c
+++ b/arch/sparc/crypto/crc32c_glue.c
@@ -133,6 +133,7 @@ static struct shash_alg alg = {
 		.cra_name		=	"crc32c",
 		.cra_driver_name	=	"crc32c-sparc64",
 		.cra_priority		=	SPARC_CR_OPCODE_PRIORITY,
+		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
 		.cra_ctxsize		=	sizeof(u32),
 		.cra_alignmask		=	7,
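
CRYPTO_ALG_OPTIONAL_KEY marks hashes whose ->setkey() supplies a seed rather than a secret. For the crc32/crc32c drivers the "key" is merely the starting CRC value, so an unkeyed tfm is still meaningful and the core must not enforce its new key-before-digest rule on them. The setkey these drivers share looks roughly like this (a sketch, names illustrative; assumes <asm/unaligned.h> and a u32 seed in the tfm context):

	static int crc32c_setkey_sketch(struct crypto_shash *hash,
					const u8 *key, unsigned int keylen)
	{
		u32 *mctx = crypto_shash_ctx(hash);

		if (keylen != sizeof(u32))
			return -EINVAL;
		*mctx = get_unaligned_le32(key);	/* seed, not a secret */
		return 0;
	}
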
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 3d09e3aca18d..12e8484a8ee7 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -90,30 +90,6 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
 ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
             .octa 0x00000000000000000000000000000000
 
-.section .rodata
-.align 16
-.type aad_shift_arr, @object
-.size aad_shift_arr, 272
-aad_shift_arr:
-        .octa     0xffffffffffffffffffffffffffffffff
-        .octa     0xffffffffffffffffffffffffffffff0C
-        .octa     0xffffffffffffffffffffffffffff0D0C
-        .octa     0xffffffffffffffffffffffffff0E0D0C
-        .octa     0xffffffffffffffffffffffff0F0E0D0C
-        .octa     0xffffffffffffffffffffff0C0B0A0908
-        .octa     0xffffffffffffffffffff0D0C0B0A0908
-        .octa     0xffffffffffffffffff0E0D0C0B0A0908
-        .octa     0xffffffffffffffff0F0E0D0C0B0A0908
-        .octa     0xffffffffffffff0C0B0A090807060504
-        .octa     0xffffffffffff0D0C0B0A090807060504
-        .octa     0xffffffffff0E0D0C0B0A090807060504
-        .octa     0xffffffff0F0E0D0C0B0A090807060504
-        .octa     0xffffff0C0B0A09080706050403020100
-        .octa     0xffff0D0C0B0A09080706050403020100
-        .octa     0xff0E0D0C0B0A09080706050403020100
-        .octa     0x0F0E0D0C0B0A09080706050403020100
-
-
 .text
 
 
@@ -257,6 +233,37 @@ aad_shift_arr:
 	pxor      \TMP1, \GH            # result is in TMP1
 .endm
 
+# Reads DLEN bytes starting at DPTR and stores in XMMDst
+# where 0 < DLEN < 16
+# Clobbers %rax, DLEN and XMM1
+.macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst
+        cmp $8, \DLEN
+        jl _read_lt8_\@
+        mov (\DPTR), %rax
+        MOVQ_R64_XMM %rax, \XMMDst
+        sub $8, \DLEN
+        jz _done_read_partial_block_\@
+	xor %eax, %eax
+_read_next_byte_\@:
+        shl $8, %rax
+        mov 7(\DPTR, \DLEN, 1), %al
+        dec \DLEN
+        jnz _read_next_byte_\@
+        MOVQ_R64_XMM %rax, \XMM1
+	pslldq $8, \XMM1
+        por \XMM1, \XMMDst
+	jmp _done_read_partial_block_\@
+_read_lt8_\@:
+	xor %eax, %eax
+_read_next_byte_lt8_\@:
+        shl $8, %rax
+        mov -1(\DPTR, \DLEN, 1), %al
+        dec \DLEN
+        jnz _read_next_byte_lt8_\@
+        MOVQ_R64_XMM %rax, \XMMDst
+_done_read_partial_block_\@:
+.endm
+
 /*
 * if a = number of total plaintext bytes
 * b = floor(a/16)
@@ -273,62 +280,30 @@ aad_shift_arr:
 XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
         MOVADQ     SHUF_MASK(%rip), %xmm14
 	mov	   arg7, %r10           # %r10 = AAD
-	mov	   arg8, %r12           # %r12 = aadLen
-	mov	   %r12, %r11
+	mov	   arg8, %r11           # %r11 = aadLen
 	pxor	   %xmm\i, %xmm\i
 	pxor       \XMM2, \XMM2
 
 	cmp	   $16, %r11
-	jl	   _get_AAD_rest8\num_initial_blocks\operation
+	jl	   _get_AAD_rest\num_initial_blocks\operation
 _get_AAD_blocks\num_initial_blocks\operation:
 	movdqu	   (%r10), %xmm\i
 	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
 	pxor	   %xmm\i, \XMM2
 	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
 	add	   $16, %r10
-	sub	   $16, %r12
 	sub	   $16, %r11
 	cmp	   $16, %r11
 	jge	   _get_AAD_blocks\num_initial_blocks\operation
 
 	movdqu	   \XMM2, %xmm\i
+
+	/* read the last <16B of AAD */
+_get_AAD_rest\num_initial_blocks\operation:
 	cmp	   $0, %r11
 	je	   _get_AAD_done\num_initial_blocks\operation
 
-	pxor	   %xmm\i,%xmm\i
-
-	/* read the last <16B of AAD. since we have at least 4B of
-	data right after the AAD (the ICV, and maybe some CT), we can
-	read 4B/8B blocks safely, and then get rid of the extra stuff */
-_get_AAD_rest8\num_initial_blocks\operation:
-	cmp	   $4, %r11
-	jle	   _get_AAD_rest4\num_initial_blocks\operation
-	movq	   (%r10), \TMP1
-	add	   $8, %r10
-	sub	   $8, %r11
-	pslldq	   $8, \TMP1
-	psrldq	   $8, %xmm\i
-	pxor	   \TMP1, %xmm\i
-	jmp	   _get_AAD_rest8\num_initial_blocks\operation
-_get_AAD_rest4\num_initial_blocks\operation:
-	cmp	   $0, %r11
-	jle	   _get_AAD_rest0\num_initial_blocks\operation
-	mov	   (%r10), %eax
-	movq	   %rax, \TMP1
-	add	   $4, %r10
-	sub	   $4, %r10
-	pslldq	   $12, \TMP1
-	psrldq	   $4, %xmm\i
-	pxor	   \TMP1, %xmm\i
-_get_AAD_rest0\num_initial_blocks\operation:
-	/* finalize: shift out the extra bytes we read, and align
-	left. since pslldq can only shift by an immediate, we use
-	vpshufb and an array of shuffle masks */
-	movq	   %r12, %r11
-	salq	   $4, %r11
-	movdqu	   aad_shift_arr(%r11), \TMP1
-	PSHUFB_XMM \TMP1, %xmm\i
-_get_AAD_rest_final\num_initial_blocks\operation:
+	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
 	pxor	   \XMM2, %xmm\i
 	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
@@ -532,62 +507,30 @@ _initial_blocks_done\num_initial_blocks\operation:
 XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
         MOVADQ     SHUF_MASK(%rip), %xmm14
 	mov	   arg7, %r10           # %r10 = AAD
-	mov	   arg8, %r12           # %r12 = aadLen
-	mov	   %r12, %r11
+	mov	   arg8, %r11           # %r11 = aadLen
 	pxor	   %xmm\i, %xmm\i
 	pxor	   \XMM2, \XMM2
 
 	cmp	   $16, %r11
-	jl	   _get_AAD_rest8\num_initial_blocks\operation
+	jl	   _get_AAD_rest\num_initial_blocks\operation
 _get_AAD_blocks\num_initial_blocks\operation:
 	movdqu	   (%r10), %xmm\i
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
 	pxor	   %xmm\i, \XMM2
 	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
 	add	   $16, %r10
-	sub	   $16, %r12
 	sub	   $16, %r11
 	cmp	   $16, %r11
 	jge	   _get_AAD_blocks\num_initial_blocks\operation
 
 	movdqu	   \XMM2, %xmm\i
+
+	/* read the last <16B of AAD */
+_get_AAD_rest\num_initial_blocks\operation:
 	cmp	   $0, %r11
 	je	   _get_AAD_done\num_initial_blocks\operation
 
-	pxor	   %xmm\i,%xmm\i
-
-	/* read the last <16B of AAD. since we have at least 4B of
-	data right after the AAD (the ICV, and maybe some PT), we can
-	read 4B/8B blocks safely, and then get rid of the extra stuff */
-_get_AAD_rest8\num_initial_blocks\operation:
-	cmp	   $4, %r11
-	jle	   _get_AAD_rest4\num_initial_blocks\operation
-	movq	   (%r10), \TMP1
-	add	   $8, %r10
-	sub	   $8, %r11
-	pslldq	   $8, \TMP1
-	psrldq	   $8, %xmm\i
-	pxor	   \TMP1, %xmm\i
-	jmp	   _get_AAD_rest8\num_initial_blocks\operation
-_get_AAD_rest4\num_initial_blocks\operation:
-	cmp	   $0, %r11
-	jle	   _get_AAD_rest0\num_initial_blocks\operation
-	mov	   (%r10), %eax
-	movq	   %rax, \TMP1
-	add	   $4, %r10
-	sub	   $4, %r10
-	pslldq	   $12, \TMP1
-	psrldq	   $4, %xmm\i
-	pxor	   \TMP1, %xmm\i
-_get_AAD_rest0\num_initial_blocks\operation:
-	/* finalize: shift out the extra bytes we read, and align
-	left. since pslldq can only shift by an immediate, we use
-	vpshufb and an array of shuffle masks */
-	movq	   %r12, %r11
-	salq	   $4, %r11
-	movdqu	   aad_shift_arr(%r11), \TMP1
-	PSHUFB_XMM \TMP1, %xmm\i
-_get_AAD_rest_final\num_initial_blocks\operation:
+	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
 	pxor	   \XMM2, %xmm\i
 	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
@@ -1386,14 +1329,6 @@ _esb_loop_\@:
 *
 *                        AAD Format with 64-bit Extended Sequence Number
 *
-* aadLen:
-*       from the definition of the spec, aadLen can only be 8 or 12 bytes.
-*       The code supports 16 too but for other sizes, the code will fail.
-*
-* TLen:
-*       from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
-*       For other sizes, the code will fail.
-*
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 *
 *****************************************************************************/
@@ -1487,19 +1422,16 @@ _zero_cipher_left_decrypt:
 	PSHUFB_XMM %xmm10, %xmm0
 
 	ENCRYPT_SINGLE_BLOCK  %xmm0, %xmm1    # E(K, Yn)
-	sub $16, %r11
-	add %r13, %r11
-	movdqu (%arg3,%r11,1), %xmm1   # receive the last <16 byte block
-	lea SHIFT_MASK+16(%rip), %r12
-	sub %r13, %r12
-# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes
-# (%r13 is the number of bytes in plaintext mod 16)
-	movdqu (%r12), %xmm2           # get the appropriate shuffle mask
-	PSHUFB_XMM %xmm2, %xmm1            # right shift 16-%r13 butes
 
+	lea (%arg3,%r11,1), %r10
+	mov %r13, %r12
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+
+	lea ALL_F+16(%rip), %r12
+	sub %r13, %r12
 	movdqa  %xmm1, %xmm2
 	pxor %xmm1, %xmm0            # Ciphertext XOR E(K, Yn)
-	movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+	movdqu (%r12), %xmm1
 	# get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
 	pand %xmm1, %xmm0            # mask out top 16-%r13 bytes of %xmm0
 	pand    %xmm1, %xmm2
@@ -1508,9 +1440,6 @@ _zero_cipher_left_decrypt:
 
 	pxor %xmm2, %xmm8
 	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-	          # GHASH computation for the last <16 byte block
-	sub %r13, %r11
-	add $16, %r11
 
         # output %r13 bytes
 	MOVQ_R64_XMM	%xmm0, %rax
@@ -1664,14 +1593,6 @@ ENDPROC(aesni_gcm_dec)
 *
 *                         AAD Format with 64-bit Extended Sequence Number
 *
-* aadLen:
-*       from the definition of the spec, aadLen can only be 8 or 12 bytes.
-*       The code supports 16 too but for other sizes, the code will fail.
-*
-* TLen:
-*       from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
-*       For other sizes, the code will fail.
-*
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 ***************************************************************************/
 ENTRY(aesni_gcm_enc)
@@ -1764,19 +1685,16 @@ _zero_cipher_left_encrypt:
         movdqa SHUF_MASK(%rip), %xmm10
 	PSHUFB_XMM %xmm10, %xmm0
 
-
 	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1        # Encrypt(K, Yn)
-	sub $16, %r11
-	add %r13, %r11
-	movdqu (%arg3,%r11,1), %xmm1     # receive the last <16 byte blocks
-	lea SHIFT_MASK+16(%rip), %r12
+
+	lea (%arg3,%r11,1), %r10
+	mov %r13, %r12
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+
+	lea ALL_F+16(%rip), %r12
 	sub %r13, %r12
-	# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
-	# (%r13 is the number of bytes in plaintext mod 16)
-	movdqu	(%r12), %xmm2           # get the appropriate shuffle mask
-	PSHUFB_XMM	%xmm2, %xmm1            # shift right 16-r13 byte
 	pxor	%xmm1, %xmm0            # Plaintext XOR Encrypt(K, Yn)
-	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
+	movdqu	(%r12), %xmm1
 	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
 	pand	%xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
         movdqa SHUF_MASK(%rip), %xmm10
@@ -1785,9 +1703,6 @@ _zero_cipher_left_encrypt:
 	pxor	%xmm0, %xmm8
 	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
 	# GHASH computation for the last <16 byte block
-	sub	%r13, %r11
-	add	$16, %r11
-
 	movdqa SHUF_MASK(%rip), %xmm10
 	PSHUFB_XMM %xmm10, %xmm0
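
The common thread in this file is READ_PARTIAL_BLOCK: the removed aad_shift_arr scheme over-read past the end of the buffer and shuffled the excess away, which is only safe when a few bytes actually follow the AAD, an assumption that does not always hold. The new macro never dereferences anything beyond the DLEN bytes it was handed. A C model of what it computes (illustrative only):

	#include <stdint.h>
	#include <string.h>

	/* Same result as READ_PARTIAL_BLOCK for 0 < dlen < 16: bytes
	 * [0, dlen) are copied, the rest of dst is zero, and no byte past
	 * dptr[dlen - 1] is ever touched. */
	static void read_partial_block(const uint8_t *dptr, size_t dlen,
				       uint8_t dst[16])
	{
		size_t i = 0;

		memset(dst, 0, 16);
		if (dlen >= 8) {		/* whole low quadword first */
			memcpy(dst, dptr, 8);
			i = 8;
		}
		for (; i < dlen; i++)		/* then one byte at a time */
			dst[i] = dptr[i];
	}
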
 
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 3bf3dcf29825..34cf1c1f8c98 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -690,8 +690,8 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
 	       rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
 }
 
-static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
-			   unsigned int key_len)
+static int gcmaes_wrapper_set_key(struct crypto_aead *parent, const u8 *key,
+				  unsigned int key_len)
 {
 	struct cryptd_aead **ctx = crypto_aead_ctx(parent);
 	struct cryptd_aead *cryptd_tfm = *ctx;
@@ -716,8 +716,8 @@ static int common_rfc4106_set_authsize(struct crypto_aead *aead,
 
 /* This is the Integrity Check Value (aka the authentication tag) length and can
  * be 8, 12 or 16 bytes long. */
-static int rfc4106_set_authsize(struct crypto_aead *parent,
-				unsigned int authsize)
+static int gcmaes_wrapper_set_authsize(struct crypto_aead *parent,
+				       unsigned int authsize)
 {
 	struct cryptd_aead **ctx = crypto_aead_ctx(parent);
 	struct cryptd_aead *cryptd_tfm = *ctx;
@@ -824,7 +824,7 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
 	if (sg_is_last(req->src) &&
 	    (!PageHighMem(sg_page(req->src)) ||
 	    req->src->offset + req->src->length <= PAGE_SIZE) &&
-	    sg_is_last(req->dst) &&
+	    sg_is_last(req->dst) && req->dst->length &&
 	    (!PageHighMem(sg_page(req->dst)) ||
 	    req->dst->offset + req->dst->length <= PAGE_SIZE)) {
 		one_entry_in_sg = 1;
@@ -929,7 +929,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 			      aes_ctx);
 }
 
-static int rfc4106_encrypt(struct aead_request *req)
+static int gcmaes_wrapper_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
@@ -945,7 +945,7 @@ static int rfc4106_encrypt(struct aead_request *req)
 	return crypto_aead_encrypt(req);
 }
 
-static int rfc4106_decrypt(struct aead_request *req)
+static int gcmaes_wrapper_decrypt(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
@@ -1117,7 +1117,7 @@ static int generic_gcmaes_decrypt(struct aead_request *req)
 {
 	__be32 counter = cpu_to_be32(1);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
 	void *aes_ctx = &(ctx->aes_key_expanded);
 	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
 
@@ -1128,6 +1128,30 @@ static int generic_gcmaes_decrypt(struct aead_request *req)
 			      aes_ctx);
 }
 
+static int generic_gcmaes_init(struct crypto_aead *aead)
+{
+	struct cryptd_aead *cryptd_tfm;
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_tfm = cryptd_alloc_aead("__driver-generic-gcm-aes-aesni",
+				       CRYPTO_ALG_INTERNAL,
+				       CRYPTO_ALG_INTERNAL);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	*ctx = cryptd_tfm;
+	crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+
+	return 0;
+}
+
+static void generic_gcmaes_exit(struct crypto_aead *aead)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_free_aead(*ctx);
+}
+
 static struct aead_alg aesni_aead_algs[] = { {
 	.setkey			= common_rfc4106_set_key,
 	.setauthsize		= common_rfc4106_set_authsize,
@@ -1147,10 +1171,10 @@ static struct aead_alg aesni_aead_algs[] = { {
 }, {
 	.init			= rfc4106_init,
 	.exit			= rfc4106_exit,
-	.setkey			= rfc4106_set_key,
-	.setauthsize		= rfc4106_set_authsize,
-	.encrypt		= rfc4106_encrypt,
-	.decrypt		= rfc4106_decrypt,
+	.setkey			= gcmaes_wrapper_set_key,
+	.setauthsize		= gcmaes_wrapper_set_authsize,
+	.encrypt		= gcmaes_wrapper_encrypt,
+	.decrypt		= gcmaes_wrapper_decrypt,
 	.ivsize			= GCM_RFC4106_IV_SIZE,
 	.maxauthsize		= 16,
 	.base = {
@@ -1170,13 +1194,31 @@ static struct aead_alg aesni_aead_algs[] = { {
 	.ivsize			= GCM_AES_IV_SIZE,
 	.maxauthsize		= 16,
 	.base = {
+		.cra_name		= "__generic-gcm-aes-aesni",
+		.cra_driver_name	= "__driver-generic-gcm-aes-aesni",
+		.cra_priority		= 0,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct generic_gcmaes_ctx),
+		.cra_alignmask		= AESNI_ALIGN - 1,
+		.cra_module		= THIS_MODULE,
+	},
+}, {
+	.init			= generic_gcmaes_init,
+	.exit			= generic_gcmaes_exit,
+	.setkey			= gcmaes_wrapper_set_key,
+	.setauthsize		= gcmaes_wrapper_set_authsize,
+	.encrypt		= gcmaes_wrapper_encrypt,
+	.decrypt		= gcmaes_wrapper_decrypt,
+	.ivsize			= GCM_AES_IV_SIZE,
+	.maxauthsize		= 16,
+	.base = {
 		.cra_name		= "gcm(aes)",
 		.cra_driver_name	= "generic-gcm-aesni",
 		.cra_priority		= 400,
 		.cra_flags		= CRYPTO_ALG_ASYNC,
 		.cra_blocksize		= 1,
-		.cra_ctxsize		= sizeof(struct generic_gcmaes_ctx),
-		.cra_alignmask		= AESNI_ALIGN - 1,
+		.cra_ctxsize		= sizeof(struct cryptd_aead *),
 		.cra_module		= THIS_MODULE,
 	},
 } };
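
The renames make the wrapper's role explicit: "rfc4106(gcm(aes))" and, after this change, "gcm(aes)" too are thin cryptd front ends whose context holds nothing but a struct cryptd_aead *, with the real work done by the CRYPTO_ALG_INTERNAL algorithm. The forwarding logic follows the existing rfc4106 pattern; a hedged sketch of the encrypt side:

	static int gcmaes_wrapper_encrypt_sketch(struct aead_request *req)
	{
		struct crypto_aead *tfm = crypto_aead_reqtfm(req);
		struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
		struct cryptd_aead *cryptd_tfm = *ctx;

		tfm = &cryptd_tfm->base;	/* default: go through cryptd */
		if (irq_fpu_usable() &&
		    (!in_atomic() || !cryptd_aead_queued(cryptd_tfm)))
			tfm = cryptd_aead_child(cryptd_tfm); /* inner alg directly */

		aead_request_set_tfm(req, tfm);
		return crypto_aead_encrypt(req);
	}
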
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 1e6af1b35f7b..dce7c5d39c2f 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -107,7 +107,6 @@ static struct skcipher_alg alg = {
 	.base.cra_priority	= 300,
 	.base.cra_blocksize	= 1,
 	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
-	.base.cra_alignmask	= sizeof(u32) - 1,
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= CHACHA20_KEY_SIZE,
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 27226df3f7d8..c8d9cdacbf10 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -162,6 +162,7 @@ static struct shash_alg alg = {
 			.cra_name		= "crc32",
 			.cra_driver_name	= "crc32-pclmul",
 			.cra_priority		= 200,
+			.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 			.cra_blocksize		= CHKSUM_BLOCK_SIZE,
 			.cra_ctxsize		= sizeof(u32),
 			.cra_module		= THIS_MODULE,
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index c194d5717ae5..5773e1161072 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -226,6 +226,7 @@ static struct shash_alg alg = {
 		.cra_name		=	"crc32c",
 		.cra_driver_name	=	"crc32c-intel",
 		.cra_priority		=	200,
+		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
 		.cra_ctxsize		=	sizeof(u32),
 		.cra_module		=	THIS_MODULE,
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index e32142bc071d..790377797544 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -164,14 +164,12 @@ static struct shash_alg alg = {
 	.init		= poly1305_simd_init,
 	.update		= poly1305_simd_update,
 	.final		= crypto_poly1305_final,
-	.setkey		= crypto_poly1305_setkey,
 	.descsize	= sizeof(struct poly1305_simd_desc_ctx),
 	.base		= {
 		.cra_name		= "poly1305",
 		.cra_driver_name	= "poly1305-simd",
 		.cra_priority		= 300,
 		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
-		.cra_alignmask		= sizeof(u32) - 1,
 		.cra_blocksize		= POLY1305_BLOCK_SIZE,
 		.cra_module		= THIS_MODULE,
 	},
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
index 329452b8f794..6014b7b9e52a 100644
--- a/arch/x86/crypto/salsa20-i586-asm_32.S
+++ b/arch/x86/crypto/salsa20-i586-asm_32.S
@@ -1,6 +1,7 @@
-# salsa20_pm.s version 20051229
-# D. J. Bernstein
-# Public domain.
+# Derived from:
+#	salsa20_pm.s version 20051229
+#	D. J. Bernstein
+#	Public domain.
 
 #include <linux/linkage.h>
 
@@ -935,180 +936,3 @@ ENTRY(salsa20_encrypt_bytes)
 	# goto bytesatleast1
 	jmp	._bytesatleast1
 ENDPROC(salsa20_encrypt_bytes)
-
-# enter salsa20_keysetup
-ENTRY(salsa20_keysetup)
-	mov	%esp,%eax
-	and	$31,%eax
-	add	$256,%eax
-	sub	%eax,%esp
-	#   eax_stack = eax
-	movl	%eax,64(%esp)
-	#   ebx_stack = ebx
-	movl	%ebx,68(%esp)
-	#   esi_stack = esi
-	movl	%esi,72(%esp)
-	#   edi_stack = edi
-	movl	%edi,76(%esp)
-	#   ebp_stack = ebp
-	movl	%ebp,80(%esp)
-	#   k = arg2
-	movl	8(%esp,%eax),%ecx
-	#   kbits = arg3
-	movl	12(%esp,%eax),%edx
-	#   x = arg1
-	movl	4(%esp,%eax),%eax
-	#   in1 = *(uint32 *) (k + 0)
-	movl	0(%ecx),%ebx
-	#   in2 = *(uint32 *) (k + 4)
-	movl	4(%ecx),%esi
-	#   in3 = *(uint32 *) (k + 8)
-	movl	8(%ecx),%edi
-	#   in4 = *(uint32 *) (k + 12)
-	movl	12(%ecx),%ebp
-	#   *(uint32 *) (x + 4) = in1
-	movl	%ebx,4(%eax)
-	#   *(uint32 *) (x + 8) = in2
-	movl	%esi,8(%eax)
-	#   *(uint32 *) (x + 12) = in3
-	movl	%edi,12(%eax)
-	#   *(uint32 *) (x + 16) = in4
-	movl	%ebp,16(%eax)
-	#   kbits - 256
-	cmp	$256,%edx
-	#   goto kbits128 if unsigned<
-	jb	._kbits128
-._kbits256:
-	#     in11 = *(uint32 *) (k + 16)
-	movl	16(%ecx),%edx
-	#     in12 = *(uint32 *) (k + 20)
-	movl	20(%ecx),%ebx
-	#     in13 = *(uint32 *) (k + 24)
-	movl	24(%ecx),%esi
-	#     in14 = *(uint32 *) (k + 28)
-	movl	28(%ecx),%ecx
-	#     *(uint32 *) (x + 44) = in11
-	movl	%edx,44(%eax)
-	#     *(uint32 *) (x + 48) = in12
-	movl	%ebx,48(%eax)
-	#     *(uint32 *) (x + 52) = in13
-	movl	%esi,52(%eax)
-	#     *(uint32 *) (x + 56) = in14
-	movl	%ecx,56(%eax)
-	#     in0 = 1634760805
-	mov	$1634760805,%ecx
-	#     in5 = 857760878
-	mov	$857760878,%edx
-	#     in10 = 2036477234
-	mov	$2036477234,%ebx
-	#     in15 = 1797285236
-	mov	$1797285236,%esi
-	#     *(uint32 *) (x + 0) = in0
-	movl	%ecx,0(%eax)
-	#     *(uint32 *) (x + 20) = in5
-	movl	%edx,20(%eax)
-	#     *(uint32 *) (x + 40) = in10
-	movl	%ebx,40(%eax)
-	#     *(uint32 *) (x + 60) = in15
-	movl	%esi,60(%eax)
-	#   goto keysetupdone
-	jmp	._keysetupdone
-._kbits128:
-	#     in11 = *(uint32 *) (k + 0)
-	movl	0(%ecx),%edx
-	#     in12 = *(uint32 *) (k + 4)
-	movl	4(%ecx),%ebx
-	#     in13 = *(uint32 *) (k + 8)
-	movl	8(%ecx),%esi
-	#     in14 = *(uint32 *) (k + 12)
-	movl	12(%ecx),%ecx
-	#     *(uint32 *) (x + 44) = in11
-	movl	%edx,44(%eax)
-	#     *(uint32 *) (x + 48) = in12
-	movl	%ebx,48(%eax)
-	#     *(uint32 *) (x + 52) = in13
-	movl	%esi,52(%eax)
-	#     *(uint32 *) (x + 56) = in14
-	movl	%ecx,56(%eax)
-	#     in0 = 1634760805
-	mov	$1634760805,%ecx
-	#     in5 = 824206446
-	mov	$824206446,%edx
-	#     in10 = 2036477238
-	mov	$2036477238,%ebx
-	#     in15 = 1797285236
-	mov	$1797285236,%esi
-	#     *(uint32 *) (x + 0) = in0
-	movl	%ecx,0(%eax)
-	#     *(uint32 *) (x + 20) = in5
-	movl	%edx,20(%eax)
-	#     *(uint32 *) (x + 40) = in10
-	movl	%ebx,40(%eax)
-	#     *(uint32 *) (x + 60) = in15
-	movl	%esi,60(%eax)
-._keysetupdone:
-	#   eax = eax_stack
-	movl	64(%esp),%eax
-	#   ebx = ebx_stack
-	movl	68(%esp),%ebx
-	#   esi = esi_stack
-	movl	72(%esp),%esi
-	#   edi = edi_stack
-	movl	76(%esp),%edi
-	#   ebp = ebp_stack
-	movl	80(%esp),%ebp
-	# leave
-	add	%eax,%esp
-	ret
-ENDPROC(salsa20_keysetup)
-
-# enter salsa20_ivsetup
-ENTRY(salsa20_ivsetup)
-	mov	%esp,%eax
-	and	$31,%eax
-	add	$256,%eax
-	sub	%eax,%esp
-	#   eax_stack = eax
-	movl	%eax,64(%esp)
-	#   ebx_stack = ebx
-	movl	%ebx,68(%esp)
-	#   esi_stack = esi
-	movl	%esi,72(%esp)
-	#   edi_stack = edi
-	movl	%edi,76(%esp)
-	#   ebp_stack = ebp
-	movl	%ebp,80(%esp)
-	#   iv = arg2
-	movl	8(%esp,%eax),%ecx
-	#   x = arg1
-	movl	4(%esp,%eax),%eax
-	#   in6 = *(uint32 *) (iv + 0)
-	movl	0(%ecx),%edx
-	#   in7 = *(uint32 *) (iv + 4)
-	movl	4(%ecx),%ecx
-	#   in8 = 0
-	mov	$0,%ebx
-	#   in9 = 0
-	mov	$0,%esi
-	#   *(uint32 *) (x + 24) = in6
-	movl	%edx,24(%eax)
-	#   *(uint32 *) (x + 28) = in7
-	movl	%ecx,28(%eax)
-	#   *(uint32 *) (x + 32) = in8
-	movl	%ebx,32(%eax)
-	#   *(uint32 *) (x + 36) = in9
-	movl	%esi,36(%eax)
-	#   eax = eax_stack
-	movl	64(%esp),%eax
-	#   ebx = ebx_stack
-	movl	68(%esp),%ebx
-	#   esi = esi_stack
-	movl	72(%esp),%esi
-	#   edi = edi_stack
-	movl	76(%esp),%edi
-	#   ebp = ebp_stack
-	movl	80(%esp),%ebp
-	# leave
-	add	%eax,%esp
-	ret
-ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
index 10db30d58006..03a4918f41ee 100644
--- a/arch/x86/crypto/salsa20-x86_64-asm_64.S
+++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S
@@ -803,117 +803,3 @@ ENTRY(salsa20_encrypt_bytes)
 	# goto bytesatleast1
 	jmp	._bytesatleast1
 ENDPROC(salsa20_encrypt_bytes)
-
-# enter salsa20_keysetup
-ENTRY(salsa20_keysetup)
-	mov	%rsp,%r11
-	and	$31,%r11
-	add	$256,%r11
-	sub	%r11,%rsp
-	#   k = arg2
-	mov	%rsi,%rsi
-	#   kbits = arg3
-	mov	%rdx,%rdx
-	#   x = arg1
-	mov	%rdi,%rdi
-	#   in0 = *(uint64 *) (k + 0)
-	movq	0(%rsi),%r8
-	#   in2 = *(uint64 *) (k + 8)
-	movq	8(%rsi),%r9
-	#   *(uint64 *) (x + 4) = in0
-	movq	%r8,4(%rdi)
-	#   *(uint64 *) (x + 12) = in2
-	movq	%r9,12(%rdi)
-	#                    unsigned<? kbits - 256
-	cmp	$256,%rdx
-	# comment:fp stack unchanged by jump
-	#   goto kbits128 if unsigned<
-	jb	._kbits128
-#   kbits256:
-._kbits256:
-	#     in10 = *(uint64 *) (k + 16)
-	movq	16(%rsi),%rdx
-	#     in12 = *(uint64 *) (k + 24)
-	movq	24(%rsi),%rsi
-	#     *(uint64 *) (x + 44) = in10
-	movq	%rdx,44(%rdi)
-	#     *(uint64 *) (x + 52) = in12
-	movq	%rsi,52(%rdi)
-	#     in0 = 1634760805
-	mov	$1634760805,%rsi
-	#     in4 = 857760878
-	mov	$857760878,%rdx
-	#     in10 = 2036477234
-	mov	$2036477234,%rcx
-	#     in14 = 1797285236
-	mov	$1797285236,%r8
-	#     *(uint32 *) (x + 0) = in0
-	movl	%esi,0(%rdi)
-	#     *(uint32 *) (x + 20) = in4
-	movl	%edx,20(%rdi)
-	#     *(uint32 *) (x + 40) = in10
-	movl	%ecx,40(%rdi)
-	#     *(uint32 *) (x + 60) = in14
-	movl	%r8d,60(%rdi)
-	# comment:fp stack unchanged by jump
-	#   goto keysetupdone
-	jmp	._keysetupdone
-#   kbits128:
-._kbits128:
-	#     in10 = *(uint64 *) (k + 0)
-	movq	0(%rsi),%rdx
-	#     in12 = *(uint64 *) (k + 8)
-	movq	8(%rsi),%rsi
-	#     *(uint64 *) (x + 44) = in10
-	movq	%rdx,44(%rdi)
-	#     *(uint64 *) (x + 52) = in12
-	movq	%rsi,52(%rdi)
-	#     in0 = 1634760805
-	mov	$1634760805,%rsi
-	#     in4 = 824206446
-	mov	$824206446,%rdx
-	#     in10 = 2036477238
-	mov	$2036477238,%rcx
-	#     in14 = 1797285236
-	mov	$1797285236,%r8
-	#     *(uint32 *) (x + 0) = in0
-	movl	%esi,0(%rdi)
-	#     *(uint32 *) (x + 20) = in4
-	movl	%edx,20(%rdi)
-	#     *(uint32 *) (x + 40) = in10
-	movl	%ecx,40(%rdi)
-	#     *(uint32 *) (x + 60) = in14
-	movl	%r8d,60(%rdi)
-#   keysetupdone:
-._keysetupdone:
-	# leave
-	add	%r11,%rsp
-	mov	%rdi,%rax
-	mov	%rsi,%rdx
-	ret
-ENDPROC(salsa20_keysetup)
-
-# enter salsa20_ivsetup
-ENTRY(salsa20_ivsetup)
-	mov	%rsp,%r11
-	and	$31,%r11
-	add	$256,%r11
-	sub	%r11,%rsp
-	#   iv = arg2
-	mov	%rsi,%rsi
-	#   x = arg1
-	mov	%rdi,%rdi
-	#   in6 = *(uint64 *) (iv + 0)
-	movq	0(%rsi),%rsi
-	#   in8 = 0
-	mov	$0,%r8
-	#   *(uint64 *) (x + 24) = in6
-	movq	%rsi,24(%rdi)
-	#   *(uint64 *) (x + 32) = in8
-	movq	%r8,32(%rdi)
-	# leave
-	add	%r11,%rsp
-	mov	%rdi,%rax
-	mov	%rsi,%rdx
-	ret
-ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index cb91a64a99e7..b07d7d959806 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -11,6 +11,9 @@
  * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
  *   available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
  *
+ * Also modified to set up the initial state using the generic C code rather
+ * than in assembly.
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
  * Software Foundation; either version 2 of the License, or (at your option)
@@ -18,93 +21,65 @@
  *
  */
 
-#include <crypto/algapi.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/salsa20.h>
 #include <linux/module.h>
-#include <linux/crypto.h>
-
-#define SALSA20_IV_SIZE        8U
-#define SALSA20_MIN_KEY_SIZE  16U
-#define SALSA20_MAX_KEY_SIZE  32U
-
-struct salsa20_ctx
-{
-	u32 input[16];
-};
 
-asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
-				 u32 keysize, u32 ivsize);
-asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
-asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
-				      const u8 *src, u8 *dst, u32 bytes);
+asmlinkage void salsa20_encrypt_bytes(u32 state[16], const u8 *src, u8 *dst,
+				      u32 bytes);
 
-static int setkey(struct crypto_tfm *tfm, const u8 *key,
-		  unsigned int keysize)
+static int salsa20_asm_crypt(struct skcipher_request *req)
 {
-	struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
-	salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8);
-	return 0;
-}
-
-static int encrypt(struct blkcipher_desc *desc,
-		   struct scatterlist *dst, struct scatterlist *src,
-		   unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	u32 state[16];
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 64);
+	err = skcipher_walk_virt(&walk, req, true);
 
-	salsa20_ivsetup(ctx, walk.iv);
+	crypto_salsa20_init(state, ctx, walk.iv);
 
-	while (walk.nbytes >= 64) {
-		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-				      walk.dst.virt.addr,
-				      walk.nbytes - (walk.nbytes % 64));
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
-	}
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
 
-	if (walk.nbytes) {
-		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-				      walk.dst.virt.addr, walk.nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		salsa20_encrypt_bytes(state, walk.src.virt.addr,
+				      walk.dst.virt.addr, nbytes);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
 	return err;
 }
 
-static struct crypto_alg alg = {
-	.cra_name           =   "salsa20",
-	.cra_driver_name    =   "salsa20-asm",
-	.cra_priority       =   200,
-	.cra_flags          =   CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_type           =   &crypto_blkcipher_type,
-	.cra_blocksize      =   1,
-	.cra_ctxsize        =   sizeof(struct salsa20_ctx),
-	.cra_alignmask      =	3,
-	.cra_module         =   THIS_MODULE,
-	.cra_u              =   {
-		.blkcipher = {
-			.setkey         =   setkey,
-			.encrypt        =   encrypt,
-			.decrypt        =   encrypt,
-			.min_keysize    =   SALSA20_MIN_KEY_SIZE,
-			.max_keysize    =   SALSA20_MAX_KEY_SIZE,
-			.ivsize         =   SALSA20_IV_SIZE,
-		}
-	}
+static struct skcipher_alg alg = {
+	.base.cra_name		= "salsa20",
+	.base.cra_driver_name	= "salsa20-asm",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct salsa20_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= SALSA20_MIN_KEY_SIZE,
+	.max_keysize		= SALSA20_MAX_KEY_SIZE,
+	.ivsize			= SALSA20_IV_SIZE,
+	.chunksize		= SALSA20_BLOCK_SIZE,
+	.setkey			= crypto_salsa20_setkey,
+	.encrypt		= salsa20_asm_crypt,
+	.decrypt		= salsa20_asm_crypt,
 };
 
 static int __init init(void)
 {
-	return crypto_register_alg(&alg);
+	return crypto_register_skcipher(&alg);
 }
 
 static void __exit fini(void)
 {
-	crypto_unregister_alg(&alg);
+	crypto_unregister_skcipher(&alg);
 }
 
 module_init(init);
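
crypto_salsa20_setkey() and crypto_salsa20_init() are the generic helpers that replace the deleted keysetup/ivsetup assembly. Assuming the salsa20_ctx layout from <crypto/salsa20.h> (a precomputed 16-word initial state), the init step amounts to this sketch:

	static void salsa20_init_sketch(u32 state[16],
					const struct salsa20_ctx *ctx,
					const u8 iv[8])
	{
		memcpy(state, ctx->initial_state, 16 * sizeof(u32));
		state[6] = get_unaligned_le32(iv + 0);	/* 64-bit nonce */
		state[7] = get_unaligned_le32(iv + 4);
		state[8] = 0;				/* 64-bit block counter */
		state[9] = 0;
	}

Note also the round_down(nbytes, walk.stride) in salsa20_asm_crypt: the assembly advances the block counter itself, so a partial 64-byte keystream block may only occur at the very end of a request.
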
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index 1c3b7ceb36d2..e7273a606a07 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -55,29 +55,31 @@
 #define RAB1bl %bl
 #define RAB2bl %cl
 
+#define CD0 0x0(%rsp)
+#define CD1 0x8(%rsp)
+#define CD2 0x10(%rsp)
+
+# used only before/after all rounds
 #define RCD0 %r8
 #define RCD1 %r9
 #define RCD2 %r10
 
-#define RCD0d %r8d
-#define RCD1d %r9d
-#define RCD2d %r10d
-
-#define RX0 %rbp
-#define RX1 %r11
-#define RX2 %r12
+# used only during rounds
+#define RX0 %r8
+#define RX1 %r9
+#define RX2 %r10
 
-#define RX0d %ebp
-#define RX1d %r11d
-#define RX2d %r12d
+#define RX0d %r8d
+#define RX1d %r9d
+#define RX2d %r10d
 
-#define RY0 %r13
-#define RY1 %r14
-#define RY2 %r15
+#define RY0 %r11
+#define RY1 %r12
+#define RY2 %r13
 
-#define RY0d %r13d
-#define RY1d %r14d
-#define RY2d %r15d
+#define RY0d %r11d
+#define RY1d %r12d
+#define RY2d %r13d
 
 #define RT0 %rdx
 #define RT1 %rsi
@@ -85,6 +87,8 @@
 #define RT0d %edx
 #define RT1d %esi
 
+#define RT1bl %sil
+
 #define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
 	movzbl ab ## bl,		tmp2 ## d; \
 	movzbl ab ## bh,		tmp1 ## d; \
@@ -92,6 +96,11 @@
 	op1##l T0(CTX, tmp2, 4),	dst ## d; \
 	op2##l T1(CTX, tmp1, 4),	dst ## d;
 
+#define swap_ab_with_cd(ab, cd, tmp)	\
+	movq cd, tmp;			\
+	movq ab, cd;			\
+	movq tmp, ab;
+
 /*
  * Combined G1 & G2 function. Reordered with help of rotates to have moves
 * at the beginning.
@@ -110,15 +119,15 @@
 	/* G1,2 && G2,2 */ \
 	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
 	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
-	xchgq cd ## 0, ab ## 0; \
+	swap_ab_with_cd(ab ## 0, cd ## 0, RT0); \
 	\
 	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
 	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
-	xchgq cd ## 1, ab ## 1; \
+	swap_ab_with_cd(ab ## 1, cd ## 1, RT0); \
 	\
 	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
 	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
-	xchgq cd ## 2, ab ## 2;
+	swap_ab_with_cd(ab ## 2, cd ## 2, RT0);
 
 #define enc_round_end(ab, x, y, n) \
 	addl y ## d,			x ## d; \
@@ -168,6 +177,16 @@
 	decrypt_round3(ba, dc, (n*2)+1); \
 	decrypt_round3(ba, dc, (n*2));
 
+#define push_cd()	\
+	pushq RCD2;	\
+	pushq RCD1;	\
+	pushq RCD0;
+
+#define pop_cd()	\
+	popq RCD0;	\
+	popq RCD1;	\
+	popq RCD2;
+
 #define inpack3(in, n, xy, m) \
 	movq 4*(n)(in),			xy ## 0; \
 	xorq w+4*m(CTX),		xy ## 0; \
@@ -223,11 +242,8 @@ ENTRY(__twofish_enc_blk_3way)
 	 *	%rdx: src, RIO
 	 *	%rcx: bool, if true: xor output
 	 */
-	pushq %r15;
-	pushq %r14;
 	pushq %r13;
 	pushq %r12;
-	pushq %rbp;
 	pushq %rbx;
 
 	pushq %rcx; /* bool xor */
@@ -235,40 +251,36 @@ ENTRY(__twofish_enc_blk_3way)
 
 	inpack_enc3();
 
-	encrypt_cycle3(RAB, RCD, 0);
-	encrypt_cycle3(RAB, RCD, 1);
-	encrypt_cycle3(RAB, RCD, 2);
-	encrypt_cycle3(RAB, RCD, 3);
-	encrypt_cycle3(RAB, RCD, 4);
-	encrypt_cycle3(RAB, RCD, 5);
-	encrypt_cycle3(RAB, RCD, 6);
-	encrypt_cycle3(RAB, RCD, 7);
+	push_cd();
+	encrypt_cycle3(RAB, CD, 0);
+	encrypt_cycle3(RAB, CD, 1);
+	encrypt_cycle3(RAB, CD, 2);
+	encrypt_cycle3(RAB, CD, 3);
+	encrypt_cycle3(RAB, CD, 4);
+	encrypt_cycle3(RAB, CD, 5);
+	encrypt_cycle3(RAB, CD, 6);
+	encrypt_cycle3(RAB, CD, 7);
+	pop_cd();
 
 	popq RIO; /* dst */
-	popq %rbp; /* bool xor */
+	popq RT1; /* bool xor */
 
-	testb %bpl, %bpl;
+	testb RT1bl, RT1bl;
 	jnz .L__enc_xor3;
 
 	outunpack_enc3(mov);
 
 	popq %rbx;
-	popq %rbp;
 	popq %r12;
 	popq %r13;
-	popq %r14;
-	popq %r15;
 	ret;
 
 .L__enc_xor3:
 	outunpack_enc3(xor);
 
 	popq %rbx;
-	popq %rbp;
 	popq %r12;
 	popq %r13;
-	popq %r14;
-	popq %r15;
 	ret;
 ENDPROC(__twofish_enc_blk_3way)
 
@@ -278,35 +290,31 @@ ENTRY(twofish_dec_blk_3way)
 	 *	%rsi: dst
 	 *	%rdx: src, RIO
 	 */
-	pushq %r15;
-	pushq %r14;
 	pushq %r13;
 	pushq %r12;
-	pushq %rbp;
 	pushq %rbx;
 
 	pushq %rsi; /* dst */
 
 	inpack_dec3();
 
-	decrypt_cycle3(RAB, RCD, 7);
-	decrypt_cycle3(RAB, RCD, 6);
-	decrypt_cycle3(RAB, RCD, 5);
-	decrypt_cycle3(RAB, RCD, 4);
-	decrypt_cycle3(RAB, RCD, 3);
-	decrypt_cycle3(RAB, RCD, 2);
-	decrypt_cycle3(RAB, RCD, 1);
-	decrypt_cycle3(RAB, RCD, 0);
+	push_cd();
+	decrypt_cycle3(RAB, CD, 7);
+	decrypt_cycle3(RAB, CD, 6);
+	decrypt_cycle3(RAB, CD, 5);
+	decrypt_cycle3(RAB, CD, 4);
+	decrypt_cycle3(RAB, CD, 3);
+	decrypt_cycle3(RAB, CD, 2);
+	decrypt_cycle3(RAB, CD, 1);
+	decrypt_cycle3(RAB, CD, 0);
+	pop_cd();
 
 	popq RIO; /* dst */
 
 	outunpack_dec3();
 
 	popq %rbx;
-	popq %rbp;
 	popq %r12;
 	popq %r13;
-	popq %r14;
-	popq %r15;
 	ret;
 ENDPROC(twofish_dec_blk_3way)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 20360e040425..b75264b09a46 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -131,7 +131,7 @@ config CRYPTO_DH
 
 config CRYPTO_ECDH
 	tristate "ECDH algorithm"
-	select CRYTPO_KPP
+	select CRYPTO_KPP
 	select CRYPTO_RNG_DEFAULT
 	help
 	  Generic implementation of the ECDH algorithm
@@ -1340,6 +1340,7 @@ config CRYPTO_SALSA20_586
 	tristate "Salsa20 stream cipher algorithm (i586)"
 	depends on (X86 || UML_X86) && !64BIT
 	select CRYPTO_BLKCIPHER
+	select CRYPTO_SALSA20
 	help
 	  Salsa20 stream cipher algorithm.
 
@@ -1353,6 +1354,7 @@ config CRYPTO_SALSA20_X86_64
 	tristate "Salsa20 stream cipher algorithm (x86_64)"
 	depends on (X86 || UML_X86) && 64BIT
 	select CRYPTO_BLKCIPHER
+	select CRYPTO_SALSA20
 	help
 	  Salsa20 stream cipher algorithm.
 
diff --git a/crypto/Makefile b/crypto/Makefile
index d674884b2d51..cdbc03b35510 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
 obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
 CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
+CFLAGS_aes_generic.o := $(call cc-option,-fno-code-hoisting) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356
 obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
 obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
 obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c
index 1441f07d0a19..09776bb1360e 100644
--- a/crypto/ablk_helper.c
+++ b/crypto/ablk_helper.c
@@ -18,9 +18,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  */
 
@@ -28,7 +26,6 @@
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <crypto/algapi.h>
 #include <crypto/cryptd.h>
 #include <crypto/ablk_helper.h>
diff --git a/crypto/aead.c b/crypto/aead.c
index f794b30a9407..60b3bbe973e7 100644
--- a/crypto/aead.c
+++ b/crypto/aead.c
@@ -54,11 +54,18 @@ int crypto_aead_setkey(struct crypto_aead *tfm,
 		       const u8 *key, unsigned int keylen)
 {
 	unsigned long alignmask = crypto_aead_alignmask(tfm);
+	int err;
 
 	if ((unsigned long)key & alignmask)
-		return setkey_unaligned(tfm, key, keylen);
+		err = setkey_unaligned(tfm, key, keylen);
+	else
+		err = crypto_aead_alg(tfm)->setkey(tfm, key, keylen);
+
+	if (err)
+		return err;
 
-	return crypto_aead_alg(tfm)->setkey(tfm, key, keylen);
+	crypto_aead_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_aead_setkey);
 
@@ -93,6 +100,8 @@ static int crypto_aead_init_tfm(struct crypto_tfm *tfm)
 	struct crypto_aead *aead = __crypto_aead_cast(tfm);
 	struct aead_alg *alg = crypto_aead_alg(aead);
 
+	crypto_aead_set_flags(aead, CRYPTO_TFM_NEED_KEY);
+
 	aead->authsize = alg->maxauthsize;
 
 	if (alg->exit)
@@ -295,7 +304,7 @@ int aead_init_geniv(struct crypto_aead *aead)
 	if (err)
 		goto out;
 
-	ctx->sknull = crypto_get_default_null_skcipher2();
+	ctx->sknull = crypto_get_default_null_skcipher();
 	err = PTR_ERR(ctx->sknull);
 	if (IS_ERR(ctx->sknull))
 		goto out;
@@ -315,7 +324,7 @@ out:
 	return err;
 
 drop_null:
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 	goto out;
 }
 EXPORT_SYMBOL_GPL(aead_init_geniv);
@@ -325,7 +334,7 @@ void aead_exit_geniv(struct crypto_aead *tfm)
 	struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm);
 
 	crypto_free_aead(ctx->child);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 }
 EXPORT_SYMBOL_GPL(aead_exit_geniv);
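
Taken together this establishes the NEED_KEY protocol for AEADs: crypto_aead_init_tfm() raises CRYPTO_TFM_NEED_KEY and only a successful setkey clears it, so upper layers no longer have to shadow the key state in their own has_key flags. A hypothetical caller-side check (illustrative, not part of the patch):

	static int aead_encrypt_if_keyed(struct aead_request *req)
	{
		struct crypto_aead *tfm = crypto_aead_reqtfm(req);

		if (crypto_aead_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
			return -ENOKEY;		/* no key accepted yet */

		return crypto_aead_encrypt(req);
	}
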
 
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index f41047ab60f5..0f8d8d5523c3 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -150,7 +150,7 @@ EXPORT_SYMBOL_GPL(af_alg_release_parent);
 
 static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
-	const u32 forbidden = CRYPTO_ALG_INTERNAL;
+	const u32 allowed = CRYPTO_ALG_KERN_DRIVER_ONLY;
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct sockaddr_alg *sa = (void *)uaddr;
@@ -158,6 +158,10 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	void *private;
 	int err;
 
+	/* If the caller uses a flag that is not allowed, return an error. */
+	if ((sa->salg_feat & ~allowed) || (sa->salg_mask & ~allowed))
+		return -EINVAL;
+
 	if (sock->state == SS_CONNECTED)
 		return -EINVAL;
 
@@ -176,9 +180,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (IS_ERR(type))
 		return PTR_ERR(type);
 
-	private = type->bind(sa->salg_name,
-			     sa->salg_feat & ~forbidden,
-			     sa->salg_mask & ~forbidden);
+	private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask);
 	if (IS_ERR(private)) {
 		module_put(type->owner);
 		return PTR_ERR(private);
diff --git a/crypto/ahash.c b/crypto/ahash.c
index 3a35d67de7d9..266fc1d64f61 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -193,11 +193,18 @@ int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
 			unsigned int keylen)
 {
 	unsigned long alignmask = crypto_ahash_alignmask(tfm);
+	int err;
 
 	if ((unsigned long)key & alignmask)
-		return ahash_setkey_unaligned(tfm, key, keylen);
+		err = ahash_setkey_unaligned(tfm, key, keylen);
+	else
+		err = tfm->setkey(tfm, key, keylen);
+
+	if (err)
+		return err;
 
-	return tfm->setkey(tfm, key, keylen);
+	crypto_ahash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_setkey);
 
@@ -368,7 +375,12 @@ EXPORT_SYMBOL_GPL(crypto_ahash_finup);
 
 int crypto_ahash_digest(struct ahash_request *req)
 {
-	return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->digest);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+
+	if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+		return -ENOKEY;
+
+	return crypto_ahash_op(req, tfm->digest);
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_digest);
 
@@ -450,7 +462,6 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
 	struct ahash_alg *alg = crypto_ahash_alg(hash);
 
 	hash->setkey = ahash_nosetkey;
-	hash->has_setkey = false;
 	hash->export = ahash_no_export;
 	hash->import = ahash_no_import;
 
@@ -465,7 +476,8 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
 
 	if (alg->setkey) {
 		hash->setkey = alg->setkey;
-		hash->has_setkey = true;
+		if (!(alg->halg.base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY))
+			crypto_ahash_set_flags(hash, CRYPTO_TFM_NEED_KEY);
 	}
 	if (alg->export)
 		hash->export = alg->export;
@@ -649,5 +661,16 @@ struct hash_alg_common *ahash_attr_alg(struct rtattr *rta, u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(ahash_attr_alg);
 
+bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg)
+{
+	struct crypto_alg *alg = &halg->base;
+
+	if (alg->cra_type != &crypto_ahash_type)
+		return crypto_shash_alg_has_setkey(__crypto_shash_alg(alg));
+
+	return __crypto_ahash_alg(alg)->setkey != NULL;
+}
+EXPORT_SYMBOL_GPL(crypto_hash_alg_has_setkey);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Asynchronous cryptographic hash type");
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 9a636f961572..395b082d03a9 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -62,7 +62,7 @@ static int crypto_check_alg(struct crypto_alg *alg)
 	if (alg->cra_priority < 0)
 		return -EINVAL;
 
-	atomic_set(&alg->cra_refcnt, 1);
+	refcount_set(&alg->cra_refcnt, 1);
 
 	return crypto_set_driver_name(alg);
 }
@@ -123,7 +123,6 @@ static void crypto_remove_instance(struct crypto_instance *inst,
 	if (!tmpl || !crypto_tmpl_get(tmpl))
 		return;
 
-	crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, &inst->alg);
 	list_move(&inst->alg.cra_list, list);
 	hlist_del(&inst->list);
 	inst->alg.cra_destroy = crypto_destroy_instance;
@@ -236,7 +235,7 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
 	if (!larval->adult)
 		goto free_larval;
 
-	atomic_set(&larval->alg.cra_refcnt, 1);
+	refcount_set(&larval->alg.cra_refcnt, 1);
 	memcpy(larval->alg.cra_driver_name, alg->cra_driver_name,
 	       CRYPTO_MAX_ALG_NAME);
 	larval->alg.cra_priority = alg->cra_priority;
@@ -392,7 +391,6 @@ static int crypto_remove_alg(struct crypto_alg *alg, struct list_head *list)
 
 	alg->cra_flags |= CRYPTO_ALG_DEAD;
 
-	crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, alg);
 	list_del_init(&alg->cra_list);
 	crypto_remove_spawns(alg, list, NULL);
 
@@ -411,7 +409,7 @@ int crypto_unregister_alg(struct crypto_alg *alg)
 	if (ret)
 		return ret;
 
-	BUG_ON(atomic_read(&alg->cra_refcnt) != 1);
+	BUG_ON(refcount_read(&alg->cra_refcnt) != 1);
 	if (alg->cra_destroy)
 		alg->cra_destroy(alg);
 
@@ -470,7 +468,6 @@ int crypto_register_template(struct crypto_template *tmpl)
 	}
 
 	list_add(&tmpl->list, &crypto_template_list);
-	crypto_notify(CRYPTO_MSG_TMPL_REGISTER, tmpl);
 	err = 0;
 out:
 	up_write(&crypto_alg_sem);
@@ -497,12 +494,10 @@ void crypto_unregister_template(struct crypto_template *tmpl)
 		BUG_ON(err);
 	}
 
-	crypto_notify(CRYPTO_MSG_TMPL_UNREGISTER, tmpl);
-
 	up_write(&crypto_alg_sem);
 
 	hlist_for_each_entry_safe(inst, n, list, list) {
-		BUG_ON(atomic_read(&inst->alg.cra_refcnt) != 1);
+		BUG_ON(refcount_read(&inst->alg.cra_refcnt) != 1);
 		crypto_free_instance(inst);
 	}
 	crypto_remove_final(&users);
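
The cra_refcnt conversion from atomic_t to refcount_t is a hardening change: refcount_t saturates instead of wrapping, so a reference-count overflow becomes a WARN plus a leaked object rather than a potential use-after-free. The API is otherwise a drop-in replacement (illustrative):

	#include <linux/refcount.h>

	struct example_obj {
		refcount_t ref;
	};

	static void example_get(struct example_obj *o)
	{
		refcount_inc(&o->ref);	/* WARNs and saturates on overflow */
	}

	static bool example_put(struct example_obj *o)
	{
		/* true on the final put, when the object may be freed */
		return refcount_dec_and_test(&o->ref);
	}
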
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index e9885a35ef6e..4b07edd5a9ff 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -42,7 +42,6 @@
 
 struct aead_tfm {
 	struct crypto_aead *aead;
-	bool has_key;
 	struct crypto_skcipher *null_tfm;
 };
 
@@ -398,7 +397,7 @@ static int aead_check_key(struct socket *sock)
 
 	err = -ENOKEY;
 	lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
-	if (!tfm->has_key)
+	if (crypto_aead_get_flags(tfm->aead) & CRYPTO_TFM_NEED_KEY)
 		goto unlock;
 
 	if (!pask->refcnt++)
@@ -491,7 +490,7 @@ static void *aead_bind(const char *name, u32 type, u32 mask)
 		return ERR_CAST(aead);
 	}
 
-	null_tfm = crypto_get_default_null_skcipher2();
+	null_tfm = crypto_get_default_null_skcipher();
 	if (IS_ERR(null_tfm)) {
 		crypto_free_aead(aead);
 		kfree(tfm);
@@ -509,7 +508,7 @@ static void aead_release(void *private)
 	struct aead_tfm *tfm = private;
 
 	crypto_free_aead(tfm->aead);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 	kfree(tfm);
 }
 
@@ -523,12 +522,8 @@ static int aead_setauthsize(void *private, unsigned int authsize)
 static int aead_setkey(void *private, const u8 *key, unsigned int keylen)
 {
 	struct aead_tfm *tfm = private;
-	int err;
-
-	err = crypto_aead_setkey(tfm->aead, key, keylen);
-	tfm->has_key = !err;
 
-	return err;
+	return crypto_aead_setkey(tfm->aead, key, keylen);
 }
 
 static void aead_sock_destruct(struct sock *sk)
@@ -589,7 +584,7 @@ static int aead_accept_parent(void *private, struct sock *sk)
 {
 	struct aead_tfm *tfm = private;
 
-	if (!tfm->has_key)
+	if (crypto_aead_get_flags(tfm->aead) & CRYPTO_TFM_NEED_KEY)
 		return -ENOKEY;
 
 	return aead_accept_parent_nokey(private, sk);
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 76d2e716c792..6c9b1927a520 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -34,11 +34,6 @@ struct hash_ctx {
 	struct ahash_request req;
 };
 
-struct algif_hash_tfm {
-	struct crypto_ahash *hash;
-	bool has_key;
-};
-
 static int hash_alloc_result(struct sock *sk, struct hash_ctx *ctx)
 {
 	unsigned ds;
@@ -307,7 +302,7 @@ static int hash_check_key(struct socket *sock)
 	int err = 0;
 	struct sock *psk;
 	struct alg_sock *pask;
-	struct algif_hash_tfm *tfm;
+	struct crypto_ahash *tfm;
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 
@@ -321,7 +316,7 @@ static int hash_check_key(struct socket *sock)
 
 	err = -ENOKEY;
 	lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
-	if (!tfm->has_key)
+	if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
 		goto unlock;
 
 	if (!pask->refcnt++)
@@ -412,41 +407,17 @@ static struct proto_ops algif_hash_ops_nokey = {
 
 static void *hash_bind(const char *name, u32 type, u32 mask)
 {
-	struct algif_hash_tfm *tfm;
-	struct crypto_ahash *hash;
-
-	tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
-	if (!tfm)
-		return ERR_PTR(-ENOMEM);
-
-	hash = crypto_alloc_ahash(name, type, mask);
-	if (IS_ERR(hash)) {
-		kfree(tfm);
-		return ERR_CAST(hash);
-	}
-
-	tfm->hash = hash;
-
-	return tfm;
+	return crypto_alloc_ahash(name, type, mask);
 }
 
 static void hash_release(void *private)
 {
-	struct algif_hash_tfm *tfm = private;
-
-	crypto_free_ahash(tfm->hash);
-	kfree(tfm);
+	crypto_free_ahash(private);
 }
 
 static int hash_setkey(void *private, const u8 *key, unsigned int keylen)
 {
-	struct algif_hash_tfm *tfm = private;
-	int err;
-
-	err = crypto_ahash_setkey(tfm->hash, key, keylen);
-	tfm->has_key = !err;
-
-	return err;
+	return crypto_ahash_setkey(private, key, keylen);
 }
 
 static void hash_sock_destruct(struct sock *sk)
@@ -461,11 +432,10 @@ static void hash_sock_destruct(struct sock *sk)
 
 static int hash_accept_parent_nokey(void *private, struct sock *sk)
 {
-	struct hash_ctx *ctx;
+	struct crypto_ahash *tfm = private;
 	struct alg_sock *ask = alg_sk(sk);
-	struct algif_hash_tfm *tfm = private;
-	struct crypto_ahash *hash = tfm->hash;
-	unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash);
+	struct hash_ctx *ctx;
+	unsigned int len = sizeof(*ctx) + crypto_ahash_reqsize(tfm);
 
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
 	if (!ctx)
@@ -478,7 +448,7 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk)
 
 	ask->private = ctx;
 
-	ahash_request_set_tfm(&ctx->req, hash);
+	ahash_request_set_tfm(&ctx->req, tfm);
 	ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				   crypto_req_done, &ctx->wait);
 
@@ -489,9 +459,9 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk)
 
 static int hash_accept_parent(void *private, struct sock *sk)
 {
-	struct algif_hash_tfm *tfm = private;
+	struct crypto_ahash *tfm = private;
 
-	if (!tfm->has_key && crypto_ahash_has_setkey(tfm->hash))
+	if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
 		return -ENOKEY;
 
 	return hash_accept_parent_nokey(private, sk);
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index f50907430c92..c4e885df4564 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -38,11 +38,6 @@
 #include <linux/net.h>
 #include <net/sock.h>
 
-struct skcipher_tfm {
-	struct crypto_skcipher *skcipher;
-	bool has_key;
-};
-
 static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 			    size_t size)
 {
@@ -50,8 +45,7 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct alg_sock *ask = alg_sk(sk);
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_tfm *skc = pask->private;
-	struct crypto_skcipher *tfm = skc->skcipher;
+	struct crypto_skcipher *tfm = pask->private;
 	unsigned ivsize = crypto_skcipher_ivsize(tfm);
 
 	return af_alg_sendmsg(sock, msg, size, ivsize);
@@ -65,8 +59,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
 	struct af_alg_ctx *ctx = ask->private;
-	struct skcipher_tfm *skc = pask->private;
-	struct crypto_skcipher *tfm = skc->skcipher;
+	struct crypto_skcipher *tfm = pask->private;
 	unsigned int bs = crypto_skcipher_blocksize(tfm);
 	struct af_alg_async_req *areq;
 	int err = 0;
@@ -220,7 +213,7 @@ static int skcipher_check_key(struct socket *sock)
 	int err = 0;
 	struct sock *psk;
 	struct alg_sock *pask;
-	struct skcipher_tfm *tfm;
+	struct crypto_skcipher *tfm;
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 
@@ -234,7 +227,7 @@ static int skcipher_check_key(struct socket *sock)
 
 	err = -ENOKEY;
 	lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
-	if (!tfm->has_key)
+	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
 		goto unlock;
 
 	if (!pask->refcnt++)
@@ -313,41 +306,17 @@ static struct proto_ops algif_skcipher_ops_nokey = {
 
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
 {
-	struct skcipher_tfm *tfm;
-	struct crypto_skcipher *skcipher;
-
-	tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
-	if (!tfm)
-		return ERR_PTR(-ENOMEM);
-
-	skcipher = crypto_alloc_skcipher(name, type, mask);
-	if (IS_ERR(skcipher)) {
-		kfree(tfm);
-		return ERR_CAST(skcipher);
-	}
-
-	tfm->skcipher = skcipher;
-
-	return tfm;
+	return crypto_alloc_skcipher(name, type, mask);
 }
 
 static void skcipher_release(void *private)
 {
-	struct skcipher_tfm *tfm = private;
-
-	crypto_free_skcipher(tfm->skcipher);
-	kfree(tfm);
+	crypto_free_skcipher(private);
 }
 
 static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
 {
-	struct skcipher_tfm *tfm = private;
-	int err;
-
-	err = crypto_skcipher_setkey(tfm->skcipher, key, keylen);
-	tfm->has_key = !err;
-
-	return err;
+	return crypto_skcipher_setkey(private, key, keylen);
 }
 
 static void skcipher_sock_destruct(struct sock *sk)
@@ -356,8 +325,7 @@ static void skcipher_sock_destruct(struct sock *sk)
 	struct af_alg_ctx *ctx = ask->private;
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_tfm *skc = pask->private;
-	struct crypto_skcipher *tfm = skc->skcipher;
+	struct crypto_skcipher *tfm = pask->private;
 
 	af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
 	sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm));
@@ -369,22 +337,21 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 {
 	struct af_alg_ctx *ctx;
 	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_tfm *tfm = private;
-	struct crypto_skcipher *skcipher = tfm->skcipher;
+	struct crypto_skcipher *tfm = private;
 	unsigned int len = sizeof(*ctx);
 
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
-	ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(skcipher),
+	ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(tfm),
 			       GFP_KERNEL);
 	if (!ctx->iv) {
 		sock_kfree_s(sk, ctx, len);
 		return -ENOMEM;
 	}
 
-	memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
+	memset(ctx->iv, 0, crypto_skcipher_ivsize(tfm));
 
 	INIT_LIST_HEAD(&ctx->tsgl_list);
 	ctx->len = len;
@@ -404,9 +371,9 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 
 static int skcipher_accept_parent(void *private, struct sock *sk)
 {
-	struct skcipher_tfm *tfm = private;
+	struct crypto_skcipher *tfm = private;
 
-	if (!tfm->has_key && crypto_skcipher_has_setkey(tfm->skcipher))
+	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
 		return -ENOKEY;
 
 	return skcipher_accept_parent_nokey(private, sk);
diff --git a/crypto/api.c b/crypto/api.c
index 2a2479d168aa..70a894e52ff3 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -137,7 +137,7 @@ static struct crypto_alg *crypto_larval_add(const char *name, u32 type,
 	if (IS_ERR(larval))
 		return ERR_CAST(larval);
 
-	atomic_set(&larval->alg.cra_refcnt, 2);
+	refcount_set(&larval->alg.cra_refcnt, 2);
 
 	down_write(&crypto_alg_sem);
 	alg = __crypto_alg_lookup(name, type, mask);
@@ -205,7 +205,8 @@ struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(crypto_alg_lookup);
 
-struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
+static struct crypto_alg *crypto_larval_lookup(const char *name, u32 type,
+					       u32 mask)
 {
 	struct crypto_alg *alg;
 
@@ -231,7 +232,6 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
 
 	return crypto_larval_add(name, type, mask);
 }
-EXPORT_SYMBOL_GPL(crypto_larval_lookup);
 
 int crypto_probing_notify(unsigned long val, void *v)
 {
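
Switching cra_refcnt from atomic_t to refcount_t is more than a rename:
refcount_t saturates instead of wrapping, so an over- or underflow pins the
counter (leaking the object) rather than enabling a double free. A user-space
sketch of those semantics (ours; the real type lives in <linux/refcount.h>
and is atomic):

#include <assert.h>
#include <limits.h>
#include <stdbool.h>

struct refcount { unsigned int val; };

static void refcount_set(struct refcount *r, unsigned int n) { r->val = n; }

static void refcount_inc(struct refcount *r)
{
	if (r->val == UINT_MAX)		/* saturated: pin, never wrap */
		return;
	assert(r->val != 0);		/* inc-from-zero is use-after-free */
	r->val++;
}

static bool refcount_dec_and_test(struct refcount *r)
{
	if (r->val == UINT_MAX)		/* saturated objects are never freed */
		return false;
	assert(r->val != 0);
	return --r->val == 0;
}

int main(void)
{
	struct refcount r;

	refcount_set(&r, 2);		/* as crypto_larval_add() does */
	refcount_inc(&r);
	while (!refcount_dec_and_test(&r))
		;			/* drops 3 -> 0, frees exactly once */
	return 0;
}

This also makes the refcount_read(&alg->cra_refcnt) > 2 test in
crypto_del_alg() read naturally: the registry holds one reference and the
lookup just took another, so anything above two means the algorithm is still
in use.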
diff --git a/crypto/authenc.c b/crypto/authenc.c
index 875470b0e026..d3d6d72fe649 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -329,7 +329,7 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm)
 	if (IS_ERR(enc))
 		goto err_free_ahash;
 
-	null = crypto_get_default_null_skcipher2();
+	null = crypto_get_default_null_skcipher();
 	err = PTR_ERR(null);
 	if (IS_ERR(null))
 		goto err_free_skcipher;
@@ -363,7 +363,7 @@ static void crypto_authenc_exit_tfm(struct crypto_aead *tfm)
 
 	crypto_free_ahash(ctx->auth);
 	crypto_free_skcipher(ctx->enc);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 }
 
 static void crypto_authenc_free(struct aead_instance *inst)
diff --git a/crypto/authencesn.c b/crypto/authencesn.c
index 0cf5fefdb859..15f91ddd7f0e 100644
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -352,7 +352,7 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm)
 	if (IS_ERR(enc))
 		goto err_free_ahash;
 
-	null = crypto_get_default_null_skcipher2();
+	null = crypto_get_default_null_skcipher();
 	err = PTR_ERR(null);
 	if (IS_ERR(null))
 		goto err_free_skcipher;
@@ -389,7 +389,7 @@ static void crypto_authenc_esn_exit_tfm(struct crypto_aead *tfm)
 
 	crypto_free_ahash(ctx->auth);
 	crypto_free_skcipher(ctx->enc);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 }
 
 static void crypto_authenc_esn_free(struct aead_instance *inst)
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index 6c43a0a17a55..01c0d4aa2563 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -18,7 +18,6 @@
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 #include <linux/errno.h>
-#include <linux/hardirq.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c
index a02286bf319e..32ddd4836ff5 100644
--- a/crypto/camellia_generic.c
+++ b/crypto/camellia_generic.c
@@ -13,8 +13,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c
index df5c72629383..66169c178314 100644
--- a/crypto/cast5_generic.c
+++ b/crypto/cast5_generic.c
@@ -16,8 +16,7 @@
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software
-* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
 
diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c
index 058c8d755d03..c8e5ec69790e 100644
--- a/crypto/cast6_generic.c
+++ b/crypto/cast6_generic.c
@@ -13,8 +13,7 @@
  * any later version.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 
diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
index 4a45fa4890c0..e451c3cb6a56 100644
--- a/crypto/chacha20_generic.c
+++ b/crypto/chacha20_generic.c
@@ -9,44 +9,38 @@
  * (at your option) any later version.
  */
 
+#include <asm/unaligned.h>
 #include <crypto/algapi.h>
 #include <crypto/chacha20.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 
-static inline u32 le32_to_cpuvp(const void *p)
-{
-	return le32_to_cpup(p);
-}
-
 static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
 			     unsigned int bytes)
 {
-	u8 stream[CHACHA20_BLOCK_SIZE];
+	u32 stream[CHACHA20_BLOCK_WORDS];
 
 	if (dst != src)
 		memcpy(dst, src, bytes);
 
 	while (bytes >= CHACHA20_BLOCK_SIZE) {
 		chacha20_block(state, stream);
-		crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE);
+		crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
 		bytes -= CHACHA20_BLOCK_SIZE;
 		dst += CHACHA20_BLOCK_SIZE;
 	}
 	if (bytes) {
 		chacha20_block(state, stream);
-		crypto_xor(dst, stream, bytes);
+		crypto_xor(dst, (const u8 *)stream, bytes);
 	}
 }
 
 void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
 {
-	static const char constant[16] = "expand 32-byte k";
-
-	state[0]  = le32_to_cpuvp(constant +  0);
-	state[1]  = le32_to_cpuvp(constant +  4);
-	state[2]  = le32_to_cpuvp(constant +  8);
-	state[3]  = le32_to_cpuvp(constant + 12);
+	state[0]  = 0x61707865; /* "expa" */
+	state[1]  = 0x3320646e; /* "nd 3" */
+	state[2]  = 0x79622d32; /* "2-by" */
+	state[3]  = 0x6b206574; /* "te k" */
 	state[4]  = ctx->key[0];
 	state[5]  = ctx->key[1];
 	state[6]  = ctx->key[2];
@@ -55,10 +49,10 @@ void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
 	state[9]  = ctx->key[5];
 	state[10] = ctx->key[6];
 	state[11] = ctx->key[7];
-	state[12] = le32_to_cpuvp(iv +  0);
-	state[13] = le32_to_cpuvp(iv +  4);
-	state[14] = le32_to_cpuvp(iv +  8);
-	state[15] = le32_to_cpuvp(iv + 12);
+	state[12] = get_unaligned_le32(iv +  0);
+	state[13] = get_unaligned_le32(iv +  4);
+	state[14] = get_unaligned_le32(iv +  8);
+	state[15] = get_unaligned_le32(iv + 12);
 }
 EXPORT_SYMBOL_GPL(crypto_chacha20_init);
 
@@ -72,7 +66,7 @@ int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		return -EINVAL;
 
 	for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
-		ctx->key[i] = le32_to_cpuvp(key + i * sizeof(u32));
+		ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
 
 	return 0;
 }
@@ -111,7 +105,6 @@ static struct skcipher_alg alg = {
 	.base.cra_priority	= 100,
 	.base.cra_blocksize	= 1,
 	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
-	.base.cra_alignmask	= sizeof(u32) - 1,
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= CHACHA20_KEY_SIZE,
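
Hard-coding the four words removes a runtime decode of the "expand 32-byte k"
string, and get_unaligned_le32() lets the generic driver drop its
cra_alignmask. A stand-alone check (ours) that the literals really are the
little-endian words of that string:

#include <assert.h>
#include <stdint.h>

/* Little-endian load, a user-space analogue of get_unaligned_le32(). */
static uint32_t le32(const void *p)
{
	const uint8_t *b = p;

	return (uint32_t)b[0] | ((uint32_t)b[1] << 8) |
	       ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
}

int main(void)
{
	static const char c[16] = "expand 32-byte k";

	assert(le32(c +  0) == 0x61707865);	/* "expa" */
	assert(le32(c +  4) == 0x3320646e);	/* "nd 3" */
	assert(le32(c +  8) == 0x79622d32);	/* "2-by" */
	assert(le32(c + 12) == 0x6b206574);	/* "te k" */
	return 0;
}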
diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c
index aa2a25fc7482..718cbce8d169 100644
--- a/crypto/crc32_generic.c
+++ b/crypto/crc32_generic.c
@@ -133,6 +133,7 @@ static struct shash_alg alg = {
 		.cra_name		= "crc32",
 		.cra_driver_name	= "crc32-generic",
 		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(u32),
 		.cra_module		= THIS_MODULE,
diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c
index 4c0a0e271876..372320399622 100644
--- a/crypto/crc32c_generic.c
+++ b/crypto/crc32c_generic.c
@@ -146,6 +146,7 @@ static struct shash_alg alg = {
 		.cra_name		=	"crc32c",
 		.cra_driver_name	=	"crc32c-generic",
 		.cra_priority		=	100,
+		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
 		.cra_alignmask		=	3,
 		.cra_ctxsize		=	sizeof(struct chksum_ctx),
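
CRYPTO_ALG_OPTIONAL_KEY marks algorithms that implement ->setkey but do not
require it; for the CRCs the "key" is nothing more than an alternative seed.
A stand-alone bitwise CRC32C sketch (ours; the kernel driver is table- or
instruction-based) showing that a key changes only the starting value:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Bitwise CRC32C (Castagnoli, reflected polynomial 0x82f63b78). */
static uint32_t crc32c(uint32_t seed, const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t crc = seed;
	int i;

	while (len--) {
		crc ^= *p++;
		for (i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78 : 0);
	}
	return crc;
}

int main(void)
{
	const char msg[] = "123456789";

	/* Default seed ~0 plus final inversion: the standard check value. */
	printf("%08x\n", ~crc32c(~0u, msg, strlen(msg)));	/* e3069283 */
	/* A keyed ("re-seeded") digest simply starts elsewhere. */
	printf("%08x\n", ~crc32c(0x12345678, msg, strlen(msg)));
	return 0;
}

With the flag set, a freshly allocated tfm is usable at once; without it, the
NEED_KEY machinery added later in this series would refuse to digest anything
until a key arrived.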
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index bd43cf5be14c..addca7bae33f 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -32,7 +32,9 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 
-#define CRYPTD_MAX_CPU_QLEN 1000
+static unsigned int cryptd_max_cpu_qlen = 1000;
+module_param(cryptd_max_cpu_qlen, uint, 0);
+MODULE_PARM_DESC(cryptd_max_cpu_qlen, "Set cryptd max per-CPU queue depth");
 
 struct cryptd_cpu_queue {
 	struct crypto_queue queue;
@@ -116,6 +118,7 @@ static int cryptd_init_queue(struct cryptd_queue *queue,
 		crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
 		INIT_WORK(&cpu_queue->work, cryptd_queue_worker);
 	}
+	pr_info("cryptd: max_cpu_qlen set to %d\n", max_cpu_qlen);
 	return 0;
 }
 
@@ -893,10 +896,9 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
 	if (err)
 		goto out_free_inst;
 
-	type = CRYPTO_ALG_ASYNC;
-	if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
-		type |= CRYPTO_ALG_INTERNAL;
-	inst->alg.halg.base.cra_flags = type;
+	inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
+		(alg->cra_flags & (CRYPTO_ALG_INTERNAL |
+				   CRYPTO_ALG_OPTIONAL_KEY));
 
 	inst->alg.halg.digestsize = salg->digestsize;
 	inst->alg.halg.statesize = salg->statesize;
@@ -911,7 +913,8 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
 	inst->alg.finup  = cryptd_hash_finup_enqueue;
 	inst->alg.export = cryptd_hash_export;
 	inst->alg.import = cryptd_hash_import;
-	inst->alg.setkey = cryptd_hash_setkey;
+	if (crypto_shash_alg_has_setkey(salg))
+		inst->alg.setkey = cryptd_hash_setkey;
 	inst->alg.digest = cryptd_hash_digest_enqueue;
 
 	err = ahash_register_instance(tmpl, inst);
@@ -1372,7 +1375,7 @@ static int __init cryptd_init(void)
 {
 	int err;
 
-	err = cryptd_init_queue(&queue, CRYPTD_MAX_CPU_QLEN);
+	err = cryptd_init_queue(&queue, cryptd_max_cpu_qlen);
 	if (err)
 		return err;
 
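
Two separate improvements land in cryptd: the per-CPU queue depth becomes a
load-time tunable (cryptd.cryptd_max_cpu_qlen=... on the kernel command line),
and ->setkey is installed on the instance only when the wrapped shash actually
has one, with CRYPTO_ALG_OPTIONAL_KEY now forwarded next to
CRYPTO_ALG_INTERNAL. The flag forwarding is what keeps a cryptd wrapper from
spuriously demanding a key. A toy model of the resulting rule (names ours,
not the kernel API); mcryptd below receives the identical treatment:

#include <stdbool.h>
#include <stdio.h>

#define ALG_OPTIONAL_KEY 0x1
#define TFM_NEED_KEY     0x2

struct toy_alg {
	bool has_setkey;
	unsigned int flags;
};

/* Mirrors the init-time rule introduced by this series. */
static unsigned int init_tfm_flags(const struct toy_alg *alg)
{
	if (alg->has_setkey && !(alg->flags & ALG_OPTIONAL_KEY))
		return TFM_NEED_KEY;
	return 0;
}

int main(void)
{
	struct toy_alg sha256 = { .has_setkey = false };
	struct toy_alg hmac   = { .has_setkey = true };
	struct toy_alg crc32c = { .has_setkey = true,
				  .flags = ALG_OPTIONAL_KEY };

	/* A template copies has_setkey/OPTIONAL_KEY from the inner alg. */
	printf("sha256: %u\n", init_tfm_flags(&sha256));	/* 0 */
	printf("hmac:   %u\n", init_tfm_flags(&hmac));		/* NEED_KEY */
	printf("crc32c: %u\n", init_tfm_flags(&crc32c));	/* 0 */
	return 0;
}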
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 0dbe2be7f783..5c291eedaa70 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -169,7 +169,7 @@ static int crypto_report_one(struct crypto_alg *alg,
 	ualg->cru_type = 0;
 	ualg->cru_mask = 0;
 	ualg->cru_flags = alg->cra_flags;
-	ualg->cru_refcnt = atomic_read(&alg->cra_refcnt);
+	ualg->cru_refcnt = refcount_read(&alg->cra_refcnt);
 
 	if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority))
 		goto nla_put_failure;
@@ -387,7 +387,7 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto drop_alg;
 
 	err = -EBUSY;
-	if (atomic_read(&alg->cra_refcnt) > 2)
+	if (refcount_read(&alg->cra_refcnt) > 2)
 		goto drop_alg;
 
 	err = crypto_unregister_instance((struct crypto_instance *)alg);
diff --git a/crypto/ecc.c b/crypto/ecc.c
index 633a9bcdc574..18f32f2a5e1c 100644
--- a/crypto/ecc.c
+++ b/crypto/ecc.c
@@ -964,7 +964,7 @@ int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey)
 	 * DRBG with a security strength of 256.
 	 */
 	if (crypto_get_default_rng())
-		err = -EFAULT;
+		return -EFAULT;
 
 	err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)priv, nbytes);
 	crypto_put_default_rng();
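
The ecc.c one-liner is a real bug fix, not a cleanup: the old code recorded
-EFAULT in err and fell through, so crypto_rng_get_bytes() ran against a
default RNG that was never instantiated, and its return value clobbered err.
A stand-alone illustration of the pattern (ours):

#include <stdio.h>

static int acquire(void) { return -1; }	/* pretend acquisition fails */
static int use(void)     { return 0; }	/* "succeeds", masking the bug */

static int broken(void)
{
	int err = 0;

	if (acquire())
		err = -14;	/* -EFAULT recorded... */
	err = use();		/* ...then clobbered; returns 0 */
	return err;
}

static int fixed(void)
{
	if (acquire())
		return -14;	/* -EFAULT: fail fast, never touch the RNG */
	return use();
}

int main(void)
{
	printf("broken() = %d, fixed() = %d\n", broken(), fixed());
	return 0;
}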
diff --git a/crypto/echainiv.c b/crypto/echainiv.c
index e3d889b122e0..45819e6015bf 100644
--- a/crypto/echainiv.c
+++ b/crypto/echainiv.c
@@ -118,8 +118,6 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
 				struct rtattr **tb)
 {
 	struct aead_instance *inst;
-	struct crypto_aead_spawn *spawn;
-	struct aead_alg *alg;
 	int err;
 
 	inst = aead_geniv_alloc(tmpl, tb, 0, 0);
@@ -127,9 +125,6 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
-	spawn = aead_instance_ctx(inst);
-	alg = crypto_spawn_aead_alg(spawn);
-
 	err = -EINVAL;
 	if (inst->alg.ivsize & (sizeof(u64) - 1) || !inst->alg.ivsize)
 		goto free_inst;
diff --git a/crypto/gcm.c b/crypto/gcm.c
index 8589681fb9f6..0ad879e1f9b2 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -1101,7 +1101,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm)
 	if (IS_ERR(aead))
 		return PTR_ERR(aead);
 
-	null = crypto_get_default_null_skcipher2();
+	null = crypto_get_default_null_skcipher();
 	err = PTR_ERR(null);
 	if (IS_ERR(null))
 		goto err_free_aead;
@@ -1129,7 +1129,7 @@ static void crypto_rfc4543_exit_tfm(struct crypto_aead *tfm)
 	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm);
 
 	crypto_free_aead(ctx->child);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 }
 
 static void crypto_rfc4543_free(struct aead_instance *inst)
diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c
index 24e601954c7a..a4b1c026aaee 100644
--- a/crypto/gf128mul.c
+++ b/crypto/gf128mul.c
@@ -160,8 +160,6 @@ void gf128mul_x8_ble(le128 *r, const le128 *x)
 {
 	u64 a = le64_to_cpu(x->a);
 	u64 b = le64_to_cpu(x->b);
-
-	/* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */
 	u64 _tt = gf128mul_table_be[a >> 56];
 
 	r->a = cpu_to_le64((a << 8) | (b >> 56));
diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c
index 12ad3e3a84e3..1bffb3f712dd 100644
--- a/crypto/ghash-generic.c
+++ b/crypto/ghash-generic.c
@@ -56,9 +56,6 @@ static int ghash_update(struct shash_desc *desc,
 	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
 	u8 *dst = dctx->buffer;
 
-	if (!ctx->gf128)
-		return -ENOKEY;
-
 	if (dctx->bytes) {
 		int n = min(srclen, dctx->bytes);
 		u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
@@ -111,9 +108,6 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
 	u8 *buf = dctx->buffer;
 
-	if (!ctx->gf128)
-		return -ENOKEY;
-
 	ghash_flush(ctx, dctx);
 	memcpy(dst, buf, GHASH_BLOCK_SIZE);
 
diff --git a/crypto/internal.h b/crypto/internal.h
index f07320423191..5ac27fba10e8 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -30,9 +30,6 @@
 enum {
 	CRYPTO_MSG_ALG_REQUEST,
 	CRYPTO_MSG_ALG_REGISTER,
-	CRYPTO_MSG_ALG_UNREGISTER,
-	CRYPTO_MSG_TMPL_REGISTER,
-	CRYPTO_MSG_TMPL_UNREGISTER,
 };
 
 struct crypto_instance;
@@ -78,7 +75,6 @@ int crypto_init_compress_ops(struct crypto_tfm *tfm);
 
 struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask);
 void crypto_larval_kill(struct crypto_alg *alg);
-struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask);
 void crypto_alg_tested(const char *name, int err);
 
 void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
@@ -106,13 +102,13 @@ int crypto_type_has_alg(const char *name, const struct crypto_type *frontend,
 
 static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
 {
-	atomic_inc(&alg->cra_refcnt);
+	refcount_inc(&alg->cra_refcnt);
 	return alg;
 }
 
 static inline void crypto_alg_put(struct crypto_alg *alg)
 {
-	if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy)
+	if (refcount_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy)
 		alg->cra_destroy(alg);
 }
 
diff --git a/crypto/keywrap.c b/crypto/keywrap.c
index 744e35134c45..ec5c6a087c90 100644
--- a/crypto/keywrap.c
+++ b/crypto/keywrap.c
@@ -188,7 +188,7 @@ static int crypto_kw_decrypt(struct blkcipher_desc *desc,
 	}
 
 	/* Perform authentication check */
-	if (block.A != cpu_to_be64(0xa6a6a6a6a6a6a6a6))
+	if (block.A != cpu_to_be64(0xa6a6a6a6a6a6a6a6ULL))
 		ret = -EBADMSG;
 
 	memzero_explicit(&block, sizeof(struct crypto_kw_block));
@@ -221,7 +221,7 @@ static int crypto_kw_encrypt(struct blkcipher_desc *desc,
 	 * Place the predefined IV into block A -- for encrypt, the caller
 	 * does not need to provide an IV, but must fetch the final IV.
 	 */
-	block.A = cpu_to_be64(0xa6a6a6a6a6a6a6a6);
+	block.A = cpu_to_be64(0xa6a6a6a6a6a6a6a6ULL);
 
 	/*
 	 * src scatterlist is read-only. dst scatterlist is r/w. During the
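
The ULL suffixes matter on 32-bit targets: C99 promotes the bare hexadecimal
constant to unsigned long long anyway, but older toolchains (e.g. gcc 4.1 for
m68k) warn that the constant is too large for long. Pinning the type is free:

#include <stdio.h>

int main(void)
{
	/* Explicitly unsigned long long on every ABI, no diagnostics. */
	unsigned long long iv = 0xa6a6a6a6a6a6a6a6ULL;

	printf("sizeof = %zu, value = %llx\n", sizeof(iv), iv);
	return 0;
}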
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
index eca04d3729b3..fe5129d6ff4e 100644
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -26,7 +26,6 @@
 #include <linux/sched.h>
 #include <linux/sched/stat.h>
 #include <linux/slab.h>
-#include <linux/hardirq.h>
 
 #define MCRYPTD_MAX_CPU_QLEN 100
 #define MCRYPTD_BATCH 9
@@ -517,10 +516,9 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
 	if (err)
 		goto out_free_inst;
 
-	type = CRYPTO_ALG_ASYNC;
-	if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
-		type |= CRYPTO_ALG_INTERNAL;
-	inst->alg.halg.base.cra_flags = type;
+	inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
+		(alg->cra_flags & (CRYPTO_ALG_INTERNAL |
+				   CRYPTO_ALG_OPTIONAL_KEY));
 
 	inst->alg.halg.digestsize = halg->digestsize;
 	inst->alg.halg.statesize = halg->statesize;
@@ -535,7 +533,8 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
 	inst->alg.finup  = mcryptd_hash_finup_enqueue;
 	inst->alg.export = mcryptd_hash_export;
 	inst->alg.import = mcryptd_hash_import;
-	inst->alg.setkey = mcryptd_hash_setkey;
+	if (crypto_hash_alg_has_setkey(halg))
+		inst->alg.setkey = mcryptd_hash_setkey;
 	inst->alg.digest = mcryptd_hash_digest_enqueue;
 
 	err = ahash_register_instance(tmpl, inst);
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index b1c2d57dc734..b7a3a0613a30 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -47,17 +47,6 @@ int crypto_poly1305_init(struct shash_desc *desc)
 }
 EXPORT_SYMBOL_GPL(crypto_poly1305_init);
 
-int crypto_poly1305_setkey(struct crypto_shash *tfm,
-			   const u8 *key, unsigned int keylen)
-{
-	/* Poly1305 requires a unique key for each tag, which implies that
-	 * we can't set it on the tfm that gets accessed by multiple users
-	 * simultaneously. Instead we expect the key as the first 32 bytes in
-	 * the update() call. */
-	return -ENOTSUPP;
-}
-EXPORT_SYMBOL_GPL(crypto_poly1305_setkey);
-
 static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
 {
 	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
@@ -76,6 +65,11 @@ static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
 	dctx->s[3] = get_unaligned_le32(key + 12);
 }
 
+/*
+ * Poly1305 requires a unique key for each tag, which implies that we can't set
+ * it on the tfm that gets accessed by multiple users simultaneously. Instead we
+ * expect the key as the first 32 bytes in the update() call.
+ */
 unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
 					const u8 *src, unsigned int srclen)
 {
@@ -210,7 +204,6 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_update);
 int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
 {
 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-	__le32 *mac = (__le32 *)dst;
 	u32 h0, h1, h2, h3, h4;
 	u32 g0, g1, g2, g3, g4;
 	u32 mask;
@@ -267,10 +260,10 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
 	h3 = (h3 >> 18) | (h4 <<  8);
 
 	/* mac = (h + s) % (2^128) */
-	f = (f >> 32) + h0 + dctx->s[0]; mac[0] = cpu_to_le32(f);
-	f = (f >> 32) + h1 + dctx->s[1]; mac[1] = cpu_to_le32(f);
-	f = (f >> 32) + h2 + dctx->s[2]; mac[2] = cpu_to_le32(f);
-	f = (f >> 32) + h3 + dctx->s[3]; mac[3] = cpu_to_le32(f);
+	f = (f >> 32) + h0 + dctx->s[0]; put_unaligned_le32(f, dst +  0);
+	f = (f >> 32) + h1 + dctx->s[1]; put_unaligned_le32(f, dst +  4);
+	f = (f >> 32) + h2 + dctx->s[2]; put_unaligned_le32(f, dst +  8);
+	f = (f >> 32) + h3 + dctx->s[3]; put_unaligned_le32(f, dst + 12);
 
 	return 0;
 }
@@ -281,14 +274,12 @@ static struct shash_alg poly1305_alg = {
 	.init		= crypto_poly1305_init,
 	.update		= crypto_poly1305_update,
 	.final		= crypto_poly1305_final,
-	.setkey		= crypto_poly1305_setkey,
 	.descsize	= sizeof(struct poly1305_desc_ctx),
 	.base		= {
 		.cra_name		= "poly1305",
 		.cra_driver_name	= "poly1305-generic",
 		.cra_priority		= 100,
 		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
-		.cra_alignmask		= sizeof(u32) - 1,
 		.cra_blocksize		= POLY1305_BLOCK_SIZE,
 		.cra_module		= THIS_MODULE,
 	},
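
Dropping the setkey stub lets the hash core treat poly1305 as keyless (the
per-message key arrives through update(), as the relocated comment explains),
and dropping cra_alignmask means the driver may no longer assume dst is 32-bit
aligned, hence put_unaligned_le32() instead of storing through a __le32 *.
A user-space analogue of the byte-safe store (ours):

#include <stdint.h>
#include <string.h>

/* Byte-order- and alignment-safe store, like put_unaligned_le32(). */
static void put_le32(uint32_t v, uint8_t *p)
{
	uint8_t b[4] = { (uint8_t)v, (uint8_t)(v >> 8),
			 (uint8_t)(v >> 16), (uint8_t)(v >> 24) };

	memcpy(p, b, 4);	/* no aligned-store assumption */
}

int main(void)
{
	uint8_t buf[8] = { 0 };

	put_le32(0xdeadbeef, buf + 1);	/* deliberately misaligned */
	return buf[1] != 0xef;		/* least significant byte first */
}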
diff --git a/crypto/proc.c b/crypto/proc.c
index 2cc10c96d753..822fcef6d91c 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -46,7 +46,7 @@ static int c_show(struct seq_file *m, void *p)
 	seq_printf(m, "driver       : %s\n", alg->cra_driver_name);
 	seq_printf(m, "module       : %s\n", module_name(alg->cra_module));
 	seq_printf(m, "priority     : %d\n", alg->cra_priority);
-	seq_printf(m, "refcnt       : %d\n", atomic_read(&alg->cra_refcnt));
+	seq_printf(m, "refcnt       : %u\n", refcount_read(&alg->cra_refcnt));
 	seq_printf(m, "selftest     : %s\n",
 		   (alg->cra_flags & CRYPTO_ALG_TESTED) ?
 		   "passed" : "unknown");
diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c
index d7da0eea5622..5074006a56c3 100644
--- a/crypto/salsa20_generic.c
+++ b/crypto/salsa20_generic.c
@@ -19,49 +19,19 @@
  *
  */
 
-#include <linux/init.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/salsa20.h>
 #include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/crypto.h>
-#include <linux/types.h>
-#include <linux/bitops.h>
-#include <crypto/algapi.h>
-#include <asm/byteorder.h>
 
-#define SALSA20_IV_SIZE        8U
-#define SALSA20_MIN_KEY_SIZE  16U
-#define SALSA20_MAX_KEY_SIZE  32U
-
-/*
- * Start of code taken from D. J. Bernstein's reference implementation.
- * With some modifications and optimizations made to suit our needs.
- */
-
-/*
-salsa20-ref.c version 20051118
-D. J. Bernstein
-Public domain.
-*/
-
-#define U32TO8_LITTLE(p, v) \
-	{ (p)[0] = (v >>  0) & 0xff; (p)[1] = (v >>  8) & 0xff; \
-	  (p)[2] = (v >> 16) & 0xff; (p)[3] = (v >> 24) & 0xff; }
-#define U8TO32_LITTLE(p)   \
-	(((u32)((p)[0])      ) | ((u32)((p)[1]) <<  8) | \
-	 ((u32)((p)[2]) << 16) | ((u32)((p)[3]) << 24)   )
-
-struct salsa20_ctx
-{
-	u32 input[16];
-};
-
-static void salsa20_wordtobyte(u8 output[64], const u32 input[16])
+static void salsa20_block(u32 *state, __le32 *stream)
 {
 	u32 x[16];
 	int i;
 
-	memcpy(x, input, sizeof(x));
-	for (i = 20; i > 0; i -= 2) {
+	memcpy(x, state, sizeof(x));
+
+	for (i = 0; i < 20; i += 2) {
 		x[ 4] ^= rol32((x[ 0] + x[12]),  7);
 		x[ 8] ^= rol32((x[ 4] + x[ 0]),  9);
 		x[12] ^= rol32((x[ 8] + x[ 4]), 13);
@@ -95,145 +65,137 @@ static void salsa20_wordtobyte(u8 output[64], const u32 input[16])
 		x[14] ^= rol32((x[13] + x[12]), 13);
 		x[15] ^= rol32((x[14] + x[13]), 18);
 	}
-	for (i = 0; i < 16; ++i)
-		x[i] += input[i];
-	for (i = 0; i < 16; ++i)
-		U32TO8_LITTLE(output + 4 * i,x[i]);
-}
 
-static const char sigma[16] = "expand 32-byte k";
-static const char tau[16] = "expand 16-byte k";
+	for (i = 0; i < 16; i++)
+		stream[i] = cpu_to_le32(x[i] + state[i]);
+
+	if (++state[8] == 0)
+		state[9]++;
+}
 
-static void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, u32 kbytes)
+static void salsa20_docrypt(u32 *state, u8 *dst, const u8 *src,
+			    unsigned int bytes)
 {
-	const char *constants;
+	__le32 stream[SALSA20_BLOCK_SIZE / sizeof(__le32)];
 
-	ctx->input[1] = U8TO32_LITTLE(k + 0);
-	ctx->input[2] = U8TO32_LITTLE(k + 4);
-	ctx->input[3] = U8TO32_LITTLE(k + 8);
-	ctx->input[4] = U8TO32_LITTLE(k + 12);
-	if (kbytes == 32) { /* recommended */
-		k += 16;
-		constants = sigma;
-	} else { /* kbytes == 16 */
-		constants = tau;
+	if (dst != src)
+		memcpy(dst, src, bytes);
+
+	while (bytes >= SALSA20_BLOCK_SIZE) {
+		salsa20_block(state, stream);
+		crypto_xor(dst, (const u8 *)stream, SALSA20_BLOCK_SIZE);
+		bytes -= SALSA20_BLOCK_SIZE;
+		dst += SALSA20_BLOCK_SIZE;
+	}
+	if (bytes) {
+		salsa20_block(state, stream);
+		crypto_xor(dst, (const u8 *)stream, bytes);
 	}
-	ctx->input[11] = U8TO32_LITTLE(k + 0);
-	ctx->input[12] = U8TO32_LITTLE(k + 4);
-	ctx->input[13] = U8TO32_LITTLE(k + 8);
-	ctx->input[14] = U8TO32_LITTLE(k + 12);
-	ctx->input[0] = U8TO32_LITTLE(constants + 0);
-	ctx->input[5] = U8TO32_LITTLE(constants + 4);
-	ctx->input[10] = U8TO32_LITTLE(constants + 8);
-	ctx->input[15] = U8TO32_LITTLE(constants + 12);
 }
 
-static void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv)
+void crypto_salsa20_init(u32 *state, const struct salsa20_ctx *ctx,
+			 const u8 *iv)
 {
-	ctx->input[6] = U8TO32_LITTLE(iv + 0);
-	ctx->input[7] = U8TO32_LITTLE(iv + 4);
-	ctx->input[8] = 0;
-	ctx->input[9] = 0;
+	memcpy(state, ctx->initial_state, sizeof(ctx->initial_state));
+	state[6] = get_unaligned_le32(iv + 0);
+	state[7] = get_unaligned_le32(iv + 4);
 }
+EXPORT_SYMBOL_GPL(crypto_salsa20_init);
 
-static void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, u8 *dst,
-				  const u8 *src, unsigned int bytes)
+int crypto_salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			  unsigned int keysize)
 {
-	u8 buf[64];
-
-	if (dst != src)
-		memcpy(dst, src, bytes);
-
-	while (bytes) {
-		salsa20_wordtobyte(buf, ctx->input);
-
-		ctx->input[8]++;
-		if (!ctx->input[8])
-			ctx->input[9]++;
+	static const char sigma[16] = "expand 32-byte k";
+	static const char tau[16] = "expand 16-byte k";
+	struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm);
+	const char *constants;
 
-		if (bytes <= 64) {
-			crypto_xor(dst, buf, bytes);
-			return;
-		}
+	if (keysize != SALSA20_MIN_KEY_SIZE &&
+	    keysize != SALSA20_MAX_KEY_SIZE)
+		return -EINVAL;
 
-		crypto_xor(dst, buf, 64);
-		bytes -= 64;
-		dst += 64;
+	ctx->initial_state[1] = get_unaligned_le32(key + 0);
+	ctx->initial_state[2] = get_unaligned_le32(key + 4);
+	ctx->initial_state[3] = get_unaligned_le32(key + 8);
+	ctx->initial_state[4] = get_unaligned_le32(key + 12);
+	if (keysize == 32) { /* recommended */
+		key += 16;
+		constants = sigma;
+	} else { /* keysize == 16 */
+		constants = tau;
 	}
-}
-
-/*
- * End of code taken from D. J. Bernstein's reference implementation.
- */
+	ctx->initial_state[11] = get_unaligned_le32(key + 0);
+	ctx->initial_state[12] = get_unaligned_le32(key + 4);
+	ctx->initial_state[13] = get_unaligned_le32(key + 8);
+	ctx->initial_state[14] = get_unaligned_le32(key + 12);
+	ctx->initial_state[0]  = get_unaligned_le32(constants + 0);
+	ctx->initial_state[5]  = get_unaligned_le32(constants + 4);
+	ctx->initial_state[10] = get_unaligned_le32(constants + 8);
+	ctx->initial_state[15] = get_unaligned_le32(constants + 12);
+
+	/* space for the nonce; it will be overridden for each request */
+	ctx->initial_state[6] = 0;
+	ctx->initial_state[7] = 0;
+
+	/* initial block number */
+	ctx->initial_state[8] = 0;
+	ctx->initial_state[9] = 0;
 
-static int setkey(struct crypto_tfm *tfm, const u8 *key,
-		  unsigned int keysize)
-{
-	struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
-	salsa20_keysetup(ctx, key, keysize);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(crypto_salsa20_setkey);
 
-static int encrypt(struct blkcipher_desc *desc,
-		   struct scatterlist *dst, struct scatterlist *src,
-		   unsigned int nbytes)
+static int salsa20_crypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	u32 state[16];
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 64);
+	err = skcipher_walk_virt(&walk, req, true);
 
-	salsa20_ivsetup(ctx, walk.iv);
+	crypto_salsa20_init(state, ctx, walk.iv);
 
-	while (walk.nbytes >= 64) {
-		salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
-				      walk.src.virt.addr,
-				      walk.nbytes - (walk.nbytes % 64));
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
-	}
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
 
-	if (walk.nbytes) {
-		salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
-				      walk.src.virt.addr, walk.nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		salsa20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+				nbytes);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
 	return err;
 }
 
-static struct crypto_alg alg = {
-	.cra_name           =   "salsa20",
-	.cra_driver_name    =   "salsa20-generic",
-	.cra_priority       =   100,
-	.cra_flags          =   CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_type           =   &crypto_blkcipher_type,
-	.cra_blocksize      =   1,
-	.cra_ctxsize        =   sizeof(struct salsa20_ctx),
-	.cra_alignmask      =	3,
-	.cra_module         =   THIS_MODULE,
-	.cra_u              =   {
-		.blkcipher = {
-			.setkey         =   setkey,
-			.encrypt        =   encrypt,
-			.decrypt        =   encrypt,
-			.min_keysize    =   SALSA20_MIN_KEY_SIZE,
-			.max_keysize    =   SALSA20_MAX_KEY_SIZE,
-			.ivsize         =   SALSA20_IV_SIZE,
-		}
-	}
+static struct skcipher_alg alg = {
+	.base.cra_name		= "salsa20",
+	.base.cra_driver_name	= "salsa20-generic",
+	.base.cra_priority	= 100,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct salsa20_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= SALSA20_MIN_KEY_SIZE,
+	.max_keysize		= SALSA20_MAX_KEY_SIZE,
+	.ivsize			= SALSA20_IV_SIZE,
+	.chunksize		= SALSA20_BLOCK_SIZE,
+	.setkey			= crypto_salsa20_setkey,
+	.encrypt		= salsa20_crypt,
+	.decrypt		= salsa20_crypt,
 };
 
 static int __init salsa20_generic_mod_init(void)
 {
-	return crypto_register_alg(&alg);
+	return crypto_register_skcipher(&alg);
 }
 
 static void __exit salsa20_generic_mod_fini(void)
 {
-	crypto_unregister_alg(&alg);
+	crypto_unregister_skcipher(&alg);
 }
 
 module_init(salsa20_generic_mod_init);
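
The skcipher conversion also restructures the keystream generator:
salsa20_block() now post-increments the block counter, a 64-bit quantity
split across state[8] (low word) and state[9] (high word) with a carry on
wrap, and the walk loop rounds each intermediate step down to walk.stride so
only the final chunk may be partial. A quick check (ours) that the split
counter behaves like a single u64:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t lo = 0xffffffff, hi = 0;	/* state[8], state[9] */
	uint64_t ref = 0xffffffff;		/* reference counter */

	/* The increment used by salsa20_block(). */
	if (++lo == 0)
		hi++;
	ref++;

	assert(((uint64_t)hi << 32 | lo) == ref);
	return 0;
}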
diff --git a/crypto/seqiv.c b/crypto/seqiv.c
index 570b7d1aa0ca..39dbf2f7e5f5 100644
--- a/crypto/seqiv.c
+++ b/crypto/seqiv.c
@@ -144,8 +144,6 @@ static int seqiv_aead_decrypt(struct aead_request *req)
 static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct aead_instance *inst;
-	struct crypto_aead_spawn *spawn;
-	struct aead_alg *alg;
 	int err;
 
 	inst = aead_geniv_alloc(tmpl, tb, 0, 0);
@@ -153,9 +151,6 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
-	spawn = aead_instance_ctx(inst);
-	alg = crypto_spawn_aead_alg(spawn);
-
 	err = -EINVAL;
 	if (inst->alg.ivsize != sizeof(u64))
 		goto free_inst;
diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c
index 7e8ed96236ce..a965b9d80559 100644
--- a/crypto/sha3_generic.c
+++ b/crypto/sha3_generic.c
@@ -5,6 +5,7 @@
  * http://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf
  *
  * SHA-3 code by Jeff Garzik <jeff@garzik.org>
+ *               Ard Biesheuvel <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -17,12 +18,10 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <crypto/sha3.h>
-#include <asm/byteorder.h>
+#include <asm/unaligned.h>
 
 #define KECCAK_ROUNDS 24
 
-#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
-
 static const u64 keccakf_rndc[24] = {
 	0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL,
 	0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL,
@@ -34,100 +33,133 @@ static const u64 keccakf_rndc[24] = {
 	0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL
 };
 
-static const int keccakf_rotc[24] = {
-	1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14,
-	27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44
-};
-
-static const int keccakf_piln[24] = {
-	10, 7,  11, 17, 18, 3, 5,  16, 8,  21, 24, 4,
-	15, 23, 19, 13, 12, 2, 20, 14, 22, 9,  6,  1
-};
-
 /* update the state with given number of rounds */
 
-static void keccakf(u64 st[25])
+static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25])
 {
-	int i, j, round;
-	u64 t, bc[5];
+	u64 t[5], tt, bc[5];
+	int round;
 
 	for (round = 0; round < KECCAK_ROUNDS; round++) {
 
 		/* Theta */
-		for (i = 0; i < 5; i++)
-			bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15]
-				^ st[i + 20];
-
-		for (i = 0; i < 5; i++) {
-			t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
-			for (j = 0; j < 25; j += 5)
-				st[j + i] ^= t;
-		}
+		bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
+		bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
+		bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
+		bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
+		bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
+
+		t[0] = bc[4] ^ rol64(bc[1], 1);
+		t[1] = bc[0] ^ rol64(bc[2], 1);
+		t[2] = bc[1] ^ rol64(bc[3], 1);
+		t[3] = bc[2] ^ rol64(bc[4], 1);
+		t[4] = bc[3] ^ rol64(bc[0], 1);
+
+		st[0] ^= t[0];
 
 		/* Rho Pi */
-		t = st[1];
-		for (i = 0; i < 24; i++) {
-			j = keccakf_piln[i];
-			bc[0] = st[j];
-			st[j] = ROTL64(t, keccakf_rotc[i]);
-			t = bc[0];
-		}
+		tt = st[1];
+		st[ 1] = rol64(st[ 6] ^ t[1], 44);
+		st[ 6] = rol64(st[ 9] ^ t[4], 20);
+		st[ 9] = rol64(st[22] ^ t[2], 61);
+		st[22] = rol64(st[14] ^ t[4], 39);
+		st[14] = rol64(st[20] ^ t[0], 18);
+		st[20] = rol64(st[ 2] ^ t[2], 62);
+		st[ 2] = rol64(st[12] ^ t[2], 43);
+		st[12] = rol64(st[13] ^ t[3], 25);
+		st[13] = rol64(st[19] ^ t[4],  8);
+		st[19] = rol64(st[23] ^ t[3], 56);
+		st[23] = rol64(st[15] ^ t[0], 41);
+		st[15] = rol64(st[ 4] ^ t[4], 27);
+		st[ 4] = rol64(st[24] ^ t[4], 14);
+		st[24] = rol64(st[21] ^ t[1],  2);
+		st[21] = rol64(st[ 8] ^ t[3], 55);
+		st[ 8] = rol64(st[16] ^ t[1], 45);
+		st[16] = rol64(st[ 5] ^ t[0], 36);
+		st[ 5] = rol64(st[ 3] ^ t[3], 28);
+		st[ 3] = rol64(st[18] ^ t[3], 21);
+		st[18] = rol64(st[17] ^ t[2], 15);
+		st[17] = rol64(st[11] ^ t[1], 10);
+		st[11] = rol64(st[ 7] ^ t[2],  6);
+		st[ 7] = rol64(st[10] ^ t[0],  3);
+		st[10] = rol64(    tt ^ t[1],  1);
 
 		/* Chi */
-		for (j = 0; j < 25; j += 5) {
-			for (i = 0; i < 5; i++)
-				bc[i] = st[j + i];
-			for (i = 0; i < 5; i++)
-				st[j + i] ^= (~bc[(i + 1) % 5]) &
-					     bc[(i + 2) % 5];
-		}
+		bc[ 0] = ~st[ 1] & st[ 2];
+		bc[ 1] = ~st[ 2] & st[ 3];
+		bc[ 2] = ~st[ 3] & st[ 4];
+		bc[ 3] = ~st[ 4] & st[ 0];
+		bc[ 4] = ~st[ 0] & st[ 1];
+		st[ 0] ^= bc[ 0];
+		st[ 1] ^= bc[ 1];
+		st[ 2] ^= bc[ 2];
+		st[ 3] ^= bc[ 3];
+		st[ 4] ^= bc[ 4];
+
+		bc[ 0] = ~st[ 6] & st[ 7];
+		bc[ 1] = ~st[ 7] & st[ 8];
+		bc[ 2] = ~st[ 8] & st[ 9];
+		bc[ 3] = ~st[ 9] & st[ 5];
+		bc[ 4] = ~st[ 5] & st[ 6];
+		st[ 5] ^= bc[ 0];
+		st[ 6] ^= bc[ 1];
+		st[ 7] ^= bc[ 2];
+		st[ 8] ^= bc[ 3];
+		st[ 9] ^= bc[ 4];
+
+		bc[ 0] = ~st[11] & st[12];
+		bc[ 1] = ~st[12] & st[13];
+		bc[ 2] = ~st[13] & st[14];
+		bc[ 3] = ~st[14] & st[10];
+		bc[ 4] = ~st[10] & st[11];
+		st[10] ^= bc[ 0];
+		st[11] ^= bc[ 1];
+		st[12] ^= bc[ 2];
+		st[13] ^= bc[ 3];
+		st[14] ^= bc[ 4];
+
+		bc[ 0] = ~st[16] & st[17];
+		bc[ 1] = ~st[17] & st[18];
+		bc[ 2] = ~st[18] & st[19];
+		bc[ 3] = ~st[19] & st[15];
+		bc[ 4] = ~st[15] & st[16];
+		st[15] ^= bc[ 0];
+		st[16] ^= bc[ 1];
+		st[17] ^= bc[ 2];
+		st[18] ^= bc[ 3];
+		st[19] ^= bc[ 4];
+
+		bc[ 0] = ~st[21] & st[22];
+		bc[ 1] = ~st[22] & st[23];
+		bc[ 2] = ~st[23] & st[24];
+		bc[ 3] = ~st[24] & st[20];
+		bc[ 4] = ~st[20] & st[21];
+		st[20] ^= bc[ 0];
+		st[21] ^= bc[ 1];
+		st[22] ^= bc[ 2];
+		st[23] ^= bc[ 3];
+		st[24] ^= bc[ 4];
 
 		/* Iota */
 		st[0] ^= keccakf_rndc[round];
 	}
 }
 
-static void sha3_init(struct sha3_state *sctx, unsigned int digest_sz)
-{
-	memset(sctx, 0, sizeof(*sctx));
-	sctx->md_len = digest_sz;
-	sctx->rsiz = 200 - 2 * digest_sz;
-	sctx->rsizw = sctx->rsiz / 8;
-}
-
-static int sha3_224_init(struct shash_desc *desc)
-{
-	struct sha3_state *sctx = shash_desc_ctx(desc);
-
-	sha3_init(sctx, SHA3_224_DIGEST_SIZE);
-	return 0;
-}
-
-static int sha3_256_init(struct shash_desc *desc)
+int crypto_sha3_init(struct shash_desc *desc)
 {
 	struct sha3_state *sctx = shash_desc_ctx(desc);
+	unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
 
-	sha3_init(sctx, SHA3_256_DIGEST_SIZE);
-	return 0;
-}
-
-static int sha3_384_init(struct shash_desc *desc)
-{
-	struct sha3_state *sctx = shash_desc_ctx(desc);
-
-	sha3_init(sctx, SHA3_384_DIGEST_SIZE);
-	return 0;
-}
-
-static int sha3_512_init(struct shash_desc *desc)
-{
-	struct sha3_state *sctx = shash_desc_ctx(desc);
+	sctx->rsiz = 200 - 2 * digest_size;
+	sctx->rsizw = sctx->rsiz / 8;
+	sctx->partial = 0;
 
-	sha3_init(sctx, SHA3_512_DIGEST_SIZE);
+	memset(sctx->st, 0, sizeof(sctx->st));
 	return 0;
 }
+EXPORT_SYMBOL(crypto_sha3_init);
 
-static int sha3_update(struct shash_desc *desc, const u8 *data,
+int crypto_sha3_update(struct shash_desc *desc, const u8 *data,
 		       unsigned int len)
 {
 	struct sha3_state *sctx = shash_desc_ctx(desc);
@@ -149,7 +181,7 @@ static int sha3_update(struct shash_desc *desc, const u8 *data,
 			unsigned int i;
 
 			for (i = 0; i < sctx->rsizw; i++)
-				sctx->st[i] ^= ((u64 *) src)[i];
+				sctx->st[i] ^= get_unaligned_le64(src + 8 * i);
 			keccakf(sctx->st);
 
 			done += sctx->rsiz;
@@ -163,125 +195,89 @@ static int sha3_update(struct shash_desc *desc, const u8 *data,
 
 	return 0;
 }
+EXPORT_SYMBOL(crypto_sha3_update);
 
-static int sha3_final(struct shash_desc *desc, u8 *out)
+int crypto_sha3_final(struct shash_desc *desc, u8 *out)
 {
 	struct sha3_state *sctx = shash_desc_ctx(desc);
 	unsigned int i, inlen = sctx->partial;
+	unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
+	__le64 *digest = (__le64 *)out;
 
 	sctx->buf[inlen++] = 0x06;
 	memset(sctx->buf + inlen, 0, sctx->rsiz - inlen);
 	sctx->buf[sctx->rsiz - 1] |= 0x80;
 
 	for (i = 0; i < sctx->rsizw; i++)
-		sctx->st[i] ^= ((u64 *) sctx->buf)[i];
+		sctx->st[i] ^= get_unaligned_le64(sctx->buf + 8 * i);
 
 	keccakf(sctx->st);
 
-	for (i = 0; i < sctx->rsizw; i++)
-		sctx->st[i] = cpu_to_le64(sctx->st[i]);
+	for (i = 0; i < digest_size / 8; i++)
+		put_unaligned_le64(sctx->st[i], digest++);
 
-	memcpy(out, sctx->st, sctx->md_len);
+	if (digest_size & 4)
+		put_unaligned_le32(sctx->st[i], (__le32 *)digest);
 
 	memset(sctx, 0, sizeof(*sctx));
 	return 0;
 }
-
-static struct shash_alg sha3_224 = {
-	.digestsize	=	SHA3_224_DIGEST_SIZE,
-	.init		=	sha3_224_init,
-	.update		=	sha3_update,
-	.final		=	sha3_final,
-	.descsize	=	sizeof(struct sha3_state),
-	.base		=	{
-		.cra_name	=	"sha3-224",
-		.cra_driver_name =	"sha3-224-generic",
-		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize	=	SHA3_224_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-};
-
-static struct shash_alg sha3_256 = {
-	.digestsize	=	SHA3_256_DIGEST_SIZE,
-	.init		=	sha3_256_init,
-	.update		=	sha3_update,
-	.final		=	sha3_final,
-	.descsize	=	sizeof(struct sha3_state),
-	.base		=	{
-		.cra_name	=	"sha3-256",
-		.cra_driver_name =	"sha3-256-generic",
-		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize	=	SHA3_256_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-};
-
-static struct shash_alg sha3_384 = {
-	.digestsize	=	SHA3_384_DIGEST_SIZE,
-	.init		=	sha3_384_init,
-	.update		=	sha3_update,
-	.final		=	sha3_final,
-	.descsize	=	sizeof(struct sha3_state),
-	.base		=	{
-		.cra_name	=	"sha3-384",
-		.cra_driver_name =	"sha3-384-generic",
-		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize	=	SHA3_384_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-};
-
-static struct shash_alg sha3_512 = {
-	.digestsize	=	SHA3_512_DIGEST_SIZE,
-	.init		=	sha3_512_init,
-	.update		=	sha3_update,
-	.final		=	sha3_final,
-	.descsize	=	sizeof(struct sha3_state),
-	.base		=	{
-		.cra_name	=	"sha3-512",
-		.cra_driver_name =	"sha3-512-generic",
-		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize	=	SHA3_512_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-};
+EXPORT_SYMBOL(crypto_sha3_final);
+
+static struct shash_alg algs[] = { {
+	.digestsize		= SHA3_224_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= crypto_sha3_update,
+	.final			= crypto_sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-224",
+	.base.cra_driver_name	= "sha3-224-generic",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_224_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.digestsize		= SHA3_256_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= crypto_sha3_update,
+	.final			= crypto_sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-256",
+	.base.cra_driver_name	= "sha3-256-generic",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_256_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.digestsize		= SHA3_384_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= crypto_sha3_update,
+	.final			= crypto_sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-384",
+	.base.cra_driver_name	= "sha3-384-generic",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_384_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.digestsize		= SHA3_512_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= crypto_sha3_update,
+	.final			= crypto_sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-512",
+	.base.cra_driver_name	= "sha3-512-generic",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_512_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+} };
 
 static int __init sha3_generic_mod_init(void)
 {
-	int ret;
-
-	ret = crypto_register_shash(&sha3_224);
-	if (ret < 0)
-		goto err_out;
-	ret = crypto_register_shash(&sha3_256);
-	if (ret < 0)
-		goto err_out_224;
-	ret = crypto_register_shash(&sha3_384);
-	if (ret < 0)
-		goto err_out_256;
-	ret = crypto_register_shash(&sha3_512);
-	if (ret < 0)
-		goto err_out_384;
-
-	return 0;
-
-err_out_384:
-	crypto_unregister_shash(&sha3_384);
-err_out_256:
-	crypto_unregister_shash(&sha3_256);
-err_out_224:
-	crypto_unregister_shash(&sha3_224);
-err_out:
-	return ret;
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
 }
 
 static void __exit sha3_generic_mod_fini(void)
 {
-	crypto_unregister_shash(&sha3_224);
-	crypto_unregister_shash(&sha3_256);
-	crypto_unregister_shash(&sha3_384);
-	crypto_unregister_shash(&sha3_512);
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
 }
 
 module_init(sha3_generic_mod_init);
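
The keccakf() rewrite unrolls Theta, Rho/Pi and Chi, drops the rotation and
permutation lookup tables, and replaces the local ROTL64 macro with the
kernel's rol64(). In the patch the Theta xor of t[] is folded into the Rho/Pi
rotations; the stand-alone check below (ours) undoes that fold and verifies
the Theta step alone against the old loop form on a pseudo-random state:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static uint64_t rol64(uint64_t x, unsigned int s)
{
	return (x << s) | (x >> (64 - s));	/* s stays in 1..63 here */
}

/* Loop form of Theta, as in the old keccakf(). */
static void theta_loop(uint64_t st[25])
{
	uint64_t t, bc[5];
	int i, j;

	for (i = 0; i < 5; i++)
		bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
	for (i = 0; i < 5; i++) {
		t = bc[(i + 4) % 5] ^ rol64(bc[(i + 1) % 5], 1);
		for (j = 0; j < 25; j += 5)
			st[j + i] ^= t;
	}
}

/* Unrolled form, column parities spelled out as in the patch. */
static void theta_unrolled(uint64_t st[25])
{
	uint64_t t[5], bc[5];
	int i, j;

	bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
	bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
	bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
	bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
	bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];

	t[0] = bc[4] ^ rol64(bc[1], 1);
	t[1] = bc[0] ^ rol64(bc[2], 1);
	t[2] = bc[1] ^ rol64(bc[3], 1);
	t[3] = bc[2] ^ rol64(bc[4], 1);
	t[4] = bc[3] ^ rol64(bc[0], 1);

	for (j = 0; j < 25; j += 5)
		for (i = 0; i < 5; i++)
			st[j + i] ^= t[i];
}

int main(void)
{
	uint64_t a[25], b[25];
	int i;

	srand(1);
	for (i = 0; i < 25; i++)
		a[i] = b[i] = ((uint64_t)rand() << 32) ^ (uint64_t)rand();

	theta_loop(a);
	theta_unrolled(b);
	for (i = 0; i < 25; i++)
		assert(a[i] == b[i]);
	return 0;
}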
diff --git a/crypto/shash.c b/crypto/shash.c
index e849d3ee2e27..5d732c6bb4b2 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -58,11 +58,18 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
 {
 	struct shash_alg *shash = crypto_shash_alg(tfm);
 	unsigned long alignmask = crypto_shash_alignmask(tfm);
+	int err;
 
 	if ((unsigned long)key & alignmask)
-		return shash_setkey_unaligned(tfm, key, keylen);
+		err = shash_setkey_unaligned(tfm, key, keylen);
+	else
+		err = shash->setkey(tfm, key, keylen);
+
+	if (err)
+		return err;
 
-	return shash->setkey(tfm, key, keylen);
+	crypto_shash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_shash_setkey);
 
@@ -181,6 +188,9 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
 	struct shash_alg *shash = crypto_shash_alg(tfm);
 	unsigned long alignmask = crypto_shash_alignmask(tfm);
 
+	if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+		return -ENOKEY;
+
 	if (((unsigned long)data | (unsigned long)out) & alignmask)
 		return shash_digest_unaligned(desc, data, len, out);
 
@@ -360,7 +370,8 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm)
 	crt->digest = shash_async_digest;
 	crt->setkey = shash_async_setkey;
 
-	crt->has_setkey = alg->setkey != shash_no_setkey;
+	crypto_ahash_set_flags(crt, crypto_shash_get_flags(shash) &
+				    CRYPTO_TFM_NEED_KEY);
 
 	if (alg->export)
 		crt->export = shash_async_export;
@@ -375,8 +386,14 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm)
 static int crypto_shash_init_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_shash *hash = __crypto_shash_cast(tfm);
+	struct shash_alg *alg = crypto_shash_alg(hash);
+
+	hash->descsize = alg->descsize;
+
+	if (crypto_shash_alg_has_setkey(alg) &&
+	    !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY))
+		crypto_shash_set_flags(hash, CRYPTO_TFM_NEED_KEY);
 
-	hash->descsize = crypto_shash_alg(hash)->descsize;
 	return 0;
 }
 
diff --git a/crypto/simd.c b/crypto/simd.c
index 88203370a62f..208226d7f908 100644
--- a/crypto/simd.c
+++ b/crypto/simd.c
@@ -19,9 +19,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  */
 
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 11af5fd6a443..0fe2a2923ad0 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -598,8 +598,11 @@ static int skcipher_setkey_blkcipher(struct crypto_skcipher *tfm,
 	err = crypto_blkcipher_setkey(blkcipher, key, keylen);
 	crypto_skcipher_set_flags(tfm, crypto_blkcipher_get_flags(blkcipher) &
 				       CRYPTO_TFM_RES_MASK);
+	if (err)
+		return err;
 
-	return err;
+	crypto_skcipher_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
+	return 0;
 }
 
 static int skcipher_crypt_blkcipher(struct skcipher_request *req,
@@ -674,6 +677,9 @@ static int crypto_init_skcipher_ops_blkcipher(struct crypto_tfm *tfm)
 	skcipher->ivsize = crypto_blkcipher_ivsize(blkcipher);
 	skcipher->keysize = calg->cra_blkcipher.max_keysize;
 
+	if (skcipher->keysize)
+		crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_NEED_KEY);
+
 	return 0;
 }
 
@@ -692,8 +698,11 @@ static int skcipher_setkey_ablkcipher(struct crypto_skcipher *tfm,
 	crypto_skcipher_set_flags(tfm,
 				  crypto_ablkcipher_get_flags(ablkcipher) &
 				  CRYPTO_TFM_RES_MASK);
+	if (err)
+		return err;
 
-	return err;
+	crypto_skcipher_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
+	return 0;
 }
 
 static int skcipher_crypt_ablkcipher(struct skcipher_request *req,
@@ -767,6 +776,9 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
 			    sizeof(struct ablkcipher_request);
 	skcipher->keysize = calg->cra_ablkcipher.max_keysize;
 
+	if (skcipher->keysize)
+		crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_NEED_KEY);
+
 	return 0;
 }
 
@@ -796,6 +808,7 @@ static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 {
 	struct skcipher_alg *cipher = crypto_skcipher_alg(tfm);
 	unsigned long alignmask = crypto_skcipher_alignmask(tfm);
+	int err;
 
 	if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) {
 		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
@@ -803,9 +816,15 @@ static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	}
 
 	if ((unsigned long)key & alignmask)
-		return skcipher_setkey_unaligned(tfm, key, keylen);
+		err = skcipher_setkey_unaligned(tfm, key, keylen);
+	else
+		err = cipher->setkey(tfm, key, keylen);
+
+	if (err)
+		return err;
 
-	return cipher->setkey(tfm, key, keylen);
+	crypto_skcipher_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
+	return 0;
 }
 
 static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm)
@@ -834,6 +853,9 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm)
 	skcipher->ivsize = alg->ivsize;
 	skcipher->keysize = alg->max_keysize;
 
+	if (skcipher->keysize)
+		crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_NEED_KEY);
+
 	if (alg->exit)
 		skcipher->base.exit = crypto_skcipher_exit_tfm;
 
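
Taken together, the shash and skcipher hunks implement one contract in three
hooks: init_tfm() raises CRYPTO_TFM_NEED_KEY when a key is mandatory, only a
successful setkey() clears it, and the fast paths fail with -ENOKEY while it
is raised (which is also why the ad-hoc !ctx->gf128 checks could be deleted
from ghash-generic earlier). A condensed model with toy types (ours):

#include <stdio.h>

#define ALG_OPTIONAL_KEY 0x1
#define TFM_NEED_KEY     0x2
#define ENOKEY           126	/* as on Linux */

struct toy_tfm {
	unsigned int flags;
	int has_setkey;
	unsigned int alg_flags;
};

static void toy_init(struct toy_tfm *t)
{
	/* the crypto_shash_init_tfm() rule from this series */
	if (t->has_setkey && !(t->alg_flags & ALG_OPTIONAL_KEY))
		t->flags |= TFM_NEED_KEY;
}

static int toy_setkey(struct toy_tfm *t, int err)
{
	if (err)
		return err;		/* flag stays raised on failure */
	t->flags &= ~TFM_NEED_KEY;	/* only a good key clears it */
	return 0;
}

static int toy_digest(struct toy_tfm *t)
{
	if (t->flags & TFM_NEED_KEY)
		return -ENOKEY;		/* refuse unkeyed use */
	return 0;
}

int main(void)
{
	struct toy_tfm hmac = { .has_setkey = 1 };

	toy_init(&hmac);
	printf("digest before setkey: %d\n", toy_digest(&hmac)); /* -126 */
	toy_setkey(&hmac, 0);
	printf("digest after setkey:  %d\n", toy_digest(&hmac)); /* 0 */
	return 0;
}

The skcipher paths (including the legacy blkcipher/ablkcipher adaptors)
cannot see CRYPTO_ALG_OPTIONAL_KEY on the wrapped algorithm, so they
approximate "key required" with the cruder max_keysize != 0 test.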
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 9267cbdb14d2..14213a096fd2 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -67,6 +67,7 @@ static char *alg = NULL;
 static u32 type;
 static u32 mask;
 static int mode;
+static u32 num_mb = 8;
 static char *tvmem[TVMEMSIZE];
 
 static char *check[] = {
@@ -79,6 +80,66 @@ static char *check[] = {
 	NULL
 };
 
+static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 };
+static u32 aead_sizes[] = { 16, 64, 256, 512, 1024, 2048, 4096, 8192, 0 };
+
+#define XBUFSIZE 8
+#define MAX_IVLEN 32
+
+static int testmgr_alloc_buf(char *buf[XBUFSIZE])
+{
+	int i;
+
+	for (i = 0; i < XBUFSIZE; i++) {
+		buf[i] = (void *)__get_free_page(GFP_KERNEL);
+		if (!buf[i])
+			goto err_free_buf;
+	}
+
+	return 0;
+
+err_free_buf:
+	while (i-- > 0)
+		free_page((unsigned long)buf[i]);
+
+	return -ENOMEM;
+}
+
+static void testmgr_free_buf(char *buf[XBUFSIZE])
+{
+	int i;
+
+	for (i = 0; i < XBUFSIZE; i++)
+		free_page((unsigned long)buf[i]);
+}
+
+static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE],
+			 unsigned int buflen, const void *assoc,
+			 unsigned int aad_size)
+{
+	int np = (buflen + PAGE_SIZE - 1)/PAGE_SIZE;
+	int k, rem;
+
+	if (np > XBUFSIZE) {
+		rem = PAGE_SIZE;
+		np = XBUFSIZE;
+	} else {
+		rem = buflen % PAGE_SIZE;
+	}
+
+	sg_init_table(sg, np + 1);
+
+	sg_set_buf(&sg[0], assoc, aad_size);
+
+	if (rem)
+		np--;
+	for (k = 0; k < np; k++)
+		sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE);
+
+	if (rem)
+		sg_set_buf(&sg[k + 1], xbuf[k], rem);
+}
+
 static inline int do_one_aead_op(struct aead_request *req, int ret)
 {
 	struct crypto_wait *wait = req->base.data;
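
sg_init_aead() now maps the associated data into slot 0 itself, and the new
if (rem) guards stop it from emitting a zero-length final entry when buflen
is an exact multiple of PAGE_SIZE. The slot arithmetic, modelled stand-alone
(ours, assuming 4 KiB pages):

#include <stdio.h>

#define PAGE_SZ  4096
#define XBUFSIZE 8

/* Mirror of the np/rem split in sg_init_aead(); slot 0 holds the AAD. */
static void layout(unsigned int buflen)
{
	int np = (buflen + PAGE_SZ - 1) / PAGE_SZ;
	int rem;

	if (np > XBUFSIZE) {		/* clamp oversized requests */
		rem = PAGE_SZ;
		np = XBUFSIZE;
	} else {
		rem = buflen % PAGE_SZ;
	}

	if (rem)			/* last slot is a partial page */
		printf("%5u bytes -> %d full page(s) + %d-byte tail\n",
		       buflen, np - 1, rem);
	else
		printf("%5u bytes -> %d full page(s)\n", buflen, np);
}

int main(void)
{
	layout(16);	/* 0 full pages + 16-byte tail */
	layout(8192);	/* 2 full pages, no tail */
	layout(8200);	/* 2 full pages + 8-byte tail */
	return 0;
}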
@@ -86,6 +147,298 @@ static inline int do_one_aead_op(struct aead_request *req, int ret)
 	return crypto_wait_req(ret, wait);
 }
 
+struct test_mb_aead_data {
+	struct scatterlist sg[XBUFSIZE];
+	struct scatterlist sgout[XBUFSIZE];
+	struct aead_request *req;
+	struct crypto_wait wait;
+	char *xbuf[XBUFSIZE];
+	char *xoutbuf[XBUFSIZE];
+	char *axbuf[XBUFSIZE];
+};
+
+static int do_mult_aead_op(struct test_mb_aead_data *data, int enc,
+				u32 num_mb)
+{
+	int i, rc[num_mb], err = 0;
+
+	/* Fire up a bunch of concurrent requests */
+	for (i = 0; i < num_mb; i++) {
+		if (enc == ENCRYPT)
+			rc[i] = crypto_aead_encrypt(data[i].req);
+		else
+			rc[i] = crypto_aead_decrypt(data[i].req);
+	}
+
+	/* Wait for all requests to finish */
+	for (i = 0; i < num_mb; i++) {
+		rc[i] = crypto_wait_req(rc[i], &data[i].wait);
+
+		if (rc[i]) {
+			pr_info("concurrent request %d error %d\n", i, rc[i]);
+			err = rc[i];
+		}
+	}
+
+	return err;
+}
+
+static int test_mb_aead_jiffies(struct test_mb_aead_data *data, int enc,
+				int blen, int secs, u32 num_mb)
+{
+	unsigned long start, end;
+	int bcount;
+	int ret;
+
+	for (start = jiffies, end = start + secs * HZ, bcount = 0;
+	     time_before(jiffies, end); bcount++) {
+		ret = do_mult_aead_op(data, enc, num_mb);
+		if (ret)
+			return ret;
+	}
+
+	pr_cont("%d operations in %d seconds (%ld bytes)\n",
+		bcount * num_mb, secs, (long)bcount * blen * num_mb);
+	return 0;
+}
+
+static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
+			       int blen, u32 num_mb)
+{
+	unsigned long cycles = 0;
+	int ret = 0;
+	int i;
+
+	/* Warm-up run. */
+	for (i = 0; i < 4; i++) {
+		ret = do_mult_aead_op(data, enc, num_mb);
+		if (ret)
+			goto out;
+	}
+
+	/* The real thing. */
+	for (i = 0; i < 8; i++) {
+		cycles_t start, end;
+
+		start = get_cycles();
+		ret = do_mult_aead_op(data, enc, num_mb);
+		end = get_cycles();
+
+		if (ret)
+			goto out;
+
+		cycles += end - start;
+	}
+
+out:
+	if (ret == 0)
+		pr_cont("1 operation in %lu cycles (%d bytes)\n",
+			(cycles + 4) / (8 * num_mb), blen);
+
+	return ret;
+}
+
+static void test_mb_aead_speed(const char *algo, int enc, int secs,
+			       struct aead_speed_template *template,
+			       unsigned int tcount, u8 authsize,
+			       unsigned int aad_size, u8 *keysize, u32 num_mb)
+{
+	struct test_mb_aead_data *data;
+	struct crypto_aead *tfm;
+	unsigned int i, j, iv_len;
+	const char *key;
+	const char *e;
+	void *assoc;
+	u32 *b_size;
+	char *iv;
+	int ret;
+
+	if (aad_size >= PAGE_SIZE) {
+		pr_err("associate data length (%u) too big\n", aad_size);
+		return;
+	}
+
+	iv = kzalloc(MAX_IVLEN, GFP_KERNEL);
+	if (!iv)
+		return;
+
+	if (enc == ENCRYPT)
+		e = "encryption";
+	else
+		e = "decryption";
+
+	data = kcalloc(num_mb, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		goto out_free_iv;
+
+	tfm = crypto_alloc_aead(algo, 0, 0);
+	if (IS_ERR(tfm)) {
+		pr_err("failed to load transform for %s: %ld\n",
+			algo, PTR_ERR(tfm));
+		goto out_free_data;
+	}
+
+	ret = crypto_aead_setauthsize(tfm, authsize);
+
+	for (i = 0; i < num_mb; ++i)
+		if (testmgr_alloc_buf(data[i].xbuf)) {
+			while (i--)
+				testmgr_free_buf(data[i].xbuf);
+			goto out_free_tfm;
+		}
+
+	for (i = 0; i < num_mb; ++i)
+		if (testmgr_alloc_buf(data[i].axbuf)) {
+			while (i--)
+				testmgr_free_buf(data[i].axbuf);
+			goto out_free_xbuf;
+		}
+
+	for (i = 0; i < num_mb; ++i)
+		if (testmgr_alloc_buf(data[i].xoutbuf)) {
+			while (i--)
+				testmgr_free_buf(data[i].xoutbuf);
+			goto out_free_axbuf;
+		}
+
+	for (i = 0; i < num_mb; ++i) {
+		data[i].req = aead_request_alloc(tfm, GFP_KERNEL);
+		if (!data[i].req) {
+			pr_err("alg: skcipher: Failed to allocate request for %s\n",
+			       algo);
+			while (i--)
+				aead_request_free(data[i].req);
+			goto out_free_xoutbuf;
+		}
+	}
+
+	for (i = 0; i < num_mb; ++i) {
+		crypto_init_wait(&data[i].wait);
+		aead_request_set_callback(data[i].req,
+					  CRYPTO_TFM_REQ_MAY_BACKLOG,
+					  crypto_req_done, &data[i].wait);
+	}
+
+	pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo,
+		get_driver_name(crypto_aead, tfm), e);
+
+	i = 0;
+	do {
+		b_size = aead_sizes;
+		do {
+			if (*b_size + authsize > XBUFSIZE * PAGE_SIZE) {
+				pr_err("template (%u) too big for buffer (%lu)\n",
+				       authsize + *b_size,
+				       XBUFSIZE * PAGE_SIZE);
+				goto out;
+			}
+
+			pr_info("test %u (%d bit key, %d byte blocks): ", i,
+				*keysize * 8, *b_size);
+
+			/* Set up tfm global state, i.e. the key */
+
+			memset(tvmem[0], 0xff, PAGE_SIZE);
+			key = tvmem[0];
+			for (j = 0; j < tcount; j++) {
+				if (template[j].klen == *keysize) {
+					key = template[j].key;
+					break;
+				}
+			}
+
+			crypto_aead_clear_flags(tfm, ~0);
+
+			ret = crypto_aead_setkey(tfm, key, *keysize);
+			if (ret) {
+				pr_err("setkey() failed flags=%x\n",
+				       crypto_aead_get_flags(tfm));
+				goto out;
+			}
+
+			iv_len = crypto_aead_ivsize(tfm);
+			if (iv_len)
+				memset(iv, 0xff, iv_len);
+
+			/* Now set up the per-request state, i.e. the buffers */
+
+			for (j = 0; j < num_mb; ++j) {
+				struct test_mb_aead_data *cur = &data[j];
+
+				assoc = cur->axbuf[0];
+				memset(assoc, 0xff, aad_size);
+
+				sg_init_aead(cur->sg, cur->xbuf,
+					     *b_size + (enc ? 0 : authsize),
+					     assoc, aad_size);
+
+				sg_init_aead(cur->sgout, cur->xoutbuf,
+					     *b_size + (enc ? authsize : 0),
+					     assoc, aad_size);
+
+				aead_request_set_ad(cur->req, aad_size);
+
+				if (!enc) {
+					/*
+					 * For decryption we need a proper
+					 * auth so we do the encryption path
+					 * once with buffers reversed (input
+					 * <-> output) to calculate it.
+					 */
+					aead_request_set_crypt(cur->req,
+							       cur->sgout,
+							       cur->sg,
+							       *b_size, iv);
+					ret = do_one_aead_op(cur->req,
+						crypto_aead_encrypt(cur->req));
+
+					if (ret) {
+						pr_err("calculating auth failed (%d)\n",
+						       ret);
+						break;
+					}
+				}
+
+				aead_request_set_crypt(cur->req, cur->sg,
+						       cur->sgout, *b_size +
+						       (enc ? 0 : authsize),
+						       iv);
+			}
+
+			if (secs)
+				ret = test_mb_aead_jiffies(data, enc, *b_size,
+							   secs, num_mb);
+			else
+				ret = test_mb_aead_cycles(data, enc, *b_size,
+							  num_mb);
+
+			if (ret) {
+				pr_err("%s() failed return code=%d\n", e, ret);
+				break;
+			}
+			b_size++;
+			i++;
+		} while (*b_size);
+		keysize++;
+	} while (*keysize);
+
+out:
+	for (i = 0; i < num_mb; ++i)
+		aead_request_free(data[i].req);
+out_free_xoutbuf:
+	for (i = 0; i < num_mb; ++i)
+		testmgr_free_buf(data[i].xoutbuf);
+out_free_axbuf:
+	for (i = 0; i < num_mb; ++i)
+		testmgr_free_buf(data[i].axbuf);
+out_free_xbuf:
+	for (i = 0; i < num_mb; ++i)
+		testmgr_free_buf(data[i].xbuf);
+out_free_tfm:
+	crypto_free_aead(tfm);
+out_free_data:
+	kfree(data);
+out_free_iv:
+	kfree(iv);
+}
+
 static int test_aead_jiffies(struct aead_request *req, int enc,
 				int blen, int secs)
 {
@@ -151,60 +504,6 @@ out:
 	return ret;
 }
 
-static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 };
-static u32 aead_sizes[] = { 16, 64, 256, 512, 1024, 2048, 4096, 8192, 0 };
-
-#define XBUFSIZE 8
-#define MAX_IVLEN 32
-
-static int testmgr_alloc_buf(char *buf[XBUFSIZE])
-{
-	int i;
-
-	for (i = 0; i < XBUFSIZE; i++) {
-		buf[i] = (void *)__get_free_page(GFP_KERNEL);
-		if (!buf[i])
-			goto err_free_buf;
-	}
-
-	return 0;
-
-err_free_buf:
-	while (i-- > 0)
-		free_page((unsigned long)buf[i]);
-
-	return -ENOMEM;
-}
-
-static void testmgr_free_buf(char *buf[XBUFSIZE])
-{
-	int i;
-
-	for (i = 0; i < XBUFSIZE; i++)
-		free_page((unsigned long)buf[i]);
-}
-
-static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE],
-			unsigned int buflen)
-{
-	int np = (buflen + PAGE_SIZE - 1)/PAGE_SIZE;
-	int k, rem;
-
-	if (np > XBUFSIZE) {
-		rem = PAGE_SIZE;
-		np = XBUFSIZE;
-	} else {
-		rem = buflen % PAGE_SIZE;
-	}
-
-	sg_init_table(sg, np + 1);
-	np--;
-	for (k = 0; k < np; k++)
-		sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE);
-
-	sg_set_buf(&sg[k + 1], xbuf[k], rem);
-}
-
 static void test_aead_speed(const char *algo, int enc, unsigned int secs,
 			    struct aead_speed_template *template,
 			    unsigned int tcount, u8 authsize,
@@ -316,19 +615,37 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
 				goto out;
 			}
 
-			sg_init_aead(sg, xbuf,
-				    *b_size + (enc ? 0 : authsize));
+			sg_init_aead(sg, xbuf, *b_size + (enc ? 0 : authsize),
+				     assoc, aad_size);
 
 			sg_init_aead(sgout, xoutbuf,
-				    *b_size + (enc ? authsize : 0));
+				     *b_size + (enc ? authsize : 0), assoc,
+				     aad_size);
 
-			sg_set_buf(&sg[0], assoc, aad_size);
-			sg_set_buf(&sgout[0], assoc, aad_size);
+			aead_request_set_ad(req, aad_size);
+
+			if (!enc) {
+				/*
+				 * For decryption we need a proper auth so
+				 * we do the encryption path once with buffers
+				 * reversed (input <-> output) to calculate it
+				 */
+				aead_request_set_crypt(req, sgout, sg,
+						       *b_size, iv);
+				ret = do_one_aead_op(req,
+						     crypto_aead_encrypt(req));
+
+				if (ret) {
+					pr_err("calculating auth failed (%d)\n",
+					       ret);
+					break;
+				}
+			}
 
 			aead_request_set_crypt(req, sg, sgout,
 					       *b_size + (enc ? 0 : authsize),
 					       iv);
-			aead_request_set_ad(req, aad_size);
 
 			if (secs)
 				ret = test_aead_jiffies(req, enc, *b_size,
@@ -381,24 +698,98 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret)
 }
 
 struct test_mb_ahash_data {
-	struct scatterlist sg[TVMEMSIZE];
+	struct scatterlist sg[XBUFSIZE];
 	char result[64];
 	struct ahash_request *req;
 	struct crypto_wait wait;
 	char *xbuf[XBUFSIZE];
 };
 
-static void test_mb_ahash_speed(const char *algo, unsigned int sec,
-				struct hash_speed *speed)
+static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb)
+{
+	int i, rc[num_mb], err = 0;
+
+	/* Fire up a bunch of concurrent requests */
+	for (i = 0; i < num_mb; i++)
+		rc[i] = crypto_ahash_digest(data[i].req);
+
+	/* Wait for all requests to finish */
+	for (i = 0; i < num_mb; i++) {
+		rc[i] = crypto_wait_req(rc[i], &data[i].wait);
+
+		if (rc[i]) {
+			pr_info("concurrent request %d error %d\n", i, rc[i]);
+			err = rc[i];
+		}
+	}
+
+	return err;
+}
+
+static int test_mb_ahash_jiffies(struct test_mb_ahash_data *data, int blen,
+				 int secs, u32 num_mb)
+{
+	unsigned long start, end;
+	int bcount;
+	int ret;
+
+	for (start = jiffies, end = start + secs * HZ, bcount = 0;
+	     time_before(jiffies, end); bcount++) {
+		ret = do_mult_ahash_op(data, num_mb);
+		if (ret)
+			return ret;
+	}
+
+	pr_cont("%d operations in %d seconds (%ld bytes)\n",
+		bcount * num_mb, secs, (long)bcount * blen * num_mb);
+	return 0;
+}
+
+static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
+				u32 num_mb)
+{
+	unsigned long cycles = 0;
+	int ret = 0;
+	int i;
+
+	/* Warm-up run. */
+	for (i = 0; i < 4; i++) {
+		ret = do_mult_ahash_op(data, num_mb);
+		if (ret)
+			goto out;
+	}
+
+	/* The real thing. */
+	for (i = 0; i < 8; i++) {
+		cycles_t start, end;
+
+		start = get_cycles();
+		ret = do_mult_ahash_op(data, num_mb);
+		end = get_cycles();
+
+		if (ret)
+			goto out;
+
+		cycles += end - start;
+	}
+
+out:
+	if (ret == 0)
+		pr_cont("1 operation in %lu cycles (%d bytes)\n",
+			(cycles + 4) / (8 * num_mb), blen);
+
+	return ret;
+}
+
+static void test_mb_ahash_speed(const char *algo, unsigned int secs,
+				struct hash_speed *speed, u32 num_mb)
 {
 	struct test_mb_ahash_data *data;
 	struct crypto_ahash *tfm;
-	unsigned long start, end;
-	unsigned long cycles;
 	unsigned int i, j, k;
 	int ret;
 
-	data = kzalloc(sizeof(*data) * 8, GFP_KERNEL);
+	data = kcalloc(num_mb, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return;
 
@@ -409,7 +800,7 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec,
 		goto free_data;
 	}
 
-	for (i = 0; i < 8; ++i) {
+	for (i = 0; i < num_mb; ++i) {
 		if (testmgr_alloc_buf(data[i].xbuf))
 			goto out;
 
@@ -424,7 +815,12 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec,
 
 		ahash_request_set_callback(data[i].req, 0, crypto_req_done,
 					   &data[i].wait);
-		test_hash_sg_init(data[i].sg);
+
+		sg_init_table(data[i].sg, XBUFSIZE);
+		for (j = 0; j < XBUFSIZE; j++) {
+			sg_set_buf(data[i].sg + j, data[i].xbuf[j], PAGE_SIZE);
+			memset(data[i].xbuf[j], 0xff, PAGE_SIZE);
+		}
 	}
 
 	pr_info("\ntesting speed of multibuffer %s (%s)\n", algo,
@@ -435,16 +831,16 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec,
 		if (speed[i].blen != speed[i].plen)
 			continue;
 
-		if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) {
+		if (speed[i].blen > XBUFSIZE * PAGE_SIZE) {
 			pr_err("template (%u) too big for tvmem (%lu)\n",
-			       speed[i].blen, TVMEMSIZE * PAGE_SIZE);
+			       speed[i].blen, XBUFSIZE * PAGE_SIZE);
 			goto out;
 		}
 
 		if (speed[i].klen)
 			crypto_ahash_setkey(tfm, tvmem[0], speed[i].klen);
 
-		for (k = 0; k < 8; k++)
+		for (k = 0; k < num_mb; k++)
 			ahash_request_set_crypt(data[k].req, data[k].sg,
 						data[k].result, speed[i].blen);
 
@@ -453,34 +849,12 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec,
 			i, speed[i].blen, speed[i].plen,
 			speed[i].blen / speed[i].plen);
 
-		start = get_cycles();
-
-		for (k = 0; k < 8; k++) {
-			ret = crypto_ahash_digest(data[k].req);
-			if (ret == -EINPROGRESS) {
-				ret = 0;
-				continue;
-			}
-
-			if (ret)
-				break;
-
-			crypto_req_done(&data[k].req->base, 0);
-		}
-
-		for (j = 0; j < k; j++) {
-			struct crypto_wait *wait = &data[j].wait;
-			int wait_ret;
-
-			wait_ret = crypto_wait_req(-EINPROGRESS, wait);
-			if (wait_ret)
-				ret = wait_ret;
-		}
+		if (secs)
+			ret = test_mb_ahash_jiffies(data, speed[i].blen, secs,
+						    num_mb);
+		else
+			ret = test_mb_ahash_cycles(data, speed[i].blen, num_mb);
 
-		end = get_cycles();
-		cycles = end - start;
-		pr_cont("%6lu cycles/operation, %4lu cycles/byte\n",
-			cycles, cycles / (8 * speed[i].blen));
 
 		if (ret) {
 			pr_err("At least one hashing failed ret=%d\n", ret);
@@ -489,10 +863,10 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec,
 	}
 
 out:
-	for (k = 0; k < 8; ++k)
+	for (k = 0; k < num_mb; ++k)
 		ahash_request_free(data[k].req);
 
-	for (k = 0; k < 8; ++k)
+	for (k = 0; k < num_mb; ++k)
 		testmgr_free_buf(data[k].xbuf);
 
 	crypto_free_ahash(tfm);
@@ -736,6 +1110,254 @@ static void test_hash_speed(const char *algo, unsigned int secs,
 	return test_ahash_speed_common(algo, secs, speed, CRYPTO_ALG_ASYNC);
 }
 
+struct test_mb_skcipher_data {
+	struct scatterlist sg[XBUFSIZE];
+	struct skcipher_request *req;
+	struct crypto_wait wait;
+	char *xbuf[XBUFSIZE];
+};
+
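+/*
+ * Fire num_mb requests back to back and only then wait for them all, so
+ * that asynchronous implementations get a chance to run in parallel.
+ */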
+static int do_mult_acipher_op(struct test_mb_skcipher_data *data, int enc,
+				u32 num_mb)
+{
+	int i, rc[num_mb], err = 0;
+
+	/* Fire up a bunch of concurrent requests */
+	for (i = 0; i < num_mb; i++) {
+		if (enc == ENCRYPT)
+			rc[i] = crypto_skcipher_encrypt(data[i].req);
+		else
+			rc[i] = crypto_skcipher_decrypt(data[i].req);
+	}
+
+	/* Wait for all requests to finish */
+	for (i = 0; i < num_mb; i++) {
+		rc[i] = crypto_wait_req(rc[i], &data[i].wait);
+
+		if (rc[i]) {
+			pr_info("concurrent request %d error %d\n", i, rc[i]);
+			err = rc[i];
+		}
+	}
+
+	return err;
+}
+
+static int test_mb_acipher_jiffies(struct test_mb_skcipher_data *data, int enc,
+				int blen, int secs, u32 num_mb)
+{
+	unsigned long start, end;
+	int bcount;
+	int ret;
+
+	for (start = jiffies, end = start + secs * HZ, bcount = 0;
+	     time_before(jiffies, end); bcount++) {
+		ret = do_mult_acipher_op(data, enc, num_mb);
+		if (ret)
+			return ret;
+	}
+
+	pr_cont("%d operations in %d seconds (%ld bytes)\n",
+		bcount * num_mb, secs, (long)bcount * blen * num_mb);
+	return 0;
+}
+
+static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
+			       int blen, u32 num_mb)
+{
+	unsigned long cycles = 0;
+	int ret = 0;
+	int i;
+
+	/* Warm-up run. */
+	for (i = 0; i < 4; i++) {
+		ret = do_mult_acipher_op(data, enc, num_mb);
+		if (ret)
+			goto out;
+	}
+
+	/* The real thing. */
+	for (i = 0; i < 8; i++) {
+		cycles_t start, end;
+
+		start = get_cycles();
+		ret = do_mult_acipher_op(data, enc, num_mb);
+		end = get_cycles();
+
+		if (ret)
+			goto out;
+
+		cycles += end - start;
+	}
+
+out:
+	if (ret == 0)
+		pr_cont("1 operation in %lu cycles (%d bytes)\n",
+			(cycles + 4) / (8 * num_mb), blen);
+
+	return ret;
+}
+
+static void test_mb_skcipher_speed(const char *algo, int enc, int secs,
+				   struct cipher_speed_template *template,
+				   unsigned int tcount, u8 *keysize, u32 num_mb)
+{
+	struct test_mb_skcipher_data *data;
+	struct crypto_skcipher *tfm;
+	unsigned int i, j, iv_len;
+	const char *key;
+	const char *e;
+	u32 *b_size;
+	char iv[128];
+	int ret;
+
+	if (enc == ENCRYPT)
+		e = "encryption";
+	else
+		e = "decryption";
+
+	data = kcalloc(num_mb, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return;
+
+	tfm = crypto_alloc_skcipher(algo, 0, 0);
+	if (IS_ERR(tfm)) {
+		pr_err("failed to load transform for %s: %ld\n",
+			algo, PTR_ERR(tfm));
+		goto out_free_data;
+	}
+
+	for (i = 0; i < num_mb; ++i)
+		if (testmgr_alloc_buf(data[i].xbuf)) {
+			while (i--)
+				testmgr_free_buf(data[i].xbuf);
+			goto out_free_tfm;
+		}
+
+	for (i = 0; i < num_mb; ++i) {
+		data[i].req = skcipher_request_alloc(tfm, GFP_KERNEL);
+		if (!data[i].req) {
+			pr_err("alg: skcipher: Failed to allocate request for %s\n",
+			       algo);
+			while (i--)
+				skcipher_request_free(data[i].req);
+			goto out_free_xbuf;
+		}
+	}
+
+	for (i = 0; i < num_mb; ++i) {
+		skcipher_request_set_callback(data[i].req,
+					      CRYPTO_TFM_REQ_MAY_BACKLOG,
+					      crypto_req_done, &data[i].wait);
+		crypto_init_wait(&data[i].wait);
+	}
+
+	pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo,
+		get_driver_name(crypto_skcipher, tfm), e);
+
+	i = 0;
+	do {
+		b_size = block_sizes;
+		do {
+			if (*b_size > XBUFSIZE * PAGE_SIZE) {
+				pr_err("template (%u) too big for buffer (%lu)\n",
+				       *b_size, XBUFSIZE * PAGE_SIZE);
+				goto out;
+			}
+
+			pr_info("test %u (%d bit key, %d byte blocks): ", i,
+				*keysize * 8, *b_size);
+
+			/* Set up tfm global state, i.e. the key */
+
+			memset(tvmem[0], 0xff, PAGE_SIZE);
+			key = tvmem[0];
+			for (j = 0; j < tcount; j++) {
+				if (template[j].klen == *keysize) {
+					key = template[j].key;
+					break;
+				}
+			}
+
+			crypto_skcipher_clear_flags(tfm, ~0);
+
+			ret = crypto_skcipher_setkey(tfm, key, *keysize);
+			if (ret) {
+				pr_err("setkey() failed flags=%x\n",
+				       crypto_skcipher_get_flags(tfm));
+				goto out;
+			}
+
+			iv_len = crypto_skcipher_ivsize(tfm);
+			if (iv_len)
+				memset(iv, 0xff, iv_len);
+
+			/* Now setup per request stuff, i.e. buffers */
+
+			for (j = 0; j < num_mb; ++j) {
+				struct test_mb_skcipher_data *cur = &data[j];
+				unsigned int k = *b_size;
+				unsigned int pages = DIV_ROUND_UP(k, PAGE_SIZE);
+				unsigned int p = 0;
+
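+				/*
+				 * Split the buffer page by page into the
+				 * scatterlist, with the remainder in the
+				 * last entry.
+				 */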
+				sg_init_table(cur->sg, pages);
+
+				while (k > PAGE_SIZE) {
+					sg_set_buf(cur->sg + p, cur->xbuf[p],
+						   PAGE_SIZE);
+					memset(cur->xbuf[p], 0xff, PAGE_SIZE);
+					p++;
+					k -= PAGE_SIZE;
+				}
+
+				sg_set_buf(cur->sg + p, cur->xbuf[p], k);
+				memset(cur->xbuf[p], 0xff, k);
+
+				skcipher_request_set_crypt(cur->req, cur->sg,
+							   cur->sg, *b_size,
+							   iv);
+			}
+
+			if (secs)
+				ret = test_mb_acipher_jiffies(data, enc,
+							      *b_size, secs,
+							      num_mb);
+			else
+				ret = test_mb_acipher_cycles(data, enc,
+							     *b_size, num_mb);
+
+			if (ret) {
+				pr_err("%s() failed flags=%x\n", e,
+				       crypto_skcipher_get_flags(tfm));
+				break;
+			}
+			b_size++;
+			i++;
+		} while (*b_size);
+		keysize++;
+	} while (*keysize);
+
+out:
+	for (i = 0; i < num_mb; ++i)
+		skcipher_request_free(data[i].req);
+out_free_xbuf:
+	for (i = 0; i < num_mb; ++i)
+		testmgr_free_buf(data[i].xbuf);
+out_free_tfm:
+	crypto_free_skcipher(tfm);
+out_free_data:
+	kfree(data);
+}
+
 static inline int do_one_acipher_op(struct skcipher_request *req, int ret)
 {
 	struct crypto_wait *wait = req->base.data;
@@ -1557,16 +2179,24 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 				NULL, 0, 16, 16, aead_speed_template_20);
 		test_aead_speed("gcm(aes)", ENCRYPT, sec,
 				NULL, 0, 16, 8, speed_template_16_24_32);
+		test_aead_speed("rfc4106(gcm(aes))", DECRYPT, sec,
+				NULL, 0, 16, 16, aead_speed_template_20);
+		test_aead_speed("gcm(aes)", DECRYPT, sec,
+				NULL, 0, 16, 8, speed_template_16_24_32);
 		break;
 
 	case 212:
 		test_aead_speed("rfc4309(ccm(aes))", ENCRYPT, sec,
 				NULL, 0, 16, 16, aead_speed_template_19);
+		test_aead_speed("rfc4309(ccm(aes))", DECRYPT, sec,
+				NULL, 0, 16, 16, aead_speed_template_19);
 		break;
 
 	case 213:
 		test_aead_speed("rfc7539esp(chacha20,poly1305)", ENCRYPT, sec,
 				NULL, 0, 16, 8, aead_speed_template_36);
+		test_aead_speed("rfc7539esp(chacha20,poly1305)", DECRYPT, sec,
+				NULL, 0, 16, 8, aead_speed_template_36);
 		break;
 
 	case 214:
@@ -1574,6 +2204,33 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 				  speed_template_32);
 		break;
 
+	case 215:
+		test_mb_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec, NULL,
+				   0, 16, 16, aead_speed_template_20, num_mb);
+		test_mb_aead_speed("gcm(aes)", ENCRYPT, sec, NULL, 0, 16, 8,
+				   speed_template_16_24_32, num_mb);
+		test_mb_aead_speed("rfc4106(gcm(aes))", DECRYPT, sec, NULL,
+				   0, 16, 16, aead_speed_template_20, num_mb);
+		test_mb_aead_speed("gcm(aes)", DECRYPT, sec, NULL, 0, 16, 8,
+				   speed_template_16_24_32, num_mb);
+		break;
+
+	case 216:
+		test_mb_aead_speed("rfc4309(ccm(aes))", ENCRYPT, sec, NULL, 0,
+				   16, 16, aead_speed_template_19, num_mb);
+		test_mb_aead_speed("rfc4309(ccm(aes))", DECRYPT, sec, NULL, 0,
+				   16, 16, aead_speed_template_19, num_mb);
+		break;
+
+	case 217:
+		test_mb_aead_speed("rfc7539esp(chacha20,poly1305)", ENCRYPT,
+				   sec, NULL, 0, 16, 8, aead_speed_template_36,
+				   num_mb);
+		test_mb_aead_speed("rfc7539esp(chacha20,poly1305)", DECRYPT,
+				   sec, NULL, 0, 16, 8, aead_speed_template_36,
+				   num_mb);
+		break;
+
 	case 300:
 		if (alg) {
 			test_hash_speed(alg, sec, generic_hash_speed_template);
@@ -1778,19 +2435,23 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 		if (mode > 400 && mode < 500) break;
 		/* fall through */
 	case 422:
-		test_mb_ahash_speed("sha1", sec, generic_hash_speed_template);
+		test_mb_ahash_speed("sha1", sec, generic_hash_speed_template,
+				    num_mb);
 		if (mode > 400 && mode < 500) break;
 		/* fall through */
 	case 423:
-		test_mb_ahash_speed("sha256", sec, generic_hash_speed_template);
+		test_mb_ahash_speed("sha256", sec, generic_hash_speed_template,
+				    num_mb);
 		if (mode > 400 && mode < 500) break;
 		/* fall through */
 	case 424:
-		test_mb_ahash_speed("sha512", sec, generic_hash_speed_template);
+		test_mb_ahash_speed("sha512", sec, generic_hash_speed_template,
+				    num_mb);
 		if (mode > 400 && mode < 500) break;
 		/* fall through */
 	case 425:
-		test_mb_ahash_speed("sm3", sec, generic_hash_speed_template);
+		test_mb_ahash_speed("sm3", sec, generic_hash_speed_template,
+				    num_mb);
 		if (mode > 400 && mode < 500) break;
 		/* fall through */
 	case 499:
@@ -2008,6 +2669,218 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 				   speed_template_8_32);
 		break;
 
+	case 600:
+		test_mb_skcipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ecb(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cbc(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cbc(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("lrw(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_40_48, num_mb);
+		test_mb_skcipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_40_48, num_mb);
+		test_mb_skcipher_speed("xts(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		test_mb_skcipher_speed("xts(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		test_mb_skcipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ctr(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ctr(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cfb(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cfb(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ofb(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ofb(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("rfc3686(ctr(aes))", ENCRYPT, sec, NULL,
+				       0, speed_template_20_28_36, num_mb);
+		test_mb_skcipher_speed("rfc3686(ctr(aes))", DECRYPT, sec, NULL,
+				       0, speed_template_20_28_36, num_mb);
+		break;
+
+	case 601:
+		test_mb_skcipher_speed("ecb(des3_ede)", ENCRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("ecb(des3_ede)", DECRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("cbc(des3_ede)", ENCRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("cbc(des3_ede)", DECRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("cfb(des3_ede)", ENCRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("cfb(des3_ede)", DECRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("ofb(des3_ede)", ENCRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("ofb(des3_ede)", DECRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		break;
+
+	case 602:
+		test_mb_skcipher_speed("ecb(des)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("ecb(des)", DECRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("cbc(des)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("cbc(des)", DECRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("cfb(des)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("cfb(des)", DECRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("ofb(des)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("ofb(des)", DECRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		break;
+
+	case 603:
+		test_mb_skcipher_speed("ecb(serpent)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ecb(serpent)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("cbc(serpent)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("cbc(serpent)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ctr(serpent)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ctr(serpent)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("lrw(serpent)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_48, num_mb);
+		test_mb_skcipher_speed("lrw(serpent)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_48, num_mb);
+		test_mb_skcipher_speed("xts(serpent)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		test_mb_skcipher_speed("xts(serpent)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		break;
+
+	case 604:
+		test_mb_skcipher_speed("ecb(twofish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ecb(twofish)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cbc(twofish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cbc(twofish)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ctr(twofish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ctr(twofish)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("lrw(twofish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_40_48, num_mb);
+		test_mb_skcipher_speed("lrw(twofish)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_40_48, num_mb);
+		test_mb_skcipher_speed("xts(twofish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_48_64, num_mb);
+		test_mb_skcipher_speed("xts(twofish)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_48_64, num_mb);
+		break;
+
+	case 605:
+		test_mb_skcipher_speed("ecb(arc4)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		break;
+
+	case 606:
+		test_mb_skcipher_speed("ecb(cast5)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8_16, num_mb);
+		test_mb_skcipher_speed("ecb(cast5)", DECRYPT, sec, NULL, 0,
+				       speed_template_8_16, num_mb);
+		test_mb_skcipher_speed("cbc(cast5)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8_16, num_mb);
+		test_mb_skcipher_speed("cbc(cast5)", DECRYPT, sec, NULL, 0,
+				       speed_template_8_16, num_mb);
+		test_mb_skcipher_speed("ctr(cast5)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8_16, num_mb);
+		test_mb_skcipher_speed("ctr(cast5)", DECRYPT, sec, NULL, 0,
+				       speed_template_8_16, num_mb);
+		break;
+
+	case 607:
+		test_mb_skcipher_speed("ecb(cast6)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ecb(cast6)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("cbc(cast6)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("cbc(cast6)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ctr(cast6)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ctr(cast6)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("lrw(cast6)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_48, num_mb);
+		test_mb_skcipher_speed("lrw(cast6)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_48, num_mb);
+		test_mb_skcipher_speed("xts(cast6)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		test_mb_skcipher_speed("xts(cast6)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		break;
+
+	case 608:
+		test_mb_skcipher_speed("ecb(camellia)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ecb(camellia)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("cbc(camellia)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("cbc(camellia)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ctr(camellia)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ctr(camellia)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("lrw(camellia)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_48, num_mb);
+		test_mb_skcipher_speed("lrw(camellia)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_48, num_mb);
+		test_mb_skcipher_speed("xts(camellia)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		test_mb_skcipher_speed("xts(camellia)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		break;
+
+	case 609:
+		test_mb_skcipher_speed("ecb(blowfish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8_32, num_mb);
+		test_mb_skcipher_speed("ecb(blowfish)", DECRYPT, sec, NULL, 0,
+				       speed_template_8_32, num_mb);
+		test_mb_skcipher_speed("cbc(blowfish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8_32, num_mb);
+		test_mb_skcipher_speed("cbc(blowfish)", DECRYPT, sec, NULL, 0,
+				       speed_template_8_32, num_mb);
+		test_mb_skcipher_speed("ctr(blowfish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8_32, num_mb);
+		test_mb_skcipher_speed("ctr(blowfish)", DECRYPT, sec, NULL, 0,
+				       speed_template_8_32, num_mb);
+		break;
+
 	case 1000:
 		test_available();
 		break;
@@ -2069,6 +2942,8 @@ module_param(mode, int, 0);
 module_param(sec, uint, 0);
 MODULE_PARM_DESC(sec, "Length in seconds of speed tests "
 		      "(defaults to zero which uses CPU cycles instead)");
+module_param(num_mb, uint, 0000);
+MODULE_PARM_DESC(num_mb, "Number of concurrent requests to be used in mb speed tests (defaults to 8)");
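+
+/*
+ * Illustrative invocation (example values, not defaults):
+ *
+ *   modprobe tcrypt mode=600 sec=1 num_mb=32
+ *
+ * runs the multibuffer skcipher speed tests (case 600) for one second
+ * per block size with 32 concurrent requests instead of the default 8.
+ */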
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Quick & dirty crypto testing module");
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 29d7020b8826..d5e23a142a04 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -177,6 +177,18 @@ static void testmgr_free_buf(char *buf[XBUFSIZE])
 		free_page((unsigned long)buf[i]);
 }
 
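+/*
+ * Check that a buffer still holds only the byte it was seeded with, i.e.
+ * that the operation under test did not write to it.
+ */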
+static int ahash_guard_result(char *result, char c, int size)
+{
+	int i;
+
+	for (i = 0; i < size; i++) {
+		if (result[i] != c)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int ahash_partial_update(struct ahash_request **preq,
 	struct crypto_ahash *tfm, const struct hash_testvec *template,
 	void *hash_buff, int k, int temp, struct scatterlist *sg,
@@ -185,7 +197,8 @@ static int ahash_partial_update(struct ahash_request **preq,
 	char *state;
 	struct ahash_request *req;
 	int statesize, ret = -EINVAL;
-	const char guard[] = { 0x00, 0xba, 0xad, 0x00 };
+	static const unsigned char guard[] = { 0x00, 0xba, 0xad, 0x00 };
+	int digestsize = crypto_ahash_digestsize(tfm);
 
 	req = *preq;
 	statesize = crypto_ahash_statesize(
@@ -196,12 +209,19 @@ static int ahash_partial_update(struct ahash_request **preq,
 		goto out_nostate;
 	}
 	memcpy(state + statesize, guard, sizeof(guard));
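+	/* Poison the result buffer; export()/import() must not touch it. */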
+	memset(result, 1, digestsize);
 	ret = crypto_ahash_export(req, state);
 	WARN_ON(memcmp(state + statesize, guard, sizeof(guard)));
 	if (ret) {
 		pr_err("alg: hash: Failed to export() for %s\n", algo);
 		goto out;
 	}
+	ret = ahash_guard_result(result, 1, digestsize);
+	if (ret) {
+		pr_err("alg: hash: Failed, export used req->result for %s\n",
+		       algo);
+		goto out;
+	}
 	ahash_request_free(req);
 	req = ahash_request_alloc(tfm, GFP_KERNEL);
 	if (!req) {
@@ -221,6 +241,12 @@ static int ahash_partial_update(struct ahash_request **preq,
 		pr_err("alg: hash: Failed to import() for %s\n", algo);
 		goto out;
 	}
+	ret = ahash_guard_result(result, 1, digestsize);
+	if (ret) {
+		pr_err("alg: hash: Failed, import used req->result for %s\n",
+		       algo);
+		goto out;
+	}
 	ret = crypto_wait_req(crypto_ahash_update(req), wait);
 	if (ret)
 		goto out;
@@ -316,18 +342,31 @@ static int __test_hash(struct crypto_ahash *tfm,
 				goto out;
 			}
 		} else {
+			memset(result, 1, digest_size);
 			ret = crypto_wait_req(crypto_ahash_init(req), &wait);
 			if (ret) {
 				pr_err("alg: hash: init failed on test %d "
 				       "for %s: ret=%d\n", j, algo, -ret);
 				goto out;
 			}
+			ret = ahash_guard_result(result, 1, digest_size);
+			if (ret) {
+				pr_err("alg: hash: init failed on test %d "
+				       "for %s: used req->result\n", j, algo);
+				goto out;
+			}
 			ret = crypto_wait_req(crypto_ahash_update(req), &wait);
 			if (ret) {
 				pr_err("alg: hash: update failed on test %d "
 				       "for %s: ret=%d\n", j, algo, -ret);
 				goto out;
 			}
+			ret = ahash_guard_result(result, 1, digest_size);
+			if (ret) {
+				pr_err("alg: hash: update failed on test %d "
+				       "for %s: used req->result\n", j, algo);
+				goto out;
+			}
 			ret = crypto_wait_req(crypto_ahash_final(req), &wait);
 			if (ret) {
 				pr_err("alg: hash: final failed on test %d "
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index a714b6293959..6044f6906bd6 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -1052,6 +1052,142 @@ static const struct hash_testvec sha3_224_tv_template[] = {
 				"\xc9\xfd\x55\x74\x49\x44\x79\xba"
 				"\x5c\x7e\x7a\xb7\x6e\xf2\x64\xea"
 				"\xd0\xfc\xce\x33",
+		.np	= 2,
+		.tap	= { 28, 28 },
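+		/* i.e. the input is fed as two 28-byte updates */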
+	}, {
+		.plaintext = "\x08\x9f\x13\xaa\x41\xd8\x4c\xe3"
+			     "\x7a\x11\x85\x1c\xb3\x27\xbe\x55"
+			     "\xec\x60\xf7\x8e\x02\x99\x30\xc7"
+			     "\x3b\xd2\x69\x00\x74\x0b\xa2\x16"
+			     "\xad\x44\xdb\x4f\xe6\x7d\x14\x88"
+			     "\x1f\xb6\x2a\xc1\x58\xef\x63\xfa"
+			     "\x91\x05\x9c\x33\xca\x3e\xd5\x6c"
+			     "\x03\x77\x0e\xa5\x19\xb0\x47\xde"
+			     "\x52\xe9\x80\x17\x8b\x22\xb9\x2d"
+			     "\xc4\x5b\xf2\x66\xfd\x94\x08\x9f"
+			     "\x36\xcd\x41\xd8\x6f\x06\x7a\x11"
+			     "\xa8\x1c\xb3\x4a\xe1\x55\xec\x83"
+			     "\x1a\x8e\x25\xbc\x30\xc7\x5e\xf5"
+			     "\x69\x00\x97\x0b\xa2\x39\xd0\x44"
+			     "\xdb\x72\x09\x7d\x14\xab\x1f\xb6"
+			     "\x4d\xe4\x58\xef\x86\x1d\x91\x28"
+			     "\xbf\x33\xca\x61\xf8\x6c\x03\x9a"
+			     "\x0e\xa5\x3c\xd3\x47\xde\x75\x0c"
+			     "\x80\x17\xae\x22\xb9\x50\xe7\x5b"
+			     "\xf2\x89\x20\x94\x2b\xc2\x36\xcd"
+			     "\x64\xfb\x6f\x06\x9d\x11\xa8\x3f"
+			     "\xd6\x4a\xe1\x78\x0f\x83\x1a\xb1"
+			     "\x25\xbc\x53\xea\x5e\xf5\x8c\x00"
+			     "\x97\x2e\xc5\x39\xd0\x67\xfe\x72"
+			     "\x09\xa0\x14\xab\x42\xd9\x4d\xe4"
+			     "\x7b\x12\x86\x1d\xb4\x28\xbf\x56"
+			     "\xed\x61\xf8\x8f\x03\x9a\x31\xc8"
+			     "\x3c\xd3\x6a\x01\x75\x0c\xa3\x17"
+			     "\xae\x45\xdc\x50\xe7\x7e\x15\x89"
+			     "\x20\xb7\x2b\xc2\x59\xf0\x64\xfb"
+			     "\x92\x06\x9d\x34\xcb\x3f\xd6\x6d"
+			     "\x04\x78\x0f\xa6\x1a\xb1\x48\xdf"
+			     "\x53\xea\x81\x18\x8c\x23\xba\x2e"
+			     "\xc5\x5c\xf3\x67\xfe\x95\x09\xa0"
+			     "\x37\xce\x42\xd9\x70\x07\x7b\x12"
+			     "\xa9\x1d\xb4\x4b\xe2\x56\xed\x84"
+			     "\x1b\x8f\x26\xbd\x31\xc8\x5f\xf6"
+			     "\x6a\x01\x98\x0c\xa3\x3a\xd1\x45"
+			     "\xdc\x73\x0a\x7e\x15\xac\x20\xb7"
+			     "\x4e\xe5\x59\xf0\x87\x1e\x92\x29"
+			     "\xc0\x34\xcb\x62\xf9\x6d\x04\x9b"
+			     "\x0f\xa6\x3d\xd4\x48\xdf\x76\x0d"
+			     "\x81\x18\xaf\x23\xba\x51\xe8\x5c"
+			     "\xf3\x8a\x21\x95\x2c\xc3\x37\xce"
+			     "\x65\xfc\x70\x07\x9e\x12\xa9\x40"
+			     "\xd7\x4b\xe2\x79\x10\x84\x1b\xb2"
+			     "\x26\xbd\x54\xeb\x5f\xf6\x8d\x01"
+			     "\x98\x2f\xc6\x3a\xd1\x68\xff\x73"
+			     "\x0a\xa1\x15\xac\x43\xda\x4e\xe5"
+			     "\x7c\x13\x87\x1e\xb5\x29\xc0\x57"
+			     "\xee\x62\xf9\x90\x04\x9b\x32\xc9"
+			     "\x3d\xd4\x6b\x02\x76\x0d\xa4\x18"
+			     "\xaf\x46\xdd\x51\xe8\x7f\x16\x8a"
+			     "\x21\xb8\x2c\xc3\x5a\xf1\x65\xfc"
+			     "\x93\x07\x9e\x35\xcc\x40\xd7\x6e"
+			     "\x05\x79\x10\xa7\x1b\xb2\x49\xe0"
+			     "\x54\xeb\x82\x19\x8d\x24\xbb\x2f"
+			     "\xc6\x5d\xf4\x68\xff\x96\x0a\xa1"
+			     "\x38\xcf\x43\xda\x71\x08\x7c\x13"
+			     "\xaa\x1e\xb5\x4c\xe3\x57\xee\x85"
+			     "\x1c\x90\x27\xbe\x32\xc9\x60\xf7"
+			     "\x6b\x02\x99\x0d\xa4\x3b\xd2\x46"
+			     "\xdd\x74\x0b\x7f\x16\xad\x21\xb8"
+			     "\x4f\xe6\x5a\xf1\x88\x1f\x93\x2a"
+			     "\xc1\x35\xcc\x63\xfa\x6e\x05\x9c"
+			     "\x10\xa7\x3e\xd5\x49\xe0\x77\x0e"
+			     "\x82\x19\xb0\x24\xbb\x52\xe9\x5d"
+			     "\xf4\x8b\x22\x96\x2d\xc4\x38\xcf"
+			     "\x66\xfd\x71\x08\x9f\x13\xaa\x41"
+			     "\xd8\x4c\xe3\x7a\x11\x85\x1c\xb3"
+			     "\x27\xbe\x55\xec\x60\xf7\x8e\x02"
+			     "\x99\x30\xc7\x3b\xd2\x69\x00\x74"
+			     "\x0b\xa2\x16\xad\x44\xdb\x4f\xe6"
+			     "\x7d\x14\x88\x1f\xb6\x2a\xc1\x58"
+			     "\xef\x63\xfa\x91\x05\x9c\x33\xca"
+			     "\x3e\xd5\x6c\x03\x77\x0e\xa5\x19"
+			     "\xb0\x47\xde\x52\xe9\x80\x17\x8b"
+			     "\x22\xb9\x2d\xc4\x5b\xf2\x66\xfd"
+			     "\x94\x08\x9f\x36\xcd\x41\xd8\x6f"
+			     "\x06\x7a\x11\xa8\x1c\xb3\x4a\xe1"
+			     "\x55\xec\x83\x1a\x8e\x25\xbc\x30"
+			     "\xc7\x5e\xf5\x69\x00\x97\x0b\xa2"
+			     "\x39\xd0\x44\xdb\x72\x09\x7d\x14"
+			     "\xab\x1f\xb6\x4d\xe4\x58\xef\x86"
+			     "\x1d\x91\x28\xbf\x33\xca\x61\xf8"
+			     "\x6c\x03\x9a\x0e\xa5\x3c\xd3\x47"
+			     "\xde\x75\x0c\x80\x17\xae\x22\xb9"
+			     "\x50\xe7\x5b\xf2\x89\x20\x94\x2b"
+			     "\xc2\x36\xcd\x64\xfb\x6f\x06\x9d"
+			     "\x11\xa8\x3f\xd6\x4a\xe1\x78\x0f"
+			     "\x83\x1a\xb1\x25\xbc\x53\xea\x5e"
+			     "\xf5\x8c\x00\x97\x2e\xc5\x39\xd0"
+			     "\x67\xfe\x72\x09\xa0\x14\xab\x42"
+			     "\xd9\x4d\xe4\x7b\x12\x86\x1d\xb4"
+			     "\x28\xbf\x56\xed\x61\xf8\x8f\x03"
+			     "\x9a\x31\xc8\x3c\xd3\x6a\x01\x75"
+			     "\x0c\xa3\x17\xae\x45\xdc\x50\xe7"
+			     "\x7e\x15\x89\x20\xb7\x2b\xc2\x59"
+			     "\xf0\x64\xfb\x92\x06\x9d\x34\xcb"
+			     "\x3f\xd6\x6d\x04\x78\x0f\xa6\x1a"
+			     "\xb1\x48\xdf\x53\xea\x81\x18\x8c"
+			     "\x23\xba\x2e\xc5\x5c\xf3\x67\xfe"
+			     "\x95\x09\xa0\x37\xce\x42\xd9\x70"
+			     "\x07\x7b\x12\xa9\x1d\xb4\x4b\xe2"
+			     "\x56\xed\x84\x1b\x8f\x26\xbd\x31"
+			     "\xc8\x5f\xf6\x6a\x01\x98\x0c\xa3"
+			     "\x3a\xd1\x45\xdc\x73\x0a\x7e\x15"
+			     "\xac\x20\xb7\x4e\xe5\x59\xf0\x87"
+			     "\x1e\x92\x29\xc0\x34\xcb\x62\xf9"
+			     "\x6d\x04\x9b\x0f\xa6\x3d\xd4\x48"
+			     "\xdf\x76\x0d\x81\x18\xaf\x23\xba"
+			     "\x51\xe8\x5c\xf3\x8a\x21\x95\x2c"
+			     "\xc3\x37\xce\x65\xfc\x70\x07\x9e"
+			     "\x12\xa9\x40\xd7\x4b\xe2\x79\x10"
+			     "\x84\x1b\xb2\x26\xbd\x54\xeb\x5f"
+			     "\xf6\x8d\x01\x98\x2f\xc6\x3a\xd1"
+			     "\x68\xff\x73\x0a\xa1\x15\xac\x43"
+			     "\xda\x4e\xe5\x7c\x13\x87\x1e\xb5"
+			     "\x29\xc0\x57\xee\x62\xf9\x90\x04"
+			     "\x9b\x32\xc9\x3d\xd4\x6b\x02\x76"
+			     "\x0d\xa4\x18\xaf\x46\xdd\x51\xe8"
+			     "\x7f\x16\x8a\x21\xb8\x2c\xc3\x5a"
+			     "\xf1\x65\xfc\x93\x07\x9e\x35\xcc"
+			     "\x40\xd7\x6e\x05\x79\x10\xa7\x1b"
+			     "\xb2\x49\xe0\x54\xeb\x82\x19\x8d"
+			     "\x24\xbb\x2f\xc6\x5d\xf4\x68\xff"
+			     "\x96\x0a\xa1\x38\xcf\x43\xda\x71"
+			     "\x08\x7c\x13\xaa\x1e\xb5\x4c",
+		.psize     = 1023,
+		.digest    = "\x7d\x0f\x2f\xb7\x65\x3b\xa7\x26"
+			     "\xc3\x88\x20\x71\x15\x06\xe8\x2d"
+			     "\xa3\x92\x44\xab\x3e\xe7\xff\x86"
+			     "\xb6\x79\x10\x72",
 	},
 };
 
@@ -1077,6 +1213,142 @@ static const struct hash_testvec sha3_256_tv_template[] = {
 				"\x49\x10\x03\x76\xa8\x23\x5e\x2c"
 				"\x82\xe1\xb9\x99\x8a\x99\x9e\x21"
 				"\xdb\x32\xdd\x97\x49\x6d\x33\x76",
+		.np	= 2,
+		.tap	= { 28, 28 },
+	}, {
+		.plaintext = "\x08\x9f\x13\xaa\x41\xd8\x4c\xe3"
+			     "\x7a\x11\x85\x1c\xb3\x27\xbe\x55"
+			     "\xec\x60\xf7\x8e\x02\x99\x30\xc7"
+			     "\x3b\xd2\x69\x00\x74\x0b\xa2\x16"
+			     "\xad\x44\xdb\x4f\xe6\x7d\x14\x88"
+			     "\x1f\xb6\x2a\xc1\x58\xef\x63\xfa"
+			     "\x91\x05\x9c\x33\xca\x3e\xd5\x6c"
+			     "\x03\x77\x0e\xa5\x19\xb0\x47\xde"
+			     "\x52\xe9\x80\x17\x8b\x22\xb9\x2d"
+			     "\xc4\x5b\xf2\x66\xfd\x94\x08\x9f"
+			     "\x36\xcd\x41\xd8\x6f\x06\x7a\x11"
+			     "\xa8\x1c\xb3\x4a\xe1\x55\xec\x83"
+			     "\x1a\x8e\x25\xbc\x30\xc7\x5e\xf5"
+			     "\x69\x00\x97\x0b\xa2\x39\xd0\x44"
+			     "\xdb\x72\x09\x7d\x14\xab\x1f\xb6"
+			     "\x4d\xe4\x58\xef\x86\x1d\x91\x28"
+			     "\xbf\x33\xca\x61\xf8\x6c\x03\x9a"
+			     "\x0e\xa5\x3c\xd3\x47\xde\x75\x0c"
+			     "\x80\x17\xae\x22\xb9\x50\xe7\x5b"
+			     "\xf2\x89\x20\x94\x2b\xc2\x36\xcd"
+			     "\x64\xfb\x6f\x06\x9d\x11\xa8\x3f"
+			     "\xd6\x4a\xe1\x78\x0f\x83\x1a\xb1"
+			     "\x25\xbc\x53\xea\x5e\xf5\x8c\x00"
+			     "\x97\x2e\xc5\x39\xd0\x67\xfe\x72"
+			     "\x09\xa0\x14\xab\x42\xd9\x4d\xe4"
+			     "\x7b\x12\x86\x1d\xb4\x28\xbf\x56"
+			     "\xed\x61\xf8\x8f\x03\x9a\x31\xc8"
+			     "\x3c\xd3\x6a\x01\x75\x0c\xa3\x17"
+			     "\xae\x45\xdc\x50\xe7\x7e\x15\x89"
+			     "\x20\xb7\x2b\xc2\x59\xf0\x64\xfb"
+			     "\x92\x06\x9d\x34\xcb\x3f\xd6\x6d"
+			     "\x04\x78\x0f\xa6\x1a\xb1\x48\xdf"
+			     "\x53\xea\x81\x18\x8c\x23\xba\x2e"
+			     "\xc5\x5c\xf3\x67\xfe\x95\x09\xa0"
+			     "\x37\xce\x42\xd9\x70\x07\x7b\x12"
+			     "\xa9\x1d\xb4\x4b\xe2\x56\xed\x84"
+			     "\x1b\x8f\x26\xbd\x31\xc8\x5f\xf6"
+			     "\x6a\x01\x98\x0c\xa3\x3a\xd1\x45"
+			     "\xdc\x73\x0a\x7e\x15\xac\x20\xb7"
+			     "\x4e\xe5\x59\xf0\x87\x1e\x92\x29"
+			     "\xc0\x34\xcb\x62\xf9\x6d\x04\x9b"
+			     "\x0f\xa6\x3d\xd4\x48\xdf\x76\x0d"
+			     "\x81\x18\xaf\x23\xba\x51\xe8\x5c"
+			     "\xf3\x8a\x21\x95\x2c\xc3\x37\xce"
+			     "\x65\xfc\x70\x07\x9e\x12\xa9\x40"
+			     "\xd7\x4b\xe2\x79\x10\x84\x1b\xb2"
+			     "\x26\xbd\x54\xeb\x5f\xf6\x8d\x01"
+			     "\x98\x2f\xc6\x3a\xd1\x68\xff\x73"
+			     "\x0a\xa1\x15\xac\x43\xda\x4e\xe5"
+			     "\x7c\x13\x87\x1e\xb5\x29\xc0\x57"
+			     "\xee\x62\xf9\x90\x04\x9b\x32\xc9"
+			     "\x3d\xd4\x6b\x02\x76\x0d\xa4\x18"
+			     "\xaf\x46\xdd\x51\xe8\x7f\x16\x8a"
+			     "\x21\xb8\x2c\xc3\x5a\xf1\x65\xfc"
+			     "\x93\x07\x9e\x35\xcc\x40\xd7\x6e"
+			     "\x05\x79\x10\xa7\x1b\xb2\x49\xe0"
+			     "\x54\xeb\x82\x19\x8d\x24\xbb\x2f"
+			     "\xc6\x5d\xf4\x68\xff\x96\x0a\xa1"
+			     "\x38\xcf\x43\xda\x71\x08\x7c\x13"
+			     "\xaa\x1e\xb5\x4c\xe3\x57\xee\x85"
+			     "\x1c\x90\x27\xbe\x32\xc9\x60\xf7"
+			     "\x6b\x02\x99\x0d\xa4\x3b\xd2\x46"
+			     "\xdd\x74\x0b\x7f\x16\xad\x21\xb8"
+			     "\x4f\xe6\x5a\xf1\x88\x1f\x93\x2a"
+			     "\xc1\x35\xcc\x63\xfa\x6e\x05\x9c"
+			     "\x10\xa7\x3e\xd5\x49\xe0\x77\x0e"
+			     "\x82\x19\xb0\x24\xbb\x52\xe9\x5d"
+			     "\xf4\x8b\x22\x96\x2d\xc4\x38\xcf"
+			     "\x66\xfd\x71\x08\x9f\x13\xaa\x41"
+			     "\xd8\x4c\xe3\x7a\x11\x85\x1c\xb3"
+			     "\x27\xbe\x55\xec\x60\xf7\x8e\x02"
+			     "\x99\x30\xc7\x3b\xd2\x69\x00\x74"
+			     "\x0b\xa2\x16\xad\x44\xdb\x4f\xe6"
+			     "\x7d\x14\x88\x1f\xb6\x2a\xc1\x58"
+			     "\xef\x63\xfa\x91\x05\x9c\x33\xca"
+			     "\x3e\xd5\x6c\x03\x77\x0e\xa5\x19"
+			     "\xb0\x47\xde\x52\xe9\x80\x17\x8b"
+			     "\x22\xb9\x2d\xc4\x5b\xf2\x66\xfd"
+			     "\x94\x08\x9f\x36\xcd\x41\xd8\x6f"
+			     "\x06\x7a\x11\xa8\x1c\xb3\x4a\xe1"
+			     "\x55\xec\x83\x1a\x8e\x25\xbc\x30"
+			     "\xc7\x5e\xf5\x69\x00\x97\x0b\xa2"
+			     "\x39\xd0\x44\xdb\x72\x09\x7d\x14"
+			     "\xab\x1f\xb6\x4d\xe4\x58\xef\x86"
+			     "\x1d\x91\x28\xbf\x33\xca\x61\xf8"
+			     "\x6c\x03\x9a\x0e\xa5\x3c\xd3\x47"
+			     "\xde\x75\x0c\x80\x17\xae\x22\xb9"
+			     "\x50\xe7\x5b\xf2\x89\x20\x94\x2b"
+			     "\xc2\x36\xcd\x64\xfb\x6f\x06\x9d"
+			     "\x11\xa8\x3f\xd6\x4a\xe1\x78\x0f"
+			     "\x83\x1a\xb1\x25\xbc\x53\xea\x5e"
+			     "\xf5\x8c\x00\x97\x2e\xc5\x39\xd0"
+			     "\x67\xfe\x72\x09\xa0\x14\xab\x42"
+			     "\xd9\x4d\xe4\x7b\x12\x86\x1d\xb4"
+			     "\x28\xbf\x56\xed\x61\xf8\x8f\x03"
+			     "\x9a\x31\xc8\x3c\xd3\x6a\x01\x75"
+			     "\x0c\xa3\x17\xae\x45\xdc\x50\xe7"
+			     "\x7e\x15\x89\x20\xb7\x2b\xc2\x59"
+			     "\xf0\x64\xfb\x92\x06\x9d\x34\xcb"
+			     "\x3f\xd6\x6d\x04\x78\x0f\xa6\x1a"
+			     "\xb1\x48\xdf\x53\xea\x81\x18\x8c"
+			     "\x23\xba\x2e\xc5\x5c\xf3\x67\xfe"
+			     "\x95\x09\xa0\x37\xce\x42\xd9\x70"
+			     "\x07\x7b\x12\xa9\x1d\xb4\x4b\xe2"
+			     "\x56\xed\x84\x1b\x8f\x26\xbd\x31"
+			     "\xc8\x5f\xf6\x6a\x01\x98\x0c\xa3"
+			     "\x3a\xd1\x45\xdc\x73\x0a\x7e\x15"
+			     "\xac\x20\xb7\x4e\xe5\x59\xf0\x87"
+			     "\x1e\x92\x29\xc0\x34\xcb\x62\xf9"
+			     "\x6d\x04\x9b\x0f\xa6\x3d\xd4\x48"
+			     "\xdf\x76\x0d\x81\x18\xaf\x23\xba"
+			     "\x51\xe8\x5c\xf3\x8a\x21\x95\x2c"
+			     "\xc3\x37\xce\x65\xfc\x70\x07\x9e"
+			     "\x12\xa9\x40\xd7\x4b\xe2\x79\x10"
+			     "\x84\x1b\xb2\x26\xbd\x54\xeb\x5f"
+			     "\xf6\x8d\x01\x98\x2f\xc6\x3a\xd1"
+			     "\x68\xff\x73\x0a\xa1\x15\xac\x43"
+			     "\xda\x4e\xe5\x7c\x13\x87\x1e\xb5"
+			     "\x29\xc0\x57\xee\x62\xf9\x90\x04"
+			     "\x9b\x32\xc9\x3d\xd4\x6b\x02\x76"
+			     "\x0d\xa4\x18\xaf\x46\xdd\x51\xe8"
+			     "\x7f\x16\x8a\x21\xb8\x2c\xc3\x5a"
+			     "\xf1\x65\xfc\x93\x07\x9e\x35\xcc"
+			     "\x40\xd7\x6e\x05\x79\x10\xa7\x1b"
+			     "\xb2\x49\xe0\x54\xeb\x82\x19\x8d"
+			     "\x24\xbb\x2f\xc6\x5d\xf4\x68\xff"
+			     "\x96\x0a\xa1\x38\xcf\x43\xda\x71"
+			     "\x08\x7c\x13\xaa\x1e\xb5\x4c",
+		.psize     = 1023,
+		.digest    = "\xde\x41\x04\xbd\xda\xda\xd9\x71"
+			     "\xf7\xfa\x80\xf5\xea\x11\x03\xb1"
+			     "\x3b\x6a\xbc\x5f\xb9\x66\x26\xf7"
+			     "\x8a\x97\xbb\xf2\x07\x08\x38\x30",
 	},
 };
 
@@ -1109,6 +1381,144 @@ static const struct hash_testvec sha3_384_tv_template[] = {
 				"\x9b\xfd\xbc\x32\xb9\xd4\xad\x5a"
 				"\xa0\x4a\x1f\x07\x6e\x62\xfe\xa1"
 				"\x9e\xef\x51\xac\xd0\x65\x7c\x22",
+		.np	= 2,
+		.tap	= { 28, 28 },
+	}, {
+		.plaintext = "\x08\x9f\x13\xaa\x41\xd8\x4c\xe3"
+			     "\x7a\x11\x85\x1c\xb3\x27\xbe\x55"
+			     "\xec\x60\xf7\x8e\x02\x99\x30\xc7"
+			     "\x3b\xd2\x69\x00\x74\x0b\xa2\x16"
+			     "\xad\x44\xdb\x4f\xe6\x7d\x14\x88"
+			     "\x1f\xb6\x2a\xc1\x58\xef\x63\xfa"
+			     "\x91\x05\x9c\x33\xca\x3e\xd5\x6c"
+			     "\x03\x77\x0e\xa5\x19\xb0\x47\xde"
+			     "\x52\xe9\x80\x17\x8b\x22\xb9\x2d"
+			     "\xc4\x5b\xf2\x66\xfd\x94\x08\x9f"
+			     "\x36\xcd\x41\xd8\x6f\x06\x7a\x11"
+			     "\xa8\x1c\xb3\x4a\xe1\x55\xec\x83"
+			     "\x1a\x8e\x25\xbc\x30\xc7\x5e\xf5"
+			     "\x69\x00\x97\x0b\xa2\x39\xd0\x44"
+			     "\xdb\x72\x09\x7d\x14\xab\x1f\xb6"
+			     "\x4d\xe4\x58\xef\x86\x1d\x91\x28"
+			     "\xbf\x33\xca\x61\xf8\x6c\x03\x9a"
+			     "\x0e\xa5\x3c\xd3\x47\xde\x75\x0c"
+			     "\x80\x17\xae\x22\xb9\x50\xe7\x5b"
+			     "\xf2\x89\x20\x94\x2b\xc2\x36\xcd"
+			     "\x64\xfb\x6f\x06\x9d\x11\xa8\x3f"
+			     "\xd6\x4a\xe1\x78\x0f\x83\x1a\xb1"
+			     "\x25\xbc\x53\xea\x5e\xf5\x8c\x00"
+			     "\x97\x2e\xc5\x39\xd0\x67\xfe\x72"
+			     "\x09\xa0\x14\xab\x42\xd9\x4d\xe4"
+			     "\x7b\x12\x86\x1d\xb4\x28\xbf\x56"
+			     "\xed\x61\xf8\x8f\x03\x9a\x31\xc8"
+			     "\x3c\xd3\x6a\x01\x75\x0c\xa3\x17"
+			     "\xae\x45\xdc\x50\xe7\x7e\x15\x89"
+			     "\x20\xb7\x2b\xc2\x59\xf0\x64\xfb"
+			     "\x92\x06\x9d\x34\xcb\x3f\xd6\x6d"
+			     "\x04\x78\x0f\xa6\x1a\xb1\x48\xdf"
+			     "\x53\xea\x81\x18\x8c\x23\xba\x2e"
+			     "\xc5\x5c\xf3\x67\xfe\x95\x09\xa0"
+			     "\x37\xce\x42\xd9\x70\x07\x7b\x12"
+			     "\xa9\x1d\xb4\x4b\xe2\x56\xed\x84"
+			     "\x1b\x8f\x26\xbd\x31\xc8\x5f\xf6"
+			     "\x6a\x01\x98\x0c\xa3\x3a\xd1\x45"
+			     "\xdc\x73\x0a\x7e\x15\xac\x20\xb7"
+			     "\x4e\xe5\x59\xf0\x87\x1e\x92\x29"
+			     "\xc0\x34\xcb\x62\xf9\x6d\x04\x9b"
+			     "\x0f\xa6\x3d\xd4\x48\xdf\x76\x0d"
+			     "\x81\x18\xaf\x23\xba\x51\xe8\x5c"
+			     "\xf3\x8a\x21\x95\x2c\xc3\x37\xce"
+			     "\x65\xfc\x70\x07\x9e\x12\xa9\x40"
+			     "\xd7\x4b\xe2\x79\x10\x84\x1b\xb2"
+			     "\x26\xbd\x54\xeb\x5f\xf6\x8d\x01"
+			     "\x98\x2f\xc6\x3a\xd1\x68\xff\x73"
+			     "\x0a\xa1\x15\xac\x43\xda\x4e\xe5"
+			     "\x7c\x13\x87\x1e\xb5\x29\xc0\x57"
+			     "\xee\x62\xf9\x90\x04\x9b\x32\xc9"
+			     "\x3d\xd4\x6b\x02\x76\x0d\xa4\x18"
+			     "\xaf\x46\xdd\x51\xe8\x7f\x16\x8a"
+			     "\x21\xb8\x2c\xc3\x5a\xf1\x65\xfc"
+			     "\x93\x07\x9e\x35\xcc\x40\xd7\x6e"
+			     "\x05\x79\x10\xa7\x1b\xb2\x49\xe0"
+			     "\x54\xeb\x82\x19\x8d\x24\xbb\x2f"
+			     "\xc6\x5d\xf4\x68\xff\x96\x0a\xa1"
+			     "\x38\xcf\x43\xda\x71\x08\x7c\x13"
+			     "\xaa\x1e\xb5\x4c\xe3\x57\xee\x85"
+			     "\x1c\x90\x27\xbe\x32\xc9\x60\xf7"
+			     "\x6b\x02\x99\x0d\xa4\x3b\xd2\x46"
+			     "\xdd\x74\x0b\x7f\x16\xad\x21\xb8"
+			     "\x4f\xe6\x5a\xf1\x88\x1f\x93\x2a"
+			     "\xc1\x35\xcc\x63\xfa\x6e\x05\x9c"
+			     "\x10\xa7\x3e\xd5\x49\xe0\x77\x0e"
+			     "\x82\x19\xb0\x24\xbb\x52\xe9\x5d"
+			     "\xf4\x8b\x22\x96\x2d\xc4\x38\xcf"
+			     "\x66\xfd\x71\x08\x9f\x13\xaa\x41"
+			     "\xd8\x4c\xe3\x7a\x11\x85\x1c\xb3"
+			     "\x27\xbe\x55\xec\x60\xf7\x8e\x02"
+			     "\x99\x30\xc7\x3b\xd2\x69\x00\x74"
+			     "\x0b\xa2\x16\xad\x44\xdb\x4f\xe6"
+			     "\x7d\x14\x88\x1f\xb6\x2a\xc1\x58"
+			     "\xef\x63\xfa\x91\x05\x9c\x33\xca"
+			     "\x3e\xd5\x6c\x03\x77\x0e\xa5\x19"
+			     "\xb0\x47\xde\x52\xe9\x80\x17\x8b"
+			     "\x22\xb9\x2d\xc4\x5b\xf2\x66\xfd"
+			     "\x94\x08\x9f\x36\xcd\x41\xd8\x6f"
+			     "\x06\x7a\x11\xa8\x1c\xb3\x4a\xe1"
+			     "\x55\xec\x83\x1a\x8e\x25\xbc\x30"
+			     "\xc7\x5e\xf5\x69\x00\x97\x0b\xa2"
+			     "\x39\xd0\x44\xdb\x72\x09\x7d\x14"
+			     "\xab\x1f\xb6\x4d\xe4\x58\xef\x86"
+			     "\x1d\x91\x28\xbf\x33\xca\x61\xf8"
+			     "\x6c\x03\x9a\x0e\xa5\x3c\xd3\x47"
+			     "\xde\x75\x0c\x80\x17\xae\x22\xb9"
+			     "\x50\xe7\x5b\xf2\x89\x20\x94\x2b"
+			     "\xc2\x36\xcd\x64\xfb\x6f\x06\x9d"
+			     "\x11\xa8\x3f\xd6\x4a\xe1\x78\x0f"
+			     "\x83\x1a\xb1\x25\xbc\x53\xea\x5e"
+			     "\xf5\x8c\x00\x97\x2e\xc5\x39\xd0"
+			     "\x67\xfe\x72\x09\xa0\x14\xab\x42"
+			     "\xd9\x4d\xe4\x7b\x12\x86\x1d\xb4"
+			     "\x28\xbf\x56\xed\x61\xf8\x8f\x03"
+			     "\x9a\x31\xc8\x3c\xd3\x6a\x01\x75"
+			     "\x0c\xa3\x17\xae\x45\xdc\x50\xe7"
+			     "\x7e\x15\x89\x20\xb7\x2b\xc2\x59"
+			     "\xf0\x64\xfb\x92\x06\x9d\x34\xcb"
+			     "\x3f\xd6\x6d\x04\x78\x0f\xa6\x1a"
+			     "\xb1\x48\xdf\x53\xea\x81\x18\x8c"
+			     "\x23\xba\x2e\xc5\x5c\xf3\x67\xfe"
+			     "\x95\x09\xa0\x37\xce\x42\xd9\x70"
+			     "\x07\x7b\x12\xa9\x1d\xb4\x4b\xe2"
+			     "\x56\xed\x84\x1b\x8f\x26\xbd\x31"
+			     "\xc8\x5f\xf6\x6a\x01\x98\x0c\xa3"
+			     "\x3a\xd1\x45\xdc\x73\x0a\x7e\x15"
+			     "\xac\x20\xb7\x4e\xe5\x59\xf0\x87"
+			     "\x1e\x92\x29\xc0\x34\xcb\x62\xf9"
+			     "\x6d\x04\x9b\x0f\xa6\x3d\xd4\x48"
+			     "\xdf\x76\x0d\x81\x18\xaf\x23\xba"
+			     "\x51\xe8\x5c\xf3\x8a\x21\x95\x2c"
+			     "\xc3\x37\xce\x65\xfc\x70\x07\x9e"
+			     "\x12\xa9\x40\xd7\x4b\xe2\x79\x10"
+			     "\x84\x1b\xb2\x26\xbd\x54\xeb\x5f"
+			     "\xf6\x8d\x01\x98\x2f\xc6\x3a\xd1"
+			     "\x68\xff\x73\x0a\xa1\x15\xac\x43"
+			     "\xda\x4e\xe5\x7c\x13\x87\x1e\xb5"
+			     "\x29\xc0\x57\xee\x62\xf9\x90\x04"
+			     "\x9b\x32\xc9\x3d\xd4\x6b\x02\x76"
+			     "\x0d\xa4\x18\xaf\x46\xdd\x51\xe8"
+			     "\x7f\x16\x8a\x21\xb8\x2c\xc3\x5a"
+			     "\xf1\x65\xfc\x93\x07\x9e\x35\xcc"
+			     "\x40\xd7\x6e\x05\x79\x10\xa7\x1b"
+			     "\xb2\x49\xe0\x54\xeb\x82\x19\x8d"
+			     "\x24\xbb\x2f\xc6\x5d\xf4\x68\xff"
+			     "\x96\x0a\xa1\x38\xcf\x43\xda\x71"
+			     "\x08\x7c\x13\xaa\x1e\xb5\x4c",
+		.psize     = 1023,
+		.digest    = "\x1b\x19\x4d\x8f\xd5\x36\x87\x71"
+			     "\xcf\xca\x30\x85\x9b\xc1\x25\xc7"
+			     "\x00\xcb\x73\x8a\x8e\xd4\xfe\x2b"
+			     "\x1a\xa2\xdc\x2e\x41\xfd\x52\x51"
+			     "\xd2\x21\xae\x2d\xc7\xae\x8c\x40"
+			     "\xb9\xe6\x56\x48\x03\xcd\x88\x6b",
 	},
 };
 
@@ -1147,6 +1557,146 @@ static const struct hash_testvec sha3_512_tv_template[] = {
 				"\xba\x1b\x0d\x8d\xc7\x8c\x08\x63"
 				"\x46\xb5\x33\xb4\x9c\x03\x0d\x99"
 				"\xa2\x7d\xaf\x11\x39\xd6\xe7\x5e",
+		.np	= 2,
+		.tap	= { 28, 28 },
+	}, {
+		.plaintext = "\x08\x9f\x13\xaa\x41\xd8\x4c\xe3"
+			     "\x7a\x11\x85\x1c\xb3\x27\xbe\x55"
+			     "\xec\x60\xf7\x8e\x02\x99\x30\xc7"
+			     "\x3b\xd2\x69\x00\x74\x0b\xa2\x16"
+			     "\xad\x44\xdb\x4f\xe6\x7d\x14\x88"
+			     "\x1f\xb6\x2a\xc1\x58\xef\x63\xfa"
+			     "\x91\x05\x9c\x33\xca\x3e\xd5\x6c"
+			     "\x03\x77\x0e\xa5\x19\xb0\x47\xde"
+			     "\x52\xe9\x80\x17\x8b\x22\xb9\x2d"
+			     "\xc4\x5b\xf2\x66\xfd\x94\x08\x9f"
+			     "\x36\xcd\x41\xd8\x6f\x06\x7a\x11"
+			     "\xa8\x1c\xb3\x4a\xe1\x55\xec\x83"
+			     "\x1a\x8e\x25\xbc\x30\xc7\x5e\xf5"
+			     "\x69\x00\x97\x0b\xa2\x39\xd0\x44"
+			     "\xdb\x72\x09\x7d\x14\xab\x1f\xb6"
+			     "\x4d\xe4\x58\xef\x86\x1d\x91\x28"
+			     "\xbf\x33\xca\x61\xf8\x6c\x03\x9a"
+			     "\x0e\xa5\x3c\xd3\x47\xde\x75\x0c"
+			     "\x80\x17\xae\x22\xb9\x50\xe7\x5b"
+			     "\xf2\x89\x20\x94\x2b\xc2\x36\xcd"
+			     "\x64\xfb\x6f\x06\x9d\x11\xa8\x3f"
+			     "\xd6\x4a\xe1\x78\x0f\x83\x1a\xb1"
+			     "\x25\xbc\x53\xea\x5e\xf5\x8c\x00"
+			     "\x97\x2e\xc5\x39\xd0\x67\xfe\x72"
+			     "\x09\xa0\x14\xab\x42\xd9\x4d\xe4"
+			     "\x7b\x12\x86\x1d\xb4\x28\xbf\x56"
+			     "\xed\x61\xf8\x8f\x03\x9a\x31\xc8"
+			     "\x3c\xd3\x6a\x01\x75\x0c\xa3\x17"
+			     "\xae\x45\xdc\x50\xe7\x7e\x15\x89"
+			     "\x20\xb7\x2b\xc2\x59\xf0\x64\xfb"
+			     "\x92\x06\x9d\x34\xcb\x3f\xd6\x6d"
+			     "\x04\x78\x0f\xa6\x1a\xb1\x48\xdf"
+			     "\x53\xea\x81\x18\x8c\x23\xba\x2e"
+			     "\xc5\x5c\xf3\x67\xfe\x95\x09\xa0"
+			     "\x37\xce\x42\xd9\x70\x07\x7b\x12"
+			     "\xa9\x1d\xb4\x4b\xe2\x56\xed\x84"
+			     "\x1b\x8f\x26\xbd\x31\xc8\x5f\xf6"
+			     "\x6a\x01\x98\x0c\xa3\x3a\xd1\x45"
+			     "\xdc\x73\x0a\x7e\x15\xac\x20\xb7"
+			     "\x4e\xe5\x59\xf0\x87\x1e\x92\x29"
+			     "\xc0\x34\xcb\x62\xf9\x6d\x04\x9b"
+			     "\x0f\xa6\x3d\xd4\x48\xdf\x76\x0d"
+			     "\x81\x18\xaf\x23\xba\x51\xe8\x5c"
+			     "\xf3\x8a\x21\x95\x2c\xc3\x37\xce"
+			     "\x65\xfc\x70\x07\x9e\x12\xa9\x40"
+			     "\xd7\x4b\xe2\x79\x10\x84\x1b\xb2"
+			     "\x26\xbd\x54\xeb\x5f\xf6\x8d\x01"
+			     "\x98\x2f\xc6\x3a\xd1\x68\xff\x73"
+			     "\x0a\xa1\x15\xac\x43\xda\x4e\xe5"
+			     "\x7c\x13\x87\x1e\xb5\x29\xc0\x57"
+			     "\xee\x62\xf9\x90\x04\x9b\x32\xc9"
+			     "\x3d\xd4\x6b\x02\x76\x0d\xa4\x18"
+			     "\xaf\x46\xdd\x51\xe8\x7f\x16\x8a"
+			     "\x21\xb8\x2c\xc3\x5a\xf1\x65\xfc"
+			     "\x93\x07\x9e\x35\xcc\x40\xd7\x6e"
+			     "\x05\x79\x10\xa7\x1b\xb2\x49\xe0"
+			     "\x54\xeb\x82\x19\x8d\x24\xbb\x2f"
+			     "\xc6\x5d\xf4\x68\xff\x96\x0a\xa1"
+			     "\x38\xcf\x43\xda\x71\x08\x7c\x13"
+			     "\xaa\x1e\xb5\x4c\xe3\x57\xee\x85"
+			     "\x1c\x90\x27\xbe\x32\xc9\x60\xf7"
+			     "\x6b\x02\x99\x0d\xa4\x3b\xd2\x46"
+			     "\xdd\x74\x0b\x7f\x16\xad\x21\xb8"
+			     "\x4f\xe6\x5a\xf1\x88\x1f\x93\x2a"
+			     "\xc1\x35\xcc\x63\xfa\x6e\x05\x9c"
+			     "\x10\xa7\x3e\xd5\x49\xe0\x77\x0e"
+			     "\x82\x19\xb0\x24\xbb\x52\xe9\x5d"
+			     "\xf4\x8b\x22\x96\x2d\xc4\x38\xcf"
+			     "\x66\xfd\x71\x08\x9f\x13\xaa\x41"
+			     "\xd8\x4c\xe3\x7a\x11\x85\x1c\xb3"
+			     "\x27\xbe\x55\xec\x60\xf7\x8e\x02"
+			     "\x99\x30\xc7\x3b\xd2\x69\x00\x74"
+			     "\x0b\xa2\x16\xad\x44\xdb\x4f\xe6"
+			     "\x7d\x14\x88\x1f\xb6\x2a\xc1\x58"
+			     "\xef\x63\xfa\x91\x05\x9c\x33\xca"
+			     "\x3e\xd5\x6c\x03\x77\x0e\xa5\x19"
+			     "\xb0\x47\xde\x52\xe9\x80\x17\x8b"
+			     "\x22\xb9\x2d\xc4\x5b\xf2\x66\xfd"
+			     "\x94\x08\x9f\x36\xcd\x41\xd8\x6f"
+			     "\x06\x7a\x11\xa8\x1c\xb3\x4a\xe1"
+			     "\x55\xec\x83\x1a\x8e\x25\xbc\x30"
+			     "\xc7\x5e\xf5\x69\x00\x97\x0b\xa2"
+			     "\x39\xd0\x44\xdb\x72\x09\x7d\x14"
+			     "\xab\x1f\xb6\x4d\xe4\x58\xef\x86"
+			     "\x1d\x91\x28\xbf\x33\xca\x61\xf8"
+			     "\x6c\x03\x9a\x0e\xa5\x3c\xd3\x47"
+			     "\xde\x75\x0c\x80\x17\xae\x22\xb9"
+			     "\x50\xe7\x5b\xf2\x89\x20\x94\x2b"
+			     "\xc2\x36\xcd\x64\xfb\x6f\x06\x9d"
+			     "\x11\xa8\x3f\xd6\x4a\xe1\x78\x0f"
+			     "\x83\x1a\xb1\x25\xbc\x53\xea\x5e"
+			     "\xf5\x8c\x00\x97\x2e\xc5\x39\xd0"
+			     "\x67\xfe\x72\x09\xa0\x14\xab\x42"
+			     "\xd9\x4d\xe4\x7b\x12\x86\x1d\xb4"
+			     "\x28\xbf\x56\xed\x61\xf8\x8f\x03"
+			     "\x9a\x31\xc8\x3c\xd3\x6a\x01\x75"
+			     "\x0c\xa3\x17\xae\x45\xdc\x50\xe7"
+			     "\x7e\x15\x89\x20\xb7\x2b\xc2\x59"
+			     "\xf0\x64\xfb\x92\x06\x9d\x34\xcb"
+			     "\x3f\xd6\x6d\x04\x78\x0f\xa6\x1a"
+			     "\xb1\x48\xdf\x53\xea\x81\x18\x8c"
+			     "\x23\xba\x2e\xc5\x5c\xf3\x67\xfe"
+			     "\x95\x09\xa0\x37\xce\x42\xd9\x70"
+			     "\x07\x7b\x12\xa9\x1d\xb4\x4b\xe2"
+			     "\x56\xed\x84\x1b\x8f\x26\xbd\x31"
+			     "\xc8\x5f\xf6\x6a\x01\x98\x0c\xa3"
+			     "\x3a\xd1\x45\xdc\x73\x0a\x7e\x15"
+			     "\xac\x20\xb7\x4e\xe5\x59\xf0\x87"
+			     "\x1e\x92\x29\xc0\x34\xcb\x62\xf9"
+			     "\x6d\x04\x9b\x0f\xa6\x3d\xd4\x48"
+			     "\xdf\x76\x0d\x81\x18\xaf\x23\xba"
+			     "\x51\xe8\x5c\xf3\x8a\x21\x95\x2c"
+			     "\xc3\x37\xce\x65\xfc\x70\x07\x9e"
+			     "\x12\xa9\x40\xd7\x4b\xe2\x79\x10"
+			     "\x84\x1b\xb2\x26\xbd\x54\xeb\x5f"
+			     "\xf6\x8d\x01\x98\x2f\xc6\x3a\xd1"
+			     "\x68\xff\x73\x0a\xa1\x15\xac\x43"
+			     "\xda\x4e\xe5\x7c\x13\x87\x1e\xb5"
+			     "\x29\xc0\x57\xee\x62\xf9\x90\x04"
+			     "\x9b\x32\xc9\x3d\xd4\x6b\x02\x76"
+			     "\x0d\xa4\x18\xaf\x46\xdd\x51\xe8"
+			     "\x7f\x16\x8a\x21\xb8\x2c\xc3\x5a"
+			     "\xf1\x65\xfc\x93\x07\x9e\x35\xcc"
+			     "\x40\xd7\x6e\x05\x79\x10\xa7\x1b"
+			     "\xb2\x49\xe0\x54\xeb\x82\x19\x8d"
+			     "\x24\xbb\x2f\xc6\x5d\xf4\x68\xff"
+			     "\x96\x0a\xa1\x38\xcf\x43\xda\x71"
+			     "\x08\x7c\x13\xaa\x1e\xb5\x4c",
+		.psize     = 1023,
+		.digest    = "\x59\xda\x30\xe3\x90\xe4\x3d\xde"
+			     "\xf0\xc6\x42\x17\xd7\xb2\x26\x47"
+			     "\x90\x28\xa6\x84\xe8\x49\x7a\x86"
+			     "\xd6\xb8\x9e\xf8\x07\x59\x21\x03"
+			     "\xad\xd2\xed\x48\xa3\xb9\xa5\xf0"
+			     "\xb3\xae\x02\x2b\xb8\xaf\xc3\x3b"
+			     "\xd6\xb0\x8f\xcb\x76\x8b\xa7\x41"
+			     "\x32\xc2\x8e\x50\x91\x86\x90\xfb",
 	},
 };
 
diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c
index 5f62c4f9f6e0..f3a0dd25f871 100644
--- a/crypto/twofish_common.c
+++ b/crypto/twofish_common.c
@@ -24,9 +24,8 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  *
  * This code is a "clean room" implementation, written from the paper
  * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
diff --git a/crypto/twofish_generic.c b/crypto/twofish_generic.c
index ebf7a3efb572..07e62433fbfb 100644
--- a/crypto/twofish_generic.c
+++ b/crypto/twofish_generic.c
@@ -23,9 +23,8 @@
  * GNU General Public License for more details.
  * 
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  *
  * This code is a "clean room" implementation, written from the paper
  * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
diff --git a/crypto/xcbc.c b/crypto/xcbc.c
index df90b332554c..25c75af50d3f 100644
--- a/crypto/xcbc.c
+++ b/crypto/xcbc.c
@@ -12,8 +12,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  * Author:
  * 	Kazunori Miyazawa <miyazawa@linux-ipv6.org>
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 88044eda0ac6..4d0f571c15f9 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -73,26 +73,14 @@ config HW_RANDOM_ATMEL
 
 	  If unsure, say Y.
 
-config HW_RANDOM_BCM63XX
-	tristate "Broadcom BCM63xx Random Number Generator support"
-	depends on BCM63XX || BMIPS_GENERIC
-	default HW_RANDOM
-	---help---
-	  This driver provides kernel-side support for the Random Number
-	  Generator hardware found on the Broadcom BCM63xx SoCs.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called bcm63xx-rng
-
-	  If unusure, say Y.
-
 config HW_RANDOM_BCM2835
-	tristate "Broadcom BCM2835 Random Number Generator support"
-	depends on ARCH_BCM2835 || ARCH_BCM_NSP || ARCH_BCM_5301X
+	tristate "Broadcom BCM2835/BCM63xx Random Number Generator support"
+	depends on ARCH_BCM2835 || ARCH_BCM_NSP || ARCH_BCM_5301X || \
+		   ARCH_BCM_63XX || BCM63XX || BMIPS_GENERIC
 	default HW_RANDOM
 	---help---
 	  This driver provides kernel-side support for the Random Number
-	  Generator hardware found on the Broadcom BCM2835 SoCs.
+	  Generator hardware found on the Broadcom BCM2835 and BCM63xx SoCs.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called bcm2835-rng
@@ -436,6 +424,18 @@ config HW_RANDOM_S390
 
 	  If unsure, say Y.
 
+config HW_RANDOM_EXYNOS
+	tristate "Samsung Exynos True Random Number Generator support"
+	depends on ARCH_EXYNOS || COMPILE_TEST
+	default HW_RANDOM
+	---help---
+	  This driver provides support for the True Random Number
+	  Generator available in Exynos SoCs.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called exynos-trng.
+
+	  If unsure, say Y.
 endif # HW_RANDOM
 
 config UML_RANDOM
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 0ef05c61d9c8..b780370bd4eb 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -9,11 +9,11 @@ obj-$(CONFIG_HW_RANDOM_TIMERIOMEM) += timeriomem-rng.o
 obj-$(CONFIG_HW_RANDOM_INTEL) += intel-rng.o
 obj-$(CONFIG_HW_RANDOM_AMD) += amd-rng.o
 obj-$(CONFIG_HW_RANDOM_ATMEL) += atmel-rng.o
-obj-$(CONFIG_HW_RANDOM_BCM63XX)	+= bcm63xx-rng.o
 obj-$(CONFIG_HW_RANDOM_GEODE) += geode-rng.o
 obj-$(CONFIG_HW_RANDOM_N2RNG) += n2-rng.o
 n2-rng-y := n2-drv.o n2-asm.o
 obj-$(CONFIG_HW_RANDOM_VIA) += via-rng.o
+obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-trng.o
 obj-$(CONFIG_HW_RANDOM_IXP4XX) += ixp4xx-rng.o
 obj-$(CONFIG_HW_RANDOM_OMAP) += omap-rng.o
 obj-$(CONFIG_HW_RANDOM_OMAP3_ROM) += omap3-rom-rng.o
diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c
index 574211a49549..7a84cec30c3a 100644
--- a/drivers/char/hw_random/bcm2835-rng.c
+++ b/drivers/char/hw_random/bcm2835-rng.c
@@ -15,6 +15,7 @@
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
+#include <linux/clk.h>
 
 #define RNG_CTRL	0x0
 #define RNG_STATUS	0x4
@@ -29,116 +30,180 @@
 
 #define RNG_INT_OFF	0x1
 
-static void __init nsp_rng_init(void __iomem *base)
+struct bcm2835_rng_priv {
+	struct hwrng rng;
+	void __iomem *base;
+	bool mask_interrupts;
+	struct clk *clk;
+};
+
+static inline struct bcm2835_rng_priv *to_rng_priv(struct hwrng *rng)
 {
-	u32 val;
+	return container_of(rng, struct bcm2835_rng_priv, rng);
+}
+
+static inline u32 rng_readl(struct bcm2835_rng_priv *priv, u32 offset)
+{
+	/* MIPS chips strapped for BE will automagically configure the
+	 * peripheral registers for CPU-native byte order.
+	 */
+	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+		return __raw_readl(priv->base + offset);
+	else
+		return readl(priv->base + offset);
+}
 
-	/* mask the interrupt */
-	val = readl(base + RNG_INT_MASK);
-	val |= RNG_INT_OFF;
-	writel(val, base + RNG_INT_MASK);
+static inline void rng_writel(struct bcm2835_rng_priv *priv, u32 val,
+			      u32 offset)
+{
+	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+		__raw_writel(val, priv->base + offset);
+	else
+		writel(val, priv->base + offset);
 }
 
 static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max,
 			       bool wait)
 {
-	void __iomem *rng_base = (void __iomem *)rng->priv;
+	struct bcm2835_rng_priv *priv = to_rng_priv(rng);
 	u32 max_words = max / sizeof(u32);
 	u32 num_words, count;
 
-	while ((__raw_readl(rng_base + RNG_STATUS) >> 24) == 0) {
+	while ((rng_readl(priv, RNG_STATUS) >> 24) == 0) {
 		if (!wait)
 			return 0;
 		cpu_relax();
 	}
 
-	num_words = readl(rng_base + RNG_STATUS) >> 24;
+	num_words = rng_readl(priv, RNG_STATUS) >> 24;
 	if (num_words > max_words)
 		num_words = max_words;
 
 	for (count = 0; count < num_words; count++)
-		((u32 *)buf)[count] = readl(rng_base + RNG_DATA);
+		((u32 *)buf)[count] = rng_readl(priv, RNG_DATA);
 
 	return num_words * sizeof(u32);
 }
 
-static struct hwrng bcm2835_rng_ops = {
-	.name	= "bcm2835",
-	.read	= bcm2835_rng_read,
+static int bcm2835_rng_init(struct hwrng *rng)
+{
+	struct bcm2835_rng_priv *priv = to_rng_priv(rng);
+	int ret = 0;
+	u32 val;
+
+	if (!IS_ERR(priv->clk)) {
+		ret = clk_prepare_enable(priv->clk);
+		if (ret)
+			return ret;
+	}
+
+	if (priv->mask_interrupts) {
+		/* mask the interrupt */
+		val = rng_readl(priv, RNG_INT_MASK);
+		val |= RNG_INT_OFF;
+		rng_writel(priv, val, RNG_INT_MASK);
+	}
+
+	/* set warm-up count & enable */
+	rng_writel(priv, RNG_WARMUP_COUNT, RNG_STATUS);
+	rng_writel(priv, RNG_RBGEN, RNG_CTRL);
+
+	return ret;
+}
+
+static void bcm2835_rng_cleanup(struct hwrng *rng)
+{
+	struct bcm2835_rng_priv *priv = to_rng_priv(rng);
+
+	/* disable rng hardware */
+	rng_writel(priv, 0, RNG_CTRL);
+
+	if (!IS_ERR(priv->clk))
+		clk_disable_unprepare(priv->clk);
+}
+
+struct bcm2835_rng_of_data {
+	bool mask_interrupts;
+};
+
+static const struct bcm2835_rng_of_data nsp_rng_of_data = {
+	.mask_interrupts = true,
 };
 
 static const struct of_device_id bcm2835_rng_of_match[] = {
 	{ .compatible = "brcm,bcm2835-rng"},
-	{ .compatible = "brcm,bcm-nsp-rng", .data = nsp_rng_init},
-	{ .compatible = "brcm,bcm5301x-rng", .data = nsp_rng_init},
+	{ .compatible = "brcm,bcm-nsp-rng", .data = &nsp_rng_of_data },
+	{ .compatible = "brcm,bcm5301x-rng", .data = &nsp_rng_of_data },
+	{ .compatible = "brcm,bcm6368-rng"},
 	{},
 };
 
 static int bcm2835_rng_probe(struct platform_device *pdev)
 {
+	const struct bcm2835_rng_of_data *of_data;
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
-	void (*rng_setup)(void __iomem *base);
 	const struct of_device_id *rng_id;
-	void __iomem *rng_base;
+	struct bcm2835_rng_priv *priv;
+	struct resource *r;
 	int err;
 
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, priv);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
 	/* map peripheral */
-	rng_base = of_iomap(np, 0);
-	if (!rng_base) {
-		dev_err(dev, "failed to remap rng regs");
-		return -ENODEV;
-	}
-	bcm2835_rng_ops.priv = (unsigned long)rng_base;
+	priv->base = devm_ioremap_resource(dev, r);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	/* Clock is optional on most platforms */
+	priv->clk = devm_clk_get(dev, NULL);
+
+	priv->rng.name = pdev->name;
+	priv->rng.init = bcm2835_rng_init;
+	priv->rng.read = bcm2835_rng_read;
+	priv->rng.cleanup = bcm2835_rng_cleanup;
 
 	rng_id = of_match_node(bcm2835_rng_of_match, np);
-	if (!rng_id) {
-		iounmap(rng_base);
+	if (!rng_id)
 		return -EINVAL;
-	}
-	/* Check for rng init function, execute it */
-	rng_setup = rng_id->data;
-	if (rng_setup)
-		rng_setup(rng_base);
 
-	/* set warm-up count & enable */
-	__raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS);
-	__raw_writel(RNG_RBGEN, rng_base + RNG_CTRL);
+	/* Pick up platform-specific quirks from the OF match data */
+	of_data = rng_id->data;
+	if (of_data)
+		priv->mask_interrupts = of_data->mask_interrupts;
 
 	/* register driver */
-	err = hwrng_register(&bcm2835_rng_ops);
-	if (err) {
+	err = devm_hwrng_register(dev, &priv->rng);
+	if (err)
 		dev_err(dev, "hwrng registration failed\n");
-		iounmap(rng_base);
-	} else
+	else
 		dev_info(dev, "hwrng registered\n");
 
 	return err;
 }
 
-static int bcm2835_rng_remove(struct platform_device *pdev)
-{
-	void __iomem *rng_base = (void __iomem *)bcm2835_rng_ops.priv;
-
-	/* disable rng hardware */
-	__raw_writel(0, rng_base + RNG_CTRL);
-
-	/* unregister driver */
-	hwrng_unregister(&bcm2835_rng_ops);
-	iounmap(rng_base);
-
-	return 0;
-}
-
 MODULE_DEVICE_TABLE(of, bcm2835_rng_of_match);
 
+static struct platform_device_id bcm2835_rng_devtype[] = {
+	{ .name = "bcm2835-rng" },
+	{ .name = "bcm63xx-rng" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, bcm2835_rng_devtype);
+
 static struct platform_driver bcm2835_rng_driver = {
 	.driver = {
 		.name = "bcm2835-rng",
 		.of_match_table = bcm2835_rng_of_match,
 	},
 	.probe		= bcm2835_rng_probe,
-	.remove		= bcm2835_rng_remove,
+	.id_table	= bcm2835_rng_devtype,
 };
 module_platform_driver(bcm2835_rng_driver);
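
The rework above replaces the file-scope struct hwrng (and the rng->priv cast in the read path) with a per-device private structure recovered via container_of(), which is what lets a single driver instance serve BCM2835, NSP/5301x and the big-endian BCM63xx parts. A minimal sketch of the embed-and-recover idiom, with hypothetical demo_* names:

	#include <linux/hw_random.h>
	#include <linux/kernel.h>

	struct demo_rng_priv {
		struct hwrng rng;	/* embedded, not pointed-to */
		void __iomem *base;
	};

	static int demo_rng_read(struct hwrng *rng, void *buf, size_t max,
				 bool wait)
	{
		/* Recover the enclosing private struct from the member
		 * pointer; no global state, no opaque rng->priv cast. */
		struct demo_rng_priv *priv =
			container_of(rng, struct demo_rng_priv, rng);

		return 0;	/* read priv->base registers here */
	}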
 
diff --git a/drivers/char/hw_random/bcm63xx-rng.c b/drivers/char/hw_random/bcm63xx-rng.c
deleted file mode 100644
index 5132c9cde50d..000000000000
--- a/drivers/char/hw_random/bcm63xx-rng.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Broadcom BCM63xx Random Number Generator support
- *
- * Copyright (C) 2011, Florian Fainelli <florian@openwrt.org>
- * Copyright (C) 2009, Broadcom Corporation
- *
- */
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/platform_device.h>
-#include <linux/hw_random.h>
-#include <linux/of.h>
-
-#define RNG_CTRL			0x00
-#define RNG_EN				(1 << 0)
-
-#define RNG_STAT			0x04
-#define RNG_AVAIL_MASK			(0xff000000)
-
-#define RNG_DATA			0x08
-#define RNG_THRES			0x0c
-#define RNG_MASK			0x10
-
-struct bcm63xx_rng_priv {
-	struct hwrng rng;
-	struct clk *clk;
-	void __iomem *regs;
-};
-
-#define to_rng_priv(rng)	container_of(rng, struct bcm63xx_rng_priv, rng)
-
-static int bcm63xx_rng_init(struct hwrng *rng)
-{
-	struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-	u32 val;
-	int error;
-
-	error = clk_prepare_enable(priv->clk);
-	if (error)
-		return error;
-
-	val = __raw_readl(priv->regs + RNG_CTRL);
-	val |= RNG_EN;
-	__raw_writel(val, priv->regs + RNG_CTRL);
-
-	return 0;
-}
-
-static void bcm63xx_rng_cleanup(struct hwrng *rng)
-{
-	struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-	u32 val;
-
-	val = __raw_readl(priv->regs + RNG_CTRL);
-	val &= ~RNG_EN;
-	__raw_writel(val, priv->regs + RNG_CTRL);
-
-	clk_disable_unprepare(priv->clk);
-}
-
-static int bcm63xx_rng_data_present(struct hwrng *rng, int wait)
-{
-	struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-
-	return __raw_readl(priv->regs + RNG_STAT) & RNG_AVAIL_MASK;
-}
-
-static int bcm63xx_rng_data_read(struct hwrng *rng, u32 *data)
-{
-	struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-
-	*data = __raw_readl(priv->regs + RNG_DATA);
-
-	return 4;
-}
-
-static int bcm63xx_rng_probe(struct platform_device *pdev)
-{
-	struct resource *r;
-	int ret;
-	struct bcm63xx_rng_priv *priv;
-
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!r) {
-		dev_err(&pdev->dev, "no iomem resource\n");
-		return -ENXIO;
-	}
-
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	priv->rng.name = pdev->name;
-	priv->rng.init = bcm63xx_rng_init;
-	priv->rng.cleanup = bcm63xx_rng_cleanup;
-	priv->rng.data_present = bcm63xx_rng_data_present;
-	priv->rng.data_read = bcm63xx_rng_data_read;
-
-	priv->clk = devm_clk_get(&pdev->dev, "ipsec");
-	if (IS_ERR(priv->clk)) {
-		ret = PTR_ERR(priv->clk);
-		dev_err(&pdev->dev, "no clock for device: %d\n", ret);
-		return ret;
-	}
-
-	if (!devm_request_mem_region(&pdev->dev, r->start,
-					resource_size(r), pdev->name)) {
-		dev_err(&pdev->dev, "request mem failed");
-		return -EBUSY;
-	}
-
-	priv->regs = devm_ioremap_nocache(&pdev->dev, r->start,
-					resource_size(r));
-	if (!priv->regs) {
-		dev_err(&pdev->dev, "ioremap failed");
-		return -ENOMEM;
-	}
-
-	ret = devm_hwrng_register(&pdev->dev, &priv->rng);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to register rng device: %d\n",
-			ret);
-		return ret;
-	}
-
-	dev_info(&pdev->dev, "registered RNG driver\n");
-
-	return 0;
-}
-
-#ifdef CONFIG_OF
-static const struct of_device_id bcm63xx_rng_of_match[] = {
-	{ .compatible = "brcm,bcm6368-rng", },
-	{},
-};
-MODULE_DEVICE_TABLE(of, bcm63xx_rng_of_match);
-#endif
-
-static struct platform_driver bcm63xx_rng_driver = {
-	.probe		= bcm63xx_rng_probe,
-	.driver		= {
-		.name	= "bcm63xx-rng",
-		.of_match_table = of_match_ptr(bcm63xx_rng_of_match),
-	},
-};
-
-module_platform_driver(bcm63xx_rng_driver);
-
-MODULE_AUTHOR("Florian Fainelli <florian@openwrt.org>");
-MODULE_DESCRIPTION("Broadcom BCM63xx RNG driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 657b8770b6b9..91bb98c42a1c 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -306,6 +306,10 @@ static int enable_best_rng(void)
 		ret = ((new_rng == current_rng) ? 0 : set_current_rng(new_rng));
 		if (!ret)
 			cur_rng_set_by_user = 0;
+	} else {
+		drop_current_rng();
+		cur_rng_set_by_user = 0;
+		ret = 0;
 	}
 
 	return ret;
diff --git a/drivers/char/hw_random/exynos-trng.c b/drivers/char/hw_random/exynos-trng.c
new file mode 100644
index 000000000000..1947aed7c044
--- /dev/null
+++ b/drivers/char/hw_random/exynos-trng.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RNG driver for Exynos TRNGs
+ *
+ * Author: Łukasz Stelmach <l.stelmach@samsung.com>
+ *
+ * Copyright 2017 (c) Samsung Electronics Software, Inc.
+ *
+ * Based on the Exynos PRNG driver drivers/crypto/exynos-rng by
+ * Krzysztof Kozłowski <krzk@kernel.org>
+ */
+
+#include <linux/clk.h>
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#define EXYNOS_TRNG_CLKDIV         (0x0)
+
+#define EXYNOS_TRNG_CTRL           (0x20)
+#define EXYNOS_TRNG_CTRL_RNGEN     BIT(31)
+
+#define EXYNOS_TRNG_POST_CTRL      (0x30)
+#define EXYNOS_TRNG_ONLINE_CTRL    (0x40)
+#define EXYNOS_TRNG_ONLINE_STAT    (0x44)
+#define EXYNOS_TRNG_ONLINE_MAXCHI2 (0x48)
+#define EXYNOS_TRNG_FIFO_CTRL      (0x50)
+#define EXYNOS_TRNG_FIFO_0         (0x80)
+#define EXYNOS_TRNG_FIFO_1         (0x84)
+#define EXYNOS_TRNG_FIFO_2         (0x88)
+#define EXYNOS_TRNG_FIFO_3         (0x8c)
+#define EXYNOS_TRNG_FIFO_4         (0x90)
+#define EXYNOS_TRNG_FIFO_5         (0x94)
+#define EXYNOS_TRNG_FIFO_6         (0x98)
+#define EXYNOS_TRNG_FIFO_7         (0x9c)
+#define EXYNOS_TRNG_FIFO_LEN       (8)
+#define EXYNOS_TRNG_CLOCK_RATE     (500000)
+
+
+struct exynos_trng_dev {
+	struct device    *dev;
+	void __iomem     *mem;
+	struct clk       *clk;
+	struct hwrng rng;
+};
+
+static int exynos_trng_do_read(struct hwrng *rng, void *data, size_t max,
+			       bool wait)
+{
+	struct exynos_trng_dev *trng;
+	int val;
+
+	max = min_t(size_t, max, (EXYNOS_TRNG_FIFO_LEN * 4));
+
+	trng = (struct exynos_trng_dev *)rng->priv;
+
+	writel_relaxed(max * 8, trng->mem + EXYNOS_TRNG_FIFO_CTRL);
+	val = readl_poll_timeout(trng->mem + EXYNOS_TRNG_FIFO_CTRL, val,
+				 val == 0, 200, 1000000);
+	if (val < 0)
+		return val;
+
+	memcpy_fromio(data, trng->mem + EXYNOS_TRNG_FIFO_0, max);
+
+	return max;
+}
+
+static int exynos_trng_init(struct hwrng *rng)
+{
+	struct exynos_trng_dev *trng = (struct exynos_trng_dev *)rng->priv;
+	unsigned long sss_rate;
+	u32 val;
+
+	sss_rate = clk_get_rate(trng->clk);
+
+	/*
+	 * For most TRNG circuits, a clock frequency under 500 kHz is
+	 * safe.
+	 */
+	val = sss_rate / (EXYNOS_TRNG_CLOCK_RATE * 2);
+	if (val > 0x7fff) {
+		dev_err(trng->dev, "clock divider too large: %d", val);
+		return -ERANGE;
+	}
+	val = val << 1;
+	writel_relaxed(val, trng->mem + EXYNOS_TRNG_CLKDIV);
+
+	/* Enable the generator. */
+	val = EXYNOS_TRNG_CTRL_RNGEN;
+	writel_relaxed(val, trng->mem + EXYNOS_TRNG_CTRL);
+
+	/*
+	 * Disable post-processing. /dev/hwrng is supposed to deliver
+	 * unprocessed data.
+	 */
+	writel_relaxed(0, trng->mem + EXYNOS_TRNG_POST_CTRL);
+
+	return 0;
+}
+
+static int exynos_trng_probe(struct platform_device *pdev)
+{
+	struct exynos_trng_dev *trng;
+	struct resource *res;
+	int ret = -ENOMEM;
+
+	trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
+	if (!trng)
+		return ret;
+
+	trng->rng.name = devm_kstrdup(&pdev->dev, dev_name(&pdev->dev),
+				      GFP_KERNEL);
+	if (!trng->rng.name)
+		return ret;
+
+	trng->rng.init = exynos_trng_init;
+	trng->rng.read = exynos_trng_do_read;
+	trng->rng.priv = (unsigned long) trng;
+
+	platform_set_drvdata(pdev, trng);
+	trng->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	trng->mem = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(trng->mem))
+		return PTR_ERR(trng->mem);
+
+	pm_runtime_enable(&pdev->dev);
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Could not get runtime PM.\n");
+		goto err_pm_get;
+	}
+
+	trng->clk = devm_clk_get(&pdev->dev, "secss");
+	if (IS_ERR(trng->clk)) {
+		ret = PTR_ERR(trng->clk);
+		dev_err(&pdev->dev, "Could not get clock.\n");
+		goto err_clock;
+	}
+
+	ret = clk_prepare_enable(trng->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Could not enable the clk.\n");
+		goto err_clock;
+	}
+
+	ret = hwrng_register(&trng->rng);
+	if (ret) {
+		dev_err(&pdev->dev, "Could not register hwrng device.\n");
+		goto err_register;
+	}
+
+	dev_info(&pdev->dev, "Exynos True Random Number Generator.\n");
+
+	return 0;
+
+err_register:
+	clk_disable_unprepare(trng->clk);
+
+err_clock:
+	pm_runtime_put_sync(&pdev->dev);
+
+err_pm_get:
+	pm_runtime_disable(&pdev->dev);
+
+	return ret;
+}
+
+static int exynos_trng_remove(struct platform_device *pdev)
+{
+	struct exynos_trng_dev *trng =  platform_get_drvdata(pdev);
+
+	hwrng_unregister(&trng->rng);
+	clk_disable_unprepare(trng->clk);
+
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static int __maybe_unused exynos_trng_suspend(struct device *dev)
+{
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int __maybe_unused exynos_trng_resume(struct device *dev)
+{
+	int ret;
+
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "Could not get runtime PM.\n");
+		pm_runtime_put_noidle(dev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(exynos_trng_pm_ops, exynos_trng_suspend,
+			 exynos_trng_resume);
+
+static const struct of_device_id exynos_trng_dt_match[] = {
+	{
+		.compatible = "samsung,exynos5250-trng",
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, exynos_trng_dt_match);
+
+static struct platform_driver exynos_trng_driver = {
+	.driver = {
+		.name = "exynos-trng",
+		.pm = &exynos_trng_pm_ops,
+		.of_match_table = exynos_trng_dt_match,
+	},
+	.probe = exynos_trng_probe,
+	.remove = exynos_trng_remove,
+};
+
+module_platform_driver(exynos_trng_driver);
+MODULE_AUTHOR("Łukasz Stelmach");
+MODULE_DESCRIPTION("H/W TRNG driver for Exynos chips");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c
index 88db42d30760..eca87249bcff 100644
--- a/drivers/char/hw_random/imx-rngc.c
+++ b/drivers/char/hw_random/imx-rngc.c
@@ -282,8 +282,7 @@ static int __exit imx_rngc_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int imx_rngc_suspend(struct device *dev)
+static int __maybe_unused imx_rngc_suspend(struct device *dev)
 {
 	struct imx_rngc *rngc = dev_get_drvdata(dev);
 
@@ -292,7 +291,7 @@ static int imx_rngc_suspend(struct device *dev)
 	return 0;
 }
 
-static int imx_rngc_resume(struct device *dev)
+static int __maybe_unused imx_rngc_resume(struct device *dev)
 {
 	struct imx_rngc *rngc = dev_get_drvdata(dev);
 
@@ -301,11 +300,7 @@ static int imx_rngc_resume(struct device *dev)
 	return 0;
 }
 
-static const struct dev_pm_ops imx_rngc_pm_ops = {
-	.suspend	= imx_rngc_suspend,
-	.resume		= imx_rngc_resume,
-};
-#endif
+SIMPLE_DEV_PM_OPS(imx_rngc_pm_ops, imx_rngc_suspend, imx_rngc_resume);
 
 static const struct of_device_id imx_rngc_dt_ids[] = {
 	{ .compatible = "fsl,imx25-rngb", .data = NULL, },
@@ -316,9 +311,7 @@ MODULE_DEVICE_TABLE(of, imx_rngc_dt_ids);
 static struct platform_driver imx_rngc_driver = {
 	.driver = {
 		.name = "imx_rngc",
-#ifdef CONFIG_PM
 		.pm = &imx_rngc_pm_ops,
-#endif
 		.of_match_table = imx_rngc_dt_ids,
 	},
 	.remove = __exit_p(imx_rngc_remove),
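
This hunk is the standard conversion away from #ifdef CONFIG_PM guards: SIMPLE_DEV_PM_OPS() always emits the dev_pm_ops object (its suspend/resume slots are only populated when CONFIG_PM_SLEEP is set), and __maybe_unused keeps the compiler quiet about the callbacks in builds where nothing references them. The generic shape of the idiom, with hypothetical demo_* names:

	#include <linux/device.h>
	#include <linux/pm.h>

	static int __maybe_unused demo_suspend(struct device *dev)
	{
		return 0;	/* quiesce the hardware */
	}

	static int __maybe_unused demo_resume(struct device *dev)
	{
		return 0;	/* bring the hardware back up */
	}

	/* Always defined, so .pm can be assigned unconditionally; resolves
	 * to empty ops when CONFIG_PM_SLEEP=n. */
	static SIMPLE_DEV_PM_OPS(demo_pm_ops, demo_suspend, demo_resume);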
diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c
index 8da7bcf54105..7f99cd52b40e 100644
--- a/drivers/char/hw_random/mtk-rng.c
+++ b/drivers/char/hw_random/mtk-rng.c
@@ -135,6 +135,7 @@ static int mtk_rng_probe(struct platform_device *pdev)
 #endif
 	priv->rng.read = mtk_rng_read;
 	priv->rng.priv = (unsigned long)&pdev->dev;
+	priv->rng.quality = 900;
 
 	priv->clk = devm_clk_get(&pdev->dev, "rng");
 	if (IS_ERR(priv->clk)) {
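
Setting .quality = 900 tells the hw_random core how much entropy to credit per 1024 bits of output when the kernel's entropy-feeding thread pulls from this device. A sketch of the crediting arithmetic (the core's exact code may differ):

	/* 900/1024 quality: a 32-byte read is credited ~225 bits */
	static unsigned int demo_credit_bits(size_t bytes_read,
					     unsigned short quality)
	{
		return bytes_read * 8 * quality / 1024;
	}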
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 64b59562c872..80f2c326db47 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -431,9 +431,9 @@ static int crng_init = 0;
 static int crng_init_cnt = 0;
 #define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE)
 static void _extract_crng(struct crng_state *crng,
-			  __u8 out[CHACHA20_BLOCK_SIZE]);
+			  __u32 out[CHACHA20_BLOCK_WORDS]);
 static void _crng_backtrack_protect(struct crng_state *crng,
-				    __u8 tmp[CHACHA20_BLOCK_SIZE], int used);
+				    __u32 tmp[CHACHA20_BLOCK_WORDS], int used);
 static void process_random_ready_list(void);
 static void _get_random_bytes(void *buf, int nbytes);
 
@@ -817,7 +817,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 	unsigned long	flags;
 	int		i, num;
 	union {
-		__u8	block[CHACHA20_BLOCK_SIZE];
+		__u32	block[CHACHA20_BLOCK_WORDS];
 		__u32	key[8];
 	} buf;
 
@@ -851,7 +851,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 }
 
 static void _extract_crng(struct crng_state *crng,
-			  __u8 out[CHACHA20_BLOCK_SIZE])
+			  __u32 out[CHACHA20_BLOCK_WORDS])
 {
 	unsigned long v, flags;
 
@@ -867,7 +867,7 @@ static void _extract_crng(struct crng_state *crng,
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
-static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
+static void extract_crng(__u32 out[CHACHA20_BLOCK_WORDS])
 {
 	struct crng_state *crng = NULL;
 
@@ -885,7 +885,7 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
  * enough) to mutate the CRNG key to provide backtracking protection.
  */
 static void _crng_backtrack_protect(struct crng_state *crng,
-				    __u8 tmp[CHACHA20_BLOCK_SIZE], int used)
+				    __u32 tmp[CHACHA20_BLOCK_WORDS], int used)
 {
 	unsigned long	flags;
 	__u32		*s, *d;
@@ -897,14 +897,14 @@ static void _crng_backtrack_protect(struct crng_state *crng,
 		used = 0;
 	}
 	spin_lock_irqsave(&crng->lock, flags);
-	s = (__u32 *) &tmp[used];
+	s = &tmp[used / sizeof(__u32)];
 	d = &crng->state[4];
 	for (i=0; i < 8; i++)
 		*d++ ^= *s++;
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
-static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used)
+static void crng_backtrack_protect(__u32 tmp[CHACHA20_BLOCK_WORDS], int used)
 {
 	struct crng_state *crng = NULL;
 
@@ -920,7 +920,7 @@ static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used)
 static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
 {
 	ssize_t ret = 0, i = CHACHA20_BLOCK_SIZE;
-	__u8 tmp[CHACHA20_BLOCK_SIZE];
+	__u32 tmp[CHACHA20_BLOCK_WORDS];
 	int large_request = (nbytes > 256);
 
 	while (nbytes) {
@@ -1507,7 +1507,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller,
  */
 static void _get_random_bytes(void *buf, int nbytes)
 {
-	__u8 tmp[CHACHA20_BLOCK_SIZE];
+	__u32 tmp[CHACHA20_BLOCK_WORDS];
 
 	trace_get_random_bytes(nbytes, _RET_IP_);
 
@@ -2114,7 +2114,7 @@ u64 get_random_u64(void)
 	if (use_lock)
 		read_lock_irqsave(&batched_entropy_reset_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
-		extract_crng((u8 *)batch->entropy_u64);
+		extract_crng((__u32 *)batch->entropy_u64);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u64[batch->position++];
@@ -2144,7 +2144,7 @@ u32 get_random_u32(void)
 	if (use_lock)
 		read_lock_irqsave(&batched_entropy_reset_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
-		extract_crng((u8 *)batch->entropy_u32);
+		extract_crng(batch->entropy_u32);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u32[batch->position++];
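
The __u8 to __u32 buffer changes in random.c are an alignment fix: ChaCha20 emits its keystream as 32-bit words, so typing the buffers as word arrays both guarantees natural alignment and removes the misaligned-pointer cast in _crng_backtrack_protect(). CHACHA20_BLOCK_WORDS is presumably defined alongside this change in <crypto/chacha20.h> as the block size in words; a sketch of the before/after under that assumption:

	/* assumed definition, mirroring the usage above */
	#define CHACHA20_BLOCK_WORDS	(CHACHA20_BLOCK_SIZE / sizeof(__u32))

	static void demo_alignment(int used)
	{
		__u8  old_tmp[CHACHA20_BLOCK_SIZE];	/* byte-aligned only */
		__u32 *s_old = (__u32 *)&old_tmp[used];	/* may be misaligned */

		__u32 new_tmp[CHACHA20_BLOCK_WORDS];	/* word-aligned */
		__u32 *s_new = &new_tmp[used / sizeof(__u32)]; /* aligned */

		(void)s_old;
		(void)s_new;
	}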
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 47ec920d5b71..4b741b83e23f 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -723,7 +723,6 @@ config CRYPTO_DEV_ARTPEC6
 	select CRYPTO_HASH
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
-	select CRYPTO_SHA384
 	select CRYPTO_SHA512
 	help
 	  Enables the driver for the on-chip crypto accelerator
diff --git a/drivers/crypto/amcc/crypto4xx_alg.c b/drivers/crypto/amcc/crypto4xx_alg.c
index eeaf27859d80..ea83d0bff0e9 100644
--- a/drivers/crypto/amcc/crypto4xx_alg.c
+++ b/drivers/crypto/amcc/crypto4xx_alg.c
@@ -256,10 +256,6 @@ static inline bool crypto4xx_aead_need_fallback(struct aead_request *req,
 	if (is_ccm && !(req->iv[0] == 1 || req->iv[0] == 3))
 		return true;
 
-	/* CCM - fix CBC MAC mismatch in special case */
-	if (is_ccm && decrypt && !req->assoclen)
-		return true;
-
 	return false;
 }
 
@@ -330,7 +326,7 @@ int crypto4xx_setkey_aes_ccm(struct crypto_aead *cipher, const u8 *key,
 	sa = (struct dynamic_sa_ctl *) ctx->sa_in;
 	sa->sa_contents.w = SA_AES_CCM_CONTENTS | (keylen << 2);
 
-	set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, SA_NOT_SAVE_IV,
+	set_dynamic_sa_command_0(sa, SA_SAVE_HASH, SA_NOT_SAVE_IV,
 				 SA_LOAD_HASH_FROM_SA, SA_LOAD_IV_FROM_STATE,
 				 SA_NO_HEADER_PROC, SA_HASH_ALG_CBC_MAC,
 				 SA_CIPHER_ALG_AES,
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index c44954e274bc..76f459ad2821 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -128,7 +128,14 @@ static void crypto4xx_hw_init(struct crypto4xx_device *dev)
 	writel(PPC4XX_INT_DESCR_CNT, dev->ce_base + CRYPTO4XX_INT_DESCR_CNT);
 	writel(PPC4XX_INT_DESCR_CNT, dev->ce_base + CRYPTO4XX_INT_DESCR_CNT);
 	writel(PPC4XX_INT_CFG, dev->ce_base + CRYPTO4XX_INT_CFG);
-	writel(PPC4XX_PD_DONE_INT, dev->ce_base + CRYPTO4XX_INT_EN);
+	if (dev->is_revb) {
+		writel(PPC4XX_INT_TIMEOUT_CNT_REVB << 10,
+		       dev->ce_base + CRYPTO4XX_INT_TIMEOUT_CNT);
+		writel(PPC4XX_PD_DONE_INT | PPC4XX_TMO_ERR_INT,
+		       dev->ce_base + CRYPTO4XX_INT_EN);
+	} else {
+		writel(PPC4XX_PD_DONE_INT, dev->ce_base + CRYPTO4XX_INT_EN);
+	}
 }
 
 int crypto4xx_alloc_sa(struct crypto4xx_ctx *ctx, u32 size)
@@ -275,14 +282,12 @@ static u32 crypto4xx_put_pd_to_pdr(struct crypto4xx_device *dev, u32 idx)
  */
 static u32 crypto4xx_build_gdr(struct crypto4xx_device *dev)
 {
-	dev->gdr = dma_alloc_coherent(dev->core_dev->device,
-				      sizeof(struct ce_gd) * PPC4XX_NUM_GD,
-				      &dev->gdr_pa, GFP_ATOMIC);
+	dev->gdr = dma_zalloc_coherent(dev->core_dev->device,
+				       sizeof(struct ce_gd) * PPC4XX_NUM_GD,
+				       &dev->gdr_pa, GFP_ATOMIC);
 	if (!dev->gdr)
 		return -ENOMEM;
 
-	memset(dev->gdr, 0, sizeof(struct ce_gd) * PPC4XX_NUM_GD);
-
 	return 0;
 }
 
@@ -570,15 +575,14 @@ static void crypto4xx_aead_done(struct crypto4xx_device *dev,
 				struct pd_uinfo *pd_uinfo,
 				struct ce_pd *pd)
 {
-	struct aead_request *aead_req;
-	struct crypto4xx_ctx *ctx;
+	struct aead_request *aead_req = container_of(pd_uinfo->async_req,
+		struct aead_request, base);
 	struct scatterlist *dst = pd_uinfo->dest_va;
+	size_t cp_len = crypto_aead_authsize(
+		crypto_aead_reqtfm(aead_req));
+	u32 icv[cp_len];
 	int err = 0;
 
-	aead_req = container_of(pd_uinfo->async_req, struct aead_request,
-				base);
-	ctx  = crypto_tfm_ctx(aead_req->base.tfm);
-
 	if (pd_uinfo->using_sd) {
 		crypto4xx_copy_pkt_to_dst(dev, pd, pd_uinfo,
 					  pd->pd_ctl_len.bf.pkt_len,
@@ -590,38 +594,39 @@ static void crypto4xx_aead_done(struct crypto4xx_device *dev,
 
 	if (pd_uinfo->sa_va->sa_command_0.bf.dir == DIR_OUTBOUND) {
 		/* append icv at the end */
-		size_t cp_len = crypto_aead_authsize(
-			crypto_aead_reqtfm(aead_req));
-		u32 icv[cp_len];
-
 		crypto4xx_memcpy_from_le32(icv, pd_uinfo->sr_va->save_digest,
 					   cp_len);
 
 		scatterwalk_map_and_copy(icv, dst, aead_req->cryptlen,
 					 cp_len, 1);
+	} else {
+		/* check icv at the end */
+		scatterwalk_map_and_copy(icv, aead_req->src,
+			aead_req->assoclen + aead_req->cryptlen -
+			cp_len, cp_len, 0);
+
+		crypto4xx_memcpy_from_le32(icv, icv, cp_len);
+
+		if (crypto_memneq(icv, pd_uinfo->sr_va->save_digest, cp_len))
+			err = -EBADMSG;
 	}
 
 	crypto4xx_ret_sg_desc(dev, pd_uinfo);
 
 	if (pd->pd_ctl.bf.status & 0xff) {
-		if (pd->pd_ctl.bf.status & 0x1) {
-			/* authentication error */
-			err = -EBADMSG;
-		} else {
-			if (!__ratelimit(&dev->aead_ratelimit)) {
-				if (pd->pd_ctl.bf.status & 2)
-					pr_err("pad fail error\n");
-				if (pd->pd_ctl.bf.status & 4)
-					pr_err("seqnum fail\n");
-				if (pd->pd_ctl.bf.status & 8)
-					pr_err("error _notify\n");
-				pr_err("aead return err status = 0x%02x\n",
-					pd->pd_ctl.bf.status & 0xff);
-				pr_err("pd pad_ctl = 0x%08x\n",
-					pd->pd_ctl.bf.pd_pad_ctl);
-			}
-			err = -EINVAL;
+		if (!__ratelimit(&dev->aead_ratelimit)) {
+			if (pd->pd_ctl.bf.status & 2)
+				pr_err("pad fail error\n");
+			if (pd->pd_ctl.bf.status & 4)
+				pr_err("seqnum fail\n");
+			if (pd->pd_ctl.bf.status & 8)
+				pr_err("error _notify\n");
+			pr_err("aead return err status = 0x%02x\n",
+				pd->pd_ctl.bf.status & 0xff);
+			pr_err("pd pad_ctl = 0x%08x\n",
+				pd->pd_ctl.bf.pd_pad_ctl);
 		}
+		err = -EINVAL;
 	}
 
 	if (pd_uinfo->state & PD_ENTRY_BUSY)
@@ -1070,21 +1075,29 @@ static void crypto4xx_bh_tasklet_cb(unsigned long data)
 /**
  * Top Half of isr.
  */
-static irqreturn_t crypto4xx_ce_interrupt_handler(int irq, void *data)
+static inline irqreturn_t crypto4xx_interrupt_handler(int irq, void *data,
+						      u32 clr_val)
 {
 	struct device *dev = (struct device *)data;
 	struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev);
 
-	if (!core_dev->dev->ce_base)
-		return 0;
-
-	writel(PPC4XX_INTERRUPT_CLR,
-	       core_dev->dev->ce_base + CRYPTO4XX_INT_CLR);
+	writel(clr_val, core_dev->dev->ce_base + CRYPTO4XX_INT_CLR);
 	tasklet_schedule(&core_dev->tasklet);
 
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t crypto4xx_ce_interrupt_handler(int irq, void *data)
+{
+	return crypto4xx_interrupt_handler(irq, data, PPC4XX_INTERRUPT_CLR);
+}
+
+static irqreturn_t crypto4xx_ce_interrupt_handler_revb(int irq, void *data)
+{
+	return crypto4xx_interrupt_handler(irq, data, PPC4XX_INTERRUPT_CLR |
+		PPC4XX_TMO_ERR_INT);
+}
+
 /**
  * Supported Crypto Algorithms
  */
@@ -1266,6 +1279,8 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	struct resource res;
 	struct device *dev = &ofdev->dev;
 	struct crypto4xx_core_device *core_dev;
+	u32 pvr;
+	bool is_revb = true;
 
 	rc = of_address_to_resource(ofdev->dev.of_node, 0, &res);
 	if (rc)
@@ -1282,6 +1297,7 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 		       mfdcri(SDR0, PPC405EX_SDR0_SRST) | PPC405EX_CE_RESET);
 		mtdcri(SDR0, PPC405EX_SDR0_SRST,
 		       mfdcri(SDR0, PPC405EX_SDR0_SRST) & ~PPC405EX_CE_RESET);
+		is_revb = false;
 	} else if (of_find_compatible_node(NULL, NULL,
 			"amcc,ppc460sx-crypto")) {
 		mtdcri(SDR0, PPC460SX_SDR0_SRST,
@@ -1304,7 +1320,22 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	if (!core_dev->dev)
 		goto err_alloc_dev;
 
+	/*
+	 * Older versions of the 460EX/GT have a hardware bug and hence
+	 * do not support H/W-based security interrupt coalescing.
+	 */
+	pvr = mfspr(SPRN_PVR);
+	if (is_revb && ((pvr >> 4) == 0x130218A)) {
+		u32 min = PVR_MIN(pvr);
+
+		if (min < 4) {
+			dev_info(dev, "RevA detected - disable interrupt coalescing\n");
+			is_revb = false;
+		}
+	}
+
 	core_dev->dev->core_dev = core_dev;
+	core_dev->dev->is_revb = is_revb;
 	core_dev->device = dev;
 	spin_lock_init(&core_dev->lock);
 	INIT_LIST_HEAD(&core_dev->dev->alg_list);
@@ -1325,13 +1356,6 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	tasklet_init(&core_dev->tasklet, crypto4xx_bh_tasklet_cb,
 		     (unsigned long) dev);
 
-	/* Register for Crypto isr, Crypto Engine IRQ */
-	core_dev->irq = irq_of_parse_and_map(ofdev->dev.of_node, 0);
-	rc = request_irq(core_dev->irq, crypto4xx_ce_interrupt_handler, 0,
-			 core_dev->dev->name, dev);
-	if (rc)
-		goto err_request_irq;
-
 	core_dev->dev->ce_base = of_iomap(ofdev->dev.of_node, 0);
 	if (!core_dev->dev->ce_base) {
 		dev_err(dev, "failed to of_iomap\n");
@@ -1339,6 +1363,15 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 		goto err_iomap;
 	}
 
+	/* Register for Crypto isr, Crypto Engine IRQ */
+	core_dev->irq = irq_of_parse_and_map(ofdev->dev.of_node, 0);
+	rc = request_irq(core_dev->irq, is_revb ?
+			 crypto4xx_ce_interrupt_handler_revb :
+			 crypto4xx_ce_interrupt_handler, 0,
+			 KBUILD_MODNAME, dev);
+	if (rc)
+		goto err_request_irq;
+
 	/* need to setup pdr, rdr, gdr and sdr before this */
 	crypto4xx_hw_init(core_dev->dev);
 
@@ -1352,11 +1385,11 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	return 0;
 
 err_start_dev:
-	iounmap(core_dev->dev->ce_base);
-err_iomap:
 	free_irq(core_dev->irq, dev);
 err_request_irq:
 	irq_dispose_mapping(core_dev->irq);
+	iounmap(core_dev->dev->ce_base);
+err_iomap:
 	tasklet_kill(&core_dev->tasklet);
 err_build_sdr:
 	crypto4xx_destroy_sdr(core_dev->dev);
@@ -1397,7 +1430,7 @@ MODULE_DEVICE_TABLE(of, crypto4xx_match);
 
 static struct platform_driver crypto4xx_driver = {
 	.driver = {
-		.name = MODULE_NAME,
+		.name = KBUILD_MODNAME,
 		.of_match_table = crypto4xx_match,
 	},
 	.probe		= crypto4xx_probe,
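
Together with the SA_SAVE_HASH change in crypto4xx_alg.c above, this moves AEAD ICV verification into the driver: the engine now saves its computed MAC in the state record, and on decryption the driver copies the transmitted ICV out of the source scatterlist and compares the two with crypto_memneq(), the kernel's constant-time comparator, returning -EBADMSG on mismatch. A sketch of why memcmp() would be wrong here:

	#include <crypto/algapi.h>	/* crypto_memneq() */
	#include <linux/errno.h>
	#include <linux/types.h>

	static int demo_check_icv(const u8 *received, const u8 *computed,
				  unsigned int authsize)
	{
		/* crypto_memneq() takes time independent of where the
		 * first mismatching byte sits; memcmp() would leak that
		 * position through timing and help an attacker forge
		 * authentication tags byte by byte. */
		return crypto_memneq(received, computed, authsize) ?
		       -EBADMSG : 0;
	}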
diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h
index 8ac3bd37203b..23b726da6534 100644
--- a/drivers/crypto/amcc/crypto4xx_core.h
+++ b/drivers/crypto/amcc/crypto4xx_core.h
@@ -28,8 +28,6 @@
 #include "crypto4xx_reg_def.h"
 #include "crypto4xx_sa.h"
 
-#define MODULE_NAME "crypto4xx"
-
 #define PPC460SX_SDR0_SRST                      0x201
 #define PPC405EX_SDR0_SRST                      0x200
 #define PPC460EX_SDR0_SRST                      0x201
@@ -82,7 +80,6 @@ struct pd_uinfo {
 
 struct crypto4xx_device {
 	struct crypto4xx_core_device *core_dev;
-	char *name;
 	void __iomem *ce_base;
 	void __iomem *trng_base;
 
@@ -109,6 +106,7 @@ struct crypto4xx_device {
 	struct list_head alg_list;	/* List of algorithm supported
 					by this device */
 	struct ratelimit_state aead_ratelimit;
+	bool is_revb;
 };
 
 struct crypto4xx_core_device {
diff --git a/drivers/crypto/amcc/crypto4xx_reg_def.h b/drivers/crypto/amcc/crypto4xx_reg_def.h
index 0a22ec5d1a96..472331787e04 100644
--- a/drivers/crypto/amcc/crypto4xx_reg_def.h
+++ b/drivers/crypto/amcc/crypto4xx_reg_def.h
@@ -121,13 +121,15 @@
 #define PPC4XX_PD_SIZE				6
 #define PPC4XX_CTX_DONE_INT			0x2000
 #define PPC4XX_PD_DONE_INT			0x8000
+#define PPC4XX_TMO_ERR_INT			0x40000
 #define PPC4XX_BYTE_ORDER			0x22222
 #define PPC4XX_INTERRUPT_CLR			0x3ffff
 #define PPC4XX_PRNG_CTRL_AUTO_EN		0x3
 #define PPC4XX_DC_3DES_EN			1
 #define PPC4XX_TRNG_EN				0x00020000
-#define PPC4XX_INT_DESCR_CNT			4
+#define PPC4XX_INT_DESCR_CNT			7
 #define PPC4XX_INT_TIMEOUT_CNT			0
+#define PPC4XX_INT_TIMEOUT_CNT_REVB		0x3FF
 #define PPC4XX_INT_CFG				1
 /**
  * all follow define are ad hoc
diff --git a/drivers/crypto/amcc/crypto4xx_trng.c b/drivers/crypto/amcc/crypto4xx_trng.c
index 677ca17fd223..5e63742b0d22 100644
--- a/drivers/crypto/amcc/crypto4xx_trng.c
+++ b/drivers/crypto/amcc/crypto4xx_trng.c
@@ -92,7 +92,7 @@ void ppc4xx_trng_probe(struct crypto4xx_core_device *core_dev)
 	if (!rng)
 		goto err_out;
 
-	rng->name = MODULE_NAME;
+	rng->name = KBUILD_MODNAME;
 	rng->data_present = ppc4xx_trng_data_present;
 	rng->data_read = ppc4xx_trng_data_read;
 	rng->priv = (unsigned long) dev;
diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c
index 456278440863..0fb8bbf41a8d 100644
--- a/drivers/crypto/axis/artpec6_crypto.c
+++ b/drivers/crypto/axis/artpec6_crypto.c
@@ -22,6 +22,7 @@
 #include <linux/slab.h>
 
 #include <crypto/aes.h>
+#include <crypto/gcm.h>
 #include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
@@ -1934,7 +1935,7 @@ static int artpec6_crypto_prepare_aead(struct aead_request *areq)
 
 	memcpy(req_ctx->hw_ctx.J0, areq->iv, crypto_aead_ivsize(cipher));
 	// The HW omits the initial increment of the counter field.
-	crypto_inc(req_ctx->hw_ctx.J0+12, 4);
+	memcpy(req_ctx->hw_ctx.J0 + GCM_AES_IV_SIZE, "\x00\x00\x00\x01", 4);
 
 	ret = artpec6_crypto_setup_out_descr(common, &req_ctx->hw_ctx,
 		sizeof(struct artpec6_crypto_aead_hw_ctx), false, false);
@@ -2956,7 +2957,7 @@ static struct aead_alg aead_algos[] = {
 		.setkey = artpec6_crypto_aead_set_key,
 		.encrypt = artpec6_crypto_aead_encrypt,
 		.decrypt = artpec6_crypto_aead_decrypt,
-		.ivsize = AES_BLOCK_SIZE,
+		.ivsize = GCM_AES_IV_SIZE,
 		.maxauthsize = AES_BLOCK_SIZE,
 
 		.base = {
@@ -3041,9 +3042,6 @@ static int artpec6_crypto_probe(struct platform_device *pdev)
 	variant = (enum artpec6_crypto_variant)match->data;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
-
 	base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
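
The GCM fixes above pin the IV size to the 96-bit GCM_AES_IV_SIZE and build the initial counter block J0 directly: per NIST SP 800-38D, for a 96-bit IV J0 is simply IV || 0x00000001, so writing the big-endian constant 1 is equivalent to the crypto_inc() on a zeroed counter field that it replaces. A sketch of the layout:

	#include <crypto/gcm.h>		/* GCM_AES_IV_SIZE == 12 */
	#include <linux/string.h>
	#include <linux/types.h>

	static void demo_build_j0(u8 j0[16], const u8 *iv)
	{
		memcpy(j0, iv, GCM_AES_IV_SIZE);	/* 12-byte IV */
		memcpy(j0 + GCM_AES_IV_SIZE, "\x00\x00\x00\x01", 4);
		/* Counter field = 1 gives GCM's J0; payload counter
		 * blocks then start from 2.  The hardware here skips
		 * that initial increment, so the driver supplies it. */
	}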
diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index ce70b44d0fb6..2b75f95bbe1b 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -42,7 +42,6 @@
 #include <crypto/authenc.h>
 #include <crypto/skcipher.h>
 #include <crypto/hash.h>
-#include <crypto/aes.h>
 #include <crypto/sha3.h>
 
 #include "util.h"
diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c
index a118b9bed669..bfbf8bf77f03 100644
--- a/drivers/crypto/bfin_crc.c
+++ b/drivers/crypto/bfin_crc.c
@@ -494,7 +494,8 @@ static struct ahash_alg algs = {
 		.cra_driver_name	= DRIVER_NAME,
 		.cra_priority		= 100,
 		.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
-						CRYPTO_ALG_ASYNC,
+						CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct bfin_crypto_crc_ctx),
 		.cra_alignmask		= 3,
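
CRYPTO_ALG_OPTIONAL_KEY marks a hash that has a ->setkey() but does not require it: without the flag, the crypto API refuses to service requests on a keyed algorithm until a key is set, which is wrong for CRC32-style hashes that merely take an optional seed. A hypothetical caller-side illustration (the names and the -ENOKEY behavior reflect my reading of the API, not this patch; error handling is elided):

	#include <crypto/hash.h>

	static void demo_optional_key(void)
	{
		/* HMAC requires a key: using this tfm before setkey()
		 * is expected to fail with -ENOKEY. */
		struct crypto_ahash *hmac =
			crypto_alloc_ahash("hmac(sha256)", 0, 0);

		/* CRC32 drivers set CRYPTO_ALG_OPTIONAL_KEY: usable
		 * right away, with the default seed as the "key". */
		struct crypto_ahash *crc =
			crypto_alloc_ahash("crc32", 0, 0);

		crypto_free_ahash(hmac);
		crypto_free_ahash(crc);
	}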
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index baa8dd52472d..2188235be02d 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -108,6 +108,7 @@ struct caam_ctx {
 	dma_addr_t sh_desc_dec_dma;
 	dma_addr_t sh_desc_givenc_dma;
 	dma_addr_t key_dma;
+	enum dma_data_direction dir;
 	struct device *jrdev;
 	struct alginfo adata;
 	struct alginfo cdata;
@@ -118,6 +119,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
 	u32 *desc;
 	int rem_bytes = CAAM_DESC_BYTES_MAX - AEAD_DESC_JOB_IO_LEN -
 			ctx->adata.keylen_pad;
@@ -136,9 +138,10 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 
 	/* aead_encrypt shared descriptor */
 	desc = ctx->sh_desc_enc;
-	cnstr_shdsc_aead_null_encap(desc, &ctx->adata, ctx->authsize);
+	cnstr_shdsc_aead_null_encap(desc, &ctx->adata, ctx->authsize,
+				    ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	/*
 	 * Job Descriptor and Shared Descriptors
@@ -154,9 +157,10 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 
 	/* aead_decrypt shared descriptor */
 	desc = ctx->sh_desc_dec;
-	cnstr_shdsc_aead_null_decap(desc, &ctx->adata, ctx->authsize);
+	cnstr_shdsc_aead_null_decap(desc, &ctx->adata, ctx->authsize,
+				    ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	return 0;
 }
@@ -168,6 +172,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
 	u32 ctx1_iv_off = 0;
 	u32 *desc, *nonce = NULL;
 	u32 inl_mask;
@@ -234,9 +239,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	desc = ctx->sh_desc_enc;
 	cnstr_shdsc_aead_encap(desc, &ctx->cdata, &ctx->adata, ivsize,
 			       ctx->authsize, is_rfc3686, nonce, ctx1_iv_off,
-			       false);
+			       false, ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 skip_enc:
 	/*
@@ -266,9 +271,9 @@ skip_enc:
 	desc = ctx->sh_desc_dec;
 	cnstr_shdsc_aead_decap(desc, &ctx->cdata, &ctx->adata, ivsize,
 			       ctx->authsize, alg->caam.geniv, is_rfc3686,
-			       nonce, ctx1_iv_off, false);
+			       nonce, ctx1_iv_off, false, ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	if (!alg->caam.geniv)
 		goto skip_givenc;
@@ -300,9 +305,9 @@ skip_enc:
 	desc = ctx->sh_desc_enc;
 	cnstr_shdsc_aead_givencap(desc, &ctx->cdata, &ctx->adata, ivsize,
 				  ctx->authsize, is_rfc3686, nonce,
-				  ctx1_iv_off, false);
+				  ctx1_iv_off, false, ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 skip_givenc:
 	return 0;
@@ -346,7 +351,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	desc = ctx->sh_desc_enc;
 	cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ctx->authsize);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	/*
 	 * Job Descriptor and Shared Descriptors
@@ -363,7 +368,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	desc = ctx->sh_desc_dec;
 	cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ctx->authsize);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	return 0;
 }
@@ -405,7 +410,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	desc = ctx->sh_desc_enc;
 	cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ctx->authsize);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	/*
 	 * Job Descriptor and Shared Descriptors
@@ -422,7 +427,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	desc = ctx->sh_desc_dec;
 	cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ctx->authsize);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	return 0;
 }
@@ -465,7 +470,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	desc = ctx->sh_desc_enc;
 	cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ctx->authsize);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	/*
 	 * Job Descriptor and Shared Descriptors
@@ -482,7 +487,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	desc = ctx->sh_desc_dec;
 	cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ctx->authsize);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	return 0;
 }
@@ -503,6 +508,7 @@ static int aead_setkey(struct crypto_aead *aead,
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
 	struct crypto_authenc_keys keys;
 	int ret = 0;
 
@@ -517,6 +523,27 @@ static int aead_setkey(struct crypto_aead *aead,
 		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
 #endif
 
+	/*
+	 * If DKP is supported, use it in the shared descriptor to generate
+	 * the split key.
+	 */
+	if (ctrlpriv->era >= 6) {
+		ctx->adata.keylen = keys.authkeylen;
+		ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype &
+						      OP_ALG_ALGSEL_MASK);
+
+		if (ctx->adata.keylen_pad + keys.enckeylen > CAAM_MAX_KEY_SIZE)
+			goto badkey;
+
+		memcpy(ctx->key, keys.authkey, keys.authkeylen);
+		memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey,
+		       keys.enckeylen);
+		dma_sync_single_for_device(jrdev, ctx->key_dma,
+					   ctx->adata.keylen_pad +
+					   keys.enckeylen, ctx->dir);
+		goto skip_split_key;
+	}
+
 	ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, keys.authkey,
 			    keys.authkeylen, CAAM_MAX_KEY_SIZE -
 			    keys.enckeylen);
@@ -527,12 +554,14 @@ static int aead_setkey(struct crypto_aead *aead,
 	/* postpend encryption key to auth split key */
 	memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, keys.enckeylen);
 	dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->adata.keylen_pad +
-				   keys.enckeylen, DMA_TO_DEVICE);
+				   keys.enckeylen, ctx->dir);
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
 		       ctx->adata.keylen_pad + keys.enckeylen, 1);
 #endif
+
+skip_split_key:
 	ctx->cdata.keylen = keys.enckeylen;
 	return aead_set_sh_desc(aead);
 badkey:
@@ -552,7 +581,7 @@ static int gcm_setkey(struct crypto_aead *aead,
 #endif
 
 	memcpy(ctx->key, key, keylen);
-	dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE);
+	dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, ctx->dir);
 	ctx->cdata.keylen = keylen;
 
 	return gcm_set_sh_desc(aead);
@@ -580,7 +609,7 @@ static int rfc4106_setkey(struct crypto_aead *aead,
 	 */
 	ctx->cdata.keylen = keylen - 4;
 	dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen,
-				   DMA_TO_DEVICE);
+				   ctx->dir);
 	return rfc4106_set_sh_desc(aead);
 }
 
@@ -606,7 +635,7 @@ static int rfc4543_setkey(struct crypto_aead *aead,
 	 */
 	ctx->cdata.keylen = keylen - 4;
 	dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen,
-				   DMA_TO_DEVICE);
+				   ctx->dir);
 	return rfc4543_set_sh_desc(aead);
 }
 
@@ -625,7 +654,6 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	const bool is_rfc3686 = (ctr_mode &&
 				 (strstr(alg_name, "rfc3686") != NULL));
 
-	memcpy(ctx->key, key, keylen);
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -648,9 +676,8 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 		keylen -= CTR_RFC3686_NONCE_SIZE;
 	}
 
-	dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE);
 	ctx->cdata.keylen = keylen;
-	ctx->cdata.key_virt = ctx->key;
+	ctx->cdata.key_virt = key;
 	ctx->cdata.key_inline = true;
 
 	/* ablkcipher_encrypt shared descriptor */
@@ -658,21 +685,21 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	cnstr_shdsc_ablkcipher_encap(desc, &ctx->cdata, ivsize, is_rfc3686,
 				     ctx1_iv_off);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	/* ablkcipher_decrypt shared descriptor */
 	desc = ctx->sh_desc_dec;
 	cnstr_shdsc_ablkcipher_decap(desc, &ctx->cdata, ivsize, is_rfc3686,
 				     ctx1_iv_off);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	/* ablkcipher_givencrypt shared descriptor */
 	desc = ctx->sh_desc_givenc;
 	cnstr_shdsc_ablkcipher_givencap(desc, &ctx->cdata, ivsize, is_rfc3686,
 					ctx1_iv_off);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_givenc_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	return 0;
 }
@@ -691,23 +718,21 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 		return -EINVAL;
 	}
 
-	memcpy(ctx->key, key, keylen);
-	dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE);
 	ctx->cdata.keylen = keylen;
-	ctx->cdata.key_virt = ctx->key;
+	ctx->cdata.key_virt = key;
 	ctx->cdata.key_inline = true;
 
 	/* xts_ablkcipher_encrypt shared descriptor */
 	desc = ctx->sh_desc_enc;
 	cnstr_shdsc_xts_ablkcipher_encap(desc, &ctx->cdata);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	/* xts_ablkcipher_decrypt shared descriptor */
 	desc = ctx->sh_desc_dec;
 	cnstr_shdsc_xts_ablkcipher_decap(desc, &ctx->cdata);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 
 	return 0;
 }
@@ -979,9 +1004,6 @@ static void init_aead_job(struct aead_request *req,
 		append_seq_out_ptr(desc, dst_dma,
 				   req->assoclen + req->cryptlen - authsize,
 				   out_options);
-
-	/* REG3 = assoclen */
-	append_math_add_imm_u32(desc, REG3, ZERO, IMM, req->assoclen);
 }
 
 static void init_gcm_job(struct aead_request *req,
@@ -996,6 +1018,7 @@ static void init_gcm_job(struct aead_request *req,
 	unsigned int last;
 
 	init_aead_job(req, edesc, all_contig, encrypt);
+	append_math_add_imm_u32(desc, REG3, ZERO, IMM, req->assoclen);
 
 	/* BUG This should not be specific to generic GCM. */
 	last = 0;
@@ -1022,6 +1045,7 @@ static void init_authenc_job(struct aead_request *req,
 						 struct caam_aead_alg, aead);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent);
 	const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
 			       OP_ALG_AAI_CTR_MOD128);
 	const bool is_rfc3686 = alg->caam.rfc3686;
@@ -1045,6 +1069,15 @@ static void init_authenc_job(struct aead_request *req,
 
 	init_aead_job(req, edesc, all_contig, encrypt);
 
+	/*
+	 * {REG3, DPOVRD} = assoclen, depending on whether MATH command supports
+	 * having DPOVRD as destination.
+	 */
+	if (ctrlpriv->era < 3)
+		append_math_add_imm_u32(desc, REG3, ZERO, IMM, req->assoclen);
+	else
+		append_math_add_imm_u32(desc, DPOVRD, ZERO, IMM, req->assoclen);
+
 	if (ivsize && ((is_rfc3686 && encrypt) || !alg->caam.geniv))
 		append_load_as_imm(desc, req->iv, ivsize,
 				   LDST_CLASS_1_CCB |
@@ -3228,9 +3261,11 @@ struct caam_crypto_alg {
 	struct caam_alg_entry caam;
 };
 
-static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam)
+static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
+			    bool uses_dkp)
 {
 	dma_addr_t dma_addr;
+	struct caam_drv_private *priv;
 
 	ctx->jrdev = caam_jr_alloc();
 	if (IS_ERR(ctx->jrdev)) {
@@ -3238,10 +3273,16 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam)
 		return PTR_ERR(ctx->jrdev);
 	}
 
+	priv = dev_get_drvdata(ctx->jrdev->parent);
+	if (priv->era >= 6 && uses_dkp)
+		ctx->dir = DMA_BIDIRECTIONAL;
+	else
+		ctx->dir = DMA_TO_DEVICE;
+
 	dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_enc,
 					offsetof(struct caam_ctx,
 						 sh_desc_enc_dma),
-					DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+					ctx->dir, DMA_ATTR_SKIP_CPU_SYNC);
 	if (dma_mapping_error(ctx->jrdev, dma_addr)) {
 		dev_err(ctx->jrdev, "unable to map key, shared descriptors\n");
 		caam_jr_free(ctx->jrdev);
@@ -3269,7 +3310,7 @@ static int caam_cra_init(struct crypto_tfm *tfm)
 		 container_of(alg, struct caam_crypto_alg, crypto_alg);
 	struct caam_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	return caam_init_common(ctx, &caam_alg->caam);
+	return caam_init_common(ctx, &caam_alg->caam, false);
 }
 
 static int caam_aead_init(struct crypto_aead *tfm)
@@ -3279,14 +3320,15 @@ static int caam_aead_init(struct crypto_aead *tfm)
 		 container_of(alg, struct caam_aead_alg, aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(tfm);
 
-	return caam_init_common(ctx, &caam_alg->caam);
+	return caam_init_common(ctx, &caam_alg->caam,
+				alg->setkey == aead_setkey);
 }
 
 static void caam_exit_common(struct caam_ctx *ctx)
 {
 	dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_enc_dma,
 			       offsetof(struct caam_ctx, sh_desc_enc_dma),
-			       DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+			       ctx->dir, DMA_ATTR_SKIP_CPU_SYNC);
 	caam_jr_free(ctx->jrdev);
 }
 
diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c
index 530c14ee32de..ceb93fbb76e6 100644
--- a/drivers/crypto/caam/caamalg_desc.c
+++ b/drivers/crypto/caam/caamalg_desc.c
@@ -45,16 +45,16 @@ static inline void append_dec_op1(u32 *desc, u32 type)
  * cnstr_shdsc_aead_null_encap - IPSec ESP encapsulation shared descriptor
  *                               (non-protocol) with no (null) encryption.
  * @desc: pointer to buffer used for descriptor construction
- * @adata: pointer to authentication transform definitions. Note that since a
- *         split key is to be used, the size of the split key itself is
- *         specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1,
- *         SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP.
+ * @adata: pointer to authentication transform definitions.
+ *         A split key is required for SEC Era < 6; the size of the split key
+ *         is specified in this case. Valid algorithm values - one of
+ *         OP_ALG_ALGSEL_{MD5, SHA1, SHA224, SHA256, SHA384, SHA512} ANDed
+ *         with OP_ALG_AAI_HMAC_PRECOMP.
  * @icvsize: integrity check value (ICV) size (truncated or full)
- *
- * Note: Requires an MDHA split key.
+ * @era: SEC Era
  */
 void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata,
-				 unsigned int icvsize)
+				 unsigned int icvsize, int era)
 {
 	u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd;
 
@@ -63,13 +63,18 @@ void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata,
 	/* Skip if already shared */
 	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
 				   JUMP_COND_SHRD);
-	if (adata->key_inline)
-		append_key_as_imm(desc, adata->key_virt, adata->keylen_pad,
-				  adata->keylen, CLASS_2 | KEY_DEST_MDHA_SPLIT |
-				  KEY_ENC);
-	else
-		append_key(desc, adata->key_dma, adata->keylen, CLASS_2 |
-			   KEY_DEST_MDHA_SPLIT | KEY_ENC);
+	if (era < 6) {
+		if (adata->key_inline)
+			append_key_as_imm(desc, adata->key_virt,
+					  adata->keylen_pad, adata->keylen,
+					  CLASS_2 | KEY_DEST_MDHA_SPLIT |
+					  KEY_ENC);
+		else
+			append_key(desc, adata->key_dma, adata->keylen,
+				   CLASS_2 | KEY_DEST_MDHA_SPLIT | KEY_ENC);
+	} else {
+		append_proto_dkp(desc, adata);
+	}
 	set_jump_tgt_here(desc, key_jump_cmd);
 
 	/* assoclen + cryptlen = seqinlen */
@@ -121,16 +126,16 @@ EXPORT_SYMBOL(cnstr_shdsc_aead_null_encap);
  * cnstr_shdsc_aead_null_decap - IPSec ESP decapsulation shared descriptor
  *                               (non-protocol) with no (null) decryption.
  * @desc: pointer to buffer used for descriptor construction
- * @adata: pointer to authentication transform definitions. Note that since a
- *         split key is to be used, the size of the split key itself is
- *         specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1,
- *         SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP.
+ * @adata: pointer to authentication transform definitions.
+ *         A split key is required for SEC Era < 6; the size of the split key
+ *         is specified in this case. Valid algorithm values - one of
+ *         OP_ALG_ALGSEL_{MD5, SHA1, SHA224, SHA256, SHA384, SHA512} ANDed
+ *         with OP_ALG_AAI_HMAC_PRECOMP.
  * @icvsize: integrity check value (ICV) size (truncated or full)
- *
- * Note: Requires an MDHA split key.
+ * @era: SEC Era
  */
 void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata,
-				 unsigned int icvsize)
+				 unsigned int icvsize, int era)
 {
 	u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd, *jump_cmd;
 
@@ -139,13 +144,18 @@ void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata,
 	/* Skip if already shared */
 	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
 				   JUMP_COND_SHRD);
-	if (adata->key_inline)
-		append_key_as_imm(desc, adata->key_virt, adata->keylen_pad,
-				  adata->keylen, CLASS_2 |
-				  KEY_DEST_MDHA_SPLIT | KEY_ENC);
-	else
-		append_key(desc, adata->key_dma, adata->keylen, CLASS_2 |
-			   KEY_DEST_MDHA_SPLIT | KEY_ENC);
+	if (era < 6) {
+		if (adata->key_inline)
+			append_key_as_imm(desc, adata->key_virt,
+					  adata->keylen_pad, adata->keylen,
+					  CLASS_2 | KEY_DEST_MDHA_SPLIT |
+					  KEY_ENC);
+		else
+			append_key(desc, adata->key_dma, adata->keylen,
+				   CLASS_2 | KEY_DEST_MDHA_SPLIT | KEY_ENC);
+	} else {
+		append_proto_dkp(desc, adata);
+	}
 	set_jump_tgt_here(desc, key_jump_cmd);
 
 	/* Class 2 operation */
@@ -204,7 +214,7 @@ EXPORT_SYMBOL(cnstr_shdsc_aead_null_decap);
 static void init_sh_desc_key_aead(u32 * const desc,
 				  struct alginfo * const cdata,
 				  struct alginfo * const adata,
-				  const bool is_rfc3686, u32 *nonce)
+				  const bool is_rfc3686, u32 *nonce, int era)
 {
 	u32 *key_jump_cmd;
 	unsigned int enckeylen = cdata->keylen;
@@ -224,13 +234,18 @@ static void init_sh_desc_key_aead(u32 * const desc,
 	if (is_rfc3686)
 		enckeylen -= CTR_RFC3686_NONCE_SIZE;
 
-	if (adata->key_inline)
-		append_key_as_imm(desc, adata->key_virt, adata->keylen_pad,
-				  adata->keylen, CLASS_2 |
-				  KEY_DEST_MDHA_SPLIT | KEY_ENC);
-	else
-		append_key(desc, adata->key_dma, adata->keylen, CLASS_2 |
-			   KEY_DEST_MDHA_SPLIT | KEY_ENC);
+	if (era < 6) {
+		if (adata->key_inline)
+			append_key_as_imm(desc, adata->key_virt,
+					  adata->keylen_pad, adata->keylen,
+					  CLASS_2 | KEY_DEST_MDHA_SPLIT |
+					  KEY_ENC);
+		else
+			append_key(desc, adata->key_dma, adata->keylen,
+				   CLASS_2 | KEY_DEST_MDHA_SPLIT | KEY_ENC);
+	} else {
+		append_proto_dkp(desc, adata);
+	}
 
 	if (cdata->key_inline)
 		append_key_as_imm(desc, cdata->key_virt, enckeylen,
@@ -261,26 +276,27 @@ static void init_sh_desc_key_aead(u32 * const desc,
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed
  *         with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128.
- * @adata: pointer to authentication transform definitions. Note that since a
- *         split key is to be used, the size of the split key itself is
- *         specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1,
- *         SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP.
+ * @adata: pointer to authentication transform definitions.
+ *         A split key is required for SEC Era < 6; the size of the split key
+ *         is specified in this case. Valid algorithm values - one of
+ *         OP_ALG_ALGSEL_{MD5, SHA1, SHA224, SHA256, SHA384, SHA512} ANDed
+ *         with OP_ALG_AAI_HMAC_PRECOMP.
  * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
  * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template
  * @nonce: pointer to rfc3686 nonce
  * @ctx1_iv_off: IV offset in CONTEXT1 register
  * @is_qi: true when called from caam/qi
- *
- * Note: Requires an MDHA split key.
+ * @era: SEC Era
  */
 void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata,
 			    struct alginfo *adata, unsigned int ivsize,
 			    unsigned int icvsize, const bool is_rfc3686,
-			    u32 *nonce, const u32 ctx1_iv_off, const bool is_qi)
+			    u32 *nonce, const u32 ctx1_iv_off, const bool is_qi,
+			    int era)
 {
 	/* Note: Context registers are saved. */
-	init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce);
+	init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce, era);
 
 	/* Class 2 operation */
 	append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL |
@@ -306,8 +322,13 @@ void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata,
 	}
 
 	/* Read and write assoclen bytes */
-	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
-	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+	if (is_qi || era < 3) {
+		append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+		append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+	} else {
+		append_math_add(desc, VARSEQINLEN, ZERO, DPOVRD, CAAM_CMD_SZ);
+		append_math_add(desc, VARSEQOUTLEN, ZERO, DPOVRD, CAAM_CMD_SZ);
+	}
 
 	/* Skip assoc data */
 	append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
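
The REG3/DPOVRD split above recurs in every AEAD descriptor in this patch; a hedged sketch of the shared pattern (append_assoclen_math is a hypothetical helper, not part of the patch):

	static void append_assoclen_math(u32 * const desc, bool is_qi, int era)
	{
		/* QI descriptors and Era < 3 take assoclen from MATH REG3;
		 * newer job-ring descriptors read it from DPOVRD instead.
		 */
		u32 src = (is_qi || era < 3) ? REG3 : DPOVRD;

		append_math_add(desc, VARSEQINLEN, ZERO, src, CAAM_CMD_SZ);
		append_math_add(desc, VARSEQOUTLEN, ZERO, src, CAAM_CMD_SZ);
	}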
@@ -350,27 +371,27 @@ EXPORT_SYMBOL(cnstr_shdsc_aead_encap);
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed
  *         with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128.
- * @adata: pointer to authentication transform definitions. Note that since a
- *         split key is to be used, the size of the split key itself is
- *         specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1,
- *         SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP.
+ * @adata: pointer to authentication transform definitions.
+ *         A split key is required for SEC Era < 6; the size of the split key
+ *         is specified in this case. Valid algorithm values - one of
+ *         OP_ALG_ALGSEL_{MD5, SHA1, SHA224, SHA256, SHA384, SHA512} ANDed
+ *         with OP_ALG_AAI_HMAC_PRECOMP.
  * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
  * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template
  * @nonce: pointer to rfc3686 nonce
  * @ctx1_iv_off: IV offset in CONTEXT1 register
  * @is_qi: true when called from caam/qi
- *
- * Note: Requires an MDHA split key.
+ * @era: SEC Era
  */
 void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata,
 			    struct alginfo *adata, unsigned int ivsize,
 			    unsigned int icvsize, const bool geniv,
 			    const bool is_rfc3686, u32 *nonce,
-			    const u32 ctx1_iv_off, const bool is_qi)
+			    const u32 ctx1_iv_off, const bool is_qi, int era)
 {
 	/* Note: Context registers are saved. */
-	init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce);
+	init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce, era);
 
 	/* Class 2 operation */
 	append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL |
@@ -397,11 +418,23 @@ void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata,
 	}
 
 	/* Read and write assoclen bytes */
-	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
-	if (geniv)
-		append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize);
-	else
-		append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+	if (is_qi || era < 3) {
+		append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+		if (geniv)
+			append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM,
+						ivsize);
+		else
+			append_math_add(desc, VARSEQOUTLEN, ZERO, REG3,
+					CAAM_CMD_SZ);
+	} else {
+		append_math_add(desc, VARSEQINLEN, ZERO, DPOVRD, CAAM_CMD_SZ);
+		if (geniv)
+			append_math_add_imm_u32(desc, VARSEQOUTLEN, DPOVRD, IMM,
+						ivsize);
+		else
+			append_math_add(desc, VARSEQOUTLEN, ZERO, DPOVRD,
+					CAAM_CMD_SZ);
+	}
 
 	/* Skip assoc data */
 	append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
@@ -456,29 +489,29 @@ EXPORT_SYMBOL(cnstr_shdsc_aead_decap);
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed
  *         with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128.
- * @adata: pointer to authentication transform definitions. Note that since a
- *         split key is to be used, the size of the split key itself is
- *         specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1,
- *         SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP.
+ * @adata: pointer to authentication transform definitions.
+ *         A split key is required for SEC Era < 6; the size of the split key
+ *         is specified in this case. Valid algorithm values - one of
+ *         OP_ALG_ALGSEL_{MD5, SHA1, SHA224, SHA256, SHA384, SHA512} ANDed
+ *         with OP_ALG_AAI_HMAC_PRECOMP.
  * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
  * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template
  * @nonce: pointer to rfc3686 nonce
  * @ctx1_iv_off: IV offset in CONTEXT1 register
  * @is_qi: true when called from caam/qi
- *
- * Note: Requires an MDHA split key.
+ * @era: SEC Era
  */
 void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata,
 			       struct alginfo *adata, unsigned int ivsize,
 			       unsigned int icvsize, const bool is_rfc3686,
 			       u32 *nonce, const u32 ctx1_iv_off,
-			       const bool is_qi)
+			       const bool is_qi, int era)
 {
 	u32 geniv, moveiv;
 
 	/* Note: Context registers are saved. */
-	init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce);
+	init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce, era);
 
 	if (is_qi) {
 		u32 *wait_load_cmd;
@@ -528,8 +561,13 @@ copy_iv:
 			 OP_ALG_ENCRYPT);
 
 	/* Read and write assoclen bytes */
-	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
-	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+	if (is_qi || era < 3) {
+		append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+		append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+	} else {
+		append_math_add(desc, VARSEQINLEN, ZERO, DPOVRD, CAAM_CMD_SZ);
+		append_math_add(desc, VARSEQOUTLEN, ZERO, DPOVRD, CAAM_CMD_SZ);
+	}
 
 	/* Skip assoc data */
 	append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
@@ -1075,7 +1113,7 @@ void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata,
 
 	/* Load nonce into CONTEXT1 reg */
 	if (is_rfc3686) {
-		u8 *nonce = cdata->key_virt + cdata->keylen;
+		const u8 *nonce = cdata->key_virt + cdata->keylen;
 
 		append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
 				   LDST_CLASS_IND_CCB |
@@ -1140,7 +1178,7 @@ void cnstr_shdsc_ablkcipher_decap(u32 * const desc, struct alginfo *cdata,
 
 	/* Load nonce into CONTEXT1 reg */
 	if (is_rfc3686) {
-		u8 *nonce = cdata->key_virt + cdata->keylen;
+		const u8 *nonce = cdata->key_virt + cdata->keylen;
 
 		append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
 				   LDST_CLASS_IND_CCB |
@@ -1209,7 +1247,7 @@ void cnstr_shdsc_ablkcipher_givencap(u32 * const desc, struct alginfo *cdata,
 
 	/* Load Nonce into CONTEXT1 reg */
 	if (is_rfc3686) {
-		u8 *nonce = cdata->key_virt + cdata->keylen;
+		const u8 *nonce = cdata->key_virt + cdata->keylen;
 
 		append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
 				   LDST_CLASS_IND_CCB |
diff --git a/drivers/crypto/caam/caamalg_desc.h b/drivers/crypto/caam/caamalg_desc.h
index e412ec8f7005..5f9445ae2114 100644
--- a/drivers/crypto/caam/caamalg_desc.h
+++ b/drivers/crypto/caam/caamalg_desc.h
@@ -43,28 +43,28 @@
 					 15 * CAAM_CMD_SZ)
 
 void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata,
-				 unsigned int icvsize);
+				 unsigned int icvsize, int era);
 
 void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata,
-				 unsigned int icvsize);
+				 unsigned int icvsize, int era);
 
 void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata,
 			    struct alginfo *adata, unsigned int ivsize,
 			    unsigned int icvsize, const bool is_rfc3686,
 			    u32 *nonce, const u32 ctx1_iv_off,
-			    const bool is_qi);
+			    const bool is_qi, int era);
 
 void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata,
 			    struct alginfo *adata, unsigned int ivsize,
 			    unsigned int icvsize, const bool geniv,
 			    const bool is_rfc3686, u32 *nonce,
-			    const u32 ctx1_iv_off, const bool is_qi);
+			    const u32 ctx1_iv_off, const bool is_qi, int era);
 
 void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata,
 			       struct alginfo *adata, unsigned int ivsize,
 			       unsigned int icvsize, const bool is_rfc3686,
 			       u32 *nonce, const u32 ctx1_iv_off,
-			       const bool is_qi);
+			       const bool is_qi, int era);
 
 void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 			   unsigned int icvsize);
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index f9f08fce4356..4aecc9435f69 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -53,6 +53,7 @@ struct caam_ctx {
 	u32 sh_desc_givenc[DESC_MAX_USED_LEN];
 	u8 key[CAAM_MAX_KEY_SIZE];
 	dma_addr_t key_dma;
+	enum dma_data_direction dir;
 	struct alginfo adata;
 	struct alginfo cdata;
 	unsigned int authsize;
@@ -74,6 +75,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
 			       OP_ALG_AAI_CTR_MOD128);
 	const bool is_rfc3686 = alg->caam.rfc3686;
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent);
 
 	if (!ctx->cdata.keylen || !ctx->authsize)
 		return 0;
@@ -124,7 +126,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 
 	cnstr_shdsc_aead_encap(ctx->sh_desc_enc, &ctx->cdata, &ctx->adata,
 			       ivsize, ctx->authsize, is_rfc3686, nonce,
-			       ctx1_iv_off, true);
+			       ctx1_iv_off, true, ctrlpriv->era);
 
 skip_enc:
 	/* aead_decrypt shared descriptor */
@@ -149,7 +151,8 @@ skip_enc:
 
 	cnstr_shdsc_aead_decap(ctx->sh_desc_dec, &ctx->cdata, &ctx->adata,
 			       ivsize, ctx->authsize, alg->caam.geniv,
-			       is_rfc3686, nonce, ctx1_iv_off, true);
+			       is_rfc3686, nonce, ctx1_iv_off, true,
+			       ctrlpriv->era);
 
 	if (!alg->caam.geniv)
 		goto skip_givenc;
@@ -176,7 +179,7 @@ skip_enc:
 
 	cnstr_shdsc_aead_givencap(ctx->sh_desc_enc, &ctx->cdata, &ctx->adata,
 				  ivsize, ctx->authsize, is_rfc3686, nonce,
-				  ctx1_iv_off, true);
+				  ctx1_iv_off, true, ctrlpriv->era);
 
 skip_givenc:
 	return 0;
@@ -197,6 +200,7 @@ static int aead_setkey(struct crypto_aead *aead, const u8 *key,
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
 	struct crypto_authenc_keys keys;
 	int ret = 0;
 
@@ -211,6 +215,27 @@ static int aead_setkey(struct crypto_aead *aead, const u8 *key,
 		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
 #endif
 
+	/*
+	 * If DKP is supported, use it in the shared descriptor to generate
+	 * the split key.
+	 */
+	if (ctrlpriv->era >= 6) {
+		ctx->adata.keylen = keys.authkeylen;
+		ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype &
+						      OP_ALG_ALGSEL_MASK);
+
+		if (ctx->adata.keylen_pad + keys.enckeylen > CAAM_MAX_KEY_SIZE)
+			goto badkey;
+
+		memcpy(ctx->key, keys.authkey, keys.authkeylen);
+		memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey,
+		       keys.enckeylen);
+		dma_sync_single_for_device(jrdev, ctx->key_dma,
+					   ctx->adata.keylen_pad +
+					   keys.enckeylen, ctx->dir);
+		goto skip_split_key;
+	}
+
 	ret = gen_split_key(jrdev, ctx->key, &ctx->adata, keys.authkey,
 			    keys.authkeylen, CAAM_MAX_KEY_SIZE -
 			    keys.enckeylen);
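
Editorial sketch of the ctx->key layout the DKP branch above relies on; the authkey region is overwritten in place by the engine, which is why the buffer is mapped bidirectionally:

	/*
	 * ctx->key, as populated by aead_setkey() on Era >= 6:
	 *
	 *   0                    adata.keylen_pad          + enckeylen
	 *   | authkey in / split key out |      enckey      |
	 *
	 * gen_split_key() is skipped; the DKP OPERATION in the shared
	 * descriptor derives the split key when the descriptor first runs.
	 */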
@@ -220,13 +245,14 @@ static int aead_setkey(struct crypto_aead *aead, const u8 *key,
 	/* append encryption key after the auth split key */
 	memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, keys.enckeylen);
 	dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->adata.keylen_pad +
-				   keys.enckeylen, DMA_TO_DEVICE);
+				   keys.enckeylen, ctx->dir);
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
 		       ctx->adata.keylen_pad + keys.enckeylen, 1);
 #endif
 
+skip_split_key:
 	ctx->cdata.keylen = keys.enckeylen;
 
 	ret = aead_set_sh_desc(aead);
@@ -272,7 +298,6 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	const bool is_rfc3686 = (ctr_mode && strstr(alg_name, "rfc3686"));
 	int ret = 0;
 
-	memcpy(ctx->key, key, keylen);
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -295,9 +320,8 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 		keylen -= CTR_RFC3686_NONCE_SIZE;
 	}
 
-	dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE);
 	ctx->cdata.keylen = keylen;
-	ctx->cdata.key_virt = ctx->key;
+	ctx->cdata.key_virt = key;
 	ctx->cdata.key_inline = true;
 
 	/* ablkcipher encrypt, decrypt, givencrypt shared descriptors */
@@ -356,10 +380,8 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 		return -EINVAL;
 	}
 
-	memcpy(ctx->key, key, keylen);
-	dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE);
 	ctx->cdata.keylen = keylen;
-	ctx->cdata.key_virt = ctx->key;
+	ctx->cdata.key_virt = key;
 	ctx->cdata.key_inline = true;
 
 	/* xts ablkcipher encrypt, decrypt shared descriptors */
@@ -668,7 +690,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	qm_sg_ents = 1 + !!ivsize + mapped_src_nents +
 		     (mapped_dst_nents > 1 ? mapped_dst_nents : 0);
 	if (unlikely(qm_sg_ents > CAAM_QI_MAX_AEAD_SG)) {
-		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+		dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
 			qm_sg_ents, CAAM_QI_MAX_AEAD_SG);
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
 			   iv_dma, ivsize, op_type, 0, 0);
@@ -905,7 +927,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 
 	qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
 	if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
-		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+		dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
 			qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
 			   iv_dma, ivsize, op_type, 0, 0);
@@ -1058,7 +1080,7 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
 	}
 
 	if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
-		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+		dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
 			qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
 			   iv_dma, ivsize, GIVENCRYPT, 0, 0);
@@ -2123,7 +2145,8 @@ struct caam_crypto_alg {
 	struct caam_alg_entry caam;
 };
 
-static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam)
+static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
+			    bool uses_dkp)
 {
 	struct caam_drv_private *priv;
 
@@ -2137,8 +2160,14 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam)
 		return PTR_ERR(ctx->jrdev);
 	}
 
+	priv = dev_get_drvdata(ctx->jrdev->parent);
+	if (priv->era >= 6 && uses_dkp)
+		ctx->dir = DMA_BIDIRECTIONAL;
+	else
+		ctx->dir = DMA_TO_DEVICE;
+
 	ctx->key_dma = dma_map_single(ctx->jrdev, ctx->key, sizeof(ctx->key),
-				      DMA_TO_DEVICE);
+				      ctx->dir);
 	if (dma_mapping_error(ctx->jrdev, ctx->key_dma)) {
 		dev_err(ctx->jrdev, "unable to map key\n");
 		caam_jr_free(ctx->jrdev);
@@ -2149,7 +2178,6 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam)
 	ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | caam->class1_alg_type;
 	ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam->class2_alg_type;
 
-	priv = dev_get_drvdata(ctx->jrdev->parent);
 	ctx->qidev = priv->qidev;
 
 	spin_lock_init(&ctx->lock);
@@ -2167,7 +2195,7 @@ static int caam_cra_init(struct crypto_tfm *tfm)
 							crypto_alg);
 	struct caam_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	return caam_init_common(ctx, &caam_alg->caam);
+	return caam_init_common(ctx, &caam_alg->caam, false);
 }
 
 static int caam_aead_init(struct crypto_aead *tfm)
@@ -2177,7 +2205,8 @@ static int caam_aead_init(struct crypto_aead *tfm)
 						      aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(tfm);
 
-	return caam_init_common(ctx, &caam_alg->caam);
+	return caam_init_common(ctx, &caam_alg->caam,
+				alg->setkey == aead_setkey);
 }
 
 static void caam_exit_common(struct caam_ctx *ctx)
@@ -2186,8 +2215,7 @@ static void caam_exit_common(struct caam_ctx *ctx)
 	caam_drv_ctx_rel(ctx->drv_ctx[DECRYPT]);
 	caam_drv_ctx_rel(ctx->drv_ctx[GIVENCRYPT]);
 
-	dma_unmap_single(ctx->jrdev, ctx->key_dma, sizeof(ctx->key),
-			 DMA_TO_DEVICE);
+	dma_unmap_single(ctx->jrdev, ctx->key_dma, sizeof(ctx->key), ctx->dir);
 
 	caam_jr_free(ctx->jrdev);
 }
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 616720a04e7a..0beb28196e20 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -107,6 +107,7 @@ struct caam_hash_ctx {
 	dma_addr_t sh_desc_update_first_dma;
 	dma_addr_t sh_desc_fin_dma;
 	dma_addr_t sh_desc_digest_dma;
+	enum dma_data_direction dir;
 	struct device *jrdev;
 	u8 key[CAAM_MAX_HASH_KEY_SIZE];
 	int ctx_len;
@@ -241,7 +242,8 @@ static inline int ctx_map_to_sec4_sg(struct device *jrdev,
  *     read and write to seqout
  */
 static inline void ahash_gen_sh_desc(u32 *desc, u32 state, int digestsize,
-				     struct caam_hash_ctx *ctx, bool import_ctx)
+				     struct caam_hash_ctx *ctx, bool import_ctx,
+				     int era)
 {
 	u32 op = ctx->adata.algtype;
 	u32 *skip_key_load;
@@ -254,9 +256,12 @@ static inline void ahash_gen_sh_desc(u32 *desc, u32 state, int digestsize,
 		skip_key_load = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
 					    JUMP_COND_SHRD);
 
-		append_key_as_imm(desc, ctx->key, ctx->adata.keylen_pad,
-				  ctx->adata.keylen, CLASS_2 |
-				  KEY_DEST_MDHA_SPLIT | KEY_ENC);
+		if (era < 6)
+			append_key_as_imm(desc, ctx->key, ctx->adata.keylen_pad,
+					  ctx->adata.keylen, CLASS_2 |
+					  KEY_DEST_MDHA_SPLIT | KEY_ENC);
+		else
+			append_proto_dkp(desc, &ctx->adata);
 
 		set_jump_tgt_here(desc, skip_key_load);
 
@@ -289,13 +294,17 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct device *jrdev = ctx->jrdev;
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
 	u32 *desc;
 
+	ctx->adata.key_virt = ctx->key;
+
 	/* ahash_update shared descriptor */
 	desc = ctx->sh_desc_update;
-	ahash_gen_sh_desc(desc, OP_ALG_AS_UPDATE, ctx->ctx_len, ctx, true);
+	ahash_gen_sh_desc(desc, OP_ALG_AS_UPDATE, ctx->ctx_len, ctx, true,
+			  ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_update_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR,
 		       "ahash update shdesc@"__stringify(__LINE__)": ",
@@ -304,9 +313,10 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
 	/* ahash_update_first shared descriptor */
 	desc = ctx->sh_desc_update_first;
-	ahash_gen_sh_desc(desc, OP_ALG_AS_INIT, ctx->ctx_len, ctx, false);
+	ahash_gen_sh_desc(desc, OP_ALG_AS_INIT, ctx->ctx_len, ctx, false,
+			  ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_update_first_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR,
 		       "ahash update first shdesc@"__stringify(__LINE__)": ",
@@ -315,9 +325,10 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
 	/* ahash_final shared descriptor */
 	desc = ctx->sh_desc_fin;
-	ahash_gen_sh_desc(desc, OP_ALG_AS_FINALIZE, digestsize, ctx, true);
+	ahash_gen_sh_desc(desc, OP_ALG_AS_FINALIZE, digestsize, ctx, true,
+			  ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_fin_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "ahash final shdesc@"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, desc,
@@ -326,9 +337,10 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
 	/* ahash_digest shared descriptor */
 	desc = ctx->sh_desc_digest;
-	ahash_gen_sh_desc(desc, OP_ALG_AS_INITFINAL, digestsize, ctx, false);
+	ahash_gen_sh_desc(desc, OP_ALG_AS_INITFINAL, digestsize, ctx, false,
+			  ctrlpriv->era);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_digest_dma,
-				   desc_bytes(desc), DMA_TO_DEVICE);
+				   desc_bytes(desc), ctx->dir);
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR,
 		       "ahash digest shdesc@"__stringify(__LINE__)": ",
@@ -421,6 +433,7 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	int blocksize = crypto_tfm_alg_blocksize(&ahash->base);
 	int digestsize = crypto_ahash_digestsize(ahash);
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent);
 	int ret;
 	u8 *hashed_key = NULL;
 
@@ -441,16 +454,26 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 		key = hashed_key;
 	}
 
-	ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, key, keylen,
-			    CAAM_MAX_HASH_KEY_SIZE);
-	if (ret)
-		goto bad_free_key;
+	/*
+	 * If DKP is supported, use it in the shared descriptor to generate
+	 * the split key.
+	 */
+	if (ctrlpriv->era >= 6) {
+		ctx->adata.key_inline = true;
+		ctx->adata.keylen = keylen;
+		ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype &
+						      OP_ALG_ALGSEL_MASK);
 
-#ifdef DEBUG
-	print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
-		       ctx->adata.keylen_pad, 1);
-#endif
+		if (ctx->adata.keylen_pad > CAAM_MAX_HASH_KEY_SIZE)
+			goto bad_free_key;
+
+		memcpy(ctx->key, key, keylen);
+	} else {
+		ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, key,
+				    keylen, CAAM_MAX_HASH_KEY_SIZE);
+		if (ret)
+			goto bad_free_key;
+	}
 
 	kfree(hashed_key);
 	return ahash_set_sh_desc(ahash);
@@ -1715,6 +1738,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 					 HASH_MSG_LEN + 64,
 					 HASH_MSG_LEN + SHA512_DIGEST_SIZE };
 	dma_addr_t dma_addr;
+	struct caam_drv_private *priv;
 
 	/*
 	 * Get a Job ring from Job Ring driver to ensure in-order
@@ -1726,10 +1750,13 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 		return PTR_ERR(ctx->jrdev);
 	}
 
+	priv = dev_get_drvdata(ctx->jrdev->parent);
+	ctx->dir = priv->era >= 6 ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
 	dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_update,
 					offsetof(struct caam_hash_ctx,
 						 sh_desc_update_dma),
-					DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+					ctx->dir, DMA_ATTR_SKIP_CPU_SYNC);
 	if (dma_mapping_error(ctx->jrdev, dma_addr)) {
 		dev_err(ctx->jrdev, "unable to map shared descriptors\n");
 		caam_jr_free(ctx->jrdev);
@@ -1764,7 +1791,7 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm)
 	dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_update_dma,
 			       offsetof(struct caam_hash_ctx,
 					sh_desc_update_dma),
-			       DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+			       ctx->dir, DMA_ATTR_SKIP_CPU_SYNC);
 	caam_jr_free(ctx->jrdev);
 }
 
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 027e121c6f70..75d280cb2dc0 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -611,6 +611,8 @@ static int caam_probe(struct platform_device *pdev)
 		goto iounmap_ctrl;
 	}
 
+	ctrlpriv->era = caam_get_era();
+
 	ret = of_platform_populate(nprop, caam_match, NULL, dev);
 	if (ret) {
 		dev_err(dev, "JR platform devices creation error\n");
@@ -742,7 +744,7 @@ static int caam_probe(struct platform_device *pdev)
 
 	/* Report "alive" for developer to see */
 	dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
-		 caam_get_era());
+		 ctrlpriv->era);
 	dev_info(dev, "job rings = %d, qi = %d, dpaa2 = %s\n",
 		 ctrlpriv->total_jobrs, ctrlpriv->qi_present,
 		 caam_dpaa2 ? "yes" : "no");
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index 8142de7ba050..f76ff160a02c 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -444,6 +444,18 @@
 #define OP_PCLID_DSAVERIFY	(0x16 << OP_PCLID_SHIFT)
 #define OP_PCLID_RSAENC_PUBKEY  (0x18 << OP_PCLID_SHIFT)
 #define OP_PCLID_RSADEC_PRVKEY  (0x19 << OP_PCLID_SHIFT)
+#define OP_PCLID_DKP_MD5	(0x20 << OP_PCLID_SHIFT)
+#define OP_PCLID_DKP_SHA1	(0x21 << OP_PCLID_SHIFT)
+#define OP_PCLID_DKP_SHA224	(0x22 << OP_PCLID_SHIFT)
+#define OP_PCLID_DKP_SHA256	(0x23 << OP_PCLID_SHIFT)
+#define OP_PCLID_DKP_SHA384	(0x24 << OP_PCLID_SHIFT)
+#define OP_PCLID_DKP_SHA512	(0x25 << OP_PCLID_SHIFT)
+#define OP_PCLID_DKP_RIF_MD5	(0x60 << OP_PCLID_SHIFT)
+#define OP_PCLID_DKP_RIF_SHA1	(0x61 << OP_PCLID_SHIFT)
+#define OP_PCLID_DKP_RIF_SHA224	(0x62 << OP_PCLID_SHIFT)
+#define OP_PCLID_DKP_RIF_SHA256	(0x63 << OP_PCLID_SHIFT)
+#define OP_PCLID_DKP_RIF_SHA384	(0x64 << OP_PCLID_SHIFT)
+#define OP_PCLID_DKP_RIF_SHA512	(0x65 << OP_PCLID_SHIFT)
 
 /* Assuming OP_TYPE = OP_TYPE_DECAP_PROTOCOL/ENCAP_PROTOCOL */
 #define OP_PCLID_IPSEC		(0x01 << OP_PCLID_SHIFT)
@@ -1093,6 +1105,22 @@
 /* MacSec protinfos */
 #define OP_PCL_MACSEC				 0x0001
 
+/* Derived Key Protocol (DKP) Protinfo */
+#define OP_PCL_DKP_SRC_SHIFT	14
+#define OP_PCL_DKP_SRC_MASK	(3 << OP_PCL_DKP_SRC_SHIFT)
+#define OP_PCL_DKP_SRC_IMM	(0 << OP_PCL_DKP_SRC_SHIFT)
+#define OP_PCL_DKP_SRC_SEQ	(1 << OP_PCL_DKP_SRC_SHIFT)
+#define OP_PCL_DKP_SRC_PTR	(2 << OP_PCL_DKP_SRC_SHIFT)
+#define OP_PCL_DKP_SRC_SGF	(3 << OP_PCL_DKP_SRC_SHIFT)
+#define OP_PCL_DKP_DST_SHIFT	12
+#define OP_PCL_DKP_DST_MASK	(3 << OP_PCL_DKP_DST_SHIFT)
+#define OP_PCL_DKP_DST_IMM	(0 << OP_PCL_DKP_DST_SHIFT)
+#define OP_PCL_DKP_DST_SEQ	(1 << OP_PCL_DKP_DST_SHIFT)
+#define OP_PCL_DKP_DST_PTR	(2 << OP_PCL_DKP_DST_SHIFT)
+#define OP_PCL_DKP_DST_SGF	(3 << OP_PCL_DKP_DST_SHIFT)
+#define OP_PCL_DKP_KEY_SHIFT	0
+#define OP_PCL_DKP_KEY_MASK	(0xfff << OP_PCL_DKP_KEY_SHIFT)
+
 /* PKI unidirectional protocol protinfo bits */
 #define OP_PCL_PKPROT_TEST			 0x0008
 #define OP_PCL_PKPROT_DECRYPT			 0x0004
@@ -1452,6 +1480,7 @@
 #define MATH_DEST_REG1		(0x01 << MATH_DEST_SHIFT)
 #define MATH_DEST_REG2		(0x02 << MATH_DEST_SHIFT)
 #define MATH_DEST_REG3		(0x03 << MATH_DEST_SHIFT)
+#define MATH_DEST_DPOVRD	(0x07 << MATH_DEST_SHIFT)
 #define MATH_DEST_SEQINLEN	(0x08 << MATH_DEST_SHIFT)
 #define MATH_DEST_SEQOUTLEN	(0x09 << MATH_DEST_SHIFT)
 #define MATH_DEST_VARSEQINLEN	(0x0a << MATH_DEST_SHIFT)
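
For reference, a hedged example of how the new DKP defines combine into a single OPERATION command word (the keylen value is illustrative):

	u32 keylen = 20;	/* illustrative raw HMAC key length */
	u32 op = OP_TYPE_UNI_PROTOCOL | OP_PCLID_DKP_SHA256 |
		 OP_PCL_DKP_SRC_IMM | OP_PCL_DKP_DST_IMM |
		 (keylen & OP_PCL_DKP_KEY_MASK);	/* immediate key in, immediate split key out */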
diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h
index ba1ca0806f0a..d4256fa4a1d6 100644
--- a/drivers/crypto/caam/desc_constr.h
+++ b/drivers/crypto/caam/desc_constr.h
@@ -109,7 +109,7 @@ static inline void init_job_desc_shared(u32 * const desc, dma_addr_t ptr,
 	append_ptr(desc, ptr);
 }
 
-static inline void append_data(u32 * const desc, void *data, int len)
+static inline void append_data(u32 * const desc, const void *data, int len)
 {
 	u32 *offset = desc_end(desc);
 
@@ -172,7 +172,7 @@ static inline void append_cmd_ptr_extlen(u32 * const desc, dma_addr_t ptr,
 	append_cmd(desc, len);
 }
 
-static inline void append_cmd_data(u32 * const desc, void *data, int len,
+static inline void append_cmd_data(u32 * const desc, const void *data, int len,
 				   u32 command)
 {
 	append_cmd(desc, command | IMMEDIATE | len);
@@ -271,7 +271,7 @@ APPEND_SEQ_PTR_INTLEN(in, IN)
 APPEND_SEQ_PTR_INTLEN(out, OUT)
 
 #define APPEND_CMD_PTR_TO_IMM(cmd, op) \
-static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \
+static inline void append_##cmd##_as_imm(u32 * const desc, const void *data, \
 					 unsigned int len, u32 options) \
 { \
 	PRINT_POS; \
@@ -312,7 +312,7 @@ APPEND_CMD_PTR_LEN(seq_out_ptr, SEQ_OUT_PTR, u32)
  * from length of immediate data provided, e.g., split keys
  */
 #define APPEND_CMD_PTR_TO_IMM2(cmd, op) \
-static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \
+static inline void append_##cmd##_as_imm(u32 * const desc, const void *data, \
 					 unsigned int data_len, \
 					 unsigned int len, u32 options) \
 { \
@@ -452,7 +452,7 @@ struct alginfo {
 	unsigned int keylen_pad;
 	union {
 		dma_addr_t key_dma;
-		void *key_virt;
+		const void *key_virt;
 	};
 	bool key_inline;
 };
@@ -496,4 +496,45 @@ static inline int desc_inline_query(unsigned int sd_base_len,
 	return (rem_bytes >= 0) ? 0 : -1;
 }
 
+/**
+ * append_proto_dkp - Derived Key Protocol (DKP): key -> split key
+ * @desc: pointer to buffer used for descriptor construction
+ * @adata: pointer to authentication transform definitions.
+ *         keylen should be the length of the initial key, while keylen_pad
+ *         should be the length of the derived (split) key.
+ *         Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, SHA224,
+ *         SHA256, SHA384, SHA512}.
+ */
+static inline void append_proto_dkp(u32 * const desc, struct alginfo *adata)
+{
+	u32 protid;
+
+	/*
+	 * Quick & dirty translation from OP_ALG_ALGSEL_{MD5, SHA*}
+	 * to OP_PCLID_DKP_{MD5, SHA*}
+	 */
+	protid = (adata->algtype & OP_ALG_ALGSEL_SUBMASK) |
+		 (0x20 << OP_ALG_ALGSEL_SHIFT);
+
+	if (adata->key_inline) {
+		int words;
+
+		append_operation(desc, OP_TYPE_UNI_PROTOCOL | protid |
+				 OP_PCL_DKP_SRC_IMM | OP_PCL_DKP_DST_IMM |
+				 adata->keylen);
+		append_data(desc, adata->key_virt, adata->keylen);
+
+		/* Reserve space in descriptor buffer for the derived key */
+		words = (ALIGN(adata->keylen_pad, CAAM_CMD_SZ) -
+			 ALIGN(adata->keylen, CAAM_CMD_SZ)) / CAAM_CMD_SZ;
+		if (words)
+			(*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + words);
+	} else {
+		append_operation(desc, OP_TYPE_UNI_PROTOCOL | protid |
+				 OP_PCL_DKP_SRC_PTR | OP_PCL_DKP_DST_PTR |
+				 adata->keylen);
+		append_ptr(desc, adata->key_dma);
+	}
+}
+
 #endif /* DESC_CONSTR_H */
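
A worked example of the descriptor-space reservation in append_proto_dkp(), assuming an inline 20-byte HMAC-SHA1 key (keylen = 20, keylen_pad = 40 per split_key_len()):

	/* ALIGN(40, 4) = 40, ALIGN(20, 4) = 20, CAAM_CMD_SZ = 4	*/
	/* words = (40 - 20) / 4 = 5					*/

Five extra 32-bit words are added to the descriptor header length, so the split key the engine derives in place cannot overrun the commands that follow the immediate key data.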
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index 91f1107276e5..7696a774a362 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -84,6 +84,7 @@ struct caam_drv_private {
 	u8 qi_present;		/* Nonzero if QI present in device */
 	int secvio_irq;		/* Security violation interrupt number */
 	int virt_en;		/* Virtualization enabled in CAAM */
+	int era;		/* CAAM Era (internal HW revision) */
 
 #define	RNG4_MAX_HANDLES 2
 	/* RNG4 block */
diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c
index 8c79c3a153dc..312b5f042f31 100644
--- a/drivers/crypto/caam/key_gen.c
+++ b/drivers/crypto/caam/key_gen.c
@@ -11,36 +11,6 @@
 #include "desc_constr.h"
 #include "key_gen.h"
 
-/**
- * split_key_len - Compute MDHA split key length for a given algorithm
- * @hash: Hashing algorithm selection, one of OP_ALG_ALGSEL_* - MD5, SHA1,
- *        SHA224, SHA384, SHA512.
- *
- * Return: MDHA split key length
- */
-static inline u32 split_key_len(u32 hash)
-{
-	/* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */
-	static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 };
-	u32 idx;
-
-	idx = (hash & OP_ALG_ALGSEL_SUBMASK) >> OP_ALG_ALGSEL_SHIFT;
-
-	return (u32)(mdpadlen[idx] * 2);
-}
-
-/**
- * split_key_pad_len - Compute MDHA split key pad length for a given algorithm
- * @hash: Hashing algorithm selection, one of OP_ALG_ALGSEL_* - MD5, SHA1,
- *        SHA224, SHA384, SHA512.
- *
- * Return: MDHA split key pad length
- */
-static inline u32 split_key_pad_len(u32 hash)
-{
-	return ALIGN(split_key_len(hash), 16);
-}
-
 void split_key_done(struct device *dev, u32 *desc, u32 err,
 			   void *context)
 {
diff --git a/drivers/crypto/caam/key_gen.h b/drivers/crypto/caam/key_gen.h
index 5db055c25bd2..818f78f6fc1a 100644
--- a/drivers/crypto/caam/key_gen.h
+++ b/drivers/crypto/caam/key_gen.h
@@ -6,6 +6,36 @@
  *
  */
 
+/**
+ * split_key_len - Compute MDHA split key length for a given algorithm
+ * @hash: Hashing algorithm selection, one of OP_ALG_ALGSEL_* - MD5, SHA1,
+ *        SHA224, SHA384, SHA512.
+ *
+ * Return: MDHA split key length
+ */
+static inline u32 split_key_len(u32 hash)
+{
+	/* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */
+	static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 };
+	u32 idx;
+
+	idx = (hash & OP_ALG_ALGSEL_SUBMASK) >> OP_ALG_ALGSEL_SHIFT;
+
+	return (u32)(mdpadlen[idx] * 2);
+}
+
+/**
+ * split_key_pad_len - Compute MDHA split key pad length for a given algorithm
+ * @hash: Hashing algorithm selection, one of OP_ALG_ALGSEL_* - MD5, SHA1,
+ *        SHA224, SHA384, SHA512.
+ *
+ * Return: MDHA split key pad length
+ */
+static inline u32 split_key_pad_len(u32 hash)
+{
+	return ALIGN(split_key_len(hash), 16);
+}
+
 struct split_key_result {
 	struct completion completion;
 	int err;
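
Worked values for the helpers moved above, per the mdpadlen table: for HMAC-SHA1 the MDHA pad is 20 bytes, so

	split_key_len(OP_ALG_ALGSEL_SHA1)     == 2 * 20 == 40
	split_key_pad_len(OP_ALG_ALGSEL_SHA1) == ALIGN(40, 16) == 48

while for HMAC-SHA256 both come out to 64, which is already 16-byte aligned.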
diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
index 169e66231bcf..b0ba4331944b 100644
--- a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
+++ b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
@@ -459,7 +459,8 @@ int process_request(struct cpt_vf *cptvf, struct cpt_request_info *req)
 	info->completion_addr = kzalloc(sizeof(union cpt_res_s), GFP_KERNEL);
 	if (unlikely(!info->completion_addr)) {
 		dev_err(&pdev->dev, "Unable to allocate memory for completion_addr\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto request_cleanup;
 	}
 
 	result = (union cpt_res_s *)info->completion_addr;
diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
index 4addc238a6ef..deaefd532aaa 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
@@ -6,7 +6,6 @@
 #include "nitrox_dev.h"
 #include "nitrox_req.h"
 #include "nitrox_csr.h"
-#include "nitrox_req.h"
 
 /* SLC_STORE_INFO */
 #define MIN_UDD_LEN 16
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
index ff02b713c6f6..ca1f0d780b61 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-galois.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
@@ -21,7 +21,6 @@
 #include <crypto/ctr.h>
 #include <crypto/gcm.h>
 #include <crypto/scatterwalk.h>
-#include <linux/delay.h>
 
 #include "ccp-crypto.h"
 
diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
index b56b3f711d94..5ae9f8706f17 100644
--- a/drivers/crypto/chelsio/Kconfig
+++ b/drivers/crypto/chelsio/Kconfig
@@ -19,3 +19,13 @@ config CRYPTO_DEV_CHELSIO
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called chcr.
+
+config CHELSIO_IPSEC_INLINE
+	bool "Chelsio IPSec XFRM Tx crypto offload"
+	depends on CHELSIO_T4
+	depends on CRYPTO_DEV_CHELSIO
+	depends on XFRM_OFFLOAD
+	depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+	default n
+	---help---
+	  Enable support for inline IPSec Tx offload.
diff --git a/drivers/crypto/chelsio/Makefile b/drivers/crypto/chelsio/Makefile
index bebdf06687ad..eaecaf1ebcf3 100644
--- a/drivers/crypto/chelsio/Makefile
+++ b/drivers/crypto/chelsio/Makefile
@@ -2,3 +2,4 @@ ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
 
 obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chcr.o
 chcr-objs :=  chcr_core.o chcr_algo.o
+chcr-$(CONFIG_CHELSIO_IPSEC_INLINE) += chcr_ipsec.o
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 4eed7171e2ae..34a02d690548 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -73,6 +73,29 @@
 
 #define IV AES_BLOCK_SIZE
 
+static unsigned int sgl_ent_len[] = {
+	0, 0, 16, 24, 40, 48, 64, 72, 88,
+	96, 112, 120, 136, 144, 160, 168, 184,
+	192, 208, 216, 232, 240, 256, 264, 280,
+	288, 304, 312, 328, 336, 352, 360, 376
+};
+
+static unsigned int dsgl_ent_len[] = {
+	0, 32, 32, 48, 48, 64, 64, 80, 80,
+	112, 112, 128, 128, 144, 144, 160, 160,
+	192, 192, 208, 208, 224, 224, 240, 240,
+	272, 272, 288, 288, 304, 304, 320, 320
+};
+
+static u32 round_constant[11] = {
+	0x01000000, 0x02000000, 0x04000000, 0x08000000,
+	0x10000000, 0x20000000, 0x40000000, 0x80000000,
+	0x1B000000, 0x36000000, 0x6C000000
+};
+
+static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
+				   unsigned char *input, int err);
+
 static inline  struct chcr_aead_ctx *AEAD_CTX(struct chcr_context *ctx)
 {
 	return ctx->crypto_ctx->aeadctx;
@@ -108,18 +131,6 @@ static inline int is_ofld_imm(const struct sk_buff *skb)
 	return (skb->len <= SGE_MAX_WR_LEN);
 }
 
-/*
- *	sgl_len - calculates the size of an SGL of the given capacity
- *	@n: the number of SGL entries
- *	Calculates the number of flits needed for a scatter/gather list that
- *	can hold the given number of entries.
- */
-static inline unsigned int sgl_len(unsigned int n)
-{
-	n--;
-	return (3 * n) / 2 + (n & 1) + 2;
-}
-
 static int sg_nents_xlen(struct scatterlist *sg, unsigned int reqlen,
 			 unsigned int entlen,
 			 unsigned int skip)
@@ -160,7 +171,6 @@ static inline void chcr_handle_ahash_resp(struct ahash_request *req,
 
 	if (input == NULL)
 		goto out;
-	reqctx = ahash_request_ctx(req);
 	digestsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
 	if (reqctx->is_sg_map)
 		chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req);
@@ -183,30 +193,17 @@ static inline void chcr_handle_ahash_resp(struct ahash_request *req,
 	}
 out:
 	req->base.complete(&req->base, err);
+}
 
-	}
-
-static inline void chcr_handle_aead_resp(struct aead_request *req,
-					 unsigned char *input,
-					 int err)
+static inline int get_aead_subtype(struct crypto_aead *aead)
 {
-	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct uld_ctx *u_ctx = ULD_CTX(a_ctx(tfm));
-
-
-	chcr_aead_dma_unmap(&u_ctx->lldi.pdev->dev, req, reqctx->op);
-	if (reqctx->b0_dma)
-		dma_unmap_single(&u_ctx->lldi.pdev->dev, reqctx->b0_dma,
-				 reqctx->b0_len, DMA_BIDIRECTIONAL);
-	if (reqctx->verify == VERIFY_SW) {
-		chcr_verify_tag(req, input, &err);
-		reqctx->verify = VERIFY_HW;
+	struct aead_alg *alg = crypto_aead_alg(aead);
+	struct chcr_alg_template *chcr_crypto_alg =
+		container_of(alg, struct chcr_alg_template, alg.aead);
+	return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK;
 }
-	req->base.complete(&req->base, err);
 
-}
-static void chcr_verify_tag(struct aead_request *req, u8 *input, int *err)
+void chcr_verify_tag(struct aead_request *req, u8 *input, int *err)
 {
 	u8 temp[SHA512_DIGEST_SIZE];
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
@@ -231,6 +228,25 @@ static void chcr_verify_tag(struct aead_request *req, u8 *input, int *err)
 		*err = 0;
 }
 
+static inline void chcr_handle_aead_resp(struct aead_request *req,
+					 unsigned char *input,
+					 int err)
+{
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct uld_ctx *u_ctx = ULD_CTX(a_ctx(tfm));
+
+	chcr_aead_dma_unmap(&u_ctx->lldi.pdev->dev, req, reqctx->op);
+	if (reqctx->b0_dma)
+		dma_unmap_single(&u_ctx->lldi.pdev->dev, reqctx->b0_dma,
+				 reqctx->b0_len, DMA_BIDIRECTIONAL);
+	if (reqctx->verify == VERIFY_SW) {
+		chcr_verify_tag(req, input, &err);
+		reqctx->verify = VERIFY_HW;
+	}
+	req->base.complete(&req->base, err);
+}
+
 /*
  *	chcr_handle_resp - Unmap the DMA buffers associated with the request
  *	@req: crypto request
@@ -558,7 +574,8 @@ static void  ulptx_walk_add_sg(struct ulptx_walk *walk,
 			skip = 0;
 		}
 	}
-	if (walk->nents == 0) {
+	WARN(!sg, "SG should not be null here\n");
+	if (sg && (walk->nents == 0)) {
 		small = min_t(unsigned int, sg_dma_len(sg) - skip_len, len);
 		sgmin = min_t(unsigned int, small, CHCR_SRC_SG_SIZE);
 		walk->sgl->len0 = cpu_to_be32(sgmin);
@@ -595,14 +612,6 @@ static void  ulptx_walk_add_sg(struct ulptx_walk *walk,
 	}
 }
 
-static inline int get_aead_subtype(struct crypto_aead *aead)
-{
-	struct aead_alg *alg = crypto_aead_alg(aead);
-	struct chcr_alg_template *chcr_crypto_alg =
-		container_of(alg, struct chcr_alg_template, alg.aead);
-	return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK;
-}
-
 static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm)
 {
 	struct crypto_alg *alg = tfm->__crt_alg;
@@ -675,7 +684,7 @@ static int chcr_sg_ent_in_wr(struct scatterlist *src,
 			if (srclen <= dstlen)
 				break;
 			less = min_t(unsigned int, sg_dma_len(dst) - offset -
-				dstskip, CHCR_DST_SG_SIZE);
+				     dstskip, CHCR_DST_SG_SIZE);
 			dstlen += less;
 			offset += less;
 			if (offset == sg_dma_len(dst)) {
@@ -686,7 +695,7 @@ static int chcr_sg_ent_in_wr(struct scatterlist *src,
 			dstskip = 0;
 		}
 		src = sg_next(src);
-		 srcskip = 0;
+		srcskip = 0;
 	}
 	return min(srclen, dstlen);
 }
@@ -1008,7 +1017,8 @@ static unsigned int adjust_ctr_overflow(u8 *iv, u32 bytes)
 	return bytes;
 }
 
-static int chcr_update_tweak(struct ablkcipher_request *req, u8 *iv)
+static int chcr_update_tweak(struct ablkcipher_request *req, u8 *iv,
+			     u32 isfinal)
 {
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm));
@@ -1035,7 +1045,8 @@ static int chcr_update_tweak(struct ablkcipher_request *req, u8 *iv)
 	for (i = 0; i < (round % 8); i++)
 		gf128mul_x_ble((le128 *)iv, (le128 *)iv);
 
-	crypto_cipher_decrypt_one(cipher, iv, iv);
+	if (!isfinal)
+		crypto_cipher_decrypt_one(cipher, iv, iv);
 out:
 	return ret;
 }
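
A short usage note on the new isfinal flag (a hedged reading of the call sites changed below): for an intermediate chunk the tweak must be decrypted back so the next work request can re-encrypt it, while the final chunk hands iv back to the caller untouched.

	ret = chcr_update_tweak(req, iv, 0);	/* mid-stream: keep tweak reusable */
	...
	ret = chcr_update_tweak(req, iv, 1);	/* last chunk: skip the extra decrypt */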
@@ -1056,7 +1067,7 @@ static int chcr_update_cipher_iv(struct ablkcipher_request *req,
 			CTR_RFC3686_IV_SIZE) = cpu_to_be32((reqctx->processed /
 						AES_BLOCK_SIZE) + 1);
 	else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS)
-		ret = chcr_update_tweak(req, iv);
+		ret = chcr_update_tweak(req, iv, 0);
 	else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) {
 		if (reqctx->op)
 			sg_pcopy_to_buffer(req->src, sg_nents(req->src), iv,
@@ -1087,7 +1098,7 @@ static int chcr_final_cipher_iv(struct ablkcipher_request *req,
 		ctr_add_iv(iv, req->info, (reqctx->processed /
 			   AES_BLOCK_SIZE));
 	else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS)
-		ret = chcr_update_tweak(req, iv);
+		ret = chcr_update_tweak(req, iv, 1);
 	else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) {
 		if (reqctx->op)
 			sg_pcopy_to_buffer(req->src, sg_nents(req->src), iv,
@@ -1101,7 +1112,6 @@ static int chcr_final_cipher_iv(struct ablkcipher_request *req,
 
 }
 
-
 static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
 				   unsigned char *input, int err)
 {
@@ -1135,10 +1145,10 @@ static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
 		bytes = chcr_sg_ent_in_wr(reqctx->srcsg, reqctx->dstsg, 1,
 					  SPACE_LEFT(ablkctx->enckey_len),
 					  reqctx->src_ofst, reqctx->dst_ofst);
-	if ((bytes + reqctx->processed) >= req->nbytes)
-		bytes  = req->nbytes - reqctx->processed;
-	else
-		bytes = ROUND_16(bytes);
+		if ((bytes + reqctx->processed) >= req->nbytes)
+			bytes  = req->nbytes - reqctx->processed;
+		else
+			bytes = ROUND_16(bytes);
 	} else {
 		/* CTR mode counter overflow */
 		bytes  = req->nbytes - reqctx->processed;
@@ -1239,15 +1249,15 @@ static int process_cipher(struct ablkcipher_request *req,
 					  MIN_CIPHER_SG,
 					  SPACE_LEFT(ablkctx->enckey_len),
 					  0, 0);
-	if ((bytes + reqctx->processed) >= req->nbytes)
-		bytes  = req->nbytes - reqctx->processed;
-	else
-		bytes = ROUND_16(bytes);
+		if ((bytes + reqctx->processed) >= req->nbytes)
+			bytes  = req->nbytes - reqctx->processed;
+		else
+			bytes = ROUND_16(bytes);
 	} else {
 		bytes = req->nbytes;
 	}
 	if (get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
-				  CRYPTO_ALG_SUB_TYPE_CTR) {
+	    CRYPTO_ALG_SUB_TYPE_CTR) {
 		bytes = adjust_ctr_overflow(req->info, bytes);
 	}
 	if (get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
@@ -2014,11 +2024,8 @@ static int chcr_aead_common_init(struct aead_request *req,
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
 	int error = -EINVAL;
-	unsigned int dst_size;
 	unsigned int authsize = crypto_aead_authsize(tfm);
 
-	dst_size = req->assoclen + req->cryptlen + (op_type ?
-					-authsize : authsize);
 	/* validate key size */
 	if (aeadctx->enckey_len == 0)
 		goto err;
@@ -2083,7 +2090,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	struct cpl_rx_phys_dsgl *phys_cpl;
 	struct ulptx_sgl *ulptx;
 	unsigned int transhdr_len;
-	unsigned int dst_size = 0, temp;
+	unsigned int dst_size = 0, temp, subtype = get_aead_subtype(tfm);
 	unsigned int   kctx_len = 0, dnents;
 	unsigned int  assoclen = req->assoclen;
 	unsigned int  authsize = crypto_aead_authsize(tfm);
@@ -2097,24 +2104,19 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 		return NULL;
 
 	reqctx->b0_dma = 0;
-	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_NULL) {
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CBC_NULL ||
+	    subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL) {
 		null = 1;
 		assoclen = 0;
 	}
-	dst_size = assoclen + req->cryptlen + (op_type ? -authsize :
-						    authsize);
 	error = chcr_aead_common_init(req, op_type);
 	if (error)
 		return ERR_PTR(error);
-	if (dst_size) {
-		dnents = sg_nents_xlen(req->dst, assoclen, CHCR_DST_SG_SIZE, 0);
-		dnents += sg_nents_xlen(req->dst, req->cryptlen +
-			(op_type ? -authsize : authsize), CHCR_DST_SG_SIZE,
-			req->assoclen);
-		dnents += MIN_AUTH_SG; // For IV
-	} else {
-		dnents = 0;
-	}
+	dnents = sg_nents_xlen(req->dst, assoclen, CHCR_DST_SG_SIZE, 0);
+	dnents += sg_nents_xlen(req->dst, req->cryptlen +
+		(op_type ? -authsize : authsize), CHCR_DST_SG_SIZE,
+		req->assoclen);
+	dnents += MIN_AUTH_SG; /* For IV */
 
 	dst_size = get_space_for_phys_dsgl(dnents);
 	kctx_len = (ntohl(KEY_CONTEXT_CTX_LEN_V(aeadctx->key_ctx_hdr)) << 4)
@@ -2162,16 +2164,23 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 					temp & 0xF,
 					null ? 0 : assoclen + IV + 1,
 					temp, temp);
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL ||
+	    subtype == CRYPTO_ALG_SUB_TYPE_CTR_SHA)
+		temp = CHCR_SCMD_CIPHER_MODE_AES_CTR;
+	else
+		temp = CHCR_SCMD_CIPHER_MODE_AES_CBC;
 	chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type,
 					(op_type == CHCR_ENCRYPT_OP) ? 1 : 0,
-					CHCR_SCMD_CIPHER_MODE_AES_CBC,
+					temp,
 					actx->auth_mode, aeadctx->hmac_ctrl,
 					IV >> 1);
 	chcr_req->sec_cpl.ivgen_hdrlen =  FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1,
 					 0, 0, dst_size);
 
 	chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr;
-	if (op_type == CHCR_ENCRYPT_OP)
+	if (op_type == CHCR_ENCRYPT_OP ||
+	    subtype == CRYPTO_ALG_SUB_TYPE_CTR_SHA ||
+	    subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL)
 		memcpy(chcr_req->key_ctx.key, aeadctx->key,
 		       aeadctx->enckey_len);
 	else
@@ -2181,7 +2190,16 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) <<
 					4), actx->h_iopad, kctx_len -
 				(DIV_ROUND_UP(aeadctx->enckey_len, 16) << 4));
-	memcpy(reqctx->iv, req->iv, IV);
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR_SHA ||
+	    subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL) {
+		memcpy(reqctx->iv, aeadctx->nonce, CTR_RFC3686_NONCE_SIZE);
+		memcpy(reqctx->iv + CTR_RFC3686_NONCE_SIZE, req->iv,
+				CTR_RFC3686_IV_SIZE);
+		*(__be32 *)(reqctx->iv + CTR_RFC3686_NONCE_SIZE +
+			CTR_RFC3686_IV_SIZE) = cpu_to_be32(1);
+	} else {
+		memcpy(reqctx->iv, req->iv, IV);
+	}
 	phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
 	ulptx = (struct ulptx_sgl *)((u8 *)(phys_cpl + 1) + dst_size);
 	chcr_add_aead_dst_ent(req, phys_cpl, assoclen, op_type, qid);
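
The CTR IV assembly above follows the RFC 3686 layout, sketched here for clarity with sizes taken from the existing CTR_RFC3686_* defines:

	/* reqctx->iv, 16 bytes total:
	 *   [ 4-byte nonce ][ 8-byte per-request IV ][ 4-byte counter = 1 ]
	 */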
@@ -2202,9 +2220,9 @@ err:
 	return ERR_PTR(error);
 }
 
-static int chcr_aead_dma_map(struct device *dev,
-			     struct aead_request *req,
-			     unsigned short op_type)
+int chcr_aead_dma_map(struct device *dev,
+		      struct aead_request *req,
+		      unsigned short op_type)
 {
 	int error;
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
@@ -2246,9 +2264,9 @@ err:
 	return -ENOMEM;
 }
 
-static void chcr_aead_dma_unmap(struct device *dev,
-			     struct aead_request *req,
-			     unsigned short op_type)
+void chcr_aead_dma_unmap(struct device *dev,
+			 struct aead_request *req,
+			 unsigned short op_type)
 {
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
@@ -2273,10 +2291,10 @@ static void chcr_aead_dma_unmap(struct device *dev,
 	}
 }
 
-static inline void chcr_add_aead_src_ent(struct aead_request *req,
-			       struct ulptx_sgl *ulptx,
-			       unsigned int assoclen,
-			       unsigned short op_type)
+void chcr_add_aead_src_ent(struct aead_request *req,
+			   struct ulptx_sgl *ulptx,
+			   unsigned int assoclen,
+			   unsigned short op_type)
 {
 	struct ulptx_walk ulp_walk;
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
@@ -2308,11 +2326,11 @@ static inline void chcr_add_aead_src_ent(struct aead_request *req,
 	}
 }
 
-static inline void chcr_add_aead_dst_ent(struct aead_request *req,
-			       struct cpl_rx_phys_dsgl *phys_cpl,
-			       unsigned int assoclen,
-			       unsigned short op_type,
-			       unsigned short qid)
+void chcr_add_aead_dst_ent(struct aead_request *req,
+			   struct cpl_rx_phys_dsgl *phys_cpl,
+			   unsigned int assoclen,
+			   unsigned short op_type,
+			   unsigned short qid)
 {
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
@@ -2330,9 +2348,9 @@ static inline void chcr_add_aead_dst_ent(struct aead_request *req,
 	dsgl_walk_end(&dsgl_walk, qid);
 }
 
-static inline void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
-					   struct ulptx_sgl *ulptx,
-					   struct  cipher_wr_param *wrparam)
+void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
+			     struct ulptx_sgl *ulptx,
+			     struct  cipher_wr_param *wrparam)
 {
 	struct ulptx_walk ulp_walk;
 	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
@@ -2355,10 +2373,10 @@ static inline void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
 	}
 }
 
-static inline void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
-					   struct cpl_rx_phys_dsgl *phys_cpl,
-					   struct  cipher_wr_param *wrparam,
-					   unsigned short qid)
+void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
+			     struct cpl_rx_phys_dsgl *phys_cpl,
+			     struct  cipher_wr_param *wrparam,
+			     unsigned short qid)
 {
 	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
 	struct dsgl_walk dsgl_walk;
@@ -2373,9 +2391,9 @@ static inline void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
 	dsgl_walk_end(&dsgl_walk, qid);
 }
 
-static inline void chcr_add_hash_src_ent(struct ahash_request *req,
-					   struct ulptx_sgl *ulptx,
-					   struct hash_wr_param *param)
+void chcr_add_hash_src_ent(struct ahash_request *req,
+			   struct ulptx_sgl *ulptx,
+			   struct hash_wr_param *param)
 {
 	struct ulptx_walk ulp_walk;
 	struct chcr_ahash_req_ctx *reqctx = ahash_request_ctx(req);
@@ -2395,16 +2413,13 @@ static inline void chcr_add_hash_src_ent(struct ahash_request *req,
 			ulptx_walk_add_page(&ulp_walk, param->bfr_len,
 					    &reqctx->dma_addr);
 		ulptx_walk_add_sg(&ulp_walk, req->src, param->sg_len,
-					  0);
-//	       reqctx->srcsg = ulp_walk.last_sg;
-//	       reqctx->src_ofst = ulp_walk.last_sg_len;
-			ulptx_walk_end(&ulp_walk);
+				  0);
+		ulptx_walk_end(&ulp_walk);
 	}
 }
 
-
-static inline int chcr_hash_dma_map(struct device *dev,
-			     struct ahash_request *req)
+int chcr_hash_dma_map(struct device *dev,
+		      struct ahash_request *req)
 {
 	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
 	int error = 0;
@@ -2414,13 +2429,13 @@ static inline int chcr_hash_dma_map(struct device *dev,
 	error = dma_map_sg(dev, req->src, sg_nents(req->src),
 			   DMA_TO_DEVICE);
 	if (!error)
-		return error;
+		return -ENOMEM;
 	req_ctx->is_sg_map = 1;
 	return 0;
 }
 
-static inline void chcr_hash_dma_unmap(struct device *dev,
-			     struct ahash_request *req)
+void chcr_hash_dma_unmap(struct device *dev,
+			 struct ahash_request *req)
 {
 	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
 
@@ -2433,9 +2448,8 @@ static inline void chcr_hash_dma_unmap(struct device *dev,
 
 }
 
-
-static int chcr_cipher_dma_map(struct device *dev,
-			     struct ablkcipher_request *req)
+int chcr_cipher_dma_map(struct device *dev,
+			struct ablkcipher_request *req)
 {
 	int error;
 	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
@@ -2469,8 +2483,9 @@ err:
 	dma_unmap_single(dev, reqctx->iv_dma, IV, DMA_BIDIRECTIONAL);
 	return -ENOMEM;
 }
-static void chcr_cipher_dma_unmap(struct device *dev,
-				  struct ablkcipher_request *req)
+
+void chcr_cipher_dma_unmap(struct device *dev,
+			   struct ablkcipher_request *req)
 {
 	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
 
@@ -2666,8 +2681,6 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 	sub_type = get_aead_subtype(tfm);
 	if (sub_type == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309)
 		assoclen -= 8;
-	dst_size = assoclen + req->cryptlen + (op_type ? -authsize :
-						   authsize);
 	error = chcr_aead_common_init(req, op_type);
 	if (error)
 		return ERR_PTR(error);
@@ -2677,15 +2690,11 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 	error = aead_ccm_validate_input(op_type, req, aeadctx, sub_type);
 	if (error)
 		goto err;
-	if (dst_size) {
-		dnents = sg_nents_xlen(req->dst, assoclen, CHCR_DST_SG_SIZE, 0);
-		dnents += sg_nents_xlen(req->dst, req->cryptlen
-				+ (op_type ? -authsize : authsize),
-				CHCR_DST_SG_SIZE, req->assoclen);
-		dnents += MIN_CCM_SG; // For IV and B0
-	} else {
-		dnents = 0;
-	}
+	dnents = sg_nents_xlen(req->dst, assoclen, CHCR_DST_SG_SIZE, 0);
+	dnents += sg_nents_xlen(req->dst, req->cryptlen +
+				(op_type ? -authsize : authsize),
+				CHCR_DST_SG_SIZE, req->assoclen);
+	dnents += MIN_CCM_SG; // For IV and B0
 	dst_size = get_space_for_phys_dsgl(dnents);
 	kctx_len = ((DIV_ROUND_UP(aeadctx->enckey_len, 16)) << 4) * 2;
 	transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size);
@@ -2780,19 +2789,14 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 		assoclen = req->assoclen - 8;
 
 	reqctx->b0_dma = 0;
-	dst_size = assoclen + req->cryptlen + (op_type ? -authsize :  authsize);
 	error = chcr_aead_common_init(req, op_type);
-		if (error)
-			return	ERR_PTR(error);
-	if (dst_size) {
-		dnents = sg_nents_xlen(req->dst, assoclen, CHCR_DST_SG_SIZE, 0);
-		dnents += sg_nents_xlen(req->dst,
-			req->cryptlen + (op_type ? -authsize : authsize),
+	if (error)
+		return ERR_PTR(error);
+	dnents = sg_nents_xlen(req->dst, assoclen, CHCR_DST_SG_SIZE, 0);
+	dnents += sg_nents_xlen(req->dst, req->cryptlen +
+				(op_type ? -authsize : authsize),
 				CHCR_DST_SG_SIZE, req->assoclen);
-		dnents += MIN_GCM_SG; // For IV
-	} else {
-		dnents = 0;
-	}
+	dnents += MIN_GCM_SG; // For IV
 	dst_size = get_space_for_phys_dsgl(dnents);
 	kctx_len = ((DIV_ROUND_UP(aeadctx->enckey_len, 16)) << 4) +
 		AEAD_H_SIZE;
@@ -2829,10 +2833,10 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(
 					assoclen ? 1 : 0, assoclen,
 					assoclen + IV + 1, 0);
-		chcr_req->sec_cpl.cipherstop_lo_authinsert =
+	chcr_req->sec_cpl.cipherstop_lo_authinsert =
 			FILL_SEC_CPL_AUTHINSERT(0, assoclen + IV + 1,
 						temp, temp);
-		chcr_req->sec_cpl.seqno_numivs =
+	chcr_req->sec_cpl.seqno_numivs =
 			FILL_SEC_CPL_SCMD0_SEQNO(op_type, (op_type ==
 					CHCR_ENCRYPT_OP) ? 1 : 0,
 					CHCR_SCMD_CIPHER_MODE_AES_GCM,
@@ -3212,7 +3216,7 @@ static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
 	struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx);
 	/* it contains auth and cipher key both*/
 	struct crypto_authenc_keys keys;
-	unsigned int bs;
+	unsigned int bs, subtype;
 	unsigned int max_authsize = crypto_aead_alg(authenc)->maxauthsize;
 	int err = 0, i, key_ctx_len = 0;
 	unsigned char ck_size = 0;
@@ -3241,6 +3245,15 @@ static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
 		pr_err("chcr : Unsupported digest size\n");
 		goto out;
 	}
+	subtype = get_aead_subtype(authenc);
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR_SHA ||
+	    subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL) {
+		if (keys.enckeylen < CTR_RFC3686_NONCE_SIZE)
+			goto out;
+		memcpy(aeadctx->nonce, keys.enckey + keys.enckeylen -
+		       CTR_RFC3686_NONCE_SIZE, CTR_RFC3686_NONCE_SIZE);
+		keys.enckeylen -= CTR_RFC3686_NONCE_SIZE;
+	}
 	if (keys.enckeylen == AES_KEYSIZE_128) {
 		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
 	} else if (keys.enckeylen == AES_KEYSIZE_192) {
@@ -3258,9 +3271,12 @@ static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
 	 */
 	memcpy(aeadctx->key, keys.enckey, keys.enckeylen);
 	aeadctx->enckey_len = keys.enckeylen;
-	get_aes_decrypt_key(actx->dec_rrkey, aeadctx->key,
-			    aeadctx->enckey_len << 3);
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CBC_SHA ||
+	    subtype == CRYPTO_ALG_SUB_TYPE_CBC_NULL) {
 
+		get_aes_decrypt_key(actx->dec_rrkey, aeadctx->key,
+				    aeadctx->enckey_len << 3);
+	}
 	base_hash  = chcr_alloc_shash(max_authsize);
 	if (IS_ERR(base_hash)) {
 		pr_err("chcr : Base driver cannot be loaded\n");
@@ -3333,6 +3349,7 @@ static int chcr_aead_digest_null_setkey(struct crypto_aead *authenc,
 	struct crypto_authenc_keys keys;
 	int err;
 	/* it contains auth and cipher key both*/
+	unsigned int subtype;
 	int key_ctx_len = 0;
 	unsigned char ck_size = 0;
 
@@ -3350,6 +3367,15 @@ static int chcr_aead_digest_null_setkey(struct crypto_aead *authenc,
 		crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		goto out;
 	}
+	subtype = get_aead_subtype(authenc);
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR_SHA ||
+	    subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL) {
+		if (keys.enckeylen < CTR_RFC3686_NONCE_SIZE)
+			goto out;
+		memcpy(aeadctx->nonce, keys.enckey + (keys.enckeylen
+			- CTR_RFC3686_NONCE_SIZE), CTR_RFC3686_NONCE_SIZE);
+		keys.enckeylen -= CTR_RFC3686_NONCE_SIZE;
+	}
 	if (keys.enckeylen == AES_KEYSIZE_128) {
 		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
 	} else if (keys.enckeylen == AES_KEYSIZE_192) {
@@ -3357,13 +3383,16 @@ static int chcr_aead_digest_null_setkey(struct crypto_aead *authenc,
 	} else if (keys.enckeylen == AES_KEYSIZE_256) {
 		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
 	} else {
-		pr_err("chcr : Unsupported cipher key\n");
+		pr_err("chcr : Unsupported cipher key %d\n", keys.enckeylen);
 		goto out;
 	}
 	memcpy(aeadctx->key, keys.enckey, keys.enckeylen);
 	aeadctx->enckey_len = keys.enckeylen;
-	get_aes_decrypt_key(actx->dec_rrkey, aeadctx->key,
-				    aeadctx->enckey_len << 3);
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CBC_SHA ||
+	    subtype == CRYPTO_ALG_SUB_TYPE_CBC_NULL) {
+		get_aes_decrypt_key(actx->dec_rrkey, aeadctx->key,
+				aeadctx->enckey_len << 3);
+	}
 	key_ctx_len =  sizeof(struct _key_ctx)
 		+ ((DIV_ROUND_UP(keys.enckeylen, 16)) << 4);
 
@@ -3375,6 +3404,40 @@ out:
 	aeadctx->enckey_len = 0;
 	return -EINVAL;
 }
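Both setkey paths above rely on the RFC 3686 convention that the last four bytes of the supplied key material are the per-SA nonce rather than AES key bytes, which is why each strips CTR_RFC3686_NONCE_SIZE off the tail before classifying the key size. A minimal standalone sketch of that split (assuming only the 4-byte nonce size from <crypto/ctr.h>):

	#include <string.h>

	#define CTR_RFC3686_NONCE_SIZE	4	/* per <crypto/ctr.h> */

	/* Split RFC 3686 key material into the AES key and the 32-bit nonce. */
	static void split_rfc3686_key(const unsigned char *keymat,
				      unsigned int keymatlen,
				      unsigned char nonce[CTR_RFC3686_NONCE_SIZE],
				      unsigned int *enckeylen)
	{
		/* The AES key is everything except the trailing nonce. */
		*enckeylen = keymatlen - CTR_RFC3686_NONCE_SIZE;
		memcpy(nonce, keymat + *enckeylen, CTR_RFC3686_NONCE_SIZE);
	}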
+
+static int chcr_aead_op(struct aead_request *req,
+			unsigned short op_type,
+			int size,
+			create_wr_t create_wr_fn)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct uld_ctx *u_ctx;
+	struct sk_buff *skb;
+
+	if (!a_ctx(tfm)->dev) {
+		pr_err("chcr : %s : No crypto device.\n", __func__);
+		return -ENXIO;
+	}
+	u_ctx = ULD_CTX(a_ctx(tfm));
+	if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+				   a_ctx(tfm)->tx_qidx)) {
+		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return -EBUSY;
+	}
+
+	/* Form a WR from req */
+	skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[a_ctx(tfm)->rx_qidx], size,
+			   op_type);
+
+	if (IS_ERR_OR_NULL(skb))
+		return skb ? PTR_ERR(skb) : -ENOMEM;
+
+	skb->dev = u_ctx->lldi.ports[0];
+	set_wr_txq(skb, CPL_PRIORITY_DATA, a_ctx(tfm)->tx_qidx);
+	chcr_send_wr(skb);
+	return -EINPROGRESS;
+}
+
 static int chcr_aead_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
@@ -3383,8 +3446,10 @@ static int chcr_aead_encrypt(struct aead_request *req)
 	reqctx->verify = VERIFY_HW;
 
 	switch (get_aead_subtype(tfm)) {
-	case CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC:
-	case CRYPTO_ALG_SUB_TYPE_AEAD_NULL:
+	case CRYPTO_ALG_SUB_TYPE_CTR_SHA:
+	case CRYPTO_ALG_SUB_TYPE_CBC_SHA:
+	case CRYPTO_ALG_SUB_TYPE_CBC_NULL:
+	case CRYPTO_ALG_SUB_TYPE_CTR_NULL:
 		return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0,
 				    create_authenc_wr);
 	case CRYPTO_ALG_SUB_TYPE_AEAD_CCM:
@@ -3413,8 +3478,10 @@ static int chcr_aead_decrypt(struct aead_request *req)
 	}
 
 	switch (get_aead_subtype(tfm)) {
-	case CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC:
-	case CRYPTO_ALG_SUB_TYPE_AEAD_NULL:
+	case CRYPTO_ALG_SUB_TYPE_CBC_SHA:
+	case CRYPTO_ALG_SUB_TYPE_CTR_SHA:
+	case CRYPTO_ALG_SUB_TYPE_CBC_NULL:
+	case CRYPTO_ALG_SUB_TYPE_CTR_NULL:
 		return chcr_aead_op(req, CHCR_DECRYPT_OP, size,
 				    create_authenc_wr);
 	case CRYPTO_ALG_SUB_TYPE_AEAD_CCM:
@@ -3427,38 +3494,6 @@ static int chcr_aead_decrypt(struct aead_request *req)
 	}
 }
 
-static int chcr_aead_op(struct aead_request *req,
-			  unsigned short op_type,
-			  int size,
-			  create_wr_t create_wr_fn)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct uld_ctx *u_ctx;
-	struct sk_buff *skb;
-
-	if (!a_ctx(tfm)->dev) {
-		pr_err("chcr : %s : No crypto device.\n", __func__);
-		return -ENXIO;
-	}
-	u_ctx = ULD_CTX(a_ctx(tfm));
-	if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
-				   a_ctx(tfm)->tx_qidx)) {
-		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
-			return -EBUSY;
-	}
-
-	/* Form a WR from req */
-	skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[a_ctx(tfm)->rx_qidx], size,
-			   op_type);
-
-	if (IS_ERR(skb) || !skb)
-		return PTR_ERR(skb);
-
-	skb->dev = u_ctx->lldi.ports[0];
-	set_wr_txq(skb, CPL_PRIORITY_DATA, a_ctx(tfm)->tx_qidx);
-	chcr_send_wr(skb);
-	return -EINPROGRESS;
-}
 static struct chcr_alg_template driver_algs[] = {
 	/* AES-CBC */
 	{
@@ -3742,7 +3777,7 @@ static struct chcr_alg_template driver_algs[] = {
 		}
 	},
 	{
-		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC,
+		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_CBC_SHA,
 		.is_registered = 0,
 		.alg.aead = {
 			.base = {
@@ -3763,7 +3798,7 @@ static struct chcr_alg_template driver_algs[] = {
 		}
 	},
 	{
-		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC,
+		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_CBC_SHA,
 		.is_registered = 0,
 		.alg.aead = {
 			.base = {
@@ -3785,7 +3820,7 @@ static struct chcr_alg_template driver_algs[] = {
 		}
 	},
 	{
-		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC,
+		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_CBC_SHA,
 		.is_registered = 0,
 		.alg.aead = {
 			.base = {
@@ -3805,7 +3840,7 @@ static struct chcr_alg_template driver_algs[] = {
 		}
 	},
 	{
-		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC,
+		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_CBC_SHA,
 		.is_registered = 0,
 		.alg.aead = {
 			.base = {
@@ -3826,7 +3861,7 @@ static struct chcr_alg_template driver_algs[] = {
 		}
 	},
 	{
-		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC,
+		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_CBC_SHA,
 		.is_registered = 0,
 		.alg.aead = {
 			.base = {
@@ -3847,7 +3882,7 @@ static struct chcr_alg_template driver_algs[] = {
 		}
 	},
 	{
-		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_NULL,
+		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_CBC_NULL,
 		.is_registered = 0,
 		.alg.aead = {
 			.base = {
@@ -3867,6 +3902,133 @@ static struct chcr_alg_template driver_algs[] = {
 			.setauthsize = chcr_authenc_null_setauthsize,
 		}
 	},
+	{
+		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_CTR_SHA,
+		.is_registered = 0,
+		.alg.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha1),rfc3686(ctr(aes)))",
+				.cra_driver_name =
+				"authenc-hmac-sha1-rfc3686-ctr-aes-chcr",
+				.cra_blocksize	 = 1,
+				.cra_priority = CHCR_AEAD_PRIORITY,
+				.cra_ctxsize =	sizeof(struct chcr_context) +
+						sizeof(struct chcr_aead_ctx) +
+						sizeof(struct chcr_authenc_ctx),
+
+			},
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+			.setkey = chcr_authenc_setkey,
+			.setauthsize = chcr_authenc_setauthsize,
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_CTR_SHA,
+		.is_registered = 0,
+		.alg.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha256),rfc3686(ctr(aes)))",
+				.cra_driver_name =
+				"authenc-hmac-sha256-rfc3686-ctr-aes-chcr",
+				.cra_blocksize	 = 1,
+				.cra_priority = CHCR_AEAD_PRIORITY,
+				.cra_ctxsize =	sizeof(struct chcr_context) +
+						sizeof(struct chcr_aead_ctx) +
+						sizeof(struct chcr_authenc_ctx),
+
+			},
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize	= SHA256_DIGEST_SIZE,
+			.setkey = chcr_authenc_setkey,
+			.setauthsize = chcr_authenc_setauthsize,
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_CTR_SHA,
+		.is_registered = 0,
+		.alg.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha224),rfc3686(ctr(aes)))",
+				.cra_driver_name =
+				"authenc-hmac-sha224-rfc3686-ctr-aes-chcr",
+				.cra_blocksize	 = 1,
+				.cra_priority = CHCR_AEAD_PRIORITY,
+				.cra_ctxsize =	sizeof(struct chcr_context) +
+						sizeof(struct chcr_aead_ctx) +
+						sizeof(struct chcr_authenc_ctx),
+			},
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA224_DIGEST_SIZE,
+			.setkey = chcr_authenc_setkey,
+			.setauthsize = chcr_authenc_setauthsize,
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_CTR_SHA,
+		.is_registered = 0,
+		.alg.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha384),rfc3686(ctr(aes)))",
+				.cra_driver_name =
+				"authenc-hmac-sha384-rfc3686-ctr-aes-chcr",
+				.cra_blocksize	 = 1,
+				.cra_priority = CHCR_AEAD_PRIORITY,
+				.cra_ctxsize =	sizeof(struct chcr_context) +
+						sizeof(struct chcr_aead_ctx) +
+						sizeof(struct chcr_authenc_ctx),
+
+			},
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA384_DIGEST_SIZE,
+			.setkey = chcr_authenc_setkey,
+			.setauthsize = chcr_authenc_setauthsize,
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_CTR_SHA,
+		.is_registered = 0,
+		.alg.aead = {
+			.base = {
+				.cra_name = "authenc(hmac(sha512),rfc3686(ctr(aes)))",
+				.cra_driver_name =
+				"authenc-hmac-sha512-rfc3686-ctr-aes-chcr",
+				.cra_blocksize	 = 1,
+				.cra_priority = CHCR_AEAD_PRIORITY,
+				.cra_ctxsize =	sizeof(struct chcr_context) +
+						sizeof(struct chcr_aead_ctx) +
+						sizeof(struct chcr_authenc_ctx),
+
+			},
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA512_DIGEST_SIZE,
+			.setkey = chcr_authenc_setkey,
+			.setauthsize = chcr_authenc_setauthsize,
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_CTR_NULL,
+		.is_registered = 0,
+		.alg.aead = {
+			.base = {
+				.cra_name = "authenc(digest_null,rfc3686(ctr(aes)))",
+				.cra_driver_name =
+				"authenc-digest_null-rfc3686-ctr-aes-chcr",
+				.cra_blocksize	 = 1,
+				.cra_priority = CHCR_AEAD_PRIORITY,
+				.cra_ctxsize =	sizeof(struct chcr_context) +
+						sizeof(struct chcr_aead_ctx) +
+						sizeof(struct chcr_authenc_ctx),
+
+			},
+			.ivsize  = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = 0,
+			.setkey  = chcr_aead_digest_null_setkey,
+			.setauthsize = chcr_authenc_null_setauthsize,
+		}
+	},
 };
 
 /*
diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h
index 96c9335ee728..d1673a5d4bf5 100644
--- a/drivers/crypto/chelsio/chcr_algo.h
+++ b/drivers/crypto/chelsio/chcr_algo.h
@@ -226,15 +226,6 @@
 #define SPACE_LEFT(len) \
 	((SGE_MAX_WR_LEN - WR_MIN_LEN - (len)))
 
-unsigned int sgl_ent_len[] = {0, 0, 16, 24, 40, 48, 64, 72, 88,
-				96, 112, 120, 136, 144, 160, 168, 184,
-				192, 208, 216, 232, 240, 256, 264, 280,
-				288, 304, 312, 328, 336, 352, 360, 376};
-unsigned int dsgl_ent_len[] = {0, 32, 32, 48, 48, 64, 64, 80, 80,
-				112, 112, 128, 128, 144, 144, 160, 160,
-				192, 192, 208, 208, 224, 224, 240, 240,
-				272, 272, 288, 288, 304, 304, 320, 320};
-
 struct algo_param {
 	unsigned int auth_mode;
 	unsigned int mk_size;
@@ -404,10 +395,4 @@ static inline u32 aes_ks_subword(const u32 w)
 	return *(u32 *)(&bytes[0]);
 }
 
-static u32 round_constant[11] = {
-	0x01000000, 0x02000000, 0x04000000, 0x08000000,
-	0x10000000, 0x20000000, 0x40000000, 0x80000000,
-	0x1B000000, 0x36000000, 0x6C000000
-};
-
 #endif /* __CHCR_ALGO_H__ */
diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
index f5a2624081dc..04f277cade7c 100644
--- a/drivers/crypto/chelsio/chcr_core.c
+++ b/drivers/crypto/chelsio/chcr_core.c
@@ -48,6 +48,9 @@ static struct cxgb4_uld_info chcr_uld_info = {
 	.add = chcr_uld_add,
 	.state_change = chcr_uld_state_change,
 	.rx_handler = chcr_uld_rx_handler,
+#ifdef CONFIG_CHELSIO_IPSEC_INLINE
+	.tx_handler = chcr_uld_tx_handler,
+#endif /* CONFIG_CHELSIO_IPSEC_INLINE */
 };
 
 struct uld_ctx *assign_chcr_device(void)
@@ -164,6 +167,10 @@ static void *chcr_uld_add(const struct cxgb4_lld_info *lld)
 		goto out;
 	}
 	u_ctx->lldi = *lld;
+#ifdef CONFIG_CHELSIO_IPSEC_INLINE
+	if (lld->crypto & ULP_CRYPTO_IPSEC_INLINE)
+		chcr_add_xfrmops(lld);
+#endif /* CONFIG_CHELSIO_IPSEC_INLINE */
 out:
 	return u_ctx;
 }
@@ -187,6 +194,13 @@ int chcr_uld_rx_handler(void *handle, const __be64 *rsp,
 	return 0;
 }
 
+#ifdef CONFIG_CHELSIO_IPSEC_INLINE
+int chcr_uld_tx_handler(struct sk_buff *skb, struct net_device *dev)
+{
+	return chcr_ipsec_xmit(skb, dev);
+}
+#endif /* CONFIG_CHELSIO_IPSEC_INLINE */
+
 static int chcr_uld_state_change(void *handle, enum cxgb4_state state)
 {
 	struct uld_ctx *u_ctx = handle;
diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
index 94e7412f6164..3c29ee09b8b5 100644
--- a/drivers/crypto/chelsio/chcr_core.h
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -39,6 +39,7 @@
 #include <crypto/algapi.h>
 #include "t4_hw.h"
 #include "cxgb4.h"
+#include "t4_msg.h"
 #include "cxgb4_uld.h"
 
 #define DRV_MODULE_NAME "chcr"
@@ -89,12 +90,49 @@ struct uld_ctx {
 	struct chcr_dev *dev;
 };
 
+struct chcr_ipsec_req {
+	struct ulp_txpkt ulptx;
+	struct ulptx_idata sc_imm;
+	struct cpl_tx_sec_pdu sec_cpl;
+	struct _key_ctx key_ctx;
+};
+
+struct chcr_ipsec_wr {
+	struct fw_ulptx_wr wreq;
+	struct chcr_ipsec_req req;
+};
+
+struct ipsec_sa_entry {
+	int hmac_ctrl;
+	unsigned int enckey_len;
+	unsigned int kctx_len;
+	unsigned int authsize;
+	__be32 key_ctx_hdr;
+	char salt[MAX_SALT];
+	char key[2 * AES_MAX_KEY_SIZE];
+};
+
+/**
+ *	sgl_len - calculates the size of an SGL of the given capacity
+ *	@n: the number of SGL entries
+ *
+ *	Calculates the number of flits needed for a scatter/gather list
+ *	that can hold the given number of entries.
+ */
+static inline unsigned int sgl_len(unsigned int n)
+{
+	n--;
+	return (3 * n) / 2 + (n & 1) + 2;
+}
+
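As a worked check of the arithmetic (one flit is 8 bytes: the ulptx_sgl header carries the first address/length pair in 2 flits, and each further pair of entries packs into 3 flits), a small standalone sketch:

	#include <assert.h>

	static unsigned int sgl_len_chk(unsigned int n)
	{
		n--;
		return (3 * n) / 2 + (n & 1) + 2;
	}

	int main(void)
	{
		assert(sgl_len_chk(1) == 2);	/* header pair only */
		assert(sgl_len_chk(2) == 4);	/* header + one odd entry */
		assert(sgl_len_chk(3) == 5);	/* header + one packed pair */
		assert(sgl_len_chk(9) == 14);	/* header + four packed pairs */
		return 0;
	}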
 struct uld_ctx *assign_chcr_device(void);
 int chcr_send_wr(struct sk_buff *skb);
 int start_crypto(void);
 int stop_crypto(void);
 int chcr_uld_rx_handler(void *handle, const __be64 *rsp,
 			const struct pkt_gl *pgl);
+int chcr_uld_tx_handler(struct sk_buff *skb, struct net_device *dev);
 int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
 		     int err);
+int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev);
+void chcr_add_xfrmops(const struct cxgb4_lld_info *lld);
 #endif /* __CHCR_CORE_H__ */
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index 94a87e3ad9bc..7daf0a17a7d2 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -134,14 +134,16 @@
 #define CRYPTO_ALG_SUB_TYPE_HASH_HMAC       0x01000000
 #define CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106    0x02000000
 #define CRYPTO_ALG_SUB_TYPE_AEAD_GCM	    0x03000000
-#define CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC    0x04000000
+#define CRYPTO_ALG_SUB_TYPE_CBC_SHA	    0x04000000
 #define CRYPTO_ALG_SUB_TYPE_AEAD_CCM        0x05000000
 #define CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309    0x06000000
-#define CRYPTO_ALG_SUB_TYPE_AEAD_NULL       0x07000000
+#define CRYPTO_ALG_SUB_TYPE_CBC_NULL	    0x07000000
 #define CRYPTO_ALG_SUB_TYPE_CTR             0x08000000
 #define CRYPTO_ALG_SUB_TYPE_CTR_RFC3686     0x09000000
 #define CRYPTO_ALG_SUB_TYPE_XTS		    0x0a000000
 #define CRYPTO_ALG_SUB_TYPE_CBC		    0x0b000000
+#define CRYPTO_ALG_SUB_TYPE_CTR_SHA	    0x0c000000
+#define CRYPTO_ALG_SUB_TYPE_CTR_NULL	    0x0d000000
 #define CRYPTO_ALG_TYPE_HMAC (CRYPTO_ALG_TYPE_AHASH |\
 			      CRYPTO_ALG_SUB_TYPE_HASH_HMAC)
 
@@ -210,8 +212,6 @@ struct dsgl_walk {
 	struct phys_sge_pairs *to;
 };
 
-
-
 struct chcr_gcm_ctx {
 	u8 ghash_h[AEAD_H_SIZE];
 };
@@ -227,21 +227,18 @@ struct __aead_ctx {
 	struct chcr_authenc_ctx authenc[0];
 };
 
-
-
 struct chcr_aead_ctx {
 	__be32 key_ctx_hdr;
 	unsigned int enckey_len;
 	struct crypto_aead *sw_cipher;
 	u8 salt[MAX_SALT];
 	u8 key[CHCR_AES_MAX_KEY_LEN];
+	u8 nonce[4];
 	u16 hmac_ctrl;
 	u16 mayverify;
 	struct	__aead_ctx ctx[0];
 };
 
-
-
 struct hmac_ctx {
 	struct crypto_shash *base_hash;
 	u8 ipad[CHCR_HASH_MAX_BLOCK_SIZE_128];
@@ -307,44 +304,29 @@ typedef struct sk_buff *(*create_wr_t)(struct aead_request *req,
 				       int size,
 				       unsigned short op_type);
 
-static int chcr_aead_op(struct aead_request *req_base,
-			  unsigned short op_type,
-			  int size,
-			  create_wr_t create_wr_fn);
-static inline int get_aead_subtype(struct crypto_aead *aead);
-static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
-				   unsigned char *input, int err);
-static void chcr_verify_tag(struct aead_request *req, u8 *input, int *err);
-static int chcr_aead_dma_map(struct device *dev, struct aead_request *req,
-			     unsigned short op_type);
-static void chcr_aead_dma_unmap(struct device *dev, struct aead_request
-				*req, unsigned short op_type);
-static inline void chcr_add_aead_dst_ent(struct aead_request *req,
-				    struct cpl_rx_phys_dsgl *phys_cpl,
-				    unsigned int assoclen,
-				    unsigned short op_type,
-				    unsigned short qid);
-static inline void chcr_add_aead_src_ent(struct aead_request *req,
-				    struct ulptx_sgl *ulptx,
-				    unsigned int assoclen,
-				    unsigned short op_type);
-static inline void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
-					   struct ulptx_sgl *ulptx,
-					   struct  cipher_wr_param *wrparam);
-static int chcr_cipher_dma_map(struct device *dev,
-			       struct ablkcipher_request *req);
-static void chcr_cipher_dma_unmap(struct device *dev,
-				  struct ablkcipher_request *req);
-static inline void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
-					   struct cpl_rx_phys_dsgl *phys_cpl,
-					   struct  cipher_wr_param *wrparam,
-					   unsigned short qid);
+void chcr_verify_tag(struct aead_request *req, u8 *input, int *err);
+int chcr_aead_dma_map(struct device *dev, struct aead_request *req,
+		      unsigned short op_type);
+void chcr_aead_dma_unmap(struct device *dev, struct aead_request *req,
+			 unsigned short op_type);
+void chcr_add_aead_dst_ent(struct aead_request *req,
+			   struct cpl_rx_phys_dsgl *phys_cpl,
+			   unsigned int assoclen, unsigned short op_type,
+			   unsigned short qid);
+void chcr_add_aead_src_ent(struct aead_request *req, struct ulptx_sgl *ulptx,
+			   unsigned int assoclen, unsigned short op_type);
+void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
+			     struct ulptx_sgl *ulptx,
+			     struct cipher_wr_param *wrparam);
+int chcr_cipher_dma_map(struct device *dev, struct ablkcipher_request *req);
+void chcr_cipher_dma_unmap(struct device *dev, struct ablkcipher_request *req);
+void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
+			     struct cpl_rx_phys_dsgl *phys_cpl,
+			     struct cipher_wr_param *wrparam,
+			     unsigned short qid);
 int sg_nents_len_skip(struct scatterlist *sg, u64 len, u64 skip);
-static inline void chcr_add_hash_src_ent(struct ahash_request *req,
-					 struct ulptx_sgl *ulptx,
-					 struct hash_wr_param *param);
-static inline int chcr_hash_dma_map(struct device *dev,
-				    struct ahash_request *req);
-static inline void chcr_hash_dma_unmap(struct device *dev,
-				       struct ahash_request *req);
+void chcr_add_hash_src_ent(struct ahash_request *req, struct ulptx_sgl *ulptx,
+			   struct hash_wr_param *param);
+int chcr_hash_dma_map(struct device *dev, struct ahash_request *req);
+void chcr_hash_dma_unmap(struct device *dev, struct ahash_request *req);
 #endif /* __CHCR_CRYPTO_H__ */
diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c
new file mode 100644
index 000000000000..db1e241104ed
--- /dev/null
+++ b/drivers/crypto/chelsio/chcr_ipsec.c
@@ -0,0 +1,654 @@
+/*
+ * This file is part of the Chelsio T6 Crypto driver for Linux.
+ *
+ * Copyright (c) 2003-2017 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Written and Maintained by:
+ *	Atul Gupta (atul.gupta@chelsio.com)
+ */
+
+#define pr_fmt(fmt) "chcr:" fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/highmem.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <net/esp.h>
+#include <net/xfrm.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <crypto/authenc.h>
+#include <crypto/internal/aead.h>
+#include <crypto/null.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/aead.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/hash.h>
+
+#include "chcr_core.h"
+#include "chcr_algo.h"
+#include "chcr_crypto.h"
+
+/*
+ * Max Tx descriptor space we allow for an Ethernet packet to be inlined
+ * into a WR.
+ */
+#define MAX_IMM_TX_PKT_LEN 256
+#define GCM_ESP_IV_SIZE     8
+
+static int chcr_xfrm_add_state(struct xfrm_state *x);
+static void chcr_xfrm_del_state(struct xfrm_state *x);
+static void chcr_xfrm_free_state(struct xfrm_state *x);
+static bool chcr_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
+
+static const struct xfrmdev_ops chcr_xfrmdev_ops = {
+	.xdo_dev_state_add      = chcr_xfrm_add_state,
+	.xdo_dev_state_delete   = chcr_xfrm_del_state,
+	.xdo_dev_state_free     = chcr_xfrm_free_state,
+	.xdo_dev_offload_ok     = chcr_ipsec_offload_ok,
+};
+
+/* Add offload xfrms to Chelsio Interface */
+void chcr_add_xfrmops(const struct cxgb4_lld_info *lld)
+{
+	struct net_device *netdev = NULL;
+	int i;
+
+	for (i = 0; i < lld->nports; i++) {
+		netdev = lld->ports[i];
+		if (!netdev)
+			continue;
+		netdev->xfrmdev_ops = &chcr_xfrmdev_ops;
+		netdev->hw_enc_features |= NETIF_F_HW_ESP;
+		netdev->features |= NETIF_F_HW_ESP;
+		rtnl_lock();
+		netdev_change_features(netdev);
+		rtnl_unlock();
+	}
+}
+
+static inline int chcr_ipsec_setauthsize(struct xfrm_state *x,
+					 struct ipsec_sa_entry *sa_entry)
+{
+	int hmac_ctrl;
+	int authsize = x->aead->alg_icv_len / 8;
+
+	sa_entry->authsize = authsize;
+
+	switch (authsize) {
+	case ICV_8:
+		hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2;
+		break;
+	case ICV_12:
+		hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT;
+		break;
+	case ICV_16:
+		hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return hmac_ctrl;
+}
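Note that alg_icv_len is expressed in bits, so the switch above maps the three ESP GCM ICV widths onto byte counts before selecting the hardware truncation mode. A sanity sketch of that mapping (assuming, as the spellings suggest, that ICV_8/ICV_12/ICV_16 are plain byte values):

	/* 64-bit ICV  ->  8 bytes -> CHCR_SCMD_HMAC_CTRL_DIV2
	 * 96-bit ICV  -> 12 bytes -> CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT
	 * 128-bit ICV -> 16 bytes -> CHCR_SCMD_HMAC_CTRL_NO_TRUNC
	 */
	static int icv_bits_to_authsize(int alg_icv_len)
	{
		int authsize = alg_icv_len / 8;

		return (authsize == 8 || authsize == 12 || authsize == 16) ?
		       authsize : -1;
	}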
+
+static inline int chcr_ipsec_setkey(struct xfrm_state *x,
+				    struct ipsec_sa_entry *sa_entry)
+{
+	struct crypto_cipher *cipher;
+	int keylen = (x->aead->alg_key_len + 7) / 8;
+	unsigned char *key = x->aead->alg_key;
+	int ck_size, key_ctx_size = 0;
+	unsigned char ghash_h[AEAD_H_SIZE];
+	int ret = 0;
+
+	if (keylen > 3) {
+		keylen -= 4;  /* nonce/salt is present in the last 4 bytes */
+		memcpy(sa_entry->salt, key + keylen, 4);
+	}
+
+	if (keylen == AES_KEYSIZE_128) {
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+	} else if (keylen == AES_KEYSIZE_192) {
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
+	} else if (keylen == AES_KEYSIZE_256) {
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+	} else {
+		pr_err("GCM: Invalid key length %d\n", keylen);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	memcpy(sa_entry->key, key, keylen);
+	sa_entry->enckey_len = keylen;
+	key_ctx_size = sizeof(struct _key_ctx) +
+			      ((DIV_ROUND_UP(keylen, 16)) << 4) +
+			      AEAD_H_SIZE;
+
+	sa_entry->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size,
+						 CHCR_KEYCTX_MAC_KEY_SIZE_128,
+						 0, 0,
+						 key_ctx_size >> 4);
+
+	/* Calculate the H = CIPH(K, 0 repeated 16 times).
+	 * It will go in key context
+	 */
+	cipher = crypto_alloc_cipher("aes-generic", 0, 0);
+	if (IS_ERR(cipher)) {
+		sa_entry->enckey_len = 0;
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = crypto_cipher_setkey(cipher, key, keylen);
+	if (ret) {
+		sa_entry->enckey_len = 0;
+		goto out1;
+	}
+	memset(ghash_h, 0, AEAD_H_SIZE);
+	crypto_cipher_encrypt_one(cipher, ghash_h, ghash_h);
+	memcpy(sa_entry->key + (DIV_ROUND_UP(sa_entry->enckey_len, 16) *
+	       16), ghash_h, AEAD_H_SIZE);
+	sa_entry->kctx_len = ((DIV_ROUND_UP(sa_entry->enckey_len, 16)) << 4) +
+			      AEAD_H_SIZE;
+out1:
+	crypto_free_cipher(cipher);
+out:
+	return ret;
+}
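The key-context sizing above can be restated compactly. A standalone sketch, assuming AEAD_H_SIZE is one AES block (16 bytes) reserved for the GHASH subkey H, and with DIV_ROUND_UP redefined locally to mirror the kernel macro:

	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
	#define AEAD_H_SIZE		16	/* assumed: one AES block for H */

	/* Key rounded up to 16-byte units, followed by H = AES_K(0^128). */
	static unsigned int gcm_kctx_len(unsigned int keylen)
	{
		return (DIV_ROUND_UP(keylen, 16) << 4) + AEAD_H_SIZE;
	}
	/* gcm_kctx_len(16) == 32, gcm_kctx_len(24) == 48, gcm_kctx_len(32) == 48 */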
+
+/*
+ * chcr_xfrm_add_state
+ * returns 0 on success, a negative error if it failed to send a message
+ * to the FPGA, or a positive error if the FPGA returned a bad response
+ */
+static int chcr_xfrm_add_state(struct xfrm_state *x)
+{
+	struct ipsec_sa_entry *sa_entry;
+	int res = 0;
+
+	if (x->props.aalgo != SADB_AALG_NONE) {
+		pr_debug("CHCR: Cannot offload authenticated xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.calgo != SADB_X_CALG_NONE) {
+		pr_debug("CHCR: Cannot offload compressed xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.flags & XFRM_STATE_ESN) {
+		pr_debug("CHCR: Cannot offload ESN xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.family != AF_INET &&
+	    x->props.family != AF_INET6) {
+		pr_debug("CHCR: Only IPv4/6 xfrm state offloaded\n");
+		return -EINVAL;
+	}
+	if (x->props.mode != XFRM_MODE_TRANSPORT &&
+	    x->props.mode != XFRM_MODE_TUNNEL) {
+		pr_debug("CHCR: Only transport and tunnel xfrm offload\n");
+		return -EINVAL;
+	}
+	if (x->id.proto != IPPROTO_ESP) {
+		pr_debug("CHCR: Only ESP xfrm state offloaded\n");
+		return -EINVAL;
+	}
+	if (x->encap) {
+		pr_debug("CHCR: Encapsulated xfrm state not offloaded\n");
+		return -EINVAL;
+	}
+	if (!x->aead) {
+		pr_debug("CHCR: Cannot offload xfrm states without aead\n");
+		return -EINVAL;
+	}
+	if (x->aead->alg_icv_len != 128 &&
+	    x->aead->alg_icv_len != 96) {
+		pr_debug("CHCR: Cannot offload xfrm states with AEAD ICV length other than 96b & 128b\n");
+		return -EINVAL;
+	}
+	if ((x->aead->alg_key_len != 128 + 32) &&
+	    (x->aead->alg_key_len != 256 + 32)) {
+		pr_debug("CHCR: Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+		return -EINVAL;
+	}
+	if (x->tfcpad) {
+		pr_debug("CHCR: Cannot offload xfrm states with tfc padding\n");
+		return -EINVAL;
+	}
+	if (!x->geniv) {
+		pr_debug("CHCR: Cannot offload xfrm states without geniv\n");
+		return -EINVAL;
+	}
+	if (strcmp(x->geniv, "seqiv")) {
+		pr_debug("CHCR: Cannot offload xfrm states with geniv other than seqiv\n");
+		return -EINVAL;
+	}
+
+	sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+	if (!sa_entry) {
+		res = -ENOMEM;
+		goto out;
+	}
+
+	sa_entry->hmac_ctrl = chcr_ipsec_setauthsize(x, sa_entry);
+	res = chcr_ipsec_setkey(x, sa_entry);
+	if (res) {
+		kfree(sa_entry);
+		goto out;
+	}
+	x->xso.offload_handle = (unsigned long)sa_entry;
+	try_module_get(THIS_MODULE);
+out:
+	return res;
+}
+
+static void chcr_xfrm_del_state(struct xfrm_state *x)
+{
+	/* Nothing to tear down here; the SA is freed in chcr_xfrm_free_state() */
+	if (!x->xso.offload_handle)
+		return;
+}
+
+static void chcr_xfrm_free_state(struct xfrm_state *x)
+{
+	struct ipsec_sa_entry *sa_entry;
+
+	if (!x->xso.offload_handle)
+		return;
+
+	sa_entry = (struct ipsec_sa_entry *)x->xso.offload_handle;
+	kfree(sa_entry);
+	module_put(THIS_MODULE);
+}
+
+static bool chcr_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+	/* Offload with IP options is not supported yet */
+	if (ip_hdr(skb)->ihl > 5)
+		return false;
+
+	return true;
+}
+
+static inline int is_eth_imm(const struct sk_buff *skb, unsigned int kctx_len)
+{
+	int hdrlen = sizeof(struct chcr_ipsec_req) + kctx_len;
+
+	hdrlen += sizeof(struct cpl_tx_pkt);
+	if (skb->len <= MAX_IMM_TX_PKT_LEN - hdrlen)
+		return hdrlen;
+	return 0;
+}
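With illustrative header sizes (the real values depend on the firmware header structs, so the numbers below are assumptions, not the actual sizeof results), the immediate-data cutoff works out as follows:

	#define MAX_IMM_TX_PKT_LEN	256

	/* Hypothetical sizes, for illustration only. */
	#define EXAMPLE_REQ_HDRLEN	96	/* chcr_ipsec_req + kctx_len */
	#define EXAMPLE_CPL_TX_PKT	16	/* cpl_tx_pkt */

	static int fits_as_immediate(unsigned int pktlen)
	{
		unsigned int hdrlen = EXAMPLE_REQ_HDRLEN + EXAMPLE_CPL_TX_PKT;

		/* Here: packets of 256 - 112 = 144 bytes or less are inlined. */
		return pktlen <= MAX_IMM_TX_PKT_LEN - hdrlen;
	}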
+
+static inline unsigned int calc_tx_sec_flits(const struct sk_buff *skb,
+					     unsigned int kctx_len)
+{
+	unsigned int flits;
+	int hdrlen = is_eth_imm(skb, kctx_len);
+
+	/* If the skb is small enough, we can pump it out as a work request
+	 * with only immediate data.  In that case we just have to have the
+	 * TX Packet header plus the skb data in the Work Request.
+	 */
+
+	if (hdrlen)
+		return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64));
+
+	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
+
+	/* Otherwise, we're going to have to construct a Scatter gather list
+	 * of the skb body and fragments.  We also include the flits necessary
+	 * for the TX Packet Work Request and CPL.  We always have a firmware
+	 * Write Header (incorporated as part of the cpl_tx_pkt_lso and
+	 * cpl_tx_pkt structures), followed by either a TX Packet Write CPL
+	 * message or, if we're doing a Large Send Offload, an LSO CPL message
+	 * with an embedded TX Packet Write CPL message.
+	 */
+	flits += (sizeof(struct fw_ulptx_wr) +
+		  sizeof(struct chcr_ipsec_req) +
+		  kctx_len +
+		  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
+	return flits;
+}
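A worked example of the non-immediate path, again under assumed struct sizes rather than the real ones:

	/* Non-immediate case for an skb with 2 fragments:
	 *   gather list: sgl_len(2 + 1) = 5 flits
	 *   WR + CPL + key overhead: (16 + 64 + 32 + 16) / 8 = 16 flits
	 *   total: 21 flits
	 */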
+
+inline void *copy_cpltx_pktxt(struct sk_buff *skb,
+				struct net_device *dev,
+				void *pos)
+{
+	struct adapter *adap;
+	struct port_info *pi;
+	struct sge_eth_txq *q;
+	struct cpl_tx_pkt_core *cpl;
+	u64 cntrl = 0;
+	u32 ctrl0, qidx;
+
+	pi = netdev_priv(dev);
+	adap = pi->adapter;
+	qidx = skb->queue_mapping;
+	q = &adap->sge.ethtxq[qidx + pi->first_qset];
+
+	cpl = (struct cpl_tx_pkt_core *)pos;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
+	ctrl0 = TXPKT_OPCODE_V(CPL_TX_PKT_XT) | TXPKT_INTF_V(pi->tx_chan) |
+			       TXPKT_PF_V(adap->pf);
+	if (skb_vlan_tag_present(skb)) {
+		q->vlan_ins++;
+		cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
+	}
+
+	cpl->ctrl0 = htonl(ctrl0);
+	cpl->pack = htons(0);
+	cpl->len = htons(skb->len);
+	cpl->ctrl1 = cpu_to_be64(cntrl);
+
+	pos += sizeof(struct cpl_tx_pkt_core);
+	return pos;
+}
+
+inline void *copy_key_cpltx_pktxt(struct sk_buff *skb,
+				struct net_device *dev,
+				void *pos,
+				struct ipsec_sa_entry *sa_entry)
+{
+	struct adapter *adap;
+	struct port_info *pi;
+	struct sge_eth_txq *q;
+	unsigned int len, qidx;
+	struct _key_ctx *key_ctx;
+	int left, eoq, key_len;
+
+	pi = netdev_priv(dev);
+	adap = pi->adapter;
+	qidx = skb->queue_mapping;
+	q = &adap->sge.ethtxq[qidx + pi->first_qset];
+	len = sa_entry->enckey_len + sizeof(struct cpl_tx_pkt_core);
+	key_len = sa_entry->kctx_len;
+
+	/* end of queue, reset pos to start of queue */
+	eoq = (void *)q->q.stat - pos;
+	left = eoq;
+	if (!eoq) {
+		pos = q->q.desc;
+		left = 64 * q->q.size;
+	}
+
+	/* Copy the Key context header */
+	key_ctx = (struct _key_ctx *)pos;
+	key_ctx->ctx_hdr = sa_entry->key_ctx_hdr;
+	memcpy(key_ctx->salt, sa_entry->salt, MAX_SALT);
+	pos += sizeof(struct _key_ctx);
+	left -= sizeof(struct _key_ctx);
+
+	if (likely(len <= left)) {
+		memcpy(key_ctx->key, sa_entry->key, key_len);
+		pos += key_len;
+	} else {
+		if (key_len <= left) {
+			memcpy(pos, sa_entry->key, key_len);
+			pos += key_len;
+		} else {
+			memcpy(pos, sa_entry->key, left);
+			memcpy(q->q.desc, sa_entry->key + left,
+			       key_len - left);
+			pos = (u8 *)q->q.desc + (key_len - left);
+		}
+	}
+	/* Copy CPL TX PKT XT */
+	pos = copy_cpltx_pktxt(skb, dev, pos);
+
+	return pos;
+}
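The key copy above must respect the descriptor-ring boundary: whatever does not fit between the current position and the end-of-queue marker is written at the start of the ring. A minimal sketch of that wrap handling, with the ring geometry abstracted into plain pointers:

	#include <string.h>

	/* Copy len bytes at pos; spill anything past end-of-ring to the start. */
	static char *copy_with_wrap(char *pos, char *ring_start, int left,
				    const char *src, int len)
	{
		if (len <= left) {
			memcpy(pos, src, len);
			return pos + len;
		}
		memcpy(pos, src, left);
		memcpy(ring_start, src + left, len - left);
		return ring_start + (len - left);
	}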
+
+inline void *chcr_crypto_wreq(struct sk_buff *skb,
+			       struct net_device *dev,
+			       void *pos,
+			       int credits,
+			       struct ipsec_sa_entry *sa_entry)
+{
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+	unsigned int immdatalen = 0;
+	unsigned int ivsize = GCM_ESP_IV_SIZE;
+	struct chcr_ipsec_wr *wr;
+	unsigned int flits;
+	u32 wr_mid;
+	int qidx = skb_get_queue_mapping(skb);
+	struct sge_eth_txq *q = &adap->sge.ethtxq[qidx + pi->first_qset];
+	unsigned int kctx_len = sa_entry->kctx_len;
+	int qid = q->q.cntxt_id;
+
+	atomic_inc(&adap->chcr_stats.ipsec_cnt);
+
+	flits = calc_tx_sec_flits(skb, kctx_len);
+
+	if (is_eth_imm(skb, kctx_len))
+		immdatalen = skb->len;
+
+	/* WR Header */
+	wr = (struct chcr_ipsec_wr *)pos;
+	wr->wreq.op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR));
+	wr_mid = FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
+
+	if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+		netif_tx_stop_queue(q->txq);
+		q->q.stops++;
+		wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+	}
+	wr_mid |= FW_ULPTX_WR_DATA_F;
+	wr->wreq.flowid_len16 = htonl(wr_mid);
+
+	/* ULPTX */
+	wr->req.ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(pi->port_id, qid);
+	wr->req.ulptx.len = htonl(DIV_ROUND_UP(flits, 2)  - 1);
+
+	/* Sub-command */
+	wr->req.sc_imm.cmd_more = FILL_CMD_MORE(immdatalen);
+	wr->req.sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) +
+					 sizeof(wr->req.key_ctx) +
+					 kctx_len +
+					 sizeof(struct cpl_tx_pkt_core) +
+					 immdatalen);
+
+	/* CPL_SEC_PDU */
+	wr->req.sec_cpl.op_ivinsrtofst = htonl(
+				CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) |
+				CPL_TX_SEC_PDU_CPLLEN_V(2) |
+				CPL_TX_SEC_PDU_PLACEHOLDER_V(1) |
+				CPL_TX_SEC_PDU_IVINSRTOFST_V(
+				(skb_transport_offset(skb) +
+				sizeof(struct ip_esp_hdr) + 1)));
+
+	wr->req.sec_cpl.pldlen = htonl(skb->len);
+
+	wr->req.sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(
+				(skb_transport_offset(skb) + 1),
+				(skb_transport_offset(skb) +
+				 sizeof(struct ip_esp_hdr)),
+				(skb_transport_offset(skb) +
+				 sizeof(struct ip_esp_hdr) +
+				 GCM_ESP_IV_SIZE + 1), 0);
+
+	wr->req.sec_cpl.cipherstop_lo_authinsert =
+		FILL_SEC_CPL_AUTHINSERT(0, skb_transport_offset(skb) +
+					   sizeof(struct ip_esp_hdr) +
+					   GCM_ESP_IV_SIZE + 1,
+					   sa_entry->authsize,
+					   sa_entry->authsize);
+	wr->req.sec_cpl.seqno_numivs =
+		FILL_SEC_CPL_SCMD0_SEQNO(CHCR_ENCRYPT_OP, 1,
+					 CHCR_SCMD_CIPHER_MODE_AES_GCM,
+					 CHCR_SCMD_AUTH_MODE_GHASH,
+					 sa_entry->hmac_ctrl,
+					 ivsize >> 1);
+	wr->req.sec_cpl.ivgen_hdrlen =  FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1,
+								  0, 0, 0);
+
+	pos += sizeof(struct fw_ulptx_wr) +
+	       sizeof(struct ulp_txpkt) +
+	       sizeof(struct ulptx_idata) +
+	       sizeof(struct cpl_tx_sec_pdu);
+
+	pos = copy_key_cpltx_pktxt(skb, dev, pos, sa_entry);
+
+	return pos;
+}
+
+/**
+ *      flits_to_desc - returns the number of Tx descriptors for the given flits
+ *      @n: the number of flits
+ *
+ *      Returns the number of Tx descriptors needed for the supplied number
+ *      of flits.
+ */
+static inline unsigned int flits_to_desc(unsigned int n)
+{
+	WARN_ON(n > SGE_MAX_WR_LEN / 8);
+	return DIV_ROUND_UP(n, 8);
+}
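Since a flit is 8 bytes and a Tx descriptor is 64 bytes, each descriptor carries 8 flits, hence the round-up by 8. A quick standalone check:

	#include <assert.h>

	static unsigned int flits_to_desc_chk(unsigned int n)
	{
		return (n + 7) / 8;	/* same as DIV_ROUND_UP(n, 8) */
	}

	int main(void)
	{
		assert(flits_to_desc_chk(8) == 1);
		assert(flits_to_desc_chk(21) == 3);
		assert(flits_to_desc_chk(25) == 4);
		return 0;
	}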
+
+static inline unsigned int txq_avail(const struct sge_txq *q)
+{
+	return q->size - 1 - q->in_use;
+}
+
+static void eth_txq_stop(struct sge_eth_txq *q)
+{
+	netif_tx_stop_queue(q->txq);
+	q->q.stops++;
+}
+
+static inline void txq_advance(struct sge_txq *q, unsigned int n)
+{
+	q->in_use += n;
+	q->pidx += n;
+	if (q->pidx >= q->size)
+		q->pidx -= q->size;
+}
+
+/*
+ *      chcr_ipsec_xmit - called from the ULD Tx handler
+ */
+int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct ipsec_sa_entry *sa_entry;
+	u64 *pos, *end, *before, *sgl;
+	int qidx, left, credits;
+	unsigned int flits = 0, ndesc, kctx_len;
+	struct adapter *adap;
+	struct sge_eth_txq *q;
+	struct port_info *pi;
+	dma_addr_t addr[MAX_SKB_FRAGS + 1];
+	bool immediate = false;
+
+	if (!x->xso.offload_handle)
+		return NETDEV_TX_BUSY;
+
+	sa_entry = (struct ipsec_sa_entry *)x->xso.offload_handle;
+	kctx_len = sa_entry->kctx_len;
+
+	if (skb->sp->len != 1) {
+out_free:
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	pi = netdev_priv(dev);
+	adap = pi->adapter;
+	qidx = skb->queue_mapping;
+	q = &adap->sge.ethtxq[qidx + pi->first_qset];
+
+	cxgb4_reclaim_completed_tx(adap, &q->q, true);
+
+	flits = calc_tx_sec_flits(skb, sa_entry->kctx_len);
+	ndesc = flits_to_desc(flits);
+	credits = txq_avail(&q->q) - ndesc;
+
+	if (unlikely(credits < 0)) {
+		eth_txq_stop(q);
+		dev_err(adap->pdev_dev,
+			"%s: Tx ring %u full while queue awake! cred:%d %d %d flits:%d\n",
+			dev->name, qidx, credits, ndesc, txq_avail(&q->q),
+			flits);
+		return NETDEV_TX_BUSY;
+	}
+
+	if (is_eth_imm(skb, kctx_len))
+		immediate = true;
+
+	if (!immediate &&
+	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) {
+		q->mapping_err++;
+		goto out_free;
+	}
+
+	pos = (u64 *)&q->q.desc[q->q.pidx];
+	before = (u64 *)pos;
+	end = (u64 *)pos + flits;
+	/* Setup IPSec CPL */
+	pos = (void *)chcr_crypto_wreq(skb, dev, (void *)pos,
+				       credits, sa_entry);
+	if (before > (u64 *)pos) {
+		left = (u8 *)end - (u8 *)q->q.stat;
+		end = (void *)q->q.desc + left;
+	}
+	if (pos == (u64 *)q->q.stat) {
+		left = (u8 *)end - (u8 *)q->q.stat;
+		end = (void *)q->q.desc + left;
+		pos = (void *)q->q.desc;
+	}
+
+	sgl = (void *)pos;
+	if (immediate) {
+		cxgb4_inline_tx_skb(skb, &q->q, sgl);
+		dev_consume_skb_any(skb);
+	} else {
+		int last_desc;
+
+		cxgb4_write_sgl(skb, &q->q, (void *)sgl, end,
+				0, addr);
+		skb_orphan(skb);
+
+		last_desc = q->q.pidx + ndesc - 1;
+		if (last_desc >= q->q.size)
+			last_desc -= q->q.size;
+		q->q.sdesc[last_desc].skb = skb;
+		q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)sgl;
+	}
+	txq_advance(&q->q, ndesc);
+
+	cxgb4_ring_tx_db(adap, &q->q, ndesc);
+	return NETDEV_TX_OK;
+}
diff --git a/drivers/crypto/exynos-rng.c b/drivers/crypto/exynos-rng.c
index 451620b475a0..86f5f459762e 100644
--- a/drivers/crypto/exynos-rng.c
+++ b/drivers/crypto/exynos-rng.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * exynos-rng.c - Random Number Generator driver for the Exynos
  *
@@ -6,15 +7,6 @@
  * Loosely based on old driver from drivers/char/hw_random/exynos-rng.c:
  * Copyright (C) 2012 Samsung Electronics
  * Jonghwa Lee <jonghwa3.lee@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation;
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
@@ -22,12 +14,18 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 
 #include <crypto/internal/rng.h>
 
 #define EXYNOS_RNG_CONTROL		0x0
 #define EXYNOS_RNG_STATUS		0x10
+
+#define EXYNOS_RNG_SEED_CONF		0x14
+#define EXYNOS_RNG_GEN_PRNG		BIT(1)
+
 #define EXYNOS_RNG_SEED_BASE		0x140
 #define EXYNOS_RNG_SEED(n)		(EXYNOS_RNG_SEED_BASE + (n * 0x4))
 #define EXYNOS_RNG_OUT_BASE		0x160
@@ -43,13 +41,21 @@
 #define EXYNOS_RNG_SEED_REGS		5
 #define EXYNOS_RNG_SEED_SIZE		(EXYNOS_RNG_SEED_REGS * 4)
 
+enum exynos_prng_type {
+	EXYNOS_PRNG_UNKNOWN = 0,
+	EXYNOS_PRNG_EXYNOS4,
+	EXYNOS_PRNG_EXYNOS5,
+};
+
 /*
- * Driver re-seeds itself with generated random numbers to increase
- * the randomness.
+ * Driver re-seeds itself with generated random numbers to hinder
+ * backtracking of the original seed.
  *
  * Time for next re-seed in ms.
  */
-#define EXYNOS_RNG_RESEED_TIME		100
+#define EXYNOS_RNG_RESEED_TIME		1000
+#define EXYNOS_RNG_RESEED_BYTES		65536
+
 /*
  * In polling mode, do not wait infinitely for the engine to finish the work.
  */
@@ -63,13 +69,17 @@ struct exynos_rng_ctx {
 /* Device associated memory */
 struct exynos_rng_dev {
 	struct device			*dev;
+	enum exynos_prng_type		type;
 	void __iomem			*mem;
 	struct clk			*clk;
+	struct mutex			lock;
 	/* Generated numbers stored for seeding during resume */
 	u8				seed_save[EXYNOS_RNG_SEED_SIZE];
 	unsigned int			seed_save_len;
 	/* Time of last seeding in jiffies */
 	unsigned long			last_seeding;
+	/* Bytes generated since last seeding */
+	unsigned long			bytes_seeding;
 };
 
 static struct exynos_rng_dev *exynos_rng_dev;
@@ -114,39 +124,12 @@ static int exynos_rng_set_seed(struct exynos_rng_dev *rng,
 	}
 
 	rng->last_seeding = jiffies;
+	rng->bytes_seeding = 0;
 
 	return 0;
 }
 
 /*
- * Read from output registers and put the data under 'dst' array,
- * up to dlen bytes.
- *
- * Returns number of bytes actually stored in 'dst' (dlen
- * or EXYNOS_RNG_SEED_SIZE).
- */
-static unsigned int exynos_rng_copy_random(struct exynos_rng_dev *rng,
-					   u8 *dst, unsigned int dlen)
-{
-	unsigned int cnt = 0;
-	int i, j;
-	u32 val;
-
-	for (j = 0; j < EXYNOS_RNG_SEED_REGS; j++) {
-		val = exynos_rng_readl(rng, EXYNOS_RNG_OUT(j));
-
-		for (i = 0; i < 4; i++) {
-			dst[cnt] = val & 0xff;
-			val >>= 8;
-			if (++cnt >= dlen)
-				return cnt;
-		}
-	}
-
-	return cnt;
-}
-
-/*
  * Start the engine and poll for finish.  Then read from output registers
  * filling the 'dst' buffer up to 'dlen' bytes or up to size of generated
  * random data (EXYNOS_RNG_SEED_SIZE).
@@ -160,8 +143,13 @@ static int exynos_rng_get_random(struct exynos_rng_dev *rng,
 {
 	int retry = EXYNOS_RNG_WAIT_RETRIES;
 
-	exynos_rng_writel(rng, EXYNOS_RNG_CONTROL_START,
-			  EXYNOS_RNG_CONTROL);
+	if (rng->type == EXYNOS_PRNG_EXYNOS4) {
+		exynos_rng_writel(rng, EXYNOS_RNG_CONTROL_START,
+				  EXYNOS_RNG_CONTROL);
+	} else if (rng->type == EXYNOS_PRNG_EXYNOS5) {
+		exynos_rng_writel(rng, EXYNOS_RNG_GEN_PRNG,
+				  EXYNOS_RNG_SEED_CONF);
+	}
 
 	while (!(exynos_rng_readl(rng,
 			EXYNOS_RNG_STATUS) & EXYNOS_RNG_STATUS_RNG_DONE) && --retry)
@@ -173,7 +161,9 @@ static int exynos_rng_get_random(struct exynos_rng_dev *rng,
 	/* Clear status bit */
 	exynos_rng_writel(rng, EXYNOS_RNG_STATUS_RNG_DONE,
 			  EXYNOS_RNG_STATUS);
-	*read = exynos_rng_copy_random(rng, dst, dlen);
+	*read = min_t(size_t, dlen, EXYNOS_RNG_SEED_SIZE);
+	memcpy_fromio(dst, rng->mem + EXYNOS_RNG_OUT_BASE, *read);
+	rng->bytes_seeding += *read;
 
 	return 0;
 }
@@ -187,13 +177,18 @@ static void exynos_rng_reseed(struct exynos_rng_dev *rng)
 	unsigned int read = 0;
 	u8 seed[EXYNOS_RNG_SEED_SIZE];
 
-	if (time_before(now, next_seeding))
+	if (time_before(now, next_seeding) &&
+	    rng->bytes_seeding < EXYNOS_RNG_RESEED_BYTES)
 		return;
 
 	if (exynos_rng_get_random(rng, seed, sizeof(seed), &read))
 		return;
 
 	exynos_rng_set_seed(rng, seed, read);
+
+	/* Drop and retake the lock so other users get a chance to run. */
+	mutex_unlock(&rng->lock);
+	mutex_lock(&rng->lock);
 }
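Restated, the reseed fires when either the 1000 ms window elapses or the 64 KiB byte budget is spent, whichever comes first. A standalone sketch of that dual trigger (constants mirror EXYNOS_RNG_RESEED_TIME and EXYNOS_RNG_RESEED_BYTES above):

	#include <stdbool.h>

	#define RESEED_TIME_MS	1000	/* EXYNOS_RNG_RESEED_TIME */
	#define RESEED_BYTES	65536	/* EXYNOS_RNG_RESEED_BYTES */

	/* Reseed when the time window elapses OR the byte budget is spent. */
	static bool needs_reseed(unsigned long elapsed_ms, unsigned long bytes)
	{
		return elapsed_ms >= RESEED_TIME_MS || bytes >= RESEED_BYTES;
	}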
 
 static int exynos_rng_generate(struct crypto_rng *tfm,
@@ -209,6 +204,7 @@ static int exynos_rng_generate(struct crypto_rng *tfm,
 	if (ret)
 		return ret;
 
+	mutex_lock(&rng->lock);
 	do {
 		ret = exynos_rng_get_random(rng, dst, dlen, &read);
 		if (ret)
@@ -219,6 +215,7 @@ static int exynos_rng_generate(struct crypto_rng *tfm,
 
 		exynos_rng_reseed(rng);
 	} while (dlen > 0);
+	mutex_unlock(&rng->lock);
 
 	clk_disable_unprepare(rng->clk);
 
@@ -236,7 +233,9 @@ static int exynos_rng_seed(struct crypto_rng *tfm, const u8 *seed,
 	if (ret)
 		return ret;
 
+	mutex_lock(&rng->lock);
 	ret = exynos_rng_set_seed(ctx->rng, seed, slen);
+	mutex_unlock(&rng->lock);
 
 	clk_disable_unprepare(rng->clk);
 
@@ -259,7 +258,7 @@ static struct rng_alg exynos_rng_alg = {
 	.base			= {
 		.cra_name		= "stdrng",
 		.cra_driver_name	= "exynos_rng",
-		.cra_priority		= 100,
+		.cra_priority		= 300,
 		.cra_ctxsize		= sizeof(struct exynos_rng_ctx),
 		.cra_module		= THIS_MODULE,
 		.cra_init		= exynos_rng_kcapi_init,
@@ -279,6 +278,10 @@ static int exynos_rng_probe(struct platform_device *pdev)
 	if (!rng)
 		return -ENOMEM;
 
+	rng->type = (enum exynos_prng_type)of_device_get_match_data(&pdev->dev);
+
+	mutex_init(&rng->lock);
+
 	rng->dev = &pdev->dev;
 	rng->clk = devm_clk_get(&pdev->dev, "secss");
 	if (IS_ERR(rng->clk)) {
@@ -329,9 +332,14 @@ static int __maybe_unused exynos_rng_suspend(struct device *dev)
 	if (ret)
 		return ret;
 
+	mutex_lock(&rng->lock);
+
 	/* Get new random numbers and store them for seeding on resume. */
 	exynos_rng_get_random(rng, rng->seed_save, sizeof(rng->seed_save),
 			      &(rng->seed_save_len));
+
+	mutex_unlock(&rng->lock);
+
 	dev_dbg(rng->dev, "Stored %u bytes for seeding on system resume\n",
 		rng->seed_save_len);
 
@@ -354,8 +362,12 @@ static int __maybe_unused exynos_rng_resume(struct device *dev)
 	if (ret)
 		return ret;
 
+	mutex_lock(&rng->lock);
+
 	ret = exynos_rng_set_seed(rng, rng->seed_save, rng->seed_save_len);
 
+	mutex_unlock(&rng->lock);
+
 	clk_disable_unprepare(rng->clk);
 
 	return ret;
@@ -367,6 +379,10 @@ static SIMPLE_DEV_PM_OPS(exynos_rng_pm_ops, exynos_rng_suspend,
 static const struct of_device_id exynos_rng_dt_match[] = {
 	{
 		.compatible = "samsung,exynos4-rng",
+		.data = (const void *)EXYNOS_PRNG_EXYNOS4,
+	}, {
+		.compatible = "samsung,exynos5250-prng",
+		.data = (const void *)EXYNOS_PRNG_EXYNOS5,
 	},
 	{ },
 };
@@ -386,4 +402,4 @@ module_platform_driver(exynos_rng_driver);
 
 MODULE_DESCRIPTION("Exynos H/W Random Number Generator driver");
 MODULE_AUTHOR("Krzysztof Kozlowski <krzk@kernel.org>");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index e09d4055b19e..a5a36fe7bf2c 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -2579,6 +2579,7 @@ err_out_unmap_bars:
 	for (i = 0; i < 3; ++i)
 		if (dev->bar[i])
 			iounmap(dev->bar[i]);
+	kfree(dev);
 
 err_out_free_regions:
 	pci_release_regions(pdev);
diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index 4bcef78a08aa..225e74a7f724 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -108,10 +108,10 @@ static void eip197_write_firmware(struct safexcel_crypto_priv *priv,
 	writel(EIP197_PE_ICE_x_CTRL_SW_RESET |
 	       EIP197_PE_ICE_x_CTRL_CLR_ECC_CORR |
 	       EIP197_PE_ICE_x_CTRL_CLR_ECC_NON_CORR,
-	       priv->base + ctrl);
+	       EIP197_PE(priv) + ctrl);
 
 	/* Enable access to the program memory */
-	writel(prog_en, priv->base + EIP197_PE_ICE_RAM_CTRL);
+	writel(prog_en, EIP197_PE(priv) + EIP197_PE_ICE_RAM_CTRL);
 
 	/* Write the firmware */
 	for (i = 0; i < fw->size / sizeof(u32); i++)
@@ -119,12 +119,12 @@ static void eip197_write_firmware(struct safexcel_crypto_priv *priv,
 		       priv->base + EIP197_CLASSIFICATION_RAMS + i * sizeof(u32));
 
 	/* Disable access to the program memory */
-	writel(0, priv->base + EIP197_PE_ICE_RAM_CTRL);
+	writel(0, EIP197_PE(priv) + EIP197_PE_ICE_RAM_CTRL);
 
 	/* Release engine from reset */
-	val = readl(priv->base + ctrl);
+	val = readl(EIP197_PE(priv) + ctrl);
 	val &= ~EIP197_PE_ICE_x_CTRL_SW_RESET;
-	writel(val, priv->base + ctrl);
+	writel(val, EIP197_PE(priv) + ctrl);
 }
 
 static int eip197_load_firmwares(struct safexcel_crypto_priv *priv)
@@ -145,14 +145,14 @@ static int eip197_load_firmwares(struct safexcel_crypto_priv *priv)
 	 }
 
 	/* Clear the scratchpad memory */
-	val = readl(priv->base + EIP197_PE_ICE_SCRATCH_CTRL);
+	val = readl(EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_CTRL);
 	val |= EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_TIMER |
 	       EIP197_PE_ICE_SCRATCH_CTRL_TIMER_EN |
 	       EIP197_PE_ICE_SCRATCH_CTRL_SCRATCH_ACCESS |
 	       EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_ACCESS;
-	writel(val, priv->base + EIP197_PE_ICE_SCRATCH_CTRL);
+	writel(val, EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_CTRL);
 
-	memset(priv->base + EIP197_PE_ICE_SCRATCH_RAM, 0,
+	memset(EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_RAM, 0,
 	       EIP197_NUM_OF_SCRATCH_BLOCKS * sizeof(u32));
 
 	eip197_write_firmware(priv, fw[FW_IFPP], EIP197_PE_ICE_FPP_CTRL,
@@ -173,7 +173,7 @@ static int safexcel_hw_setup_cdesc_rings(struct safexcel_crypto_priv *priv)
 	u32 hdw, cd_size_rnd, val;
 	int i;
 
-	hdw = readl(priv->base + EIP197_HIA_OPTIONS);
+	hdw = readl(EIP197_HIA_AIC_G(priv) + EIP197_HIA_OPTIONS);
 	hdw &= GENMASK(27, 25);
 	hdw >>= 25;
 
@@ -182,26 +182,25 @@ static int safexcel_hw_setup_cdesc_rings(struct safexcel_crypto_priv *priv)
 	for (i = 0; i < priv->config.rings; i++) {
 		/* ring base address */
 		writel(lower_32_bits(priv->ring[i].cdr.base_dma),
-		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_RING_BASE_ADDR_LO);
+		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_LO);
 		writel(upper_32_bits(priv->ring[i].cdr.base_dma),
-		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
+		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
 
 		writel(EIP197_xDR_DESC_MODE_64BIT | (priv->config.cd_offset << 16) |
 		       priv->config.cd_size,
-		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_DESC_SIZE);
+		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_DESC_SIZE);
 		writel(((EIP197_FETCH_COUNT * (cd_size_rnd << hdw)) << 16) |
 		       (EIP197_FETCH_COUNT * priv->config.cd_offset),
-		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_CFG);
+		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_CFG);
 
 		/* Configure DMA tx control */
 		val = EIP197_HIA_xDR_CFG_WR_CACHE(WR_CACHE_3BITS);
 		val |= EIP197_HIA_xDR_CFG_RD_CACHE(RD_CACHE_3BITS);
-		writel(val,
-		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_DMA_CFG);
+		writel(val, EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_DMA_CFG);
 
 		/* clear any pending interrupt */
 		writel(GENMASK(5, 0),
-		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_STAT);
+		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_STAT);
 	}
 
 	return 0;
@@ -212,7 +211,7 @@ static int safexcel_hw_setup_rdesc_rings(struct safexcel_crypto_priv *priv)
 	u32 hdw, rd_size_rnd, val;
 	int i;
 
-	hdw = readl(priv->base + EIP197_HIA_OPTIONS);
+	hdw = readl(EIP197_HIA_AIC_G(priv) + EIP197_HIA_OPTIONS);
 	hdw &= GENMASK(27, 25);
 	hdw >>= 25;
 
@@ -221,33 +220,33 @@ static int safexcel_hw_setup_rdesc_rings(struct safexcel_crypto_priv *priv)
 	for (i = 0; i < priv->config.rings; i++) {
 		/* ring base address */
 		writel(lower_32_bits(priv->ring[i].rdr.base_dma),
-		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_RING_BASE_ADDR_LO);
+		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_LO);
 		writel(upper_32_bits(priv->ring[i].rdr.base_dma),
-		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
+		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
 
 		writel(EIP197_xDR_DESC_MODE_64BIT | (priv->config.rd_offset << 16) |
 		       priv->config.rd_size,
-		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_DESC_SIZE);
+		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_DESC_SIZE);
 
 		writel(((EIP197_FETCH_COUNT * (rd_size_rnd << hdw)) << 16) |
 		       (EIP197_FETCH_COUNT * priv->config.rd_offset),
-		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_CFG);
+		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_CFG);
 
 		/* Configure DMA tx control */
 		val = EIP197_HIA_xDR_CFG_WR_CACHE(WR_CACHE_3BITS);
 		val |= EIP197_HIA_xDR_CFG_RD_CACHE(RD_CACHE_3BITS);
 		val |= EIP197_HIA_xDR_WR_RES_BUF | EIP197_HIA_xDR_WR_CTRL_BUG;
 		writel(val,
-		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_DMA_CFG);
+		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_DMA_CFG);
 
 		/* clear any pending interrupt */
 		writel(GENMASK(7, 0),
-		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_STAT);
+		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_STAT);
 
 		/* enable ring interrupt */
-		val = readl(priv->base + EIP197_HIA_AIC_R_ENABLE_CTRL(i));
+		val = readl(EIP197_HIA_AIC_R(priv) + EIP197_HIA_AIC_R_ENABLE_CTRL(i));
 		val |= EIP197_RDR_IRQ(i);
-		writel(val, priv->base + EIP197_HIA_AIC_R_ENABLE_CTRL(i));
+		writel(val, EIP197_HIA_AIC_R(priv) + EIP197_HIA_AIC_R_ENABLE_CTRL(i));
 	}
 
 	return 0;
@@ -259,39 +258,40 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 	int i, ret;
 
 	/* Determine endianness and configure byte swap */
-	version = readl(priv->base + EIP197_HIA_VERSION);
-	val = readl(priv->base + EIP197_HIA_MST_CTRL);
+	version = readl(EIP197_HIA_AIC(priv) + EIP197_HIA_VERSION);
+	val = readl(EIP197_HIA_AIC(priv) + EIP197_HIA_MST_CTRL);
 
 	if ((version & 0xffff) == EIP197_HIA_VERSION_BE)
 		val |= EIP197_MST_CTRL_BYTE_SWAP;
 	else if (((version >> 16) & 0xffff) == EIP197_HIA_VERSION_LE)
 		val |= (EIP197_MST_CTRL_NO_BYTE_SWAP >> 24);
 
-	writel(val, priv->base + EIP197_HIA_MST_CTRL);
-
+	writel(val, EIP197_HIA_AIC(priv) + EIP197_HIA_MST_CTRL);
 
 	/* Configure wr/rd cache values */
 	writel(EIP197_MST_CTRL_RD_CACHE(RD_CACHE_4BITS) |
 	       EIP197_MST_CTRL_WD_CACHE(WR_CACHE_4BITS),
-	       priv->base + EIP197_MST_CTRL);
+	       EIP197_HIA_GEN_CFG(priv) + EIP197_MST_CTRL);
 
 	/* Interrupts reset */
 
 	/* Disable all global interrupts */
-	writel(0, priv->base + EIP197_HIA_AIC_G_ENABLE_CTRL);
+	writel(0, EIP197_HIA_AIC_G(priv) + EIP197_HIA_AIC_G_ENABLE_CTRL);
 
 	/* Clear any pending interrupt */
-	writel(GENMASK(31, 0), priv->base + EIP197_HIA_AIC_G_ACK);
+	writel(GENMASK(31, 0), EIP197_HIA_AIC_G(priv) + EIP197_HIA_AIC_G_ACK);
 
 	/* Data Fetch Engine configuration */
 
 	/* Reset all DFE threads */
 	writel(EIP197_DxE_THR_CTRL_RESET_PE,
-	       priv->base + EIP197_HIA_DFE_THR_CTRL);
+	       EIP197_HIA_DFE_THR(priv) + EIP197_HIA_DFE_THR_CTRL);
 
-	/* Reset HIA input interface arbiter */
-	writel(EIP197_HIA_RA_PE_CTRL_RESET,
-	       priv->base + EIP197_HIA_RA_PE_CTRL);
+	if (priv->version == EIP197) {
+		/* Reset HIA input interface arbiter */
+		writel(EIP197_HIA_RA_PE_CTRL_RESET,
+		       EIP197_HIA_AIC(priv) + EIP197_HIA_RA_PE_CTRL);
+	}
 
 	/* DMA transfer size to use */
 	val = EIP197_HIA_DFE_CFG_DIS_DEBUG;
@@ -299,29 +299,32 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 	val |= EIP197_HIA_DxE_CFG_MIN_CTRL_SIZE(5) | EIP197_HIA_DxE_CFG_MAX_CTRL_SIZE(7);
 	val |= EIP197_HIA_DxE_CFG_DATA_CACHE_CTRL(RD_CACHE_3BITS);
 	val |= EIP197_HIA_DxE_CFG_CTRL_CACHE_CTRL(RD_CACHE_3BITS);
-	writel(val, priv->base + EIP197_HIA_DFE_CFG);
+	writel(val, EIP197_HIA_DFE(priv) + EIP197_HIA_DFE_CFG);
 
 	/* Take the DFE threads out of the reset state */
-	writel(0, priv->base + EIP197_HIA_DFE_THR_CTRL);
+	writel(0, EIP197_HIA_DFE_THR(priv) + EIP197_HIA_DFE_THR_CTRL);
 
 	/* Configure the processing engine thresholds */
 	writel(EIP197_PE_IN_xBUF_THRES_MIN(5) | EIP197_PE_IN_xBUF_THRES_MAX(9),
-	      priv->base + EIP197_PE_IN_DBUF_THRES);
+	       EIP197_PE(priv) + EIP197_PE_IN_DBUF_THRES);
 	writel(EIP197_PE_IN_xBUF_THRES_MIN(5) | EIP197_PE_IN_xBUF_THRES_MAX(7),
-	      priv->base + EIP197_PE_IN_TBUF_THRES);
+	       EIP197_PE(priv) + EIP197_PE_IN_TBUF_THRES);
 
-	/* enable HIA input interface arbiter and rings */
-	writel(EIP197_HIA_RA_PE_CTRL_EN | GENMASK(priv->config.rings - 1, 0),
-	       priv->base + EIP197_HIA_RA_PE_CTRL);
+	if (priv->version == EIP197) {
+		/* enable HIA input interface arbiter and rings */
+		writel(EIP197_HIA_RA_PE_CTRL_EN |
+		       GENMASK(priv->config.rings - 1, 0),
+		       EIP197_HIA_AIC(priv) + EIP197_HIA_RA_PE_CTRL);
+	}
 
 	/* Data Store Engine configuration */
 
 	/* Reset all DSE threads */
 	writel(EIP197_DxE_THR_CTRL_RESET_PE,
-	       priv->base + EIP197_HIA_DSE_THR_CTRL);
+	       EIP197_HIA_DSE_THR(priv) + EIP197_HIA_DSE_THR_CTRL);
 
 	/* Wait for all DSE threads to complete */
-	while ((readl(priv->base + EIP197_HIA_DSE_THR_STAT) &
+	while ((readl(EIP197_HIA_DSE_THR(priv) + EIP197_HIA_DSE_THR_STAT) &
 		GENMASK(15, 12)) != GENMASK(15, 12))
 		;
 
@@ -330,15 +333,19 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 	val |= EIP197_HIA_DxE_CFG_MIN_DATA_SIZE(7) | EIP197_HIA_DxE_CFG_MAX_DATA_SIZE(8);
 	val |= EIP197_HIA_DxE_CFG_DATA_CACHE_CTRL(WR_CACHE_3BITS);
 	val |= EIP197_HIA_DSE_CFG_ALLWAYS_BUFFERABLE;
-	val |= EIP197_HIA_DSE_CFG_EN_SINGLE_WR;
-	writel(val, priv->base + EIP197_HIA_DSE_CFG);
+	/* FIXME: instability issues can occur for EIP97, but disabling this
+	 * option impacts performance.
+	 */
+	if (priv->version == EIP197)
+		val |= EIP197_HIA_DSE_CFG_EN_SINGLE_WR;
+	writel(val, EIP197_HIA_DSE(priv) + EIP197_HIA_DSE_CFG);
 
 	/* Take the DSE threads out of the reset state */
-	writel(0, priv->base + EIP197_HIA_DSE_THR_CTRL);
+	writel(0, EIP197_HIA_DSE_THR(priv) + EIP197_HIA_DSE_THR_CTRL);
 
 	/* Configure the processing engine thresholds */
 	writel(EIP197_PE_OUT_DBUF_THRES_MIN(7) | EIP197_PE_OUT_DBUF_THRES_MAX(8),
-	       priv->base + EIP197_PE_OUT_DBUF_THRES);
+	       EIP197_PE(priv) + EIP197_PE_OUT_DBUF_THRES);
 
 	/* Processing Engine configuration */
 
@@ -348,73 +355,75 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 	val |= EIP197_ALG_AES_ECB | EIP197_ALG_AES_CBC;
 	val |= EIP197_ALG_SHA1 | EIP197_ALG_HMAC_SHA1;
 	val |= EIP197_ALG_SHA2;
-	writel(val, priv->base + EIP197_PE_EIP96_FUNCTION_EN);
+	writel(val, EIP197_PE(priv) + EIP197_PE_EIP96_FUNCTION_EN);
 
 	/* Command Descriptor Rings prepare */
 	for (i = 0; i < priv->config.rings; i++) {
 		/* Clear interrupts for this ring */
 		writel(GENMASK(31, 0),
-		       priv->base + EIP197_HIA_AIC_R_ENABLE_CLR(i));
+		       EIP197_HIA_AIC_R(priv) + EIP197_HIA_AIC_R_ENABLE_CLR(i));
 
 		/* Disable external triggering */
-		writel(0, priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_CFG);
+		writel(0, EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_CFG);
 
 		/* Clear the pending prepared counter */
 		writel(EIP197_xDR_PREP_CLR_COUNT,
-		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_PREP_COUNT);
+		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_PREP_COUNT);
 
 		/* Clear the pending processed counter */
 		writel(EIP197_xDR_PROC_CLR_COUNT,
-		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_PROC_COUNT);
+		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_PROC_COUNT);
 
 		writel(0,
-		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_PREP_PNTR);
+		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_PREP_PNTR);
 		writel(0,
-		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_PROC_PNTR);
+		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_PROC_PNTR);
 
 		writel((EIP197_DEFAULT_RING_SIZE * priv->config.cd_offset) << 2,
-		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_RING_SIZE);
+		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_RING_SIZE);
 	}
 
 	/* Result Descriptor Ring prepare */
 	for (i = 0; i < priv->config.rings; i++) {
 		/* Disable external triggering */
-		writel(0, priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_CFG);
+		writel(0, EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_CFG);
 
 		/* Clear the pending prepared counter */
 		writel(EIP197_xDR_PREP_CLR_COUNT,
-		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_PREP_COUNT);
+		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_PREP_COUNT);
 
 		/* Clear the pending processed counter */
 		writel(EIP197_xDR_PROC_CLR_COUNT,
-		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_PROC_COUNT);
+		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_PROC_COUNT);
 
 		writel(0,
-		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_PREP_PNTR);
+		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_PREP_PNTR);
 		writel(0,
-		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_PROC_PNTR);
+		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_PROC_PNTR);
 
 		/* Ring size */
 		writel((EIP197_DEFAULT_RING_SIZE * priv->config.rd_offset) << 2,
-		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_RING_SIZE);
+		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_RING_SIZE);
 	}
 
 	/* Enable command descriptor rings */
 	writel(EIP197_DxE_THR_CTRL_EN | GENMASK(priv->config.rings - 1, 0),
-	       priv->base + EIP197_HIA_DFE_THR_CTRL);
+	       EIP197_HIA_DFE_THR(priv) + EIP197_HIA_DFE_THR_CTRL);
 
 	/* Enable result descriptor rings */
 	writel(EIP197_DxE_THR_CTRL_EN | GENMASK(priv->config.rings - 1, 0),
-	       priv->base + EIP197_HIA_DSE_THR_CTRL);
+	       EIP197_HIA_DSE_THR(priv) + EIP197_HIA_DSE_THR_CTRL);
 
 	/* Clear any HIA interrupt */
-	writel(GENMASK(30, 20), priv->base + EIP197_HIA_AIC_G_ACK);
+	writel(GENMASK(30, 20), EIP197_HIA_AIC_G(priv) + EIP197_HIA_AIC_G_ACK);
 
-	eip197_trc_cache_init(priv);
+	if (priv->version == EIP197) {
+		eip197_trc_cache_init(priv);
 
-	ret = eip197_load_firmwares(priv);
-	if (ret)
-		return ret;
+		ret = eip197_load_firmwares(priv);
+		if (ret)
+			return ret;
+	}
 
 	safexcel_hw_setup_cdesc_rings(priv);
 	safexcel_hw_setup_rdesc_rings(priv);
@@ -422,6 +431,23 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 	return 0;
 }
 
+/* Called with the ring's lock held */
+static int safexcel_try_push_requests(struct safexcel_crypto_priv *priv,
+				      int ring, int reqs)
+{
+	int coal = min_t(int, reqs, EIP197_MAX_BATCH_SZ);
+
+	if (!coal)
+		return 0;
+
+	/* Configure when we want an interrupt */
+	writel(EIP197_HIA_RDR_THRESH_PKT_MODE |
+	       EIP197_HIA_RDR_THRESH_PROC_PKT(coal),
+	       EIP197_HIA_RDR(priv, ring) + EIP197_HIA_xDR_THRESH);
+
+	return coal;
+}
+
 void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring)
 {
 	struct crypto_async_request *req, *backlog;
@@ -429,34 +455,36 @@ void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring)
 	struct safexcel_request *request;
 	int ret, nreq = 0, cdesc = 0, rdesc = 0, commands, results;
 
-	priv->ring[ring].need_dequeue = false;
+	/* If a request wasn't properly dequeued because of a lack of resources,
+	 * process it first.
+	 */
+	req = priv->ring[ring].req;
+	backlog = priv->ring[ring].backlog;
+	if (req)
+		goto handle_req;
 
-	do {
+	while (true) {
 		spin_lock_bh(&priv->ring[ring].queue_lock);
 		backlog = crypto_get_backlog(&priv->ring[ring].queue);
 		req = crypto_dequeue_request(&priv->ring[ring].queue);
 		spin_unlock_bh(&priv->ring[ring].queue_lock);
 
-		if (!req)
+		if (!req) {
+			priv->ring[ring].req = NULL;
+			priv->ring[ring].backlog = NULL;
 			goto finalize;
+		}
 
+handle_req:
 		request = kzalloc(sizeof(*request), EIP197_GFP_FLAGS(*req));
-		if (!request) {
-			spin_lock_bh(&priv->ring[ring].queue_lock);
-			crypto_enqueue_request(&priv->ring[ring].queue, req);
-			spin_unlock_bh(&priv->ring[ring].queue_lock);
-
-			priv->ring[ring].need_dequeue = true;
-			goto finalize;
-		}
+		if (!request)
+			goto request_failed;
 
 		ctx = crypto_tfm_ctx(req->tfm);
 		ret = ctx->send(req, ring, request, &commands, &results);
 		if (ret) {
 			kfree(request);
-			req->complete(req, ret);
-			priv->ring[ring].need_dequeue = true;
-			goto finalize;
+			goto request_failed;
 		}
 
 		if (backlog)
@@ -468,30 +496,39 @@ void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring)
 
 		cdesc += commands;
 		rdesc += results;
-	} while (nreq++ < EIP197_MAX_BATCH_SZ);
+		nreq++;
+	}
+
+request_failed:
+	/* Not enough resources to handle all the requests. Bail out and save
+	 * the request and the backlog for the next dequeue call (per-ring).
+	 */
+	priv->ring[ring].req = req;
+	priv->ring[ring].backlog = backlog;
 
 finalize:
-	if (nreq == EIP197_MAX_BATCH_SZ)
-		priv->ring[ring].need_dequeue = true;
-	else if (!nreq)
+	if (!nreq)
 		return;
 
-	spin_lock_bh(&priv->ring[ring].lock);
+	spin_lock_bh(&priv->ring[ring].egress_lock);
 
-	/* Configure when we want an interrupt */
-	writel(EIP197_HIA_RDR_THRESH_PKT_MODE |
-	       EIP197_HIA_RDR_THRESH_PROC_PKT(nreq),
-	       priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_THRESH);
+	if (!priv->ring[ring].busy) {
+		nreq -= safexcel_try_push_requests(priv, ring, nreq);
+		if (nreq)
+			priv->ring[ring].busy = true;
+	}
+
+	priv->ring[ring].requests_left += nreq;
+
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
 
 	/* let the RDR know we have pending descriptors */
 	writel((rdesc * priv->config.rd_offset) << 2,
-	       priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_PREP_COUNT);
+	       EIP197_HIA_RDR(priv, ring) + EIP197_HIA_xDR_PREP_COUNT);
 
 	/* let the CDR know we have pending descriptors */
 	writel((cdesc * priv->config.cd_offset) << 2,
-	       priv->base + EIP197_HIA_CDR(ring) + EIP197_HIA_xDR_PREP_COUNT);
-
-	spin_unlock_bh(&priv->ring[ring].lock);
+	       EIP197_HIA_CDR(priv, ring) + EIP197_HIA_xDR_PREP_COUNT);
 }
 
 void safexcel_free_context(struct safexcel_crypto_priv *priv,
@@ -540,7 +577,6 @@ void safexcel_inv_complete(struct crypto_async_request *req, int error)
 }
 
 int safexcel_invalidate_cache(struct crypto_async_request *async,
-			      struct safexcel_context *ctx,
 			      struct safexcel_crypto_priv *priv,
 			      dma_addr_t ctxr_dma, int ring,
 			      struct safexcel_request *request)
@@ -587,14 +623,17 @@ static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv
 {
 	struct safexcel_request *sreq;
 	struct safexcel_context *ctx;
-	int ret, i, nreq, ndesc = 0;
+	int ret, i, nreq, ndesc, tot_descs, done;
 	bool should_complete;
 
-	nreq = readl(priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_PROC_COUNT);
-	nreq >>= 24;
-	nreq &= GENMASK(6, 0);
+handle_results:
+	tot_descs = 0;
+
+	nreq = readl(EIP197_HIA_RDR(priv, ring) + EIP197_HIA_xDR_PROC_COUNT);
+	nreq >>= EIP197_xDR_PROC_xD_PKT_OFFSET;
+	nreq &= EIP197_xDR_PROC_xD_PKT_MASK;
 	if (!nreq)
-		return;
+		goto requests_left;
 
 	for (i = 0; i < nreq; i++) {
 		spin_lock_bh(&priv->ring[ring].egress_lock);
@@ -609,13 +648,9 @@ static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv
 		if (ndesc < 0) {
 			kfree(sreq);
 			dev_err(priv->dev, "failed to handle result (%d)", ndesc);
-			return;
+			goto acknowledge;
 		}
 
-		writel(EIP197_xDR_PROC_xD_PKT(1) |
-		       EIP197_xDR_PROC_xD_COUNT(ndesc * priv->config.rd_offset),
-		       priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_PROC_COUNT);
-
 		if (should_complete) {
 			local_bh_disable();
 			sreq->req->complete(sreq->req, ret);
@@ -623,19 +658,41 @@ static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv
 		}
 
 		kfree(sreq);
+		tot_descs += ndesc;
 	}
+
+acknowledge:
+	if (i) {
+		writel(EIP197_xDR_PROC_xD_PKT(i) |
+		       EIP197_xDR_PROC_xD_COUNT(tot_descs * priv->config.rd_offset),
+		       EIP197_HIA_RDR(priv, ring) + EIP197_HIA_xDR_PROC_COUNT);
+	}
+
+	/* If the number of requests overflowed the counter, try to process
+	 * more requests.
+	 */
+	if (nreq == EIP197_xDR_PROC_xD_PKT_MASK)
+		goto handle_results;
+
+requests_left:
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+
+	done = safexcel_try_push_requests(priv, ring,
+					  priv->ring[ring].requests_left);
+
+	priv->ring[ring].requests_left -= done;
+	if (!done && !priv->ring[ring].requests_left)
+		priv->ring[ring].busy = false;
+
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
 }
 
-static void safexcel_handle_result_work(struct work_struct *work)
+static void safexcel_dequeue_work(struct work_struct *work)
 {
 	struct safexcel_work_data *data =
 			container_of(work, struct safexcel_work_data, work);
-	struct safexcel_crypto_priv *priv = data->priv;
-
-	safexcel_handle_result_descriptor(priv, data->ring);
 
-	if (priv->ring[data->ring].need_dequeue)
-		safexcel_dequeue(data->priv, data->ring);
+	safexcel_dequeue(data->priv, data->ring);
 }
 
 struct safexcel_ring_irq_data {
@@ -647,16 +704,16 @@ static irqreturn_t safexcel_irq_ring(int irq, void *data)
 {
 	struct safexcel_ring_irq_data *irq_data = data;
 	struct safexcel_crypto_priv *priv = irq_data->priv;
-	int ring = irq_data->ring;
+	int ring = irq_data->ring, rc = IRQ_NONE;
 	u32 status, stat;
 
-	status = readl(priv->base + EIP197_HIA_AIC_R_ENABLED_STAT(ring));
+	status = readl(EIP197_HIA_AIC_R(priv) + EIP197_HIA_AIC_R_ENABLED_STAT(ring));
 	if (!status)
-		return IRQ_NONE;
+		return rc;
 
 	/* RDR interrupts */
 	if (status & EIP197_RDR_IRQ(ring)) {
-		stat = readl(priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_STAT);
+		stat = readl(EIP197_HIA_RDR(priv, ring) + EIP197_HIA_xDR_STAT);
 
 		if (unlikely(stat & EIP197_xDR_ERR)) {
 			/*
@@ -666,22 +723,37 @@ static irqreturn_t safexcel_irq_ring(int irq, void *data)
 			 */
 			dev_err(priv->dev, "RDR: fatal error.");
 		} else if (likely(stat & EIP197_xDR_THRESH)) {
-			queue_work(priv->ring[ring].workqueue, &priv->ring[ring].work_data.work);
+			rc = IRQ_WAKE_THREAD;
 		}
 
 		/* ACK the interrupts */
 		writel(stat & 0xff,
-		       priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_STAT);
+		       EIP197_HIA_RDR(priv, ring) + EIP197_HIA_xDR_STAT);
 	}
 
 	/* ACK the interrupts */
-	writel(status, priv->base + EIP197_HIA_AIC_R_ACK(ring));
+	writel(status, EIP197_HIA_AIC_R(priv) + EIP197_HIA_AIC_R_ACK(ring));
+
+	return rc;
+}
+
+static irqreturn_t safexcel_irq_ring_thread(int irq, void *data)
+{
+	struct safexcel_ring_irq_data *irq_data = data;
+	struct safexcel_crypto_priv *priv = irq_data->priv;
+	int ring = irq_data->ring;
+
+	safexcel_handle_result_descriptor(priv, ring);
+
+	queue_work(priv->ring[ring].workqueue,
+		   &priv->ring[ring].work_data.work);
 
 	return IRQ_HANDLED;
 }
 
 static int safexcel_request_ring_irq(struct platform_device *pdev, const char *name,
 				     irq_handler_t handler,
+				     irq_handler_t threaded_handler,
 				     struct safexcel_ring_irq_data *ring_irq_priv)
 {
 	int ret, irq = platform_get_irq_byname(pdev, name);
@@ -691,8 +763,9 @@ static int safexcel_request_ring_irq(struct platform_device *pdev, const char *n
 		return irq;
 	}
 
-	ret = devm_request_irq(&pdev->dev, irq, handler, 0,
-			       dev_name(&pdev->dev), ring_irq_priv);
+	ret = devm_request_threaded_irq(&pdev->dev, irq, handler,
+					threaded_handler, IRQF_ONESHOT,
+					dev_name(&pdev->dev), ring_irq_priv);
 	if (ret) {
 		dev_err(&pdev->dev, "unable to request IRQ %d\n", irq);
 		return ret;
@@ -755,11 +828,11 @@ static void safexcel_configure(struct safexcel_crypto_priv *priv)
 {
 	u32 val, mask;
 
-	val = readl(priv->base + EIP197_HIA_OPTIONS);
+	val = readl(EIP197_HIA_AIC_G(priv) + EIP197_HIA_OPTIONS);
 	val = (val & GENMASK(27, 25)) >> 25;
 	mask = BIT(val) - 1;
 
-	val = readl(priv->base + EIP197_HIA_OPTIONS);
+	val = readl(EIP197_HIA_AIC_G(priv) + EIP197_HIA_OPTIONS);
 	priv->config.rings = min_t(u32, val & GENMASK(3, 0), max_rings);
 
 	priv->config.cd_size = (sizeof(struct safexcel_command_desc) / sizeof(u32));
@@ -769,6 +842,35 @@ static void safexcel_configure(struct safexcel_crypto_priv *priv)
 	priv->config.rd_offset = (priv->config.rd_size + mask) & ~mask;
 }
 
+static void safexcel_init_register_offsets(struct safexcel_crypto_priv *priv)
+{
+	struct safexcel_register_offsets *offsets = &priv->offsets;
+
+	if (priv->version == EIP197) {
+		offsets->hia_aic	= EIP197_HIA_AIC_BASE;
+		offsets->hia_aic_g	= EIP197_HIA_AIC_G_BASE;
+		offsets->hia_aic_r	= EIP197_HIA_AIC_R_BASE;
+		offsets->hia_aic_xdr	= EIP197_HIA_AIC_xDR_BASE;
+		offsets->hia_dfe	= EIP197_HIA_DFE_BASE;
+		offsets->hia_dfe_thr	= EIP197_HIA_DFE_THR_BASE;
+		offsets->hia_dse	= EIP197_HIA_DSE_BASE;
+		offsets->hia_dse_thr	= EIP197_HIA_DSE_THR_BASE;
+		offsets->hia_gen_cfg	= EIP197_HIA_GEN_CFG_BASE;
+		offsets->pe		= EIP197_PE_BASE;
+	} else {
+		offsets->hia_aic	= EIP97_HIA_AIC_BASE;
+		offsets->hia_aic_g	= EIP97_HIA_AIC_G_BASE;
+		offsets->hia_aic_r	= EIP97_HIA_AIC_R_BASE;
+		offsets->hia_aic_xdr	= EIP97_HIA_AIC_xDR_BASE;
+		offsets->hia_dfe	= EIP97_HIA_DFE_BASE;
+		offsets->hia_dfe_thr	= EIP97_HIA_DFE_THR_BASE;
+		offsets->hia_dse	= EIP97_HIA_DSE_BASE;
+		offsets->hia_dse_thr	= EIP97_HIA_DSE_THR_BASE;
+		offsets->hia_gen_cfg	= EIP97_HIA_GEN_CFG_BASE;
+		offsets->pe		= EIP97_PE_BASE;
+	}
+}
+
 static int safexcel_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -781,6 +883,9 @@ static int safexcel_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	priv->dev = dev;
+	priv->version = (enum safexcel_eip_version)of_device_get_match_data(dev);
+
+	safexcel_init_register_offsets(priv);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	priv->base = devm_ioremap_resource(dev, res);
@@ -839,6 +944,7 @@ static int safexcel_probe(struct platform_device *pdev)
 
 		snprintf(irq_name, 6, "ring%d", i);
 		irq = safexcel_request_ring_irq(pdev, irq_name, safexcel_irq_ring,
+						safexcel_irq_ring_thread,
 						ring_irq);
 		if (irq < 0) {
 			ret = irq;
@@ -847,7 +953,7 @@ static int safexcel_probe(struct platform_device *pdev)
 
 		priv->ring[i].work_data.priv = priv;
 		priv->ring[i].work_data.ring = i;
-		INIT_WORK(&priv->ring[i].work_data.work, safexcel_handle_result_work);
+		INIT_WORK(&priv->ring[i].work_data.work, safexcel_dequeue_work);
 
 		snprintf(wq_name, 9, "wq_ring%d", i);
 		priv->ring[i].workqueue = create_singlethread_workqueue(wq_name);
@@ -856,6 +962,9 @@ static int safexcel_probe(struct platform_device *pdev)
 			goto err_clk;
 		}
 
+		priv->ring[i].requests_left = 0;
+		priv->ring[i].busy = false;
+
 		crypto_init_queue(&priv->ring[i].queue,
 				  EIP197_DEFAULT_RING_SIZE);
 
@@ -903,7 +1012,14 @@ static int safexcel_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id safexcel_of_match_table[] = {
-	{ .compatible = "inside-secure,safexcel-eip197" },
+	{
+		.compatible = "inside-secure,safexcel-eip97",
+		.data = (void *)EIP97,
+	},
+	{
+		.compatible = "inside-secure,safexcel-eip197",
+		.data = (void *)EIP197,
+	},
 	{},
 };
 
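The requests_left/busy accounting introduced above is easiest to check in
isolation. Below is a hedged, standalone C model of that scheme: MAX_BATCH
stands in for EIP197_MAX_BATCH_SZ, a printf() for the threshold-register
writel(), and the egress lock is elided. It is an illustration of the
batching logic, not driver code.

#include <stdbool.h>
#include <stdio.h>

#define MAX_BATCH 64	/* stands in for EIP197_MAX_BATCH_SZ */

struct ring_state {
	int requests_left;	/* queued to HW beyond the armed threshold */
	bool busy;		/* a batch is currently in flight */
};

/* Mirrors safexcel_try_push_requests(): arm an interrupt after 'coal'
 * processed packets and report how many requests that covers.
 */
static int try_push(struct ring_state *r, int reqs)
{
	int coal = reqs < MAX_BATCH ? reqs : MAX_BATCH;

	if (!coal)
		return 0;
	printf("arm threshold IRQ after %d packets\n", coal);
	return coal;
}

/* Dequeue side: push a first batch if the ring is idle, account the rest. */
static void on_dequeue(struct ring_state *r, int nreq)
{
	if (!r->busy) {
		nreq -= try_push(r, nreq);
		if (nreq)
			r->busy = true;
	}
	r->requests_left += nreq;
}

/* Result-handler side: re-arm for the leftovers, or mark the ring idle. */
static void on_results(struct ring_state *r)
{
	int done = try_push(r, r->requests_left);

	r->requests_left -= done;
	if (!done && !r->requests_left)
		r->busy = false;
}

int main(void)
{
	struct ring_state r = { 0, false };

	on_dequeue(&r, 100);	/* arms for 64, accounts the other 36 */
	on_results(&r);		/* arms for the remaining 36 */
	on_results(&r);		/* nothing left: the ring goes idle */
	printf("busy=%d left=%d\n", r.busy, r.requests_left);
	return 0;
}

Tracing main() shows the intended coalescing: a first batch of 64 is armed
immediately, the remaining 36 are armed from the result handler, and the
ring is only marked idle once nothing is left.
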
diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
index 304c5838c11a..4e219c21608b 100644
--- a/drivers/crypto/inside-secure/safexcel.h
+++ b/drivers/crypto/inside-secure/safexcel.h
@@ -19,64 +19,103 @@
 #define EIP197_HIA_VERSION_BE			0x35ca
 
 /* Static configuration */
-#define EIP197_DEFAULT_RING_SIZE		64
+#define EIP197_DEFAULT_RING_SIZE		400
 #define EIP197_MAX_TOKENS			5
 #define EIP197_MAX_RINGS			4
 #define EIP197_FETCH_COUNT			1
-#define EIP197_MAX_BATCH_SZ			EIP197_DEFAULT_RING_SIZE
+#define EIP197_MAX_BATCH_SZ			64
 
 #define EIP197_GFP_FLAGS(base)	((base).flags & CRYPTO_TFM_REQ_MAY_SLEEP ? \
 				 GFP_KERNEL : GFP_ATOMIC)
 
+/* Register base offsets */
+#define EIP197_HIA_AIC(priv)		((priv)->base + (priv)->offsets.hia_aic)
+#define EIP197_HIA_AIC_G(priv)		((priv)->base + (priv)->offsets.hia_aic_g)
+#define EIP197_HIA_AIC_R(priv)		((priv)->base + (priv)->offsets.hia_aic_r)
+#define EIP197_HIA_AIC_xDR(priv)	((priv)->base + (priv)->offsets.hia_aic_xdr)
+#define EIP197_HIA_DFE(priv)		((priv)->base + (priv)->offsets.hia_dfe)
+#define EIP197_HIA_DFE_THR(priv)	((priv)->base + (priv)->offsets.hia_dfe_thr)
+#define EIP197_HIA_DSE(priv)		((priv)->base + (priv)->offsets.hia_dse)
+#define EIP197_HIA_DSE_THR(priv)	((priv)->base + (priv)->offsets.hia_dse_thr)
+#define EIP197_HIA_GEN_CFG(priv)	((priv)->base + (priv)->offsets.hia_gen_cfg)
+#define EIP197_PE(priv)			((priv)->base + (priv)->offsets.pe)
+
+/* EIP197 base offsets */
+#define EIP197_HIA_AIC_BASE		0x90000
+#define EIP197_HIA_AIC_G_BASE		0x90000
+#define EIP197_HIA_AIC_R_BASE		0x90800
+#define EIP197_HIA_AIC_xDR_BASE		0x80000
+#define EIP197_HIA_DFE_BASE		0x8c000
+#define EIP197_HIA_DFE_THR_BASE		0x8c040
+#define EIP197_HIA_DSE_BASE		0x8d000
+#define EIP197_HIA_DSE_THR_BASE		0x8d040
+#define EIP197_HIA_GEN_CFG_BASE		0xf0000
+#define EIP197_PE_BASE			0xa0000
+
+/* EIP97 base offsets */
+#define EIP97_HIA_AIC_BASE		0x0
+#define EIP97_HIA_AIC_G_BASE		0x0
+#define EIP97_HIA_AIC_R_BASE		0x0
+#define EIP97_HIA_AIC_xDR_BASE		0x0
+#define EIP97_HIA_DFE_BASE		0xf000
+#define EIP97_HIA_DFE_THR_BASE		0xf200
+#define EIP97_HIA_DSE_BASE		0xf400
+#define EIP97_HIA_DSE_THR_BASE		0xf600
+#define EIP97_HIA_GEN_CFG_BASE		0x10000
+#define EIP97_PE_BASE			0x10000
+
 /* CDR/RDR register offsets */
-#define EIP197_HIA_xDR_OFF(r)			(0x80000 + (r) * 0x1000)
-#define EIP197_HIA_CDR(r)			(EIP197_HIA_xDR_OFF(r))
-#define EIP197_HIA_RDR(r)			(EIP197_HIA_xDR_OFF(r) + 0x800)
-#define EIP197_HIA_xDR_RING_BASE_ADDR_LO	0x0
-#define EIP197_HIA_xDR_RING_BASE_ADDR_HI	0x4
-#define EIP197_HIA_xDR_RING_SIZE		0x18
-#define EIP197_HIA_xDR_DESC_SIZE		0x1c
-#define EIP197_HIA_xDR_CFG			0x20
-#define EIP197_HIA_xDR_DMA_CFG			0x24
-#define EIP197_HIA_xDR_THRESH			0x28
-#define EIP197_HIA_xDR_PREP_COUNT		0x2c
-#define EIP197_HIA_xDR_PROC_COUNT		0x30
-#define EIP197_HIA_xDR_PREP_PNTR		0x34
-#define EIP197_HIA_xDR_PROC_PNTR		0x38
-#define EIP197_HIA_xDR_STAT			0x3c
+#define EIP197_HIA_xDR_OFF(priv, r)		(EIP197_HIA_AIC_xDR(priv) + (r) * 0x1000)
+#define EIP197_HIA_CDR(priv, r)			(EIP197_HIA_xDR_OFF(priv, r))
+#define EIP197_HIA_RDR(priv, r)			(EIP197_HIA_xDR_OFF(priv, r) + 0x800)
+#define EIP197_HIA_xDR_RING_BASE_ADDR_LO	0x0000
+#define EIP197_HIA_xDR_RING_BASE_ADDR_HI	0x0004
+#define EIP197_HIA_xDR_RING_SIZE		0x0018
+#define EIP197_HIA_xDR_DESC_SIZE		0x001c
+#define EIP197_HIA_xDR_CFG			0x0020
+#define EIP197_HIA_xDR_DMA_CFG			0x0024
+#define EIP197_HIA_xDR_THRESH			0x0028
+#define EIP197_HIA_xDR_PREP_COUNT		0x002c
+#define EIP197_HIA_xDR_PROC_COUNT		0x0030
+#define EIP197_HIA_xDR_PREP_PNTR		0x0034
+#define EIP197_HIA_xDR_PROC_PNTR		0x0038
+#define EIP197_HIA_xDR_STAT			0x003c
 
 /* register offsets */
-#define EIP197_HIA_DFE_CFG			0x8c000
-#define EIP197_HIA_DFE_THR_CTRL			0x8c040
-#define EIP197_HIA_DFE_THR_STAT			0x8c044
-#define EIP197_HIA_DSE_CFG			0x8d000
-#define EIP197_HIA_DSE_THR_CTRL			0x8d040
-#define EIP197_HIA_DSE_THR_STAT			0x8d044
-#define EIP197_HIA_RA_PE_CTRL			0x90010
-#define EIP197_HIA_RA_PE_STAT			0x90014
+#define EIP197_HIA_DFE_CFG			0x0000
+#define EIP197_HIA_DFE_THR_CTRL			0x0000
+#define EIP197_HIA_DFE_THR_STAT			0x0004
+#define EIP197_HIA_DSE_CFG			0x0000
+#define EIP197_HIA_DSE_THR_CTRL			0x0000
+#define EIP197_HIA_DSE_THR_STAT			0x0004
+#define EIP197_HIA_RA_PE_CTRL			0x0010
+#define EIP197_HIA_RA_PE_STAT			0x0014
 #define EIP197_HIA_AIC_R_OFF(r)			((r) * 0x1000)
-#define EIP197_HIA_AIC_R_ENABLE_CTRL(r)		(0x9e808 - EIP197_HIA_AIC_R_OFF(r))
-#define EIP197_HIA_AIC_R_ENABLED_STAT(r)	(0x9e810 - EIP197_HIA_AIC_R_OFF(r))
-#define EIP197_HIA_AIC_R_ACK(r)			(0x9e810 - EIP197_HIA_AIC_R_OFF(r))
-#define EIP197_HIA_AIC_R_ENABLE_CLR(r)		(0x9e814 - EIP197_HIA_AIC_R_OFF(r))
-#define EIP197_HIA_AIC_G_ENABLE_CTRL		0x9f808
-#define EIP197_HIA_AIC_G_ENABLED_STAT		0x9f810
-#define EIP197_HIA_AIC_G_ACK			0x9f810
-#define EIP197_HIA_MST_CTRL			0x9fff4
-#define EIP197_HIA_OPTIONS			0x9fff8
-#define EIP197_HIA_VERSION			0x9fffc
-#define EIP197_PE_IN_DBUF_THRES			0xa0000
-#define EIP197_PE_IN_TBUF_THRES			0xa0100
-#define EIP197_PE_ICE_SCRATCH_RAM		0xa0800
-#define EIP197_PE_ICE_PUE_CTRL			0xa0c80
-#define EIP197_PE_ICE_SCRATCH_CTRL		0xa0d04
-#define EIP197_PE_ICE_FPP_CTRL			0xa0d80
-#define EIP197_PE_ICE_RAM_CTRL			0xa0ff0
-#define EIP197_PE_EIP96_FUNCTION_EN		0xa1004
-#define EIP197_PE_EIP96_CONTEXT_CTRL		0xa1008
-#define EIP197_PE_EIP96_CONTEXT_STAT		0xa100c
-#define EIP197_PE_OUT_DBUF_THRES		0xa1c00
-#define EIP197_PE_OUT_TBUF_THRES		0xa1d00
+#define EIP197_HIA_AIC_R_ENABLE_CTRL(r)		(0xe008 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_R_ENABLED_STAT(r)	(0xe010 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_R_ACK(r)			(0xe010 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_R_ENABLE_CLR(r)		(0xe014 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_G_ENABLE_CTRL		0xf808
+#define EIP197_HIA_AIC_G_ENABLED_STAT		0xf810
+#define EIP197_HIA_AIC_G_ACK			0xf810
+#define EIP197_HIA_MST_CTRL			0xfff4
+#define EIP197_HIA_OPTIONS			0xfff8
+#define EIP197_HIA_VERSION			0xfffc
+#define EIP197_PE_IN_DBUF_THRES			0x0000
+#define EIP197_PE_IN_TBUF_THRES			0x0100
+#define EIP197_PE_ICE_SCRATCH_RAM		0x0800
+#define EIP197_PE_ICE_PUE_CTRL			0x0c80
+#define EIP197_PE_ICE_SCRATCH_CTRL		0x0d04
+#define EIP197_PE_ICE_FPP_CTRL			0x0d80
+#define EIP197_PE_ICE_RAM_CTRL			0x0ff0
+#define EIP197_PE_EIP96_FUNCTION_EN		0x1004
+#define EIP197_PE_EIP96_CONTEXT_CTRL		0x1008
+#define EIP197_PE_EIP96_CONTEXT_STAT		0x100c
+#define EIP197_PE_OUT_DBUF_THRES		0x1c00
+#define EIP197_PE_OUT_TBUF_THRES		0x1d00
+#define EIP197_MST_CTRL				0xfff4
+
+/* EIP197-specific registers, no indirection */
 #define EIP197_CLASSIFICATION_RAMS		0xe0000
 #define EIP197_TRC_CTRL				0xf0800
 #define EIP197_TRC_LASTRES			0xf0804
@@ -90,7 +129,6 @@
 #define EIP197_TRC_ECCDATASTAT			0xf083c
 #define EIP197_TRC_ECCDATA			0xf0840
 #define EIP197_CS_RAM_CTRL			0xf7ff0
-#define EIP197_MST_CTRL				0xffff4
 
 /* EIP197_HIA_xDR_DESC_SIZE */
 #define EIP197_xDR_DESC_MODE_64BIT		BIT(31)
@@ -117,6 +155,8 @@
 #define EIP197_xDR_PREP_CLR_COUNT		BIT(31)
 
 /* EIP197_HIA_xDR_PROC_COUNT */
+#define EIP197_xDR_PROC_xD_PKT_OFFSET		24
+#define EIP197_xDR_PROC_xD_PKT_MASK		GENMASK(6, 0)
 #define EIP197_xDR_PROC_xD_COUNT(n)		((n) << 2)
 #define EIP197_xDR_PROC_xD_PKT(n)		((n) << 24)
 #define EIP197_xDR_PROC_CLR_COUNT		BIT(31)
@@ -463,12 +503,33 @@ struct safexcel_work_data {
 	int ring;
 };
 
+enum safexcel_eip_version {
+	EIP97,
+	EIP197,
+};
+
+struct safexcel_register_offsets {
+	u32 hia_aic;
+	u32 hia_aic_g;
+	u32 hia_aic_r;
+	u32 hia_aic_xdr;
+	u32 hia_dfe;
+	u32 hia_dfe_thr;
+	u32 hia_dse;
+	u32 hia_dse_thr;
+	u32 hia_gen_cfg;
+	u32 pe;
+};
+
 struct safexcel_crypto_priv {
 	void __iomem *base;
 	struct device *dev;
 	struct clk *clk;
 	struct safexcel_config config;
 
+	enum safexcel_eip_version version;
+	struct safexcel_register_offsets offsets;
+
 	/* context DMA pool */
 	struct dma_pool *context_pool;
 
@@ -489,7 +550,20 @@ struct safexcel_crypto_priv {
 		/* queue */
 		struct crypto_queue queue;
 		spinlock_t queue_lock;
-		bool need_dequeue;
+
+		/* Number of requests in the engine that need the threshold
+		 * interrupt to be set up.
+		 */
+		int requests_left;
+
+		/* The ring is currently handling at least one request */
+		bool busy;
+
+		/* Store for current requests when bailing out of the dequeueing
+		 * function when not enough resources are available.
+		 */
+		struct crypto_async_request *req;
+		struct crypto_async_request *backlog;
 	} ring[EIP197_MAX_RINGS];
 };
 
@@ -539,7 +613,6 @@ void safexcel_free_context(struct safexcel_crypto_priv *priv,
 				  struct crypto_async_request *req,
 				  int result_sz);
 int safexcel_invalidate_cache(struct crypto_async_request *async,
-			      struct safexcel_context *ctx,
 			      struct safexcel_crypto_priv *priv,
 			      dma_addr_t ctxr_dma, int ring,
 			      struct safexcel_request *request);
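
With the offset tables above, a register address is always computed as
base + block offset + block-relative offset. A standalone sketch, reusing
two real base values from this header, shows the resolution recomputing
exactly the absolute constants the patch removes (uintptr_t stands in for
the kernel's __iomem pointer arithmetic):

#include <stdint.h>
#include <stdio.h>

struct offsets { uint32_t hia_aic; };	/* trimmed down to one block */

#define HIA_VERSION 0xfffc	/* block-relative, shared by both variants */

static uintptr_t hia_aic(uintptr_t base, const struct offsets *o)
{
	return base + o->hia_aic;
}

int main(void)
{
	struct offsets eip197 = { .hia_aic = 0x90000 }; /* EIP197_HIA_AIC_BASE */
	struct offsets eip97  = { .hia_aic = 0x0 };     /* EIP97_HIA_AIC_BASE */
	uintptr_t base = 0;	/* stands in for the ioremap()ed base */

	/* prints 0x9fffc and 0xfffc: the old absolute constants */
	printf("EIP197 HIA_VERSION at %#lx\n",
	       (unsigned long)(hia_aic(base, &eip197) + HIA_VERSION));
	printf("EIP97  HIA_VERSION at %#lx\n",
	       (unsigned long)(hia_aic(base, &eip97) + HIA_VERSION));
	return 0;
}
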
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index fcc0a606d748..63a8768ed2ae 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -27,7 +27,6 @@ struct safexcel_cipher_ctx {
 	struct safexcel_context base;
 	struct safexcel_crypto_priv *priv;
 
-	enum safexcel_cipher_direction direction;
 	u32 mode;
 
 	__le32 key[8];
@@ -35,6 +34,7 @@ struct safexcel_cipher_ctx {
 };
 
 struct safexcel_cipher_req {
+	enum safexcel_cipher_direction direction;
 	bool needs_inv;
 };
 
@@ -69,6 +69,7 @@ static int safexcel_aes_setkey(struct crypto_skcipher *ctfm, const u8 *key,
 {
 	struct crypto_tfm *tfm = crypto_skcipher_tfm(ctfm);
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct crypto_aes_ctx aes;
 	int ret, i;
 
@@ -78,10 +79,12 @@ static int safexcel_aes_setkey(struct crypto_skcipher *ctfm, const u8 *key,
 		return ret;
 	}
 
-	for (i = 0; i < len / sizeof(u32); i++) {
-		if (ctx->key[i] != cpu_to_le32(aes.key_enc[i])) {
-			ctx->base.needs_inv = true;
-			break;
+	if (priv->version == EIP197 && ctx->base.ctxr_dma) {
+		for (i = 0; i < len / sizeof(u32); i++) {
+			if (ctx->key[i] != cpu_to_le32(aes.key_enc[i])) {
+				ctx->base.needs_inv = true;
+				break;
+			}
 		}
 	}
 
@@ -95,12 +98,15 @@ static int safexcel_aes_setkey(struct crypto_skcipher *ctfm, const u8 *key,
 }
 
 static int safexcel_context_control(struct safexcel_cipher_ctx *ctx,
+				    struct crypto_async_request *async,
 				    struct safexcel_command_desc *cdesc)
 {
 	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
 	int ctrl_size;
 
-	if (ctx->direction == SAFEXCEL_ENCRYPT)
+	if (sreq->direction == SAFEXCEL_ENCRYPT)
 		cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_CRYPTO_OUT;
 	else
 		cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_CRYPTO_IN;
@@ -243,7 +249,7 @@ static int safexcel_aes_send(struct crypto_async_request *async,
 		n_cdesc++;
 
 		if (n_cdesc == 1) {
-			safexcel_context_control(ctx, cdesc);
+			safexcel_context_control(ctx, async, cdesc);
 			safexcel_cipher_token(ctx, async, cdesc, req->cryptlen);
 		}
 
@@ -353,8 +359,8 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
 	if (enq_ret != -EINPROGRESS)
 		*ret = enq_ret;
 
-	if (!priv->ring[ring].need_dequeue)
-		safexcel_dequeue(priv, ring);
+	queue_work(priv->ring[ring].workqueue,
+		   &priv->ring[ring].work_data.work);
 
 	*should_complete = false;
 
@@ -390,7 +396,7 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async,
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	int ret;
 
-	ret = safexcel_invalidate_cache(async, &ctx->base, priv,
+	ret = safexcel_invalidate_cache(async, priv,
 					ctx->base.ctxr_dma, ring, request);
 	if (unlikely(ret))
 		return ret;
@@ -406,9 +412,13 @@ static int safexcel_send(struct crypto_async_request *async,
 			 int *commands, int *results)
 {
 	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
 	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
+	struct safexcel_crypto_priv *priv = ctx->priv;
 	int ret;
 
+	BUG_ON(priv->version == EIP97 && sreq->needs_inv);
+
 	if (sreq->needs_inv)
 		ret = safexcel_cipher_send_inv(async, ring, request,
 					       commands, results);
@@ -443,8 +453,8 @@ static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
 	crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
 	spin_unlock_bh(&priv->ring[ring].queue_lock);
 
-	if (!priv->ring[ring].need_dequeue)
-		safexcel_dequeue(priv, ring);
+	queue_work(priv->ring[ring].workqueue,
+		   &priv->ring[ring].work_data.work);
 
 	wait_for_completion_interruptible(&result.completion);
 
@@ -467,11 +477,11 @@ static int safexcel_aes(struct skcipher_request *req,
 	int ret, ring;
 
 	sreq->needs_inv = false;
-	ctx->direction = dir;
+	sreq->direction = dir;
 	ctx->mode = mode;
 
 	if (ctx->base.ctxr) {
-		if (ctx->base.needs_inv) {
+		if (priv->version == EIP197 && ctx->base.needs_inv) {
 			sreq->needs_inv = true;
 			ctx->base.needs_inv = false;
 		}
@@ -490,8 +500,8 @@ static int safexcel_aes(struct skcipher_request *req,
 	ret = crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
 	spin_unlock_bh(&priv->ring[ring].queue_lock);
 
-	if (!priv->ring[ring].need_dequeue)
-		safexcel_dequeue(priv, ring);
+	queue_work(priv->ring[ring].workqueue,
+		   &priv->ring[ring].work_data.work);
 
 	return ret;
 }
@@ -539,9 +549,14 @@ static void safexcel_skcipher_cra_exit(struct crypto_tfm *tfm)
 
 	memzero_explicit(ctx->base.ctxr->data, 8 * sizeof(u32));
 
-	ret = safexcel_cipher_exit_inv(tfm);
-	if (ret)
-		dev_warn(priv->dev, "cipher: invalidation error %d\n", ret);
+	if (priv->version == EIP197) {
+		ret = safexcel_cipher_exit_inv(tfm);
+		if (ret)
+			dev_warn(priv->dev, "cipher: invalidation error %d\n", ret);
+	} else {
+		dma_pool_free(priv->context_pool, ctx->base.ctxr,
+			      ctx->base.ctxr_dma);
+	}
 }
 
 struct safexcel_alg_template safexcel_alg_ecb_aes = {
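
Moving the direction field from the tfm context to the request context in
the hunks above follows from the crypto API's tfm/request split: a single
tfm can have encrypt and decrypt requests in flight at the same time, so
the direction is per-request state. A hedged, trimmed sketch of the split;
the field names shadow the driver's, and the enum values are placeholders:

#include <stdbool.h>
#include <stdint.h>

/* Per-tfm state: identical for every request issued on the tfm. */
struct cipher_tfm_ctx {
	uint32_t key[8];
	uint32_t mode;
};

/* Per-request state: two requests queued on the same tfm may run in
 * opposite directions, so direction must travel with the request.
 */
struct cipher_req_ctx {
	enum { SKETCH_ENCRYPT, SKETCH_DECRYPT } direction;
	bool needs_inv;
};
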
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 0c5a5820b06e..122a2a58e98f 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -14,7 +14,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/dmapool.h>
 
-
 #include "safexcel.h"
 
 struct safexcel_ahash_ctx {
@@ -34,6 +33,8 @@ struct safexcel_ahash_req {
 	bool hmac;
 	bool needs_inv;
 
+	int nents;
+
 	u8 state_sz;    /* expected state size, only set once */
 	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)] __aligned(sizeof(u32));
 
@@ -152,8 +153,10 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int rin
 		memcpy(areq->result, sreq->state,
 		       crypto_ahash_digestsize(ahash));
 
-	dma_unmap_sg(priv->dev, areq->src,
-		     sg_nents_for_len(areq->src, areq->nbytes), DMA_TO_DEVICE);
+	if (sreq->nents) {
+		dma_unmap_sg(priv->dev, areq->src, sreq->nents, DMA_TO_DEVICE);
+		sreq->nents = 0;
+	}
 
 	safexcel_free_context(priv, async, sreq->state_sz);
 
@@ -178,7 +181,7 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 	struct safexcel_command_desc *cdesc, *first_cdesc = NULL;
 	struct safexcel_result_desc *rdesc;
 	struct scatterlist *sg;
-	int i, nents, queued, len, cache_len, extra, n_cdesc = 0, ret = 0;
+	int i, queued, len, cache_len, extra, n_cdesc = 0, ret = 0;
 
 	queued = len = req->len - req->processed;
 	if (queued < crypto_ahash_blocksize(ahash))
@@ -186,17 +189,31 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 	else
 		cache_len = queued - areq->nbytes;
 
-	/*
-	 * If this is not the last request and the queued data does not fit
-	 * into full blocks, cache it for the next send() call.
-	 */
-	extra = queued & (crypto_ahash_blocksize(ahash) - 1);
-	if (!req->last_req && extra) {
-		sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
-				   req->cache_next, extra, areq->nbytes - extra);
-
-		queued -= extra;
-		len -= extra;
+	if (!req->last_req) {
+		/* If this is not the last request and the queued data does not
+		 * fit into full blocks, cache it for the next send() call.
+		 */
+		extra = queued & (crypto_ahash_blocksize(ahash) - 1);
+		if (!extra)
+			/* If this is not the last request and the queued data
+			 * is a multiple of a block, cache the last one for now.
+			 */
+			extra = queued - crypto_ahash_blocksize(ahash);
+
+		if (extra) {
+			sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
+					   req->cache_next, extra,
+					   areq->nbytes - extra);
+
+			queued -= extra;
+			len -= extra;
+
+			if (!queued) {
+				*commands = 0;
+				*results = 0;
+				return 0;
+			}
+		}
 	}
 
 	spin_lock_bh(&priv->ring[ring].egress_lock);
@@ -234,15 +251,15 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 	}
 
 	/* Now handle the current ahash request buffer(s) */
-	nents = dma_map_sg(priv->dev, areq->src,
-		       sg_nents_for_len(areq->src, areq->nbytes),
-		       DMA_TO_DEVICE);
-	if (!nents) {
+	req->nents = dma_map_sg(priv->dev, areq->src,
+				sg_nents_for_len(areq->src, areq->nbytes),
+				DMA_TO_DEVICE);
+	if (!req->nents) {
 		ret = -ENOMEM;
 		goto cdesc_rollback;
 	}
 
-	for_each_sg(areq->src, sg, nents, i) {
+	for_each_sg(areq->src, sg, req->nents, i) {
 		int sglen = sg_dma_len(sg);
 
 		/* Do not overflow the request */
@@ -382,8 +399,8 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
 	if (enq_ret != -EINPROGRESS)
 		*ret = enq_ret;
 
-	if (!priv->ring[ring].need_dequeue)
-		safexcel_dequeue(priv, ring);
+	queue_work(priv->ring[ring].workqueue,
+		   &priv->ring[ring].work_data.work);
 
 	*should_complete = false;
 
@@ -398,6 +415,8 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 	int err;
 
+	BUG_ON(priv->version == EIP97 && req->needs_inv);
+
 	if (req->needs_inv) {
 		req->needs_inv = false;
 		err = safexcel_handle_inv_result(priv, ring, async,
@@ -418,7 +437,7 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async,
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 	int ret;
 
-	ret = safexcel_invalidate_cache(async, &ctx->base, ctx->priv,
+	ret = safexcel_invalidate_cache(async, ctx->priv,
 					ctx->base.ctxr_dma, ring, request);
 	if (unlikely(ret))
 		return ret;
@@ -471,8 +490,8 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
 	crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
 	spin_unlock_bh(&priv->ring[ring].queue_lock);
 
-	if (!priv->ring[ring].need_dequeue)
-		safexcel_dequeue(priv, ring);
+	queue_work(priv->ring[ring].workqueue,
+		   &priv->ring[ring].work_data.work);
 
 	wait_for_completion_interruptible(&result.completion);
 
@@ -485,13 +504,23 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
 	return 0;
 }
 
+/* safexcel_ahash_cache: cache data until at least one request can be sent to
+ * the engine, i.e. when there is at least one block size of data in the pipe.
+ */
 static int safexcel_ahash_cache(struct ahash_request *areq)
 {
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
 	int queued, cache_len;
 
+	/* cache_len: everything accepted by the driver but not sent yet,
+	 * tot sz handled by update() - last req sz - tot sz handled by send()
+	 */
 	cache_len = req->len - areq->nbytes - req->processed;
+	/* queued: everything accepted by the driver which will be handled by
+	 * the next send() calls.
+	 * tot sz handled by update() - tot sz handled by send()
+	 */
 	queued = req->len - req->processed;
 
 	/*
@@ -505,7 +534,7 @@ static int safexcel_ahash_cache(struct ahash_request *areq)
 		return areq->nbytes;
 	}
 
-	/* We could'nt cache all the data */
+	/* We couldn't cache all the data */
 	return -E2BIG;
 }
 
@@ -518,10 +547,17 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq)
 
 	req->needs_inv = false;
 
-	if (req->processed && ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)
-		ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq);
-
 	if (ctx->base.ctxr) {
+		if (priv->version == EIP197 &&
+		    !ctx->base.needs_inv && req->processed &&
+		    ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)
+			/* We're still setting needs_inv here, even though it is
+			 * cleared right away, because the needs_inv flag can be
+			 * set in other functions and we want to keep the same
+			 * logic.
+			 */
+			ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq);
+
 		if (ctx->base.needs_inv) {
 			ctx->base.needs_inv = false;
 			req->needs_inv = true;
@@ -541,8 +577,8 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq)
 	ret = crypto_enqueue_request(&priv->ring[ring].queue, &areq->base);
 	spin_unlock_bh(&priv->ring[ring].queue_lock);
 
-	if (!priv->ring[ring].need_dequeue)
-		safexcel_dequeue(priv, ring);
+	queue_work(priv->ring[ring].workqueue,
+		   &priv->ring[ring].work_data.work);
 
 	return ret;
 }
@@ -625,7 +661,6 @@ static int safexcel_ahash_export(struct ahash_request *areq, void *out)
 	export->processed = req->processed;
 
 	memcpy(export->state, req->state, req->state_sz);
-	memset(export->cache, 0, crypto_ahash_blocksize(ahash));
 	memcpy(export->cache, req->cache, crypto_ahash_blocksize(ahash));
 
 	return 0;
@@ -707,9 +742,14 @@ static void safexcel_ahash_cra_exit(struct crypto_tfm *tfm)
 	if (!ctx->base.ctxr)
 		return;
 
-	ret = safexcel_ahash_exit_inv(tfm);
-	if (ret)
-		dev_warn(priv->dev, "hash: invalidation error %d\n", ret);
+	if (priv->version == EIP197) {
+		ret = safexcel_ahash_exit_inv(tfm);
+		if (ret)
+			dev_warn(priv->dev, "hash: invalidation error %d\n", ret);
+	} else {
+		dma_pool_free(priv->context_pool, ctx->base.ctxr,
+			      ctx->base.ctxr_dma);
+	}
 }
 
 struct safexcel_alg_template safexcel_alg_sha1 = {
@@ -848,7 +888,7 @@ static int safexcel_hmac_init_iv(struct ahash_request *areq,
 	req->last_req = true;
 
 	ret = crypto_ahash_update(areq);
-	if (ret && ret != -EINPROGRESS)
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY)
 		return ret;
 
 	wait_for_completion_interruptible(&result.completion);
@@ -913,6 +953,7 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
 				     unsigned int keylen)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct safexcel_ahash_export_state istate, ostate;
 	int ret, i;
 
@@ -920,11 +961,13 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
 	if (ret)
 		return ret;
 
-	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) {
-		if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) ||
-		    ctx->opad[i] != le32_to_cpu(ostate.state[i])) {
-			ctx->base.needs_inv = true;
-			break;
+	if (priv->version == EIP197 && ctx->base.ctxr) {
+		for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) {
+			if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) ||
+			    ctx->opad[i] != le32_to_cpu(ostate.state[i])) {
+				ctx->base.needs_inv = true;
+				break;
+			}
 		}
 	}
 
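The cache_len/queued bookkeeping documented above is pure arithmetic, and a
worked example makes the split explicit. A standalone sketch of the
non-final update() path, assuming a 64-byte block; the variable names
shadow the driver's, and the engine and scatterlist handling are elided:

#include <stdio.h>

#define BLOCKSIZE 64	/* SHA-1/SHA-256 block size */

int main(void)
{
	int len = 80;		/* total bytes seen by update() so far */
	int processed = 0;	/* bytes already handed to the engine */
	int nbytes = 70;	/* payload of the current update() call */

	int queued = len - processed;			/* 80 */
	int cache_len = len - nbytes - processed;	/* 10 cached before */
	int extra = queued & (BLOCKSIZE - 1);		/* 16: partial block */

	if (!extra)	/* block multiple: keep data back for later */
		extra = queued - BLOCKSIZE;

	queued -= extra;	/* 64: exactly one full block goes out now */
	printf("send %d bytes, cache %d (had %d cached)\n",
	       queued, extra, cache_len);
	return 0;
}
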
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 8705b28eb02c..717a26607bdb 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -260,12 +260,11 @@ static int setup_crypt_desc(void)
 {
 	struct device *dev = &pdev->dev;
 	BUILD_BUG_ON(sizeof(struct crypt_ctl) != 64);
-	crypt_virt = dma_alloc_coherent(dev,
-			NPE_QLEN * sizeof(struct crypt_ctl),
-			&crypt_phys, GFP_ATOMIC);
+	crypt_virt = dma_zalloc_coherent(dev,
+					 NPE_QLEN * sizeof(struct crypt_ctl),
+					 &crypt_phys, GFP_ATOMIC);
 	if (!crypt_virt)
 		return -ENOMEM;
-	memset(crypt_virt, 0, NPE_QLEN * sizeof(struct crypt_ctl));
 	return 0;
 }
 
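For context, dma_zalloc_coherent() folds the zeroing into the allocation,
which is what makes the separate memset() removable here. Its shape is
approximately the following (see include/linux/dma-mapping.h for the
authoritative definition in this kernel):

static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
					dma_addr_t *dma_handle, gfp_t flag)
{
	return dma_alloc_coherent(dev, size, dma_handle,
				  flag | __GFP_ZERO);
}
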
diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c
index 3a0c40081ffb..aca2373fa1de 100644
--- a/drivers/crypto/marvell/cesa.c
+++ b/drivers/crypto/marvell/cesa.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/genalloc.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -410,8 +411,11 @@ static int mv_cesa_get_sram(struct platform_device *pdev, int idx)
 	if (IS_ERR(engine->sram))
 		return PTR_ERR(engine->sram);
 
-	engine->sram_dma = phys_to_dma(cesa->dev,
-				       (phys_addr_t)res->start);
+	engine->sram_dma = dma_map_resource(cesa->dev, res->start,
+					    cesa->sram_size,
+					    DMA_BIDIRECTIONAL, 0);
+	if (dma_mapping_error(cesa->dev, engine->sram_dma))
+		return -ENOMEM;
 
 	return 0;
 }
@@ -421,11 +425,12 @@ static void mv_cesa_put_sram(struct platform_device *pdev, int idx)
 	struct mv_cesa_dev *cesa = platform_get_drvdata(pdev);
 	struct mv_cesa_engine *engine = &cesa->engines[idx];
 
-	if (!engine->pool)
-		return;
-
-	gen_pool_free(engine->pool, (unsigned long)engine->sram,
-		      cesa->sram_size);
+	if (engine->pool)
+		gen_pool_free(engine->pool, (unsigned long)engine->sram,
+			      cesa->sram_size);
+	else
+		dma_unmap_resource(cesa->dev, engine->sram_dma,
+				   cesa->sram_size, DMA_BIDIRECTIONAL, 0);
 }
 
 static int mv_cesa_probe(struct platform_device *pdev)
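
The CESA hunks above replace a bare phys_to_dma() translation with a real
streaming mapping of the SRAM resource: dma_map_resource() goes through the
DMA API (and any IOMMU), must be checked with dma_mapping_error(), and must
be undone by a dma_unmap_resource() with the same size and direction. A
hedged sketch of that pairing, with the probe/remove plumbing elided:

#include <linux/dma-mapping.h>

/* Sketch only: 'start' and 'size' come from the platform resource. */
static int sram_dma_map(struct device *dev, phys_addr_t start,
			size_t size, dma_addr_t *dma)
{
	*dma = dma_map_resource(dev, start, size, DMA_BIDIRECTIONAL, 0);
	if (dma_mapping_error(dev, *dma))
		return -ENOMEM;
	return 0;
}

/* Must mirror the map: same device, size and direction. */
static void sram_dma_unmap(struct device *dev, dma_addr_t dma, size_t size)
{
	dma_unmap_resource(dev, dma, size, DMA_BIDIRECTIONAL, 0);
}
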
diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
index f2246a5abcf6..1e87637c412d 100644
--- a/drivers/crypto/nx/nx-842-powernv.c
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -743,8 +743,8 @@ static int nx842_open_percpu_txwins(void)
 		}
 
 		if (!per_cpu(cpu_txwin, i)) {
-			/* shoudn't happen, Each chip will have NX engine */
-			pr_err("NX engine is not availavle for CPU %d\n", i);
+			/* shouldn't happen, each chip will have an NX engine */
+			pr_err("NX engine is not available for CPU %d\n", i);
 			return -EINVAL;
 		}
 	}
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index 5a6dc53b2b9d..4ef52c9d72fc 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -1618,7 +1618,7 @@ MODULE_DEVICE_TABLE(of, spacc_of_id_table);
 
 static int spacc_probe(struct platform_device *pdev)
 {
-	int i, err, ret = -EINVAL;
+	int i, err, ret;
 	struct resource *mem, *irq;
 	struct device_node *np = pdev->dev.of_node;
 	struct spacc_engine *engine = devm_kzalloc(&pdev->dev, sizeof(*engine),
@@ -1679,22 +1679,18 @@ static int spacc_probe(struct platform_device *pdev)
 	engine->clk = clk_get(&pdev->dev, "ref");
 	if (IS_ERR(engine->clk)) {
 		dev_info(&pdev->dev, "clk unavailable\n");
-		device_remove_file(&pdev->dev, &dev_attr_stat_irq_thresh);
 		return PTR_ERR(engine->clk);
 	}
 
 	if (clk_prepare_enable(engine->clk)) {
 		dev_info(&pdev->dev, "unable to prepare/enable clk\n");
-		clk_put(engine->clk);
-		return -EIO;
+		ret = -EIO;
+		goto err_clk_put;
 	}
 
-	err = device_create_file(&pdev->dev, &dev_attr_stat_irq_thresh);
-	if (err) {
-		clk_disable_unprepare(engine->clk);
-		clk_put(engine->clk);
-		return err;
-	}
+	ret = device_create_file(&pdev->dev, &dev_attr_stat_irq_thresh);
+	if (ret)
+		goto err_clk_disable;
 
 
 	/*
@@ -1725,6 +1721,7 @@ static int spacc_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, engine);
 
+	ret = -EINVAL;
 	INIT_LIST_HEAD(&engine->registered_algs);
 	for (i = 0; i < engine->num_algs; ++i) {
 		engine->algs[i].engine = engine;
@@ -1759,6 +1756,16 @@ static int spacc_probe(struct platform_device *pdev)
 				engine->aeads[i].alg.base.cra_name);
 	}
 
+	if (!ret)
+		return 0;
+
+	del_timer_sync(&engine->packet_timeout);
+	device_remove_file(&pdev->dev, &dev_attr_stat_irq_thresh);
+err_clk_disable:
+	clk_disable_unprepare(engine->clk);
+err_clk_put:
+	clk_put(engine->clk);
+
 	return ret;
 }
 
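The picoxcell rework above converges on the usual kernel error-unwinding
shape: acquire resources in order, release them in reverse order through
fall-through labels, so each failure path is written once. A generic hedged
sketch; clk_acquire() and friends are hypothetical helpers standing in for
clk_get(), clk_prepare_enable() and device_create_file():

int clk_acquire(void);		/* hypothetical helpers: only the */
int clk_start(void);		/* control flow matters here */
int sysfs_create(void);
void clk_stop(void);
void clk_release(void);

static int probe_sketch(void)
{
	int ret;

	ret = clk_acquire();
	if (ret)
		return ret;

	ret = clk_start();
	if (ret)
		goto err_clk_put;

	ret = sysfs_create();
	if (ret)
		goto err_clk_disable;

	return 0;

err_clk_disable:	/* labels unwind in reverse acquisition order */
	clk_stop();
err_clk_put:
	clk_release();
	return ret;
}
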
diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c
index 8c4fd255a601..ff149e176f64 100644
--- a/drivers/crypto/qat/qat_common/qat_hal.c
+++ b/drivers/crypto/qat/qat_common/qat_hal.c
@@ -117,19 +117,19 @@ void qat_hal_set_live_ctx(struct icp_qat_fw_loader_handle *handle,
 
 #define CSR_RETRY_TIMES 500
 static int qat_hal_rd_ae_csr(struct icp_qat_fw_loader_handle *handle,
-			     unsigned char ae, unsigned int csr,
-			     unsigned int *value)
+			     unsigned char ae, unsigned int csr)
 {
 	unsigned int iterations = CSR_RETRY_TIMES;
+	int value;
 
 	do {
-		*value = GET_AE_CSR(handle, ae, csr);
+		value = GET_AE_CSR(handle, ae, csr);
 		if (!(GET_AE_CSR(handle, ae, LOCAL_CSR_STATUS) & LCS_STATUS))
-			return 0;
+			return value;
 	} while (iterations--);
 
 	pr_err("QAT: Read CSR timeout\n");
-	return -EFAULT;
+	return 0;
 }
 
 static int qat_hal_wr_ae_csr(struct icp_qat_fw_loader_handle *handle,
@@ -154,9 +154,9 @@ static void qat_hal_get_wakeup_event(struct icp_qat_fw_loader_handle *handle,
 {
 	unsigned int cur_ctx;
 
-	qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER, &cur_ctx);
+	cur_ctx = qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER);
 	qat_hal_wr_ae_csr(handle, ae, CSR_CTX_POINTER, ctx);
-	qat_hal_rd_ae_csr(handle, ae, CTX_WAKEUP_EVENTS_INDIRECT, events);
+	*events = qat_hal_rd_ae_csr(handle, ae, CTX_WAKEUP_EVENTS_INDIRECT);
 	qat_hal_wr_ae_csr(handle, ae, CSR_CTX_POINTER, cur_ctx);
 }
 
@@ -169,13 +169,13 @@ static int qat_hal_wait_cycles(struct icp_qat_fw_loader_handle *handle,
 	int times = MAX_RETRY_TIMES;
 	int elapsed_cycles = 0;
 
-	qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT, &base_cnt);
+	base_cnt = qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT);
 	base_cnt &= 0xffff;
 	while ((int)cycles > elapsed_cycles && times--) {
 		if (chk_inactive)
-			qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS, &csr);
+			csr = qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS);
 
-		qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT, &cur_cnt);
+		cur_cnt = qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT);
 		cur_cnt &= 0xffff;
 		elapsed_cycles = cur_cnt - base_cnt;
 
@@ -207,7 +207,7 @@ int qat_hal_set_ae_ctx_mode(struct icp_qat_fw_loader_handle *handle,
 	}
 
 	/* Sets the acceleration engine context mode to either four or eight */
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &csr);
+	csr = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	csr = IGNORE_W1C_MASK & csr;
 	new_csr = (mode == 4) ?
 		SET_BIT(csr, CE_INUSE_CONTEXTS_BITPOS) :
@@ -221,7 +221,7 @@ int qat_hal_set_ae_nn_mode(struct icp_qat_fw_loader_handle *handle,
 {
 	unsigned int csr, new_csr;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &csr);
+	csr = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	csr &= IGNORE_W1C_MASK;
 
 	new_csr = (mode) ?
@@ -240,7 +240,7 @@ int qat_hal_set_ae_lm_mode(struct icp_qat_fw_loader_handle *handle,
 {
 	unsigned int csr, new_csr;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &csr);
+	csr = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	csr &= IGNORE_W1C_MASK;
 	switch (lm_type) {
 	case ICP_LMEM0:
@@ -328,7 +328,7 @@ static void qat_hal_wr_indr_csr(struct icp_qat_fw_loader_handle *handle,
 {
 	unsigned int ctx, cur_ctx;
 
-	qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER, &cur_ctx);
+	cur_ctx = qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER);
 
 	for (ctx = 0; ctx < ICP_QAT_UCLO_MAX_CTX; ctx++) {
 		if (!(ctx_mask & (1 << ctx)))
@@ -340,16 +340,18 @@ static void qat_hal_wr_indr_csr(struct icp_qat_fw_loader_handle *handle,
 	qat_hal_wr_ae_csr(handle, ae, CSR_CTX_POINTER, cur_ctx);
 }
 
-static void qat_hal_rd_indr_csr(struct icp_qat_fw_loader_handle *handle,
+static unsigned int qat_hal_rd_indr_csr(struct icp_qat_fw_loader_handle *handle,
 				unsigned char ae, unsigned char ctx,
-				unsigned int ae_csr, unsigned int *csr_val)
+				unsigned int ae_csr)
 {
-	unsigned int cur_ctx;
+	unsigned int cur_ctx, csr_val;
 
-	qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER, &cur_ctx);
+	cur_ctx = qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER);
 	qat_hal_wr_ae_csr(handle, ae, CSR_CTX_POINTER, ctx);
-	qat_hal_rd_ae_csr(handle, ae, ae_csr, csr_val);
+	csr_val = qat_hal_rd_ae_csr(handle, ae, ae_csr);
 	qat_hal_wr_ae_csr(handle, ae, CSR_CTX_POINTER, cur_ctx);
+
+	return csr_val;
 }
 
 static void qat_hal_put_sig_event(struct icp_qat_fw_loader_handle *handle,
@@ -358,7 +360,7 @@ static void qat_hal_put_sig_event(struct icp_qat_fw_loader_handle *handle,
 {
 	unsigned int ctx, cur_ctx;
 
-	qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER, &cur_ctx);
+	cur_ctx = qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER);
 	for (ctx = 0; ctx < ICP_QAT_UCLO_MAX_CTX; ctx++) {
 		if (!(ctx_mask & (1 << ctx)))
 			continue;
@@ -374,7 +376,7 @@ static void qat_hal_put_wakeup_event(struct icp_qat_fw_loader_handle *handle,
 {
 	unsigned int ctx, cur_ctx;
 
-	qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER, &cur_ctx);
+	cur_ctx = qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER);
 	for (ctx = 0; ctx < ICP_QAT_UCLO_MAX_CTX; ctx++) {
 		if (!(ctx_mask & (1 << ctx)))
 			continue;
@@ -392,13 +394,11 @@ static int qat_hal_check_ae_alive(struct icp_qat_fw_loader_handle *handle)
 	int times = MAX_RETRY_TIMES;
 
 	for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) {
-		qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT,
-				  (unsigned int *)&base_cnt);
+		base_cnt = qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT);
 		base_cnt &= 0xffff;
 
 		do {
-			qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT,
-					  (unsigned int *)&cur_cnt);
+			cur_cnt = qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT);
 			cur_cnt &= 0xffff;
 		} while (times-- && (cur_cnt == base_cnt));
 
@@ -416,8 +416,8 @@ int qat_hal_check_ae_active(struct icp_qat_fw_loader_handle *handle,
 {
 	unsigned int enable = 0, active = 0;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &enable);
-	qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS, &active);
+	enable = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
+	active = qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS);
 	if ((enable & (0xff << CE_ENABLE_BITPOS)) ||
 	    (active & (1 << ACS_ABO_BITPOS)))
 		return 1;
@@ -540,7 +540,7 @@ static void qat_hal_disable_ctx(struct icp_qat_fw_loader_handle *handle,
 {
 	unsigned int ctx;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx);
+	ctx = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	ctx &= IGNORE_W1C_MASK &
 		(~((ctx_mask & ICP_QAT_UCLO_AE_ALL_CTX) << CE_ENABLE_BITPOS));
 	qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, ctx);
@@ -583,7 +583,7 @@ void qat_hal_wr_uwords(struct icp_qat_fw_loader_handle *handle,
 	unsigned int ustore_addr;
 	unsigned int i;
 
-	qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS, &ustore_addr);
+	ustore_addr = qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS);
 	uaddr |= UA_ECS;
 	qat_hal_wr_ae_csr(handle, ae, USTORE_ADDRESS, uaddr);
 	for (i = 0; i < words_num; i++) {
@@ -604,7 +604,7 @@ static void qat_hal_enable_ctx(struct icp_qat_fw_loader_handle *handle,
 {
 	unsigned int ctx;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx);
+	ctx = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	ctx &= IGNORE_W1C_MASK;
 	ctx_mask &= (ctx & CE_INUSE_CONTEXTS) ? 0x55 : 0xFF;
 	ctx |= (ctx_mask << CE_ENABLE_BITPOS);
@@ -636,10 +636,10 @@ static int qat_hal_clear_gpr(struct icp_qat_fw_loader_handle *handle)
 	int ret = 0;
 
 	for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) {
-		qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL, &csr_val);
+		csr_val = qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL);
 		csr_val &= ~(1 << MMC_SHARE_CS_BITPOS);
 		qat_hal_wr_ae_csr(handle, ae, AE_MISC_CONTROL, csr_val);
-		qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &csr_val);
+		csr_val = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 		csr_val &= IGNORE_W1C_MASK;
 		csr_val |= CE_NN_MODE;
 		qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, csr_val);
@@ -648,7 +648,7 @@ static int qat_hal_clear_gpr(struct icp_qat_fw_loader_handle *handle)
 		qat_hal_wr_indr_csr(handle, ae, ctx_mask, CTX_STS_INDIRECT,
 				    handle->hal_handle->upc_mask &
 				    INIT_PC_VALUE);
-		qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS, &savctx);
+		savctx = qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS);
 		qat_hal_wr_ae_csr(handle, ae, ACTIVE_CTX_STATUS, 0);
 		qat_hal_put_wakeup_event(handle, ae, ctx_mask, XCWE_VOLUNTARY);
 		qat_hal_wr_indr_csr(handle, ae, ctx_mask,
@@ -760,7 +760,7 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
 	for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) {
 		unsigned int csr_val = 0;
 
-		qat_hal_rd_ae_csr(handle, ae, SIGNATURE_ENABLE, &csr_val);
+		csr_val = qat_hal_rd_ae_csr(handle, ae, SIGNATURE_ENABLE);
 		csr_val |= 0x1;
 		qat_hal_wr_ae_csr(handle, ae, SIGNATURE_ENABLE, csr_val);
 	}
@@ -826,16 +826,16 @@ static void qat_hal_get_uwords(struct icp_qat_fw_loader_handle *handle,
 	unsigned int i, uwrd_lo, uwrd_hi;
 	unsigned int ustore_addr, misc_control;
 
-	qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL, &misc_control);
+	misc_control = qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL);
 	qat_hal_wr_ae_csr(handle, ae, AE_MISC_CONTROL,
 			  misc_control & 0xfffffffb);
-	qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS, &ustore_addr);
+	ustore_addr = qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS);
 	uaddr |= UA_ECS;
 	for (i = 0; i < words_num; i++) {
 		qat_hal_wr_ae_csr(handle, ae, USTORE_ADDRESS, uaddr);
 		uaddr++;
-		qat_hal_rd_ae_csr(handle, ae, USTORE_DATA_LOWER, &uwrd_lo);
-		qat_hal_rd_ae_csr(handle, ae, USTORE_DATA_UPPER, &uwrd_hi);
+		uwrd_lo = qat_hal_rd_ae_csr(handle, ae, USTORE_DATA_LOWER);
+		uwrd_hi = qat_hal_rd_ae_csr(handle, ae, USTORE_DATA_UPPER);
 		uword[i] = uwrd_hi;
 		uword[i] = (uword[i] << 0x20) | uwrd_lo;
 	}
@@ -849,7 +849,7 @@ void qat_hal_wr_umem(struct icp_qat_fw_loader_handle *handle,
 {
 	unsigned int i, ustore_addr;
 
-	qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS, &ustore_addr);
+	ustore_addr = qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS);
 	uaddr |= UA_ECS;
 	qat_hal_wr_ae_csr(handle, ae, USTORE_ADDRESS, uaddr);
 	for (i = 0; i < words_num; i++) {
@@ -890,26 +890,27 @@ static int qat_hal_exec_micro_inst(struct icp_qat_fw_loader_handle *handle,
 		return -EINVAL;
 	}
 	/* save current context */
-	qat_hal_rd_indr_csr(handle, ae, ctx, LM_ADDR_0_INDIRECT, &ind_lm_addr0);
-	qat_hal_rd_indr_csr(handle, ae, ctx, LM_ADDR_1_INDIRECT, &ind_lm_addr1);
-	qat_hal_rd_indr_csr(handle, ae, ctx, INDIRECT_LM_ADDR_0_BYTE_INDEX,
-			    &ind_lm_addr_byte0);
-	qat_hal_rd_indr_csr(handle, ae, ctx, INDIRECT_LM_ADDR_1_BYTE_INDEX,
-			    &ind_lm_addr_byte1);
+	ind_lm_addr0 = qat_hal_rd_indr_csr(handle, ae, ctx, LM_ADDR_0_INDIRECT);
+	ind_lm_addr1 = qat_hal_rd_indr_csr(handle, ae, ctx, LM_ADDR_1_INDIRECT);
+	ind_lm_addr_byte0 = qat_hal_rd_indr_csr(handle, ae, ctx,
+						INDIRECT_LM_ADDR_0_BYTE_INDEX);
+	ind_lm_addr_byte1 = qat_hal_rd_indr_csr(handle, ae, ctx,
+						INDIRECT_LM_ADDR_1_BYTE_INDEX);
 	if (inst_num <= MAX_EXEC_INST)
 		qat_hal_get_uwords(handle, ae, 0, inst_num, savuwords);
 	qat_hal_get_wakeup_event(handle, ae, ctx, &wakeup_events);
-	qat_hal_rd_indr_csr(handle, ae, ctx, CTX_STS_INDIRECT, &savpc);
+	savpc = qat_hal_rd_indr_csr(handle, ae, ctx, CTX_STS_INDIRECT);
 	savpc = (savpc & handle->hal_handle->upc_mask) >> 0;
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx_enables);
+	ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	ctx_enables &= IGNORE_W1C_MASK;
-	qat_hal_rd_ae_csr(handle, ae, CC_ENABLE, &savcc);
-	qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS, &savctx);
-	qat_hal_rd_ae_csr(handle, ae, CTX_ARB_CNTL, &ctxarb_ctl);
-	qat_hal_rd_indr_csr(handle, ae, ctx, FUTURE_COUNT_SIGNAL_INDIRECT,
-			    &ind_cnt_sig);
-	qat_hal_rd_indr_csr(handle, ae, ctx, CTX_SIG_EVENTS_INDIRECT, &ind_sig);
-	qat_hal_rd_ae_csr(handle, ae, CTX_SIG_EVENTS_ACTIVE, &act_sig);
+	savcc = qat_hal_rd_ae_csr(handle, ae, CC_ENABLE);
+	savctx = qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS);
+	ctxarb_ctl = qat_hal_rd_ae_csr(handle, ae, CTX_ARB_CNTL);
+	ind_cnt_sig = qat_hal_rd_indr_csr(handle, ae, ctx,
+					  FUTURE_COUNT_SIGNAL_INDIRECT);
+	ind_sig = qat_hal_rd_indr_csr(handle, ae, ctx,
+				      CTX_SIG_EVENTS_INDIRECT);
+	act_sig = qat_hal_rd_ae_csr(handle, ae, CTX_SIG_EVENTS_ACTIVE);
 	/* execute micro codes */
 	qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, ctx_enables);
 	qat_hal_wr_uwords(handle, ae, 0, inst_num, micro_inst);
@@ -927,8 +928,8 @@ static int qat_hal_exec_micro_inst(struct icp_qat_fw_loader_handle *handle,
 	if (endpc) {
 		unsigned int ctx_status;
 
-		qat_hal_rd_indr_csr(handle, ae, ctx, CTX_STS_INDIRECT,
-				    &ctx_status);
+		ctx_status = qat_hal_rd_indr_csr(handle, ae, ctx,
+						 CTX_STS_INDIRECT);
 		*endpc = ctx_status & handle->hal_handle->upc_mask;
 	}
 	/* restore to saved context */
@@ -938,7 +939,7 @@ static int qat_hal_exec_micro_inst(struct icp_qat_fw_loader_handle *handle,
 	qat_hal_put_wakeup_event(handle, ae, (1 << ctx), wakeup_events);
 	qat_hal_wr_indr_csr(handle, ae, (1 << ctx), CTX_STS_INDIRECT,
 			    handle->hal_handle->upc_mask & savpc);
-	qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL, &csr_val);
+	csr_val = qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL);
 	newcsr_val = CLR_BIT(csr_val, MMC_SHARE_CS_BITPOS);
 	qat_hal_wr_ae_csr(handle, ae, AE_MISC_CONTROL, newcsr_val);
 	qat_hal_wr_ae_csr(handle, ae, CC_ENABLE, savcc);
@@ -986,16 +987,16 @@ static int qat_hal_rd_rel_reg(struct icp_qat_fw_loader_handle *handle,
 		insts = (uint64_t)0xA030000000ull | ((reg_addr & 0x3ff) << 10);
 		break;
 	}
-	qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS, &savctx);
-	qat_hal_rd_ae_csr(handle, ae, CTX_ARB_CNTL, &ctxarb_cntl);
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx_enables);
+	savctx = qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS);
+	ctxarb_cntl = qat_hal_rd_ae_csr(handle, ae, CTX_ARB_CNTL);
+	ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	ctx_enables &= IGNORE_W1C_MASK;
 	if (ctx != (savctx & ACS_ACNO))
 		qat_hal_wr_ae_csr(handle, ae, ACTIVE_CTX_STATUS,
 				  ctx & ACS_ACNO);
 	qat_hal_get_uwords(handle, ae, 0, 1, &savuword);
 	qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, ctx_enables);
-	qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS, &ustore_addr);
+	ustore_addr = qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS);
 	uaddr = UA_ECS;
 	qat_hal_wr_ae_csr(handle, ae, USTORE_ADDRESS, uaddr);
 	insts = qat_hal_set_uword_ecc(insts);
@@ -1011,7 +1012,7 @@ static int qat_hal_rd_rel_reg(struct icp_qat_fw_loader_handle *handle,
 	 * the instruction should have been executed
 	 * prior to clearing the ECS in putUwords
 	 */
-	qat_hal_rd_ae_csr(handle, ae, ALU_OUT, data);
+	*data = qat_hal_rd_ae_csr(handle, ae, ALU_OUT);
 	qat_hal_wr_ae_csr(handle, ae, USTORE_ADDRESS, ustore_addr);
 	qat_hal_wr_uwords(handle, ae, 0, 1, &savuword);
 	if (ctx != (savctx & ACS_ACNO))
@@ -1188,7 +1189,7 @@ static int qat_hal_put_rel_rd_xfer(struct icp_qat_fw_loader_handle *handle,
 	unsigned short mask;
 	unsigned short dr_offset = 0x10;
 
-	status = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx_enables);
+	ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	if (CE_INUSE_CONTEXTS & ctx_enables) {
 		if (ctx & 0x1) {
 			pr_err("QAT: bad 4-ctx mode,ctx=0x%x\n", ctx);
@@ -1238,7 +1239,7 @@ static int qat_hal_put_rel_wr_xfer(struct icp_qat_fw_loader_handle *handle,
 	const int num_inst = ARRAY_SIZE(micro_inst), code_off = 1;
 	const unsigned short gprnum = 0, dly = num_inst * 0x5;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx_enables);
+	ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	if (CE_INUSE_CONTEXTS & ctx_enables) {
 		if (ctx & 0x1) {
 			pr_err("QAT: 4-ctx mode,ctx=0x%x\n", ctx);
@@ -1282,7 +1283,7 @@ static int qat_hal_put_rel_nn(struct icp_qat_fw_loader_handle *handle,
 	unsigned int ctx_enables;
 	int stat = 0;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx_enables);
+	ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	ctx_enables &= IGNORE_W1C_MASK;
 	qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, ctx_enables | CE_NN_MODE);
 
@@ -1299,7 +1300,7 @@ static int qat_hal_convert_abs_to_rel(struct icp_qat_fw_loader_handle
 {
 	unsigned int ctx_enables;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx_enables);
+	ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	if (ctx_enables & CE_INUSE_CONTEXTS) {
 		/* 4-ctx mode */
 		*relreg = absreg_num & 0x1F;
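
The qat_hal hunks above are all the same mechanical conversion: qat_hal_rd_ae_csr() and qat_hal_rd_indr_csr() now return the CSR value instead of writing it through an output pointer, which lets every read-modify-write sequence collapse into a single expression. A minimal stand-alone sketch of the pattern, with csr_read()/csr_write() and the fake register file as hypothetical stand-ins for the real AE CSR accessors:

#include <stdio.h>

static unsigned int regs[16];	/* fake register file, illustration only */

static unsigned int csr_read(unsigned int reg)	/* new style: returns value */
{
	return regs[reg];
}

static void csr_write(unsigned int reg, unsigned int val)
{
	regs[reg] = val;
}

static void set_mode_bits(unsigned int reg, unsigned int clear,
			  unsigned int set)
{
	unsigned int val = csr_read(reg);	/* was: csr_read(reg, &val) */

	val &= ~clear;
	val |= set;
	csr_write(reg, val);
}

int main(void)
{
	regs[3] = 0xf0;
	set_mode_bits(3, 0x30, 0x01);
	printf("reg3 = 0x%x\n", regs[3]);	/* prints reg3 = 0xc1 */
	return 0;
}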
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index 142c6020cec7..188f44b7eb27 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -1,17 +1,13 @@
-/*
- * Cryptographic API.
- *
- * Support for Samsung S5PV210 and Exynos HW acceleration.
- *
- * Copyright (C) 2011 NetUP Inc. All rights reserved.
- * Copyright (c) 2017 Samsung Electronics Co., Ltd. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * Hash part based on omap-sham.c driver.
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// Cryptographic API.
+//
+// Support for Samsung S5PV210 and Exynos HW acceleration.
+//
+// Copyright (C) 2011 NetUP Inc. All rights reserved.
+// Copyright (c) 2017 Samsung Electronics Co., Ltd. All rights reserved.
+//
+// Hash part based on omap-sham.c driver.
 
 #include <linux/clk.h>
 #include <linux/crypto.h>
@@ -1461,7 +1457,7 @@ static void s5p_hash_tasklet_cb(unsigned long data)
 				       &dd->hash_flags)) {
 			/* hash or semi-hash ready */
 			clear_bit(HASH_FLAGS_DMA_READY, &dd->hash_flags);
-				goto finish;
+			goto finish;
 		}
 	}
 
diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig
index 602332e02729..63aa78c0b12b 100644
--- a/drivers/crypto/stm32/Kconfig
+++ b/drivers/crypto/stm32/Kconfig
@@ -1,4 +1,4 @@
-config CRC_DEV_STM32
+config CRYPTO_DEV_STM32_CRC
 	tristate "Support for STM32 crc accelerators"
 	depends on ARCH_STM32
 	select CRYPTO_HASH
@@ -6,7 +6,7 @@ config CRC_DEV_STM32
           This enables support for the CRC32 hw accelerator which can be found
 	  on STMicroelectronics STM32 SOC.
 
-config HASH_DEV_STM32
+config CRYPTO_DEV_STM32_HASH
 	tristate "Support for STM32 hash accelerators"
 	depends on ARCH_STM32
 	depends on HAS_DMA
@@ -18,3 +18,12 @@ config HASH_DEV_STM32
 	help
           This enables support for the HASH hw accelerator which can be found
 	  on STMicroelectronics STM32 SOC.
+
+config CRYPTO_DEV_STM32_CRYP
+	tristate "Support for STM32 cryp accelerators"
+	depends on ARCH_STM32
+	select CRYPTO_HASH
+	select CRYPTO_ENGINE
+	help
+          This enables support for the CRYP (AES/DES/TDES) hw accelerator which
+	  can be found on STMicroelectronics STM32 SOC.
diff --git a/drivers/crypto/stm32/Makefile b/drivers/crypto/stm32/Makefile
index 73cd56cad0cc..53d1bb94b221 100644
--- a/drivers/crypto/stm32/Makefile
+++ b/drivers/crypto/stm32/Makefile
@@ -1,2 +1,3 @@
-obj-$(CONFIG_CRC_DEV_STM32) += stm32_crc32.o
-obj-$(CONFIG_HASH_DEV_STM32) += stm32-hash.o
\ No newline at end of file
+obj-$(CONFIG_CRYPTO_DEV_STM32_CRC) += stm32_crc32.o
+obj-$(CONFIG_CRYPTO_DEV_STM32_HASH) += stm32-hash.o
+obj-$(CONFIG_CRYPTO_DEV_STM32_CRYP) += stm32-cryp.o
diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c
new file mode 100644
index 000000000000..4a06a7a665ee
--- /dev/null
+++ b/drivers/crypto/stm32/stm32-cryp.c
@@ -0,0 +1,1170 @@
+/*
+ * Copyright (C) STMicroelectronics SA 2017
+ * Author: Fabien Dessenne <fabien.dessenne@st.com>
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#include <crypto/aes.h>
+#include <crypto/des.h>
+#include <crypto/engine.h>
+#include <crypto/scatterwalk.h>
+
+#define DRIVER_NAME             "stm32-cryp"
+
+/* Bit [0] encrypt / decrypt */
+#define FLG_ENCRYPT             BIT(0)
+/* Bit [8..1] algo & operation mode */
+#define FLG_AES                 BIT(1)
+#define FLG_DES                 BIT(2)
+#define FLG_TDES                BIT(3)
+#define FLG_ECB                 BIT(4)
+#define FLG_CBC                 BIT(5)
+#define FLG_CTR                 BIT(6)
+/* Mode mask = bits [15..0] */
+#define FLG_MODE_MASK           GENMASK(15, 0)
+
+/* Registers */
+#define CRYP_CR                 0x00000000
+#define CRYP_SR                 0x00000004
+#define CRYP_DIN                0x00000008
+#define CRYP_DOUT               0x0000000C
+#define CRYP_DMACR              0x00000010
+#define CRYP_IMSCR              0x00000014
+#define CRYP_RISR               0x00000018
+#define CRYP_MISR               0x0000001C
+#define CRYP_K0LR               0x00000020
+#define CRYP_K0RR               0x00000024
+#define CRYP_K1LR               0x00000028
+#define CRYP_K1RR               0x0000002C
+#define CRYP_K2LR               0x00000030
+#define CRYP_K2RR               0x00000034
+#define CRYP_K3LR               0x00000038
+#define CRYP_K3RR               0x0000003C
+#define CRYP_IV0LR              0x00000040
+#define CRYP_IV0RR              0x00000044
+#define CRYP_IV1LR              0x00000048
+#define CRYP_IV1RR              0x0000004C
+
+/* Registers values */
+#define CR_DEC_NOT_ENC          0x00000004
+#define CR_TDES_ECB             0x00000000
+#define CR_TDES_CBC             0x00000008
+#define CR_DES_ECB              0x00000010
+#define CR_DES_CBC              0x00000018
+#define CR_AES_ECB              0x00000020
+#define CR_AES_CBC              0x00000028
+#define CR_AES_CTR              0x00000030
+#define CR_AES_KP               0x00000038
+#define CR_AES_UNKNOWN          0xFFFFFFFF
+#define CR_ALGO_MASK            0x00080038
+#define CR_DATA32               0x00000000
+#define CR_DATA16               0x00000040
+#define CR_DATA8                0x00000080
+#define CR_DATA1                0x000000C0
+#define CR_KEY128               0x00000000
+#define CR_KEY192               0x00000100
+#define CR_KEY256               0x00000200
+#define CR_FFLUSH               0x00004000
+#define CR_CRYPEN               0x00008000
+
+#define SR_BUSY                 0x00000010
+#define SR_OFNE                 0x00000004
+
+#define IMSCR_IN                BIT(0)
+#define IMSCR_OUT               BIT(1)
+
+#define MISR_IN                 BIT(0)
+#define MISR_OUT                BIT(1)
+
+/* Misc */
+#define AES_BLOCK_32            (AES_BLOCK_SIZE / sizeof(u32))
+#define _walked_in              (cryp->in_walk.offset - cryp->in_sg->offset)
+#define _walked_out             (cryp->out_walk.offset - cryp->out_sg->offset)
+
+struct stm32_cryp_ctx {
+	struct stm32_cryp       *cryp;
+	int                     keylen;
+	u32                     key[AES_KEYSIZE_256 / sizeof(u32)];
+	unsigned long           flags;
+};
+
+struct stm32_cryp_reqctx {
+	unsigned long mode;
+};
+
+struct stm32_cryp {
+	struct list_head        list;
+	struct device           *dev;
+	void __iomem            *regs;
+	struct clk              *clk;
+	unsigned long           flags;
+	u32                     irq_status;
+	struct stm32_cryp_ctx   *ctx;
+
+	struct crypto_engine    *engine;
+
+	struct mutex            lock; /* protects req */
+	struct ablkcipher_request *req;
+
+	size_t                  hw_blocksize;
+
+	size_t                  total_in;
+	size_t                  total_in_save;
+	size_t                  total_out;
+	size_t                  total_out_save;
+
+	struct scatterlist      *in_sg;
+	struct scatterlist      *out_sg;
+	struct scatterlist      *out_sg_save;
+
+	struct scatterlist      in_sgl;
+	struct scatterlist      out_sgl;
+	bool                    sgs_copied;
+
+	int                     in_sg_len;
+	int                     out_sg_len;
+
+	struct scatter_walk     in_walk;
+	struct scatter_walk     out_walk;
+
+	u32                     last_ctr[4];
+};
+
+struct stm32_cryp_list {
+	struct list_head        dev_list;
+	spinlock_t              lock; /* protect dev_list */
+};
+
+static struct stm32_cryp_list cryp_list = {
+	.dev_list = LIST_HEAD_INIT(cryp_list.dev_list),
+	.lock     = __SPIN_LOCK_UNLOCKED(cryp_list.lock),
+};
+
+static inline bool is_aes(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_AES;
+}
+
+static inline bool is_des(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_DES;
+}
+
+static inline bool is_tdes(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_TDES;
+}
+
+static inline bool is_ecb(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_ECB;
+}
+
+static inline bool is_cbc(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_CBC;
+}
+
+static inline bool is_ctr(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_CTR;
+}
+
+static inline bool is_encrypt(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_ENCRYPT;
+}
+
+static inline bool is_decrypt(struct stm32_cryp *cryp)
+{
+	return !is_encrypt(cryp);
+}
+
+static inline u32 stm32_cryp_read(struct stm32_cryp *cryp, u32 ofst)
+{
+	return readl_relaxed(cryp->regs + ofst);
+}
+
+static inline void stm32_cryp_write(struct stm32_cryp *cryp, u32 ofst, u32 val)
+{
+	writel_relaxed(val, cryp->regs + ofst);
+}
+
+static inline int stm32_cryp_wait_busy(struct stm32_cryp *cryp)
+{
+	u32 status;
+
+	return readl_relaxed_poll_timeout(cryp->regs + CRYP_SR, status,
+			!(status & SR_BUSY), 10, 100000);
+}
+
+static struct stm32_cryp *stm32_cryp_find_dev(struct stm32_cryp_ctx *ctx)
+{
+	struct stm32_cryp *tmp, *cryp = NULL;
+
+	spin_lock_bh(&cryp_list.lock);
+	if (!ctx->cryp) {
+		list_for_each_entry(tmp, &cryp_list.dev_list, list) {
+			cryp = tmp;
+			break;
+		}
+		ctx->cryp = cryp;
+	} else {
+		cryp = ctx->cryp;
+	}
+
+	spin_unlock_bh(&cryp_list.lock);
+
+	return cryp;
+}
+
+static int stm32_cryp_check_aligned(struct scatterlist *sg, size_t total,
+				    size_t align)
+{
+	int len = 0;
+
+	if (!total)
+		return 0;
+
+	if (!IS_ALIGNED(total, align))
+		return -EINVAL;
+
+	while (sg) {
+		if (!IS_ALIGNED(sg->offset, sizeof(u32)))
+			return -EINVAL;
+
+		if (!IS_ALIGNED(sg->length, align))
+			return -EINVAL;
+
+		len += sg->length;
+		sg = sg_next(sg);
+	}
+
+	if (len != total)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int stm32_cryp_check_io_aligned(struct stm32_cryp *cryp)
+{
+	int ret;
+
+	ret = stm32_cryp_check_aligned(cryp->in_sg, cryp->total_in,
+				       cryp->hw_blocksize);
+	if (ret)
+		return ret;
+
+	ret = stm32_cryp_check_aligned(cryp->out_sg, cryp->total_out,
+				       cryp->hw_blocksize);
+
+	return ret;
+}
+
+static void sg_copy_buf(void *buf, struct scatterlist *sg,
+			unsigned int start, unsigned int nbytes, int out)
+{
+	struct scatter_walk walk;
+
+	if (!nbytes)
+		return;
+
+	scatterwalk_start(&walk, sg);
+	scatterwalk_advance(&walk, start);
+	scatterwalk_copychunks(buf, &walk, nbytes, out);
+	scatterwalk_done(&walk, out, 0);
+}
+
+static int stm32_cryp_copy_sgs(struct stm32_cryp *cryp)
+{
+	void *buf_in, *buf_out;
+	int pages, total_in, total_out;
+
+	if (!stm32_cryp_check_io_aligned(cryp)) {
+		cryp->sgs_copied = 0;
+		return 0;
+	}
+
+	total_in = ALIGN(cryp->total_in, cryp->hw_blocksize);
+	pages = total_in ? get_order(total_in) : 1;
+	buf_in = (void *)__get_free_pages(GFP_ATOMIC, pages);
+
+	total_out = ALIGN(cryp->total_out, cryp->hw_blocksize);
+	pages = total_out ? get_order(total_out) : 1;
+	buf_out = (void *)__get_free_pages(GFP_ATOMIC, pages);
+
+	if (!buf_in || !buf_out) {
+		dev_err(cryp->dev, "Can't allocate pages when unaligned\n");
+		cryp->sgs_copied = 0;
+		return -EFAULT;
+	}
+
+	sg_copy_buf(buf_in, cryp->in_sg, 0, cryp->total_in, 0);
+
+	sg_init_one(&cryp->in_sgl, buf_in, total_in);
+	cryp->in_sg = &cryp->in_sgl;
+	cryp->in_sg_len = 1;
+
+	sg_init_one(&cryp->out_sgl, buf_out, total_out);
+	cryp->out_sg_save = cryp->out_sg;
+	cryp->out_sg = &cryp->out_sgl;
+	cryp->out_sg_len = 1;
+
+	cryp->sgs_copied = 1;
+
+	return 0;
+}
+
+static void stm32_cryp_hw_write_iv(struct stm32_cryp *cryp, u32 *iv)
+{
+	if (!iv)
+		return;
+
+	stm32_cryp_write(cryp, CRYP_IV0LR, cpu_to_be32(*iv++));
+	stm32_cryp_write(cryp, CRYP_IV0RR, cpu_to_be32(*iv++));
+
+	if (is_aes(cryp)) {
+		stm32_cryp_write(cryp, CRYP_IV1LR, cpu_to_be32(*iv++));
+		stm32_cryp_write(cryp, CRYP_IV1RR, cpu_to_be32(*iv++));
+	}
+}
+
+static void stm32_cryp_hw_write_key(struct stm32_cryp *c)
+{
+	unsigned int i;
+	int r_id;
+
+	if (is_des(c)) {
+		stm32_cryp_write(c, CRYP_K1LR, cpu_to_be32(c->ctx->key[0]));
+		stm32_cryp_write(c, CRYP_K1RR, cpu_to_be32(c->ctx->key[1]));
+	} else {
+		r_id = CRYP_K3RR;
+		for (i = c->ctx->keylen / sizeof(u32); i > 0; i--, r_id -= 4)
+			stm32_cryp_write(c, r_id,
+					 cpu_to_be32(c->ctx->key[i - 1]));
+	}
+}
+
+static u32 stm32_cryp_get_hw_mode(struct stm32_cryp *cryp)
+{
+	if (is_aes(cryp) && is_ecb(cryp))
+		return CR_AES_ECB;
+
+	if (is_aes(cryp) && is_cbc(cryp))
+		return CR_AES_CBC;
+
+	if (is_aes(cryp) && is_ctr(cryp))
+		return CR_AES_CTR;
+
+	if (is_des(cryp) && is_ecb(cryp))
+		return CR_DES_ECB;
+
+	if (is_des(cryp) && is_cbc(cryp))
+		return CR_DES_CBC;
+
+	if (is_tdes(cryp) && is_ecb(cryp))
+		return CR_TDES_ECB;
+
+	if (is_tdes(cryp) && is_cbc(cryp))
+		return CR_TDES_CBC;
+
+	dev_err(cryp->dev, "Unknown mode\n");
+	return CR_AES_UNKNOWN;
+}
+
+static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
+{
+	int ret;
+	u32 cfg, hw_mode;
+
+	/* Disable interrupt */
+	stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+
+	/* Set key */
+	stm32_cryp_hw_write_key(cryp);
+
+	/* Set configuration */
+	cfg = CR_DATA8 | CR_FFLUSH;
+
+	switch (cryp->ctx->keylen) {
+	case AES_KEYSIZE_128:
+		cfg |= CR_KEY128;
+		break;
+
+	case AES_KEYSIZE_192:
+		cfg |= CR_KEY192;
+		break;
+
+	default:
+	case AES_KEYSIZE_256:
+		cfg |= CR_KEY256;
+		break;
+	}
+
+	hw_mode = stm32_cryp_get_hw_mode(cryp);
+	if (hw_mode == CR_AES_UNKNOWN)
+		return -EINVAL;
+
+	/* AES ECB/CBC decrypt: run key preparation first */
+	if (is_decrypt(cryp) &&
+	    ((hw_mode == CR_AES_ECB) || (hw_mode == CR_AES_CBC))) {
+		stm32_cryp_write(cryp, CRYP_CR, cfg | CR_AES_KP | CR_CRYPEN);
+
+		/* Wait for end of processing */
+		ret = stm32_cryp_wait_busy(cryp);
+		if (ret) {
+			dev_err(cryp->dev, "Timeout (key preparation)\n");
+			return ret;
+		}
+	}
+
+	cfg |= hw_mode;
+
+	if (is_decrypt(cryp))
+		cfg |= CR_DEC_NOT_ENC;
+
+	/* Apply config and flush (valid when CRYPEN = 0) */
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	switch (hw_mode) {
+	case CR_DES_CBC:
+	case CR_TDES_CBC:
+	case CR_AES_CBC:
+	case CR_AES_CTR:
+		stm32_cryp_hw_write_iv(cryp, (u32 *)cryp->req->info);
+		break;
+
+	default:
+		break;
+	}
+
+	/* Enable now */
+	cfg |= CR_CRYPEN;
+
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	return 0;
+}
+
+static void stm32_cryp_finish_req(struct stm32_cryp *cryp)
+{
+	int err = 0;
+
+	if (cryp->sgs_copied) {
+		void *buf_in, *buf_out;
+		int pages, len;
+
+		buf_in = sg_virt(&cryp->in_sgl);
+		buf_out = sg_virt(&cryp->out_sgl);
+
+		sg_copy_buf(buf_out, cryp->out_sg_save, 0,
+			    cryp->total_out_save, 1);
+
+		len = ALIGN(cryp->total_in_save, cryp->hw_blocksize);
+		pages = len ? get_order(len) : 1;
+		free_pages((unsigned long)buf_in, pages);
+
+		len = ALIGN(cryp->total_out_save, cryp->hw_blocksize);
+		pages = len ? get_order(len) : 1;
+		free_pages((unsigned long)buf_out, pages);
+	}
+
+	crypto_finalize_cipher_request(cryp->engine, cryp->req, err);
+	cryp->req = NULL;
+
+	memset(cryp->ctx->key, 0, cryp->ctx->keylen);
+
+	mutex_unlock(&cryp->lock);
+}
+
+static int stm32_cryp_cpu_start(struct stm32_cryp *cryp)
+{
+	/* Enable interrupt and let the IRQ handler do everything */
+	stm32_cryp_write(cryp, CRYP_IMSCR, IMSCR_IN | IMSCR_OUT);
+
+	return 0;
+}
+
+static int stm32_cryp_cra_init(struct crypto_tfm *tfm)
+{
+	tfm->crt_ablkcipher.reqsize = sizeof(struct stm32_cryp_reqctx);
+
+	return 0;
+}
+
+static int stm32_cryp_crypt(struct ablkcipher_request *req, unsigned long mode)
+{
+	struct stm32_cryp_ctx *ctx = crypto_ablkcipher_ctx(
+			crypto_ablkcipher_reqtfm(req));
+	struct stm32_cryp_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct stm32_cryp *cryp = stm32_cryp_find_dev(ctx);
+
+	if (!cryp)
+		return -ENODEV;
+
+	rctx->mode = mode;
+
+	return crypto_transfer_cipher_request_to_engine(cryp->engine, req);
+}
+
+static int stm32_cryp_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+			     unsigned int keylen)
+{
+	struct stm32_cryp_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int stm32_cryp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_256)
+		return -EINVAL;
+	else
+		return stm32_cryp_setkey(tfm, key, keylen);
+}
+
+static int stm32_cryp_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	if (keylen != DES_KEY_SIZE)
+		return -EINVAL;
+	else
+		return stm32_cryp_setkey(tfm, key, keylen);
+}
+
+static int stm32_cryp_tdes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+				  unsigned int keylen)
+{
+	if (keylen != (3 * DES_KEY_SIZE))
+		return -EINVAL;
+	else
+		return stm32_cryp_setkey(tfm, key, keylen);
+}
+
+static int stm32_cryp_aes_ecb_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_AES | FLG_ECB | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_aes_ecb_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_AES | FLG_ECB);
+}
+
+static int stm32_cryp_aes_cbc_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_AES | FLG_CBC | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_aes_cbc_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_AES | FLG_CBC);
+}
+
+static int stm32_cryp_aes_ctr_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_AES | FLG_CTR | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_aes_ctr_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_AES | FLG_CTR);
+}
+
+static int stm32_cryp_des_ecb_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_DES | FLG_ECB | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_des_ecb_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_DES | FLG_ECB);
+}
+
+static int stm32_cryp_des_cbc_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_DES | FLG_CBC | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_des_cbc_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_DES | FLG_CBC);
+}
+
+static int stm32_cryp_tdes_ecb_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_TDES | FLG_ECB | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_tdes_ecb_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_TDES | FLG_ECB);
+}
+
+static int stm32_cryp_tdes_cbc_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_TDES | FLG_CBC | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_tdes_cbc_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_TDES | FLG_CBC);
+}
+
+static int stm32_cryp_prepare_req(struct crypto_engine *engine,
+				  struct ablkcipher_request *req)
+{
+	struct stm32_cryp_ctx *ctx;
+	struct stm32_cryp *cryp;
+	struct stm32_cryp_reqctx *rctx;
+	int ret;
+
+	if (!req)
+		return -EINVAL;
+
+	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
+
+	cryp = ctx->cryp;
+
+	if (!cryp)
+		return -ENODEV;
+
+	mutex_lock(&cryp->lock);
+
+	rctx = ablkcipher_request_ctx(req);
+	rctx->mode &= FLG_MODE_MASK;
+
+	ctx->cryp = cryp;
+
+	cryp->flags = (cryp->flags & ~FLG_MODE_MASK) | rctx->mode;
+	cryp->hw_blocksize = is_aes(cryp) ? AES_BLOCK_SIZE : DES_BLOCK_SIZE;
+	cryp->ctx = ctx;
+
+	cryp->req = req;
+	cryp->total_in = req->nbytes;
+	cryp->total_out = cryp->total_in;
+
+	cryp->total_in_save = cryp->total_in;
+	cryp->total_out_save = cryp->total_out;
+
+	cryp->in_sg = req->src;
+	cryp->out_sg = req->dst;
+	cryp->out_sg_save = cryp->out_sg;
+
+	cryp->in_sg_len = sg_nents_for_len(cryp->in_sg, cryp->total_in);
+	if (cryp->in_sg_len < 0) {
+		dev_err(cryp->dev, "Cannot get in_sg_len\n");
+		ret = cryp->in_sg_len;
+		goto out;
+	}
+
+	cryp->out_sg_len = sg_nents_for_len(cryp->out_sg, cryp->total_out);
+	if (cryp->out_sg_len < 0) {
+		dev_err(cryp->dev, "Cannot get out_sg_len\n");
+		ret = cryp->out_sg_len;
+		goto out;
+	}
+
+	ret = stm32_cryp_copy_sgs(cryp);
+	if (ret)
+		goto out;
+
+	scatterwalk_start(&cryp->in_walk, cryp->in_sg);
+	scatterwalk_start(&cryp->out_walk, cryp->out_sg);
+
+	ret = stm32_cryp_hw_init(cryp);
+out:
+	if (ret)
+		mutex_unlock(&cryp->lock);
+
+	return ret;
+}
+
+static int stm32_cryp_prepare_cipher_req(struct crypto_engine *engine,
+					 struct ablkcipher_request *req)
+{
+	return stm32_cryp_prepare_req(engine, req);
+}
+
+static int stm32_cryp_cipher_one_req(struct crypto_engine *engine,
+				     struct ablkcipher_request *req)
+{
+	struct stm32_cryp_ctx *ctx = crypto_ablkcipher_ctx(
+			crypto_ablkcipher_reqtfm(req));
+	struct stm32_cryp *cryp = ctx->cryp;
+
+	if (!cryp)
+		return -ENODEV;
+
+	return stm32_cryp_cpu_start(cryp);
+}
+
+static u32 *stm32_cryp_next_out(struct stm32_cryp *cryp, u32 *dst,
+				unsigned int n)
+{
+	scatterwalk_advance(&cryp->out_walk, n);
+
+	if (unlikely(cryp->out_sg->length == _walked_out)) {
+		cryp->out_sg = sg_next(cryp->out_sg);
+		if (cryp->out_sg) {
+			scatterwalk_start(&cryp->out_walk, cryp->out_sg);
+			return (sg_virt(cryp->out_sg) + _walked_out);
+		}
+	}
+
+	return (u32 *)((u8 *)dst + n);
+}
+
+static u32 *stm32_cryp_next_in(struct stm32_cryp *cryp, u32 *src,
+			       unsigned int n)
+{
+	scatterwalk_advance(&cryp->in_walk, n);
+
+	if (unlikely(cryp->in_sg->length == _walked_in)) {
+		cryp->in_sg = sg_next(cryp->in_sg);
+		if (cryp->in_sg) {
+			scatterwalk_start(&cryp->in_walk, cryp->in_sg);
+			return (sg_virt(cryp->in_sg) + _walked_in);
+		}
+	}
+
+	return (u32 *)((u8 *)src + n);
+}
+
+static void stm32_cryp_check_ctr_counter(struct stm32_cryp *cryp)
+{
+	u32 cr;
+
+	if (unlikely(cryp->last_ctr[3] == 0xFFFFFFFF)) {
+		cryp->last_ctr[3] = 0;
+		cryp->last_ctr[2]++;
+		if (!cryp->last_ctr[2]) {
+			cryp->last_ctr[1]++;
+			if (!cryp->last_ctr[1])
+				cryp->last_ctr[0]++;
+		}
+
+		cr = stm32_cryp_read(cryp, CRYP_CR);
+		stm32_cryp_write(cryp, CRYP_CR, cr & ~CR_CRYPEN);
+
+		stm32_cryp_hw_write_iv(cryp, (u32 *)cryp->last_ctr);
+
+		stm32_cryp_write(cryp, CRYP_CR, cr);
+	}
+
+	cryp->last_ctr[0] = stm32_cryp_read(cryp, CRYP_IV0LR);
+	cryp->last_ctr[1] = stm32_cryp_read(cryp, CRYP_IV0RR);
+	cryp->last_ctr[2] = stm32_cryp_read(cryp, CRYP_IV1LR);
+	cryp->last_ctr[3] = stm32_cryp_read(cryp, CRYP_IV1RR);
+}
+
+static bool stm32_cryp_irq_read_data(struct stm32_cryp *cryp)
+{
+	unsigned int i, j;
+	u32 d32, *dst;
+	u8 *d8;
+
+	dst = sg_virt(cryp->out_sg) + _walked_out;
+
+	for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) {
+		if (likely(cryp->total_out >= sizeof(u32))) {
+			/* Read a full u32 */
+			*dst = stm32_cryp_read(cryp, CRYP_DOUT);
+
+			dst = stm32_cryp_next_out(cryp, dst, sizeof(u32));
+			cryp->total_out -= sizeof(u32);
+		} else if (!cryp->total_out) {
+			/* Empty fifo out (data from input padding) */
+			d32 = stm32_cryp_read(cryp, CRYP_DOUT);
+		} else {
+			/* Read less than a u32 */
+			d32 = stm32_cryp_read(cryp, CRYP_DOUT);
+			d8 = (u8 *)&d32;
+
+			for (j = 0; j < cryp->total_out; j++) {
+				*((u8 *)dst) = *(d8++);
+				dst = stm32_cryp_next_out(cryp, dst, 1);
+			}
+			cryp->total_out = 0;
+		}
+	}
+
+	return !cryp->total_out || !cryp->total_in;
+}
+
+static void stm32_cryp_irq_write_block(struct stm32_cryp *cryp)
+{
+	unsigned int i, j;
+	u32 *src;
+	u8 d8[4];
+
+	src = sg_virt(cryp->in_sg) + _walked_in;
+
+	for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) {
+		if (likely(cryp->total_in >= sizeof(u32))) {
+			/* Write a full u32 */
+			stm32_cryp_write(cryp, CRYP_DIN, *src);
+
+			src = stm32_cryp_next_in(cryp, src, sizeof(u32));
+			cryp->total_in -= sizeof(u32);
+		} else if (!cryp->total_in) {
+			/* Write padding data */
+			stm32_cryp_write(cryp, CRYP_DIN, 0);
+		} else {
+			/* Write less than a u32 */
+			memset(d8, 0, sizeof(u32));
+			for (j = 0; j < cryp->total_in; j++) {
+				d8[j] = *((u8 *)src);
+				src = stm32_cryp_next_in(cryp, src, 1);
+			}
+
+			stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8);
+			cryp->total_in = 0;
+		}
+	}
+}
+
+static void stm32_cryp_irq_write_data(struct stm32_cryp *cryp)
+{
+	if (unlikely(!cryp->total_in)) {
+		dev_warn(cryp->dev, "No more data to process\n");
+		return;
+	}
+
+	if (is_aes(cryp) && is_ctr(cryp))
+		stm32_cryp_check_ctr_counter(cryp);
+
+	stm32_cryp_irq_write_block(cryp);
+}
+
+static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg)
+{
+	struct stm32_cryp *cryp = arg;
+
+	if (cryp->irq_status & MISR_OUT)
+		/* Output FIFO IRQ: read data */
+		if (unlikely(stm32_cryp_irq_read_data(cryp))) {
+			/* All bytes processed, finish */
+			stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+			stm32_cryp_finish_req(cryp);
+			return IRQ_HANDLED;
+		}
+
+	if (cryp->irq_status & MISR_IN) {
+		/* Input FIFO IRQ: write data */
+		stm32_cryp_irq_write_data(cryp);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t stm32_cryp_irq(int irq, void *arg)
+{
+	struct stm32_cryp *cryp = arg;
+
+	cryp->irq_status = stm32_cryp_read(cryp, CRYP_MISR);
+
+	return IRQ_WAKE_THREAD;
+}
+
+static struct crypto_alg crypto_algs[] = {
+{
+	.cra_name		= "ecb(aes)",
+	.cra_driver_name	= "stm32-ecb-aes",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.setkey		= stm32_cryp_aes_setkey,
+		.encrypt	= stm32_cryp_aes_ecb_encrypt,
+		.decrypt	= stm32_cryp_aes_ecb_decrypt,
+	}
+},
+{
+	.cra_name		= "cbc(aes)",
+	.cra_driver_name	= "stm32-cbc-aes",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= stm32_cryp_aes_setkey,
+		.encrypt	= stm32_cryp_aes_cbc_encrypt,
+		.decrypt	= stm32_cryp_aes_cbc_decrypt,
+	}
+},
+{
+	.cra_name		= "ctr(aes)",
+	.cra_driver_name	= "stm32-ctr-aes",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= stm32_cryp_aes_setkey,
+		.encrypt	= stm32_cryp_aes_ctr_encrypt,
+		.decrypt	= stm32_cryp_aes_ctr_decrypt,
+	}
+},
+{
+	.cra_name		= "ecb(des)",
+	.cra_driver_name	= "stm32-ecb-des",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= DES_BLOCK_SIZE,
+		.max_keysize	= DES_BLOCK_SIZE,
+		.setkey		= stm32_cryp_des_setkey,
+		.encrypt	= stm32_cryp_des_ecb_encrypt,
+		.decrypt	= stm32_cryp_des_ecb_decrypt,
+	}
+},
+{
+	.cra_name		= "cbc(des)",
+	.cra_driver_name	= "stm32-cbc-des",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= DES_BLOCK_SIZE,
+		.max_keysize	= DES_BLOCK_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= stm32_cryp_des_setkey,
+		.encrypt	= stm32_cryp_des_cbc_encrypt,
+		.decrypt	= stm32_cryp_des_cbc_decrypt,
+	}
+},
+{
+	.cra_name		= "ecb(des3_ede)",
+	.cra_driver_name	= "stm32-ecb-des3",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= 3 * DES_BLOCK_SIZE,
+		.max_keysize	= 3 * DES_BLOCK_SIZE,
+		.setkey		= stm32_cryp_tdes_setkey,
+		.encrypt	= stm32_cryp_tdes_ecb_encrypt,
+		.decrypt	= stm32_cryp_tdes_ecb_decrypt,
+	}
+},
+{
+	.cra_name		= "cbc(des3_ede)",
+	.cra_driver_name	= "stm32-cbc-des3",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= 3 * DES_BLOCK_SIZE,
+		.max_keysize	= 3 * DES_BLOCK_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= stm32_cryp_tdes_setkey,
+		.encrypt	= stm32_cryp_tdes_cbc_encrypt,
+		.decrypt	= stm32_cryp_tdes_cbc_decrypt,
+	}
+},
+};
+
+static const struct of_device_id stm32_dt_ids[] = {
+	{ .compatible = "st,stm32f756-cryp", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, stm32_dt_ids);
+
+static int stm32_cryp_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct stm32_cryp *cryp;
+	struct resource *res;
+	struct reset_control *rst;
+	int irq, ret;
+
+	cryp = devm_kzalloc(dev, sizeof(*cryp), GFP_KERNEL);
+	if (!cryp)
+		return -ENOMEM;
+
+	cryp->dev = dev;
+
+	mutex_init(&cryp->lock);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	cryp->regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(cryp->regs))
+		return PTR_ERR(cryp->regs);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "Cannot get IRQ resource\n");
+		return irq;
+	}
+
+	ret = devm_request_threaded_irq(dev, irq, stm32_cryp_irq,
+					stm32_cryp_irq_thread, IRQF_ONESHOT,
+					dev_name(dev), cryp);
+	if (ret) {
+		dev_err(dev, "Cannot grab IRQ\n");
+		return ret;
+	}
+
+	cryp->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(cryp->clk)) {
+		dev_err(dev, "Could not get clock\n");
+		return PTR_ERR(cryp->clk);
+	}
+
+	ret = clk_prepare_enable(cryp->clk);
+	if (ret) {
+		dev_err(cryp->dev, "Failed to enable clock\n");
+		return ret;
+	}
+
+	rst = devm_reset_control_get(dev, NULL);
+	if (!IS_ERR(rst)) {
+		reset_control_assert(rst);
+		udelay(2);
+		reset_control_deassert(rst);
+	}
+
+	platform_set_drvdata(pdev, cryp);
+
+	spin_lock(&cryp_list.lock);
+	list_add(&cryp->list, &cryp_list.dev_list);
+	spin_unlock(&cryp_list.lock);
+
+	/* Initialize crypto engine */
+	cryp->engine = crypto_engine_alloc_init(dev, 1);
+	if (!cryp->engine) {
+		dev_err(dev, "Could not init crypto engine\n");
+		ret = -ENOMEM;
+		goto err_engine1;
+	}
+
+	cryp->engine->prepare_cipher_request = stm32_cryp_prepare_cipher_req;
+	cryp->engine->cipher_one_request = stm32_cryp_cipher_one_req;
+
+	ret = crypto_engine_start(cryp->engine);
+	if (ret) {
+		dev_err(dev, "Could not start crypto engine\n");
+		goto err_engine2;
+	}
+
+	ret = crypto_register_algs(crypto_algs, ARRAY_SIZE(crypto_algs));
+	if (ret) {
+		dev_err(dev, "Could not register algs\n");
+		goto err_algs;
+	}
+
+	dev_info(dev, "Initialized\n");
+
+	return 0;
+
+err_algs:
+err_engine2:
+	crypto_engine_exit(cryp->engine);
+err_engine1:
+	spin_lock(&cryp_list.lock);
+	list_del(&cryp->list);
+	spin_unlock(&cryp_list.lock);
+
+	clk_disable_unprepare(cryp->clk);
+
+	return ret;
+}
+
+static int stm32_cryp_remove(struct platform_device *pdev)
+{
+	struct stm32_cryp *cryp = platform_get_drvdata(pdev);
+
+	if (!cryp)
+		return -ENODEV;
+
+	crypto_unregister_algs(crypto_algs, ARRAY_SIZE(crypto_algs));
+
+	crypto_engine_exit(cryp->engine);
+
+	spin_lock(&cryp_list.lock);
+	list_del(&cryp->list);
+	spin_unlock(&cryp_list.lock);
+
+	clk_disable_unprepare(cryp->clk);
+
+	return 0;
+}
+
+static struct platform_driver stm32_cryp_driver = {
+	.probe  = stm32_cryp_probe,
+	.remove = stm32_cryp_remove,
+	.driver = {
+		.name           = DRIVER_NAME,
+		.of_match_table = stm32_dt_ids,
+	},
+};
+
+module_platform_driver(stm32_cryp_driver);
+
+MODULE_AUTHOR("Fabien Dessenne <fabien.dessenne@st.com>");
+MODULE_DESCRIPTION("STMicrolectronics STM32 CRYP hardware driver");
+MODULE_LICENSE("GPL");
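
One subtlety in the new driver: the CRYP block auto-increments only the low 32 bits of the CTR counter, so stm32_cryp_check_ctr_counter() above has to detect the wrap of last_ctr[3] and ripple the carry through the upper words itself before reprogramming the IV registers. A stand-alone sketch of that carry propagation (plain C, no kernel context assumed):

#include <stdint.h>
#include <stdio.h>

/* 128-bit big-endian counter held as four 32-bit words; mirror the
 * software carry the driver performs when the hardware's 32-bit
 * increment is about to overflow. */
static void ctr_increment(uint32_t ctr[4])
{
	if (ctr[3] == 0xFFFFFFFF) {
		ctr[3] = 0;
		if (++ctr[2] == 0)
			if (++ctr[1] == 0)
				++ctr[0];
	} else {
		ctr[3]++;	/* common case: hardware does this alone */
	}
}

int main(void)
{
	uint32_t ctr[4] = { 0, 0, 0xFFFFFFFF, 0xFFFFFFFF };

	ctr_increment(ctr);
	/* prints 00000000 00000001 00000000 00000000 */
	printf("%08x %08x %08x %08x\n", ctr[0], ctr[1], ctr[2], ctr[3]);
	return 0;
}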
diff --git a/drivers/crypto/stm32/stm32_crc32.c b/drivers/crypto/stm32/stm32_crc32.c
index 090582baecfe..8f09b8430893 100644
--- a/drivers/crypto/stm32/stm32_crc32.c
+++ b/drivers/crypto/stm32/stm32_crc32.c
@@ -208,6 +208,7 @@ static struct shash_alg algs[] = {
 			.cra_name               = "crc32",
 			.cra_driver_name        = DRIVER_NAME,
 			.cra_priority           = 200,
+			.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 			.cra_blocksize          = CHKSUM_BLOCK_SIZE,
 			.cra_alignmask          = 3,
 			.cra_ctxsize            = sizeof(struct stm32_crc_ctx),
@@ -229,6 +230,7 @@ static struct shash_alg algs[] = {
 			.cra_name               = "crc32c",
 			.cra_driver_name        = DRIVER_NAME,
 			.cra_priority           = 200,
+			.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 			.cra_blocksize          = CHKSUM_BLOCK_SIZE,
 			.cra_alignmask          = 3,
 			.cra_ctxsize            = sizeof(struct stm32_crc_ctx),
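
CRYPTO_ALG_OPTIONAL_KEY marks ->setkey() (used here to override the default CRC seed) as optional rather than mandatory, so the crypto core will not reject digest requests on an unkeyed tfm. A hedged sketch of the usage this permits, assuming the shash API of this era (including the since-removed desc->flags field):

#include <crypto/hash.h>
#include <linux/err.h>

static int crc32_digest_sketch(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	int ret;

	tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		/* No crypto_shash_setkey(): legal only because the algorithm
		 * advertises CRYPTO_ALG_OPTIONAL_KEY and falls back to its
		 * default seed; out receives the 4-byte CRC. */
		ret = crypto_shash_digest(desc, data, len, out);
	}

	crypto_free_shash(tfm);
	return ret;
}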
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index d8424ed16c33..351f4bf37ca9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -58,6 +58,13 @@
 extern struct list_head adapter_list;
 extern struct mutex uld_mutex;
 
+/* Suspend an Ethernet Tx queue with fewer available descriptors than this.
+ * This is the same as calc_tx_descs() for a TSO packet with
+ * nr_frags == MAX_SKB_FRAGS.
+ */
+#define ETHTXQ_STOP_THRES \
+	(1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8))
+
 enum {
 	MAX_NPORTS	= 4,     /* max # of ports */
 	SERNUM_LEN	= 24,    /* Serial # length */
@@ -563,6 +570,7 @@ enum {                                 /* adapter flags */
 
 enum {
 	ULP_CRYPTO_LOOKASIDE = 1 << 0,
+	ULP_CRYPTO_IPSEC_INLINE = 1 << 1,
 };
 
 struct rx_sw_desc;
@@ -967,6 +975,11 @@ enum {
 	SCHED_CLASS_RATEMODE_ABS = 1,   /* Kb/s */
 };
 
+struct tx_sw_desc {                /* SW state per Tx descriptor */
+	struct sk_buff *skb;
+	struct ulptx_sgl *sgl;
+};
+
 /* Support for "sched_queue" command to allow one or more NIC TX Queues
  * to be bound to a TX Scheduling Class.
  */
@@ -1699,4 +1712,14 @@ void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
 void free_tx_desc(struct adapter *adap, struct sge_txq *q,
 		  unsigned int n, bool unmap);
 void free_txq(struct adapter *adap, struct sge_txq *q);
+void cxgb4_reclaim_completed_tx(struct adapter *adap,
+				struct sge_txq *q, bool unmap);
+int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb,
+		  dma_addr_t *addr);
+void cxgb4_inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
+			 void *pos);
+void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
+		     struct ulptx_sgl *sgl, u64 *end, unsigned int start,
+		     const dma_addr_t *addr);
+void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n);
 #endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 917663b35603..cf471831ee71 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -3096,6 +3096,8 @@ static int chcr_show(struct seq_file *seq, void *v)
 		   atomic_read(&adap->chcr_stats.error));
 	seq_printf(seq, "Fallback: %10u \n",
 		   atomic_read(&adap->chcr_stats.fallback));
+	seq_printf(seq, "IPSec PDU: %10u\n",
+		   atomic_read(&adap->chcr_stats.ipsec_cnt));
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 6f900ffe25cc..05a4abfd5ec1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4096,7 +4096,7 @@ static int adap_init0(struct adapter *adap)
 		} else {
 			adap->vres.ncrypto_fc = val[0];
 		}
-		adap->params.crypto |= ULP_CRYPTO_LOOKASIDE;
+		adap->params.crypto = ntohs(caps_cmd.cryptocaps);
 		adap->num_uld += 1;
 	}
 #undef FW_PARAM_PFVF
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 71a315bc1409..6b5fea4532f3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -637,6 +637,7 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
 	lld->nchan = adap->params.nports;
 	lld->nports = adap->params.nports;
 	lld->wr_cred = adap->params.ofldq_wr_cred;
+	lld->crypto = adap->params.crypto;
 	lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
 	lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
 	lld->iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 08e709ab6dd4..1d37672902da 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -297,6 +297,7 @@ struct chcr_stats_debug {
 	atomic_t complete;
 	atomic_t error;
 	atomic_t fallback;
+	atomic_t ipsec_cnt;
 };
 
 #define OCQ_WIN_OFFSET(pdev, vres) \
@@ -322,6 +323,7 @@ struct cxgb4_lld_info {
 	unsigned char wr_cred;               /* WR 16-byte credits */
 	unsigned char adapter_type;          /* type of adapter */
 	unsigned char fw_api_ver;            /* FW API version */
+	unsigned char crypto;                /* crypto support */
 	unsigned int fw_vers;                /* FW version */
 	unsigned int iscsi_iolen;            /* iSCSI max I/O length */
 	unsigned int cclk_ps;                /* Core clock period in psec */
@@ -370,6 +372,7 @@ struct cxgb4_uld_info {
 			      struct t4_lro_mgr *lro_mgr,
 			      struct napi_struct *napi);
 	void (*lro_flush)(struct t4_lro_mgr *);
+	int (*tx_handler)(struct sk_buff *skb, struct net_device *dev);
 };
 
 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
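
The new tx_handler hook is the other half of the xfrm_offload() dispatch added to the sge.c xmit path below: a ULD that claims inline-IPsec packets registers the callback alongside its other hooks and drives the freshly exported SGE helpers itself. A hypothetical sketch (handler body and names are illustrative, not the actual chcr implementation):

static int example_ipsec_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/* Build the crypto work request for this ESP packet, then reuse
	 * the helpers exported from sge.c: cxgb4_map_skb(),
	 * cxgb4_write_sgl(), cxgb4_inline_tx_skb(), cxgb4_ring_tx_db(). */
	return NETDEV_TX_OK;
}

static struct cxgb4_uld_info example_uld_info = {
	.name		= "example",
	.tx_handler	= example_ipsec_xmit,
	/* ...plus the usual add/rx_handler/state_change hooks... */
};

/* registered via: cxgb4_register_uld(CXGB4_ULD_CRYPTO, &example_uld_info); */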
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 922f2f937789..6c7b0ac0b48b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -41,6 +41,7 @@
 #include <linux/jiffies.h>
 #include <linux/prefetch.h>
 #include <linux/export.h>
+#include <net/xfrm.h>
 #include <net/ipv6.h>
 #include <net/tcp.h>
 #include <net/busy_poll.h>
@@ -53,6 +54,7 @@
 #include "t4_msg.h"
 #include "t4fw_api.h"
 #include "cxgb4_ptp.h"
+#include "cxgb4_uld.h"
 
 /*
  * Rx buffer size.  We use largish buffers if possible but settle for single
@@ -110,14 +112,6 @@
 #define NOMEM_TMR_IDX (SGE_NTIMERS - 1)
 
 /*
- * Suspend an Ethernet Tx queue with fewer available descriptors than this.
- * This is the same as calc_tx_descs() for a TSO packet with
- * nr_frags == MAX_SKB_FRAGS.
- */
-#define ETHTXQ_STOP_THRES \
-	(1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8))
-
-/*
  * Suspension threshold for non-Ethernet Tx queues.  We require enough room
  * for a full sized WR.
  */
@@ -134,11 +128,6 @@
  */
 #define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN
 
-struct tx_sw_desc {                /* SW state per Tx descriptor */
-	struct sk_buff *skb;
-	struct ulptx_sgl *sgl;
-};
-
 struct rx_sw_desc {                /* SW state per Rx descriptor */
 	struct page *page;
 	dma_addr_t dma_addr;
@@ -248,8 +237,8 @@ static inline bool fl_starving(const struct adapter *adapter,
 	return fl->avail - fl->pend_cred <= s->fl_starve_thres;
 }
 
-static int map_skb(struct device *dev, const struct sk_buff *skb,
-		   dma_addr_t *addr)
+int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb,
+		  dma_addr_t *addr)
 {
 	const skb_frag_t *fp, *end;
 	const struct skb_shared_info *si;
@@ -277,6 +266,7 @@ unwind:
 out_err:
 	return -ENOMEM;
 }
+EXPORT_SYMBOL(cxgb4_map_skb);
 
 #ifdef CONFIG_NEED_DMA_MAP_STATE
 static void unmap_skb(struct device *dev, const struct sk_buff *skb,
@@ -411,7 +401,7 @@ static inline int reclaimable(const struct sge_txq *q)
 }
 
 /**
- *	reclaim_completed_tx - reclaims completed Tx descriptors
+ *	cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors
  *	@adap: the adapter
  *	@q: the Tx queue to reclaim completed descriptors from
  *	@unmap: whether the buffers should be unmapped for DMA
@@ -420,7 +410,7 @@ static inline int reclaimable(const struct sge_txq *q)
  *	and frees the associated buffers if possible.  Called with the Tx
  *	queue locked.
  */
-static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
+inline void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
 					bool unmap)
 {
 	int avail = reclaimable(q);
@@ -437,6 +427,7 @@ static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
 		q->in_use -= avail;
 	}
 }
+EXPORT_SYMBOL(cxgb4_reclaim_completed_tx);
 
 static inline int get_buf_size(struct adapter *adapter,
 			       const struct rx_sw_desc *d)
@@ -833,7 +824,7 @@ static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
 }
 
 /**
- *	write_sgl - populate a scatter/gather list for a packet
+ *	cxgb4_write_sgl - populate a scatter/gather list for a packet
  *	@skb: the packet
  *	@q: the Tx queue we are writing into
  *	@sgl: starting location for writing the SGL
@@ -849,9 +840,9 @@ static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
  *	right after the end of the SGL but does not account for any potential
  *	wrap around, i.e., @end > @sgl.
  */
-static void write_sgl(const struct sk_buff *skb, struct sge_txq *q,
-		      struct ulptx_sgl *sgl, u64 *end, unsigned int start,
-		      const dma_addr_t *addr)
+void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
+		     struct ulptx_sgl *sgl, u64 *end, unsigned int start,
+		     const dma_addr_t *addr)
 {
 	unsigned int i, len;
 	struct ulptx_sge_pair *to;
@@ -903,6 +894,7 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q,
 	if ((uintptr_t)end & 8)           /* 0-pad to multiple of 16 */
 		*end = 0;
 }
+EXPORT_SYMBOL(cxgb4_write_sgl);
 
 /* This function copies 64 byte coalesced work request to
  * memory mapped BAR2 space. For coalesced WR SGE fetches
@@ -921,14 +913,14 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src)
 }
 
 /**
- *	ring_tx_db - check and potentially ring a Tx queue's doorbell
+ *	cxgb4_ring_tx_db - check and potentially ring a Tx queue's doorbell
  *	@adap: the adapter
  *	@q: the Tx queue
  *	@n: number of new descriptors to give to HW
  *
 *	Ring the doorbell for a Tx queue.
  */
-static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
+inline void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
 {
 	/* Make sure that all writes to the TX Descriptors are committed
 	 * before we tell the hardware about them.
@@ -995,9 +987,10 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
 		wmb();
 	}
 }
+EXPORT_SYMBOL(cxgb4_ring_tx_db);
 
 /**
- *	inline_tx_skb - inline a packet's data into Tx descriptors
+ *	cxgb4_inline_tx_skb - inline a packet's data into Tx descriptors
  *	@skb: the packet
  *	@q: the Tx queue where the packet will be inlined
  *	@pos: starting position in the Tx queue where to inline the packet
@@ -1007,8 +1000,8 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
  *	Most of the complexity of this operation is dealing with wrap arounds
  *	in the middle of the packet we want to inline.
  */
-static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
-			  void *pos)
+void cxgb4_inline_tx_skb(const struct sk_buff *skb,
+			 const struct sge_txq *q, void *pos)
 {
 	u64 *p;
 	int left = (void *)q->stat - pos;
@@ -1030,6 +1023,7 @@ static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
 	if ((uintptr_t)p & 8)
 		*p = 0;
 }
+EXPORT_SYMBOL(cxgb4_inline_tx_skb);
 
 static void *inline_tx_skb_header(const struct sk_buff *skb,
 				  const struct sge_txq *q,  void *pos,
@@ -1199,6 +1193,12 @@ out_free:	dev_kfree_skb_any(skb);
 
 	pi = netdev_priv(dev);
 	adap = pi->adapter;
+	ssi = skb_shinfo(skb);
+#ifdef CONFIG_CHELSIO_IPSEC_INLINE
+	if (xfrm_offload(skb) && !ssi->gso_size)
+		return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev);
+#endif /* CHELSIO_IPSEC_INLINE */
+
 	qidx = skb_get_queue_mapping(skb);
 	if (ptp_enabled) {
 		spin_lock(&adap->ptp_lock);
@@ -1215,7 +1215,7 @@ out_free:	dev_kfree_skb_any(skb);
 	}
 	skb_tx_timestamp(skb);
 
-	reclaim_completed_tx(adap, &q->q, true);
+	cxgb4_reclaim_completed_tx(adap, &q->q, true);
 	cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
 
 #ifdef CONFIG_CHELSIO_T4_FCOE
@@ -1245,7 +1245,7 @@ out_free:	dev_kfree_skb_any(skb);
 		immediate = true;
 
 	if (!immediate &&
-	    unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) {
+	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) {
 		q->mapping_err++;
 		if (ptp_enabled)
 			spin_unlock(&adap->ptp_lock);
@@ -1264,7 +1264,6 @@ out_free:	dev_kfree_skb_any(skb);
 	end = (u64 *)wr + flits;
 
 	len = immediate ? skb->len : 0;
-	ssi = skb_shinfo(skb);
 	if (ssi->gso_size) {
 		struct cpl_tx_pkt_lso *lso = (void *)wr;
 		bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
@@ -1341,13 +1340,13 @@ out_free:	dev_kfree_skb_any(skb);
 	cpl->ctrl1 = cpu_to_be64(cntrl);
 
 	if (immediate) {
-		inline_tx_skb(skb, &q->q, cpl + 1);
+		cxgb4_inline_tx_skb(skb, &q->q, cpl + 1);
 		dev_consume_skb_any(skb);
 	} else {
 		int last_desc;
 
-		write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1), end, 0,
-			  addr);
+		cxgb4_write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1),
+				end, 0, addr);
 		skb_orphan(skb);
 
 		last_desc = q->q.pidx + ndesc - 1;
@@ -1359,7 +1358,7 @@ out_free:	dev_kfree_skb_any(skb);
 
 	txq_advance(&q->q, ndesc);
 
-	ring_tx_db(adap, &q->q, ndesc);
+	cxgb4_ring_tx_db(adap, &q->q, ndesc);
 	if (ptp_enabled)
 		spin_unlock(&adap->ptp_lock);
 	return NETDEV_TX_OK;
@@ -1369,9 +1368,9 @@ out_free:	dev_kfree_skb_any(skb);
  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
  *	@q: the SGE control Tx queue
  *
- *	This is a variant of reclaim_completed_tx() that is used for Tx queues
- *	that send only immediate data (presently just the control queues) and
- *	thus do not have any sk_buffs to release.
+ *	This is a variant of cxgb4_reclaim_completed_tx() that is used
+ *	for Tx queues that send only immediate data (presently just
+ *	the control queues) and thus do not have any sk_buffs to release.
  */
 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
 {
@@ -1446,13 +1445,13 @@ static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb)
 	}
 
 	wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
-	inline_tx_skb(skb, &q->q, wr);
+	cxgb4_inline_tx_skb(skb, &q->q, wr);
 
 	txq_advance(&q->q, ndesc);
 	if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES))
 		ctrlq_check_stop(q, wr);
 
-	ring_tx_db(q->adap, &q->q, ndesc);
+	cxgb4_ring_tx_db(q->adap, &q->q, ndesc);
 	spin_unlock(&q->sendq.lock);
 
 	kfree_skb(skb);
@@ -1487,7 +1486,7 @@ static void restart_ctrlq(unsigned long data)
 		txq_advance(&q->q, ndesc);
 		spin_unlock(&q->sendq.lock);
 
-		inline_tx_skb(skb, &q->q, wr);
+		cxgb4_inline_tx_skb(skb, &q->q, wr);
 		kfree_skb(skb);
 
 		if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
@@ -1500,14 +1499,15 @@ static void restart_ctrlq(unsigned long data)
 			}
 		}
 		if (written > 16) {
-			ring_tx_db(q->adap, &q->q, written);
+			cxgb4_ring_tx_db(q->adap, &q->q, written);
 			written = 0;
 		}
 		spin_lock(&q->sendq.lock);
 	}
 	q->full = 0;
-ringdb: if (written)
-		ring_tx_db(q->adap, &q->q, written);
+ringdb:
+	if (written)
+		cxgb4_ring_tx_db(q->adap, &q->q, written);
 	spin_unlock(&q->sendq.lock);
 }
 
@@ -1650,7 +1650,7 @@ static void service_ofldq(struct sge_uld_txq *q)
 		 */
 		spin_unlock(&q->sendq.lock);
 
-		reclaim_completed_tx(q->adap, &q->q, false);
+		cxgb4_reclaim_completed_tx(q->adap, &q->q, false);
 
 		flits = skb->priority;                /* previously saved */
 		ndesc = flits_to_desc(flits);
@@ -1661,9 +1661,9 @@ static void service_ofldq(struct sge_uld_txq *q)
 
 		pos = (u64 *)&q->q.desc[q->q.pidx];
 		if (is_ofld_imm(skb))
-			inline_tx_skb(skb, &q->q, pos);
-		else if (map_skb(q->adap->pdev_dev, skb,
-				 (dma_addr_t *)skb->head)) {
+			cxgb4_inline_tx_skb(skb, &q->q, pos);
+		else if (cxgb4_map_skb(q->adap->pdev_dev, skb,
+				       (dma_addr_t *)skb->head)) {
 			txq_stop_maperr(q);
 			spin_lock(&q->sendq.lock);
 			break;
@@ -1694,9 +1694,9 @@ static void service_ofldq(struct sge_uld_txq *q)
 				pos = (void *)txq->desc;
 			}
 
-			write_sgl(skb, &q->q, (void *)pos,
-				  end, hdr_len,
-				  (dma_addr_t *)skb->head);
+			cxgb4_write_sgl(skb, &q->q, (void *)pos,
+					end, hdr_len,
+					(dma_addr_t *)skb->head);
 #ifdef CONFIG_NEED_DMA_MAP_STATE
 			skb->dev = q->adap->port[0];
 			skb->destructor = deferred_unmap_destructor;
@@ -1710,7 +1710,7 @@ static void service_ofldq(struct sge_uld_txq *q)
 		txq_advance(&q->q, ndesc);
 		written += ndesc;
 		if (unlikely(written > 32)) {
-			ring_tx_db(q->adap, &q->q, written);
+			cxgb4_ring_tx_db(q->adap, &q->q, written);
 			written = 0;
 		}
 
@@ -1725,7 +1725,7 @@ static void service_ofldq(struct sge_uld_txq *q)
 			kfree_skb(skb);
 	}
 	if (likely(written))
-		ring_tx_db(q->adap, &q->q, written);
+		cxgb4_ring_tx_db(q->adap, &q->q, written);
 
 	/* Indicate that no thread is processing the Pending Send Queue
 	 * currently.
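
The net effect of the sge.c changes above: the driver's core Tx helpers (reclaim, DMA mapping, inline copy, SGL write, doorbell ring) become cxgb4_-prefixed exported symbols, so the inline-IPsec ULD can drive an ordinary Tx queue itself. A rough sketch of how a ULD transmit path might chain them; the sizing helpers (calc_tx_flits, flits_to_desc, is_eth_imm) are sge.c-internal stand-ins and the work-request construction is elided:

	static int sketch_uld_tx(struct adapter *adap, struct sge_txq *q,
				 struct sk_buff *skb)
	{
		dma_addr_t addr[MAX_SKB_FRAGS + 1];
		unsigned int flits, ndesc;
		u64 *pos, *end;

		cxgb4_reclaim_completed_tx(adap, q, true);	/* free done descs */

		flits = calc_tx_flits(skb);		/* hypothetical sizing */
		ndesc = flits_to_desc(flits);
		pos = (u64 *)&q->desc[q->pidx];
		end = pos + flits;

		if (is_eth_imm(skb)) {			/* small packet: copy it in */
			cxgb4_inline_tx_skb(skb, q, pos);
		} else {				/* large packet: DMA-map it */
			if (cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)
				return -ENOMEM;
			/* ... write the WR/CPL headers at pos ... */
			cxgb4_write_sgl(skb, q, (struct ulptx_sgl *)pos,
					end, 0, addr);
		}

		cxgb4_ring_tx_db(adap, q, ndesc);	/* post ndesc descriptors */
		return 0;
	}
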
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 57eb4ad3485d..be3658301832 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -513,6 +513,13 @@ struct fw_ulptx_wr {
 	u64 cookie;
 };
 
+#define FW_ULPTX_WR_DATA_S      28
+#define FW_ULPTX_WR_DATA_M      0x1
+#define FW_ULPTX_WR_DATA_V(x)   ((x) << FW_ULPTX_WR_DATA_S)
+#define FW_ULPTX_WR_DATA_G(x)   \
+	(((x) >> FW_ULPTX_WR_DATA_S) & FW_ULPTX_WR_DATA_M)
+#define FW_ULPTX_WR_DATA_F      FW_ULPTX_WR_DATA_V(1U)
+
 struct fw_tp_wr {
 	__be32 op_to_immdlen;
 	__be32 flowid_len16;
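
The FW_ULPTX_WR_DATA_* defines follow this header's _S/_M/_V/_G/_F convention: shift, mask, place a value, extract a value, and a one-bit flag. Roughly, usage looks like the lines below; treating op_to_compl as the carrying field is an assumption for the example:

	wr->op_to_compl |= cpu_to_be32(FW_ULPTX_WR_DATA_F);	/* set bit 28 */

	if (FW_ULPTX_WR_DATA_G(be32_to_cpu(wr->op_to_compl)))	/* read it back */
		/* ... immediate data follows the work request ... */;
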
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c
index 2e5d311d2438..db81ed527452 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c
@@ -120,6 +120,7 @@ static struct shash_alg alg = {
 		.cra_name		= "adler32",
 		.cra_driver_name	= "adler32-zlib",
 		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(u32),
 		.cra_module		= THIS_MODULE,
diff --git a/include/crypto/aead.h b/include/crypto/aead.h
index 03b97629442c..1e26f790b03f 100644
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
@@ -327,7 +327,12 @@ static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req)
  */
 static inline int crypto_aead_encrypt(struct aead_request *req)
 {
-	return crypto_aead_alg(crypto_aead_reqtfm(req))->encrypt(req);
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+
+	if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY)
+		return -ENOKEY;
+
+	return crypto_aead_alg(aead)->encrypt(req);
 }
 
 /**
@@ -356,6 +361,9 @@ static inline int crypto_aead_decrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 
+	if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY)
+		return -ENOKEY;
+
 	if (req->cryptlen < crypto_aead_authsize(aead))
 		return -EINVAL;
 
diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
index caaa470389e0..b83d66073db0 100644
--- a/include/crypto/chacha20.h
+++ b/include/crypto/chacha20.h
@@ -13,12 +13,13 @@
 #define CHACHA20_IV_SIZE	16
 #define CHACHA20_KEY_SIZE	32
 #define CHACHA20_BLOCK_SIZE	64
+#define CHACHA20_BLOCK_WORDS	(CHACHA20_BLOCK_SIZE / sizeof(u32))
 
 struct chacha20_ctx {
 	u32 key[8];
 };
 
-void chacha20_block(u32 *state, void *stream);
+void chacha20_block(u32 *state, u32 *stream);
 void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
 int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			   unsigned int keysize);
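
Retyping the keystream parameter as u32 * makes the buffer's alignment and word count explicit at the call site. A minimal sketch, assuming ctx and iv are already set up:

	u32 state[16];
	u32 stream[CHACHA20_BLOCK_WORDS];	/* 64 bytes, naturally u32-aligned */

	crypto_chacha20_init(state, ctx, iv);
	chacha20_block(state, stream);		/* one keystream block */
	crypto_xor(dst, (u8 *)stream, CHACHA20_BLOCK_SIZE);
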
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 0ed31fd80242..2d1849dffb80 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -71,12 +71,11 @@ struct ahash_request {
 
 /**
  * struct ahash_alg - asynchronous message digest definition
- * @init: Initialize the transformation context. Intended only to initialize the
+ * @init: **[mandatory]** Initialize the transformation context. Intended only to initialize the
  *	  state of the HASH transformation at the beginning. This shall fill in
  *	  the internal structures used during the entire duration of the whole
  *	  transformation. No data processing happens at this point.
- *	  Note: mandatory.
- * @update: Push a chunk of data into the driver for transformation. This
+ * @update: **[mandatory]** Push a chunk of data into the driver for transformation. This
  *	   function actually pushes blocks of data from upper layers into the
  *	   driver, which then passes those to the hardware as seen fit. This
  *	   function must not finalize the HASH transformation by calculating the
@@ -85,20 +84,17 @@ struct ahash_request {
  *	   context, as this function may be called in parallel with the same
  *	   transformation object. Data processing can happen synchronously
  *	   [SHASH] or asynchronously [AHASH] at this point.
- *	   Note: mandatory.
- * @final: Retrieve result from the driver. This function finalizes the
+ * @final: **[mandatory]** Retrieve result from the driver. This function finalizes the
  *	   transformation and retrieves the resulting hash from the driver and
  *	   pushes it back to upper layers. No data processing happens at this
  *	   point unless hardware requires it to finish the transformation
  *	   (then the data buffered by the device driver is processed).
- *	   Note: mandatory.
- * @finup: Combination of @update and @final. This function is effectively a
+ * @finup: **[optional]** Combination of @update and @final. This function is effectively a
  *	   combination of @update and @final calls issued in sequence. As some
  *	   hardware cannot do @update and @final separately, this callback was
  *	   added to allow such hardware to be used at least by IPsec. Data
  *	   processing can happen synchronously [SHASH] or asynchronously [AHASH]
  *	   at this point.
- *	   Note: optional.
  * @digest: Combination of @init and @update and @final. This function
  *	    effectively behaves as the entire chain of operations, @init,
  *	    @update and @final issued in sequence. Just like @finup, this was
@@ -210,7 +206,6 @@ struct crypto_ahash {
 		      unsigned int keylen);
 
 	unsigned int reqsize;
-	bool has_setkey;
 	struct crypto_tfm base;
 };
 
@@ -410,11 +405,6 @@ static inline void *ahash_request_ctx(struct ahash_request *req)
 int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
 			unsigned int keylen);
 
-static inline bool crypto_ahash_has_setkey(struct crypto_ahash *tfm)
-{
-	return tfm->has_setkey;
-}
-
 /**
  * crypto_ahash_finup() - update and finalize message digest
  * @req: reference to the ahash_request handle that holds all information
@@ -487,7 +477,12 @@ static inline int crypto_ahash_export(struct ahash_request *req, void *out)
  */
 static inline int crypto_ahash_import(struct ahash_request *req, const void *in)
 {
-	return crypto_ahash_reqtfm(req)->import(req, in);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+
+	if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+		return -ENOKEY;
+
+	return tfm->import(req, in);
 }
 
 /**
@@ -503,7 +498,12 @@ static inline int crypto_ahash_import(struct ahash_request *req, const void *in)
  */
 static inline int crypto_ahash_init(struct ahash_request *req)
 {
-	return crypto_ahash_reqtfm(req)->init(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+
+	if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+		return -ENOKEY;
+
+	return tfm->init(req);
 }
 
 /**
@@ -855,7 +855,12 @@ static inline int crypto_shash_export(struct shash_desc *desc, void *out)
  */
 static inline int crypto_shash_import(struct shash_desc *desc, const void *in)
 {
-	return crypto_shash_alg(desc->tfm)->import(desc, in);
+	struct crypto_shash *tfm = desc->tfm;
+
+	if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+		return -ENOKEY;
+
+	return crypto_shash_alg(tfm)->import(desc, in);
 }
 
 /**
@@ -871,7 +876,12 @@ static inline int crypto_shash_import(struct shash_desc *desc, const void *in)
  */
 static inline int crypto_shash_init(struct shash_desc *desc)
 {
-	return crypto_shash_alg(desc->tfm)->init(desc);
+	struct crypto_shash *tfm = desc->tfm;
+
+	if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+		return -ENOKEY;
+
+	return crypto_shash_alg(tfm)->init(desc);
 }
 
 /**
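
Together with ahash.c/shash.c setting CRYPTO_TFM_NEED_KEY on keyed algorithms, these inline checks enforce setkey-before-init (and before import). A minimal sketch of the enforced ordering for a keyed hash; the algorithm name is only an example and error handling is elided:

	struct crypto_shash *tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
	SHASH_DESC_ON_STACK(desc, tfm);
	int err;

	desc->tfm = tfm;
	err = crypto_shash_init(desc);		/* -ENOKEY: no key set yet */

	crypto_shash_setkey(tfm, key, keylen);	/* clears CRYPTO_TFM_NEED_KEY */
	err = crypto_shash_init(desc);		/* now succeeds */
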
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index c2bae8da642c..27040a46d50a 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -90,6 +90,8 @@ static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
 	return alg->setkey != shash_no_setkey;
 }
 
+bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg);
+
 int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn,
 			    struct hash_alg_common *alg,
 			    struct crypto_instance *inst);
diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h
index ccad9b2c9bd6..0f6ddac1acfc 100644
--- a/include/crypto/internal/scompress.h
+++ b/include/crypto/internal/scompress.h
@@ -28,17 +28,6 @@ struct crypto_scomp {
  * @free_ctx:	Function frees context allocated with alloc_ctx
  * @compress:	Function performs a compress operation
 * @decompress:	Function performs a decompress operation
- * @init:	Initialize the cryptographic transformation object.
- *		This function is used to initialize the cryptographic
- *		transformation object. This function is called only once at
- *		the instantiation time, right after the transformation context
- *		was allocated. In case the cryptographic hardware has some
- *		special requirements which need to be handled by software, this
- *		function shall check for the precise requirement of the
- *		transformation and put any software fallbacks in place.
- * @exit:	Deinitialize the cryptographic transformation object. This is a
- *		counterpart to @init, used to remove various changes set in
- *		@init.
  * @base:	Common crypto API algorithm data structure
  */
 struct scomp_alg {
diff --git a/include/crypto/null.h b/include/crypto/null.h
index 5757c0a4b321..15aeef6e30ef 100644
--- a/include/crypto/null.h
+++ b/include/crypto/null.h
@@ -12,14 +12,4 @@
 struct crypto_skcipher *crypto_get_default_null_skcipher(void);
 void crypto_put_default_null_skcipher(void);
 
-static inline struct crypto_skcipher *crypto_get_default_null_skcipher2(void)
-{
-	return crypto_get_default_null_skcipher();
-}
-
-static inline void crypto_put_default_null_skcipher2(void)
-{
-	crypto_put_default_null_skcipher();
-}
-
 #endif
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index c65567d01e8e..f718a19da82f 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h
@@ -31,8 +31,6 @@ struct poly1305_desc_ctx {
 };
 
 int crypto_poly1305_init(struct shash_desc *desc);
-int crypto_poly1305_setkey(struct crypto_shash *tfm,
-			   const u8 *key, unsigned int keylen);
 unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
 					const u8 *src, unsigned int srclen);
 int crypto_poly1305_update(struct shash_desc *desc,
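
crypto_poly1305_setkey() goes away because Poly1305 keys are one-time, per-message values: the 32-byte key arrives as the leading bytes of the data stream and is absorbed by crypto_poly1305_setdesckey(). A sketch after the pattern used in the glue code, where the return value is the data left over once the key blocks are consumed:

	if (unlikely(!dctx->sset)) {
		unsigned int used = crypto_poly1305_setdesckey(dctx, src, srclen);

		src += srclen - used;
		srclen = used;
	}
	/* ... hash the remaining srclen bytes as message data ... */
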
diff --git a/include/crypto/salsa20.h b/include/crypto/salsa20.h
new file mode 100644
index 000000000000..19ed48aefc86
--- /dev/null
+++ b/include/crypto/salsa20.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common values for the Salsa20 algorithm
+ */
+
+#ifndef _CRYPTO_SALSA20_H
+#define _CRYPTO_SALSA20_H
+
+#include <linux/types.h>
+
+#define SALSA20_IV_SIZE		8
+#define SALSA20_MIN_KEY_SIZE	16
+#define SALSA20_MAX_KEY_SIZE	32
+#define SALSA20_BLOCK_SIZE	64
+
+struct crypto_skcipher;
+
+struct salsa20_ctx {
+	u32 initial_state[16];
+};
+
+void crypto_salsa20_init(u32 *state, const struct salsa20_ctx *ctx,
+			 const u8 *iv);
+int crypto_salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			  unsigned int keysize);
+
+#endif /* _CRYPTO_SALSA20_H */
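
This new header lets the generic and arch-optimized Salsa20 skciphers share key setup and state initialization. A minimal sketch of a crypt routine built on it, following the usual skcipher_walk pattern; salsa20_docrypt stands in for the generic or assembly keystream core:

	static int salsa20_crypt(struct skcipher_request *req)
	{
		struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
		const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm);
		struct skcipher_walk walk;
		u32 state[16];
		int err;

		err = skcipher_walk_virt(&walk, req, false);
		crypto_salsa20_init(state, ctx, walk.iv);

		while (walk.nbytes > 0) {
			unsigned int nbytes = walk.nbytes;

			if (nbytes < walk.total)	/* keep whole blocks */
				nbytes = round_down(nbytes, walk.stride);

			salsa20_docrypt(state, walk.dst.virt.addr,
					walk.src.virt.addr, nbytes);
			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
		}
		return err;
	}
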
diff --git a/include/crypto/sha3.h b/include/crypto/sha3.h
index b9d9bd553b48..080f60c2e6b1 100644
--- a/include/crypto/sha3.h
+++ b/include/crypto/sha3.h
@@ -19,7 +19,6 @@
 
 struct sha3_state {
 	u64		st[25];
-	unsigned int	md_len;
 	unsigned int	rsiz;
 	unsigned int	rsizw;
 
@@ -27,4 +26,9 @@ struct sha3_state {
 	u8		buf[SHA3_224_BLOCK_SIZE];
 };
 
+int crypto_sha3_init(struct shash_desc *desc);
+int crypto_sha3_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len);
+int crypto_sha3_final(struct shash_desc *desc, u8 *out);
+
 #endif
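
Exporting the generic init/update/final lets accelerated SHA-3 glue fall back to the C implementation whenever the fast path cannot run (e.g. when arm64 NEON is unusable in the current context). An illustrative shape; may_use_simd() is the arm64 predicate and the accelerated call is a placeholder:

	static int sha3_ce_update(struct shash_desc *desc, const u8 *data,
				  unsigned int len)
	{
		if (!may_use_simd())
			return crypto_sha3_update(desc, data, len);

		return sha3_update_accel(desc, data, len);	/* hypothetical */
	}
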
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index 562001cb412b..2f327f090c3e 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -401,11 +401,6 @@ static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm,
 	return tfm->setkey(tfm, key, keylen);
 }
 
-static inline bool crypto_skcipher_has_setkey(struct crypto_skcipher *tfm)
-{
-	return tfm->keysize;
-}
-
 static inline unsigned int crypto_skcipher_default_keysize(
 	struct crypto_skcipher *tfm)
 {
@@ -442,6 +437,9 @@ static inline int crypto_skcipher_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 
+	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+		return -ENOKEY;
+
 	return tfm->encrypt(req);
 }
 
@@ -460,6 +458,9 @@ static inline int crypto_skcipher_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 
+	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+		return -ENOKEY;
+
 	return tfm->decrypt(req);
 }
 
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 78508ca4b108..7e6e84cf6383 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -107,8 +107,16 @@
 #define CRYPTO_ALG_INTERNAL		0x00002000
 
 /*
+ * Set if the algorithm has a ->setkey() method but can be used without
+ * calling it first, i.e. there is a default key.
+ */
+#define CRYPTO_ALG_OPTIONAL_KEY		0x00004000
+
+/*
  * Transform masks and values (for crt_flags).
  */
+#define CRYPTO_TFM_NEED_KEY		0x00000001
+
 #define CRYPTO_TFM_REQ_MASK		0x000fff00
 #define CRYPTO_TFM_RES_MASK		0xfff00000
 
@@ -447,7 +455,7 @@ struct crypto_alg {
 	unsigned int cra_alignmask;
 
 	int cra_priority;
-	atomic_t cra_refcnt;
+	refcount_t cra_refcnt;
 
 	char cra_name[CRYPTO_MAX_ALG_NAME];
 	char cra_driver_name[CRYPTO_MAX_ALG_NAME];
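
CRYPTO_TFM_NEED_KEY is the per-tfm counterpart of CRYPTO_ALG_OPTIONAL_KEY: it is set when a keyed tfm is instantiated (unless the algorithm declares a usable default key, as the adler32 change above does), cleared by a successful ->setkey(), and set again if setkey fails. A sketch of that lifecycle in a type-level setkey wrapper; do_setkey stands in for the algorithm's callback:

	static int sketch_setkey(struct crypto_ahash *tfm, const u8 *key,
				 unsigned int keylen)
	{
		int err = do_setkey(tfm, key, keylen);	/* hypothetical */

		if (unlikely(err))
			crypto_ahash_set_flags(tfm, CRYPTO_TFM_NEED_KEY);
		else
			crypto_ahash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
		return err;
	}
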
diff --git a/kernel/padata.c b/kernel/padata.c
index 57c0074d50cc..d568cc56405f 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * padata.c - generic interface to process data streams in parallel
  *
diff --git a/lib/chacha20.c b/lib/chacha20.c
index 250ceed9ec9a..c1cc50fb68c9 100644
--- a/lib/chacha20.c
+++ b/lib/chacha20.c
@@ -16,12 +16,7 @@
 #include <asm/unaligned.h>
 #include <crypto/chacha20.h>
 
-static inline u32 rotl32(u32 v, u8 n)
-{
-	return (v << n) | (v >> (sizeof(v) * 8 - n));
-}
-
-extern void chacha20_block(u32 *state, void *stream)
+void chacha20_block(u32 *state, u32 *stream)
 {
 	u32 x[16], *out = stream;
 	int i;
@@ -30,45 +25,45 @@ extern void chacha20_block(u32 *state, void *stream)
 		x[i] = state[i];
 
 	for (i = 0; i < 20; i += 2) {
-		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],  16);
-		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],  16);
-		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],  16);
-		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],  16);
+		x[0]  += x[4];    x[12] = rol32(x[12] ^ x[0],  16);
+		x[1]  += x[5];    x[13] = rol32(x[13] ^ x[1],  16);
+		x[2]  += x[6];    x[14] = rol32(x[14] ^ x[2],  16);
+		x[3]  += x[7];    x[15] = rol32(x[15] ^ x[3],  16);
 
-		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
-		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
-		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
-		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
+		x[8]  += x[12];   x[4]  = rol32(x[4]  ^ x[8],  12);
+		x[9]  += x[13];   x[5]  = rol32(x[5]  ^ x[9],  12);
+		x[10] += x[14];   x[6]  = rol32(x[6]  ^ x[10], 12);
+		x[11] += x[15];   x[7]  = rol32(x[7]  ^ x[11], 12);
 
-		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],   8);
-		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],   8);
-		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],   8);
-		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],   8);
+		x[0]  += x[4];    x[12] = rol32(x[12] ^ x[0],   8);
+		x[1]  += x[5];    x[13] = rol32(x[13] ^ x[1],   8);
+		x[2]  += x[6];    x[14] = rol32(x[14] ^ x[2],   8);
+		x[3]  += x[7];    x[15] = rol32(x[15] ^ x[3],   8);
 
-		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
-		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
-		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
-		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
+		x[8]  += x[12];   x[4]  = rol32(x[4]  ^ x[8],   7);
+		x[9]  += x[13];   x[5]  = rol32(x[5]  ^ x[9],   7);
+		x[10] += x[14];   x[6]  = rol32(x[6]  ^ x[10],  7);
+		x[11] += x[15];   x[7]  = rol32(x[7]  ^ x[11],  7);
 
-		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],  16);
-		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],  16);
-		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],  16);
-		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],  16);
+		x[0]  += x[5];    x[15] = rol32(x[15] ^ x[0],  16);
+		x[1]  += x[6];    x[12] = rol32(x[12] ^ x[1],  16);
+		x[2]  += x[7];    x[13] = rol32(x[13] ^ x[2],  16);
+		x[3]  += x[4];    x[14] = rol32(x[14] ^ x[3],  16);
 
-		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
-		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
-		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
-		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
+		x[10] += x[15];   x[5]  = rol32(x[5]  ^ x[10], 12);
+		x[11] += x[12];   x[6]  = rol32(x[6]  ^ x[11], 12);
+		x[8]  += x[13];   x[7]  = rol32(x[7]  ^ x[8],  12);
+		x[9]  += x[14];   x[4]  = rol32(x[4]  ^ x[9],  12);
 
-		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],   8);
-		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],   8);
-		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],   8);
-		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],   8);
+		x[0]  += x[5];    x[15] = rol32(x[15] ^ x[0],   8);
+		x[1]  += x[6];    x[12] = rol32(x[12] ^ x[1],   8);
+		x[2]  += x[7];    x[13] = rol32(x[13] ^ x[2],   8);
+		x[3]  += x[4];    x[14] = rol32(x[14] ^ x[3],   8);
 
-		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
-		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
-		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
-		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
+		x[10] += x[15];   x[5]  = rol32(x[5]  ^ x[10],  7);
+		x[11] += x[12];   x[6]  = rol32(x[6]  ^ x[11],  7);
+		x[8]  += x[13];   x[7]  = rol32(x[7]  ^ x[8],   7);
+		x[9]  += x[14];   x[4]  = rol32(x[4]  ^ x[9],   7);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(x); i++)