 Documentation/devicetree/bindings/rng/samsung,exynos-rng4.txt |  17
 Documentation/devicetree/bindings/rng/st,rng.txt |  15
 Documentation/devicetree/bindings/rng/st,stm32-rng.txt |  21
 Documentation/hw_random.txt |   8
 MAINTAINERS |   8
 arch/arm/boot/dts/stih407-family.dtsi |  14
 arch/arm/boot/dts/stm32f429.dtsi |   7
 arch/s390/crypto/sha.h |   2
 arch/x86/Makefile |   6
 arch/x86/crypto/Makefile |   8
 arch/x86/crypto/crc32c-pcl-intel-asm_64.S |   2
 arch/x86/crypto/sha1_ni_asm.S | 302
 arch/x86/crypto/sha1_ssse3_glue.c | 314
 arch/x86/crypto/sha256_ni_asm.S | 353
 arch/x86/crypto/sha256_ssse3_glue.c | 329
 arch/x86/crypto/sha512_ssse3_glue.c | 249
 crypto/Kconfig |  17
 crypto/Makefile |  10
 crypto/akcipher.c |   1
 crypto/asymmetric_keys/pkcs7_verify.c |   5
 crypto/asymmetric_keys/x509_cert_parser.c |   6
 crypto/asymmetric_keys/x509_public_key.c |   5
 crypto/jitterentropy-kcapi.c |   4
 crypto/keywrap.c | 419
 crypto/rsa.c |  83
 crypto/rsa_helper.c |  42
 crypto/rsakey.asn1 |   5
 crypto/rsaprivkey.asn1 |  11
 crypto/rsapubkey.asn1 |   4
 crypto/skcipher.c |   4
 crypto/tcrypt.c |  17
 crypto/testmgr.c |  83
 crypto/testmgr.h |  77
 drivers/char/hw_random/Kconfig |  24
 drivers/char/hw_random/Makefile |   2
 drivers/char/hw_random/core.c |   2
 drivers/char/hw_random/exynos-rng.c |  58
 drivers/char/hw_random/mxc-rnga.c |  14
 drivers/char/hw_random/octeon-rng.c |  12
 drivers/char/hw_random/pasemi-rng.c |   1
 drivers/char/hw_random/ppc4xx-rng.c |   1
 drivers/char/hw_random/st-rng.c | 151
 drivers/char/hw_random/stm32-rng.c | 202
 drivers/crypto/Kconfig |   5
 drivers/crypto/amcc/crypto4xx_core.c |  23
 drivers/crypto/atmel-aes.c |  44
 drivers/crypto/atmel-sha.c |  33
 drivers/crypto/atmel-tdes.c |  35
 drivers/crypto/bfin_crc.c |  25
 drivers/crypto/caam/caamalg.c | 232
 drivers/crypto/caam/caamhash.c | 131
 drivers/crypto/caam/desc.h |   1
 drivers/crypto/caam/sg_sw_sec4.h |  72
 drivers/crypto/ccp/Kconfig |  13
 drivers/crypto/ccp/ccp-crypto-aes-cmac.c |  20
 drivers/crypto/ccp/ccp-crypto-main.c |   6
 drivers/crypto/ccp/ccp-crypto-sha.c |  13
 drivers/crypto/ccp/ccp-ops.c | 108
 drivers/crypto/ccp/ccp-pci.c |   2
 drivers/crypto/ccp/ccp-platform.c |   6
 drivers/crypto/marvell/cesa.h |  55
 drivers/crypto/marvell/cipher.c |  13
 drivers/crypto/marvell/hash.c | 471
 drivers/crypto/marvell/tdma.c |  42
 drivers/crypto/n2_core.c |   2
 drivers/crypto/nx/nx-842-powernv.c |   4
 drivers/crypto/nx/nx-842-pseries.c |   8
 drivers/crypto/picoxcell_crypto.c |   1
 drivers/crypto/qat/qat_common/Makefile |  12
 drivers/crypto/qat/qat_common/adf_common_drv.h |   4
 drivers/crypto/qat/qat_common/adf_ctl_drv.c |   6
 drivers/crypto/qat/qat_common/adf_init.c |   8
 drivers/crypto/qat/qat_common/adf_sriov.c |   7
 drivers/crypto/qat/qat_common/qat_algs.c | 178
 drivers/crypto/qat/qat_common/qat_asym_algs.c | 213
 drivers/crypto/qat/qat_common/qat_crypto.c |  79
 drivers/crypto/qat/qat_common/qat_hal.c |   5
 drivers/crypto/qat/qat_common/qat_rsakey.asn1 |   5
 drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 |  11
 drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 |   4
 drivers/crypto/qce/ablkcipher.c |  30
 drivers/crypto/qce/cipher.h |   4
 drivers/crypto/qce/dma.c |  52
 drivers/crypto/qce/dma.h |   5
 drivers/crypto/qce/sha.c |  18
 drivers/crypto/qce/sha.h |   2
 drivers/crypto/sahara.c | 108
 drivers/crypto/talitos.c | 104
 drivers/crypto/ux500/cryp/cryp_core.c |  71
 drivers/crypto/ux500/hash/hash_core.c |  50
 include/crypto/akcipher.h |  90
 include/crypto/hash.h |  14
 include/crypto/internal/rsa.h |   7
 include/linux/mpi.h |  10
 lib/842/842.h |   2
 lib/842/842_compress.c |  13
 lib/842/842_decompress.c |  17
 lib/mpi/mpicoder.c | 199
 98 files changed, 4070 insertions(+), 1538 deletions(-)
diff --git a/Documentation/devicetree/bindings/rng/samsung,exynos-rng4.txt b/Documentation/devicetree/bindings/rng/samsung,exynos-rng4.txt
new file mode 100644
index 000000000000..4ca8dd4d7e66
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/samsung,exynos-rng4.txt
@@ -0,0 +1,17 @@
+Exynos Pseudo Random Number Generator
+
+Required properties:
+
+- compatible  : Should be "samsung,exynos4-rng".
+- reg         : Specifies base physical address and size of the registers map.
+- clocks      : Phandle to clock-controller plus clock-specifier pair.
+- clock-names : "secss" as a clock name.
+
+Example:
+
+	rng@10830400 {
+		compatible = "samsung,exynos4-rng";
+		reg = <0x10830400 0x200>;
+		clocks = <&clock CLK_SSS>;
+		clock-names = "secss";
+	};
diff --git a/Documentation/devicetree/bindings/rng/st,rng.txt b/Documentation/devicetree/bindings/rng/st,rng.txt
new file mode 100644
index 000000000000..35734bc282e9
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/st,rng.txt
@@ -0,0 +1,15 @@
+STMicroelectronics HW Random Number Generator
+----------------------------------------------
+
+Required properties:
+compatible	: Should be "st,rng"
+reg		: Base address and size of IP's register map.
+clocks		: Phandle to device's clock (See: ../clocks/clock-bindings.txt)
+
+Example:
+
+rng@fee80000 {
+	compatible      = "st,rng";
+	reg		= <0xfee80000 0x1000>;
+	clocks          = <&clk_sysin>;
+};
diff --git a/Documentation/devicetree/bindings/rng/st,stm32-rng.txt b/Documentation/devicetree/bindings/rng/st,stm32-rng.txt
new file mode 100644
index 000000000000..47f04176f93b
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/st,stm32-rng.txt
@@ -0,0 +1,21 @@
+STMicroelectronics STM32 HW RNG
+===============================
+
+The STM32 hardware random number generator is a simple fixed-purpose IP and
+is fully separated from other crypto functions.
+
+Required properties:
+
+- compatible : Should be "st,stm32-rng"
+- reg : Should be register base and length as documented in the datasheet
+- interrupts : The designated IRQ line for the RNG
+- clocks : The clock needed to enable the RNG
+
+Example:
+
+	rng: rng@50060800 {
+		compatible = "st,stm32-rng";
+		reg = <0x50060800 0x400>;
+		interrupts = <80>;
+		clocks = <&rcc 0 38>;
+	};
diff --git a/Documentation/hw_random.txt b/Documentation/hw_random.txt
index 026e237bbc87..fce1634907d0 100644
--- a/Documentation/hw_random.txt
+++ b/Documentation/hw_random.txt
@@ -3,7 +3,7 @@ Introduction:
 	The hw_random framework is software that makes use of a
 	special hardware feature on your CPU or motherboard,
 	a Random Number Generator (RNG).  The software has two parts:
-	a core providing the /dev/hw_random character device and its
+	a core providing the /dev/hwrng character device and its
 	sysfs support, plus a hardware-specific driver that plugs
 	into that core.
 
@@ -14,7 +14,7 @@ Introduction:
 
 		http://sourceforge.net/projects/gkernel/
 
-	Those tools use /dev/hw_random to fill the kernel entropy pool,
+	Those tools use /dev/hwrng to fill the kernel entropy pool,
 	which is used internally and exported by the /dev/urandom and
 	/dev/random special files.
 
@@ -32,13 +32,13 @@ Theory of operation:
 	The rng-tools package uses such tests in "rngd", and lets you
 	run them by hand with a "rngtest" utility.
 
-	/dev/hw_random is char device major 10, minor 183.
+	/dev/hwrng is char device major 10, minor 183.
 
 	CLASS DEVICE.  There is a /sys/class/misc/hw_random node with
 	two unique attributes, "rng_available" and "rng_current".  The
 	"rng_available" attribute lists the hardware-specific drivers
 	available, while "rng_current" lists the one which is currently
-	connected to /dev/hw_random.  If your system has more than one
+	connected to /dev/hwrng.  If your system has more than one
 	RNG available, you may change the one used by writing a name from
 	the list in "rng_available" into "rng_current".
 
diff --git a/MAINTAINERS b/MAINTAINERS
index dcc8ed6fccde..3036f5a02107 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1529,6 +1529,7 @@ W:	http://www.stlinux.com
 S:	Maintained
 F:	arch/arm/mach-sti/
 F:	arch/arm/boot/dts/sti*
+F:	drivers/char/hw_random/st-rng.c
 F:	drivers/clocksource/arm_global_timer.c
 F:	drivers/clocksource/clksrc_st_lpc.c
 F:	drivers/i2c/busses/i2c-st.c
@@ -6587,6 +6588,13 @@ M:	Guenter Roeck <linux@roeck-us.net>
 S:	Maintained
 F:	drivers/net/dsa/mv88e6352.c
 
+MARVELL CRYPTO DRIVER
+M:	Boris Brezillon <boris.brezillon@free-electrons.com>
+M:	Arnaud Ebalard <arno@natisbad.org>
+F:	drivers/crypto/marvell/
+S:	Maintained
+L:	linux-crypto@vger.kernel.org
+
 MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2)
 M:	Mirko Lindner <mlindner@marvell.com>
 M:	Stephen Hemminger <stephen@networkplumber.org>
diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi
index ae0527754000..0c24fcb03577 100644
--- a/arch/arm/boot/dts/stih407-family.dtsi
+++ b/arch/arm/boot/dts/stih407-family.dtsi
@@ -610,5 +610,19 @@
 			clocks		= <&clk_sysin>;
 			st,pwm-num-chan = <4>;
 		};
+
+		rng10: rng@08a89000 {
+			compatible      = "st,rng";
+			reg		= <0x08a89000 0x1000>;
+			clocks          = <&clk_sysin>;
+			status		= "okay";
+		};
+
+		rng11: rng@08a8a000 {
+			compatible      = "st,rng";
+			reg		= <0x08a8a000 0x1000>;
+			clocks          = <&clk_sysin>;
+			status		= "okay";
+		};
 	};
 };
diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi
index d78a4815da8f..5e1e234e8c0a 100644
--- a/arch/arm/boot/dts/stm32f429.dtsi
+++ b/arch/arm/boot/dts/stm32f429.dtsi
@@ -174,6 +174,13 @@
 			reg = <0x40023800 0x400>;
 			clocks = <&clk_hse>;
 		};
+
+		rng: rng@50060800 {
+			compatible = "st,stm32-rng";
+			reg = <0x50060800 0x400>;
+			interrupts = <80>;
+			clocks = <&rcc 0 38>;
+		};
 	};
 };
 
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
index f4e9dc71675f..10f200790079 100644
--- a/arch/s390/crypto/sha.h
+++ b/arch/s390/crypto/sha.h
@@ -19,7 +19,7 @@
 #include <crypto/sha.h>
 
 /* must be big enough for the largest SHA variant */
-#define SHA_MAX_STATE_SIZE	16
+#define SHA_MAX_STATE_SIZE	(SHA512_DIGEST_SIZE / 4)
 #define SHA_MAX_BLOCK_SIZE      SHA512_BLOCK_SIZE
 
 struct s390_sha_ctx {
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2dfaa72260b4..4086abca0b32 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -171,9 +171,11 @@ asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
 asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
 avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
 avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
+sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1)
+sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1)
 
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(sha1_ni_instr) $(sha256_ni_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(sha1_ni_instr) $(sha256_ni_instr)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
 
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 9a2838cf0591..b9b912a44d61 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -5,6 +5,8 @@
 avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
 				$(comma)4)$(comma)%ymm2,yes,no)
+sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no)
+sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no)
 
 obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
 
@@ -91,9 +93,15 @@ ifeq ($(avx2_supported),yes)
 sha1-ssse3-y += sha1_avx2_x86_64_asm.o
 poly1305-x86_64-y += poly1305-avx2-x86_64.o
 endif
+ifeq ($(sha1_ni_supported),yes)
+sha1-ssse3-y += sha1_ni_asm.o
+endif
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
 sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
+ifeq ($(sha256_ni_supported),yes)
+sha256-ssse3-y += sha256_ni_asm.o
+endif
 sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
 crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 225be06edc80..4fe27e074194 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -330,7 +330,7 @@ ENDPROC(crc_pcl)
 	## PCLMULQDQ tables
 	## Table is 128 entries x 2 words (8 bytes) each
 	################################################################
-.section	.rotata, "a", %progbits
+.section	.rodata, "a", %progbits
 .align 8
 K_table:
 	.long 0x493c7d27, 0x00000001
diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
new file mode 100644
index 000000000000..874a651b9e7d
--- /dev/null
+++ b/arch/x86/crypto/sha1_ni_asm.S
@@ -0,0 +1,302 @@
+/*
+ * Intel SHA Extensions optimized implementation of a SHA-1 update function
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * 	Sean Gulley <sean.m.gulley@intel.com>
+ * 	Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 	* Redistributions of source code must retain the above copyright
+ * 	  notice, this list of conditions and the following disclaimer.
+ * 	* Redistributions in binary form must reproduce the above copyright
+ * 	  notice, this list of conditions and the following disclaimer in
+ * 	  the documentation and/or other materials provided with the
+ * 	  distribution.
+ * 	* Neither the name of Intel Corporation nor the names of its
+ * 	  contributors may be used to endorse or promote products derived
+ * 	  from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/linkage.h>
+
+#define DIGEST_PTR	%rdi	/* 1st arg */
+#define DATA_PTR	%rsi	/* 2nd arg */
+#define NUM_BLKS	%rdx	/* 3rd arg */
+
+#define RSPSAVE		%rax
+
+/* gcc conversion */
+#define FRAME_SIZE	32	/* space for 2x16 bytes */
+
+#define ABCD		%xmm0
+#define E0		%xmm1	/* Need two E's b/c they ping pong */
+#define E1		%xmm2
+#define MSG0		%xmm3
+#define MSG1		%xmm4
+#define MSG2		%xmm5
+#define MSG3		%xmm6
+#define SHUF_MASK	%xmm7
+
+
+/*
+ * Intel SHA Extensions optimized implementation of a SHA-1 update function
+ *
+ * The function takes a pointer to the current hash values, a pointer to the
+ * input data, and a number of 64-byte blocks to process.  Once all blocks have
+ * been processed, the digest pointer is updated with the resulting hash value.
+ * The function only processes complete blocks; there is no functionality to
+ * store partial blocks.  All message padding and hash value initialization must
+ * be done outside the update function.
+ *
+ * The indented lines in the loop are instructions related to rounds processing.
+ * The non-indented lines are instructions related to the message schedule.
+ *
+ * void sha1_ni_transform(uint32_t *digest, const void *data,
+ *		uint32_t numBlocks)
+ * digest : pointer to digest
+ * data: pointer to input data
+ * numBlocks: Number of blocks to process
+ */
+.text
+.align 32
+ENTRY(sha1_ni_transform)
+	mov		%rsp, RSPSAVE
+	sub		$FRAME_SIZE, %rsp
+	and		$~0xF, %rsp
+
+	shl		$6, NUM_BLKS		/* convert to bytes */
+	jz		.Ldone_hash
+	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */
+
+	/* load initial hash values */
+	pinsrd		$3, 1*16(DIGEST_PTR), E0
+	movdqu		0*16(DIGEST_PTR), ABCD
+	pand		UPPER_WORD_MASK(%rip), E0
+	pshufd		$0x1B, ABCD, ABCD
+
+	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
+
+.Lloop0:
+	/* Save hash values for addition after rounds */
+	movdqa		E0, (0*16)(%rsp)
+	movdqa		ABCD, (1*16)(%rsp)
+
+	/* Rounds 0-3 */
+	movdqu		0*16(DATA_PTR), MSG0
+	pshufb		SHUF_MASK, MSG0
+		paddd		MSG0, E0
+		movdqa		ABCD, E1
+		sha1rnds4	$0, E0, ABCD
+
+	/* Rounds 4-7 */
+	movdqu		1*16(DATA_PTR), MSG1
+	pshufb		SHUF_MASK, MSG1
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+		sha1rnds4	$0, E1, ABCD
+	sha1msg1	MSG1, MSG0
+
+	/* Rounds 8-11 */
+	movdqu		2*16(DATA_PTR), MSG2
+	pshufb		SHUF_MASK, MSG2
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+		sha1rnds4	$0, E0, ABCD
+	sha1msg1	MSG2, MSG1
+	pxor		MSG2, MSG0
+
+	/* Rounds 12-15 */
+	movdqu		3*16(DATA_PTR), MSG3
+	pshufb		SHUF_MASK, MSG3
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG3, MSG0
+		sha1rnds4	$0, E1, ABCD
+	sha1msg1	MSG3, MSG2
+	pxor		MSG3, MSG1
+
+	/* Rounds 16-19 */
+		sha1nexte	MSG0, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG0, MSG1
+		sha1rnds4	$0, E0, ABCD
+	sha1msg1	MSG0, MSG3
+	pxor		MSG0, MSG2
+
+	/* Rounds 20-23 */
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG1, MSG2
+		sha1rnds4	$1, E1, ABCD
+	sha1msg1	MSG1, MSG0
+	pxor		MSG1, MSG3
+
+	/* Rounds 24-27 */
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG2, MSG3
+		sha1rnds4	$1, E0, ABCD
+	sha1msg1	MSG2, MSG1
+	pxor		MSG2, MSG0
+
+	/* Rounds 28-31 */
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG3, MSG0
+		sha1rnds4	$1, E1, ABCD
+	sha1msg1	MSG3, MSG2
+	pxor		MSG3, MSG1
+
+	/* Rounds 32-35 */
+		sha1nexte	MSG0, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG0, MSG1
+		sha1rnds4	$1, E0, ABCD
+	sha1msg1	MSG0, MSG3
+	pxor		MSG0, MSG2
+
+	/* Rounds 36-39 */
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG1, MSG2
+		sha1rnds4	$1, E1, ABCD
+	sha1msg1	MSG1, MSG0
+	pxor		MSG1, MSG3
+
+	/* Rounds 40-43 */
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG2, MSG3
+		sha1rnds4	$2, E0, ABCD
+	sha1msg1	MSG2, MSG1
+	pxor		MSG2, MSG0
+
+	/* Rounds 44-47 */
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG3, MSG0
+		sha1rnds4	$2, E1, ABCD
+	sha1msg1	MSG3, MSG2
+	pxor		MSG3, MSG1
+
+	/* Rounds 48-51 */
+		sha1nexte	MSG0, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG0, MSG1
+		sha1rnds4	$2, E0, ABCD
+	sha1msg1	MSG0, MSG3
+	pxor		MSG0, MSG2
+
+	/* Rounds 52-55 */
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG1, MSG2
+		sha1rnds4	$2, E1, ABCD
+	sha1msg1	MSG1, MSG0
+	pxor		MSG1, MSG3
+
+	/* Rounds 56-59 */
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG2, MSG3
+		sha1rnds4	$2, E0, ABCD
+	sha1msg1	MSG2, MSG1
+	pxor		MSG2, MSG0
+
+	/* Rounds 60-63 */
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG3, MSG0
+		sha1rnds4	$3, E1, ABCD
+	sha1msg1	MSG3, MSG2
+	pxor		MSG3, MSG1
+
+	/* Rounds 64-67 */
+		sha1nexte	MSG0, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG0, MSG1
+		sha1rnds4	$3, E0, ABCD
+	sha1msg1	MSG0, MSG3
+	pxor		MSG0, MSG2
+
+	/* Rounds 68-71 */
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG1, MSG2
+		sha1rnds4	$3, E1, ABCD
+	pxor		MSG1, MSG3
+
+	/* Rounds 72-75 */
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG2, MSG3
+		sha1rnds4	$3, E0, ABCD
+
+	/* Rounds 76-79 */
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+		sha1rnds4	$3, E1, ABCD
+
+	/* Add current hash values with previously saved */
+	sha1nexte	(0*16)(%rsp), E0
+	paddd		(1*16)(%rsp), ABCD
+
+	/* Increment data pointer and loop if more to process */
+	add		$64, DATA_PTR
+	cmp		NUM_BLKS, DATA_PTR
+	jne		.Lloop0
+
+	/* Write hash values back in the correct order */
+	pshufd		$0x1B, ABCD, ABCD
+	movdqu		ABCD, 0*16(DIGEST_PTR)
+	pextrd		$3, E0, 1*16(DIGEST_PTR)
+
+.Ldone_hash:
+	mov		RSPSAVE, %rsp
+
+	ret
+ENDPROC(sha1_ni_transform)
+
+.data
+
+.align 64
+PSHUFFLE_BYTE_FLIP_MASK:
+	.octa 0x000102030405060708090a0b0c0d0e0f
+UPPER_WORD_MASK:
+	.octa 0xFFFFFFFF000000000000000000000000
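The function comment above fixes the calling contract: sha1_ni_transform()
consumes only whole 64-byte blocks, and padding plus initial state are the
caller's job (in this series that caller is the sha1_base_* glue below). As a
sketch of what the contract means, here is a hypothetical bare C caller for a
message short enough that the padding fits in one block; the initial values
are the standard FIPS 180-4 constants, and everything else is illustrative,
not part of the patch:

	#include <stdint.h>
	#include <string.h>

	void sha1_ni_transform(uint32_t *digest, const void *data,
			       uint32_t numBlocks);

	/* Illustrative only: hash a message of less than 56 bytes, so the
	 * 0x80 byte and the 64-bit length both fit in a single block. */
	static void sha1_one_block(const void *msg, size_t len, uint32_t digest[5])
	{
		uint8_t block[64] = { 0 };
		uint64_t bits = (uint64_t)len * 8;
		int i;

		/* Standard SHA-1 initial state. */
		digest[0] = 0x67452301; digest[1] = 0xefcdab89;
		digest[2] = 0x98badcfe; digest[3] = 0x10325476;
		digest[4] = 0xc3d2e1f0;

		/* Merkle-Damgard padding: 0x80, zeros, big-endian bit count. */
		memcpy(block, msg, len);
		block[len] = 0x80;
		for (i = 0; i < 8; i++)
			block[63 - i] = bits >> (8 * i);

		sha1_ni_transform(digest, block, 1);
		/* digest[] now holds H0..H4; store each word big-endian to
		 * get the usual byte-oriented digest. */
	}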
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 00212c32d4db..dd14616b7739 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -31,24 +31,11 @@
 #include <crypto/sha1_base.h>
 #include <asm/fpu/api.h>
 
+typedef void (sha1_transform_fn)(u32 *digest, const char *data,
+				unsigned int rounds);
 
-asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
-				     unsigned int rounds);
-#ifdef CONFIG_AS_AVX
-asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
-				   unsigned int rounds);
-#endif
-#ifdef CONFIG_AS_AVX2
-#define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
-
-asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
-				    unsigned int rounds);
-#endif
-
-static void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
-
-static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
-			     unsigned int len)
+static int sha1_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len, sha1_transform_fn *sha1_xform)
 {
 	struct sha1_state *sctx = shash_desc_ctx(desc);
 
@@ -61,14 +48,14 @@ static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
 
 	kernel_fpu_begin();
 	sha1_base_do_update(desc, data, len,
-			    (sha1_block_fn *)sha1_transform_asm);
+			    (sha1_block_fn *)sha1_xform);
 	kernel_fpu_end();
 
 	return 0;
 }
 
-static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
-			      unsigned int len, u8 *out)
+static int sha1_finup(struct shash_desc *desc, const u8 *data,
+		      unsigned int len, u8 *out, sha1_transform_fn *sha1_xform)
 {
 	if (!irq_fpu_usable())
 		return crypto_sha1_finup(desc, data, len, out);
@@ -76,32 +63,37 @@ static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
 	kernel_fpu_begin();
 	if (len)
 		sha1_base_do_update(desc, data, len,
-				    (sha1_block_fn *)sha1_transform_asm);
-	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm);
+				    (sha1_block_fn *)sha1_xform);
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_xform);
 	kernel_fpu_end();
 
 	return sha1_base_finish(desc, out);
 }
 
-/* Add padding and return the message digest. */
-static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
+asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
+				     unsigned int rounds);
+
+static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
 {
-	return sha1_ssse3_finup(desc, NULL, 0, out);
+	return sha1_update(desc, data, len,
+			(sha1_transform_fn *) sha1_transform_ssse3);
 }
 
-#ifdef CONFIG_AS_AVX2
-static void sha1_apply_transform_avx2(u32 *digest, const char *data,
-				unsigned int rounds)
+static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
 {
-	/* Select the optimal transform based on data block size */
-	if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
-		sha1_transform_avx2(digest, data, rounds);
-	else
-		sha1_transform_avx(digest, data, rounds);
+	return sha1_finup(desc, data, len, out,
+			(sha1_transform_fn *) sha1_transform_ssse3);
+}
+
+/* Add padding and return the message digest. */
+static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_ssse3_finup(desc, NULL, 0, out);
 }
-#endif
 
-static struct shash_alg alg = {
+static struct shash_alg sha1_ssse3_alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
 	.init		=	sha1_base_init,
 	.update		=	sha1_ssse3_update,
@@ -110,7 +102,7 @@ static struct shash_alg alg = {
 	.descsize	=	sizeof(struct sha1_state),
 	.base		=	{
 		.cra_name	=	"sha1",
-		.cra_driver_name=	"sha1-ssse3",
+		.cra_driver_name =	"sha1-ssse3",
 		.cra_priority	=	150,
 		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	=	SHA1_BLOCK_SIZE,
@@ -118,8 +110,60 @@ static struct shash_alg alg = {
 	}
 };
 
+static int register_sha1_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		return crypto_register_shash(&sha1_ssse3_alg);
+	return 0;
+}
+
+static void unregister_sha1_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		crypto_unregister_shash(&sha1_ssse3_alg);
+}
+
 #ifdef CONFIG_AS_AVX
-static bool __init avx_usable(void)
+asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
+				   unsigned int rounds);
+
+static int sha1_avx_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	return sha1_update(desc, data, len,
+			(sha1_transform_fn *) sha1_transform_avx);
+}
+
+static int sha1_avx_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
+{
+	return sha1_finup(desc, data, len, out,
+			(sha1_transform_fn *) sha1_transform_avx);
+}
+
+static int sha1_avx_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_avx_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha1_avx_alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_base_init,
+	.update		=	sha1_avx_update,
+	.final		=	sha1_avx_final,
+	.finup		=	sha1_avx_finup,
+	.descsize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name =	"sha1-avx",
+		.cra_priority	=	160,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static bool avx_usable(void)
 {
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
 		if (cpu_has_avx)
@@ -130,55 +174,197 @@ static bool __init avx_usable(void)
 	return true;
 }
 
-#ifdef CONFIG_AS_AVX2
-static bool __init avx2_usable(void)
+static int register_sha1_avx(void)
+{
+	if (avx_usable())
+		return crypto_register_shash(&sha1_avx_alg);
+	return 0;
+}
+
+static void unregister_sha1_avx(void)
 {
-	if (avx_usable() && cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI1) &&
-	    boot_cpu_has(X86_FEATURE_BMI2))
+	if (avx_usable())
+		crypto_unregister_shash(&sha1_avx_alg);
+}
+
+#else  /* CONFIG_AS_AVX */
+static inline int register_sha1_avx(void) { return 0; }
+static inline void unregister_sha1_avx(void) { }
+#endif /* CONFIG_AS_AVX */
+
+
+#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX)
+#define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
+
+asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
+				    unsigned int rounds);
+
+static bool avx2_usable(void)
+{
+	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
+		&& boot_cpu_has(X86_FEATURE_BMI1)
+		&& boot_cpu_has(X86_FEATURE_BMI2))
 		return true;
 
 	return false;
 }
+
+static void sha1_apply_transform_avx2(u32 *digest, const char *data,
+				unsigned int rounds)
+{
+	/* Select the optimal transform based on data block size */
+	if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
+		sha1_transform_avx2(digest, data, rounds);
+	else
+		sha1_transform_avx(digest, data, rounds);
+}
+
+static int sha1_avx2_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	return sha1_update(desc, data, len,
+		(sha1_transform_fn *) sha1_apply_transform_avx2);
+}
+
+static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
+{
+	return sha1_finup(desc, data, len, out,
+		(sha1_transform_fn *) sha1_apply_transform_avx2);
+}
+
+static int sha1_avx2_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_avx2_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha1_avx2_alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_base_init,
+	.update		=	sha1_avx2_update,
+	.final		=	sha1_avx2_final,
+	.finup		=	sha1_avx2_finup,
+	.descsize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name =	"sha1-avx2",
+		.cra_priority	=	170,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int register_sha1_avx2(void)
+{
+	if (avx2_usable())
+		return crypto_register_shash(&sha1_avx2_alg);
+	return 0;
+}
+
+static void unregister_sha1_avx2(void)
+{
+	if (avx2_usable())
+		crypto_unregister_shash(&sha1_avx2_alg);
+}
+
+#else
+static inline int register_sha1_avx2(void) { return 0; }
+static inline void unregister_sha1_avx2(void) { }
 #endif
+
+#ifdef CONFIG_AS_SHA1_NI
+asmlinkage void sha1_ni_transform(u32 *digest, const char *data,
+				   unsigned int rounds);
+
+static int sha1_ni_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	return sha1_update(desc, data, len,
+		(sha1_transform_fn *) sha1_ni_transform);
+}
+
+static int sha1_ni_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
+{
+	return sha1_finup(desc, data, len, out,
+		(sha1_transform_fn *) sha1_ni_transform);
+}
+
+static int sha1_ni_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_ni_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha1_ni_alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_base_init,
+	.update		=	sha1_ni_update,
+	.final		=	sha1_ni_final,
+	.finup		=	sha1_ni_finup,
+	.descsize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name =	"sha1-ni",
+		.cra_priority	=	250,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int register_sha1_ni(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+		return crypto_register_shash(&sha1_ni_alg);
+	return 0;
+}
+
+static void unregister_sha1_ni(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+		crypto_unregister_shash(&sha1_ni_alg);
+}
+
+#else
+static inline int register_sha1_ni(void) { return 0; }
+static inline void unregister_sha1_ni(void) { }
 #endif
 
 static int __init sha1_ssse3_mod_init(void)
 {
-	char *algo_name;
+	if (register_sha1_ssse3())
+		goto fail;
 
-	/* test for SSSE3 first */
-	if (cpu_has_ssse3) {
-		sha1_transform_asm = sha1_transform_ssse3;
-		algo_name = "SSSE3";
+	if (register_sha1_avx()) {
+		unregister_sha1_ssse3();
+		goto fail;
 	}
 
-#ifdef CONFIG_AS_AVX
-	/* allow AVX to override SSSE3, it's a little faster */
-	if (avx_usable()) {
-		sha1_transform_asm = sha1_transform_avx;
-		algo_name = "AVX";
-#ifdef CONFIG_AS_AVX2
-		/* allow AVX2 to override AVX, it's a little faster */
-		if (avx2_usable()) {
-			sha1_transform_asm = sha1_apply_transform_avx2;
-			algo_name = "AVX2";
-		}
-#endif
+	if (register_sha1_avx2()) {
+		unregister_sha1_avx();
+		unregister_sha1_ssse3();
+		goto fail;
 	}
-#endif
 
-	if (sha1_transform_asm) {
-		pr_info("Using %s optimized SHA-1 implementation\n", algo_name);
-		return crypto_register_shash(&alg);
+	if (register_sha1_ni()) {
+		unregister_sha1_avx2();
+		unregister_sha1_avx();
+		unregister_sha1_ssse3();
+		goto fail;
 	}
-	pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n");
 
+	return 0;
+fail:
 	return -ENODEV;
 }
 
 static void __exit sha1_ssse3_mod_fini(void)
 {
-	crypto_unregister_shash(&alg);
+	unregister_sha1_ni();
+	unregister_sha1_avx2();
+	unregister_sha1_avx();
+	unregister_sha1_ssse3();
 }
 
 module_init(sha1_ssse3_mod_init);
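With this refactor every usable variant is registered at once under the shared
algorithm name "sha1", and cra_priority encodes the preference order (ssse3
150, avx 160, avx2 170, sha-ni 250); the crypto core resolves the generic name
to the highest-priority implementation, so users never name a variant
explicitly. A hedged sketch of such an in-kernel user via the standard shash
API (demo_sha1() itself is hypothetical; the calls are the stock ones from
crypto/hash.h):

	#include <crypto/hash.h>
	#include <crypto/sha.h>
	#include <linux/err.h>

	static int demo_sha1(const u8 *data, unsigned int len,
			     u8 out[SHA1_DIGEST_SIZE])
	{
		struct crypto_shash *tfm;
		int err;

		/* Resolves to the highest-priority registered "sha1":
		 * sha1-ni (250) on SHA extension hardware, otherwise
		 * sha1-avx2/avx/ssse3, or the generic C fallback. */
		tfm = crypto_alloc_shash("sha1", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		{
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;
			err = crypto_shash_digest(desc, data, len, out);
		}
		crypto_free_shash(tfm);
		return err;
	}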
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
new file mode 100644
index 000000000000..748cdf21a938
--- /dev/null
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -0,0 +1,353 @@
+/*
+ * Intel SHA Extensions optimized implementation of a SHA-256 update function
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * 	Sean Gulley <sean.m.gulley@intel.com>
+ * 	Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 	* Redistributions of source code must retain the above copyright
+ * 	  notice, this list of conditions and the following disclaimer.
+ * 	* Redistributions in binary form must reproduce the above copyright
+ * 	  notice, this list of conditions and the following disclaimer in
+ * 	  the documentation and/or other materials provided with the
+ * 	  distribution.
+ * 	* Neither the name of Intel Corporation nor the names of its
+ * 	  contributors may be used to endorse or promote products derived
+ * 	  from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/linkage.h>
+
+#define DIGEST_PTR	%rdi	/* 1st arg */
+#define DATA_PTR	%rsi	/* 2nd arg */
+#define NUM_BLKS	%rdx	/* 3rd arg */
+
+#define SHA256CONSTANTS	%rax
+
+#define MSG		%xmm0
+#define STATE0		%xmm1
+#define STATE1		%xmm2
+#define MSGTMP0		%xmm3
+#define MSGTMP1		%xmm4
+#define MSGTMP2		%xmm5
+#define MSGTMP3		%xmm6
+#define MSGTMP4		%xmm7
+
+#define SHUF_MASK	%xmm8
+
+#define ABEF_SAVE	%xmm9
+#define CDGH_SAVE	%xmm10
+
+/*
+ * Intel SHA Extensions optimized implementation of a SHA-256 update function
+ *
+ * The function takes a pointer to the current hash values, a pointer to the
+ * input data, and a number of 64-byte blocks to process.  Once all blocks have
+ * been processed, the digest pointer is updated with the resulting hash value.
+ * The function only processes complete blocks; there is no functionality to
+ * store partial blocks.  All message padding and hash value initialization must
+ * be done outside the update function.
+ *
+ * The indented lines in the loop are instructions related to rounds processing.
+ * The non-indented lines are instructions related to the message schedule.
+ *
+ * void sha256_ni_transform(uint32_t *digest, const void *data,
+ *		uint32_t numBlocks);
+ * digest : pointer to digest
+ * data: pointer to input data
+ * numBlocks: Number of blocks to process
+ */
+
+.text
+.align 32
+ENTRY(sha256_ni_transform)
+
+	shl		$6, NUM_BLKS		/*  convert to bytes */
+	jz		.Ldone_hash
+	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */
+
+	/*
+	 * load initial hash values
+	 * Need to reorder these appropriately
+	 * DCBA, HGFE -> ABEF, CDGH
+	 */
+	movdqu		0*16(DIGEST_PTR), STATE0
+	movdqu		1*16(DIGEST_PTR), STATE1
+
+	pshufd		$0xB1, STATE0,  STATE0		/* CDAB */
+	pshufd		$0x1B, STATE1,  STATE1		/* EFGH */
+	movdqa		STATE0, MSGTMP4
+	palignr		$8, STATE1,  STATE0		/* ABEF */
+	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */
+
+	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
+	lea		K256(%rip), SHA256CONSTANTS
+
+.Lloop0:
+	/* Save hash values for addition after rounds */
+	movdqa		STATE0, ABEF_SAVE
+	movdqa		STATE1, CDGH_SAVE
+
+	/* Rounds 0-3 */
+	movdqu		0*16(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, MSGTMP0
+		paddd		0*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+
+	/* Rounds 4-7 */
+	movdqu		1*16(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, MSGTMP1
+		paddd		1*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP1, MSGTMP0
+
+	/* Rounds 8-11 */
+	movdqu		2*16(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, MSGTMP2
+		paddd		2*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP2, MSGTMP1
+
+	/* Rounds 12-15 */
+	movdqu		3*16(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, MSGTMP3
+		paddd		3*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP3, MSGTMP4
+	palignr		$4, MSGTMP2, MSGTMP4
+	paddd		MSGTMP4, MSGTMP0
+	sha256msg2	MSGTMP3, MSGTMP0
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP3, MSGTMP2
+
+	/* Rounds 16-19 */
+	movdqa		MSGTMP0, MSG
+		paddd		4*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP0, MSGTMP4
+	palignr		$4, MSGTMP3, MSGTMP4
+	paddd		MSGTMP4, MSGTMP1
+	sha256msg2	MSGTMP0, MSGTMP1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP0, MSGTMP3
+
+	/* Rounds 20-23 */
+	movdqa		MSGTMP1, MSG
+		paddd		5*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP1, MSGTMP4
+	palignr		$4, MSGTMP0, MSGTMP4
+	paddd		MSGTMP4, MSGTMP2
+	sha256msg2	MSGTMP1, MSGTMP2
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP1, MSGTMP0
+
+	/* Rounds 24-27 */
+	movdqa		MSGTMP2, MSG
+		paddd		6*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP2, MSGTMP4
+	palignr		$4, MSGTMP1, MSGTMP4
+	paddd		MSGTMP4, MSGTMP3
+	sha256msg2	MSGTMP2, MSGTMP3
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP2, MSGTMP1
+
+	/* Rounds 28-31 */
+	movdqa		MSGTMP3, MSG
+		paddd		7*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP3, MSGTMP4
+	palignr		$4, MSGTMP2, MSGTMP4
+	paddd		MSGTMP4, MSGTMP0
+	sha256msg2	MSGTMP3, MSGTMP0
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP3, MSGTMP2
+
+	/* Rounds 32-35 */
+	movdqa		MSGTMP0, MSG
+		paddd		8*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP0, MSGTMP4
+	palignr		$4, MSGTMP3, MSGTMP4
+	paddd		MSGTMP4, MSGTMP1
+	sha256msg2	MSGTMP0, MSGTMP1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP0, MSGTMP3
+
+	/* Rounds 36-39 */
+	movdqa		MSGTMP1, MSG
+		paddd		9*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP1, MSGTMP4
+	palignr		$4, MSGTMP0, MSGTMP4
+	paddd		MSGTMP4, MSGTMP2
+	sha256msg2	MSGTMP1, MSGTMP2
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP1, MSGTMP0
+
+	/* Rounds 40-43 */
+	movdqa		MSGTMP2, MSG
+		paddd		10*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP2, MSGTMP4
+	palignr		$4, MSGTMP1, MSGTMP4
+	paddd		MSGTMP4, MSGTMP3
+	sha256msg2	MSGTMP2, MSGTMP3
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP2, MSGTMP1
+
+	/* Rounds 44-47 */
+	movdqa		MSGTMP3, MSG
+		paddd		11*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP3, MSGTMP4
+	palignr		$4, MSGTMP2, MSGTMP4
+	paddd		MSGTMP4, MSGTMP0
+	sha256msg2	MSGTMP3, MSGTMP0
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP3, MSGTMP2
+
+	/* Rounds 48-51 */
+	movdqa		MSGTMP0, MSG
+		paddd		12*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP0, MSGTMP4
+	palignr		$4, MSGTMP3, MSGTMP4
+	paddd		MSGTMP4, MSGTMP1
+	sha256msg2	MSGTMP0, MSGTMP1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP0, MSGTMP3
+
+	/* Rounds 52-55 */
+	movdqa		MSGTMP1, MSG
+		paddd		13*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP1, MSGTMP4
+	palignr		$4, MSGTMP0, MSGTMP4
+	paddd		MSGTMP4, MSGTMP2
+	sha256msg2	MSGTMP1, MSGTMP2
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+
+	/* Rounds 56-59 */
+	movdqa		MSGTMP2, MSG
+		paddd		14*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP2, MSGTMP4
+	palignr		$4, MSGTMP1, MSGTMP4
+	paddd		MSGTMP4, MSGTMP3
+	sha256msg2	MSGTMP2, MSGTMP3
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+
+	/* Rounds 60-63 */
+	movdqa		MSGTMP3, MSG
+		paddd		15*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+
+	/* Add current hash values with previously saved */
+	paddd		ABEF_SAVE, STATE0
+	paddd		CDGH_SAVE, STATE1
+
+	/* Increment data pointer and loop if more to process */
+	add		$64, DATA_PTR
+	cmp		NUM_BLKS, DATA_PTR
+	jne		.Lloop0
+
+	/* Write hash values back in the correct order */
+	pshufd		$0x1B, STATE0,  STATE0		/* FEBA */
+	pshufd		$0xB1, STATE1,  STATE1		/* DCHG */
+	movdqa		STATE0, MSGTMP4
+	pblendw		$0xF0, STATE1,  STATE0		/* DCBA */
+	palignr		$8, MSGTMP4, STATE1		/* HGFE */
+
+	movdqu		STATE0, 0*16(DIGEST_PTR)
+	movdqu		STATE1, 1*16(DIGEST_PTR)
+
+.Ldone_hash:
+
+	ret
+ENDPROC(sha256_ni_transform)
+
+.data
+.align 64
+K256:
+	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+PSHUFFLE_BYTE_FLIP_MASK:
+	.octa 0x0c0d0e0f08090a0b0405060700010203
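The pshufd/palignr/pblendw sequences at the top and bottom of
sha256_ni_transform only permute the state words: memory holds the digest as
A..H, while sha256rnds2 wants the two xmm registers laid out as ABEF and CDGH.
A scalar C sketch of the same load permutation, illustrative only; index 0 is
the least-significant dword, mirroring the xmm lane numbering used by the
shuffles above:

	#include <stdint.h>

	/* digest[0..7] = A,B,C,D,E,F,G,H as stored in memory. */
	static void load_abef_cdgh(const uint32_t digest[8],
				   uint32_t state0[4], uint32_t state1[4])
	{
		state0[3] = digest[0];	/* A */
		state0[2] = digest[1];	/* B */
		state0[1] = digest[4];	/* E */
		state0[0] = digest[5];	/* F */

		state1[3] = digest[2];	/* C */
		state1[2] = digest[3];	/* D */
		state1[1] = digest[6];	/* G */
		state1[0] = digest[7];	/* H */
	}

The epilogue applies the inverse permutation before the two movdqu stores
write the digest back as A..H.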
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 0e0e85aea634..5f4d6086dc59 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -42,19 +42,10 @@
 
 asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
 				       u64 rounds);
-#ifdef CONFIG_AS_AVX
-asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
-				     u64 rounds);
-#endif
-#ifdef CONFIG_AS_AVX2
-asmlinkage void sha256_transform_rorx(u32 *digest, const char *data,
-				      u64 rounds);
-#endif
-
-static void (*sha256_transform_asm)(u32 *, const char *, u64);
+typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds);
 
-static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
-			     unsigned int len)
+static int sha256_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len, sha256_transform_fn *sha256_xform)
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 
@@ -67,14 +58,14 @@ static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
 
 	kernel_fpu_begin();
 	sha256_base_do_update(desc, data, len,
-			      (sha256_block_fn *)sha256_transform_asm);
+			      (sha256_block_fn *)sha256_xform);
 	kernel_fpu_end();
 
 	return 0;
 }
 
-static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
-			      unsigned int len, u8 *out)
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+	      unsigned int len, u8 *out, sha256_transform_fn *sha256_xform)
 {
 	if (!irq_fpu_usable())
 		return crypto_sha256_finup(desc, data, len, out);
@@ -82,20 +73,32 @@ static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
 	kernel_fpu_begin();
 	if (len)
 		sha256_base_do_update(desc, data, len,
-				      (sha256_block_fn *)sha256_transform_asm);
-	sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm);
+				      (sha256_block_fn *)sha256_xform);
+	sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_xform);
 	kernel_fpu_end();
 
 	return sha256_base_finish(desc, out);
 }
 
+static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	return sha256_update(desc, data, len, sha256_transform_ssse3);
+}
+
+static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
+	      unsigned int len, u8 *out)
+{
+	return sha256_finup(desc, data, len, out, sha256_transform_ssse3);
+}
+
 /* Add padding and return the message digest. */
 static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
 {
 	return sha256_ssse3_finup(desc, NULL, 0, out);
 }
 
-static struct shash_alg algs[] = { {
+static struct shash_alg sha256_ssse3_algs[] = { {
 	.digestsize	=	SHA256_DIGEST_SIZE,
 	.init		=	sha256_base_init,
 	.update		=	sha256_ssse3_update,
@@ -127,8 +130,75 @@ static struct shash_alg algs[] = { {
 	}
 } };
 
+static int register_sha256_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		return crypto_register_shashes(sha256_ssse3_algs,
+				ARRAY_SIZE(sha256_ssse3_algs));
+	return 0;
+}
+
+static void unregister_sha256_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		crypto_unregister_shashes(sha256_ssse3_algs,
+				ARRAY_SIZE(sha256_ssse3_algs));
+}
+
 #ifdef CONFIG_AS_AVX
-static bool __init avx_usable(void)
+asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
+				     u64 rounds);
+
+static int sha256_avx_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	return sha256_update(desc, data, len, sha256_transform_avx);
+}
+
+static int sha256_avx_finup(struct shash_desc *desc, const u8 *data,
+		      unsigned int len, u8 *out)
+{
+	return sha256_finup(desc, data, len, out, sha256_transform_avx);
+}
+
+static int sha256_avx_final(struct shash_desc *desc, u8 *out)
+{
+	return sha256_avx_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha256_avx_algs[] = { {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	sha256_base_init,
+	.update		=	sha256_avx_update,
+	.final		=	sha256_avx_final,
+	.finup		=	sha256_avx_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name =	"sha256-avx",
+		.cra_priority	=	160,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+}, {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	sha224_base_init,
+	.update		=	sha256_avx_update,
+	.final		=	sha256_avx_final,
+	.finup		=	sha256_avx_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name =	"sha224-avx",
+		.cra_priority	=	160,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
+
+static bool avx_usable(void)
 {
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
 		if (cpu_has_avx)
@@ -138,47 +208,216 @@ static bool __init avx_usable(void)
 
 	return true;
 }
-#endif
 
-static int __init sha256_ssse3_mod_init(void)
+static int register_sha256_avx(void)
 {
-	/* test for SSSE3 first */
-	if (cpu_has_ssse3)
-		sha256_transform_asm = sha256_transform_ssse3;
+	if (avx_usable())
+		return crypto_register_shashes(sha256_avx_algs,
+				ARRAY_SIZE(sha256_avx_algs));
+	return 0;
+}
 
-#ifdef CONFIG_AS_AVX
-	/* allow AVX to override SSSE3, it's a little faster */
-	if (avx_usable()) {
-#ifdef CONFIG_AS_AVX2
-		if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2))
-			sha256_transform_asm = sha256_transform_rorx;
-		else
+static void unregister_sha256_avx(void)
+{
+	if (avx_usable())
+		crypto_unregister_shashes(sha256_avx_algs,
+				ARRAY_SIZE(sha256_avx_algs));
+}
+
+#else
+static inline int register_sha256_avx(void) { return 0; }
+static inline void unregister_sha256_avx(void) { }
 #endif
-			sha256_transform_asm = sha256_transform_avx;
+
+#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX)
+asmlinkage void sha256_transform_rorx(u32 *digest, const char *data,
+				      u64 rounds);
+
+static int sha256_avx2_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	return sha256_update(desc, data, len, sha256_transform_rorx);
+}
+
+static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data,
+		      unsigned int len, u8 *out)
+{
+	return sha256_finup(desc, data, len, out, sha256_transform_rorx);
+}
+
+static int sha256_avx2_final(struct shash_desc *desc, u8 *out)
+{
+	return sha256_avx2_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha256_avx2_algs[] = { {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	sha256_base_init,
+	.update		=	sha256_avx2_update,
+	.final		=	sha256_avx2_final,
+	.finup		=	sha256_avx2_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name =	"sha256-avx2",
+		.cra_priority	=	170,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
 	}
-#endif
+}, {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	sha224_base_init,
+	.update		=	sha256_avx2_update,
+	.final		=	sha256_avx2_final,
+	.finup		=	sha256_avx2_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name =	"sha224-avx2",
+		.cra_priority	=	170,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
 
-	if (sha256_transform_asm) {
-#ifdef CONFIG_AS_AVX
-		if (sha256_transform_asm == sha256_transform_avx)
-			pr_info("Using AVX optimized SHA-256 implementation\n");
-#ifdef CONFIG_AS_AVX2
-		else if (sha256_transform_asm == sha256_transform_rorx)
-			pr_info("Using AVX2 optimized SHA-256 implementation\n");
+static bool avx2_usable(void)
+{
+	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
+		    boot_cpu_has(X86_FEATURE_BMI2))
+		return true;
+
+	return false;
+}
+
+static int register_sha256_avx2(void)
+{
+	if (avx2_usable())
+		return crypto_register_shashes(sha256_avx2_algs,
+				ARRAY_SIZE(sha256_avx2_algs));
+	return 0;
+}
+
+static void unregister_sha256_avx2(void)
+{
+	if (avx2_usable())
+		crypto_unregister_shashes(sha256_avx2_algs,
+				ARRAY_SIZE(sha256_avx2_algs));
+}
+
+#else
+static inline int register_sha256_avx2(void) { return 0; }
+static inline void unregister_sha256_avx2(void) { }
 #endif
-		else
+
+#ifdef CONFIG_AS_SHA256_NI
+asmlinkage void sha256_ni_transform(u32 *digest, const char *data,
+				   u64 rounds);
+
+static int sha256_ni_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	return sha256_update(desc, data, len, sha256_ni_transform);
+}
+
+static int sha256_ni_finup(struct shash_desc *desc, const u8 *data,
+		      unsigned int len, u8 *out)
+{
+	return sha256_finup(desc, data, len, out, sha256_ni_transform);
+}
+
+static int sha256_ni_final(struct shash_desc *desc, u8 *out)
+{
+	return sha256_ni_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha256_ni_algs[] = { {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	sha256_base_init,
+	.update		=	sha256_ni_update,
+	.final		=	sha256_ni_final,
+	.finup		=	sha256_ni_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name =	"sha256-ni",
+		.cra_priority	=	250,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+}, {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	sha224_base_init,
+	.update		=	sha256_ni_update,
+	.final		=	sha256_ni_final,
+	.finup		=	sha256_ni_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name =	"sha224-ni",
+		.cra_priority	=	250,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
+
+static int register_sha256_ni(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+		return crypto_register_shashes(sha256_ni_algs,
+				ARRAY_SIZE(sha256_ni_algs));
+	return 0;
+}
+
+static void unregister_sha256_ni(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+		crypto_unregister_shashes(sha256_ni_algs,
+				ARRAY_SIZE(sha256_ni_algs));
+}
+
+#else
+static inline int register_sha256_ni(void) { return 0; }
+static inline void unregister_sha256_ni(void) { }
 #endif
-			pr_info("Using SSSE3 optimized SHA-256 implementation\n");
-		return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+
+static int __init sha256_ssse3_mod_init(void)
+{
+	if (register_sha256_ssse3())
+		goto fail;
+
+	if (register_sha256_avx()) {
+		unregister_sha256_ssse3();
+		goto fail;
 	}
-	pr_info("Neither AVX nor SSSE3 is available/usable.\n");
 
+	if (register_sha256_avx2()) {
+		unregister_sha256_avx();
+		unregister_sha256_ssse3();
+		goto fail;
+	}
+
+	if (register_sha256_ni()) {
+		unregister_sha256_avx2();
+		unregister_sha256_avx();
+		unregister_sha256_ssse3();
+		goto fail;
+	}
+
+	return 0;
+fail:
 	return -ENODEV;
 }
 
 static void __exit sha256_ssse3_mod_fini(void)
 {
-	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+	unregister_sha256_ni();
+	unregister_sha256_avx2();
+	unregister_sha256_avx();
+	unregister_sha256_ssse3();
 }
 
 module_init(sha256_ssse3_mod_init);
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 0c8c38c101ac..34e5083d6f36 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -41,19 +41,11 @@
 
 asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data,
 				       u64 rounds);
-#ifdef CONFIG_AS_AVX
-asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
-				     u64 rounds);
-#endif
-#ifdef CONFIG_AS_AVX2
-asmlinkage void sha512_transform_rorx(u64 *digest, const char *data,
-				      u64 rounds);
-#endif
 
-static void (*sha512_transform_asm)(u64 *, const char *, u64);
+typedef void (sha512_transform_fn)(u64 *digest, const char *data, u64 rounds);
 
-static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
-			       unsigned int len)
+static int sha512_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len, sha512_transform_fn *sha512_xform)
 {
 	struct sha512_state *sctx = shash_desc_ctx(desc);
 
@@ -66,14 +58,14 @@ static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
 
 	kernel_fpu_begin();
 	sha512_base_do_update(desc, data, len,
-			      (sha512_block_fn *)sha512_transform_asm);
+			      (sha512_block_fn *)sha512_xform);
 	kernel_fpu_end();
 
 	return 0;
 }
 
-static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
-			      unsigned int len, u8 *out)
+static int sha512_finup(struct shash_desc *desc, const u8 *data,
+	      unsigned int len, u8 *out, sha512_transform_fn *sha512_xform)
 {
 	if (!irq_fpu_usable())
 		return crypto_sha512_finup(desc, data, len, out);
@@ -81,20 +73,32 @@ static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
 	kernel_fpu_begin();
 	if (len)
 		sha512_base_do_update(desc, data, len,
-				      (sha512_block_fn *)sha512_transform_asm);
-	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm);
+				      (sha512_block_fn *)sha512_xform);
+	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_xform);
 	kernel_fpu_end();
 
 	return sha512_base_finish(desc, out);
 }
 
+static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len)
+{
+	return sha512_update(desc, data, len, sha512_transform_ssse3);
+}
+
+static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
+	      unsigned int len, u8 *out)
+{
+	return sha512_finup(desc, data, len, out, sha512_transform_ssse3);
+}
+
 /* Add padding and return the message digest. */
 static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
 {
 	return sha512_ssse3_finup(desc, NULL, 0, out);
 }
 
-static struct shash_alg algs[] = { {
+static struct shash_alg sha512_ssse3_algs[] = { {
 	.digestsize	=	SHA512_DIGEST_SIZE,
 	.init		=	sha512_base_init,
 	.update		=	sha512_ssse3_update,
@@ -126,8 +130,25 @@ static struct shash_alg algs[] = { {
 	}
 } };
 
+static int register_sha512_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		return crypto_register_shashes(sha512_ssse3_algs,
+			ARRAY_SIZE(sha512_ssse3_algs));
+	return 0;
+}
+
+static void unregister_sha512_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		crypto_unregister_shashes(sha512_ssse3_algs,
+			ARRAY_SIZE(sha512_ssse3_algs));
+}
+
 #ifdef CONFIG_AS_AVX
-static bool __init avx_usable(void)
+asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
+				     u64 rounds);
+static bool avx_usable(void)
 {
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
 		if (cpu_has_avx)
@@ -137,47 +158,185 @@ static bool __init avx_usable(void)
 
 	return true;
 }
-#endif
 
-static int __init sha512_ssse3_mod_init(void)
+static int sha512_avx_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len)
 {
-	/* test for SSSE3 first */
-	if (cpu_has_ssse3)
-		sha512_transform_asm = sha512_transform_ssse3;
+	return sha512_update(desc, data, len, sha512_transform_avx);
+}
 
-#ifdef CONFIG_AS_AVX
-	/* allow AVX to override SSSE3, it's a little faster */
-	if (avx_usable()) {
-#ifdef CONFIG_AS_AVX2
-		if (boot_cpu_has(X86_FEATURE_AVX2))
-			sha512_transform_asm = sha512_transform_rorx;
-		else
-#endif
-			sha512_transform_asm = sha512_transform_avx;
+static int sha512_avx_finup(struct shash_desc *desc, const u8 *data,
+	      unsigned int len, u8 *out)
+{
+	return sha512_finup(desc, data, len, out, sha512_transform_avx);
+}
+
+/* Add padding and return the message digest. */
+static int sha512_avx_final(struct shash_desc *desc, u8 *out)
+{
+	return sha512_avx_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha512_avx_algs[] = { {
+	.digestsize	=	SHA512_DIGEST_SIZE,
+	.init		=	sha512_base_init,
+	.update		=	sha512_avx_update,
+	.final		=	sha512_avx_final,
+	.finup		=	sha512_avx_finup,
+	.descsize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha512",
+		.cra_driver_name =	"sha512-avx",
+		.cra_priority	=	160,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA512_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
 	}
-#endif
+},  {
+	.digestsize	=	SHA384_DIGEST_SIZE,
+	.init		=	sha384_base_init,
+	.update		=	sha512_avx_update,
+	.final		=	sha512_avx_final,
+	.finup		=	sha512_avx_finup,
+	.descsize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha384",
+		.cra_driver_name =	"sha384-avx",
+		.cra_priority	=	160,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA384_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
 
-	if (sha512_transform_asm) {
-#ifdef CONFIG_AS_AVX
-		if (sha512_transform_asm == sha512_transform_avx)
-			pr_info("Using AVX optimized SHA-512 implementation\n");
-#ifdef CONFIG_AS_AVX2
-		else if (sha512_transform_asm == sha512_transform_rorx)
-			pr_info("Using AVX2 optimized SHA-512 implementation\n");
+static int register_sha512_avx(void)
+{
+	if (avx_usable())
+		return crypto_register_shashes(sha512_avx_algs,
+			ARRAY_SIZE(sha512_avx_algs));
+	return 0;
+}
+
+static void unregister_sha512_avx(void)
+{
+	if (avx_usable())
+		crypto_unregister_shashes(sha512_avx_algs,
+			ARRAY_SIZE(sha512_avx_algs));
+}
+#else
+static inline int register_sha512_avx(void) { return 0; }
+static inline void unregister_sha512_avx(void) { }
 #endif
-		else
+
+#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX)
+asmlinkage void sha512_transform_rorx(u64 *digest, const char *data,
+				      u64 rounds);
+
+static int sha512_avx2_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len)
+{
+	return sha512_update(desc, data, len, sha512_transform_rorx);
+}
+
+static int sha512_avx2_finup(struct shash_desc *desc, const u8 *data,
+	      unsigned int len, u8 *out)
+{
+	return sha512_finup(desc, data, len, out, sha512_transform_rorx);
+}
+
+/* Add padding and return the message digest. */
+static int sha512_avx2_final(struct shash_desc *desc, u8 *out)
+{
+	return sha512_avx2_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha512_avx2_algs[] = { {
+	.digestsize	=	SHA512_DIGEST_SIZE,
+	.init		=	sha512_base_init,
+	.update		=	sha512_avx2_update,
+	.final		=	sha512_avx2_final,
+	.finup		=	sha512_avx2_finup,
+	.descsize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha512",
+		.cra_driver_name =	"sha512-avx2",
+		.cra_priority	=	170,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA512_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+},  {
+	.digestsize	=	SHA384_DIGEST_SIZE,
+	.init		=	sha384_base_init,
+	.update		=	sha512_avx2_update,
+	.final		=	sha512_avx2_final,
+	.finup		=	sha512_avx2_finup,
+	.descsize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha384",
+		.cra_driver_name =	"sha384-avx2",
+		.cra_priority	=	170,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA384_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
+
+static bool avx2_usable(void)
+{
+	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
+		    boot_cpu_has(X86_FEATURE_BMI2))
+		return true;
+
+	return false;
+}
+
+static int register_sha512_avx2(void)
+{
+	if (avx2_usable())
+		return crypto_register_shashes(sha512_avx2_algs,
+			ARRAY_SIZE(sha512_avx2_algs));
+	return 0;
+}
+
+static void unregister_sha512_avx2(void)
+{
+	if (avx2_usable())
+		crypto_unregister_shashes(sha512_avx2_algs,
+			ARRAY_SIZE(sha512_avx2_algs));
+}
+#else
+static inline int register_sha512_avx2(void) { return 0; }
+static inline void unregister_sha512_avx2(void) { }
 #endif
-			pr_info("Using SSSE3 optimized SHA-512 implementation\n");
-		return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+
+static int __init sha512_ssse3_mod_init(void)
+{
+	if (register_sha512_ssse3())
+		goto fail;
+
+	if (register_sha512_avx()) {
+		unregister_sha512_ssse3();
+		goto fail;
 	}
-	pr_info("Neither AVX nor SSSE3 is available/usable.\n");
 
+	if (register_sha512_avx2()) {
+		unregister_sha512_avx();
+		unregister_sha512_ssse3();
+		goto fail;
+	}
+
+	return 0;
+fail:
 	return -ENODEV;
 }
 
 static void __exit sha512_ssse3_mod_fini(void)
 {
-	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+	unregister_sha512_avx2();
+	unregister_sha512_avx();
+	unregister_sha512_ssse3();
 }
 
 module_init(sha512_ssse3_mod_init);
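
With the per-extension registration above (the sha256 glue uses the same scheme), every usable provider stays registered and the crypto API resolves "sha512" to the highest cra_priority: sha512-avx2 (170) beats sha512-avx (160), which beats the SSSE3 variant registered first. A minimal sketch of how a caller would observe that selection; the demo function is an assumption for illustration, the shash calls are the standard API:

    #include <crypto/hash.h>
    #include <linux/err.h>
    #include <linux/printk.h>

    /* Illustrative only: report which sha512 provider the API selected. */
    static int sha512_driver_demo(void)
    {
    	struct crypto_shash *tfm = crypto_alloc_shash("sha512", 0, 0);

    	if (IS_ERR(tfm))
    		return PTR_ERR(tfm);

    	/* Prints e.g. "sha512-avx2" on AVX2+BMI2 capable hardware. */
    	pr_info("sha512 resolved to %s\n",
    		crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)));

    	crypto_free_shash(tfm);
    	return 0;
    }
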
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 48ee3e175dac..7240821137fd 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -348,6 +348,13 @@ config CRYPTO_XTS
 	  key size 256, 384 or 512 bits. This implementation currently
 	  can't handle a sectorsize which is not a multiple of 16 bytes.
 
+config CRYPTO_KEYWRAP
+	tristate "Key wrapping support"
+	select CRYPTO_BLKCIPHER
+	help
+	  Support for key wrapping (NIST SP800-38F / RFC3394) without
+	  padding.
+
 comment "Hash modes"
 
 config CRYPTO_CMAC
@@ -597,17 +604,18 @@ config CRYPTO_SHA1
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
 
 config CRYPTO_SHA1_SSSE3
-	tristate "SHA1 digest algorithm (SSSE3/AVX/AVX2)"
+	tristate "SHA1 digest algorithm (SSSE3/AVX/AVX2/SHA-NI)"
 	depends on X86 && 64BIT
 	select CRYPTO_SHA1
 	select CRYPTO_HASH
 	help
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
 	  using Supplemental SSE3 (SSSE3) instructions or Advanced Vector
-	  Extensions (AVX/AVX2), when available.
+	  Extensions (AVX/AVX2) or SHA-NI (SHA Extensions New Instructions),
+	  when available.
 
 config CRYPTO_SHA256_SSSE3
-	tristate "SHA256 digest algorithm (SSSE3/AVX/AVX2)"
+	tristate "SHA256 digest algorithm (SSSE3/AVX/AVX2/SHA-NI)"
 	depends on X86 && 64BIT
 	select CRYPTO_SHA256
 	select CRYPTO_HASH
@@ -615,7 +623,8 @@ config CRYPTO_SHA256_SSSE3
 	  SHA-256 secure hash standard (DFIPS 180-2) implemented
 	  using Supplemental SSE3 (SSSE3) instructions, or Advanced Vector
 	  Extensions version 1 (AVX1), or Advanced Vector Extensions
-	  version 2 (AVX2) instructions, when available.
+	  version 2 (AVX2) instructions, or SHA-NI (SHA Extensions New
+	  Instructions) when available.
 
 config CRYPTO_SHA512_SSSE3
 	tristate "SHA512 digest algorithm (SSSE3/AVX/AVX2)"
diff --git a/crypto/Makefile b/crypto/Makefile
index e2c59819b236..f7aba923458d 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -31,10 +31,13 @@ obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o
 obj-$(CONFIG_CRYPTO_PCOMP2) += pcompress.o
 obj-$(CONFIG_CRYPTO_AKCIPHER2) += akcipher.o
 
-$(obj)/rsakey-asn1.o: $(obj)/rsakey-asn1.c $(obj)/rsakey-asn1.h
-clean-files += rsakey-asn1.c rsakey-asn1.h
+$(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h
+$(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h
+clean-files += rsapubkey-asn1.c rsapubkey-asn1.h
+clean-files += rsaprivkey-asn1.c rsaprivkey-asn1.h
 
-rsa_generic-y := rsakey-asn1.o
+rsa_generic-y := rsapubkey-asn1.o
+rsa_generic-y += rsaprivkey-asn1.o
 rsa_generic-y += rsa.o
 rsa_generic-y += rsa_helper.o
 obj-$(CONFIG_CRYPTO_RSA) += rsa_generic.o
@@ -67,6 +70,7 @@ obj-$(CONFIG_CRYPTO_CTS) += cts.o
 obj-$(CONFIG_CRYPTO_LRW) += lrw.o
 obj-$(CONFIG_CRYPTO_XTS) += xts.o
 obj-$(CONFIG_CRYPTO_CTR) += ctr.o
+obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o
 obj-$(CONFIG_CRYPTO_GCM) += gcm.o
 obj-$(CONFIG_CRYPTO_CCM) += ccm.o
 obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o
diff --git a/crypto/akcipher.c b/crypto/akcipher.c
index 528ae6aa9bff..120ec042ec9e 100644
--- a/crypto/akcipher.c
+++ b/crypto/akcipher.c
@@ -21,7 +21,6 @@
 #include <linux/cryptouser.h>
 #include <net/netlink.h>
 #include <crypto/akcipher.h>
-#include <crypto/public_key.h>
 #include "internal.h"
 
 #ifdef CONFIG_NET
diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index d20c0b4b880e..325575caf6b4 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -49,11 +49,12 @@ static int pkcs7_digest(struct pkcs7_message *pkcs7,
 	sinfo->sig.digest_size = digest_size = crypto_shash_digestsize(tfm);
 
 	ret = -ENOMEM;
-	digest = kzalloc(digest_size + desc_size, GFP_KERNEL);
+	digest = kzalloc(ALIGN(digest_size, __alignof__(*desc)) + desc_size,
+			 GFP_KERNEL);
 	if (!digest)
 		goto error_no_desc;
 
-	desc = digest + digest_size;
+	desc = PTR_ALIGN(digest + digest_size, __alignof__(*desc));
 	desc->tfm   = tfm;
 	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index af71878dc15b..3000ea3b6687 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -546,9 +546,9 @@ int x509_decode_time(time64_t *_t,  size_t hdrlen,
 	if (year < 1970 ||
 	    mon < 1 || mon > 12 ||
 	    day < 1 || day > mon_len ||
-	    hour < 0 || hour > 23 ||
-	    min < 0 || min > 59 ||
-	    sec < 0 || sec > 59)
+	    hour > 23 ||
+	    min > 59 ||
+	    sec > 59)
 		goto invalid_time;
 	
 	*_t = mktime64(year, mon, day, hour, min, sec);
diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
index 197096632412..68c3c40501ab 100644
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -194,14 +194,15 @@ int x509_get_sig_params(struct x509_certificate *cert)
 	 * digest storage space.
 	 */
 	ret = -ENOMEM;
-	digest = kzalloc(digest_size + desc_size, GFP_KERNEL);
+	digest = kzalloc(ALIGN(digest_size, __alignof__(*desc)) + desc_size,
+			 GFP_KERNEL);
 	if (!digest)
 		goto error;
 
 	cert->sig.digest = digest;
 	cert->sig.digest_size = digest_size;
 
-	desc = digest + digest_size;
+	desc = PTR_ALIGN(digest + digest_size, __alignof__(*desc));
 	desc->tfm = tfm;
 	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c
index ceea83d13168..597cedd3531c 100644
--- a/crypto/jitterentropy-kcapi.c
+++ b/crypto/jitterentropy-kcapi.c
@@ -98,10 +98,6 @@ void jent_get_nstime(__u64 *out)
 	 * If random_get_entropy does not return a value (which is possible on,
 	 * for example, MIPS), invoke __getnstimeofday
 	 * hoping that there are timers we can work with.
-	 *
-	 * The list of available timers can be obtained from
-	 * /sys/devices/system/clocksource/clocksource0/available_clocksource
-	 * and are registered with clocksource_register()
 	 */
 	if ((0 == tmp) &&
 	   (0 == __getnstimeofday(&ts))) {
diff --git a/crypto/keywrap.c b/crypto/keywrap.c
new file mode 100644
index 000000000000..b1d106ce55f3
--- /dev/null
+++ b/crypto/keywrap.c
@@ -0,0 +1,419 @@
+/*
+ * Key Wrapping: RFC3394 / NIST SP800-38F
+ *
+ * Copyright (C) 2015, Stephan Mueller <smueller@chronox.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, and the entire permission notice in its entirety,
+ *    including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior
+ *    written permission.
+ *
+ * ALTERNATIVELY, this product may be distributed under the terms of
+ * the GNU General Public License, in which case the provisions of the GPL2
+ * are required INSTEAD OF the above restrictions.  (This clause is
+ * necessary due to a potential bad interaction between the GPL and
+ * the restrictions contained in a BSD-style copyright.)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/*
+ * Note for using key wrapping:
+ *
+ *	* The result of the encryption operation is the ciphertext starting
+ *	  with the 2nd semiblock. The first semiblock is returned as the IV.
+ *	  The IV used to start the encryption operation is the default IV.
+ *
+ *	* The input for the decryption is the first semiblock handed in as an
+ *	  IV. The ciphertext is the data starting with the 2nd semiblock. The
+ *	  return code of the decryption operation will be EBADMSG in case an
+ *	  integrity error occurs.
+ *
+ * To obtain the full result of an encryption as expected by SP800-38F, the
+ * caller must allocate a buffer of plaintext length + 8 bytes:
+ *
+ *	unsigned int datalen = ptlen + crypto_skcipher_ivsize(tfm);
+ *	u8 data[datalen];
+ *	u8 *iv = data;
+ *	u8 *pt = data + crypto_skcipher_ivsize(tfm);
+ *		<ensure that pt contains the plaintext of size ptlen>
+ *	sg_init_one(&sg, pt, ptlen);
+ *	skcipher_request_set_crypt(req, &sg, &sg, ptlen, iv);
+ *
+ *	==> After encryption, data now contains full KW result as per SP800-38F.
+ *
+ * In case of decryption, ciphertext now already has the expected length
+ * and must be segmented appropriately:
+ *
+ *	unsigned int datalen = CTLEN;
+ *	u8 data[datalen];
+ *		<ensure that data contains full ciphertext>
+ *	u8 *iv = data;
+ *	u8 *ct = data + crypto_skcipher_ivsize(tfm);
+ *	unsigned int ctlen = datalen - crypto_skcipher_ivsize(tfm);
+ *	sg_init_one(&sg, ct, ctlen);
+ *	skcipher_request_set_crypt(req, &sg, &sg, ctlen, iv);
+ *
+ *	==> After decryption (which hopefully does not return EBADMSG), the ct
+ *	pointer now points to the plaintext of size ctlen.
+ *
+ * Note 2: KWP is not implemented as this would defeat in-place operation.
+ *	   Callers who want to wrap non-aligned data should simply pad the
+ *	   input with zeros to fill it up to the 8 byte boundary.
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/skcipher.h>
+
+struct crypto_kw_ctx {
+	struct crypto_cipher *child;
+};
+
+struct crypto_kw_block {
+#define SEMIBSIZE 8
+	u8 A[SEMIBSIZE];
+	u8 R[SEMIBSIZE];
+};
+
+/* convert a 64 bit integer into its big-endian byte representation */
+static inline void crypto_kw_cpu_to_be64(u64 val, u8 *buf)
+{
+	__be64 *a = (__be64 *)buf;
+
+	*a = cpu_to_be64(val);
+}
+
+/*
+ * Fast-forward the SGL to the offset of "end" minus SEMIBSIZE.
+ * The resulting start position in the SGL is returned through the
+ * walk variable.
+ */
+static void crypto_kw_scatterlist_ff(struct scatter_walk *walk,
+				     struct scatterlist *sg,
+				     unsigned int end)
+{
+	unsigned int skip = 0;
+
+	/* The caller should only operate on full SEMIBLOCKs. */
+	BUG_ON(end < SEMIBSIZE);
+
+	skip = end - SEMIBSIZE;
+	while (sg) {
+		if (sg->length > skip) {
+			scatterwalk_start(walk, sg);
+			scatterwalk_advance(walk, skip);
+			break;
+		} else
+			skip -= sg->length;
+
+		sg = sg_next(sg);
+	}
+}
+
+static int crypto_kw_decrypt(struct blkcipher_desc *desc,
+			     struct scatterlist *dst, struct scatterlist *src,
+			     unsigned int nbytes)
+{
+	struct crypto_blkcipher *tfm = desc->tfm;
+	struct crypto_kw_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_cipher *child = ctx->child;
+
+	unsigned long alignmask = max_t(unsigned long, SEMIBSIZE,
+					crypto_cipher_alignmask(child));
+	unsigned int i;
+
+	u8 blockbuf[sizeof(struct crypto_kw_block) + alignmask];
+	struct crypto_kw_block *block = (struct crypto_kw_block *)
+					PTR_ALIGN(blockbuf + 0, alignmask + 1);
+
+	u64 t = 6 * ((nbytes) >> 3);
+	struct scatterlist *lsrc, *ldst;
+	int ret = 0;
+
+	/*
+	 * Require at least 2 semiblocks (note: the 3rd semiblock that is
+	 * required by SP800-38F is the IV).
+	 */
+	if (nbytes < (2 * SEMIBSIZE) || nbytes % SEMIBSIZE)
+		return -EINVAL;
+
+	/* Place the IV into block A */
+	memcpy(block->A, desc->info, SEMIBSIZE);
+
+	/*
+	 * src scatterlist is read-only. dst scatterlist is r/w. During the
+	 * first loop, lsrc points to src and ldst to dst. For any
+	 * subsequent round, the code operates on dst only.
+	 */
+	lsrc = src;
+	ldst = dst;
+
+	for (i = 0; i < 6; i++) {
+		u8 tbe_buffer[SEMIBSIZE + alignmask];
+		/* alignment for the crypto_xor and the _to_be64 operation */
+		u8 *tbe = PTR_ALIGN(tbe_buffer + 0, alignmask + 1);
+		unsigned int tmp_nbytes = nbytes;
+		struct scatter_walk src_walk, dst_walk;
+
+		while (tmp_nbytes) {
+			/* move pointer by tmp_nbytes in the SGL */
+			crypto_kw_scatterlist_ff(&src_walk, lsrc, tmp_nbytes);
+			/* get the source block */
+			scatterwalk_copychunks(block->R, &src_walk, SEMIBSIZE,
+					       false);
+
+			/* perform KW operation: get counter as byte string */
+			crypto_kw_cpu_to_be64(t, tbe);
+			/* perform KW operation: modify IV with counter */
+			crypto_xor(block->A, tbe, SEMIBSIZE);
+			t--;
+			/* perform KW operation: decrypt block */
+			crypto_cipher_decrypt_one(child, (u8*)block,
+						  (u8*)block);
+
+			/* move pointer by tmp_nbytes in the SGL */
+			crypto_kw_scatterlist_ff(&dst_walk, ldst, tmp_nbytes);
+			/* Copy block->R into place */
+			scatterwalk_copychunks(block->R, &dst_walk, SEMIBSIZE,
+					       true);
+
+			tmp_nbytes -= SEMIBSIZE;
+		}
+
+		/* we now start to operate on the dst SGL only */
+		lsrc = dst;
+		ldst = dst;
+	}
+
+	/* Perform authentication check */
+	if (crypto_memneq("\xA6\xA6\xA6\xA6\xA6\xA6\xA6\xA6", block->A,
+			  SEMIBSIZE))
+		ret = -EBADMSG;
+
+	memzero_explicit(block, sizeof(struct crypto_kw_block));
+
+	return ret;
+}
+
+static int crypto_kw_encrypt(struct blkcipher_desc *desc,
+			     struct scatterlist *dst, struct scatterlist *src,
+			     unsigned int nbytes)
+{
+	struct crypto_blkcipher *tfm = desc->tfm;
+	struct crypto_kw_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_cipher *child = ctx->child;
+
+	unsigned long alignmask = max_t(unsigned long, SEMIBSIZE,
+					crypto_cipher_alignmask(child));
+	unsigned int i;
+
+	u8 blockbuf[sizeof(struct crypto_kw_block) + alignmask];
+	struct crypto_kw_block *block = (struct crypto_kw_block *)
+					PTR_ALIGN(blockbuf + 0, alignmask + 1);
+
+	u64 t = 1;
+	struct scatterlist *lsrc, *ldst;
+
+	/*
+	 * Require at least 2 semiblocks (note: the 3rd semiblock that is
+	 * required by SP800-38F is the IV, which occupies the first
+	 * semiblock of the full output buffer described in the note at
+	 * the top of this file; that buffer is therefore one semiblock
+	 * larger than src). Also ensure that the given data is a
+	 * multiple of the semiblock size.
+	 */
+	if (nbytes < (2 * SEMIBSIZE) || nbytes % SEMIBSIZE)
+		return -EINVAL;
+
+	/*
+	 * Place the predefined IV into block A. For encryption the caller
+	 * does not need to provide an IV; instead, the final IV must be
+	 * fetched from desc->info after the operation completes.
+	 */
+	memcpy(block->A, "\xA6\xA6\xA6\xA6\xA6\xA6\xA6\xA6", SEMIBSIZE);
+
+	/*
+	 * src scatterlist is read-only. dst scatterlist is r/w. During the
+	 * first loop, lsrc points to src and ldst to dst. For any
+	 * subsequent round, the code operates on dst only.
+	 */
+	lsrc = src;
+	ldst = dst;
+
+	for (i = 0; i < 6; i++) {
+		u8 tbe_buffer[SEMIBSIZE + alignmask];
+		u8 *tbe = PTR_ALIGN(tbe_buffer + 0, alignmask + 1);
+		unsigned int tmp_nbytes = nbytes;
+		struct scatter_walk src_walk, dst_walk;
+
+		scatterwalk_start(&src_walk, lsrc);
+		scatterwalk_start(&dst_walk, ldst);
+
+		while (tmp_nbytes) {
+			/* get the source block */
+			scatterwalk_copychunks(block->R, &src_walk, SEMIBSIZE,
+					       false);
+
+			/* perform KW operation: encrypt block */
+			crypto_cipher_encrypt_one(child, (u8 *)block,
+						  (u8 *)block);
+			/* perform KW operation: get counter as byte string */
+			crypto_kw_cpu_to_be64(t, tbe);
+			/* perform KW operation: modify IV with counter */
+			crypto_xor(block->A, tbe, SEMIBSIZE);
+			t++;
+
+			/* Copy block->R into place */
+			scatterwalk_copychunks(block->R, &dst_walk, SEMIBSIZE,
+					       true);
+
+			tmp_nbytes -= SEMIBSIZE;
+		}
+
+		/* we now start to operate on the dst SGL only */
+		lsrc = dst;
+		ldst = dst;
+	}
+
+	/* establish the IV for the caller to pick up */
+	memcpy(desc->info, block->A, SEMIBSIZE);
+
+	memzero_explicit(block, sizeof(struct crypto_kw_block));
+
+	return 0;
+}
+
+static int crypto_kw_setkey(struct crypto_tfm *parent, const u8 *key,
+			    unsigned int keylen)
+{
+	struct crypto_kw_ctx *ctx = crypto_tfm_ctx(parent);
+	struct crypto_cipher *child = ctx->child;
+	int err;
+
+	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
+				       CRYPTO_TFM_REQ_MASK);
+	err = crypto_cipher_setkey(child, key, keylen);
+	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
+				     CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int crypto_kw_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
+	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct crypto_kw_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_cipher *cipher;
+
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = cipher;
+	return 0;
+}
+
+static void crypto_kw_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_kw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_cipher(ctx->child);
+}
+
+static struct crypto_instance *crypto_kw_alloc(struct rtattr **tb)
+{
+	struct crypto_instance *inst = NULL;
+	struct crypto_alg *alg = NULL;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+	if (err)
+		return ERR_PTR(err);
+
+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
+				  CRYPTO_ALG_TYPE_MASK);
+	if (IS_ERR(alg))
+		return ERR_CAST(alg);
+
+	inst = ERR_PTR(-EINVAL);
+	/* Section 5.1 requirement for KW */
+	if (alg->cra_blocksize != sizeof(struct crypto_kw_block))
+		goto err;
+
+	inst = crypto_alloc_instance("kw", alg);
+	if (IS_ERR(inst))
+		goto err;
+
+	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
+	inst->alg.cra_priority = alg->cra_priority;
+	inst->alg.cra_blocksize = SEMIBSIZE;
+	inst->alg.cra_alignmask = 0;
+	inst->alg.cra_type = &crypto_blkcipher_type;
+	inst->alg.cra_blkcipher.ivsize = SEMIBSIZE;
+	inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
+	inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize;
+
+	inst->alg.cra_ctxsize = sizeof(struct crypto_kw_ctx);
+
+	inst->alg.cra_init = crypto_kw_init_tfm;
+	inst->alg.cra_exit = crypto_kw_exit_tfm;
+
+	inst->alg.cra_blkcipher.setkey = crypto_kw_setkey;
+	inst->alg.cra_blkcipher.encrypt = crypto_kw_encrypt;
+	inst->alg.cra_blkcipher.decrypt = crypto_kw_decrypt;
+
+err:
+	crypto_mod_put(alg);
+	return inst;
+}
+
+static void crypto_kw_free(struct crypto_instance *inst)
+{
+	crypto_drop_spawn(crypto_instance_ctx(inst));
+	kfree(inst);
+}
+
+static struct crypto_template crypto_kw_tmpl = {
+	.name = "kw",
+	.alloc = crypto_kw_alloc,
+	.free = crypto_kw_free,
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_kw_init(void)
+{
+	return crypto_register_template(&crypto_kw_tmpl);
+}
+
+static void __exit crypto_kw_exit(void)
+{
+	crypto_unregister_template(&crypto_kw_tmpl);
+}
+
+module_init(crypto_kw_init);
+module_exit(crypto_kw_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Stephan Mueller <smueller@chronox.de>");
+MODULE_DESCRIPTION("Key Wrapping (RFC3394 / NIST SP800-38F)");
+MODULE_ALIAS_CRYPTO("kw");
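
A self-contained wrapping sketch following the buffer contract documented at the top of keywrap.c. The helper name kw_wrap() is an assumption, the skcipher calls are the interface the usage note itself references, and synchronous completion is assumed for brevity:

    #include <crypto/skcipher.h>
    #include <linux/err.h>
    #include <linux/scatterlist.h>
    #include <linux/slab.h>

    /*
     * Illustrative only: data[] holds ivsize bytes of IV slot followed by
     * ptlen bytes of plaintext, where ptlen is a multiple of 8 and at
     * least 16.  On success data[] contains the full SP800-38F result.
     */
    static int kw_wrap(const u8 *key, unsigned int klen,
    		   u8 *data, unsigned int ptlen)
    {
    	struct crypto_skcipher *tfm;
    	struct skcipher_request *req = NULL;
    	struct scatterlist sg;
    	int ret;

    	tfm = crypto_alloc_skcipher("kw(aes)", 0, 0);
    	if (IS_ERR(tfm))
    		return PTR_ERR(tfm);

    	ret = crypto_skcipher_setkey(tfm, key, klen);
    	if (ret)
    		goto out;

    	req = skcipher_request_alloc(tfm, GFP_KERNEL);
    	if (!req) {
    		ret = -ENOMEM;
    		goto out;
    	}

    	/* IV slot first, plaintext directly after it, in-place cipher. */
    	sg_init_one(&sg, data + crypto_skcipher_ivsize(tfm), ptlen);
    	skcipher_request_set_crypt(req, &sg, &sg, ptlen, data);

    	ret = crypto_skcipher_encrypt(req);
    out:
    	skcipher_request_free(req);
    	crypto_free_skcipher(tfm);
    	return ret;
    }
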
diff --git a/crypto/rsa.c b/crypto/rsa.c
index 466003e1a8cf..1093e041db03 100644
--- a/crypto/rsa.c
+++ b/crypto/rsa.c
@@ -97,24 +97,21 @@ static int rsa_enc(struct akcipher_request *req)
 		goto err_free_c;
 	}
 
-	m = mpi_read_raw_data(req->src, req->src_len);
-	if (!m) {
-		ret = -ENOMEM;
+	ret = -ENOMEM;
+	m = mpi_read_raw_from_sgl(req->src, req->src_len);
+	if (!m)
 		goto err_free_c;
-	}
 
 	ret = _rsa_enc(pkey, c, m);
 	if (ret)
 		goto err_free_m;
 
-	ret = mpi_read_buffer(c, req->dst, req->dst_len, &req->dst_len, &sign);
+	ret = mpi_write_to_sgl(c, req->dst, &req->dst_len, &sign);
 	if (ret)
 		goto err_free_m;
 
-	if (sign < 0) {
+	if (sign < 0)
 		ret = -EBADMSG;
-		goto err_free_m;
-	}
 
 err_free_m:
 	mpi_free(m);
@@ -145,25 +142,21 @@ static int rsa_dec(struct akcipher_request *req)
 		goto err_free_m;
 	}
 
-	c = mpi_read_raw_data(req->src, req->src_len);
-	if (!c) {
-		ret = -ENOMEM;
+	ret = -ENOMEM;
+	c = mpi_read_raw_from_sgl(req->src, req->src_len);
+	if (!c)
 		goto err_free_m;
-	}
 
 	ret = _rsa_dec(pkey, m, c);
 	if (ret)
 		goto err_free_c;
 
-	ret = mpi_read_buffer(m, req->dst, req->dst_len, &req->dst_len, &sign);
+	ret = mpi_write_to_sgl(m, req->dst, &req->dst_len, &sign);
 	if (ret)
 		goto err_free_c;
 
-	if (sign < 0) {
+	if (sign < 0)
 		ret = -EBADMSG;
-		goto err_free_c;
-	}
-
 err_free_c:
 	mpi_free(c);
 err_free_m:
@@ -193,24 +186,21 @@ static int rsa_sign(struct akcipher_request *req)
 		goto err_free_s;
 	}
 
-	m = mpi_read_raw_data(req->src, req->src_len);
-	if (!m) {
-		ret = -ENOMEM;
+	ret = -ENOMEM;
+	m = mpi_read_raw_from_sgl(req->src, req->src_len);
+	if (!m)
 		goto err_free_s;
-	}
 
 	ret = _rsa_sign(pkey, s, m);
 	if (ret)
 		goto err_free_m;
 
-	ret = mpi_read_buffer(s, req->dst, req->dst_len, &req->dst_len, &sign);
+	ret = mpi_write_to_sgl(s, req->dst, &req->dst_len, &sign);
 	if (ret)
 		goto err_free_m;
 
-	if (sign < 0) {
+	if (sign < 0)
 		ret = -EBADMSG;
-		goto err_free_m;
-	}
 
 err_free_m:
 	mpi_free(m);
@@ -241,7 +231,8 @@ static int rsa_verify(struct akcipher_request *req)
 		goto err_free_m;
 	}
 
-	s = mpi_read_raw_data(req->src, req->src_len);
+	s = mpi_read_raw_from_sgl(req->src, req->src_len);
 	if (!s) {
 		ret = -ENOMEM;
 		goto err_free_m;
@@ -251,14 +242,12 @@ static int rsa_verify(struct akcipher_request *req)
 	if (ret)
 		goto err_free_s;
 
-	ret = mpi_read_buffer(m, req->dst, req->dst_len, &req->dst_len, &sign);
+	ret = mpi_write_to_sgl(m, req->dst, &req->dst_len, &sign);
 	if (ret)
 		goto err_free_s;
 
-	if (sign < 0) {
+	if (sign < 0)
 		ret = -EBADMSG;
-		goto err_free_s;
-	}
 
 err_free_s:
 	mpi_free(s);
@@ -282,13 +271,13 @@ static int rsa_check_key_length(unsigned int len)
 	return -EINVAL;
 }
 
-static int rsa_setkey(struct crypto_akcipher *tfm, const void *key,
-		      unsigned int keylen)
+static int rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key,
+			   unsigned int keylen)
 {
 	struct rsa_key *pkey = akcipher_tfm_ctx(tfm);
 	int ret;
 
-	ret = rsa_parse_key(pkey, key, keylen);
+	ret = rsa_parse_pub_key(pkey, key, keylen);
 	if (ret)
 		return ret;
 
@@ -299,6 +288,30 @@ static int rsa_setkey(struct crypto_akcipher *tfm, const void *key,
 	return ret;
 }
 
+static int rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
+			    unsigned int keylen)
+{
+	struct rsa_key *pkey = akcipher_tfm_ctx(tfm);
+	int ret;
+
+	ret = rsa_parse_priv_key(pkey, key, keylen);
+	if (ret)
+		return ret;
+
+	if (rsa_check_key_length(mpi_get_size(pkey->n) << 3)) {
+		rsa_free_key(pkey);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+static int rsa_max_size(struct crypto_akcipher *tfm)
+{
+	struct rsa_key *pkey = akcipher_tfm_ctx(tfm);
+
+	return pkey->n ? mpi_get_size(pkey->n) : -EINVAL;
+}
+
 static void rsa_exit_tfm(struct crypto_akcipher *tfm)
 {
 	struct rsa_key *pkey = akcipher_tfm_ctx(tfm);
@@ -311,7 +324,9 @@ static struct akcipher_alg rsa = {
 	.decrypt = rsa_dec,
 	.sign = rsa_sign,
 	.verify = rsa_verify,
-	.setkey = rsa_setkey,
+	.set_priv_key = rsa_set_priv_key,
+	.set_pub_key = rsa_set_pub_key,
+	.max_size = rsa_max_size,
 	.exit = rsa_exit_tfm,
 	.base = {
 		.cra_name = "rsa",
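
With setkey split into set_pub_key/set_priv_key and max_size exposed, callers load the key by type and then size result buffers up front from crypto_akcipher_maxsize(), rather than probing with a too-small buffer; the testmgr rework later in this patch exercises exactly this flow. A hedged sketch, with the helper name assumed:

    #include <crypto/akcipher.h>
    #include <linux/types.h>

    /* Illustrative only: load a BER-encoded key of either flavour and
     * return the worst-case output size for result buffers. */
    static int rsa_load_key_demo(struct crypto_akcipher *tfm,
    			     const void *ber, unsigned int len, bool priv)
    {
    	int ret = priv ? crypto_akcipher_set_priv_key(tfm, ber, len) :
    			 crypto_akcipher_set_pub_key(tfm, ber, len);

    	if (ret)
    		return ret;

    	return crypto_akcipher_maxsize(tfm);	/* in bytes */
    }
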
diff --git a/crypto/rsa_helper.c b/crypto/rsa_helper.c
index 8d96ce969b44..d226f48d0907 100644
--- a/crypto/rsa_helper.c
+++ b/crypto/rsa_helper.c
@@ -15,7 +15,8 @@
 #include <linux/err.h>
 #include <linux/fips.h>
 #include <crypto/internal/rsa.h>
-#include "rsakey-asn1.h"
+#include "rsapubkey-asn1.h"
+#include "rsaprivkey-asn1.h"
 
 int rsa_get_n(void *context, size_t hdrlen, unsigned char tag,
 	      const void *value, size_t vlen)
@@ -94,8 +95,8 @@ void rsa_free_key(struct rsa_key *key)
 EXPORT_SYMBOL_GPL(rsa_free_key);
 
 /**
- * rsa_parse_key() - extracts an rsa key from BER encoded buffer
- *		     and stores it in the provided struct rsa_key
+ * rsa_parse_pub_key() - extracts an rsa public key from BER encoded buffer
+ *			 and stores it in the provided struct rsa_key
  *
  * @rsa_key:	struct rsa_key key representation
  * @key:	key in BER format
@@ -103,13 +104,13 @@ EXPORT_SYMBOL_GPL(rsa_free_key);
  *
  * Return:	0 on success or error code in case of error
  */
-int rsa_parse_key(struct rsa_key *rsa_key, const void *key,
-		  unsigned int key_len)
+int rsa_parse_pub_key(struct rsa_key *rsa_key, const void *key,
+		      unsigned int key_len)
 {
 	int ret;
 
 	free_mpis(rsa_key);
-	ret = asn1_ber_decoder(&rsakey_decoder, rsa_key, key, key_len);
+	ret = asn1_ber_decoder(&rsapubkey_decoder, rsa_key, key, key_len);
 	if (ret < 0)
 		goto error;
 
@@ -118,4 +119,31 @@ error:
 	free_mpis(rsa_key);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(rsa_parse_key);
+EXPORT_SYMBOL_GPL(rsa_parse_pub_key);
+
+/**
+ * rsa_parse_priv_key() - extracts an rsa private key from BER encoded buffer
+ *			 and stores it in the provided struct rsa_key
+ *
+ * @rsa_key:	struct rsa_key key representation
+ * @key:	key in BER format
+ * @key_len:	length of key
+ *
+ * Return:	0 on success or error code in case of error
+ */
+int rsa_parse_priv_key(struct rsa_key *rsa_key, const void *key,
+		       unsigned int key_len)
+{
+	int ret;
+
+	free_mpis(rsa_key);
+	ret = asn1_ber_decoder(&rsaprivkey_decoder, rsa_key, key, key_len);
+	if (ret < 0)
+		goto error;
+
+	return 0;
+error:
+	free_mpis(rsa_key);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rsa_parse_priv_key);
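
For direct users of the helper, the parse/free lifecycle is unchanged apart from the split entry points. A minimal sketch, with the demo function name assumed:

    #include <crypto/internal/rsa.h>

    /* Illustrative only: parse a BER private key, use the MPIs, release. */
    static int rsa_parse_demo(const void *ber, unsigned int len)
    {
    	struct rsa_key key = {};
    	int ret = rsa_parse_priv_key(&key, ber, len);

    	if (ret)
    		return ret;
    	/* ... use key.n, key.e, key.d ... */
    	rsa_free_key(&key);
    	return 0;
    }
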
diff --git a/crypto/rsakey.asn1 b/crypto/rsakey.asn1
deleted file mode 100644
index 3c7b5df7b428..000000000000
--- a/crypto/rsakey.asn1
+++ /dev/null
@@ -1,5 +0,0 @@
-RsaKey ::= SEQUENCE {
-	n INTEGER ({ rsa_get_n }),
-	e INTEGER ({ rsa_get_e }),
-	d INTEGER ({ rsa_get_d })
-}
diff --git a/crypto/rsaprivkey.asn1 b/crypto/rsaprivkey.asn1
new file mode 100644
index 000000000000..731aea5edb0c
--- /dev/null
+++ b/crypto/rsaprivkey.asn1
@@ -0,0 +1,11 @@
+RsaPrivKey ::= SEQUENCE {
+	version		INTEGER,
+	n		INTEGER ({ rsa_get_n }),
+	e		INTEGER ({ rsa_get_e }),
+	d		INTEGER ({ rsa_get_d }),
+	prime1		INTEGER,
+	prime2		INTEGER,
+	exponent1	INTEGER,
+	exponent2	INTEGER,
+	coefficient	INTEGER
+}
diff --git a/crypto/rsapubkey.asn1 b/crypto/rsapubkey.asn1
new file mode 100644
index 000000000000..725498e461d2
--- /dev/null
+++ b/crypto/rsapubkey.asn1
@@ -0,0 +1,4 @@
+RsaPubKey ::= SEQUENCE {
+	n INTEGER ({ rsa_get_n }),
+	e INTEGER ({ rsa_get_e })
+}
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index dd5fc1bf6447..7591928be7ca 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -91,7 +91,7 @@ static void crypto_exit_skcipher_ops_blkcipher(struct crypto_tfm *tfm)
 	crypto_free_blkcipher(*ctx);
 }
 
-int crypto_init_skcipher_ops_blkcipher(struct crypto_tfm *tfm)
+static int crypto_init_skcipher_ops_blkcipher(struct crypto_tfm *tfm)
 {
 	struct crypto_alg *calg = tfm->__crt_alg;
 	struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm);
@@ -182,7 +182,7 @@ static void crypto_exit_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
 	crypto_free_ablkcipher(*ctx);
 }
 
-int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
+static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
 {
 	struct crypto_alg *calg = tfm->__crt_alg;
 	struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm);
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 2b00b617daab..46a4a757d478 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -48,6 +48,8 @@
 #define ENCRYPT 1
 #define DECRYPT 0
 
+#define MAX_DIGEST_SIZE		64
+
 /*
  * return a string with the driver name
  */
@@ -950,7 +952,7 @@ static void test_ahash_speed(const char *algo, unsigned int secs,
 	struct tcrypt_result tresult;
 	struct ahash_request *req;
 	struct crypto_ahash *tfm;
-	static char output[1024];
+	char *output;
 	int i, ret;
 
 	tfm = crypto_alloc_ahash(algo, 0, 0);
@@ -963,9 +965,9 @@ static void test_ahash_speed(const char *algo, unsigned int secs,
 	printk(KERN_INFO "\ntesting speed of async %s (%s)\n", algo,
 			get_driver_name(crypto_ahash, tfm));
 
-	if (crypto_ahash_digestsize(tfm) > sizeof(output)) {
-		pr_err("digestsize(%u) > outputbuffer(%zu)\n",
-		       crypto_ahash_digestsize(tfm), sizeof(output));
+	if (crypto_ahash_digestsize(tfm) > MAX_DIGEST_SIZE) {
+		pr_err("digestsize(%u) > %d\n", crypto_ahash_digestsize(tfm),
+		       MAX_DIGEST_SIZE);
 		goto out;
 	}
 
@@ -980,6 +982,10 @@ static void test_ahash_speed(const char *algo, unsigned int secs,
 	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				   tcrypt_complete, &tresult);
 
+	output = kmalloc(MAX_DIGEST_SIZE, GFP_KERNEL);
+	if (!output)
+		goto out_nomem;
+
 	for (i = 0; speed[i].blen != 0; i++) {
 		if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) {
 			pr_err("template (%u) too big for tvmem (%lu)\n",
@@ -1006,6 +1012,9 @@ static void test_ahash_speed(const char *algo, unsigned int secs,
 		}
 	}
 
+	kfree(output);
+
+out_nomem:
 	ahash_request_free(req);
 
 out:
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index fa18753f5c34..ae8c57fd8bc7 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1034,12 +1034,22 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
 
 		q = data;
 		if (memcmp(q, template[i].result, template[i].rlen)) {
-			pr_err("alg: skcipher%s: Test %d failed on %s for %s\n",
+			pr_err("alg: skcipher%s: Test %d failed (invalid result) on %s for %s\n",
 			       d, j, e, algo);
 			hexdump(q, template[i].rlen);
 			ret = -EINVAL;
 			goto out;
 		}
+
+		if (template[i].iv_out &&
+		    memcmp(iv, template[i].iv_out,
+			   crypto_skcipher_ivsize(tfm))) {
+			pr_err("alg: skcipher%s: Test %d failed (invalid output IV) on %s for %s\n",
+			       d, j, e, algo);
+			hexdump(iv, crypto_skcipher_ivsize(tfm));
+			ret = -EINVAL;
+			goto out;
+		}
 	}
 
 	j = 0;
@@ -1845,34 +1855,34 @@ static int do_test_rsa(struct crypto_akcipher *tfm,
 	struct tcrypt_result result;
 	unsigned int out_len_max, out_len = 0;
 	int err = -ENOMEM;
+	struct scatterlist src, dst, src_tab[2];
 
 	req = akcipher_request_alloc(tfm, GFP_KERNEL);
 	if (!req)
 		return err;
 
 	init_completion(&result.completion);
-	err = crypto_akcipher_setkey(tfm, vecs->key, vecs->key_len);
-	if (err)
-		goto free_req;
 
-	akcipher_request_set_crypt(req, vecs->m, outbuf_enc, vecs->m_size,
-				   out_len);
-	/* expect this to fail, and update the required buf len */
-	crypto_akcipher_encrypt(req);
-	out_len = req->dst_len;
-	if (!out_len) {
-		err = -EINVAL;
+	if (vecs->public_key_vec)
+		err = crypto_akcipher_set_pub_key(tfm, vecs->key,
+						  vecs->key_len);
+	else
+		err = crypto_akcipher_set_priv_key(tfm, vecs->key,
+						   vecs->key_len);
+	if (err)
 		goto free_req;
-	}
 
-	out_len_max = out_len;
-	err = -ENOMEM;
+	out_len_max = crypto_akcipher_maxsize(tfm);
 	outbuf_enc = kzalloc(out_len_max, GFP_KERNEL);
 	if (!outbuf_enc)
 		goto free_req;
 
-	akcipher_request_set_crypt(req, vecs->m, outbuf_enc, vecs->m_size,
-				   out_len);
+	sg_init_table(src_tab, 2);
+	sg_set_buf(&src_tab[0], vecs->m, 8);
+	sg_set_buf(&src_tab[1], vecs->m + 8, vecs->m_size - 8);
+	sg_init_one(&dst, outbuf_enc, out_len_max);
+	akcipher_request_set_crypt(req, src_tab, &dst, vecs->m_size,
+				   out_len_max);
 	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				      tcrypt_complete, &result);
 
@@ -1882,13 +1892,13 @@ static int do_test_rsa(struct crypto_akcipher *tfm,
 		pr_err("alg: rsa: encrypt test failed. err %d\n", err);
 		goto free_all;
 	}
-	if (out_len != vecs->c_size) {
+	if (req->dst_len != vecs->c_size) {
 		pr_err("alg: rsa: encrypt test failed. Invalid output len\n");
 		err = -EINVAL;
 		goto free_all;
 	}
 	/* verify that encrypted message is equal to expected */
-	if (memcmp(vecs->c, outbuf_enc, vecs->c_size)) {
+	if (memcmp(vecs->c, sg_virt(req->dst), vecs->c_size)) {
 		pr_err("alg: rsa: encrypt test failed. Invalid output\n");
 		err = -EINVAL;
 		goto free_all;
@@ -1903,9 +1913,10 @@ static int do_test_rsa(struct crypto_akcipher *tfm,
 		err = -ENOMEM;
 		goto free_all;
 	}
+	sg_init_one(&src, vecs->c, vecs->c_size);
+	sg_init_one(&dst, outbuf_dec, out_len_max);
 	init_completion(&result.completion);
-	akcipher_request_set_crypt(req, outbuf_enc, outbuf_dec, vecs->c_size,
-				   out_len);
+	akcipher_request_set_crypt(req, &src, &dst, vecs->c_size, out_len_max);
 
 	/* Run RSA decrypt - m = c^d mod n;*/
 	err = wait_async_op(&result, crypto_akcipher_decrypt(req));
@@ -2080,7 +2091,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(md5),ecb(cipher_null))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2096,7 +2106,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha1),cbc(aes))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2110,7 +2119,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha1),cbc(des))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2124,7 +2132,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha1),cbc(des3_ede))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2138,7 +2145,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha1),ecb(cipher_null))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2158,7 +2164,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha224),cbc(des))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2172,7 +2177,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha224),cbc(des3_ede))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2186,7 +2190,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha256),cbc(aes))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2200,7 +2203,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha256),cbc(des))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2214,7 +2216,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha256),cbc(des3_ede))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2228,7 +2229,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha384),cbc(des))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2242,7 +2242,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha384),cbc(des3_ede))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2256,7 +2255,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha512),cbc(aes))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2270,7 +2268,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha512),cbc(des))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2284,7 +2281,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "authenc(hmac(sha512),cbc(des3_ede))",
 		.test = alg_test_aead,
-		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -3011,7 +3007,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "ecb(des)",
 		.test = alg_test_skcipher,
-		.fips_allowed = 1,
 		.suite = {
 			.cipher = {
 				.enc = {
@@ -3292,6 +3287,22 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.fips_allowed = 1,
 		.test = alg_test_null,
 	}, {
+		.alg = "kw(aes)",
+		.test = alg_test_skcipher,
+		.fips_allowed = 1,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = aes_kw_enc_tv_template,
+					.count = ARRAY_SIZE(aes_kw_enc_tv_template)
+				},
+				.dec = {
+					.vecs = aes_kw_dec_tv_template,
+					.count = ARRAY_SIZE(aes_kw_dec_tv_template)
+				}
+			}
+		}
+	}, {
 		.alg = "lrw(aes)",
 		.test = alg_test_skcipher,
 		.suite = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 64b8a8082645..da0a8fd765f4 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -67,6 +67,7 @@ struct hash_testvec {
 struct cipher_testvec {
 	char *key;
 	char *iv;
+	char *iv_out;
 	char *input;
 	char *result;
 	unsigned short tap[MAX_TAP];
@@ -149,7 +150,8 @@ static struct akcipher_testvec rsa_tv_template[] = {
 	{
 #ifndef CONFIG_CRYPTO_FIPS
 	.key =
-	"\x30\x81\x88" /* sequence of 136 bytes */
+	"\x30\x81\x9A" /* sequence of 154 bytes */
+	"\x02\x01\x01" /* version - integer of 1 byte */
 	"\x02\x41" /* modulus - integer of 65 bytes */
 	"\x00\xAA\x36\xAB\xCE\x88\xAC\xFD\xFF\x55\x52\x3C\x7F\xC4\x52\x3F"
 	"\x90\xEF\xA0\x0D\xF3\x77\x4A\x25\x9F\x2E\x62\xB4\xC5\xD9\x9C\xB5"
@@ -161,19 +163,25 @@ static struct akcipher_testvec rsa_tv_template[] = {
 	"\x0A\x03\x37\x48\x62\x64\x87\x69\x5F\x5F\x30\xBC\x38\xB9\x8B\x44"
 	"\xC2\xCD\x2D\xFF\x43\x40\x98\xCD\x20\xD8\xA1\x38\xD0\x90\xBF\x64"
 	"\x79\x7C\x3F\xA7\xA2\xCD\xCB\x3C\xD1\xE0\xBD\xBA\x26\x54\xB4\xF9"
-	"\xDF\x8E\x8A\xE5\x9D\x73\x3D\x9F\x33\xB3\x01\x62\x4A\xFD\x1D\x51",
+	"\xDF\x8E\x8A\xE5\x9D\x73\x3D\x9F\x33\xB3\x01\x62\x4A\xFD\x1D\x51"
+	"\x02\x01\x00" /* prime1 - integer of 1 byte */
+	"\x02\x01\x00" /* prime2 - integer of 1 byte */
+	"\x02\x01\x00" /* exponent1 - integer of 1 byte */
+	"\x02\x01\x00" /* exponent2 - integer of 1 byte */
+	"\x02\x01\x00", /* coefficient - integer of 1 byte */
 	.m = "\x54\x85\x9b\x34\x2c\x49\xea\x2a",
 	.c =
 	"\x63\x1c\xcd\x7b\xe1\x7e\xe4\xde\xc9\xa8\x89\xa1\x74\xcb\x3c\x63"
 	"\x7d\x24\xec\x83\xc3\x15\xe4\x7f\x73\x05\x34\xd1\xec\x22\xbb\x8a"
 	"\x5e\x32\x39\x6d\xc1\x1d\x7d\x50\x3b\x9f\x7a\xad\xf0\x2e\x25\x53"
 	"\x9f\x6e\xbd\x4c\x55\x84\x0c\x9b\xcf\x1a\x4b\x51\x1e\x9e\x0c\x06",
-	.key_len = 139,
+	.key_len = 157,
 	.m_size = 8,
 	.c_size = 64,
 	}, {
 	.key =
-	"\x30\x82\x01\x0B" /* sequence of 267 bytes */
+	"\x30\x82\x01\x1D" /* sequence of 285 bytes */
+	"\x02\x01\x01" /* version - integer of 1 byte */
 	"\x02\x81\x81" /* modulus - integer of 129 bytes */
 	"\x00\xBB\xF8\x2F\x09\x06\x82\xCE\x9C\x23\x38\xAC\x2B\x9D\xA8\x71"
 	"\xF7\x36\x8D\x07\xEE\xD4\x10\x43\xA4\x40\xD6\xB6\xF0\x74\x54\xF5"
@@ -194,8 +202,13 @@ static struct akcipher_testvec rsa_tv_template[] = {
 	"\x44\xE5\x6A\xAF\x68\xC5\x6C\x09\x2C\xD3\x8D\xC3\xBE\xF5\xD2\x0A"
 	"\x93\x99\x26\xED\x4F\x74\xA1\x3E\xDD\xFB\xE1\xA1\xCE\xCC\x48\x94"
 	"\xAF\x94\x28\xC2\xB7\xB8\x88\x3F\xE4\x46\x3A\x4B\xC8\x5B\x1C\xB3"
-	"\xC1",
-	.key_len = 271,
+	"\xC1"
+	"\x02\x01\x00" /* prime1 - integer of 1 byte */
+	"\x02\x01\x00" /* prime2 - integer of 1 byte */
+	"\x02\x01\x00" /* exponent1 - integer of 1 byte */
+	"\x02\x01\x00" /* exponent2 - integer of 1 byte */
+	"\x02\x01\x00", /* coefficient - integer of 1 byte */
+	.key_len = 289,
 	.m = "\x54\x85\x9b\x34\x2c\x49\xea\x2a",
 	.c =
 	"\x74\x1b\x55\xac\x47\xb5\x08\x0a\x6e\x2b\x2d\xf7\x94\xb8\x8a\x95"
@@ -211,7 +224,8 @@ static struct akcipher_testvec rsa_tv_template[] = {
 	}, {
 #endif
 	.key =
-	"\x30\x82\x02\x0D" /* sequence of 525 bytes */
+	"\x30\x82\x02\x1F" /* sequence of 543 bytes */
+	"\x02\x01\x01" /* version - integer of 1 byte */
 	"\x02\x82\x01\x00" /* modulus - integer of 256 bytes */
 	"\xDB\x10\x1A\xC2\xA3\xF1\xDC\xFF\x13\x6B\xED\x44\xDF\xF0\x02\x6D"
 	"\x13\xC7\x88\xDA\x70\x6B\x54\xF1\xE8\x27\xDC\xC3\x0F\x99\x6A\xFA"
@@ -246,8 +260,13 @@ static struct akcipher_testvec rsa_tv_template[] = {
 	"\x77\xAF\x51\x27\x5B\x5E\x69\xB8\x81\xE6\x11\xC5\x43\x23\x81\x04"
 	"\x62\xFF\xE9\x46\xB8\xD8\x44\xDB\xA5\xCC\x31\x54\x34\xCE\x3E\x82"
 	"\xD6\xBF\x7A\x0B\x64\x21\x6D\x88\x7E\x5B\x45\x12\x1E\x63\x8D\x49"
-	"\xA7\x1D\xD9\x1E\x06\xCD\xE8\xBA\x2C\x8C\x69\x32\xEA\xBE\x60\x71",
-	.key_len = 529,
+	"\xA7\x1D\xD9\x1E\x06\xCD\xE8\xBA\x2C\x8C\x69\x32\xEA\xBE\x60\x71"
+	"\x02\x01\x00" /* prime1 - integer of 1 byte */
+	"\x02\x01\x00" /* prime2 - integer of 1 byte */
+	"\x02\x01\x00" /* exponent1 - integer of 1 byte */
+	"\x02\x01\x00" /* exponent2 - integer of 1 byte */
+	"\x02\x01\x00", /* coefficient - integer of 1 byte */
+	.key_len = 547,
 	.m = "\x54\x85\x9b\x34\x2c\x49\xea\x2a",
 	.c =
 	"\xb2\x97\x76\xb4\xae\x3e\x38\x3c\x7e\x64\x1f\xcc\xa2\x7f\xf6\xbe"
@@ -23814,6 +23833,46 @@ static struct aead_testvec rfc7539esp_dec_tv_template[] = {
 };
 
 /*
+ * All key wrapping test vectors taken from
+ * http://csrc.nist.gov/groups/STM/cavp/documents/mac/kwtestvectors.zip
+ *
+ * Note: as documented in keywrap.c, the ivout for encryption is the first
+ * semiblock of the ciphertext from the test vector. For decryption, iv is
+ * the first semiblock of the ciphertext.
+ */
+static struct cipher_testvec aes_kw_enc_tv_template[] = {
+	{
+		.key	= "\x75\x75\xda\x3a\x93\x60\x7c\xc2"
+			  "\xbf\xd8\xce\xc7\xaa\xdf\xd9\xa6",
+		.klen	= 16,
+		.input	= "\x42\x13\x6d\x3c\x38\x4a\x3e\xea"
+			  "\xc9\x5a\x06\x6f\xd2\x8f\xed\x3f",
+		.ilen	= 16,
+		.result	= "\xf6\x85\x94\x81\x6f\x64\xca\xa3"
+			  "\xf5\x6f\xab\xea\x25\x48\xf5\xfb",
+		.rlen	= 16,
+		.iv_out	= "\x03\x1f\x6b\xd7\xe6\x1e\x64\x3d",
+	},
+};
+
+static struct cipher_testvec aes_kw_dec_tv_template[] = {
+	{
+		.key	= "\x80\xaa\x99\x73\x27\xa4\x80\x6b"
+			  "\x6a\x7a\x41\xa5\x2b\x86\xc3\x71"
+			  "\x03\x86\xf9\x32\x78\x6e\xf7\x96"
+			  "\x76\xfa\xfb\x90\xb8\x26\x3c\x5f",
+		.klen	= 32,
+		.input	= "\xd3\x3d\x3d\x97\x7b\xf0\xa9\x15"
+			  "\x59\xf9\x9c\x8a\xcd\x29\x3d\x43",
+		.ilen	= 16,
+		.result	= "\x0a\x25\x6b\xa7\x5c\xfa\x03\xaa"
+			  "\xa0\x2b\xa9\x42\x03\xf1\x5b\xaa",
+		.rlen	= 16,
+		.iv	= "\x42\x3c\x96\x0d\x8a\x2a\xc4\xc1",
+	},
+};
+
+/*
  * ANSI X9.31 Continuous Pseudo-Random Number Generator (AES mode)
  * test vectors, taken from Appendix B.2.9 and B.2.10:
  *     http://csrc.nist.gov/groups/STM/cavp/documents/rng/RNGVS.pdf
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index f48cf11c655e..dbf22719462f 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -10,7 +10,7 @@ menuconfig HW_RANDOM
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called rng-core.  This provides a device
-	  that's usually called /dev/hw_random, and which exposes one
+	  that's usually called /dev/hwrng, and which exposes one
 	  of possibly several hardware random number generators.
 
 	  These hardware random number generators do not feed directly
@@ -346,6 +346,16 @@ config HW_RANDOM_MSM
 
 	  If unsure, say Y.
 
+config HW_RANDOM_ST
+	tristate "ST Microelectronics HW Random Number Generator support"
+	depends on HW_RANDOM && ARCH_STI
+	---help---
+	  This driver provides kernel-side support for the Random Number
+	  Generator hardware found on the STi series of SoCs.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called st-rng.
+
 config HW_RANDOM_XGENE
 	tristate "APM X-Gene True Random Number Generator (TRNG) support"
 	depends on HW_RANDOM && ARCH_XGENE
@@ -359,6 +369,18 @@ config HW_RANDOM_XGENE
 
 	  If unsure, say Y.
 
+config HW_RANDOM_STM32
+	tristate "STMicroelectronics STM32 random number generator"
+	depends on HW_RANDOM && (ARCH_STM32 || COMPILE_TEST)
+	help
+	  This driver provides kernel-side support for the Random Number
+	  Generator hardware found on STM32 microcontrollers.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called stm32-rng.
+
+	  If unsure, say N.
+
 endif # HW_RANDOM
 
 config UML_RANDOM
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 055bb01510ad..5ad397635128 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -30,4 +30,6 @@ obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o
 obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o
 obj-$(CONFIG_HW_RANDOM_IPROC_RNG200) += iproc-rng200.o
 obj-$(CONFIG_HW_RANDOM_MSM) += msm-rng.o
+obj-$(CONFIG_HW_RANDOM_ST) += st-rng.o
 obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o
+obj-$(CONFIG_HW_RANDOM_STM32) += stm32-rng.o
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 5643b65cee20..6f497aa1b276 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -323,7 +323,7 @@ static ssize_t hwrng_attr_current_store(struct device *dev,
 		return -ERESTARTSYS;
 	err = -ENODEV;
 	list_for_each_entry(rng, &rng_list, list) {
-		if (strcmp(rng->name, buf) == 0) {
+		if (sysfs_streq(rng->name, buf)) {
 			err = 0;
 			if (rng != current_rng)
 				err = set_current_rng(rng);
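
The switch to sysfs_streq() matters because a write such as `echo virtio_rng > current_rng` carries a trailing newline, which strcmp() would reject while sysfs_streq() tolerates. An illustrative contrast:

    #include <linux/string.h>
    #include <linux/types.h>

    /* Illustrative only: match an rng name against raw sysfs input. */
    static bool rng_name_matches(const char *rng_name, const char *sysfs_buf)
    {
    	/* strcmp("virtio_rng", "virtio_rng\n") != 0, whereas: */
    	return sysfs_streq(rng_name, sysfs_buf);  /* newline-tolerant */
    }
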
diff --git a/drivers/char/hw_random/exynos-rng.c b/drivers/char/hw_random/exynos-rng.c
index dc4701fd814f..30cf4623184f 100644
--- a/drivers/char/hw_random/exynos-rng.c
+++ b/drivers/char/hw_random/exynos-rng.c
@@ -53,15 +53,11 @@ static void exynos_rng_writel(struct exynos_rng *rng, u32 val, u32 offset)
 	__raw_writel(val, rng->mem + offset);
 }
 
-static int exynos_init(struct hwrng *rng)
+static int exynos_rng_configure(struct exynos_rng *exynos_rng)
 {
-	struct exynos_rng *exynos_rng = container_of(rng,
-						struct exynos_rng, rng);
 	int i;
 	int ret = 0;
 
-	pm_runtime_get_sync(exynos_rng->dev);
-
 	for (i = 0 ; i < 5 ; i++)
 		exynos_rng_writel(exynos_rng, jiffies,
 				EXYNOS_PRNG_SEED_OFFSET + 4*i);
@@ -70,6 +66,17 @@ static int exynos_init(struct hwrng *rng)
 						 & SEED_SETTING_DONE))
 		ret = -EIO;
 
+	return ret;
+}
+
+static int exynos_init(struct hwrng *rng)
+{
+	struct exynos_rng *exynos_rng = container_of(rng,
+						struct exynos_rng, rng);
+	int ret = 0;
+
+	pm_runtime_get_sync(exynos_rng->dev);
+	ret = exynos_rng_configure(exynos_rng);
 	pm_runtime_put_noidle(exynos_rng->dev);
 
 	return ret;
@@ -81,21 +88,24 @@ static int exynos_read(struct hwrng *rng, void *buf,
 	struct exynos_rng *exynos_rng = container_of(rng,
 						struct exynos_rng, rng);
 	u32 *data = buf;
+	int retry = 100;
 
 	pm_runtime_get_sync(exynos_rng->dev);
 
 	exynos_rng_writel(exynos_rng, PRNG_START, 0);
 
 	while (!(exynos_rng_readl(exynos_rng,
-			EXYNOS_PRNG_STATUS_OFFSET) & PRNG_DONE))
+			EXYNOS_PRNG_STATUS_OFFSET) & PRNG_DONE) && --retry)
 		cpu_relax();
+	if (!retry) {
+		pm_runtime_put_sync(exynos_rng->dev);
+		return -ETIMEDOUT;
+	}
 
 	exynos_rng_writel(exynos_rng, PRNG_DONE, EXYNOS_PRNG_STATUS_OFFSET);
 
 	*data = exynos_rng_readl(exynos_rng, EXYNOS_PRNG_OUT1_OFFSET);
 
 	pm_runtime_mark_last_busy(exynos_rng->dev);
-	pm_runtime_autosuspend(exynos_rng->dev);
+	pm_runtime_put_sync_autosuspend(exynos_rng->dev);
 
 	return 4;
 }
@@ -152,15 +162,45 @@ static int exynos_rng_runtime_resume(struct device *dev)
 
 	return clk_prepare_enable(exynos_rng->clk);
 }
+
+static int exynos_rng_suspend(struct device *dev)
+{
+	return pm_runtime_force_suspend(dev);
+}
+
+static int exynos_rng_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct exynos_rng *exynos_rng = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = pm_runtime_force_resume(dev);
+	if (ret)
+		return ret;
+
+	return exynos_rng_configure(exynos_rng);
+}
 #endif
 
-static UNIVERSAL_DEV_PM_OPS(exynos_rng_pm_ops, exynos_rng_runtime_suspend,
-					exynos_rng_runtime_resume, NULL);
+static const struct dev_pm_ops exynos_rng_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(exynos_rng_suspend, exynos_rng_resume)
+	SET_RUNTIME_PM_OPS(exynos_rng_runtime_suspend,
+			   exynos_rng_runtime_resume, NULL)
+};
+
+static const struct of_device_id exynos_rng_dt_match[] = {
+	{
+		.compatible = "samsung,exynos4-rng",
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, exynos_rng_dt_match);
 
 static struct platform_driver exynos_rng_driver = {
 	.driver		= {
 		.name	= "exynos-rng",
 		.pm	= &exynos_rng_pm_ops,
+		.of_match_table = exynos_rng_dt_match,
 	},
 	.probe		= exynos_rng_probe,
 };
diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c
index 6cbb72ec6013..467362262651 100644
--- a/drivers/char/hw_random/mxc-rnga.c
+++ b/drivers/char/hw_random/mxc-rnga.c
@@ -141,12 +141,11 @@ static void mxc_rnga_cleanup(struct hwrng *rng)
 
 static int __init mxc_rnga_probe(struct platform_device *pdev)
 {
-	int err = -ENODEV;
+	int err;
 	struct resource *res;
 	struct mxc_rng *mxc_rng;
 
-	mxc_rng = devm_kzalloc(&pdev->dev, sizeof(struct mxc_rng),
-					GFP_KERNEL);
+	mxc_rng = devm_kzalloc(&pdev->dev, sizeof(*mxc_rng), GFP_KERNEL);
 	if (!mxc_rng)
 		return -ENOMEM;
 
@@ -160,13 +159,12 @@ static int __init mxc_rnga_probe(struct platform_device *pdev)
 	mxc_rng->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(mxc_rng->clk)) {
 		dev_err(&pdev->dev, "Could not get rng_clk!\n");
-		err = PTR_ERR(mxc_rng->clk);
-		goto out;
+		return PTR_ERR(mxc_rng->clk);
 	}
 
 	err = clk_prepare_enable(mxc_rng->clk);
 	if (err)
-		goto out;
+		return err;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	mxc_rng->mem = devm_ioremap_resource(&pdev->dev, res);
@@ -181,14 +179,10 @@ static int __init mxc_rnga_probe(struct platform_device *pdev)
 		goto err_ioremap;
 	}
 
-	dev_info(&pdev->dev, "MXC RNGA Registered.\n");
-
 	return 0;
 
 err_ioremap:
 	clk_disable_unprepare(mxc_rng->clk);
-
-out:
 	return err;
 }
 
diff --git a/drivers/char/hw_random/octeon-rng.c b/drivers/char/hw_random/octeon-rng.c
index 6234a4a19b56..8c78aa090492 100644
--- a/drivers/char/hw_random/octeon-rng.c
+++ b/drivers/char/hw_random/octeon-rng.c
@@ -96,7 +96,7 @@ static int octeon_rng_probe(struct platform_device *pdev)
 	rng->ops = ops;
 
 	platform_set_drvdata(pdev, &rng->ops);
-	ret = hwrng_register(&rng->ops);
+	ret = devm_hwrng_register(&pdev->dev, &rng->ops);
 	if (ret)
 		return -ENOENT;
 
@@ -105,21 +105,11 @@ static int octeon_rng_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int octeon_rng_remove(struct platform_device *pdev)
-{
-	struct hwrng *rng = platform_get_drvdata(pdev);
-
-	hwrng_unregister(rng);
-
-	return 0;
-}
-
 static struct platform_driver octeon_rng_driver = {
 	.driver = {
 		.name		= "octeon_rng",
 	},
 	.probe		= octeon_rng_probe,
-	.remove		= octeon_rng_remove,
 };
 
 module_platform_driver(octeon_rng_driver);
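
/*
 * A minimal sketch of the devm pattern the octeon conversion relies on:
 * devm_hwrng_register() unregisters the RNG automatically when the
 * device is unbound, so the .remove callback can go away.  All foo_*
 * names are hypothetical.
 */
static int foo_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
	return 0;	/* stub; a real driver reads its data register here */
}

static int foo_rng_probe(struct platform_device *pdev)
{
	struct hwrng *rng;

	rng = devm_kzalloc(&pdev->dev, sizeof(*rng), GFP_KERNEL);
	if (!rng)
		return -ENOMEM;

	rng->name = "foo-rng";
	rng->read = foo_rng_read;

	return devm_hwrng_register(&pdev->dev, rng);
}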
diff --git a/drivers/char/hw_random/pasemi-rng.c b/drivers/char/hw_random/pasemi-rng.c
index 51cb1d5cc489..699b7259f5d7 100644
--- a/drivers/char/hw_random/pasemi-rng.c
+++ b/drivers/char/hw_random/pasemi-rng.c
@@ -138,6 +138,7 @@ static const struct of_device_id rng_match[] = {
 	{ .compatible      = "pasemi,pwrficient-rng", },
 	{ },
 };
+MODULE_DEVICE_TABLE(of, rng_match);
 
 static struct platform_driver rng_driver = {
 	.driver = {
diff --git a/drivers/char/hw_random/ppc4xx-rng.c b/drivers/char/hw_random/ppc4xx-rng.c
index b2cfda0fa93e..c0db4387d2e2 100644
--- a/drivers/char/hw_random/ppc4xx-rng.c
+++ b/drivers/char/hw_random/ppc4xx-rng.c
@@ -129,6 +129,7 @@ static const struct of_device_id ppc4xx_rng_match[] = {
 	{ .compatible = "amcc,ppc440epx-rng", },
 	{},
 };
+MODULE_DEVICE_TABLE(of, ppc4xx_rng_match);
 
 static struct platform_driver ppc4xx_rng_driver = {
 	.driver = {
diff --git a/drivers/char/hw_random/st-rng.c b/drivers/char/hw_random/st-rng.c
new file mode 100644
index 000000000000..1d35363d23c5
--- /dev/null
+++ b/drivers/char/hw_random/st-rng.c
@@ -0,0 +1,151 @@
+/*
+ * ST Random Number Generator Driver for ST's Platforms
+ *
+ * Author: Pankaj Dev <pankaj.dev@st.com>
+ *         Lee Jones <lee.jones@linaro.org>
+ *
+ * Copyright (C) 2015 STMicroelectronics (R&D) Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/* Registers */
+#define ST_RNG_STATUS_REG		0x20
+#define ST_RNG_DATA_REG			0x24
+
+/* Registers fields */
+#define ST_RNG_STATUS_BAD_SEQUENCE	BIT(0)
+#define ST_RNG_STATUS_BAD_ALTERNANCE	BIT(1)
+#define ST_RNG_STATUS_FIFO_FULL		BIT(5)
+
+#define ST_RNG_SAMPLE_SIZE		2 /* 2-byte (16-bit) samples */
+#define ST_RNG_FIFO_DEPTH		4
+#define ST_RNG_FIFO_SIZE		(ST_RNG_FIFO_DEPTH * ST_RNG_SAMPLE_SIZE)
+
+/*
+ * Samples are documented to be available every 0.667us, so in theory
+ * the 4 sample deep FIFO should take 2.668us to fill.  However, during
+ * thorough testing, it became apparent that filling the FIFO actually
+ * takes closer to 12us.  We then multiply by 2 to account for
+ * udelay()'s limited accuracy, as suggested by Russell King.
+ */
+#define ST_RNG_FILL_FIFO_TIMEOUT	(12 * 2)
+
+struct st_rng_data {
+	void __iomem	*base;
+	struct clk	*clk;
+	struct hwrng	ops;
+};
+
+static int st_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	struct st_rng_data *ddata = (struct st_rng_data *)rng->priv;
+	u32 status;
+	int i;
+
+	if (max < sizeof(u16))
+		return -EINVAL;
+
+	/* Wait until the FIFO is full, within ST_RNG_FILL_FIFO_TIMEOUT us */
+	for (i = 0; i < ST_RNG_FILL_FIFO_TIMEOUT; i++) {
+		status = readl_relaxed(ddata->base + ST_RNG_STATUS_REG);
+		if (status & ST_RNG_STATUS_FIFO_FULL)
+			break;
+		udelay(1);
+	}
+
+	if (i == ST_RNG_FILL_FIFO_TIMEOUT)
+		return 0;
+
+	for (i = 0; i < ST_RNG_FIFO_SIZE && i < max; i += 2)
+		*(u16 *)(data + i) =
+			readl_relaxed(ddata->base + ST_RNG_DATA_REG);
+
+	return i;	/* Number of bytes read */
+}
+
+static int st_rng_probe(struct platform_device *pdev)
+{
+	struct st_rng_data *ddata;
+	struct resource *res;
+	struct clk *clk;
+	void __iomem *base;
+	int ret;
+
+	ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL);
+	if (!ddata)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	ret = clk_prepare_enable(clk);
+	if (ret)
+		return ret;
+
+	ddata->ops.priv	= (unsigned long)ddata;
+	ddata->ops.read	= st_rng_read;
+	ddata->ops.name	= pdev->name;
+	ddata->base	= base;
+	ddata->clk	= clk;
+
+	dev_set_drvdata(&pdev->dev, ddata);
+
+	ret = hwrng_register(&ddata->ops);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to register HW RNG\n");
+		return ret;
+	}
+
+	dev_info(&pdev->dev, "Successfully registered HW RNG\n");
+
+	return 0;
+}
+
+static int st_rng_remove(struct platform_device *pdev)
+{
+	struct st_rng_data *ddata = dev_get_drvdata(&pdev->dev);
+
+	hwrng_unregister(&ddata->ops);
+
+	clk_disable_unprepare(ddata->clk);
+
+	return 0;
+}
+
+static const struct of_device_id st_rng_match[] = {
+	{ .compatible = "st,rng" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, st_rng_match);
+
+static struct platform_driver st_rng_driver = {
+	.driver = {
+		.name = "st-hwrandom",
+		.of_match_table = of_match_ptr(st_rng_match),
+	},
+	.probe = st_rng_probe,
+	.remove = st_rng_remove
+};
+
+module_platform_driver(st_rng_driver);
+
+MODULE_AUTHOR("Pankaj Dev <pankaj.dev@st.com>");
+MODULE_LICENSE("GPL v2");
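
/*
 * A minimal userspace sketch, assuming the registered RNG is exposed
 * through the usual hw_random character device at /dev/hwrng.  Plain
 * POSIX C; no kernel headers needed.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned char buf[16];
	ssize_t n, i;
	int fd;

	fd = open("/dev/hwrng", O_RDONLY);
	if (fd < 0)
		return 1;

	n = read(fd, buf, sizeof(buf));	/* blocks until entropy is ready */
	close(fd);
	if (n <= 0)
		return 1;

	for (i = 0; i < n; i++)
		printf("%02x", buf[i]);
	printf("\n");

	return 0;
}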
diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c
new file mode 100644
index 000000000000..92a810648bd0
--- /dev/null
+++ b/drivers/char/hw_random/stm32-rng.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2015, Daniel Thompson
+ *
+ * This file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#define RNG_CR 0x00
+#define RNG_CR_RNGEN BIT(2)
+
+#define RNG_SR 0x04
+#define RNG_SR_SEIS BIT(6)
+#define RNG_SR_CEIS BIT(5)
+#define RNG_SR_DRDY BIT(0)
+
+#define RNG_DR 0x08
+
+/*
+ * It takes 40 cycles @ 48MHz to generate each random number (i.e. <1us).
+ * At the time of writing, STM32 parts max out at ~200MHz, meaning a timeout
+ * of 500 leaves us with a very comfortable margin for error. The loop to which
+ * the timeout applies takes at least 4 instructions per iteration so the
+ * timeout is enough to take us up to multi-GHz parts!
+ */
+#define RNG_TIMEOUT 500
+
+struct stm32_rng_private {
+	struct hwrng rng;
+	void __iomem *base;
+	struct clk *clk;
+};
+
+static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	struct stm32_rng_private *priv =
+	    container_of(rng, struct stm32_rng_private, rng);
+	u32 sr;
+	int retval = 0;
+
+	pm_runtime_get_sync((struct device *) priv->rng.priv);
+
+	while (max >= sizeof(u32)) {
+		sr = readl_relaxed(priv->base + RNG_SR);
+		if (!sr && wait) {
+			unsigned int timeout = RNG_TIMEOUT;
+
+			do {
+				cpu_relax();
+				sr = readl_relaxed(priv->base + RNG_SR);
+			} while (!sr && --timeout);
+		}
+
+		/* If error detected or data not ready... */
+		if (sr != RNG_SR_DRDY)
+			break;
+
+		*(u32 *)data = readl_relaxed(priv->base + RNG_DR);
+
+		retval += sizeof(u32);
+		data += sizeof(u32);
+		max -= sizeof(u32);
+	}
+
+	if (WARN_ONCE(sr & (RNG_SR_SEIS | RNG_SR_CEIS),
+		      "bad RNG status - %x\n", sr))
+		writel_relaxed(0, priv->base + RNG_SR);
+
+	pm_runtime_mark_last_busy((struct device *) priv->rng.priv);
+	pm_runtime_put_sync_autosuspend((struct device *) priv->rng.priv);
+
+	return retval || !wait ? retval : -EIO;
+}
+
+static int stm32_rng_init(struct hwrng *rng)
+{
+	struct stm32_rng_private *priv =
+	    container_of(rng, struct stm32_rng_private, rng);
+	int err;
+
+	err = clk_prepare_enable(priv->clk);
+	if (err)
+		return err;
+
+	writel_relaxed(RNG_CR_RNGEN, priv->base + RNG_CR);
+
+	/* clear error indicators */
+	writel_relaxed(0, priv->base + RNG_SR);
+
+	return 0;
+}
+
+static void stm32_rng_cleanup(struct hwrng *rng)
+{
+	struct stm32_rng_private *priv =
+	    container_of(rng, struct stm32_rng_private, rng);
+
+	writel_relaxed(0, priv->base + RNG_CR);
+	clk_disable_unprepare(priv->clk);
+}
+
+static int stm32_rng_probe(struct platform_device *ofdev)
+{
+	struct device *dev = &ofdev->dev;
+	struct device_node *np = ofdev->dev.of_node;
+	struct stm32_rng_private *priv;
+	struct resource res;
+	int err;
+
+	priv = devm_kzalloc(dev, sizeof(struct stm32_rng_private), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	err = of_address_to_resource(np, 0, &res);
+	if (err)
+		return err;
+
+	priv->base = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	priv->clk = devm_clk_get(&ofdev->dev, NULL);
+	if (IS_ERR(priv->clk))
+		return PTR_ERR(priv->clk);
+
+	dev_set_drvdata(dev, priv);
+
+	priv->rng.name = dev_driver_string(dev);
+#ifndef CONFIG_PM
+	priv->rng.init = stm32_rng_init;
+	priv->rng.cleanup = stm32_rng_cleanup;
+#endif
+	priv->rng.read = stm32_rng_read;
+	priv->rng.priv = (unsigned long) dev;
+
+	pm_runtime_set_autosuspend_delay(dev, 100);
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_enable(dev);
+
+	return devm_hwrng_register(dev, &priv->rng);
+}
+
+#ifdef CONFIG_PM
+static int stm32_rng_runtime_suspend(struct device *dev)
+{
+	struct stm32_rng_private *priv = dev_get_drvdata(dev);
+
+	stm32_rng_cleanup(&priv->rng);
+
+	return 0;
+}
+
+static int stm32_rng_runtime_resume(struct device *dev)
+{
+	struct stm32_rng_private *priv = dev_get_drvdata(dev);
+
+	return stm32_rng_init(&priv->rng);
+}
+#endif
+
+static UNIVERSAL_DEV_PM_OPS(stm32_rng_pm_ops, stm32_rng_runtime_suspend,
+			    stm32_rng_runtime_resume, NULL);
+
+static const struct of_device_id stm32_rng_match[] = {
+	{
+		.compatible = "st,stm32-rng",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, stm32_rng_match);
+
+static struct platform_driver stm32_rng_driver = {
+	.driver = {
+		.name = "stm32-rng",
+		.pm = &stm32_rng_pm_ops,
+		.of_match_table = stm32_rng_match,
+	},
+	.probe = stm32_rng_probe,
+};
+
+module_platform_driver(stm32_rng_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Thompson <daniel.thompson@linaro.org>");
+MODULE_DESCRIPTION("STMicroelectronics STM32 RNG device driver");
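
/*
 * A minimal sketch of the pattern stm32_rng_read() uses to recover its
 * driver state: embed struct hwrng in the private structure and map
 * back with container_of(), instead of casting through hwrng->priv as
 * st-rng does above.  struct foo_priv and foo_read() are hypothetical,
 * and the data register is assumed to sit at offset 0.
 */
struct foo_priv {
	struct hwrng rng;
	void __iomem *base;
};

static int foo_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
	struct foo_priv *priv = container_of(rng, struct foo_priv, rng);

	if (max < sizeof(u32))
		return 0;

	*(u32 *)data = readl_relaxed(priv->base);
	return sizeof(u32);
}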
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index d234719065a5..2569e043317e 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -420,7 +420,7 @@ config CRYPTO_DEV_CCP
 	bool "Support for AMD Cryptographic Coprocessor"
 	depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM
 	help
-	  The AMD Cryptographic Coprocessor provides hardware support
+	  The AMD Cryptographic Coprocessor provides hardware offload support
 	  for encryption, hashing and related operations.
 
 if CRYPTO_DEV_CCP
@@ -429,7 +429,8 @@ endif
 
 config CRYPTO_DEV_MXS_DCP
 	tristate "Support for Freescale MXS DCP"
-	depends on ARCH_MXS
+	depends on (ARCH_MXS || ARCH_MXC)
+	select STMP_DEVICE
 	select CRYPTO_CBC
 	select CRYPTO_ECB
 	select CRYPTO_AES
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index 192a8fa325c1..58a630e55d5d 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -740,26 +740,6 @@ void crypto4xx_return_pd(struct crypto4xx_device *dev,
 	pd_uinfo->state = PD_ENTRY_FREE;
 }
 
-/*
- * derive number of elements in scatterlist
- * Shamlessly copy from talitos.c
- */
-static int get_sg_count(struct scatterlist *sg_list, int nbytes)
-{
-	struct scatterlist *sg = sg_list;
-	int sg_nents = 0;
-
-	while (nbytes) {
-		sg_nents++;
-		if (sg->length > nbytes)
-			break;
-		nbytes -= sg->length;
-		sg = sg_next(sg);
-	}
-
-	return sg_nents;
-}
-
 static u32 get_next_gd(u32 current)
 {
 	if (current != PPC4XX_LAST_GD)
@@ -800,7 +780,7 @@ u32 crypto4xx_build_pd(struct crypto_async_request *req,
 	u32 gd_idx = 0;
 
 	/* figure how many gd is needed */
-	num_gd = get_sg_count(src, datalen);
+	num_gd = sg_nents_for_len(src, datalen);
 	if (num_gd == 1)
 		num_gd = 0;
 
@@ -1284,6 +1264,7 @@ static const struct of_device_id crypto4xx_match[] = {
 	{ .compatible      = "amcc,ppc4xx-crypto",},
 	{ },
 };
+MODULE_DEVICE_TABLE(of, crypto4xx_match);
 
 static struct platform_driver crypto4xx_driver = {
 	.driver = {
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 0f9a9dc06a83..fb16d812c8f5 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -260,7 +260,11 @@ static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_ctx *ctx)
 
 static int atmel_aes_hw_init(struct atmel_aes_dev *dd)
 {
-	clk_prepare_enable(dd->iclk);
+	int err;
+
+	err = clk_prepare_enable(dd->iclk);
+	if (err)
+		return err;
 
 	if (!(dd->flags & AES_FLAGS_INIT)) {
 		atmel_aes_write(dd, AES_CR, AES_CR_SWRST);
@@ -1320,7 +1324,6 @@ static int atmel_aes_probe(struct platform_device *pdev)
 	struct crypto_platform_data *pdata;
 	struct device *dev = &pdev->dev;
 	struct resource *aes_res;
-	unsigned long aes_phys_size;
 	int err;
 
 	pdata = pdev->dev.platform_data;
@@ -1337,7 +1340,7 @@ static int atmel_aes_probe(struct platform_device *pdev)
 		goto aes_dd_err;
 	}
 
-	aes_dd = kzalloc(sizeof(struct atmel_aes_dev), GFP_KERNEL);
+	aes_dd = devm_kzalloc(&pdev->dev, sizeof(*aes_dd), GFP_KERNEL);
 	if (aes_dd == NULL) {
 		dev_err(dev, "unable to alloc data struct.\n");
 		err = -ENOMEM;
@@ -1368,36 +1371,35 @@ static int atmel_aes_probe(struct platform_device *pdev)
 		goto res_err;
 	}
 	aes_dd->phys_base = aes_res->start;
-	aes_phys_size = resource_size(aes_res);
 
 	/* Get the IRQ */
 	aes_dd->irq = platform_get_irq(pdev,  0);
 	if (aes_dd->irq < 0) {
 		dev_err(dev, "no IRQ resource info\n");
 		err = aes_dd->irq;
-		goto aes_irq_err;
+		goto res_err;
 	}
 
-	err = request_irq(aes_dd->irq, atmel_aes_irq, IRQF_SHARED, "atmel-aes",
-						aes_dd);
+	err = devm_request_irq(&pdev->dev, aes_dd->irq, atmel_aes_irq,
+			       IRQF_SHARED, "atmel-aes", aes_dd);
 	if (err) {
 		dev_err(dev, "unable to request aes irq.\n");
-		goto aes_irq_err;
+		goto res_err;
 	}
 
 	/* Initializing the clock */
-	aes_dd->iclk = clk_get(&pdev->dev, "aes_clk");
+	aes_dd->iclk = devm_clk_get(&pdev->dev, "aes_clk");
 	if (IS_ERR(aes_dd->iclk)) {
 		dev_err(dev, "clock initialization failed.\n");
 		err = PTR_ERR(aes_dd->iclk);
-		goto clk_err;
+		goto res_err;
 	}
 
-	aes_dd->io_base = ioremap(aes_dd->phys_base, aes_phys_size);
+	aes_dd->io_base = devm_ioremap_resource(&pdev->dev, aes_res);
-	if (!aes_dd->io_base) {
-		dev_err(dev, "can't ioremap\n");
-		err = -ENOMEM;
-		goto aes_io_err;
+	if (IS_ERR(aes_dd->io_base)) {
+		dev_err(dev, "can't ioremap\n");
+		err = PTR_ERR(aes_dd->io_base);
+		goto res_err;
 	}
 
 	atmel_aes_hw_version_init(aes_dd);
@@ -1434,17 +1436,9 @@ err_algs:
 err_aes_dma:
 	atmel_aes_buff_cleanup(aes_dd);
 err_aes_buff:
-	iounmap(aes_dd->io_base);
-aes_io_err:
-	clk_put(aes_dd->iclk);
-clk_err:
-	free_irq(aes_dd->irq, aes_dd);
-aes_irq_err:
 res_err:
 	tasklet_kill(&aes_dd->done_task);
 	tasklet_kill(&aes_dd->queue_task);
-	kfree(aes_dd);
-	aes_dd = NULL;
 aes_dd_err:
 	dev_err(dev, "initialization failed.\n");
 
@@ -1469,16 +1463,6 @@ static int atmel_aes_remove(struct platform_device *pdev)
 
 	atmel_aes_dma_cleanup(aes_dd);
 
-	iounmap(aes_dd->io_base);
-
-	clk_put(aes_dd->iclk);
-
-	if (aes_dd->irq > 0)
-		free_irq(aes_dd->irq, aes_dd);
-
-	kfree(aes_dd);
-	aes_dd = NULL;
-
 	return 0;
 }
 
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 5b35433c5399..660d8c06540b 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -794,7 +794,11 @@ static void atmel_sha_finish_req(struct ahash_request *req, int err)
 
 static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
 {
-	clk_prepare_enable(dd->iclk);
+	int err;
+
+	err = clk_prepare_enable(dd->iclk);
+	if (err)
+		return err;
 
 	if (!(SHA_FLAGS_INIT & dd->flags)) {
 		atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST);
@@ -1345,11 +1349,9 @@ static int atmel_sha_probe(struct platform_device *pdev)
 	struct crypto_platform_data	*pdata;
 	struct device *dev = &pdev->dev;
 	struct resource *sha_res;
-	unsigned long sha_phys_size;
 	int err;
 
-	sha_dd = devm_kzalloc(&pdev->dev, sizeof(struct atmel_sha_dev),
-				GFP_KERNEL);
+	sha_dd = devm_kzalloc(&pdev->dev, sizeof(*sha_dd), GFP_KERNEL);
 	if (sha_dd == NULL) {
 		dev_err(dev, "unable to alloc data struct.\n");
 		err = -ENOMEM;
@@ -1378,7 +1380,6 @@ static int atmel_sha_probe(struct platform_device *pdev)
 		goto res_err;
 	}
 	sha_dd->phys_base = sha_res->start;
-	sha_phys_size = resource_size(sha_res);
 
 	/* Get the IRQ */
 	sha_dd->irq = platform_get_irq(pdev,  0);
@@ -1388,26 +1389,26 @@ static int atmel_sha_probe(struct platform_device *pdev)
 		goto res_err;
 	}
 
-	err = request_irq(sha_dd->irq, atmel_sha_irq, IRQF_SHARED, "atmel-sha",
-						sha_dd);
+	err = devm_request_irq(&pdev->dev, sha_dd->irq, atmel_sha_irq,
+			       IRQF_SHARED, "atmel-sha", sha_dd);
 	if (err) {
 		dev_err(dev, "unable to request sha irq.\n");
 		goto res_err;
 	}
 
 	/* Initializing the clock */
-	sha_dd->iclk = clk_get(&pdev->dev, "sha_clk");
+	sha_dd->iclk = devm_clk_get(&pdev->dev, "sha_clk");
 	if (IS_ERR(sha_dd->iclk)) {
 		dev_err(dev, "clock initialization failed.\n");
 		err = PTR_ERR(sha_dd->iclk);
-		goto clk_err;
+		goto res_err;
 	}
 
-	sha_dd->io_base = ioremap(sha_dd->phys_base, sha_phys_size);
+	sha_dd->io_base = devm_ioremap_resource(&pdev->dev, sha_res);
-	if (!sha_dd->io_base) {
-		dev_err(dev, "can't ioremap\n");
-		err = -ENOMEM;
-		goto sha_io_err;
+	if (IS_ERR(sha_dd->io_base)) {
+		dev_err(dev, "can't ioremap\n");
+		err = PTR_ERR(sha_dd->io_base);
+		goto res_err;
 	}
 
 	atmel_sha_hw_version_init(sha_dd);
@@ -1421,12 +1422,12 @@ static int atmel_sha_probe(struct platform_device *pdev)
 			if (IS_ERR(pdata)) {
 				dev_err(&pdev->dev, "platform data not available\n");
 				err = PTR_ERR(pdata);
-				goto err_pdata;
+				goto res_err;
 			}
 		}
 		if (!pdata->dma_slave) {
 			err = -ENXIO;
-			goto err_pdata;
+			goto res_err;
 		}
 		err = atmel_sha_dma_init(sha_dd, pdata);
 		if (err)
@@ -1457,12 +1458,6 @@ err_algs:
 	if (sha_dd->caps.has_dma)
 		atmel_sha_dma_cleanup(sha_dd);
 err_sha_dma:
-err_pdata:
-	iounmap(sha_dd->io_base);
-sha_io_err:
-	clk_put(sha_dd->iclk);
-clk_err:
-	free_irq(sha_dd->irq, sha_dd);
 res_err:
 	tasklet_kill(&sha_dd->done_task);
 sha_dd_err:
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index ca2999709eb4..2c7a628d0375 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -218,7 +218,11 @@ static struct atmel_tdes_dev *atmel_tdes_find_dev(struct atmel_tdes_ctx *ctx)
 
 static int atmel_tdes_hw_init(struct atmel_tdes_dev *dd)
 {
-	clk_prepare_enable(dd->iclk);
+	int err;
+
+	err = clk_prepare_enable(dd->iclk);
+	if (err)
+		return err;
 
 	if (!(dd->flags & TDES_FLAGS_INIT)) {
 		atmel_tdes_write(dd, TDES_CR, TDES_CR_SWRST);
@@ -1355,7 +1359,6 @@ static int atmel_tdes_probe(struct platform_device *pdev)
 	struct crypto_platform_data	*pdata;
 	struct device *dev = &pdev->dev;
 	struct resource *tdes_res;
-	unsigned long tdes_phys_size;
 	int err;
 
 	tdes_dd = devm_kmalloc(&pdev->dev, sizeof(*tdes_dd), GFP_KERNEL);
@@ -1389,7 +1392,6 @@ static int atmel_tdes_probe(struct platform_device *pdev)
 		goto res_err;
 	}
 	tdes_dd->phys_base = tdes_res->start;
-	tdes_phys_size = resource_size(tdes_res);
 
 	/* Get the IRQ */
 	tdes_dd->irq = platform_get_irq(pdev,  0);
@@ -1399,26 +1401,26 @@ static int atmel_tdes_probe(struct platform_device *pdev)
 		goto res_err;
 	}
 
-	err = request_irq(tdes_dd->irq, atmel_tdes_irq, IRQF_SHARED,
-			"atmel-tdes", tdes_dd);
+	err = devm_request_irq(&pdev->dev, tdes_dd->irq, atmel_tdes_irq,
+			       IRQF_SHARED, "atmel-tdes", tdes_dd);
 	if (err) {
 		dev_err(dev, "unable to request tdes irq.\n");
-		goto tdes_irq_err;
+		goto res_err;
 	}
 
 	/* Initializing the clock */
-	tdes_dd->iclk = clk_get(&pdev->dev, "tdes_clk");
+	tdes_dd->iclk = devm_clk_get(&pdev->dev, "tdes_clk");
 	if (IS_ERR(tdes_dd->iclk)) {
 		dev_err(dev, "clock initialization failed.\n");
 		err = PTR_ERR(tdes_dd->iclk);
-		goto clk_err;
+		goto res_err;
 	}
 
-	tdes_dd->io_base = ioremap(tdes_dd->phys_base, tdes_phys_size);
+	tdes_dd->io_base = devm_ioremap_resource(&pdev->dev, tdes_res);
-	if (!tdes_dd->io_base) {
-		dev_err(dev, "can't ioremap\n");
-		err = -ENOMEM;
-		goto tdes_io_err;
+	if (IS_ERR(tdes_dd->io_base)) {
+		dev_err(dev, "can't ioremap\n");
+		err = PTR_ERR(tdes_dd->io_base);
+		goto res_err;
 	}
 
 	atmel_tdes_hw_version_init(tdes_dd);
@@ -1474,12 +1476,6 @@ err_tdes_dma:
 err_pdata:
 	atmel_tdes_buff_cleanup(tdes_dd);
 err_tdes_buff:
-	iounmap(tdes_dd->io_base);
-tdes_io_err:
-	clk_put(tdes_dd->iclk);
-clk_err:
-	free_irq(tdes_dd->irq, tdes_dd);
-tdes_irq_err:
 res_err:
 	tasklet_kill(&tdes_dd->done_task);
 	tasklet_kill(&tdes_dd->queue_task);
@@ -1510,13 +1506,6 @@ static int atmel_tdes_remove(struct platform_device *pdev)
 
 	atmel_tdes_buff_cleanup(tdes_dd);
 
-	iounmap(tdes_dd->io_base);
-
-	clk_put(tdes_dd->iclk);
-
-	if (tdes_dd->irq >= 0)
-		free_irq(tdes_dd->irq, tdes_dd);
-
 	return 0;
 }
 
diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c
index 2f0b3337505d..95b73968cf72 100644
--- a/drivers/crypto/bfin_crc.c
+++ b/drivers/crypto/bfin_crc.c
@@ -96,26 +96,6 @@ struct bfin_crypto_crc_ctx {
 	u32			key;
 };
 
-
-/*
- * derive number of elements in scatterlist
- */
-static int sg_count(struct scatterlist *sg_list)
-{
-	struct scatterlist *sg = sg_list;
-	int sg_nents = 1;
-
-	if (sg_list == NULL)
-		return 0;
-
-	while (!sg_is_last(sg)) {
-		sg_nents++;
-		sg = sg_next(sg);
-	}
-
-	return sg_nents;
-}
-
 /*
  * get element in scatter list by given index
  */
@@ -160,7 +140,7 @@ static int bfin_crypto_crc_init(struct ahash_request *req)
 	}
 	spin_unlock_bh(&crc_list.lock);
 
-	if (sg_count(req->src) > CRC_MAX_DMA_DESC) {
+	if (sg_nents(req->src) > CRC_MAX_DMA_DESC) {
 		dev_dbg(ctx->crc->dev, "init: requested sg list is too big > %d\n",
 			CRC_MAX_DMA_DESC);
 		return -EINVAL;
@@ -376,7 +356,8 @@ static int bfin_crypto_crc_handle_queue(struct bfin_crypto_crc *crc,
 			ctx->sg = req->src;
 
 		/* Chop crc buffer size to multiple of 32 bit */
-		nsg = ctx->sg_nents = sg_count(ctx->sg);
+		nsg = sg_nents(ctx->sg);
+		ctx->sg_nents = nsg;
 		ctx->sg_buflen = ctx->buflast_len + req->nbytes;
 		ctx->bufnext_len = ctx->sg_buflen % 4;
 		ctx->sg_buflen &= ~0x3;
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index ba79d638f782..ea8189f4b021 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -1705,14 +1705,131 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	return ret;
 }
 
+static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
+				 const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
+	struct device *jrdev = ctx->jrdev;
+	u32 *key_jump_cmd, *desc;
+	__be64 sector_size = cpu_to_be64(512);
+
+	if (keylen != 2 * AES_MIN_KEY_SIZE && keylen != 2 * AES_MAX_KEY_SIZE) {
+		crypto_ablkcipher_set_flags(ablkcipher,
+					    CRYPTO_TFM_RES_BAD_KEY_LEN);
+		dev_err(jrdev, "key size mismatch\n");
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, keylen);
+	ctx->key_dma = dma_map_single(jrdev, ctx->key, keylen, DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->key_dma)) {
+		dev_err(jrdev, "unable to map key i/o memory\n");
+		return -ENOMEM;
+	}
+	ctx->enckeylen = keylen;
+
+	/* xts_ablkcipher_encrypt shared descriptor */
+	desc = ctx->sh_desc_enc;
+	init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
+	/* Skip if already shared */
+	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+				   JUMP_COND_SHRD);
+
+	/* Load class1 keys only */
+	append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
+			  ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
+
+	/* Load sector size with index 40 bytes (0x28) */
+	append_cmd(desc, CMD_LOAD | IMMEDIATE | LDST_SRCDST_BYTE_CONTEXT |
+		   LDST_CLASS_1_CCB | (0x28 << LDST_OFFSET_SHIFT) | 8);
+	append_data(desc, (void *)&sector_size, 8);
+
+	set_jump_tgt_here(desc, key_jump_cmd);
+
+	/*
+	 * create sequence for loading the sector index
+	 * Upper 8B of IV - will be used as sector index
+	 * Lower 8B of IV - will be discarded
+	 */
+	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
+		   LDST_CLASS_1_CCB | (0x20 << LDST_OFFSET_SHIFT) | 8);
+	append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+
+	/* Load operation */
+	append_operation(desc, ctx->class1_alg_type | OP_ALG_AS_INITFINAL |
+			 OP_ALG_ENCRYPT);
+
+	/* Perform operation */
+	ablkcipher_append_src_dst(desc);
+
+	ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
+					      DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) {
+		dev_err(jrdev, "unable to map shared descriptor\n");
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR,
+		       "xts ablkcipher enc shdesc@" __stringify(__LINE__) ": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	/* xts_ablkcipher_decrypt shared descriptor */
+	desc = ctx->sh_desc_dec;
+
+	init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
+	/* Skip if already shared */
+	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+				   JUMP_COND_SHRD);
+
+	/* Load class1 key only */
+	append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
+			  ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
+
+	/* Load sector size with index 40 bytes (0x28) */
+	append_cmd(desc, CMD_LOAD | IMMEDIATE | LDST_SRCDST_BYTE_CONTEXT |
+		   LDST_CLASS_1_CCB | (0x28 << LDST_OFFSET_SHIFT) | 8);
+	append_data(desc, (void *)&sector_size, 8);
+
+	set_jump_tgt_here(desc, key_jump_cmd);
+
+	/*
+	 * create sequence for loading the sector index
+	 * Upper 8B of IV - will be used as sector index
+	 * Lower 8B of IV - will be discarded
+	 */
+	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
+		   LDST_CLASS_1_CCB | (0x20 << LDST_OFFSET_SHIFT) | 8);
+	append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+
+	/* Load operation */
+	append_dec_op1(desc, ctx->class1_alg_type);
+
+	/* Perform operation */
+	ablkcipher_append_src_dst(desc);
+
+	ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
+					      DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) {
+		dma_unmap_single(jrdev, ctx->sh_desc_enc_dma,
+				 desc_bytes(ctx->sh_desc_enc), DMA_TO_DEVICE);
+		dev_err(jrdev, "unable to map shared descriptor\n");
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR,
+		       "xts ablkcipher dec shdesc@" __stringify(__LINE__) ": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	return 0;
+}
+
 /*
  * aead_edesc - s/w-extended aead descriptor
  * @assoc_nents: number of segments in associated data (SPI+Seq) scatterlist
- * @assoc_chained: if source is chained
  * @src_nents: number of segments in input scatterlist
- * @src_chained: if source is chained
  * @dst_nents: number of segments in output scatterlist
- * @dst_chained: if destination is chained
  * @iv_dma: dma address of iv for checking continuity and link table
  * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE)
  * @sec4_sg_bytes: length of dma mapped sec4_sg space
@@ -1721,11 +1838,8 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
  */
 struct aead_edesc {
 	int assoc_nents;
-	bool assoc_chained;
 	int src_nents;
-	bool src_chained;
 	int dst_nents;
-	bool dst_chained;
 	dma_addr_t iv_dma;
 	int sec4_sg_bytes;
 	dma_addr_t sec4_sg_dma;
@@ -1736,9 +1850,7 @@ struct aead_edesc {
 /*
  * ablkcipher_edesc - s/w-extended ablkcipher descriptor
  * @src_nents: number of segments in input scatterlist
- * @src_chained: if source is chained
  * @dst_nents: number of segments in output scatterlist
- * @dst_chained: if destination is chained
  * @iv_dma: dma address of iv for checking continuity and link table
  * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE)
  * @sec4_sg_bytes: length of dma mapped sec4_sg space
@@ -1747,9 +1859,7 @@ struct aead_edesc {
  */
 struct ablkcipher_edesc {
 	int src_nents;
-	bool src_chained;
 	int dst_nents;
-	bool dst_chained;
 	dma_addr_t iv_dma;
 	int sec4_sg_bytes;
 	dma_addr_t sec4_sg_dma;
@@ -1759,18 +1869,15 @@ struct ablkcipher_edesc {
 
 static void caam_unmap(struct device *dev, struct scatterlist *src,
 		       struct scatterlist *dst, int src_nents,
-		       bool src_chained, int dst_nents, bool dst_chained,
+		       int dst_nents,
 		       dma_addr_t iv_dma, int ivsize, dma_addr_t sec4_sg_dma,
 		       int sec4_sg_bytes)
 {
 	if (dst != src) {
-		dma_unmap_sg_chained(dev, src, src_nents ? : 1, DMA_TO_DEVICE,
-				     src_chained);
-		dma_unmap_sg_chained(dev, dst, dst_nents ? : 1, DMA_FROM_DEVICE,
-				     dst_chained);
+		dma_unmap_sg(dev, src, src_nents ? : 1, DMA_TO_DEVICE);
+		dma_unmap_sg(dev, dst, dst_nents ? : 1, DMA_FROM_DEVICE);
 	} else {
-		dma_unmap_sg_chained(dev, src, src_nents ? : 1,
-				     DMA_BIDIRECTIONAL, src_chained);
+		dma_unmap_sg(dev, src, src_nents ? : 1, DMA_BIDIRECTIONAL);
 	}
 
 	if (iv_dma)
@@ -1785,8 +1892,7 @@ static void aead_unmap(struct device *dev,
 		       struct aead_request *req)
 {
 	caam_unmap(dev, req->src, req->dst,
-		   edesc->src_nents, edesc->src_chained, edesc->dst_nents,
-		   edesc->dst_chained, 0, 0,
+		   edesc->src_nents, edesc->dst_nents, 0, 0,
 		   edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
 }
 
@@ -1798,8 +1904,8 @@ static void ablkcipher_unmap(struct device *dev,
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 
 	caam_unmap(dev, req->src, req->dst,
-		   edesc->src_nents, edesc->src_chained, edesc->dst_nents,
-		   edesc->dst_chained, edesc->iv_dma, ivsize,
+		   edesc->src_nents, edesc->dst_nents,
+		   edesc->iv_dma, ivsize,
 		   edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
 }
 
@@ -2169,22 +2275,18 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	struct aead_edesc *edesc;
 	int sgc;
 	bool all_contig = true;
-	bool src_chained = false, dst_chained = false;
 	int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
 	unsigned int authsize = ctx->authsize;
 
 	if (unlikely(req->dst != req->src)) {
-		src_nents = sg_count(req->src, req->assoclen + req->cryptlen,
-				     &src_chained);
+		src_nents = sg_count(req->src, req->assoclen + req->cryptlen);
 		dst_nents = sg_count(req->dst,
 				     req->assoclen + req->cryptlen +
-					(encrypt ? authsize : (-authsize)),
-				     &dst_chained);
+					(encrypt ? authsize : (-authsize)));
 	} else {
 		src_nents = sg_count(req->src,
 				     req->assoclen + req->cryptlen +
-					(encrypt ? authsize : 0),
-				     &src_chained);
+					(encrypt ? authsize : 0));
 	}
 
 	/* Check if data are contiguous. */
@@ -2207,37 +2309,35 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	}
 
 	if (likely(req->src == req->dst)) {
-		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
-					 DMA_BIDIRECTIONAL, src_chained);
+		sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
+				 DMA_BIDIRECTIONAL);
 		if (unlikely(!sgc)) {
 			dev_err(jrdev, "unable to map source\n");
 			kfree(edesc);
 			return ERR_PTR(-ENOMEM);
 		}
 	} else {
-		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
-					 DMA_TO_DEVICE, src_chained);
+		sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
+				 DMA_TO_DEVICE);
 		if (unlikely(!sgc)) {
 			dev_err(jrdev, "unable to map source\n");
 			kfree(edesc);
 			return ERR_PTR(-ENOMEM);
 		}
 
-		sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? : 1,
-					 DMA_FROM_DEVICE, dst_chained);
+		sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
+				 DMA_FROM_DEVICE);
 		if (unlikely(!sgc)) {
 			dev_err(jrdev, "unable to map destination\n");
-			dma_unmap_sg_chained(jrdev, req->src, src_nents ? : 1,
-					     DMA_TO_DEVICE, src_chained);
+			dma_unmap_sg(jrdev, req->src, src_nents ? : 1,
+				     DMA_TO_DEVICE);
 			kfree(edesc);
 			return ERR_PTR(-ENOMEM);
 		}
 	}
 
 	edesc->src_nents = src_nents;
-	edesc->src_chained = src_chained;
 	edesc->dst_nents = dst_nents;
-	edesc->dst_chained = dst_chained;
 	edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) +
 			 desc_bytes;
 	*all_contig_ptr = all_contig;
@@ -2467,22 +2567,21 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	bool iv_contig = false;
 	int sgc;
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
-	bool src_chained = false, dst_chained = false;
 	int sec4_sg_index;
 
-	src_nents = sg_count(req->src, req->nbytes, &src_chained);
+	src_nents = sg_count(req->src, req->nbytes);
 
 	if (req->dst != req->src)
-		dst_nents = sg_count(req->dst, req->nbytes, &dst_chained);
+		dst_nents = sg_count(req->dst, req->nbytes);
 
 	if (likely(req->src == req->dst)) {
-		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
-					 DMA_BIDIRECTIONAL, src_chained);
+		sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
+				 DMA_BIDIRECTIONAL);
 	} else {
-		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
-					 DMA_TO_DEVICE, src_chained);
-		sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? : 1,
-					 DMA_FROM_DEVICE, dst_chained);
+		sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
+				 DMA_TO_DEVICE);
+		sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
+				 DMA_FROM_DEVICE);
 	}
 
 	iv_dma = dma_map_single(jrdev, req->info, ivsize, DMA_TO_DEVICE);
@@ -2511,9 +2610,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	}
 
 	edesc->src_nents = src_nents;
-	edesc->src_chained = src_chained;
 	edesc->dst_nents = dst_nents;
-	edesc->dst_chained = dst_chained;
 	edesc->sec4_sg_bytes = sec4_sg_bytes;
 	edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) +
 			 desc_bytes;
@@ -2646,22 +2743,21 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
 	bool iv_contig = false;
 	int sgc;
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
-	bool src_chained = false, dst_chained = false;
 	int sec4_sg_index;
 
-	src_nents = sg_count(req->src, req->nbytes, &src_chained);
+	src_nents = sg_count(req->src, req->nbytes);
 
 	if (unlikely(req->dst != req->src))
-		dst_nents = sg_count(req->dst, req->nbytes, &dst_chained);
+		dst_nents = sg_count(req->dst, req->nbytes);
 
 	if (likely(req->src == req->dst)) {
-		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
-					 DMA_BIDIRECTIONAL, src_chained);
+		sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
+				 DMA_BIDIRECTIONAL);
 	} else {
-		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
-					 DMA_TO_DEVICE, src_chained);
-		sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? : 1,
-					 DMA_FROM_DEVICE, dst_chained);
+		sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
+				 DMA_TO_DEVICE);
+		sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
+				 DMA_FROM_DEVICE);
 	}
 
 	/*
@@ -2690,9 +2786,7 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
 	}
 
 	edesc->src_nents = src_nents;
-	edesc->src_chained = src_chained;
 	edesc->dst_nents = dst_nents;
-	edesc->dst_chained = dst_chained;
 	edesc->sec4_sg_bytes = sec4_sg_bytes;
 	edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) +
 			 desc_bytes;
@@ -2871,7 +2965,23 @@ static struct caam_alg_template driver_algs[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			},
 		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CTR_MOD128,
-	}
+	},
+	{
+		.name = "xts(aes)",
+		.driver_name = "xts-aes-caam",
+		.blocksize = AES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_ablkcipher = {
+			.setkey = xts_ablkcipher_setkey,
+			.encrypt = ablkcipher_encrypt,
+			.decrypt = ablkcipher_decrypt,
+			.geniv = "eseqiv",
+			.min_keysize = 2 * AES_MIN_KEY_SIZE,
+			.max_keysize = 2 * AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_XTS,
+	},
 };
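
/*
 * A minimal sketch of how a kernel-side user would reach the xts(aes)
 * template registered above, using the 4.x-era ablkcipher API that
 * this driver implements; the crypto core selects "xts-aes-caam" when
 * it has the highest priority.  xts_alloc() is a hypothetical helper.
 */
static struct crypto_ablkcipher *xts_alloc(const u8 *key, unsigned int keylen)
{
	struct crypto_ablkcipher *tfm;

	tfm = crypto_alloc_ablkcipher("xts(aes)", 0, 0);
	if (IS_ERR(tfm))
		return tfm;

	/* XTS takes two AES keys back to back: 2 * 16 or 2 * 32 bytes */
	if (crypto_ablkcipher_setkey(tfm, key, keylen)) {
		crypto_free_ablkcipher(tfm);
		return ERR_PTR(-EINVAL);
	}

	return tfm;
}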
 
 static struct caam_aead_alg driver_aeads[] = {
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 94433b9fc200..49106ea42887 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -134,6 +134,15 @@ struct caam_hash_state {
 	int current_buf;
 };
 
+struct caam_export_state {
+	u8 buf[CAAM_MAX_HASH_BLOCK_SIZE];
+	u8 caam_ctx[MAX_CTX_LEN];
+	int buflen;
+	int (*update)(struct ahash_request *req);
+	int (*final)(struct ahash_request *req);
+	int (*finup)(struct ahash_request *req);
+};
+
 /* Common job descriptor seq in/out ptr routines */
 
 /* Map state->caam_ctx, and append seq_out_ptr command that points to it */
@@ -181,10 +190,9 @@ static inline dma_addr_t buf_map_to_sec4_sg(struct device *jrdev,
 /* Map req->src and put it in link table */
 static inline void src_map_to_sec4_sg(struct device *jrdev,
 				      struct scatterlist *src, int src_nents,
-				      struct sec4_sg_entry *sec4_sg,
-				      bool chained)
+				      struct sec4_sg_entry *sec4_sg)
 {
-	dma_map_sg_chained(jrdev, src, src_nents, DMA_TO_DEVICE, chained);
+	dma_map_sg(jrdev, src, src_nents, DMA_TO_DEVICE);
 	sg_to_sec4_sg_last(src, src_nents, sec4_sg, 0);
 }
 
@@ -585,7 +593,6 @@ badkey:
  * ahash_edesc - s/w-extended ahash descriptor
  * @dst_dma: physical mapped address of req->result
  * @sec4_sg_dma: physical mapped address of h/w link table
- * @chained: if source is chained
  * @src_nents: number of segments in input scatterlist
  * @sec4_sg_bytes: length of dma mapped sec4_sg space
  * @sec4_sg: pointer to h/w link table
@@ -594,7 +601,6 @@ badkey:
 struct ahash_edesc {
 	dma_addr_t dst_dma;
 	dma_addr_t sec4_sg_dma;
-	bool chained;
 	int src_nents;
 	int sec4_sg_bytes;
 	struct sec4_sg_entry *sec4_sg;
@@ -606,8 +612,7 @@ static inline void ahash_unmap(struct device *dev,
 			struct ahash_request *req, int dst_len)
 {
 	if (edesc->src_nents)
-		dma_unmap_sg_chained(dev, req->src, edesc->src_nents,
-				     DMA_TO_DEVICE, edesc->chained);
+		dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE);
 	if (edesc->dst_dma)
 		dma_unmap_single(dev, edesc->dst_dma, dst_len, DMA_FROM_DEVICE);
 
@@ -788,7 +793,6 @@ static int ahash_update_ctx(struct ahash_request *req)
 	dma_addr_t ptr = ctx->sh_desc_update_dma;
 	int src_nents, sec4_sg_bytes, sec4_sg_src_index;
 	struct ahash_edesc *edesc;
-	bool chained = false;
 	int ret = 0;
 	int sh_len;
 
@@ -797,8 +801,8 @@ static int ahash_update_ctx(struct ahash_request *req)
 	to_hash = in_len - *next_buflen;
 
 	if (to_hash) {
-		src_nents = __sg_count(req->src, req->nbytes - (*next_buflen),
-				       &chained);
+		src_nents = sg_nents_for_len(req->src,
+					     req->nbytes - (*next_buflen));
 		sec4_sg_src_index = 1 + (*buflen ? 1 : 0);
 		sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
 				 sizeof(struct sec4_sg_entry);
@@ -816,7 +820,6 @@ static int ahash_update_ctx(struct ahash_request *req)
 		}
 
 		edesc->src_nents = src_nents;
-		edesc->chained = chained;
 		edesc->sec4_sg_bytes = sec4_sg_bytes;
 		edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
 				 DESC_JOB_IO_LEN;
@@ -829,12 +832,11 @@ static int ahash_update_ctx(struct ahash_request *req)
 		state->buf_dma = try_buf_map_to_sec4_sg(jrdev,
 							edesc->sec4_sg + 1,
 							buf, state->buf_dma,
-							*next_buflen, *buflen);
+							*buflen, last_buflen);
 
 		if (src_nents) {
 			src_map_to_sec4_sg(jrdev, req->src, src_nents,
-					   edesc->sec4_sg + sec4_sg_src_index,
-					   chained);
+					   edesc->sec4_sg + sec4_sg_src_index);
 			if (*next_buflen)
 				scatterwalk_map_and_copy(next_buf, req->src,
 							 to_hash - *buflen,
@@ -996,11 +998,10 @@ static int ahash_finup_ctx(struct ahash_request *req)
 	int src_nents;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct ahash_edesc *edesc;
-	bool chained = false;
 	int ret = 0;
 	int sh_len;
 
-	src_nents = __sg_count(req->src, req->nbytes, &chained);
+	src_nents = sg_nents_for_len(req->src, req->nbytes);
 	sec4_sg_src_index = 1 + (buflen ? 1 : 0);
 	sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
 			 sizeof(struct sec4_sg_entry);
@@ -1018,7 +1019,6 @@ static int ahash_finup_ctx(struct ahash_request *req)
 	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
 	edesc->src_nents = src_nents;
-	edesc->chained = chained;
 	edesc->sec4_sg_bytes = sec4_sg_bytes;
 	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
 			 DESC_JOB_IO_LEN;
@@ -1033,7 +1033,7 @@ static int ahash_finup_ctx(struct ahash_request *req)
 						last_buflen);
 
 	src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg +
-			   sec4_sg_src_index, chained);
+			   sec4_sg_src_index);
 
 	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
 					    sec4_sg_bytes, DMA_TO_DEVICE);
@@ -1081,14 +1081,12 @@ static int ahash_digest(struct ahash_request *req)
 	int src_nents, sec4_sg_bytes;
 	dma_addr_t src_dma;
 	struct ahash_edesc *edesc;
-	bool chained = false;
 	int ret = 0;
 	u32 options;
 	int sh_len;
 
-	src_nents = sg_count(req->src, req->nbytes, &chained);
-	dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE,
-			   chained);
+	src_nents = sg_count(req->src, req->nbytes);
+	dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE);
 	sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry);
 
 	/* allocate space for base edesc and hw desc commands, link tables */
@@ -1102,7 +1100,6 @@ static int ahash_digest(struct ahash_request *req)
 			  DESC_JOB_IO_LEN;
 	edesc->sec4_sg_bytes = sec4_sg_bytes;
 	edesc->src_nents = src_nents;
-	edesc->chained = chained;
 
 	sh_len = desc_len(sh_desc);
 	desc = edesc->hw_desc;
@@ -1228,7 +1225,6 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 	struct ahash_edesc *edesc;
 	u32 *desc, *sh_desc = ctx->sh_desc_update_first;
 	dma_addr_t ptr = ctx->sh_desc_update_first_dma;
-	bool chained = false;
 	int ret = 0;
 	int sh_len;
 
@@ -1236,8 +1232,8 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 	to_hash = in_len - *next_buflen;
 
 	if (to_hash) {
-		src_nents = __sg_count(req->src, req->nbytes - (*next_buflen),
-				       &chained);
+		src_nents = sg_nents_for_len(req->src,
+					     req->nbytes - (*next_buflen));
 		sec4_sg_bytes = (1 + src_nents) *
 				sizeof(struct sec4_sg_entry);
 
@@ -1254,7 +1250,6 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 		}
 
 		edesc->src_nents = src_nents;
-		edesc->chained = chained;
 		edesc->sec4_sg_bytes = sec4_sg_bytes;
 		edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
 				 DESC_JOB_IO_LEN;
@@ -1263,7 +1258,7 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 		state->buf_dma = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg,
 						    buf, *buflen);
 		src_map_to_sec4_sg(jrdev, req->src, src_nents,
-				   edesc->sec4_sg + 1, chained);
+				   edesc->sec4_sg + 1);
 		if (*next_buflen) {
 			scatterwalk_map_and_copy(next_buf, req->src,
 						 to_hash - *buflen,
@@ -1343,11 +1338,10 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
 	int sec4_sg_bytes, sec4_sg_src_index, src_nents;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct ahash_edesc *edesc;
-	bool chained = false;
 	int sh_len;
 	int ret = 0;
 
-	src_nents = __sg_count(req->src, req->nbytes, &chained);
+	src_nents = sg_nents_for_len(req->src, req->nbytes);
 	sec4_sg_src_index = 2;
 	sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
 			 sizeof(struct sec4_sg_entry);
@@ -1365,7 +1359,6 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
 	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
 	edesc->src_nents = src_nents;
-	edesc->chained = chained;
 	edesc->sec4_sg_bytes = sec4_sg_bytes;
 	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
 			 DESC_JOB_IO_LEN;
@@ -1374,8 +1367,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
 						state->buf_dma, buflen,
 						last_buflen);
 
-	src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + 1,
-			   chained);
+	src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + 1);
 
 	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
 					    sec4_sg_bytes, DMA_TO_DEVICE);
@@ -1429,7 +1421,6 @@ static int ahash_update_first(struct ahash_request *req)
 	dma_addr_t src_dma;
 	u32 options;
 	struct ahash_edesc *edesc;
-	bool chained = false;
 	int ret = 0;
 	int sh_len;
 
@@ -1438,10 +1429,8 @@ static int ahash_update_first(struct ahash_request *req)
 	to_hash = req->nbytes - *next_buflen;
 
 	if (to_hash) {
-		src_nents = sg_count(req->src, req->nbytes - (*next_buflen),
-				     &chained);
-		dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
-				   DMA_TO_DEVICE, chained);
+		src_nents = sg_count(req->src, req->nbytes - (*next_buflen));
+		dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE);
 		sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry);
 
 		/*
@@ -1457,7 +1446,6 @@ static int ahash_update_first(struct ahash_request *req)
 		}
 
 		edesc->src_nents = src_nents;
-		edesc->chained = chained;
 		edesc->sec4_sg_bytes = sec4_sg_bytes;
 		edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
 				 DESC_JOB_IO_LEN;
@@ -1574,25 +1562,42 @@ static int ahash_final(struct ahash_request *req)
 
 static int ahash_export(struct ahash_request *req, void *out)
 {
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_export_state *export = out;
+	int len;
+	u8 *buf;
+
+	if (state->current_buf) {
+		buf = state->buf_1;
+		len = state->buflen_1;
+	} else {
+		buf = state->buf_0;
+		len = state->buflen_0;
+	}
+
+	memcpy(export->buf, buf, len);
+	memcpy(export->caam_ctx, state->caam_ctx, sizeof(export->caam_ctx));
+	export->buflen = len;
+	export->update = state->update;
+	export->final = state->final;
+	export->finup = state->finup;
 
-	memcpy(out, ctx, sizeof(struct caam_hash_ctx));
-	memcpy(out + sizeof(struct caam_hash_ctx), state,
-	       sizeof(struct caam_hash_state));
 	return 0;
 }
 
 static int ahash_import(struct ahash_request *req, const void *in)
 {
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
+	const struct caam_export_state *export = in;
+
+	memset(state, 0, sizeof(*state));
+	memcpy(state->buf_0, export->buf, export->buflen);
+	memcpy(state->caam_ctx, export->caam_ctx, sizeof(state->caam_ctx));
+	state->buflen_0 = export->buflen;
+	state->update = export->update;
+	state->final = export->final;
+	state->finup = export->finup;
 
-	memcpy(ctx, in, sizeof(struct caam_hash_ctx));
-	memcpy(state, in + sizeof(struct caam_hash_ctx),
-	       sizeof(struct caam_hash_state));
 	return 0;
 }
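
/*
 * A minimal sketch of the contract the new, self-contained export state
 * serves: a caller may snapshot a partial hash into a buffer of
 * crypto_ahash_statesize() bytes and resume it on another request.
 * hash_checkpoint() is a hypothetical helper.
 */
static int hash_checkpoint(struct crypto_ahash *tfm,
			   struct ahash_request *src_req,
			   struct ahash_request *dst_req)
{
	void *state;
	int ret;

	state = kmalloc(crypto_ahash_statesize(tfm), GFP_KERNEL);
	if (!state)
		return -ENOMEM;

	ret = crypto_ahash_export(src_req, state);
	if (!ret)
		ret = crypto_ahash_import(dst_req, state);

	kfree(state);
	return ret;
}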
 
@@ -1626,8 +1631,9 @@ static struct caam_hash_template driver_hash[] = {
 			.setkey = ahash_setkey,
 			.halg = {
 				.digestsize = SHA1_DIGEST_SIZE,
-				},
+				.statesize = sizeof(struct caam_export_state),
 			},
+		},
 		.alg_type = OP_ALG_ALGSEL_SHA1,
 		.alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
 	}, {
@@ -1647,8 +1653,9 @@ static struct caam_hash_template driver_hash[] = {
 			.setkey = ahash_setkey,
 			.halg = {
 				.digestsize = SHA224_DIGEST_SIZE,
-				},
+				.statesize = sizeof(struct caam_export_state),
 			},
+		},
 		.alg_type = OP_ALG_ALGSEL_SHA224,
 		.alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
 	}, {
@@ -1668,8 +1675,9 @@ static struct caam_hash_template driver_hash[] = {
 			.setkey = ahash_setkey,
 			.halg = {
 				.digestsize = SHA256_DIGEST_SIZE,
-				},
+				.statesize = sizeof(struct caam_export_state),
 			},
+		},
 		.alg_type = OP_ALG_ALGSEL_SHA256,
 		.alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
 	}, {
@@ -1689,8 +1697,9 @@ static struct caam_hash_template driver_hash[] = {
 			.setkey = ahash_setkey,
 			.halg = {
 				.digestsize = SHA384_DIGEST_SIZE,
-				},
+				.statesize = sizeof(struct caam_export_state),
 			},
+		},
 		.alg_type = OP_ALG_ALGSEL_SHA384,
 		.alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
 	}, {
@@ -1710,8 +1719,9 @@ static struct caam_hash_template driver_hash[] = {
 			.setkey = ahash_setkey,
 			.halg = {
 				.digestsize = SHA512_DIGEST_SIZE,
-				},
+				.statesize = sizeof(struct caam_export_state),
 			},
+		},
 		.alg_type = OP_ALG_ALGSEL_SHA512,
 		.alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
 	}, {
@@ -1731,8 +1741,9 @@ static struct caam_hash_template driver_hash[] = {
 			.setkey = ahash_setkey,
 			.halg = {
 				.digestsize = MD5_DIGEST_SIZE,
-				},
+				.statesize = sizeof(struct caam_export_state),
 			},
+		},
 		.alg_type = OP_ALG_ALGSEL_MD5,
 		.alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
 	},
@@ -1952,8 +1963,9 @@ static int __init caam_algapi_hash_init(void)
 
 		err = crypto_register_ahash(&t_alg->ahash_alg);
 		if (err) {
-			pr_warn("%s alg registration failed\n",
-				t_alg->ahash_alg.halg.base.cra_driver_name);
+			pr_warn("%s alg registration failed: %d\n",
+				t_alg->ahash_alg.halg.base.cra_driver_name,
+				err);
 			kfree(t_alg);
 		} else
 			list_add_tail(&t_alg->entry, &hash_list);
@@ -1968,8 +1980,9 @@ static int __init caam_algapi_hash_init(void)
 
 		err = crypto_register_ahash(&t_alg->ahash_alg);
 		if (err) {
-			pr_warn("%s alg registration failed\n",
-				t_alg->ahash_alg.halg.base.cra_driver_name);
+			pr_warn("%s alg registration failed: %d\n",
+				t_alg->ahash_alg.halg.base.cra_driver_name,
+				err);
 			kfree(t_alg);
 		} else
 			list_add_tail(&t_alg->entry, &hash_list);
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index 983d663ef671..1e93c6af2275 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -1492,7 +1492,6 @@ struct sec4_sg_entry {
 #define JUMP_JSL		(1 << JUMP_JSL_SHIFT)
 
 #define JUMP_TYPE_SHIFT		22
-#define JUMP_TYPE_MASK		(0x03 << JUMP_TYPE_SHIFT)
 #define JUMP_TYPE_LOCAL		(0x00 << JUMP_TYPE_SHIFT)
 #define JUMP_TYPE_NONLOCAL	(0x01 << JUMP_TYPE_SHIFT)
 #define JUMP_TYPE_HALT		(0x02 << JUMP_TYPE_SHIFT)
diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h
index 18cd6d1f5870..12ec6616e89d 100644
--- a/drivers/crypto/caam/sg_sw_sec4.h
+++ b/drivers/crypto/caam/sg_sw_sec4.h
@@ -69,81 +69,13 @@ static inline struct sec4_sg_entry *sg_to_sec4_sg_len(
 	return sec4_sg_ptr - 1;
 }
 
-/* count number of elements in scatterlist */
-static inline int __sg_count(struct scatterlist *sg_list, int nbytes,
-			     bool *chained)
-{
-	struct scatterlist *sg = sg_list;
-	int sg_nents = 0;
-
-	while (nbytes > 0) {
-		sg_nents++;
-		nbytes -= sg->length;
-		if (!sg_is_last(sg) && (sg + 1)->length == 0)
-			*chained = true;
-		sg = sg_next(sg);
-	}
-
-	return sg_nents;
-}
-
 /* derive number of elements in scatterlist, but return 0 for 1 */
-static inline int sg_count(struct scatterlist *sg_list, int nbytes,
-			     bool *chained)
+static inline int sg_count(struct scatterlist *sg_list, int nbytes)
 {
-	int sg_nents = __sg_count(sg_list, nbytes, chained);
+	int sg_nents = sg_nents_for_len(sg_list, nbytes);
 
 	if (likely(sg_nents == 1))
 		return 0;
 
 	return sg_nents;
 }
-
-static inline void dma_unmap_sg_chained(
-	struct device *dev, struct scatterlist *sg, unsigned int nents,
-	enum dma_data_direction dir, bool chained)
-{
-	if (unlikely(chained)) {
-		int i;
-		struct scatterlist *tsg = sg;
-
-		/*
-		 * Use a local copy of the sg pointer to avoid moving the
-		 * head of the list pointed to by sg as we walk the list.
-		 */
-		for (i = 0; i < nents; i++) {
-			dma_unmap_sg(dev, tsg, 1, dir);
-			tsg = sg_next(tsg);
-		}
-	} else if (nents) {
-		dma_unmap_sg(dev, sg, nents, dir);
-	}
-}
-
-static inline int dma_map_sg_chained(
-	struct device *dev, struct scatterlist *sg, unsigned int nents,
-	enum dma_data_direction dir, bool chained)
-{
-	if (unlikely(chained)) {
-		int i;
-		struct scatterlist *tsg = sg;
-
-		/*
-		 * Use a local copy of the sg pointer to avoid moving the
-		 * head of the list pointed to by sg as we walk the list.
-		 */
-		for (i = 0; i < nents; i++) {
-			if (!dma_map_sg(dev, tsg, 1, dir)) {
-				dma_unmap_sg_chained(dev, sg, i, dir,
-						     chained);
-				nents = 0;
-				break;
-			}
-
-			tsg = sg_next(tsg);
-		}
-	} else
-		nents = dma_map_sg(dev, sg, nents, dir);
-
-	return nents;
-}
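
/*
 * A minimal sketch of the core helpers that replace the driver-local
 * chained variants removed above.  sg_nents_for_len() returns -EINVAL
 * when the list is shorter than nbytes, and dma_map_sg() returns 0 on
 * mapping failure; map_src() is a hypothetical wrapper.
 */
static int map_src(struct device *dev, struct scatterlist *src, int nbytes)
{
	int nents = sg_nents_for_len(src, nbytes);

	if (nents < 0)
		return nents;

	if (!dma_map_sg(dev, src, nents, DMA_TO_DEVICE))
		return -ENOMEM;

	return nents;
}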
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
index ae38f6b6cc10..3cd8481065f8 100644
--- a/drivers/crypto/ccp/Kconfig
+++ b/drivers/crypto/ccp/Kconfig
@@ -5,12 +5,12 @@ config CRYPTO_DEV_CCP_DD
 	select HW_RANDOM
 	help
 	  Provides the interface to use the AMD Cryptographic Coprocessor
-	  which can be used to accelerate or offload encryption operations
-	  such as SHA, AES and more. If you choose 'M' here, this module
-	  will be called ccp.
+	  which can be used to offload encryption operations such as SHA,
+	  AES and more. If you choose 'M' here, this module will be called
+	  ccp.
 
 config CRYPTO_DEV_CCP_CRYPTO
-	tristate "Encryption and hashing acceleration support"
+	tristate "Encryption and hashing offload support"
 	depends on CRYPTO_DEV_CCP_DD
 	default m
 	select CRYPTO_HASH
@@ -18,6 +18,5 @@ config CRYPTO_DEV_CCP_CRYPTO
 	select CRYPTO_AUTHENC
 	help
 	  Support for using the cryptographic API with the AMD Cryptographic
-	  Coprocessor. This module supports acceleration and offload of SHA
-	  and AES algorithms.  If you choose 'M' here, this module will be
-	  called ccp_crypto.
+	  Coprocessor. This module supports offload of SHA and AES algorithms.
+	  If you choose 'M' here, this module will be called ccp_crypto.
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index ea7e8446956a..d89f20c04266 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -118,10 +118,19 @@ static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes,
 	if (rctx->buf_count) {
 		sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count);
 		sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg);
+		if (!sg) {
+			ret = -EINVAL;
+			goto e_free;
+		}
 	}
 
-	if (nbytes)
+	if (nbytes) {
 		sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src);
+		if (!sg) {
+			ret = -EINVAL;
+			goto e_free;
+		}
+	}
 
 	if (need_pad) {
 		int pad_length = block_size - (len & (block_size - 1));
@@ -132,6 +141,10 @@ static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes,
 		rctx->pad[0] = 0x80;
 		sg_init_one(&rctx->pad_sg, rctx->pad, pad_length);
 		sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->pad_sg);
+		if (!sg) {
+			ret = -EINVAL;
+			goto e_free;
+		}
 	}
 	if (sg) {
 		sg_mark_end(sg);
@@ -163,6 +176,11 @@ static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes,
 	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
 
 	return ret;
+
+e_free:
+	sg_free_table(&rctx->data_sg);
+
+	return ret;
 }
 
 static int ccp_aes_cmac_init(struct ahash_request *req)
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index bdec01ec608f..e0380e59c361 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -305,14 +305,16 @@ struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table,
 	for (sg = table->sgl; sg; sg = sg_next(sg))
 		if (!sg_page(sg))
 			break;
-	BUG_ON(!sg);
+	if (WARN_ON(!sg))
+		return NULL;
 
 	for (; sg && sg_add; sg = sg_next(sg), sg_add = sg_next(sg_add)) {
 		sg_set_page(sg, sg_page(sg_add), sg_add->length,
 			    sg_add->offset);
 		sg_last = sg;
 	}
-	BUG_ON(sg_add);
+	if (WARN_ON(sg_add))
+		return NULL;
 
 	return sg_last;
 }
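
/*
 * A minimal sketch of the recovery pattern adopted above: validate,
 * warn, and return NULL so callers can unwind, rather than taking the
 * machine down with BUG_ON().  find_free_slot() is a hypothetical
 * reduction of ccp_crypto_sg_table_add().
 */
static struct scatterlist *find_free_slot(struct sg_table *table)
{
	struct scatterlist *sg;

	for (sg = table->sgl; sg; sg = sg_next(sg))
		if (!sg_page(sg))
			return sg;

	WARN_ON(1);	/* table unexpectedly full */
	return NULL;	/* callers translate this into -EINVAL */
}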
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 507b34e0cc19..d14b3f28e010 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -107,7 +107,15 @@ static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes,
 
 		sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count);
 		sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg);
+		if (!sg) {
+			ret = -EINVAL;
+			goto e_free;
+		}
 		sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src);
+		if (!sg) {
+			ret = -EINVAL;
+			goto e_free;
+		}
 		sg_mark_end(sg);
 
 		sg = rctx->data_sg.sgl;
@@ -142,6 +150,11 @@ static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes,
 	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
 
 	return ret;
+
+e_free:
+	sg_free_table(&rctx->data_sg);
+
+	return ret;
 }
 
 static int ccp_sha_init(struct ahash_request *req)
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index d09c6c4af4aa..c6e883b296a9 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -611,15 +611,16 @@ static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
 				 1);
 }
 
-static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
-				    struct scatterlist *sg,
-				    unsigned int len, unsigned int se_len,
-				    bool sign_extend)
+static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
+				   struct scatterlist *sg,
+				   unsigned int len, unsigned int se_len,
+				   bool sign_extend)
 {
 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
 	u8 buffer[CCP_REVERSE_BUF_SIZE];
 
-	BUG_ON(se_len > sizeof(buffer));
+	if (WARN_ON(se_len > sizeof(buffer)))
+		return -EINVAL;
 
 	sg_offset = len;
 	dm_offset = 0;
@@ -642,6 +643,8 @@ static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
 				       se_len - ksb_len);
 		}
 	}
+
+	return 0;
 }
 
 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
@@ -1606,8 +1609,10 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	if (ret)
 		goto e_ksb;
 
-	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
-				false);
+	ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len,
+				      CCP_KSB_BYTES, false);
+	if (ret)
+		goto e_exp;
 	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
 			      CCP_PASSTHRU_BYTESWAP_NOOP);
 	if (ret) {
@@ -1623,11 +1628,15 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	if (ret)
 		goto e_exp;
 
-	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
-				false);
+	ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len,
+				      CCP_KSB_BYTES, false);
+	if (ret)
+		goto e_src;
 	src.address += o_len;	/* Adjust the address for the copy operation */
-	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
-				false);
+	ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len,
+				      CCP_KSB_BYTES, false);
+	if (ret)
+		goto e_src;
 	src.address -= o_len;	/* Reset the address to original value */
 
 	/* Prepare the output area for the operation */
@@ -1841,21 +1850,27 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	save = src.address;
 
 	/* Copy the ECC modulus */
-	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
-				CCP_ECC_OPERAND_SIZE, false);
+	ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
+				      CCP_ECC_OPERAND_SIZE, false);
+	if (ret)
+		goto e_src;
 	src.address += CCP_ECC_OPERAND_SIZE;
 
 	/* Copy the first operand */
-	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
-				ecc->u.mm.operand_1_len,
-				CCP_ECC_OPERAND_SIZE, false);
+	ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
+				      ecc->u.mm.operand_1_len,
+				      CCP_ECC_OPERAND_SIZE, false);
+	if (ret)
+		goto e_src;
 	src.address += CCP_ECC_OPERAND_SIZE;
 
 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
 		/* Copy the second operand */
-		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
-					ecc->u.mm.operand_2_len,
-					CCP_ECC_OPERAND_SIZE, false);
+		ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
+					      ecc->u.mm.operand_2_len,
+					      CCP_ECC_OPERAND_SIZE, false);
+		if (ret)
+			goto e_src;
 		src.address += CCP_ECC_OPERAND_SIZE;
 	}
 
@@ -1960,18 +1975,24 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	save = src.address;
 
 	/* Copy the ECC modulus */
-	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
-				CCP_ECC_OPERAND_SIZE, false);
+	ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
+				      CCP_ECC_OPERAND_SIZE, false);
+	if (ret)
+		goto e_src;
 	src.address += CCP_ECC_OPERAND_SIZE;
 
 	/* Copy the first point X and Y coordinate */
-	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
-				ecc->u.pm.point_1.x_len,
-				CCP_ECC_OPERAND_SIZE, false);
+	ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
+				      ecc->u.pm.point_1.x_len,
+				      CCP_ECC_OPERAND_SIZE, false);
+	if (ret)
+		goto e_src;
 	src.address += CCP_ECC_OPERAND_SIZE;
-	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
-				ecc->u.pm.point_1.y_len,
-				CCP_ECC_OPERAND_SIZE, false);
+	ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
+				      ecc->u.pm.point_1.y_len,
+				      CCP_ECC_OPERAND_SIZE, false);
+	if (ret)
+		goto e_src;
 	src.address += CCP_ECC_OPERAND_SIZE;
 
 	/* Set the first point Z coordinate to 1 */
@@ -1980,13 +2001,17 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
 		/* Copy the second point X and Y coordinate */
-		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
-					ecc->u.pm.point_2.x_len,
-					CCP_ECC_OPERAND_SIZE, false);
+		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
+					      ecc->u.pm.point_2.x_len,
+					      CCP_ECC_OPERAND_SIZE, false);
+		if (ret)
+			goto e_src;
 		src.address += CCP_ECC_OPERAND_SIZE;
-		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
-					ecc->u.pm.point_2.y_len,
-					CCP_ECC_OPERAND_SIZE, false);
+		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
+					      ecc->u.pm.point_2.y_len,
+					      CCP_ECC_OPERAND_SIZE, false);
+		if (ret)
+			goto e_src;
 		src.address += CCP_ECC_OPERAND_SIZE;
 
 		/* Set the second point Z coordinate to 1 */
@@ -1994,16 +2019,21 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		src.address += CCP_ECC_OPERAND_SIZE;
 	} else {
 		/* Copy the Domain "a" parameter */
-		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
-					ecc->u.pm.domain_a_len,
-					CCP_ECC_OPERAND_SIZE, false);
+		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
+					      ecc->u.pm.domain_a_len,
+					      CCP_ECC_OPERAND_SIZE, false);
+		if (ret)
+			goto e_src;
 		src.address += CCP_ECC_OPERAND_SIZE;
 
 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
 			/* Copy the scalar value */
-			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
-						ecc->u.pm.scalar_len,
-						CCP_ECC_OPERAND_SIZE, false);
+			ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
+						      ecc->u.pm.scalar_len,
+						      CCP_ECC_OPERAND_SIZE,
+						      false);
+			if (ret)
+				goto e_src;
 			src.address += CCP_ECC_OPERAND_SIZE;
 		}
 	}
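
The same hardening applies to ccp_reverse_set_dm_area(): converting it from void to int lets the oversized-se_len check surface as -EINVAL at every call site rather than as a BUG(). A minimal sketch of the conversion, with the copy loop elided:

static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
				   struct scatterlist *sg,
				   unsigned int len, unsigned int se_len,
				   bool sign_extend)
{
	u8 buffer[CCP_REVERSE_BUF_SIZE];

	if (WARN_ON(se_len > sizeof(buffer)))
		return -EINVAL;	/* callers jump to their cleanup label */

	/* ... reverse-copy sg into the workarea as before ... */
	return 0;
}
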
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c
index af190d4795a8..6ade02f04f91 100644
--- a/drivers/crypto/ccp/ccp-pci.c
+++ b/drivers/crypto/ccp/ccp-pci.c
@@ -319,7 +319,7 @@ static const struct pci_device_id ccp_pci_table[] = {
 MODULE_DEVICE_TABLE(pci, ccp_pci_table);
 
 static struct pci_driver ccp_pci_driver = {
-	.name = "AMD Cryptographic Coprocessor",
+	.name = "ccp",
 	.id_table = ccp_pci_table,
 	.probe = ccp_pci_probe,
 	.remove = ccp_pci_remove,
diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c
index bb241c3ab6b9..8b923b7e9389 100644
--- a/drivers/crypto/ccp/ccp-platform.c
+++ b/drivers/crypto/ccp/ccp-platform.c
@@ -29,7 +29,6 @@
 #include "ccp-dev.h"
 
 struct ccp_platform {
-	int use_acpi;
 	int coherent;
 };
 
@@ -95,7 +94,6 @@ static int ccp_platform_probe(struct platform_device *pdev)
 	struct ccp_device *ccp;
 	struct ccp_platform *ccp_platform;
 	struct device *dev = &pdev->dev;
-	struct acpi_device *adev = ACPI_COMPANION(dev);
 	struct resource *ior;
 	int ret;
 
@@ -112,8 +110,6 @@ static int ccp_platform_probe(struct platform_device *pdev)
 	ccp->get_irq = ccp_get_irqs;
 	ccp->free_irq = ccp_free_irqs;
 
-	ccp_platform->use_acpi = (!adev || acpi_disabled) ? 0 : 1;
-
 	ior = ccp_find_mmio_area(ccp);
 	ccp->io_map = devm_ioremap_resource(dev, ior);
 	if (IS_ERR(ccp->io_map)) {
@@ -229,7 +225,7 @@ MODULE_DEVICE_TABLE(of, ccp_of_match);
 
 static struct platform_driver ccp_platform_driver = {
 	.driver = {
-		.name = "AMD Cryptographic Coprocessor",
+		.name = "ccp",
 #ifdef CONFIG_ACPI
 		.acpi_match_table = ccp_acpi_match,
 #endif
diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h
index bc2a55bc35e4..bd985e72520b 100644
--- a/drivers/crypto/marvell/cesa.h
+++ b/drivers/crypto/marvell/cesa.h
@@ -174,19 +174,19 @@
 
 #define CESA_SA_DESC_MAC_DATA(offset)					\
 	cpu_to_le32(CESA_SA_DATA_SRAM_OFFSET + (offset))
-#define CESA_SA_DESC_MAC_DATA_MSK		GENMASK(15, 0)
+#define CESA_SA_DESC_MAC_DATA_MSK		cpu_to_le32(GENMASK(15, 0))
 
 #define CESA_SA_DESC_MAC_TOTAL_LEN(total_len)	cpu_to_le32((total_len) << 16)
-#define CESA_SA_DESC_MAC_TOTAL_LEN_MSK		GENMASK(31, 16)
+#define CESA_SA_DESC_MAC_TOTAL_LEN_MSK		cpu_to_le32(GENMASK(31, 16))
 
 #define CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX	0xffff
 
 #define CESA_SA_DESC_MAC_DIGEST(offset)					\
 	cpu_to_le32(CESA_SA_MAC_DIG_SRAM_OFFSET + (offset))
-#define CESA_SA_DESC_MAC_DIGEST_MSK		GENMASK(15, 0)
+#define CESA_SA_DESC_MAC_DIGEST_MSK		cpu_to_le32(GENMASK(15, 0))
 
 #define CESA_SA_DESC_MAC_FRAG_LEN(frag_len)	cpu_to_le32((frag_len) << 16)
-#define CESA_SA_DESC_MAC_FRAG_LEN_MSK		GENMASK(31, 16)
+#define CESA_SA_DESC_MAC_FRAG_LEN_MSK		cpu_to_le32(GENMASK(31, 16))
 
 #define CESA_SA_DESC_MAC_IV(offset)					\
 	cpu_to_le32((CESA_SA_MAC_IIV_SRAM_OFFSET + (offset)) |		\
@@ -219,14 +219,14 @@
  * to be executed.
  */
 struct mv_cesa_sec_accel_desc {
-	u32 config;
-	u32 enc_p;
-	u32 enc_len;
-	u32 enc_key_p;
-	u32 enc_iv;
-	u32 mac_src_p;
-	u32 mac_digest;
-	u32 mac_iv;
+	__le32 config;
+	__le32 enc_p;
+	__le32 enc_len;
+	__le32 enc_key_p;
+	__le32 enc_iv;
+	__le32 mac_src_p;
+	__le32 mac_digest;
+	__le32 mac_iv;
 };
 
 /**
@@ -293,11 +293,13 @@ struct mv_cesa_op_ctx {
  * operation.
  */
 struct mv_cesa_tdma_desc {
-	u32 byte_cnt;
-	u32 src;
-	u32 dst;
-	u32 next_dma;
-	u32 cur_dma;
+	__le32 byte_cnt;
+	__le32 src;
+	__le32 dst;
+	__le32 next_dma;
+
+	/* Software state */
+	dma_addr_t cur_dma;
 	struct mv_cesa_tdma_desc *next;
 	union {
 		struct mv_cesa_op_ctx *op;
@@ -612,7 +614,8 @@ struct mv_cesa_ahash_req {
 	u64 len;
 	int src_nents;
 	bool last_req;
-	__be32 state[8];
+	bool algo_le;
+	u32 state[8];
 };
 
 /* CESA functions */
@@ -626,7 +629,7 @@ static inline void mv_cesa_update_op_cfg(struct mv_cesa_op_ctx *op,
 	op->desc.config |= cpu_to_le32(cfg);
 }
 
-static inline u32 mv_cesa_get_op_cfg(struct mv_cesa_op_ctx *op)
+static inline u32 mv_cesa_get_op_cfg(const struct mv_cesa_op_ctx *op)
 {
 	return le32_to_cpu(op->desc.config);
 }
@@ -676,7 +679,7 @@ static inline void mv_cesa_set_int_mask(struct mv_cesa_engine *engine,
 	if (int_mask == engine->int_mask)
 		return;
 
-	writel(int_mask, engine->regs + CESA_SA_INT_MSK);
+	writel_relaxed(int_mask, engine->regs + CESA_SA_INT_MSK);
 	engine->int_mask = int_mask;
 }
 
@@ -685,6 +688,12 @@ static inline u32 mv_cesa_get_int_mask(struct mv_cesa_engine *engine)
 	return engine->int_mask;
 }
 
+static inline bool mv_cesa_mac_op_is_first_frag(const struct mv_cesa_op_ctx *op)
+{
+	return (mv_cesa_get_op_cfg(op) & CESA_SA_DESC_CFG_FRAG_MSK) ==
+		CESA_SA_DESC_CFG_FIRST_FRAG;
+}
+
 int mv_cesa_queue_req(struct crypto_async_request *req);
 
 /*
@@ -789,10 +798,8 @@ int mv_cesa_dma_add_data_transfer(struct mv_cesa_tdma_chain *chain,
 				  dma_addr_t dst, dma_addr_t src, u32 size,
 				  u32 flags, gfp_t gfp_flags);
 
-int mv_cesa_dma_add_dummy_launch(struct mv_cesa_tdma_chain *chain,
-				 u32 flags);
-
-int mv_cesa_dma_add_dummy_end(struct mv_cesa_tdma_chain *chain, u32 flags);
+int mv_cesa_dma_add_dummy_launch(struct mv_cesa_tdma_chain *chain, gfp_t flags);
+int mv_cesa_dma_add_dummy_end(struct mv_cesa_tdma_chain *chain, gfp_t flags);
 
 int mv_cesa_dma_add_op_transfers(struct mv_cesa_tdma_chain *chain,
 				 struct mv_cesa_dma_iter *dma_iter,
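
Marking the descriptor fields __le32 lets sparse flag any access that forgets a byte-swap, but it also requires every mask combined with those fields to be little-endian itself — hence the cpu_to_le32() wrappers on the *_MSK macros above. A sketch of the resulting idiom (struct and macro names illustrative, not from the driver):

struct demo_desc {
	__le32 config;			/* device-visible, little-endian */
};

#define DEMO_LEN(len)	cpu_to_le32((len) << 16)
#define DEMO_LEN_MSK	cpu_to_le32(GENMASK(31, 16))

static void demo_set_len(struct demo_desc *d, u32 len)
{
	/* All operands are __le32, so the expression is correct on
	 * both little- and big-endian hosts and keeps sparse quiet. */
	d->config = (d->config & ~DEMO_LEN_MSK) | DEMO_LEN(len);
}
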
diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c
index 3df2f4e7adb2..6edae64bb387 100644
--- a/drivers/crypto/marvell/cipher.c
+++ b/drivers/crypto/marvell/cipher.c
@@ -98,14 +98,14 @@ static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req)
 
 	/* FIXME: only update enc_len field */
 	if (!sreq->skip_ctx) {
-		memcpy(engine->sram, &sreq->op, sizeof(sreq->op));
+		memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op));
 		sreq->skip_ctx = true;
 	} else {
-		memcpy(engine->sram, &sreq->op, sizeof(sreq->op.desc));
+		memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op.desc));
 	}
 
 	mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE);
-	writel(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG);
+	writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG);
 	writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD);
 }
 
@@ -145,8 +145,9 @@ static int mv_cesa_ablkcipher_process(struct crypto_async_request *req,
 	if (ret)
 		return ret;
 
-	memcpy(ablkreq->info, engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET,
-	       crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)));
+	memcpy_fromio(ablkreq->info,
+		      engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET,
+		      crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)));
 
 	return 0;
 }
@@ -181,7 +182,7 @@ mv_cesa_ablkcipher_std_prepare(struct ablkcipher_request *req)
 	sreq->size = 0;
 	sreq->offset = 0;
 	mv_cesa_adjust_op(engine, &sreq->op);
-	memcpy(engine->sram, &sreq->op, sizeof(sreq->op));
+	memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op));
 }
 
 static inline void mv_cesa_ablkcipher_prepare(struct crypto_async_request *req,
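
The engine's SRAM is an ioremap()ed device mapping, so plain memcpy() on it is not portable; memcpy_toio() and memcpy_fromio() use the proper __iomem accessors in each direction. The shape of the change, assuming sram is declared __iomem and buf/len/iv/ivsize are ordinary CPU-side locals:

	void __iomem *sram = engine->sram;	/* from devm_ioremap_resource() */

	/* CPU buffer -> device SRAM */
	memcpy_toio(sram + CESA_SA_DATA_SRAM_OFFSET, buf, len);

	/* device SRAM -> CPU buffer */
	memcpy_fromio(iv, sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, ivsize);
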
diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
index e8d0d7128137..6ec55b4a087b 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c
@@ -27,10 +27,10 @@ mv_cesa_ahash_req_iter_init(struct mv_cesa_ahash_dma_iter *iter,
 			    struct ahash_request *req)
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	unsigned int len = req->nbytes;
+	unsigned int len = req->nbytes + creq->cache_ptr;
 
 	if (!creq->last_req)
-		len = (len + creq->cache_ptr) & ~CESA_HASH_BLOCK_SIZE_MSK;
+		len &= ~CESA_HASH_BLOCK_SIZE_MSK;
 
 	mv_cesa_req_dma_iter_init(&iter->base, len);
 	mv_cesa_sg_dma_iter_init(&iter->src, req->src, DMA_TO_DEVICE);
@@ -179,7 +179,6 @@ static int mv_cesa_ahash_pad_len(struct mv_cesa_ahash_req *creq)
 
 static int mv_cesa_ahash_pad_req(struct mv_cesa_ahash_req *creq, u8 *buf)
 {
-	__be64 bits = cpu_to_be64(creq->len << 3);
 	unsigned int index, padlen;
 
 	buf[0] = 0x80;
@@ -187,7 +186,14 @@ static int mv_cesa_ahash_pad_req(struct mv_cesa_ahash_req *creq, u8 *buf)
 	index = creq->len & CESA_HASH_BLOCK_SIZE_MSK;
 	padlen = mv_cesa_ahash_pad_len(creq);
 	memset(buf + 1, 0, padlen - 1);
-	memcpy(buf + padlen, &bits, sizeof(bits));
+
+	if (creq->algo_le) {
+		__le64 bits = cpu_to_le64(creq->len << 3);
+		memcpy(buf + padlen, &bits, sizeof(bits));
+	} else {
+		__be64 bits = cpu_to_be64(creq->len << 3);
+		memcpy(buf + padlen, &bits, sizeof(bits));
+	}
 
 	return padlen + 8;
 }
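
The pad buffer follows the usual Merkle-Damgård convention: a 0x80 byte, zeros up to eight bytes short of a block boundary, then the message length in bits as a 64-bit value — little-endian when algo_le is set (MD5), big-endian otherwise (SHA). As a worked example, assuming the pad-length helper rounds up to 56 mod 64, a 3-byte message pads out like this:

	/* creq->len == 3, 64-byte block => padlen == 53 */
	buf[0] = 0x80;			/* mandatory terminator bit */
	memset(buf + 1, 0, 52);		/* padlen - 1 zero bytes */
	/* 3 bytes == 24 bits; stored LE for MD5, BE for SHA */
	memcpy(buf + 53, &bits, 8);	/* total: 53 + 8 = 61 bytes */
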
@@ -203,8 +209,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req)
 	size_t  len;
 
 	if (creq->cache_ptr)
-		memcpy(engine->sram + CESA_SA_DATA_SRAM_OFFSET, creq->cache,
-		       creq->cache_ptr);
+		memcpy_toio(engine->sram + CESA_SA_DATA_SRAM_OFFSET,
+			    creq->cache, creq->cache_ptr);
 
 	len = min_t(size_t, req->nbytes + creq->cache_ptr - sreq->offset,
 		    CESA_SA_SRAM_PAYLOAD_SIZE);
@@ -245,10 +251,10 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req)
 			if (len + trailerlen > CESA_SA_SRAM_PAYLOAD_SIZE) {
 				len &= CESA_HASH_BLOCK_SIZE_MSK;
 				new_cache_ptr = 64 - trailerlen;
-				memcpy(creq->cache,
-				       engine->sram +
-				       CESA_SA_DATA_SRAM_OFFSET + len,
-				       new_cache_ptr);
+				memcpy_fromio(creq->cache,
+					      engine->sram +
+					      CESA_SA_DATA_SRAM_OFFSET + len,
+					      new_cache_ptr);
 			} else {
 				len += mv_cesa_ahash_pad_req(creq,
 						engine->sram + len +
@@ -266,7 +272,7 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req)
 	mv_cesa_update_op_cfg(op, frag_mode, CESA_SA_DESC_CFG_FRAG_MSK);
 
 	/* FIXME: only update enc_len field */
-	memcpy(engine->sram, op, sizeof(*op));
+	memcpy_toio(engine->sram, op, sizeof(*op));
 
 	if (frag_mode == CESA_SA_DESC_CFG_FIRST_FRAG)
 		mv_cesa_update_op_cfg(op, CESA_SA_DESC_CFG_MID_FRAG,
@@ -275,7 +281,7 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req)
 	creq->cache_ptr = new_cache_ptr;
 
 	mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE);
-	writel(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG);
+	writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG);
 	writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD);
 }
 
@@ -306,7 +312,7 @@ static void mv_cesa_ahash_std_prepare(struct ahash_request *req)
 
 	sreq->offset = 0;
 	mv_cesa_adjust_op(engine, &creq->op_tmpl);
-	memcpy(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl));
+	memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl));
 }
 
 static void mv_cesa_ahash_step(struct crypto_async_request *req)
@@ -338,7 +344,7 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status)
 
 	digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
 	for (i = 0; i < digsize / 4; i++)
-		creq->state[i] = readl(engine->regs + CESA_IVDIG(i));
+		creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i));
 
 	if (creq->cache_ptr)
 		sg_pcopy_to_buffer(ahashreq->src, creq->src_nents,
@@ -347,18 +353,21 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status)
 				   ahashreq->nbytes - creq->cache_ptr);
 
 	if (creq->last_req) {
-		for (i = 0; i < digsize / 4; i++) {
-			/*
-			 * Hardware provides MD5 digest in a different
-			 * endianness than SHA-1 and SHA-256 ones.
-			 */
-			if (digsize == MD5_DIGEST_SIZE)
-				creq->state[i] = cpu_to_le32(creq->state[i]);
-			else
-				creq->state[i] = cpu_to_be32(creq->state[i]);
-		}
+		/*
+		 * The hardware produces the MD5 digest in little-endian
+		 * format, whereas the SHA digests are big-endian.
+		 */
+		if (creq->algo_le) {
+			__le32 *result = (void *)ahashreq->result;
+
+			for (i = 0; i < digsize / 4; i++)
+				result[i] = cpu_to_le32(creq->state[i]);
+		} else {
+			__be32 *result = (void *)ahashreq->result;
 
-		memcpy(ahashreq->result, creq->state, digsize);
+			for (i = 0; i < digsize / 4; i++)
+				result[i] = cpu_to_be32(creq->state[i]);
+		}
 	}
 
 	return ret;
@@ -381,8 +390,7 @@ static void mv_cesa_ahash_prepare(struct crypto_async_request *req,
 
 	digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
 	for (i = 0; i < digsize / 4; i++)
-		writel(creq->state[i],
-		       engine->regs + CESA_IVDIG(i));
+		writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i));
 }
 
 static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req)
@@ -404,7 +412,7 @@ static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = {
 };
 
 static int mv_cesa_ahash_init(struct ahash_request *req,
-			      struct mv_cesa_op_ctx *tmpl)
+			      struct mv_cesa_op_ctx *tmpl, bool algo_le)
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 
@@ -418,6 +426,7 @@ static int mv_cesa_ahash_init(struct ahash_request *req,
 	mv_cesa_set_mac_op_frag_len(tmpl, 0);
 	creq->op_tmpl = *tmpl;
 	creq->len = 0;
+	creq->algo_le = algo_le;
 
 	return 0;
 }
@@ -462,145 +471,114 @@ static int mv_cesa_ahash_cache_req(struct ahash_request *req, bool *cached)
 }
 
 static struct mv_cesa_op_ctx *
-mv_cesa_ahash_dma_add_cache(struct mv_cesa_tdma_chain *chain,
-			    struct mv_cesa_ahash_dma_iter *dma_iter,
-			    struct mv_cesa_ahash_req *creq,
-			    gfp_t flags)
+mv_cesa_dma_add_frag(struct mv_cesa_tdma_chain *chain,
+		     struct mv_cesa_op_ctx *tmpl, unsigned int frag_len,
+		     gfp_t flags)
 {
-	struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma;
-	struct mv_cesa_op_ctx *op = NULL;
+	struct mv_cesa_op_ctx *op;
 	int ret;
 
-	if (!creq->cache_ptr)
-		return NULL;
+	op = mv_cesa_dma_add_op(chain, tmpl, false, flags);
+	if (IS_ERR(op))
+		return op;
 
-	ret = mv_cesa_dma_add_data_transfer(chain,
-					    CESA_SA_DATA_SRAM_OFFSET,
-					    ahashdreq->cache_dma,
-					    creq->cache_ptr,
-					    CESA_TDMA_DST_IN_SRAM,
-					    flags);
+	/* Set the operation block fragment length. */
+	mv_cesa_set_mac_op_frag_len(op, frag_len);
+
+	/* Append dummy desc to launch operation */
+	ret = mv_cesa_dma_add_dummy_launch(chain, flags);
 	if (ret)
 		return ERR_PTR(ret);
 
-	if (!dma_iter->base.op_len) {
-		op = mv_cesa_dma_add_op(chain, &creq->op_tmpl, false, flags);
-		if (IS_ERR(op))
-			return op;
-
-		mv_cesa_set_mac_op_frag_len(op, creq->cache_ptr);
-
-		/* Add dummy desc to launch crypto operation */
-		ret = mv_cesa_dma_add_dummy_launch(chain, flags);
-		if (ret)
-			return ERR_PTR(ret);
-	}
+	if (mv_cesa_mac_op_is_first_frag(tmpl))
+		mv_cesa_update_op_cfg(tmpl,
+				      CESA_SA_DESC_CFG_MID_FRAG,
+				      CESA_SA_DESC_CFG_FRAG_MSK);
 
 	return op;
 }
 
-static struct mv_cesa_op_ctx *
-mv_cesa_ahash_dma_add_data(struct mv_cesa_tdma_chain *chain,
-			   struct mv_cesa_ahash_dma_iter *dma_iter,
-			   struct mv_cesa_ahash_req *creq,
-			   gfp_t flags)
+static int
+mv_cesa_ahash_dma_add_cache(struct mv_cesa_tdma_chain *chain,
+			    struct mv_cesa_ahash_dma_iter *dma_iter,
+			    struct mv_cesa_ahash_req *creq,
+			    gfp_t flags)
 {
-	struct mv_cesa_op_ctx *op;
-	int ret;
-
-	op = mv_cesa_dma_add_op(chain, &creq->op_tmpl, false, flags);
-	if (IS_ERR(op))
-		return op;
-
-	mv_cesa_set_mac_op_frag_len(op, dma_iter->base.op_len);
-
-	if ((mv_cesa_get_op_cfg(&creq->op_tmpl) & CESA_SA_DESC_CFG_FRAG_MSK) ==
-	    CESA_SA_DESC_CFG_FIRST_FRAG)
-		mv_cesa_update_op_cfg(&creq->op_tmpl,
-				      CESA_SA_DESC_CFG_MID_FRAG,
-				      CESA_SA_DESC_CFG_FRAG_MSK);
-
-	/* Add input transfers */
-	ret = mv_cesa_dma_add_op_transfers(chain, &dma_iter->base,
-					   &dma_iter->src, flags);
-	if (ret)
-		return ERR_PTR(ret);
+	struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma;
 
-	/* Add dummy desc to launch crypto operation */
-	ret = mv_cesa_dma_add_dummy_launch(chain, flags);
-	if (ret)
-		return ERR_PTR(ret);
+	if (!creq->cache_ptr)
+		return 0;
 
-	return op;
+	return mv_cesa_dma_add_data_transfer(chain,
+					     CESA_SA_DATA_SRAM_OFFSET,
+					     ahashdreq->cache_dma,
+					     creq->cache_ptr,
+					     CESA_TDMA_DST_IN_SRAM,
+					     flags);
 }
 
 static struct mv_cesa_op_ctx *
 mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain,
 			   struct mv_cesa_ahash_dma_iter *dma_iter,
 			   struct mv_cesa_ahash_req *creq,
-			   struct mv_cesa_op_ctx *op,
-			   gfp_t flags)
+			   unsigned int frag_len, gfp_t flags)
 {
 	struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma;
 	unsigned int len, trailerlen, padoff = 0;
+	struct mv_cesa_op_ctx *op;
 	int ret;
 
-	if (!creq->last_req)
-		return op;
-
-	if (op && creq->len <= CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX) {
-		u32 frag = CESA_SA_DESC_CFG_NOT_FRAG;
-
-		if ((mv_cesa_get_op_cfg(op) & CESA_SA_DESC_CFG_FRAG_MSK) !=
-		    CESA_SA_DESC_CFG_FIRST_FRAG)
-			frag = CESA_SA_DESC_CFG_LAST_FRAG;
+	/*
+	 * If the transfer is smaller than our maximum length, and we have
+	 * some data outstanding, we can ask the engine to finish the hash.
+	 */
+	if (creq->len <= CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX && frag_len) {
+		op = mv_cesa_dma_add_frag(chain, &creq->op_tmpl, frag_len,
+					  flags);
+		if (IS_ERR(op))
+			return op;
 
-		mv_cesa_update_op_cfg(op, frag, CESA_SA_DESC_CFG_FRAG_MSK);
+		mv_cesa_set_mac_op_total_len(op, creq->len);
+		mv_cesa_update_op_cfg(op, mv_cesa_mac_op_is_first_frag(op) ?
+						CESA_SA_DESC_CFG_NOT_FRAG :
+						CESA_SA_DESC_CFG_LAST_FRAG,
+				      CESA_SA_DESC_CFG_FRAG_MSK);
 
 		return op;
 	}
 
+	/*
+	 * The request is longer than the engine can handle, or we have
+	 * no data outstanding. Manually generate the padding, adding it
+	 * as a "mid" fragment.
+	 */
 	ret = mv_cesa_ahash_dma_alloc_padding(ahashdreq, flags);
 	if (ret)
 		return ERR_PTR(ret);
 
 	trailerlen = mv_cesa_ahash_pad_req(creq, ahashdreq->padding);
 
-	if (op) {
-		len = min(CESA_SA_SRAM_PAYLOAD_SIZE - dma_iter->base.op_len,
-			  trailerlen);
-		if (len) {
-			ret = mv_cesa_dma_add_data_transfer(chain,
+	len = min(CESA_SA_SRAM_PAYLOAD_SIZE - frag_len, trailerlen);
+	if (len) {
+		ret = mv_cesa_dma_add_data_transfer(chain,
 						CESA_SA_DATA_SRAM_OFFSET +
-						dma_iter->base.op_len,
+						frag_len,
 						ahashdreq->padding_dma,
 						len, CESA_TDMA_DST_IN_SRAM,
 						flags);
-			if (ret)
-				return ERR_PTR(ret);
-
-			mv_cesa_update_op_cfg(op, CESA_SA_DESC_CFG_MID_FRAG,
-					      CESA_SA_DESC_CFG_FRAG_MSK);
-			mv_cesa_set_mac_op_frag_len(op,
-					dma_iter->base.op_len + len);
-			padoff += len;
-		}
-	}
-
-	if (padoff >= trailerlen)
-		return op;
+		if (ret)
+			return ERR_PTR(ret);
 
-	if ((mv_cesa_get_op_cfg(&creq->op_tmpl) & CESA_SA_DESC_CFG_FRAG_MSK) !=
-	    CESA_SA_DESC_CFG_FIRST_FRAG)
-		mv_cesa_update_op_cfg(&creq->op_tmpl,
-				      CESA_SA_DESC_CFG_MID_FRAG,
-				      CESA_SA_DESC_CFG_FRAG_MSK);
+		op = mv_cesa_dma_add_frag(chain, &creq->op_tmpl, frag_len + len,
+					  flags);
+		if (IS_ERR(op))
+			return op;
 
-	op = mv_cesa_dma_add_op(chain, &creq->op_tmpl, false, flags);
-	if (IS_ERR(op))
-		return op;
+		if (len == trailerlen)
+			return op;
 
-	mv_cesa_set_mac_op_frag_len(op, trailerlen - padoff);
+		padoff += len;
+	}
 
 	ret = mv_cesa_dma_add_data_transfer(chain,
 					    CESA_SA_DATA_SRAM_OFFSET,
@@ -612,12 +590,8 @@ mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain,
 	if (ret)
 		return ERR_PTR(ret);
 
-	/* Add dummy desc to launch crypto operation */
-	ret = mv_cesa_dma_add_dummy_launch(chain, flags);
-	if (ret)
-		return ERR_PTR(ret);
-
-	return op;
+	return mv_cesa_dma_add_frag(chain, &creq->op_tmpl, trailerlen - padoff,
+				    flags);
 }
 
 static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
@@ -627,9 +601,9 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 		      GFP_KERNEL : GFP_ATOMIC;
 	struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma;
 	struct mv_cesa_tdma_req *dreq = &ahashdreq->base;
-	struct mv_cesa_tdma_chain chain;
 	struct mv_cesa_ahash_dma_iter iter;
 	struct mv_cesa_op_ctx *op = NULL;
+	unsigned int frag_len;
 	int ret;
 
 	dreq->chain.first = NULL;
@@ -644,29 +618,59 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 		}
 	}
 
-	mv_cesa_tdma_desc_iter_init(&chain);
+	mv_cesa_tdma_desc_iter_init(&dreq->chain);
 	mv_cesa_ahash_req_iter_init(&iter, req);
 
-	op = mv_cesa_ahash_dma_add_cache(&chain, &iter,
-					 creq, flags);
-	if (IS_ERR(op)) {
-		ret = PTR_ERR(op);
+	/*
+	 * Add the cache (left-over data from a previous block) first.
+	 * This will never overflow the SRAM size.
+	 */
+	ret = mv_cesa_ahash_dma_add_cache(&dreq->chain, &iter, creq, flags);
+	if (ret)
 		goto err_free_tdma;
-	}
 
-	do {
-		if (!iter.base.op_len)
-			break;
+	if (iter.src.sg) {
+		/*
+		 * Add all the new data, inserting an operation block and
+		 * launch command between each full SRAM block-worth of
+		 * data. We intentionally do not add the final op block.
+		 */
+		while (true) {
+			ret = mv_cesa_dma_add_op_transfers(&dreq->chain,
+							   &iter.base,
+							   &iter.src, flags);
+			if (ret)
+				goto err_free_tdma;
+
+			frag_len = iter.base.op_len;
 
-		op = mv_cesa_ahash_dma_add_data(&chain, &iter,
-						creq, flags);
-		if (IS_ERR(op)) {
-			ret = PTR_ERR(op);
-			goto err_free_tdma;
+			if (!mv_cesa_ahash_req_iter_next_op(&iter))
+				break;
+
+			op = mv_cesa_dma_add_frag(&dreq->chain, &creq->op_tmpl,
+						  frag_len, flags);
+			if (IS_ERR(op)) {
+				ret = PTR_ERR(op);
+				goto err_free_tdma;
+			}
 		}
-	} while (mv_cesa_ahash_req_iter_next_op(&iter));
+	} else {
+		/* Account for the data that was in the cache. */
+		frag_len = iter.base.op_len;
+	}
+
+	/*
+	 * At this point, frag_len indicates whether we have any data
+	 * outstanding which needs an operation.  Queue up the final
+	 * operation, which depends on whether this is the final request.
+	 */
+	if (creq->last_req)
+		op = mv_cesa_ahash_dma_last_req(&dreq->chain, &iter, creq,
+						frag_len, flags);
+	else if (frag_len)
+		op = mv_cesa_dma_add_frag(&dreq->chain, &creq->op_tmpl,
+					  frag_len, flags);
 
-	op = mv_cesa_ahash_dma_last_req(&chain, &iter, creq, op, flags);
 	if (IS_ERR(op)) {
 		ret = PTR_ERR(op);
 		goto err_free_tdma;
@@ -674,7 +678,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 
 	if (op) {
 		/* Add dummy desc to wait for crypto operation end */
-		ret = mv_cesa_dma_add_dummy_end(&chain, flags);
+		ret = mv_cesa_dma_add_dummy_end(&dreq->chain, flags);
 		if (ret)
 			goto err_free_tdma;
 	}
@@ -685,8 +689,6 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 	else
 		creq->cache_ptr = 0;
 
-	dreq->chain = chain;
-
 	return 0;
 
 err_free_tdma:
@@ -795,47 +797,50 @@ static int mv_cesa_ahash_finup(struct ahash_request *req)
 	return ret;
 }
 
-static int mv_cesa_md5_init(struct ahash_request *req)
+static int mv_cesa_ahash_export(struct ahash_request *req, void *hash,
+				u64 *len, void *cache)
 {
-	struct mv_cesa_op_ctx tmpl;
-
-	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5);
-
-	mv_cesa_ahash_init(req, &tmpl);
-
-	return 0;
-}
-
-static int mv_cesa_md5_export(struct ahash_request *req, void *out)
-{
-	struct md5_state *out_state = out;
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	unsigned int digsize = crypto_ahash_digestsize(ahash);
+	unsigned int blocksize;
+
+	blocksize = crypto_ahash_blocksize(ahash);
 
-	out_state->byte_count = creq->len;
-	memcpy(out_state->hash, creq->state, digsize);
-	memset(out_state->block, 0, sizeof(out_state->block));
+	*len = creq->len;
+	memcpy(hash, creq->state, digsize);
+	memset(cache, 0, blocksize);
 	if (creq->cache)
-		memcpy(out_state->block, creq->cache, creq->cache_ptr);
+		memcpy(cache, creq->cache, creq->cache_ptr);
 
 	return 0;
 }
 
-static int mv_cesa_md5_import(struct ahash_request *req, const void *in)
+static int mv_cesa_ahash_import(struct ahash_request *req, const void *hash,
+				u64 len, const void *cache)
 {
-	const struct md5_state *in_state = in;
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	unsigned int digsize = crypto_ahash_digestsize(ahash);
+	unsigned int blocksize;
 	unsigned int cache_ptr;
 	int ret;
 
-	creq->len = in_state->byte_count;
-	memcpy(creq->state, in_state->hash, digsize);
+	ret = crypto_ahash_init(req);
+	if (ret)
+		return ret;
+
+	blocksize = crypto_ahash_blocksize(ahash);
+	if (len >= blocksize)
+		mv_cesa_update_op_cfg(&creq->op_tmpl,
+				      CESA_SA_DESC_CFG_MID_FRAG,
+				      CESA_SA_DESC_CFG_FRAG_MSK);
+
+	creq->len = len;
+	memcpy(creq->state, hash, digsize);
 	creq->cache_ptr = 0;
 
-	cache_ptr = creq->len % sizeof(in_state->block);
+	cache_ptr = do_div(len, blocksize);
 	if (!cache_ptr)
 		return 0;
 
@@ -843,12 +848,39 @@ static int mv_cesa_md5_import(struct ahash_request *req, const void *in)
 	if (ret)
 		return ret;
 
-	memcpy(creq->cache, in_state->block, cache_ptr);
+	memcpy(creq->cache, cache, cache_ptr);
 	creq->cache_ptr = cache_ptr;
 
 	return 0;
 }
 
+static int mv_cesa_md5_init(struct ahash_request *req)
+{
+	struct mv_cesa_op_ctx tmpl = { };
+
+	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5);
+
+	mv_cesa_ahash_init(req, &tmpl, true);
+
+	return 0;
+}
+
+static int mv_cesa_md5_export(struct ahash_request *req, void *out)
+{
+	struct md5_state *out_state = out;
+
+	return mv_cesa_ahash_export(req, out_state->hash,
+				    &out_state->byte_count, out_state->block);
+}
+
+static int mv_cesa_md5_import(struct ahash_request *req, const void *in)
+{
+	const struct md5_state *in_state = in;
+
+	return mv_cesa_ahash_import(req, in_state->hash, in_state->byte_count,
+				    in_state->block);
+}
+
 static int mv_cesa_md5_digest(struct ahash_request *req)
 {
 	int ret;
@@ -870,6 +902,7 @@ struct ahash_alg mv_md5_alg = {
 	.import = mv_cesa_md5_import,
 	.halg = {
 		.digestsize = MD5_DIGEST_SIZE,
+		.statesize = sizeof(struct md5_state),
 		.base = {
 			.cra_name = "md5",
 			.cra_driver_name = "mv-md5",
@@ -886,11 +919,11 @@ struct ahash_alg mv_md5_alg = {
 
 static int mv_cesa_sha1_init(struct ahash_request *req)
 {
-	struct mv_cesa_op_ctx tmpl;
+	struct mv_cesa_op_ctx tmpl = { };
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA1);
 
-	mv_cesa_ahash_init(req, &tmpl);
+	mv_cesa_ahash_init(req, &tmpl, false);
 
 	return 0;
 }
@@ -898,44 +931,17 @@ static int mv_cesa_sha1_init(struct ahash_request *req)
 static int mv_cesa_sha1_export(struct ahash_request *req, void *out)
 {
 	struct sha1_state *out_state = out;
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	unsigned int digsize = crypto_ahash_digestsize(ahash);
-
-	out_state->count = creq->len;
-	memcpy(out_state->state, creq->state, digsize);
-	memset(out_state->buffer, 0, sizeof(out_state->buffer));
-	if (creq->cache)
-		memcpy(out_state->buffer, creq->cache, creq->cache_ptr);
 
-	return 0;
+	return mv_cesa_ahash_export(req, out_state->state, &out_state->count,
+				    out_state->buffer);
 }
 
 static int mv_cesa_sha1_import(struct ahash_request *req, const void *in)
 {
 	const struct sha1_state *in_state = in;
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	unsigned int digsize = crypto_ahash_digestsize(ahash);
-	unsigned int cache_ptr;
-	int ret;
 
-	creq->len = in_state->count;
-	memcpy(creq->state, in_state->state, digsize);
-	creq->cache_ptr = 0;
-
-	cache_ptr = creq->len % SHA1_BLOCK_SIZE;
-	if (!cache_ptr)
-		return 0;
-
-	ret = mv_cesa_ahash_alloc_cache(req);
-	if (ret)
-		return ret;
-
-	memcpy(creq->cache, in_state->buffer, cache_ptr);
-	creq->cache_ptr = cache_ptr;
-
-	return 0;
+	return mv_cesa_ahash_import(req, in_state->state, in_state->count,
+				    in_state->buffer);
 }
 
 static int mv_cesa_sha1_digest(struct ahash_request *req)
@@ -959,6 +965,7 @@ struct ahash_alg mv_sha1_alg = {
 	.import = mv_cesa_sha1_import,
 	.halg = {
 		.digestsize = SHA1_DIGEST_SIZE,
+		.statesize = sizeof(struct sha1_state),
 		.base = {
 			.cra_name = "sha1",
 			.cra_driver_name = "mv-sha1",
@@ -975,11 +982,11 @@ struct ahash_alg mv_sha1_alg = {
 
 static int mv_cesa_sha256_init(struct ahash_request *req)
 {
-	struct mv_cesa_op_ctx tmpl;
+	struct mv_cesa_op_ctx tmpl = { };
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA256);
 
-	mv_cesa_ahash_init(req, &tmpl);
+	mv_cesa_ahash_init(req, &tmpl, false);
 
 	return 0;
 }
@@ -998,44 +1005,17 @@ static int mv_cesa_sha256_digest(struct ahash_request *req)
 static int mv_cesa_sha256_export(struct ahash_request *req, void *out)
 {
 	struct sha256_state *out_state = out;
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	unsigned int ds = crypto_ahash_digestsize(ahash);
 
-	out_state->count = creq->len;
-	memcpy(out_state->state, creq->state, ds);
-	memset(out_state->buf, 0, sizeof(out_state->buf));
-	if (creq->cache)
-		memcpy(out_state->buf, creq->cache, creq->cache_ptr);
-
-	return 0;
+	return mv_cesa_ahash_export(req, out_state->state, &out_state->count,
+				    out_state->buf);
 }
 
 static int mv_cesa_sha256_import(struct ahash_request *req, const void *in)
 {
 	const struct sha256_state *in_state = in;
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	unsigned int digsize = crypto_ahash_digestsize(ahash);
-	unsigned int cache_ptr;
-	int ret;
-
-	creq->len = in_state->count;
-	memcpy(creq->state, in_state->state, digsize);
-	creq->cache_ptr = 0;
-
-	cache_ptr = creq->len % SHA256_BLOCK_SIZE;
-	if (!cache_ptr)
-		return 0;
-
-	ret = mv_cesa_ahash_alloc_cache(req);
-	if (ret)
-		return ret;
-
-	memcpy(creq->cache, in_state->buf, cache_ptr);
-	creq->cache_ptr = cache_ptr;
 
-	return 0;
+	return mv_cesa_ahash_import(req, in_state->state, in_state->count,
+				    in_state->buf);
 }
 
 struct ahash_alg mv_sha256_alg = {
@@ -1048,6 +1028,7 @@ struct ahash_alg mv_sha256_alg = {
 	.import = mv_cesa_sha256_import,
 	.halg = {
 		.digestsize = SHA256_DIGEST_SIZE,
+		.statesize = sizeof(struct sha256_state),
 		.base = {
 			.cra_name = "sha256",
 			.cra_driver_name = "mv-sha256",
@@ -1231,12 +1212,12 @@ static int mv_cesa_ahmac_cra_init(struct crypto_tfm *tfm)
 static int mv_cesa_ahmac_md5_init(struct ahash_request *req)
 {
 	struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct mv_cesa_op_ctx tmpl;
+	struct mv_cesa_op_ctx tmpl = { };
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_HMAC_MD5);
 	memcpy(tmpl.ctx.hash.iv, ctx->iv, sizeof(ctx->iv));
 
-	mv_cesa_ahash_init(req, &tmpl);
+	mv_cesa_ahash_init(req, &tmpl, true);
 
 	return 0;
 }
@@ -1301,12 +1282,12 @@ struct ahash_alg mv_ahmac_md5_alg = {
 static int mv_cesa_ahmac_sha1_init(struct ahash_request *req)
 {
 	struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct mv_cesa_op_ctx tmpl;
+	struct mv_cesa_op_ctx tmpl = { };
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_HMAC_SHA1);
 	memcpy(tmpl.ctx.hash.iv, ctx->iv, sizeof(ctx->iv));
 
-	mv_cesa_ahash_init(req, &tmpl);
+	mv_cesa_ahash_init(req, &tmpl, false);
 
 	return 0;
 }
@@ -1391,12 +1372,12 @@ static int mv_cesa_ahmac_sha256_setkey(struct crypto_ahash *tfm, const u8 *key,
 static int mv_cesa_ahmac_sha256_init(struct ahash_request *req)
 {
 	struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct mv_cesa_op_ctx tmpl;
+	struct mv_cesa_op_ctx tmpl = { };
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_HMAC_SHA256);
 	memcpy(tmpl.ctx.hash.iv, ctx->iv, sizeof(ctx->iv));
 
-	mv_cesa_ahash_init(req, &tmpl);
+	mv_cesa_ahash_init(req, &tmpl, false);
 
 	return 0;
 }
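
One subtlety in the consolidated mv_cesa_ahash_import() above: do_div() divides its 64-bit first argument in place and returns the remainder, which is why creq->len is saved before the call and the remainder becomes the partial-block cache size. For instance:

	u64 len = 131;			/* bytes hashed so far */
	unsigned int cache_ptr;

	cache_ptr = do_div(len, 64);	/* len becomes 2, cache_ptr == 3 */
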
diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c
index 64a366c50174..76427981275b 100644
--- a/drivers/crypto/marvell/tdma.c
+++ b/drivers/crypto/marvell/tdma.c
@@ -41,18 +41,18 @@ void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq)
 {
 	struct mv_cesa_engine *engine = dreq->base.engine;
 
-	writel(0, engine->regs + CESA_SA_CFG);
+	writel_relaxed(0, engine->regs + CESA_SA_CFG);
 
 	mv_cesa_set_int_mask(engine, CESA_SA_INT_ACC0_IDMA_DONE);
-	writel(CESA_TDMA_DST_BURST_128B | CESA_TDMA_SRC_BURST_128B |
-	       CESA_TDMA_NO_BYTE_SWAP | CESA_TDMA_EN,
-	       engine->regs + CESA_TDMA_CONTROL);
-
-	writel(CESA_SA_CFG_ACT_CH0_IDMA | CESA_SA_CFG_MULTI_PKT |
-	       CESA_SA_CFG_CH0_W_IDMA | CESA_SA_CFG_PARA_DIS,
-	       engine->regs + CESA_SA_CFG);
-	writel(dreq->chain.first->cur_dma,
-	       engine->regs + CESA_TDMA_NEXT_ADDR);
+	writel_relaxed(CESA_TDMA_DST_BURST_128B | CESA_TDMA_SRC_BURST_128B |
+		       CESA_TDMA_NO_BYTE_SWAP | CESA_TDMA_EN,
+		       engine->regs + CESA_TDMA_CONTROL);
+
+	writel_relaxed(CESA_SA_CFG_ACT_CH0_IDMA | CESA_SA_CFG_MULTI_PKT |
+		       CESA_SA_CFG_CH0_W_IDMA | CESA_SA_CFG_PARA_DIS,
+		       engine->regs + CESA_SA_CFG);
+	writel_relaxed(dreq->chain.first->cur_dma,
+		       engine->regs + CESA_TDMA_NEXT_ADDR);
 	writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD);
 }
 
@@ -69,7 +69,7 @@ void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq)
 
 		tdma = tdma->next;
 		dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma,
-			      le32_to_cpu(old_tdma->cur_dma));
+			      old_tdma->cur_dma);
 	}
 
 	dreq->chain.first = NULL;
@@ -105,9 +105,9 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags)
 		return ERR_PTR(-ENOMEM);
 
 	memset(new_tdma, 0, sizeof(*new_tdma));
-	new_tdma->cur_dma = cpu_to_le32(dma_handle);
+	new_tdma->cur_dma = dma_handle;
 	if (chain->last) {
-		chain->last->next_dma = new_tdma->cur_dma;
+		chain->last->next_dma = cpu_to_le32(dma_handle);
 		chain->last->next = new_tdma;
 	} else {
 		chain->first = new_tdma;
@@ -126,6 +126,7 @@ struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
 	struct mv_cesa_tdma_desc *tdma;
 	struct mv_cesa_op_ctx *op;
 	dma_addr_t dma_handle;
+	unsigned int size;
 
 	tdma = mv_cesa_dma_add_desc(chain, flags);
 	if (IS_ERR(tdma))
@@ -137,10 +138,12 @@ struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
 
 	*op = *op_templ;
 
+	size = skip_ctx ? sizeof(op->desc) : sizeof(*op);
+
 	tdma = chain->last;
 	tdma->op = op;
-	tdma->byte_cnt = (skip_ctx ? sizeof(op->desc) : sizeof(*op)) | BIT(31);
-	tdma->src = dma_handle;
+	tdma->byte_cnt = cpu_to_le32(size | BIT(31));
+	tdma->src = cpu_to_le32(dma_handle);
 	tdma->flags = CESA_TDMA_DST_IN_SRAM | CESA_TDMA_OP;
 
 	return op;
@@ -156,7 +159,7 @@ int mv_cesa_dma_add_data_transfer(struct mv_cesa_tdma_chain *chain,
 	if (IS_ERR(tdma))
 		return PTR_ERR(tdma);
 
-	tdma->byte_cnt = size | BIT(31);
+	tdma->byte_cnt = cpu_to_le32(size | BIT(31));
 	tdma->src = src;
 	tdma->dst = dst;
 
@@ -166,8 +169,7 @@ int mv_cesa_dma_add_data_transfer(struct mv_cesa_tdma_chain *chain,
 	return 0;
 }
 
-int mv_cesa_dma_add_dummy_launch(struct mv_cesa_tdma_chain *chain,
-				 u32 flags)
+int mv_cesa_dma_add_dummy_launch(struct mv_cesa_tdma_chain *chain, gfp_t flags)
 {
 	struct mv_cesa_tdma_desc *tdma;
 
@@ -178,7 +180,7 @@ int mv_cesa_dma_add_dummy_launch(struct mv_cesa_tdma_chain *chain,
 	return 0;
 }
 
-int mv_cesa_dma_add_dummy_end(struct mv_cesa_tdma_chain *chain, u32 flags)
+int mv_cesa_dma_add_dummy_end(struct mv_cesa_tdma_chain *chain, gfp_t flags)
 {
 	struct mv_cesa_tdma_desc *tdma;
 
@@ -186,7 +188,7 @@ int mv_cesa_dma_add_dummy_end(struct mv_cesa_tdma_chain *chain, u32 flags)
 	if (IS_ERR(tdma))
 		return PTR_ERR(tdma);
 
-	tdma->byte_cnt = BIT(31);
+	tdma->byte_cnt = cpu_to_le32(BIT(31));
 
 	return 0;
 }
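
The writel_relaxed() conversions in mv_cesa_dma_step() drop the costly barrier from register writes that only need ordering against each other, while the final writel() that starts the engine keeps its barrier so the DMA descriptors written to normal memory are visible to the device first. The general pattern (register names illustrative):

	/* Plain MMIO configuration: ordered amongst themselves. */
	writel_relaxed(ctrl, regs + TDMA_CONTROL);
	writel_relaxed(first_dma, regs + TDMA_NEXT_ADDR);

	/* The kick uses writel(): its barrier orders the descriptor
	 * writes in coherent memory before the engine is started. */
	writel(CMD_GO, regs + CMD);
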
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 2e8dab9d4263..5450880abb7b 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -34,7 +34,7 @@
 #define DRV_MODULE_VERSION	"0.2"
 #define DRV_MODULE_RELDATE	"July 28, 2011"
 
-static char version[] =
+static const char version[] =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
 
 MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
index 3750e13d8721..9ef51fafdbff 100644
--- a/drivers/crypto/nx/nx-842-powernv.c
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -491,7 +491,7 @@ static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
 				  void *wmem)
 {
 	return nx842_powernv_function(in, inlen, out, outlenp,
-				      wmem, CCW_FC_842_COMP_NOCRC);
+				      wmem, CCW_FC_842_COMP_CRC);
 }
 
 /**
@@ -519,7 +519,7 @@ static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen,
 				    void *wmem)
 {
 	return nx842_powernv_function(in, inlen, out, outlenp,
-				      wmem, CCW_FC_842_DECOMP_NOCRC);
+				      wmem, CCW_FC_842_DECOMP_CRC);
 }
 
 static int __init nx842_powernv_probe(struct device_node *dn)
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index f4cbde03c6ad..cddc6d8b55d9 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -234,6 +234,10 @@ static int nx842_validate_result(struct device *dev,
 		dev_dbg(dev, "%s: Out of space in output buffer\n",
 					__func__);
 		return -ENOSPC;
+	case 65: /* Calculated CRC doesn't match the passed value */
+		dev_dbg(dev, "%s: CRC mismatch for decompression\n",
+					__func__);
+		return -EINVAL;
 	case 66: /* Input data contains an illegal template field */
 	case 67: /* Template indicates data past the end of the input stream */
 		dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n",
@@ -324,7 +328,7 @@ static int nx842_pseries_compress(const unsigned char *in, unsigned int inlen,
 	slout.entries = (struct nx842_slentry *)workmem->slout;
 
 	/* Init operation */
-	op.flags = NX842_OP_COMPRESS;
+	op.flags = NX842_OP_COMPRESS_CRC;
 	csbcpb = &workmem->csbcpb;
 	memset(csbcpb, 0, sizeof(*csbcpb));
 	op.csbcpb = nx842_get_pa(csbcpb);
@@ -457,7 +461,7 @@ static int nx842_pseries_decompress(const unsigned char *in, unsigned int inlen,
 	slout.entries = (struct nx842_slentry *)workmem->slout;
 
 	/* Init operation */
-	op.flags = NX842_OP_DECOMPRESS;
+	op.flags = NX842_OP_DECOMPRESS_CRC;
 	csbcpb = &workmem->csbcpb;
 	memset(csbcpb, 0, sizeof(*csbcpb));
 	op.csbcpb = nx842_get_pa(csbcpb);
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index da36de26a4dc..615da961c4d8 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -1591,6 +1591,7 @@ static const struct of_device_id spacc_of_id_table[] = {
 	{ .compatible = "picochip,spacc-l2" },
 	{}
 };
+MODULE_DEVICE_TABLE(of, spacc_of_id_table);
 #endif /* CONFIG_OF */
 
 static bool spacc_is_compatible(struct platform_device *pdev,
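
Without MODULE_DEVICE_TABLE(), the OF match table is used for probing but never exported as module aliases, so udev cannot autoload the driver when the device tree declares a matching node. The one-liner above fixes that; in general form (names illustrative):

static const struct of_device_id demo_of_match[] = {
	{ .compatible = "vendor,demo-accel" },
	{ /* sentinel */ }
};
/* Emits "of:N*T*Cvendor,demo-accel" aliases into the module. */
MODULE_DEVICE_TABLE(of, demo_of_match);
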
diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
index df20a9de1c58..9e9e196c6d51 100644
--- a/drivers/crypto/qat/qat_common/Makefile
+++ b/drivers/crypto/qat/qat_common/Makefile
@@ -1,5 +1,10 @@
-$(obj)/qat_rsakey-asn1.o: $(obj)/qat_rsakey-asn1.c $(obj)/qat_rsakey-asn1.h
-clean-files += qat_rsakey-asn1.c qat_rsakey-asn1.h
+$(obj)/qat_rsapubkey-asn1.o: $(obj)/qat_rsapubkey-asn1.c \
+			     $(obj)/qat_rsapubkey-asn1.h
+$(obj)/qat_rsaprivkey-asn1.o: $(obj)/qat_rsaprivkey-asn1.c \
+			      $(obj)/qat_rsaprivkey-asn1.h
+
+clean-files += qat_rsapubkey-asn1.c qat_rsapubkey-asn1.h
+clean-files += qat_rsaprivkey-asn1.c qat_rsaprivkey-asn1.h
 
 obj-$(CONFIG_CRYPTO_DEV_QAT) += intel_qat.o
 intel_qat-objs := adf_cfg.o \
@@ -13,7 +18,8 @@ intel_qat-objs := adf_cfg.o \
 	adf_hw_arbiter.o \
 	qat_crypto.o \
 	qat_algs.o \
-	qat_rsakey-asn1.o \
+	qat_rsapubkey-asn1.o \
+	qat_rsaprivkey-asn1.o \
 	qat_asym_algs.o \
 	qat_uclo.o \
 	qat_hal.o
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index 7836dffc3d47..3f76bd495bcb 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -163,10 +163,8 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node);
 void qat_crypto_put_instance(struct qat_crypto_instance *inst);
 void qat_alg_callback(void *resp);
 void qat_alg_asym_callback(void *resp);
-int qat_algs_init(void);
-void qat_algs_exit(void);
 int qat_algs_register(void);
-int qat_algs_unregister(void);
+void qat_algs_unregister(void);
 int qat_asym_algs_register(void);
 void qat_asym_algs_unregister(void);
 
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index cd8a12af8ec5..03856ad280b9 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -463,9 +463,6 @@ static int __init adf_register_ctl_device_driver(void)
 {
 	mutex_init(&adf_ctl_lock);
 
-	if (qat_algs_init())
-		goto err_algs_init;
-
 	if (adf_chr_drv_create())
 		goto err_chr_dev;
 
@@ -482,8 +479,6 @@ err_crypto_register:
 err_aer:
 	adf_chr_drv_destroy();
 err_chr_dev:
-	qat_algs_exit();
-err_algs_init:
 	mutex_destroy(&adf_ctl_lock);
 	return -EFAULT;
 }
@@ -493,7 +488,6 @@ static void __exit adf_unregister_ctl_device_driver(void)
 	adf_chr_drv_destroy();
 	adf_exit_aer();
 	qat_crypto_unregister();
-	qat_algs_exit();
 	adf_clean_vf_map(false);
 	mutex_destroy(&adf_ctl_lock);
 }
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c
index ac37a89965ac..d873eeecc363 100644
--- a/drivers/crypto/qat/qat_common/adf_init.c
+++ b/drivers/crypto/qat/qat_common/adf_init.c
@@ -272,12 +272,10 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev)
 	clear_bit(ADF_STATUS_STARTING, &accel_dev->status);
 	clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
 
-	if (!list_empty(&accel_dev->crypto_list) && qat_algs_unregister())
-		dev_err(&GET_DEV(accel_dev),
-			"Failed to unregister crypto algs\n");
-
-	if (!list_empty(&accel_dev->crypto_list))
+	if (!list_empty(&accel_dev->crypto_list)) {
+		qat_algs_unregister();
 		qat_asym_algs_unregister();
+	}
 
 	list_for_each(list_itr, &service_table) {
 		service = list_entry(list_itr, struct service_hndl, list);
diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c
index 2f77a4a8cecb..1117a8b58280 100644
--- a/drivers/crypto/qat/qat_common/adf_sriov.c
+++ b/drivers/crypto/qat/qat_common/adf_sriov.c
@@ -244,11 +244,8 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
 		return -EFAULT;
 	}
 
-	if (!iommu_present(&pci_bus_type)) {
-		dev_err(&pdev->dev,
-			"IOMMU must be enabled for SR-IOV to work\n");
-		return -EINVAL;
-	}
+	if (!iommu_present(&pci_bus_type))
+		dev_warn(&pdev->dev, "IOMMU should be enabled for SR-IOV to work correctly\n");
 
 	if (accel_dev->pf.vf_info) {
 		dev_info(&pdev->dev, "Already enabled for this device\n");
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index 2bd913aceaeb..59e4c3af15ed 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -62,13 +62,13 @@
 #include "icp_qat_fw.h"
 #include "icp_qat_fw_la.h"
 
-#define QAT_AES_HW_CONFIG_CBC_ENC(alg) \
-	ICP_QAT_HW_CIPHER_CONFIG_BUILD(ICP_QAT_HW_CIPHER_CBC_MODE, alg, \
+#define QAT_AES_HW_CONFIG_ENC(alg, mode) \
+	ICP_QAT_HW_CIPHER_CONFIG_BUILD(mode, alg, \
 				       ICP_QAT_HW_CIPHER_NO_CONVERT, \
 				       ICP_QAT_HW_CIPHER_ENCRYPT)
 
-#define QAT_AES_HW_CONFIG_CBC_DEC(alg) \
-	ICP_QAT_HW_CIPHER_CONFIG_BUILD(ICP_QAT_HW_CIPHER_CBC_MODE, alg, \
+#define QAT_AES_HW_CONFIG_DEC(alg, mode) \
+	ICP_QAT_HW_CIPHER_CONFIG_BUILD(mode, alg, \
 				       ICP_QAT_HW_CIPHER_KEY_CONVERT, \
 				       ICP_QAT_HW_CIPHER_DECRYPT)
 
@@ -271,7 +271,8 @@ static void qat_alg_init_common_hdr(struct icp_qat_fw_comn_req_hdr *header)
 
 static int qat_alg_aead_init_enc_session(struct crypto_aead *aead_tfm,
 					 int alg,
-					 struct crypto_authenc_keys *keys)
+					 struct crypto_authenc_keys *keys,
+					 int mode)
 {
 	struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(aead_tfm);
 	unsigned int digestsize = crypto_aead_authsize(aead_tfm);
@@ -288,7 +289,7 @@ static int qat_alg_aead_init_enc_session(struct crypto_aead *aead_tfm,
 	struct icp_qat_fw_auth_cd_ctrl_hdr *hash_cd_ctrl = ptr;
 
 	/* CD setup */
-	cipher->aes.cipher_config.val = QAT_AES_HW_CONFIG_CBC_ENC(alg);
+	cipher->aes.cipher_config.val = QAT_AES_HW_CONFIG_ENC(alg, mode);
 	memcpy(cipher->aes.key, keys->enckey, keys->enckeylen);
 	hash->sha.inner_setup.auth_config.config =
 		ICP_QAT_HW_AUTH_CONFIG_BUILD(ICP_QAT_HW_AUTH_MODE1,
@@ -351,7 +352,8 @@ static int qat_alg_aead_init_enc_session(struct crypto_aead *aead_tfm,
 
 static int qat_alg_aead_init_dec_session(struct crypto_aead *aead_tfm,
 					 int alg,
-					 struct crypto_authenc_keys *keys)
+					 struct crypto_authenc_keys *keys,
+					 int mode)
 {
 	struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(aead_tfm);
 	unsigned int digestsize = crypto_aead_authsize(aead_tfm);
@@ -373,7 +375,7 @@ static int qat_alg_aead_init_dec_session(struct crypto_aead *aead_tfm,
 		sizeof(struct icp_qat_fw_la_cipher_req_params));
 
 	/* CD setup */
-	cipher->aes.cipher_config.val = QAT_AES_HW_CONFIG_CBC_DEC(alg);
+	cipher->aes.cipher_config.val = QAT_AES_HW_CONFIG_DEC(alg, mode);
 	memcpy(cipher->aes.key, keys->enckey, keys->enckeylen);
 	hash->sha.inner_setup.auth_config.config =
 		ICP_QAT_HW_AUTH_CONFIG_BUILD(ICP_QAT_HW_AUTH_MODE1,
@@ -464,7 +466,7 @@ static void qat_alg_ablkcipher_init_com(struct qat_alg_ablkcipher_ctx *ctx,
 
 static void qat_alg_ablkcipher_init_enc(struct qat_alg_ablkcipher_ctx *ctx,
 					int alg, const uint8_t *key,
-					unsigned int keylen)
+					unsigned int keylen, int mode)
 {
 	struct icp_qat_hw_cipher_algo_blk *enc_cd = ctx->enc_cd;
 	struct icp_qat_fw_la_bulk_req *req = &ctx->enc_fw_req;
@@ -472,12 +474,12 @@ static void qat_alg_ablkcipher_init_enc(struct qat_alg_ablkcipher_ctx *ctx,
 
 	qat_alg_ablkcipher_init_com(ctx, req, enc_cd, key, keylen);
 	cd_pars->u.s.content_desc_addr = ctx->enc_cd_paddr;
-	enc_cd->aes.cipher_config.val = QAT_AES_HW_CONFIG_CBC_ENC(alg);
+	enc_cd->aes.cipher_config.val = QAT_AES_HW_CONFIG_ENC(alg, mode);
 }
 
 static void qat_alg_ablkcipher_init_dec(struct qat_alg_ablkcipher_ctx *ctx,
 					int alg, const uint8_t *key,
-					unsigned int keylen)
+					unsigned int keylen, int mode)
 {
 	struct icp_qat_hw_cipher_algo_blk *dec_cd = ctx->dec_cd;
 	struct icp_qat_fw_la_bulk_req *req = &ctx->dec_fw_req;
@@ -485,29 +487,48 @@ static void qat_alg_ablkcipher_init_dec(struct qat_alg_ablkcipher_ctx *ctx,
 
 	qat_alg_ablkcipher_init_com(ctx, req, dec_cd, key, keylen);
 	cd_pars->u.s.content_desc_addr = ctx->dec_cd_paddr;
-	dec_cd->aes.cipher_config.val = QAT_AES_HW_CONFIG_CBC_DEC(alg);
+
+	if (mode != ICP_QAT_HW_CIPHER_CTR_MODE)
+		dec_cd->aes.cipher_config.val =
+					QAT_AES_HW_CONFIG_DEC(alg, mode);
+	else
+		dec_cd->aes.cipher_config.val =
+					QAT_AES_HW_CONFIG_ENC(alg, mode);
 }
 
-static int qat_alg_validate_key(int key_len, int *alg)
+static int qat_alg_validate_key(int key_len, int *alg, int mode)
 {
-	switch (key_len) {
-	case AES_KEYSIZE_128:
-		*alg = ICP_QAT_HW_CIPHER_ALGO_AES128;
-		break;
-	case AES_KEYSIZE_192:
-		*alg = ICP_QAT_HW_CIPHER_ALGO_AES192;
-		break;
-	case AES_KEYSIZE_256:
-		*alg = ICP_QAT_HW_CIPHER_ALGO_AES256;
-		break;
-	default:
-		return -EINVAL;
+	if (mode != ICP_QAT_HW_CIPHER_XTS_MODE) {
+		switch (key_len) {
+		case AES_KEYSIZE_128:
+			*alg = ICP_QAT_HW_CIPHER_ALGO_AES128;
+			break;
+		case AES_KEYSIZE_192:
+			*alg = ICP_QAT_HW_CIPHER_ALGO_AES192;
+			break;
+		case AES_KEYSIZE_256:
+			*alg = ICP_QAT_HW_CIPHER_ALGO_AES256;
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else {
+		switch (key_len) {
+		case AES_KEYSIZE_128 << 1:
+			*alg = ICP_QAT_HW_CIPHER_ALGO_AES128;
+			break;
+		case AES_KEYSIZE_256 << 1:
+			*alg = ICP_QAT_HW_CIPHER_ALGO_AES256;
+			break;
+		default:
+			return -EINVAL;
+		}
 	}
 	return 0;
 }
 
-static int qat_alg_aead_init_sessions(struct crypto_aead *tfm,
-				      const uint8_t *key, unsigned int keylen)
+static int qat_alg_aead_init_sessions(struct crypto_aead *tfm, const u8 *key,
+				      unsigned int keylen, int mode)
 {
 	struct crypto_authenc_keys keys;
 	int alg;
@@ -515,13 +536,13 @@ static int qat_alg_aead_init_sessions(struct crypto_aead *tfm,
 	if (crypto_authenc_extractkeys(&keys, key, keylen))
 		goto bad_key;
 
-	if (qat_alg_validate_key(keys.enckeylen, &alg))
+	if (qat_alg_validate_key(keys.enckeylen, &alg, mode))
 		goto bad_key;
 
-	if (qat_alg_aead_init_enc_session(tfm, alg, &keys))
+	if (qat_alg_aead_init_enc_session(tfm, alg, &keys, mode))
 		goto error;
 
-	if (qat_alg_aead_init_dec_session(tfm, alg, &keys))
+	if (qat_alg_aead_init_dec_session(tfm, alg, &keys, mode))
 		goto error;
 
 	return 0;
@@ -534,15 +555,16 @@ error:
 
 static int qat_alg_ablkcipher_init_sessions(struct qat_alg_ablkcipher_ctx *ctx,
 					    const uint8_t *key,
-					    unsigned int keylen)
+					    unsigned int keylen,
+					    int mode)
 {
 	int alg;
 
-	if (qat_alg_validate_key(keylen, &alg))
+	if (qat_alg_validate_key(keylen, &alg, mode))
 		goto bad_key;
 
-	qat_alg_ablkcipher_init_enc(ctx, alg, key, keylen);
-	qat_alg_ablkcipher_init_dec(ctx, alg, key, keylen);
+	qat_alg_ablkcipher_init_enc(ctx, alg, key, keylen, mode);
+	qat_alg_ablkcipher_init_dec(ctx, alg, key, keylen, mode);
 	return 0;
 bad_key:
 	crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
@@ -586,7 +608,8 @@ static int qat_alg_aead_setkey(struct crypto_aead *tfm, const uint8_t *key,
 			goto out_free_enc;
 		}
 	}
-	if (qat_alg_aead_init_sessions(tfm, key, keylen))
+	if (qat_alg_aead_init_sessions(tfm, key, keylen,
+				       ICP_QAT_HW_CIPHER_CBC_MODE))
 		goto out_free_all;
 
 	return 0;
@@ -876,8 +899,8 @@ static int qat_alg_aead_enc(struct aead_request *areq)
 }
 
 static int qat_alg_ablkcipher_setkey(struct crypto_ablkcipher *tfm,
-				     const uint8_t *key,
-				     unsigned int keylen)
+				     const u8 *key, unsigned int keylen,
+				     int mode)
 {
 	struct qat_alg_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct device *dev;
@@ -918,7 +941,7 @@ static int qat_alg_ablkcipher_setkey(struct crypto_ablkcipher *tfm,
 		}
 	}
 	spin_unlock(&ctx->lock);
-	if (qat_alg_ablkcipher_init_sessions(ctx, key, keylen))
+	if (qat_alg_ablkcipher_init_sessions(ctx, key, keylen, mode))
 		goto out_free_all;
 
 	return 0;
@@ -936,6 +959,27 @@ out_free_enc:
 	return -ENOMEM;
 }
 
+static int qat_alg_ablkcipher_cbc_setkey(struct crypto_ablkcipher *tfm,
+					 const u8 *key, unsigned int keylen)
+{
+	return qat_alg_ablkcipher_setkey(tfm, key, keylen,
+					 ICP_QAT_HW_CIPHER_CBC_MODE);
+}
+
+static int qat_alg_ablkcipher_ctr_setkey(struct crypto_ablkcipher *tfm,
+					 const u8 *key, unsigned int keylen)
+{
+	return qat_alg_ablkcipher_setkey(tfm, key, keylen,
+					 ICP_QAT_HW_CIPHER_CTR_MODE);
+}
+
+static int qat_alg_ablkcipher_xts_setkey(struct crypto_ablkcipher *tfm,
+					 const u8 *key, unsigned int keylen)
+{
+	return qat_alg_ablkcipher_setkey(tfm, key, keylen,
+					 ICP_QAT_HW_CIPHER_XTS_MODE);
+}
+
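A caller could pick up the newly registered template through the
ablkcipher API of this kernel generation; a hedged sketch (error paths
trimmed, the random key is only a placeholder):

	struct crypto_ablkcipher *tfm;
	u8 key[64];	/* 2 x AES-256 for xts(aes) */
	int ret;

	tfm = crypto_alloc_ablkcipher("xts(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	get_random_bytes(key, sizeof(key));
	ret = crypto_ablkcipher_setkey(tfm, key, sizeof(key));
	crypto_free_ablkcipher(tfm);
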
 static int qat_alg_ablkcipher_encrypt(struct ablkcipher_request *req)
 {
 	struct crypto_ablkcipher *atfm = crypto_ablkcipher_reqtfm(req);
@@ -1171,7 +1215,51 @@ static struct crypto_alg qat_algs[] = { {
 	.cra_exit = qat_alg_ablkcipher_exit,
 	.cra_u = {
 		.ablkcipher = {
-			.setkey = qat_alg_ablkcipher_setkey,
+			.setkey = qat_alg_ablkcipher_cbc_setkey,
+			.decrypt = qat_alg_ablkcipher_decrypt,
+			.encrypt = qat_alg_ablkcipher_encrypt,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+		},
+	},
+}, {
+	.cra_name = "ctr(aes)",
+	.cra_driver_name = "qat_aes_ctr",
+	.cra_priority = 4001,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct qat_alg_ablkcipher_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = qat_alg_ablkcipher_init,
+	.cra_exit = qat_alg_ablkcipher_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.setkey = qat_alg_ablkcipher_ctr_setkey,
+			.decrypt = qat_alg_ablkcipher_decrypt,
+			.encrypt = qat_alg_ablkcipher_encrypt,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+		},
+	},
+}, {
+	.cra_name = "xts(aes)",
+	.cra_driver_name = "qat_aes_xts",
+	.cra_priority = 4001,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct qat_alg_ablkcipher_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = qat_alg_ablkcipher_init,
+	.cra_exit = qat_alg_ablkcipher_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.setkey = qat_alg_ablkcipher_xts_setkey,
 			.decrypt = qat_alg_ablkcipher_decrypt,
 			.encrypt = qat_alg_ablkcipher_encrypt,
 			.min_keysize = AES_MIN_KEY_SIZE,
@@ -1212,7 +1300,7 @@ unreg_algs:
 	goto unlock;
 }
 
-int qat_algs_unregister(void)
+void qat_algs_unregister(void)
 {
 	mutex_lock(&algs_lock);
 	if (--active_devs != 0)
@@ -1223,14 +1311,4 @@ int qat_algs_unregister(void)
 
 unlock:
 	mutex_unlock(&algs_lock);
-	return 0;
-}
-
-int qat_algs_init(void)
-{
-	return 0;
-}
-
-void qat_algs_exit(void)
-{
 }
diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c
index e87f51023ba4..51c594fdacdc 100644
--- a/drivers/crypto/qat/qat_common/qat_asym_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c
@@ -51,7 +51,9 @@
 #include <crypto/akcipher.h>
 #include <linux/dma-mapping.h>
 #include <linux/fips.h>
-#include "qat_rsakey-asn1.h"
+#include <crypto/scatterwalk.h>
+#include "qat_rsapubkey-asn1.h"
+#include "qat_rsaprivkey-asn1.h"
 #include "icp_qat_fw_pke.h"
 #include "adf_accel_devices.h"
 #include "adf_transport.h"
@@ -106,6 +108,7 @@ struct qat_rsa_request {
 	dma_addr_t phy_in;
 	dma_addr_t phy_out;
 	char *src_align;
+	char *dst_align;
 	struct icp_qat_fw_pke_request req;
 	struct qat_rsa_ctx *ctx;
 	int err;
@@ -118,7 +121,6 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp)
 	struct device *dev = &GET_DEV(req->ctx->inst->accel_dev);
 	int err = ICP_QAT_FW_PKE_RESP_PKE_STAT_GET(
 				resp->pke_resp_hdr.comn_resp_flags);
-	char *ptr = areq->dst;
 
 	err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL;
 
@@ -129,24 +131,44 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp)
 		dma_unmap_single(dev, req->in.enc.m, req->ctx->key_sz,
 				 DMA_TO_DEVICE);
 
-	dma_unmap_single(dev, req->out.enc.c, req->ctx->key_sz,
-			 DMA_FROM_DEVICE);
+	areq->dst_len = req->ctx->key_sz;
+	if (req->dst_align) {
+		char *ptr = req->dst_align;
+
+		while (!(*ptr) && areq->dst_len) {
+			areq->dst_len--;
+			ptr++;
+		}
+
+		if (areq->dst_len != req->ctx->key_sz)
+			memmove(req->dst_align, ptr, areq->dst_len);
+
+		scatterwalk_map_and_copy(req->dst_align, areq->dst, 0,
+					 areq->dst_len, 1);
+
+		dma_free_coherent(dev, req->ctx->key_sz, req->dst_align,
+				  req->out.enc.c);
+	} else {
+		char *ptr = sg_virt(areq->dst);
+
+		while (!(*ptr) && areq->dst_len) {
+			areq->dst_len--;
+			ptr++;
+		}
+
+		if (sg_virt(areq->dst) != ptr && areq->dst_len)
+			memmove(sg_virt(areq->dst), ptr, areq->dst_len);
+
+		dma_unmap_single(dev, req->out.enc.c, req->ctx->key_sz,
+				 DMA_FROM_DEVICE);
+	}
+
 	dma_unmap_single(dev, req->phy_in, sizeof(struct qat_rsa_input_params),
 			 DMA_TO_DEVICE);
 	dma_unmap_single(dev, req->phy_out,
 			 sizeof(struct qat_rsa_output_params),
 			 DMA_TO_DEVICE);
 
-	areq->dst_len = req->ctx->key_sz;
-	/* Need to set the corect length of the output */
-	while (!(*ptr) && areq->dst_len) {
-		areq->dst_len--;
-		ptr++;
-	}
-
-	if (areq->dst_len != req->ctx->key_sz)
-		memmove(areq->dst, ptr, areq->dst_len);
-
 	akcipher_request_complete(areq, err);
 }
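
The callback now strips leading zero bytes in place before reporting the
result length. The same logic in isolation (a standalone sketch, not the
driver's helper):

#include <string.h>

/* Drop leading zero bytes from a big-endian integer, compacting the
 * buffer and returning the significant length. */
static unsigned int strip_leading_zeros(unsigned char *buf, unsigned int len)
{
	unsigned char *p = buf;

	while (len && !*p) {
		len--;
		p++;
	}
	if (p != buf && len)
		memmove(buf, p, len);
	return len;
}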
 
@@ -255,8 +277,16 @@ static int qat_rsa_enc(struct akcipher_request *req)
 	 * same as modulo n so in case it is different we need to allocate a
 	 * new buf and copy src data.
 	 * In other case we just need to map the user provided buffer.
+	 * Also need to make sure that it is in a contiguous buffer.
 	 */
-	if (req->src_len < ctx->key_sz) {
+	if (sg_is_last(req->src) && req->src_len == ctx->key_sz) {
+		qat_req->src_align = NULL;
+		qat_req->in.enc.m = dma_map_single(dev, sg_virt(req->src),
+						   req->src_len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(dev, qat_req->in.enc.m)))
+			return ret;
+
+	} else {
 		int shift = ctx->key_sz - req->src_len;
 
 		qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz,
@@ -265,29 +295,39 @@ static int qat_rsa_enc(struct akcipher_request *req)
 		if (unlikely(!qat_req->src_align))
 			return ret;
 
-		memcpy(qat_req->src_align + shift, req->src, req->src_len);
+		scatterwalk_map_and_copy(qat_req->src_align + shift, req->src,
+					 0, req->src_len, 0);
+	}
+	if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) {
+		qat_req->dst_align = NULL;
+		qat_req->out.enc.c = dma_map_single(dev, sg_virt(req->dst),
+						    req->dst_len,
+						    DMA_FROM_DEVICE);
+
+		if (unlikely(dma_mapping_error(dev, qat_req->out.enc.c)))
+			goto unmap_src;
+
 	} else {
-		qat_req->src_align = NULL;
-		qat_req->in.enc.m = dma_map_single(dev, req->src, req->src_len,
-					   DMA_TO_DEVICE);
+		qat_req->dst_align = dma_zalloc_coherent(dev, ctx->key_sz,
+							 &qat_req->out.enc.c,
+							 GFP_KERNEL);
+		if (unlikely(!qat_req->dst_align))
+			goto unmap_src;
+
 	}
 	qat_req->in.in_tab[3] = 0;
-	qat_req->out.enc.c = dma_map_single(dev, req->dst, req->dst_len,
-					    DMA_FROM_DEVICE);
 	qat_req->out.out_tab[1] = 0;
 	qat_req->phy_in = dma_map_single(dev, &qat_req->in.enc.m,
 					 sizeof(struct qat_rsa_input_params),
 					 DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, qat_req->phy_in)))
+		goto unmap_dst;
+
 	qat_req->phy_out = dma_map_single(dev, &qat_req->out.enc.c,
 					  sizeof(struct qat_rsa_output_params),
-					    DMA_TO_DEVICE);
-
-	if (unlikely((!qat_req->src_align &&
-		      dma_mapping_error(dev, qat_req->in.enc.m)) ||
-		     dma_mapping_error(dev, qat_req->out.enc.c) ||
-		     dma_mapping_error(dev, qat_req->phy_in) ||
-		     dma_mapping_error(dev, qat_req->phy_out)))
-		goto unmap;
+					  DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, qat_req->phy_out)))
+		goto unmap_in_params;
 
 	msg->pke_mid.src_data_addr = qat_req->phy_in;
 	msg->pke_mid.dest_data_addr = qat_req->phy_out;
@@ -300,7 +340,7 @@ static int qat_rsa_enc(struct akcipher_request *req)
 
 	if (!ret)
 		return -EINPROGRESS;
-unmap:
+unmap_src:
 	if (qat_req->src_align)
 		dma_free_coherent(dev, ctx->key_sz, qat_req->src_align,
 				  qat_req->in.enc.m);
@@ -308,9 +348,15 @@ unmap:
 		if (!dma_mapping_error(dev, qat_req->in.enc.m))
 			dma_unmap_single(dev, qat_req->in.enc.m, ctx->key_sz,
 					 DMA_TO_DEVICE);
-	if (!dma_mapping_error(dev, qat_req->out.enc.c))
-		dma_unmap_single(dev, qat_req->out.enc.c, ctx->key_sz,
-				 DMA_FROM_DEVICE);
+unmap_dst:
+	if (qat_req->dst_align)
+		dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align,
+				  qat_req->out.enc.c);
+	else
+		if (!dma_mapping_error(dev, qat_req->out.enc.c))
+			dma_unmap_single(dev, qat_req->out.enc.c, ctx->key_sz,
+					 DMA_FROM_DEVICE);
+unmap_in_params:
 	if (!dma_mapping_error(dev, qat_req->phy_in))
 		dma_unmap_single(dev, qat_req->phy_in,
 				 sizeof(struct qat_rsa_input_params),
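
The single catch-all "unmap" label becomes a ladder of labels, each one
undoing only what was mapped before the failing step. The idiom, reduced
to its shape (all names hypothetical):

static int demo_setup(void)
{
	void *a, *b, *c;	/* stand-ins for the DMA mappings */
	int ret = -ENOMEM;

	a = acquire_a();
	if (!a)
		return ret;
	b = acquire_b();
	if (!b)
		goto put_a;
	c = acquire_c();
	if (!c)
		goto put_b;
	return 0;

put_b:
	release_b(b);
put_a:
	release_a(a);
	return ret;
}
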
@@ -362,8 +408,16 @@ static int qat_rsa_dec(struct akcipher_request *req)
 	 * same as modulo n so in case it is different we need to allocate a
 	 * new buf and copy src data.
 	 * In other case we just need to map the user provided buffer.
+	 * Also need to make sure that it is in a contiguous buffer.
 	 */
-	if (req->src_len < ctx->key_sz) {
+	if (sg_is_last(req->src) && req->src_len == ctx->key_sz) {
+		qat_req->src_align = NULL;
+		qat_req->in.dec.c = dma_map_single(dev, sg_virt(req->src),
+						   req->src_len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(dev, qat_req->in.dec.c)))
+			return ret;
+
+	} else {
 		int shift = ctx->key_sz - req->src_len;
 
 		qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz,
@@ -372,29 +426,40 @@ static int qat_rsa_dec(struct akcipher_request *req)
 		if (unlikely(!qat_req->src_align))
 			return ret;
 
-		memcpy(qat_req->src_align + shift, req->src, req->src_len);
+		scatterwalk_map_and_copy(qat_req->src_align + shift, req->src,
+					 0, req->src_len, 0);
+	}
+	if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) {
+		qat_req->dst_align = NULL;
+		qat_req->out.dec.m = dma_map_single(dev, sg_virt(req->dst),
+						    req->dst_len,
+						    DMA_FROM_DEVICE);
+
+		if (unlikely(dma_mapping_error(dev, qat_req->out.dec.m)))
+			goto unmap_src;
+
 	} else {
-		qat_req->src_align = NULL;
-		qat_req->in.dec.c = dma_map_single(dev, req->src, req->src_len,
-						   DMA_TO_DEVICE);
+		qat_req->dst_align = dma_zalloc_coherent(dev, ctx->key_sz,
+							 &qat_req->out.dec.m,
+							 GFP_KERNEL);
+		if (unlikely(!qat_req->dst_align))
+			goto unmap_src;
+
 	}
+
 	qat_req->in.in_tab[3] = 0;
-	qat_req->out.dec.m = dma_map_single(dev, req->dst, req->dst_len,
-					    DMA_FROM_DEVICE);
 	qat_req->out.out_tab[1] = 0;
 	qat_req->phy_in = dma_map_single(dev, &qat_req->in.dec.c,
 					 sizeof(struct qat_rsa_input_params),
 					 DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, qat_req->phy_in)))
+		goto unmap_dst;
+
 	qat_req->phy_out = dma_map_single(dev, &qat_req->out.dec.m,
 					  sizeof(struct qat_rsa_output_params),
-					    DMA_TO_DEVICE);
-
-	if (unlikely((!qat_req->src_align &&
-		      dma_mapping_error(dev, qat_req->in.dec.c)) ||
-		     dma_mapping_error(dev, qat_req->out.dec.m) ||
-		     dma_mapping_error(dev, qat_req->phy_in) ||
-		     dma_mapping_error(dev, qat_req->phy_out)))
-		goto unmap;
+					  DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, qat_req->phy_out)))
+		goto unmap_in_params;
 
 	msg->pke_mid.src_data_addr = qat_req->phy_in;
 	msg->pke_mid.dest_data_addr = qat_req->phy_out;
@@ -407,7 +472,7 @@ static int qat_rsa_dec(struct akcipher_request *req)
 
 	if (!ret)
 		return -EINPROGRESS;
-unmap:
+unmap_src:
 	if (qat_req->src_align)
 		dma_free_coherent(dev, ctx->key_sz, qat_req->src_align,
 				  qat_req->in.dec.c);
@@ -415,9 +480,15 @@ unmap:
 		if (!dma_mapping_error(dev, qat_req->in.dec.c))
 			dma_unmap_single(dev, qat_req->in.dec.c, ctx->key_sz,
 					 DMA_TO_DEVICE);
-	if (!dma_mapping_error(dev, qat_req->out.dec.m))
-		dma_unmap_single(dev, qat_req->out.dec.m, ctx->key_sz,
-				 DMA_FROM_DEVICE);
+unmap_dst:
+	if (qat_req->dst_align)
+		dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align,
+				  qat_req->out.dec.m);
+	else
+		if (!dma_mapping_error(dev, qat_req->out.dec.m))
+			dma_unmap_single(dev, qat_req->out.dec.m, ctx->key_sz,
+					 DMA_FROM_DEVICE);
+unmap_in_params:
 	if (!dma_mapping_error(dev, qat_req->phy_in))
 		dma_unmap_single(dev, qat_req->phy_in,
 				 sizeof(struct qat_rsa_input_params),
@@ -531,7 +602,7 @@ err:
 }
 
 static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key,
-			  unsigned int keylen)
+			  unsigned int keylen, bool private)
 {
 	struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
 	struct device *dev = &GET_DEV(ctx->inst->accel_dev);
@@ -550,7 +621,13 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key,
 	ctx->n = NULL;
 	ctx->e = NULL;
 	ctx->d = NULL;
-	ret = asn1_ber_decoder(&qat_rsakey_decoder, ctx, key, keylen);
+
+	if (private)
+		ret = asn1_ber_decoder(&qat_rsaprivkey_decoder, ctx, key,
+				       keylen);
+	else
+		ret = asn1_ber_decoder(&qat_rsapubkey_decoder, ctx, key,
+				       keylen);
 	if (ret < 0)
 		goto free;
 
@@ -559,6 +636,11 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key,
 		ret = -EINVAL;
 		goto free;
 	}
+	if (private && !ctx->d) {
+		/* invalid private key provided */
+		ret = -EINVAL;
+		goto free;
+	}
 
 	return 0;
 free:
@@ -579,6 +661,25 @@ free:
 	return ret;
 }
 
+static int qat_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key,
+			     unsigned int keylen)
+{
+	return qat_rsa_setkey(tfm, key, keylen, false);
+}
+
+static int qat_rsa_setprivkey(struct crypto_akcipher *tfm, const void *key,
+			      unsigned int keylen)
+{
+	return qat_rsa_setkey(tfm, key, keylen, true);
+}
+
+static int qat_rsa_max_size(struct crypto_akcipher *tfm)
+{
+	struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+
+	return (ctx->n) ? ctx->key_sz : -EINVAL;
+}
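
A likely caller pattern for the new hook, through crypto_akcipher_maxsize()
(added to akcipher.h later in this series); sketch only, with placeholder
names:

	int len = crypto_akcipher_maxsize(tfm);
	u8 *out;

	if (len < 0)
		return len;	/* no key set yet */
	out = kzalloc(len, GFP_KERNEL);
	if (!out)
		return -ENOMEM;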
+
 static int qat_rsa_init_tfm(struct crypto_akcipher *tfm)
 {
 	struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
@@ -617,7 +718,9 @@ static struct akcipher_alg rsa = {
 	.decrypt = qat_rsa_dec,
 	.sign = qat_rsa_dec,
 	.verify = qat_rsa_enc,
-	.setkey = qat_rsa_setkey,
+	.set_pub_key = qat_rsa_setpubkey,
+	.set_priv_key = qat_rsa_setprivkey,
+	.max_size = qat_rsa_max_size,
 	.init = qat_rsa_init_tfm,
 	.exit = qat_rsa_exit_tfm,
 	.reqsize = sizeof(struct qat_rsa_request) + 64,
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c
index 07c2f9f9d1fc..9cab15497f04 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/qat/qat_common/qat_crypto.c
@@ -60,8 +60,8 @@ static struct service_hndl qat_crypto;
 
 void qat_crypto_put_instance(struct qat_crypto_instance *inst)
 {
-	if (atomic_sub_return(1, &inst->refctr) == 0)
-		adf_dev_put(inst->accel_dev);
+	atomic_dec(&inst->refctr);
+	adf_dev_put(inst->accel_dev);
 }
 
 static int qat_crypto_free_instances(struct adf_accel_dev *accel_dev)
@@ -97,49 +97,66 @@ static int qat_crypto_free_instances(struct adf_accel_dev *accel_dev)
 struct qat_crypto_instance *qat_crypto_get_instance_node(int node)
 {
 	struct adf_accel_dev *accel_dev = NULL;
-	struct qat_crypto_instance *inst_best = NULL;
+	struct qat_crypto_instance *inst = NULL;
 	struct list_head *itr;
 	unsigned long best = ~0;
 
 	list_for_each(itr, adf_devmgr_get_head()) {
-		accel_dev = list_entry(itr, struct adf_accel_dev, list);
+		struct adf_accel_dev *tmp_dev;
+		unsigned long ctr;
+
+		tmp_dev = list_entry(itr, struct adf_accel_dev, list);
+
+		if ((node == dev_to_node(&GET_DEV(tmp_dev)) ||
+		     dev_to_node(&GET_DEV(tmp_dev)) < 0) &&
+		    adf_dev_started(tmp_dev) &&
+		    !list_empty(&tmp_dev->crypto_list)) {
+			ctr = atomic_read(&tmp_dev->ref_count);
+			if (best > ctr) {
+				accel_dev = tmp_dev;
+				best = ctr;
+			}
+		}
+	}
+	if (!accel_dev)
+		pr_info("QAT: Could not find a device on node %d\n", node);
+
+	/* Get any started device */
+	list_for_each(itr, adf_devmgr_get_head()) {
+		struct adf_accel_dev *tmp_dev;
 
-		if ((node == dev_to_node(&GET_DEV(accel_dev)) ||
-		     dev_to_node(&GET_DEV(accel_dev)) < 0) &&
-		    adf_dev_started(accel_dev) &&
-		    !list_empty(&accel_dev->crypto_list))
+		tmp_dev = list_entry(itr, struct adf_accel_dev, list);
+
+		if (adf_dev_started(tmp_dev) &&
+		    !list_empty(&tmp_dev->crypto_list)) {
+			accel_dev = tmp_dev;
 			break;
-		accel_dev = NULL;
-	}
-	if (!accel_dev) {
-		pr_err("QAT: Could not find a device on node %d\n", node);
-		accel_dev = adf_devmgr_get_first();
+		}
 	}
-	if (!accel_dev || !adf_dev_started(accel_dev))
+
+	if (!accel_dev)
 		return NULL;
 
+	best = ~0;
 	list_for_each(itr, &accel_dev->crypto_list) {
-		struct qat_crypto_instance *inst;
-		unsigned long cur;
-
-		inst = list_entry(itr, struct qat_crypto_instance, list);
-		cur = atomic_read(&inst->refctr);
-		if (best > cur) {
-			inst_best = inst;
-			best = cur;
+		struct qat_crypto_instance *tmp_inst;
+		unsigned long ctr;
+
+		tmp_inst = list_entry(itr, struct qat_crypto_instance, list);
+		ctr = atomic_read(&tmp_inst->refctr);
+		if (best > ctr) {
+			inst = tmp_inst;
+			best = ctr;
 		}
 	}
-	if (inst_best) {
-		if (atomic_add_return(1, &inst_best->refctr) == 1) {
-			if (adf_dev_get(accel_dev)) {
-				atomic_dec(&inst_best->refctr);
-				dev_err(&GET_DEV(accel_dev),
-					"Could not increment dev refctr\n");
-				return NULL;
-			}
+	if (inst) {
+		if (adf_dev_get(accel_dev)) {
+			dev_err(&GET_DEV(accel_dev), "Could not increment dev refctr\n");
+			return NULL;
 		}
+		atomic_inc(&inst->refctr);
 	}
-	return inst_best;
+	return inst;
 }
 
 static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev)
diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c
index 8e711d1c3084..380e761801a7 100644
--- a/drivers/crypto/qat/qat_common/qat_hal.c
+++ b/drivers/crypto/qat/qat_common/qat_hal.c
@@ -1034,7 +1034,7 @@ static int qat_hal_concat_micro_code(uint64_t *micro_inst,
 				     unsigned int inst_num, unsigned int size,
 				     unsigned int addr, unsigned int *value)
 {
-	int i, val_indx;
+	int i;
 	unsigned int cur_value;
 	const uint64_t *inst_arr;
 	int fixup_offset;
@@ -1042,8 +1042,7 @@ static int qat_hal_concat_micro_code(uint64_t *micro_inst,
 	int orig_num;
 
 	orig_num = inst_num;
-	val_indx = 0;
-	cur_value = value[val_indx++];
+	cur_value = value[0];
 	inst_arr = inst_4b;
 	usize = ARRAY_SIZE(inst_4b);
 	fixup_offset = inst_num;
diff --git a/drivers/crypto/qat/qat_common/qat_rsakey.asn1 b/drivers/crypto/qat/qat_common/qat_rsakey.asn1
deleted file mode 100644
index 97b0e02b600a..000000000000
--- a/drivers/crypto/qat/qat_common/qat_rsakey.asn1
+++ /dev/null
@@ -1,5 +0,0 @@
-RsaKey ::= SEQUENCE {
-	n INTEGER ({ qat_rsa_get_n }),
-	e INTEGER ({ qat_rsa_get_e }),
-	d INTEGER ({ qat_rsa_get_d })
-}
diff --git a/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 b/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1
new file mode 100644
index 000000000000..f0066adb79b8
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1
@@ -0,0 +1,11 @@
+RsaPrivKey ::= SEQUENCE {
+	version		INTEGER,
+	n		INTEGER ({ qat_rsa_get_n }),
+	e		INTEGER ({ qat_rsa_get_e }),
+	d		INTEGER ({ qat_rsa_get_d }),
+	prime1		INTEGER,
+	prime2		INTEGER,
+	exponent1	INTEGER,
+	exponent2	INTEGER,
+	coefficient	INTEGER
+}
diff --git a/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 b/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1
new file mode 100644
index 000000000000..bd667b31a21a
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1
@@ -0,0 +1,4 @@
+RsaPubKey ::= SEQUENCE {
+	n INTEGER ({ qat_rsa_get_n }),
+	e INTEGER ({ qat_rsa_get_e })
+}
diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c
index ad592de475a4..2c0d63d48747 100644
--- a/drivers/crypto/qce/ablkcipher.c
+++ b/drivers/crypto/qce/ablkcipher.c
@@ -44,10 +44,8 @@ static void qce_ablkcipher_done(void *data)
 			error);
 
 	if (diff_dst)
-		qce_unmapsg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src,
-			    rctx->dst_chained);
-	qce_unmapsg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst,
-		    rctx->dst_chained);
+		dma_unmap_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src);
+	dma_unmap_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
 
 	sg_free_table(&rctx->dst_tbl);
 
@@ -80,15 +78,11 @@ qce_ablkcipher_async_req_handle(struct crypto_async_request *async_req)
 	dir_src = diff_dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
 	dir_dst = diff_dst ? DMA_FROM_DEVICE : DMA_BIDIRECTIONAL;
 
-	rctx->src_nents = qce_countsg(req->src, req->nbytes,
-				      &rctx->src_chained);
-	if (diff_dst) {
-		rctx->dst_nents = qce_countsg(req->dst, req->nbytes,
-					      &rctx->dst_chained);
-	} else {
+	rctx->src_nents = sg_nents_for_len(req->src, req->nbytes);
+	if (diff_dst)
+		rctx->dst_nents = sg_nents_for_len(req->dst, req->nbytes);
+	else
 		rctx->dst_nents = rctx->src_nents;
-		rctx->dst_chained = rctx->src_chained;
-	}
 
 	rctx->dst_nents += 1;
 
@@ -116,14 +110,12 @@ qce_ablkcipher_async_req_handle(struct crypto_async_request *async_req)
 	sg_mark_end(sg);
 	rctx->dst_sg = rctx->dst_tbl.sgl;
 
-	ret = qce_mapsg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst,
-			rctx->dst_chained);
+	ret = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
 	if (ret < 0)
 		goto error_free;
 
 	if (diff_dst) {
-		ret = qce_mapsg(qce->dev, req->src, rctx->src_nents, dir_src,
-				rctx->src_chained);
+		ret = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src);
 		if (ret < 0)
 			goto error_unmap_dst;
 		rctx->src_sg = req->src;
@@ -149,11 +141,9 @@ error_terminate:
 	qce_dma_terminate_all(&qce->dma);
 error_unmap_src:
 	if (diff_dst)
-		qce_unmapsg(qce->dev, req->src, rctx->src_nents, dir_src,
-			    rctx->src_chained);
+		dma_unmap_sg(qce->dev, req->src, rctx->src_nents, dir_src);
 error_unmap_dst:
-	qce_unmapsg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst,
-		    rctx->dst_chained);
+	dma_unmap_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
 error_free:
 	sg_free_table(&rctx->dst_tbl);
 	return ret;
diff --git a/drivers/crypto/qce/cipher.h b/drivers/crypto/qce/cipher.h
index d5757cfcda2d..5c6a5f8633e5 100644
--- a/drivers/crypto/qce/cipher.h
+++ b/drivers/crypto/qce/cipher.h
@@ -32,8 +32,6 @@ struct qce_cipher_ctx {
  * @ivsize: IV size
  * @src_nents: source entries
  * @dst_nents: destination entries
- * @src_chained: is source chained
- * @dst_chained: is destination chained
  * @result_sg: scatterlist used for result buffer
  * @dst_tbl: destination sg table
  * @dst_sg: destination sg pointer table beginning
@@ -47,8 +45,6 @@ struct qce_cipher_reqctx {
 	unsigned int ivsize;
 	int src_nents;
 	int dst_nents;
-	bool src_chained;
-	bool dst_chained;
 	struct scatterlist result_sg;
 	struct sg_table dst_tbl;
 	struct scatterlist *dst_sg;
diff --git a/drivers/crypto/qce/dma.c b/drivers/crypto/qce/dma.c
index 378cb768647f..4797e795c9b9 100644
--- a/drivers/crypto/qce/dma.c
+++ b/drivers/crypto/qce/dma.c
@@ -54,58 +54,6 @@ void qce_dma_release(struct qce_dma_data *dma)
 	kfree(dma->result_buf);
 }
 
-int qce_mapsg(struct device *dev, struct scatterlist *sg, int nents,
-	      enum dma_data_direction dir, bool chained)
-{
-	int err;
-
-	if (chained) {
-		while (sg) {
-			err = dma_map_sg(dev, sg, 1, dir);
-			if (!err)
-				return -EFAULT;
-			sg = sg_next(sg);
-		}
-	} else {
-		err = dma_map_sg(dev, sg, nents, dir);
-		if (!err)
-			return -EFAULT;
-	}
-
-	return nents;
-}
-
-void qce_unmapsg(struct device *dev, struct scatterlist *sg, int nents,
-		 enum dma_data_direction dir, bool chained)
-{
-	if (chained)
-		while (sg) {
-			dma_unmap_sg(dev, sg, 1, dir);
-			sg = sg_next(sg);
-		}
-	else
-		dma_unmap_sg(dev, sg, nents, dir);
-}
-
-int qce_countsg(struct scatterlist *sglist, int nbytes, bool *chained)
-{
-	struct scatterlist *sg = sglist;
-	int nents = 0;
-
-	if (chained)
-		*chained = false;
-
-	while (nbytes > 0 && sg) {
-		nents++;
-		nbytes -= sg->length;
-		if (!sg_is_last(sg) && (sg + 1)->length == 0 && chained)
-			*chained = true;
-		sg = sg_next(sg);
-	}
-
-	return nents;
-}
-
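The removed qce_countsg()/qce_mapsg() pair special-cased chained lists by
hand; sg_nents_for_len() walks any list. Roughly what it computes (a
simplified sketch; the real helper lives in lib/scatterlist.c):

static int nents_for_len(struct scatterlist *sg, u64 len)
{
	int nents = 0;

	if (!len)
		return 0;
	for (; sg; sg = sg_next(sg)) {
		nents++;
		if (sg->length >= len)
			return nents;
		len -= sg->length;
	}
	return -EINVAL;	/* list shorter than len bytes */
}
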
 struct scatterlist *
 qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl)
 {
diff --git a/drivers/crypto/qce/dma.h b/drivers/crypto/qce/dma.h
index 65bedb81de0b..130235d17bb4 100644
--- a/drivers/crypto/qce/dma.h
+++ b/drivers/crypto/qce/dma.h
@@ -49,11 +49,6 @@ int qce_dma_prep_sgs(struct qce_dma_data *dma, struct scatterlist *sg_in,
 		     dma_async_tx_callback cb, void *cb_param);
 void qce_dma_issue_pending(struct qce_dma_data *dma);
 int qce_dma_terminate_all(struct qce_dma_data *dma);
-int qce_countsg(struct scatterlist *sg_list, int nbytes, bool *chained);
-void qce_unmapsg(struct device *dev, struct scatterlist *sg, int nents,
-		 enum dma_data_direction dir, bool chained);
-int qce_mapsg(struct device *dev, struct scatterlist *sg, int nents,
-	      enum dma_data_direction dir, bool chained);
 struct scatterlist *
 qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add);
 
diff --git a/drivers/crypto/qce/sha.c b/drivers/crypto/qce/sha.c
index be2f5049256a..0c9973ec80eb 100644
--- a/drivers/crypto/qce/sha.c
+++ b/drivers/crypto/qce/sha.c
@@ -51,9 +51,8 @@ static void qce_ahash_done(void *data)
 	if (error)
 		dev_dbg(qce->dev, "ahash dma termination error (%d)\n", error);
 
-	qce_unmapsg(qce->dev, req->src, rctx->src_nents, DMA_TO_DEVICE,
-		    rctx->src_chained);
-	qce_unmapsg(qce->dev, &rctx->result_sg, 1, DMA_FROM_DEVICE, 0);
+	dma_unmap_sg(qce->dev, req->src, rctx->src_nents, DMA_TO_DEVICE);
+	dma_unmap_sg(qce->dev, &rctx->result_sg, 1, DMA_FROM_DEVICE);
 
 	memcpy(rctx->digest, result->auth_iv, digestsize);
 	if (req->result)
@@ -92,16 +91,14 @@ static int qce_ahash_async_req_handle(struct crypto_async_request *async_req)
 		rctx->authklen = AES_KEYSIZE_128;
 	}
 
-	rctx->src_nents = qce_countsg(req->src, req->nbytes,
-				      &rctx->src_chained);
-	ret = qce_mapsg(qce->dev, req->src, rctx->src_nents, DMA_TO_DEVICE,
-			rctx->src_chained);
+	rctx->src_nents = sg_nents_for_len(req->src, req->nbytes);
+	ret = dma_map_sg(qce->dev, req->src, rctx->src_nents, DMA_TO_DEVICE);
 	if (ret < 0)
 		return ret;
 
 	sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ);
 
-	ret = qce_mapsg(qce->dev, &rctx->result_sg, 1, DMA_FROM_DEVICE, 0);
+	ret = dma_map_sg(qce->dev, &rctx->result_sg, 1, DMA_FROM_DEVICE);
 	if (ret < 0)
 		goto error_unmap_src;
 
@@ -121,10 +118,9 @@ static int qce_ahash_async_req_handle(struct crypto_async_request *async_req)
 error_terminate:
 	qce_dma_terminate_all(&qce->dma);
 error_unmap_dst:
-	qce_unmapsg(qce->dev, &rctx->result_sg, 1, DMA_FROM_DEVICE, 0);
+	dma_unmap_sg(qce->dev, &rctx->result_sg, 1, DMA_FROM_DEVICE);
 error_unmap_src:
-	qce_unmapsg(qce->dev, req->src, rctx->src_nents, DMA_TO_DEVICE,
-		    rctx->src_chained);
+	dma_unmap_sg(qce->dev, req->src, rctx->src_nents, DMA_TO_DEVICE);
 	return ret;
 }
 
diff --git a/drivers/crypto/qce/sha.h b/drivers/crypto/qce/sha.h
index 286f0d5397f3..236bb5e9ae75 100644
--- a/drivers/crypto/qce/sha.h
+++ b/drivers/crypto/qce/sha.h
@@ -36,7 +36,6 @@ struct qce_sha_ctx {
  * @flags: operation flags
  * @src_orig: original request sg list
  * @nbytes_orig: original request number of bytes
- * @src_chained: is source scatterlist chained
  * @src_nents: source number of entries
  * @byte_count: byte count
  * @count: save count in states during update, import and export
@@ -55,7 +54,6 @@ struct qce_sha_reqctx {
 	unsigned long flags;
 	struct scatterlist *src_orig;
 	unsigned int nbytes_orig;
-	bool src_chained;
 	int src_nents;
 	__be32 byte_count[2];
 	u64 count;
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 820dc3acb28c..f68c24a98277 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -173,7 +173,6 @@ struct sahara_aes_reqctx {
  * @sg_in_idx: number of hw links
  * @in_sg: scatterlist for input data
  * @in_sg_chain: scatterlists for chained input data
- * @in_sg_chained: specifies if chained scatterlists are used or not
  * @total: total number of bytes for transfer
  * @last: is this the last block
  * @first: is this the first block
@@ -191,7 +190,6 @@ struct sahara_sha_reqctx {
 	unsigned int		sg_in_idx;
 	struct scatterlist	*in_sg;
 	struct scatterlist	in_sg_chain[2];
-	bool			in_sg_chained;
 	size_t			total;
 	unsigned int		last;
 	unsigned int		first;
@@ -274,31 +272,7 @@ static u32 sahara_aes_data_link_hdr(struct sahara_dev *dev)
 			SAHARA_HDR_CHA_SKHA | SAHARA_HDR_PARITY_BIT;
 }
 
-static int sahara_sg_length(struct scatterlist *sg,
-			    unsigned int total)
-{
-	int sg_nb;
-	unsigned int len;
-	struct scatterlist *sg_list;
-
-	sg_nb = 0;
-	sg_list = sg;
-
-	while (total) {
-		len = min(sg_list->length, total);
-
-		sg_nb++;
-		total -= len;
-
-		sg_list = sg_next(sg_list);
-		if (!sg_list)
-			total = 0;
-	}
-
-	return sg_nb;
-}
-
-static char *sahara_err_src[16] = {
+static const char *sahara_err_src[16] = {
 	"No error",
 	"Header error",
 	"Descriptor length error",
@@ -317,14 +291,14 @@ static char *sahara_err_src[16] = {
 	"DMA error"
 };
 
-static char *sahara_err_dmasize[4] = {
+static const char *sahara_err_dmasize[4] = {
 	"Byte transfer",
 	"Half-word transfer",
 	"Word transfer",
 	"Reserved"
 };
 
-static char *sahara_err_dmasrc[8] = {
+static const char *sahara_err_dmasrc[8] = {
 	"No error",
 	"AHB bus error",
 	"Internal IP bus error",
@@ -335,7 +309,7 @@ static char *sahara_err_dmasrc[8] = {
 	"DMA HW error"
 };
 
-static char *sahara_cha_errsrc[12] = {
+static const char *sahara_cha_errsrc[12] = {
 	"Input buffer non-empty",
 	"Illegal address",
 	"Illegal mode",
@@ -350,7 +324,7 @@ static char *sahara_cha_errsrc[12] = {
 	"Reserved"
 };
 
-static char *sahara_cha_err[4] = { "No error", "SKHA", "MDHA", "RNG" };
+static const char *sahara_cha_err[4] = { "No error", "SKHA", "MDHA", "RNG" };
 
 static void sahara_decode_error(struct sahara_dev *dev, unsigned int error)
 {
@@ -380,7 +354,7 @@ static void sahara_decode_error(struct sahara_dev *dev, unsigned int error)
 	dev_err(dev->device, "\n");
 }
 
-static char *sahara_state[4] = { "Idle", "Busy", "Error", "HW Fault" };
+static const char *sahara_state[4] = { "Idle", "Busy", "Error", "HW Fault" };
 
 static void sahara_decode_status(struct sahara_dev *dev, unsigned int status)
 {
@@ -502,8 +476,8 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
 		idx++;
 	}
 
-	dev->nb_in_sg = sahara_sg_length(dev->in_sg, dev->total);
-	dev->nb_out_sg = sahara_sg_length(dev->out_sg, dev->total);
+	dev->nb_in_sg = sg_nents_for_len(dev->in_sg, dev->total);
+	dev->nb_out_sg = sg_nents_for_len(dev->out_sg, dev->total);
 	if ((dev->nb_in_sg + dev->nb_out_sg) > SAHARA_MAX_HW_LINK) {
 		dev_err(dev->device, "not enough hw links (%d)\n",
 			dev->nb_in_sg + dev->nb_out_sg);
@@ -818,45 +792,26 @@ static int sahara_sha_hw_links_create(struct sahara_dev *dev,
 
 	dev->in_sg = rctx->in_sg;
 
-	dev->nb_in_sg = sahara_sg_length(dev->in_sg, rctx->total);
+	dev->nb_in_sg = sg_nents_for_len(dev->in_sg, rctx->total);
 	if ((dev->nb_in_sg) > SAHARA_MAX_HW_LINK) {
 		dev_err(dev->device, "not enough hw links (%d)\n",
 			dev->nb_in_sg + dev->nb_out_sg);
 		return -EINVAL;
 	}
 
-	if (rctx->in_sg_chained) {
-		i = start;
-		sg = dev->in_sg;
-		while (sg) {
-			ret = dma_map_sg(dev->device, sg, 1,
-					 DMA_TO_DEVICE);
-			if (!ret)
-				return -EFAULT;
-
-			dev->hw_link[i]->len = sg->length;
-			dev->hw_link[i]->p = sg->dma_address;
+	sg = dev->in_sg;
+	ret = dma_map_sg(dev->device, dev->in_sg, dev->nb_in_sg, DMA_TO_DEVICE);
+	if (!ret)
+		return -EFAULT;
+
+	for (i = start; i < dev->nb_in_sg + start; i++) {
+		dev->hw_link[i]->len = sg->length;
+		dev->hw_link[i]->p = sg->dma_address;
+		if (i == (dev->nb_in_sg + start - 1)) {
+			dev->hw_link[i]->next = 0;
+		} else {
 			dev->hw_link[i]->next = dev->hw_phys_link[i + 1];
 			sg = sg_next(sg);
-			i += 1;
-		}
-		dev->hw_link[i-1]->next = 0;
-	} else {
-		sg = dev->in_sg;
-		ret = dma_map_sg(dev->device, dev->in_sg, dev->nb_in_sg,
-				 DMA_TO_DEVICE);
-		if (!ret)
-			return -EFAULT;
-
-		for (i = start; i < dev->nb_in_sg + start; i++) {
-			dev->hw_link[i]->len = sg->length;
-			dev->hw_link[i]->p = sg->dma_address;
-			if (i == (dev->nb_in_sg + start - 1)) {
-				dev->hw_link[i]->next = 0;
-			} else {
-				dev->hw_link[i]->next = dev->hw_phys_link[i + 1];
-				sg = sg_next(sg);
-			}
 		}
 	}
 
@@ -1004,7 +959,6 @@ static int sahara_sha_prepare_request(struct ahash_request *req)
 		rctx->total = req->nbytes + rctx->buf_cnt;
 		rctx->in_sg = rctx->in_sg_chain;
 
-		rctx->in_sg_chained = true;
 		req->src = rctx->in_sg_chain;
 	/* only data from previous operation */
 	} else if (rctx->buf_cnt) {
@@ -1015,13 +969,11 @@ static int sahara_sha_prepare_request(struct ahash_request *req)
 		/* buf was copied into rembuf above */
 		sg_init_one(rctx->in_sg, rctx->rembuf, rctx->buf_cnt);
 		rctx->total = rctx->buf_cnt;
-		rctx->in_sg_chained = false;
 	/* no data from previous operation */
 	} else {
 		rctx->in_sg = req->src;
 		rctx->total = req->nbytes;
 		req->src = rctx->in_sg;
-		rctx->in_sg_chained = false;
 	}
 
 	/* on next call, we only have the remaining data in the buffer */
@@ -1030,23 +982,6 @@ static int sahara_sha_prepare_request(struct ahash_request *req)
 	return -EINPROGRESS;
 }
 
-static void sahara_sha_unmap_sg(struct sahara_dev *dev,
-				struct sahara_sha_reqctx *rctx)
-{
-	struct scatterlist *sg;
-
-	if (rctx->in_sg_chained) {
-		sg = dev->in_sg;
-		while (sg) {
-			dma_unmap_sg(dev->device, sg, 1, DMA_TO_DEVICE);
-			sg = sg_next(sg);
-		}
-	} else {
-		dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg,
-			DMA_TO_DEVICE);
-	}
-}
-
 static int sahara_sha_process(struct ahash_request *req)
 {
 	struct sahara_dev *dev = dev_ptr;
@@ -1086,7 +1021,8 @@ static int sahara_sha_process(struct ahash_request *req)
 	}
 
 	if (rctx->sg_in_idx)
-		sahara_sha_unmap_sg(dev, rctx);
+		dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg,
+			     DMA_TO_DEVICE);
 
 	memcpy(rctx->context, dev->context_base, rctx->context_size);
 
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 3b20a1bce703..46f531e19ccf 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -857,8 +857,6 @@ badkey:
  * talitos_edesc - s/w-extended descriptor
  * @src_nents: number of segments in input scatterlist
  * @dst_nents: number of segments in output scatterlist
- * @src_chained: whether src is chained or not
- * @dst_chained: whether dst is chained or not
  * @icv_ool: whether ICV is out-of-line
  * @iv_dma: dma address of iv for checking continuity and link table
  * @dma_len: length of dma mapped link_tbl space
@@ -874,8 +872,6 @@ badkey:
 struct talitos_edesc {
 	int src_nents;
 	int dst_nents;
-	bool src_chained;
-	bool dst_chained;
 	bool icv_ool;
 	dma_addr_t iv_dma;
 	int dma_len;
@@ -887,29 +883,6 @@ struct talitos_edesc {
 	};
 };
 
-static int talitos_map_sg(struct device *dev, struct scatterlist *sg,
-			  unsigned int nents, enum dma_data_direction dir,
-			  bool chained)
-{
-	if (unlikely(chained))
-		while (sg) {
-			dma_map_sg(dev, sg, 1, dir);
-			sg = sg_next(sg);
-		}
-	else
-		dma_map_sg(dev, sg, nents, dir);
-	return nents;
-}
-
-static void talitos_unmap_sg_chain(struct device *dev, struct scatterlist *sg,
-				   enum dma_data_direction dir)
-{
-	while (sg) {
-		dma_unmap_sg(dev, sg, 1, dir);
-		sg = sg_next(sg);
-	}
-}
-
 static void talitos_sg_unmap(struct device *dev,
 			     struct talitos_edesc *edesc,
 			     struct scatterlist *src,
@@ -919,24 +892,13 @@ static void talitos_sg_unmap(struct device *dev,
 	unsigned int dst_nents = edesc->dst_nents ? : 1;
 
 	if (src != dst) {
-		if (edesc->src_chained)
-			talitos_unmap_sg_chain(dev, src, DMA_TO_DEVICE);
-		else
-			dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE);
+		dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE);
 
 		if (dst) {
-			if (edesc->dst_chained)
-				talitos_unmap_sg_chain(dev, dst,
-						       DMA_FROM_DEVICE);
-			else
-				dma_unmap_sg(dev, dst, dst_nents,
-					     DMA_FROM_DEVICE);
+			dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE);
 		}
 	} else
-		if (edesc->src_chained)
-			talitos_unmap_sg_chain(dev, src, DMA_BIDIRECTIONAL);
-		else
-			dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL);
+		dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL);
 }
 
 static void ipsec_esp_unmap(struct device *dev,
@@ -1118,10 +1080,9 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key,
 			       DMA_TO_DEVICE);
 
-	sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ?: 1,
-				  (areq->src == areq->dst) ? DMA_BIDIRECTIONAL
-							   : DMA_TO_DEVICE,
-				  edesc->src_chained);
+	sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ?: 1,
+			      (areq->src == areq->dst) ? DMA_BIDIRECTIONAL
+							   : DMA_TO_DEVICE);
 
 	/* hmac data */
 	desc->ptr[1].len = cpu_to_be16(areq->assoclen);
@@ -1185,9 +1146,8 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	desc->ptr[5].j_extent = authsize;
 
 	if (areq->src != areq->dst)
-		sg_count = talitos_map_sg(dev, areq->dst,
-					  edesc->dst_nents ? : 1,
-					  DMA_FROM_DEVICE, edesc->dst_chained);
+		sg_count = dma_map_sg(dev, areq->dst, edesc->dst_nents ? : 1,
+				      DMA_FROM_DEVICE);
 
 	edesc->icv_ool = false;
 
@@ -1234,26 +1194,6 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 }
 
 /*
- * derive number of elements in scatterlist
- */
-static int sg_count(struct scatterlist *sg_list, int nbytes, bool *chained)
-{
-	struct scatterlist *sg = sg_list;
-	int sg_nents = 0;
-
-	*chained = false;
-	while (nbytes > 0 && sg) {
-		sg_nents++;
-		nbytes -= sg->length;
-		if (!sg_is_last(sg) && (sg + 1)->length == 0)
-			*chained = true;
-		sg = sg_next(sg);
-	}
-
-	return sg_nents;
-}
-
-/*
  * allocate and map the extended descriptor
  */
 static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
@@ -1270,7 +1210,6 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
 {
 	struct talitos_edesc *edesc;
 	int src_nents, dst_nents, alloc_len, dma_len;
-	bool src_chained = false, dst_chained = false;
 	dma_addr_t iv_dma = 0;
 	gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		      GFP_ATOMIC;
@@ -1287,18 +1226,16 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
 		iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE);
 
 	if (!dst || dst == src) {
-		src_nents = sg_count(src, assoclen + cryptlen + authsize,
-				     &src_chained);
+		src_nents = sg_nents_for_len(src,
+					     assoclen + cryptlen + authsize);
 		src_nents = (src_nents == 1) ? 0 : src_nents;
 		dst_nents = dst ? src_nents : 0;
 	} else { /* dst && dst != src*/
-		src_nents = sg_count(src, assoclen + cryptlen +
-					  (encrypt ? 0 : authsize),
-				     &src_chained);
+		src_nents = sg_nents_for_len(src, assoclen + cryptlen +
+						 (encrypt ? 0 : authsize));
 		src_nents = (src_nents == 1) ? 0 : src_nents;
-		dst_nents = sg_count(dst, assoclen + cryptlen +
-					  (encrypt ? authsize : 0),
-				     &dst_chained);
+		dst_nents = sg_nents_for_len(dst, assoclen + cryptlen +
+						 (encrypt ? authsize : 0));
 		dst_nents = (dst_nents == 1) ? 0 : dst_nents;
 	}
 
@@ -1332,8 +1269,6 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
 
 	edesc->src_nents = src_nents;
 	edesc->dst_nents = dst_nents;
-	edesc->src_chained = src_chained;
-	edesc->dst_chained = dst_chained;
 	edesc->iv_dma = iv_dma;
 	edesc->dma_len = dma_len;
 	if (dma_len)
@@ -1518,8 +1453,7 @@ int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src,
 	} else {
 		to_talitos_ptr_extent_clear(ptr, is_sec1);
 
-		sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir,
-					  edesc->src_chained);
+		sg_count = dma_map_sg(dev, src, edesc->src_nents ? : 1, dir);
 
 		if (sg_count == 1) {
 			to_talitos_ptr(ptr, sg_dma_address(src), is_sec1);
@@ -1552,8 +1486,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst,
 	bool is_sec1 = has_ftr_sec1(priv);
 
 	if (dir != DMA_NONE)
-		sg_count = talitos_map_sg(dev, dst, edesc->dst_nents ? : 1,
-					  dir, edesc->dst_chained);
+		sg_count = dma_map_sg(dev, dst, edesc->dst_nents ? : 1, dir);
 
 	to_talitos_ptr_len(ptr, len, is_sec1);
 
@@ -1897,12 +1830,11 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
 	unsigned int nbytes_to_hash;
 	unsigned int to_hash_later;
 	unsigned int nsg;
-	bool chained;
 
 	if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
 		/* Buffer up to one whole block */
 		sg_copy_to_buffer(areq->src,
-				  sg_count(areq->src, nbytes, &chained),
+				  sg_nents_for_len(areq->src, nbytes),
 				  req_ctx->buf + req_ctx->nbuf, nbytes);
 		req_ctx->nbuf += nbytes;
 		return 0;
@@ -1935,7 +1867,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
 		req_ctx->psrc = areq->src;
 
 	if (to_hash_later) {
-		int nents = sg_count(areq->src, nbytes, &chained);
+		int nents = sg_nents_for_len(areq->src, nbytes);
 		sg_pcopy_to_buffer(areq->src, nents,
 				      req_ctx->bufnext,
 				      to_hash_later,
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index fded0a5cfcd7..4c243c1ffc7f 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -1414,7 +1414,7 @@ static int ux500_cryp_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 
 	dev_dbg(dev, "[%s]", __func__);
-	device_data = kzalloc(sizeof(struct cryp_device_data), GFP_ATOMIC);
+	device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_ATOMIC);
 	if (!device_data) {
 		dev_err(dev, "[%s]: kzalloc() failed!", __func__);
 		ret = -ENOMEM;
@@ -1435,23 +1435,15 @@ static int ux500_cryp_probe(struct platform_device *pdev)
 		dev_err(dev, "[%s]: platform_get_resource() failed",
 				__func__);
 		ret = -ENODEV;
-		goto out_kfree;
-	}
-
-	res = request_mem_region(res->start, resource_size(res), pdev->name);
-	if (res == NULL) {
-		dev_err(dev, "[%s]: request_mem_region() failed",
-				__func__);
-		ret = -EBUSY;
-		goto out_kfree;
+		goto out;
 	}
 
 	device_data->phybase = res->start;
-	device_data->base = ioremap(res->start, resource_size(res));
+	device_data->base = devm_ioremap_resource(dev, res);
 	if (!device_data->base) {
 		dev_err(dev, "[%s]: ioremap failed!", __func__);
 		ret = -ENOMEM;
-		goto out_free_mem;
+		goto out;
 	}
 
 	spin_lock_init(&device_data->ctx_lock);
@@ -1463,11 +1455,11 @@ static int ux500_cryp_probe(struct platform_device *pdev)
 		dev_err(dev, "[%s]: could not get cryp regulator", __func__);
 		ret = PTR_ERR(device_data->pwr_regulator);
 		device_data->pwr_regulator = NULL;
-		goto out_unmap;
+		goto out;
 	}
 
 	/* Enable the clk for CRYP hardware block */
-	device_data->clk = clk_get(&pdev->dev, NULL);
+	device_data->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(device_data->clk)) {
 		dev_err(dev, "[%s]: clk_get() failed!", __func__);
 		ret = PTR_ERR(device_data->clk);
@@ -1477,7 +1469,7 @@ static int ux500_cryp_probe(struct platform_device *pdev)
 	ret = clk_prepare(device_data->clk);
 	if (ret) {
 		dev_err(dev, "[%s]: clk_prepare() failed!", __func__);
-		goto out_clk;
+		goto out_regulator;
 	}
 
 	/* Enable device power (and clock) */
@@ -1510,11 +1502,8 @@ static int ux500_cryp_probe(struct platform_device *pdev)
 		goto out_power;
 	}
 
-	ret = request_irq(res_irq->start,
-			  cryp_interrupt_handler,
-			  0,
-			  "cryp1",
-			  device_data);
+	ret = devm_request_irq(&pdev->dev, res_irq->start,
+			       cryp_interrupt_handler, 0, "cryp1", device_data);
 	if (ret) {
 		dev_err(dev, "[%s]: Unable to request IRQ", __func__);
 		goto out_power;
@@ -1550,28 +1539,15 @@ out_power:
 out_clk_unprepare:
 	clk_unprepare(device_data->clk);
 
-out_clk:
-	clk_put(device_data->clk);
-
 out_regulator:
 	regulator_put(device_data->pwr_regulator);
 
-out_unmap:
-	iounmap(device_data->base);
-
-out_free_mem:
-	release_mem_region(res->start, resource_size(res));
-
-out_kfree:
-	kfree(device_data);
 out:
 	return ret;
 }
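
The probe/remove rework leans on managed (devm_*) resources, which the
driver core releases automatically on unbind, so the matching kfree,
iounmap, clk_put and free_irq calls disappear. Note that
devm_ioremap_resource() returns an ERR_PTR rather than NULL on failure,
so an IS_ERR() check, as in the sketch below, is the canonical form.
Minimal shape of such a probe (names hypothetical):

static int demo_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;
	struct clk *clk;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(base))
		return PTR_ERR(base);

	clk = devm_clk_get(&pdev->dev, NULL);
	if (IS_ERR(clk))
		return PTR_ERR(clk);

	/* no explicit cleanup: devm resources are released automatically */
	return 0;
}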
 
 static int ux500_cryp_remove(struct platform_device *pdev)
 {
-	struct resource *res = NULL;
-	struct resource *res_irq = NULL;
 	struct cryp_device_data *device_data;
 
 	dev_dbg(&pdev->dev, "[%s]", __func__);
@@ -1607,37 +1583,18 @@ static int ux500_cryp_remove(struct platform_device *pdev)
 	if (list_empty(&driver_data.device_list.k_list))
 		cryp_algs_unregister_all();
 
-	res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!res_irq)
-		dev_err(&pdev->dev, "[%s]: IORESOURCE_IRQ, unavailable",
-			__func__);
-	else {
-		disable_irq(res_irq->start);
-		free_irq(res_irq->start, device_data);
-	}
-
 	if (cryp_disable_power(&pdev->dev, device_data, false))
 		dev_err(&pdev->dev, "[%s]: cryp_disable_power() failed",
 			__func__);
 
 	clk_unprepare(device_data->clk);
-	clk_put(device_data->clk);
 	regulator_put(device_data->pwr_regulator);
 
-	iounmap(device_data->base);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (res)
-		release_mem_region(res->start, resource_size(res));
-
-	kfree(device_data);
-
 	return 0;
 }
 
 static void ux500_cryp_shutdown(struct platform_device *pdev)
 {
-	struct resource *res_irq = NULL;
 	struct cryp_device_data *device_data;
 
 	dev_dbg(&pdev->dev, "[%s]", __func__);
@@ -1673,15 +1630,6 @@ static void ux500_cryp_shutdown(struct platform_device *pdev)
 	if (list_empty(&driver_data.device_list.k_list))
 		cryp_algs_unregister_all();
 
-	res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!res_irq)
-		dev_err(&pdev->dev, "[%s]: IORESOURCE_IRQ, unavailable",
-			__func__);
-	else {
-		disable_irq(res_irq->start);
-		free_irq(res_irq->start, device_data);
-	}
-
 	if (cryp_disable_power(&pdev->dev, device_data, false))
 		dev_err(&pdev->dev, "[%s]: cryp_disable_power() failed",
 			__func__);
@@ -1777,6 +1725,7 @@ static const struct of_device_id ux500_cryp_match[] = {
 	{ .compatible = "stericsson,ux500-cryp" },
 	{ },
 };
+MODULE_DEVICE_TABLE(of, ux500_cryp_match);
 
 static struct platform_driver cryp_driver = {
 	.probe  = ux500_cryp_probe,
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 5f5f360628fc..f47d112041b2 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -1657,7 +1657,7 @@ static int ux500_hash_probe(struct platform_device *pdev)
 	struct hash_device_data *device_data;
 	struct device		*dev = &pdev->dev;
 
-	device_data = kzalloc(sizeof(*device_data), GFP_ATOMIC);
+	device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_ATOMIC);
 	if (!device_data) {
 		ret = -ENOMEM;
 		goto out;
@@ -1670,22 +1670,15 @@ static int ux500_hash_probe(struct platform_device *pdev)
 	if (!res) {
 		dev_dbg(dev, "%s: platform_get_resource() failed!\n", __func__);
 		ret = -ENODEV;
-		goto out_kfree;
-	}
-
-	res = request_mem_region(res->start, resource_size(res), pdev->name);
-	if (res == NULL) {
-		dev_dbg(dev, "%s: request_mem_region() failed!\n", __func__);
-		ret = -EBUSY;
-		goto out_kfree;
+		goto out;
 	}
 
 	device_data->phybase = res->start;
-	device_data->base = ioremap(res->start, resource_size(res));
+	device_data->base = devm_ioremap_resource(dev, res);
 	if (!device_data->base) {
 		dev_err(dev, "%s: ioremap() failed!\n", __func__);
 		ret = -ENOMEM;
-		goto out_free_mem;
+		goto out;
 	}
 	spin_lock_init(&device_data->ctx_lock);
 	spin_lock_init(&device_data->power_state_lock);
@@ -1696,11 +1689,11 @@ static int ux500_hash_probe(struct platform_device *pdev)
 		dev_err(dev, "%s: regulator_get() failed!\n", __func__);
 		ret = PTR_ERR(device_data->regulator);
 		device_data->regulator = NULL;
-		goto out_unmap;
+		goto out;
 	}
 
 	/* Enable the clock for HASH1 hardware block */
-	device_data->clk = clk_get(dev, NULL);
+	device_data->clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(device_data->clk)) {
 		dev_err(dev, "%s: clk_get() failed!\n", __func__);
 		ret = PTR_ERR(device_data->clk);
@@ -1710,7 +1703,7 @@ static int ux500_hash_probe(struct platform_device *pdev)
 	ret = clk_prepare(device_data->clk);
 	if (ret) {
 		dev_err(dev, "%s: clk_prepare() failed!\n", __func__);
-		goto out_clk;
+		goto out_regulator;
 	}
 
 	/* Enable device power (and clock) */
@@ -1752,20 +1745,9 @@ out_power:
 out_clk_unprepare:
 	clk_unprepare(device_data->clk);
 
-out_clk:
-	clk_put(device_data->clk);
-
 out_regulator:
 	regulator_put(device_data->regulator);
 
-out_unmap:
-	iounmap(device_data->base);
-
-out_free_mem:
-	release_mem_region(res->start, resource_size(res));
-
-out_kfree:
-	kfree(device_data);
 out:
 	return ret;
 }
@@ -1776,7 +1758,6 @@ out:
  */
 static int ux500_hash_remove(struct platform_device *pdev)
 {
-	struct resource		*res;
 	struct hash_device_data *device_data;
 	struct device		*dev = &pdev->dev;
 
@@ -1816,17 +1797,8 @@ static int ux500_hash_remove(struct platform_device *pdev)
 			__func__);
 
 	clk_unprepare(device_data->clk);
-	clk_put(device_data->clk);
 	regulator_put(device_data->regulator);
 
-	iounmap(device_data->base);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (res)
-		release_mem_region(res->start, resource_size(res));
-
-	kfree(device_data);
-
 	return 0;
 }
 
@@ -1836,7 +1808,6 @@ static int ux500_hash_remove(struct platform_device *pdev)
  */
 static void ux500_hash_shutdown(struct platform_device *pdev)
 {
-	struct resource *res = NULL;
 	struct hash_device_data *device_data;
 
 	device_data = platform_get_drvdata(pdev);
@@ -1870,12 +1841,6 @@ static void ux500_hash_shutdown(struct platform_device *pdev)
 	if (list_empty(&driver_data.device_list.k_list))
 		ahash_algs_unregister_all(device_data);
 
-	iounmap(device_data->base);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (res)
-		release_mem_region(res->start, resource_size(res));
-
 	if (hash_disable_power(device_data, false))
 		dev_err(&pdev->dev, "%s: hash_disable_power() failed\n",
 			__func__);
@@ -1958,6 +1923,7 @@ static const struct of_device_id ux500_hash_match[] = {
 	{ .compatible = "stericsson,ux500-hash" },
 	{ },
 };
+MODULE_DEVICE_TABLE(of, ux500_hash_match);
 
 static struct platform_driver hash_driver = {
 	.probe  = ux500_hash_probe,
diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h
index 69d163e39101..45cd5b328040 100644
--- a/include/crypto/akcipher.h
+++ b/include/crypto/akcipher.h
@@ -18,21 +18,21 @@
  * struct akcipher_request - public key request
  *
  * @base:	Common attributes for async crypto requests
- * @src:	Pointer to memory containing the input parameters
- *		The format of the parameter(s) is expeted to be Octet String
- * @dst:	Pointer to memory whare the result will be stored
- * @src_len:	Size of the input parameter
+ * @src:	Source data
+ * @dst:	Destination data
+ * @src_len:	Size of the input buffer
  * @dst_len:	Size of the output buffer. It needs to be at least
  *		as big as the expected result depending on the operation.
  *		After operation it will be updated with the actual size of the
- *		result. In case of error, where the dst_len was insufficient,
+ *		result.
+ *		In case of error where the dst sgl size was insufficient,
  *		it will be updated to the size required for the operation.
  * @__ctx:	Start of private context data
  */
 struct akcipher_request {
 	struct crypto_async_request base;
-	void *src;
-	void *dst;
+	struct scatterlist *src;
+	struct scatterlist *dst;
 	unsigned int src_len;
 	unsigned int dst_len;
 	void *__ctx[] CRYPTO_MINALIGN_ATTR;
@@ -67,8 +67,13 @@ struct crypto_akcipher {
  *		algorithm. In case of error, where the dst_len was insufficient,
  *		the req->dst_len will be updated to the size required for the
  *		operation
- * @setkey:	Function invokes the algorithm specific set key function, which
- *		knows how to decode and interpret the BER encoded key
+ * @set_pub_key: Function invokes the algorithm specific set public key
+ *		function, which knows how to decode and interpret
+ *		the BER encoded public key
+ * @set_priv_key: Function invokes the algorithm specific set private key
+ *		function, which knows how to decode and interpret
+ *		the BER encoded private key
+ * @max_size:	Function returns dest buffer size required for a given key.
  * @init:	Initialize the cryptographic transformation object.
  *		This function is used to initialize the cryptographic
  *		transformation object. This function is called only once at
@@ -89,8 +94,11 @@ struct akcipher_alg {
 	int (*verify)(struct akcipher_request *req);
 	int (*encrypt)(struct akcipher_request *req);
 	int (*decrypt)(struct akcipher_request *req);
-	int (*setkey)(struct crypto_akcipher *tfm, const void *key,
-		      unsigned int keylen);
+	int (*set_pub_key)(struct crypto_akcipher *tfm, const void *key,
+			   unsigned int keylen);
+	int (*set_priv_key)(struct crypto_akcipher *tfm, const void *key,
+			    unsigned int keylen);
+	int (*max_size)(struct crypto_akcipher *tfm);
 	int (*init)(struct crypto_akcipher *tfm);
 	void (*exit)(struct crypto_akcipher *tfm);
 
@@ -229,14 +237,14 @@ static inline void akcipher_request_set_callback(struct akcipher_request *req,
  * Sets parameters required by crypto operation
  *
  * @req:	public key request
- * @src:	ptr to input parameter
- * @dst:	ptr of output parameter
- * @src_len:	size of the input buffer
- * @dst_len:	size of the output buffer. It will be updated by the
- *		implementation to reflect the acctual size of the result
+ * @src:	ptr to input scatter list
+ * @dst:	ptr to output scatter list
+ * @src_len:	size of the src input scatter list to be processed
+ * @dst_len:	size of the dst output scatter list
  */
 static inline void akcipher_request_set_crypt(struct akcipher_request *req,
-					      void *src, void *dst,
+					      struct scatterlist *src,
+					      struct scatterlist *dst,
 					      unsigned int src_len,
 					      unsigned int dst_len)
 {
@@ -247,6 +255,22 @@ static inline void akcipher_request_set_crypt(struct akcipher_request *req,
 }
 
 /**
+ * crypto_akcipher_maxsize() -- Get len for output buffer
+ *
+ * Function returns the dest buffer size required for a given key
+ *
+ * @tfm:	AKCIPHER tfm handle allocated with crypto_alloc_akcipher()
+ *
+ * Return: minimum len for output buffer or error code if key hasn't been set
+ */
+static inline int crypto_akcipher_maxsize(struct crypto_akcipher *tfm)
+{
+	struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
+
+	return alg->max_size(tfm);
+}
+
+/**
  * crypto_akcipher_encrypt() -- Invoke public key encrypt operation
  *
  * Function invokes the specific public key encrypt operation for a given
@@ -319,22 +343,44 @@ static inline int crypto_akcipher_verify(struct akcipher_request *req)
 }
 
 /**
- * crypto_akcipher_setkey() -- Invoke public key setkey operation
+ * crypto_akcipher_set_pub_key() -- Invoke set public key operation
+ *
+ * Function invokes the algorithm specific set key function, which knows
+ * how to decode and interpret the encoded key
+ *
+ * @tfm:	tfm handle
+ * @key:	BER encoded public key
+ * @keylen:	length of the key
+ *
+ * Return: zero on success; error code in case of error
+ */
+static inline int crypto_akcipher_set_pub_key(struct crypto_akcipher *tfm,
+					      const void *key,
+					      unsigned int keylen)
+{
+	struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
+
+	return alg->set_pub_key(tfm, key, keylen);
+}
+
+/**
+ * crypto_akcipher_set_priv_key() -- Invoke set private key operation
  *
  * Function invokes the algorithm specific set key function, which knows
  * how to decode and interpret the encoded key
  *
  * @tfm:	tfm handle
- * @key:	BER encoded private or public key
+ * @key:	BER encoded private key
  * @keylen:	length of the key
  *
  * Return: zero on success; error code in case of error
  */
-static inline int crypto_akcipher_setkey(struct crypto_akcipher *tfm, void *key,
-					 unsigned int keylen)
+static inline int crypto_akcipher_set_priv_key(struct crypto_akcipher *tfm,
+					       const void *key,
+					       unsigned int keylen)
 {
 	struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
 
-	return alg->setkey(tfm, key, keylen);
+	return alg->set_priv_key(tfm, key, keylen);
 }
 #endif
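
Putting the reworked API together (scatterlist I/O, the split
set_pub_key/set_priv_key hooks, and maxsize), a caller might look like
this hedged sketch; der_key, msg and their lengths are caller-supplied
placeholders and error handling is trimmed:

	struct crypto_akcipher *tfm = crypto_alloc_akcipher("rsa", 0, 0);
	struct akcipher_request *req = akcipher_request_alloc(tfm, GFP_KERNEL);
	struct scatterlist src, dst;
	int dst_len;
	u8 *out;

	crypto_akcipher_set_pub_key(tfm, der_key, der_key_len);
	dst_len = crypto_akcipher_maxsize(tfm);
	out = kzalloc(dst_len, GFP_KERNEL);

	sg_init_one(&src, msg, msg_len);
	sg_init_one(&dst, out, dst_len);
	akcipher_request_set_crypt(req, &src, &dst, msg_len, dst_len);
	crypto_akcipher_encrypt(req);	/* may return -EINPROGRESS (async) */
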
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 8e920b44c0ac..3d69c93d50e8 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -264,6 +264,20 @@ static inline unsigned int crypto_ahash_alignmask(
 	return crypto_tfm_alg_alignmask(crypto_ahash_tfm(tfm));
 }
 
+/**
+ * crypto_ahash_blocksize() - obtain block size for cipher
+ * @tfm: cipher handle
+ *
+ * The block size for the message digest cipher referenced with the cipher
+ * handle is returned.
+ *
+ * Return: block size of cipher
+ */
+static inline unsigned int crypto_ahash_blocksize(struct crypto_ahash *tfm)
+{
+	return crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+}
+
 static inline struct hash_alg_common *__crypto_hash_alg_common(
 	struct crypto_alg *alg)
 {
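
crypto_ahash_blocksize() mirrors the existing crypto_shash_blocksize() helper
for the asynchronous hash API. One place a caller needs the block size is
HMAC-style key preparation, where a key longer than one block must first be
digested (RFC 2104); a hypothetical one-liner:

#include <crypto/hash.h>

/* Hypothetical helper, not part of this patch. */
static bool example_hmac_key_needs_digest(struct crypto_ahash *tfm,
					  unsigned int keylen)
{
	/* Per RFC 2104, keys longer than one block are hashed first. */
	return keylen > crypto_ahash_blocksize(tfm);
}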
diff --git a/include/crypto/internal/rsa.h b/include/crypto/internal/rsa.h
index a8c86365439f..f997e2d29b5a 100644
--- a/include/crypto/internal/rsa.h
+++ b/include/crypto/internal/rsa.h
@@ -20,8 +20,11 @@ struct rsa_key {
 	MPI d;
 };
 
-int rsa_parse_key(struct rsa_key *rsa_key, const void *key,
-		  unsigned int key_len);
+int rsa_parse_pub_key(struct rsa_key *rsa_key, const void *key,
+		      unsigned int key_len);
+
+int rsa_parse_priv_key(struct rsa_key *rsa_key, const void *key,
+		       unsigned int key_len);
 
 void rsa_free_key(struct rsa_key *rsa_key);
 #endif
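
Splitting rsa_parse_key() in two lets an implementation reject a public-key
blob where a private key is required, and vice versa. A hedged sketch of a
->set_pub_key() handler built on the new parser, assuming a zero-initialised
struct rsa_key kept in the transform context; example_set_pub_key() is
illustrative only:

#include <crypto/internal/rsa.h>

/* Illustrative only: parse a BER-encoded public key into an rsa_key. */
static int example_set_pub_key(struct rsa_key *pkey, const void *key,
			       unsigned int keylen)
{
	int ret;

	/* Drop any previously parsed key first; freeing NULL MPIs is safe. */
	rsa_free_key(pkey);

	ret = rsa_parse_pub_key(pkey, key, keylen);
	if (ret)
		return ret;

	/* n and e are now populated; d stays unset for a public key. */
	return 0;
}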
diff --git a/include/linux/mpi.h b/include/linux/mpi.h
index 641b7d6fd096..3a5abe95affd 100644
--- a/include/linux/mpi.h
+++ b/include/linux/mpi.h
@@ -31,12 +31,7 @@
 #define G10_MPI_H
 
 #include <linux/types.h>
-
-/* DSI defines */
-
-#define SHA1_DIGEST_LENGTH   20
-
-/*end of DSI defines */
+#include <linux/scatterlist.h>
 
 #define BYTES_PER_MPI_LIMB	(BITS_PER_LONG / 8)
 #define BITS_PER_MPI_LIMB	BITS_PER_LONG
@@ -78,6 +73,7 @@ void mpi_swap(MPI a, MPI b);
 MPI do_encode_md(const void *sha_buffer, unsigned nbits);
 MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes);
 MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread);
+MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len);
 int mpi_fromstr(MPI val, const char *str);
 u32 mpi_get_keyid(MPI a, u32 *keyid);
 void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign);
@@ -85,6 +81,8 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
 		    int *sign);
 void *mpi_get_secure_buffer(MPI a, unsigned *nbytes, int *sign);
 int mpi_set_buffer(MPI a, const void *buffer, unsigned nbytes, int sign);
+int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned *nbytes,
+		     int *sign);
 
 #define log_mpidump g10_log_mpidump
 
diff --git a/lib/842/842.h b/lib/842/842.h
index 7c200030acf7..e0a122bc1cdb 100644
--- a/lib/842/842.h
+++ b/lib/842/842.h
@@ -76,6 +76,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/bitops.h>
+#include <linux/crc32.h>
 #include <asm/unaligned.h>
 
 #include <linux/sw842.h>
@@ -98,6 +99,7 @@
 #define I2_BITS		(8)
 #define I4_BITS		(9)
 #define I8_BITS		(8)
+#define CRC_BITS	(32)
 
 #define REPEAT_BITS_MAX		(0x3f)
 #define SHORT_DATA_BITS_MAX	(0x7)
diff --git a/lib/842/842_compress.c b/lib/842/842_compress.c
index 7ce68948e68c..4051339bdfbd 100644
--- a/lib/842/842_compress.c
+++ b/lib/842/842_compress.c
@@ -490,6 +490,7 @@ int sw842_compress(const u8 *in, unsigned int ilen,
 	int ret;
 	u64 last, next, pad, total;
 	u8 repeat_count = 0;
+	u32 crc;
 
 	BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
 
@@ -580,6 +581,18 @@ skip_comp:
 	if (ret)
 		return ret;
 
+	/*
+	 * crc(0:31) is appended to the target data starting with the
+	 * next bit after the End-of-stream template.
+	 * nx842 calculates the CRC over the data in big-endian format,
+	 * so do the same here so that sw842 decompression works on data
+	 * compressed by either implementation.
+	 */
+	crc = crc32_be(0, in, ilen);
+	ret = add_bits(p, crc, CRC_BITS);
+	if (ret)
+		return ret;
+
 	if (p->bit) {
 		p->out++;
 		p->olen--;
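
For reference, the 32 bits appended after the End-of-stream template are
simply crc32_be() over the entire uncompressed input, so the expected value
can be recomputed independently of the compressor; a minimal illustration
(the helper name is assumed, not part of this patch):

#include <linux/crc32.h>

/* Equals the checksum sw842_compress() embeds in its output stream. */
static u32 example_842_stream_crc(const u8 *in, unsigned int ilen)
{
	return crc32_be(0, in, ilen);
}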
diff --git a/lib/842/842_decompress.c b/lib/842/842_decompress.c
index 5446ff0c9ba0..8881dad2a6a0 100644
--- a/lib/842/842_decompress.c
+++ b/lib/842/842_decompress.c
@@ -285,6 +285,7 @@ int sw842_decompress(const u8 *in, unsigned int ilen,
 	struct sw842_param p;
 	int ret;
 	u64 op, rep, tmp, bytes, total;
+	u64 crc;
 
 	p.in = (u8 *)in;
 	p.bit = 0;
@@ -375,6 +376,22 @@ int sw842_decompress(const u8 *in, unsigned int ilen,
 		}
 	} while (op != OP_END);
 
+	/*
+	 * crc(0:31) is stored in the compressed data starting with the
+	 * next bit after the End-of-stream template.
+	 */
+	ret = next_bits(&p, &crc, CRC_BITS);
+	if (ret)
+		return ret;
+
+	/*
+	 * Validate the CRC stored in the compressed data.
+	 */
+	if (crc != (u64)crc32_be(0, out, total - p.olen)) {
+		pr_debug("CRC mismatch for decompression\n");
+		return -EINVAL;
+	}
+
 	if (unlikely((total - p.olen) > UINT_MAX))
 		return -ENOSPC;
 
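
A hedged round-trip sketch of the combined change: sw842_compress() embeds
the CRC and sw842_decompress() now returns -EINVAL when the checksum over its
output does not match. The 2 * len sizing of the compressed buffer is an
illustrative assumption, since 842 can expand incompressible input;
example_842_roundtrip() is not part of this patch.

#include <linux/slab.h>
#include <linux/sw842.h>

static int example_842_roundtrip(const u8 *data, unsigned int len)
{
	unsigned int clen = 2 * len, dlen = len;
	void *wmem = kmalloc(SW842_MEM_COMPRESS, GFP_KERNEL);
	u8 *cbuf = kmalloc(clen, GFP_KERNEL);
	u8 *dbuf = kmalloc(dlen, GFP_KERNEL);
	int ret = -ENOMEM;

	if (!wmem || !cbuf || !dbuf)
		goto out;

	/* The CRC is appended inside the compressed stream itself. */
	ret = sw842_compress(data, len, cbuf, &clen, wmem);
	if (ret)
		goto out;

	/* Fails with -EINVAL if the embedded CRC does not match. */
	ret = sw842_decompress(cbuf, clen, dbuf, &dlen);
out:
	kfree(dbuf);
	kfree(cbuf);
	kfree(wmem);
	return ret;
}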
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 95c52a95259e..c7e0a705eecf 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -319,3 +319,208 @@ int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mpi_set_buffer);
+
+/**
+ * mpi_write_to_sgl() - Function exports MPI to an sgl (msb first)
+ *
+ * This function works in the same way as mpi_read_buffer(), but it
+ * takes an sgl instead of a u8 * buf.
+ *
+ * @a:		a multi precision integer
+ * @sgl:	scatterlist to write to. Needs to be at least
+ *		mpi_get_size(a) long.
+ * @nbytes:	in/out param - it has to be set to the maximum number of
+ *		bytes that can be written to sgl. This has to be at least
+ *		the size of the integer a. On return it receives the actual
+ *		length of the data written.
+ * @sign:	if not NULL, it will be set to the sign of a.
+ *
+ * Return:	0 on success or error code in case of error
+ */
+int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
+		     int *sign)
+{
+	u8 *p, *p2;
+	mpi_limb_t alimb, alimb2;
+	unsigned int n = mpi_get_size(a);
+	int i, x, y = 0, lzeros = 0, buf_len;
+
+	if (!nbytes || *nbytes < n)
+		return -EINVAL;
+
+	if (sign)
+		*sign = a->sign;
+
+	p = (void *)&a->d[a->nlimbs] - 1;
+
+	for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) {
+		if (!*p)
+			lzeros++;
+		else
+			break;
+	}
+
+	*nbytes = n - lzeros;
+	buf_len = sgl->length;
+	p2 = sg_virt(sgl);
+
+	for (i = a->nlimbs - 1; i >= 0; i--) {
+		alimb = a->d[i];
+		p = (u8 *)&alimb2;
+#if BYTES_PER_MPI_LIMB == 4
+		*p++ = alimb >> 24;
+		*p++ = alimb >> 16;
+		*p++ = alimb >> 8;
+		*p++ = alimb;
+#elif BYTES_PER_MPI_LIMB == 8
+		*p++ = alimb >> 56;
+		*p++ = alimb >> 48;
+		*p++ = alimb >> 40;
+		*p++ = alimb >> 32;
+		*p++ = alimb >> 24;
+		*p++ = alimb >> 16;
+		*p++ = alimb >> 8;
+		*p++ = alimb;
+#else
+#error please implement for this limb size.
+#endif
+		if (lzeros > 0) {
+			if (lzeros >= sizeof(alimb)) {
+				p -= sizeof(alimb);
+				continue;
+			} else {
+				mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
+				mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
+							+ lzeros;
+				*limb1 = *limb2;
+				p -= lzeros;
+				y = lzeros;
+			}
+			lzeros -= sizeof(alimb);
+		}
+
+		p = p - (sizeof(alimb) - y);
+
+		for (x = 0; x < sizeof(alimb) - y; x++) {
+			if (!buf_len) {
+				sgl = sg_next(sgl);
+				if (!sgl)
+					return -EINVAL;
+				buf_len = sgl->length;
+				p2 = sg_virt(sgl);
+			}
+			*p2++ = *p++;
+			buf_len--;
+		}
+		y = 0;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mpi_write_to_sgl);
+
+/**
+ * mpi_read_raw_from_sgl() - Function allocates an MPI and populates it with
+ *			     data from the sgl
+ *
+ * This function works in the same way as mpi_read_raw_data(), but it
+ * takes an sgl instead of a void * buffer, i.e. it allocates
+ * a new MPI and reads the content of the sgl into the MPI.
+ *
+ * @sgl:	scatterlist to read from
+ * @len:	number of bytes to read
+ *
+ * Return:	Pointer to a new MPI or NULL on error
+ */
+MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len)
+{
+	struct scatterlist *sg;
+	int x, i, j, z, lzeros, ents;
+	unsigned int nbits, nlimbs, nbytes;
+	mpi_limb_t a;
+	MPI val = NULL;
+
+	lzeros = 0;
+	ents = sg_nents(sgl);
+
+	for_each_sg(sgl, sg, ents, i) {
+		const u8 *buff = sg_virt(sg);
+		int len = sg->length;
+
+		while (len && !*buff) {
+			lzeros++;
+			len--;
+			buff++;
+		}
+
+		if (len && *buff)
+			break;
+
+		ents--;
+		lzeros = 0;
+	}
+
+	sgl = sg;
+
+	if (!ents)
+		nbytes = 0;
+	else
+		nbytes = len - lzeros;
+
+	nbits = nbytes * 8;
+	if (nbits > MAX_EXTERN_MPI_BITS) {
+		pr_info("MPI: mpi too large (%u bits)\n", nbits);
+		return NULL;
+	}
+
+	/*
+	 * count_leading_zeros() counts bits in a full unsigned long, so
+	 * subtract only the leading zero bits of the most significant
+	 * byte here.
+	 */
+	if (nbytes > 0)
+		nbits -= count_leading_zeros(*(u8 *)(sg_virt(sgl) + lzeros)) -
+			 (BITS_PER_LONG - 8);
+	else
+		nbits = 0;
+
+	nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
+	val = mpi_alloc(nlimbs);
+	if (!val)
+		return NULL;
+
+	val->nbits = nbits;
+	val->sign = 0;
+	val->nlimbs = nlimbs;
+
+	if (nbytes == 0)
+		return val;
+
+	j = nlimbs - 1;
+	a = 0;
+	z = 0;
+	x = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+	x %= BYTES_PER_MPI_LIMB;
+
+	for_each_sg(sgl, sg, ents, i) {
+		const u8 *buffer = sg_virt(sg) + lzeros;
+		int len = sg->length - lzeros;
+		int buf_shift = x;
+
+		if  (sg_is_last(sg) && (len % BYTES_PER_MPI_LIMB))
+			len += BYTES_PER_MPI_LIMB - (len % BYTES_PER_MPI_LIMB);
+
+		for (; x < len + buf_shift; x++) {
+			a <<= 8;
+			a |= *buffer++;
+			if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) {
+				val->d[j--] = a;
+				a = 0;
+			}
+		}
+		z += x;
+		x = 0;
+		lzeros = 0;
+	}
+	return val;
+}
+EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl);
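
A short sketch tying the two new helpers together the way the RSA code uses
them: pull an operand out of a request's source scatterlist into an MPI,
operate on it, then write the result back msb-first into the destination
scatterlist. The function name and the elided computation are placeholders,
not part of this patch.

#include <linux/mpi.h>
#include <linux/scatterlist.h>

static int example_mpi_sgl_roundtrip(struct scatterlist *src,
				     unsigned int src_len,
				     struct scatterlist *dst,
				     unsigned int dst_len)
{
	unsigned int nbytes = dst_len;	/* in: capacity, out: bytes written */
	int sign, ret;
	MPI m;

	m = mpi_read_raw_from_sgl(src, src_len);
	if (!m)
		return -ENOMEM;

	/* ... a real user would do modular arithmetic on m here ... */

	/* Fails with -EINVAL if dst cannot hold the full integer. */
	ret = mpi_write_to_sgl(m, dst, &nbytes, &sign);

	mpi_free(m);
	return ret;
}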