This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "IPFire 2.x development tree".
The branch, next has been updated via 4bdbf22ee4609aed3cc378f80a1eb656ed68a634 (commit) via ed7a7f77db713e8977dec2c3c165e7bbd7f73fb5 (commit) from 5a2ebd32c0fdbda840271796c8d25a8e36b7c84b (commit)
Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below.
- Log ----------------------------------------------------------------- commit 4bdbf22ee4609aed3cc378f80a1eb656ed68a634 Author: Arne Fitzenreiter arne_f@ipfire.org Date: Sat Oct 22 20:20:22 2016 +0200
kernel: fix CVE-2016-5195 (Dirty COW)
Signed-off-by: Arne Fitzenreiter arne_f@ipfire.org
commit ed7a7f77db713e8977dec2c3c165e7bbd7f73fb5 Author: Arne Fitzenreiter arne_f@ipfire.org Date: Sat Oct 22 16:36:04 2016 +0200
kernel: add aes-ni support for aes-192 and 256
Signed-off-by: Arne Fitzenreiter arne_f@ipfire.org
-----------------------------------------------------------------------
Summary of changes: lfs/linux | 10 +- ...i-Add-support-for-192-256-bit-keys-to-AES.patch | 689 +++++++++++++++++++++ ...up_flags-FOLL_WRITE-games-from-__get_user.patch | 96 +++ 3 files changed, 794 insertions(+), 1 deletion(-) create mode 100644 src/patches/linux/0100-crypto-aesni-Add-support-for-192-256-bit-keys-to-AES.patch create mode 100644 src/patches/linux/0110-mm-remove-gup_flags-FOLL_WRITE-games-from-__get_user.patch
Difference in files: diff --git a/lfs/linux b/lfs/linux index da13c17..19d2729 100644 --- a/lfs/linux +++ b/lfs/linux @@ -37,7 +37,7 @@ DIR_APP = $(DIR_SRC)/$(THISAPP) CFLAGS = CXXFLAGS =
-PAK_VER = 72 +PAK_VER = 73 DEPS = ""
KERNEL_ARCH = $(MACHINE) @@ -201,6 +201,14 @@ endif cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0025-Drivers-hv-vmbus-Support-per-channel-driver-state.patch cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-hyperv_Mark_the_Hyoer-V_TSC_as_unstable.patch
+ifneq "$(KCFG)" "-headers" + # fix AES-NI 192 and 256 bits / grsec is needed for this patch version + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0100-crypto-aesni-Add-support-for-192-256-bit-keys-to-AES.patch +endif + + # fix CVE-2016-5195 (Dirty COW) + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0110-mm-remove-gup_flags-FOLL_WRITE-games-from-__get_user.patch + # fix empty symbol crc's cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-genksyms_fix_typeof_handling.patch
diff --git a/src/patches/linux/0100-crypto-aesni-Add-support-for-192-256-bit-keys-to-AES.patch b/src/patches/linux/0100-crypto-aesni-Add-support-for-192-256-bit-keys-to-AES.patch new file mode 100644 index 0000000..51b4394 --- /dev/null +++ b/src/patches/linux/0100-crypto-aesni-Add-support-for-192-256-bit-keys-to-AES.patch @@ -0,0 +1,689 @@ +From bcdbd313c0e6fd630a8945fd58dc5383631dc6dd Mon Sep 17 00:00:00 2001 +From: Timothy McCaffrey timothy.mccaffrey@unisys.com +Date: Tue, 13 Jan 2015 13:16:43 -0500 +Subject: [PATCH] crypto: aesni - Add support for 192 & 256 bit keys to AESNI + RFC4106 + +These patches fix the RFC4106 implementation in the aesni-intel +module so it supports 192 & 256 bit keys. + +Since the AVX support that was added to this module also only +supports 128 bit keys, and this patch only affects the SSE +implementation, changes were also made to use the SSE version +if key sizes other than 128 are specified. + +RFC4106 specifies that 192 & 256 bit keys must be supported (section +8.4). + +Also, this should fix Strongswan issue 341 where the aesni module +needs to be unloaded if 256 bit keys are used: + +http://wiki.strongswan.org/issues/341 + +This patch has been tested with Sandy Bridge and Haswell processors. +With 128 bit keys and input buffers > 512 bytes a slight performance +degradation was noticed (~1%). For input buffers of less than 512 +bytes there was no performance impact. Compared to 128 bit keys, +256 bit key size performance is approx. .5 cycles per byte slower +on Sandy Bridge, and .37 cycles per byte slower on Haswell (vs. +SSE code). + +This patch has also been tested with StrongSwan IPSec connections +where it worked correctly. + +I created this diff from a git clone of crypto-2.6.git. + +Any questions, please feel free to contact me. 
+ +Signed-off-by: Timothy McCaffrey timothy.mccaffrey@unisys.com +Signed-off-by: Jarod Wilson jarod@redhat.com +Signed-off-by: Herbert Xu herbert@gondor.apana.org.au +--- + arch/x86/crypto/aesni-intel_asm.S | 342 +++++++++++++++++++------------------ + arch/x86/crypto/aesni-intel_glue.c | 31 +++- + 2 files changed, 202 insertions(+), 171 deletions(-) + +diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S +index c92c7d8..f5cdfbf 100644 +--- a/arch/x86/crypto/aesni-intel_asm.S ++++ b/arch/x86/crypto/aesni-intel_asm.S +@@ -33,12 +33,23 @@ + #include <asm/inst.h> + #include <asm/alternative-asm.h> + ++/* ++ * The following macros are used to move an (un)aligned 16 byte value to/from ++ * an XMM register. This can done for either FP or integer values, for FP use ++ * movaps (move aligned packed single) or integer use movdqa (move double quad ++ * aligned). It doesn't make a performance difference which instruction is used ++ * since Nehalem (original Core i7) was released. However, the movaps is a byte ++ * shorter, so that is the one we'll use for now. (same for unaligned). 
++ */ ++#define MOVADQ movaps ++#define MOVUDQ movups ++ + #ifdef __x86_64__ ++ + .data + .align 16 + .Lgf128mul_x_ble_mask: + .octa 0x00000000000000010000000000000087 +- + POLY: .octa 0xC2000000000000000000000000000001 + TWOONE: .octa 0x00000001000000000000000000000001 + +@@ -90,6 +101,7 @@ enc: .octa 0x2 + #define arg8 STACK_OFFSET+16(%r14) + #define arg9 STACK_OFFSET+24(%r14) + #define arg10 STACK_OFFSET+32(%r14) ++#define keysize 2*15*16(%arg1) + #endif + + +@@ -214,10 +226,12 @@ enc: .octa 0x2 + + .macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ + XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation ++ MOVADQ SHUF_MASK(%rip), %xmm14 + mov arg7, %r10 # %r10 = AAD + mov arg8, %r15 # %r15 = aadLen + mov %r15, %r11 + pxor %xmm\i, %xmm\i ++ + _get_AAD_loop\num_initial_blocks\operation: + movd (%r10), \TMP1 + pslldq $12, \TMP1 +@@ -226,6 +240,7 @@ _get_AAD_loop\num_initial_blocks\operation: + add $4, %r10 + sub $4, %r15 + jne _get_AAD_loop\num_initial_blocks\operation ++ + cmp $16, %r11 + je _get_AAD_loop2_done\num_initial_blocks\operation + mov $16, %r15 +@@ -234,8 +249,8 @@ _get_AAD_loop2\num_initial_blocks\operation: + sub $4, %r15 + cmp %r11, %r15 + jne _get_AAD_loop2\num_initial_blocks\operation ++ + _get_AAD_loop2_done\num_initial_blocks\operation: +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data + + xor %r11, %r11 # initialise the data pointer offset as zero +@@ -244,59 +259,34 @@ _get_AAD_loop2_done\num_initial_blocks\operation: + + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), \XMM0 # XMM0 = Y0 +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM0 + + .if (\i == 5) || (\i == 6) || (\i == 7) ++ MOVADQ ONE(%RIP),\TMP1 ++ MOVADQ (%arg1),\TMP2 + .irpc index, \i_seq +- paddd ONE(%rip), \XMM0 # INCR Y0 ++ paddd \TMP1, \XMM0 # INCR Y0 + movdqa \XMM0, %xmm\index +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap +- +-.endr +-.irpc index, \i_seq +- 
pxor 16*0(%arg1), %xmm\index +-.endr +-.irpc index, \i_seq +- movaps 0x10(%rdi), \TMP1 +- AESENC \TMP1, %xmm\index # Round 1 +-.endr +-.irpc index, \i_seq +- movaps 0x20(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 ++ pxor \TMP2, %xmm\index + .endr +-.irpc index, \i_seq +- movaps 0x30(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 +-.endr +-.irpc index, \i_seq +- movaps 0x40(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 +-.endr +-.irpc index, \i_seq +- movaps 0x50(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 +-.endr +-.irpc index, \i_seq +- movaps 0x60(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 +-.endr +-.irpc index, \i_seq +- movaps 0x70(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 +-.endr +-.irpc index, \i_seq +- movaps 0x80(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 +-.endr +-.irpc index, \i_seq +- movaps 0x90(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 ++ lea 0x10(%arg1),%r10 ++ mov keysize,%eax ++ shr $2,%eax # 128->4, 192->6, 256->8 ++ add $5,%eax # 128->9, 192->11, 256->13 ++ ++aes_loop_initial_dec\num_initial_blocks: ++ MOVADQ (%r10),\TMP1 ++.irpc index, \i_seq ++ AESENC \TMP1, %xmm\index + .endr ++ add $16,%r10 ++ sub $1,%eax ++ jnz aes_loop_initial_dec\num_initial_blocks ++ ++ MOVADQ (%r10), \TMP1 + .irpc index, \i_seq +- movaps 0xa0(%arg1), \TMP1 +- AESENCLAST \TMP1, %xmm\index # Round 10 ++ AESENCLAST \TMP1, %xmm\index # Last Round + .endr + .irpc index, \i_seq + movdqu (%arg3 , %r11, 1), \TMP1 +@@ -306,10 +296,8 @@ _get_AAD_loop2_done\num_initial_blocks\operation: + add $16, %r11 + + movdqa \TMP1, %xmm\index +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index +- +- # prepare plaintext/ciphertext for GHASH computation ++ # prepare plaintext/ciphertext for GHASH computation + .endr + .endif + GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +@@ -339,30 +327,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation: + * Precomputations for HashKey parallel with encryption of 
first 4 blocks. + * Haskey_i_k holds XORed values of the low and high parts of the Haskey_i + */ +- paddd ONE(%rip), \XMM0 # INCR Y0 +- movdqa \XMM0, \XMM1 +- movdqa SHUF_MASK(%rip), %xmm14 ++ MOVADQ ONE(%rip), \TMP1 ++ paddd \TMP1, \XMM0 # INCR Y0 ++ MOVADQ \XMM0, \XMM1 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + +- paddd ONE(%rip), \XMM0 # INCR Y0 +- movdqa \XMM0, \XMM2 +- movdqa SHUF_MASK(%rip), %xmm14 ++ paddd \TMP1, \XMM0 # INCR Y0 ++ MOVADQ \XMM0, \XMM2 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap + +- paddd ONE(%rip), \XMM0 # INCR Y0 +- movdqa \XMM0, \XMM3 +- movdqa SHUF_MASK(%rip), %xmm14 ++ paddd \TMP1, \XMM0 # INCR Y0 ++ MOVADQ \XMM0, \XMM3 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap + +- paddd ONE(%rip), \XMM0 # INCR Y0 +- movdqa \XMM0, \XMM4 +- movdqa SHUF_MASK(%rip), %xmm14 ++ paddd \TMP1, \XMM0 # INCR Y0 ++ MOVADQ \XMM0, \XMM4 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + +- pxor 16*0(%arg1), \XMM1 +- pxor 16*0(%arg1), \XMM2 +- pxor 16*0(%arg1), \XMM3 +- pxor 16*0(%arg1), \XMM4 ++ MOVADQ 0(%arg1),\TMP1 ++ pxor \TMP1, \XMM1 ++ pxor \TMP1, \XMM2 ++ pxor \TMP1, \XMM3 ++ pxor \TMP1, \XMM4 + movdqa \TMP3, \TMP5 + pshufd $78, \TMP3, \TMP1 + pxor \TMP3, \TMP1 +@@ -400,7 +386,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation: + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_4_k(%rsp) +- movaps 0xa0(%arg1), \TMP2 ++ lea 0xa0(%arg1),%r10 ++ mov keysize,%eax ++ shr $2,%eax # 128->4, 192->6, 256->8 ++ sub $4,%eax # 128->0, 192->2, 256->4 ++ jz aes_loop_pre_dec_done\num_initial_blocks ++ ++aes_loop_pre_dec\num_initial_blocks: ++ MOVADQ (%r10),\TMP2 ++.irpc index, 1234 ++ AESENC \TMP2, %xmm\index ++.endr ++ add $16,%r10 ++ sub $1,%eax ++ jnz aes_loop_pre_dec\num_initial_blocks ++ ++aes_loop_pre_dec_done\num_initial_blocks: ++ MOVADQ (%r10), \TMP2 + AESENCLAST \TMP2, \XMM1 + AESENCLAST \TMP2, \XMM2 + AESENCLAST \TMP2, \XMM3 +@@ -422,15 +424,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation: + movdqu 
\XMM4, 16*3(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM4 + add $64, %r11 +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + pxor \XMMDst, \XMM1 + # combine GHASHed value with the corresponding ciphertext +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + + _initial_blocks_done\num_initial_blocks\operation: +@@ -452,6 +450,7 @@ _initial_blocks_done\num_initial_blocks\operation: + + .macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ + XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation ++ MOVADQ SHUF_MASK(%rip), %xmm14 + mov arg7, %r10 # %r10 = AAD + mov arg8, %r15 # %r15 = aadLen + mov %r15, %r11 +@@ -473,7 +472,6 @@ _get_AAD_loop2\num_initial_blocks\operation: + cmp %r11, %r15 + jne _get_AAD_loop2\num_initial_blocks\operation + _get_AAD_loop2_done\num_initial_blocks\operation: +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data + + xor %r11, %r11 # initialise the data pointer offset as zero +@@ -482,59 +480,35 @@ _get_AAD_loop2_done\num_initial_blocks\operation: + + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), \XMM0 # XMM0 = Y0 +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM0 + + .if (\i == 5) || (\i == 6) || (\i == 7) +-.irpc index, \i_seq +- paddd ONE(%rip), \XMM0 # INCR Y0 +- movdqa \XMM0, %xmm\index +- movdqa SHUF_MASK(%rip), %xmm14 +- PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap + +-.endr +-.irpc index, \i_seq +- pxor 16*0(%arg1), %xmm\index +-.endr +-.irpc index, \i_seq +- movaps 0x10(%rdi), \TMP1 +- AESENC \TMP1, %xmm\index # Round 1 +-.endr +-.irpc index, \i_seq +- movaps 0x20(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 +-.endr ++ MOVADQ ONE(%RIP),\TMP1 ++ MOVADQ 0(%arg1),\TMP2 + .irpc index, \i_seq +- movaps 0x30(%arg1), 
\TMP1 +- AESENC \TMP1, %xmm\index # Round 2 ++ paddd \TMP1, \XMM0 # INCR Y0 ++ MOVADQ \XMM0, %xmm\index ++ PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap ++ pxor \TMP2, %xmm\index + .endr +-.irpc index, \i_seq +- movaps 0x40(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 +-.endr +-.irpc index, \i_seq +- movaps 0x50(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 +-.endr +-.irpc index, \i_seq +- movaps 0x60(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 +-.endr +-.irpc index, \i_seq +- movaps 0x70(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 +-.endr +-.irpc index, \i_seq +- movaps 0x80(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 +-.endr +-.irpc index, \i_seq +- movaps 0x90(%arg1), \TMP1 +- AESENC \TMP1, %xmm\index # Round 2 ++ lea 0x10(%arg1),%r10 ++ mov keysize,%eax ++ shr $2,%eax # 128->4, 192->6, 256->8 ++ add $5,%eax # 128->9, 192->11, 256->13 ++ ++aes_loop_initial_enc\num_initial_blocks: ++ MOVADQ (%r10),\TMP1 ++.irpc index, \i_seq ++ AESENC \TMP1, %xmm\index + .endr ++ add $16,%r10 ++ sub $1,%eax ++ jnz aes_loop_initial_enc\num_initial_blocks ++ ++ MOVADQ (%r10), \TMP1 + .irpc index, \i_seq +- movaps 0xa0(%arg1), \TMP1 +- AESENCLAST \TMP1, %xmm\index # Round 10 ++ AESENCLAST \TMP1, %xmm\index # Last Round + .endr + .irpc index, \i_seq + movdqu (%arg3 , %r11, 1), \TMP1 +@@ -542,8 +516,6 @@ _get_AAD_loop2_done\num_initial_blocks\operation: + movdqu %xmm\index, (%arg2 , %r11, 1) + # write back plaintext/ciphertext for num_initial_blocks + add $16, %r11 +- +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index + + # prepare plaintext/ciphertext for GHASH computation +@@ -576,30 +548,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation: + * Precomputations for HashKey parallel with encryption of first 4 blocks. 
+ * Haskey_i_k holds XORed values of the low and high parts of the Haskey_i + */ +- paddd ONE(%rip), \XMM0 # INCR Y0 +- movdqa \XMM0, \XMM1 +- movdqa SHUF_MASK(%rip), %xmm14 ++ MOVADQ ONE(%RIP),\TMP1 ++ paddd \TMP1, \XMM0 # INCR Y0 ++ MOVADQ \XMM0, \XMM1 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + +- paddd ONE(%rip), \XMM0 # INCR Y0 +- movdqa \XMM0, \XMM2 +- movdqa SHUF_MASK(%rip), %xmm14 ++ paddd \TMP1, \XMM0 # INCR Y0 ++ MOVADQ \XMM0, \XMM2 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap + +- paddd ONE(%rip), \XMM0 # INCR Y0 +- movdqa \XMM0, \XMM3 +- movdqa SHUF_MASK(%rip), %xmm14 ++ paddd \TMP1, \XMM0 # INCR Y0 ++ MOVADQ \XMM0, \XMM3 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap + +- paddd ONE(%rip), \XMM0 # INCR Y0 +- movdqa \XMM0, \XMM4 +- movdqa SHUF_MASK(%rip), %xmm14 ++ paddd \TMP1, \XMM0 # INCR Y0 ++ MOVADQ \XMM0, \XMM4 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + +- pxor 16*0(%arg1), \XMM1 +- pxor 16*0(%arg1), \XMM2 +- pxor 16*0(%arg1), \XMM3 +- pxor 16*0(%arg1), \XMM4 ++ MOVADQ 0(%arg1),\TMP1 ++ pxor \TMP1, \XMM1 ++ pxor \TMP1, \XMM2 ++ pxor \TMP1, \XMM3 ++ pxor \TMP1, \XMM4 + movdqa \TMP3, \TMP5 + pshufd $78, \TMP3, \TMP1 + pxor \TMP3, \TMP1 +@@ -637,7 +607,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation: + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_4_k(%rsp) +- movaps 0xa0(%arg1), \TMP2 ++ lea 0xa0(%arg1),%r10 ++ mov keysize,%eax ++ shr $2,%eax # 128->4, 192->6, 256->8 ++ sub $4,%eax # 128->0, 192->2, 256->4 ++ jz aes_loop_pre_enc_done\num_initial_blocks ++ ++aes_loop_pre_enc\num_initial_blocks: ++ MOVADQ (%r10),\TMP2 ++.irpc index, 1234 ++ AESENC \TMP2, %xmm\index ++.endr ++ add $16,%r10 ++ sub $1,%eax ++ jnz aes_loop_pre_enc\num_initial_blocks ++ ++aes_loop_pre_enc_done\num_initial_blocks: ++ MOVADQ (%r10), \TMP2 + AESENCLAST \TMP2, \XMM1 + AESENCLAST \TMP2, \XMM2 + AESENCLAST \TMP2, \XMM3 +@@ -656,15 +642,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation: + movdqu \XMM4, 16*3(%arg2 , 
%r11 , 1) + + add $64, %r11 +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + pxor \XMMDst, \XMM1 + # combine GHASHed value with the corresponding ciphertext +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap +- movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + + _initial_blocks_done\num_initial_blocks\operation: +@@ -795,7 +777,23 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 +- movaps 0xa0(%arg1), \TMP3 ++ lea 0xa0(%arg1),%r10 ++ mov keysize,%eax ++ shr $2,%eax # 128->4, 192->6, 256->8 ++ sub $4,%eax # 128->0, 192->2, 256->4 ++ jz aes_loop_par_enc_done ++ ++aes_loop_par_enc: ++ MOVADQ (%r10),\TMP3 ++.irpc index, 1234 ++ AESENC \TMP3, %xmm\index ++.endr ++ add $16,%r10 ++ sub $1,%eax ++ jnz aes_loop_par_enc ++ ++aes_loop_par_enc_done: ++ MOVADQ (%r10), \TMP3 + AESENCLAST \TMP3, \XMM1 # Round 10 + AESENCLAST \TMP3, \XMM2 + AESENCLAST \TMP3, \XMM3 +@@ -987,8 +985,24 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 +- movaps 0xa0(%arg1), \TMP3 +- AESENCLAST \TMP3, \XMM1 # Round 10 ++ lea 0xa0(%arg1),%r10 ++ mov keysize,%eax ++ shr $2,%eax # 128->4, 192->6, 256->8 ++ sub $4,%eax # 128->0, 192->2, 256->4 ++ jz aes_loop_par_dec_done ++ ++aes_loop_par_dec: ++ MOVADQ (%r10),\TMP3 ++.irpc index, 1234 ++ AESENC \TMP3, %xmm\index ++.endr ++ add $16,%r10 ++ sub $1,%eax ++ jnz aes_loop_par_dec ++ ++aes_loop_par_dec_done: ++ MOVADQ (%r10), \TMP3 ++ AESENCLAST \TMP3, \XMM1 # last round + AESENCLAST \TMP3, \XMM2 + AESENCLAST \TMP3, \XMM3 + AESENCLAST \TMP3, \XMM4 +@@ -1156,33 +1170,29 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst + pxor \TMP6, \XMMDst # reduced result is in XMMDst + .endm + 
+-/* Encryption of a single block done*/ +-.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 + +- pxor (%arg1), \XMM0 +- movaps 16(%arg1), \TMP1 +- AESENC \TMP1, \XMM0 +- movaps 32(%arg1), \TMP1 +- AESENC \TMP1, \XMM0 +- movaps 48(%arg1), \TMP1 +- AESENC \TMP1, \XMM0 +- movaps 64(%arg1), \TMP1 +- AESENC \TMP1, \XMM0 +- movaps 80(%arg1), \TMP1 +- AESENC \TMP1, \XMM0 +- movaps 96(%arg1), \TMP1 +- AESENC \TMP1, \XMM0 +- movaps 112(%arg1), \TMP1 +- AESENC \TMP1, \XMM0 +- movaps 128(%arg1), \TMP1 +- AESENC \TMP1, \XMM0 +- movaps 144(%arg1), \TMP1 +- AESENC \TMP1, \XMM0 +- movaps 160(%arg1), \TMP1 +- AESENCLAST \TMP1, \XMM0 +-.endm ++/* Encryption of a single block ++* uses eax & r10 ++*/ + ++.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 + ++ pxor (%arg1), \XMM0 ++ mov keysize,%eax ++ shr $2,%eax # 128->4, 192->6, 256->8 ++ add $5,%eax # 128->9, 192->11, 256->13 ++ lea 16(%arg1), %r10 # get first expanded key address ++ ++_esb_loop_@: ++ MOVADQ (%r10),\TMP1 ++ AESENC \TMP1,\XMM0 ++ add $16,%r10 ++ sub $1,%eax ++ jnz _esb_loop_@ ++ ++ MOVADQ (%r10),\TMP1 ++ AESENCLAST \TMP1,\XMM0 ++.endm + /***************************************************************************** + * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. + * u8 *out, // Plaintext output. Encrypt in-place is allowed. 
+diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c +index 6d4faba..bfaf817 100644 +--- a/arch/x86/crypto/aesni-intel_glue.c ++++ b/arch/x86/crypto/aesni-intel_glue.c +@@ -177,7 +177,8 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out, + u8 *hash_subkey, const u8 *aad, unsigned long aad_len, + u8 *auth_tag, unsigned long auth_tag_len) + { +- if (plaintext_len < AVX_GEN2_OPTSIZE) { ++ struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; ++ if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){ + aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, + aad_len, auth_tag, auth_tag_len); + } else { +@@ -192,7 +193,8 @@ static void aesni_gcm_dec_avx(void *ctx, u8 *out, + u8 *hash_subkey, const u8 *aad, unsigned long aad_len, + u8 *auth_tag, unsigned long auth_tag_len) + { +- if (ciphertext_len < AVX_GEN2_OPTSIZE) { ++ struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; ++ if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { + aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad, + aad_len, auth_tag, auth_tag_len); + } else { +@@ -226,7 +228,8 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out, + u8 *hash_subkey, const u8 *aad, unsigned long aad_len, + u8 *auth_tag, unsigned long auth_tag_len) + { +- if (plaintext_len < AVX_GEN2_OPTSIZE) { ++ struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; ++ if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { + aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, + aad_len, auth_tag, auth_tag_len); + } else if (plaintext_len < AVX_GEN4_OPTSIZE) { +@@ -245,7 +248,8 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out, + u8 *hash_subkey, const u8 *aad, unsigned long aad_len, + u8 *auth_tag, unsigned long auth_tag_len) + { +- if (ciphertext_len < AVX_GEN2_OPTSIZE) { ++ struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; ++ if 
((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { + aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, + aad, aad_len, auth_tag, auth_tag_len); + } else if (ciphertext_len < AVX_GEN4_OPTSIZE) { +@@ -878,7 +882,8 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, + } + /*Account for 4 byte nonce at the end.*/ + key_len -= 4; +- if (key_len != AES_KEYSIZE_128) { ++ if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && ++ key_len != AES_KEYSIZE_256) { + crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } +@@ -989,6 +994,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) + __be32 counter = cpu_to_be32(1); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); ++ u32 key_len = ctx->aes_key_expanded.key_length; + void *aes_ctx = &(ctx->aes_key_expanded); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + u8 iv_tab[16+AESNI_ALIGN]; +@@ -1003,6 +1009,13 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) + /* to 8 or 12 bytes */ + if (unlikely(req->assoclen != 8 && req->assoclen != 12)) + return -EINVAL; ++ if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16)) ++ return -EINVAL; ++ if (unlikely(key_len != AES_KEYSIZE_128 && ++ key_len != AES_KEYSIZE_192 && ++ key_len != AES_KEYSIZE_256)) ++ return -EINVAL; ++ + /* IV below built */ + for (i = 0; i < 4; i++) + *(iv+i) = ctx->nonce[i]; +@@ -1067,6 +1080,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) + int retval = 0; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); ++ u32 key_len = ctx->aes_key_expanded.key_length; + void *aes_ctx = &(ctx->aes_key_expanded); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + u8 iv_and_authTag[32+AESNI_ALIGN]; +@@ -1080,6 +1094,13 @@ static int 
__driver_rfc4106_decrypt(struct aead_request *req) + if (unlikely((req->cryptlen < auth_tag_len) || + (req->assoclen != 8 && req->assoclen != 12))) + return -EINVAL; ++ if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16)) ++ return -EINVAL; ++ if (unlikely(key_len != AES_KEYSIZE_128 && ++ key_len != AES_KEYSIZE_192 && ++ key_len != AES_KEYSIZE_256)) ++ return -EINVAL; ++ + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length */ + /* equal to 8 or 12 bytes */ +-- +2.7.4 + diff --git a/src/patches/linux/0110-mm-remove-gup_flags-FOLL_WRITE-games-from-__get_user.patch b/src/patches/linux/0110-mm-remove-gup_flags-FOLL_WRITE-games-from-__get_user.patch new file mode 100644 index 0000000..a47f080 --- /dev/null +++ b/src/patches/linux/0110-mm-remove-gup_flags-FOLL_WRITE-games-from-__get_user.patch @@ -0,0 +1,96 @@ +From a289eb38cfb481de48e36b481c72fb2492c6d242 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds torvalds@linux-foundation.org +Date: Thu, 13 Oct 2016 13:07:36 -0700 +Subject: [PATCH] mm: remove gup_flags FOLL_WRITE games from __get_user_pages() + +commit 19be0eaffa3ac7d8eb6784ad9bdbc7d67ed8e619 upstream. + +This is an ancient bug that was actually attempted to be fixed once +(badly) by me eleven years ago in commit 4ceb5db9757a ("Fix +get_user_pages() race for write access") but that was then undone due to +problems on s390 by commit f33ea7f404e5 ("fix get_user_pages bug"). + +In the meantime, the s390 situation has long been fixed, and we can now +fix it by checking the pte_dirty() bit properly (and do it better). The +s390 dirty bit was implemented in abf09bed3cce ("s390/mm: implement +software dirty bits") which made it into v3.9. Earlier kernels will +have to look at the page state itself. + +Also, the VM has become more scalable, and what used a purely +theoretical race back then has become easier to trigger. 
+ +To fix it, we introduce a new internal FOLL_COW flag to mark the "yes, +we already did a COW" rather than play racy games with FOLL_WRITE that +is very fundamental, and then use the pte dirty flag to validate that +the FOLL_COW flag is still valid. + +Reported-and-tested-by: Phil "not Paul" Oester kernel@linuxace.com +Acked-by: Hugh Dickins hughd@google.com +Reviewed-by: Michal Hocko mhocko@suse.com +Cc: Andy Lutomirski luto@kernel.org +Cc: Kees Cook keescook@chromium.org +Cc: Oleg Nesterov oleg@redhat.com +Cc: Willy Tarreau w@1wt.eu +Cc: Nick Piggin npiggin@gmail.com +Cc: Greg Thelen gthelen@google.com +Signed-off-by: Linus Torvalds torvalds@linux-foundation.org +Signed-off-by: Jiri Slaby jslaby@suse.cz +--- + include/linux/mm.h | 1 + + mm/memory.c | 14 ++++++++++++-- + 2 files changed, 13 insertions(+), 2 deletions(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 5aef73c..6f56355 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1964,6 +1964,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, + #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ + #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ + #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ ++#define FOLL_COW 0x4000 /* internal GUP flag */ + + typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, + void *data); +diff --git a/mm/memory.c b/mm/memory.c +index e9f4d40..5c1df12 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1449,6 +1449,16 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, + } + EXPORT_SYMBOL_GPL(zap_vma_ptes); + ++/* ++ * FOLL_FORCE can write to even unwritable pte's, but only ++ * after we've gone through a COW cycle and they are dirty. 
++ */ ++static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) ++{ ++ return pte_write(pte) || ++ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); ++} ++ + /** + * follow_page_mask - look up a page descriptor from a user-virtual address + * @vma: vm_area_struct mapping @address +@@ -1569,7 +1579,7 @@ split_fallthrough: + } + if ((flags & FOLL_NUMA) && pte_numa(pte)) + goto no_page; +- if ((flags & FOLL_WRITE) && !pte_write(pte)) ++ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) + goto unlock; + + page = vm_normal_page(vma, address, pte); +@@ -1866,7 +1876,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + */ + if ((ret & VM_FAULT_WRITE) && + !(vma->vm_flags & VM_WRITE)) +- foll_flags &= ~FOLL_WRITE; ++ foll_flags |= FOLL_COW; + + cond_resched(); + } +-- +2.7.4 +
hooks/post-receive -- IPFire 2.x development tree