/* MIT License * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #if !defined(SIMDE_X86_AES_H) #define SIMDE_X86_AES_H /* * Advanced Encryption Standard * @author Dani Huertas * @email huertas.dani@gmail.com * * Based on the document FIPS PUB 197 */ #include "sse2.h" /* * Multiplication in GF(2^8) * http://en.wikipedia.org/wiki/Finite_field_arithmetic * Irreducible polynomial m(x) = x8 + x4 + x3 + x + 1 * * NOTE: This function can be easily replaced with a look up table for a speed * boost, at the expense of an increase in memory size. SIMDE_FUNCTION_ATTRIBUTES uint8_t gmult(uint8_t a, uint8_t b) { uint8_t p = 0, i = 0, hbs = 0; for (i = 0; i < 8; i++) { if (b & 1) { p ^= a; } hbs = a & 0x80; a <<= 1; if (hbs) a ^= 0x1b; // 0000 0001 0001 1011 b >>= 1; } return (uint8_t)p; } */ #if !(defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)) #include "../simde-aes.h" /* * Transformation in the Cipher and Inverse Cipher in which a Round * Key is added to the State using an XOR operation. The length of a * Round Key equals the size of the State (i.e., for Nb = 4, the Round * Key length equals 128 bits/16 bytes). */ SIMDE_FUNCTION_ATTRIBUTES void simde_x_aes_add_round_key(uint8_t *state, simde__m128i_private w, uint8_t r) { int Nb = simde_x_aes_Nb; uint8_t c; for (c = 0; c < Nb; c++) { state[Nb*0+c] = state[Nb*0+c]^w.u8[4*Nb*r+4*c+0]; state[Nb*1+c] = state[Nb*1+c]^w.u8[4*Nb*r+4*c+1]; state[Nb*2+c] = state[Nb*2+c]^w.u8[4*Nb*r+4*c+2]; state[Nb*3+c] = state[Nb*3+c]^w.u8[4*Nb*r+4*c+3]; } } /* * Transformation in the Cipher that takes all of the columns of the * State and mixes their data (independently of one another) to * produce new columns. */ SIMDE_FUNCTION_ATTRIBUTES void simde_x_aes_mix_columns(uint8_t *state) { int Nb = simde_x_aes_Nb; // uint8_t k[] = {0x02, 0x01, 0x01, 0x03}; // a(x) = {02} + {01}x + {01}x2 + {03}x3 uint8_t i, j, col[4], res[4]; for (j = 0; j < Nb; j++) { for (i = 0; i < 4; i++) { col[i] = state[Nb*i+j]; } //coef_mult(k, col, res); simde_x_aes_coef_mult_lookup(0, col, res); for (i = 0; i < 4; i++) { state[Nb*i+j] = res[i]; } } } /* * Transformation in the Inverse Cipher that is the inverse of * MixColumns(). */ SIMDE_FUNCTION_ATTRIBUTES void simde_x_aes_inv_mix_columns(uint8_t *state) { int Nb = simde_x_aes_Nb; // uint8_t k[] = {0x0e, 0x09, 0x0d, 0x0b}; // a(x) = {0e} + {09}x + {0d}x2 + {0b}x3 uint8_t i, j, col[4], res[4]; for (j = 0; j < Nb; j++) { for (i = 0; i < 4; i++) { col[i] = state[Nb*i+j]; } //coef_mult(k, col, res); simde_x_aes_coef_mult_lookup(4, col, res); for (i = 0; i < 4; i++) { state[Nb*i+j] = res[i]; } } } /* * Transformation in the Cipher that processes the State by cyclically * shifting the last three rows of the State by different offsets. */ SIMDE_FUNCTION_ATTRIBUTES void simde_x_aes_shift_rows(uint8_t *state) { int Nb = simde_x_aes_Nb; uint8_t i, k, s, tmp; for (i = 1; i < 4; i++) { // shift(1,4)=1; shift(2,4)=2; shift(3,4)=3 // shift(r, 4) = r; s = 0; while (s < i) { tmp = state[Nb*i+0]; for (k = 1; k < Nb; k++) { state[Nb*i+k-1] = state[Nb*i+k]; } state[Nb*i+Nb-1] = tmp; s++; } } } /* * Transformation in the Inverse Cipher that is the inverse of * ShiftRows(). */ SIMDE_FUNCTION_ATTRIBUTES void simde_x_aes_inv_shift_rows(uint8_t *state) { uint8_t Nb = simde_x_aes_Nb; uint8_t i, k, s, tmp; for (i = 1; i < 4; i++) { s = 0; while (s < i) { tmp = state[Nb*i+Nb-1]; for (k = Nb-1; k > 0; k--) { state[Nb*i+k] = state[Nb*i+k-1]; } state[Nb*i+0] = tmp; s++; } } } /* * Transformation in the Cipher that processes the State using a non * linear byte substitution table (S-box) that operates on each of the * State bytes independently. */ SIMDE_FUNCTION_ATTRIBUTES void simde_x_aes_sub_bytes(uint8_t *state) { int Nb = simde_x_aes_Nb; uint8_t i, j; for (i = 0; i < 4; i++) { for (j = 0; j < Nb; j++) { // s_box row: yyyy ---- // s_box col: ---- xxxx // s_box[16*(yyyy) + xxxx] == s_box[yyyyxxxx] state[Nb*i+j] = simde_x_aes_s_box[state[Nb*i+j]]; } } } /* * Transformation in the Inverse Cipher that is the inverse of * SubBytes(). */ SIMDE_FUNCTION_ATTRIBUTES void simde_x_aes_inv_sub_bytes(uint8_t *state) { int Nb = simde_x_aes_Nb; uint8_t i, j; for (i = 0; i < 4; i++) { for (j = 0; j < Nb; j++) { state[Nb*i+j] = simde_x_aes_inv_s_box[state[Nb*i+j]]; } } } /* * Performs the AES cipher operation */ SIMDE_FUNCTION_ATTRIBUTES void simde_x_aes_enc(simde__m128i_private in, simde__m128i_private *out, simde__m128i_private w, int is_last) { int Nb = simde_x_aes_Nb; uint8_t state[4*simde_x_aes_Nb]; uint8_t r = 0, i, j; for (i = 0; i < 4; i++) { for (j = 0; j < Nb; j++) { state[Nb*i+j] = in.u8[i+4*j]; } } simde_x_aes_sub_bytes(state); simde_x_aes_shift_rows(state); if (!is_last) simde_x_aes_mix_columns(state); simde_x_aes_add_round_key(state, w, r); for (i = 0; i < 4; i++) { for (j = 0; j < Nb; j++) { out->u8[i+4*j] = state[Nb*i+j]; } } } /* * Performs the AES inverse cipher operation */ SIMDE_FUNCTION_ATTRIBUTES void simde_x_aes_dec(simde__m128i_private in, simde__m128i_private *out, simde__m128i_private w, int is_last) { int Nb = simde_x_aes_Nb; uint8_t state[4*simde_x_aes_Nb]; uint8_t r = 0, i, j; for (i = 0; i < 4; i++) { for (j = 0; j < Nb; j++) { state[Nb*i+j] = in.u8[i+4*j]; } } simde_x_aes_inv_shift_rows(state); simde_x_aes_inv_sub_bytes(state); if (!is_last) simde_x_aes_inv_mix_columns(state); simde_x_aes_add_round_key(state, w, r); for (i = 0; i < 4; i++) { for (j = 0; j < Nb; j++) { out->u8[i+4*j] = state[Nb*i+j]; } } } #endif // if !(defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)) SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_aesenc_si128(simde__m128i a, simde__m128i round_key) { #if defined(SIMDE_X86_AES_NATIVE) return _mm_aesenc_si128(a, round_key); #else simde__m128i_private result_; simde__m128i_private a_ = simde__m128i_to_private(a); simde__m128i_private round_key_ = simde__m128i_to_private(round_key); #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) result_.neon_u8 = veorq_u8( vaesmcq_u8(vaeseq_u8(a_.neon_u8, vdupq_n_u8(0))), round_key_.neon_u8); #else simde_x_aes_enc(a_, &result_, round_key_, 0); #endif return simde__m128i_from_private(result_); #endif } #if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES) #define _mm_aesenc_si128(a, b) simde_mm_aesenc_si128(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_aesdec_si128(simde__m128i a, simde__m128i round_key) { #if defined(SIMDE_X86_AES_NATIVE) return _mm_aesdec_si128(a, round_key); #else simde__m128i_private result_; simde__m128i_private a_ = simde__m128i_to_private(a); simde__m128i_private round_key_ = simde__m128i_to_private(round_key); #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) result_.neon_u8 = veorq_u8( vaesimcq_u8(vaesdq_u8(a_.neon_u8, vdupq_n_u8(0))), round_key_.neon_u8); #else simde_x_aes_dec(a_, &result_, round_key_, 0); #endif return simde__m128i_from_private(result_); #endif } #if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES) #define _mm_aesdec_si128(a, b) simde_mm_aesdec_si128(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_aesenclast_si128(simde__m128i a, simde__m128i round_key) { #if defined(SIMDE_X86_AES_NATIVE) return _mm_aesenclast_si128(a, round_key); #else simde__m128i_private result_; simde__m128i_private a_ = simde__m128i_to_private(a); simde__m128i_private round_key_ = simde__m128i_to_private(round_key); #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) result_.neon_u8 = vaeseq_u8(a_.neon_u8, vdupq_n_u8(0)); result_.neon_i32 = veorq_s32(result_.neon_i32, round_key_.neon_i32); // _mm_xor_si128 #else simde_x_aes_enc(a_, &result_, round_key_, 1); #endif return simde__m128i_from_private(result_); #endif } #if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES) #define _mm_aesenclast_si128(a, b) simde_mm_aesenclast_si128(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_aesdeclast_si128(simde__m128i a, simde__m128i round_key) { #if defined(SIMDE_X86_AES_NATIVE) return _mm_aesdeclast_si128(a, round_key); #else simde__m128i_private result_; simde__m128i_private a_ = simde__m128i_to_private(a); simde__m128i_private round_key_ = simde__m128i_to_private(round_key); #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) result_.neon_u8 = veorq_u8( vaesdq_u8(a_.neon_u8, vdupq_n_u8(0)), round_key_.neon_u8); #else simde_x_aes_dec(a_, &result_, round_key_, 1); #endif return simde__m128i_from_private(result_); #endif } #if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES) #define _mm_aesdeclast_si128(a, b) simde_mm_aesdeclast_si128(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_aesimc_si128(simde__m128i a) { #if defined(SIMDE_X86_AES_NATIVE) return _mm_aesimc_si128(a); #else simde__m128i_private result_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO) result_.neon_u8 = vaesimcq_u8(a_.neon_u8); #else int Nb = simde_x_aes_Nb; // uint8_t k[] = {0x0e, 0x09, 0x0d, 0x0b}; // a(x) = {0e} + {09}x + {0d}x2 + {0b}x3 uint8_t i, j, col[4], res[4]; for (j = 0; j < Nb; j++) { for (i = 0; i < 4; i++) { col[i] = a_.u8[Nb*j+i]; } //coef_mult(k, col, res); simde_x_aes_coef_mult_lookup(4, col, res); for (i = 0; i < 4; i++) { result_.u8[Nb*j+i] = res[i]; } } #endif return simde__m128i_from_private(result_); #endif } #if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES) #define _mm_aesimc_si128(a) simde_mm_aesimc_si128(a) #endif #undef simde_x_aes_Nb #endif /* !defined(SIMDE_X86_AES_H) */