From a6de77de1727d5c40fdfdf841f3e8d13e0fc0140 Mon Sep 17 00:00:00 2001 From: Doug Zongker Date: Mon, 17 Aug 2009 15:38:31 -0700 Subject: [PATCH] add optimized SHA1 algorithm This optimized implementation of the SHA1 algorithm is about 28% faster than the old one (on sapphire hardware) but assumes little-endianness. Add it, but continue using the old implementation on big-endian hardware. --- include/mincrypt/sha.h | 28 +++--- libmincrypt/sha.c | 191 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 195 insertions(+), 24 deletions(-) diff --git a/include/mincrypt/sha.h b/include/mincrypt/sha.h index c52346066..2bcc52224 100644 --- a/include/mincrypt/sha.h +++ b/include/mincrypt/sha.h @@ -13,14 +13,14 @@ ** be used to endorse or promote products derived from this software ** without specific prior written permission. ** -** THIS SOFTWARE IS PROVIDED BY Google Inc. ``AS IS'' AND ANY EXPRESS OR +** THIS SOFTWARE IS PROVIDED BY Google Inc. ``AS IS'' AND ANY EXPRESS OR ** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO ** EVENT SHALL Google Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -** PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -** OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -** WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +** PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +** OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +** WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR ** OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ** ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ @@ -29,6 +29,7 @@ #define _EMBEDDED_SHA_H_ #include +#include #ifdef __cplusplus extern "C" { @@ -36,16 +37,23 @@ extern "C" { typedef struct SHA_CTX { uint64_t count; - uint8_t buf[64]; uint32_t state[5]; +#if __BYTE_ORDER == __LITTLE_ENDIAN + union { + uint8_t b[64]; + uint32_t w[16]; + } buf; +#else + uint8_t buf[64]; +#endif } SHA_CTX; -void SHA_init(SHA_CTX *ctx); -void SHA_update(SHA_CTX *ctx, const void* data, int len); -const uint8_t* SHA_final(SHA_CTX *ctx); +void SHA_init(SHA_CTX* ctx); +void SHA_update(SHA_CTX* ctx, const void* data, int len); +const uint8_t* SHA_final(SHA_CTX* ctx); /* Convenience method. Returns digest parameter value. */ -const uint8_t* SHA(const void *data, int len, uint8_t *digest); +const uint8_t* SHA(const void* data, int len, uint8_t* digest); #define SHA_DIGEST_SIZE 20 diff --git a/libmincrypt/sha.c b/libmincrypt/sha.c index d6120dae3..33d1cb3f1 100644 --- a/libmincrypt/sha.c +++ b/libmincrypt/sha.c @@ -13,20 +13,181 @@ ** be used to endorse or promote products derived from this software ** without specific prior written permission. ** -** THIS SOFTWARE IS PROVIDED BY Google Inc. ``AS IS'' AND ANY EXPRESS OR +** THIS SOFTWARE IS PROVIDED BY Google Inc. ``AS IS'' AND ANY EXPRESS OR ** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO ** EVENT SHALL Google Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -** PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -** OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -** WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +** PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +** OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +** WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR ** OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ** ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include +#include +#include + #include "mincrypt/sha.h" +#if __BYTE_ORDER == __LITTLE_ENDIAN + +// This version is about 28% faster than the generic version below, +// but assumes little-endianness. + +static inline uint32_t ror27(uint32_t val) { + return (val >> 27) | (val << 5); +} +static inline uint32_t ror2(uint32_t val) { + return (val >> 2) | (val << 30); +} +static inline uint32_t ror31(uint32_t val) { + return (val >> 31) | (val << 1); +} + +static void SHA1_Transform(SHA_CTX* ctx) { + uint32_t W[80]; + register uint32_t A, B, C, D, E; + int t; + + A = ctx->state[0]; + B = ctx->state[1]; + C = ctx->state[2]; + D = ctx->state[3]; + E = ctx->state[4]; + +#define SHA_F1(A,B,C,D,E,t) \ + E += ror27(A) + \ + (W[t] = bswap_32(ctx->buf.w[t])) + \ + (D^(B&(C^D))) + 0x5A827999; \ + B = ror2(B); + + for (t = 0; t < 15; t += 5) { + SHA_F1(A,B,C,D,E,t + 0); + SHA_F1(E,A,B,C,D,t + 1); + SHA_F1(D,E,A,B,C,t + 2); + SHA_F1(C,D,E,A,B,t + 3); + SHA_F1(B,C,D,E,A,t + 4); + } + SHA_F1(A,B,C,D,E,t + 0); // 16th one, t == 15 + +#undef SHA_F1 + +#define SHA_F1(A,B,C,D,E,t) \ + E += ror27(A) + \ + (W[t] = ror31(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16])) + \ + (D^(B&(C^D))) + 0x5A827999; \ + B = ror2(B); + + SHA_F1(E,A,B,C,D,t + 1); + SHA_F1(D,E,A,B,C,t + 2); + SHA_F1(C,D,E,A,B,t + 3); + SHA_F1(B,C,D,E,A,t + 4); + +#undef SHA_F1 + +#define SHA_F2(A,B,C,D,E,t) \ + E += ror27(A) + \ + (W[t] = ror31(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16])) + \ + (B^C^D) + 0x6ED9EBA1; \ + B = ror2(B); + + for (t = 20; t < 40; t += 5) { + SHA_F2(A,B,C,D,E,t + 0); + SHA_F2(E,A,B,C,D,t + 1); + SHA_F2(D,E,A,B,C,t + 2); + SHA_F2(C,D,E,A,B,t + 3); + SHA_F2(B,C,D,E,A,t + 4); + } + +#undef SHA_F2 + +#define SHA_F3(A,B,C,D,E,t) \ + E += ror27(A) + \ + (W[t] = ror31(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16])) + \ + ((B&C)|(D&(B|C))) + 0x8F1BBCDC; \ + B = ror2(B); + + for (; t < 60; t += 5) { + SHA_F3(A,B,C,D,E,t + 0); + SHA_F3(E,A,B,C,D,t + 1); + SHA_F3(D,E,A,B,C,t + 2); + SHA_F3(C,D,E,A,B,t + 3); + SHA_F3(B,C,D,E,A,t + 4); + } + +#undef SHA_F3 + +#define SHA_F4(A,B,C,D,E,t) \ + E += ror27(A) + \ + (W[t] = ror31(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16])) + \ + (B^C^D) + 0xCA62C1D6; \ + B = ror2(B); + + for (; t < 80; t += 5) { + SHA_F4(A,B,C,D,E,t + 0); + SHA_F4(E,A,B,C,D,t + 1); + SHA_F4(D,E,A,B,C,t + 2); + SHA_F4(C,D,E,A,B,t + 3); + SHA_F4(B,C,D,E,A,t + 4); + } + +#undef SHA_F4 + + ctx->state[0] += A; + ctx->state[1] += B; + ctx->state[2] += C; + ctx->state[3] += D; + ctx->state[4] += E; +} + +void SHA_update(SHA_CTX* ctx, const void* data, int len) { + int i = ctx->count % sizeof(ctx->buf); + const uint8_t* p = (const uint8_t*)data; + + ctx->count += len; + + while (len > sizeof(ctx->buf) - i) { + memcpy(&ctx->buf.b[i], p, sizeof(ctx->buf) - i); + len -= sizeof(ctx->buf) - i; + p += sizeof(ctx->buf) - i; + SHA1_Transform(ctx); + i = 0; + } + + while (len--) { + ctx->buf.b[i++] = *p++; + if (i == sizeof(ctx->buf)) { + SHA1_Transform(ctx); + i = 0; + } + } +} + + +const uint8_t* SHA_final(SHA_CTX* ctx) { + uint64_t cnt = ctx->count * 8; + int i; + + SHA_update(ctx, (uint8_t*)"\x80", 1); + while ((ctx->count % sizeof(ctx->buf)) != (sizeof(ctx->buf) - 8)) { + SHA_update(ctx, (uint8_t*)"\0", 1); + } + for (i = 0; i < 8; ++i) { + uint8_t tmp = cnt >> ((7 - i) * 8); + SHA_update(ctx, &tmp, 1); + } + + for (i = 0; i < 5; i++) { + ctx->buf.w[i] = bswap_32(ctx->state[i]); + } + + return ctx->buf.b; +} + +#else // __BYTE_ORDER == BIG_ENDIAN + #define rol(bits, value) (((value) << (bits)) | ((value) >> (32 - (bits)))) static void SHA1_transform(SHA_CTX *ctx) { @@ -79,15 +240,6 @@ static void SHA1_transform(SHA_CTX *ctx) { ctx->state[4] += E; } -void SHA_init(SHA_CTX *ctx) { - ctx->state[0] = 0x67452301; - ctx->state[1] = 0xEFCDAB89; - ctx->state[2] = 0x98BADCFE; - ctx->state[3] = 0x10325476; - ctx->state[4] = 0xC3D2E1F0; - ctx->count = 0; -} - void SHA_update(SHA_CTX *ctx, const void *data, int len) { int i = ctx->count % sizeof(ctx->buf); const uint8_t* p = (const uint8_t*)data; @@ -127,6 +279,17 @@ const uint8_t *SHA_final(SHA_CTX *ctx) { return ctx->buf; } +#endif // endianness + +void SHA_init(SHA_CTX* ctx) { + ctx->state[0] = 0x67452301; + ctx->state[1] = 0xEFCDAB89; + ctx->state[2] = 0x98BADCFE; + ctx->state[3] = 0x10325476; + ctx->state[4] = 0xC3D2E1F0; + ctx->count = 0; +} + /* Convenience function */ const uint8_t* SHA(const void *data, int len, uint8_t *digest) { const uint8_t *p;