diff --git a/src/se050_blake2s.c b/src/se050_blake2s.c index bc4c54d..013bd98 100644 --- a/src/se050_blake2s.c +++ b/src/se050_blake2s.c @@ -22,8 +22,8 @@ /* Initialization vector */ static const uint32_t BLAKE2S_IV[8] = { - 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, - 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 + 0x6a09e667UL, 0xbb67ae85UL, 0x3c6ef372UL, 0xa54ff53aUL, + 0x510e527fUL, 0x9b05688cUL, 0x1f83d9abUL, 0x5be0cd19UL }; /* Permutation table */ @@ -74,73 +74,115 @@ static void blake2s_compress(se050_blake2s_ctx_t *ctx, const uint8_t *block) uint32_t v[16]; uint32_t m[16]; - /* Load message */ + /* Load message block */ for (int i = 0; i < 16; i++) { m[i] = load32_le(block + i * 4); } /* Initialize working vector */ - for (int i = 0; i < 8; i++) { - v[i] = ctx->h[i]; - v[i + 8] = BLAKE2S_IV[i]; - } + v[0] = ctx->h[0]; + v[1] = ctx->h[1]; + v[2] = ctx->h[2]; + v[3] = ctx->h[3]; + v[4] = ctx->h[4]; + v[5] = ctx->h[5]; + v[6] = ctx->h[6]; + v[7] = ctx->h[7]; + v[8] = BLAKE2S_IV[0]; + v[9] = BLAKE2S_IV[1]; + v[10] = BLAKE2S_IV[2]; + v[11] = BLAKE2S_IV[3]; + v[12] = BLAKE2S_IV[4] ^ ctx->t[0]; + v[13] = BLAKE2S_IV[5] ^ ctx->t[1]; + v[14] = BLAKE2S_IV[6] ^ ctx->f[0]; + v[15] = BLAKE2S_IV[7] ^ ctx->f[1]; - v[12] ^= ctx->t[0]; - v[13] ^= ctx->t[1]; - v[14] ^= ctx->f[0]; - v[15] ^= ctx->f[1]; - - /* 10 rounds */ + /* 10 rounds of mixing */ for (int r = 0; r < 10; r++) { const uint8_t *s = BLAKE2S_SIGMA[r]; - /* G function - column step */ - v[0] += v[4] + m[s[0]]; v[12] = rotr32(v[12] ^ v[0], 16); - v[8] += v[12]; v[4] = rotr32(v[4] ^ v[8], 12); - v[0] += v[4] + m[s[1]]; v[12] = rotr32(v[12] ^ v[0], 8); - v[8] += v[12]; v[4] = rotr32(v[4] ^ v[8], 7); + v[0] = v[0] + v[4] + m[s[0]]; + v[12] = rotr32(v[12] ^ v[0], 16); + v[8] = v[8] + v[12]; + v[4] = rotr32(v[4] ^ v[8], 12); + v[0] = v[0] + v[4] + m[s[1]]; + v[12] = rotr32(v[12] ^ v[0], 8); + v[8] = v[8] + v[12]; + v[4] = rotr32(v[4] ^ v[8], 7); - v[1] += v[5] + m[s[2]]; v[13] = rotr32(v[13] ^ v[1], 16); - v[9] += v[13]; v[5] = rotr32(v[5] ^ v[9], 12); - v[1] += v[5] + m[s[3]]; v[13] = rotr32(v[13] ^ v[1], 8); - v[9] += v[13]; v[5] = rotr32(v[5] ^ v[9], 7); + v[1] = v[1] + v[5] + m[s[2]]; + v[13] = rotr32(v[13] ^ v[1], 16); + v[9] = v[9] + v[13]; + v[5] = rotr32(v[5] ^ v[9], 12); + v[1] = v[1] + v[5] + m[s[3]]; + v[13] = rotr32(v[13] ^ v[1], 8); + v[9] = v[9] + v[13]; + v[5] = rotr32(v[5] ^ v[9], 7); - v[2] += v[6] + m[s[4]]; v[14] = rotr32(v[14] ^ v[2], 16); - v[10] += v[14]; v[6] = rotr32(v[6] ^ v[10], 12); - v[2] += v[6] + m[s[5]]; v[14] = rotr32(v[14] ^ v[2], 8); - v[10] += v[14]; v[6] = rotr32(v[6] ^ v[10], 7); + v[2] = v[2] + v[6] + m[s[4]]; + v[14] = rotr32(v[14] ^ v[2], 16); + v[10] = v[10] + v[14]; + v[6] = rotr32(v[6] ^ v[10], 12); + v[2] = v[2] + v[6] + m[s[5]]; + v[14] = rotr32(v[14] ^ v[2], 8); + v[10] = v[10] + v[14]; + v[6] = rotr32(v[6] ^ v[10], 7); - v[3] += v[7] + m[s[6]]; v[15] = rotr32(v[15] ^ v[3], 16); - v[11] += v[15]; v[7] = rotr32(v[7] ^ v[11], 12); - v[3] += v[7] + m[s[7]]; v[15] = rotr32(v[15] ^ v[3], 8); - v[11] += v[15]; v[7] = rotr32(v[7] ^ v[11], 7); + v[3] = v[3] + v[7] + m[s[6]]; + v[15] = rotr32(v[15] ^ v[3], 16); + v[11] = v[11] + v[15]; + v[7] = rotr32(v[7] ^ v[11], 12); + v[3] = v[3] + v[7] + m[s[7]]; + v[15] = rotr32(v[15] ^ v[3], 8); + v[11] = v[11] + v[15]; + v[7] = rotr32(v[7] ^ v[11], 7); - /* Diagonal step */ - v[0] += v[5] + m[s[8]]; v[15] = rotr32(v[15] ^ v[0], 16); - v[10] += v[15]; v[5] = rotr32(v[5] ^ v[10], 12); - v[0] += v[5] + m[s[9]]; v[15] = rotr32(v[15] ^ v[0], 8); - v[10] += v[15]; v[5] = rotr32(v[5] ^ v[10], 7); + v[0] = v[0] + v[5] + m[s[8]]; + v[15] = rotr32(v[15] ^ v[0], 16); + v[10] = v[10] + v[15]; + v[5] = rotr32(v[5] ^ v[10], 12); + v[0] = v[0] + v[5] + m[s[9]]; + v[15] = rotr32(v[15] ^ v[0], 8); + v[10] = v[10] + v[15]; + v[5] = rotr32(v[5] ^ v[10], 7); - v[1] += v[6] + m[s[10]]; v[12] = rotr32(v[12] ^ v[1], 16); - v[11] += v[12]; v[6] = rotr32(v[6] ^ v[11], 12); - v[1] += v[6] + m[s[11]]; v[12] = rotr32(v[12] ^ v[1], 8); - v[11] += v[12]; v[6] = rotr32(v[6] ^ v[11], 7); + v[1] = v[1] + v[6] + m[s[10]]; + v[12] = rotr32(v[12] ^ v[1], 16); + v[11] = v[11] + v[12]; + v[6] = rotr32(v[6] ^ v[11], 12); + v[1] = v[1] + v[6] + m[s[11]]; + v[12] = rotr32(v[12] ^ v[1], 8); + v[11] = v[11] + v[12]; + v[6] = rotr32(v[6] ^ v[11], 7); - v[2] += v[7] + m[s[12]]; v[13] = rotr32(v[13] ^ v[2], 16); - v[8] += v[13]; v[7] = rotr32(v[7] ^ v[8], 12); - v[2] += v[7] + m[s[13]]; v[13] = rotr32(v[13] ^ v[2], 8); - v[8] += v[13]; v[7] = rotr32(v[7] ^ v[8], 7); + v[2] = v[2] + v[7] + m[s[12]]; + v[13] = rotr32(v[13] ^ v[2], 16); + v[8] = v[8] + v[13]; + v[7] = rotr32(v[7] ^ v[8], 12); + v[2] = v[2] + v[7] + m[s[13]]; + v[13] = rotr32(v[13] ^ v[2], 8); + v[8] = v[8] + v[13]; + v[7] = rotr32(v[7] ^ v[8], 7); - v[3] += v[4] + m[s[14]]; v[14] = rotr32(v[14] ^ v[3], 16); - v[9] += v[14]; v[4] = rotr32(v[4] ^ v[9], 12); - v[3] += v[4] + m[s[15]]; v[14] = rotr32(v[14] ^ v[3], 8); - v[9] += v[14]; v[4] = rotr32(v[4] ^ v[9], 7); + v[3] = v[3] + v[4] + m[s[14]]; + v[14] = rotr32(v[14] ^ v[3], 16); + v[9] = v[9] + v[14]; + v[4] = rotr32(v[4] ^ v[9], 12); + v[3] = v[3] + v[4] + m[s[15]]; + v[14] = rotr32(v[14] ^ v[3], 8); + v[9] = v[9] + v[14]; + v[4] = rotr32(v[4] ^ v[9], 7); } - /* Final XOR */ - for (int i = 0; i < 8; i++) { - ctx->h[i] ^= v[i] ^ v[i + 8]; - } + /* Finalize: h[i] ^= v[i] ^ v[i+8] */ + ctx->h[0] ^= v[0] ^ v[8]; + ctx->h[1] ^= v[1] ^ v[9]; + ctx->h[2] ^= v[2] ^ v[10]; + ctx->h[3] ^= v[3] ^ v[11]; + ctx->h[4] ^= v[4] ^ v[12]; + ctx->h[5] ^= v[5] ^ v[13]; + ctx->h[6] ^= v[6] ^ v[14]; + ctx->h[7] ^= v[7] ^ v[15]; } /* ============================================================================ @@ -153,16 +195,13 @@ int se050_blake2s_init(se050_blake2s_ctx_t *ctx, size_t outlen) return -1; } - /* Initialize hash state with IV XORed with parameter block */ - /* Parameter block: 0x01010000 ^ (0 << 8) ^ outlen = 0x01010000 ^ outlen */ - ctx->h[0] = BLAKE2S_IV[0] ^ 0x01010000 ^ outlen; - ctx->h[1] = BLAKE2S_IV[1]; - ctx->h[2] = BLAKE2S_IV[2]; - ctx->h[3] = BLAKE2S_IV[3]; - ctx->h[4] = BLAKE2S_IV[4]; - ctx->h[5] = BLAKE2S_IV[5]; - ctx->h[6] = BLAKE2S_IV[6]; - ctx->h[7] = BLAKE2S_IV[7]; + /* Initialize hash state */ + for (int i = 0; i < 8; i++) { + ctx->h[i] = BLAKE2S_IV[i]; + } + + /* XOR with parameter block: 0x01010000 ^ outlen */ + ctx->h[0] ^= 0x01010000 ^ outlen; ctx->t[0] = 0; ctx->t[1] = 0; @@ -265,7 +304,7 @@ int se050_blake2s_final(se050_blake2s_ctx_t *ctx, void *out, size_t outlen) return -1; } - /* Update counter */ + /* Update counter with remaining data */ ctx->t[0] += ctx->buflen; if (ctx->t[0] < ctx->buflen) { ctx->t[1]++; @@ -275,9 +314,15 @@ int se050_blake2s_final(se050_blake2s_ctx_t *ctx, void *out, size_t outlen) ctx->f[0] = 0xFFFFFFFF; ctx->f[1] = 0xFFFFFFFF; - /* Pad and compress last block */ - memset(ctx->buf + ctx->buflen, 0, BLAKE2S_BLOCK_SIZE - ctx->buflen); - blake2s_compress(ctx, ctx->buf); + /* Pad buffer to full block */ + uint8_t block[BLAKE2S_BLOCK_SIZE]; + memset(block, 0, BLAKE2S_BLOCK_SIZE); + if (ctx->buflen > 0) { + memcpy(block, ctx->buf, ctx->buflen); + } + + /* Compress last block */ + blake2s_compress(ctx, block); /* Output digest */ uint8_t digest[32];