/**
 * @file se050_blake2s.c
 * @brief BLAKE2s Hash Function Implementation
 * Based on BLAKE2 official reference implementation
 */

#include "se050_blake2s.h"
#include "se050_crypto_utils.h"

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Sizes used throughout this file. */
enum {
    BLAKE2S_BLOCK_BYTES = 64, /* bytes consumed by one compression call */
    BLAKE2S_OUT_BYTES   = 32, /* maximum digest length */
    BLAKE2S_KEY_BYTES   = 32, /* maximum key length */
    BLAKE2S_ROUNDS      = 10
};

/* Initialisation vector (RFC 7693, section 2.6). */
static const uint32_t BLAKE2S_IV[8] = {
    0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
    0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};

/* Message word schedule, one row per round (RFC 7693, section 2.7). */
static const uint8_t BLAKE2S_SIGMA[10][16] = {
    {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
    { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
    { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
    {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
    {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
    {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
    { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
    { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
    {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
    { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 }
};

/*
 * Internal hashing state. The public se050_blake2s_ctx_t is reinterpreted as
 * this structure by every entry point below.
 * NOTE(review): this relies on se050_blake2s_ctx_t (declared in the header)
 * being at least as large and as strictly aligned as this struct - confirm.
 */
typedef struct {
    uint32_t h[8];    /* chaining value */
    uint32_t t[2];    /* byte counter, low word first */
    uint32_t f[2];    /* finalisation flags */
    uint8_t  buf[64]; /* pending input not yet compressed */
    size_t   buflen;  /* number of valid bytes in buf (0..64) */
    size_t   outlen;  /* digest length chosen at init (1..32) */
} blake2s_internal_t;

/* Read a 32-bit little-endian word from p. */
static inline uint32_t load32_le(const uint8_t *p)
{
    return (uint32_t)p[0] |
           ((uint32_t)p[1] << 8) |
           ((uint32_t)p[2] << 16) |
           ((uint32_t)p[3] << 24);
}

/* Write v to p as a 32-bit little-endian word. */
static inline void store32_le(uint8_t *p, uint32_t v)
{
    p[0] = (uint8_t)v;
    p[1] = (uint8_t)(v >> 8);
    p[2] = (uint8_t)(v >> 16);
    p[3] = (uint8_t)(v >> 24);
}

/* Rotate x right by n bits; callers only pass n in 1..31. */
static inline uint32_t rotr32(uint32_t x, unsigned int n)
{
    return (x >> n) | (x << (32 - n));
}

/* BLAKE2s mixing function G applied to working-vector words a, b, c, d. */
static inline void blake2s_g(uint32_t v[16], unsigned int a, unsigned int b,
                             unsigned int c, unsigned int d,
                             uint32_t x, uint32_t y)
{
    v[a] = v[a] + v[b] + x;
    v[d] = rotr32(v[d] ^ v[a], 16);
    v[c] = v[c] + v[d];
    v[b] = rotr32(v[b] ^ v[c], 12);
    v[a] = v[a] + v[b] + y;
    v[d] = rotr32(v[d] ^ v[a], 8);
    v[c] = v[c] + v[d];
    v[b] = rotr32(v[b] ^ v[c], 7);
}

/* Add inc to the 64-bit byte counter held as two 32-bit words. */
static inline void blake2s_increment_counter(blake2s_internal_t *S, uint32_t inc)
{
    S->t[0] += inc;
    if (S->t[0] < inc) {
        S->t[1]++; /* low word wrapped: carry into the high word */
    }
}

/*
 * Compress one 64-byte block into the chaining value S->h, using the counter
 * and finalisation flags currently stored in S.
 */
static void blake2s_compress(blake2s_internal_t *S, const uint8_t in[64])
{
    uint32_t m[16];
    uint32_t v[16];
    size_t i;

    for (i = 0; i < 16; i++) {
        m[i] = load32_le(in + i * 4);
    }

    for (i = 0; i < 8; i++) {
        v[i] = S->h[i];
    }
    v[8]  = BLAKE2S_IV[0];
    v[9]  = BLAKE2S_IV[1];
    v[10] = BLAKE2S_IV[2];
    v[11] = BLAKE2S_IV[3];
    v[12] = S->t[0] ^ BLAKE2S_IV[4];
    v[13] = S->t[1] ^ BLAKE2S_IV[5];
    v[14] = S->f[0] ^ BLAKE2S_IV[6];
    v[15] = S->f[1] ^ BLAKE2S_IV[7];

    for (i = 0; i < BLAKE2S_ROUNDS; i++) {
        const uint8_t *s = BLAKE2S_SIGMA[i];

        /* Column step. */
        blake2s_g(v, 0, 4,  8, 12, m[s[0]],  m[s[1]]);
        blake2s_g(v, 1, 5,  9, 13, m[s[2]],  m[s[3]]);
        blake2s_g(v, 2, 6, 10, 14, m[s[4]],  m[s[5]]);
        blake2s_g(v, 3, 7, 11, 15, m[s[6]],  m[s[7]]);
        /* Diagonal step. */
        blake2s_g(v, 0, 5, 10, 15, m[s[8]],  m[s[9]]);
        blake2s_g(v, 1, 6, 11, 12, m[s[10]], m[s[11]]);
        blake2s_g(v, 2, 7,  8, 13, m[s[12]], m[s[13]]);
        blake2s_g(v, 3, 4,  9, 14, m[s[14]], m[s[15]]);
    }

    for (i = 0; i < 8; i++) {
        S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
    }
}

/*
 * Reset S for a digest of outlen bytes and a key of keylen bytes (0 when
 * unkeyed). Callers have already validated both lengths.
 */
static void blake2s_reset(blake2s_internal_t *S, size_t outlen, size_t keylen)
{
    for (size_t i = 0; i < 8; i++) {
        S->h[i] = BLAKE2S_IV[i];
    }
    /* Parameter block word 0: digest length, key length, fanout=1, depth=1. */
    S->h[0] ^= 0x01010000UL ^ ((uint32_t)keylen << 8) ^ (uint32_t)outlen;

    S->t[0] = 0;
    S->t[1] = 0;
    S->f[0] = 0;
    S->f[1] = 0;
    S->buflen = 0;
    S->outlen = outlen;
}

/**
 * @brief Initialise an unkeyed BLAKE2s context.
 * @param ctx    Context to initialise.
 * @param outlen Digest length in bytes, 1..32.
 * @return 0 on success, -1 if ctx is NULL or outlen is out of range.
 */
int se050_blake2s_init(se050_blake2s_ctx_t *ctx, size_t outlen)
{
    if (!ctx || outlen == 0 || outlen > BLAKE2S_OUT_BYTES) {
        return -1;
    }

    blake2s_reset((blake2s_internal_t *)ctx, outlen, 0);
    return 0;
}

/**
 * @brief Initialise a keyed BLAKE2s context.
 * @param ctx    Context to initialise.
 * @param outlen Digest length in bytes, 1..32.
 * @param key    Key bytes.
 * @param keylen Key length in bytes, 1..32.
 * @return 0 on success, -1 on a NULL pointer or an out-of-range length.
 */
int se050_blake2s_init_key(se050_blake2s_ctx_t *ctx, size_t outlen,
                           const void *key, size_t keylen)
{
    if (!ctx || !key ||
        keylen == 0 || keylen > BLAKE2S_KEY_BYTES ||
        outlen == 0 || outlen > BLAKE2S_OUT_BYTES) {
        return -1;
    }

    blake2s_reset((blake2s_internal_t *)ctx, outlen, keylen);

    /* The key, zero-padded to a full block, is absorbed as the first block. */
    uint8_t block[BLAKE2S_BLOCK_BYTES];
    memset(block, 0, sizeof block);
    memcpy(block, key, keylen);
    int ret = se050_blake2s_update(ctx, block, sizeof block);
    memzero_explicit(block, sizeof block);
    return ret;
}

/**
 * @brief Absorb more message bytes.
 *
 * A completely filled buffer is deliberately left uncompressed until further
 * input arrives: the last block of the message has to be compressed by
 * se050_blake2s_final() with the finalisation flag set, and this function
 * cannot know whether the current block is the last one.
 *
 * @param ctx  Initialised context.
 * @param data Input bytes; may be NULL only when len is 0.
 * @param len  Number of input bytes.
 * @return 0 on success, -1 on a NULL pointer or a corrupted context.
 */
int se050_blake2s_update(se050_blake2s_ctx_t *ctx, const void *data, size_t len)
{
    blake2s_internal_t *inner = (blake2s_internal_t *)ctx;
    const uint8_t *in = (const uint8_t *)data;

    if (!ctx) {
        return -1;
    }
    if (len == 0) {
        return 0;
    }
    if (!data) {
        return -1;
    }
    if (inner->buflen > BLAKE2S_BLOCK_BYTES) {
        return -1; /* uninitialised or corrupted context */
    }

    size_t left = inner->buflen;
    size_t fill = BLAKE2S_BLOCK_BYTES - left;

    /*
     * Flush the buffer only when input remains after topping it up. The
     * comparison is strictly greater-than: input that fills the buffer exactly
     * stays buffered, otherwise a message ending on a block boundary would be
     * finalised over an empty block and yield a wrong digest.
     */
    if (left != 0 && len > fill) {
        memcpy(inner->buf + left, in, fill);
        blake2s_increment_counter(inner, BLAKE2S_BLOCK_BYTES);
        blake2s_compress(inner, inner->buf);
        inner->buflen = 0;
        in += fill;
        len -= fill;
    }

    /* With an empty buffer, compress whole blocks straight from the input,
     * always holding back the last (possibly full) block. */
    if (inner->buflen == 0) {
        while (len > BLAKE2S_BLOCK_BYTES) {
            blake2s_increment_counter(inner, BLAKE2S_BLOCK_BYTES);
            blake2s_compress(inner, in);
            in += BLAKE2S_BLOCK_BYTES;
            len -= BLAKE2S_BLOCK_BYTES;
        }
    }

    /* Whatever is left fits in the buffer (at most one full block). */
    memcpy(inner->buf + inner->buflen, in, len);
    inner->buflen += len;
    return 0;
}

/**
 * @brief Finish hashing, write the digest and wipe the context.
 * @param ctx    Initialised context.
 * @param out    Destination for the digest.
 * @param outlen Capacity of out; must be at least the digest length chosen at
 *               init. Exactly that many bytes are written.
 * @return 0 on success, -1 on a NULL pointer, a too-small output buffer, or a
 *         context that is uninitialised, wiped or already finalised.
 */
int se050_blake2s_final(se050_blake2s_ctx_t *ctx, void *out, size_t outlen)
{
    blake2s_internal_t *inner = (blake2s_internal_t *)ctx;
    uint8_t buffer[BLAKE2S_OUT_BYTES] = {0};

    if (!ctx || !out) {
        return -1;
    }
    /* A wiped context has outlen == 0; reject it and any out-of-range state
     * instead of silently producing nothing or over-reading the buffers. */
    if (inner->outlen == 0 || inner->outlen > BLAKE2S_OUT_BYTES ||
        inner->buflen > BLAKE2S_BLOCK_BYTES) {
        return -1;
    }
    if (outlen < inner->outlen) {
        return -1;
    }
    if (inner->f[0] != 0) {
        return -1; /* already finalised */
    }

    blake2s_increment_counter(inner, (uint32_t)inner->buflen);
    inner->f[0] = (uint32_t)-1; /* mark the last block */
    memset(inner->buf + inner->buflen, 0, BLAKE2S_BLOCK_BYTES - inner->buflen);
    blake2s_compress(inner, inner->buf);

    for (size_t i = 0; i < 8; i++) {
        store32_le(buffer + i * 4, inner->h[i]);
    }
    memcpy(out, buffer, inner->outlen);

    /* The full-width digest copy may be key material; do not leave it behind. */
    memzero_explicit(buffer, sizeof buffer);
    se050_blake2s_zeroize(ctx);
    return 0;
}

/**
 * @brief One-shot unkeyed BLAKE2s.
 * @param out    Destination for the digest (outlen bytes).
 * @param outlen Digest length in bytes, 1..32.
 * @param data   Message bytes; may be NULL only when len is 0.
 * @param len    Message length in bytes.
 * @return 0 on success, -1 on invalid arguments.
 */
int se050_blake2s(void *out, size_t outlen, const void *data, size_t len)
{
    se050_blake2s_ctx_t ctx;

    int ret = se050_blake2s_init(&ctx, outlen);
    if (ret != 0) {
        return ret;
    }

    ret = se050_blake2s_update(&ctx, data, len);
    if (ret != 0) {
        se050_blake2s_zeroize(&ctx);
        return ret;
    }

    return se050_blake2s_final(&ctx, out, outlen);
}

/**
 * @brief One-shot keyed BLAKE2s.
 * @param out    Destination for the digest (outlen bytes).
 * @param outlen Digest length in bytes, 1..32.
 * @param key    Key bytes.
 * @param keylen Key length in bytes, 1..32.
 * @param data   Message bytes; may be NULL only when len is 0.
 * @param len    Message length in bytes.
 * @return 0 on success, -1 on invalid arguments.
 */
int se050_blake2s_keyed(void *out, size_t outlen,
                        const void *key, size_t keylen,
                        const void *data, size_t len)
{
    se050_blake2s_ctx_t ctx;

    int ret = se050_blake2s_init_key(&ctx, outlen, key, keylen);
    if (ret != 0) {
        return ret;
    }

    ret = se050_blake2s_update(&ctx, data, len);
    if (ret != 0) {
        se050_blake2s_zeroize(&ctx);
        return ret;
    }

    return se050_blake2s_final(&ctx, out, outlen);
}

/**
 * @brief Wipe the hashing state held in ctx. A NULL ctx is ignored.
 */
void se050_blake2s_zeroize(se050_blake2s_ctx_t *ctx)
{
    if (ctx) {
        blake2s_internal_t *inner = (blake2s_internal_t *)ctx;
        memzero_explicit(inner, sizeof(blake2s_internal_t));
    }
}

/**
 * @brief Keyed BLAKE2s-256 of input under the fixed label
 *        "wireguard key derivation".
 * @param out   Receives 32 bytes.
 * @param input Input bytes; must not be NULL.
 * @param inlen Input length in bytes.
 * @return 0 on success, -1 on a NULL pointer.
 */
int se050_wireguard_derive_key(uint8_t out[32], const uint8_t *input, size_t inlen)
{
    if (!out || !input) {
        return -1;
    }

    /* ASCII "wireguard key derivation" */
    static const uint8_t LABEL[24] = {
        0x77, 0x69, 0x72, 0x65, 0x67, 0x75, 0x61, 0x72,
        0x64, 0x20, 0x6b, 0x65, 0x79, 0x20, 0x64, 0x65,
        0x72, 0x69, 0x76, 0x61, 0x74, 0x69, 0x6f, 0x6e
    };

    return se050_blake2s_keyed(out, 32, LABEL, 24, input, inlen);
}

/**
 * @brief Keyed BLAKE2s-256 of input under the fixed label
 *        "wireguard generate sec".
 * @param out   Receives 32 bytes.
 * @param input Input bytes; must not be NULL.
 * @param inlen Input length in bytes.
 * @return 0 on success, -1 on a NULL pointer.
 */
int se050_wireguard_generate_secret(uint8_t out[32], const uint8_t *input, size_t inlen)
{
    if (!out || !input) {
        return -1;
    }

    /*
     * ASCII "wireguard generate sec".
     * NOTE(review): this looks like a truncated "wireguard generate secret";
     * left untouched because changing it would change every derived value.
     */
    static const uint8_t LABEL[22] = {
        0x77, 0x69, 0x72, 0x65, 0x67, 0x75, 0x61, 0x72,
        0x64, 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61,
        0x74, 0x65, 0x20, 0x73, 0x65, 0x63
    };

    return se050_blake2s_keyed(out, 32, LABEL, 22, input, inlen);
}

#ifdef BLAKE2S_TEST
#include <stdio.h>

/* RFC 7693 Test Vector (page 15) - BLAKE2s-256("abc") */
/* Note: The value 508c5e8c... is the correct BLAKE2s-256("abc") digest */
static const uint8_t BLAKE2S_ABC_DIGEST[32] = {
    0x50, 0x8c, 0x5e, 0x8c, 0x32, 0x7c, 0x14, 0xe2,
    0xe1, 0xa7, 0x2b, 0xa3, 0x4e, 0xeb, 0x45, 0x2f,
    0x37, 0x45, 0x8b, 0x20, 0x9e, 0xd6, 0x3a, 0x29,
    0x4d, 0x99, 0x9b, 0x4c, 0x86, 0x67, 0x59, 0x82
};

/* Print label followed by len bytes of buf as lowercase hex. */
static void print_hex(const char *label, const uint8_t *buf, size_t len)
{
    printf("%s: ", label);
    for (size_t i = 0; i < len; i++) {
        printf("%02x", buf[i]);
    }
    printf("\n");
}

/*
 * Hash a 64-byte message once in a single call and once as two 32-byte
 * updates; both digests must agree. Returns 0 on agreement, 1 otherwise.
 */
static int test_split_update(void)
{
    uint8_t msg[64];
    uint8_t oneshot[32] = {0};
    uint8_t split[32] = {0};
    se050_blake2s_ctx_t ctx;

    for (size_t i = 0; i < sizeof msg; i++) {
        msg[i] = (uint8_t)i;
    }

    if (se050_blake2s(oneshot, 32, msg, sizeof msg) != 0) {
        return 1;
    }
    if (se050_blake2s_init(&ctx, 32) != 0) {
        return 1;
    }
    if (se050_blake2s_update(&ctx, msg, 32) != 0 ||
        se050_blake2s_update(&ctx, msg + 32, 32) != 0) {
        se050_blake2s_zeroize(&ctx);
        return 1;
    }
    if (se050_blake2s_final(&ctx, split, 32) != 0) {
        return 1;
    }

    print_hex("One-shot", oneshot, 32);
    print_hex("Split   ", split, 32);
    return memcmp(oneshot, split, 32) != 0;
}

int main(void)
{
    uint8_t digest[32] = {0};
    int failed = 0;

    printf("BLAKE2s Test Suite\n==================\n\n");

    printf("Test: RFC 7693 \"abc\" (page 15)\n");
    int rc = se050_blake2s(digest, 32, (const uint8_t*)"abc", 3);
    print_hex("Expected", BLAKE2S_ABC_DIGEST, 32);
    print_hex("Computed", digest, 32);
    if (rc == 0 && memcmp(digest, BLAKE2S_ABC_DIGEST, 32) == 0) {
        printf("[PASS] RFC 7693 \"abc\" test vector\n");
    } else {
        printf("[FAIL] RFC 7693 \"abc\" test vector\n");
        failed = 1;
    }

    printf("Test: split update on block boundary\n");
    if (test_split_update() == 0) {
        printf("[PASS] split update matches one-shot\n");
    } else {
        printf("[FAIL] split update matches one-shot\n");
        failed = 1;
    }

    printf("==================\n");
    return failed;
}
#endif