fix: Poly1305 MAC computation bugs

Bug fixes applied:
1. poly1305_update buffer path: Added missing h[0..3] data addition
2. poly1305_update full block: Fixed hibit from (1ULL << 40) to (1ULL << 24), which is 2^128 in the fifth 26-bit limb
3. poly1305_final (64-bit): Output full 128-bit MAC instead of 64-bit

Remaining issues:
- ESP32 version of poly1305_final still outputs only 64-bit MAC
- poly1305_final for partial blocks may have issues
- RFC 7539 test still fails (MAC is all zeros)

WireGuard tests: 28 passed, 4 failed
This commit is contained in:
km
2026-03-29 06:06:00 +09:00
parent 760b37690e
commit 43643bc4cf
2 changed files with 75 additions and 19 deletions
+26 -19
View File
@@ -398,8 +398,16 @@ static void poly1305_update(poly1305_state_t *st, const uint8_t *data, size_t le
data += needed;
len -= needed;
/* Add 0x01 byte after 16 bytes - 2^128 in 26-bit limb representation */
st->h[4] += (1ULL << 24); /* 2^128 = (1<<24) in the 5th 26-bit limb */
/* Add buffer data to h */
st->h[0] += (uint64_t)st->buf[0] | ((uint64_t)st->buf[1] << 8) |
((uint64_t)st->buf[2] << 16) | ((uint64_t)st->buf[3] << 24);
st->h[1] += ((uint64_t)st->buf[4] | ((uint64_t)st->buf[5] << 8) |
((uint64_t)st->buf[6] << 16) | ((uint64_t)st->buf[7] << 24)) & 0x3ffff03;
st->h[2] += ((uint64_t)st->buf[8] | ((uint64_t)st->buf[9] << 8) |
((uint64_t)st->buf[10] << 16) | ((uint64_t)st->buf[11] << 24)) & 0x3ffc0ff;
st->h[3] += ((uint64_t)st->buf[12] | ((uint64_t)st->buf[13] << 8) |
((uint64_t)st->buf[14] << 16) | ((uint64_t)st->buf[15] << 24)) & 0x3f03fff;
st->h[4] += (1ULL << 24); /* 2^128 in 26-bit limb representation */
/* Multiply by r */
uint64_t r0 = st->r[0], r1 = st->r[1], r2 = st->r[2], r3 = st->r[3], r4 = st->r[4];
@@ -429,7 +437,7 @@ static void poly1305_update(poly1305_state_t *st, const uint8_t *data, size_t le
}
while (len >= 16) {
uint64_t hibit = ((uint64_t)1) << 40;
/* 2^128 in 26-bit limb */
st->h[0] += (uint64_t)data[0] | ((uint64_t)data[1] << 8) |
((uint64_t)data[2] << 16) | ((uint64_t)data[3] << 24);
st->h[1] += ((uint64_t)data[4] | ((uint64_t)data[5] << 8) |
@@ -438,7 +446,7 @@ static void poly1305_update(poly1305_state_t *st, const uint8_t *data, size_t le
((uint64_t)data[10] << 16) | ((uint64_t)data[11] << 24)) & 0x3ffc0ff;
st->h[3] += ((uint64_t)data[12] | ((uint64_t)data[13] << 8) |
((uint64_t)data[14] << 16) | ((uint64_t)data[15] << 24)) & 0x3f03fff;
st->h[4] += hibit;
st->h[4] += (1ULL << 24); /* 2^128 */
/* Multiply by r */
uint64_t r0 = st->r[0], r1 = st->r[1], r2 = st->r[2], r3 = st->r[3], r4 = st->r[4];
@@ -492,7 +500,7 @@ static void poly1305_final(poly1305_state_t *st, uint8_t mac[16])
((uint64_t)st->buf[10] << 16) | ((uint64_t)st->buf[11] << 24)) & 0x3ffc0ff;
st->h[3] += ((uint64_t)st->buf[12] | ((uint64_t)st->buf[13] << 8) |
((uint64_t)st->buf[14] << 16) | ((uint64_t)st->buf[15] << 24)) & 0x3f03fff;
st->h[4] += hibit;
st->h[4] += (1ULL << 24); /* 2^128 */
/* Multiply by r one last time */
uint64_t r0 = st->r[0], r1 = st->r[1], r2 = st->r[2], r3 = st->r[3], r4 = st->r[4];
@@ -532,21 +540,20 @@ static void poly1305_final(poly1305_state_t *st, uint8_t mac[16])
st->h[4] += (st->h[3] >> 22);
st->h[3] &= 0x3ffffff;
/* Add s */
uint64_t mac0 = st->h[0] + st->s[0];
uint64_t mac1 = st->h[1] + st->s[1] + (mac0 >> 32);
mac0 &= 0xFFFFFFFF;
mac1 &= 0xFFFFFFFF;
/* Add s[0], s[1] and output full 128-bit MAC */
uint64_t f0 = st->h[0] + st->s[0];
uint64_t f1 = st->h[1] + st->s[1] + (f0 >> 32);
uint64_t f2 = st->h[2] + (f1 >> 32);
uint64_t f3 = st->h[3] + (f2 >> 32);
mac[0] = (uint8_t)mac0;
mac[1] = (uint8_t)(mac0 >> 8);
mac[2] = (uint8_t)(mac0 >> 16);
mac[3] = (uint8_t)(mac0 >> 24);
mac[4] = (uint8_t)mac1;
mac[5] = (uint8_t)(mac1 >> 8);
mac[6] = (uint8_t)(mac1 >> 16);
mac[7] = (uint8_t)(mac1 >> 24);
for (int i = 8; i < 16; i++) mac[i] = 0;
mac[0] = (uint8_t)(f0); mac[1] = (uint8_t)(f0 >> 8);
mac[2] = (uint8_t)(f0 >> 16); mac[3] = (uint8_t)(f0 >> 24);
mac[4] = (uint8_t)(f1); mac[5] = (uint8_t)(f1 >> 8);
mac[6] = (uint8_t)(f1 >> 16); mac[7] = (uint8_t)(f1 >> 24);
mac[8] = (uint8_t)(f2); mac[9] = (uint8_t)(f2 >> 8);
mac[10] = (uint8_t)(f2 >> 16); mac[11] = (uint8_t)(f2 >> 24);
mac[12] = (uint8_t)(f3); mac[13] = (uint8_t)(f3 >> 8);
mac[14] = (uint8_t)(f3 >> 16); mac[15] = (uint8_t)(f3 >> 24);
}
#endif /* SE050_CHACHA20_ESP32 */