author     polwex <polwex@sortug.com>    2025-10-05 21:56:51 +0700
committer  polwex <polwex@sortug.com>    2025-10-05 21:56:51 +0700
commit     fcedfddf00b3f994e4f4e40332ac7fc192c63244 (patch)
tree       51d38e62c7bdfcc5f9a5e9435fe820c93cfc9a3d /vere/pkg/ur
claude is gud
Diffstat (limited to 'vere/pkg/ur')
 -rw-r--r--  vere/pkg/ur/bitstream.c   | 1283
 -rw-r--r--  vere/pkg/ur/bitstream.h   |  239
 -rw-r--r--  vere/pkg/ur/build.zig     |   42
 -rw-r--r--  vere/pkg/ur/build.zig.zon |   13
 -rw-r--r--  vere/pkg/ur/defs.h        |   86
 -rw-r--r--  vere/pkg/ur/hashcons.c    | 1026
 -rw-r--r--  vere/pkg/ur/hashcons.h    |  259
 -rw-r--r--  vere/pkg/ur/serial.c      |  588
 -rw-r--r--  vere/pkg/ur/serial.h      |  114
 -rw-r--r--  vere/pkg/ur/tests.c       | 1862
 -rw-r--r--  vere/pkg/ur/ur.h          |   11
11 files changed, 5523 insertions(+), 0 deletions(-)
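
The diffs below add the standalone ur noun library: a bitstream reader/writer (bitstream.c/.h), a hash-consed noun arena (hashcons.c/.h), and jam serialization (serial.c/.h). As orientation for how these pieces fit together, here is a minimal usage sketch that is not part of the commit. It assumes the headers install under the "ur/" prefix as build.zig specifies, and that serial.h declares ur_jam with the signature defined in serial.c; it only calls functions visible in the diff (ur_root_init, ur_coin64, ur_cons, ur_jam, ur_root_free).

    /* minimal sketch (not part of this commit): build the noun [1 2]
    ** in a hash-cons arena and jam (serialize) it to a byte buffer.
    */
    #include <inttypes.h>
    #include <stdio.h>
    #include <stdlib.h>

    #include "ur/hashcons.h"
    #include "ur/serial.h"

    int
    main(void)
    {
      ur_root_t *r = ur_root_init();        /* noun arena                    */

      ur_nref hed = ur_coin64(r, 1);        /* small values stay direct      */
      ur_nref tal = ur_coin64(r, 2);
      ur_nref cel = ur_cons(r, hed, tal);   /* hash-consed cell [1 2]        */

      uint64_t  len;                        /* jam into a heap byte buffer   */
      uint8_t  *byt;
      uint64_t  bits = ur_jam(r, cel, &len, &byt);

      printf("jammed %" PRIu64 " bits (%" PRIu64 " bytes)\r\n", bits, len);

      free(byt);                            /* caller owns the jam buffer    */
      ur_root_free(r);
      return 0;
    }

Because ur_cons hash-conses, building [1 2] twice against the same root yields the same ur_nref, which is what lets the jam writer emit backreferences for repeated subtrees.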
diff --git a/vere/pkg/ur/bitstream.c b/vere/pkg/ur/bitstream.c new file mode 100644 index 0000000..6cf95e6 --- /dev/null +++ b/vere/pkg/ur/bitstream.c @@ -0,0 +1,1283 @@ +/// @file + +#include "bitstream.h" + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "defs.h" + +ur_cue_res_e +ur_bsr_init(ur_bsr_t *bsr, uint64_t len, const uint8_t *bytes) +{ + // check for overflow + // + if ( (len << 3) < len ) { + return ur_cue_meme; + } + + bsr->left = len; + bsr->bytes = bytes; + + return ur_cue_good; +} + +ur_bool_t +ur_bsr_sane(ur_bsr_t *bsr) +{ + ur_bool_t ret = 8 > bsr->off; + + if ( !bsr->left ) { + ret = ret && (!bsr->off && !bsr->bytes); + } + + return ret; +} + +ur_cue_res_e +ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out) +{ + uint64_t left = bsr->left; + + if ( !left ) { + return ur_cue_gone; + } + else { + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint8_t bit = (b[0] >> off) & 1; + + if ( 7 == off ) { + bsr->bytes = ( --left ) ? (b + 1) : 0; + bsr->left = left; + bsr->off = 0; + } + else { + bsr->off = 1 + off; + } + + bsr->bits++; + + *out = bit; + return ur_cue_good; + } +} + +uint8_t +ur_bsr_bit_any(ur_bsr_t *bsr) +{ + uint64_t left = bsr->left; + + bsr->bits++; + + if ( !left ) { + return 0; + } + else { + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint8_t bit = (b[0] >> off) & 1; + + if ( 7 == off ) { + bsr->bytes = ( --left ) ? (b + 1) : 0; + bsr->left = left; + bsr->off = 0; + } + else { + bsr->off = 1 + off; + } + + return bit; + } +} + +uint8_t +ur_bsr8_any(ur_bsr_t *bsr, uint8_t len) +{ + uint64_t left = bsr->left; + + len = ur_min(8, len); + + bsr->bits += len; + + if ( !left ) { + return 0; + } + else { + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + const uint8_t *b = bsr->bytes; + uint8_t m = b[0] >> off; + + if ( len < rest ) { + bsr->off = off + len; + return m & ((1 << len) - 1); + } + else if ( 1 == left ) { + bsr->off = 0; + bsr->left = 0; + bsr->bytes = 0; + return m; + } + else { + off = len - rest; + + bsr->off = off; + bsr->left--; + bsr->bytes++; + + { + uint8_t l = b[1] & ((1 << off) - 1); + return m ^ (l << rest); + } + } + } +} + +uint32_t +ur_bsr32_any(ur_bsr_t *bsr, uint8_t len) +{ + uint64_t left = bsr->left; + + len = ur_min(32, len); + + bsr->bits += len; + + if ( !left ) { + return 0; + } + else { + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + uint32_t m = bsr->bytes[0] >> off; + + if ( len < rest ) { + bsr->off = off + len; + return m & ((1 << len) - 1); + } + else { + const uint8_t *b; + uint8_t mask, len_byt; + uint32_t l; + + len -= rest; + left--; + b = ++bsr->bytes; + + len_byt = len >> 3; + + if ( len_byt >= left ) { + len_byt = left; + bsr->off = off = 0; + bsr->left = 0; + bsr->bytes = 0; + } + else { + bsr->off = off = ur_mask_3(len); + bsr->left = left - len_byt; + bsr->bytes += len_byt; + } + + mask = (1 << off) - 1; + + switch ( len_byt ) { + default: assert(0); + + case 4: { + l = (uint32_t)b[0] + ^ (uint32_t)b[1] << 8 + ^ (uint32_t)b[2] << 16 + ^ (uint32_t)b[3] << 24; + } break; + + case 3: { + l = (uint32_t)b[0] + ^ (uint32_t)b[1] << 8 + ^ (uint32_t)b[2] << 16; + + if ( mask ) { + l ^= (uint32_t)(b[3] & mask) << 24; + } + } break; + + case 2: { + l = (uint32_t)b[0] + ^ (uint32_t)b[1] << 8; + + if ( mask ) { + l ^= (uint32_t)(b[2] & mask) << 16; + } + } break; + + case 1: { + l = (uint32_t)b[0]; + + if ( mask ) { + l ^= (uint32_t)(b[1] & mask) << 8; + } + } break; + + case 0: { + l = ( mask ) ? 
(uint32_t)(b[0] & mask) : 0; + } break; + } + + return m ^ (l << rest); + } + } +} + +uint64_t +ur_bsr64_any(ur_bsr_t *bsr, uint8_t len) +{ + uint64_t left = bsr->left; + + len = ur_min(64, len); + + bsr->bits += len; + + if ( !left ) { + return 0; + } + else { + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + uint64_t m = bsr->bytes[0] >> off; + + if ( len < rest ) { + bsr->off = off + len; + return m & ((1 << len) - 1); + } + else { + const uint8_t *b; + uint8_t mask, len_byt; + uint64_t l; + + len -= rest; + left--; + b = ++bsr->bytes; + + len_byt = len >> 3; + + if ( len_byt >= left ) { + len_byt = left; + bsr->off = off = 0; + bsr->left = 0; + bsr->bytes = 0; + } + else { + bsr->off = off = ur_mask_3(len); + bsr->left = left - len_byt; + bsr->bytes += len_byt; + } + + mask = (1 << off) - 1; + + switch ( len_byt ) { + case 8: { + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8 + ^ (uint64_t)b[2] << 16 + ^ (uint64_t)b[3] << 24 + ^ (uint64_t)b[4] << 32 + ^ (uint64_t)b[5] << 40 + ^ (uint64_t)b[6] << 48 + ^ (uint64_t)b[7] << 56; + } break; + + case 7: { + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8 + ^ (uint64_t)b[2] << 16 + ^ (uint64_t)b[3] << 24 + ^ (uint64_t)b[4] << 32 + ^ (uint64_t)b[5] << 40 + ^ (uint64_t)b[6] << 48; + + if ( mask ) { + l ^= (uint64_t)(b[7] & mask) << 56; + } + } break; + + case 6: { + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8 + ^ (uint64_t)b[2] << 16 + ^ (uint64_t)b[3] << 24 + ^ (uint64_t)b[4] << 32 + ^ (uint64_t)b[5] << 40; + + if ( mask ) { + l ^= (uint64_t)(b[6] & mask) << 48; + } + } break; + + case 5: { + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8 + ^ (uint64_t)b[2] << 16 + ^ (uint64_t)b[3] << 24 + ^ (uint64_t)b[4] << 32; + + if ( mask ) { + l ^= (uint64_t)(b[5] & mask) << 40; + } + } break; + + case 4: { + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8 + ^ (uint64_t)b[2] << 16 + ^ (uint64_t)b[3] << 24; + + if ( mask ) { + l ^= (uint64_t)(b[4] & mask) << 32; + } + } break; + + case 3: { + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8 + ^ (uint64_t)b[2] << 16; + + if ( mask ) { + l ^= (uint64_t)(b[3] & mask) << 24; + } + } break; + + case 2: { + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8; + + if ( mask ) { + l ^= (uint64_t)(b[2] & mask) << 16; + } + } break; + + case 1: { + l = (uint64_t)b[0]; + + if ( mask ) { + l ^= (uint64_t)(b[1] & mask) << 8; + } + } break; + + case 0: { + l = ( mask ) ? (uint64_t)(b[0] & mask) : 0; + } break; + } + + return m ^ (l << rest); + } + } +} + +void +ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) +{ + uint64_t left = bsr->left; + + bsr->bits += len; + + if ( !left ) { + return; + } + else { + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint64_t len_byt = len >> 3; + uint8_t len_bit = ur_mask_3(len); + uint64_t need = len_byt + !!len_bit; + + if ( !off ) { + if ( need > left ) { + memcpy(out, b, left); + left = 0; + bsr->bytes = 0; + } + else { + memcpy(out, b, len_byt); + off = len_bit; + + if ( off ) { + out[len_byt] = b[len_byt] & ((1 << off) - 1); + } + + left -= len_byt; + bsr->bytes = ( left ) ? 
b + len_byt : 0; + } + } + // the most-significant bits from a byte in the stream + // become the least-significant bits of an output byte, and vice-versa + // + else { + uint8_t rest = 8 - off; + uint64_t last = left - 1; + uint64_t max = ur_min(last, len_byt); + uint8_t m, l; + + // loop over all the bytes we need (or all that remain) + // + { + uint64_t i; + + for ( i = 0; i < max; i++ ) { + out[i] = (b[i] >> off) ^ (b[i + 1] << rest); + } + + b += max; + m = *b >> off; + } + + // we're reading into or beyond the last byte [bsr] + // + // [m] holds all the remaining bits in [bsr], + // but we might not need all of it + // + if ( need >= left ) { + uint8_t bits = len - (last << 3); + + if ( bits < rest ) { + out[max] = m & ((1 << len_bit) - 1); + bsr->bytes = b; + left = 1; + off += len_bit; + } + else { + out[max] = m; + bsr->bytes = 0; + left = 0; + off = 0; + } + } + // we need less than a byte, but it might span multiple bytes + // + else { + uint8_t bits = off + len_bit; + uint8_t step = !!(bits >> 3); + + bsr->bytes = b + step; + left -= len_byt + step; + off = ur_mask_3(bits); + + if ( len_bit ) { + if ( len_bit <= rest ) { + out[max] = m & ((1 << len_bit) - 1); + } + else { + l = *++b & ((1 << off) - 1); + out[max] = (m ^ (l << rest)) & ((1 << len_bit) - 1); + } + } + } + } + + bsr->off = off; + bsr->left = left; + } +} + +void +ur_bsr_skip_any(ur_bsr_t *bsr, uint64_t len) +{ + uint64_t left = bsr->left; + + bsr->bits += len; + + if ( !left ) { + return; + } + else { + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint64_t len_byt = len >> 3; + uint8_t len_bit = ur_mask_3(len); + uint64_t need = len_byt + !!len_bit; + uint8_t rest = 8 - off; + uint64_t last = left - 1; + + b += ur_min(last, len_byt) + 1; + + if ( need >= left ) { + uint8_t bits = len - (last << 3); + + if ( bits < rest ) { + bsr->bytes = b - 1; + left = 1; + off += len_bit; + } + else { + bsr->bytes = 0; + left = 0; + off = 0; + } + } + else { + uint8_t bits = off + len_bit; + uint8_t step = !!(bits >> 3); + + bsr->bytes = b - (1 - step); + left -= len_byt + step; + off = ur_mask_3(bits); + } + + bsr->off = off; + bsr->left = left; + } +} + +static inline ur_cue_res_e +_bsr_set_gone(ur_bsr_t *bsr, uint8_t bits) +{ + bsr->bits += bits; + bsr->bytes = 0; + bsr->left = 0; + bsr->off = 0; + return ur_cue_gone; +} + +ur_cue_res_e +ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) +{ + uint64_t left = bsr->left; + + if ( !left ) { + return ur_cue_gone; + } + else { + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint8_t bit = (b[0] >> off) & 1; + uint8_t len = 1; + + if ( 0 == bit ) { + *out = ur_jam_atom; + } + else { + if ( 7 == off ) { + if ( 1 == left ) { + return _bsr_set_gone(bsr, 1); + } + + bit = b[1] & 1; + } + else { + bit = (b[0] >> (off + 1)) & 1; + } + + len++; + *out = ( 0 == bit ) ? 
ur_jam_cell : ur_jam_back; + } + + { + uint8_t bits = off + len; + uint8_t bytes = bits >> 3; + + left -= bytes; + + if ( !left ) { + bsr->bytes = 0; + bsr->left = 0; + bsr->off = 0; + } + else { + bsr->bytes += bytes; + bsr->left = left; + bsr->off = ur_mask_3(bits); + } + + bsr->bits += len; + + return ur_cue_good; + } + } +} + +static inline ur_cue_res_e +_bsr_log_meme(ur_bsr_t *bsr) +{ + bsr->bits += 256; + bsr->bytes += 32; + bsr->left -= 32; + return ur_cue_meme; +} + +ur_cue_res_e +ur_bsr_log(ur_bsr_t *bsr, uint8_t *out) +{ + uint64_t left = bsr->left; + + if ( !left ) { + return ur_cue_gone; + } + else { + uint8_t off = bsr->off; + const uint8_t *b = bsr->bytes; + uint8_t byt = b[0] >> off; + uint8_t skip = 0; + + while ( !byt ) { + if ( 32 == skip ) { + return _bsr_log_meme(bsr); + } + + byt = b[++skip]; + + if ( skip == left ) { + return _bsr_set_gone(bsr, (skip << 3) - off); + } + } + + { + uint32_t zeros = ur_tz8(byt) + (skip ? ((skip << 3) - off) : 0); + + if ( 255 < zeros ) { + return _bsr_log_meme(bsr); + } + else { + uint32_t bits = off + 1 + zeros; + uint8_t bytes = bits >> 3; + + left -= bytes; + + bsr->bytes = left ? (b + bytes) : 0; + bsr->bits += 1 + zeros; + bsr->left = left; + bsr->off = ur_mask_3(bits); + + *out = zeros; + return ur_cue_good; + } + } + } +} + +ur_cue_res_e +ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out) +{ + ur_cue_res_e res; + uint8_t len; + + if ( ur_cue_good != (res = ur_bsr_log(bsr, &len)) ) { + return res; + } + else if ( 64 <= len ) { + return ur_cue_meme; + } + + switch ( len ) { + case 0: { + *out = 0; + } break; + + case 1: { + *out = 1; + } break; + + default: { + len--; + *out = ur_bsr64_any(bsr, len) ^ (1ULL << len); + } break; + } + + return ur_cue_good; +} + +/* +** bitstream-writer operations follow a pattern of an unsafe (inline) +** implementation, unsafe wrt to buffer size and reallocation, +** wrapped in a public function with buffer size checks. +** +** higher-level operations made up of multiple discrete writes check +** the buffer size once for all involved writes. +** +** this pattern should be easily adaptable to an alternate bitstream-writer +** implementation that flushes accumulated output periodically instead +** of reallocating the output buffer. 
+*/ + +void +ur_bsw_init(ur_bsw_t *bsw, uint64_t prev, uint64_t size) +{ + bsw->prev = prev; + bsw->size = size; + bsw->bytes = calloc(size, 1); + + if ( !bsw->bytes ) { + fprintf(stderr, + "ur: bitstream-init allocation failed, out of memory\r\n"); + abort(); + } +} + +void +ur_bsw_grow(ur_bsw_t *bsw, uint64_t step) +{ + uint64_t size = bsw->size; + uint64_t next = size + step; + + bsw->bytes = realloc(bsw->bytes, next); + + if ( !bsw->bytes ) { + fprintf(stderr, + "ur: bitstream-write allocation failed, out of memory\r\n"); + abort(); + } + + memset(bsw->bytes + size, 0, step); + + bsw->prev = size; + bsw->size = next; +} + +ur_bool_t +ur_bsw_sane(ur_bsw_t *bsw) +{ + return ( (8 > bsw->off) + && ((bsw->fill << 3) + bsw->off == bsw->bits) ); +} + +uint64_t +ur_bsw_done(ur_bsw_t *bsw, uint64_t *len, uint8_t **byt) +{ + uint64_t bits = bsw->bits; + + *len = bsw->fill + !!bsw->off; + *byt = bsw->bytes; + + memset(bsw, 0, sizeof(*bsw)); + + return bits; +} + +static inline void +_bsw_bit_unsafe(ur_bsw_t *bsw, uint8_t bit) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + + bsw->bytes[fill] ^= (bit & 1) << off; + + if ( 7 == off ) { + bsw->fill = 1 + fill; + bsw->off = 0; + } + else { + bsw->off = 1 + off; + } + + bsw->bits++; +} + +void +ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit) +{ + if ( (7 == bsw->off) + && ((1 + bsw->fill) == bsw->size) ) + { + ur_bsw_grow(bsw, bsw->prev); + } + + _bsw_bit_unsafe(bsw, bit); +} + +static inline void +_bsw8_unsafe(ur_bsw_t *bsw, uint8_t len, uint8_t byt) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint8_t rest = 8 - off; + uint8_t l, m; + + // the least-significant bits of the input become the + // most-significant bits of a byte in the output stream + // + if ( len < rest ) { + l = byt & ((1 << len) - 1); + + bsw->bytes[fill] ^= l << off; + bsw->off = off + len; + } + // and vice-versa + // + else { + l = byt & ((1 << rest) - 1); + m = byt >> rest; + + bsw->bytes[fill++] ^= l << off; + off = len - rest; + bsw->bytes[fill] = m & ((1 << off) - 1); + + bsw->fill = fill; + bsw->off = off; + } + + bsw->bits += len; +} + +void +ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt) +{ + len = ur_min(8, len); + + if ( bsw->fill + !!((bsw->off + len) >> 3) >= bsw->size ) { + ur_bsw_grow(bsw, bsw->prev); + } + + _bsw8_unsafe(bsw, len, byt); +} + +static inline void +_bsw32_unsafe(ur_bsw_t *bsw, uint8_t len, uint32_t val) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint8_t *bytes = bsw->bytes; + + bsw->bits += len; + + if ( off ) { + uint8_t rest = 8 - off; + + if ( len < rest ) { + bytes[fill] ^= (val & ((1 << len) - 1)) << off; + bsw->off = off + len; + return; + } + + bytes[fill++] ^= (val & ((1 << rest) - 1)) << off; + val >>= rest; + len -= rest; + } + + switch ( len >> 3 ) { + case 4: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + + // no offset is possible here + // + bsw->fill = fill; + bsw->off = 0; + return; + } + + case 3: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + val >>= 24; + } break; + + case 2: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + val >>= 16; + } break; + + case 1: { + bytes[fill++] = ur_mask_8(val); + val >>= 8; + } break; + } + + off = ur_mask_3(len); + + if ( off ) { + bytes[fill] = (uint8_t)(val & ((1 << off) - 1)); + } + + bsw->fill = fill; + bsw->off = off; +} + +void +ur_bsw32(ur_bsw_t *bsw, 
uint8_t len, uint32_t val) +{ + uint8_t need; + + len = ur_min(32, len); + need = ur_bloq_up3( bsw->off + len ); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw32_unsafe(bsw, len, val); +} + +static inline void +_bsw64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint8_t *bytes = bsw->bytes; + + bsw->bits += len; + + if ( off ) { + uint8_t rest = 8 - off; + + if ( len < rest ) { + bytes[fill] ^= (val & ((1 << len) - 1)) << off; + bsw->off = off + len; + return; + } + + bytes[fill++] ^= (val & ((1 << rest) - 1)) << off; + val >>= rest; + len -= rest; + } + + switch ( len >> 3 ) { + case 8: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + bytes[fill++] = ur_mask_8(val >> 32); + bytes[fill++] = ur_mask_8(val >> 40); + bytes[fill++] = ur_mask_8(val >> 48); + bytes[fill++] = ur_mask_8(val >> 56); + + // no offset is possible here + // + bsw->fill = fill; + bsw->off = 0; + return; + } + + case 7: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + bytes[fill++] = ur_mask_8(val >> 32); + bytes[fill++] = ur_mask_8(val >> 40); + bytes[fill++] = ur_mask_8(val >> 48); + val >>= 56; + } break; + + case 6: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + bytes[fill++] = ur_mask_8(val >> 32); + bytes[fill++] = ur_mask_8(val >> 40); + val >>= 48; + } break; + + case 5: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + bytes[fill++] = ur_mask_8(val >> 32); + val >>= 40; + } break; + + case 4: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + val >>= 32; + } break; + + case 3: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + val >>= 24; + } break; + + case 2: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + val >>= 16; + } break; + + case 1: { + bytes[fill++] = ur_mask_8(val); + val >>= 8; + } break; + } + + off = ur_mask_3(len); + + if ( off ) { + bytes[fill] = (uint8_t)(val & ((1 << off) - 1)); + } + + bsw->fill = fill; + bsw->off = off; +} + +void +ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint8_t need; + + len = ur_min(64, len); + need = ur_bloq_up3( bsw->off + len ); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw64_unsafe(bsw, len, val); +} + +static inline void +_bsw_bytes_unsafe(ur_bsw_t *bsw, const uint64_t len, const uint8_t* src) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint8_t *dst = bsw->bytes + fill; + + if ( !off ) { + const uint64_t len_byt = len >> 3; + const uint8_t len_bit = ur_mask_3(len); + + memcpy(dst, src, len_byt); + bsw->fill = fill + len_byt; + + if ( len_bit ) { + dst[len_byt] = src[len_byt] & ((1 << len_bit) - 1); + bsw->off = len_bit; + } + } + else { + const uint8_t rest = 8 - off; + + if ( rest >= len ) { + uint16_t ful = off + len; + + *dst ^= (*src & ((1 << len) - 1)) << off; + + if ( ful >> 3 ) { + bsw->fill = fill + 1; + } + + bsw->off = ur_mask_3(ful); 
+ } + else { + const uint64_t nel = len - rest; + const uint64_t len_byt = nel >> 3; + const uint8_t len_bit = ur_mask_3(nel); + + *dst++ ^= *src << off; + + for ( uint64_t i = 0; i < len_byt; i++ ) { + dst[i] = (src[i] >> rest) ^ (src[i + 1] << off); + } + + { + uint8_t tal = (src[len_byt] >> rest) + ^ (( off >= len_bit ) ? 0 : (src[len_byt + 1] << off)); + dst[len_byt] = tal & ((1 << len_bit) - 1); + } + + bsw->fill = fill + len_byt + 1; + bsw->off = len_bit; + } + } + + bsw->bits += len; +} + +void +ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + uint64_t need = ur_bloq_up3(len + bsw->off); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_bytes_unsafe(bsw, len, byt); +} + +static inline void +_bsw_bex_unsafe(ur_bsw_t *bsw, uint8_t n) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint32_t bits = n + off; + + fill += bits >> 3; + off = ur_mask_3(bits); + + bsw->bytes[fill] ^= 1 << off; + + if ( 7 == off ) { + bsw->off = 0; + bsw->fill = 1 + fill; + } + else { + bsw->off = 1 + off; + bsw->fill = fill; + } + + bsw->bits += 1 + n; +} + +void +ur_bsw_bex(ur_bsw_t *bsw, uint8_t n) +{ + uint64_t need = ur_bloq_up3(1 + n + bsw->off); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_bex_unsafe(bsw, n); +} + +static inline void +_bsw_mat64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + if ( 0 == len ) { + _bsw_bit_unsafe(bsw, 1); + } + else { + { + uint8_t nel = ur_met0_64(len); + _bsw_bex_unsafe(bsw, nel); + _bsw64_unsafe(bsw, nel - 1, len); + } + + _bsw64_unsafe(bsw, len, val); + } +} + +/* +* the length of a "mat" run-length encoded atom of [len] bits +*/ +#define MAT_LEN(len) ( ( 0 == len ) ? 1 : len + (2 * ur_met0_64((uint64_t)len)) ) + +void +ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint8_t need; + + len = ur_min(64, len); + need = ur_bloq_up3( bsw->off + MAT_LEN(len) ); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_mat64_unsafe(bsw, len, val); +} + +static inline void +_bsw_mat_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + if ( 0 == len ) { + _bsw_bit_unsafe(bsw, 1); + } + else { + // write run-length + // + { + uint8_t nel = ur_met0_64(len); + _bsw_bex_unsafe(bsw, nel); + _bsw64_unsafe(bsw, nel - 1, len); + } + + _bsw_bytes_unsafe(bsw, len, byt); + } +} + +void +ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + uint64_t need = ur_bloq_up3( bsw->off + MAT_LEN(len) ); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_mat_bytes_unsafe(bsw, len, byt); +} + +static inline void +_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + _bsw8_unsafe(bsw, 2, 3); + _bsw_mat64_unsafe(bsw, len, val); +} + +void +ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint8_t need; + + len = ur_min(64, len); + need = ur_bloq_up3( 2 + bsw->off + MAT_LEN(len) ); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_back64(bsw, len, val); +} + +static inline void +_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + _bsw_bit_unsafe(bsw, 0); + _bsw_mat64_unsafe(bsw, len, val); +} + +void +ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint8_t need; + + len = ur_min(64, len); + need = ur_bloq_up3( 1 + bsw->off + MAT_LEN(len) ); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_atom64(bsw, len, 
val); +} + +static inline void +_bsw_atom_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + _bsw_bit_unsafe(bsw, 0); + _bsw_mat_bytes_unsafe(bsw, len, byt); +} + +void +ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + uint64_t need = ur_bloq_up3( 1 + bsw->off + MAT_LEN(len) ); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_atom_bytes_unsafe(bsw, len, byt); +} + +void +ur_bsw_cell(ur_bsw_t *bsw) +{ + uint8_t need = ur_bloq_up3( 2 + bsw->off ); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw8_unsafe(bsw, 2, 1); +} diff --git a/vere/pkg/ur/bitstream.h b/vere/pkg/ur/bitstream.h new file mode 100644 index 0000000..a4b7859 --- /dev/null +++ b/vere/pkg/ur/bitstream.h @@ -0,0 +1,239 @@ +/// @file + +#ifndef UR_BITSTREAM_H +#define UR_BITSTREAM_H + +#include <inttypes.h> + +#include "defs.h" + +/* +** stateful bitstream reader, backed by a byte-buffer, +** maintaing a 64-bit bit-cursor, and supporting a variety +** of read sizes and patterns. +** +** NB: ur_bsr*_any() functions behave as if the stream were infinite, +** subject to the overall limit of the bit-cursor. +** +*/ +typedef struct ur_bsr_s { + uint64_t left; + uint64_t bits; + uint8_t off; + const uint8_t *bytes; +} ur_bsr_t; + +/* +** generalized bitstream-reader/cue response enum +*/ +typedef enum { + ur_cue_good = 0, // successful read + ur_cue_back = 1, // missing backreference + ur_cue_gone = 2, // read off the end of the stream + ur_cue_meme = 3 // exceeded memory representation +} ur_cue_res_e; + +/* +** jam/cue type tag enumeration +*/ +typedef enum { + ur_jam_atom = 0, + ur_jam_cell = 1, + ur_jam_back = 2 +} ur_cue_tag_e; + +/* +** stateful bitstream writer, backed by a byte-buffer automatically +** reallocated with fibonacc growth, maintaing a 64-bit bit-cursor, +** and supporting a variety of write sizes and patterns. +** +*/ +typedef struct ur_bsw_s { + uint64_t prev; + uint64_t size; + uint64_t fill; + uint64_t bits; + uint8_t off; + uint8_t *bytes; +} ur_bsw_t; + +/* +** initialize bitstream-reader and check for 64-bit bit-cursor overflow. +*/ +ur_cue_res_e +ur_bsr_init(ur_bsr_t *bsr, uint64_t len, const uint8_t *bytes); + +/* +** validate bitstream-reader invariants. +*/ +ur_bool_t +ur_bsr_sane(ur_bsr_t *bsr); + +/* +** read a bit, failing at EOS +*/ +ur_cue_res_e +ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out); + +/* +** read a bit +*/ +uint8_t +ur_bsr_bit_any(ur_bsr_t *bsr); + +/* +** read N (up to 8) bits into a uint8. +*/ +uint8_t +ur_bsr8_any(ur_bsr_t *bsr, uint8_t len); + +/* +** read N (up to 32) bits into a uint32. +*/ +uint32_t +ur_bsr32_any(ur_bsr_t *bsr, uint8_t len); + +/* +** read N (up to 64) bits into a uint64. +*/ +uint64_t +ur_bsr64_any(ur_bsr_t *bsr, uint8_t len); + +/* +** read N bits into a zero-initialized byte array. +*/ +void +ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out); + +/* +** advance the bitstream cursor as if we had read N bits. +*/ +void +ur_bsr_skip_any(ur_bsr_t *bsr, uint64_t len); + +/* +** read a jam/cue type tag. +*/ +ur_cue_res_e +ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out); + +/* +** read a binary exponent, producing the binary log. +** +** read N (up to 255) zero bits followed by a 1, produce N. +*/ +ur_cue_res_e +ur_bsr_log(ur_bsr_t *bsr, uint8_t *out); + +/* +** read an atomic run-length (a la +rub). 
+** +** read a binary log N, then read N (up to 64) bits, +** produce (N-bits ^ (1 << N)) +*/ +ur_cue_res_e +ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out); + +/* +** initialize bitstream-writer with prev/size for fibonacci growth. +*/ +void +ur_bsw_init(ur_bsw_t *bsw, uint64_t prev, uint64_t size); + +/* +** reallocate bitstream write buffer with max(fibonacci, step) growth. +*/ +void +ur_bsw_grow(ur_bsw_t *bsw, uint64_t step); + +/* +** validate bitstream-writer invariants. +*/ +ur_bool_t +ur_bsw_sane(ur_bsw_t *bsw); + +/* +** return bit-length, produce byte-buffer. +*/ +uint64_t +ur_bsw_done(ur_bsw_t *bsw, uint64_t *len, uint8_t **byt); + +/* +** write a bit +*/ +void +ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit); + +/* +** write N (up to 8) bits of a uint8. +*/ +void +ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt); + +/* +** write N (up to 32) bits of a uint32. +*/ +void +ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val); + +/* +** write N (up to 64) bits of a uint64. +*/ +void +ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val); + +/* +** write N bits of a byte array. +** +** NB: [byt] must contain at least N bits. +*/ +void +ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); + +/* +** write a binary exponent (N zero bits, followed by a 1). +*/ +void +ur_bsw_bex(ur_bsw_t *bsw, uint8_t n); + +/* +** write N (up to 64) run-length prefixed bits (a la +mat). +*/ +void +ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val); + +/* +** write N run-length prefixed bits (a la +mat). +** +** NB: [byt] must contain at least N bits. +*/ +void +ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); + +/* +** write a backref tag (1, 1) and N (up to 64) run-length prefixed bits. +*/ +void +ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val); + +/* +** write an atom tag (0) and N (up to 64) run-length prefixed bits. +*/ +void +ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val); + +/* +** write an atom tag (0) and N run-length prefixed bits. +** +** NB: [byt] must contain at least N bits. 
+*/ +void +ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); + +/* +** write a cell tag (1, 0) +*/ +void +ur_bsw_cell(ur_bsw_t *bsw); + +#endif diff --git a/vere/pkg/ur/build.zig b/vere/pkg/ur/build.zig new file mode 100644 index 0000000..abd1ec0 --- /dev/null +++ b/vere/pkg/ur/build.zig @@ -0,0 +1,42 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) !void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + const copts: []const []const u8 = + b.option([]const []const u8, "copt", "") orelse &.{}; + + const pkg_ur = b.addStaticLibrary(.{ + .name = "ur", + .target = target, + .optimize = optimize, + }); + + const murmur3 = b.dependency("murmur3", .{ + .target = target, + .optimize = optimize, + }); + + pkg_ur.linkLibC(); + pkg_ur.linkLibrary(murmur3.artifact("murmur3")); + + pkg_ur.addIncludePath(b.path("")); + pkg_ur.addCSourceFiles(.{ + .root = b.path(""), + .files = &.{ + "bitstream.c", + "hashcons.c", + "serial.c", + }, + .flags = copts, + }); + + pkg_ur.installHeader(b.path("bitstream.h"), "ur/bitstream.h"); + pkg_ur.installHeader(b.path("defs.h"), "ur/defs.h"); + pkg_ur.installHeader(b.path("hashcons.h"), "ur/hashcons.h"); + pkg_ur.installHeader(b.path("serial.h"), "ur/serial.h"); + pkg_ur.installHeader(b.path("ur.h"), "ur/ur.h"); + + b.installArtifact(pkg_ur); +} diff --git a/vere/pkg/ur/build.zig.zon b/vere/pkg/ur/build.zig.zon new file mode 100644 index 0000000..2330f45 --- /dev/null +++ b/vere/pkg/ur/build.zig.zon @@ -0,0 +1,13 @@ +.{ + .name = .ur, + .fingeprint = 0xad9a8f5c919f2264, + .version = "0.0.1", + .dependencies = .{ + .murmur3 = .{ + .path = "../../ext/murmur3", + }, + }, + .paths = .{ + "", + }, +} diff --git a/vere/pkg/ur/defs.h b/vere/pkg/ur/defs.h new file mode 100644 index 0000000..68d6f74 --- /dev/null +++ b/vere/pkg/ur/defs.h @@ -0,0 +1,86 @@ +/// @file + +#ifndef UR_DEFS_H +#define UR_DEFS_H + +#include <assert.h> +#include <inttypes.h> +#include <limits.h> + +typedef uint8_t ur_bool_t; + +#define ur_min(a, b) ( ((a) < (b)) ? (a) : (b) ) +#define ur_max(a, b) ( ((a) > (b)) ? (a) : (b) ) + +/* +** fibonacci constants, for convenient initialization of +** objects intended to be reallocated with fibonacci growth +*/ +#define ur_fib10 55 +#define ur_fib11 89 +#define ur_fib12 144 +#define ur_fib27 196418 +#define ur_fib28 317811 +#define ur_fib33 3524578 +#define ur_fib34 5702887 + +/* +** bit-masking helpers +*/ +#define ur_mask_3(a) (a & 0x7) +#define ur_mask_8(a) (a & 0xff) +#define ur_mask_31(a) (a & 0x7fffffff) +#define ur_mask_62(a) (a & 0x3fffffffffffffffULL) + +/* +** bloq (binary exponent) conversions +*/ +#define ur_bloq_up1(a) ( (a + 0x1) >> 1 ) +#define ur_bloq_up2(a) ( (a + 0x3) >> 2 ) +#define ur_bloq_up3(a) ( (a + 0x7) >> 3 ) + +/* +** atom measurement +*/ +#if (32 == (CHAR_BIT * __SIZEOF_INT__)) +# define ur_lz32 __builtin_clz +# define ur_tz32 __builtin_ctz +#elif (32 == (CHAR_BIT * __SIZEOF_LONG__)) +# define ur_lz32 __builtin_clzl +# define ur_tz32 __builtin_ctzl +#else +# error "port me" +#endif + +#if (64 == (CHAR_BIT * __SIZEOF_LONG__)) +# define ur_lz64 __builtin_clzl +# define ur_tz64 __builtin_ctzl +#elif (64 == (CHAR_BIT * __SIZEOF_LONG_LONG__)) +# define ur_lz64 __builtin_clzll +# define ur_tz64 __builtin_ctzll +#else +# error "port me" +#endif + +#define ur_lz8(a) ( ur_lz32(a) - 24 ) +#define ur_tz8 ur_tz32 + +#define ur_met0_8(a) ( (a) ? 8 - ur_lz8(a) : 0 ) +#define ur_met0_32(a) ( (a) ? 32 - ur_lz32(a) : 0 ) +#define ur_met0_64(a) ( (a) ? 
64 - ur_lz64(a) : 0 ) + +/* +** unsafe wrt trailing null bytes, which are invalid +*/ +inline uint64_t +ur_met0_bytes_unsafe(uint64_t len, uint8_t *byt) +{ + uint64_t last = len - 1; + return (last << 3) + ur_met0_8(byt[last]); +} + +#define ur_met3_8(a) ur_bloq_up3(ur_met0_8(a)) +#define ur_met3_32(a) ur_bloq_up3(ur_met0_32(a)) +#define ur_met3_64(a) ur_bloq_up3(ur_met0_64(a)) + +#endif /* ifndef UR_DEFS_H */ diff --git a/vere/pkg/ur/hashcons.c b/vere/pkg/ur/hashcons.c new file mode 100644 index 0000000..ea2338b --- /dev/null +++ b/vere/pkg/ur/hashcons.c @@ -0,0 +1,1026 @@ +/// @file + +#include "hashcons.h" + +#include <assert.h> +#include <inttypes.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "defs.h" +#include "murmur3.h" + +// declarations of inline functions +// +uint64_t +ur_met0_bytes_unsafe(uint64_t len, uint8_t *byt); + +static void* +_oom(const char* cap, void* v) +{ + if ( !v ) { + fprintf(stderr, + "ur: hashcons: %s: allocation failed, out of memory\r\n", cap); + abort(); + } + + return v; +} + +void +ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size) +{ + ur_pail32_t *buckets, *old_buckets = dict->buckets; + uint8_t *fills, *old_fills = dict->fills; + uint64_t old_size = dict->size; + uint64_t i, next = prev + size; + + buckets = _oom("dict32_grow", calloc(next, sizeof(*buckets))); + fills = _oom("dict32_grow", calloc(next, sizeof(*fills))); + + if ( old_buckets ) { + for ( i = 0; i < old_size; i++ ) { + ur_pail32_t *old_bucket = &(old_buckets[i]); + uint8_t j, old_fill = old_fills[i]; + + for ( j = 0; j < old_fill; j++ ) { + uint32_t val = old_bucket->vals[j]; + ur_nref ref = old_bucket->refs[j]; + ur_mug mug = ur_nref_mug(r, ref); + + uint64_t idx = ( mug % next ); + ur_pail32_t *bucket = &(buckets[idx]); + uint8_t new_fill = fills[idx]; + + if ( ur_pail_max == new_fill ) { + free(buckets); + free(fills); + return ur_dict32_grow(r, dict, size, next); + } + + bucket->refs[new_fill] = ref; + bucket->vals[new_fill] = val; + fills[idx] = 1 + new_fill; + } + } + + free(old_buckets); + free(old_fills); + } + + dict->prev = size; + dict->size = next; + dict->fills = fills; + dict->buckets = buckets; +} + +ur_bool_t +ur_dict32_get(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t *out) +{ + ur_mug mug = ur_nref_mug(r, ref); + uint64_t idx = ( mug % dict->size ); + + ur_pail32_t *bucket = &(dict->buckets[idx]); + uint8_t i, fill = dict->fills[idx]; + + for ( i = 0; i < fill; i++ ) { + if ( ref == bucket->refs[i] ) { + *out = bucket->vals[i]; + return 1; + } + } + + return 0; +} + +void +ur_dict32_put(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t val) +{ + ur_mug mug = ur_nref_mug(r, ref); + + while ( 1 ) { + uint64_t idx = ( mug % dict->size ); + ur_pail32_t *bucket = &(dict->buckets[idx]); + uint8_t i, fill = dict->fills[idx]; + + for ( i = 0; i < fill; i++ ) { + if ( ref == bucket->refs[i] ) { + bucket->vals[i] = val; + return; + } + } + + if ( ur_pail_max == fill ) { + ur_dict32_grow(r, dict, dict->prev, dict->size); + continue; + } + + bucket->refs[fill] = ref; + bucket->vals[fill] = val; + dict->fills[idx] = 1 + fill; + break; + } +} + +void +ur_dict32_wipe(ur_dict32_t *dict) +{ + uint8_t *fills = dict->fills; + uint64_t size = dict->size; + + memset(fills, 0, sizeof(*fills) * size); +} + +void +ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size) +{ + ur_pail64_t *buckets, *old_buckets = dict->buckets; + uint8_t *fills, *old_fills = 
dict->fills; + uint64_t old_size = dict->size; + uint64_t i, next = prev + size; + + buckets = _oom("dict64_grow", calloc(next, sizeof(*buckets))); + fills = _oom("dict64_grow", calloc(next, sizeof(*fills))); + + if ( old_buckets ) { + for ( i = 0; i < old_size; i++ ) { + ur_pail64_t *old_bucket = &(old_buckets[i]); + uint8_t j, old_fill = old_fills[i]; + + for ( j = 0; j < old_fill; j++ ) { + uint64_t val = old_bucket->vals[j]; + ur_nref ref = old_bucket->refs[j]; + ur_mug mug = ur_nref_mug(r, ref); + + uint64_t idx = ( mug % next ); + ur_pail64_t *bucket = &(buckets[idx]); + uint8_t new_fill = fills[idx]; + + if ( ur_pail_max == new_fill ) { + free(buckets); + free(fills); + return ur_dict64_grow(r, dict, size, next); + } + + bucket->refs[new_fill] = ref; + bucket->vals[new_fill] = val; + fills[idx] = 1 + new_fill; + } + } + + free(old_buckets); + free(old_fills); + } + + dict->prev = size; + dict->size = next; + dict->fills = fills; + dict->buckets = buckets; +} + +ur_bool_t +ur_dict64_get(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t *out) +{ + ur_mug mug = ur_nref_mug(r, ref); + uint64_t idx = ( mug % dict->size ); + + ur_pail64_t *bucket = &(dict->buckets[idx]); + uint8_t i, fill = dict->fills[idx]; + + for ( i = 0; i < fill; i++ ) { + if ( ref == bucket->refs[i] ) { + *out = bucket->vals[i]; + return 1; + } + } + + return 0; +} + +void +ur_dict64_put(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t val) +{ + ur_mug mug = ur_nref_mug(r, ref); + + while ( 1 ) { + uint64_t idx = ( mug % dict->size ); + ur_pail64_t *bucket = &(dict->buckets[idx]); + uint8_t i, fill = dict->fills[idx]; + + for ( i = 0; i < fill; i++ ) { + if ( ref == bucket->refs[i] ) { + bucket->vals[i] = val; + return; + } + } + + if ( ur_pail_max == fill ) { + ur_dict64_grow(r, dict, dict->prev, dict->size); + continue; + } + + bucket->refs[fill] = ref; + bucket->vals[fill] = val; + dict->fills[idx] = 1 + fill; + break; + } +} + +void +ur_dict64_wipe(ur_dict64_t *dict) +{ + uint8_t *fills = dict->fills; + uint64_t size = dict->size; + + memset(fills, 0, sizeof(*fills) * size); +} + +void +ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size) +{ + ur_pail_t *buckets, *old_buckets = dict->buckets; + uint8_t *fills, *old_fills = dict->fills; + uint64_t old_size = dict->size; + uint64_t i, next = prev + size; + + buckets = _oom("dict_grow", calloc(next, sizeof(*buckets))); + fills = _oom("dict_grow", calloc(next, sizeof(*fills))); + + if ( old_buckets ) { + for ( i = 0; i < old_size; i++ ) { + ur_pail_t *old_bucket = &(old_buckets[i]); + uint8_t j, old_fill = old_fills[i]; + + for ( j = 0; j < old_fill; j++ ) { + ur_nref ref = old_bucket->refs[j]; + ur_mug mug = ur_nref_mug(r, ref); + + uint64_t idx = ( mug % next ); + ur_pail_t *bucket = &(buckets[idx]); + uint8_t new_fill = fills[idx]; + + if ( ur_pail_max == new_fill ) { + free(buckets); + free(fills); + return ur_dict_grow(r, dict, size, next); + } + + bucket->refs[new_fill] = ref; + fills[idx] = 1 + new_fill; + } + } + + free(old_buckets); + free(old_fills); + } + + dict->prev = size; + dict->size = next; + dict->fills = fills; + dict->buckets = buckets; +} + +ur_bool_t +ur_dict_get(ur_root_t *r, ur_dict_t *dict, ur_nref ref) +{ + ur_mug mug = ur_nref_mug(r, ref); + uint64_t idx = ( mug % dict->size ); + + ur_pail_t *bucket = &(dict->buckets[idx]); + uint8_t i, fill = dict->fills[idx]; + + for ( i = 0; i < fill; i++ ) { + if ( ref == bucket->refs[i] ) { + return 1; + } + } + + return 0; +} + +void +ur_dict_put(ur_root_t *r, 
ur_dict_t *dict, ur_nref ref) +{ + ur_mug mug = ur_nref_mug(r, ref); + + while ( 1 ) { + uint64_t idx = ( mug % dict->size ); + ur_pail_t *bucket = &(dict->buckets[idx]); + uint8_t i, fill = dict->fills[idx]; + + for ( i = 0; i < fill; i++ ) { + if ( ref == bucket->refs[i] ) { + return; + } + } + + if ( ur_pail_max == fill ) { + ur_dict_grow(r, dict, dict->prev, dict->size); + continue; + } + + bucket->refs[fill] = ref; + dict->fills[idx] = 1 + fill; + break; + } +} + +void +ur_dict_wipe(ur_dict_t *dict) +{ + uint8_t *fills = dict -> fills; + uint64_t size = dict->size; + + memset(fills, 0, sizeof(*fills) * size); +} + +void +ur_dict_free(ur_dict_t *dict) +{ + free(dict->buckets); + free(dict->fills); + dict->buckets = 0; +} + +ur_mug +ur_mug_bytes(const uint8_t *byt, uint64_t len) +{ + uint32_t seed = 0xcafebabe; + uint8_t i = 0; + + while ( i < 8 ) { + ur_mug mug; + uint32_t raw; + MurmurHash3_x86_32(byt, len, seed, &raw); + mug = (raw >> 31) ^ ( ur_mask_31(raw) ); + + if ( 0 == mug ) { + seed++; i++; + } + else { + return mug; + } + } + + return (ur_mug)0x7fff; +} + +ur_mug +ur_mug32(uint32_t x) +{ + uint8_t byt[4] = { + ur_mask_8(x >> 0), + ur_mask_8(x >> 8), + ur_mask_8(x >> 16), + ur_mask_8(x >> 24) + }; + + return ur_mug_bytes(byt, ur_met3_32(x)); +} + +ur_mug +ur_mug64(uint64_t x) +{ + uint8_t byt[8] = { + ur_mask_8(x >> 0), + ur_mask_8(x >> 8), + ur_mask_8(x >> 16), + ur_mask_8(x >> 24), + ur_mask_8(x >> 32), + ur_mask_8(x >> 40), + ur_mask_8(x >> 48), + ur_mask_8(x >> 56) + }; + + return ur_mug_bytes(byt, ur_met3_64(x)); +} + +ur_mug +ur_mug_both(ur_mug hed, ur_mug tal) +{ + uint32_t seed = 0xdeadbeef; + uint8_t len = 4 + ur_bloq_up3(ur_met0_32(tal)); + uint8_t i = 0; + uint8_t byt[8] = { + ur_mask_8(hed >> 0), + ur_mask_8(hed >> 8), + ur_mask_8(hed >> 16), + ur_mask_8(hed >> 24), + ur_mask_8(tal >> 0), + ur_mask_8(tal >> 8), + ur_mask_8(tal >> 16), + ur_mask_8(tal >> 24) + }; + + while ( i < 8 ) { + ur_mug mug; + uint32_t raw; + MurmurHash3_x86_32(byt, len, seed, &raw); + mug = (raw >> 31) ^ ( ur_mask_31(raw) ); + + if ( 0 == mug ) { + seed++; i++; + } + else { + return mug; + } + } + + return (ur_mug)0xfffe; +} + +ur_mug +ur_nref_mug(ur_root_t *r, ur_nref ref) +{ + switch ( ur_nref_tag(ref) ) { + default: assert(0); + + case ur_direct: return ur_mug64(ref); + case ur_iatom: return r->atoms.mugs[ur_nref_idx(ref)]; + case ur_icell: return r->cells.mugs[ur_nref_idx(ref)]; + } +} + +ur_bool_t +ur_deep(ur_nref ref) +{ + return ur_icell == ur_nref_tag(ref); +} + +ur_nref +ur_head(ur_root_t *r, ur_nref ref) +{ + assert( ur_deep(ref) ); + return r->cells.heads[ur_nref_idx(ref)]; +} + +ur_nref +ur_tail(ur_root_t *r, ur_nref ref) +{ + assert( ur_deep(ref) ); + return r->cells.tails[ur_nref_idx(ref)]; +} + +void +ur_atoms_grow(ur_atoms_t *atoms) +{ + uint64_t prev = atoms->prev; + uint64_t size = atoms->size; + uint64_t next = prev + size; + uint8_t **bytes = atoms->bytes; + uint64_t *lens = atoms->lens; + ur_mug *mugs = atoms->mugs; + + atoms->bytes = _oom("atoms_grow", malloc(next * ( sizeof(*atoms->bytes) + + sizeof(*atoms->lens) + + sizeof(*atoms->mugs) ))); + atoms->lens = (void*)((char*)atoms->bytes + (next * sizeof(*atoms->bytes))); + atoms->mugs = (void*)((char*)atoms->lens + (next * sizeof(*atoms->lens))); + + if ( bytes ) { + memcpy(atoms->bytes, bytes, size * (sizeof(*bytes))); + memcpy(atoms->lens, lens, size * (sizeof(*lens))); + memcpy(atoms->mugs, mugs, size * (sizeof(*mugs))); + + free(bytes); + } + + atoms->prev = size; + atoms->size = next; +} + +void 
+ur_cells_grow(ur_cells_t *cells) +{ + uint64_t prev = cells->prev; + uint64_t size = cells->size; + uint64_t next = prev + size; + ur_nref *heads = cells->heads; + ur_nref *tails = cells->tails; + ur_mug *mugs = cells->mugs; + + cells->heads = _oom("cells_grow", malloc(next * ( sizeof(*cells->heads) + + sizeof(*cells->heads) + + sizeof(*cells->mugs) ))); + cells->tails = (void*)((char*)cells->heads + (next * sizeof(*cells->heads))); + cells->mugs = (void*)((char*)cells->tails + (next * sizeof(*cells->tails))); + + if ( heads ) { + memcpy(cells->heads, heads, size * (sizeof(*heads))); + memcpy(cells->tails, tails, size * (sizeof(*tails))); + memcpy(cells->mugs, mugs, size * (sizeof(*mugs))); + + free(heads); + } + + cells->prev = size; + cells->size = next; +} + +void +ur_bytes(ur_root_t *r, ur_nref ref, uint8_t **byt, uint64_t *len) +{ + assert( !ur_deep(ref) ); + switch ( ur_nref_tag(ref) ) { + default: assert(0); + + case ur_direct: { + *len = ur_met3_64(ref); + // XX little-endian + // + *byt = (uint8_t*)&ref; + } break; + + case ur_iatom: { + uint64_t idx = ur_nref_idx(ref); + *len = r->atoms.lens[idx]; + *byt = r->atoms.bytes[idx]; + } break; + } +} + +uint64_t +ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref) +{ + uint64_t m_bit; + + // XX return bool for cells, length in out parameter + // + assert( !ur_deep(ref) ); + + if ( ur_direct == ur_nref_tag(ref) ) { + m_bit = ur_met0_64(ref); + } + else { + uint64_t idx = ur_nref_idx(ref); + uint64_t len = r->atoms.lens[idx]; + uint8_t *byt = r->atoms.bytes[idx]; + + m_bit = ur_met0_bytes_unsafe(len, byt); + } + + switch ( bloq ) { + case 0: return m_bit; + case 1: return ur_bloq_up1(m_bit); + case 2: return ur_bloq_up2(m_bit); + case 3: return ur_bloq_up3(m_bit); + + default: { + uint64_t m_byt = ur_bloq_up3(m_bit); + uint8_t off = (bloq - 3); + return (m_byt + ((1ULL << off) - 1)) >> off; + } + } +} + +static ur_nref +_coin_unsafe(ur_atoms_t *atoms, ur_mug mug, uint64_t len, uint8_t *byt) +{ + uint64_t fill = atoms->fill; + ur_tag tag = ur_iatom; + ur_nref tom = ( fill | ((uint64_t)tag << 62) ); + + // XX necessary? + // + assert( 62 >= ur_met0_64(fill) ); + + atoms->bytes[fill] = byt; + atoms->lens[fill] = len; + atoms->mugs[fill] = mug; + atoms->fill = 1 + fill; + + return tom; +} + +static ur_nref +_cons_unsafe(ur_cells_t *cells, ur_mug mug, ur_nref hed, ur_nref tal) +{ + uint64_t fill = cells->fill; + ur_tag tag = ur_icell; + ur_nref cel = ( fill | ((uint64_t)tag << 62) ); + + // XX necessary? 
+ // + assert( 62 >= ur_met0_64(fill) ); + + cells->mugs[fill] = mug; + cells->heads[fill] = hed; + cells->tails[fill] = tal; + cells->fill = 1 + fill; + + return cel; +} + +ur_nref +ur_coin_bytes_unsafe(ur_root_t *r, uint64_t len, uint8_t *byt) +{ + ur_atoms_t *atoms = &(r->atoms); + ur_dict_t *dict = &(atoms->dict); + ur_mug mug = ur_mug_bytes(byt, len); + + while ( 1 ) { + uint64_t idx = ( mug % dict->size ); + ur_pail_t *bucket = &(dict->buckets[idx]); + uint8_t i, b_fill = dict->fills[idx]; + ur_nref tom; + + for ( i = 0; i < b_fill; i++ ) { + uint8_t *t_byt; + uint64_t t_len; + tom = bucket->refs[i]; + + ur_bytes(r, tom, &t_byt, &t_len); + + if ( (t_len == len) + && (0 == memcmp(t_byt, byt, len)) ) + { + return tom; + } + } + + if ( ur_pail_max == b_fill ) { + ur_dict_grow(r, dict, dict->prev, dict->size); + continue; + } + + if ( atoms->fill == atoms->size ) { + ur_atoms_grow(atoms); + } + + tom = _coin_unsafe(atoms, mug, len, byt); + + bucket->refs[b_fill] = tom; + dict->fills[idx] = 1 + b_fill; + + return tom; + } +} + +ur_nref +ur_coin_bytes(ur_root_t *r, uint64_t len, uint8_t *byt) +{ + // strip trailing zeroes + // + while ( len && !byt[len - 1] ) { + len--; + } + + // produce a direct atom if possible + // + if ( 62 >= ur_met0_bytes_unsafe(len, byt) ) { + uint64_t i, direct = 0; + + for ( i = 0; i < len; i++ ) { + direct |= byt[i] << (8 * i); + } + + return (ur_nref)direct; + } + else { + uint8_t *copy = _oom("coin_bytes", malloc(len)); + memcpy(copy, byt, len); + + return ur_coin_bytes_unsafe(r, len, copy); + } +} + +ur_nref +ur_coin64(ur_root_t *r, uint64_t n) +{ + if ( ur_direct == ur_nref_tag(n) ) { + return n; + } + else { + uint8_t *byt; + assert( 8 == ur_met3_64(n) ); + + byt = _oom("coin64", malloc(8)); + + byt[0] = ur_mask_8(n); + byt[1] = ur_mask_8(n >> 8); + byt[2] = ur_mask_8(n >> 16); + byt[3] = ur_mask_8(n >> 24); + byt[4] = ur_mask_8(n >> 32); + byt[5] = ur_mask_8(n >> 40); + byt[6] = ur_mask_8(n >> 48); + byt[7] = ur_mask_8(n >> 56); + + return ur_coin_bytes_unsafe(r, 8, byt); + } +} + +ur_nref +ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal) +{ + ur_cells_t *cells = &(r->cells); + ur_dict_t *dict = &(cells->dict); + ur_mug mug = ur_mug_both(ur_nref_mug(r, hed), + ur_nref_mug(r, tal)); + + while ( 1 ) { + uint64_t idx = ( mug % dict->size ); + ur_pail_t *bucket = &(dict->buckets[idx]); + uint8_t i, b_fill = dict->fills[idx]; + ur_nref cel; + + for ( i = 0; i < b_fill; i++ ) { + cel = bucket->refs[i]; + + if ( (hed == ur_head(r, cel)) + && (tal == ur_tail(r, cel)) ) + { + return cel; + } + } + + if ( ur_pail_max == b_fill ) { + ur_dict_grow(r, dict, dict->prev, dict->size); + continue; + } + + if ( cells->fill == cells->size ) { + ur_cells_grow(cells); + } + + cel = _cons_unsafe(cells, mug, hed, tal); + + bucket->refs[b_fill] = cel; + dict->fills[idx] = 1 + b_fill; + + return cel; + } +} + +static void +_print_memory(FILE *f, const char *c, uint64_t bytes) +{ + if ( !bytes ) { + fprintf(f, "%s: B/0\r\n", c); + } + else { + uint32_t g = (bytes / 1000000000); + uint32_t m = (bytes % 1000000000) / 1000000; + uint32_t k = (bytes % 1000000) / 1000; + uint32_t b = (bytes % 1000); + + if ( g ) { + fprintf(f, "%s: GB/%d.%03d.%03d.%03d\r\n", c, g, m, k, b); + } + else if ( m ) { + fprintf(f, "%s: MB/%d.%03d.%03d\r\n", c, m, k, b); + } + else if ( k ) { + fprintf(f, "%s: KB/%d.%03d\r\n", c, k, b); + } + else if ( b ) { + fprintf(f, "%s: B/%d\r\n", c, b); + } + } +} + +static uint64_t +_dict_info(FILE *f, ur_dict_t *dict) +{ + uint64_t data = dict->size * 
sizeof(*dict->buckets); + _print_memory(f, " dict", data); + return data; +} + +static uint64_t +_atoms_info(FILE *f, ur_atoms_t *atoms) +{ + uint64_t total = 0; + uint64_t size = atoms->size; + uint64_t fill = atoms->fill; + uint64_t refs = size * ( sizeof(*atoms->bytes) + + sizeof(*atoms->lens) + + sizeof(*atoms->mugs) ); + uint64_t i, data = 0; + + fprintf(f, " atoms (%" PRIu64 "):\r\n", fill); + + _print_memory(f, " refs", refs); + total += refs; + + for ( i = 0; i < fill; i++ ) { + data += atoms->lens[i]; + } + _print_memory(f, " data", data); + total += data; + + total += _dict_info(f, &(atoms->dict)); + _print_memory(f, " total", total); + return total; +} + +static uint64_t +_cells_info(FILE *f, ur_cells_t *cells) +{ + uint64_t total = 0; + uint64_t size = cells->size; + uint64_t fill = cells->fill; + uint64_t refs = size * ( sizeof(*cells->heads) + + sizeof(*cells->heads) + + sizeof(*cells->mugs) ); + + fprintf(f, " cells (%" PRIu64 "):\r\n", fill); + + _print_memory(f, " refs", refs); + total += refs; + + total += _dict_info(f, &(cells->dict)); + _print_memory(f, " total", total); + return total; +} + +void +ur_root_info(FILE *f, ur_root_t *r) +{ + uint64_t total = 0; + + fprintf(stderr, "hash-cons arena:\r\n"); + + { + uint64_t root = sizeof(*r); + _print_memory(f, " root", root); + total += root; + } + + total += _atoms_info(f, &(r->atoms)); + total += _cells_info(f, &(r->cells)); + + _print_memory(f, "total", total); +} + +static void +_atoms_free(ur_atoms_t *atoms) +{ + uint8_t **bytes = atoms->bytes; + uint64_t i, fill = atoms->fill; + + for ( i = 0; i < fill; i++ ) { + free(bytes[i]); + } + + ur_dict_free(&(atoms->dict)); + free(bytes); +} + +static void +_cells_free(ur_cells_t *cells) +{ + ur_dict_free(&(cells->dict)); + free(cells->heads); +} + +void +ur_root_free(ur_root_t *r) +{ + _atoms_free(&(r->atoms)); + _cells_free(&(r->cells)); + free(r); +} + +ur_root_t* +ur_root_init(void) +{ + ur_root_t *r = _oom("root_init", calloc(1, sizeof(*r))); + + { + ur_dict_t *dict; + + // allocate atom storage + // + r->atoms.prev = ur_fib11; + r->atoms.size = ur_fib12; + ur_atoms_grow(&(r->atoms)); + + // allocate atom hashtable + // + dict = &(r->atoms.dict); + ur_dict_grow(r, dict, ur_fib11, ur_fib12); + + // allocate cell storage + // + r->cells.prev = ur_fib11; + r->cells.size = ur_fib12; + ur_cells_grow(&(r->cells)); + + // allocate cell hashtable + // + dict = &(r->cells.dict); + ur_dict_grow(r, dict, ur_fib11, ur_fib12); + } + + return r; +} + +void +ur_nvec_free(ur_nvec_t *v) +{ + free(v->refs); +} + +void +ur_nvec_init(ur_nvec_t *v, uint64_t size) +{ + v->fill = 0; + v->refs = _oom("nvec_init", calloc(size, sizeof(ur_nref))); +} + +/* +** define opaque struct ur_walk_fore_s (ie, ur_walk_fore_t) +*/ +struct ur_walk_fore_s { + ur_root_t *r; + uint32_t prev; + uint32_t size; + ur_nref *top; +}; + +ur_walk_fore_t* +ur_walk_fore_init_with(ur_root_t *r, + uint32_t s_prev, + uint32_t s_size) +{ + ur_walk_fore_t *w = _oom("walk_fore", malloc(sizeof(*w))); + w->top = _oom("walk_fore", malloc(s_size * sizeof(*w->top))); + w->prev = s_prev; + w->size = s_size; + w->r = r; + + return w; +} + +ur_walk_fore_t* +ur_walk_fore_init(ur_root_t *r) +{ + return ur_walk_fore_init_with(r, ur_fib10, ur_fib11); +} + +void +ur_walk_fore_with(ur_walk_fore_t *w, + ur_nref ref, + void *v, + void (*atom)(ur_root_t*, ur_nref, void*), + ur_bool_t (*cell)(ur_root_t*, ur_nref, void*)) +{ + ur_root_t *r = w->r; + uint32_t fill = 1; + + *w->top = ref; + + do { + // visit atom, pop stack + // + if ( 
!ur_deep(ref) ) { + atom(r, ref, v); + fill--; + } + // visit cell, pop stack if false + // + else if ( !cell(r, ref, v) ) { + fill--; + } + // push the tail, continue into the head + // + else { + w->top[fill++] = ur_tail(r, ref); + + // reallocate "stack" if full + // + if ( w->size == fill ) { + uint32_t next = w->prev + w->size; + w->top = _oom("walk_fore", realloc(w->top, next * sizeof(*w->top))); + w->prev = w->size; + w->size = next; + } + + w->top[fill] = ur_head(r, ref); + } + + ref = w->top[fill]; + } while ( fill ); +} + +void +ur_walk_fore_done(ur_walk_fore_t *w) +{ + free(w->top); + free(w); +} + +void +ur_walk_fore(ur_root_t *r, + ur_nref ref, + void *v, + void (*atom)(ur_root_t*, ur_nref, void*), + ur_bool_t (*cell)(ur_root_t*, ur_nref, void*)) +{ + ur_walk_fore_t *w = ur_walk_fore_init(r); + ur_walk_fore_with(w, ref, v, atom, cell); + ur_walk_fore_done(w); +} diff --git a/vere/pkg/ur/hashcons.h b/vere/pkg/ur/hashcons.h new file mode 100644 index 0000000..8e0afea --- /dev/null +++ b/vere/pkg/ur/hashcons.h @@ -0,0 +1,259 @@ +/// @file + +#ifndef UR_HASHCONS_H +#define UR_HASHCONS_H + +#include <assert.h> +#include <inttypes.h> +#include <limits.h> +#include <stdio.h> + +#include "defs.h" + +/* +** 64-bit noun references, with the top 2 bits reserved for type tags. +*/ +typedef uint64_t ur_nref; + +typedef enum { + ur_direct = 0, + ur_iatom = 1, + ur_icell = 2, +} ur_tag; + +#define ur_nref_tag(ref) ( ref >> 62 ) +#define ur_nref_idx(ref) ur_mask_62(ref) + +/* +** 31-bit, non-zero, murmur3-based noun hash. +*/ +typedef uint32_t ur_mug; + +/* +** associative structures (dictionaries) of noun references, +** distributed by mug across fixed-size buckets (pails), +** reallocated with fibonacci growth once a bucket is full. +** +** - ur_dict_t: set of noun references +** - ur_dict32_t: map from noun reference to uint32 +** - ur_dict32_t: map from noun reference to uint64 +*/ + +#define ur_pail_max 10 + +typedef struct ur_pail32_s { + ur_nref refs[ur_pail_max]; + uint32_t vals[ur_pail_max]; +} ur_pail32_t; + +typedef struct ur_dict32_s { + uint64_t prev; + uint64_t size; + uint8_t *fills; + ur_pail32_t *buckets; +} ur_dict32_t; + +typedef struct ur_pail64_s { + ur_nref refs[ur_pail_max]; + uint64_t vals[ur_pail_max]; +} ur_pail64_t; + +typedef struct ur_dict64_s { + uint64_t prev; + uint64_t size; + uint8_t *fills; + ur_pail64_t *buckets; +} ur_dict64_t; + +typedef struct ur_pail_s { + ur_nref refs[ur_pail_max]; +} ur_pail_t; + +typedef struct ur_dict_s { + uint64_t prev; + uint64_t size; + uint8_t *fills; + ur_pail_t *buckets; +} ur_dict_t; + +/* +** cells are hash-consed, atoms are deduplicated (byte-array comparison), +** mug hashes are stored, and noun references are unique within a root. +*/ +typedef struct ur_cells_s { + ur_dict_t dict; + uint64_t prev; + uint64_t size; + uint64_t fill; + ur_mug *mugs; + ur_nref *heads; + ur_nref *tails; +} ur_cells_t; + +typedef struct ur_atoms_s { + ur_dict_t dict; + uint64_t prev; + uint64_t size; + uint64_t fill; + ur_mug *mugs; + uint8_t **bytes; + uint64_t *lens; +} ur_atoms_t; + +typedef struct ur_root_s { + ur_cells_t cells; + ur_atoms_t atoms; +} ur_root_t; + +/* +** a vector of noun references. +*/ +typedef struct ur_nvec_s { + uint64_t fill; + ur_nref* refs; +} ur_nvec_t; + +/* +** opaque handle for repeated traversal. +*/ +typedef struct ur_walk_fore_s ur_walk_fore_t; + +/* +** type-specific dictionary operations. +** +** NB: [r] is only used to retrieve the stored mug of cells and +** indirect atoms. 
If all references are direct atoms (62-bits or less), +** [r] can be null. This option is used extensively in cue (de-serialization) +** implementations, where the dictionary keys are bit-cursors. +*/ +void +ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size); + +ur_bool_t +ur_dict32_get(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t *out); + +void +ur_dict32_put(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t val); + +void +ur_dict32_wipe(ur_dict32_t *dict); + +void +ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size); + +ur_bool_t +ur_dict64_get(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t *out); + +void +ur_dict64_put(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t val); + +void +ur_dict64_wipe(ur_dict64_t *dict); + +void +ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size); + +ur_bool_t +ur_dict_get(ur_root_t *r, ur_dict_t *dict, ur_nref ref); + +void +ur_dict_put(ur_root_t *r, ur_dict_t *dict, ur_nref ref); + +void +ur_dict_wipe(ur_dict_t *dict); + +/* +** free the buckets of any dictionary (cast to ur_dict_t*). +*/ +void +ur_dict_free(ur_dict_t *dict); + +/* +** measure the bloq (binary-exponent) length of an atom in [r] +*/ +uint64_t +ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref); + +/* +** find or allocate an atom in [r] +** +** unsafe variant is unsafe wrt allocation (byte arrays must be +** allocated with system malloc) and trailing null bytes (not allowed). +*/ +ur_nref +ur_coin_bytes_unsafe(ur_root_t *r, uint64_t len, uint8_t *byt); + +ur_nref +ur_coin_bytes(ur_root_t *r, uint64_t len, uint8_t *byt); + +ur_nref +ur_coin64(ur_root_t *r, uint64_t n); + +/* +** find or construct a cell in [r] +*/ +ur_nref +ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal); + +/* +** calculate the mug of [ref], or produce the stored value in [r]. +*/ +ur_mug +ur_nref_mug(ur_root_t *r, ur_nref ref); + +/* +** initialize a noun arena (root). +*/ +ur_root_t* +ur_root_init(void); + +/* +** print root details to [f] +*/ +void +ur_root_info(FILE *f, ur_root_t *r); + +/* +** dispose all allocations in [r] +*/ +void +ur_root_free(ur_root_t *r); + +/* +** initialize or dispose a vector of noun references +*/ +void +ur_nvec_init(ur_nvec_t *v, uint64_t size); + +void +ur_nvec_free(ur_nvec_t *v); + +/* +** depth-first, pre-order noun traversal, cells can short-circuit. 
+*/ +void +ur_walk_fore(ur_root_t *r, + ur_nref ref, + void *v, + void (*atom)(ur_root_t*, ur_nref, void*), + ur_bool_t (*cell)(ur_root_t*, ur_nref, void*)); + +ur_walk_fore_t* +ur_walk_fore_init_with(ur_root_t *r, + uint32_t s_prev, + uint32_t s_size); + +ur_walk_fore_t* +ur_walk_fore_init(ur_root_t *r); + +void +ur_walk_fore_with(ur_walk_fore_t *w, + ur_nref ref, + void *v, + void (*atom)(ur_root_t*, ur_nref, void*), + ur_bool_t (*cell)(ur_root_t*, ur_nref, void*)); + +void +ur_walk_fore_done(ur_walk_fore_t *w); + +#endif /* ifndef UR_HASHCONS_H */ diff --git a/vere/pkg/ur/serial.c b/vere/pkg/ur/serial.c new file mode 100644 index 0000000..72fb448 --- /dev/null +++ b/vere/pkg/ur/serial.c @@ -0,0 +1,588 @@ +/// @file + +#include "serial.h" + +#include <assert.h> +#include <stdlib.h> + +static void* +_oom(const char* cap, void* v) +{ + if ( !v ) { + fprintf(stderr, + "ur: hashcons: %s: allocation failed, out of memory\r\n", cap); + abort(); + } + + return v; +} + +static inline void +_bsw_atom(ur_root_t *r, ur_nref ref, ur_bsw_t *bsw, uint64_t len) +{ + switch ( ur_nref_tag(ref) ) { + default: assert(0); + + case ur_direct: return ur_bsw_atom64(bsw, len, ref); + + case ur_iatom: { + uint8_t *byt = r->atoms.bytes[ur_nref_idx(ref)]; + return ur_bsw_atom_bytes(bsw, len, byt); + } + } +} + +/* +** define opaque struct ur_jam_s (ie, ur_jam_t) +*/ +struct ur_jam_s { + ur_root_t *r; + ur_walk_fore_t *w; + ur_dict64_t dict; + ur_bsw_t bsw; +}; + +static void +_jam_atom(ur_root_t *r, ur_nref ref, void *ptr) +{ + ur_jam_t *j = ptr; + ur_dict64_t *dict = &j->dict; + ur_bsw_t *bsw = &j->bsw; + uint64_t bak, len = ur_met(r, 0, ref); + + if ( !ur_dict64_get(r, dict, ref, &bak) ) { + ur_dict64_put(r, dict, ref, bsw->bits); + + _bsw_atom(r, ref, bsw, len); + } + else { + uint64_t len_bak = ur_met0_64(bak); + + if ( len <= len_bak ) { + _bsw_atom(r, ref, bsw, len); + } + else { + ur_bsw_back64(bsw, len_bak, bak); + } + } +} + +static ur_bool_t +_jam_cell(ur_root_t *r, ur_nref ref, void *ptr) +{ + ur_jam_t *j = ptr; + ur_dict64_t *dict = &j->dict; + ur_bsw_t *bsw = &j->bsw; + uint64_t bak; + + if ( !ur_dict64_get(r, dict, ref, &bak) ) { + ur_dict64_put(r, dict, ref, bsw->bits); + + ur_bsw_cell(bsw); + return 1; + } + else { + ur_bsw_back64(bsw, ur_met0_64(bak), bak); + return 0; + } +} + +static uint64_t +_jam(ur_jam_t *j, + ur_nref ref, + uint64_t *len, + uint8_t **byt) +{ + ur_bsw_init(&j->bsw, ur_fib11, ur_fib12); + ur_walk_fore_with(j->w, ref, j, _jam_atom, _jam_cell); + return ur_bsw_done(&j->bsw, len, byt); +} + +ur_jam_t* +ur_jam_init_with(ur_root_t *r, + uint64_t d_prev, + uint64_t d_size, + uint32_t s_prev, + uint32_t s_size) +{ + ur_jam_t *j = _oom("jam_init", calloc(sizeof(*j), 1)); + j->w = ur_walk_fore_init_with(r, s_prev, s_size); + j->r = r; + + ur_dict64_grow(r, &j->dict, d_prev, d_size); + + return j; +} + +ur_jam_t* +ur_jam_init(ur_root_t *r) +{ + return ur_jam_init_with(r, ur_fib11, ur_fib12, // dict sizes + ur_fib10, ur_fib11); // stack sizes +} + +uint64_t +ur_jam_with(ur_jam_t *j, + ur_nref ref, + uint64_t *len, + uint8_t **byt) +{ + uint64_t bits = _jam(j, ref, len, byt); + ur_dict64_wipe(&j->dict); + return bits; +} + +void +ur_jam_done(ur_jam_t *j) +{ + ur_dict_free((ur_dict_t*)&j->dict); + ur_walk_fore_done(j->w); + free(j); +} + +uint64_t +ur_jam(ur_root_t *r, + ur_nref ref, + uint64_t *len, + uint8_t **byt) +{ + ur_jam_t *j = ur_jam_init(r); + uint64_t bits = _jam(j, ref, len, byt); + ur_jam_done(j); + return bits; +} + +/* +** stack frame for recording head vs tail iteration +** 
+** $? [CUE_HEAD bits=@] +** [hed=* bits=@] +*/ + +#define CUE_HEAD 0xffffffffffffffffULL + +typedef struct _cue_frame_s { + uint64_t ref; + uint64_t bits; +} _cue_frame_t; + +typedef struct _cue_stack_s { + uint32_t prev; + uint32_t size; + uint32_t fill; + _cue_frame_t* f; +} _cue_stack_t; + +static inline ur_cue_res_e +_cue_next(ur_root_t *r, + _cue_stack_t *s, + ur_bsr_t *bsr, + ur_dict64_t *dict, + ur_nref *out) +{ + while ( 1 ) { + uint64_t len, bits = bsr->bits; + ur_cue_tag_e tag; + ur_cue_res_e res; + + if ( ur_cue_good != (res = ur_bsr_tag(bsr, &tag)) ) { + return res; + } + + switch ( tag ) { + default: assert(0); + + case ur_jam_cell: { + // reallocate the stack if full + // + if ( s->fill == s->size ) { + uint32_t next = s->prev + s->size; + s->f = _oom("cue_next stack", realloc(s->f, next * sizeof(*s->f))); + s->prev = s->size; + s->size = next; + } + + // save a head-frame and read the head from the stream + // + { + _cue_frame_t* f = &(s->f[s->fill++]); + f->ref = CUE_HEAD; + f->bits = bits; + } + continue; + } + + case ur_jam_back: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + else if ( 62 < len ) { + return ur_cue_meme; + } + else { + uint64_t val, bak = ur_bsr64_any(bsr, len); + + if ( !ur_dict64_get(r, dict, bak, &val) ) { + return ur_cue_back; + } + + *out = (ur_nref)val; + return ur_cue_good; + } + } + + case ur_jam_atom: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + else if ( 62 >= len ) { + *out = (ur_nref)ur_bsr64_any(bsr, len); + } + else { + uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); + uint8_t *byt = _oom("cue_next bytes", calloc(len_byt, 1)); + + ur_bsr_bytes_any(bsr, len, byt); + + // strip trailing zeroes + // + while ( len_byt && !byt[len_byt - 1] ) { + len_byt--; + } + + *out = ur_coin_bytes_unsafe(r, len_byt, byt); + } + + ur_dict64_put(r, dict, bits, (uint64_t)*out); + return ur_cue_good; + } + } + } +} + +/* +** define opaque struct ur_cue_s (ie, ur_cue_t) +*/ +struct ur_cue_s { + ur_root_t *r; + _cue_stack_t s; + ur_dict64_t dict; +}; + +static ur_cue_res_e +_cue(ur_cue_t *c, + uint64_t len, + const uint8_t *byt, + ur_nref *out) +{ + ur_bsr_t bsr = {0}; + ur_root_t *r = c->r; + _cue_stack_t *s = &c->s; + ur_dict64_t *dict = &c->dict; + ur_cue_res_e res; + ur_nref ref; + + // init bitstream-reader + // + if ( ur_cue_good != (res = ur_bsr_init(&bsr, len, byt)) ) { + return res; + } + // bit-cursor (and backreferences) must fit in 62-bit direct atoms + // + else if ( 0x7ffffffffffffffULL < len ) { + return ur_cue_meme; + } + + // advance into stream + // + res = _cue_next(r, s, &bsr, dict, &ref); + + // process result + // + while ( s->fill && (ur_cue_good == res) ) { + // peek at the top of the stack + // + _cue_frame_t *f = &(s->f[s->fill - 1]); + + // f is a head-frame; stash result and read the tail from the stream + // + if ( CUE_HEAD == f->ref ) { + f->ref = ref; + res = _cue_next(r, s, &bsr, dict, &ref); + } + // f is a tail-frame; pop the stack and continue + // + else { + ref = ur_cons(r, f->ref, ref); + ur_dict64_put(r, dict, f->bits, (uint64_t)ref); + s->fill--; + } + } + + if ( ur_cue_good == res ) { + *out = ref; + } + return res; +} + +ur_cue_t* +ur_cue_init_with(ur_root_t *r, + uint64_t d_prev, + uint64_t d_size, + uint32_t s_prev, + uint32_t s_size) +{ + ur_cue_t* c = _oom("cue_init", calloc(sizeof(*c), 1)); + c->r = r; + + ur_dict64_grow(r, &c->dict, d_prev, d_size); + + c->s.prev = s_prev; + c->s.size = s_size; + c->s.f = _oom("cue_test_init", malloc(s_size * 
sizeof(*c->s.f))); + + return c; +} + +ur_cue_t* +ur_cue_init(ur_root_t *r) +{ + return ur_cue_init_with(r, ur_fib11, ur_fib12, // dict sizes + ur_fib10, ur_fib11); // stack sizes +} + +ur_cue_res_e +ur_cue_with(ur_cue_t *c, + uint64_t len, + const uint8_t *byt, + ur_nref *out) +{ + ur_cue_res_e res = _cue(c, len, byt, out); + + // XX check size, shrink if above threshold + // + ur_dict64_wipe(&c->dict); + c->s.fill = 0; + + return res; +} + +void +ur_cue_done(ur_cue_t *c) +{ + + ur_dict_free((ur_dict_t*)&c->dict); + free(c->s.f); + free(c); +} + +ur_cue_res_e +ur_cue(ur_root_t *r, + uint64_t len, + const uint8_t *byt, + ur_nref *out) +{ + ur_cue_t *c = ur_cue_init(r); + ur_cue_res_e res = _cue(c, len, byt, out); + + ur_cue_done(c); + return res; +} + +/* +** stack frame for recording head vs tail iteration +** +** [hed=? bits=@] +** +*/ + +typedef struct _cue_test_frame_s { + ur_bool_t tal; + uint64_t bits; +} _cue_test_frame_t; + +typedef struct _cue_test_stack_s { + uint32_t prev; + uint32_t size; + uint32_t fill; + _cue_test_frame_t* f; +} _cue_test_stack_t; + +static inline ur_cue_res_e +_cue_test_next(_cue_test_stack_t *s, + ur_bsr_t *bsr, + ur_dict_t *dict) +{ + while ( 1 ) { + uint64_t len, bits = bsr->bits; + ur_cue_tag_e tag; + ur_cue_res_e res; + + if ( ur_cue_good != (res = ur_bsr_tag(bsr, &tag)) ) { + return res; + } + + switch ( tag ) { + default: assert(0); + + case ur_jam_cell: { + // reallocate the stack if full + // + if ( s->fill == s->size ) { + uint32_t next = s->prev + s->size; + s->f = _oom("cue_test", realloc(s->f, next * sizeof(*s->f))); + s->prev = s->size; + s->size = next; + } + + // save a head-frame and read the head from the stream + // + { + _cue_test_frame_t* f = &(s->f[s->fill++]); + f->tal = 0; + f->bits = bits; + } + continue; + } + + case ur_jam_back: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + else if ( 62 < len ) { + return ur_cue_meme; + } + else { + uint64_t bak = ur_bsr64_any(bsr, len); + return ur_dict_get((ur_root_t*)0, dict, bak) + ? 
ur_cue_good + : ur_cue_back; + } + } + + case ur_jam_atom: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + + ur_bsr_skip_any(bsr, len); + ur_dict_put((ur_root_t*)0, dict, bits); + return ur_cue_good; + } + } + } +} + +/* +** define opaque struct ur_cue_test_s (ie, ur_cue_test_t) +*/ +struct ur_cue_test_s { + _cue_test_stack_t s; + ur_dict_t dict; +}; + +static ur_cue_res_e +_cue_test(ur_cue_test_t *t, + uint64_t len, + const uint8_t *byt) +{ + ur_bsr_t bsr = {0}; + _cue_test_stack_t *s = &t->s; + ur_dict_t *dict = &t->dict; + ur_cue_res_e res; + + // init bitstream-reader + // + if ( ur_cue_good != (res = ur_bsr_init(&bsr, len, byt)) ) { + return res; + } + // bit-cursor (and backreferences) must fit in 62-bit direct atoms + // + else if ( 0x7ffffffffffffffULL < len ) { + return ur_cue_meme; + } + + // advance into stream + // + res = _cue_test_next(s, &bsr, dict); + + // process result + // + while ( s->fill && (ur_cue_good == res) ) { + // peek at the top of the stack + // + _cue_test_frame_t *f = &(s->f[s->fill - 1]); + + // f is a head-frame; stash result and read the tail from the stream + // + if ( !f->tal ) { + f->tal = 1; + res = _cue_test_next(s, &bsr, dict); + } + // f is a tail-frame; pop the stack and continue + // + else { + ur_dict_put((ur_root_t*)0, dict, f->bits); + s->fill--; + } + } + + return res; +} + +ur_cue_test_t* +ur_cue_test_init_with(uint64_t d_prev, + uint64_t d_size, + uint32_t s_prev, + uint32_t s_size) +{ + ur_cue_test_t* t = _oom("cue_test_init", calloc(sizeof(*t), 1)); + + ur_dict_grow((ur_root_t*)0, &t->dict, d_prev, d_size); + + t->s.prev = s_prev; + t->s.size = s_size; + t->s.f = _oom("cue_test_init", malloc(s_size * sizeof(*t->s.f))); + + return t; +} + +ur_cue_test_t* +ur_cue_test_init(void) +{ + return ur_cue_test_init_with(ur_fib11, ur_fib12, // dict sizes + ur_fib10, ur_fib11); // stack sizes +} + +ur_bool_t +ur_cue_test_with(ur_cue_test_t *t, + uint64_t len, + const uint8_t *byt) +{ + ur_bool_t ret = ur_cue_good == _cue_test(t, len, byt); + + // XX check size, shrink if above threshold + // + ur_dict_wipe(&t->dict); + t->s.fill = 0; + + return ret; +} + +void +ur_cue_test_done(ur_cue_test_t *t) +{ + ur_dict_free(&t->dict); + free(t->s.f); + free(t); +} + +ur_bool_t +ur_cue_test(uint64_t len, const uint8_t *byt) +{ + ur_cue_test_t *t = ur_cue_test_init(); + ur_bool_t ret = ur_cue_good == _cue_test(t, len, byt); + + ur_cue_test_done(t); + return ret; +} diff --git a/vere/pkg/ur/serial.h b/vere/pkg/ur/serial.h new file mode 100644 index 0000000..3044c4b --- /dev/null +++ b/vere/pkg/ur/serial.h @@ -0,0 +1,114 @@ +/// @file + +#ifndef UR_SERIAL_H +#define UR_SERIAL_H + +#include <inttypes.h> + +#include "bitstream.h" +#include "defs.h" +#include "hashcons.h" + +/* +** bit-wise serialization of a noun into a byte-buffer. +** supports up to 64-bits of bit-addressed output (nearly 2 EiB). +** (as this is an impractical volume data, cursor overflow is not checked.) +** +** jam_with* api factors out stack/dict (re)allocation, +** for better performance inside hot loops. 
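+**
+** a minimal hot-loop sketch (illustrative only; assumes [r] is an
+** initialized root, and that each output buffer is disposed of with
+** free(), as tests.c does):
+**
+**   ur_jam_t *j = ur_jam_init(r);
+**   uint64_t  len;
+**   uint8_t  *byt;
+**
+**   //  for each noun [ref] to serialize
+**   //
+**   ur_jam_with(j, ref, &len, &byt);
+**   //  ... use the [len] bytes at [byt] ...
+**   free(byt);
+**
+**   ur_jam_done(j);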
+** +*/ + +typedef struct ur_jam_s ur_jam_t; + +uint64_t +ur_jam_unsafe(ur_root_t *r, + ur_nref ref, + ur_dict64_t *dict, + uint64_t *len, + uint8_t **byt); + +uint64_t +ur_jam(ur_root_t *r, + ur_nref ref, + uint64_t *len, + uint8_t **byt); + +ur_jam_t* +ur_jam_init_with(ur_root_t *r, + uint64_t d_prev, + uint64_t d_size, + uint32_t s_prev, + uint32_t s_size); + +ur_jam_t* +ur_jam_init(ur_root_t *r); + +uint64_t +ur_jam_with(ur_jam_t *j, + ur_nref ref, + uint64_t *len, + uint8_t **byt); +void +ur_jam_done(ur_jam_t *j); + +/* +** bitwise deserialization of a byte-buffer into a noun. +** supports up to 62-bits of bit-addressed input (511 PiB). +** returns [ur_cue_good] on success. +** +** cue_with factors out stack/dict (re)allocation, +** for better performance of hot loops. +** +** cue_test does not allocate nouns, but merely parses the input; +** cue_test_with* api factors out stack/dict (re)allocation, +** for better performance of repeated tests. +** +*/ + +typedef struct ur_cue_test_s ur_cue_test_t; +typedef struct ur_cue_s ur_cue_t; + +ur_cue_res_e +ur_cue(ur_root_t *r, uint64_t len, const uint8_t *byt, ur_nref *out); + +ur_cue_t* +ur_cue_init_with(ur_root_t *r, + uint64_t d_prev, + uint64_t d_size, + uint32_t s_prev, + uint32_t s_size); + +ur_cue_t* +ur_cue_init(ur_root_t *r); + +ur_cue_res_e +ur_cue_with(ur_cue_t *c, + uint64_t len, + const uint8_t *byt, + ur_nref *out); + +void +ur_cue_done(ur_cue_t *c); + +ur_bool_t +ur_cue_test(uint64_t len, const uint8_t *byt); + +ur_cue_test_t* +ur_cue_test_init_with(uint64_t d_prev, + uint64_t d_size, + uint32_t s_prev, + uint32_t s_size); + +ur_cue_test_t* +ur_cue_test_init(void); + +ur_bool_t +ur_cue_test_with(ur_cue_test_t *t, + uint64_t len, + const uint8_t *byt); + +void +ur_cue_test_done(ur_cue_test_t *t); + +#endif /* ifndef UR_SERIAL_H */ diff --git a/vere/pkg/ur/tests.c b/vere/pkg/ur/tests.c new file mode 100644 index 0000000..e0516b6 --- /dev/null +++ b/vere/pkg/ur/tests.c @@ -0,0 +1,1862 @@ +/// @file + +#include "ur.h" + +#include <assert.h> +#include <inttypes.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* +** initialize helper for bitstream-writer tests. +*/ +static void +_bsw_reinit(ur_bsw_t *bsw, uint64_t prev, uint64_t size) +{ + bsw->prev = prev; + bsw->size = size; + bsw->bits = 0; + bsw->fill = 0; + bsw->off = 0; + + free(bsw->bytes); + bsw->bytes = calloc(size, 1); +} + +/* +** check bitstream-writer test invariants. +*/ +static int +_bsw_bit_check(const char* cap, ur_bsw_t *bsw, uint8_t byt, uint8_t off) +{ + int ret = 1; + + if ( !ur_bsw_sane(bsw) ) { + fprintf(stderr, "%s: insane off=%u fill=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, bsw->off, bsw->fill, bsw->bits); + ret = 0; + } + + if ( byt != bsw->bytes[0] ) { + fprintf(stderr, "%s: bytes fail (%u, %u)\r\n", cap, byt, bsw->bytes[0]); + ret = 0; + } + + if ( off != bsw->off ) { + fprintf(stderr, "%s: offset fail (%u, %u)\r\n", cap, off, bsw->off); + ret = 0; + } + + return ret; +} + +/* +** test 8 sequential writes of a set bit. 
+*/ +static int +_test_bsw_bit_ones(void) +{ + int ret = 1; + ur_bsw_t bsw = {0}; + _bsw_reinit(&bsw, 1, 1); + + ret &= _bsw_bit_check("bsw ones init", &bsw, 0x0, 0); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones a", &bsw, 0x1, 1); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones b", &bsw, 0x3, 2); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones c", &bsw, 0x7, 3); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones d", &bsw, 0xf, 4); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones e", &bsw, 0x1f, 5); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones f", &bsw, 0x3f, 6); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones g", &bsw, 0x7f, 7); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones h", &bsw, 0xff, 0); + + if ( bsw.size != 2 ) { + fprintf(stderr, "bsw ones grow: fail\r\n"); + ret = 0; + } + + free(bsw.bytes); + + return ret; +} + +/* +** test 8 sequential writes of 1 null bit. +*/ +static int +_test_bsw_bit_zeros(void) +{ + int ret = 1; + ur_bsw_t bsw = {0}; + _bsw_reinit(&bsw, 1, 1); + + ret &= _bsw_bit_check("bsw zeros init", &bsw, 0x0, 0); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros a", &bsw, 0x0, 1); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros b", &bsw, 0x0, 2); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros c", &bsw, 0x0, 3); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros d", &bsw, 0x0, 4); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros e", &bsw, 0x0, 5); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros f", &bsw, 0x0, 6); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros g", &bsw, 0x0, 7); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros h", &bsw, 0x0, 0); + + if ( bsw.size != 2 ) { + fprintf(stderr, "bsw zeros grow: fail\r\n"); + ret = 0; + } + + free(bsw.bytes); + + return ret; +} + +/* +** test 8 sequential writes of alternating bits. +*/ +static int +_test_bsw_bit_alt(void) +{ + int ret = 1; + ur_bsw_t bsw = {0}; + _bsw_reinit(&bsw, 1, 1); + + ret &= _bsw_bit_check("bsw alt init", &bsw, 0x0, 0); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw alt a", &bsw, 0x0, 1); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw alt b", &bsw, 0x2, 2); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw alt c", &bsw, 0x2, 3); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw alt d", &bsw, 0xa, 4); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw alt e", &bsw, 0xa, 5); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw alt f", &bsw, 0x2a, 6); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw alt g", &bsw, 0x2a, 7); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw alt h", &bsw, 0xaa, 0); + + if ( bsw.size != 2 ) { + fprintf(stderr, "bsw alt grow: fail\r\n"); + ret = 0; + } + + free(bsw.bytes); + + return ret; +} + +static int +_test_bsw_bit(void) +{ + return _test_bsw_bit_ones() + & _test_bsw_bit_zeros() + & _test_bsw_bit_alt(); +} + +/* +** subsequents bitstream-writer tests assume the correctnesss of +** ur_bsw_bit(), and compare the output of a bit-at-a-time +** "golden master" with that of the relevant, higher-level operation. +** +** XX the "golden" master implementations shouldn't be in bitstream module, +** as we don't intend to run them, but it's kind of weird implement them +** in the test itself. 
+** +*/ +static int +_bsw_cmp_check(const char* cap, uint8_t val, uint8_t off, uint8_t len, ur_bsw_t *a, ur_bsw_t *b) +{ + int ret = 1; + + if ( !ur_bsw_sane(a) ) { + fprintf(stderr, "%s: val 0x%02x off %u, len %u: a insane off=%u fill=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, val, off, len, a->off, a->fill, a->bits); + ret = 0; + } + if ( !ur_bsw_sane(b) ) { + fprintf(stderr, "%s: val 0x%02x off %u len %u: b insane off=%u fill=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, val, off, len, b->off, b->fill, b->bits); + ret = 0; + } + + if ( a->off != b->off ) { + fprintf(stderr, "%s: val 0x%02x off %u len %u: offset fail (%u, %u)\r\n", + cap, val, off, len, a->off, b->off); + ret = 0; + } + + if ( a->fill != b->fill ) { + fprintf(stderr, "%s: val 0x%02x off %u len %u: fill fail (%" PRIu64 ", %" PRIu64 ")\r\n", + cap, val, off, len, a->fill, b->fill); + ret = 0; + } + + { + uint64_t k, len_byt = a->fill + !!a->off; + + if ( memcmp(a->bytes, b->bytes, len_byt) ) { + fprintf(stderr, "%s: val 0x%02x off %u, len %u not equal off=%u fill=%" PRIu64 "\r\n", + cap, val, off, len, b->off, b->fill); + fprintf(stderr, " a: { "); + for ( k = 0; k < len_byt; k++ ) { + fprintf(stderr, "%02x, ", a->bytes[k]); + } + fprintf(stderr, "}\r\n"); + fprintf(stderr, " b: { "); + for ( k = 0; k < len_byt; k++ ) { + fprintf(stderr, "%02x, ", b->bytes[k]); + } + fprintf(stderr, "}\r\n"); + ret = 0; + } + } + + return ret; +} + +/* +** ur_bsw8 golden master +*/ +static void +_bsw8_slow(ur_bsw_t *bsw, uint8_t len, uint8_t byt) +{ + len = ur_min(8, len); + + while ( len ) { + ur_bsw_bit(bsw, byt); + byt >>= 1; + len--; + } +} + +/* +** at varying offsets, write varying numbers of bits via +** ur_bsw8 and master, comparing the result each time. +*/ +static int +_test_bsw8_loop(const char* cap, uint8_t val) +{ + int ret = 1; + ur_bsw_t a = {0}; + ur_bsw_t b = {0}; + uint8_t i, j; + + for ( i = 0; i < 8; i++) { + for ( j = 0; j <= 8; j++ ) { + _bsw_reinit(&a, 1, 1); + _bsw_reinit(&b, 1, 1); + a.off = a.bits = b.off = b.bits = i; + + _bsw8_slow(&a, j, val); + ur_bsw8(&b, j, val); + + ret &= _bsw_cmp_check(cap, val, i, j, &a, &b); + } + } + + return ret; +} + +static int +_test_bsw8(void) +{ + return _test_bsw8_loop("bsw bits ones", 0xff) + & _test_bsw8_loop("bsw bits zeros", 0x0) + & _test_bsw8_loop("bsw bits alt 1", 0xaa) + & _test_bsw8_loop("bsw bits alt 2", 0x55); +} + +/* +** ur_bsw32 golden master +*/ +static void +_bsw32_slow(ur_bsw_t *bsw, uint8_t len, uint32_t val) +{ + len = ur_min(32, len); + + while ( len ) { + ur_bsw_bit(bsw, val & 0xff); + val >>= 1; + len--; + } +} + +/* +** at varying offsets, write varying numbers of bits via +** ur_bsw32 and master, comparing the result each time. 
+*/ +static int +_test_bsw32_loop(const char* cap, uint32_t val) +{ + int ret = 1; + ur_bsw_t a = {0}; + ur_bsw_t b = {0}; + uint8_t i, j; + + for ( i = 0; i < 8; i++) { + for ( j = 0; j <= 32; j++ ) { + _bsw_reinit(&a, 1, 1); + _bsw_reinit(&b, 1, 1); + a.off = a.bits = b.off = b.bits = i; + + _bsw32_slow(&a, j, val); + ur_bsw32(&b, j, val); + + ret &= _bsw_cmp_check(cap, val, i, j, &a, &b); + } + } + + return ret; +} + +static int +_test_bsw32(void) +{ + return _test_bsw32_loop("bsw 32 ones", 0xffffffff) + & _test_bsw32_loop("bsw 32 zeros", 0x0) + & _test_bsw32_loop("bsw 32 alt 1", 0xaaaaaaaa) + & _test_bsw32_loop("bsw 32 alt 2", 0x55555555); +} + +/* +** ur_bsw64 golden master +*/ +static void +_bsw64_slow(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + len = ur_min(64, len); + + while ( len ) { + ur_bsw_bit(bsw, val & 0xff); + val >>= 1; + len--; + } +} + +/* +** at varying offsets, write varying numbers of bits via +** ur_bsw64 and master, comparing the result each time. +*/ +static int +_test_bsw64_loop(const char* cap, uint64_t val) +{ + int ret = 1; + ur_bsw_t a = {0}; + ur_bsw_t b = {0}; + uint8_t i, j; + + for ( i = 0; i < 8; i++) { + for ( j = 0; j <= 64; j++ ) { + _bsw_reinit(&a, 1, 1); + _bsw_reinit(&b, 1, 1); + a.off = a.bits = b.off = b.bits = i; + + _bsw64_slow(&a, j, val); + ur_bsw64(&b, j, val); + + ret &= _bsw_cmp_check(cap, val, i, j, &a, &b); + } + } + + return ret; +} + +static int +_test_bsw64(void) +{ + return _test_bsw64_loop("bsw 64 ones", 0xffffffffffffffffULL) + & _test_bsw64_loop("bsw 64 zeros", 0x0ULL) + & _test_bsw64_loop("bsw 64 alt 1", 0xaaaaaaaaaaaaaaaaULL) + & _test_bsw64_loop("bsw 64 alt 2", 0x5555555555555555ULL); +} + +/* +** ur_bsw_bytes() golden master +*/ +static void +_bsw_bytes_slow(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + uint64_t i, len_byt = len >> 3; + + for ( i = 0; i < len_byt; i++ ) { + _bsw8_slow(bsw, 8, byt[i]); + len -= 8; + } + + _bsw8_slow(bsw, len, byt[len_byt]); +} + +/* +** at varying offsets, write varying numbers of bits via +** ur_bsw_bytes and master, comparing the result each time. +*/ +static int +_test_bsw_bytes_loop(const char* cap, uint64_t len, uint8_t val) +{ + int ret = 1; + ur_bsw_t a = {0}; + ur_bsw_t b = {0}; + uint8_t i, j, *byt; + uint64_t len_bit = len << 3; + + byt = malloc(len); + memset(byt, val, len); + + for ( i = 0; i < 8; i++) { + for ( j = 0; j < len_bit; j++ ) { + _bsw_reinit(&a, 1, 1); + _bsw_reinit(&b, 1, 1); + a.off = a.bits = b.off = b.bits = i; + + _bsw_bytes_slow(&a, j, byt); + ur_bsw_bytes(&b, j, byt); + + ret &= _bsw_cmp_check(cap, val, i, j, &a, &b); + } + } + + free(byt); + + return ret; +} + +static int +_test_bsw_bytes(void) +{ + return _test_bsw_bytes_loop("bsw bytes nought", 0, 0x0) + & _test_bsw_bytes_loop("bsw bytes ones odd", 3, 0xff) + & _test_bsw_bytes_loop("bsw bytes ones even", 4, 0xff) + & _test_bsw_bytes_loop("bsw bytes zeros odd", 5, 0x0) + & _test_bsw_bytes_loop("bsw bytes zeros even", 6, 0x0) + & _test_bsw_bytes_loop("bsw bytes alt 1 odd", 7, 0xaa) + & _test_bsw_bytes_loop("bsw bytes alt 1 even", 8, 0xaa) + & _test_bsw_bytes_loop("bsw bytes alt 2 odd", 9, 0x55) + & _test_bsw_bytes_loop("bsw bytes alt 2 even", 10, 0x55); +} + +/* +** ur_bsw_bex golden master +*/ +static void +_bsw_bex_slow(ur_bsw_t *bsw, uint8_t n) +{ + while ( n >= 64 ) { + _bsw64_slow(bsw, 64, 0); + n -= 64; + } + + _bsw64_slow(bsw, n + 1, 1ULL << n); +} + +/* +** at varying offsets, write varying numbers of bits via +** ur_bsw_bex and master, comparing the result each time. 
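+** (ur_bsw_bex(bsw, n) writes the n+1-bit encoding of 2^n, ie n zero
+** bits followed by a single set bit, as the golden master above
+** spells out.)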
+*/ +static int +_test_bsw_bex() +{ + int ret = 1; + ur_bsw_t a = {0}; + ur_bsw_t b = {0}; + uint8_t i; + uint32_t j; + + for ( i = 0; i < 8; i++) { + for ( j = 0; j < 256; j++ ) { + _bsw_reinit(&a, 1, 1); + _bsw_reinit(&b, 1, 1); + a.off = a.bits = b.off = b.bits = i; + + _bsw_bex_slow(&a, j); + ur_bsw_bex(&b, j); + + ret &= _bsw_cmp_check("bsw bex", j, i, j + 1, &a, &b); + } + } + + return ret; +} + +static int +_test_bsw(void) +{ + return _test_bsw_bit() + & _test_bsw8() + & _test_bsw32() + & _test_bsw64() + & _test_bsw_bytes() + & _test_bsw_bex(); +} + +/* +** check bitstream-reader test invariants. +*/ +static int +_bsr_bit_check(const char *cap, + ur_bsr_t *bsr, + uint8_t off, + uint64_t bits, + uint8_t exp, + uint8_t val, + ur_cue_res_e ser, + ur_cue_res_e res) +{ + int ret = 1; + + if ( !ur_bsr_sane(bsr) ) { + fprintf(stderr, "%s: insane off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, bsr->off, bsr->left, bsr->bits); + ret = 0; + } + + if ( ser != res ) { + fprintf(stderr, "%s: res not equal (%s, %s) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + cap, (ur_cue_good == ser) ? "good" : "gone", + (ur_cue_good == res) ? "good" : "gone", + bsr->off, bsr->left, bsr->left ? bsr->bytes[0] : 0, bsr->bits); + ret = 0; + } + + if ( (ur_cue_good == res) && (exp != val) ) { + fprintf(stderr, "%s: val not equal (%02x, %02x) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + cap, exp, val, bsr->off, bsr->left, bsr->left ? bsr->bytes[0] : 0, bsr->bits); + ret = 0; + } + + if ( off != bsr->off ) { + fprintf(stderr, "%s: offset fail (%u, %u)\r\n", cap, off, bsr->off); + ret = 0; + } + + if ( bits != bsr->bits ) { + fprintf(stderr, "%s: bits fail (%" PRIu64 ", %" PRIu64 ")\r\n", cap, bits, bsr->bits); + ret = 0; + } + + return ret; +} + +/* +** read a bit 8 times from a bitstream initialized to all ones, +** checking invariants and result after each read. +*/ +static int +_test_bsr_bit_ones(void) +{ + int ret = 1; + uint8_t ones[1] = { 0xff }; + ur_bsr_t bsr = { .left = sizeof(ones), .bytes = ones }; + uint8_t out; + ur_cue_res_e res; + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 1", &bsr, 1, 1, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 2", &bsr, 2, 2, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 3", &bsr, 3, 3, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 4", &bsr, 4, 4, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 5", &bsr, 5, 5, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 6", &bsr, 6, 6, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 7", &bsr, 7, 7, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 8", &bsr, 0, 8, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 9", &bsr, 0, 8, ur_cue_gone, res, 0, 0); + + return ret; +} + +/* +** read a bit 8 times from a bitstream initialized to all zeros, +** checking invariants and result after each read. 
+*/ +static int +_test_bsr_bit_zeros(void) +{ + int ret = 1; + uint8_t ones[1] = { 0x0 }; + ur_bsr_t bsr = { .left = sizeof(ones), .bytes = ones }; + uint8_t out; + ur_cue_res_e res; + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 1", &bsr, 1, 1, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 2", &bsr, 2, 2, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 3", &bsr, 3, 3, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 4", &bsr, 4, 4, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 5", &bsr, 5, 5, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 6", &bsr, 6, 6, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 7", &bsr, 7, 7, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 8", &bsr, 0, 8, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 9", &bsr, 0, 8, ur_cue_gone, res, 0, 0); + + return ret; +} + +/* +** read a bit 8 times from a bitstream initialized to alternating zeros and ones, +** checking invariants and result after each read. +*/ +static int +_test_bsr_bit_alt(void) +{ + int ret = 1; + uint8_t ones[1] = { 0xaa }; + ur_bsr_t bsr = { .left = sizeof(ones), .bytes = ones }; + uint8_t out; + ur_cue_res_e res; + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 1", &bsr, 1, 1, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 2", &bsr, 2, 2, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 3", &bsr, 3, 3, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 4", &bsr, 4, 4, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 5", &bsr, 5, 5, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 6", &bsr, 6, 6, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 7", &bsr, 7, 7, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 8", &bsr, 0, 8, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 9", &bsr, 0, 8, ur_cue_gone, res, 0, 0); + + return ret; +} + +static int +_test_bsr_bit(void) +{ + return _test_bsr_bit_ones() + & _test_bsr_bit_zeros() + & _test_bsr_bit_alt(); +} + +/* +** check bitstream-reader test invariants, after (maybe) reading +** of the end of the stream. +*/ +static int +_bsr_bit_any_check(const char* cap, ur_bsr_t *bsr, uint8_t off, uint64_t bits, uint8_t exp, uint8_t val) +{ + int ret = 1; + + if ( !ur_bsr_sane(bsr) ) { + fprintf(stderr, "%s: insane off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, bsr->off, bsr->left, bsr->bits); + ret = 0; + } + + if ( exp != val ) { + fprintf(stderr, "%s: not equal (%02x, %02x) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + cap, exp, val, bsr->off, bsr->left, bsr->left ? 
bsr->bytes[0] : 0, bsr->bits); + ret = 0; + } + + if ( off != bsr->off ) { + fprintf(stderr, "%s: offset fail (%u, %u)\r\n", cap, off, bsr->off); + ret = 0; + } + + if ( bits != bsr->bits ) { + fprintf(stderr, "%s: bits fail (%" PRIu64 ", %" PRIu64 ")\r\n", cap, bits, bsr->bits); + ret = 0; + } + + return ret; +} + +/* +** read a bit 17 times from a bitstream initialized to 8 ones, +** checking invariants and result after each read. +*/ +static int +_test_bsr_bit_any_ones(void) +{ + int ret = 1; + uint8_t ones[1] = { 0xff }; + ur_bsr_t bsr = { .left = sizeof(ones), .bytes = ones }; + uint8_t out; + + ret &= _bsr_bit_any_check("bsr bit-any ones init", &bsr, 0, 0, 0, 0); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 1", &bsr, 1, 1, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 2", &bsr, 2, 2, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 3", &bsr, 3, 3, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 4", &bsr, 4, 4, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 5", &bsr, 5, 5, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 6", &bsr, 6, 6, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 7", &bsr, 7, 7, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 8", &bsr, 0, 8, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 9", &bsr, 0, 9, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 10", &bsr, 0, 10, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 11", &bsr, 0, 11, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 12", &bsr, 0, 12, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 13", &bsr, 0, 13, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 14", &bsr, 0, 14, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 15", &bsr, 0, 15, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 16", &bsr, 0, 16, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 17", &bsr, 0, 17, 0, out); + + return ret; +} + +/* +** read a bit 17 times from a bitstream initialized to 8 zeros, +** checking invariants and result after each read. 
+*/ +static int +_test_bsr_bit_any_zeros(void) +{ + int ret = 1; + uint8_t ones[1] = { 0x0 }; + ur_bsr_t bsr = { .left = sizeof(ones), .bytes = ones }; + uint8_t out; + + ret &= _bsr_bit_any_check("bsr bit-any zeros init", &bsr, 0, 0, 0, 0); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 1", &bsr, 1, 1, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 2", &bsr, 2, 2, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 3", &bsr, 3, 3, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 4", &bsr, 4, 4, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 5", &bsr, 5, 5, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 6", &bsr, 6, 6, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 7", &bsr, 7, 7, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 8", &bsr, 0, 8, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 9", &bsr, 0, 9, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 10", &bsr, 0, 10, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 11", &bsr, 0, 11, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 12", &bsr, 0, 12, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 13", &bsr, 0, 13, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 14", &bsr, 0, 14, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 15", &bsr, 0, 15, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 16", &bsr, 0, 16, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 17", &bsr, 0, 17, 0, out); + + return ret; +} + +/* +** read a bit 17 times from a bitstream initialized to 8 bits of alternating, +** ones and zeros, checking invariants and result after each read. 
+*/ +static int +_test_bsr_bit_any_alt(void) +{ + int ret = 1; + uint8_t ones[1] = { 0xaa }; + ur_bsr_t bsr = { .left = sizeof(ones), .bytes = ones }; + uint8_t out; + + ret &= _bsr_bit_any_check("bsr bit-any alt init", &bsr, 0, 0, 0, 0); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 1", &bsr, 1, 1, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 2", &bsr, 2, 2, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 3", &bsr, 3, 3, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 4", &bsr, 4, 4, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 5", &bsr, 5, 5, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 6", &bsr, 6, 6, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 7", &bsr, 7, 7, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 8", &bsr, 0, 8, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 9", &bsr, 0, 9, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 10", &bsr, 0, 10, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 11", &bsr, 0, 11, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 12", &bsr, 0, 12, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 13", &bsr, 0, 13, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 14", &bsr, 0, 14, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 15", &bsr, 0, 15, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 16", &bsr, 0, 16, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 17", &bsr, 0, 17, 0, out); + + return ret; +} + +static int +_test_bsr_bit_any(void) +{ + return _test_bsr_bit_any_ones() + & _test_bsr_bit_any_zeros() + & _test_bsr_bit_any_alt(); +} + +/* +** subsequents bitstream-reader tests assume the correctnesss of +** ur_bsr_bit_any(), and compare the output of a bit-at-a-time +** "golden master" with that of the relevant, higher-level operation. +** +** XX the "golden" master implementations shouldn't be in bitstream module, +** as we don't intend to run them, but it's kind of weird implement them +** in the test itself. 
+** +*/ +static int +_bsr_cmp_any_check(const char* cap, uint8_t off, uint8_t len, ur_bsr_t *a, ur_bsr_t *b) +{ + int ret = 1; + + if ( !ur_bsr_sane(a) ) { + fprintf(stderr, "%s: off %u, len %u a insane off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, off, len, a->off, a->left, a->bits); + ret = 0; + } + + if ( !ur_bsr_sane(b) ) { + fprintf(stderr, "%s: off %u, len %u a insane off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, off, len, b->off, b->left, b->bits); + ret = 0; + } + + if ( a->off != b->off ) { + fprintf(stderr, "%s: off %u len %u: offset fail (%u, %u)\r\n", + cap, off, len, a->off, b->off); + ret = 0; + } + + if ( a->left != b->left ) { + fprintf(stderr, "%s: off %u len %u: left fail (%" PRIu64 ", %" PRIu64 ")\r\n", + cap, off, len, a->left, b->left); + ret = 0; + } + + if ( a->bits != b->bits ) { + fprintf(stderr, "%s: off %u len %u: bits fail (%" PRIu64 ", %" PRIu64 ")\r\n", + cap, off, len, a->bits, b->bits); + ret = 0; + } + + if ( a->bytes != b->bytes ) { + fprintf(stderr, "%s: off %u len %u: bytes fail (%p, %p)\r\n", + cap, off, len, a->bytes, b->bytes); + ret = 0; + } + + return ret; +} + +/* +** ur_bsr8_any golden master +*/ +static uint8_t +_bsr8_any_slow(ur_bsr_t *bsr, uint8_t len) +{ + uint8_t i, out = 0; + + len = ur_min(8, len); + + for ( i = 0; i < len; i++ ) { + out ^= ur_bsr_bit_any(bsr) << i; + } + + return out; +} + +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** read a varying numbers of bits via ur_bsr8_any and master, comparing +** the results and respective states each time. +*/ +static int +_test_bsr8_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + uint8_t *bytes; + ur_bsr_t a, b; + uint8_t c, d, i, j; + + bytes = malloc(len); + memset(bytes, val, len); + + for ( i = 0; i < 8; i++) { + for ( j = 0; j <= 8; j++ ) { + a.left = b.left = len; + a.bytes = b.bytes = bytes; + a.off = a.bits = b.off = b.bits = i; + + c = _bsr8_any_slow(&a, j); + d = ur_bsr8_any(&b, j); + + ret &= _bsr_cmp_any_check(cap, i, j, &a, &b); + + if ( c != d ) { + fprintf(stderr, "%s: off %u, len %u not equal (%02x, %02x) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + cap, i, j, c, d, b.off, b.left, b.left ? b.bytes[0] : 0, b.bits); + ret = 0; + } + } + } + + free(bytes); + + return ret; +} + +static int +_test_bsr8(void) +{ + return _test_bsr8_loop("bsr8 ones 1", 1, 0xff) + & _test_bsr8_loop("bsr8 ones 2", 2, 0xff) + & _test_bsr8_loop("bsr8 zeros 1", 1, 0x0) + & _test_bsr8_loop("bsr8 zeros 2", 2, 0x0) + & _test_bsr8_loop("bsr8 alt-1 1", 1, 0xaa) + & _test_bsr8_loop("bsr8 alt-1 2", 2, 0xaa) + & _test_bsr8_loop("bsr8 alt-2 1", 1, 0x55) + & _test_bsr8_loop("bsr8 alt-2 2", 2, 0x55); +} + +/* +** ur_bsr32_any golden master +*/ +static uint32_t +_bsr32_any_slow(ur_bsr_t *bsr, uint8_t len) +{ + uint32_t out = 0; + uint8_t i; + + len = ur_min(32, len); + + for ( i = 0; i < len; i++ ) { + out ^= (uint32_t)ur_bsr_bit_any(bsr) << i; + } + + return out; +} + +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** read a varying numbers of bits via ur_bsr32_any and master, comparing +** the results and respective states each time. 
+*/ +static int +_test_bsr32_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + uint8_t *bytes; + ur_bsr_t a, b; + uint32_t c, d; + uint8_t i, j; + + bytes = malloc(len); + memset(bytes, val, len); + + for ( i = 0; i < 8; i++) { + for ( j = 0; j <= 32; j++ ) { + a.left = b.left = len; + a.bytes = b.bytes = bytes; + a.off = a.bits = b.off = b.bits = i; + + c = _bsr32_any_slow(&a, j); + d = ur_bsr32_any(&b, j); + + ret &= _bsr_cmp_any_check(cap, i, j, &a, &b); + + if ( c != d ) { + fprintf(stderr, "%s: off %u, len %u not equal (%08x, %08x) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + cap, i, j, c, d, b.off, b.left, b.left ? b.bytes[0] : 0, b.bits); + ret = 0; + } + } + } + + free(bytes); + + return ret; +} + +static int +_test_bsr32(void) +{ + return _test_bsr32_loop("bsr32 ones 1", 1, 0xff) + & _test_bsr32_loop("bsr32 ones 2", 2, 0xff) + & _test_bsr32_loop("bsr32 ones 3", 3, 0xff) + & _test_bsr32_loop("bsr32 ones 4", 4, 0xff) + & _test_bsr32_loop("bsr32 zeros 1", 1, 0x0) + & _test_bsr32_loop("bsr32 zeros 2", 2, 0x0) + & _test_bsr32_loop("bsr32 zeros 3", 3, 0x0) + & _test_bsr32_loop("bsr32 zeros 4", 4, 0x0) + & _test_bsr32_loop("bsr32 alt-1 1", 1, 0xaa) + & _test_bsr32_loop("bsr32 alt-1 2", 2, 0xaa) + & _test_bsr32_loop("bsr32 alt-1 3", 3, 0xaa) + & _test_bsr32_loop("bsr32 alt-1 4", 4, 0xaa) + & _test_bsr32_loop("bsr32 alt-2 1", 1, 0x55) + & _test_bsr32_loop("bsr32 alt-2 2", 2, 0x55) + & _test_bsr32_loop("bsr32 alt-2 3", 3, 0x55) + & _test_bsr32_loop("bsr32 alt-2 4", 4, 0x55); +} + +/* +** ur_bsr64_any golden master +*/ +static uint64_t +_bsr64_any_slow(ur_bsr_t *bsr, uint8_t len) +{ + uint64_t out = 0; + uint8_t i; + + len = ur_min(64, len); + + for ( i = 0; i < len; i++ ) { + out ^= (uint64_t)ur_bsr_bit_any(bsr) << i; + } + + return out; +} + +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** read a varying numbers of bits via ur_bsr64_any and master, comparing +** the results and respective states each time. +*/ +static int +_test_bsr64_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + uint8_t *bytes; + ur_bsr_t a, b; + uint64_t c, d; + uint8_t i, j; + + bytes = malloc(len); + memset(bytes, val, len); + + for ( i = 0; i < 8; i++) { + for ( j = 0; j <= 64; j++ ) { + a.left = b.left = len; + a.bytes = b.bytes = bytes; + a.off = a.bits = b.off = b.bits = i; + + c = _bsr64_any_slow(&a, j); + d = ur_bsr64_any(&b, j); + + ret &= _bsr_cmp_any_check(cap, i, j, &a, &b); + + if ( c != d ) { + fprintf(stderr, "%s: off %u, len %u not equal (%016" PRIx64", %016" PRIx64") off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + cap, i, j, c, d, b.off, b.left, b.left ? 
b.bytes[0] : 0, b.bits); + ret = 0; + } + } + } + + free(bytes); + + return ret; +} + +static int +_test_bsr64(void) +{ + return _test_bsr64_loop("bsr64 ones 1", 1, 0xff) + & _test_bsr64_loop("bsr64 ones 2", 2, 0xff) + & _test_bsr64_loop("bsr64 ones 3", 3, 0xff) + & _test_bsr64_loop("bsr64 ones 4", 4, 0xff) + & _test_bsr64_loop("bsr64 ones 5", 5, 0xff) + & _test_bsr64_loop("bsr64 ones 6", 6, 0xff) + & _test_bsr64_loop("bsr64 ones 7", 7, 0xff) + & _test_bsr64_loop("bsr64 ones 8", 8, 0xff) + & _test_bsr64_loop("bsr64 zeros 1", 1, 0x0) + & _test_bsr64_loop("bsr64 zeros 2", 2, 0x0) + & _test_bsr64_loop("bsr64 zeros 3", 3, 0x0) + & _test_bsr64_loop("bsr64 zeros 4", 4, 0x0) + & _test_bsr64_loop("bsr64 zeros 5", 5, 0x0) + & _test_bsr64_loop("bsr64 zeros 6", 6, 0x0) + & _test_bsr64_loop("bsr64 zeros 7", 7, 0x0) + & _test_bsr64_loop("bsr64 zeros 8", 8, 0x0) + & _test_bsr64_loop("bsr64 alt-1 1", 1, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 2", 2, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 3", 3, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 4", 4, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 5", 5, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 6", 6, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 7", 7, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 8", 8, 0xaa) + & _test_bsr64_loop("bsr64 alt-2 1", 1, 0x55) + & _test_bsr64_loop("bsr64 alt-2 2", 2, 0x55) + & _test_bsr64_loop("bsr64 alt-2 3", 3, 0x55) + & _test_bsr64_loop("bsr64 alt-2 4", 4, 0x55) + & _test_bsr64_loop("bsr64 alt-2 5", 5, 0x55) + & _test_bsr64_loop("bsr64 alt-2 6", 6, 0x55) + & _test_bsr64_loop("bsr64 alt-2 7", 7, 0x55) + & _test_bsr64_loop("bsr64 alt-2 8", 8, 0x55); +} + +/* +** ur_bsr_bytes_any golden master +*/ +static void +_bsr_bytes_any_slow(ur_bsr_t *bsr, uint64_t len, uint8_t *out) +{ + uint64_t i, len_byt = len >> 3, len_bit = ur_mask_3(len); + + for ( i = 0; i < len_byt; i++ ) { + out[i] = _bsr8_any_slow(bsr, 8); + } + + if ( len_bit ) { + out[len_byt] = _bsr8_any_slow(bsr, len_bit); + } +} + +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** read a varying numbers of bits via ur_bsr_bytes_any and master, comparing +** the results and respective states each time. +*/ +static int +_test_bsr_bytes_any_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + uint64_t max = (len << 3) + 7; + ur_bsr_t a, b; + uint8_t *bytes, *c, *d; + uint8_t i, j, k; + + c = malloc(1 + len); + d = malloc(1 + len); + bytes = malloc(len); + memset(bytes, val, len); + + for ( i = 0; i < 8; i++) { + for ( j = 1; j <= max; j++ ) { + a.left = b.left = len; + a.bytes = b.bytes = len ? bytes : 0; + a.off = b.off = len ? 
i : 0; + a.bits = b.bits = i; + memset(c, 0x0, len); + memset(d, 0x0, len); + + _bsr_bytes_any_slow(&a, j, c); + ur_bsr_bytes_any(&b, j, d); + + ret &= _bsr_cmp_any_check(cap, i, j, &a, &b); + + if ( memcmp(c, d, len) ) { + fprintf(stderr, "%s: off %u, len %u not equal off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, i, j, b.off, b.left, b.bits); + fprintf(stderr, " a: { "); + for ( k = 0; k < len; k++ ) { + fprintf(stderr, "%02x, ", c[k]); + } + fprintf(stderr, "}\r\n"); + fprintf(stderr, " b: { "); + for ( k = 0; k < len; k++ ) { + fprintf(stderr, "%02x, ", d[k]); + } + fprintf(stderr, "}\r\n"); + ret = 0; + } + } + } + + free(bytes); + free(d); + free(c); + + return ret; +} + +static int +_test_bsr_bytes_any(void) +{ + return _test_bsr_bytes_any_loop("bsr bytes nought", 0, 0x0) + & _test_bsr_bytes_any_loop("bsr bytes ones odd", 3, 0xff) + & _test_bsr_bytes_any_loop("bsr bytes ones even", 4, 0xff) + & _test_bsr_bytes_any_loop("bsr bytes zeros odd", 5, 0x0) + & _test_bsr_bytes_any_loop("bsr bytes zeros even", 6, 0x0) + & _test_bsr_bytes_any_loop("bsr bytes alt 1 odd", 7, 0xaa) + & _test_bsr_bytes_any_loop("bsr bytes alt 1 even", 8, 0xaa) + & _test_bsr_bytes_any_loop("bsr bytes alt 2 odd", 9, 0x55) + & _test_bsr_bytes_any_loop("bsr bytes alt 2 even", 10, 0x55); +} + +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** skip a varying numbers of bits via ur_bsr_skip_any and read the same via +** ur_bsr_bytes_any master, comparing the respective states each time. +*/ +static int +_test_bsr_skip_any_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + uint64_t max = (len << 3) + 7; + ur_bsr_t a, b; + uint8_t *bytes, *c; + uint8_t i, j; + + c = malloc(1 + len); + bytes = malloc(len); + memset(bytes, val, len); + + for ( i = 0; i < 8; i++) { + for ( j = 1; j <= max; j++ ) { + a.left = b.left = len; + a.bytes = b.bytes = len ? bytes : 0; + a.off = b.off = len ? i : 0; + a.bits = b.bits = i; + memset(c, 0x0, len); + + _bsr_bytes_any_slow(&a, j, c); + ur_bsr_skip_any(&b, j); + + ret &= _bsr_cmp_any_check(cap, i, j, &a, &b); + } + } + + free(bytes); + free(c); + + return ret; +} + +static int +_test_bsr_skip_any(void) +{ + return _test_bsr_skip_any_loop("bsr skip nought", 0, 0x0) + & _test_bsr_skip_any_loop("bsr skip ones odd", 3, 0xff) + & _test_bsr_skip_any_loop("bsr skip ones even", 4, 0xff) + & _test_bsr_skip_any_loop("bsr skip zeros odd", 5, 0x0) + & _test_bsr_skip_any_loop("bsr skip zeros even", 6, 0x0) + & _test_bsr_skip_any_loop("bsr skip alt 1 odd", 7, 0xaa) + & _test_bsr_skip_any_loop("bsr skip alt 1 even", 8, 0xaa) + & _test_bsr_skip_any_loop("bsr skip alt 2 odd", 9, 0x55) + & _test_bsr_skip_any_loop("bsr skip alt 2 even", 10, 0x55); +} + +/* +** compare the result and state of two reads (that were not permitted +** to read past the end of the stream). 
+*/ +static int +_bsr_cmp_check(const char* cap, + uint8_t off, + uint8_t len, + ur_bsr_t *a, + ur_bsr_t *b, + uint8_t c, + uint8_t d, + ur_cue_res_e e, + ur_cue_res_e f) +{ + int ret = 1; + + if ( !ur_bsr_sane(a) ) { + fprintf(stderr, "%s: off %u, len %u a insane off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, off, len, a->off, a->left, a->bits); + ret = 0; + } + + if ( !ur_bsr_sane(b) ) { + fprintf(stderr, "%s: off %u, len %u a insane off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, off, len, b->off, b->left, b->bits); + ret = 0; + } + + if ( e != f ) { + fprintf(stderr, "%s: off %u, len %u ret not equal (%s, %s) off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, off, len, + (ur_cue_good == e) ? "good" : "gone", + (ur_cue_good == f) ? "good" : "gone", + b->off, b->left, b->bits); + ret = 0; + } + + if ( (ur_cue_good == e) && (c != d) ) { + fprintf(stderr, "%s: off %u, len %u val not equal (%02x, %02x) off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, off, len, c, d, b->off, b->left, b->bits); + ret = 0; + } + + if ( a->off != b->off ) { + fprintf(stderr, "%s: off %u len %u: offset fail (%u, %u)\r\n", + cap, off, len, a->off, b->off); + ret = 0; + } + + if ( a->left != b->left ) { + fprintf(stderr, "%s: off %u len %u: left fail (%" PRIu64 ", %" PRIu64 ")\r\n", + cap, off, len, a->left, b->left); + ret = 0; + } + + if ( a->bits != b->bits ) { + fprintf(stderr, "%s: off %u len %u: bits fail (%" PRIu64 ", %" PRIu64 ")\r\n", + cap, off, len, a->bits, b->bits); + ret = 0; + } + + if ( a->bytes != b->bytes ) { + fprintf(stderr, "%s: off %u len %u: bytes fail (%p, %p)\r\n", + cap, off, len, a->bytes, b->bytes); + ret = 0; + } + + + return ret; +} + +/* +** ur_bsr_log golden master +*/ +static ur_cue_res_e +_bsr_log_slow(ur_bsr_t *bsr, uint8_t *out) +{ + ur_cue_res_e res; + uint8_t bit, i = 0; + + do { + if ( ur_cue_good != (res = ur_bsr_bit(bsr, &bit)) ) { + return res; + } + else if ( bit ) { + *out = i; + return ur_cue_good; + } + } + while ( ++i ); + + return ur_cue_meme; +} + +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** read a varying numbers of bits via ur_bsr_log and master, comparing +** the results and respective states each time. 
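+** (ur_bsr_log counts the zero bits preceding the first set bit, so
+** reading the bit sequence 0,0,1 yields 2, mirroring the golden
+** master above.)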
+*/ +static int +_test_bsr_log_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + ur_bsr_t a, b; + uint8_t *bytes, c, d; + uint8_t i, j; + ur_cue_res_e e, f; + + bytes = malloc(len); + + for ( i = 0; i < 8; i++) { + for ( j = 0; j < len; j++ ) { + a.left = b.left = len; + a.bytes = b.bytes = bytes; + a.off = a.bits = b.off = b.bits = i; + + memset(bytes, 0x0, j); + memset(bytes + j, val, len - j); + + e = _bsr_log_slow(&a, &c); + f = ur_bsr_log(&b, &d); + + ret &= _bsr_cmp_check(cap, i, j, &a, &b, c, d, e, f); + } + } + + free(bytes); + + return ret; +} + +static int +_test_bsr_log(void) +{ + int ret = _test_bsr_log_loop("bsr log nought", 0, 0x0) + & _test_bsr_log_loop("bsr log ones odd", 3, 0xff) + & _test_bsr_log_loop("bsr log ones even", 4, 0xff) + & _test_bsr_log_loop("bsr log ones big", 50, 0xff) + & _test_bsr_log_loop("bsr log zeros odd", 5, 0x0) + & _test_bsr_log_loop("bsr log zeros even", 6, 0x0) + & _test_bsr_log_loop("bsr log zeros big", 50, 0x0); + + { + uint8_t i, j = 5; + char cap[1024]; + + for ( i = 0; i < 8; i++ ) { + snprintf(cap, 1000, "bsr log 1<<%u odd", i); + ret &= _test_bsr_log_loop((const char*)cap, j++, 0x1 << i); + + snprintf(cap, 1000, "bsr log 1<<%u even", i); + ret &= _test_bsr_log_loop((const char*)cap, j++, 0x1 << i); + + snprintf(cap, 1000, "bsr log 1<<%u big", i); + ret &= _test_bsr_log_loop((const char*)cap, 50, 0x1 << i); + } + } + + return ret; +} + +/* +** ur_bsr_tag golden master +*/ +static ur_cue_res_e +_bsr_tag_slow(ur_bsr_t *bsr, ur_cue_tag_e *out) +{ + ur_cue_res_e res; + uint8_t bit; + + if ( ur_cue_good != (res = ur_bsr_bit(bsr, &bit)) ) { + return res; + } + else if ( 0 == bit ) { + *out = ur_jam_atom; + return ur_cue_good; + } + else if ( ur_cue_good != (res = ur_bsr_bit(bsr, &bit)) ) { + return res; + } + + *out = ( 0 == bit ) ? ur_jam_cell : ur_jam_back; + return ur_cue_good; +} + +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** read a jam type tag via ur_bsr_tag and master, comparing the results and +** respective states each time. 
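+** (jam tags, read bit-by-bit: a leading 0 marks an atom; 1 then 0
+** marks a cell; 1 then 1 marks a backreference, as implemented by
+** _bsr_tag_slow above.)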
+*/ +static int +_test_bsr_tag_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + ur_bsr_t a, b; + uint8_t *bytes; + ur_cue_tag_e c, d; + uint8_t i, j; + ur_cue_res_e e, f; + + bytes = malloc(len); + + for ( i = 0; i < 8; i++) { + for ( j = 0; j < len; j++ ) { + a.left = b.left = len; + a.bytes = b.bytes = bytes; + a.off = a.bits = b.off = b.bits = i; + + memset(bytes, 0x0, j); + memset(bytes + j, val, len - j); + + e = _bsr_tag_slow(&a, &c); + f = ur_bsr_tag(&b, &d); + + ret &= _bsr_cmp_check(cap, i, j, &a, &b, c, d, e, f); + } + } + + free(bytes); + + return ret; +} + +static int +_test_bsr_tag(void) +{ + return _test_bsr_tag_loop("bsr tag nought", 0, 0x0) + & _test_bsr_tag_loop("bsr tag ones 1", 1, 0xff) + & _test_bsr_tag_loop("bsr tag ones 2", 2, 0xff) + & _test_bsr_tag_loop("bsr tag zeros 1", 1, 0x0) + & _test_bsr_tag_loop("bsr tag zeros 2", 2, 0x0) + & _test_bsr_tag_loop("bsr tag alt-1 1", 1, 0xaa) + & _test_bsr_tag_loop("bsr tag alt-1 2", 2, 0xaa) + & _test_bsr_tag_loop("bsr tag alt-2 1", 1, 0x55) + & _test_bsr_tag_loop("bsr tag alt-2 2", 2, 0x55); +} + +static int +_test_bsr(void) +{ + return _test_bsr_bit() + & _test_bsr_bit_any() + & _test_bsr_bytes_any() + & _test_bsr_skip_any() + & _test_bsr8() + & _test_bsr32() + & _test_bsr64() + & _test_bsr_log() + & _test_bsr_tag(); +} + +static int +_test_jam_spec(const char *cap, + ur_root_t *r, + ur_nref ref, + size_t len, + const uint8_t *res) +{ + uint64_t i, out_len; + uint8_t *out; + int ret; + + ur_jam(r, ref, &out_len, &out); + + if ( 0 != memcmp(out, res, len) ) { + fprintf(stderr, "\033[31mjam %s fail\033[0m\r\n", cap); + + fprintf(stderr, " actual: { "); + for ( i = 0; i < out_len; i++ ) { + fprintf(stderr, "0x%x, ", out[i]); + } + fprintf(stderr, "}\r\n"); + fprintf(stderr, " expect: { "); + for ( i = 0; i < len; i++ ) { + fprintf(stderr, "0x%x, ", res[i]); + } + fprintf(stderr, "}\r\n"); + + ret = 0; + } + else { + ret = 1; + } + + free(out); + + return ret; +} + +static int +_test_cue_spec(const char *cap, + ur_root_t* r, + ur_nref ref, + size_t len, + const uint8_t *res) +{ + int ret = 1; + ur_nref out; + + if ( !ur_cue_test(len, res) ) { + fprintf(stderr, "\033[31mcue %s fail 1\033[0m\r\n", cap); + ret = 0; + } + + if ( ur_cue_good != ur_cue(r, len, res, &out) ) { + fprintf(stderr, "\033[31mcue %s fail 2\033[0m\r\n", cap); + ret = 0; + } + else if ( ref != out ) { + fprintf(stderr, "\033[31mcue %s fail 3 ref=%" PRIu64 " out=%" PRIu64 " \033[0m\r\n", cap, ref, out); + ret = 0; + } + + return ret; +} + +/* +** test jam/cue correctness and roundtrips across a variety of inputs +*/ +static int +_test_jam_cue(void) +{ + ur_root_t *r = ur_root_init(); + int ret = 1; + +# define NC(a, b) ur_cons(r, a, b) +# define NT(a, b, c) NC(a, NC(b, c)) + +# define FAST 0x74736166 +# define FULL 0x6c6c7566 + +# define TEST_CASE(a, b) \ + const char* cap = a; \ + ur_nref ref = b; \ + ret &= _test_jam_spec(cap, r, ref, sizeof(res), res); \ + ret &= _test_cue_spec(cap, r, ref, sizeof(res), res); \ + + { + uint8_t res[1] = { 0x2 }; + TEST_CASE("0", 0); + } + + { + uint8_t res[1] = { 0xc }; + TEST_CASE("1", 1); + } + + { + uint8_t res[1] = { 0x48 }; + TEST_CASE("2", 2); + } + + { + uint8_t res[6] = { 0xc0, 0x37, 0xb, 0x9b, 0xa3, 0x3 }; + TEST_CASE("%fast", FAST); + } + + { + uint8_t res[6] = { 0xc0, 0x37, 0xab, 0x63, 0x63, 0x3 }; + TEST_CASE("%full", FULL); + } + + { + uint8_t res[1] = { 0x29 }; + TEST_CASE("[0 0]", NC(0, 0)); + } + + { + uint8_t res[2] = { 0x31, 0x3 }; + TEST_CASE("[1 1]", NC(1, 1)); + } + + { + uint8_t res[2] = 
{ 0x21, 0xd1 }; + TEST_CASE("[2 3]", NC(2, 3)); + } + + { + uint8_t res[11] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0xe, 0x7c, 0xb3, 0x3a, 0x36, 0x36 }; + TEST_CASE("[%fast %full]", NC(FAST, FULL)); + } + + { + uint8_t res[2] = { 0x71, 0xcc }; + TEST_CASE("[1 1 1]", NC(1, NC(1, 1))); + } + + { + uint8_t res[12] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0x1e, 0xf0, 0xcd, 0xea, 0xd8, 0xd8, 0x93 }; + TEST_CASE("[%fast %full %fast]", NC(FAST, NC(FULL, FAST))); + } + + { + uint8_t res[6] = { 0xa5, 0x35, 0x19, 0xf3, 0x18, 0x5 }; + TEST_CASE("[[0 0] [[0 0] 1 1] 1 1]", NC(NC(0, 0), NC(NC(NC(0, 0), NC(1, 1)), NC(1, 1)))); + } + + { + uint8_t res[14] = { 0x15, 0x17, 0xb2, 0xd0, 0x85, 0x59, 0xb8, 0x61, 0x87, 0x5f, 0x10, 0x54, 0x55, 0x5 }; + TEST_CASE("deep", NC(NC(NC(1, NC(NC(2, NC(NC(3, NC(NC(4, NC(NT(5, 6, NC(7, NC(NC(8, 0), 0))), 0)), 0)), 0)), 0)), 0), 0)); + } + + { + uint8_t inp[33] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + uint8_t res[35] = { 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }; + TEST_CASE("wide", ur_coin_bytes(r, sizeof(inp), inp)); + } + + { + uint8_t inp[16] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc, 0xa8, 0xab, 0x60, 0xef, 0x2d, 0xd, 0x0, 0x0, 0x80 }; + uint8_t res[19] = { 0x0, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x18, 0x50, 0x57, 0xc1, 0xde, 0x5b, 0x1a, 0x0, 0x0, 0x0, 0x1 }; + TEST_CASE("date", ur_coin_bytes(r, sizeof(inp), inp)); + } + + ur_root_free(r); + + return ret; +} + +static int +_test_ur(void) +{ + int ret = 1; + + if ( !_test_bsw() ) { + fprintf(stderr, "ur test bsw failed\r\n"); + ret = 0; + } + + if ( !_test_bsr() ) { + fprintf(stderr, "ur test bsr failed\r\n"); + ret = 0; + } + + if ( !_test_jam_cue() ) { + fprintf(stderr, "ur test jam/cue failed\r\n"); + ret = 0; + } + + return ret; +} + +int +main(int argc, char* argv[]) +{ + if ( !_test_ur() ) { + fprintf(stderr, "ur test failed\r\n"); + return 1; + } + + fprintf(stderr, "ur ok\n"); + return 0; +} diff --git a/vere/pkg/ur/ur.h b/vere/pkg/ur/ur.h new file mode 100644 index 0000000..cfa0c47 --- /dev/null +++ b/vere/pkg/ur/ur.h @@ -0,0 +1,11 @@ +/// @file + +#ifndef UR_H +#define UR_H + +#include "bitstream.h" +#include "defs.h" +#include "hashcons.h" +#include "serial.h" + +#endif /* ifndef UR_H */ |
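
A minimal jam/cue round-trip sketch against the API introduced in this diff (ur.h is the umbrella header); it follows the conventions of tests.c above, uses only functions declared in hashcons.h and serial.h, and reduces error handling to asserts:

#include "ur.h"

#include <assert.h>
#include <stdlib.h>

int
main(void)
{
  ur_root_t *r = ur_root_init();

  //  direct atoms (62 bits or less) are their own references,
  //  so small numbers can be consed as-is, as tests.c does
  //
  ur_nref ref = ur_cons(r, 2, 3);

  uint64_t len;
  uint8_t *byt;
  ur_nref  out;

  //  serialize into [len] bytes at [byt]
  //
  ur_jam(r, ref, &len, &byt);

  //  deserialize and compare
  //
  assert( ur_cue_good == ur_cue(r, len, byt, &out) );
  assert( out == ref );

  free(byt);
  ur_root_free(r);
  return 0;
}

Per the test vectors above, the serialized form of [2 3] is the two bytes 0x21 0xd1; and because nouns are hash-consed within a root, the cued reference compares equal to the original.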