From fcedfddf00b3f994e4f4e40332ac7fc192c63244 Mon Sep 17 00:00:00 2001 From: polwex Date: Sun, 5 Oct 2025 21:56:51 +0700 Subject: claude is gud --- vere/ext/nasm/asm/strfunc.c | 359 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 359 insertions(+) create mode 100644 vere/ext/nasm/asm/strfunc.c (limited to 'vere/ext/nasm/asm/strfunc.c') diff --git a/vere/ext/nasm/asm/strfunc.c b/vere/ext/nasm/asm/strfunc.c new file mode 100644 index 0000000..236b9d2 --- /dev/null +++ b/vere/ext/nasm/asm/strfunc.c @@ -0,0 +1,359 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 1996-2009 The NASM Authors - All Rights Reserved + * See the file AUTHORS included with the NASM distribution for + * the specific copyright holders. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following + * conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ----------------------------------------------------------------------- */ + +/* + * strfunc.c + * + * String transformation functions + */ + +#include "nasmlib.h" +#include "nasm.h" + +/* + * Convert a string in UTF-8 format to UTF-16LE + */ +static size_t utf8_to_16le(uint8_t *str, size_t len, char *op) +{ +#define EMIT(x) do { if (op) { WRITESHORT(op,x); } outlen++; } while(0) + + size_t outlen = 0; + int expect = 0; + uint8_t c; + uint32_t v = 0, vmin = 0; + + while (len--) { + c = *str++; + + if (expect) { + if ((c & 0xc0) != 0x80) { + expect = 0; + return -1; + } else { + v = (v << 6) | (c & 0x3f); + if (!--expect) { + if (v < vmin || v > 0x10ffff || + (v >= 0xd800 && v <= 0xdfff)) { + return -1; + } else if (v > 0xffff) { + v -= 0x10000; + EMIT(0xd800 | (v >> 10)); + EMIT(0xdc00 | (v & 0x3ff)); + } else { + EMIT(v); + } + } + continue; + } + } + + if (c < 0x80) { + EMIT(c); + } else if (c < 0xc0 || c >= 0xfe) { + /* Invalid UTF-8 */ + return -1; + } else if (c < 0xe0) { + v = c & 0x1f; + expect = 1; + vmin = 0x80; + } else if (c < 0xf0) { + v = c & 0x0f; + expect = 2; + vmin = 0x800; + } else if (c < 0xf8) { + v = c & 0x07; + expect = 3; + vmin = 0x10000; + } else if (c < 0xfc) { + v = c & 0x03; + expect = 4; + vmin = 0x200000; + } else { + v = c & 0x01; + expect = 5; + vmin = 0x4000000; + } + } + + return expect ? (size_t)-1 : outlen << 1; + +#undef EMIT +} + +/* + * Convert a string in UTF-8 format to UTF-16BE + */ +static size_t utf8_to_16be(uint8_t *str, size_t len, char *op) +{ +#define EMIT(x) \ + do { \ + uint16_t _y = (x); \ + if (op) { \ + WRITECHAR(op, _y >> 8); \ + WRITECHAR(op, _y); \ + } \ + outlen++; \ + } while (0) \ + + size_t outlen = 0; + int expect = 0; + uint8_t c; + uint32_t v = 0, vmin = 0; + + while (len--) { + c = *str++; + + if (expect) { + if ((c & 0xc0) != 0x80) { + expect = 0; + return -1; + } else { + v = (v << 6) | (c & 0x3f); + if (!--expect) { + if (v < vmin || v > 0x10ffff || + (v >= 0xd800 && v <= 0xdfff)) { + return -1; + } else if (v > 0xffff) { + v -= 0x10000; + EMIT(0xdc00 | (v & 0x3ff)); + EMIT(0xd800 | (v >> 10)); + } else { + EMIT(v); + } + } + continue; + } + } + + if (c < 0x80) { + EMIT(c); + } else if (c < 0xc0 || c >= 0xfe) { + /* Invalid UTF-8 */ + return -1; + } else if (c < 0xe0) { + v = c & 0x1f; + expect = 1; + vmin = 0x80; + } else if (c < 0xf0) { + v = c & 0x0f; + expect = 2; + vmin = 0x800; + } else if (c < 0xf8) { + v = c & 0x07; + expect = 3; + vmin = 0x10000; + } else if (c < 0xfc) { + v = c & 0x03; + expect = 4; + vmin = 0x200000; + } else { + v = c & 0x01; + expect = 5; + vmin = 0x4000000; + } + } + + return expect ? (size_t)-1 : outlen << 1; + +#undef EMIT +} + +/* + * Convert a string in UTF-8 format to UTF-32LE + */ +static size_t utf8_to_32le(uint8_t *str, size_t len, char *op) +{ +#define EMIT(x) do { if (op) { WRITELONG(op,x); } outlen++; } while(0) + + size_t outlen = 0; + int expect = 0; + uint8_t c; + uint32_t v = 0, vmin = 0; + + while (len--) { + c = *str++; + + if (expect) { + if ((c & 0xc0) != 0x80) { + return -1; + } else { + v = (v << 6) | (c & 0x3f); + if (!--expect) { + if (v < vmin || (v >= 0xd800 && v <= 0xdfff)) { + return -1; + } else { + EMIT(v); + } + } + continue; + } + } + + if (c < 0x80) { + EMIT(c); + } else if (c < 0xc0 || c >= 0xfe) { + /* Invalid UTF-8 */ + return -1; + } else if (c < 0xe0) { + v = c & 0x1f; + expect = 1; + vmin = 0x80; + } else if (c < 0xf0) { + v = c & 0x0f; + expect = 2; + vmin = 0x800; + } else if (c < 0xf8) { + v = c & 0x07; + expect = 3; + vmin = 0x10000; + } else if (c < 0xfc) { + v = c & 0x03; + expect = 4; + vmin = 0x200000; + } else { + v = c & 0x01; + expect = 5; + vmin = 0x4000000; + } + } + + return expect ? (size_t)-1 : outlen << 2; + +#undef EMIT +} + +/* + * Convert a string in UTF-8 format to UTF-32BE + */ +static size_t utf8_to_32be(uint8_t *str, size_t len, char *op) +{ +#define EMIT(x) \ + do { \ + uint32_t _y = (x); \ + if (op) { \ + WRITECHAR(op,_y >> 24); \ + WRITECHAR(op,_y >> 16); \ + WRITECHAR(op,_y >> 8); \ + WRITECHAR(op,_y); \ + } \ + outlen++; \ + } while (0) + + size_t outlen = 0; + int expect = 0; + uint8_t c; + uint32_t v = 0, vmin = 0; + + while (len--) { + c = *str++; + + if (expect) { + if ((c & 0xc0) != 0x80) { + return -1; + } else { + v = (v << 6) | (c & 0x3f); + if (!--expect) { + if (v < vmin || (v >= 0xd800 && v <= 0xdfff)) { + return -1; + } else { + EMIT(v); + } + } + continue; + } + } + + if (c < 0x80) { + EMIT(c); + } else if (c < 0xc0 || c >= 0xfe) { + /* Invalid UTF-8 */ + return -1; + } else if (c < 0xe0) { + v = c & 0x1f; + expect = 1; + vmin = 0x80; + } else if (c < 0xf0) { + v = c & 0x0f; + expect = 2; + vmin = 0x800; + } else if (c < 0xf8) { + v = c & 0x07; + expect = 3; + vmin = 0x10000; + } else if (c < 0xfc) { + v = c & 0x03; + expect = 4; + vmin = 0x200000; + } else { + v = c & 0x01; + expect = 5; + vmin = 0x4000000; + } + } + + return expect ? (size_t)-1 : outlen << 2; + +#undef EMIT +} + +typedef size_t (*transform_func)(uint8_t *, size_t, char *); + +/* + * Apply a specific string transform and return it in a nasm_malloc'd + * buffer, returning the length. On error, returns (size_t)-1 and no + * buffer is allocated. + */ +size_t string_transform(char *str, size_t len, char **out, enum strfunc func) +{ + /* This should match enum strfunc in nasm.h */ + static const transform_func str_transforms[] = { + utf8_to_16le, + utf8_to_16le, + utf8_to_16be, + utf8_to_32le, + utf8_to_32le, + utf8_to_32be, + }; + transform_func transform = str_transforms[func]; + size_t outlen; + uint8_t *s = (uint8_t *)str; + char *buf; + + outlen = transform(s, len, NULL); + if (outlen == (size_t)-1) + return -1; + + *out = buf = nasm_malloc(outlen+1); + buf[outlen] = '\0'; /* Forcibly null-terminate the buffer */ + return transform(s, len, buf); +} -- cgit v1.2.3