summaryrefslogtreecommitdiff
path: root/vere/ext/nasm/asm/quote.c
diff options
context:
space:
mode:
Diffstat (limited to 'vere/ext/nasm/asm/quote.c')
-rw-r--r--vere/ext/nasm/asm/quote.c563
1 files changed, 563 insertions, 0 deletions
diff --git a/vere/ext/nasm/asm/quote.c b/vere/ext/nasm/asm/quote.c
new file mode 100644
index 0000000..074f17d
--- /dev/null
+++ b/vere/ext/nasm/asm/quote.c
@@ -0,0 +1,563 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2020 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * quote.c
+ */
+
+#include "compiler.h"
+#include "nasmlib.h"
+#include "quote.h"
+#include "nctype.h"
+#include "error.h"
+
+/*
+ * Create a NASM quoted string in newly allocated memory. Update the
+ * *lenp parameter with the output length (sans final NUL).
+ */
+
+char *nasm_quote(const char *str, size_t *lenp)
+{
+ const char *p, *ep;
+ char c, c1, *q, *nstr;
+ unsigned char uc;
+ bool sq_ok, dq_ok;
+ size_t qlen;
+ size_t len = *lenp;
+
+ sq_ok = dq_ok = true;
+ ep = str+len;
+ qlen = 0; /* Length if we need `...` quotes */
+ for (p = str; p < ep; p++) {
+ c = *p;
+ switch (c) {
+ case '\'':
+ sq_ok = false;
+ qlen++;
+ break;
+ case '\"':
+ dq_ok = false;
+ qlen++;
+ break;
+ case '`':
+ case '\\':
+ qlen += 2;
+ break;
+ default:
+ if (c < ' ' || c > '~') {
+ sq_ok = dq_ok = false;
+ switch (c) {
+ case '\a':
+ case '\b':
+ case '\t':
+ case '\n':
+ case '\v':
+ case '\f':
+ case '\r':
+ case 27:
+ qlen += 2;
+ break;
+ default:
+ c1 = (p+1 < ep) ? p[1] : 0;
+ if (c1 >= '0' && c1 <= '7')
+ uc = 0377; /* Must use the full form */
+ else
+ uc = c;
+ if (uc > 077)
+ qlen++;
+ if (uc > 07)
+ qlen++;
+ qlen += 2;
+ break;
+ }
+ } else {
+ qlen++;
+ }
+ break;
+ }
+ }
+
+ if (sq_ok || dq_ok) {
+ /* Use '...' or "..." */
+ nstr = nasm_malloc(len+3);
+ nstr[0] = nstr[len+1] = sq_ok ? '\'' : '\"';
+ q = &nstr[len+2];
+ if (len > 0)
+ memcpy(nstr+1, str, len);
+ } else {
+ /* Need to use `...` quoted syntax */
+ nstr = nasm_malloc(qlen+3);
+ q = nstr;
+ *q++ = '`';
+ for (p = str; p < ep; p++) {
+ c = *p;
+ switch (c) {
+ case '`':
+ case '\\':
+ *q++ = '\\';
+ *q++ = c;
+ break;
+ case 7:
+ *q++ = '\\';
+ *q++ = 'a';
+ break;
+ case 8:
+ *q++ = '\\';
+ *q++ = 'b';
+ break;
+ case 9:
+ *q++ = '\\';
+ *q++ = 't';
+ break;
+ case 10:
+ *q++ = '\\';
+ *q++ = 'n';
+ break;
+ case 11:
+ *q++ = '\\';
+ *q++ = 'v';
+ break;
+ case 12:
+ *q++ = '\\';
+ *q++ = 'f';
+ break;
+ case 13:
+ *q++ = '\\';
+ *q++ = 'r';
+ break;
+ case 27:
+ *q++ = '\\';
+ *q++ = 'e';
+ break;
+ default:
+ if (c < ' ' || c > '~') {
+ c1 = (p+1 < ep) ? p[1] : 0;
+ if (c1 >= '0' && c1 <= '7')
+ uc = 0377; /* Must use the full form */
+ else
+ uc = c;
+ *q++ = '\\';
+ if (uc > 077)
+ *q++ = ((unsigned char)c >> 6) + '0';
+ if (uc > 07)
+ *q++ = (((unsigned char)c >> 3) & 7) + '0';
+ *q++ = ((unsigned char)c & 7) + '0';
+ break;
+ } else {
+ *q++ = c;
+ }
+ break;
+ }
+ }
+ *q++ = '`';
+ nasm_assert((size_t)(q-nstr) == qlen+2);
+ }
+ *q = '\0';
+ *lenp = q - nstr;
+ return nstr;
+}
+
+static unsigned char *emit_utf8(unsigned char *q, uint32_t v)
+{
+ uint32_t vb1, vb2, vb3, vb4, vb5;
+
+ if (v <= 0x7f) {
+ *q++ = v;
+ goto out0;
+ }
+
+ vb1 = v >> 6;
+ if (vb1 <= 0x1f) {
+ *q++ = 0xc0 + vb1;
+ goto out1;
+ }
+
+ vb2 = vb1 >> 6;
+ if (vb2 <= 0x0f) {
+ *q++ = 0xe0 + vb2;
+ goto out2;
+ }
+
+ vb3 = vb2 >> 6;
+ if (vb3 <= 0x07) {
+ *q++ = 0xf0 + vb3;
+ goto out3;
+ }
+
+ vb4 = vb3 >> 6;
+ if (vb4 <= 0x03) {
+ *q++ = 0xf8 + vb4;
+ goto out4;
+ }
+
+ /*
+ * Note: this is invalid even for "classic" (pre-UTF16) 31-bit
+ * UTF-8 if the value is >= 0x8000000. This at least tries to do
+ * something vaguely sensible with it. Caveat programmer.
+ * The __utf*__ string transform functions do reject these
+ * as invalid input.
+ *
+ * vb5 cannot be more than 3, as a 32-bit value has been shifted
+ * right by 5*6 = 30 bits already.
+ */
+ vb5 = vb4 >> 6;
+ *q++ = 0xfc + vb5;
+ goto out5;
+
+ /* Emit extension bytes as appropriate */
+out5: *q++ = 0x80 + (vb4 & 63);
+out4: *q++ = 0x80 + (vb3 & 63);
+out3: *q++ = 0x80 + (vb2 & 63);
+out2: *q++ = 0x80 + (vb1 & 63);
+out1: *q++ = 0x80 + (v & 63);
+out0: return q;
+}
+
+static inline uint32_t ctlbit(uint32_t v)
+{
+ return unlikely(v < 32) ? UINT32_C(1) << v : 0;
+}
+
+#define CTL_ERR(c) \
+ (badctl & (ctlmask |= ctlbit(c)))
+
+#define EMIT_UTF8(c) \
+ do { \
+ uint32_t ec = (c); \
+ if (!CTL_ERR(ec)) \
+ q = emit_utf8(q, ec); \
+ } while (0)
+
+#define EMIT(c) \
+ do { \
+ unsigned char ec = (c); \
+ if (!CTL_ERR(ec)) \
+ *q++ = ec; \
+ } while (0)
+
+/*
+ * Same as nasm_quote, but take the length of a C string;
+ * the lenp argument is optional.
+ */
+char *nasm_quote_cstr(const char *str, size_t *lenp)
+{
+ size_t len = strlen(str);
+ char *qstr = nasm_quote(str, &len);
+ if (lenp)
+ *lenp = len;
+ return qstr;
+}
+
+/*
+ * Do an *in-place* dequoting of the specified string, returning the
+ * resulting length (which may be containing embedded nulls.)
+ *
+ * In-place replacement is possible since the unquoted length is always
+ * shorter than or equal to the quoted length.
+ *
+ * *ep points to the final quote, or to the null if improperly quoted.
+ *
+ * Issue an error if the string contains control characters
+ * corresponding to bits set in badctl; in that case, the output
+ * string, but not *ep, is truncated before the first invalid
+ * character.
+ *
+ * badctl is a bitmask of control characters (0-31) which are forbidden
+ * from appearing in the final output.
+ *
+ * The qstart character can be either '`' (NASM style) or '\"' (C style),
+ * to indicate the lead marker of a quoted string. If it is '\"', then
+ * '`' is not a special character at all.
+ */
+
+size_t nasm_unquote_anystr(char *str, char **ep, const uint32_t badctl,
+ const char qstart)
+{
+ unsigned char bq;
+ const unsigned char *p;
+ const unsigned char *escp = NULL;
+ unsigned char *q;
+ unsigned char c;
+ uint32_t ctlmask = 0; /* Mask of control characters seen */
+ enum unq_state {
+ st_start,
+ st_backslash,
+ st_hex,
+ st_oct,
+ st_ucs,
+ st_done
+ } state;
+ int ndig = 0;
+ uint32_t nval = 0;
+
+ p = q = (unsigned char *)str;
+
+ bq = *p++;
+ if (!bq)
+ return 0;
+
+ if (bq == (unsigned char)qstart) {
+ /* `...` string */
+ state = st_start;
+
+ while (state != st_done) {
+ c = *p++;
+ switch (state) {
+ case st_start:
+ if (c == '\\') {
+ state = st_backslash;
+ } else if ((c == '\0') | (c == bq)) {
+ state = st_done;
+ } else {
+ EMIT(c);
+ }
+ break;
+
+ case st_backslash:
+ state = st_start;
+ escp = p; /* Beginning of argument sequence */
+ nval = 0;
+ switch (c) {
+ case 'a':
+ nval = 7;
+ break;
+ case 'b':
+ nval = 8;
+ break;
+ case 'e':
+ nval = 27;
+ break;
+ case 'f':
+ nval = 12;
+ break;
+ case 'n':
+ nval = 10;
+ break;
+ case 'r':
+ nval = 13;
+ break;
+ case 't':
+ nval = 9;
+ break;
+ case 'u':
+ state = st_ucs;
+ ndig = 4;
+ break;
+ case 'U':
+ state = st_ucs;
+ ndig = 8;
+ break;
+ case 'v':
+ nval = 11;
+ break;
+ case 'x':
+ case 'X':
+ state = st_hex;
+ ndig = 2;
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ state = st_oct;
+ ndig = 2; /* Up to two more digits */
+ nval = c - '0';
+ break;
+ case '\0':
+ nval = '\\';
+ p--; /* Reprocess; terminates string */
+ break;
+ default:
+ nval = c;
+ break;
+ }
+ if (state == st_start)
+ EMIT(nval);
+ break;
+
+ case st_oct:
+ if (c >= '0' && c <= '7') {
+ nval = (nval << 3) + (c - '0');
+ if (--ndig)
+ break; /* Might have more digits */
+ } else {
+ p--; /* Process this character again */
+ }
+ EMIT(nval);
+ state = st_start;
+ break;
+
+ case st_hex:
+ case st_ucs:
+ if (nasm_isxdigit(c)) {
+ nval = (nval << 4) + numvalue(c);
+ if (--ndig)
+ break; /* Might have more digits */
+ } else {
+ p--; /* Process this character again */
+ }
+
+ if (unlikely(p <= escp))
+ EMIT(escp[-1]);
+ else if (state == st_ucs)
+ EMIT_UTF8(nval);
+ else
+ EMIT(nval);
+
+ state = st_start;
+ break;
+
+ default:
+ panic();
+ }
+ }
+ } else if (bq == '\'' || bq == '\"') {
+ /*
+ * '...' or "..." string, NASM legacy style (no escapes of
+ * * any kind, including collapsing double quote marks.)
+ * We obviously can't get here if qstart == '\"'.
+ */
+ while ((c = *p++) && (c != bq))
+ EMIT(c);
+ } else {
+ /* Not a quoted string, just return the input... */
+ while ((c = *p++))
+ EMIT(c);
+ }
+
+ /* Zero-terminate the output */
+ *q = '\0';
+
+ if (ctlmask & badctl)
+ nasm_nonfatal("control character in string not allowed here");
+
+ if (ep)
+ *ep = (char *)p - 1;
+ return (char *)q - str;
+}
+#undef EMIT
+
+/*
+ * Unquote any arbitrary string; may produce any bytes, including embedded
+ * control- and NUL characters.
+ */
+size_t nasm_unquote(char *str, char **ep)
+{
+ return nasm_unquote_anystr(str, ep, 0, STR_NASM);
+}
+
+/*
+ * Unquote a string intended to be used as a C string; most control
+ * characters are rejected, including whitespace characters that
+ * would imply line endings and so on.
+ */
+size_t nasm_unquote_cstr(char *str, char **ep)
+{
+ return nasm_unquote_anystr(str, ep, BADCTL, STR_NASM);
+}
+
+/*
+ * Find the end of a quoted string; returns the pointer to the terminating
+ * character (either the ending quote or the null character, if unterminated.)
+ * If the input is not a quoted string, return NULL.
+ * This applies to NASM style strings only.
+ */
+char *nasm_skip_string(const char *str)
+{
+ char bq;
+ const char *p;
+ char c;
+ enum unq_state {
+ st_start,
+ st_backslash,
+ st_done
+ } state;
+
+ bq = str[0];
+ p = str+1;
+ switch (bq) {
+ case '\'':
+ case '\"':
+ /* '...' or "..." string */
+ while ((c = *p++) && (c != bq))
+ ;
+ break;
+
+ case '`':
+ /* `...` string */
+ state = st_start;
+ while (state != st_done) {
+ c = *p++;
+ switch (state) {
+ case st_start:
+ switch (c) {
+ case '\\':
+ state = st_backslash;
+ break;
+ case '`':
+ case '\0':
+ state = st_done;
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case st_backslash:
+ /*
+ * Note: for the purpose of finding the end of the string,
+ * all successor states to st_backslash are functionally
+ * equivalent to st_start, since either a backslash or
+ * a backquote will force a return to the st_start state,
+ * and any possible multi-character state will terminate
+ * for any non-alphanumeric character.
+ */
+ state = c ? st_start : st_done;
+ break;
+
+ default:
+ panic();
+ }
+ }
+ break;
+
+ default:
+ /* Not a string at all... */
+ return NULL;
+ }
+ return (char *)p - 1;
+}