From 987b1653f1f11f6ad418452482e9dd886e7e7955 Mon Sep 17 00:00:00 2001
From: Jordan Bancino
Date: Wed, 27 Jul 2022 13:47:12 -0400
Subject: [PATCH] Add UtilUtf8Encode() and UtilServerTs()

Also updated Json.c to use UtilUtf8Encode() when decoding JSON strings.
---
 src/Json.c | 91 +++++++++++++++++++++++++++++++++++-------------------
 src/Util.c | 76 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+), 31 deletions(-)
 create mode 100644 src/Util.c

diff --git a/src/Json.c b/src/Json.c
index b012c62..408ea2e 100644
--- a/src/Json.c
+++ b/src/Json.c
@@ -1,8 +1,11 @@
 #include
 
+#include
+
 #include
 #include
 #include
+#include
 
 struct JsonValue
 {
@@ -282,16 +285,10 @@ JsonEncodeString(const char *str, FILE * out)
             case '\r':
                 fputs("\\r", out);
                 break;
-            default:
-                if (c < ' ')
-                {
-                    fprintf(out, "\\u%04x", c);
-                }
-                else
-                {
-                    fputc(c, out);
-                }
+            default: /* Assume UTF-8 input */
+                fputc(c, out);
                 break;
+
         }
         i++;
     }
@@ -304,11 +301,15 @@ JsonDecodeString(FILE * in)
 {
     const size_t strBlockSize = 16;
 
+    size_t i;
     size_t len;
     size_t allocated;
     char *str;
     char c;
-    char a;
+    char a[5];
+
+    unsigned long utf8;
+    char *utf8Ptr;
 
     len = 0;
     allocated = strBlockSize;
@@ -334,30 +335,52 @@ JsonDecodeString(FILE * in)
                     case '\\':
                     case '"':
                     case '/':
-                        a = c;
+                        a[0] = c;
+                        a[1] = '\0';
                         break;
                     case 'b':
-                        a = '\b';
+                        a[0] = '\b';
+                        a[1] = '\0';
                         break;
                     case 't':
-                        a = '\t';
+                        a[0] = '\t';
+                        a[1] = '\0';
                         break;
                     case 'n':
-                        a = '\n';
+                        a[0] = '\n';
+                        a[1] = '\0';
                         break;
                     case 'f':
-                        a = '\f';
+                        a[0] = '\f';
+                        a[1] = '\0';
                         break;
                     case 'r':
-                        a = '\r';
+                        a[0] = '\r';
+                        a[1] = '\0';
                         break;
                     case 'u':
-                        if (fscanf(in, "%04x", (unsigned int *) &a) != 1)
+                        /* Read \uXXXX point into a 4-byte buffer */
+                        if (fscanf(in, "%04lx", &utf8) != 1)
                         {
                             /* Bad hex value */
                             free(str);
                             return NULL;
                         }
+
+                        /* Encode the 4-byte UTF-8 buffer into a series
+                         * of 1-byte characters */
+                        utf8Ptr = UtilUtf8Encode(utf8);
+                        if (!utf8Ptr)
+                        {
+                            /* Mem error */
+                            free(str);
+                            return NULL;
+                        }
+
+                        /* Move the output of UtilUtf8Encode() into our
+                         * local buffer */
+                        strcpy(a, utf8Ptr);
+                        free(utf8Ptr);
                         break;
                     default:
                         /* Bad escape value */
@@ -366,28 +389,34 @@ JsonDecodeString(FILE * in)
                 }
                 break;
             default:
-                a = c;
+                a[0] = c;
+                a[1] = '\0';
                 break;
         }
 
-        /* Append a */
-        if (len >= allocated)
+        /* Append buffer a */
+        i = 0;
+        while (a[i] != '\0')
         {
-            char *tmp;
-
-            allocated += strBlockSize;
-            tmp = realloc(str, allocated * sizeof(char));
-            if (!tmp)
+            if (len >= allocated)
             {
-                free(str);
-                return NULL;
+                char *tmp;
+
+                allocated += strBlockSize;
+                tmp = realloc(str, allocated * sizeof(char));
+                if (!tmp)
+                {
+                    free(str);
+                    return NULL;
+                }
+
+                str = tmp;
             }
 
-            str = tmp;
+            str[len] = a[i];
+            len++;
+            i++;
         }
-
-        str[len] = a;
-        len++;
     }
 
     free(str);
diff --git a/src/Util.c b/src/Util.c
new file mode 100644
index 0000000..f7351ee
--- /dev/null
+++ b/src/Util.c
@@ -0,0 +1,76 @@
+#include <Util.h>
+
+#include <stdlib.h>
+#include <sys/time.h>
+
+/*
+ * Return the current wall-clock time in milliseconds since the Unix
+ * epoch, as reported by gettimeofday(). The result is narrowed to
+ * long, so it is only meaningful where long is wider than 32 bits.
+ */
+long
+UtilServerTs(void)
+{
+    struct timeval tv;
+    long ts;
+
+    gettimeofday(&tv, NULL);
+    ts = (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+
+    return ts;
+}
+
+/*
+ * Encode the Unicode code point utf8 as a NUL-terminated UTF-8 string
+ * in a newly malloc()ed 5-byte buffer that the caller must free().
+ * Returns NULL if the allocation fails. Values that are not Unicode
+ * scalar values (the UTF-16 surrogates 0xD800-0xDFFF and anything
+ * above 0x10FFFF) are encoded as U+FFFD, the replacement character.
+ */
+char *
+UtilUtf8Encode(unsigned long utf8)
+{
+    char *str;
+
+    str = malloc(5 * sizeof(char));
+    if (!str)
+    {
+        return NULL;
+    }
+
+    if ((utf8 >= 0xD800 && utf8 <= 0xDFFF) || utf8 > 0x10FFFF)
+    {
+        /* UTF-8 has no valid encoding for these values, so send
+         * the replacement character instead */
+        utf8 = 0xFFFD;
+    }
+
+    if (utf8 <= 0x7F) /* Plain ASCII */
+    {
+        str[0] = (char) utf8;
+        str[1] = '\0';
+    }
+    else if (utf8 <= 0x07FF) /* 2-byte */
+    {
+        str[0] = (char) (((utf8 >> 6) & 0x1F) | 0xC0);
+        str[1] = (char) (((utf8 >> 0) & 0x3F) | 0x80);
+        str[2] = '\0';
+    }
+    else if (utf8 <= 0xFFFF) /* 3-byte */
+    {
+        str[0] = (char) (((utf8 >> 12) & 0x0F) | 0xE0);
+        str[1] = (char) (((utf8 >> 6) & 0x3F) | 0x80);
+        str[2] = (char) (((utf8 >> 0) & 0x3F) | 0x80);
+        str[3] = '\0';
+    }
+    else /* 4-byte */
+    {
+        str[0] = (char) (((utf8 >> 18) & 0x07) | 0xF0);
+        str[1] = (char) (((utf8 >> 12) & 0x3F) | 0x80);
+        str[2] = (char) (((utf8 >> 6) & 0x3F) | 0x80);
+        str[3] = (char) (((utf8 >> 0) & 0x3F) | 0x80);
+        str[4] = '\0';
+    }
+
+    return str;
+}