summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Uros Majstorovic <majstor@majstor.org> 2020-08-03 03:51:26 +0200
committer Uros Majstorovic <majstor@majstor.org> 2020-08-03 03:51:26 +0200
commit ab370b40c9dcb25fb5bb828963aaa975764f3cf8 (patch)
tree c8161a00d265ecc8929fa3a726ad2adced93ca36
parent 0e518d5117b73fd54081decf1c0eb9f9d3173ff6 (diff)
unicode verify fixed
-rw-r--r-- code/fe310/eos/eve/unicode.c 127
-rw-r--r-- code/fe310/eos/eve/unicode.h 13
-rw-r--r-- code/fe310/eos/eve/widget/strw.c 19
-rw-r--r-- code/fe310/eos/eve/widget/textw.c 19
4 files changed, 146 insertions, 32 deletions
diff --git a/code/fe310/eos/eve/unicode.c b/code/fe310/eos/eve/unicode.c
index 62b1714..2915791 100644
--- a/code/fe310/eos/eve/unicode.c
+++ b/code/fe310/eos/eve/unicode.c
@@ -1,6 +1,6 @@
#include "unicode.h"
-uint8_t utf8_enc(utf32_t ch, utf8_t *str) {
+int utf8_enc(utf32_t ch, utf8_t *str) {
if (ch <= 0x7f) {
str[0] = ch;
return 1;
@@ -9,7 +9,7 @@ uint8_t utf8_enc(utf32_t ch, utf8_t *str) {
str[1] = 0x80 | (ch & 0x3f);
return 2;
} else if (ch <= 0xffff) {
- if ((ch >= 0xd800) && (ch <= 0xdfff)) return 0;
+ if ((ch >= 0xd800) && (ch <= 0xdfff)) return UTF_ERR;
str[0] = 0xe0 | (ch >> 12);
str[1] = 0x80 | ((ch >> 6) & 0x3f);
str[2] = 0x80 | (ch & 0x3f);
@@ -21,39 +21,39 @@ uint8_t utf8_enc(utf32_t ch, utf8_t *str) {
str[3] = 0x80 | (ch & 0x3f);
return 4;
} else {
- return 0;
+ return UTF_ERR;
}
}
-uint8_t utf8_dec(utf8_t *str, utf32_t *ch) {
+/*
+ * Decode one UTF-8 sequence starting at str[0] into *ch.
+ * Returns the number of bytes consumed (1-4), or UTF_ERR for an invalid lead
+ * byte, a bad continuation byte, an overlong form, a surrogate
+ * (0xd800-0xdfff) or a value above 0x10ffff. Up to four bytes are read, as
+ * dictated by the lead byte; *ch may hold a partial value on UTF_ERR.
+ */
+int utf8_dec(utf8_t *str, utf32_t *ch) {
if ((str[0] & 0x80) == 0x00) {
*ch = str[0];
return 1;
} else if ((str[0] & 0xe0) == 0xc0) {
- if ((str[1] & 0xc0) != 0x80) return 0;
+ if ((str[1] & 0xc0) != 0x80) return UTF_ERR;
*ch = (utf32_t)(str[0] & 0x1f) << 6;
*ch |= (utf32_t)(str[1] & 0x3f);
- if (*ch < 0x80) return 0;
+ if (*ch < 0x80) return UTF_ERR;
return 2;
} else if ((str[0] & 0xf0) == 0xe0) {
- if (((str[1] & 0xc0) != 0x80) || ((str[2] & 0xc0) != 0x80)) return 0;
+ if (((str[1] & 0xc0) != 0x80) || ((str[2] & 0xc0) != 0x80)) return UTF_ERR;
*ch = (utf32_t)(str[0] & 0x0f) << 12;
*ch |= (utf32_t)(str[1] & 0x3f) << 6;
*ch |= (utf32_t)(str[2] & 0x3f);
- if ((*ch >= 0xd800) && (*ch <= 0xdfff)) return 0;
- if (*ch < 0x800) return 0;
+ if ((*ch >= 0xd800) && (*ch <= 0xdfff)) return UTF_ERR;
+ if (*ch < 0x800) return UTF_ERR;
return 3;
} else if ((str[0] & 0xf8) == 0xf0) {
- if (((str[1] & 0xc0) != 0x80) || ((str[2] & 0xc0) != 0x80) || ((str[3] & 0xc0) != 0x80)) return 0;
+ if (((str[1] & 0xc0) != 0x80) || ((str[2] & 0xc0) != 0x80) || ((str[3] & 0xc0) != 0x80)) return UTF_ERR;
*ch = (utf32_t)(str[0] & 0x07) << 18;
- *ch |= (utf32_t)(str[1] & 0x0f) << 12;
+ /* Every continuation byte carries six payload bits, so mask with 0x3f. */
+ *ch |= (utf32_t)(str[1] & 0x3f) << 12;
*ch |= (utf32_t)(str[2] & 0x3f) << 6;
*ch |= (utf32_t)(str[3] & 0x3f);
- if (*ch < 0x010000) return 0;
- if (*ch > 0x10ffff) return 0;
+ if (*ch < 0x010000) return UTF_ERR;
+ if (*ch > 0x10ffff) return UTF_ERR;
return 4;
} else {
- return 0;
+ return UTF_ERR;
}
}
@@ -65,39 +65,116 @@ int utf8_seek(utf8_t *str, int off, utf32_t *ch) {
off = -off;
for (i=0; i<off; i++) {
len--;
- while ((*(str + len) & 0xc0) == 0x80) len--;
+ while ((str[len] & 0xc0) == 0x80) len--;
}
- utf8_dec(str + len, ch);
} else {
for (i=0; i<off; i++) {
- len += utf8_dec(str + len, ch);
+ /*
+ * Step over one character using the lead byte at str[len] (not str[0]).
+ * len is left unchanged when str[len] is not a valid lead byte.
+ */
+ if ((str[len] & 0x80) == 0x00) {
+ len += 1;
+ } else if ((str[len] & 0xe0) == 0xc0) {
+ len += 2;
+ } else if ((str[len] & 0xf0) == 0xe0) {
+ len += 3;
+ } else if ((str[len] & 0xf8) == 0xf0) {
+ len += 4;
+ }
}
}
+ utf8_dec(str + len, ch);
return len;
}
-int utf8_verify(utf8_t *str, int sz) {
+/*
+ * Validate the UTF-8 text in str, reading at most str_size bytes.
+ * Returns UTF_OK when a terminating NUL is found, with *str_len set to its
+ * offset. Returns UTF_ERR when the buffer ends first, when a sequence is cut
+ * short by the end of the buffer, or when decoding fails; *str_len is then
+ * the offset at which validation stopped. The buffer is not modified.
+ */
+int utf8_verify(utf8_t *str, int str_size, int *str_len) {
utf32_t ch;
- uint8_t ch_l;
+ /* Must be signed: utf8_dec() reports failure as UTF_ERR (-1). */
+ int ch_l;
int len = 0;
- while (len < sz) {
- if (sz - len < 4) {
+ while (len < str_size) {
+ if (str_size - len < 4) {
if (((str[len] & 0xf8) == 0xf0) ||
- (((str[len] & 0xf0) == 0xe0) && (sz - len < 3)) ||
- (((str[len] & 0xe0) == 0xc0) && (sz - len < 2))) {
- str[len] = '\0';
+ (((str[len] & 0xf0) == 0xe0) && (str_size - len < 3)) ||
+ (((str[len] & 0xe0) == 0xc0) && (str_size - len < 2))) {
break;
}
}
ch_l = utf8_dec(str + len, &ch);
- if (ch_l) {
- if (ch == 0) break;
+ if (ch_l > 0) {
+ if (ch == 0) {
+ *str_len = len;
+ return UTF_OK;
+ }
len += ch_l;
} else {
- str[len] = '\0';
break;
}
}
+ *str_len = len;
+ return UTF_ERR;
+}
+
+/*
+ * Encode ch as UTF-16 into str, high byte of each code unit first.
+ * Returns the number of bytes written (2 or 4), or UTF_ERR when ch is a
+ * surrogate (0xd800-0xdfff) or above 0x10ffff; nothing is written on error.
+ */
+int utf16_enc(utf32_t ch, uint8_t *str) {
+ uint16_t hi;
+ uint16_t lo;
+
+ if (ch > 0x10ffff) return UTF_ERR;
+ if ((ch >= 0xd800) && (ch <= 0xdfff)) return UTF_ERR;
+
+ if (ch <= 0xffff) {
+ /* Single code unit. */
+ str[0] = ch >> 8;
+ str[1] = ch & 0xff;
+ return 2;
+ }
+
+ /* Surrogate pair: ten bits in each half of the offset from 0x10000. */
+ ch -= 0x10000;
+ hi = 0xd800 + (ch >> 10);
+ lo = 0xdc00 + (ch & 0x3ff);
+ str[0] = hi >> 8;
+ str[1] = hi & 0xff;
+ str[2] = lo >> 8;
+ str[3] = lo & 0xff;
+ return 4;
+}
+
+/*
+ * Decode one UTF-16 character from str (high byte of each code unit first)
+ * into *ch. Returns the number of bytes consumed (2 or 4), or UTF_ERR when
+ * the first unit is a low surrogate or a high surrogate is not followed by a
+ * low surrogate; *ch then holds the first code unit.
+ */
+int utf16_dec(uint8_t *str, utf32_t *ch) {
+ uint16_t lead;
+ uint16_t trail;
+
+ *ch = (str[0] << 8) | str[1];
+ if ((*ch < 0xd800) || (*ch > 0xdfff)) return 2;
+
+ lead = *ch;
+ if (lead > 0xdbff) return UTF_ERR;
+ trail = (str[2] << 8) | str[3];
+ if ((trail < 0xdc00) || (trail > 0xdfff)) return UTF_ERR;
+ *ch = (((lead - 0xd800) << 10) | (trail - 0xdc00)) + 0x10000;
+ return 4;
+}
+
+/*
+ * Move off characters forward (off > 0) or backward (off < 0) from str,
+ * treating a surrogate pair as one character, then decode the character at
+ * the resulting position into *ch. Returns the byte offset relative to str
+ * (negative when seeking backward). The result of utf16_dec() is not checked.
+ */
+int utf16_seek(uint8_t *str, int off, utf32_t *ch) {
+ int len = 0;
+ int n;
+ uint16_t unit;
+
+ if (off < 0) {
+ for (n = -off; n > 0; n--) {
+ len -= 2;
+ unit = (str[len] << 8) | str[len + 1];
+ /* Landed on a low surrogate: step back over its high half too. */
+ if ((unit >= 0xdc00) && (unit <= 0xdfff)) len -= 2;
+ }
+ } else {
+ for (n = off; n > 0; n--) {
+ unit = (str[len] << 8) | str[len + 1];
+ /* A high surrogate starts a four-byte pair. */
+ len += ((unit >= 0xd800) && (unit <= 0xdbff)) ? 4 : 2;
+ }
+ }
+ utf16_dec(str + len, ch);
return len;
}
diff --git a/code/fe310/eos/eve/unicode.h b/code/fe310/eos/eve/unicode.h
index 6452822..a3b9696 100644
--- a/code/fe310/eos/eve/unicode.h
+++ b/code/fe310/eos/eve/unicode.h
@@ -1,10 +1,17 @@
#include <stdint.h>
+#define UTF_OK 0
+#define UTF_ERR -1
+
typedef uint8_t utf8_t;
typedef uint16_t utf16_t;
typedef uint32_t utf32_t;
-uint8_t utf8_enc(utf32_t ch, utf8_t *str);
-uint8_t utf8_dec(utf8_t *str, utf32_t *ch);
+int utf8_enc(utf32_t ch, utf8_t *str);
+int utf8_dec(utf8_t *str, utf32_t *ch);
int utf8_seek(utf8_t *str, int off, utf32_t *ch);
-int utf8_verify(utf8_t *str, int sz);
\ No newline at end of file
+int utf8_verify(utf8_t *str, int str_size, int *str_len);
+
+int utf16_enc(utf32_t ch, uint8_t *str);
+int utf16_dec(uint8_t *str, utf32_t *ch);
+int utf16_seek(uint8_t *str, int off, utf32_t *ch);
diff --git a/code/fe310/eos/eve/widget/strw.c b/code/fe310/eos/eve/widget/strw.c
index da57bab..5f7e63d 100644
--- a/code/fe310/eos/eve/widget/strw.c
+++ b/code/fe310/eos/eve/widget/strw.c
@@ -30,6 +30,7 @@
#define CHAR_VALID_INPUT(c) ((c >= 0x20) && (c < 0x7f))
void eve_strw_init(EVEStrWidget *widget, EVERect *g, EVEFont *font, utf8_t *str, uint16_t str_size) {
+ int rv, str_len;
EVEWidget *_widget = &widget->w;
memset(widget, 0, sizeof(EVEStrWidget));
@@ -37,7 +38,12 @@ void eve_strw_init(EVEStrWidget *widget, EVERect *g, EVEFont *font, utf8_t *str,
widget->font = font;
widget->str = str;
widget->str_size = str_size;
- widget->str_len = utf8_verify(str, str_size);
+ rv = utf8_verify(str, str_size, &str_len);
+ if (rv != UTF_OK) {
+ if (str_len >= str_size) str_len = 0;
+ widget->str[str_len] = '\0';
+ }
+ widget->str_len = str_len;
widget->str_g.w = eve_font_str_w(font, str);
if (_widget->g.h == 0) _widget->g.h = eve_font_h(font);
}
@@ -335,8 +341,17 @@ void eve_strw_putc(void *_page, int c) {
ins_c = utf8_enc(c, utf8_buf);
ins_w = eve_font_ch_w(widget->font, c);
} else if (c == CH_CTRLV) {
+ int rv, clipb_len = 0;
+
clipb = eve_clipb_get();
- ins_c = clipb ? utf8_verify(clipb, EVE_CLIPB_SIZE_BUF) : 0;
+ if (clipb) {
+ rv = utf8_verify(clipb, EVE_CLIPB_SIZE_BUF, &clipb_len);
+ if (rv != UTF_OK) {
+ clipb = NULL;
+ clipb_len = 0;
+ }
+ }
+ ins_c = clipb_len;
ins_w = eve_font_str_w(widget->font, clipb);
}
if (widget->str_len + ins_c >= widget->str_size + del_c) {
diff --git a/code/fe310/eos/eve/widget/textw.c b/code/fe310/eos/eve/widget/textw.c
index f14e4f6..03f6c0a 100644
--- a/code/fe310/eos/eve/widget/textw.c
+++ b/code/fe310/eos/eve/widget/textw.c
@@ -36,6 +36,7 @@
#define DIVC(x,y) ((x) / (y) + ((x) % (y) != 0))
void eve_textw_init(EVETextWidget *widget, EVERect *g, EVEFont *font, utf8_t *text, uint16_t text_size, uint16_t *line, uint16_t line_size) {
+ int rv, text_len;
EVEWidget *_widget = &widget->w;
memset(widget, 0, sizeof(EVETextWidget));
@@ -43,7 +44,12 @@ void eve_textw_init(EVETextWidget *widget, EVERect *g, EVEFont *font, utf8_t *te
widget->font = font;
widget->text = text;
widget->text_size = text_size;
- widget->text_len = utf8_verify(text, text_size);
+ rv = utf8_verify(text, text_size, &text_len);
+ if (rv != UTF_OK) {
+ if (text_len >= text_size) text_len = 0;
+ widget->text[text_len] = '\0';
+ }
+ widget->text_len = text_len;
widget->line = line;
widget->line_size = line_size;
memset(widget->line, 0xff, line_size * sizeof(uint16_t));
@@ -341,8 +347,17 @@ void eve_textw_putc(void *_page, int c) {
ins_c = utf8_enc(c, utf8_buf);
ch_w = eve_font_ch_w(widget->font, c);
} else if (c == CH_CTRLV) {
+ int rv, clipb_len = 0;
+
clipb = eve_clipb_get();
- ins_c = clipb ? utf8_verify(clipb, EVE_CLIPB_SIZE_BUF) : 0;
+ if (clipb) {
+ rv = utf8_verify(clipb, EVE_CLIPB_SIZE_BUF, &clipb_len);
+ if (rv != UTF_OK) {
+ clipb = NULL;
+ clipb_len = 0;
+ }
+ }
+ ins_c = clipb_len;
ch_w = eve_font_str_w(widget->font, clipb);
}
if (widget->text_len + ins_c >= widget->text_size + del_c) {