From ab370b40c9dcb25fb5bb828963aaa975764f3cf8 Mon Sep 17 00:00:00 2001 From: Uros Majstorovic Date: Mon, 3 Aug 2020 03:51:26 +0200 Subject: unicode verify fixed --- code/fe310/eos/eve/unicode.c | 127 ++++++++++++++++++++++++++++++-------- code/fe310/eos/eve/unicode.h | 13 +++- code/fe310/eos/eve/widget/strw.c | 19 +++++- code/fe310/eos/eve/widget/textw.c | 19 +++++- 4 files changed, 146 insertions(+), 32 deletions(-) (limited to 'code/fe310') diff --git a/code/fe310/eos/eve/unicode.c b/code/fe310/eos/eve/unicode.c index 62b1714..2915791 100644 --- a/code/fe310/eos/eve/unicode.c +++ b/code/fe310/eos/eve/unicode.c @@ -1,6 +1,6 @@ #include "unicode.h" -uint8_t utf8_enc(utf32_t ch, utf8_t *str) { +int utf8_enc(utf32_t ch, utf8_t *str) { if (ch <= 0x7f) { str[0] = ch; return 1; @@ -9,7 +9,7 @@ uint8_t utf8_enc(utf32_t ch, utf8_t *str) { str[1] = 0x80 | (ch & 0x3f); return 2; } else if (ch <= 0xffff) { - if ((ch >= 0xd800) && (ch <= 0xdfff)) return 0; + if ((ch >= 0xd800) && (ch <= 0xdfff)) return UTF_ERR; str[0] = 0xe0 | (ch >> 12); str[1] = 0x80 | ((ch >> 6) & 0x3f); str[2] = 0x80 | (ch & 0x3f); @@ -21,39 +21,39 @@ uint8_t utf8_enc(utf32_t ch, utf8_t *str) { str[3] = 0x80 | (ch & 0x3f); return 4; } else { - return 0; + return UTF_ERR; } } -uint8_t utf8_dec(utf8_t *str, utf32_t *ch) { +int utf8_dec(utf8_t *str, utf32_t *ch) { if ((str[0] & 0x80) == 0x00) { *ch = str[0]; return 1; } else if ((str[0] & 0xe0) == 0xc0) { - if ((str[1] & 0xc0) != 0x80) return 0; + if ((str[1] & 0xc0) != 0x80) return UTF_ERR; *ch = (utf32_t)(str[0] & 0x1f) << 6; *ch |= (utf32_t)(str[1] & 0x3f); - if (*ch < 0x80) return 0; + if (*ch < 0x80) return UTF_ERR; return 2; } else if ((str[0] & 0xf0) == 0xe0) { - if (((str[1] & 0xc0) != 0x80) || ((str[2] & 0xc0) != 0x80)) return 0; + if (((str[1] & 0xc0) != 0x80) || ((str[2] & 0xc0) != 0x80)) return UTF_ERR; *ch = (utf32_t)(str[0] & 0x0f) << 12; *ch |= (utf32_t)(str[1] & 0x3f) << 6; *ch |= (utf32_t)(str[2] & 0x3f); - if ((*ch >= 0xd800) && (*ch <= 0xdfff)) return 0; - if (*ch < 0x800) return 0; + if ((*ch >= 0xd800) && (*ch <= 0xdfff)) return UTF_ERR; + if (*ch < 0x800) return UTF_ERR; return 3; } else if ((str[0] & 0xf8) == 0xf0) { - if (((str[1] & 0xc0) != 0x80) || ((str[2] & 0xc0) != 0x80) || ((str[3] & 0xc0) != 0x80)) return 0; + if (((str[1] & 0xc0) != 0x80) || ((str[2] & 0xc0) != 0x80) || ((str[3] & 0xc0) != 0x80)) return UTF_ERR; *ch = (utf32_t)(str[0] & 0x07) << 18; *ch |= (utf32_t)(str[1] & 0x0f) << 12; *ch |= (utf32_t)(str[2] & 0x3f) << 6; *ch |= (utf32_t)(str[3] & 0x3f); - if (*ch < 0x010000) return 0; - if (*ch > 0x10ffff) return 0; + if (*ch < 0x010000) return UTF_ERR; + if (*ch > 0x10ffff) return UTF_ERR; return 4; } else { - return 0; + return UTF_ERR; } } @@ -65,39 +65,116 @@ int utf8_seek(utf8_t *str, int off, utf32_t *ch) { off = -off; for (i=0; i 0) { + if (ch == 0) { + *str_len = len; + return UTF_OK; + } len += ch_l; } else { - str[len] = '\0'; break; } } + *str_len = len; + return UTF_ERR; +} + +int utf16_enc(utf32_t ch, uint8_t *str) { + if (ch <= 0xffff) { + if ((ch >= 0xd800) && (ch <= 0xdfff)) return UTF_ERR; + str[0] = ch >> 8; + str[1] = ch & 0xff; + return 2; + } else if (ch <= 0x10ffff) { + uint16_t hi; + uint16_t lo; + + ch -= 0x10000; + hi = (ch >> 10) + 0xd800; + lo = (ch & 0x3ff) + 0xdc00; + str[0] = hi >> 8; + str[1] = hi & 0xff; + str[2] = lo >> 8; + str[3] = lo & 0xff; + return 4; + } else { + return UTF_ERR; + } +} + +int utf16_dec(uint8_t *str, utf32_t *ch) { + *ch = (str[0] << 8) | str[1]; + if ((*ch >= 0xd800) && (*ch <= 0xdfff)) { + uint16_t hi = *ch; + uint16_t lo; + + if (hi > 0xdbff) return UTF_ERR; + lo = (str[2] << 8) | str[3]; + if ((lo < 0xdc00) || (lo > 0xdfff)) return UTF_ERR; + *ch = (((hi - 0xd800) << 10) | (lo - 0xdc00)) + 0x10000; + return 4; + } else { + return 2; + } +} + +int utf16_seek(uint8_t *str, int off, utf32_t *ch) { + int i; + int len = 0; + uint16_t cu; + + if (off < 0) { + off = -off; + for (i=0; i= 0xdc00) && (cu <= 0xdfff)) { + len -= 2; + } + } + } else { + for (i=0; i= 0xd800) && (cu <= 0xdbff)) { + len += 4; + } else { + len += 2; + } + } + } + utf16_dec(str + len, ch); return len; } diff --git a/code/fe310/eos/eve/unicode.h b/code/fe310/eos/eve/unicode.h index 6452822..a3b9696 100644 --- a/code/fe310/eos/eve/unicode.h +++ b/code/fe310/eos/eve/unicode.h @@ -1,10 +1,17 @@ #include +#define UTF_OK 0 +#define UTF_ERR -1 + typedef uint8_t utf8_t; typedef uint16_t utf16_t; typedef uint32_t utf32_t; -uint8_t utf8_enc(utf32_t ch, utf8_t *str); -uint8_t utf8_dec(utf8_t *str, utf32_t *ch); +int utf8_enc(utf32_t ch, utf8_t *str); +int utf8_dec(utf8_t *str, utf32_t *ch); int utf8_seek(utf8_t *str, int off, utf32_t *ch); -int utf8_verify(utf8_t *str, int sz); \ No newline at end of file +int utf8_verify(utf8_t *str, int str_size, int *str_len); + +int utf16_enc(utf32_t ch, uint8_t *str); +int utf16_dec(uint8_t *str, utf32_t *ch); +int utf16_seek(uint8_t *str, int off, utf32_t *ch); diff --git a/code/fe310/eos/eve/widget/strw.c b/code/fe310/eos/eve/widget/strw.c index da57bab..5f7e63d 100644 --- a/code/fe310/eos/eve/widget/strw.c +++ b/code/fe310/eos/eve/widget/strw.c @@ -30,6 +30,7 @@ #define CHAR_VALID_INPUT(c) ((c >= 0x20) && (c < 0x7f)) void eve_strw_init(EVEStrWidget *widget, EVERect *g, EVEFont *font, utf8_t *str, uint16_t str_size) { + int rv, str_len; EVEWidget *_widget = &widget->w; memset(widget, 0, sizeof(EVEStrWidget)); @@ -37,7 +38,12 @@ void eve_strw_init(EVEStrWidget *widget, EVERect *g, EVEFont *font, utf8_t *str, widget->font = font; widget->str = str; widget->str_size = str_size; - widget->str_len = utf8_verify(str, str_size); + rv = utf8_verify(str, str_size, &str_len); + if (rv != UTF_OK) { + if (str_len >= str_size) str_len = 0; + widget->str[str_len] = '\0'; + } + widget->str_len = str_len; widget->str_g.w = eve_font_str_w(font, str); if (_widget->g.h == 0) _widget->g.h = eve_font_h(font); } @@ -335,8 +341,17 @@ void eve_strw_putc(void *_page, int c) { ins_c = utf8_enc(c, utf8_buf); ins_w = eve_font_ch_w(widget->font, c); } else if (c == CH_CTRLV) { + int rv, clipb_len = 0; + clipb = eve_clipb_get(); - ins_c = clipb ? utf8_verify(clipb, EVE_CLIPB_SIZE_BUF) : 0; + if (clipb) { + rv = utf8_verify(clipb, EVE_CLIPB_SIZE_BUF, &clipb_len); + if (rv != UTF_OK) { + clipb = NULL; + clipb_len = 0; + } + } + ins_c = clipb_len; ins_w = eve_font_str_w(widget->font, clipb); } if (widget->str_len + ins_c >= widget->str_size + del_c) { diff --git a/code/fe310/eos/eve/widget/textw.c b/code/fe310/eos/eve/widget/textw.c index f14e4f6..03f6c0a 100644 --- a/code/fe310/eos/eve/widget/textw.c +++ b/code/fe310/eos/eve/widget/textw.c @@ -36,6 +36,7 @@ #define DIVC(x,y) ((x) / (y) + ((x) % (y) != 0)) void eve_textw_init(EVETextWidget *widget, EVERect *g, EVEFont *font, utf8_t *text, uint16_t text_size, uint16_t *line, uint16_t line_size) { + int rv, text_len; EVEWidget *_widget = &widget->w; memset(widget, 0, sizeof(EVETextWidget)); @@ -43,7 +44,12 @@ void eve_textw_init(EVETextWidget *widget, EVERect *g, EVEFont *font, utf8_t *te widget->font = font; widget->text = text; widget->text_size = text_size; - widget->text_len = utf8_verify(text, text_size); + rv = utf8_verify(text, text_size, &text_len); + if (rv != UTF_OK) { + if (text_len >= text_size) text_len = 0; + widget->text[text_len] = '\0'; + } + widget->text_len = text_len; widget->line = line; widget->line_size = line_size; memset(widget->line, 0xff, line_size * sizeof(uint16_t)); @@ -341,8 +347,17 @@ void eve_textw_putc(void *_page, int c) { ins_c = utf8_enc(c, utf8_buf); ch_w = eve_font_ch_w(widget->font, c); } else if (c == CH_CTRLV) { + int rv, clipb_len = 0; + clipb = eve_clipb_get(); - ins_c = clipb ? utf8_verify(clipb, EVE_CLIPB_SIZE_BUF) : 0; + if (clipb) { + rv = utf8_verify(clipb, EVE_CLIPB_SIZE_BUF, &clipb_len); + if (rv != UTF_OK) { + clipb = NULL; + clipb_len = 0; + } + } + ins_c = clipb_len; ch_w = eve_font_str_w(widget->font, clipb); } if (widget->text_len + ins_c >= widget->text_size + del_c) { -- cgit v1.2.3