diff options
| author | Uros Majstorovic <majstor@majstor.org> | 2020-08-03 03:51:26 +0200 | 
|---|---|---|
| committer | Uros Majstorovic <majstor@majstor.org> | 2020-08-03 03:51:26 +0200 | 
| commit | ab370b40c9dcb25fb5bb828963aaa975764f3cf8 (patch) | |
| tree | c8161a00d265ecc8929fa3a726ad2adced93ca36 | |
| parent | 0e518d5117b73fd54081decf1c0eb9f9d3173ff6 (diff) | |
unicode verify fixed
| -rw-r--r-- | code/fe310/eos/eve/unicode.c | 127 | ||||
| -rw-r--r-- | code/fe310/eos/eve/unicode.h | 13 | ||||
| -rw-r--r-- | code/fe310/eos/eve/widget/strw.c | 19 | ||||
| -rw-r--r-- | code/fe310/eos/eve/widget/textw.c | 19 | 
4 files changed, 146 insertions, 32 deletions
| diff --git a/code/fe310/eos/eve/unicode.c b/code/fe310/eos/eve/unicode.c index 62b1714..2915791 100644 --- a/code/fe310/eos/eve/unicode.c +++ b/code/fe310/eos/eve/unicode.c @@ -1,6 +1,6 @@  #include "unicode.h" -uint8_t utf8_enc(utf32_t ch, utf8_t *str) { +int utf8_enc(utf32_t ch, utf8_t *str) {      if (ch <= 0x7f) {          str[0] = ch;          return 1; @@ -9,7 +9,7 @@ uint8_t utf8_enc(utf32_t ch, utf8_t *str) {          str[1] = 0x80 | (ch & 0x3f);          return 2;      } else if (ch <= 0xffff) { -        if ((ch >= 0xd800) && (ch <= 0xdfff)) return 0; +        if ((ch >= 0xd800) && (ch <= 0xdfff)) return UTF_ERR;          str[0] = 0xe0 | (ch >> 12);          str[1] = 0x80 | ((ch >> 6) & 0x3f);          str[2] = 0x80 | (ch & 0x3f); @@ -21,39 +21,39 @@ uint8_t utf8_enc(utf32_t ch, utf8_t *str) {          str[3] = 0x80 | (ch & 0x3f);          return 4;      } else { -        return 0; +        return UTF_ERR;      }  } -uint8_t utf8_dec(utf8_t *str, utf32_t *ch) { +int utf8_dec(utf8_t *str, utf32_t *ch) {      if ((str[0] & 0x80) == 0x00) {          *ch = str[0];          return 1;      } else if ((str[0] & 0xe0) == 0xc0) { -        if ((str[1] & 0xc0) != 0x80) return 0; +        if ((str[1] & 0xc0) != 0x80) return UTF_ERR;          *ch  = (utf32_t)(str[0] & 0x1f) << 6;          *ch |= (utf32_t)(str[1] & 0x3f); -        if (*ch < 0x80) return 0; +        if (*ch < 0x80) return UTF_ERR;          return 2;      } else if ((str[0] & 0xf0) == 0xe0) { -        if (((str[1] & 0xc0) != 0x80) || ((str[2] & 0xc0) != 0x80)) return 0; +        if (((str[1] & 0xc0) != 0x80) || ((str[2] & 0xc0) != 0x80)) return UTF_ERR;          *ch  = (utf32_t)(str[0] & 0x0f) << 12;          *ch |= (utf32_t)(str[1] & 0x3f) << 6;          *ch |= (utf32_t)(str[2] & 0x3f); -        if ((*ch >= 0xd800) && (*ch <= 0xdfff)) return 0; -        if (*ch < 0x800) return 0; +        if ((*ch >= 0xd800) && (*ch <= 0xdfff)) return UTF_ERR; +        if (*ch < 0x800) return UTF_ERR;          return 3;      } else if ((str[0] & 0xf8) == 0xf0) { -        if (((str[1] & 0xc0) != 0x80) || ((str[2] & 0xc0) != 0x80) || ((str[3] & 0xc0) != 0x80)) return 0; +        if (((str[1] & 0xc0) != 0x80) || ((str[2] & 0xc0) != 0x80) || ((str[3] & 0xc0) != 0x80)) return UTF_ERR;          *ch  = (utf32_t)(str[0] & 0x07) << 18;          *ch |= (utf32_t)(str[1] & 0x0f) << 12;          *ch |= (utf32_t)(str[2] & 0x3f) << 6;          *ch |= (utf32_t)(str[3] & 0x3f); -        if (*ch < 0x010000) return 0; -        if (*ch > 0x10ffff) return 0; +        if (*ch < 0x010000) return UTF_ERR; +        if (*ch > 0x10ffff) return UTF_ERR;          return 4;      } else { -        return 0; +        return UTF_ERR;      }  } @@ -65,39 +65,116 @@ int utf8_seek(utf8_t *str, int off, utf32_t *ch) {          off = -off;          for (i=0; i<off; i++) {              len--; -            while ((*(str + len) & 0xc0) == 0x80) len--; +            while ((str[len] & 0xc0) == 0x80) len--;          } -        utf8_dec(str + len, ch);      } else {          for (i=0; i<off; i++) { -            len += utf8_dec(str + len, ch); +            if ((str[len] & 0x80) == 0x00) { +                len += 1; +            } else if ((str[0] & 0xe0) == 0xc0) { +                len += 2; +            } else if ((str[0] & 0xf0) == 0xe0) { +                len += 3; +            } else if ((str[0] & 0xf8) == 0xf0) { +                len += 4; +            }          }      } +    utf8_dec(str + len, ch);      return len;  } -int utf8_verify(utf8_t *str, int sz) { +int utf8_verify(utf8_t *str, int str_size, int *str_len) {      utf32_t ch;      uint8_t ch_l;      int len = 0; -    while (len < sz) { -        if (sz - len < 4) { +    while (len < str_size) { +        if (str_size - len < 4) {              if (((str[len] & 0xf8) == 0xf0) || -               (((str[len] & 0xf0) == 0xe0) && (sz - len < 3)) || -               (((str[len] & 0xe0) == 0xc0) && (sz - len < 2))) { -                   str[len] = '\0'; +               (((str[len] & 0xf0) == 0xe0) && (str_size - len < 3)) || +               (((str[len] & 0xe0) == 0xc0) && (str_size - len < 2))) {                     break;                 }          }          ch_l = utf8_dec(str + len, &ch); -        if (ch_l) { -            if (ch == 0) break; +        if (ch_l > 0) { +            if (ch == 0) { +                *str_len = len; +                return UTF_OK; +            }              len += ch_l;          } else { -            str[len] = '\0';              break;          }      } +    *str_len = len; +    return UTF_ERR; +} + +int utf16_enc(utf32_t ch, uint8_t *str) { +    if (ch <= 0xffff) { +        if ((ch >= 0xd800) && (ch <= 0xdfff)) return UTF_ERR; +        str[0] = ch >> 8; +        str[1] = ch & 0xff; +        return 2; +    } else if (ch <= 0x10ffff) { +        uint16_t hi; +        uint16_t lo; + +        ch -= 0x10000; +        hi = (ch >> 10) + 0xd800; +        lo = (ch & 0x3ff) + 0xdc00; +        str[0] = hi >> 8; +        str[1] = hi & 0xff; +        str[2] = lo >> 8; +        str[3] = lo & 0xff; +        return 4; +    } else { +        return UTF_ERR; +    } +} + +int utf16_dec(uint8_t *str, utf32_t *ch) { +    *ch = (str[0] << 8) | str[1]; +    if ((*ch >= 0xd800) && (*ch <= 0xdfff)) { +        uint16_t hi = *ch; +        uint16_t lo; + +        if (hi > 0xdbff) return UTF_ERR; +        lo = (str[2] << 8) | str[3]; +        if ((lo < 0xdc00) || (lo > 0xdfff)) return UTF_ERR; +        *ch = (((hi - 0xd800) << 10) | (lo - 0xdc00)) + 0x10000; +        return 4; +    } else { +        return 2; +    } +} + +int utf16_seek(uint8_t *str, int off, utf32_t *ch) { +    int i; +    int len = 0; +    uint16_t cu; + +    if (off < 0) { +        off = -off; +        for (i=0; i<off; i++) { +            len -= 2; +            cu = (str[len] << 8) | str[len + 1]; +            if ((cu >= 0xdc00) && (cu <= 0xdfff)) { +                len -= 2; +            } +        } +    } else { +        for (i=0; i<off; i++) { +            cu = (str[len] << 8) | str[len + 1]; +            if ((cu >= 0xd800) && (cu <= 0xdbff)) { +                len += 4; +            } else { +                len += 2; +            } +        } +    } +    utf16_dec(str + len, ch);      return len;  } diff --git a/code/fe310/eos/eve/unicode.h b/code/fe310/eos/eve/unicode.h index 6452822..a3b9696 100644 --- a/code/fe310/eos/eve/unicode.h +++ b/code/fe310/eos/eve/unicode.h @@ -1,10 +1,17 @@  #include <stdint.h> +#define UTF_OK      0 +#define UTF_ERR     -1 +  typedef uint8_t utf8_t;  typedef uint16_t utf16_t;  typedef uint32_t utf32_t; -uint8_t utf8_enc(utf32_t ch, utf8_t *str); -uint8_t utf8_dec(utf8_t *str, utf32_t *ch); +int utf8_enc(utf32_t ch, utf8_t *str); +int utf8_dec(utf8_t *str, utf32_t *ch);  int utf8_seek(utf8_t *str, int off, utf32_t *ch); -int utf8_verify(utf8_t *str, int sz);
\ No newline at end of file +int utf8_verify(utf8_t *str, int str_size, int *str_len); + +int utf16_enc(utf32_t ch, uint8_t *str); +int utf16_dec(uint8_t *str, utf32_t *ch); +int utf16_seek(uint8_t *str, int off, utf32_t *ch); diff --git a/code/fe310/eos/eve/widget/strw.c b/code/fe310/eos/eve/widget/strw.c index da57bab..5f7e63d 100644 --- a/code/fe310/eos/eve/widget/strw.c +++ b/code/fe310/eos/eve/widget/strw.c @@ -30,6 +30,7 @@  #define CHAR_VALID_INPUT(c)     ((c >= 0x20) && (c < 0x7f))  void eve_strw_init(EVEStrWidget *widget, EVERect *g, EVEFont *font, utf8_t *str, uint16_t str_size) { +    int rv, str_len;      EVEWidget *_widget = &widget->w;      memset(widget, 0, sizeof(EVEStrWidget)); @@ -37,7 +38,12 @@ void eve_strw_init(EVEStrWidget *widget, EVERect *g, EVEFont *font, utf8_t *str,      widget->font = font;      widget->str = str;      widget->str_size = str_size; -    widget->str_len = utf8_verify(str, str_size); +    rv = utf8_verify(str, str_size, &str_len); +    if (rv != UTF_OK) { +        if (str_len >= str_size) str_len = 0; +        widget->str[str_len] = '\0'; +    } +    widget->str_len = str_len;      widget->str_g.w = eve_font_str_w(font, str);      if (_widget->g.h == 0) _widget->g.h = eve_font_h(font);  } @@ -335,8 +341,17 @@ void eve_strw_putc(void *_page, int c) {              ins_c = utf8_enc(c, utf8_buf);              ins_w = eve_font_ch_w(widget->font, c);          } else if (c == CH_CTRLV) { +            int rv, clipb_len = 0; +              clipb = eve_clipb_get(); -            ins_c = clipb ? utf8_verify(clipb, EVE_CLIPB_SIZE_BUF) : 0; +            if (clipb) { +                rv = utf8_verify(clipb, EVE_CLIPB_SIZE_BUF, &clipb_len); +                if (rv != UTF_OK) { +                    clipb = NULL; +                    clipb_len = 0; +                } +            } +            ins_c = clipb_len;              ins_w = eve_font_str_w(widget->font, clipb);          }          if (widget->str_len + ins_c >= widget->str_size + del_c) { diff --git a/code/fe310/eos/eve/widget/textw.c b/code/fe310/eos/eve/widget/textw.c index f14e4f6..03f6c0a 100644 --- a/code/fe310/eos/eve/widget/textw.c +++ b/code/fe310/eos/eve/widget/textw.c @@ -36,6 +36,7 @@  #define DIVC(x,y)               ((x) / (y) + ((x) % (y) != 0))  void eve_textw_init(EVETextWidget *widget, EVERect *g, EVEFont *font, utf8_t *text, uint16_t text_size, uint16_t *line, uint16_t line_size) { +    int rv, text_len;      EVEWidget *_widget = &widget->w;      memset(widget, 0, sizeof(EVETextWidget)); @@ -43,7 +44,12 @@ void eve_textw_init(EVETextWidget *widget, EVERect *g, EVEFont *font, utf8_t *te      widget->font = font;      widget->text = text;      widget->text_size = text_size; -    widget->text_len = utf8_verify(text, text_size); +    rv = utf8_verify(text, text_size, &text_len); +    if (rv != UTF_OK) { +        if (text_len >= text_size) text_len = 0; +        widget->text[text_len] = '\0'; +    } +    widget->text_len = text_len;      widget->line = line;      widget->line_size = line_size;      memset(widget->line, 0xff, line_size * sizeof(uint16_t)); @@ -341,8 +347,17 @@ void eve_textw_putc(void *_page, int c) {              ins_c = utf8_enc(c, utf8_buf);              ch_w = eve_font_ch_w(widget->font, c);          } else if (c == CH_CTRLV) { +            int rv, clipb_len = 0; +              clipb = eve_clipb_get(); -            ins_c = clipb ? utf8_verify(clipb, EVE_CLIPB_SIZE_BUF) : 0; +            if (clipb) { +                rv = utf8_verify(clipb, EVE_CLIPB_SIZE_BUF, &clipb_len); +                if (rv != UTF_OK) { +                    clipb = NULL; +                    clipb_len = 0; +                } +            } +            ins_c = clipb_len;              ch_w = eve_font_str_w(widget->font, clipb);          }          if (widget->text_len + ins_c >= widget->text_size + del_c) { | 
