Ticket #7142: stxttranslator-latin9.patch

File stxttranslator-latin9.patch, 3.2 KB (added by Karvjorm, 13 years ago)

A Latin-9 (ISO-8859-15) patch for STXTTranslator

  • src/add-ons/translators/stxt/STXTTranslator.cpp

     
    188188static int looks_utf8(const unsigned char *, size_t, my_unichar *, size_t *);
    189189static int looks_unicode(const unsigned char *, size_t, my_unichar *, size_t *);
    190190static int looks_latin1(const unsigned char *, size_t, my_unichar *, size_t *);
     191static int looks_latin9(const unsigned char *, size_t, my_unichar *, size_t *);
    191192static int looks_extended(const unsigned char *, size_t, my_unichar *, size_t *);
    192193static void from_ebcdic(const unsigned char *, size_t, unsigned char *);
    193194static int ascmatch(const unsigned char *, const my_unichar *, size_t);
     
    265266        code = "ISO-8859";
    266267        type = "text";
    267268        encoding = "iso-8859-1";
     269    } else if (looks_latin9(buf, nbytes, ubuf, &ulen)) {
     270        code = "ISO-8859";
     271        type = "text";
     272        encoding = "iso-8859-15";
    268273    } else if (looks_extended(buf, nbytes, ubuf, &ulen)) {
    269274        code = "Non-ISO extended-ASCII";
    270275        type = "text";
     
    280285            code = "International EBCDIC";
    281286            type = "character data";
    282287            encoding = "ebcdic";
     288        } else if (looks_latin9(nbuf, nbytes, ubuf, &ulen)) {
     289            code = "International EBCDIC";
     290            type = "character data";
     291            encoding = "ebcdic";
    283292        } else {
    284293            rv = 0;
    285294            goto done;  /* doesn't look like text at all */
     
    520529    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
    521530};
    522531
     /*
      * tekst_chars -- per-byte classification table, indexed by the byte
      * value 0x00..0xFF (one row of 16 entries per high nibble).
      *
      * It is consulted by looks_latin9(), which accepts a byte when its
      * entry is T or I and rejects the whole buffer on any other entry
      * (F and X are the other classes that appear below).
      *
      * NOTE(review): the entries for 0x01, 0x20, 0x60, 0x61, 0x7D, 0x7E and
      * 0x92 are I while their neighbours are F, T or X.  For looks_latin9()
      * the I-versus-T distinction makes no difference, but 0x01 and 0x92
      * are accepted where the surrounding bytes are rejected -- confirm
      * that this is intentional.
      *
      * NOTE(review): every byte in 0xA0..0xFF is I, so the positions where
      * ISO-8859-15 differs from ISO-8859-1 (0xA4, 0xA6, 0xA8, 0xB4, 0xB8,
      * 0xBC, 0xBD, 0xBE) are not singled out in this table -- verify that
      * this is what the detection is meant to rely on.
      */
     532static char tekst_chars[256] = {
     533    /*                  BEL BS HT LF    FF CR    */
     534    F, I, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
     535        /*                              ESC          */
     536    F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
     537    I, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
     538    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
     539    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
     540    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
     541    I, I, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
     542    T, T, T, T, T, T, T, T, T, T, T, T, T, I, I, F,  /* 0x7X */
     543    /*            NEL                            */
     544    X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
     545    X, X, I, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
     546    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
     547    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
     548    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
     549    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
     550    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
     551    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
     552};
     553
    523554static int
    524555looks_ascii(const unsigned char *buf, size_t nbytes, my_unichar *ubuf,
    525556    size_t *ulen)
     
    560591}
    561592
    562593static int
     594looks_latin9(const unsigned char *buf, size_t nbytes, my_unichar *ubuf, size_t *ulen)
     595{
     596    int i;
     597
     598    *ulen = 0;
     599
     600    for (i = 0; i < (int)nbytes; i++) {
     601        int t = tekst_chars[buf[i]];
     602
     603        if (t != T && t != I)
     604            return 0;
     605
     606        ubuf[(*ulen)++] = buf[i];
     607    }
     608
     609    return 1;
     610}
     611
     612static int
    563613looks_extended(const unsigned char *buf, size_t nbytes, my_unichar *ubuf,
    564614    size_t *ulen)
    565615{