fltk 1.3.0rc3
About: FLTK (Fast Light Tool Kit) is a cross-platform C++ GUI toolkit for UNIX/Linux (X11), Microsoft Windows, and MacOS X. Release candidate.
  SfR Fresh Dox: fltk-1.3.0rc3-source.tar.gz (unofficial and still experimental doxygen-generated source code documentation)

8bit_tab_to_h.c

Go to the documentation of this file.
00001 /* $XFree86: xc/lib/X11/lcUniConv/8bit_tab_to_h.c,v 1.3 2001/02/09 00:02:54 dawes Exp $ */
00002 
00003 /*
00004  * Generates an 8-bit character set table from a .TXT table as found on
00005  * ftp.unicode.org or from a table containing the 256 Unicode values as
00006  * hexadecimal integers.
00007  * Examples:
00008  *
00009  *   ./8bit_tab_to_h ISO-8859-1 iso8859_1 < tab8859_1
00010  *   ./8bit_tab_to_h ISO-8859-2 iso8859_2 < tab8859_2
00011  *   ./8bit_tab_to_h ISO-8859-3 iso8859_3 < tab8859_3
00012  *   ./8bit_tab_to_h ISO-8859-4 iso8859_4 < tab8859_4
00013  *   ./8bit_tab_to_h ISO-8859-5 iso8859_5 < tab8859_5
00014  *   ./8bit_tab_to_h ISO-8859-6 iso8859_6 < tab8859_6
00015  *   ./8bit_tab_to_h ISO-8859-7 iso8859_7 < tab8859_7
00016  *   ./8bit_tab_to_h ISO-8859-8 iso8859_8 < tab8859_8
00017  *   ./8bit_tab_to_h ISO-8859-9 iso8859_9 < tab8859_9
00018  *   ./8bit_tab_to_h ISO-8859-10 iso8859_10 < tab8859_10
00019  *   ./8bit_tab_to_h ISO-8859-14 iso8859_14 < tab8859_14
00020  *   ./8bit_tab_to_h ISO-8859-15 iso8859_15 < tab8859_15
00021  *   ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < jis0201
00022  *   ./8bit_tab_to_h TIS620-0 tis620 < tabtis620
00023  *   ./8bit_tab_to_h KOI8-R koi8_r < tabkoi8_r
00024  *   ./8bit_tab_to_h KOI8-U koi8_u < tabkoi8_u
00025  *   ./8bit_tab_to_h ARMSCII-8 armscii_8 < tabarmscii_8
00026  *   ./8bit_tab_to_h CP1133 cp1133 < tabibm_cp1133
00027  *   ./8bit_tab_to_h MULELAO-1 mulelao < tabmulelao_1
00028  *   ./8bit_tab_to_h VISCII1.1-1 viscii1 < tabviscii
00029  *   ./8bit_tab_to_h TCVN-5712 tcvn < tabtcvn
00030  *   ./8bit_tab_to_h GEORGIAN-ACADEMY georgian_ac < tabgeorgian_academy
00031  *   ./8bit_tab_to_h GEORGIAN-PS georgian_ps < tabgeorgian_ps
00032  *
00033  *   ./8bit_tab_to_h ISO-8859-1 iso8859_1 < 8859-1.TXT
00034  *   ./8bit_tab_to_h ISO-8859-2 iso8859_2 < 8859-2.TXT
00035  *   ./8bit_tab_to_h ISO-8859-3 iso8859_3 < 8859-3.TXT
00036  *   ./8bit_tab_to_h ISO-8859-4 iso8859_4 < 8859-4.TXT
00037  *   ./8bit_tab_to_h ISO-8859-5 iso8859_5 < 8859-5.TXT
00038  *   ./8bit_tab_to_h ISO-8859-6 iso8859_6 < 8859-6.TXT
00039  *   ./8bit_tab_to_h ISO-8859-7 iso8859_7 < 8859-7.TXT
00040  *   ./8bit_tab_to_h ISO-8859-8 iso8859_8 < 8859-8.TXT
00041  *   ./8bit_tab_to_h ISO-8859-9 iso8859_9 < 8859-9.TXT
00042  *   ./8bit_tab_to_h ISO-8859-10 iso8859_10 < 8859-10.TXT
00043  *   ./8bit_tab_to_h ISO-8859-14 iso8859_14 < 8859-14.TXT
00044  *   ./8bit_tab_to_h ISO-8859-15 iso8859_15 < 8859-15.TXT
00045  *   ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < JIS0201.TXT
00046  *   ./8bit_tab_to_h KOI8-R koi8_r < KOI8-R.TXT
00047  */
00048 
00049 #include <stdio.h>
00050 #include <stdlib.h>
00051 #include <stdbool.h>
00052 #include <string.h>
00053 
/* Reports MESSAGE on stderr and terminates the program with exit status 1. */
static void fail (const char* message)
{
  fprintf(stderr, "8bit_tab_to_h: %s\n", message);
  exit(1);
}

/* malloc() wrapper: terminates the program instead of returning NULL. */
static void* xmalloc (size_t size)
{
  void* p = malloc(size);
  if (p == NULL)
    fail("out of memory");
  return p;
}

/*
 * Fills charset2uni[0..0xff] from stdin.
 *
 * If the input starts with '#', it is parsed as a unicode.org style .TXT
 * file: "0xNN 0xNNNN" pairs, '#' starts a comment that runs to end of line,
 * a byte listed without a Unicode value is left unmapped.  Unmapped bytes
 * are stored as 0xfffd.
 *
 * Otherwise the input must consist of exactly 0x100 hexadecimal values.
 * 0xffff and values that would be negative as an int are stored as 0xfffd.
 *
 * Every stored value is guaranteed to be in 0..0xffff, because the values
 * are later used as array indices.  Malformed or out-of-range input
 * terminates the program with exit status 1.
 */
static void read_charset_table (int charset2uni[0x100])
{
  int i, c;
  unsigned int code, value;   /* %x requires unsigned int objects */

  /* Peek at the first character to detect the input format. */
  c = getc(stdin);
  ungetc(c,stdin);
  if (c == '#') {
    /* Read a unicode.org style .TXT file. */
    for (i = 0; i < 0x100; i++)
      charset2uni[i] = 0xfffd;
    for (;;) {
      c = getc(stdin);
      if (c == EOF)
        break;
      if (c == '\n' || c == ' ' || c == '\t')
        continue;
      if (c == '#') {
        /* Skip the comment up to the end of the line. */
        do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
        continue;
      }
      ungetc(c,stdin);
      if (scanf("0x%x", &code) != 1 || code >= 0x100)
        fail("invalid character code in .TXT input");
      do { c = getc(stdin); } while (c == ' ' || c == '\t');
      if (c != EOF)
        ungetc(c,stdin);
      /* A code without a Unicode value stays unmapped. */
      if (c == '\n' || c == '#')
        continue;
      if (scanf("0x%x", &value) != 1)
        fail("invalid Unicode value in .TXT input");
      if (value > 0xffff)
        fail("Unicode value above 0xffff in .TXT input");
      charset2uni[code] = (int) value;
    }
  } else {
    /* Read a table of hexadecimal Unicode values. */
    for (i = 0; i < 0x100; i++) {
      if (scanf("%x", &value) != 1)
        fail("expected 256 hexadecimal values");
      /* (~0u >> 1) is the largest value that is non-negative as an int;
         anything above it used to show up as a negative int. */
      if (value > (~0u >> 1) || value == 0xffff)
        charset2uni[i] = 0xfffd;
      else if (value > 0xffff)
        fail("Unicode value above 0xffff in table input");
      else
        charset2uni[i] = (int) value;
    }
    if (scanf("%x", &value) != EOF)
      fail("unexpected data after the 256th value");
  }
}

/*
 * Writes to F the charset -> Unicode lookup tables and the C function
 * <c_charsetname>_mbtowc that uses them.
 *
 * The 256 codes are handled in 16 rows of 16.  A row is either entirely
 * unmapped (no code emitted), the identity mapping (emitted as a plain
 * assignment) or needs a table; adjacent table rows share one table.
 */
static void write_mbtowc (FILE* f, const char* c_charsetname,
                          const int charset2uni[0x100])
{
  int i, i1, i2, i3;
  int line[16];          /* per row: -2 all invalid, -1 identity, else table index */
  int tableno;
  struct { int minline; int maxline; } tables[16];
  bool some_invalid;
  bool final_ret_reached;

  /* Classify every row. */
  for (i1 = 0; i1 < 16; i1++) {
    bool all_invalid = true;
    bool all_identity = true;
    for (i2 = 0; i2 < 16; i2++) {
      i = 16*i1+i2;
      if (charset2uni[i] != 0xfffd)
        all_invalid = false;
      if (charset2uni[i] != i)
        all_identity = false;
    }
    if (all_invalid)
      line[i1] = -2;
    else if (all_identity)
      line[i1] = -1;
    else
      line[i1] = 0;
  }

  /* Group consecutive table rows into tables. */
  tableno = 0;
  for (i1 = 0; i1 < 16; i1++) {
    if (line[i1] >= 0) {
      if (i1 > 0 && tableno > 0 && line[i1-1] == tableno-1) {
        line[i1] = tableno-1;
        tables[tableno-1].maxline = i1;
      } else {
        tableno++;
        line[i1] = tableno-1;
        tables[tableno-1].minline = tables[tableno-1].maxline = i1;
      }
    }
  }

  some_invalid = false;
  for (i = 0; i < 0x100; i++)
    if (charset2uni[i] == 0xfffd)
      some_invalid = true;

  /* Emit the tables. */
  if (tableno > 0) {
    int t;
    for (t = 0; t < tableno; t++) {
      fprintf(f, "static const unsigned short %s_2uni", c_charsetname);
      if (tableno > 1)
        fprintf(f, "_%d", t+1);
      fprintf(f, "[%d] = {\n", 16*(tables[t].maxline-tables[t].minline+1));
      for (i1 = tables[t].minline; i1 <= tables[t].maxline; i1++) {
        fprintf(f, "  /* 0x%02x */\n", 16*i1);
        for (i2 = 0; i2 < 2; i2++) {
          fprintf(f, " ");
          for (i3 = 0; i3 < 8; i3++) {
            i = 16*i1+8*i2+i3;
            fprintf(f, " 0x%04x,", charset2uni[i]);
          }
          fprintf(f, "\n");
        }
      }
      fprintf(f, "};\n");
    }
    fprintf(f, "\n");
  }

  /* Emit the conversion function. */
  final_ret_reached = false;
  fprintf(f, "static int\n%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", c_charsetname);
  fprintf(f, "{\n");
  fprintf(f, "  unsigned char c = *s;\n");
  if (some_invalid) {
    /* Some codes are unmapped: every range gets a braced branch, and
       ranges that can fail fall through to the final RET_ILSEQ. */
    for (i1 = 0; i1 < 16;) {
      int t = line[i1];
      const char* indent;
      /* [i1,i2) is the maximal run of rows of the same kind. */
      for (i2 = i1; i2 < 16 && line[i2] == t; i2++);
      indent = (i1 == 0 && i2 == 16 ? "  " : "    ");
      if (i1 == 0) {
        if (i2 == 16) {
        } else {
          fprintf(f, "  if (c < 0x%02x) {\n", 16*i2);
        }
      } else {
        if (i2 == 16) {
          fprintf(f, "  else {\n");
        } else {
          fprintf(f, "  else if (c < 0x%02x) {\n", 16*i2);
        }
      }
      if (t == -2) {
        final_ret_reached = true;
      } else if (t == -1) {
        fprintf(f, "%s*pwc = (ucs4_t) c;\n", indent);
        fprintf(f, "%sreturn 1;\n", indent);
      } else {
        /* Only test for 0xfffd when this range really contains one. */
        bool range_has_invalid = false;
        fprintf(f, "%s", indent);
        for (i = 16*i1; i < 16*i2; i++)
          if (charset2uni[i] == 0xfffd)
            range_has_invalid = true;
        if (range_has_invalid)
          fprintf(f, "unsigned short wc = ");
        else
          fprintf(f, "*pwc = (ucs4_t) ");
        fprintf(f, "%s_2uni", c_charsetname);
        if (tableno > 1)
          fprintf(f, "_%d", t+1);
        fprintf(f, "[c");
        if (tables[t].minline > 0)
          fprintf(f, "-0x%02x", 16*tables[t].minline);
        fprintf(f, "];\n");
        if (range_has_invalid) {
          fprintf(f, "%sif (wc != 0xfffd) {\n", indent);
          fprintf(f, "%s  *pwc = (ucs4_t) wc;\n", indent);
          fprintf(f, "%s  return 1;\n", indent);
          fprintf(f, "%s}\n", indent);
          final_ret_reached = true;
        } else {
          fprintf(f, "%sreturn 1;\n", indent);
        }
      }
      if (!(i1 == 0 && i2 == 16))
        fprintf(f, "  }\n");
      i1 = i2;
    }
    if (final_ret_reached)
      fprintf(f, "  return RET_ILSEQ;\n");
  } else {
    /* Every code is mapped: unbraced branches and one common return. */
    for (i1 = 0; i1 < 16;) {
      int t = line[i1];
      for (i2 = i1; i2 < 16 && line[i2] == t; i2++);
      if (i1 == 0) {
        if (i2 == 16) {
          fprintf(f, "  ");
        } else {
          fprintf(f, "  if (c < 0x%02x)\n    ", 16*i2);
        }
      } else {
        if (i2 == 16) {
          fprintf(f, "  else\n    ");
        } else {
          fprintf(f, "  else if (c < 0x%02x)\n    ", 16*i2);
        }
      }
      if (t == -1)
        fprintf(f, "*pwc = (ucs4_t) c;\n");
      else {
        fprintf(f, "*pwc = (ucs4_t) %s_2uni", c_charsetname);
        if (tableno > 1)
          fprintf(f, "_%d", t+1);
        fprintf(f, "[c");
        if (tables[t].minline > 0)
          fprintf(f, "-0x%02x", 16*tables[t].minline);
        fprintf(f, "];\n");
      }
      i1 = i2;
    }
    fprintf(f, "  return 1;\n");
  }
  fprintf(f, "}\n");
}

/*
 * Writes to F the Unicode -> charset page tables and the C function
 * <c_charsetname>_wctomb that uses them.
 *
 * The range 0..0xffff is handled in 0x2000 lines of 8 code points.  Lines
 * that need a table are merged into one table when they are adjacent, or
 * when they lie in the same 256-code-point page at most 8 lines apart.
 * A table holding a single mapping is emitted as an equality test instead.
 *
 * Every charset2uni[] entry must be in 0..0xffff; it is used as an index.
 * The large work arrays have static storage to keep them off the stack, so
 * this function is not reentrant.
 */
static void write_wctomb (FILE* f, const char* c_charsetname,
                          const int charset2uni[0x100])
{
  static int uni2charset[0x10000];   /* 0 means "no mapping" */
  static int line[0x2000];           /* per line: -2 empty, -1 identity, else table index */
  static struct { int minline; int maxline; int usecount; char suffix[16]; } tables[0x2000];
  int tableno;
  bool any_page_table;
  bool need_c;
  bool fix_0000;
  int i, j, p, j1, j2, t;

  /* Invert the mapping. */
  for (j = 0; j < 0x10000; j++)
    uni2charset[j] = 0;
  for (i = 0; i < 0x100; i++) {
    j = charset2uni[i];
    if (j != 0xfffd)
      uni2charset[j] = i;
  }

  /* Classify every line. */
  for (j1 = 0; j1 < 0x2000; j1++) {
    bool all_invalid = true;
    bool all_identity = true;
    for (j2 = 0; j2 < 8; j2++) {
      j = 8*j1+j2;
      if (uni2charset[j] != 0)
        all_invalid = false;
      if (uni2charset[j] != j)
        all_identity = false;
    }
    if (all_invalid)
      line[j1] = -2;
    else if (all_identity)
      line[j1] = -1;
    else
      line[j1] = 0;
  }

  /* Group table lines into tables. */
  tableno = 0;
  for (j1 = 0; j1 < 0x2000; j1++) {
    if (line[j1] >= 0) {
      if (tableno > 0
          && ((j1 > 0 && line[j1-1] == tableno-1)
              || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                  && j1 - tables[tableno-1].maxline <= 8))) {
        line[j1] = tableno-1;
        tables[tableno-1].maxline = j1;
      } else {
        tableno++;
        line[j1] = tableno-1;
        tables[tableno-1].minline = tables[tableno-1].maxline = j1;
      }
    }
  }

  /* Count the mappings held by each table. */
  for (t = 0; t < tableno; t++) {
    tables[t].usecount = 0;
    j1 = 8*tables[t].minline;
    j2 = 8*(tables[t].maxline+1);
    for (j = j1; j < j2; j++)
      if (uni2charset[j] != 0)
        tables[t].usecount++;
  }

  /* Name the tables that will be emitted: "<page>" for the first one in a
     page, "<page>_<k>" for further ones.  The counter k is never reset. */
  for (t = 0, p = -1, i = 0; t < tableno; t++) {
    if (tables[t].usecount > 1) {
      if (p == tables[t].minline >> 5) {
        snprintf(tables[t].suffix, sizeof tables[t].suffix, "%02x_%d", p, ++i);
      } else {
        p = tables[t].minline >> 5;
        snprintf(tables[t].suffix, sizeof tables[t].suffix, "%02x", p);
      }
    } else
      tables[t].suffix[0] = '\0';
  }

  /* Emit the page tables. */
  any_page_table = false;
  for (t = 0; t < tableno; t++)
    if (tables[t].usecount > 1) {
      any_page_table = true;
      fprintf(f, "static const unsigned char %s_page%s[%d] = {\n", c_charsetname, tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
      for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
        if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
          fprintf(f, "  /* 0x%04x */\n", 8*j1);
        fprintf(f, " ");
        for (j2 = 0; j2 < 8; j2++) {
          j = 8*j1+j2;
          fprintf(f, " 0x%02x,", uni2charset[j]);
        }
        fprintf(f, " /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
      }
      fprintf(f, "};\n");
    }
  if (any_page_table)
    fprintf(f, "\n");

  /* The local variable c is needed unless the only non-empty range is an
     identity range starting at 0, which returns directly. */
  need_c = false;
  for (j1 = 0; j1 < 0x2000;) {
    t = line[j1];
    for (j2 = j1; j2 < 0x2000 && line[j2] == t; j2++);
    if (t >= 0)
      j2 = tables[t].maxline+1;
    if (!(t == -2 || (t == -1 && j1 == 0)))
      need_c = true;
    j1 = j2;
  }

  /* Emit the conversion function. */
  fix_0000 = false;
  fprintf(f, "static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", c_charsetname);
  fprintf(f, "{\n");
  if (need_c)
    fprintf(f, "  unsigned char c = 0;\n");
  for (j1 = 0; j1 < 0x2000;) {
    t = line[j1];
    for (j2 = j1; j2 < 0x2000 && line[j2] == t; j2++);
    if (t >= 0) {
      /* Consistency checks, then extend the range over the whole table
         (a table may span lines that are themselves empty). */
      if (j1 != tables[t].minline) abort();
      if (j2 > tables[t].maxline+1) abort();
      j2 = tables[t].maxline+1;
    }
    if (t != -2) {
      if (j1 == 0)
        fprintf(f, "  ");
      else
        fprintf(f, "  else ");
      if (t >= 0 && tables[t].usecount == 0) abort();
      if (t >= 0 && tables[t].usecount == 1) {
        /* Single mapping: compare against the one code point. */
        if (j2 != j1+1) abort();
        for (j = 8*j1; j < 8*j2; j++)
          if (uni2charset[j] != 0) {
            fprintf(f, "if (wc == 0x%04x)\n    c = 0x%02x;\n", j, uni2charset[j]);
            break;
          }
      } else {
        if (j1 == 0) {
          fprintf(f, "if (wc < 0x%04x)", 8*j2);
        } else {
          fprintf(f, "if (wc >= 0x%04x && wc < 0x%04x)", 8*j1, 8*j2);
        }
        if (t == -1) {
          if (j1 == 0)
            /* If wc == 0, the function must return 1, not -1. */
            fprintf(f, " {\n    *r = wc;\n    return 1;\n  }\n");
          else
            fprintf(f, "\n    c = wc;\n");
        } else {
          fprintf(f, "\n    c = %s_page%s[wc", c_charsetname, tables[t].suffix);
          if (tables[t].minline > 0)
            fprintf(f, "-0x%04x", 8*j1);
          fprintf(f, "];\n");
          if (j1 == 0 && uni2charset[0] == 0)
            /* If wc == 0, the function must return 1, not -1. */
            fix_0000 = true;
        }
      }
    }
    j1 = j2;
  }
  if (need_c) {
    if (fix_0000)
      fprintf(f, "  if (c != 0 || wc == 0) {\n");
    else
      fprintf(f, "  if (c != 0) {\n");
    fprintf(f, "    *r = c;\n");
    fprintf(f, "    return 1;\n");
    fprintf(f, "  }\n");
  }
  fprintf(f, "  return RET_ILSEQ;\n");
  fprintf(f, "}\n");
}

/*
 * Usage: 8bit_tab_to_h charsetname c_charsetname [filename [directory]] < table
 *
 * Reads an 8-bit character set table from stdin and writes a C header that
 * contains the conversion tables plus the functions <c_charsetname>_mbtowc
 * and <c_charsetname>_wctomb.
 *
 *   charsetname    name written into the header's leading comment
 *   c_charsetname  prefix of the generated C identifiers
 *   filename       output file name, default "<c_charsetname>.h"
 *   directory      string prepended verbatim to filename, default ""
 *
 * Exits with status 0 on success and 1 on a usage, input or I/O error.
 */
int main (int argc, char *argv[])
{
  const char* charsetname;
  const char* c_charsetname;
  const char* filename;
  const char* directory;
  char* default_filename = NULL;
  char* fname;
  int charset2uni[0x100];
  FILE* f;

  if (argc != 3 && argc != 4 && argc != 5) {
    fprintf(stderr, "Usage: 8bit_tab_to_h charsetname c_charsetname [filename [directory]] < table\n");
    exit(1);
  }
  charsetname = argv[1];
  c_charsetname = argv[2];
  if (argc > 3) {
    filename = argv[3];
  } else {
    default_filename = xmalloc(strlen(c_charsetname)+strlen(".h")+1);
    strcpy(default_filename,c_charsetname); strcat(default_filename,".h");
    filename = default_filename;
  }
  directory = (argc > 4 ? argv[4] : "");

  fprintf(stderr, "Creating %s%s\n", directory, filename);

  read_charset_table(charset2uni);

  /* Write the output file. */
  fname = xmalloc(strlen(directory)+strlen(filename)+1);
  strcpy(fname,directory); strcat(fname,filename);
  f = fopen(fname,"w");
  if (f == NULL) {
    perror(fname);
    exit(1);
  }
  free(fname);

  fprintf(f, "\n");
  fprintf(f, "/*\n");
  fprintf(f, " * %s\n", charsetname);
  fprintf(f, " */\n");
  fprintf(f, "\n");

  write_mbtowc(f, c_charsetname, charset2uni);

  fprintf(f, "\n");

  write_wctomb(f, c_charsetname, charset2uni);

  if (ferror(f) || fclose(f))
    fail("error writing the output file");

  free(default_filename);
  exit(0);
}