|
fltk 1.3.0rc3
About: FLTK (Fast Light Tool Kit) is a cross-platform C++ GUI toolkit for UNIX/Linux (X11), Microsoft Windows, and MacOS X. Release candidate.
SfR Fresh Dox: fltk-1.3.0rc3-source.tar.gz ("unofficial" and yet experimental doxygen-generated source code documentation) |
/* $XFree86: xc/lib/X11/lcUniConv/cjk_tab_to_h.c,v 1.2 2000/12/04 18:49:31 dawes Exp $ */

/* 2009-02-17 <sparkaround@gmail.com>: Create gbk_tab_to_h.c from
 * cjk_tab_to_h.c to generate GBK(cp936ext) table correctly.
 *
 *
 * Generates a CJK character set table from a .TXT table as found on
 * ftp.unicode.org or in the X nls directory.
 * Examples:
 *
 *   ./gbk_tab_to_h CP936EXT cp936ext > cp936ext.h < CP936EXT.TXT
 *
 * The table is read from standard input and the generated C source is
 * written to standard output.  Diagnostics go to standard error and any
 * failure terminates the program with exit status 1.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>

typedef struct {
  int start;
  int end;
} Block;

typedef struct {
  int rows;                    /* number of possible values for the 1st byte */
  int cols;                    /* number of possible values for the 2nd byte */
  int (*row_byte) (int row);   /* returns the 1st byte value for a given row */
  int (*col_byte) (int col);   /* returns the 2nd byte value for a given col */
  int (*byte_row) (int byte);  /* converts a 1st byte value to a row, else -1 */
  int (*byte_col) (int byte);  /* converts a 2nd byte value to a col, else -1 */
  const char* check_row_expr;  /* format string for 1st byte value checking */
  const char* check_col_expr;  /* format string for 2nd byte value checking */
  const char* byte_row_expr;   /* format string for 1st byte value to row */
  const char* byte_col_expr;   /* format string for 2nd byte value to col */
  int** charset2uni;           /* charset2uni[0..rows-1][0..cols-1] is valid */
  /* You'll understand the terms "row" and "col" when you buy Ken Lunde's book.
     Once a row is fixed, choosing a "col" is the same as choosing a "cell".
   */
  int* charsetpage;            /* charsetpage[0..rows]: how large is a page for a row */
  int ncharsetblocks;
  Block* charsetblocks;        /* blocks[0..nblocks-1] */
  int* uni2charset;            /* uni2charset[0x0000..0xffff] */
} Encoding;

/*
 * Allocates size bytes.  Never returns NULL: running out of memory
 * terminates the program with a message on stderr.
 * The generator is a short-lived filter, so nothing is ever freed.
 */
static void* xmalloc (size_t size)
{
  void* p = malloc(size);

  if (p == NULL) {
    fprintf(stderr, "out of memory\n");
    exit(1);
  }
  return p;
}

/*
 * Outputs the file title.
 */
static void output_title (const char *charsetname)
{
  printf("\n");
  printf("/*\n");
  printf(" * %s\n", charsetname);
  printf(" */\n");
  printf("\n");
}

/*
 * Allocates charset2uni[rows][cols] and marks every cell as unmapped
 * (0xfffd).
 */
static void alloc_charset2uni (Encoding* enc)
{
  int row, col;

  enc->charset2uni = (int**) xmalloc((size_t) enc->rows * sizeof(int*));
  for (row = 0; row < enc->rows; row++) {
    enc->charset2uni[row] = (int*) xmalloc((size_t) enc->cols * sizeof(int));
    for (col = 0; col < enc->cols; col++)
      enc->charset2uni[row][col] = 0xfffd;
  }
}

/*
 * Peeks at the first character of standard input without consuming it.
 * A leading '#' selects the unicode.org style .TXT parser.
 */
static bool input_is_txt_style (void)
{
  int c = getc(stdin);

  ungetc(c, stdin);  /* pushing back EOF leaves the stream unchanged */
  return c == '#';
}

/*
 * Skips blanks, tabs, newlines and '#' comments on standard input.
 * Returns true when positioned on the first character of an entry,
 * false when the end of input has been reached.
 */
static bool skip_to_entry (void)
{
  int c;

  while ((c = getc(stdin)) != EOF) {
    if (c == '\n' || c == ' ' || c == '\t')
      continue;
    if (c == '#') {
      do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
      continue;
    }
    ungetc(c, stdin);
    return true;
  }
  return false;
}

/*
 * Returns the charset2uni cell addressed by the byte pair (i1, i2).
 * The byte_row/byte_col callbacks do not all enforce an upper bound, so
 * the result is also checked against rows/cols here; a byte pair outside
 * the table is reported as "lost entry <prep> ..." and is fatal.
 */
static int* lookup_cell (Encoding* enc, int i1, int i2, const char* prep)
{
  int row = enc->byte_row(i1);
  int col = enc->byte_col(i2);

  if (row < 0 || row >= enc->rows || col < 0 || col >= enc->cols) {
    fprintf(stderr, "lost entry %s %02x %02x\n", prep, i1, i2);
    exit(1);
  }
  return &enc->charset2uni[row][col];
}

/*
 * Validates a Unicode value read from the input.  Every later stage
 * indexes 0x10000-entry tables with it, so anything above 0xffff is fatal.
 */
static int checked_unicode (unsigned int uni)
{
  if (uni > 0xffff) {
    fprintf(stderr, "unicode value 0x%x out of range\n", uni);
    exit(1);
  }
  return (int) uni;
}

/*
 * Reads a plain table of hexadecimal Unicode values, one per cell, for
 * 1st and 2nd byte values first..limit-1.  Reading stops silently at end
 * of input.  Values with the top bit set, 0xffff and 0xfffd all mean
 * "unmapped".
 */
static void read_hex_grid (Encoding* enc, int first, int limit)
{
  int i1, i2;

  for (i1 = first; i1 < limit; i1++)
    for (i2 = first; i2 < limit; i2++) {
      unsigned int uni;
      int n = scanf("%x", &uni);

      if (n == EOF)
        return;
      if (n != 1)
        exit(1);
      if (uni > 0x7fffffffu || uni == 0xffff)
        uni = 0xfffd;
      if (uni != 0xfffd)
        *lookup_cell(enc, i1, i2, "at") = checked_unicode(uni);
    }
}

/*
 * Reads the charset2uni table from standard input.
 */
static void read_table (Encoding* enc)
{
  alloc_charset2uni(enc);

  if (input_is_txt_style()) {
    /* Read a unicode.org style .TXT file. */
    while (skip_to_entry()) {
      unsigned int code, uni;
      int* cell;

      if (scanf("0x%x", &code) != 1)
        exit(1);
      cell = lookup_cell(enc, (int) (code >> 8), (int) (code & 0xff), "for");
      if (scanf(" 0x%x", &uni) != 1)
        exit(1);
      *cell = checked_unicode(uni);
    }
  } else {
    /* Read a table of hexadecimal Unicode values. */
    read_hex_grid(enc, 32, 132);
  }
}

/*
 * Computes the charsetpage[0..rows] array.
 * charsetpage[row] is one more than the last mapped column of the row, or
 * 0 for an empty row.  The extra entry charsetpage[rows] stays 0 and acts
 * as a sentinel for the block scans below.
 */
static void find_charset2uni_pages (Encoding* enc)
{
  int row, col;

  enc->charsetpage = (int*) xmalloc((size_t) (enc->rows+1) * sizeof(int));

  for (row = 0; row <= enc->rows; row++)
    enc->charsetpage[row] = 0;

  for (row = 0; row < enc->rows; row++) {
    int used = 0;
    for (col = 0; col < enc->cols; col++)
      if (enc->charset2uni[row][col] != 0xfffd)
        used = col+1;
    enc->charsetpage[row] = used;
  }
}

/*
 * Fills in nblocks and blocks.
 * A block is a maximal run of consecutive non-empty rows; start and end
 * are linear cell indices (row * cols + col), end being exclusive.
 */
static void find_charset2uni_blocks (Encoding* enc)
{
  int n, row, lastrow;

  enc->charsetblocks = (Block*) xmalloc((size_t) enc->rows * sizeof(Block));

  n = 0;
  for (row = 0; row < enc->rows; row++)
    if (enc->charsetpage[row] > 0 && (row == 0 || enc->charsetpage[row-1] == 0)) {
      for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
      enc->charsetblocks[n].start = row * enc->cols;
      enc->charsetblocks[n].end = lastrow * enc->cols + enc->charsetpage[lastrow];
      n++;
    }
  enc->ncharsetblocks = n;
}

/*
 * Outputs the charset to unicode table and function.
 *
 * The *_expr members of enc are used as printf format strings.  They are
 * string constants defined in this file and use the POSIX positional
 * conversion "%1$s" so that one argument can be substituted several times.
 */
static void output_charset2uni (const char* name, Encoding* enc)
{
  int row, col, lastrow, col_max, i, i1_min, i1_max;

  find_charset2uni_pages(enc);

  find_charset2uni_blocks(enc);

  for (row = 0; row < enc->rows; row++)
    if (enc->charsetpage[row] > 0) {
      if (row == 0 || enc->charsetpage[row-1] == 0) {
        /* Start a new block. */
        for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
        printf("static const unsigned short %s_2uni_page%02x[%d] = {\n",
               name, enc->row_byte(row),
               (lastrow-row) * enc->cols + enc->charsetpage[lastrow]);
      }
      printf(" /""* 0x%02x *""/\n ", enc->row_byte(row));
      /* Only the last row of a block is truncated to its used length. */
      col_max = (enc->charsetpage[row+1] > 0 ? enc->cols : enc->charsetpage[row]);
      for (col = 0; col < col_max; col++) {
        printf(" 0x%04x,", enc->charset2uni[row][col]);
        if ((col % 8) == 7 && (col+1 < col_max)) printf("\n ");
      }
      printf("\n");
      if (enc->charsetpage[row+1] == 0) {
        /* End a block. */
        printf("};\n");
      }
    }
  printf("\n");

  printf("static int\n");
  printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name);
  printf("{\n");
  printf(" unsigned char c1 = s[0];\n");
  printf(" if (");
  for (i = 0; i < enc->ncharsetblocks; i++) {
    i1_min = enc->row_byte(enc->charsetblocks[i].start / enc->cols);
    i1_max = enc->row_byte((enc->charsetblocks[i].end-1) / enc->cols);
    if (i > 0)
      printf(" || ");
    if (i1_min == i1_max)
      printf("(c1 == 0x%02x)", i1_min);
    else
      printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min, i1_max);
  }
  printf(") {\n");
  printf(" if (n >= 2) {\n");
  printf(" unsigned char c2 = s[1];\n");
  printf(" if (");
  printf(enc->check_col_expr, "c2");
  printf(") {\n");
  printf(" unsigned int i = %d * (", enc->cols);
  printf(enc->byte_row_expr, "c1");
  printf(") + (");
  printf(enc->byte_col_expr, "c2");
  printf(");\n");
  printf(" unsigned short wc = 0xfffd;\n");
  for (i = 0; i < enc->ncharsetblocks; i++) {
    printf(" ");
    if (i > 0)
      printf("} else ");
    if (i < enc->ncharsetblocks-1)
      printf("if (i < %d) ", enc->charsetblocks[i+1].start);
    printf("{\n");
    printf(" if (i < %d)\n", enc->charsetblocks[i].end);
    printf(" wc = %s_2uni_page%02x[i", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols));
    if (enc->charsetblocks[i].start > 0)
      printf("-%d", enc->charsetblocks[i].start);
    printf("];\n");
  }
  printf(" }\n");
  printf(" if (wc != 0xfffd) {\n");
  printf(" *pwc = (ucs4_t) wc;\n");
  printf(" return 2;\n");
  printf(" }\n");
  printf(" }\n");
  printf(" return RET_ILSEQ;\n");
  printf(" }\n");
  printf(" return RET_TOOFEW(0);\n");
  printf(" }\n");
  printf(" return RET_ILSEQ;\n");
  printf("}\n");
  printf("\n");
}

/*
 * Computes the uni2charset[0x0000..0xffff] array.
 * An entry of 0 means "no mapping".  The values stored in charset2uni were
 * range-checked when the table was read, so they are valid indices here.
 */
static void invert (Encoding* enc)
{
  int row, col, j;

  enc->uni2charset = (int*) xmalloc(0x10000*sizeof(int));

  for (j = 0; j < 0x10000; j++)
    enc->uni2charset[j] = 0;

  for (row = 0; row < enc->rows; row++)
    for (col = 0; col < enc->cols; col++) {
      j = enc->charset2uni[row][col];
      if (j != 0xfffd)
        enc->uni2charset[j] = 0x100 * enc->row_byte(row) + enc->col_byte(col);
    }
}

/*
 * Outputs the unicode to charset table and function, using a linear array.
 * (Suitable if the table is dense.)
 *
 * The Unicode range is cut into 0x2000 "lines" of 8 code points each.
 * line[] holds, per line, the index of the table that covers it, or -1.
 */
static void output_uni2charset_dense (const char* name, Encoding* enc)
{
  /* Like in 8bit_tab_to_h.c */
  int line[0x2000];
  int tableno;
  struct { int minline; int maxline; int usecount; } tables[0x2000];
  bool first;
  int j, p, j1, j2, t;

  /* Mark the lines that contain at least one mapping. */
  for (j1 = 0; j1 < 0x2000; j1++) {
    bool all_invalid = true;
    for (j2 = 0; j2 < 8; j2++) {
      j = 8*j1+j2;
      if (enc->uni2charset[j] != 0)
        all_invalid = false;
    }
    if (all_invalid)
      line[j1] = -1;
    else
      line[j1] = 0;
  }

  /* Group the used lines into tables.  A line joins the previous table
     when it is adjacent to it, or when it lies in the same 0x20-line page
     and at most 8 lines after the table's current end. */
  tableno = 0;
  for (j1 = 0; j1 < 0x2000; j1++) {
    if (line[j1] >= 0) {
      if (tableno > 0
          && ((j1 > 0 && line[j1-1] == tableno-1)
              || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                  && j1 - tables[tableno-1].maxline <= 8))) {
        line[j1] = tableno-1;
        tables[tableno-1].maxline = j1;
      } else {
        tableno++;
        line[j1] = tableno-1;
        tables[tableno-1].minline = tables[tableno-1].maxline = j1;
      }
    }
  }

  /* Count the mappings in each table. */
  for (t = 0; t < tableno; t++) {
    tables[t].usecount = 0;
    j1 = 8*tables[t].minline;
    j2 = 8*(tables[t].maxline+1);
    for (j = j1; j < j2; j++)
      if (enc->uni2charset[j] != 0)
        tables[t].usecount++;
  }

  /* Emit one array per table holding more than one mapping; the array is
     named after the table's first line number. */
  p = -1;
  for (t = 0; t < tableno; t++)
    if (tables[t].usecount > 1) {
      p = tables[t].minline;
      printf("static const unsigned short %s_page%04x[%d] = {\n", name, p, 8*(tables[t].maxline-tables[t].minline+1));
      for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
        if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
          printf(" /* 0x%04x */\n", 8*j1);
        printf(" ");
        for (j2 = 0; j2 < 8; j2++) {
          j = 8*j1+j2;
          printf(" 0x%04x,", enc->uni2charset[j]);
        }
        printf(" /*0x%02x-0x%02x*/\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
      }
      printf("};\n");
    }
  if (p >= 0)
    printf("\n");

  printf("static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
  printf("{\n");
  printf(" if (n >= 2) {\n");
  printf(" unsigned short c = 0;\n");
  first = true;
  for (j1 = 0; j1 < 0x2000;) {
    t = line[j1];
    for (j2 = j1; j2 < 0x2000 && line[j2] == t; j2++);
    if (t >= 0) {
      /* Internal consistency checks on the grouping done above. */
      if (j1 != tables[t].minline) abort();
      if (j2 > tables[t].maxline+1) abort();
      /* A table may contain unused gap lines; jump to its real end. */
      j2 = tables[t].maxline+1;
      if (first)
        printf(" ");
      else
        printf(" else ");
      first = false;
      if (tables[t].usecount == 0) abort();
      if (tables[t].usecount == 1) {
        /* A single mapping is emitted as a direct comparison. */
        if (j2 != j1+1) abort();
        for (j = 8*j1; j < 8*j2; j++)
          if (enc->uni2charset[j] != 0) {
            printf("if (wc == 0x%04x)\n c = 0x%02x;\n", j, enc->uni2charset[j]);
            break;
          }
      } else {
        if (j1 == 0) {
          printf("if (wc < 0x%04x)", 8*j2);
        } else {
          printf("if (wc >= 0x%04x && wc < 0x%04x)", 8*j1, 8*j2);
        }
        printf("\n c = %s_page%04x[wc", name, j1);
        if (tables[t].minline > 0)
          printf("-0x%04x", 8*j1);
        printf("];\n");
      }
    }
    j1 = j2;
  }
  printf(" if (c != 0) {\n");
  printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
  printf(" return 2;\n");
  printf(" }\n");
  printf(" return RET_ILSEQ;\n");
  printf(" }\n");
  printf(" return RET_TOOSMALL;\n");
  printf("}\n");
}

/*
 * Outputs the unicode to charset table and function, using a packed array.
 * (Suitable if the table is sparse.)
 *
 * The Unicode range is cut into 0x1000 chunks of 16 code points.  For each
 * chunk, summary_used[] is a bitmask of the mapped code points and
 * summary_indx[] the position of its first mapping in the packed array.
 */
static void output_uni2charset_sparse (const char* name, Encoding* enc)
{
  bool pages[0x100];
  Block pageblocks[0x100]; int npageblocks;
  int indx2charset[0x10000];
  int summary_indx[0x1000];
  int summary_used[0x1000];
  int i, row, col, j, p, j1, j2, indx;

  /* Fill pages[0x100]. */
  for (p = 0; p < 0x100; p++)
    pages[p] = false;
  for (row = 0; row < enc->rows; row++)
    for (col = 0; col < enc->cols; col++) {
      j = enc->charset2uni[row][col];
      if (j != 0xfffd)
        pages[j>>8] = true;
    }

  /* Fill summary_indx[] and summary_used[]. */
  indx = 0;
  for (j1 = 0; j1 < 0x1000; j1++) {
    summary_indx[j1] = indx;
    summary_used[j1] = 0;
    for (j2 = 0; j2 < 16; j2++) {
      j = 16*j1+j2;
      if (enc->uni2charset[j] != 0) {
        indx2charset[indx++] = enc->uni2charset[j];
        summary_used[j1] |= (1 << j2);
      }
    }
  }

  /* Fill npageblocks and pageblocks[].  start and end are chunk indices;
     trailing empty chunks are trimmed from each run of used pages. */
  npageblocks = 0;
  for (p = 0; p < 0x100; ) {
    if (pages[p] && (p == 0 || !pages[p-1])) {
      pageblocks[npageblocks].start = 16*p;
      do p++; while (p < 0x100 && pages[p]);
      j1 = 16*p;
      while (summary_used[j1-1] == 0) j1--;
      pageblocks[npageblocks].end = j1;
      npageblocks++;
    } else
      p++;
  }

  printf("static const unsigned short %s_2charset[%d] = {\n", name, indx);
  for (i = 0; i < indx; ) {
    if ((i % 8) == 0) printf(" ");
    printf(" 0x%04x,", indx2charset[i]);
    i++;
    if ((i % 8) == 0 || i == indx) printf("\n");
  }
  printf("};\n");
  printf("\n");
  for (i = 0; i < npageblocks; i++) {
    printf("static const Summary16 %s_uni2indx_page%02x[%d] = {\n", name,
           pageblocks[i].start/16, pageblocks[i].end-pageblocks[i].start);
    for (j1 = pageblocks[i].start; j1 < pageblocks[i].end; ) {
      if (((16*j1) % 0x100) == 0) printf(" /""* 0x%04x *""/\n", 16*j1);
      if ((j1 % 4) == 0) printf(" ");
      printf(" { %4d, 0x%04x },", summary_indx[j1], summary_used[j1]);
      j1++;
      if ((j1 % 4) == 0 || j1 == pageblocks[i].end) printf("\n");
    }
    printf("};\n");
  }
  printf("\n");

  printf("static int\n");
  printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
  printf("{\n");
  printf(" if (n >= 2) {\n");
  printf(" const Summary16 *summary = NULL;\n");
  for (i = 0; i < npageblocks; i++) {
    printf(" ");
    if (i > 0)
      printf("else ");
    printf("if (wc >= 0x%04x && wc < 0x%04x)\n",
           16*pageblocks[i].start, 16*pageblocks[i].end);
    printf(" summary = &%s_uni2indx_page%02x[(wc>>4)", name,
           pageblocks[i].start/16);
    if (pageblocks[i].start > 0)
      printf("-0x%03x", pageblocks[i].start);
    printf("];\n");
  }
  printf(" if (summary) {\n");
  printf(" unsigned short used = summary->used;\n");
  printf(" unsigned int i = wc & 0x0f;\n");
  printf(" if (used & ((unsigned short) 1 << i)) {\n");
  printf(" unsigned short c;\n");
  printf(" /* Keep in `used' only the bits 0..i-1. */\n");
  printf(" used &= ((unsigned short) 1 << i) - 1;\n");
  printf(" /* Add `summary->indx' and the number of bits set in `used'. */\n");
  printf(" used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
  printf(" used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
  printf(" used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
  printf(" used = (used & 0x00ff) + (used >> 8);\n");
  printf(" c = %s_2charset[summary->indx + used];\n", name);
  printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
  printf(" return 2;\n");
  printf(" }\n");
  printf(" }\n");
  printf(" return RET_ILSEQ;\n");
  printf(" }\n");
  printf(" return RET_TOOSMALL;\n");
  printf("}\n");
}

/* ISO-2022/EUC specifics */

static int row_byte_normal (int row) { return 0x21+row; }
static int col_byte_normal (int col) { return 0x21+col; }
static int byte_row_normal (int byte) { return byte-0x21; }
static int byte_col_normal (int byte) { return byte-0x21; }

/*
 * Sets up the 94x94 layout with both bytes in 0x21..0x7e.
 */
static void init_normal (Encoding* enc)
{
  enc->rows = 94;
  enc->cols = 94;
  enc->row_byte = row_byte_normal;
  enc->col_byte = col_byte_normal;
  enc->byte_row = byte_row_normal;
  enc->byte_col = byte_col_normal;
  enc->check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
  enc->check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
  enc->byte_row_expr = "%1$s - 0x21";
  enc->byte_col_expr = "%1$s - 0x21";
}

static void do_normal (const char* name)
{
  Encoding enc;

  init_normal(&enc);

  read_table(&enc);
  output_charset2uni(name,&enc);
  invert(&enc); output_uni2charset_sparse(name,&enc);
}

/* Note: On first sight, the jisx0212_2charset[] table seems to be in order,
   starting from the charset=0x3021/uni=0x4e02 pair. But it's only mostly in
   order. There are 75 out-of-order values, scattered all throughout the table.
 */

static void do_normal_only_charset2uni (const char* name)
{
  Encoding enc;

  init_normal(&enc);

  read_table(&enc);
  output_charset2uni(name,&enc);
}

/* CNS 11643 specifics - trick to put two tables into one */

static int row_byte_cns11643 (int row) {
  return 0x100 * (row / 94) + (row % 94) + 0x21;
}
static int byte_row_cns11643 (int byte) {
  return (byte >= 0x100 && byte < 0x200 ? byte-0x121 :
          byte >= 0x200 && byte < 0x300 ? byte-0x221+94 :
          byte >= 0x300 && byte < 0x400 ? byte-0x321+2*94 :
          -1);
}

static void do_cns11643_only_uni2charset (const char* name)
{
  Encoding enc;
  int j, x;

  /* Same as the normal layout, but with three planes of 94 rows stacked
     on top of each other and the plane number in the row byte. */
  init_normal(&enc);
  enc.rows = 3*94;
  enc.row_byte = row_byte_cns11643;
  enc.byte_row = byte_row_cns11643;

  read_table(&enc);
  invert(&enc);
  /* Move the 2 plane bits into the unused bits 15 and 7. */
  for (j = 0; j < 0x10000; j++) {
    x = enc.uni2charset[j];
    if (x != 0) {
      if (x & 0x8080) abort();
      switch (x >> 16) {
        case 0: /* plane 1 */ x = (x & 0xffff) | 0x0000; break;
        case 1: /* plane 2 */ x = (x & 0xffff) | 0x0080; break;
        case 2: /* plane 3 */ x = (x & 0xffff) | 0x8000; break;
        default: abort();
      }
      enc.uni2charset[j] = x;
    }
  }
  output_uni2charset_sparse(name,&enc);
}

/* GBK specifics */

static int row_byte_gbk1 (int row) {
  return 0x81+row;
}
static int col_byte_gbk1 (int col) {
  return (col >= 0x3f ? 0x41 : 0x40) + col;
}
static int byte_row_gbk1 (int byte) {
  if (byte >= 0x81 && byte < 0xff)
    return byte-0x81;
  else
    return -1;
}
static int byte_col_gbk1 (int byte) {
  if (byte >= 0x40 && byte < 0x7f)
    return byte-0x40;
  else if (byte >= 0x80 && byte < 0xff)
    return byte-0x41;
  else
    return -1;
}

/*
 * Sets up the 126x190 GBK layout: 1st byte 0x81..0xfe, 2nd byte
 * 0x40..0x7e or 0x80..0xfe.
 */
static void init_gbk1 (Encoding* enc)
{
  enc->rows = 126;
  enc->cols = 190;
  enc->row_byte = row_byte_gbk1;
  enc->col_byte = col_byte_gbk1;
  enc->byte_row = byte_row_gbk1;
  enc->byte_col = byte_col_gbk1;
  enc->check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
  enc->check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
  enc->byte_row_expr = "%1$s - 0x81";
  enc->byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
}

static void do_gbk1 (const char* name)
{
  Encoding enc;

  init_gbk1(&enc);

  read_table(&enc);
  output_charset2uni(name,&enc);
  invert(&enc); output_uni2charset_dense(name,&enc);
}

static void do_gbk1_only_charset2uni (const char* name)
{
  Encoding enc;

  init_gbk1(&enc);

  read_table(&enc);
  output_charset2uni(name,&enc);
}

static int row_byte_gbk2 (int row) {
  return 0x81+row;
}
static int col_byte_gbk2 (int col) {
  return (col >= 0x3f ? 0x41 : 0x40) + col;
}
static int byte_row_gbk2 (int byte) {
  if (byte >= 0x81 && byte < 0xff)
    return byte-0x81;
  else
    return -1;
}
static int byte_col_gbk2 (int byte) {
  if (byte >= 0x40 && byte < 0x7f)
    return byte-0x40;
  else if (byte >= 0x80 && byte < 0xa1)
    return byte-0x41;
  else
    return -1;
}

static void do_gbk2_only_charset2uni (const char* name)
{
  Encoding enc;

  /* Like the GBK layout above, but the 2nd byte stops at 0xa0. */
  enc.rows = 126;
  enc.cols = 96;
  enc.row_byte = row_byte_gbk2;
  enc.col_byte = col_byte_gbk2;
  enc.byte_row = byte_row_gbk2;
  enc.byte_col = byte_col_gbk2;
  enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
  enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xa1)";
  enc.byte_row_expr = "%1$s - 0x81";
  enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";

  read_table(&enc);
  output_charset2uni(name,&enc);
}

static void do_gbk1_only_uni2charset (const char* name)
{
  Encoding enc;

  init_gbk1(&enc);

  read_table(&enc);
  invert(&enc); output_uni2charset_sparse(name,&enc);
}

/* KSC 5601 specifics */

/*
 * Reads the charset2uni table from standard input.
 */
static void read_table_ksc5601 (Encoding* enc)
{
  alloc_charset2uni(enc);

  if (input_is_txt_style()) {
    /* Read a unicode.org style .TXT file. */
    while (skip_to_entry()) {
      unsigned int code, uni;
      int i1, i2;

      if (scanf("0x%x", &code) != 1)
        exit(1);
      i1 = (int) (code >> 8);
      i2 = (int) (code & 0xff);
      if (scanf(" 0x%x", &uni) != 1)
        exit(1);
      /* Take only the range covered by KS C 5601.1987-0 = KS C 5601.1989-0
         = KS X 1001.1992, ignore the rest. */
      if (!(i1 >= 128+33 && i1 < 128+127 && i2 >= 128+33 && i2 < 128+127))
        continue;  /* KSC5601 specific */
      i1 &= 0x7f;  /* KSC5601 specific */
      i2 &= 0x7f;  /* KSC5601 specific */
      *lookup_cell(enc, i1, i2, "for") = checked_unicode(uni);
    }
  } else {
    /* Read a table of hexadecimal Unicode values. */
    read_hex_grid(enc, 33, 127);
  }
}

static void do_ksc5601 (const char* name)
{
  Encoding enc;

  init_normal(&enc);

  read_table_ksc5601(&enc);
  output_charset2uni(name,&enc);
  invert(&enc); output_uni2charset_sparse(name,&enc);
}

/* Big5 specifics */

static int row_byte_big5 (int row) {
  return 0xa1+row;
}
static int col_byte_big5 (int col) {
  return (col >= 0x3f ? 0x62 : 0x40) + col;
}
static int byte_row_big5 (int byte) {
  if (byte >= 0xa1 && byte < 0xff)
    return byte-0xa1;
  else
    return -1;
}
static int byte_col_big5 (int byte) {
  if (byte >= 0x40 && byte < 0x7f)
    return byte-0x40;
  else if (byte >= 0xa1 && byte < 0xff)
    return byte-0x62;
  else
    return -1;
}

static void do_big5 (const char* name)
{
  Encoding enc;

  enc.rows = 94;
  enc.cols = 157;
  enc.row_byte = row_byte_big5;
  enc.col_byte = col_byte_big5;
  enc.byte_row = byte_row_big5;
  enc.byte_col = byte_col_big5;
  enc.check_row_expr = "%1$s >= 0xa1 && %1$s < 0xff";
  enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
  enc.byte_row_expr = "%1$s - 0xa1";
  enc.byte_col_expr = "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";

  read_table(&enc);
  output_charset2uni(name,&enc);
  invert(&enc); output_uni2charset_sparse(name,&enc);
}

/* Johab Hangul specifics */

static int row_byte_johab_hangul (int row) {
  return 0x84+row;
}
static int col_byte_johab_hangul (int col) {
  return (col >= 0x3e ? 0x43 : 0x41) + col;
}
static int byte_row_johab_hangul (int byte) {
  if (byte >= 0x84 && byte < 0xd4)
    return byte-0x84;
  else
    return -1;
}
static int byte_col_johab_hangul (int byte) {
  if (byte >= 0x41 && byte < 0x7f)
    return byte-0x41;
  else if (byte >= 0x81 && byte < 0xff)
    return byte-0x43;
  else
    return -1;
}

static void do_johab_hangul (const char* name)
{
  Encoding enc;

  enc.rows = 80;
  enc.cols = 188;
  enc.row_byte = row_byte_johab_hangul;
  enc.col_byte = col_byte_johab_hangul;
  enc.byte_row = byte_row_johab_hangul;
  enc.byte_col = byte_col_johab_hangul;
  enc.check_row_expr = "%1$s >= 0x84 && %1$s < 0xd4";
  enc.check_col_expr = "(%1$s >= 0x41 && %1$s < 0x7f) || (%1$s >= 0x81 && %1$s < 0xff)";
  enc.byte_row_expr = "%1$s - 0x84";
  enc.byte_col_expr = "%1$s - (%1$s >= 0x81 ? 0x43 : 0x41)";

  read_table(&enc);
  output_charset2uni(name,&enc);
  invert(&enc); output_uni2charset_dense(name,&enc);
}

/* SJIS specifics */

static int row_byte_sjis (int row) {
  return (row >= 0x1f ? 0xc1 : 0x81) + row;
}
static int col_byte_sjis (int col) {
  return (col >= 0x3f ? 0x41 : 0x40) + col;
}
static int byte_row_sjis (int byte) {
  if (byte >= 0x81 && byte < 0xa0)
    return byte-0x81;
  else if (byte >= 0xe0)
    return byte-0xc1;
  else
    return -1;
}
static int byte_col_sjis (int byte) {
  if (byte >= 0x40 && byte < 0x7f)
    return byte-0x40;
  else if (byte >= 0x80 && byte < 0xfd)
    return byte-0x41;
  else
    return -1;
}

static void do_sjis (const char* name)
{
  Encoding enc;

  enc.rows = 94;
  enc.cols = 188;
  enc.row_byte = row_byte_sjis;
  enc.col_byte = col_byte_sjis;
  enc.byte_row = byte_row_sjis;
  enc.byte_col = byte_col_sjis;
  enc.check_row_expr = "(%1$s >= 0x81 && %1$s < 0xa0) || (%1$s >= 0xe0)";
  enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xfd)";
  enc.byte_row_expr = "%1$s - (%1$s >= 0xe0 ? 0xc1 : 0x81)";
  enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";

  read_table(&enc);
  output_charset2uni(name,&enc);
  invert(&enc); output_uni2charset_sparse(name,&enc);
}

/* Main program */

/*
 * Usage: gbk_tab_to_h CHARSETNAME name < table > name.h
 *
 * CHARSETNAME is only copied into the title comment; name selects the
 * table layout and prefixes every generated identifier.  Returns 0 on
 * success; wrong usage or an unknown name exits with status 1.
 */
int main (int argc, char *argv[])
{
  const char* charsetname;
  const char* name;

  if (argc != 3) {
    fprintf(stderr, "Usage: %s CHARSETNAME name < table > name.h\n",
            argc > 0 ? argv[0] : "gbk_tab_to_h");
    exit(1);
  }
  charsetname = argv[1];
  name = argv[2];

  output_title(charsetname);

  if (!strcmp(name,"gb2312") || !strcmp(name,"gb12345ext")
      || !strcmp(name,"jisx0208") || !strcmp(name,"jisx0212"))
    do_normal(name);
  else if (!strcmp(name,"cns11643_1") || !strcmp(name,"cns11643_2")
           || !strcmp(name,"cns11643_3"))
    do_normal_only_charset2uni(name);
  else if (!strcmp(name,"cns11643_inv"))
    do_cns11643_only_uni2charset(name);
  else if (!strcmp(name,"gbkext1"))
    do_gbk1_only_charset2uni(name);
  else if (!strcmp(name,"gbkext2"))
    do_gbk2_only_charset2uni(name);
  else if (!strcmp(name,"gbkext_inv"))
    do_gbk1_only_uni2charset(name);
  else if (!strcmp(name,"cp936ext"))
    do_gbk1(name);
  else if (!strcmp(name,"ksc5601"))
    do_ksc5601(name);
  else if (!strcmp(name,"big5") || !strcmp(name,"cp950ext"))
    do_big5(name);
  else if (!strcmp(name,"johab_hangul"))
    do_johab_hangul(name);
  else if (!strcmp(name,"cp932ext"))
    do_sjis(name);
  else {
    fprintf(stderr, "unknown table name: %s\n", name);
    exit(1);
  }

  return 0;
}