fltk 1.3.0rc3
About: FLTK (Fast Light Tool Kit) is a cross-platform C++ GUI toolkit for UNIX/Linux (X11), Microsoft Windows, and MacOS X. Release candidate.
  SfR Fresh Dox: fltk-1.3.0rc3-source.tar.gz ("inofficial" and yet experimental doxygen-generated source code documentation)  

utf8Input.c

Go to the documentation of this file.
00001 /* "$Id: $"
00002  *
00003  * Author: Jean-Marc Lienher ( http://oksid.ch )
00004  * Copyright 2000-2003 by O'ksi'D.
00005  *
00006  * This library is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Library General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2 of the License, or (at your option) any later version.
00010  *
00011  * This library is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Library General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Library General Public
00017  * License along with this library; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00019  * USA.
00020  *
00021  * Please report all bugs and problems on the following page:
00022  *
00023  *     http://www.fltk.org/str.php
00024  */
00025 
00026 #if !defined(WIN32) && !defined(__APPLE__)
00027 
00028 #include <config.h>
00029 #include "../../FL/Xutf8.h"
00030 #include <X11/X.h>
00031 #include <X11/Xlib.h>
00032 #include <X11/Xutil.h>
00033 #include <string.h>
00034 #include <stdlib.h>
00035 
00036 #if HAVE_LIBC_ICONV
00037 #include <iconv.h>
00038 #endif
00039 /*
00040   I haven't found much doc on the web about EUC encodings, so I've used
00041   GNU libiconv source code as a reference.
00042   http://clisp.cons.org/~haible/packages-libiconv.html
00043 */
00044 
/*
 * Helper definitions declared ahead of the lcUniConv table headers that
 * are included right after them. The names follow GNU libiconv, which the
 * comment above cites as the reference implementation; presumably the
 * included headers rely on them - confirm before renaming anything.
 */
#define RET_ILSEQ (-1)
/* The argument is parenthesized so that an expression argument such as
   RET_TOOFEW(a + b) is subtracted as a whole. */
#define RET_TOOFEW(x) (-10 - (x))
#define RET_TOOSMALL (-2)
#define conv_t void*
#define ucs4_t unsigned int
typedef struct {
  unsigned short indx;
  unsigned short used;
} Summary16;
00054 
00055 #define NEED_TOWC // indicates what part of these include files is needed here (avoid compilation warnings)
00056 #include "lcUniConv/big5.h"
00057 #include "lcUniConv/gb2312.h"
00058 #include "lcUniConv/cp936ext.h"
00059 #include "lcUniConv/jisx0201.h"
00060 #include "lcUniConv/jisx0208.h"
00061 #include "lcUniConv/jisx0212.h"
00062 #include "lcUniConv/ksc5601.h"
00063 
/*
 * Converts an EUC-TW byte string to UTF-8 in place.
 *
 * buffer_return holds `len` input bytes on entry and receives the UTF-8
 * output. The return value is the length of that output; 0 is returned
 * when len < 1 or when the scratch copy of the input cannot be allocated.
 *
 * FIXME: when HAVE_LIBC_ICONV is not set there is no conversion table, so
 * every well-formed multi-byte character is replaced by a blank and every
 * malformed byte by '?'.
 */
int
XConvertEucTwToUtf8(char* buffer_return, int len) {
#if HAVE_LIBC_ICONV
  iconv_t cd;
  char *in, *out;
  size_t in_left, out_left;
#else
  int i = 0;
#endif
  int l = 0;
  char *buf;

  if (len < 1) return 0;
  buf = (char*) malloc((unsigned)len);
  if (!buf) return 0;
  memcpy(buf, buffer_return, (unsigned) len);

#if HAVE_LIBC_ICONV
  /* iconv_open() takes the target encoding first and the source second. */
  cd = iconv_open("UTF-8", "EUC-TW");
  if (cd != (iconv_t) -1) {
    in = buf;
    out = buffer_return;
    in_left = (size_t) len;
    /* The real capacity of buffer_return is not passed in, so only the
       `len` bytes that are known to exist are offered to iconv(). */
    out_left = (size_t) len;
    iconv(cd, &in, &in_left, &out, &out_left);
    iconv_close(cd);
    l = len - (int) out_left;
  }
#else
  while (i < len) {
    unsigned int ucs;
    unsigned char c = (unsigned char) buf[i];

    if (c < 0x80) {
      /* plain ASCII */
      ucs = c;
      i++;
    } else if (c >= 0xa1 && c < 0xff && len - i > 1) {
      /* two-byte character: no table available, emit a blank */
      ucs = ' ';
      i += 2;
    } else if (c == 0x8e && len - i > 3) {
      /* four-byte sequence introduced by 0x8e */
      unsigned char c1 = (unsigned char) buf[i + 1];
      unsigned char c2 = (unsigned char) buf[i + 2];
      unsigned char c3 = (unsigned char) buf[i + 3];

      if (c1 >= 0xa1 && c1 <= 0xb0 &&
          c2 >= 0xa1 && c2 < 0xff &&
          c3 >= 0xa1 && c3 < 0xff) {
        ucs = ' ';
        i += 4;
      } else {
        ucs = '?';
        i++;
      }
    } else {
      ucs = '?';
      i++;
    }
    l += XConvertUcsToUtf8(ucs, buffer_return + l);
  }
#endif
  free(buf);
  return l;
}
00125 
/*
 * Converts an EUC-KR byte string to UTF-8 in place.
 *
 * buffer_return holds `len` input bytes on entry and receives the UTF-8
 * output. The return value is the output length, accumulated from the
 * XConvertUcsToUtf8() results; 0 is returned when len < 1 or when the
 * scratch copy of the input cannot be allocated.
 *
 * Bytes that cannot be decoded are replaced by '?'.
 * NOTE(review): the capacity of buffer_return is not known here; the
 * caller has to provide room for output that is longer than the input.
 */
int
XConvertEucKrToUtf8(char* buffer_return, int len) {
  int i = 0, l = 0;
  char *buf;

  if (len < 1) return 0;

  buf = (char*) malloc((unsigned)len);
  if (!buf) return 0;
  memcpy(buf, buffer_return, (unsigned)len);

  while (i < len) {
    unsigned int ucs = '?';
    unsigned char c = (unsigned char) buf[i];

    if (c < 0x80) {
      /* plain ASCII */
      ucs = c;
      i++;
    } else if (c >= 0xA1 && c < 0xFF && len - i > 1) {
      unsigned char c1 = (unsigned char) buf[i + 1];

      if (c1 >= 0xa1 && c1 < 0xff) {
        /* the table lookup is given both bytes with the high bit removed */
        unsigned char b[2];
        b[0] = c - 0x80;
        b[1] = c1 - 0x80;
        if (ksc5601_mbtowc(NULL, &ucs, b, 2) < 1) {
          ucs = '?';
        }
      }
      /* both bytes are consumed even when the pair is invalid */
      i += 2;
    } else {
      i++;
    }
    l += XConvertUcsToUtf8(ucs, buffer_return + l);
  }
  free(buf);
  return l;
}
00165 
/*
 * Converts a Big5 byte string to UTF-8 in place.
 *
 * buffer_return holds `len` input bytes on entry and receives the UTF-8
 * output. The return value is the output length, accumulated from the
 * XConvertUcsToUtf8() results; 0 is returned when len < 1 or when the
 * scratch copy of the input cannot be allocated.
 *
 * Bytes below 0x80 are copied through as ASCII, in the same way as
 * XConvertGb2312ToUtf8() does. Any other byte is looked up together with
 * its successor; a failed lookup, or a final byte without a successor,
 * yields '?'.
 */
int
XConvertBig5ToUtf8(char* buffer_return, int len) {
  int i = 0, l = 0;
  char *buf;

  if (len < 1) return 0;
  buf = (char*) malloc((unsigned)len);
  if (!buf) return 0;
  memcpy(buf, buffer_return, (unsigned)len);

  while (i < len) {
    unsigned int ucs = '?';
    /* read through unsigned char so bytes >= 0x80 are not sign-extended */
    unsigned char c = (unsigned char) buf[i];

    if (c < 0x80) {
      ucs = c;
      i++;
    } else if (len - i > 1) {
      unsigned char b[2];
      b[0] = c;
      b[1] = (unsigned char) buf[i + 1];
      if (big5_mbtowc(NULL, &ucs, b, 2) == 2) {
        i += 2;
      } else {
        ucs = '?';
        i++;
      }
    } else {
      /* last byte of the input with nothing following it */
      i++;
    }
    l += XConvertUcsToUtf8(ucs, buffer_return + l);
  }
  free(buf);
  return l;
}
00194 
/*
 * Converts a byte string to UTF-8 in place using the cp936ext table.
 *
 * buffer_return holds `len` input bytes on entry and receives the UTF-8
 * output. The return value is the output length, accumulated from the
 * XConvertUcsToUtf8() results; 0 is returned when len < 1 or when the
 * scratch copy of the input cannot be allocated.
 *
 * Each position is first tried as a two-byte character. When that fails,
 * or when only one byte is left, the byte is copied through if it is
 * below 0x80 and replaced by '?' otherwise. Every input byte is handled
 * exactly once, so a one-byte input produces one character.
 */
int
XConvertCp936extToUtf8(char* buffer_return, int len)
{
  int i = 0, l = 0;
  char *buf;

  if (len < 1) return 0;
  buf = (char*) malloc((unsigned)len);
  if (!buf) return 0;
  memcpy(buf, buffer_return, (unsigned)len);

  while (i < len) {
    unsigned int ucs = 0;
    /* read through unsigned char so bytes >= 0x80 are not sign-extended */
    unsigned char c = (unsigned char) buf[i];
    int matched = 0;

    if (len - i > 1) {
      unsigned char b[2];
      b[0] = c;
      b[1] = (unsigned char) buf[i + 1];
      matched = (cp936ext_mbtowc(NULL, &ucs, b, 2) == 2);
    }
    if (matched) {
      i += 2;
    } else {
      ucs = (c < 0x80) ? (unsigned int) c : (unsigned int) '?';
      i++;
    }
    l += XConvertUcsToUtf8(ucs, buffer_return + l);
  }
  free(buf);
  return l;
}
00231 
/*
 * Converts a GB2312 byte string to UTF-8 in place.
 *
 * buffer_return holds `len` input bytes on entry and receives the UTF-8
 * output. The return value is the output length, accumulated from the
 * XConvertUcsToUtf8() results; 0 is returned when len < 1 or when the
 * scratch copy of the input cannot be allocated.
 *
 * Bytes below 0x80 are copied through as ASCII. Any other byte is looked
 * up together with its successor; a failed lookup, or a final byte
 * without a successor, yields '?'. Every input byte is handled exactly
 * once, so a one-byte input produces one character.
 *
 * NOTE(review): the byte pair is handed to gb2312_mbtowc() unmodified,
 * whereas XConvertEucKrToUtf8() and XConvertEucJpToUtf8() subtract 0x80
 * from both bytes before their table lookups - confirm which form
 * gb2312_mbtowc() expects.
 */
int
XConvertGb2312ToUtf8(char* buffer_return, int len) {
  int i = 0, l = 0;
  char *buf;

  if (len < 1) return 0;
  buf = (char*) malloc((unsigned)len);
  if (!buf) return 0;
  memcpy(buf, buffer_return, (unsigned)len);

  while (i < len) {
    unsigned int ucs = '?';
    /* read through unsigned char so bytes >= 0x80 are not sign-extended */
    unsigned char c = (unsigned char) buf[i];

    if (c < 0x80) {
      ucs = c;
      i++;
    } else {
      int matched = 0;

      if (len - i > 1) {
        unsigned char b[2];
        b[0] = c;
        b[1] = (unsigned char) buf[i + 1];
        matched = (gb2312_mbtowc(NULL, &ucs, b, 2) == 2);
      }
      if (matched) {
        i += 2;
      } else {
        ucs = '?';
        i++;
      }
    }
    l += XConvertUcsToUtf8(ucs, buffer_return + l);
  }
  free(buf);
  return l;
}
00266 
/*
 * Converts an EUC-CN byte string to UTF-8 in place.
 *
 * buffer_return holds `len` input bytes on entry and receives the UTF-8
 * output. The return value is the output length, accumulated from the
 * XConvertUcsToUtf8() results; 0 is returned when len < 1 or when the
 * scratch copy of the input cannot be allocated.
 *
 * Bytes that cannot be decoded are replaced by '?'.
 */
int
XConvertEucCnToUtf8(char* buffer_return, int len) {
  int i = 0, l = 0;
  char *buf;

  if (len < 1) return 0;
  buf = (char*) malloc((unsigned)len);
  if (!buf) return 0;
  memcpy(buf, buffer_return, (unsigned)len);

  while (i < len) {
    unsigned int ucs = '?';
    unsigned char c = (unsigned char) buf[i];

    if (c < 0x80) {
      /* plain ASCII */
      ucs = c;
      i++;
    } else if (c >= 0xA1 && c < 0xFF && len - i > 1) {
      unsigned char c1 = (unsigned char) buf[i + 1];

      if (c1 >= 0xa1 && c1 < 0xff) {
        /* Strip the high bit from both bytes before the table lookup, as
           XConvertEucKrToUtf8() and XConvertEucJpToUtf8() do for their
           tables and as libiconv's EUC-CN converter does for GB 2312. */
        unsigned char b[2];
        b[0] = c - 0x80;
        b[1] = c1 - 0x80;
        if (gb2312_mbtowc(NULL, &ucs, b, 2) < 1) {
          ucs = '?';
        }
      }
      /* both bytes are consumed even when the pair is invalid */
      i += 2;
    } else {
      i++;
    }
    l += XConvertUcsToUtf8(ucs, buffer_return + l);
  }
  free(buf);
  return l;
}
00305 
/*
 * Converts an EUC-JP byte string to UTF-8 in place.
 *
 * buffer_return holds `len` input bytes on entry and receives the UTF-8
 * output. The return value is the output length, accumulated from the
 * XConvertUcsToUtf8() results; 0 is returned when len < 1 or when the
 * scratch copy of the input cannot be allocated.
 *
 * Four kinds of sequences are recognised:
 *   - a byte below 0x80: copied through;
 *   - a lead byte 0xA1..0xFE plus one byte: jisx0208 lookup, or for lead
 *     bytes from 0xF5 a computed code point starting at 0xE000;
 *   - 0x8E plus one byte: jisx0201 lookup;
 *   - 0x8F plus two bytes: jisx0212 lookup, or for a second byte from
 *     0xF5 a computed code point starting at 0xE3AC.
 * Anything that cannot be decoded is replaced by '?'.
 */
int
XConvertEucJpToUtf8(char* buffer_return, int len) {
  int i = 0, l = 0;
  char *buf;

  if (len < 1) return 0;
  buf = (char*) malloc((unsigned)len);
  if (!buf) return 0;
  memcpy(buf, buffer_return, (unsigned)len);

  while (i < len) {
    unsigned int ucs = '?';
    unsigned char c = (unsigned char) buf[i];

    if (c < 0x80) {
      ucs = c;
      i++;
    } else if (c >= 0xA1 && c < 0xFF && len - i > 1) {
      unsigned char c1 = (unsigned char) buf[i + 1];

      if (c1 >= 0xa1 && c1 < 0xff) {
        if (c < 0xF5) {
          unsigned char b[2];
          b[0] = c - 0x80;
          b[1] = c1 - 0x80;
          if (jisx0208_mbtowc(NULL, &ucs, b, 2) < 1) {
            ucs = '?';
          }
        } else {
          /* lead bytes 0xF5..0xFE: computed code point, 94 per row */
          ucs = 0xE000 + 94 * (c - 0xF5) + (c1 - 0xA1);
        }
      }
      i += 2;
    } else if (c == 0x8E && len - i > 1) {
      unsigned char c1 = (unsigned char) buf[i + 1];

      if (c1 >= 0xa1 && c1 <= 0xe0) {
        if (jisx0201_mbtowc(NULL, &ucs, &c1, 1) != 1) {
          ucs = '?';
        }
      }
      i += 2;
    } else if (c == 0x8F && len - i > 2) {
      unsigned char c2 = (unsigned char) buf[i + 1];
      unsigned char c3 = (unsigned char) buf[i + 2];

      /* Both following bytes must lie in 0xA1..0xFE; only then is the
         second byte used to choose between the table and the computed
         range. This keeps (c2 - 0xF5) from ever being negative. */
      if (c2 >= 0xa1 && c2 < 0xff && c3 >= 0xa1 && c3 < 0xff) {
        if (c2 < 0xf5) {
          unsigned char b[2];
          b[0] = c2 - 0x80;
          b[1] = c3 - 0x80;
          if (jisx0212_mbtowc(NULL, &ucs, b, 2) < 1) {
            ucs = '?';
          }
        } else {
          ucs = 0xe3ac + 94 * (c2 - 0xF5) + (c3 - 0xA1);
        }
      }
      i += 3;
    } else {
      i++;
    }
    l += XConvertUcsToUtf8(ucs, buffer_return + l);
  }
  free(buf);
  return l;
}
00378 
/*
 * Picks a converter from substrings of the locale name and runs it on
 * buffer_return in place.
 *
 * locale         locale name; may be NULL
 * buffer_return  `len` bytes of input, overwritten with the result
 * len            number of input bytes
 * bytes_buffer   accepted but not used by this function
 *
 * Returns the value of the selected converter. `len` is returned and the
 * buffer is left untouched when locale is NULL, contains "UTF" or "utf",
 * or matches none of the patterns below.
 */
int
XConvertEucToUtf8(const char*   locale,
                  char*         buffer_return,
                  int           len,
                  int           bytes_buffer) {
  int is_traditional, is_euc;

  if (locale == NULL) {
    return len;
  }
  if (strstr(locale, "UTF") != NULL || strstr(locale, "utf") != NULL) {
    return len;
  }

  /* Japanese */
  if (strstr(locale, "ja") != NULL) {
    return XConvertEucJpToUtf8(buffer_return, len);
  }

  /* BIG5 */
  if (strstr(locale, "Big5") != NULL || strstr(locale, "big5") != NULL) {
    return XConvertBig5ToUtf8(buffer_return, len);
  }

  /* GBK */
  if (strstr(locale, "GBK") != NULL || strstr(locale, "gbk") != NULL) {
    return XConvertCp936extToUtf8(buffer_return, len);
  }

  /* Chinese: choose by territory first, then by the EUC marker */
  if (strstr(locale, "zh") != NULL || strstr(locale, "chinese-") != NULL) {
    is_traditional = strstr(locale, "TW") != NULL ||
                     strstr(locale, "chinese-t") != NULL;
    is_euc = strstr(locale, "EUC") != NULL || strstr(locale, "euc") != NULL;

    if (is_traditional) {
      if (is_euc || strstr(locale, "chinese-t") != NULL) {
        return XConvertEucTwToUtf8(buffer_return, len);
      }
      return XConvertBig5ToUtf8(buffer_return, len);
    }
    if (is_euc) {
      return XConvertEucCnToUtf8(buffer_return, len);
    }
    return XConvertGb2312ToUtf8(buffer_return, len);
  }

  /* Korean */
  if (strstr(locale, "ko") != NULL) {
    return XConvertEucKrToUtf8(buffer_return, len);
  }

  return len;
}
00412 
00413 int
00414 XUtf8LookupString(XIC                 ic,
00415                   XKeyPressedEvent*   event,
00416                   char*               buffer_return,
00417                   int                 bytes_buffer,
00418                   KeySym*             keysym,
00419                   Status*             status_return) {
00420 
00421   long ucs = -1;
00422   int len;
00423   len = XmbLookupString(ic, event, buffer_return, bytes_buffer / 5,
00424                         keysym, status_return);
00425   if (*status_return == XBufferOverflow) {
00426     return len * 5;
00427   }
00428   if (*keysym > 0 && *keysym < 0x100 && len == 1) {
00429     if (*keysym < 0x80) {
00430       ucs = (unsigned char)buffer_return[0];
00431     } else {
00432       ucs = *keysym;
00433     }
00434   } else  if (((*keysym >= 0x100 && *keysym <= 0xf000) ||
00435               (*keysym & 0xff000000U) == 0x01000000))
00436   {
00437     ucs = XKeysymToUcs(*keysym);
00438   } else {
00439     ucs = -2;
00440   }
00441 
00442   if (ucs > 0) {
00443     len = XConvertUcsToUtf8((unsigned)ucs, (char *)buffer_return);
00444   } else if (len > 0) {
00445     XIM im;
00446     if (!ic) return 0;
00447     im = XIMOfIC(ic);
00448     if (!im) return 0;
00449     len = XConvertEucToUtf8(XLocaleOfIM(im), buffer_return, len, bytes_buffer); 
00450   }
00451   return len;
00452 }
00453 
00454 #endif /* X11 only */
00455 
00456 /*
00457  * End of "$Id$".
00458  */