Main Page   Modules   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

Transcode.i

Go to the documentation of this file.
00001 // -*- C++ -*-  $Id: Transcode.i,v 1.1.1.1.2.1 2003/04/21 19:14:54 chad Exp $
00002 
00003 // Encode one UTF-16 code unit <src> as UTF-8 into <dst>, which has room
00004 // for <len> bytes.
00005 //
00006 // Returns the number of bytes written (1, 2 or 3) on success, or:
00007 //   INVALID_ARGS           <dst> is a null pointer;
00008 //   DESTINATION_TOO_SHORT  <len> is smaller than the encoding requires;
00009 //   IS_SURROGATE           <src> lies in [0xD800, 0xE000).
00010 // Nothing is written through <dst> when an error code is returned.
00011 ACEXML_INLINE int
00012 ACEXML_Transcoder::utf162utf8 (ACEXML_UTF16 src,
00013                                ACEXML_UTF8 *dst,
00014                                size_t len)
00015 {
00016   if (dst == 0)
00017     return INVALID_ARGS;
00018 
00019   if (src < 0x80)               // One byte: 0xxxxxxx.
00020     {
00021       if (len < 1)
00022         return DESTINATION_TOO_SHORT;
00023       dst[0] = ACE_static_cast (ACEXML_UTF8, src);
00024       return 1;
00025     }
00026   if (src < 0x800)              // Two bytes: 110xxxxx 10xxxxxx.
00027     {
00028       if (len < 2)
00029         return DESTINATION_TOO_SHORT;
00030       dst[0] = ACE_static_cast (ACEXML_UTF8, 0xc0 | (src >> 6));
00031       dst[1] = ACE_static_cast (ACEXML_UTF8, 0x80 | (src & 0x3f));
00032       return 2;
00033     }
00034 
00035   // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.  The size check precedes the
00036   // surrogate check, so a surrogate with <len> < 3 is reported as too short.
00037   if (len < 3)
00038     return DESTINATION_TOO_SHORT;
00039   if (src >= 0xD800 && src < 0xE000)
00040     return IS_SURROGATE;
00041   dst[0] = ACE_static_cast (ACEXML_UTF8, 0xe0 | (src >> 12));
00042   dst[1] = ACE_static_cast (ACEXML_UTF8, 0x80 | ((src >> 6) & 0x3f));
00043   dst[2] = ACE_static_cast (ACEXML_UTF8, 0x80 | (src & 0x3f));
00044   return 3;
00045 }
00046 
00047 // Encode the UCS-4 scalar <src> as UTF-8 into <dst> (room for <len> bytes).
00048 // Returns the number of bytes written (1 to 4), INVALID_ARGS if <dst> is
00049 // null, DESTINATION_TOO_SHORT if <len> is too small, or NON_UNICODE if
00050 // <src> is a surrogate or is 0x110000 or greater.
00051 ACEXML_INLINE int
00052 ACEXML_Transcoder::ucs42utf8 (ACEXML_UCS4 src, ACEXML_UTF8 *dst, size_t len)
00053 {
00054   // Below 0x10000 the value fits one UTF-16 unit: reuse that encoder and
00055   // report its surrogate error as NON_UNICODE.
00056   if (src < 0x10000)
00057     {
00058       int retv = ACEXML_Transcoder::utf162utf8
00059         (ACE_static_cast (ACEXML_UTF16, src), dst, len);
00060       return (retv == IS_SURROGATE ? NON_UNICODE : retv);
00061     }
00062   if (src >= 0x110000)
00063     return NON_UNICODE;
00064   if (dst == 0)
00065     return INVALID_ARGS;
00066   if (len < 4)
00067     return DESTINATION_TOO_SHORT;
00068   // All supplementary planes, 0x10000 - 0x10FFFF: 11110xxx + 3 x 10xxxxxx.
00069   *dst = ACE_static_cast (ACEXML_UTF8, 0xf0 | (src / 0x40000));
00070   *(dst+1) = ACE_static_cast (ACEXML_UTF8, 0x80 | ((src % 0x40000) / 0x1000));
00071   *(dst+2) = ACE_static_cast (ACEXML_UTF8, 0x80 | ((src % 0x1000) / 0x40));
00072   *(dst+3) = ACE_static_cast (ACEXML_UTF8, 0x80 | (src % 0x40));
00073   return 4;
00074 }
00075 
00076 
00077 // Encode the UCS-4 scalar <src> as UTF-16 into <dst> (room for <len> units).
00078 // Returns the number of units written (1, or 2 for a surrogate pair),
00079 // INVALID_ARGS if <dst> is null, DESTINATION_TOO_SHORT if <len> is too
00080 // small, or NON_UNICODE if <src> is a surrogate or is 0x110000 or greater.
00081 ACEXML_INLINE int
00082 ACEXML_Transcoder::ucs42utf16 (ACEXML_UCS4 src,
00083                                ACEXML_UTF16 *dst,
00084                                size_t len)
00085 {
00086   if (dst == 0)
00087     return INVALID_ARGS;
00088 
00089   if (src < 0x10000)
00090     {
00091       if (len < 1)
00092         return DESTINATION_TOO_SHORT;
00093       if (src >= 0xD800 && src < 0xE000)
00094         return NON_UNICODE;     // Surrogates are not valid Unicode scalars
00095       *dst = ACE_static_cast (ACEXML_UTF16, src);
00096       return 1;
00097     }
00098   if (src >= 0x110000)
00099     return NON_UNICODE;
00100   if (len < 2)
00101     return DESTINATION_TOO_SHORT;
00102   // 0x10000 - 0x10FFFF: subtract 0x10000 to get a 20-bit offset, then put
00103   // its top 10 bits in the high surrogate and low 10 bits in the low one.
00104   ACEXML_UCS4 offset = src - 0x10000;
00105   *dst = ACE_static_cast (ACEXML_UTF16, 0xD800 | (offset / 0x400));
00106   *(dst+1) = ACE_static_cast (ACEXML_UTF16, 0xDC00 | (offset % 0x400));
00107   return 2;
00108 }
00109 
00110 // Encode the UTF-16 surrogate pair <high>, <low> as 4 UTF-8 bytes in <dst>
00111 // (room for <len> bytes).  Returns 4 on success, INVALID_ARGS if <dst> is
00112 // null or the pair is malformed, DESTINATION_TOO_SHORT if <len> < 4.
00113 ACEXML_INLINE int
00114 ACEXML_Transcoder::surrogate2utf8 (ACEXML_UTF16 high, ACEXML_UTF16 low,
00115                                    ACEXML_UTF8 *dst, size_t len)
00116 {
00117   if (dst == 0 || high < 0xD800 || high >= 0xDC00
00118       || low < 0xDC00 || low >= 0xE000)
00119     return INVALID_ARGS;
00120   if (len < 4)
00121     return DESTINATION_TOO_SHORT;
00122   ACEXML_UCS4 src = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
00123   *dst = ACE_static_cast (ACEXML_UTF8, 0xf0 | (src / 0x40000));
00124   *(dst+1) = ACE_static_cast (ACEXML_UTF8, 0x80 | ((src % 0x40000) / 0x1000));
00125   *(dst+2) = ACE_static_cast (ACEXML_UTF8, 0x80 | ((src % 0x1000) / 0x40));
00126   *(dst+3) = ACE_static_cast (ACEXML_UTF8, 0x80 | (src % 0x40));
00127   return 4;
00128 }
00129 
00130 // Combine a UTF-16 surrogate pair into its UCS-4 scalar, stored in <dst>.
00131 // Returns SUCCESS, or INVALID_ARGS (<dst> untouched) unless <high> is in
00132 // [0xD800, 0xDC00) and <low> is in [0xDC00, 0xE000).
00133 ACEXML_INLINE int
00134 ACEXML_Transcoder::surrogate2ucs4 (ACEXML_UTF16 high, ACEXML_UTF16 low,
00135                                    ACEXML_UCS4 &dst)
00136 {
00137   if (high < 0xD800 || high >= 0xDC00 || low < 0xDC00 || low >= 0xE000)
00138     return INVALID_ARGS;
00139   dst = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
00140   return SUCCESS;
00141 }
00142 
00143 // Decode the UTF-8 sequence that starts at <the_src>, reading at most <len>
00144 // bytes, into the UCS-4 value <dst>.
00145 //
00146 // Returns the number of bytes consumed (1 to 4) on success, or:
00147 //   INVALID_ARGS   <the_src> is a null pointer;
00148 //   END_OF_SOURCE  <len> is 0, or the sequence needs more than <len> bytes;
00149 //   NON_UNICODE    the lead byte is not 0xxxxxxx, 110xxxxx, 1110xxxx or
00150 //                  11110xxx, or a following byte is not 10xxxxxx.
00151 // <dst> may hold a partially accumulated value when END_OF_SOURCE or
00152 // NON_UNICODE is returned after a valid lead byte.
00153 ACEXML_INLINE int
00154 ACEXML_Transcoder::utf82ucs4 (const ACEXML_UTF8 *the_src,
00155                               size_t len,
00156                               ACEXML_UCS4 &dst)
00157 {
00158   if (the_src == 0)
00159     return INVALID_ARGS;
00160   if (len < 1)
00161     return END_OF_SOURCE;
00162   const unsigned char *cursor = ACE_reinterpret_cast (const unsigned char *,
00163                                                       the_src);
00164   const unsigned char lead = *cursor;
00165   // Classify the lead byte: how many continuation bytes follow, and which
00166   // of its low bits carry payload.
00167   size_t trailing;
00168   unsigned char payload_mask;
00169   if (lead < 0x80)
00170     {
00171       trailing = 0;
00172       payload_mask = 0x7f;
00173     }
00174   else if ((lead & 0xE0) == 0xC0)
00175     {
00176       trailing = 1;
00177       payload_mask = 0x1f;
00178     }
00179   else if ((lead & 0xF0) == 0xE0)
00180     {
00181       trailing = 2;
00182       payload_mask = 0x0f;
00183     }
00184   else if ((lead & 0xF8) == 0xF0)
00185     {
00186       trailing = 3;
00187       payload_mask = 0x0f;
00188     }
00189   else
00190     return NON_UNICODE;
00191 
00192   dst = lead & payload_mask;
00193   size_t forward = 1;
00194   for (size_t i = 0; i < trailing; ++i)
00195     {
00196       // Make room for six more bits before looking at the next byte, so the
00197       // value left in <dst> on an early return is the shifted prefix.
00198       dst *= 0x40;
00199       if (++forward > len)
00200         return END_OF_SOURCE;
00201       ++cursor;
00202       if ((*cursor & 0xC0) != 0x80)
00203         return NON_UNICODE;     // Error transcoding unicode scalar
00204       dst += *cursor & 0x3f;
00205     }
00206   return ACE_static_cast (int, forward);
00207 }
00208 
00209 // Decode the UTF-16 sequence at <src> (<len> units available) into <dst>.
00210 // Returns the number of units consumed (1, or 2 for a surrogate pair),
00211 // INVALID_ARGS if <src> is null, END_OF_SOURCE if the sequence needs more
00212 // than <len> units, or NON_UNICODE for an unpaired surrogate.
00213 ACEXML_INLINE int
00214 ACEXML_Transcoder::utf162ucs4 (const ACEXML_UTF16 *src, size_t len,
00215                                ACEXML_UCS4 &dst)
00216 {
00217   if (src == 0)
00218     return INVALID_ARGS;
00219   if (len < 1)                  // Check before the first read of *src.
00220     return END_OF_SOURCE;
00221   if (*src < 0xD800 || *src >= 0xE000)
00222     {
00223       dst = *src;               // Not a surrogate: the unit is the scalar.
00224       return 1;
00225     }
00226   if (*src >= 0xDC00)           // Low surrogate with no preceding high one.
00227     return NON_UNICODE;
00228   if (len < 2)
00229     return END_OF_SOURCE;
00230   if (*(src+1) < 0xDC00 || *(src+1) >= 0xE000)
00231     return NON_UNICODE;         // High surrogate not followed by a low one.
00232   dst = (*src - 0xD800) * 0x400 + (*(src+1) - 0xDC00) + 0x10000;
00233   return 2;
00234 }

Generated on Mon Jun 16 13:23:27 2003 for ACEXML by doxygen 1.2.14, written by Dimitri van Heesch, © 1997-2002