Main Page   Modules   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

ACEXML_Transcoder Class Reference

ACEXML_Transcoder. More...

#include "ACEXML/common/Transcode.h"

List of all members.

Public Types

enum  {
  SUCCESS = 0, DESTINATION_TOO_SHORT = -1, END_OF_SOURCE = -2, INVALID_ARGS = -3,
  IS_SURROGATE = -4, NON_UNICODE = -5
}

Static Public Methods

int utf162utf8 (ACEXML_UTF16 src, ACEXML_UTF8 *dst, size_t len)
int ucs42utf8 (ACEXML_UCS4 src, ACEXML_UTF8 *dst, size_t len)
int ucs42utf16 (ACEXML_UCS4 src, ACEXML_UTF16 *dst, size_t len)
int surrogate2utf8 (ACEXML_UTF16 high, ACEXML_UTF16 low, ACEXML_UTF8 *dst, size_t len)
int surrogate2ucs4 (ACEXML_UTF16 high, ACEXML_UTF16 low, ACEXML_UCS4 &dst)
int utf82ucs4 (const ACEXML_UTF8 *src, size_t len, ACEXML_UCS4 &dst)
int utf162ucs4 (const ACEXML_UTF16 *src, size_t len, ACEXML_UCS4 &dst)
int utf8s2utf16s (const ACEXML_UTF8 *src, ACEXML_UTF16 *dst, size_t len)
int utf16s2utf8s (const ACEXML_UTF16 *src, ACEXML_UTF8 *dst, size_t len)

Public Attributes

enum { ... }  STATUS


Detailed Description

ACEXML_Transcoder.

Wrapper class for transcoding among different Unicode encodings (UTF-8, UTF-16, and UCS-4).

Definition at line 36 of file Transcode.h.


Member Enumeration Documentation

anonymous enum
 

Enumeration values:
SUCCESS 
DESTINATION_TOO_SHORT 
END_OF_SOURCE 
INVALID_ARGS 
IS_SURROGATE 
NON_UNICODE 

Definition at line 42 of file Transcode.h.

00043   {
00044     SUCCESS = 0,
00045     DESTINATION_TOO_SHORT = -1,
00046     END_OF_SOURCE = -2,
00047     INVALID_ARGS = -3,
00048     IS_SURROGATE = -4,
00049     NON_UNICODE = -5
00050   } STATUS;


Member Function Documentation

ACEXML_INLINE int ACEXML_Transcoder::surrogate2ucs4 (ACEXML_UTF16 high, ACEXML_UTF16 low, ACEXML_UCS4 &dst) [static]
 

Definition at line 131 of file Transcode.i.

References ACEXML_UCS4, ACEXML_UTF16, INVALID_ARGS, and SUCCESS.

Referenced by utf162ucs4.

00134 {
00135   if ((high >= 0xD800 && high < 0xDC00) ||
00136       (low >= 0xDC00 && low < 0xE000))
00137     return INVALID_ARGS;
00138 
00139   dst = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
00140   return SUCCESS;
00141 }

ACEXML_INLINE int ACEXML_Transcoder::surrogate2utf8 (ACEXML_UTF16 high, ACEXML_UTF16 low, ACEXML_UTF8 *dst, size_t len) [static]
 

Definition at line 111 of file Transcode.i.

References ACEXML_UCS4, ACEXML_UTF16, ACEXML_UTF8, DESTINATION_TOO_SHORT, and INVALID_ARGS.

00115 {
00116   if (len < 3)
00117     return DESTINATION_TOO_SHORT;
00118 
00119   if (dst == 0 ||
00120       (high >= 0xD800 && high < 0xDC00) ||
00121       (low >= 0xDC00 && low < 0xE000))
00122     return INVALID_ARGS;
00123 
00124   ACEXML_UCS4 src = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
00125   *dst = 0xD800 | (src / 0x400);
00126   *(dst+1) = 0xDC00 | (src % 0x400);
00127   return 2;
00128 }

ACEXML_INLINE int ACEXML_Transcoder::ucs42utf16 (ACEXML_UCS4 src, ACEXML_UTF16 *dst, size_t len) [static]
 

Definition at line 78 of file Transcode.i.

References ACEXML_UCS4, ACEXML_UTF16, DESTINATION_TOO_SHORT, INVALID_ARGS, and NON_UNICODE.

Referenced by utf8s2utf16s.

00081 {
00082   if (dst == 0)
00083     return INVALID_ARGS;
00084 
00085   if (src < 0x10000)
00086     {
00087       if (len < 1)
00088         return DESTINATION_TOO_SHORT;
00089 
00090       if (src >= 0xD800 && src < 0xE000)
00091         return NON_UNICODE;     // Surrogates are not valid unicode value
00092 
00093       *dst = ACE_static_cast (ACEXML_UTF16, src);
00094       return 1;
00095     }
00096   else if (src >= 0x100000 && src < 0x110000)
00097     // Scalar values are encoded into surrogates
00098     {
00099       if (len < 2)
00100         return DESTINATION_TOO_SHORT;
00101 
00102       *dst = 0xD800 | (src / 0x400);
00103       *(dst+1) = 0xDC00 | (src % 0x400);
00104       return 2;
00105     }
00106 
00107   return NON_UNICODE;
00108 }

ACEXML_INLINE int ACEXML_Transcoder::ucs42utf8 (ACEXML_UCS4 src, ACEXML_UTF8 *dst, size_t len) [static]
 

Definition at line 48 of file Transcode.i.

References ACEXML_UCS4, ACEXML_UTF16, ACEXML_UTF8, DESTINATION_TOO_SHORT, INVALID_ARGS, IS_SURROGATE, NON_UNICODE, and utf162utf8.

Referenced by utf16s2utf8s.

00051 {
00052   if (src < 0x10000)
00053     {
00054       int retv = ACEXML_Transcoder::utf162utf8
00055         (ACE_static_cast (ACEXML_UTF16, src),
00056          dst, len);
00057       return (retv == IS_SURROGATE ? NON_UNICODE : retv);
00058     }
00059   else if (src >= 0x100000 && src < 0x110000)
00060     {
00061       if (len < 4)
00062         return DESTINATION_TOO_SHORT;
00063 
00064       if (dst == 0)
00065         return INVALID_ARGS;
00066 
00067       *dst = 0xf0 | (src / 0x40000);
00068       *(dst+1) = 0x80 | ((src % 0x40000) / 0x1000);
00069       *(dst+2) = 0x80 | ((src % 0x1000) / 0x40);
00070       *(dst+3) = 0x80 | (src % 0x40);
00071       return 4;
00072     }
00073   return NON_UNICODE;
00074 }

ACEXML_INLINE int ACEXML_Transcoder::utf162ucs4 (const ACEXML_UTF16 *src, size_t len, ACEXML_UCS4 &dst) [static]
 

Definition at line 210 of file Transcode.i.

References ACEXML_UCS4, ACEXML_UTF16, END_OF_SOURCE, INVALID_ARGS, and surrogate2ucs4.

Referenced by utf16s2utf8s.

00213 {
00214   if (src == 0)
00215     return INVALID_ARGS;
00216 
00217   size_t forward = 1;
00218   if (*src >= 0xDC00 && *src < 0xE000)
00219     {
00220       if (len < 2)
00221         return END_OF_SOURCE;
00222       return ACEXML_Transcoder::surrogate2ucs4 (*src,
00223                                                 *(src+1),
00224                                                 dst);
00225     }
00226   else
00227     {
00228       if (len < 1)
00229         return END_OF_SOURCE;
00230       dst = *src;
00231     }
00232 
00233   return ACE_static_cast(int, forward);
00234 }

ACEXML_INLINE int ACEXML_Transcoder::utf162utf8 (ACEXML_UTF16 src, ACEXML_UTF8 *dst, size_t len) [static]
 

Definition at line 4 of file Transcode.i.

References ACEXML_UTF16, ACEXML_UTF8, DESTINATION_TOO_SHORT, INVALID_ARGS, IS_SURROGATE, and NON_UNICODE.

Referenced by ucs42utf8.

00007 {
00008   // Check for valid argument first...
00009 
00010   if (dst == 0)
00011     return INVALID_ARGS;
00012 
00013   if (src < 0x80)
00014     {
00015       if (len < 1)
00016         return DESTINATION_TOO_SHORT;
00017 
00018       *dst = ACE_static_cast (ACEXML_UTF8, src);
00019       return 1;
00020     }
00021   else if (src < 0x800)
00022     {
00023       if (len < 2)
00024         return DESTINATION_TOO_SHORT;
00025 
00026       *dst = 0xc0 | (src / 0x40);
00027       *(dst+1) = 0x80 | (src % 0x40);
00028       return 2;
00029     }
00030   else
00031     {
00032       if (len < 3)
00033         return DESTINATION_TOO_SHORT;
00034 
00035       // Surrogates (0xD800 - 0xDFFF) are not valid unicode values
00036       if (src >= 0xD800 && src < 0xE000)
00037         return IS_SURROGATE;
00038 
00039       *dst = 0xe0 | (src / 0x1000);
00040       *(dst+1) = 0x80 | ((src % 0x1000) / 0x40);
00041       *(dst+2) = 0x80 | (src % 0x40);
00042       return 3;
00043     }
00044   ACE_NOTREACHED (return NON_UNICODE;)
00045 }

int ACEXML_Transcoder::utf16s2utf8s (const ACEXML_UTF16 *src, ACEXML_UTF8 *dst, size_t len) [static]
 

Definition at line 47 of file Transcode.cpp.

References ACEXML_UCS4, ACEXML_UTF16, ACEXML_UTF8, INVALID_ARGS, ucs42utf8, and utf162ucs4.

00050 {
00051   if (src == 0 || dst == 0)
00052     return INVALID_ARGS;
00053 
00054   size_t src_len = 1;
00055   for (const ACEXML_UTF16 *p = src; *p++ != 0; ++src_len)
00056     ;
00057 
00058   size_t total_len = 0;
00059   int forward;
00060   ACEXML_UCS4 temp;
00061 
00062   while (src_len > 0)
00063     {
00064       if ((forward = ACEXML_Transcoder::utf162ucs4 (src,
00065                                                     src_len,
00066                                                     temp)) <= 0)
00067         return forward;
00068 
00069       src += forward;
00070       src_len -= forward;
00071 
00072       if ((forward = ACEXML_Transcoder::ucs42utf8 (temp,
00073                                                    dst,
00074                                                    len)) <= 0)
00075         return forward;
00076 
00077       total_len += forward;
00078       dst += forward;
00079       len -= forward;
00080     }
00081 
00082   return ACE_static_cast(int, total_len);
00083 }

ACEXML_INLINE int ACEXML_Transcoder::utf82ucs4 (const ACEXML_UTF8 *src, size_t len, ACEXML_UCS4 &dst) [static]
 

Definition at line 144 of file Transcode.i.

References ACEXML_UCS4, ACEXML_UTF8, END_OF_SOURCE, INVALID_ARGS, and NON_UNICODE.

Referenced by utf8s2utf16s.

00147 {
00148   if (the_src == 0)
00149     return INVALID_ARGS;
00150 
00151   const unsigned char *src = ACE_reinterpret_cast (const unsigned char *,
00152                                                    the_src);
00153 
00154   size_t forward = 1;
00155 
00156   if (forward > len)
00157     return END_OF_SOURCE;
00158 
00159   if (ACE_static_cast (unsigned char, *src) < 0x80)
00160     dst = *src;
00161   else if ((*src & 0xE0) == 0xC0)
00162     {
00163       dst = (*(src++) & 0x1f) * 0x40;
00164       if (++forward > len)
00165         return END_OF_SOURCE;
00166       if ((*src & 0xC0) != 0x80)
00167         return NON_UNICODE;     // Error transcoding unicode scalar
00168       dst += *src & 0x3f;
00169     }
00170   else if ((*src & 0xF0) == 0xE0)
00171     {
00172       dst = (*src++ & 0x0f) * 0x40;
00173       if (++forward > len)
00174         return END_OF_SOURCE;
00175       if ((*src & 0xC0) != 0x80)
00176         return NON_UNICODE;
00177       dst = (dst + (*src++ & 0x3f)) * 0x40;
00178       if (++forward > len)
00179         return END_OF_SOURCE;
00180       if ((*src & 0xC0) != 0x80)
00181         return NON_UNICODE;
00182       dst += *src & 0x3f;
00183     }
00184   else if ((*src & 0xF8) == 0xF0)
00185     {
00186       dst = (*src++ & 0x0f) * 0x40;
00187       if (++forward > len)
00188         return END_OF_SOURCE;
00189       if ((*src & 0xC0) != 0x80)
00190         return NON_UNICODE;
00191       dst = (dst + (*src++ & 0x3f)) * 0x40;
00192       if (++forward > len)
00193         return END_OF_SOURCE;
00194       if ((*src & 0xC0) != 0x80)
00195         return NON_UNICODE;
00196       dst = (dst + (*src++ & 0x3f)) * 0x40;
00197       if (++forward > len)
00198         return END_OF_SOURCE;
00199       if ((*src & 0xC0) != 0x80)
00200         return NON_UNICODE;
00201       dst += *src & 0x3f;
00202     }
00203   else
00204     return NON_UNICODE;
00205 
00206   return ACE_static_cast(int, forward);
00207 }

int ACEXML_Transcoder::utf8s2utf16s (const ACEXML_UTF8 *src, ACEXML_UTF16 *dst, size_t len) [static]
 

Definition at line 10 of file Transcode.cpp.

References ACEXML_UCS4, ACEXML_UTF16, ACEXML_UTF8, INVALID_ARGS, ACE_OS_String::strlen, ucs42utf16, and utf82ucs4.

00013 {
00014   if (src == 0 || dst == 0)
00015     return INVALID_ARGS;
00016 
00017   size_t src_len = ACE_OS::strlen (src) + 1;
00018 
00019   size_t total_len = 0;
00020   int forward;
00021   ACEXML_UCS4 temp;
00022 
00023   while (src_len > 0)
00024     {
00025       if ((forward = ACEXML_Transcoder::utf82ucs4 (src,
00026                                                    src_len,
00027                                                    temp)) <= 0)
00028         return forward;
00029 
00030       src += forward;
00031       src_len -= forward;
00032 
00033       if ((forward = ACEXML_Transcoder::ucs42utf16 (temp,
00034                                                     dst,
00035                                                     len)) <= 0)
00036         return forward;
00037 
00038       total_len += forward;
00039       dst += forward;
00040       len -= forward;
00041     }
00042 
00043   return ACE_static_cast(int, total_len);
00044 }


Member Data Documentation

enum { ... } ACEXML_Transcoder::STATUS
 


The documentation for this class was generated from the following files:
  Transcode.h
  Transcode.i
  Transcode.cpp
Generated on Mon Jun 16 13:27:31 2003 for ACEXML by doxygen 1.2.14 written by Dimitri van Heesch, © 1997-2002