Main Page   Modules   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

FileCharStream.cpp

Go to the documentation of this file.
00001 // $Id: FileCharStream.cpp,v 1.1.1.1.2.1 2003/04/21 19:14:54 chad Exp $
00002 
00003 #include "ACEXML/common/FileCharStream.h"
00004 #include "ace/ACE.h"
00005 #include "ace/Log_Msg.h"
00006 
00007 ACEXML_FileCharStream::ACEXML_FileCharStream (void)
00008   : filename_ (0), encoding_ (0), size_ (0), infile_ (NULL), peek_ (0)
00009 {
00010 }
00011 
00012 ACEXML_FileCharStream::~ACEXML_FileCharStream (void)
00013 {
00014   this->close();
00015 }
00016 
00017 int
00018 ACEXML_FileCharStream::open (const ACEXML_Char *name)
00019 {
00020   delete[] this->filename_;
00021   this->filename_ = 0;
00022 
00023   delete[] this->encoding_;
00024   this->encoding_ = 0;
00025 
00026   this->infile_ = ACE_OS::fopen (name, ACE_TEXT ("r"));
00027   if (this->infile_ == NULL)
00028     return -1;
00029 
00030   ACE_stat statbuf;
00031   if (ACE_OS::stat (name, &statbuf) < 0)
00032     return -1;
00033 
00034   this->size_ = statbuf.st_size;
00035   this->filename_ = ACE::strnew (name);
00036   this->determine_encoding();
00037   return 0;
00038 }
00039 
00040 int
00041 ACEXML_FileCharStream::determine_encoding (void)
00042 {
00043   char input[4];
00044   int retval = 0;
00045   int i = 0;
00046   for (; i < 4 && retval != -1; ++i)
00047     retval = this->getchar_i(input[i]);
00048   if (i < 4)
00049     return -1;
00050 
00051   // Rewind the stream
00052   this->rewind();
00053 
00054   const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
00055   if (!temp)
00056     return -1;
00057   if (ACE_OS::strcmp (temp,
00058                       ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER]) == 0)
00059     return -1;
00060   else
00061     {
00062       this->encoding_ = ACE::strnew (temp);
00063       ACE_DEBUG ((LM_DEBUG, "File's encoding is %s\n", this->encoding_));
00064     }
00065   // Move over the byte-order-mark if present.
00066   char ch;
00067   for (int j = 0; j < 2; ++j)
00068     {
00069       this->getchar_i (ch);
00070       if (ch == '\xFF' || ch == '\xFE' || ch == '\xEF')
00071         continue;
00072       else
00073         {
00074           ungetc (ch, this->infile_);
00075           break;
00076         }
00077     }
00078   return 0;
00079 }
00080 
00081 void
00082 ACEXML_FileCharStream::rewind()
00083 {
00084   ACE_OS::rewind (this->infile_);
00085 }
00086 
00087 int
00088 ACEXML_FileCharStream::available (void)
00089 {
00090   long curr;
00091   if ((curr = ACE_OS::ftell (this->infile_)) < 0)
00092     return -1;
00093   return (this->size_ - curr);
00094 }
00095 
00096 int
00097 ACEXML_FileCharStream::close (void)
00098 {
00099   if (this->infile_ != NULL)
00100     {
00101       ACE_OS::fclose (this->infile_);
00102       this->infile_ = NULL;
00103     }
00104   delete[] this->filename_;
00105   this->filename_ = 0;
00106   delete[] this->encoding_;
00107   this->encoding_ = 0;
00108   this->size_ = 0;
00109   this->peek_ = 0;
00110   return 0;
00111 }
00112 
00113 
00114 int
00115 ACEXML_FileCharStream::getchar_i (char& ch)
00116 {
00117   ch = ACE_OS::fgetc (this->infile_);
00118   return (feof(this->infile_) ? -1 : 0);
00119 }
00120 
00121 int
00122 ACEXML_FileCharStream::read (ACEXML_Char *str,
00123                              size_t len)
00124 {
00125   return ACE_static_cast(int, ACE_OS::fread (str, len, 1, this->infile_));
00126 }
00127 
00128 int
00129 ACEXML_FileCharStream::get (ACEXML_Char& ch)
00130 {
00131 #if defined (ACE_USES_WCHAR)
00132   return this->get_i (ch);
00133 #else
00134   ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00135   return (feof(this->infile_) ? -1 : 0);
00136 #endif /* ACE_USES_WCHAR */
00137 }
00138 
#if defined (ACE_USES_WCHAR)
// Wide-character implementation of get().
//
// For UTF-8 input (and when no encoding could be detected) one byte is
// read and widened.  Otherwise the input is treated as 16-bit units:
// a unit held back by peek_i() is returned first, else two bytes are
// read and combined according to the byte order named by encoding_.
//
// Returns 0 on success, -1 when a complete character could not be read.
int
ACEXML_FileCharStream::get_i (ACEXML_Char& ch)
{
  // encoding_ stays 0 when detection failed in open(); comparing a null
  // pointer with strcmp() would crash, so fall back to byte-wise reads.
  if (this->encoding_ == 0
      || ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
    {
      // EOF is tested on the int result so that read errors are
      // reported as failures too.
      const int c = ACE_OS::fgetc (this->infile_);
      ch = ACE_static_cast (ACEXML_Char, c);
      return (c == EOF) ? -1 : 0;
    }

  // If we have a value in peek_, return it.
  if (this->peek_ != 0)
    {
      ch = this->peek_;
      this->peek_ = 0;
      return 0;
    }

  const int BE = (ACE_OS::strcmp (this->encoding_,
                                  ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;

  // Read the two bytes of the unit with fgetc().  The stream has
  // already been read byte-wise during encoding detection, so wide
  // input functions must not be applied to it.  Every read is checked:
  // testing feof() before reading lets a truncated final unit through.
  int input[2];
  for (int i = 0; i < 2; ++i)
    {
      input[i] = ACE_OS::fgetc (this->infile_);
      if (input[i] == EOF)
        {
          ch = 0;
          return -1;
        }
    }

  ch = ACE_static_cast (ACEXML_Char,
                        BE ? ((input[0] << 8) | input[1])
                           : ((input[1] << 8) | input[0]));
  return 0;
}
#endif /* ACE_USES_WCHAR */
00173 
00174 int
00175 ACEXML_FileCharStream::peek (void)
00176 {
00177 #if defined (ACE_USES_WCHAR)
00178   return this->peek_i();
00179 #else
00180   ACEXML_Char ch = ACE_OS::fgetc (this->infile_);
00181   ::ungetc (ch, this->infile_);
00182   return ch;
00183 #endif /* ACE_USES_WCHAR */
00184 }
00185 
#if defined (ACE_USES_WCHAR)
// Wide-character implementation of peek().
//
// Returns the next character without consuming it, or -1 when a
// complete character is not available.  For 16-bit input the character
// is read from the file and parked in peek_ until get_i() collects it.
int
ACEXML_FileCharStream::peek_i (void)
{
  // If we are reading a UTF-8 encoded file, just use the plain unget.
  // The same path serves a stream whose encoding could not be detected
  // (encoding_ == 0), which must not be passed to strcmp().
  if (this->encoding_ == 0
      || ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
    {
      // Test and push back the untouched int: a narrowed EOF could be
      // pushed back into the stream as if it were a data byte.
      const int c = ACE_OS::fgetc (this->infile_);
      if (c == EOF)
        return -1;
      ::ungetc (c, this->infile_);
      return c;
    }

  // If somebody had already called peek() and not consumed it, return the
  // value held in this->peek_.
  if (this->peek_ != 0)
    return this->peek_;

  // Peek into the stream. This reads two bytes off the stream and keeps
  // the combined unit in peek_.
  const int BE = (ACE_OS::strcmp (this->encoding_,
                                  ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;

  // Bytes are fetched with fgetc() because the stream has already been
  // read byte-wise during encoding detection.  Each read is checked on
  // its own, so a truncated final unit is reported as -1.
  int input[2];
  for (int i = 0; i < 2; ++i)
    {
      input[i] = ACE_OS::fgetc (this->infile_);
      if (input[i] == EOF)
        {
          this->peek_ = 0;
          return -1;
        }
    }

  this->peek_ = ACE_static_cast (ACEXML_Char,
                                 BE ? ((input[0] << 8) | input[1])
                                    : ((input[1] << 8) | input[0]));
  return this->peek_;
}
#endif /* ACE_USES_WCHAR */
00222 
00223 const ACEXML_Char*
00224 ACEXML_FileCharStream::getEncoding (void)
00225 {
00226   return this->encoding_;
00227 }

Generated on Mon Jun 16 13:23:21 2003 for ACEXML by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002