Main Page   Modules   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

Parser.cpp

Go to the documentation of this file.
00001 // $Id: Parser.cpp,v 1.1.1.1.2.2 2003/04/21 19:14:54 chad Exp $
00002 
00003 #include "ACEXML/parser/parser/Parser.h"
00004 #include "ACEXML/common/Transcode.h"
00005 #include "ACEXML/common/AttributesImpl.h"
00006 #include "ace/ACE.h"
00007 
00008 static const ACEXML_Char default_attribute_type[] = {'C', 'D', 'A', 'T', 'A', 0};
00009 static const ACEXML_Char empty_string[] = { 0 };
00010 
00011 const ACEXML_Char
00012 ACEXML_Parser::simple_parsing_feature_[] = { 'S', 'i', 'm', 'p', 'l', 'e', 0 };
00013 
00014 const ACEXML_Char
00015 ACEXML_Parser::namespaces_feature_[] = {'h', 't', 't', 'p', ':', '/', '/', 'x', 'm', 'l', '.', 'o', 'r', 'g', '/', 's', 'a', 'x', '/', 'f', 'e', 'a', 't', 'u', 'r', 'e', 's', '/', 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', 's', 0 };
00016 
00017 const ACEXML_Char
00018 ACEXML_Parser::namespace_prefixes_feature_[] = {'h', 't', 't', 'p', ':', '/', '/', 'x', 'm', 'l', '.', 'o', 'r', 'g', '/', 's', 'a', 'x', '/', 'f', 'e', 'a', 't', 'u', 'r', 'e', 's', '/', 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', '-', 'p', 'r', 'e', 'f', 'i', 'x', 'e', 's', 0 };
00019 
00020 #if !defined (__ACEXML_INLINE__)
00021 # include "ACEXML/parser/parser/Parser.i"
00022 #endif /* __ACEXML_INLINE__ */
00023 
00024 ACEXML_Parser::ACEXML_Parser (void)
00025   :   dtd_handler_ (0),
00026       entity_resolver_ (0),
00027       content_handler_ (0),
00028       error_handler_ (0),
00029       instream_ (0),
00030       doctype_ (0),
00031       dtd_system_ (0),
00032       dtd_public_ (0),
00033       locator_(),
00034       simple_parsing_ (0),
00035       namespaces_(1),
00036       namespace_prefixes_ (0)
00037 {
00038 }
00039 
00040 ACEXML_Parser::~ACEXML_Parser (void)
00041 {
00042 }
00043 
00044 int
00045 ACEXML_Parser::getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL)
00046    ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
00047                     ACEXML_SAXNotSupportedException))
00048 {
00049   if (ACE_OS::strcmp (name,
00050                              ACEXML_Parser::simple_parsing_feature_) == 0)
00051     {
00052       return this->simple_parsing_;
00053     }
00054   else if (ACE_OS::strcmp (name,
00055                                   ACEXML_Parser::namespaces_feature_) == 0)
00056     {
00057       return this->namespaces_;
00058     }
00059   else if (ACE_OS::strcmp (name,
00060                                   ACEXML_Parser::namespace_prefixes_feature_) == 0)
00061     {
00062       return this->namespace_prefixes_;
00063     }
00064 
00065   ACEXML_THROW_RETURN (ACEXML_SAXNotRecognizedException (name), -1);
00066 }
00067 
00068 
00069 
00070 void
00071 ACEXML_Parser::setFeature (const ACEXML_Char *name,
00072                            int boolean_value ACEXML_ENV_ARG_DECL)
00073         ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
00074                          ACEXML_SAXNotSupportedException))
00075 {
00076   if (ACE_OS::strcmp (name,
00077                              ACEXML_Parser::simple_parsing_feature_) == 0)
00078     {
00079       this->simple_parsing_ = (boolean_value == 0 ? 0 : 1);
00080       return;
00081     }
00082   else if (ACE_OS::strcmp (name,
00083                                   ACEXML_Parser::namespaces_feature_) == 0)
00084     {
00085       this->namespaces_ = (boolean_value == 0 ? 0 : 1);
00086       return;
00087     }
00088   else if (ACE_OS::strcmp (name,
00089                                   ACEXML_Parser::namespace_prefixes_feature_) == 0)
00090     {
00091       this->namespace_prefixes_ = (boolean_value == 0 ? 0 : 1);
00092       return;
00093     }
00094 
00095   ACEXML_THROW (ACEXML_SAXNotRecognizedException (name));
00096 }
00097 
00098 void *
00099 ACEXML_Parser::getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL)
00100   ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
00101                    ACEXML_SAXNotSupportedException))
00102 {
00103   ACEXML_THROW_RETURN (ACEXML_SAXNotSupportedException (name), 0);
00104 }
00105 
00106 void
00107 ACEXML_Parser::setProperty (const ACEXML_Char *name,
00108                             void *value ACEXML_ENV_ARG_DECL)
00109   ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
00110                    ACEXML_SAXNotSupportedException))
00111 {
00112   ACE_UNUSED_ARG (value);
00113 
00114   ACEXML_THROW (ACEXML_SAXNotSupportedException (name));
00115 }
00116 
00117 void
00118 ACEXML_Parser::report_error (const ACEXML_Char* message ACEXML_ENV_ARG_DECL)
00119 {
00120   ACEXML_SAXParseException* exception = 0;
00121   ACE_NEW_NORETURN (exception,
00122                     ACEXML_SAXParseException (message));
00123   if (this->error_handler_)
00124     this->error_handler_->error (*exception ACEXML_ENV_ARG_PARAMETER);
00125   else
00126     ACEXML_ENV_RAISE (exception);
00127   return;
00128 }
00129 
00130 void
00131 ACEXML_Parser::report_warning (const ACEXML_Char* message ACEXML_ENV_ARG_DECL)
00132 {
00133   ACEXML_SAXParseException* exception = 0;
00134   ACE_NEW_NORETURN (exception,
00135                     ACEXML_SAXParseException (message));
00136   if (this->error_handler_)
00137     this->error_handler_->warning (*exception ACEXML_ENV_ARG_PARAMETER);
00138   return;
00139 }
00140 
00141 void
00142 ACEXML_Parser::report_fatal_error (const ACEXML_Char* message ACEXML_ENV_ARG_DECL)
00143 {
00144   ACEXML_SAXParseException* exception = 0;
00145   ACE_NEW_NORETURN (exception,
00146                     ACEXML_SAXParseException (message));
00147   if (this->error_handler_)
00148     this->error_handler_->fatalError (*exception ACEXML_ENV_ARG_PARAMETER);
00149   ACEXML_ENV_RAISE (exception);
00150   return;
00151 }
00152 
00153 void
00154 ACEXML_Parser::parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL)
00155   ACE_THROW_SPEC ((ACEXML_SAXException))
00156 {
00157   if (input == 0 || (this->instream_ = input->getCharStream ())  == 0)
00158     {
00159       this->report_fatal_error(ACE_TEXT ("Invalid input source") ACEXML_ENV_ARG_PARAMETER);
00160       return;
00161     }
00162 
00163   // Set up Locator. At this point, the systemId and publicId are null. We
00164   // can't do better, as we don't know anything about the InputSource
00165   // currently, and according to the SAX spec, the parser should set up the
00166   // locator before reporting any document events.
00167   if (this->content_handler_)
00168     this->content_handler_->setDocumentLocator (&this->locator_);
00169 
00170   if (this->simple_parsing_ == 0)
00171     {
00172       this->parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_PARAMETER);
00173       ACEXML_CHECK;
00174     }
00175 
00176   this->content_handler_->startDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER);
00177   ACEXML_CHECK;
00178 
00179   int doctype_defined = 0;
00180 
00181   for (int prolog_done = 0; prolog_done == 0; )
00182     {
00183       if (this->skip_whitespace (0) != '<')
00184         {
00185           this->report_fatal_error (ACE_TEXT ("Expecting '<'") ACEXML_ENV_ARG_PARAMETER);
00186           return;
00187         }
00188       ACEXML_Char fwd = this->peek ();
00189       switch (fwd)
00190         {
00191         case '!':
00192           this->get ();         // consume the '!'
00193           fwd = this->peek ();
00194           if (fwd == 'D' && !doctype_defined)       // DOCTYPE
00195             {
00196               // This will also take care of the trailing MISC block if any.
00197               this->parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
00198               ACEXML_CHECK;
00199               doctype_defined = 1;
00200               break;
00201             }
00202           else if (fwd == '-')  // COMMENT
00203             {
00204               if (this->grok_comment () < 0)
00205                 {
00206                   this->report_fatal_error(ACE_TEXT ("Invalid comment") ACEXML_ENV_ARG_PARAMETER);
00207                   return;
00208                 }
00209             }
00210           else
00211             {
00212               this->report_fatal_error (ACE_TEXT ("Duplicate DOCTYPE definitions") ACEXML_ENV_ARG_PARAMETER);
00213               return;
00214             }
00215           break;
00216         case '?':
00217           this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
00218           ACEXML_CHECK;
00219           break;
00220         case 0:
00221           this->report_fatal_error (ACE_TEXT ("Unexpected EOF") ACEXML_ENV_ARG_PARAMETER);
00222           return;
00223         default:                // Root element begins
00224           prolog_done = 1;
00225           break;
00226         }
00227     }
00228 
00229   // Now parse root element.
00230   this->parse_element (1 ACEXML_ENV_ARG_PARAMETER);
00231   ACEXML_CHECK;
00232 
00233   this->content_handler_->endDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER);
00234   ACEXML_CHECK;
00235 
00236   // Reset the Locator held within the parser
00237   this->locator_.reset();
00238 }
00239 
00240 void
00241 ACEXML_Parser::parse (const ACEXML_Char *systemId ACEXML_ENV_ARG_DECL)
00242   ACE_THROW_SPEC ((ACEXML_SAXException))
00243 {
00244   // @@ Not implemented.
00245   ACE_UNUSED_ARG (systemId);
00246 
00247   ACEXML_THROW (ACEXML_SAXNotSupportedException ());
00248 }
00249 
00250 
00251 void
00252 ACEXML_Parser::parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL)
00253       ACE_THROW_SPEC ((ACEXML_SAXException))
00254 {
00255   if (this->parse_token (ACE_TEXT("<?xml")) < 0)
00256     {
00257       this->report_fatal_error(ACE_TEXT ("Invalid XMLDecl ('<?xml' ?)") ACEXML_ENV_ARG_PARAMETER);
00258       return;
00259     }
00260 
00261   ACEXML_Char *astring;
00262 
00263   if (this->skip_whitespace (0) != 'v' // Discard whitespace
00264       || (this->parse_token (ACE_TEXT("ersion")) < 0)
00265       || this->skip_equal () != 0
00266       || this->get_quoted_string (astring) != 0)
00267     {
00268       this->report_fatal_error (ACE_TEXT ("Unrecognized XMLDecl ('version'?)") ACEXML_ENV_ARG_PARAMETER);
00269       return;
00270     }
00271   // @@ Handle version number here.
00272   int xmldecl_state = 0;
00273   int seen_encoding = 0;
00274 
00275   while (1)
00276     {
00277       ACEXML_Char fwd = this->peek ();
00278       if (fwd != '?')
00279         {
00280           fwd = this->skip_whitespace (0); // Discard whitespace
00281           if (fwd == '?')
00282             {
00283               // Fall down to consume the '?' and wrap up the XML Decl parsing.
00284             }
00285           else if (xmldecl_state == 0 && fwd == 'e')
00286             {
00287               if ((this->parse_token (ACE_TEXT("ncoding")) == 0) &&
00288                   this->skip_equal () == 0 &&
00289                   this->get_quoted_string (astring) == 0)
00290                 {
00291                   if (seen_encoding)
00292                     {
00293                       this->report_fatal_error (ACE_TEXT ("Duplicate encoding defined") ACEXML_ENV_ARG_PARAMETER);
00294                       return;
00295                     }
00296                   else
00297                     {
00298                     seen_encoding = 1;
00299                       if (ACE_OS::strcmp (astring,
00300                                           this->instream_->getEncoding()) != 0)
00301                         {
00302                           if (ACE_OS::strstr (astring,
00303                                               this->instream_->getEncoding()) != 0)
00304                             {
00305                               ACE_ERROR ((LM_ERROR,
00306                                           ACE_TEXT ("Detected Encoding is %s : Declared Encoding is %s"),
00307                                           this->instream_->getEncoding(), astring));
00308                               this->report_fatal_error (ACE_TEXT ("Encoding declaration doesn't match detected encoding") ACEXML_ENV_ARG_PARAMETER);
00309                               return;
00310                             }
00311                         }
00312                     }
00313                   continue;
00314                 }
00315               else
00316                 break;
00317             }
00318           else if (xmldecl_state < 2 && fwd == 's')
00319             {
00320               if ((this->parse_token (ACE_TEXT("tandalone")) == 0) &&
00321                   this->skip_equal () == 0 &&
00322                   this->get_quoted_string (astring) == 0)
00323                 {
00324                   xmldecl_state = 2;
00325                   if (ACE_OS::strcmp (astring, ACE_TEXT ("yes")) == 0)
00326                     {
00327                       // @@ This is a standalone XML file.
00328                       continue;
00329                     }
00330                   else if (ACE_OS::strcmp (astring, ACE_TEXT ("no")) == 0)
00331                     {
00332                       // @@ This is not a stand alone XML file.
00333                       continue;
00334                     }
00335                 }
00336               break;
00337             }
00338           else
00339             break;
00340         }
00341       if (this->parse_token (ACE_TEXT ("?>")) < 0)
00342         break;
00343       return;
00344     }   // End parsing XML Decl.
00345   this->report_fatal_error (ACE_TEXT ("Unrecognized XML Decl ('standalone'?)") ACEXML_ENV_ARG_PARAMETER);
00346   return;
00347 }
00348 
00349 int
00350 ACEXML_Parser::grok_comment (void)
00351 {
00352   /// Simply filter out all the comment
00353   int state = 0;
00354 
00355   if (this->get () != '-' ||    // Skip the opening "<!--"
00356       this->get () != '-' ||    // completely.
00357       this->get () == '-')      // and at least something not '-'.
00358     return -1;
00359 
00360   while (state < 3)
00361     // Waiting for the trailing three character '-->'. Notice that
00362     // according to the spec, '--->' is not a valid closing comment
00363     // sequence. But we'll let it pass anyway.
00364     {
00365       ACEXML_Char fwd = this->get ();
00366       if ((fwd == '-' && state < 2) ||
00367           (fwd == '>' && state == 2))
00368         state += 1;
00369       else
00370         state = 0;              // Reset parse state.
00371     }
00372   return 0;
00373 }
00374 
00375 int
00376 ACEXML_Parser::parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL)
00377 {
00378   if (this->get () != '?')
00379     {                           // How did we get here?
00380       this->report_fatal_error(ACE_TEXT ("Internal error") ACEXML_ENV_ARG_PARAMETER);
00381       return -1;
00382     }
00383   const ACEXML_Char *pitarget = this->read_name ();
00384   ACEXML_Char *instruction = 0;
00385 
00386   if (ACE_OS::strcasecmp (ACE_TEXT ("xml"), pitarget) != 0)
00387     {
00388       // Invalid PITarget name.
00389       this->report_fatal_error(ACE_TEXT ("PITarget name cannot start with 'xml'") ACEXML_ENV_ARG_PARAMETER);
00390       return -1;
00391     }
00392 
00393   int state = 0;
00394   ACEXML_Char ch = this->skip_whitespace (0);
00395 
00396   while (state < 2)
00397     {
00398       switch (ch)
00399         {
00400         case '?':
00401           if (state == 0)
00402             state = 1;
00403           break;
00404         case '>':
00405           if (state == 1)
00406             {
00407               instruction = this->obstack_.freeze ();
00408               this->content_handler_->processingInstruction (pitarget,
00409                                                              instruction ACEXML_ENV_ARG_PARAMETER);
00410               ACEXML_CHECK_RETURN (-1);
00411               this->obstack_.unwind (ACE_const_cast (ACEXML_Char*, pitarget));
00412               return 0;
00413             }
00414           break;
00415         case 0x0D:                // End-of-Line handling
00416           ch = (this->peek () == 0x0A ? this->get () : 0x0A);
00417           // Fall thru...
00418         case 0x0A:
00419           // Fall thru...
00420         default:
00421           if (state == 1)
00422             this->obstack_.grow ('?');
00423           this->obstack_.grow (ch);
00424           state = 0;
00425         }
00426       ch = this->get ();
00427     }
00428   return -1;
00429 }
00430 
00431 int
00432 ACEXML_Parser::parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL)
00433       ACE_THROW_SPEC ((ACEXML_SAXException))
00434 {
00435   if (this->parse_token (ACE_TEXT ("DOCTYPE")) < 0)
00436     {
00437       this->report_fatal_error(ACE_TEXT ("Expecting keyword 'DOCTYPE'") ACEXML_ENV_ARG_PARAMETER);
00438       return -1;
00439     }
00440 
00441   ACEXML_Char nextch = this->skip_whitespace (0);
00442   if (nextch == 0)
00443     {
00444       this->report_fatal_error(ACE_TEXT ("Expecting a DOCTYPE name") ACEXML_ENV_ARG_PARAMETER);
00445       return -1;
00446     }
00447 
00448   this->doctype_ = this->read_name (nextch);
00449 
00450   this->skip_whitespace_count (&nextch);
00451 
00452   if (nextch == 'S' || nextch == 'P') // ExternalID defined
00453     {
00454       this->parse_external_id_and_ref (this->dtd_public_,
00455                                        this->dtd_system_
00456                                        ACEXML_ENV_ARG_PARAMETER);
00457       ACEXML_CHECK_RETURN (-1);
00458 //       if (this->dtd_public_ == 0)
00459 //         ACE_DEBUG ((LM_DEBUG,
00460 //                     ACE_TEXT ("ACEXML Parser got external DTD id: SYSTEM %s\n"),
00461 //                     this->dtd_system_));
00462 //       else
00463 //         ACE_DEBUG ((LM_DEBUG,
00464 //                     ACE_TEXT ("ACEXML Parser got DTD external id: PUBLIC %s %s\n"),
00465 //                     this->dtd_public_, this->dtd_system_));
00466     }
00467 
00468   nextch = this->skip_whitespace (0);
00469   switch (nextch)
00470     {
00471     case '[':                   // Internal DTD definition
00472       if (this->parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
00473         return -1;              // Error in markupdecl
00474       break;
00475     case '>':                   // End of DTD definition
00476       // this is an XML document without a dectypedecl.
00477       return 0;
00478     case '0':
00479       this->report_fatal_error (ACE_TEXT ("Unexpected EOF") ACEXML_ENV_ARG_PARAMETER);
00480       return -1;
00481     default:
00482       break;
00483     }
00484 
00485   if (this->skip_whitespace (0) != '>')
00486     {
00487       this->report_fatal_error(ACE_TEXT ("Internal error") ACEXML_ENV_ARG_PARAMETER);
00488       return -1;
00489     }
00490   return 0;
00491 }
00492 
00493 void
00494 ACEXML_Parser::parse_element (int is_root ACEXML_ENV_ARG_DECL)
00495       ACE_THROW_SPEC ((ACEXML_SAXException))
00496 {
00497   // Parse STag.
00498   const ACEXML_Char *startname = this->read_name ();
00499   if (startname == 0)
00500     {
00501       this->report_fatal_error (ACE_TEXT ("Unexpected EOF") ACEXML_ENV_ARG_PARAMETER);
00502       return;
00503     }
00504   if (is_root && this->doctype_ != 0
00505       && ACE_OS::strcmp (startname, this->doctype_) != 0)
00506     {
00507       this->report_fatal_error (ACE_TEXT ("Root element missing") ACEXML_ENV_ARG_PARAMETER);
00508       return;
00509     }
00510   ACEXML_AttributesImpl attributes;
00511   ACEXML_Char ch;
00512   int new_namespace = 0;
00513   const ACEXML_Char *endname = 0;
00514   const ACEXML_Char *ns_uri, *ns_lname; // namespace URI and localName
00515   ACEXML_Char* prefix = 0;
00516   ACEXML_Char* name = 0;
00517   for (int start_element_done = 0; start_element_done == 0;)
00518     {
00519       ch = this->skip_whitespace (0);
00520 
00521       switch (ch)
00522         {
00523         case 0:
00524           this->report_fatal_error(ACE_TEXT ("Internal error") ACEXML_ENV_ARG_PARAMETER);
00525           return;
00526         case '/':
00527           if (this->get () != '>')
00528             {
00529               this->report_fatal_error(ACE_TEXT ("Expecting '>'") ACEXML_ENV_ARG_PARAMETER);
00530               return;
00531             }
00532           else
00533             {
00534               this->xml_namespace_.processName(startname, ns_uri, ns_lname, 0);
00535               prefix = ACE_const_cast (ACEXML_Char*,
00536                                        this->xml_namespace_.getPrefix(ns_uri));
00537               this->report_prefix_mapping (prefix, ns_uri, ns_lname, 1 ACEXML_ENV_ARG_PARAMETER);
00538               ACEXML_CHECK;
00539               this->content_handler_->startElement (ns_uri, ns_lname,
00540                                                     startname, &attributes ACEXML_ENV_ARG_PARAMETER);
00541               ACEXML_CHECK;
00542               this->content_handler_->endElement (ns_uri, ns_lname, startname ACEXML_ENV_ARG_PARAMETER);
00543               ACEXML_CHECK;
00544               this->report_prefix_mapping (prefix, ns_uri, ns_lname, 0 ACEXML_ENV_ARG_PARAMETER);
00545               ACEXML_CHECK;
00546             }
00547           if (new_namespace != 0)
00548             this->xml_namespace_.popContext ();
00549           return;
00550 
00551         case '>':
00552           {
00553             this->xml_namespace_.processName (startname, ns_uri, ns_lname, 0);
00554             prefix = ACE_const_cast (ACEXML_Char*,
00555                                      this->xml_namespace_.getPrefix (ns_uri));
00556             this->report_prefix_mapping (prefix, ns_uri, ns_lname, 1 ACEXML_ENV_ARG_PARAMETER);
00557             ACEXML_CHECK;
00558             this->content_handler_->startElement (ns_uri, ns_lname, startname,
00559                                                   &attributes ACEXML_ENV_ARG_PARAMETER);
00560             ACEXML_CHECK;
00561             start_element_done = 1;
00562             break;
00563           }
00564         default:
00565           ACEXML_Char *attvalue = 0;
00566           ACEXML_Char *attname = this->read_name (ch);
00567 
00568           if (attname == 0 ||
00569               this->skip_equal () != 0 ||
00570               this->get_quoted_string (attvalue) != 0)
00571             {
00572               this->report_fatal_error(ACE_TEXT ("Error reading attribute") ACEXML_ENV_ARG_PARAMETER);
00573               return;
00574             }
00575 
00576           // Handling new namespace if any. Notice that the order of
00577           // namespace declaration does matter.
00578           if (ACE_OS::strncmp (attname, ACE_TEXT("xmlns"), 5) == 0)
00579             {
00580               if (this->namespaces_)
00581                 {
00582                   if (new_namespace == 0)
00583                     {
00584                       this->xml_namespace_.pushContext ();
00585                       new_namespace = 1;
00586                     }
00587                   name = ACE_OS::strchr (attname, ':');
00588                   const ACEXML_Char* ns_name = (name == 0)?empty_string:name+1;
00589                   if (this->xml_namespace_.declarePrefix (ns_name,
00590                                                           attvalue) == -1)
00591                     {
00592                       this->report_fatal_error(ACE_TEXT ("Duplicate namespace prefix") ACEXML_ENV_ARG_PARAMETER);
00593                       return;
00594                     }
00595                 }
00596               if (this->namespace_prefixes_)
00597                 {
00598                   // Namespace_prefixes_feature_ is required. So add the
00599                   // xmlns:foo to the list of attributes.
00600                   if (attributes.addAttribute (0, 0, attname,
00601                                                default_attribute_type,
00602                                                attvalue) == -1)
00603                     {
00604                       this->report_fatal_error(ACE_TEXT ("Duplicate attribute found") ACEXML_ENV_ARG_PARAMETER);
00605                       return;
00606                     }
00607                 }
00608               if (!this->namespaces_ && !this->namespace_prefixes_)
00609                 {
00610                   this->report_fatal_error(ACE_TEXT ("Both namespaces feature and namespace_prefixes feature are false. Illegal Mode") ACEXML_ENV_ARG_PARAMETER);
00611                   return;
00612                 }
00613             }
00614           else
00615             {
00616               const ACEXML_Char *uri, *lName;
00617               this->xml_namespace_.processName (attname, uri, lName, 1);
00618               if (attributes.addAttribute (uri, lName, attname,
00619                                            default_attribute_type,
00620                                            attvalue) == -1)
00621                 {
00622                   this->report_fatal_error(ACE_TEXT ("Duplicate attribute found") ACEXML_ENV_ARG_PARAMETER);
00623                   return;
00624                 }
00625             }
00626           break;
00627         }
00628     }
00629   ACEXML_Char *cdata;
00630   size_t cdata_length = 0;
00631 
00632   // Parse element contents.
00633   while (1)
00634     {
00635       ACEXML_Char ch = this->get ();
00636 
00637       switch (ch)
00638         {
00639         case 0:
00640           this->report_fatal_error(ACE_TEXT ("Internal error") ACEXML_ENV_ARG_PARAMETER);
00641           return;
00642         case '<':
00643           // Push out old 'characters' event.
00644           if (cdata_length != 0)
00645             {
00646               cdata = this->obstack_.freeze ();
00647               this->content_handler_->characters (cdata, 0,
00648                                          ACE_static_cast(int, cdata_length)
00649                                          ACEXML_ENV_ARG_PARAMETER);
00650               ACEXML_CHECK;
00651               this->obstack_.unwind (cdata);
00652               cdata_length = 0;
00653             }
00654 
00655           switch (this->peek ())
00656             {
00657             case '!':             // a comment or a CDATA section.
00658               this->get ();       // consume '!'
00659               ch = this->peek ();
00660               if (ch == '-')      // a comment
00661                 {
00662                   if (this->grok_comment () < 0)
00663                     {
00664                       this->report_fatal_error(ACE_TEXT ("Error parsing comment") ACEXML_ENV_ARG_PARAMETER);
00665                       return;
00666                     }
00667                 }
00668               else if (ch == '[') // a CDATA section.
00669                 {
00670                   this->parse_cdata (ACEXML_ENV_SINGLE_ARG_PARAMETER);
00671                   ACEXML_CHECK;
00672                 }
00673               else
00674                 {
00675                   this->report_fatal_error(ACE_TEXT ("Unexpected character") ACEXML_ENV_ARG_PARAMETER);
00676                   return;
00677                 }
00678               break;
00679             case '?':             // a PI.
00680               this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
00681               ACEXML_CHECK;
00682               break;
00683             case '/':             // an ETag.
00684               {
00685                 this->get ();       // consume '/'
00686                 endname = this->read_name ();
00687                 if (endname == 0 ||
00688                     ACE_OS::strcmp (startname, endname) != 0)
00689                   {
00690                     this->report_fatal_error(ACE_TEXT ("Mismatched End-tag encountered") ACEXML_ENV_ARG_PARAMETER);
00691                     return ;
00692                   }
00693                 if (this->skip_whitespace (0) != '>')
00694                   {
00695                     this->report_fatal_error(ACE_TEXT ("Expecting '>' in an end-tag") ACEXML_ENV_ARG_PARAMETER);
00696                     return;
00697                   }
00698                 this->content_handler_->endElement (ns_uri, ns_lname, endname ACEXML_ENV_ARG_PARAMETER);
00699                 ACEXML_CHECK;
00700                 prefix = ACE_const_cast (ACEXML_Char*,
00701                                          this->xml_namespace_.getPrefix(ns_uri));
00702                 this->report_prefix_mapping (prefix, ns_uri, ns_lname, 0 ACEXML_ENV_ARG_PARAMETER);
00703                     ACEXML_CHECK;
00704                 if (new_namespace != 0)
00705                   this->xml_namespace_.popContext ();
00706                 return;
00707               }
00708             default:              // a new nested element?
00709               this->parse_element (0 ACEXML_ENV_ARG_PARAMETER);
00710               ACEXML_CHECK;
00711               break;
00712             }
00713           break;
00714         case '&':
00715           {
00716             const ACEXML_String *replace = 0;
00717             ACEXML_String charval;
00718             ACEXML_Char buffer[6];
00719 
00720             if (this->peek () == '#')
00721               {
00722                 if (this->parse_char_reference (buffer, 6) != 0)
00723                   {
00724                     // not referring to any character exception?
00725                     return;
00726                   }
00727                 charval.set (buffer, 0);
00728                 replace = &charval;
00729               }
00730             else
00731               replace = this->parse_reference ();
00732 
00733             if (replace == 0)
00734               {
00735                 this->report_fatal_error(ACE_TEXT ("Internal error") ACEXML_ENV_ARG_PARAMETER);
00736                 return;
00737               }
00738 //             if (this->try_grow_cdata (replace->length (),
00739 //                                       cdata_length, xmlenv) == 0)
00740 //               {
00741                 cdata_length += replace->length ();
00742                 for (size_t i = 0; i < replace->length (); ++i)
00743                   this->obstack_.grow ((*replace)[i]);
00744 //              }
00745 //            else
00746 //              return;
00747           }
00748           break;
00749         case 0x0D:                // End-of-Line handling
00750           ch = (this->peek () == 0x0A ? this->get () : 0x0A);
00751           // Fall thru...
00752         case 0x0A:
00753           // Fall thru...
00754         default:
00755           ++cdata_length;
00756           cdata = this->obstack_.grow (ch);
00757           if (cdata == 0)
00758             {
00759               cdata = this->obstack_.freeze ();
00760               this->content_handler_->characters (cdata, 0,
00761                                            ACE_static_cast(int, cdata_length)
00762                                            ACEXML_ENV_ARG_PARAMETER);
00763               ACEXML_CHECK;
00764               this->obstack_.grow (ch);
00765               cdata_length = 1;   // the missing char.
00766             }
00767         }
00768     }
00769   ACE_NOTREACHED (return;)
00770 }
00771 
00772 int
00773 ACEXML_Parser::parse_char_reference (ACEXML_Char *buf, size_t len)
00774 {
00775   if (this->get () != '#')
00776     {
00777       // Internal error.
00778       return -1;
00779     }
00780 
00781   int hex = 0;
00782 
00783   if (this->peek () == 'x')
00784     {
00785       hex = 1;
00786       this->get ();
00787     }
00788 
00789   int more_digit = 0;
00790   ACEXML_UCS4 sum = 0;
00791 
00792   while (1)
00793     {
00794       ACEXML_Char ch = this->get ();
00795       switch (ch)
00796         {
00797         case '0':
00798         case '1':
00799         case '2':
00800         case '3':
00801         case '4':
00802         case '5':
00803         case '6':
00804         case '7':
00805         case '8':
00806         case '9':
00807           sum = sum * (hex ? 16 : 10) + (ch - '0');
00808           break;
00809         case 'a':
00810         case 'A':
00811           if (!hex)
00812             return -1;
00813           sum = sum * 16 + 10;
00814           break;
00815         case 'b':
00816         case 'B':
00817           if (!hex)
00818             return -1;
00819           sum = sum * 16 + 11;
00820           break;
00821         case 'c':
00822         case 'C':
00823           if (!hex)
00824             return -1;
00825           sum = sum * 16 + 12;
00826           break;
00827         case 'd':
00828         case 'D':
00829           if (!hex)
00830             return -1;
00831           sum = sum * 16 + 13;
00832           break;
00833         case 'e':
00834         case 'E':
00835           if (!hex)
00836             return -1;
00837           sum = sum * 16 + 14;
00838           break;
00839         case 'f':
00840         case 'F':
00841           if (!hex)
00842             return -1;
00843           sum = sum * 16 + 15;
00844           break;
00845         case ';':
00846           if (more_digit == 0)  // no digit exist???
00847             return -1;
00848           int clen;
00849 #if defined (ACE_USES_WCHAR)    // UTF-16
00850           if ((clen = ACEXML_Transcoder::ucs42utf16 (sum, buf, len)) < 0)
00851             return -1;
00852 
00853 #elif 1                         // or UTF-8
00854           if ((clen = ACEXML_Transcoder::ucs42utf8 (sum, buf, len)) < 0)
00855             return -1;
00856           //  #elif 0                         // UCS 4, not likely
00857           //            buf [0] = sum;
00858           //            buf [1] = 0;
00859 #endif
00860           buf [clen] = 0;
00861           return 0;
00862         default:
00863           return -1;
00864         }
00865       more_digit = 1;
00866     }
00867   ACE_NOTREACHED (return -1);
00868 }
00869 
00870 const ACEXML_String *
00871 ACEXML_Parser::parse_reference (void)
00872 {
00873   // @@ We'll use a temporary buffer here as the Obstack is most likely in
00874   // use when we are here. This puts a limit on the max length of a
00875   // reference.
00876   ACEXML_Char ref[MAXPATHLEN];
00877 
00878   size_t loc = 0;
00879 
00880   while (loc < MAXPATHLEN -1)
00881     {
00882       ACEXML_Char ch = this->get ();
00883       if (ch == ';')
00884         {
00885           ref[loc] = 0;
00886           break;
00887         }
00888       else
00889         ref[loc++] = ch;
00890     }
00891 
00892   return this->entities_.resolve_entity (ref);
00893 }
00894 
00895 int
00896 ACEXML_Parser::parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL)
00897 {
00898   if (this->parse_token (ACE_TEXT ("[CDATA[")) < 0)
00899     {
00900       this->report_fatal_error(ACE_TEXT ("'[CDATA[' expected") ACEXML_ENV_ARG_PARAMETER);
00901       return -1;
00902     }
00903 
00904   int parse_state = 0;
00905   size_t datalen = 0;
00906 
00907   while (1)
00908     {
00909       ACEXML_Char ch;
00910       ACEXML_Char *cdata;
00911 
00912       ch = this->get ();
00913       // Anything goes except the sequence "]]>".
00914       switch (parse_state)
00915         {
00916         case 2:
00917           if (ch == ']')
00918             {
00919               parse_state = 3;
00920               continue;
00921             }
00922           break;
00923         case 3:
00924           if (ch == '>')        // Yay!
00925             {
00926               cdata = this->obstack_.freeze ();
00927               this->content_handler_->characters (cdata, 0,
00928                                                   ACE_static_cast(int, datalen)
00929                                                   ACEXML_ENV_ARG_PARAMETER);
00930               // ACEXML_CHECK_RETURN (-1);
00931               this->obstack_.unwind(cdata);
00932               return 0;
00933             }
00934           break;
00935         default:
00936           if (ch == ']')
00937             {
00938               parse_state = 2;
00939               continue;
00940             }
00941           else
00942             parse_state = 1;
00943         }
00944       while (parse_state > 0)
00945         {
00946           if (this->try_grow_cdata (1, datalen ACEXML_ENV_ARG_PARAMETER) < 0)
00947             return -1;
00948 
00949           if (parse_state != 1)
00950             this->obstack_.grow (']');
00951           else
00952             {
00953               if (ch == 0x0D)
00954                 ch = (this->peek () == 0x0A ? this->get () : 0x0A);
00955               this->obstack_.grow (ch);
00956             }
00957           ++datalen;
00958           --parse_state;
00959         }
00960     };
00961   ACE_NOTREACHED (return -1);
00962 }
00963 
00964 int
00965 ACEXML_Parser::try_grow_cdata (size_t size, size_t &len ACEXML_ENV_ARG_DECL)
00966 {
00967   if (this->obstack_.request (size) != 0)
00968     {
00969       if (len != 0)
00970         {
00971           ACEXML_Char *cdata = this->obstack_.freeze ();
00972           if (cdata == 0)
00973             {
00974               this->report_fatal_error(ACE_TEXT ("Internal Error growing CDATA buffer") ACEXML_ENV_ARG_PARAMETER);
00975               return -1;
00976             }
00977           this->content_handler_->characters (cdata,
00978                                               0,
00979                                               ACE_static_cast(int, len)
00980                                               ACEXML_ENV_ARG_PARAMETER);
00981           ACEXML_CHECK_RETURN (-1);
00982           len = 0;              // reset counter
00983           if (this->obstack_.request (size) == 0)
00984             return 0;
00985         }
00986       this->report_fatal_error(ACE_TEXT ("Internal Error, buffer overflowed") ACEXML_ENV_ARG_PARAMETER);
00987       return -1;
00988     }
00989   return 0;
00990 }
00991 
00992 
00993 int
00994 ACEXML_Parser::parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
00995 {
00996   ACEXML_Char nextch = this->skip_whitespace (0);
00997 
00998   do {
00999     switch (nextch)
01000       {
01001       case '<':                 // Start of markup Decl.
01002         nextch = this->peek ();
01003         switch (nextch)
01004           {
01005           case '!':
01006             this->get ();       // Discard '!'
01007             nextch = this->peek ();
01008             switch (nextch)
01009               {
01010               case 'E':         // An ELEMENT or ENTITY decl
01011                 this->get ();
01012                 nextch = this->peek ();
01013                 switch (nextch)
01014                   {
01015                   case 'L':
01016                     if (this->parse_element_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
01017                       return -1;
01018                     break;
01019 
01020                   case 'N':
01021                     if (this->parse_entity_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
01022                       return -1;
01023                     break;
01024 
01025                   default:
01026                     this->report_fatal_error(ACE_TEXT ("Invalid keyword in decl spec") ACEXML_ENV_ARG_PARAMETER);
01027                     return -1;
01028                   }
01029                 break;
01030 
01031               case 'A':         // An ATTLIST decl
01032                 if (this->parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
01033                   return -1;
01034                 break;
01035 
01036               case 'N':         // A NOTATION decl
01037                 if (this->parse_notation_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
01038                   return -1;
01039                 break;
01040 
01041               case '-':         // a comment.
01042                 if (this->grok_comment () < 0)
01043                   {
01044                     this->report_fatal_error(ACE_TEXT ("Error parsing comment") ACEXML_ENV_ARG_PARAMETER);
01045                     return -1;
01046                   }
01047                 break;
01048               case 0:
01049                 this->report_fatal_error (ACE_TEXT ("Unexpected EOF") ACEXML_ENV_ARG_PARAMETER);
01050                 return -1;
01051               default:
01052                 this->report_fatal_error (ACE_TEXT ("Invalid char. follows '<!' in markupdecl") ACEXML_ENV_ARG_PARAMETER);
01053                 return -1;
01054               }
01055             break;
01056 
01057           case '?':             // PI
01058             this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
01059             ACEXML_CHECK_RETURN (-1);
01060             break;
01061 
01062           case 0:
01063             this->report_fatal_error (ACE_TEXT ("Unexpected EOF") ACEXML_ENV_ARG_PARAMETER);
01064             return -1;
01065           default:
01066             this->report_fatal_error (ACE_TEXT ("Invalid char. follows '<!' in markupdecl") ACEXML_ENV_ARG_PARAMETER);
01067             return -1;
01068           }
01069         break;
01070 
01071       case '%':                 // DeclSep.  Define new PEreference...
01072         break;
01073 
01074       case ']':                 // End of internal definitions.
01075         return 0;               // Not applicable when parsing external DTD spec.
01076 
01077       case 0:                   // This may not be an error if we decide
01078                                 // to generalize this function to handle both
01079                                 // internal and external DTD definitions.
01080         this->report_fatal_error (ACE_TEXT ("Unexpected EOF") ACEXML_ENV_ARG_PARAMETER);
01081         return -1;
01082 
01083       default:
01084         this->report_fatal_error (ACE_TEXT ("Expecting markupdecl or DecSep") ACEXML_ENV_ARG_PARAMETER);
01085         return -1;
01086       };
01087 
01088     // To fully conform with the spec., whitespaces are only allowed
01089     // following a 'DeclSep' section.  However, I found it
01090     // hard/impossible to eliminate all the whitespaces between
01091     // markupdecls.
01092 
01093     nextch = this->skip_whitespace (0);
01094 
01095   } while (1);
01096 
01097   ACE_NOTREACHED (return -1);
01098 }
01099 
01100 int
01101 ACEXML_Parser::parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL)
01102 {
01103   if ((this->parse_token (ACE_TEXT ("LEMENT")) < 0) ||
01104       this->skip_whitespace_count () == 0)
01105     {
01106       this->report_fatal_error (ACE_TEXT ("Expecting keyword `ELEMENT'") ACEXML_ENV_ARG_PARAMETER);
01107       return -1;
01108     }
01109 
01110   ACEXML_Char *element_name = this->read_name ();
01111   if (element_name == 0)
01112     {
01113       this->report_fatal_error (ACE_TEXT ("Error reading element name while defining ELEMENT.") ACEXML_ENV_ARG_PARAMETER);
01114       return -1;
01115     }
01116 
01117   ACEXML_Char nextch ;
01118   this->skip_whitespace_count (&nextch);
01119 
01120   switch (nextch)
01121     {
01122     case 'E':                   // EMPTY
01123       if (this->parse_token (ACE_TEXT ("EMPTY")) < 0)
01124         {
01125           this->report_fatal_error (ACE_TEXT ("Expecting keyword `EMPTY' in ELEMENT definition.") ACEXML_ENV_ARG_PARAMETER);
01126           return -1;
01127         }
01128       break;
01129     case 'A':                   // ANY
01130       if (this->parse_token (ACE_TEXT ("ANY")) < 0)
01131         {
01132           this->report_fatal_error (ACE_TEXT ("Expecting keyword `ANY' in ELEMENT definition.") ACEXML_ENV_ARG_PARAMETER);
01133           return -1;
01134         }
01135       break;
01136     case '(':                   // children
01137       this->parse_children_definition (ACEXML_ENV_SINGLE_ARG_PARAMETER);
01138       ACEXML_CHECK_RETURN (-1);
01139       break;
01140     default:                    // error
01141       this->report_fatal_error (ACE_TEXT ("Error reading ELEMENT definition.") ACEXML_ENV_ARG_PARAMETER);
01142       return -1;
01143     }
01144   if (this->skip_whitespace (0) != '>')
01145     {
01146       this->report_fatal_error (ACE_TEXT ("Expecting '>' in ELEMENT definition.") ACEXML_ENV_ARG_PARAMETER);
01147       return -1;
01148     }
01149   return 0;
01150 }
01151 
01152 int
01153 ACEXML_Parser::parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL)
01154 {
01155   ACEXML_Char nextch;
01156 
01157   if ((this->parse_token (ACE_TEXT ("NTITY")) < 0) ||
01158       this->skip_whitespace_count (&nextch) == 0)
01159     {
01160       this->report_fatal_error (ACE_TEXT ("Expecting keyword `ENTITY'") ACEXML_ENV_ARG_PARAMETER);
01161       return -1;
01162     }
01163 
01164   int is_GEDecl = 1;
01165   if (nextch == '%')            // This is a PEDecl.
01166     {
01167       is_GEDecl = 0;
01168       this->get ();             // consume the '%'
01169       if (this->skip_whitespace_count (&nextch) == 0)
01170         {
01171           this->report_fatal_error (ACE_TEXT ("Can't use a reference when defining entity name") ACEXML_ENV_ARG_PARAMETER);
01172           return -1;
01173         }
01174     }
01175 
01176   ACEXML_Char *entity_name = this->read_name ();
01177   if (entity_name == 0)
01178     {
01179       this->report_fatal_error (ACE_TEXT ("Error reading ENTITY name.") ACEXML_ENV_ARG_PARAMETER);
01180       return -1;
01181     }
01182 
01183   this->skip_whitespace_count (&nextch);
01184 
01185   if (nextch == '\'' || nextch == '"')
01186     {
01187       ACEXML_Char *entity_value = 0;
01188 
01189       if (this->get_quoted_string (entity_value) != 0)
01190         {
01191           this->report_fatal_error(ACE_TEXT("Error reading ENTITY value.") ACEXML_ENV_ARG_PARAMETER);
01192           return -1;
01193         }
01194 
01195       if (is_GEDecl)
01196         {
01197           if (this->entities_.add_entity (entity_name, entity_value) != 0)
01198             {
01199               this->report_fatal_error(ACE_TEXT("Error storing entity definition (duplicate definition?)") ACEXML_ENV_ARG_PARAMETER);
01200               return -1;
01201             }
01202         }
01203       else
01204         {
01205           // @@ need to implement PEdecl lookup mechanism
01206           ACEXML_THROW_RETURN (ACEXML_SAXNotSupportedException (), -1);
01207         }
01208     }
01209   else
01210     {
01211       ACEXML_Char *systemid, *publicid;
01212 
01213       this->parse_external_id_and_ref (publicid, systemid ACEXML_ENV_ARG_PARAMETER);
01214       ACEXML_CHECK_RETURN (-1);
01215       if (systemid == 0)
01216         {
01217           this->report_fatal_error(ACE_TEXT("Invalid ExternalID definition (system ID missing.)") ACEXML_ENV_ARG_PARAMETER);
01218           return -1;
01219         }
01220       this->skip_whitespace_count (&nextch);
01221       if (nextch == 'N')        // NDATA section followed
01222         {
01223           if (is_GEDecl == 0)
01224             {
01225               this->report_fatal_error(ACE_TEXT("Unexpected keyword NDATA in PEDecl.") ACEXML_ENV_ARG_PARAMETER);
01226               return -1;
01227             }
01228 
01229           if ((this->parse_token (ACE_TEXT ("NDATA")) < 0) ||
01230               this->skip_whitespace_count (&nextch) == 0)
01231             {
01232               this->report_fatal_error(ACE_TEXT("Expecting keyword NDATA") ACEXML_ENV_ARG_PARAMETER);
01233               return -1;
01234             }
01235 
01236           ACEXML_Char *ndata = this->read_name ();
01237           this->dtd_handler_->unparsedEntityDecl (entity_name,
01238                                                   publicid,
01239                                                   systemid,
01240                                                   ndata ACEXML_ENV_ARG_PARAMETER);
01241           ACEXML_CHECK_RETURN (-1);
01242         }
01243       else
01244         {
01245           // @@ Need to support external CharStream sources
01246           ACE_DEBUG ((LM_DEBUG,
01247                       ACE_TEXT ("ENTITY: (%s) "),
01248                       entity_name));
01249 
01250           if (publicid == 0)
01251             ACE_DEBUG ((LM_DEBUG,
01252                         ACE_TEXT ("SYSTEM %s\n"),
01253                         systemid));
01254           else
01255             ACE_DEBUG ((LM_DEBUG,
01256                         ACE_TEXT ("PUBLIC %s %s\n"),
01257                         publicid, systemid));
01258         }
01259     }
01260 
01261   // End of ENTITY definition
01262   if (this->skip_whitespace (0) != '>')
01263     {
01264       this->report_fatal_error(ACE_TEXT("Expecting end of ENTITY definition.") ACEXML_ENV_ARG_PARAMETER);
01265       return -1;
01266     }
01267   return 0;
01268 }
01269 
01270 int
01271 ACEXML_Parser::parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL)
01272 {
01273   if ((this->parse_token (ACE_TEXT ("ATTLIST")) < 0) ||
01274       this->skip_whitespace_count () == 0)
01275     {
01276       this->report_fatal_error(ACE_TEXT("Expecting keyword `ATTLIST'") ACEXML_ENV_ARG_PARAMETER);
01277       return -1;
01278     }
01279 
01280   ACEXML_Char *element_name = this->read_name ();
01281   if (element_name == 0)
01282     {
01283       this->report_fatal_error(ACE_TEXT("Error reading element name while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01284       return -1;
01285     }
01286 
01287   ACEXML_Char nextch = this->skip_whitespace (0);
01288 
01289   // Parse AttDef*
01290   while (nextch != '>')
01291     {
01292       // Parse attribute name
01293       ACEXML_Char *att_name = this->read_name (nextch);
01294       if (att_name == 0)
01295         {
01296           this->report_fatal_error(ACE_TEXT("Error reading attribute name while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01297           return -1;
01298         }
01299 
01300       /*
01301         Parse AttType:
01302         Possible keywords:
01303         CDATA                   // StringType
01304         ID                      // TokenizedType
01305         IDREF
01306         IDREFS
01307         ENTITY
01308         ENTITIES
01309         NMTOKEN
01310         NMTOKENS
01311         NOTATION                // EnumeratedType - NotationTYpe
01312         (                       // EnumeratedType - Enumeration
01313       */
01314       nextch = this->skip_whitespace (0);
01315       switch (nextch)
01316         {
01317         case 'C':               // CDATA
01318           if ((this->parse_token (ACE_TEXT ("DATA")) < 0) ||
01319               this->skip_whitespace_count () == 0)
01320             {
01321               this->report_fatal_error(ACE_TEXT("Expecting keyword `CDATA' while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01322               return -1;
01323             }
01324           // Else, we have successfully identified the type of the
01325           // attribute as CDATA
01326           // @@ Set up validator appropriately here.
01327           break;
01328         case 'I':               // ID, IDREF, or, IDREFS
01329           if (this->get () == 'D')
01330             {
01331               if (this->skip_whitespace_count (&nextch) > 0)
01332                 {
01333                   // We have successfully identified the type of the
01334                   // attribute as ID @@ Set up validator as such.
01335                   break;
01336                 }
01337               if (this->parse_token (ACE_TEXT ("REF")) == 0)
01338                 {
01339                   if (this->skip_whitespace_count (&nextch) > 0)
01340                     {
01341                       // We have successfully identified the type of
01342                       // the attribute as IDREF
01343                       // @@ Set up validator as such.
01344                       break;
01345                     }
01346                   else if (nextch == 'S' &&
01347                            this->get () && // consume the 'S'
01348                            this->skip_whitespace_count () != 0)
01349                     {
01350                       // We have successfully identified the type of
01351                       // the attribute as IDREFS
01352                       // @@ Set up validator as such.
01353                       break;
01354                     }
01355                 }
01356             }
01357           // Admittedly, this error message is not precise enough
01358           this->report_fatal_error(ACE_TEXT("Expecting keyword `ID', `IDREF', or `IDREFS' while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01359           return -1;
01360         case 'E':               // ENTITY or ENTITIES
01361           if (this->parse_token (ACE_TEXT ("NTIT")) == 0)
01362             {
01363               nextch = this->get ();
01364               if (nextch == 'Y')
01365                 {
01366                   // We have successfully identified the type of
01367                   // the attribute as ENTITY
01368                   // @@ Set up validator as such.
01369                 }
01370               else if (nextch == 'I'&& this->get () == 'E' &&
01371                        this->get () == 'S')
01372                 {
01373                   // We have successfully identified the type of
01374                   // the attribute as ENTITIES
01375                   // @@ Set up validator as such.
01376                 }
01377               if (this->skip_whitespace_count () > 0)
01378                 {
01379                   // success
01380                   break;
01381                 }
01382             }
01383           // Admittedly, this error message is not precise enough
01384           this->report_fatal_error(ACE_TEXT("Expecting keyword `ENTITY', or `ENTITIES' while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01385           return -1;
01386         case 'N':               // NMTOKEN, NMTOKENS, or, NOTATION
01387           nextch = this->get ();
01388           if (nextch != 'M' || nextch != 'O')
01389             {
01390               this->report_fatal_error(ACE_TEXT("Expecting keyword `NMTOKEN', `NMTOKENS', or `NOTATION' while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01391               return -1;
01392             }
01393           if (nextch == 'M')
01394             {
01395               if (this->parse_token (ACE_TEXT ("TOKEN")) == 0)
01396                 {
01397                   if (this->skip_whitespace_count (&nextch) > 0)
01398                     {
01399                       // We have successfully identified the type of
01400                       // the attribute as NMTOKEN
01401                       // @@ Set up validator as such.
01402                       break;
01403                     }
01404                   else if (nextch == 'S' && this->skip_whitespace_count () > 0)
01405                     {
01406                       // We have successfully identified the type of
01407                       // the attribute as NMTOKENS
01408                       // @@ Set up validator as such.
01409                       break;
01410                     }
01411                 }
01412               this->report_fatal_error(ACE_TEXT("Expecting keyword `NMTOKEN' or `NMTOKENS' while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01413               return -1;
01414             }
01415           else                  // NOTATION
01416             {
01417               if ((this->parse_token (ACE_TEXT ("TATION")) < 0) ||
01418                   this->skip_whitespace_count () == 0)
01419                 {
01420                   this->report_fatal_error(ACE_TEXT("Expecting keyword `NOTATION' while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01421                   return -1;
01422                 }
01423 
01424               if (this->get () != '(')
01425                 {
01426                   this->report_fatal_error(ACE_TEXT("Expecting `(' following NOTATION while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01427                   return -1;
01428                 }
01429 
01430               this->skip_whitespace_count ();
01431 
01432               do {
01433                 ACEXML_Char *notation_name = this->read_name ();
01434                 if (notation_name == 0)
01435                   {
01436                     this->report_fatal_error(ACE_TEXT("Error reading NOTATION name while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01437                     return -1;
01438                   }
01439                 // @@ get another notation name, set up validator as such
01440                 this->skip_whitespace_count (&nextch);
01441               } while (nextch != ')');
01442 
01443               this->get ();     // consume the closing paren.
01444               this->skip_whitespace_count ();
01445             }
01446           break;
01447         case '(':               // EnumeratedType - Enumeration
01448           this->skip_whitespace_count ();
01449 
01450           do {
01451             ACEXML_Char *token_name = this->read_name (); // @@ need a special read_nmtoken?
01452             if (token_name == 0)
01453               {
01454                 this->report_fatal_error(ACE_TEXT("Error reading enumerated nmtoken name while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01455                 return -1;
01456               }
01457             // @@ get another nmtoken, set up validator as such
01458             this->skip_whitespace_count (&nextch);
01459           } while (nextch != ')');
01460 
01461           this->get ();     // consume the closing paren.
01462           this->skip_whitespace_count ();
01463           break;
01464         default:
01465           {
01466             this->report_fatal_error(ACE_TEXT("Invalid Attribute Type while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01467             return -1;
01468           }
01469           ACE_NOTREACHED (break);
01470         }
01471 
01472       /*
01473         Parse DefaultDecl:
01474         #REQUIRED
01475         #IMPLIED
01476         #FIXED
01477         quoted string           // #FIXED
01478       */
01479       nextch = this->peek ();
01480       switch (nextch)
01481         {
01482         case '#':
01483           this->get ();         // consume the '#'
01484           switch (this->get ())
01485             {
01486             case 'R':
01487               if (this->parse_token (ACE_TEXT ("EQUIRED")) < 0)
01488                 {
01489                   this->report_fatal_error(ACE_TEXT("Expecting keyword `#REQUIRED' while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01490                   return -1;
01491                 }
01492               // We now know this attribute is required
01493               // @@ Set up the validator as such.
01494               break;
01495             case 'I':
01496               if (this->parse_token (ACE_TEXT ("MPLIED")) < 0)
01497                 {
01498                   this->report_fatal_error(ACE_TEXT("Expecting keyword `#IMPLIED' while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01499                   return -1;
01500                 }
01501               // We now know this attribute is impleid.
01502               // @@ Set up the validator as such.
01503               break;
01504             case 'F':
01505               if (this->parse_token (ACE_TEXT ("IXED")) < 0 ||
01506                   this->skip_whitespace_count () == 0)
01507                 {
01508                   this->report_fatal_error(ACE_TEXT("Expecting keyword `#FIXED' while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01509                   return -1;
01510                 }
01511               // We now know this attribute is fixed.
01512 
01513               ACEXML_Char *fixed_attr;
01514               if (this->get_quoted_string (fixed_attr) != 0)
01515                 {
01516                   this->report_fatal_error(ACE_TEXT("Error parsing `#FIXED' attribute value while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01517                   return -1;
01518                 }
01519               // @@ set up validator
01520               break;
01521             default:
01522               break;
01523             }
01524           break;
01525         case '\'':
01526         case '"':
01527           ACEXML_Char *fixed_attr;
01528           if (this->get_quoted_string (fixed_attr) != 0)
01529             {
01530               this->report_fatal_error(ACE_TEXT("Error parsing `#FIXED' attribute value while defining ATTLIST.") ACEXML_ENV_ARG_PARAMETER);
01531               return -1;
01532             }
01533           // @@ set up validator
01534           break;
01535         default:
01536           break;
01537         }
01538       this->skip_whitespace_count (&nextch);
01539     }
01540 
01541   this->get ();                 // consume closing '>'
01542 
01543   return 0;
01544 }
01545 
01546 int
01547 ACEXML_Parser::parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL)
01548 {
01549   if (this->parse_token (ACE_TEXT ("NOTATION")) < 0 ||
01550       this->skip_whitespace_count () == 0)
01551     {
01552       this->report_fatal_error(ACE_TEXT("Expecting keyword `NOTATION'") ACEXML_ENV_ARG_PARAMETER);
01553       return -1;
01554     }
01555 
01556   ACEXML_Char *notation = this->read_name ();
01557   if (notation == 0)
01558     {
01559       this->report_fatal_error(ACE_TEXT("Invalid notation name.") ACEXML_ENV_ARG_PARAMETER);
01560       return -1;
01561     }
01562 
01563   this->skip_whitespace_count ();
01564   ACEXML_Char *systemid, *publicid;
01565 
01566   this->parse_external_id_and_ref (publicid, systemid ACEXML_ENV_ARG_PARAMETER);
01567   ACEXML_CHECK_RETURN (-1);
01568 
01569   if (this->get () != '>')
01570     {
01571       this->report_fatal_error(ACE_TEXT("Expecting NOTATION closing '>'.") ACEXML_ENV_ARG_PARAMETER);
01572       return -1;
01573     }
01574 
01575   this->dtd_handler_->notationDecl (notation,
01576                                     publicid,
01577                                     systemid ACEXML_ENV_ARG_PARAMETER);
01578   ACEXML_CHECK_RETURN (-1);
01579 
01580   return 0;
01581 }
01582 
01583 int
01584 ACEXML_Parser::parse_external_id_and_ref (ACEXML_Char *&publicId,
01585                                           ACEXML_Char *&systemId ACEXML_ENV_ARG_DECL)
01586 {
01587   publicId = systemId = 0;
01588   ACEXML_Char nextch = this->get ();
01589 
01590   switch (nextch)
01591     {
01592     case 'S':                   // External SYSTEM id.
01593       if (this->parse_token (ACE_TEXT ("YSTEM")) < 0 ||
01594           this->skip_whitespace_count () == 0)
01595         {
01596           this->report_fatal_error(ACE_TEXT("Expecting keyword 'SYSTEM'") ACEXML_ENV_ARG_PARAMETER);
01597           return -1;
01598         }
01599       if (this->get_quoted_string (systemId) != 0)
01600         {
01601           this->report_fatal_error(ACE_TEXT("Error while parsing SYSTEM literal for SYSTEM id.") ACEXML_ENV_ARG_PARAMETER);
01602           return -1;
01603         }
01604       this->locator_.setSystemId (systemId);
01605       break;
01606     case 'P':                   // External PUBLIC id or previously defined PUBLIC id.
01607       if (this->parse_token (ACE_TEXT ("UBLIC")) < 0 ||
01608           this->skip_whitespace_count () == 0)
01609         {
01610           this->report_fatal_error(ACE_TEXT("Expecting keyword 'PUBLIC'") ACEXML_ENV_ARG_PARAMETER);
01611           return -1;
01612         }
01613       if (this->get_quoted_string (publicId) != 0)
01614         {
01615           this->report_fatal_error(ACE_TEXT("Error while parsing public literal for PUBLIC id.") ACEXML_ENV_ARG_PARAMETER);
01616           return -1;
01617         }
01618       this->locator_.setPublicId (publicId);
01619 
01620       this->skip_whitespace_count (&nextch);
01621       if (nextch == '\'' || nextch == '"') // not end of NOTATION yet.
01622         {
01623           if (this->get_quoted_string (systemId) != 0)
01624             {
01625               this->report_fatal_error(ACE_TEXT("Error while parsing system literal for PUBLIC id.") ACEXML_ENV_ARG_PARAMETER);
01626               return -1;
01627             }
01628           this->locator_.setSystemId (systemId);
01629         }
01630       break;
01631     default:
01632       this->report_fatal_error(ACE_TEXT("Expecting either keyword `SYSTEM' or `PUBLIC'.") ACEXML_ENV_ARG_PARAMETER);
01633       return -1;
01634     }
01635   return 0;
01636 }
01637 
01638 int
01639 ACEXML_Parser::parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL)
01640 {
01641   this->get ();                 // consume the '('
01642 
01643   ACEXML_Char nextch;
01644   int subelement_number = 0;
01645   this->skip_whitespace_count (&nextch);
01646 
01647   switch (nextch)
01648     {
01649     case '#':                   // Mixed element,
01650       if (this->parse_token (ACE_TEXT ("#PCDATA")) < 0)
01651         {
01652           this->report_fatal_error(ACE_TEXT("Expecting keyword `#PCDATA' while defining an element.") ACEXML_ENV_ARG_PARAMETER);
01653           return -1;
01654         }
01655 
01656       this->skip_whitespace_count (&nextch);
01657 
01658       while (nextch != ')')
01659         {
01660           if (this->get () != '|')
01661             {
01662               this->report_fatal_error(ACE_TEXT("Expecting end of Mixed section while defining an element.") ACEXML_ENV_ARG_PARAMETER);
01663               return -1;
01664             }
01665           this->skip_whitespace_count ();
01666 
01667           ACEXML_Char *name = this->read_name ();
01668           // @@ name will be used in the Validator later.
01669           ACE_UNUSED_ARG (name);
01670           ++subelement_number;
01671           // @@ Install Mixed element name into the validator.
01672           this->skip_whitespace_count (&nextch);
01673         }
01674 
01675       if (this->get () != ')' ||
01676           (subelement_number && this->get () != '*'))
01677         {
01678           this->report_fatal_error(ACE_TEXT("Expecting closing `)*' or ')' while defining an element.") ACEXML_ENV_ARG_PARAMETER);
01679           return -1;
01680         }
01681       // @@ close the element definition in the validator.
01682       break;
01683     default:
01684       int status = this->parse_child (1 ACEXML_ENV_ARG_PARAMETER);
01685       ACEXML_CHECK_RETURN (-1);
01686       if (status != 0)
01687         return -1;
01688     }
01689 
01690   return 0;
01691 }
01692 
01693 int
01694 ACEXML_Parser::parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL)
01695 {
01696   // Conditionally consume the open paren.
01697   if (skip_open_paren == 0 &&
01698       this->get () != '(')
01699     {
01700       this->report_fatal_error(ACE_TEXT("Expecting opening `(' while defining an element.") ACEXML_ENV_ARG_PARAMETER);
01701       return -1;
01702     }
01703 
01704   ACEXML_Char node_type = 0;
01705   ACEXML_Char nextch;
01706 
01707   do {
01708     this->skip_whitespace_count (&nextch);
01709     switch (nextch)
01710       {
01711       case '(':
01712         this->parse_child (0 ACEXML_ENV_ARG_PARAMETER);
01713         ACEXML_CHECK_RETURN (-1);
01714         break;
01715       default:
01716         // must be an element name here.
01717         ACEXML_Char *subelement = this->read_name ();
01718         if (subelement == 0)
01719           {
01720             this->report_fatal_error(ACE_TEXT("Error reading sub-element name while defining an element.") ACEXML_ENV_ARG_PARAMETER);
01721             return -1;
01722           }
01723         // @@ Inform validator of the new element here.
01724         break;
01725       }
01726 
01727     this->skip_whitespace_count (&nextch);
01728     switch (nextch)
01729       {
01730       case '|':
01731         switch (node_type)
01732           {
01733           case 0:
01734             node_type = '|';
01735             // @@ inform validator of this new type??
01736             break;
01737           case '|':
01738             break;
01739           default:
01740             this->report_fatal_error(ACE_TEXT("Expecting `,', `|', or `)' while defining an element.") ACEXML_ENV_ARG_PARAMETER);
01741             return -1;
01742           }
01743         break;
01744       case ',':
01745         switch (node_type)
01746           {
01747           case 0:
01748             node_type = ',';
01749             // @@ inform validator of this new type??
01750             break;
01751           case ',':
01752             break;
01753           default:
01754             this->report_fatal_error(ACE_TEXT("Expecting `,', `|', or `)'while defining an element.") ACEXML_ENV_ARG_PARAMETER);
01755             return -1;
01756           }
01757       case ')':
01758         break;
01759       default:
01760         this->report_fatal_error(ACE_TEXT("Expecting `,', `|', or `)' while defining an element.") ACEXML_ENV_ARG_PARAMETER);
01761         return -1;
01762       }
01763     this->get ();               // consume , | or )
01764   } while (nextch != ')');
01765 
01766   // Check for trailing '?', '*', '+'
01767   nextch = this->peek ();
01768   switch (nextch)
01769     {
01770     case '?':
01771       // @@ Consume the character and inform validator as such,
01772       this->get ();
01773       break;
01774     case '*':
01775       // @@ Consume the character and inform validator as such,
01776       this->get ();
01777       break;
01778     case '+':
01779       // @@ Consume the character and inform validator as such,
01780       this->get ();
01781       break;
01782     default:
01783       break;                    // not much to do.
01784     }
01785 
01786   return 0;
01787 }
01788 
01789 ACEXML_Char
01790 ACEXML_Parser::skip_whitespace (ACEXML_Char **whitespace)
01791 {
01792   ACEXML_Char ch = this->get ();
01793 
01794   if (this->is_whitespace (ch) == 0)
01795     {
01796       if (whitespace != 0)
01797         *whitespace = 0;
01798       return ch;
01799     }
01800 
01801   do
01802     {
01803       if (whitespace != 0)
01804         this->obstack_.grow (ch);
01805       ch = this->get ();
01806     }
01807   while (this->is_whitespace (ch));
01808 
01809   if (whitespace != 0)
01810     *whitespace = this->obstack_.freeze ();
01811 
01812   return ch;
01813 }
01814 
01815 int
01816 ACEXML_Parser::skip_whitespace_count (ACEXML_Char *peeky)
01817 {
01818   int wscount = 0;
01819   ACEXML_Char dummy;
01820   ACEXML_Char &forward = (peeky == 0 ? dummy : *peeky);
01821 
01822   for (;this->is_whitespace ((forward = this->peek ())); ++wscount)
01823     this->get ();
01824 
01825   return wscount;
01826 }
01827 
01828 int
01829 ACEXML_Parser::parse_token (const ACEXML_Char* keyword)
01830 {
01831   if (keyword == 0)
01832     return -1;
01833   const ACEXML_Char* ptr = keyword;
01834   for (; *ptr != 0 && (this->get() == *ptr); ++ptr)
01835     {
01836       // ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("ch = %c : ptr = %c"), ch, *ptr));
01837     }
01838   if (*ptr == 0)
01839     return 0;
01840   else
01841     return -1;
01842 }
01843 
01844 int
01845 ACEXML_Parser::skip_equal (void)
01846 {
01847   if (this->skip_whitespace (0) != '=')
01848     return -1;
01849 
01850   while (this->is_whitespace (this->peek ()))
01851     this->get ();
01852   return 0;
01853 }
01854 
01855 int
01856 ACEXML_Parser::get_quoted_string (ACEXML_Char *&str)
01857 {
01858   ACEXML_Char quote = this->get ();
01859   if (quote != '\'' && quote != '"')  // Not a quoted string.
01860     return -1;
01861 
01862   while (1)
01863     {
01864       ACEXML_Char ch = this->get ();
01865 
01866       // @@ Deoes not handle buffer overflow yet.
01867       if (ch == quote)
01868         {
01869           str = this->obstack_.freeze ();
01870           return 0;
01871         }
01872 
01873       const ACEXML_String *replace = 0;
01874       ACEXML_String charval;
01875       ACEXML_Char buffer[6];
01876       size_t i = 0;
01877 
01878       switch (ch)
01879         {
01880         case '&':
01881           if (this->peek () == '#')
01882             {
01883               if (this->parse_char_reference (buffer, 6) != 0)
01884                 {
01885 // xmlenv.exception (new ACEXML_SAXParseException
01886 // (ACE_TEXT ("CharRef does not resolves to a valid character")));
01887                   return -1;
01888                 }
01889               charval.set (buffer, 0);
01890               replace = &charval;
01891             }
01892           else
01893             replace = this->parse_reference ();
01894 
01895           if (replace == 0)
01896             {
01897               //              xmlenv.exception (new ACEXML_SAXParseException
01898               // (ACE_TEXT ("Undefined reference")));
01899               return -1;
01900             }
01901           for (i = 0; i < replace->length (); ++i)
01902             this->obstack_.grow ((*replace)[i]);
01903           // handle reference here.
01904           break;
01905         case 0x0D:                // End-of-Line handling
01906           ch = (this->peek () == 0x0A ? this->get () : 0x0A);
01907           // Fall thru...
01908         case 0x0A:
01909           // Fall thru...
01910         default:
01911           this->obstack_.grow (ch);
01912           break;
01913         }
01914     }
01915 }
01916 
01917 ACEXML_Char *
01918 ACEXML_Parser::read_name (ACEXML_Char ch)
01919 {
01920   if (ch == 0)
01921     {
01922       ch = this->get ();
01923 
01924       if (this->is_whitespace (ch))
01925         // No white space is allowed here.
01926         return 0;
01927     }
01928   else if (this->is_nonname (ch))
01929     return 0;
01930 
01931   while (1)
01932     {
01933       this->obstack_.grow (ch);
01934       ch = this->peek ();
01935       if (this->is_nonname (ch))
01936         break;
01937       ch = this->get ();
01938     };
01939 
01940   return this->obstack_.freeze ();
01941 }
01942 
01943 void
01944 ACEXML_Parser::report_prefix_mapping (const ACEXML_Char* prefix,
01945                                       const ACEXML_Char* uri,
01946                                       const ACEXML_Char* name,
01947                                       int start ACEXML_ENV_ARG_DECL)
01948 {
01949   if (this->namespaces_)
01950     {
01951       const ACEXML_Char* temp = (name == 0) ? empty_string : prefix;
01952       if (start) {
01953         this->content_handler_->startPrefixMapping (temp, uri ACEXML_ENV_ARG_PARAMETER);
01954         ACEXML_CHECK;
01955       }
01956       else
01957         {
01958           this->content_handler_->endPrefixMapping(temp ACEXML_ENV_ARG_PARAMETER);
01959           ACEXML_CHECK;
01960         }
01961     }
01962 }

Generated on Mon Jun 16 13:23:24 2003 for ACEXML by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002