/home/rixxx/src/xmlminus/src/xmlminus.h

00001 #ifndef _XMLMINUS_H_
00002 #define _XMLMINUS_H_
00003 
#include <algorithm>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <locale>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/lexical_cast.hpp>
00012 
namespace
{
//! Predefined XML entities as {entity reference, literal text} pairs.
/*!
 * The table is terminated by a {0, 0} sentinel row, so it can be walked
 * without knowing its length. Both the array slots and the strings they
 * point to are const: the table is read-only data and must not be
 * re-pointed at run time.
 */
const char *const ENTITIES[][2] = {
    {"&amp;",  "&"},
    {"&lt;",   "<"},
    {"&gt;",   ">"},
    {"&quot;", "\""},
    {"&apos;", "\'"},
    {0, 0}
};
}
00017 
00019 namespace xmlm
00020 {
00021 
//! Exception type used to report malformed input and failed lookups.
/*!
 * Derives from std::runtime_error, so it can be caught either specifically
 * or through the standard exception hierarchy.
 */
class ParseError : public std::runtime_error
{
public:
    //! Create the exception.
    /*!
     * The constructor is explicit so that a plain string is never silently
     * converted into an exception object.
     *
     * \param what human-readable description, later returned by what()
     */
    explicit ParseError(const std::string &what) : std::runtime_error(what) {}
};
00027 
00029 
00032 template <class T>
00033 class BasicElement
00034 {
00035 public:
00037     typedef T string_type;
00039     typedef typename string_type::value_type char_type;
00041     typedef typename string_type::traits_type traits_type;
00043     typedef typename string_type::allocator_type allocator_type;
00045     typedef std::basic_istream<char_type, traits_type> istream_type;
00047     typedef std::basic_ostream<char_type, traits_type> ostream_type;
00049     typedef std::vector<BasicElement<T> > ChildVector;
00051     typedef std::map<string_type, string_type> AttributeMap;
00053     typedef enum { NORMAL, COMMENT, DECLARATION, TERMINATOR } ElementType;
00054     
00056 
00063     BasicElement(istream_type &in) : locale(in.getloc()) { parse(in); }
00064     
00066 
00075     BasicElement(istream_type &in, const std::locale &loc) : locale(loc) { std::locale old = in.imbue(locale); parse(in); in.imbue(old); }
00076     
00078 
00082     BasicElement(const string_type &value = "", ElementType type = NORMAL) : value(value), type(type) {}
00083     
00085 
00090     BasicElement(const string_type &value, const std::locale &loc, ElementType type = NORMAL) : locale(loc), value(value), type(type) {}
00091     
00093 
00097     BasicElement(const std::locale &loc, ElementType type = NORMAL) : locale(loc), type(type) {}
00098     
00100 
00103     std::locale &getLocale() { return locale; }
00104     
00106 
00109     const std::locale &getLocale() const { return locale; }
00110     
00112 
00115     void setLocale(const std::locale &loc) { locale = loc; }
00116     
00118 
00121     const string_type &getValue() const { return value; }
00122     
00124 
00127     string_type &getValue() { return value; }
00128     
00130 
00133     void setValue(const string_type &val) { value = val; }
00134     
00136 
00139     const ChildVector &getChildren() const { return children; }
00140     
00142 
00145     ChildVector &getChildren() { return children; }
00146     
00148 
00151     const AttributeMap &getAttributes() const { return attributes; }
00152     
00154 
00157     AttributeMap &getAttributes() { return attributes; }
00158     
00160 
00163     const string_type &getText() const { return text; }
00165 
00168     string_type &getText() { return text; }
00170 
00173     void setText(const string_type &str) { text = str; }
00174     
00176 
00180     ElementType getType() const { return type; }
00181     
00183 
00187     void setType(ElementType t) { type = t; }
00188     
00190 
00199     template<typename valueT>
00200     void setAttribute(const string_type &key, const valueT &val);
00201     
00203 
00213     template<typename valueT>
00214     valueT getAttribute(const string_type &key) const;
00215     
00217 
00225     const string_type &getAttributeRaw(const string_type &key) const
00226     {
00227         typename AttributeMap::const_iterator i(attributes.find(key));
00228         if(i == attributes.end()) throw ParseError("attribute not found");
00229         return i->second;
00230     }
00231     
00233 
00241     string_type &getAttributeRaw(const string_type &key)
00242     {
00243         typename AttributeMap::iterator i(attributes.find(key));
00244         if(i == attributes.end()) throw ParseError("attribute not found");
00245         return i->second;
00246     }
00247     
00249 
00256     void setAttributeRaw(const string_type &key, const string_type &val) { attributes[key] = val; }
00257     
00259 
00263     bool hasAttribute(const string_type &key) const { return (attributes.find(key) != attributes.end()); }
00264     
00266 
00271     void print(ostream_type &out, int indent = 0) const;
00272     
00274 
00279     void parse(istream_type &in);
00280     
00282 
00290     const BasicElement<T> &getChild(const string_type &val) const
00291     {
00292         typename ChildVector::const_iterator i = std::find_if(children.begin(), children.end(), valuecmp(val));
00293         if(i == children.end()) throw ParseError("can't find child");
00294         return *i;
00295     }
00296 
00298 
00306     BasicElement<T> &getChild(const string_type &val)
00307     {
00308         typename ChildVector::iterator i = std::find_if(children.begin(), children.end(), valuecmp(val));
00309         if(i == children.end()) throw ParseError("can't find child");
00310         return *i;
00311     }
00312 
00314 
00317     static void entityEncode(string_type &str);
00318     
00320 
00323     static void entityDecode(string_type &str);
00324     
00325 private:
00326     std::locale locale;
00327     string_type value;
00328     ElementType type;
00329     
00330     ChildVector children;
00331     AttributeMap attributes;
00332     
00333     string_type text;
00334     
00335     void readString(istream_type &in, string_type &str);
00336     void readAttribute(istream_type &in);
00337     void readQuotedString(istream_type &in, string_type &str);
00338     void readText(istream_type &in);
00339     void readComment(istream_type &in);
00340     
00341     typedef std::map<string_type, string_type> EntityMap;
00342     static EntityMap entities;
00343     
00344     static void buildEntities();
00345     static void replaceAll(string_type &str, const string_type &from, const string_type& to);
00346     
00347     struct valuecmp
00348     {
00349         string_type value;
00350         valuecmp(const string_type &value) : value(value) {}
00351         bool operator()(const BasicElement<T>& element) { return (element.getValue() == value); }
00352     };
00353 };
00354 
00355 template<class T>
00356 typename BasicElement<T>::EntityMap BasicElement<T>::entities;
00357 
00358 template <class T>
00359 template<typename valueT>
00360 void BasicElement<T>::setAttribute(const string_type &key, const valueT &val)
00361 {
00362     attributes[key] = boost::lexical_cast<string_type>(val);
00363 }
00364 
00365 template <class T>
00366 template<typename valueT>
00367 valueT BasicElement<T>::getAttribute(const string_type &key) const
00368 {
00369     typename AttributeMap::const_iterator i = attributes.find(key);
00370     if(i == attributes.end()) throw ParseError("attribute not found");
00371     return boost::lexical_cast<valueT>(i->second);
00372 }
00373 
00374 template <class T>
00375 void BasicElement<T>::parse(istream_type &in)
00376 {
00377     std::ws(in);
00378     
00379     if(in.eof()) throw ParseError("unexpected end of file");
00380     else if(!in.good()) throw std::runtime_error("i/o error");
00381 
00382     if(in.get() != '<') throw ParseError("expected: '<'");
00383 
00384     switch(in.peek())
00385     {
00386     case '/':
00387         type = TERMINATOR;
00388         in.get();
00389         break;
00390     case '!':
00391         type = COMMENT;
00392         in.get();
00393         break;
00394     case '?':
00395         type = DECLARATION;
00396         in.get();
00397         break;
00398     default:
00399         type = NORMAL;
00400         break;
00401     };
00402     
00403     value.clear();
00404     
00405     if(type == COMMENT)
00406     {
00407         readComment(in);
00408         return;
00409     }
00410     
00411     readString(in, value);
00412     
00413     while(std::isspace(static_cast<char_type>(in.peek()), locale))
00414     {
00415         std::ws(in);
00416         if(in.eof()) throw ParseError("unexpected end of file");
00417         else if(!in.good()) throw std::runtime_error("i/o error");
00418         
00419         if(std::isalnum(static_cast<char_type>(in.peek()), locale))
00420         {
00421             readAttribute(in);
00422         } else break;
00423     }
00424     
00425     bool single = false;
00426     if(in.peek() == '/')
00427     {
00428         if(type != NORMAL) throw ParseError("unexpected character: '/'");
00429 
00430         in.get();
00431         std::ws(in);
00432         if(in.eof()) throw ParseError("unexpected end of file");
00433         else if(!in.good()) throw std::runtime_error("i/o error");
00434 
00435         single = true;  
00436     }
00437     
00438     if(type == DECLARATION)
00439     {
00440         if(in.get() != '?')  throw ParseError("expected: '?'");
00441         std::ws(in);
00442     }
00443     
00444     if(in.get() != '>') throw ParseError("expected: '>'");
00445     
00446     while(!single && type == NORMAL)
00447     {
00448         std::ws(in);
00449         if(in.eof()) throw ParseError("unexpected end of file");
00450         else if(!in.good()) throw std::runtime_error("i/o error");
00451 
00452         if(in.peek() == '<')
00453         {
00454             children.push_back(BasicElement<T>(locale));
00455             
00456             BasicElement<T> &child = children.back();
00457             child.parse(in);
00458             
00459             if(child.type == TERMINATOR)
00460             {
00461                 if(child.value != value) throw ParseError("incorrect terminator");
00462                 children.pop_back();
00463                 break;
00464             }
00465         } else
00466         {
00467             readText(in);
00468         }
00469     }
00470     
00471     entityDecode(text);
00472 }
00473 
00474 // Read a string of alphanumeric characters or '_', ':', '.' or '-'
00475 template<class T>
00476 void BasicElement<T>::readString(istream_type &in, string_type &str)
00477 {
00478     while(in.good())
00479     {
00480         char_type ch = in.peek();
00481         
00482         if(!std::isalnum(ch, locale) && ch != '_' && ch != ':' && ch != '.' && ch != '-') break;
00483         str.push_back(ch);
00484         
00485         in.get();
00486     }
00487 }
00488 
00489 template<class T>
00490 void BasicElement<T>::readAttribute(istream_type &in)
00491 {
00492     string_type key;    
00493     readString(in, key);
00494         
00495     std::ws(in);
00496     if(in.eof()) throw ParseError("unexpected end of file");
00497     else if(!in.good()) throw std::runtime_error("i/o error");
00498 
00499     if(in.get() != '=') throw ParseError("expected: '='");
00500     std::ws(in);
00501     if(in.eof()) throw ParseError("unexpected end of file");
00502     else if(!in.good()) throw std::runtime_error("i/o error");
00503     
00504     string_type val;
00505     readQuotedString(in, val);
00506     entityDecode(val);
00507     
00508     attributes.insert(typename AttributeMap::value_type(key, val));
00509 }
00510 
00511 template<class T>
00512 void BasicElement<T>::readQuotedString(istream_type &in, string_type &str)
00513 {
00514     if(in.get() != '"') throw ParseError("expected: quoted string literal");
00515     
00516     char_type ch;
00517     while((ch = in.get()) != '"' && in.good())
00518     {
00519         if(ch == '\n' || ch == '\r' || ch == '\t' || ch == '<' || ch == '>') break;
00520         
00521         str.push_back(ch);
00522     }
00523     
00524     if(ch != '"') throw ParseError("unterminated string literal");
00525 }
00526 
00527 template<class T>
00528 void BasicElement<T>::readText(istream_type &in)
00529 {
00530     char_type ch;
00531 
00532     while((ch = in.peek()) != '<' && in.good())
00533     {
00534         if(std::isspace(ch, locale))
00535         {
00536             text.push_back(' ');
00537             std::ws(in);
00538         } else
00539         {
00540             text.push_back(ch);
00541             in.get();
00542         }
00543     }
00544     
00545     if(in.eof()) throw ParseError("unexpected end of file");
00546     else if(!in.good()) throw std::runtime_error("i/o error");
00547 
00548     if(ch != '<') throw std::logic_error("expected: '<'");
00549 }
00550 
00551 template <class T>
00552 void BasicElement<T>::readComment(istream_type &in)
00553 {
00554     int depth = 1;
00555     char_type ch = 0;
00556     while(depth > 0 && in.good())
00557     {
00558         ch = in.get();
00559         if(!(depth == 1 && ch == '>')) value.push_back(ch);
00560         if(ch == '<') ++depth;
00561         else if(ch == '>') --depth;
00562     }
00563     
00564     if(in.eof()) throw ParseError("unexpected end of file");
00565     else if(!in.good()) throw std::runtime_error("i/o error");
00566     
00567     if(ch != '>') throw std::logic_error("expected: '>'");
00568 }
00569 
00570 template <class T>
00571 void BasicElement<T>::print(ostream_type &out, int indent) const
00572 {
00573     for(int i = 0; i < indent; ++i) out << '\t';
00574     
00575     out << '<';
00576     
00577     if(type == COMMENT) out << '!';
00578     else if(type == DECLARATION) out << '?';
00579     
00580     out << value;
00581     
00582     for(typename AttributeMap::const_iterator i(attributes.begin()); i != attributes.end(); ++i)
00583     {
00584         out << ' ' << (i->first) << "=\"";
00585         string_type value(i->second);
00586         entityEncode(value);
00587         out << value << '"';
00588     }
00589     
00590     if(text.size() == 0 && children.size() == 0 && type == NORMAL)
00591     {
00592         out << '/' << '>' << '\n';
00593         return;
00594     }
00595     
00596     if(type == DECLARATION) out << '?';
00597     out << '>' << '\n';
00598     if(type != NORMAL) return;
00599     
00600     for(typename ChildVector::const_iterator i(children.begin()); i != children.end(); ++i)
00601     {
00602         i->print(out, indent+1);
00603     }
00604     
00605     if(text.size() > 0)
00606     {
00607         for(int i = 0; i < (indent+1); ++i) out << '\t';
00608         string_type temp(text);
00609         entityEncode(temp);
00610         out << temp;
00611         out << '\n';
00612     }
00613     
00614     for(int i = 0; i < indent; ++i) out << '\t';
00615     out << '<' << '/' << value << '>' << '\n';
00616 }
00617 
00619 template<class T>
00620 std::basic_ostream<typename T::value_type, typename T::traits_type> &operator<<(std::basic_ostream<typename T::value_type, typename T::traits_type> &out, const BasicElement<T> &element)
00621 {
00622     std::locale old = out.imbue(element.getLocale());
00623     element.print(out);
00624     out.imbue(old);
00625     return out;
00626 }
00627 
00629 template<class T>
00630 std::basic_istream<typename T::value_type, typename T::traits_type> &operator>>(std::basic_istream<typename T::value_type, typename T::traits_type> &in, BasicElement<T> &element)
00631 {
00632     std::locale old = in.imbue(element.getLocale());
00633     element.parse(in);
00634     in.imbue(old);
00635     return in;
00636 }
00637 
00638 template<class T>
00639         void BasicElement<T>::buildEntities()
00640 {
00641     entities.clear();
00642     
00643     int n = 0;
00644     while(ENTITIES[n][0])
00645     {
00646         string_type key;
00647         string_type val;
00648         for(const char *c = ENTITIES[n][0]; *c; ++c) key.push_back(*c);
00649         for(const char *c = ENTITIES[n][1]; *c; ++c) val.push_back(*c);
00650         
00651         entities[key] = val;
00652         ++n;
00653     }
00654 }
00655 
00656 template<class T>
00657         void BasicElement<T>::replaceAll(string_type &str, const string_type &from, const string_type& to)
00658 {
00659     typename string_type::size_type pos = 0;
00660     while((pos = str.find(from, pos)) != string_type::npos)
00661     {
00662         str.replace(pos, from.size(), to);
00663         ++pos;
00664     }
00665 }
00666 
00667 template<class T>
00668 void BasicElement<T>::entityEncode(string_type &str)
00669 {
00670     if(entities.empty()) buildEntities();
00671     
00672     for(typename EntityMap::iterator i(entities.begin()); i != entities.end(); ++i)
00673     {
00674         replaceAll(str, i->second, i->first);
00675     }
00676 }
00677 
00678 template<class T>
00679 void BasicElement<T>::entityDecode(string_type &str)
00680 {
00681     if(entities.empty()) buildEntities();
00682     
00683     for(typename EntityMap::iterator i(entities.begin()); i != entities.end(); ++i)
00684     {
00685         replaceAll(str, i->first, i->second);
00686     }
00687 }
00688 
00690 
00693 template <class T>
00694 class BasicDocument
00695 {
00696 public:
00698     typedef T string_type;
00700     typedef typename T::value_type char_type;
00702     typedef typename T::traits_type traits_type;
00704     typedef std::basic_istream<char_type, traits_type> istream_type;
00706     typedef std::basic_ostream<char_type, traits_type> ostream_type;
00708     typedef std::vector<BasicElement<T> > ChildVector;
00709     
00711 
00718     BasicDocument(istream_type &in) : locale(in.getloc()) { parse(in); }
00719     
00721 
00730     BasicDocument(istream_type &in, const std::locale &loc) : locale(loc) { std::locale old = in.imbue(locale); parse(in); in.imbue(old); }
00731     
00733 
00740     BasicDocument(const std::string &filename)
00741     {
00742         std::basic_ifstream<char_type, traits_type> in(filename);
00743         locale = in.getloc();
00744         parse(in);
00745     }
00746     
00748 
00757     BasicDocument(const std::string &filename, const std::locale &loc)
00758         : locale(loc)
00759     {
00760         std::basic_ifstream<char_type, traits_type> in(filename);
00761         in.imbue(locale);
00762         parse(in);
00763     }
00764 
00766 
00769     BasicDocument(const std::locale &locale)
00770         : locale(locale)
00771     {
00772         children.push_back(BasicElement<T>(locale));
00773         check();
00774     }
00775     
00777     BasicDocument()
00778     {
00779         children.push_back(BasicElement<T>(locale));
00780         check();
00781     }
00782     
00784 
00787     std::locale &getLocale() { return locale; }
00788     
00790 
00793     const std::locale &getLocale() const { return locale; }
00794     
00796 
00800     BasicElement<T> &getRootElement() { if(!rootElement) throw ParseError("no root element found"); return *rootElement; }
00801     
00803 
00807     const BasicElement<T> &getRootElement() const { if(!rootElement) throw ParseError("no root element found"); return *rootElement; }
00808 
00810 
00813     const ChildVector &getChildren() const { return children; }
00814     
00816 
00819     ChildVector &getChildren() { return children; }
00820     
00822 
00827     void parse(istream_type &in);
00828     
00830 
00834     void print(ostream_type &out) const;
00835     
00836 private:
00837     std::locale locale;
00838     ChildVector children;
00839     
00840     BasicElement<T> *rootElement;
00841 
00842     void check();
00843 };
00844 
00845 template <class T>
00846 void BasicDocument<T>::parse(istream_type &in)
00847 {
00848     if(!children.empty()) children.clear();
00849     
00850     while(in.good())
00851     {
00852         children.push_back(BasicElement<T>(locale));
00853         children.back().parse(in);
00854         if(in.good()) std::ws(in);
00855     }
00856     
00857     if(!in.eof()) throw std::runtime_error("i/o error");
00858     
00859     check();
00860 }
00861 
00862 template <class T>
00863 void BasicDocument<T>::check()
00864 {
00865     rootElement = NULL;
00866     for(typename ChildVector::iterator i(children.begin()); i != children.end(); ++i)
00867     {
00868         if(i->getType() == BasicElement<T>::TERMINATOR) throw ParseError("tag - end tag mismatch");
00869         if(i->getType() != BasicElement<T>::NORMAL) continue;
00870         
00871         if(rootElement) throw ParseError("multiple root elements");
00872         rootElement = &(*i);
00873     }
00874     
00875     if(!rootElement) throw ParseError("no root element found");
00876 }
00877 
00878 template <class T>
00879 void BasicDocument<T>::print(ostream_type &out) const
00880 {
00881     for(typename ChildVector::const_iterator i(children.begin()); i != children.end(); ++i)
00882     {
00883         i->print(out);
00884     }
00885 }
00886 
00888 template<class T>
00889 std::basic_ostream<typename T::value_type, typename T::traits_type> &operator<<(std::basic_ostream<typename T::value_type, typename T::traits_type> &out, const BasicDocument<T> &document)
00890 {
00891     std::locale old = out.imbue(document.getLocale());
00892     document.print(out);
00893     out.imbue(old);
00894     return out;
00895 }
00896 
00898 template<class T>
00899 std::basic_istream<typename T::value_type, typename T::traits_type> &operator>>(std::basic_istream<typename T::value_type, typename T::traits_type> &in, BasicDocument<T> &document)
00900 {
00901     std::locale old = in.imbue(document.getLocale());
00902     document.parse(in);
00903     in.imbue(old);
00904     return in;
00905 }
00906 
//! Element built on narrow strings (std::string).
00908 typedef BasicElement<std::string> Element;
//! Document built on narrow strings (std::string).
00910 typedef BasicDocument<std::string> Document;
00911 
// The wide-string variants are only declared when HAVE_WSTRING is defined.
00912 #ifdef HAVE_WSTRING
//! Element built on wide strings (std::wstring).
00914 typedef BasicElement<std::wstring> wElement;
//! Document built on wide strings (std::wstring).
00916 typedef BasicDocument<std::wstring> wDocument;
00917 #endif
00918 
00919 }
00920 
00921 #endif

Generated on Fri Dec 15 19:24:38 2006 for XMLminus by  doxygen 1.5.0