00001 #ifndef _XMLMINUS_H_
00002 #define _XMLMINUS_H_
00003
#include <algorithm>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <locale>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/lexical_cast.hpp>
00012
namespace
{
/// Table of the predefined XML entities as {encoded form, literal text}
/// pairs. The table ends with a {0, 0} sentinel so it can be walked without
/// knowing its length.
const char *ENTITIES[][2] = {
    {"&amp;", "&"},
    {"&lt;", "<"},
    {"&gt;", ">"},
    {"&quot;", "\""},
    {"&apos;", "\'"},
    {0, 0}
};
}
00017
00019 namespace xmlm
00020 {
00021
/// Exception type used by this header for malformed input and for failed
/// lookups (missing attribute, child or root element).
class ParseError : public std::runtime_error
{
public:
    /// @param message Text later returned by what().
    ParseError(const std::string &message)
        : std::runtime_error(message)
    {
    }
};
00027
00029
00032 template <class T>
00033 class BasicElement
00034 {
00035 public:
00037 typedef T string_type;
00039 typedef typename string_type::value_type char_type;
00041 typedef typename string_type::traits_type traits_type;
00043 typedef typename string_type::allocator_type allocator_type;
00045 typedef std::basic_istream<char_type, traits_type> istream_type;
00047 typedef std::basic_ostream<char_type, traits_type> ostream_type;
00049 typedef std::vector<BasicElement<T> > ChildVector;
00051 typedef std::map<string_type, string_type> AttributeMap;
00053 typedef enum { NORMAL, COMMENT, DECLARATION, TERMINATOR } ElementType;
00054
00056
00063 BasicElement(istream_type &in) : locale(in.getloc()) { parse(in); }
00064
00066
00075 BasicElement(istream_type &in, const std::locale &loc) : locale(loc) { std::locale old = in.imbue(locale); parse(in); in.imbue(old); }
00076
00078
00082 BasicElement(const string_type &value = "", ElementType type = NORMAL) : value(value), type(type) {}
00083
00085
00090 BasicElement(const string_type &value, const std::locale &loc, ElementType type = NORMAL) : locale(loc), value(value), type(type) {}
00091
00093
00097 BasicElement(const std::locale &loc, ElementType type = NORMAL) : locale(loc), type(type) {}
00098
00100
00103 std::locale &getLocale() { return locale; }
00104
00106
00109 const std::locale &getLocale() const { return locale; }
00110
00112
00115 void setLocale(const std::locale &loc) { locale = loc; }
00116
00118
00121 const string_type &getValue() const { return value; }
00122
00124
00127 string_type &getValue() { return value; }
00128
00130
00133 void setValue(const string_type &val) { value = val; }
00134
00136
00139 const ChildVector &getChildren() const { return children; }
00140
00142
00145 ChildVector &getChildren() { return children; }
00146
00148
00151 const AttributeMap &getAttributes() const { return attributes; }
00152
00154
00157 AttributeMap &getAttributes() { return attributes; }
00158
00160
00163 const string_type &getText() const { return text; }
00165
00168 string_type &getText() { return text; }
00170
00173 void setText(const string_type &str) { text = str; }
00174
00176
00180 ElementType getType() const { return type; }
00181
00183
00187 void setType(ElementType t) { type = t; }
00188
00190
00199 template<typename valueT>
00200 void setAttribute(const string_type &key, const valueT &val);
00201
00203
00213 template<typename valueT>
00214 valueT getAttribute(const string_type &key) const;
00215
00217
00225 const string_type &getAttributeRaw(const string_type &key) const
00226 {
00227 typename AttributeMap::const_iterator i(attributes.find(key));
00228 if(i == attributes.end()) throw ParseError("attribute not found");
00229 return i->second;
00230 }
00231
00233
00241 string_type &getAttributeRaw(const string_type &key)
00242 {
00243 typename AttributeMap::iterator i(attributes.find(key));
00244 if(i == attributes.end()) throw ParseError("attribute not found");
00245 return i->second;
00246 }
00247
00249
00256 void setAttributeRaw(const string_type &key, const string_type &val) { attributes[key] = val; }
00257
00259
00263 bool hasAttribute(const string_type &key) const { return (attributes.find(key) != attributes.end()); }
00264
00266
00271 void print(ostream_type &out, int indent = 0) const;
00272
00274
00279 void parse(istream_type &in);
00280
00282
00290 const BasicElement<T> &getChild(const string_type &val) const
00291 {
00292 typename ChildVector::const_iterator i = std::find_if(children.begin(), children.end(), valuecmp(val));
00293 if(i == children.end()) throw ParseError("can't find child");
00294 return *i;
00295 }
00296
00298
00306 BasicElement<T> &getChild(const string_type &val)
00307 {
00308 typename ChildVector::iterator i = std::find_if(children.begin(), children.end(), valuecmp(val));
00309 if(i == children.end()) throw ParseError("can't find child");
00310 return *i;
00311 }
00312
00314
00317 static void entityEncode(string_type &str);
00318
00320
00323 static void entityDecode(string_type &str);
00324
00325 private:
00326 std::locale locale;
00327 string_type value;
00328 ElementType type;
00329
00330 ChildVector children;
00331 AttributeMap attributes;
00332
00333 string_type text;
00334
00335 void readString(istream_type &in, string_type &str);
00336 void readAttribute(istream_type &in);
00337 void readQuotedString(istream_type &in, string_type &str);
00338 void readText(istream_type &in);
00339 void readComment(istream_type &in);
00340
00341 typedef std::map<string_type, string_type> EntityMap;
00342 static EntityMap entities;
00343
00344 static void buildEntities();
00345 static void replaceAll(string_type &str, const string_type &from, const string_type& to);
00346
00347 struct valuecmp
00348 {
00349 string_type value;
00350 valuecmp(const string_type &value) : value(value) {}
00351 bool operator()(const BasicElement<T>& element) { return (element.getValue() == value); }
00352 };
00353 };
00354
00355 template<class T>
00356 typename BasicElement<T>::EntityMap BasicElement<T>::entities;
00357
00358 template <class T>
00359 template<typename valueT>
00360 void BasicElement<T>::setAttribute(const string_type &key, const valueT &val)
00361 {
00362 attributes[key] = boost::lexical_cast<string_type>(val);
00363 }
00364
00365 template <class T>
00366 template<typename valueT>
00367 valueT BasicElement<T>::getAttribute(const string_type &key) const
00368 {
00369 typename AttributeMap::const_iterator i = attributes.find(key);
00370 if(i == attributes.end()) throw ParseError("attribute not found");
00371 return boost::lexical_cast<valueT>(i->second);
00372 }
00373
00374 template <class T>
00375 void BasicElement<T>::parse(istream_type &in)
00376 {
00377 std::ws(in);
00378
00379 if(in.eof()) throw ParseError("unexpected end of file");
00380 else if(!in.good()) throw std::runtime_error("i/o error");
00381
00382 if(in.get() != '<') throw ParseError("expected: '<'");
00383
00384 switch(in.peek())
00385 {
00386 case '/':
00387 type = TERMINATOR;
00388 in.get();
00389 break;
00390 case '!':
00391 type = COMMENT;
00392 in.get();
00393 break;
00394 case '?':
00395 type = DECLARATION;
00396 in.get();
00397 break;
00398 default:
00399 type = NORMAL;
00400 break;
00401 };
00402
00403 value.clear();
00404
00405 if(type == COMMENT)
00406 {
00407 readComment(in);
00408 return;
00409 }
00410
00411 readString(in, value);
00412
00413 while(std::isspace(static_cast<char_type>(in.peek()), locale))
00414 {
00415 std::ws(in);
00416 if(in.eof()) throw ParseError("unexpected end of file");
00417 else if(!in.good()) throw std::runtime_error("i/o error");
00418
00419 if(std::isalnum(static_cast<char_type>(in.peek()), locale))
00420 {
00421 readAttribute(in);
00422 } else break;
00423 }
00424
00425 bool single = false;
00426 if(in.peek() == '/')
00427 {
00428 if(type != NORMAL) throw ParseError("unexpected character: '/'");
00429
00430 in.get();
00431 std::ws(in);
00432 if(in.eof()) throw ParseError("unexpected end of file");
00433 else if(!in.good()) throw std::runtime_error("i/o error");
00434
00435 single = true;
00436 }
00437
00438 if(type == DECLARATION)
00439 {
00440 if(in.get() != '?') throw ParseError("expected: '?'");
00441 std::ws(in);
00442 }
00443
00444 if(in.get() != '>') throw ParseError("expected: '>'");
00445
00446 while(!single && type == NORMAL)
00447 {
00448 std::ws(in);
00449 if(in.eof()) throw ParseError("unexpected end of file");
00450 else if(!in.good()) throw std::runtime_error("i/o error");
00451
00452 if(in.peek() == '<')
00453 {
00454 children.push_back(BasicElement<T>(locale));
00455
00456 BasicElement<T> &child = children.back();
00457 child.parse(in);
00458
00459 if(child.type == TERMINATOR)
00460 {
00461 if(child.value != value) throw ParseError("incorrect terminator");
00462 children.pop_back();
00463 break;
00464 }
00465 } else
00466 {
00467 readText(in);
00468 }
00469 }
00470
00471 entityDecode(text);
00472 }
00473
00474
00475 template<class T>
00476 void BasicElement<T>::readString(istream_type &in, string_type &str)
00477 {
00478 while(in.good())
00479 {
00480 char_type ch = in.peek();
00481
00482 if(!std::isalnum(ch, locale) && ch != '_' && ch != ':' && ch != '.' && ch != '-') break;
00483 str.push_back(ch);
00484
00485 in.get();
00486 }
00487 }
00488
00489 template<class T>
00490 void BasicElement<T>::readAttribute(istream_type &in)
00491 {
00492 string_type key;
00493 readString(in, key);
00494
00495 std::ws(in);
00496 if(in.eof()) throw ParseError("unexpected end of file");
00497 else if(!in.good()) throw std::runtime_error("i/o error");
00498
00499 if(in.get() != '=') throw ParseError("expected: '='");
00500 std::ws(in);
00501 if(in.eof()) throw ParseError("unexpected end of file");
00502 else if(!in.good()) throw std::runtime_error("i/o error");
00503
00504 string_type val;
00505 readQuotedString(in, val);
00506 entityDecode(val);
00507
00508 attributes.insert(typename AttributeMap::value_type(key, val));
00509 }
00510
00511 template<class T>
00512 void BasicElement<T>::readQuotedString(istream_type &in, string_type &str)
00513 {
00514 if(in.get() != '"') throw ParseError("expected: quoted string literal");
00515
00516 char_type ch;
00517 while((ch = in.get()) != '"' && in.good())
00518 {
00519 if(ch == '\n' || ch == '\r' || ch == '\t' || ch == '<' || ch == '>') break;
00520
00521 str.push_back(ch);
00522 }
00523
00524 if(ch != '"') throw ParseError("unterminated string literal");
00525 }
00526
00527 template<class T>
00528 void BasicElement<T>::readText(istream_type &in)
00529 {
00530 char_type ch;
00531
00532 while((ch = in.peek()) != '<' && in.good())
00533 {
00534 if(std::isspace(ch, locale))
00535 {
00536 text.push_back(' ');
00537 std::ws(in);
00538 } else
00539 {
00540 text.push_back(ch);
00541 in.get();
00542 }
00543 }
00544
00545 if(in.eof()) throw ParseError("unexpected end of file");
00546 else if(!in.good()) throw std::runtime_error("i/o error");
00547
00548 if(ch != '<') throw std::logic_error("expected: '<'");
00549 }
00550
00551 template <class T>
00552 void BasicElement<T>::readComment(istream_type &in)
00553 {
00554 int depth = 1;
00555 char_type ch = 0;
00556 while(depth > 0 && in.good())
00557 {
00558 ch = in.get();
00559 if(!(depth == 1 && ch == '>')) value.push_back(ch);
00560 if(ch == '<') ++depth;
00561 else if(ch == '>') --depth;
00562 }
00563
00564 if(in.eof()) throw ParseError("unexpected end of file");
00565 else if(!in.good()) throw std::runtime_error("i/o error");
00566
00567 if(ch != '>') throw std::logic_error("expected: '>'");
00568 }
00569
00570 template <class T>
00571 void BasicElement<T>::print(ostream_type &out, int indent) const
00572 {
00573 for(int i = 0; i < indent; ++i) out << '\t';
00574
00575 out << '<';
00576
00577 if(type == COMMENT) out << '!';
00578 else if(type == DECLARATION) out << '?';
00579
00580 out << value;
00581
00582 for(typename AttributeMap::const_iterator i(attributes.begin()); i != attributes.end(); ++i)
00583 {
00584 out << ' ' << (i->first) << "=\"";
00585 string_type value(i->second);
00586 entityEncode(value);
00587 out << value << '"';
00588 }
00589
00590 if(text.size() == 0 && children.size() == 0 && type == NORMAL)
00591 {
00592 out << '/' << '>' << '\n';
00593 return;
00594 }
00595
00596 if(type == DECLARATION) out << '?';
00597 out << '>' << '\n';
00598 if(type != NORMAL) return;
00599
00600 for(typename ChildVector::const_iterator i(children.begin()); i != children.end(); ++i)
00601 {
00602 i->print(out, indent+1);
00603 }
00604
00605 if(text.size() > 0)
00606 {
00607 for(int i = 0; i < (indent+1); ++i) out << '\t';
00608 string_type temp(text);
00609 entityEncode(temp);
00610 out << temp;
00611 out << '\n';
00612 }
00613
00614 for(int i = 0; i < indent; ++i) out << '\t';
00615 out << '<' << '/' << value << '>' << '\n';
00616 }
00617
00619 template<class T>
00620 std::basic_ostream<typename T::value_type, typename T::traits_type> &operator<<(std::basic_ostream<typename T::value_type, typename T::traits_type> &out, const BasicElement<T> &element)
00621 {
00622 std::locale old = out.imbue(element.getLocale());
00623 element.print(out);
00624 out.imbue(old);
00625 return out;
00626 }
00627
00629 template<class T>
00630 std::basic_istream<typename T::value_type, typename T::traits_type> &operator>>(std::basic_istream<typename T::value_type, typename T::traits_type> &in, BasicElement<T> &element)
00631 {
00632 std::locale old = in.imbue(element.getLocale());
00633 element.parse(in);
00634 in.imbue(old);
00635 return in;
00636 }
00637
00638 template<class T>
00639 void BasicElement<T>::buildEntities()
00640 {
00641 entities.clear();
00642
00643 int n = 0;
00644 while(ENTITIES[n][0])
00645 {
00646 string_type key;
00647 string_type val;
00648 for(const char *c = ENTITIES[n][0]; *c; ++c) key.push_back(*c);
00649 for(const char *c = ENTITIES[n][1]; *c; ++c) val.push_back(*c);
00650
00651 entities[key] = val;
00652 ++n;
00653 }
00654 }
00655
00656 template<class T>
00657 void BasicElement<T>::replaceAll(string_type &str, const string_type &from, const string_type& to)
00658 {
00659 typename string_type::size_type pos = 0;
00660 while((pos = str.find(from, pos)) != string_type::npos)
00661 {
00662 str.replace(pos, from.size(), to);
00663 ++pos;
00664 }
00665 }
00666
00667 template<class T>
00668 void BasicElement<T>::entityEncode(string_type &str)
00669 {
00670 if(entities.empty()) buildEntities();
00671
00672 for(typename EntityMap::iterator i(entities.begin()); i != entities.end(); ++i)
00673 {
00674 replaceAll(str, i->second, i->first);
00675 }
00676 }
00677
00678 template<class T>
00679 void BasicElement<T>::entityDecode(string_type &str)
00680 {
00681 if(entities.empty()) buildEntities();
00682
00683 for(typename EntityMap::iterator i(entities.begin()); i != entities.end(); ++i)
00684 {
00685 replaceAll(str, i->first, i->second);
00686 }
00687 }
00688
00690
00693 template <class T>
00694 class BasicDocument
00695 {
00696 public:
00698 typedef T string_type;
00700 typedef typename T::value_type char_type;
00702 typedef typename T::traits_type traits_type;
00704 typedef std::basic_istream<char_type, traits_type> istream_type;
00706 typedef std::basic_ostream<char_type, traits_type> ostream_type;
00708 typedef std::vector<BasicElement<T> > ChildVector;
00709
00711
00718 BasicDocument(istream_type &in) : locale(in.getloc()) { parse(in); }
00719
00721
00730 BasicDocument(istream_type &in, const std::locale &loc) : locale(loc) { std::locale old = in.imbue(locale); parse(in); in.imbue(old); }
00731
00733
00740 BasicDocument(const std::string &filename)
00741 {
00742 std::basic_ifstream<char_type, traits_type> in(filename);
00743 locale = in.getloc();
00744 parse(in);
00745 }
00746
00748
00757 BasicDocument(const std::string &filename, const std::locale &loc)
00758 : locale(loc)
00759 {
00760 std::basic_ifstream<char_type, traits_type> in(filename);
00761 in.imbue(locale);
00762 parse(in);
00763 }
00764
00766
00769 BasicDocument(const std::locale &locale)
00770 : locale(locale)
00771 {
00772 children.push_back(BasicElement<T>(locale));
00773 check();
00774 }
00775
00777 BasicDocument()
00778 {
00779 children.push_back(BasicElement<T>(locale));
00780 check();
00781 }
00782
00784
00787 std::locale &getLocale() { return locale; }
00788
00790
00793 const std::locale &getLocale() const { return locale; }
00794
00796
00800 BasicElement<T> &getRootElement() { if(!rootElement) throw ParseError("no root element found"); return *rootElement; }
00801
00803
00807 const BasicElement<T> &getRootElement() const { if(!rootElement) throw ParseError("no root element found"); return *rootElement; }
00808
00810
00813 const ChildVector &getChildren() const { return children; }
00814
00816
00819 ChildVector &getChildren() { return children; }
00820
00822
00827 void parse(istream_type &in);
00828
00830
00834 void print(ostream_type &out) const;
00835
00836 private:
00837 std::locale locale;
00838 ChildVector children;
00839
00840 BasicElement<T> *rootElement;
00841
00842 void check();
00843 };
00844
00845 template <class T>
00846 void BasicDocument<T>::parse(istream_type &in)
00847 {
00848 if(!children.empty()) children.clear();
00849
00850 while(in.good())
00851 {
00852 children.push_back(BasicElement<T>(locale));
00853 children.back().parse(in);
00854 if(in.good()) std::ws(in);
00855 }
00856
00857 if(!in.eof()) throw std::runtime_error("i/o error");
00858
00859 check();
00860 }
00861
00862 template <class T>
00863 void BasicDocument<T>::check()
00864 {
00865 rootElement = NULL;
00866 for(typename ChildVector::iterator i(children.begin()); i != children.end(); ++i)
00867 {
00868 if(i->getType() == BasicElement<T>::TERMINATOR) throw ParseError("tag - end tag mismatch");
00869 if(i->getType() != BasicElement<T>::NORMAL) continue;
00870
00871 if(rootElement) throw ParseError("multiple root elements");
00872 rootElement = &(*i);
00873 }
00874
00875 if(!rootElement) throw ParseError("no root element found");
00876 }
00877
00878 template <class T>
00879 void BasicDocument<T>::print(ostream_type &out) const
00880 {
00881 for(typename ChildVector::const_iterator i(children.begin()); i != children.end(); ++i)
00882 {
00883 i->print(out);
00884 }
00885 }
00886
00888 template<class T>
00889 std::basic_ostream<typename T::value_type, typename T::traits_type> &operator<<(std::basic_ostream<typename T::value_type, typename T::traits_type> &out, const BasicDocument<T> &document)
00890 {
00891 std::locale old = out.imbue(document.getLocale());
00892 document.print(out);
00893 out.imbue(old);
00894 return out;
00895 }
00896
00898 template<class T>
00899 std::basic_istream<typename T::value_type, typename T::traits_type> &operator>>(std::basic_istream<typename T::value_type, typename T::traits_type> &in, BasicDocument<T> &document)
00900 {
00901 std::locale old = in.imbue(document.getLocale());
00902 document.parse(in);
00903 in.imbue(old);
00904 return in;
00905 }
00906
00908 typedef BasicElement<std::string> Element;
00910 typedef BasicDocument<std::string> Document;
00911
00912 #ifdef HAVE_WSTRING
00914 typedef BasicElement<std::wstring> wElement;
00916 typedef BasicDocument<std::wstring> wDocument;
00917 #endif
00918
00919 }
00920
00921 #endif