2 Copyright (c) 2005-2015, John Hurst
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 # error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
47 # error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
50 #include <xercesc/util/PlatformUtils.hpp>
51 #include <xercesc/util/XMLString.hpp>
52 #include <xercesc/util/TransService.hpp>
53 #include <xercesc/sax/AttributeList.hpp>
54 #include <xercesc/sax/HandlerBase.hpp>
55 #include <xercesc/sax/ErrorHandler.hpp>
56 #include <xercesc/sax/SAXParseException.hpp>
57 #include <xercesc/parsers/SAXParser.hpp>
58 #include <xercesc/framework/MemBufInputSource.hpp>
59 #include <xercesc/framework/XMLPScanToken.hpp>
62 XERCES_CPP_NAMESPACE_USE
66 void kumu_init_xml_dom();
67 bool kumu_UTF_8_to_XercesString(const std::string& in_str, std::basic_string<XMLCh>& out_str);
68 bool kumu_UTF_8_to_XercesString_p(const char* in_str, std::basic_string<XMLCh>& out_str);
69 bool kumu_XercesString_to_UTF_8(const std::basic_string<XMLCh>& in_str, std::string& out_str);
70 bool kumu_XercesString_to_UTF_8_p(const XMLCh* in_str, std::string& out_str);
// Map from a std::string key to an XMLNamespace pointer. The parse contexts in
// this file allocate one per parse to record the namespaces they encounter.
78 class ns_map : public std::map<std::string, XMLNamespace*>
// Start of an iteration over the map's own entries (loop body not visible in this excerpt).
85 ns_map::iterator ni = begin();
93 Kumu::XMLElement::XMLElement(const char* name) : m_Namespace(0), m_NamespaceOwner(0)
// Destructor. Iterates over every entry of m_ChildList and then releases the
// namespace map referenced by m_NamespaceOwner.
98 Kumu::XMLElement::~XMLElement()
100 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// m_NamespaceOwner is cast to ns_map* so that delete sees the map's real type.
103 delete (ns_map*)m_NamespaceOwner;
// Records an attribute on this element.
//   name  - attribute name
//   value - attribute value
108 Kumu::XMLElement::SetAttr(const char* name, const char* value)
// TmpVal is filled in and then appended to m_AttrList.
112 TmpVal.value = value;
114 m_AttrList.push_back(TmpVal);
// Appends an existing element object to this element's child list.
119 Kumu::XMLElement::AddChild(Kumu::XMLElement* element)
121 m_ChildList.push_back(element); // takes possession!
// Creates a new child element with the given name and appends it to m_ChildList.
127 Kumu::XMLElement::AddChild(const char* name)
// The child is heap-allocated; only its pointer is stored in the list.
129 XMLElement* tmpE = new XMLElement(name);
130 m_ChildList.push_back(tmpE);
// std::string convenience overload: forwards to the const char* overload
// using value.c_str().
136 Kumu::XMLElement::AddChildWithContent(const char* name, const std::string& value)
138 return AddChildWithContent(name, value.c_str());
143 Kumu::XMLElement::AppendBody(const std::string& value)
150 Kumu::XMLElement::SetBody(const std::string& value)
// Creates a new child element named `name`, sets its body text to `value`
// and appends it to m_ChildList.
157 Kumu::XMLElement::AddChildWithContent(const char* name, const char* value)
161 XMLElement* tmpE = new XMLElement(name);
162 tmpE->m_Body = value;
163 m_ChildList.push_back(tmpE);
// Creates a new child element named `name` whose body text is `prefix`
// immediately followed by `value`, and appends it to m_ChildList.
169 Kumu::XMLElement::AddChildWithPrefixedContent(const char* name, const char* prefix, const char* value)
171 XMLElement* tmpE = new XMLElement(name);
// The body is built in two steps: assign the prefix, then append the value.
172 tmpE->m_Body = prefix;
173 tmpE->m_Body += value;
174 m_ChildList.push_back(tmpE);
180 Kumu::XMLElement::AddComment(const char* value)
// Serializes this element into outbuf.
//   outbuf - receives the document; any previous contents are overwritten
//   pretty - passed through unchanged to RenderElement
189 Kumu::XMLElement::Render(std::string& outbuf, const bool& pretty) const
// The output always starts with a fixed XML 1.0 / UTF-8 declaration.
191 outbuf = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
// Rendering of the element itself starts at depth 0.
192 RenderElement(outbuf, 0, pretty);
197 add_spacer(std::string& outbuf, i32_t depth)
// Appends the textual form of this element (attributes, body, children) to outbuf.
//   outbuf - string being appended to
//   depth  - nesting level, handed to add_spacer and increased by one per child level
//   pretty - formatting switch, forwarded to every child
205 Kumu::XMLElement::RenderElement(std::string& outbuf, const ui32_t& depth, const bool& pretty) const
209 add_spacer(outbuf, depth);
216 for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); ++i )
// NOTE(review): the value is appended as stored; no escaping is visible in this excerpt - confirm.
221 outbuf += (*i).value;
227 // body contents and children
// Two cases: an element with children (which may also carry body text) ...
228 if ( ! m_ChildList.empty() )
233 if ( m_Body.length() > 0 )
238 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); ++i )
// Children are rendered recursively, one level deeper.
240 (*i)->RenderElement(outbuf, depth + 1, pretty);
245 add_spacer(outbuf, depth);
// ... or an element without children that has body text only.
248 else if ( m_Body.length() > 0 )
// Tests whether this element's name equals `name`.
260 Kumu::XMLElement::HasName(const char* name) const
// A null pointer or an empty string is handled separately before the comparison.
262 if ( name == 0 || *name == 0 )
265 return (m_Name == name);
270 Kumu::XMLElement::SetName(const char* name)
// Looks up an attribute by name: scans m_AttrList in order and returns the
// value of the first entry whose name equals `name`.
278 Kumu::XMLElement::GetAttrWithName(const char* name) const
280 for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); i++ )
282 if ( (*i).name == name )
// The returned pointer refers to storage inside the list entry, not to a copy.
283 return (*i).value.c_str();
// Scans the direct children in m_ChildList, testing each with HasName(name).
291 Kumu::XMLElement::GetChildWithName(const char* name) const
293 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
295 if ( (*i)->HasName(name) )
// Collects all descendants (not only direct children) named `name`.
//   name    - element name to match, tested with HasName
//   outList - list the matching element pointers are appended to
303 const Kumu::ElementList&
304 Kumu::XMLElement::GetChildrenWithName(const char* name, ElementList& outList) const
307 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
309 if ( (*i)->HasName(name) )
310 outList.push_back(*i);
// Descend into any child that itself has children, so matches at every depth are found.
312 if ( ! (*i)->m_ChildList.empty() )
313 (*i)->GetChildrenWithName(name, outList);
321 Kumu::XMLElement::DeleteAttributes()
// Removes the entries of m_AttrList whose name equals `name`.
328 Kumu::XMLElement::DeleteAttrWithName(const char* name)
331 AttributeList::iterator i = m_AttrList.begin();
333 while ( i != m_AttrList.end() )
// Compare against the C string directly, as GetAttrWithName does, instead of
// constructing a temporary std::string on every iteration.
335 if ( i->name == name )
// erase() receives the current position; the post-increment advances i first.
336 m_AttrList.erase(i++);
// Destroys all child elements and empties m_ChildList.
344 Kumu::XMLElement::DeleteChildren()
// Works from the back of the list: delete the last child, then drop its entry.
346 while ( ! m_ChildList.empty() )
348 delete m_ChildList.back();
349 m_ChildList.pop_back();
// Searches m_ChildList and erases an entry from it.
// NOTE(review): the matching test and any delete of the element are not visible in this excerpt.
355 Kumu::XMLElement::DeleteChild(const XMLElement* element)
359 for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// NOTE(review): erase() invalidates i, so the loop presumably exits right after this - confirm.
364 m_ChildList.erase(i);
// Searches m_ChildList and erases an entry from it.
// NOTE(review): the matching test is not visible in this excerpt.
373 Kumu::XMLElement::ForgetChild(const XMLElement* element)
377 for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// NOTE(review): erase() invalidates i, so the loop presumably exits right after this - confirm.
381 m_ChildList.erase(i);
// ByteString convenience overload: forwards the buffer's read-only data
// pointer and length to ParseString(const char*, ui32_t).
390 Kumu::XMLElement::ParseString(const ByteString& document)
392 return ParseString((const char*)document.RoData(), document.Length());
// std::string convenience overload: forwards c_str() and size() to
// ParseString(const char*, ui32_t).
397 Kumu::XMLElement::ParseString(const std::string& document)
399 return ParseString(document.c_str(), document.size());
// ByteString convenience overload: forwards the buffer's read-only data
// pointer and length to ParseFirstFromString(const char*, ui32_t).
404 Kumu::XMLElement::ParseFirstFromString(const ByteString& document)
406 return ParseFirstFromString((const char*)document.RoData(), document.Length());
// std::string convenience overload: forwards c_str() and size() to
// ParseFirstFromString(const char*, ui32_t).
411 Kumu::XMLElement::ParseFirstFromString(const std::string& document)
413 return ParseFirstFromString(document.c_str(), document.size());
417 //----------------------------------------------------------------------------------------------------
// State handed to the expat callbacks as their user-data pointer: the root
// element being filled in, the stack of open elements and the namespace map.
422 class ExpatParseContext
424 KM_NO_COPY_CONSTRUCT(ExpatParseContext);
// Stack of elements currently open during the parse.
428 std::stack<XMLElement*> Scope;
431 ExpatParseContext(XMLElement* root) : Root(root) {
// A fresh namespace map is heap-allocated for every context.
432 Namespaces = new ns_map;
// NOTE(review): the destructor is empty, so the map is not freed here. The parse
// functions hand it to the element only when it is non-empty; no other delete is
// visible in this excerpt - confirm the empty-map and error paths do not leak.
436 ~ExpatParseContext() {}
439 // expat wrapper functions
439 // expat wrapper functions
// expat start-element callback.
//   p     - user data; must point to an ExpatParseContext
//   name  - element name; a namespace part, if present, precedes a '|' separator
//   attrs - null-terminated array of alternating attribute names and values
442 xph_start(void* p, const XML_Char* name, const XML_Char** attrs)
444 assert(p); assert(name); assert(attrs);
445 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// Split the name at '|': ns_root keeps the start of the full string while
// `name` is moved to the local part behind the separator.
448 const char* ns_root = name;
449 const char* local_name = strchr(name, '|');
450 if ( local_name != 0 )
451 name = local_name + 1;
// An empty scope stack means this is the document element, which maps to Root;
// otherwise a new child is added under the element on top of the stack.
453 if ( Ctx->Scope.empty() )
455 Ctx->Scope.push(Ctx->Root);
459 Element = Ctx->Scope.top();
460 Ctx->Scope.push(Element->AddChild(name));
463 Element = Ctx->Scope.top();
464 Element->SetName(name);
// If a namespace part was present, use the text before the separator as the
// lookup key into the namespace map.
468 if ( ns_root != name )
469 key.assign(ns_root, name - ns_root - 1);
471 ns_map::iterator ni = Ctx->Namespaces->find(key);
472 if ( ni != Ctx->Namespaces->end() )
473 Element->SetNamespace(ni->second);
// attrs holds name/value pairs, hence the step of two.
476 for ( int i = 0; attrs[i] != 0; i += 2 )
// An attribute name without a '|' separator is used as-is.
478 if ( ( local_name = strchr(attrs[i], '|') ) == 0 )
479 local_name = attrs[i];
483 Element->SetAttr(local_name, attrs[i+1]);
// expat end-element callback.
//   p    - user data; must point to an ExpatParseContext
//   name - name of the element being closed
489 xph_end(void* p, const XML_Char* name)
491 assert(p); assert(name);
492 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// expat character-data callback: appends the received text to the body of the
// element on top of the scope stack.
//   p    - user data; must point to an ExpatParseContext
//   data - character data; the explicit length `len` is used when copying it
//   len  - number of XML_Char units in data
498 xph_char(void* p, const XML_Char* data, int len)
500 assert(p); assert(data);
501 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// Copy exactly len characters into a std::string before appending.
506 tmp_str.assign(data, len);
507 Ctx->Scope.top()->AppendBody(tmp_str);
// expat namespace-declaration callback: registers a namespace in the
// context's map, keyed by the namespace name (ns_name).
//   p         - user data; must point to an ExpatParseContext
//   ns_prefix - declared prefix; a null pointer is handled explicitly
//   ns_name   - namespace name; must not be null
513 xph_namespace_start(void* p, const XML_Char* ns_prefix, const XML_Char* ns_name)
515 assert(p); assert(ns_name);
516 ExpatParseContext* Ctx = (ExpatParseContext*)p;
518 if ( ns_prefix == 0 )
521 ns_map::iterator ni = Ctx->Namespaces->find(ns_name);
// An existing entry is checked against ns_name before an error is logged.
// NOTE(review): the entry was found by ns_name itself, so this comparison can
// presumably only differ if XMLNamespace::Name() is not the name given at
// construction - confirm.
523 if ( ni != Ctx->Namespaces->end() )
525 if ( ni->second->Name() != std::string(ns_name) )
527 DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix);
// Otherwise a new XMLNamespace object is allocated and stored under ns_name.
533 XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name);
534 Ctx->Namespaces->insert(ns_map::value_type(ns_name, Namespace));
// expat build: parses a complete XML document into this element.
//   document - start of the document text
//   doc_len  - length of the document, passed to XML_Parse
540 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
// Namespace-aware parser; '|' separates namespace and local name in the
// names delivered to the callbacks.
547 XML_Parser Parser = XML_ParserCreateNS("UTF-8", '|');
551 DefaultLogSink().Error("Error allocating memory for XML parser.\n");
// The context lives on the stack and is passed to every callback as user data.
555 ExpatParseContext Ctx(this);
556 XML_SetUserData(Parser, (void*)&Ctx);
557 XML_SetElementHandler(Parser, xph_start, xph_end);
558 XML_SetCharacterDataHandler(Parser, xph_char);
559 XML_SetStartNamespaceDeclHandler(Parser, xph_namespace_start);
// The whole document is supplied in a single call (final-chunk flag set to 1).
561 if ( ! XML_Parse(Parser, document, doc_len, 1) )
563 DefaultLogSink().Error("XML Parse error on line %d: %s\n",
564 XML_GetCurrentLineNumber(Parser),
565 XML_ErrorString(XML_GetErrorCode(Parser)));
566 XML_ParserFree(Parser);
570 XML_ParserFree(Parser);
// A non-empty namespace map is handed over to this element.
// NOTE(review): no delete of Ctx.Namespaces is visible for the empty-map or
// parse-error paths - confirm it is not leaked.
572 if ( ! Ctx.Namespaces->empty() )
574 m_NamespaceOwner = (void*)Ctx.Namespaces;
580 // expat wrapper functions
// Start-element callback used by ParseFirstFromString: handles the element
// via xph_start and then asks expat to stop parsing.
583 xph_start_one_shot(void* p, const XML_Char* name, const XML_Char** attrs)
585 xph_start(p, name, attrs);
// NOTE(review): p is the user-data pointer. ParseFirstFromString registers an
// ExpatParseContext with XML_SetUserData, and xph_start above uses p as one, so
// casting it to XML_Parser looks wrong: XML_StopParser would be called on an
// object that is not a parser. No XML_UseParserAsHandlerArg call is visible in
// this excerpt - confirm, and if absent pass the real parser handle through the
// context.
586 XML_Parser parser = (XML_Parser)p;
587 XML_StopParser(parser, false);
// expat build: same setup as ParseString, but the start-element handler is
// xph_start_one_shot, which requests a stop after the first element.
//   document - start of the document text
//   doc_len  - length of the document, passed to XML_Parse
592 Kumu::XMLElement::ParseFirstFromString(const char* document, ui32_t doc_len)
// Namespace-aware parser; '|' separates namespace and local name.
599 XML_Parser Parser = XML_ParserCreateNS("UTF-8", '|');
603 DefaultLogSink().Error("Error allocating memory for XML parser.\n");
// The context lives on the stack and is passed to every callback as user data.
607 ExpatParseContext Ctx(this);
608 XML_SetUserData(Parser, (void*)&Ctx);
609 XML_SetElementHandler(Parser, xph_start_one_shot, xph_end);
610 XML_SetCharacterDataHandler(Parser, xph_char);
611 XML_SetStartNamespaceDeclHandler(Parser, xph_namespace_start);
// NOTE(review): if the stop request took effect, XML_Parse would presumably
// report an aborted parse, which this check treats like any other failure - confirm.
613 if ( ! XML_Parse(Parser, document, doc_len, 1) )
615 DefaultLogSink().Error("XML Parse error on line %d: %s\n",
616 XML_GetCurrentLineNumber(Parser),
617 XML_ErrorString(XML_GetErrorCode(Parser)));
618 XML_ParserFree(Parser);
622 XML_ParserFree(Parser);
// A non-empty namespace map is handed over to this element.
// NOTE(review): no delete of Ctx.Namespaces is visible for the other paths - confirm.
624 if ( ! Ctx.Namespaces->empty() )
626 m_NamespaceOwner = (void*)Ctx.Namespaces;
636 //----------------------------------------------------------------------------------------------------
// File-scope state shared by the Xerces initialization and transcoding helpers.
640 static Mutex sg_xerces_init_lock; // protect the xerces initialized
641 static bool sg_xml_init = false; // signal initialization
642 static Mutex sg_coder_lock; // protect the transcoder context
// UTF-8 transcoder; null until one is created during initialization.
643 static XMLTranscoder* sg_coder = 0;
// Size in bytes of the shared transcoding buffer (128 KiB); the buffer itself
// is declared 8 bytes larger.
644 static const int sg_coder_buf_len = 128 * 1024;
645 static char sg_coder_buf[sg_coder_buf_len + 8];
// One entry per XMLCh that fits into the buffer.
646 static unsigned char sg_coder_counts[sg_coder_buf_len / sizeof(XMLCh)]; // see XMLTranscoder::transcodeFrom
// Null-terminated XMLCh strings "LS" and "UTF-8".
648 static const XMLCh sg_LS[] = { chLatin_L, chLatin_S, chNull };
649 static const XMLCh sg_label_UTF_8[] = { chLatin_U, chLatin_T, chLatin_F,
650 chDash, chDigit_8, chNull};
// Xerces start-up: initializes the platform utilities and creates the shared
// UTF-8 transcoder. The whole sequence runs while holding sg_xerces_init_lock.
658 AutoMutex AL(sg_xerces_init_lock);
664 XMLPlatformUtils::Initialize();
// The transcoder is created for the "UTF-8" label with a block size of
// sg_coder_buf_len; the outcome is reported through `ret`.
667 XMLTransService::Codes ret;
668 sg_coder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(sg_label_UTF_8, ret, sg_coder_buf_len);
// Any result other than Ok is translated into a readable message and logged.
670 if ( ret != XMLTransService::Ok )
672 const char* message = "Undefined Error";
676 case XMLTransService::UnsupportedEncoding: message = "Unsupported encoding"; break;
677 case XMLTransService::InternalFailure: message = "Internal failure"; break;
678 case XMLTransService::SupportFilesNotFound: message = "Support files not found"; break;
681 DefaultLogSink().Error("Xerces transform initialization error: %s\n", message);
684 catch (const XMLException &e)
// NOTE(review): e.getMessage() is passed to a %s conversion; in Xerces it
// presumably returns XMLCh*, which %s would not print correctly - confirm.
686 DefaultLogSink().Error("Xerces initialization error: %s\n", e.getMessage());
// std::basic_string<XMLCh> convenience overload: forwards in_str.c_str() to
// kumu_XercesString_to_UTF_8_p, so conversion stops at the first NUL.
694 kumu_XercesString_to_UTF_8(const std::basic_string<XMLCh>& in_str, std::string& out_str) {
695 return kumu_XercesString_to_UTF_8_p(in_str.c_str(), out_str);
// Converts a NUL-terminated XMLCh string to UTF-8 and appends it to out_str.
//   in_str  - NUL-terminated XMLCh input
//   out_str - UTF-8 output; the converted text is appended chunk by chunk
700 kumu_XercesString_to_UTF_8_p(const XMLCh* in_str, std::string& out_str)
// sg_coder and sg_coder_buf are shared, so the conversion runs under sg_coder_lock.
704 AutoMutex AL(sg_coder_lock);
705 ui32_t str_len = XMLString::stringLen(in_str);
706 ui32_t read_total = 0;
// Convert in chunks until every input character has been consumed.
710 while ( str_len > 0 )
// The type of the "characters consumed" out-parameter depends on the Xerces major version.
712 #if XERCES_VERSION_MAJOR < 3
713 ui32_t read_count = 0;
715 XMLSize_t read_count = 0;
// transcodeTo writes bytes into sg_coder_buf; write_count receives its return
// value. Unrepresentable characters make it throw (UnRep_Throw).
717 ui32_t write_count = sg_coder->transcodeTo(in_str + read_total, str_len,
718 (XMLByte*)sg_coder_buf, sg_coder_buf_len,
719 read_count, XMLTranscoder::UnRep_Throw);
721 out_str.append(sg_coder_buf, write_count);
722 str_len -= read_count;
723 read_total += read_count;
// NOTE(review): str_len is a ui32_t; if that type is unsigned this assertion can never fail.
724 assert(str_len >= 0);
// std::string convenience overload: forwards in_str.c_str() to
// kumu_UTF_8_to_XercesString_p, so conversion stops at the first NUL.
737 kumu_UTF_8_to_XercesString(const std::string& in_str, std::basic_string<XMLCh>& out_str) {
738 return kumu_UTF_8_to_XercesString_p(in_str.c_str(), out_str);
// Converts a NUL-terminated UTF-8 string to XMLCh and appends it to out_str.
//   in_str  - NUL-terminated UTF-8 input
//   out_str - XMLCh output; the converted text is appended chunk by chunk
743 kumu_UTF_8_to_XercesString_p(const char* in_str, std::basic_string<XMLCh>& out_str)
// sg_coder and sg_coder_buf are shared, so the conversion runs under sg_coder_lock.
747 AutoMutex AL(sg_coder_lock);
748 ui32_t str_len = strlen(in_str);
749 ui32_t read_total = 0;
// Convert in chunks until every input byte has been consumed.
753 while ( str_len > 0 )
// The type of the "bytes consumed" out-parameter depends on the Xerces major version.
755 #if XERCES_VERSION_MAJOR < 3
756 ui32_t read_count = 0;
758 XMLSize_t read_count = 0;
// sg_coder_buf is reused as an XMLCh buffer, so its capacity is given in
// XMLCh units (bytes / sizeof(XMLCh)).
760 ui32_t write_count = sg_coder->transcodeFrom((const XMLByte*)(in_str + read_total), str_len,
761 (XMLCh*)sg_coder_buf, sg_coder_buf_len / sizeof(XMLCh),
762 read_count, sg_coder_counts);
// write_count is the number of XMLCh characters produced, and
// basic_string::append(ptr, n) takes a character count. Multiplying by
// sizeof(XMLCh) here appended stale data and could read past the buffer.
764 out_str.append((XMLCh*)sg_coder_buf, write_count);
765 str_len -= read_count;
766 read_total += read_count;
// NOTE(review): str_len is a ui32_t; if that type is unsigned this assertion can never fail.
767 assert(str_len >= 0);
// Xerces SAX handler that builds the XMLElement tree: it tracks the stack of
// open elements, collects namespace declarations and records whether any
// XMLCh-to-UTF-8 conversion failed.
779 class MyTreeHandler : public HandlerBase
// Namespace map, keyed by prefix (see AddNamespace).
781 ns_map* m_Namespaces;
// Stack of elements currently open during the parse.
782 std::stack<XMLElement*> m_Scope;
// Set to true as soon as one string conversion fails.
784 bool m_HasEncodeErrors;
787 MyTreeHandler(XMLElement* root) : m_Namespaces(0), m_Root(root), m_HasEncodeErrors(false)
790 m_Namespaces = new ns_map;
797 bool HasEncodeErrors() const { return m_HasEncodeErrors; }
// Returns the namespace map to the caller; a missing or empty map is handled separately.
799 ns_map* TakeNamespaceMap()
801 if ( m_Namespaces == 0 || m_Namespaces->empty() )
804 ns_map* ret = m_Namespaces;
// Registers a namespace declaration.
//   ns_prefix - the text following "xmlns" in the attribute name (may start with ':')
//   ns_name   - the attribute value
810 void AddNamespace(const char* ns_prefix, const char* ns_name)
815 if ( ns_prefix[0] == ':' )
821 assert(ns_prefix[0] == 0);
825 ns_map::iterator ni = m_Namespaces->find(ns_prefix);
// An existing entry for the prefix is compared with ns_name before an error is logged.
827 if ( ni != m_Namespaces->end() )
829 if ( ni->second->Name() != std::string(ns_name) )
831 DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix);
837 XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name);
838 m_Namespaces->insert(ns_map::value_type(ns_prefix, Namespace));
841 assert(!m_Namespaces->empty());
// SAX start-element callback: creates or selects the element, processes its
// attributes (including xmlns declarations) and attaches its namespace.
845 void startElement(const XMLCh* const x_name,
846 XERCES_CPP_NAMESPACE::AttributeList& attributes)
851 if ( ! kumu_XercesString_to_UTF_8(x_name, tx_name) )
852 m_HasEncodeErrors = true;
854 const char* name = tx_name.c_str();
// Split "prefix:local": ns_root keeps the start of the qualified name while
// `name` is moved to the local part.
856 const char* ns_root = name;
857 const char* local_name = strchr(name, ':');
859 if ( local_name != 0 )
860 name = local_name + 1;
// An empty scope stack means this is the document element, which maps to m_Root;
// otherwise a new child is added under the element on top of the stack.
862 if ( m_Scope.empty() )
864 m_Scope.push(m_Root);
868 Element = m_Scope.top();
869 m_Scope.push(Element->AddChild(name));
872 Element = m_Scope.top();
873 Element->SetName(name);
876 ui32_t a_len = attributes.getLength();
878 for ( ui32_t i = 0; i < a_len; i++)
880 std::string aname, value;
881 if ( ! kumu_XercesString_to_UTF_8(attributes.getName(i), aname) )
882 m_HasEncodeErrors = true;
884 if ( ! kumu_XercesString_to_UTF_8(attributes.getValue(i), value) )
885 m_HasEncodeErrors = true;
887 const char* x_aname = aname.c_str();
888 const char* x_value = value.c_str();
// Attribute names starting with "xmlns" are namespace declarations; the rest
// of the name (":prefix" or empty) is passed on as the prefix.
890 if ( strncmp(x_aname, "xmlns", 5) == 0 )
891 AddNamespace(x_aname+5, x_value);
893 if ( ( local_name = strchr(x_aname, ':') ) == 0 )
894 local_name = x_aname;
898 Element->SetAttr(local_name, x_value);
// The namespace lookup happens after the attribute loop so that declarations
// made on this very element are already in the map.
903 if ( ns_root != name )
904 key.assign(ns_root, name - ns_root - 1);
906 ns_map::iterator ni = m_Namespaces->find(key);
907 if ( ni != m_Namespaces->end() )
908 Element->SetNamespace(ni->second);
911 void endElement(const XMLCh *const name) {
// SAX character-data callback; the signature depends on the Xerces major version.
915 #if XERCES_VERSION_MAJOR < 3
916 void characters(const XMLCh *const chars, const unsigned int length)
918 void characters(const XMLCh* const chars, const XMLSize_t length)
// Build the string from exactly `length` characters: the SAX contract
// describes the text by (chars, length) and does not promise a terminating
// NUL, so scanning for one could read past the reported data.
924 if ( ! kumu_XercesString_to_UTF_8(std::basic_string<XMLCh>(chars, length), tmp) )
925 m_HasEncodeErrors = true;
927 m_Scope.top()->AppendBody(tmp);
// Xerces build: parses a complete XML document into this element.
//   document - start of the document text
//   doc_len  - length of the document in bytes
// Returns true only when no error was counted.
934 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
// NOTE(review): parser and handler are allocated with new; their release is
// not visible in this excerpt - confirm they are deleted on every path.
944 SAXParser* parser = new SAXParser();
946 parser->setValidationScheme(SAXParser::Val_Always);
947 parser->setDoNamespaces(true); // optional
// The same handler object receives both document events and error callbacks.
949 MyTreeHandler* docHandler = new MyTreeHandler(this);
950 parser->setDocumentHandler(docHandler);
951 parser->setErrorHandler(docHandler);
// The document is parsed directly from the caller's memory buffer.
955 MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document),
956 static_cast<const unsigned int>(doc_len),
959 parser->parse(xmlSource);
// Xerces messages are XMLCh strings: transcode for logging, then release the copy.
961 catch (const XMLException& e)
963 char* message = XMLString::transcode(e.getMessage());
964 DefaultLogSink().Error("Parser error: %s\n", message);
965 XMLString::release(&message);
968 catch (const SAXParseException& e)
970 char* message = XMLString::transcode(e.getMessage());
971 DefaultLogSink().Error("Parser error: %s at line %d\n", message, e.getLineNumber());
972 XMLString::release(&message);
977 DefaultLogSink().Error("Unexpected XML parser error\n");
// The namespace map is taken over from the handler only after an error-free parse.
981 if ( errorCount == 0 )
982 m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap();
987 return errorCount > 0 ? false : true;
// Xerces build: progressive-parse variant that uses parseFirst/parseNext with
// a scan token instead of a single parse() call.
//   document - start of the document text
//   doc_len  - length of the document in bytes
// Returns true only when no error was counted.
992 Kumu::XMLElement::ParseFirstFromString(const char* document, ui32_t doc_len)
// NOTE(review): parser and handler are allocated with new; their release is
// not visible in this excerpt - confirm they are deleted on every path.
1002 SAXParser* parser = new SAXParser();
1004 parser->setValidationScheme(SAXParser::Val_Always);
1005 parser->setDoNamespaces(true); // optional
// The same handler object receives both document events and error callbacks.
1007 MyTreeHandler* docHandler = new MyTreeHandler(this);
1008 parser->setDocumentHandler(docHandler);
1009 parser->setErrorHandler(docHandler);
// Token that carries the progressive-scan state between parseFirst and parseNext.
1010 XMLPScanToken token;
1014 MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document),
1015 static_cast<const unsigned int>(doc_len),
1018 if ( ! parser->parseFirst(xmlSource, token) )
1023 if ( ! parser->parseNext(token) )
// Xerces messages are XMLCh strings: transcode for logging, then release the copy.
1028 catch (const XMLException& e)
1030 char* message = XMLString::transcode(e.getMessage());
1031 DefaultLogSink().Error("Parser error: %s\n", message);
1032 XMLString::release(&message);
1035 catch (const SAXParseException& e)
1037 char* message = XMLString::transcode(e.getMessage());
1038 DefaultLogSink().Error("Parser error: %s at line %d\n", message, e.getLineNumber());
1039 XMLString::release(&message);
1044 DefaultLogSink().Error("Unexpected XML parser error\n");
// The namespace map is taken over from the handler only after an error-free parse.
1048 if ( errorCount == 0 )
1049 m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap();
1054 return errorCount > 0 ? false : true;
1060 //----------------------------------------------------------------------------------------------------
1062 #if ! defined(HAVE_EXPAT) && ! defined(HAVE_XERCES_C)
// Fallback compiled when neither HAVE_EXPAT nor HAVE_XERCES_C is defined:
// logs that no XML parser support is available.
1066 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
1068 DefaultLogSink().Error("Kumu compiled without XML parser support.\n");
// Fallback compiled when neither HAVE_EXPAT nor HAVE_XERCES_C is defined:
// logs that no XML parser support is available.
1073 Kumu::XMLElement::ParseFirstFromString(const char* document, ui32_t doc_len)
1075 DefaultLogSink().Error("Kumu compiled without XML parser support.\n");
1082 //----------------------------------------------------------------------------------------------------
// ByteString convenience overload: forwards the buffer's read-only data
// pointer and length to the (const byte_t*, ui32_t, ...) overload.
1086 Kumu::GetXMLDocType(const ByteString& buf, std::string& ns_prefix, std::string& type_name, std::string& namespace_name,
1087 AttributeList& doc_attr_list)
1089 return GetXMLDocType(buf.RoData(), buf.Length(), ns_prefix, type_name, namespace_name, doc_attr_list);
// std::string convenience overload: forwards c_str() and size() to the
// (const byte_t*, ui32_t, ...) overload.
1094 Kumu::GetXMLDocType(const std::string& buf, std::string& ns_prefix, std::string& type_name, std::string& namespace_name,
1095 AttributeList& doc_attr_list)
1097 return GetXMLDocType((const byte_t*)buf.c_str(), buf.size(), ns_prefix, type_name, namespace_name, doc_attr_list);
// Reports the document element's name, namespace and attributes for an XML
// document held in memory.
//   buf, buf_len   - document text and its length
//   ns_prefix      - receives the namespace prefix, when a namespace is present
//   type_name      - receives the document element's name
//   namespace_name - receives the namespace name, when a namespace is present
//   doc_attr_list  - receives a copy of the document element's attributes
1102 Kumu::GetXMLDocType(const byte_t* buf, ui32_t buf_len, std::string& ns_prefix, std::string& type_name, std::string& namespace_name,
1103 AttributeList& doc_attr_list)
// A scratch element is parsed with ParseFirstFromString; its initial name
// "tmp" is a placeholder.
1105 XMLElement tmp_element("tmp");
1107 if ( ! tmp_element.ParseFirstFromString((const char*)buf, buf_len) )
1112 const XMLNamespace* ns = tmp_element.Namespace();
1116 ns_prefix = ns->Prefix();
1117 namespace_name = ns->Name();
1120 type_name = tmp_element.GetName();
1121 doc_attr_list = tmp_element.GetAttributes();