2 Copyright (c) 2005-2010, John Hurst
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 # error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
47 # error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
50 #include <xercesc/util/PlatformUtils.hpp>
51 #include <xercesc/util/XMLString.hpp>
52 #include <xercesc/util/TransService.hpp>
53 #include <xercesc/sax/AttributeList.hpp>
54 #include <xercesc/sax/HandlerBase.hpp>
55 #include <xercesc/sax/ErrorHandler.hpp>
56 #include <xercesc/sax/SAXParseException.hpp>
57 #include <xercesc/parsers/SAXParser.hpp>
58 #include <xercesc/framework/MemBufInputSource.hpp>
59 #include <xercesc/framework/XMLPScanToken.hpp>
62 XERCES_CPP_NAMESPACE_USE
66 typedef std::basic_string<XMLCh> XercesString;
67 bool UTF_8_to_XercesString(const std::string& in_str, XercesString& out_str);
68 bool UTF_8_to_XercesString(const char* in_str, XercesString& out_str);
69 bool XercesString_to_UTF_8(const XercesString& in_str, std::string& out_str);
70 bool XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str);
// Container mapping a std::string key to an XMLNamespace pointer, built directly on std::map.
// NOTE(review): only the class head and a single member-function line are visible in this
// view; the begin() walk below presumably belongs to a cleanup routine -- confirm.
78 class ns_map : public std::map<std::string, XMLNamespace*>
85 ns_map::iterator ni = begin();
93 Kumu::XMLElement::XMLElement(const char* name) : m_Namespace(0), m_NamespaceOwner(0)
// Destructor: walks every entry of m_ChildList, then deletes the object referenced by
// m_NamespaceOwner.
// NOTE(review): the loop body is not visible in this view; presumably each child is deleted -- confirm.
98 Kumu::XMLElement::~XMLElement()
100 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// The owner pointer is cast to ns_map* so that the delete runs on the ns_map type.
103 delete (ns_map*)m_NamespaceOwner;
// Appends one attribute record to m_AttrList.
// NOTE(review): the declaration of TmpVal and the assignment of `name` are not visible in
// this view -- confirm that the name field is filled in before the push.
108 Kumu::XMLElement::SetAttr(const char* name, const char* value)
112 TmpVal.value = value;
// The record is copied onto the end of the list, so attributes keep insertion order.
114 m_AttrList.push_back(TmpVal);
// Appends an already-constructed element to the end of m_ChildList.
119 Kumu::XMLElement::AddChild(Kumu::XMLElement* element)
121 m_ChildList.push_back(element); // takes possession!
// Allocates a new XMLElement constructed from `name` and appends it to m_ChildList.
127 Kumu::XMLElement::AddChild(const char* name)
129 XMLElement* tmpE = new XMLElement(name);
130 m_ChildList.push_back(tmpE);
// std::string convenience overload: forwards to the (const char*, const char*) overload
// using value.c_str() and returns whatever that overload returns.
136 Kumu::XMLElement::AddChildWithContent(const char* name, const std::string& value)
138 return AddChildWithContent(name, value.c_str());
143 Kumu::XMLElement::AppendBody(const std::string& value)
150 Kumu::XMLElement::SetBody(const std::string& value)
// Allocates a new XMLElement constructed from `name`, assigns `value` to its m_Body and
// appends the element to m_ChildList.
// NOTE(review): any validation of `name`/`value` would sit on lines not visible here -- confirm
// that a null `value` cannot reach the m_Body assignment.
157 Kumu::XMLElement::AddChildWithContent(const char* name, const char* value)
161 XMLElement* tmpE = new XMLElement(name);
162 tmpE->m_Body = value;
163 m_ChildList.push_back(tmpE);
// Allocates a new XMLElement constructed from `name` whose m_Body is `prefix` immediately
// followed by `value`, then appends the element to m_ChildList.
169 Kumu::XMLElement::AddChildWithPrefixedContent(const char* name, const char* prefix, const char* value)
171 XMLElement* tmpE = new XMLElement(name);
// Body is built in two steps: start from the prefix, then concatenate the value.
172 tmpE->m_Body = prefix;
173 tmpE->m_Body += value;
174 m_ChildList.push_back(tmpE);
180 Kumu::XMLElement::AddComment(const char* value)
// Serializes this element into `outbuf`. Any previous contents of `outbuf` are replaced by
// the XML declaration, after which RenderElement() is invoked on this element at depth 0
// with the caller's `pretty` flag.
189 Kumu::XMLElement::Render(std::string& outbuf, const bool& pretty) const
191 outbuf = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
192 RenderElement(outbuf, 0, pretty);
197 add_spacer(std::string& outbuf, i32_t depth)
// Appends the textual form of this element to `outbuf`.
//   outbuf - destination string, appended to
//   depth  - nesting level; passed to add_spacer() and incremented for children
//   pretty - forwarded unchanged to child elements
// NOTE(review): the statements that emit the tag text, and the conditions (if any) guarding
// the add_spacer() calls, are on lines not visible in this view.
205 Kumu::XMLElement::RenderElement(std::string& outbuf, const ui32_t& depth, const bool& pretty) const
209 add_spacer(outbuf, depth);
// Attributes are visited in list order; each attribute's value is appended to the output.
216 for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); ++i )
221 outbuf += (*i).value;
227 // body contents and children
// Two cases are distinguished: an element that has children (which may also carry body
// text), and a childless element that has body text only.
228 if ( ! m_ChildList.empty() )
233 if ( m_Body.length() > 0 )
// Children are rendered recursively, one nesting level deeper than this element.
238 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); ++i )
240 (*i)->RenderElement(outbuf, depth + 1, pretty);
245 add_spacer(outbuf, depth);
248 else if ( m_Body.length() > 0 )
// Compares `name` with this element's m_Name and returns the result of the equality test.
// A null pointer or empty string is detected first; the action taken in that case is on a
// line not visible in this view.
260 Kumu::XMLElement::HasName(const char* name) const
262 if ( name == 0 || *name == 0 )
265 return (m_Name == name);
270 Kumu::XMLElement::SetName(const char* name)
// Linear search of m_AttrList: returns value.c_str() of the first attribute whose name
// compares equal to `name`.
// NOTE(review): the result for "not found" is on a line not visible in this view.
278 Kumu::XMLElement::GetAttrWithName(const char* name) const
280 for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); i++ )
282 if ( (*i).name == name )
// The returned pointer refers to storage held by the attribute entry itself.
283 return (*i).value.c_str();
// Scans m_ChildList in order, testing each direct child with HasName(name).
// NOTE(review): what is returned on a match, and on no match, is on lines not visible in
// this view. Unlike GetChildrenWithName(), no recursive call is visible here.
291 Kumu::XMLElement::GetChildWithName(const char* name) const
293 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
295 if ( (*i)->HasName(name) )
// Collects descendants whose name matches `name` into the caller-supplied `outList`.
// Matching elements are appended to `outList`; nothing visible here clears it first.
// NOTE(review): the return statement is not visible in this view; presumably `outList`
// itself is returned -- confirm.
303 const Kumu::ElementList&
304 Kumu::XMLElement::GetChildrenWithName(const char* name, ElementList& outList) const
307 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// A matching child is recorded before its own subtree is searched.
309 if ( (*i)->HasName(name) )
310 outList.push_back(*i);
// Recurse only into children that themselves have children.
312 if ( ! (*i)->m_ChildList.empty() )
313 (*i)->GetChildrenWithName(name, outList);
321 Kumu::XMLElement::DeleteAttributes()
// Walks m_AttrList and erases entries whose name equals `name`.
// NOTE(review): the branch that advances `i` for non-matching entries is on a line not
// visible in this view -- confirm it exists, otherwise the loop would not terminate.
328 Kumu::XMLElement::DeleteAttrWithName(const char* name)
331 AttributeList::iterator i = m_AttrList.begin();
333 while ( i != m_AttrList.end() )
// A temporary std::string is built from `name` for each comparison.
335 if ( i->name == std::string(name) )
// Post-increment hands the current position to erase() while `i` already points at the
// next entry. NOTE(review): this is only safe if erase() leaves other iterators valid
// (e.g. std::list) -- confirm the definition of AttributeList.
336 m_AttrList.erase(i++);
// Destroys all child elements: repeatedly deletes the last entry of m_ChildList and removes
// it from the list until the list is empty.
344 Kumu::XMLElement::DeleteChildren()
346 while ( ! m_ChildList.empty() )
348 delete m_ChildList.back();
349 m_ChildList.pop_back();
// Searches m_ChildList for `element` and erases the matching list entry.
// NOTE(review): the match test, any delete of the element, and whatever follows the erase
// are on lines not visible in this view. erase(i) invalidates `i`, so the loop must be left
// (return/break) before the `i++` in the for header runs again -- confirm.
355 Kumu::XMLElement::DeleteChild(const XMLElement* element)
359 for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
364 m_ChildList.erase(i);
// Searches m_ChildList for `element` and erases the matching list entry.
// NOTE(review): the match test and whatever follows the erase are on lines not visible in
// this view. erase(i) invalidates `i`, so the loop must be left (return/break) before the
// `i++` in the for header runs again -- confirm.
373 Kumu::XMLElement::ForgetChild(const XMLElement* element)
377 for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
381 m_ChildList.erase(i);
// ByteString convenience overload: forwards the buffer returned by document.RoData()
// (cast to const char*) and document.Length() to the (const char*, ui32_t) overload.
390 Kumu::XMLElement::ParseString(const ByteString& document)
392 return ParseString((const char*)document.RoData(), document.Length());
// std::string convenience overload: forwards document.c_str() and document.size() to the
// (const char*, ui32_t) overload.
397 Kumu::XMLElement::ParseString(const std::string& document)
399 return ParseString(document.c_str(), document.size());
403 //----------------------------------------------------------------------------------------------------
// Parse state handed to the Expat callbacks through the parser's user-data pointer.
// Holds the root element being populated, a stack of the elements currently open, and a
// heap-allocated namespace map.
408 class ExpatParseContext
410 KM_NO_COPY_CONSTRUCT(ExpatParseContext);
// Stack of open elements; the callbacks below read and push via top()/push().
414 std::stack<XMLElement*> Scope;
// The namespace map is allocated here with `new`.
417 ExpatParseContext(XMLElement* root) : Root(root) {
418 Namespaces = new ns_map;
// NOTE(review): the destructor is empty, so it never frees Namespaces; whoever uses the
// context must either take the map over or delete it -- confirm every exit path does.
422 ~ExpatParseContext() {}
425 // expat wrapper functions
// Expat start-element callback.
//   p     - user data; must be the ExpatParseContext registered with the parser
//   name  - element name; a '|' separates a namespace part from the local name
//   attrs - null-terminated array of alternating attribute names and values
// Creates (or, for the first element, reuses) an XMLElement, names it with the local part
// of `name`, attaches a known namespace and copies the attributes.
428 xph_start(void* p, const XML_Char* name, const XML_Char** attrs)
430 assert(p); assert(name); assert(attrs);
431 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// Remember the start of the full name, then move `name` past the '|' separator if present.
434 const char* ns_root = name;
435 const char* local_name = strchr(name, '|');
436 if ( local_name != 0 )
437 name = local_name + 1;
// An empty scope stack means this is the first element: the context's Root is used for it.
439 if ( Ctx->Scope.empty() )
441 Ctx->Scope.push(Ctx->Root);
// Otherwise a new child is added to the element on top of the stack and pushed in turn.
445 Element = Ctx->Scope.top();
446 Ctx->Scope.push(Element->AddChild(name));
449 Element = Ctx->Scope.top();
450 Element->SetName(name);
// `name` was advanced only when a separator was found; in that case the key is the text
// in front of the separator (the -1 drops the '|' itself).
454 if ( ns_root != name )
455 key.assign(ns_root, name - ns_root - 1);
// Attach the namespace only if the key is already present in the context's map.
457 ns_map::iterator ni = Ctx->Namespaces->find(key);
458 if ( ni != Ctx->Namespaces->end() )
459 Element->SetNamespace(ni->second);
// Attributes arrive as name/value pairs, hence the stride of two.
462 for ( int i = 0; attrs[i] != 0; i += 2 )
// With no '|' in the attribute name, the whole name is used as the local name.
// NOTE(review): the handling of the case where '|' is found is on a line not visible here.
464 if ( ( local_name = strchr(attrs[i], '|') ) == 0 )
465 local_name = attrs[i];
469 Element->SetAttr(local_name, attrs[i+1]);
// Expat end-element callback. Recovers the ExpatParseContext from the user-data pointer.
// NOTE(review): the action performed on the context is on a line not visible in this view;
// presumably the scope stack is popped -- confirm.
475 xph_end(void* p, const XML_Char* name)
477 assert(p); assert(name);
478 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// Expat character-data callback: copies exactly `len` characters from `data` into a
// temporary string and appends it to the body of the element on top of the scope stack.
// NOTE(review): any filtering between the cast and the append is on lines not visible here.
484 xph_char(void* p, const XML_Char* data, int len)
486 assert(p); assert(data);
487 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// An explicit length is used; `data` is not treated as a null-terminated string.
492 tmp_str.assign(data, len);
493 Ctx->Scope.top()->AppendBody(tmp_str);
// Expat start-namespace-declaration callback: records a namespace in the context's map.
//   ns_prefix - declared prefix; may be null (only ns_name is asserted non-null)
//   ns_name   - namespace name, used as the map key
499 xph_namespace_start(void* p, const XML_Char* ns_prefix, const XML_Char* ns_name)
501 assert(p); assert(ns_name);
502 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// NOTE(review): the substitution made for a null prefix is on a line not visible here.
504 if ( ns_prefix == 0 )
// Lookup is by namespace name, matching the key used for insertion below.
507 ns_map::iterator ni = Ctx->Namespaces->find(ns_name);
509 if ( ni != Ctx->Namespaces->end() )
// NOTE(review): entries are inserted keyed by ns_name, so if Name() returns the namespace
// name given at construction this inequality can never hold and the "Duplicate prefix"
// report would be unreachable -- confirm the intended check.
511 if ( ni->second->Name() != std::string(ns_name) )
513 DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix);
// A new XMLNamespace is allocated and stored in the map under ns_name.
519 XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name);
520 Ctx->Namespaces->insert(ns_map::value_type(ns_name, Namespace));
// Expat implementation: parses `doc_len` bytes at `document` into this element.
// Errors are reported through DefaultLogSink(); the return statements themselves are on
// lines not visible in this view.
526 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
// Namespace-aware parser; '|' is the separator the start-element callback splits on.
528 XML_Parser Parser = XML_ParserCreateNS("UTF-8", '|');
532 DefaultLogSink().Error("Error allocating memory for XML parser.\n");
// The context lives on the stack for the duration of the parse and is passed to every
// callback through the user-data pointer.
536 ExpatParseContext Ctx(this);
537 XML_SetUserData(Parser, (void*)&Ctx);
538 XML_SetElementHandler(Parser, xph_start, xph_end);
539 XML_SetCharacterDataHandler(Parser, xph_char);
540 XML_SetStartNamespaceDeclHandler(Parser, xph_namespace_start);
// The whole document is passed in a single call, with the final-chunk flag set to 1.
542 if ( ! XML_Parse(Parser, document, doc_len, 1) )
544 DefaultLogSink().Error("XML Parse error on line %d: %s\n",
545 XML_GetCurrentLineNumber(Parser),
546 XML_ErrorString(XML_GetErrorCode(Parser)));
547 XML_ParserFree(Parser);
551 XML_ParserFree(Parser);
// A non-empty namespace map is handed to this element via m_NamespaceOwner.
// NOTE(review): ExpatParseContext's destructor is empty, so an empty map (and the map on
// the parse-error path) appears to be leaked unless it is deleted on a line not visible
// here -- confirm.
553 if ( ! Ctx.Namespaces->empty() )
554 m_NamespaceOwner = (void*)Ctx.Namespaces;
559 //------------------------------------------------------------------------------------------
// Small state record used by the XML detection test: pairs a parser handle with a status
// flag. The flag starts out false.
561 struct xph_test_wrapper
566 xph_test_wrapper(XML_Parser p) : Parser(p), Status(false) {}
569 // expat wrapper functions, map callbacks to IASAXHandler
// Expat start-element callback used for XML detection: as soon as any element start is
// reported, the wrapper's Status is set to true and the parser is told to stop
// (non-resumable), so no further input is processed.
572 xph_test_start(void* p, const XML_Char* name, const XML_Char** attrs)
575 xph_test_wrapper* Wrapper = (xph_test_wrapper*)p;
577 Wrapper->Status = true;
578 XML_StopParser(Wrapper->Parser, false);
// Expat implementation: reports whether `document` begins like an XML document. The result
// is the wrapper's Status flag, which the start-element callback sets to true.
// The return value of XML_Parse() is ignored; only the flag matters.
584 Kumu::StringIsXML(const char* document, ui32_t len)
// NOTE(review): the condition guarding this strlen() is on a line not visible here;
// presumably it applies when the caller passes len == 0 -- confirm.
590 len = strlen(document);
592 XML_Parser Parser = XML_ParserCreate("UTF-8");
596 DefaultLogSink().Error("Error allocating memory for XML parser.\n");
600 xph_test_wrapper Wrapper(Parser);
601 XML_SetUserData(Parser, (void*)&Wrapper);
602 XML_SetStartElementHandler(Parser, xph_test_start);
604 XML_Parse(Parser, document, len, 1);
605 XML_ParserFree(Parser);
606 return Wrapper.Status;
611 //----------------------------------------------------------------------------------------------------
// File-level state for the Xerces-C back end: an initialization flag with its mutex, and a
// single shared transcoder with its mutex and scratch buffers.
615 static Mutex sg_xerces_init_lock; // protect the xerces initialized
616 static bool sg_xml_init = false; // signal initialization
617 static Mutex sg_coder_lock; // protect the transcoder context
618 static XMLTranscoder* sg_coder = 0;
// Scratch buffer capacity in bytes (128 KiB); the buffer itself is declared 8 bytes larger.
619 static const int sg_coder_buf_len = 128 * 1024;
620 static char sg_coder_buf[sg_coder_buf_len + 8];
// One entry per XMLCh that fits in the scratch buffer.
621 static unsigned char sg_coder_counts[sg_coder_buf_len / sizeof(XMLCh)]; // see XMLTranscoder::transcodeFrom
// Null-terminated XMLCh string constants: "LS" and "UTF-8".
623 static const XMLCh sg_LS[] = { chLatin_L, chLatin_S, chNull };
624 static const XMLCh sg_label_UTF_8[] = { chLatin_U, chLatin_T, chLatin_F,
625 chDash, chDigit_8, chNull};
633 AutoMutex AL(sg_xerces_init_lock);
639 XMLPlatformUtils::Initialize();
642 XMLTransService::Codes ret;
643 sg_coder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(sg_label_UTF_8, ret, sg_coder_buf_len);
645 if ( ret != XMLTransService::Ok )
647 const char* message = "Undefined Error";
651 case XMLTransService::UnsupportedEncoding: message = "Unsupported encoding"; break;
652 case XMLTransService::InternalFailure: message = "Internal failure"; break;
653 case XMLTransService::SupportFilesNotFound: message = "Support files not found"; break;
656 DefaultLogSink().Error("Xerces transform initialization error: %s\n", message);
659 catch (const XMLException &e)
661 DefaultLogSink().Error("Xerces initialization error: %s\n", e.getMessage());
// XercesString convenience overload: forwards in_str.c_str() to the const XMLCh* overload.
669 Kumu::XercesString_to_UTF_8(const Kumu::XercesString& in_str, std::string& out_str) {
670 return XercesString_to_UTF_8(in_str.c_str(), out_str);
// Transcodes the null-terminated XMLCh string `in_str` with the shared transcoder and
// appends the resulting bytes to `out_str`. Input is consumed in as many passes as needed,
// each pass limited by the size of the shared scratch buffer.
// NOTE(review): initialization, exception handling and the return value are on lines not
// visible in this view.
675 Kumu::XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str)
// The shared transcoder and scratch buffer are used under sg_coder_lock.
679 AutoMutex AL(sg_coder_lock);
// str_len counts the XMLCh units still to convert; read_total counts those already consumed.
680 ui32_t str_len = XMLString::stringLen(in_str);
681 ui32_t read_total = 0;
685 while ( str_len > 0 )
// The type of the "units consumed" out-parameter depends on the Xerces major version.
687 #if XERCES_VERSION_MAJOR < 3
688 ui32_t read_count = 0;
690 XMLSize_t read_count = 0;
// UnRep_Throw: unrepresentable characters are reported by exception rather than replaced.
692 ui32_t write_count = sg_coder->transcodeTo(in_str + read_total, str_len,
693 (XMLByte*)sg_coder_buf, sg_coder_buf_len,
694 read_count, XMLTranscoder::UnRep_Throw);
696 out_str.append(sg_coder_buf, write_count);
697 str_len -= read_count;
698 read_total += read_count;
// NOTE(review): if ui32_t is an unsigned type this assertion can never fail, and a pass
// that consumes nothing (read_count == 0) would loop forever -- confirm.
699 assert(str_len >= 0);
// std::string convenience overload: forwards in_str.c_str() to the const char* overload.
712 Kumu::UTF_8_to_XercesString(const std::string& in_str, Kumu::XercesString& out_str) {
713 return UTF_8_to_XercesString(in_str.c_str(), out_str);
// Transcodes the null-terminated byte string `in_str` with the shared transcoder and
// appends the resulting XMLCh units to `out_str`. Input is consumed in as many passes as
// needed, each pass limited by the size of the shared scratch buffer.
// NOTE(review): initialization, exception handling and the return value are on lines not
// visible in this view.
718 Kumu::UTF_8_to_XercesString(const char* in_str, Kumu::XercesString& out_str)
// The shared transcoder and scratch buffers are used under sg_coder_lock.
722 AutoMutex AL(sg_coder_lock);
// str_len counts the input bytes still to convert; read_total counts those already consumed.
723 ui32_t str_len = strlen(in_str);
724 ui32_t read_total = 0;
728 while ( str_len > 0 )
// The type of the "bytes consumed" out-parameter depends on the Xerces major version.
730 #if XERCES_VERSION_MAJOR < 3
731 ui32_t read_count = 0;
733 XMLSize_t read_count = 0;
// The scratch buffer is reinterpreted as XMLCh storage, so its capacity is given in XMLCh units.
735 ui32_t write_count = sg_coder->transcodeFrom((const XMLByte*)(in_str + read_total), str_len,
736 (XMLCh*)sg_coder_buf, sg_coder_buf_len / sizeof(XMLCh),
737 read_count, sg_coder_counts);
// NOTE(review): basic_string::append(ptr, count) takes a count of characters (XMLCh units
// here), not bytes. If transcodeFrom() returns the number of XMLCh units produced, then
// multiplying by sizeof(XMLCh) appends more units than were written -- confirm and fix.
739 out_str.append((XMLCh*)sg_coder_buf, write_count * sizeof(XMLCh));
740 str_len -= read_count;
741 read_total += read_count;
// NOTE(review): if ui32_t is an unsigned type this assertion can never fail -- confirm.
742 assert(str_len >= 0);
// SAX handler (derived from the Xerces HandlerBase) that builds an XMLElement tree under a
// caller-supplied root. It keeps a stack of open elements and a namespace map, and records
// whether any string failed to convert to UTF-8.
754 class MyTreeHandler : public HandlerBase
756 ns_map* m_Namespaces;
757 std::stack<XMLElement*> m_Scope;
759 bool m_HasEncodeErrors;
// The namespace map is allocated in the constructor.
762 MyTreeHandler(XMLElement* root) : m_Namespaces(0), m_Root(root), m_HasEncodeErrors(false)
765 m_Namespaces = new ns_map;
// True once any XercesString_to_UTF_8() call made by this handler has returned false.
772 bool HasEncodeErrors() const { return m_HasEncodeErrors; }
// Gives the namespace map to the caller. A missing or empty map is treated specially.
// NOTE(review): the value returned in that case, and whether m_Namespaces is reset after
// the hand-over, are on lines not visible in this view.
774 ns_map* TakeNamespaceMap()
776 if ( m_Namespaces == 0 || m_Namespaces->empty() )
779 ns_map* ret = m_Namespaces;
// Records a namespace declaration. `ns_prefix` is the text that followed "xmlns" in the
// attribute name: either ":" plus a prefix, or (asserted) an empty string.
785 void AddNamespace(const char* ns_prefix, const char* ns_name)
790 if ( ns_prefix[0] == ':' )
796 assert(ns_prefix[0] == 0);
// NOTE(review): the map is searched by ns_name here but entries are inserted keyed by
// ns_prefix below, and startElement() looks them up by prefix. The search key looks
// inconsistent with the insert key -- confirm which one is intended.
800 ns_map::iterator ni = m_Namespaces->find(ns_name);
802 if ( ni != m_Namespaces->end() )
804 if ( ni->second->Name() != std::string(ns_name) )
806 DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix);
812 XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name);
813 m_Namespaces->insert(ns_map::value_type(ns_prefix, Namespace));
816 assert(!m_Namespaces->empty());
// SAX start-element callback: converts the element name to UTF-8, creates (or, for the
// first element, reuses) an XMLElement, copies its attributes and attaches a namespace.
820 void startElement(const XMLCh* const x_name,
821 XERCES_CPP_NAMESPACE::AttributeList& attributes)
// A failed conversion is only recorded; processing continues with whatever was produced.
826 if ( ! XercesString_to_UTF_8(x_name, tx_name) )
827 m_HasEncodeErrors = true;
829 const char* name = tx_name.c_str();
// Remember the start of the qualified name, then move `name` past the ':' if present.
831 const char* ns_root = name;
832 const char* local_name = strchr(name, ':');
834 if ( local_name != 0 )
835 name = local_name + 1;
// An empty scope stack means this is the first element: the handler's root is used for it.
837 if ( m_Scope.empty() )
839 m_Scope.push(m_Root);
// Otherwise a new child is added to the element on top of the stack and pushed in turn.
843 Element = m_Scope.top();
844 m_Scope.push(Element->AddChild(name));
847 Element = m_Scope.top();
848 Element->SetName(name);
851 ui32_t a_len = attributes.getLength();
853 for ( ui32_t i = 0; i < a_len; i++)
855 std::string aname, value;
856 if ( ! XercesString_to_UTF_8(attributes.getName(i), aname) )
857 m_HasEncodeErrors = true;
859 if ( ! XercesString_to_UTF_8(attributes.getValue(i), value) )
860 m_HasEncodeErrors = true;
862 const char* x_aname = aname.c_str();
863 const char* x_value = value.c_str();
// Attribute names starting with "xmlns" are treated as namespace declarations; the
// remainder of the name is passed on as the prefix text.
// NOTE(review): the test matches any name that merely begins with "xmlns" (e.g.
// "xmlnsfoo"), which would reach the assert in AddNamespace() -- confirm.
865 if ( strncmp(x_aname, "xmlns", 5) == 0 )
866 AddNamespace(x_aname+5, x_value);
// With no ':' in the attribute name, the whole name is used as the local name.
// NOTE(review): the handling of the case where ':' is found is on a line not visible here.
868 if ( ( local_name = strchr(x_aname, ':') ) == 0 )
869 local_name = x_aname;
873 Element->SetAttr(local_name, x_value);
// The namespace lookup runs after the attribute loop, so declarations carried by this
// element's own attributes are already in the map. The key is the prefix before the ':'.
878 if ( ns_root != name )
879 key.assign(ns_root, name - ns_root - 1);
881 ns_map::iterator ni = m_Namespaces->find(key);
882 if ( ni != m_Namespaces->end() )
883 Element->SetNamespace(ni->second);
// SAX end-element callback. NOTE(review): body not visible in this view.
886 void endElement(const XMLCh *const name) {
// SAX character-data callback: converts `chars` to UTF-8 and appends it to the body of the
// element on top of the scope stack.
// NOTE(review): `length` is not used by the visible lines, and the conversion helper
// measures its input with XMLString::stringLen(), so this relies on `chars` being
// null-terminated -- confirm. Also, Xerces-C 3.x declares this callback with an XMLSize_t
// length; where that type differs from unsigned int this would not override it -- confirm.
890 void characters(const XMLCh *const chars, const unsigned int length)
895 if ( ! XercesString_to_UTF_8(chars, tmp) )
896 m_HasEncodeErrors = true;
898 m_Scope.top()->AppendBody(tmp);
// Xerces-C implementation: parses `doc_len` bytes at `document` into this element using a
// SAX parser and a MyTreeHandler. Returns true only when errorCount is zero.
// NOTE(review): the declaration and updates of errorCount, and any delete of `parser` and
// `docHandler`, are on lines not visible in this view -- confirm both objects are released.
905 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
913 SAXParser* parser = new SAXParser();
// Validation is always on, and namespace processing is enabled.
915 parser->setValidationScheme(SAXParser::Val_Always);
916 parser->setDoNamespaces(true); // optional
// The same handler object receives both document events and error callbacks.
918 MyTreeHandler* docHandler = new MyTreeHandler(this);
919 parser->setDocumentHandler(docHandler);
920 parser->setErrorHandler(docHandler);
// The document is read from the caller's memory buffer.
924 MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document),
925 static_cast<const unsigned int>(doc_len),
928 parser->parse(xmlSource);
// Exception messages are transcoded to char*, logged, and released again.
930 catch (const XMLException& e)
932 char* message = XMLString::transcode(e.getMessage());
933 DefaultLogSink().Error("Parser error: %s\n", message);
934 XMLString::release(&message);
937 catch (const SAXParseException& e)
939 char* message = XMLString::transcode(e.getMessage());
940 DefaultLogSink().Error("Parser error: %s at line %d\n", message, e.getLineNumber());
941 XMLString::release(&message);
946 DefaultLogSink().Error("Unexpected XML parser error\n");
// On success the handler's namespace map is handed to this element via m_NamespaceOwner.
950 if ( errorCount == 0 )
951 m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap();
956 return errorCount > 0 ? false : true;
// Xerces-C implementation: probes `document` with a progressive parse (parseFirst followed
// by one parseNext) over an in-memory input source.
// NOTE(review): the parser setup, the result of each branch and the return statements are
// on lines not visible in this view.
961 Kumu::StringIsXML(const char* document, ui32_t len)
// A null pointer or empty string is detected up front.
963 if ( document == 0 || *document == 0 )
// NOTE(review): the condition guarding this strlen() is not visible; presumably len == 0 -- confirm.
969 len = strlen(document);
977 MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document),
978 static_cast<const unsigned int>(len),
981 if ( parser.parseFirst(xmlSource, token) )
983 if ( parser.parseNext(token) )
997 //----------------------------------------------------------------------------------------------------
999 #if ! defined(HAVE_EXPAT) && ! defined(HAVE_XERCES_C)
// Fallback used when neither HAVE_EXPAT nor HAVE_XERCES_C is defined: logs that no XML
// parser was compiled in. NOTE(review): the return value is on a line not visible here.
1003 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
1005 DefaultLogSink().Error("Kumu compiled without XML parser support.\n");
// Fallback for builds without an XML parser: logs that no XML parser was compiled in.
// NOTE(review): the return value is on a line not visible here.
1011 Kumu::StringIsXML(const char* document, ui32_t len)
1013 DefaultLogSink().Error("Kumu compiled without XML parser support.\n");