2 Copyright (c) 2005-2010, John Hurst
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 # error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
47 # error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
50 #include <xercesc/util/PlatformUtils.hpp>
51 #include <xercesc/util/XMLString.hpp>
52 #include <xercesc/util/TransService.hpp>
53 #include <xercesc/sax/AttributeList.hpp>
54 #include <xercesc/sax/HandlerBase.hpp>
55 #include <xercesc/sax/ErrorHandler.hpp>
56 #include <xercesc/sax/SAXParseException.hpp>
57 #include <xercesc/parsers/SAXParser.hpp>
58 #include <xercesc/framework/MemBufInputSource.hpp>
59 #include <xercesc/framework/XMLPScanToken.hpp>
62 XERCES_CPP_NAMESPACE_USE
// XercesString holds text as a string of Xerces XMLCh code units. The four
// helpers below convert between that form and UTF-8 std::string, writing the
// result into out_str. Callers in this file treat a false return from
// XercesString_to_UTF_8 as an encoding error.
66 typedef std::basic_string<XMLCh> XercesString;
67 bool UTF_8_to_XercesString(const std::string& in_str, XercesString& out_str);
68 bool UTF_8_to_XercesString(const char* in_str, XercesString& out_str);
69 bool XercesString_to_UTF_8(const XercesString& in_str, std::string& out_str);
70 bool XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str);
// Map from a std::string key to an XMLNamespace pointer.
// NOTE(review): only the iterator setup of one member is visible in this
// excerpt; it presumably walks the map to release the XMLNamespace objects
// it holds -- confirm against the full file.
78 class ns_map : public std::map<std::string, XMLNamespace*>
85 ns_map::iterator ni = begin();
// Constructor: m_Namespace and m_NamespaceOwner both start out null.
// NOTE(review): the body is not visible here; presumably it stores `name`
// as the element name -- confirm.
93 Kumu::XMLElement::XMLElement(const char* name) : m_Namespace(0), m_NamespaceOwner(0)
// Destructor. Walks m_ChildList (loop body not visible in this excerpt;
// presumably each child is deleted -- confirm) and then deletes the
// namespace map. m_NamespaceOwner is stored as void*, so it is cast back to
// ns_map* to delete it through its real type.
98 Kumu::XMLElement::~XMLElement()
100 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
103 delete (ns_map*)m_NamespaceOwner;
// Appends an attribute to m_AttrList. The value is copied into a temporary
// attribute record (TmpVal), which is then pushed onto the list; the
// assignment of the record's name is not visible in this excerpt.
108 Kumu::XMLElement::SetAttr(const char* name, const char* value)
112 TmpVal.value = value;
114 m_AttrList.push_back(TmpVal);
// Appends an already-constructed element to the end of m_ChildList.
119 Kumu::XMLElement::AddChild(Kumu::XMLElement* element)
121 m_ChildList.push_back(element); // takes possession!
// Allocates a new XMLElement called `name` and appends it to m_ChildList.
// The parser callbacks in this file push the return value onto a stack of
// XMLElement*, so the new child is presumably what is returned (the return
// statement is not visible here).
127 Kumu::XMLElement::AddChild(const char* name)
129 XMLElement* tmpE = new XMLElement(name);
130 m_ChildList.push_back(tmpE);
// std::string convenience overload: forwards to the (const char*, const char*)
// overload using value.c_str() and returns its result.
136 Kumu::XMLElement::AddChildWithContent(const char* name, const std::string& value)
138 return AddChildWithContent(name, value.c_str());
// NOTE(review): body not visible in this excerpt. The parser callbacks call
// this with each chunk of character data, so it presumably appends `value`
// to the element's body text -- confirm.
143 Kumu::XMLElement::AppendBody(const std::string& value)
// NOTE(review): body not visible in this excerpt; presumably replaces the
// element's body text with `value` -- confirm.
150 Kumu::XMLElement::SetBody(const std::string& value)
// Allocates a child element called `name`, sets its body text to `value`
// and appends it to m_ChildList.
// NOTE(review): lines between the signature and the allocation are not
// visible; any argument checks there are unknown.
157 Kumu::XMLElement::AddChildWithContent(const char* name, const char* value)
161 XMLElement* tmpE = new XMLElement(name);
162 tmpE->m_Body = value;
163 m_ChildList.push_back(tmpE);
// Allocates a child element called `name` whose body text is `prefix`
// immediately followed by `value`, and appends it to m_ChildList.
169 Kumu::XMLElement::AddChildWithPrefixedContent(const char* name, const char* prefix, const char* value)
171 XMLElement* tmpE = new XMLElement(name);
172 tmpE->m_Body = prefix;
173 tmpE->m_Body += value;
174 m_ChildList.push_back(tmpE);
// NOTE(review): body not visible in this excerpt; how the comment text is
// stored cannot be determined from here.
180 Kumu::XMLElement::AddComment(const char* value)
// Serializes this element and its subtree into outbuf. Any previous contents
// of outbuf are replaced by the XML declaration, after which the element is
// rendered at nesting depth 0.
189 Kumu::XMLElement::Render(std::string& outbuf) const
191 outbuf = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
192 RenderElement(outbuf, 0);
// Helper called by RenderElement before it writes an element.
// NOTE(review): body not visible; presumably appends indentation for `depth`
// levels to outbuf -- confirm. depth is signed (i32_t) here, while
// RenderElement passes a ui32_t.
197 add_spacer(std::string& outbuf, i32_t depth)
// Appends this element -- attributes, body text and children -- to outbuf.
// depth is the nesting level; it is handed to add_spacer and increased by
// one for each level of children.
// NOTE(review): in the visible lines attribute values are appended as-is;
// no escaping of XML special characters is visible -- confirm.
205 Kumu::XMLElement::RenderElement(std::string& outbuf, ui32_t depth) const
// spacer ahead of the element's own output
207 add_spacer(outbuf, depth);
// attributes, in m_AttrList order
213 for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); i++ )
218 outbuf += (*i).value;
224 // body contents and children
225 if ( ! m_ChildList.empty() )
// element with children: the body-text check comes before the child loop
230 if ( m_Body.length() > 0 )
// every child is rendered one level deeper
233 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
234 (*i)->RenderElement(outbuf, depth + 1);
// spacer again after the children (what follows it is not visible here)
236 add_spacer(outbuf, depth);
// element without children: only body text, if any
238 else if ( m_Body.length() > 0 )
// Tests whether this element's name equals `name`.
// A null or empty `name` is handled separately before the comparison; the
// value returned in that case is not visible in this excerpt.
250 Kumu::XMLElement::HasName(const char* name) const
252 if ( name == 0 || *name == 0 )
255 return (m_Name == name);
// NOTE(review): body not visible in this excerpt; presumably stores `name`
// as the element name (HasName compares against m_Name) -- confirm.
260 Kumu::XMLElement::SetName(const char* name)
// Linear search of m_AttrList for the first attribute whose name equals
// `name`; returns that attribute's value as a C string. The pointer refers
// to the string stored in the attribute list, not to a copy.
// The value returned when nothing matches is not visible in this excerpt.
268 Kumu::XMLElement::GetAttrWithName(const char* name) const
270 for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); i++ )
272 if ( (*i).name == name )
273 return (*i).value.c_str();
// Scans the direct children in m_ChildList, testing each with HasName(name).
// The visible lines do not recurse into grandchildren. What is returned on a
// match or on no match is not visible in this excerpt.
281 Kumu::XMLElement::GetChildWithName(const char* name) const
283 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
285 if ( (*i)->HasName(name) )
// Collects every descendant named `name` into outList. Each child is tested
// and, if it matches, appended; then any child that has children of its own
// is searched recursively, so matches appear in depth-first order.
// Returns a const ElementList reference (presumably outList; the return
// statement is not visible here).
293 const Kumu::ElementList&
294 Kumu::XMLElement::GetChildrenWithName(const char* name, ElementList& outList) const
297 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
299 if ( (*i)->HasName(name) )
300 outList.push_back(*i);
// descend only where there is something to search
302 if ( ! (*i)->m_ChildList.empty() )
303 (*i)->GetChildrenWithName(name, outList);
// NOTE(review): body not visible in this excerpt; presumably empties
// m_AttrList -- confirm.
311 Kumu::XMLElement::DeleteAttributes()
// Removes every attribute whose name equals `name` from m_AttrList.
// `name` must not be null: a std::string is constructed from it on each
// iteration.
318 Kumu::XMLElement::DeleteAttrWithName(const char* name)
321 AttributeList::iterator i = m_AttrList.begin();
323 while ( i != m_AttrList.end() )
325 if ( i->name == std::string(name) )
// post-increment hands erase() the old position while i has already moved on.
// NOTE(review): this idiom is only safe for node-based containers; the
// type behind AttributeList is not visible here -- confirm. The non-matching
// branch (which must advance i) is also not visible.
326 m_AttrList.erase(i++);
// Destroys all children: repeatedly deletes the last element of m_ChildList
// and removes its entry until the list is empty.
334 Kumu::XMLElement::DeleteChildren()
336 while ( ! m_ChildList.empty() )
338 delete m_ChildList.back();
339 m_ChildList.pop_back();
// Searches m_ChildList for `element` and erases its entry. The match test
// and any delete of the child are not visible in this excerpt.
// NOTE(review): erase(i) invalidates i, so the loop must not go on to
// execute i++ afterwards; the statement following the erase is not visible
// here -- confirm it leaves the loop.
345 Kumu::XMLElement::DeleteChild(const XMLElement* element)
349 for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
354 m_ChildList.erase(i);
// Searches m_ChildList for `element` and erases its entry; presumably the
// child object itself is left alive for the caller (no delete is visible,
// but lines are missing from this excerpt -- confirm).
// NOTE(review): erase(i) invalidates i, so the loop must not go on to
// execute i++ afterwards; the statement following the erase is not visible
// here -- confirm it leaves the loop.
363 Kumu::XMLElement::ForgetChild(const XMLElement* element)
367 for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
371 m_ChildList.erase(i);
// ByteString convenience overload: forwards the buffer's read-only data
// pointer and length to the (const char*, ui32_t) overload.
380 Kumu::XMLElement::ParseString(const ByteString& document)
382 return ParseString((const char*)document.RoData(), document.Length());
// std::string convenience overload: forwards the string's character data and
// size to the (const char*, ui32_t) overload.
387 Kumu::XMLElement::ParseString(const std::string& document)
389 return ParseString(document.c_str(), document.size());
393 //----------------------------------------------------------------------------------------------------
// State shared by the expat callbacks; a pointer to it is registered as the
// parser's user data. Copy construction is disabled by KM_NO_COPY_CONSTRUCT.
398 class ExpatParseContext
400 KM_NO_COPY_CONSTRUCT(ExpatParseContext);
// elements currently open, innermost on top
404 std::stack<XMLElement*> Scope;
// Root is the element the parsed document is loaded into.
407 ExpatParseContext(XMLElement* root) : Root(root) {
408 Namespaces = new ns_map;
// The destructor does not free Namespaces: ParseString hands a non-empty map
// over to the XMLElement.
// NOTE(review): nothing visible in this excerpt frees the map when it stays
// empty or when parsing fails -- possible leak, confirm.
412 ~ExpatParseContext() {}
415 // expat wrapper functions
// expat start-element callback. p is the ExpatParseContext registered as
// user data, name the element name, attrs a null-terminated array of
// alternating attribute names and values.
418 xph_start(void* p, const XML_Char* name, const XML_Char** attrs)
420 assert(p); assert(name); assert(attrs);
421 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// The parser is created with '|' as the namespace separator, so a qualified
// name arrives as "<namespace>|<local>". ns_root keeps the start of the
// string; name is advanced past the separator to the local part.
424 const char* ns_root = name;
425 const char* local_name = strchr(name, '|');
426 if ( local_name != 0 )
427 name = local_name + 1;
// first element of the document: the caller-supplied root becomes the scope
429 if ( Ctx->Scope.empty() )
431 Ctx->Scope.push(Ctx->Root);
// otherwise a new child of the current element is created and entered
435 Element = Ctx->Scope.top();
436 Ctx->Scope.push(Element->AddChild(name));
439 Element = Ctx->Scope.top();
440 Element->SetName(name);
// when a namespace part was present, key is the text before the '|';
// attach the namespace registered under that key, if any
444 if ( ns_root != name )
445 key.assign(ns_root, name - ns_root - 1);
447 ns_map::iterator ni = Ctx->Namespaces->find(key);
448 if ( ni != Ctx->Namespaces->end() )
449 Element->SetNamespace(ni->second);
// attributes: names without a '|' are used whole (handling of names that do
// contain one is not visible in this excerpt)
452 for ( int i = 0; attrs[i] != 0; i += 2 )
454 if ( ( local_name = strchr(attrs[i], '|') ) == 0 )
455 local_name = attrs[i];
459 Element->SetAttr(local_name, attrs[i+1]);
// expat end-element callback; p is the ExpatParseContext registered as user
// data.
// NOTE(review): the rest of the body is not visible; presumably it pops the
// finished element off Ctx->Scope -- confirm.
465 xph_end(void* p, const XML_Char* name)
467 assert(p); assert(name);
468 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// expat character-data callback: copies the len characters at `data` into a
// temporary string and appends it to the body of the element on top of
// Ctx->Scope. Lines between the cast and the copy (any guard on len) are not
// visible in this excerpt.
474 xph_char(void* p, const XML_Char* data, int len)
476 assert(p); assert(data);
477 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// data is given with an explicit length, hence assign(data, len)
482 tmp_str.assign(data, len);
483 Ctx->Scope.top()->AppendBody(tmp_str);
// expat namespace-declaration callback: records the namespace in
// Ctx->Namespaces, keyed by the namespace name (ns_name).
489 xph_namespace_start(void* p, const XML_Char* ns_prefix, const XML_Char* ns_name)
491 assert(p); assert(ns_name);
492 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// ns_prefix may be null (only ns_name is asserted); the replacement value
// assigned in that case is not visible in this excerpt
494 if ( ns_prefix == 0 )
// look the namespace up by name; only unknown names are added
497 ns_map::iterator ni = Ctx->Namespaces->find(ns_name);
499 if ( ni != Ctx->Namespaces->end() )
// NOTE(review): the entry was found under the key ns_name, so this test can
// only be true if an entry's Name() differs from the key it is stored
// under -- confirm what the check is meant to catch.
501 if ( ni->second->Name() != std::string(ns_name) )
503 DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix);
// not seen before: create the namespace object and register it; the map
// stores the raw pointer
509 XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name);
510 Ctx->Namespaces->insert(ns_map::value_type(ns_name, Namespace));
// expat implementation: parses the doc_len bytes at `document` as a single,
// complete UTF-8 XML document and loads the result into this element.
// Errors are reported through DefaultLogSink(); the return statements are
// not visible in this excerpt.
516 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
// namespace-aware parser; '|' separates namespace and local name in the
// names passed to the callbacks
518 XML_Parser Parser = XML_ParserCreateNS("UTF-8", '|');
522 DefaultLogSink().Error("Error allocating memory for XML parser.\n");
// the context lives on the stack for the duration of the parse and reaches
// the callbacks as user data
526 ExpatParseContext Ctx(this);
527 XML_SetUserData(Parser, (void*)&Ctx);
528 XML_SetElementHandler(Parser, xph_start, xph_end);
529 XML_SetCharacterDataHandler(Parser, xph_char);
530 XML_SetStartNamespaceDeclHandler(Parser, xph_namespace_start);
// final argument 1: this buffer is the whole document
532 if ( ! XML_Parse(Parser, document, doc_len, 1) )
534 DefaultLogSink().Error("XML Parse error on line %d: %s\n",
535 XML_GetCurrentLineNumber(Parser),
536 XML_ErrorString(XML_GetErrorCode(Parser)));
537 XML_ParserFree(Parser);
541 XML_ParserFree(Parser);
// a non-empty namespace map is handed to this element, whose destructor
// deletes it.
// NOTE(review): no release of Ctx.Namespaces is visible for the empty-map
// or parse-error cases -- confirm it is not leaked.
543 if ( ! Ctx.Namespaces->empty() )
544 m_NamespaceOwner = (void*)Ctx.Namespaces;
549 //------------------------------------------------------------------------------------------
// User data for the StringIsXML probe: the parser handle (so the callback
// can stop it) and a flag that starts out false and is set by xph_test_start.
551 struct xph_test_wrapper
556 xph_test_wrapper(XML_Parser p) : Parser(p), Status(false) {}
559 // expat wrapper functions, map callbacks to IASAXHandler
// expat start-element callback used by StringIsXML: records that an element
// was seen and stops the parser (second argument false: not resumable), so
// the rest of the document is not parsed.
562 xph_test_start(void* p, const XML_Char* name, const XML_Char** attrs)
565 xph_test_wrapper* Wrapper = (xph_test_wrapper*)p;
567 Wrapper->Status = true;
568 XML_StopParser(Wrapper->Parser, false);
// expat implementation: reports whether `document` looks like XML, i.e.
// whether the parser reaches a start element (see xph_test_start).
// The conditions guarding the strlen() call are not visible in this excerpt;
// presumably a zero len means "null-terminated, measure it" -- confirm.
574 Kumu::StringIsXML(const char* document, ui32_t len)
580 len = strlen(document);
582 XML_Parser Parser = XML_ParserCreate("UTF-8");
586 DefaultLogSink().Error("Error allocating memory for XML parser.\n");
590 xph_test_wrapper Wrapper(Parser);
591 XML_SetUserData(Parser, (void*)&Wrapper);
592 XML_SetStartElementHandler(Parser, xph_test_start);
// XML_Parse's own result is ignored: the answer is the flag set by the
// callback
594 XML_Parse(Parser, document, len, 1);
595 XML_ParserFree(Parser);
596 return Wrapper.Status;
601 //----------------------------------------------------------------------------------------------------
// File-scope state for the Xerces-C back-end.
605 static Mutex sg_xerces_init_lock; // protect the xerces initialized
606 static bool sg_xml_init = false; // signal initialization
607 static Mutex sg_coder_lock; // protect the transcoder context
// shared transcoder; the transcoding helpers in this file use it and the
// buffers below while holding sg_coder_lock
608 static XMLTranscoder* sg_coder = 0;
// scratch buffer size in bytes (128 KiB); the array carries 8 extra bytes
609 static const int sg_coder_buf_len = 128 * 1024;
610 static char sg_coder_buf[sg_coder_buf_len + 8];
// one entry per XMLCh that fits in the scratch buffer
611 static unsigned char sg_coder_counts[sg_coder_buf_len / sizeof(XMLCh)]; // see XMLTranscoder::transcodeFrom
// the strings "LS" and "UTF-8" spelled out as null-terminated XMLCh arrays
613 static const XMLCh sg_LS[] = { chLatin_L, chLatin_S, chNull };
614 static const XMLCh sg_label_UTF_8[] = { chLatin_U, chLatin_T, chLatin_F,
615 chDash, chDigit_8, chNull};
// Xerces start-up fragment (the enclosing function's signature is not
// visible in this excerpt). Under sg_xerces_init_lock it initializes the
// Xerces platform layer and creates the shared UTF-8 transcoder sg_coder,
// passing sg_coder_buf_len as the transcoder's block size. Failures are
// reported through DefaultLogSink().
// NOTE(review): the check and update of sg_xml_init are not visible here.
623 AutoMutex AL(sg_xerces_init_lock);
629 XMLPlatformUtils::Initialize();
632 XMLTransService::Codes ret;
633 sg_coder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(sg_label_UTF_8, ret, sg_coder_buf_len);
635 if ( ret != XMLTransService::Ok )
// map the known failure codes to text; anything else stays "Undefined Error"
637 const char* message = "Undefined Error";
641 case XMLTransService::UnsupportedEncoding: message = "Unsupported encoding"; break;
642 case XMLTransService::InternalFailure: message = "Internal failure"; break;
643 case XMLTransService::SupportFilesNotFound: message = "Support files not found"; break;
646 DefaultLogSink().Error("Xerces transform initialization error: %s\n", message);
649 catch (const XMLException &e)
// NOTE(review): e.getMessage() is passed to a "%s" conversion; if it returns
// XMLCh text rather than char text the log output will be wrong -- confirm
// (the parser code further down transcodes the message first).
651 DefaultLogSink().Error("Xerces initialization error: %s\n", e.getMessage());
// XercesString convenience overload: forwards the string's c_str() to the
// const XMLCh* overload, so conversion stops at the first null code unit.
659 Kumu::XercesString_to_UTF_8(const Kumu::XercesString& in_str, std::string& out_str) {
660 return XercesString_to_UTF_8(in_str.c_str(), out_str);
// Transcodes the null-terminated XMLCh string in_str with the shared
// transcoder sg_coder and appends the resulting bytes to out_str. The return
// statements and any exception handling are not visible in this excerpt.
665 Kumu::XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str)
// sg_coder and sg_coder_buf are shared statics: hold the lock throughout
669 AutoMutex AL(sg_coder_lock);
670 ui32_t str_len = XMLString::stringLen(in_str);
671 ui32_t read_total = 0;
// convert in chunks of at most one scratch buffer; read_count reports how
// many input code units each call consumed
675 while ( str_len > 0 )
// the type of the consumed-count out-parameter depends on the Xerces major
// version
677 #if XERCES_VERSION_MAJOR < 3
678 ui32_t read_count = 0;
680 XMLSize_t read_count = 0;
// UnRep_Throw: unrepresentable characters raise an exception instead of
// being replaced
682 ui32_t write_count = sg_coder->transcodeTo(in_str + read_total, str_len,
683 (XMLByte*)sg_coder_buf, sg_coder_buf_len,
684 read_count, XMLTranscoder::UnRep_Throw);
// write_count is a byte count here, matching the char buffer
686 out_str.append(sg_coder_buf, write_count);
687 str_len -= read_count;
688 read_total += read_count;
// NOTE(review): str_len is unsigned, so this assertion is always true and
// cannot detect read_count exceeding str_len.
689 assert(str_len >= 0);
// std::string convenience overload: forwards the string's c_str() to the
// const char* overload, so conversion stops at the first null byte.
702 Kumu::UTF_8_to_XercesString(const std::string& in_str, Kumu::XercesString& out_str) {
703 return UTF_8_to_XercesString(in_str.c_str(), out_str);
// Transcodes the null-terminated UTF-8 string in_str with the shared
// transcoder sg_coder and appends the resulting XMLCh code units to out_str.
// The return statements and any exception handling are not visible in this
// excerpt.
708 Kumu::UTF_8_to_XercesString(const char* in_str, Kumu::XercesString& out_str)
// sg_coder, sg_coder_buf and sg_coder_counts are shared statics: hold the
// lock throughout
712 AutoMutex AL(sg_coder_lock);
713 ui32_t str_len = strlen(in_str);
714 ui32_t read_total = 0;
// convert in chunks of at most one scratch buffer; read_count reports how
// many input bytes each call consumed
718 while ( str_len > 0 )
// the type of the consumed-count out-parameter depends on the Xerces major
// version
720 #if XERCES_VERSION_MAJOR < 3
721 ui32_t read_count = 0;
723 XMLSize_t read_count = 0;
// the byte buffer is reused as XMLCh storage, so its capacity is given in
// XMLCh units
725 ui32_t write_count = sg_coder->transcodeFrom((const XMLByte*)(in_str + read_total), str_len,
726 (XMLCh*)sg_coder_buf, sg_coder_buf_len / sizeof(XMLCh),
727 read_count, sg_coder_counts);
// write_count is the number of XMLCh produced and basic_string::append takes
// a character count, not a byte count. Scaling it by sizeof(XMLCh) appended
// stale buffer contents after the converted text and could read past the end
// of sg_coder_buf for large chunks.
729 out_str.append((XMLCh*)sg_coder_buf, write_count);
730 str_len -= read_count;
731 read_total += read_count;
// NOTE(review): str_len is unsigned, so this assertion is always true; also
// confirm the loop cannot spin if transcodeFrom consumes nothing (e.g. a
// truncated multi-byte sequence at the end of the input).
732 assert(str_len >= 0);
// SAX handler that builds the XMLElement tree for the Xerces-C back-end.
// It serves as both document handler and error handler for the parser, keeps
// a stack of the elements currently open, records namespace declarations
// keyed by prefix, and remembers whether any string failed to transcode to
// UTF-8.
744 class MyTreeHandler : public HandlerBase
// prefix -> namespace; owned here until TakeNamespaceMap() hands it over
746 ns_map* m_Namespaces;
// elements currently open, innermost on top
747 std::stack<XMLElement*> m_Scope;
749 bool m_HasEncodeErrors;
// root is the element the parsed document is loaded into
752 MyTreeHandler(XMLElement* root) : m_Namespaces(0), m_Root(root), m_HasEncodeErrors(false)
755 m_Namespaces = new ns_map;
// true if any name, attribute or text could not be converted to UTF-8
762 bool HasEncodeErrors() const { return m_HasEncodeErrors; }
// Gives the namespace map to the caller. A missing or empty map is handled
// separately first; what is returned in that case, and whether m_Namespaces
// is cleared afterwards, is not visible in this excerpt.
764 ns_map* TakeNamespaceMap()
766 if ( m_Namespaces == 0 || m_Namespaces->empty() )
769 ns_map* ret = m_Namespaces;
// Registers a namespace declaration. ns_prefix is what follows "xmlns" in
// the attribute name: either ":prefix" or the empty string for the default
// namespace (the adjustments made to it in each case are not visible here).
775 void AddNamespace(const char* ns_prefix, const char* ns_name)
780 if ( ns_prefix[0] == ':' )
// NOTE(review): an attribute that merely starts with "xmlns" (e.g.
// "xmlnsfoo") reaches this assertion -- confirm that is acceptable.
786 assert(ns_prefix[0] == 0);
// The map is filled and queried by prefix (see the insert below and the
// lookup in startElement), so the duplicate check must search by prefix too.
// Searching by ns_name never matched: the check below could not fire and a
// re-declared prefix leaked the XMLNamespace whose insert() failed.
790 ns_map::iterator ni = m_Namespaces->find(ns_prefix);
792 if ( ni != m_Namespaces->end() )
// same prefix bound to a different namespace name
794 if ( ni->second->Name() != std::string(ns_name) )
796 DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix);
// prefix not seen before: create and register it; the map stores the raw
// pointer
802 XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name);
803 m_Namespaces->insert(ns_map::value_type(ns_prefix, Namespace));
806 assert(!m_Namespaces->empty());
// SAX start-element callback: creates the element (or fills in the root for
// the first one), processes namespace declarations and attributes, then
// attaches the namespace that matches the element's prefix.
810 void startElement(const XMLCh* const x_name,
811 XERCES_CPP_NAMESPACE::AttributeList& attributes)
816 if ( ! XercesString_to_UTF_8(x_name, tx_name) )
817 m_HasEncodeErrors = true;
819 const char* name = tx_name.c_str();
// split "prefix:local": ns_root keeps the start, name moves to the local part
821 const char* ns_root = name;
822 const char* local_name = strchr(name, ':');
824 if ( local_name != 0 )
825 name = local_name + 1;
// first element of the document: the caller-supplied root becomes the scope
827 if ( m_Scope.empty() )
829 m_Scope.push(m_Root);
// otherwise a new child of the current element is created and entered
833 Element = m_Scope.top();
834 m_Scope.push(Element->AddChild(name));
837 Element = m_Scope.top();
838 Element->SetName(name);
841 ui32_t a_len = attributes.getLength();
843 for ( ui32_t i = 0; i < a_len; i++)
845 std::string aname, value;
846 if ( ! XercesString_to_UTF_8(attributes.getName(i), aname) )
847 m_HasEncodeErrors = true;
849 if ( ! XercesString_to_UTF_8(attributes.getValue(i), value) )
850 m_HasEncodeErrors = true;
852 const char* x_aname = aname.c_str();
853 const char* x_value = value.c_str();
// namespace declarations are recorded before the element's own prefix is
// resolved below
855 if ( strncmp(x_aname, "xmlns", 5) == 0 )
856 AddNamespace(x_aname+5, x_value);
// attribute names without a ':' are used whole (handling of prefixed names
// is not visible in this excerpt)
858 if ( ( local_name = strchr(x_aname, ':') ) == 0 )
859 local_name = x_aname;
863 Element->SetAttr(local_name, x_value);
// key is the element's prefix when it has one; attach the namespace
// registered under that prefix, if any
868 if ( ns_root != name )
869 key.assign(ns_root, name - ns_root - 1);
871 ns_map::iterator ni = m_Namespaces->find(key);
872 if ( ni != m_Namespaces->end() )
873 Element->SetNamespace(ni->second);
// SAX end-element callback (body not visible; presumably pops m_Scope).
876 void endElement(const XMLCh *const name) {
// SAX character-data callback: appends the text to the current element.
// The length parameter must have the type the Xerces base class declares,
// otherwise this function only hides the virtual and is never called.
// Xerces 3 uses XMLSize_t, earlier versions unsigned int.
// NOTE(review): length is not used in the visible lines; the text is
// converted up to its first null code unit -- confirm chars is terminated.
#if XERCES_VERSION_MAJOR < 3
880 void characters(const XMLCh *const chars, const unsigned int length)
#else
void characters(const XMLCh *const chars, const XMLSize_t length)
#endif
885 if ( ! XercesString_to_UTF_8(chars, tmp) )
886 m_HasEncodeErrors = true;
888 m_Scope.top()->AppendBody(tmp);
// Xerces-C implementation: parses the doc_len bytes at `document` with a SAX
// parser and loads the result into this element through a MyTreeHandler.
// Returns true only when no error was counted.
895 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
// NOTE(review): parser and docHandler are allocated with new; the matching
// deletes are not visible in this excerpt -- confirm both are released on
// every path.
903 SAXParser* parser = new SAXParser();
905 parser->setValidationScheme(SAXParser::Val_Always);
906 parser->setDoNamespaces(true); // optional
// the same handler object receives document events and error callbacks
908 MyTreeHandler* docHandler = new MyTreeHandler(this);
909 parser->setDocumentHandler(docHandler);
910 parser->setErrorHandler(docHandler);
// wrap the caller's buffer as the parser input (remaining constructor
// arguments are not visible here)
914 MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document),
915 static_cast<const unsigned int>(doc_len),
918 parser->parse(xmlSource);
// exception messages are XMLCh text: transcode for logging, then release the
// transcoded copy
920 catch (const XMLException& e)
922 char* message = XMLString::transcode(e.getMessage());
923 DefaultLogSink().Error("Parser error: %s\n", message);
924 XMLString::release(&message);
927 catch (const SAXParseException& e)
929 char* message = XMLString::transcode(e.getMessage());
930 DefaultLogSink().Error("Parser error: %s at line %d\n", message, e.getLineNumber());
931 XMLString::release(&message);
936 DefaultLogSink().Error("Unexpected XML parser error\n");
// on success the handler's namespace map is handed to this element, whose
// destructor deletes it
940 if ( errorCount == 0 )
941 m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap();
946 return errorCount > 0 ? false : true;
// Xerces-C implementation: reports whether `document` looks like XML by
// starting a progressive parse (parseFirst, then one parseNext step).
// A null or empty document is handled up front; the values returned on each
// path are not visible in this excerpt.
951 Kumu::StringIsXML(const char* document, ui32_t len)
953 if ( document == 0 || *document == 0 )
// the guard around this line is not visible; presumably a zero len means
// "null-terminated, measure it" -- confirm
959 len = strlen(document);
967 MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document),
968 static_cast<const unsigned int>(len),
// only the beginning of the document is examined
971 if ( parser.parseFirst(xmlSource, token) )
973 if ( parser.parseNext(token) )
987 //----------------------------------------------------------------------------------------------------
989 #if ! defined(HAVE_EXPAT) && ! defined(HAVE_XERCES_C)
// Stub used when neither HAVE_EXPAT nor HAVE_XERCES_C is defined: parsing is
// unavailable, so the call only logs an error (return value not visible in
// this excerpt).
993 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
995 DefaultLogSink().Error("Kumu compiled without XML parser support.\n");
// Stub used when neither HAVE_EXPAT nor HAVE_XERCES_C is defined: the check
// is unavailable, so the call only logs an error (return value not visible
// in this excerpt).
1001 Kumu::StringIsXML(const char* document, ui32_t len)
1003 DefaultLogSink().Error("Kumu compiled without XML parser support.\n");