2 Copyright (c) 2005-2010, John Hurst
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 # error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
47 # error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
50 #include <xercesc/util/PlatformUtils.hpp>
51 #include <xercesc/util/XMLString.hpp>
52 #include <xercesc/util/TransService.hpp>
53 #include <xercesc/sax/AttributeList.hpp>
54 #include <xercesc/sax/HandlerBase.hpp>
55 #include <xercesc/sax/ErrorHandler.hpp>
56 #include <xercesc/sax/SAXParseException.hpp>
57 #include <xercesc/parsers/SAXParser.hpp>
58 #include <xercesc/framework/MemBufInputSource.hpp>
59 #include <xercesc/framework/XMLPScanToken.hpp>
62 XERCES_CPP_NAMESPACE_USE
// String of Xerces XMLCh units.
66 typedef std::basic_string<XMLCh> XercesString;
// Conversions between UTF-8 text (std::string or C string) and Xerces text
// (XercesString or XMLCh*). Each takes the destination as out_str and
// returns a bool status.
67 bool UTF_8_to_XercesString(const std::string& in_str, XercesString& out_str);
68 bool UTF_8_to_XercesString(const char* in_str, XercesString& out_str);
69 bool XercesString_to_UTF_8(const XercesString& in_str, std::string& out_str);
70 bool XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str);
// Map from std::string keys to XMLNamespace pointers.
78 class ns_map : public std::map<std::string, XMLNamespace*>
// NOTE(review): iteration over the entries starts here -- presumably to
// release the mapped XMLNamespace objects; confirm against the full class.
85 ns_map::iterator ni = begin();
// Constructs an element from a name. The namespace pointer and the
// namespace-owner pointer both start out null.
93 Kumu::XMLElement::XMLElement(const char* name) : m_Namespace(0), m_NamespaceOwner(0)
// Destructor. Visits every entry of m_ChildList (NOTE(review): presumably
// deleting each child -- confirm) and deletes the ns_map stored in
// m_NamespaceOwner.
98 Kumu::XMLElement::~XMLElement()
100 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// m_NamespaceOwner is stored as void*, so it is cast back before delete.
103 delete (ns_map*)m_NamespaceOwner;
// Appends an attribute entry carrying the given value to m_AttrList.
// NOTE(review): the entry's name is presumably set from `name` -- confirm.
108 Kumu::XMLElement::SetAttr(const char* name, const char* value)
112 TmpVal.value = value;
114 m_AttrList.push_back(TmpVal);
// Appends an existing element to this element's child list.
119 Kumu::XMLElement::AddChild(Kumu::XMLElement* element)
121 m_ChildList.push_back(element); // takes possession!
// Creates a new heap-allocated XMLElement from `name` and appends it to
// this element's child list.
127 Kumu::XMLElement::AddChild(const char* name)
129 XMLElement* tmpE = new XMLElement(name);
130 m_ChildList.push_back(tmpE);
// std::string convenience overload: forwards to the const char* overload
// with value.c_str() and returns its result.
136 Kumu::XMLElement::AddChildWithContent(const char* name, const std::string& value)
138 return AddChildWithContent(name, value.c_str());
// NOTE(review): presumably appends `value` to the element body (m_Body) --
// confirm against the function body.
143 Kumu::XMLElement::AppendBody(const std::string& value)
// NOTE(review): presumably replaces the element body (m_Body) with `value`
// -- confirm against the function body.
150 Kumu::XMLElement::SetBody(const std::string& value)
// Creates a new child element from `name`, sets its body to `value`, and
// appends it to this element's child list.
157 Kumu::XMLElement::AddChildWithContent(const char* name, const char* value)
161 XMLElement* tmpE = new XMLElement(name);
162 tmpE->m_Body = value;
163 m_ChildList.push_back(tmpE);
// Creates a new child element from `name` whose body is `prefix` followed
// immediately by `value`, and appends it to this element's child list.
169 Kumu::XMLElement::AddChildWithPrefixedContent(const char* name, const char* prefix, const char* value)
171 XMLElement* tmpE = new XMLElement(name);
172 tmpE->m_Body = prefix;
173 tmpE->m_Body += value;
174 m_ChildList.push_back(tmpE);
// NOTE(review): presumably adds an XML comment containing `value` to this
// element -- confirm against the function body.
180 Kumu::XMLElement::AddComment(const char* value)
// Renders this element as a document: outbuf is overwritten with the XML
// declaration, then the element is rendered into it at depth 0.
189 Kumu::XMLElement::Render(std::string& outbuf) const
191 outbuf = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
192 RenderElement(outbuf, 0);
// NOTE(review): presumably appends indentation for `depth` nesting levels
// to outbuf -- confirm against the function body.
197 add_spacer(std::string& outbuf, i32_t depth)
// Appends the text form of this element and its children to outbuf.
// `depth` is the nesting level passed to add_spacer; children are rendered
// at depth + 1.
205 Kumu::XMLElement::RenderElement(std::string& outbuf, ui32_t depth) const
207 add_spacer(outbuf, depth);
// Attribute values are appended to outbuf as stored.
213 for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); i++ )
218 outbuf += (*i).value;
224 // body contents and children
225 if ( ! m_ChildList.empty() )
230 if ( m_Body.length() > 0 )
// Recurse into each child, one level deeper.
233 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
234 (*i)->RenderElement(outbuf, depth + 1);
236 add_spacer(outbuf, depth);
// No children: only the body text, if any, remains to be handled.
238 else if ( m_Body.length() > 0 )
// Tests whether this element's name equals `name`. A null or empty `name`
// is handled separately before the comparison.
250 Kumu::XMLElement::HasName(const char* name) const
252 if ( name == 0 || *name == 0 )
255 return (m_Name == name);
// NOTE(review): presumably stores `name` as this element's name (m_Name) --
// confirm against the function body.
260 Kumu::XMLElement::SetName(const char* name)
// Searches m_AttrList in order for an entry whose name equals `name` and
// returns a pointer to that entry's value string. The pointer refers to
// storage inside the attribute entry.
268 Kumu::XMLElement::GetAttrWithName(const char* name) const
270 for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); i++ )
272 if ( (*i).name == name )
273 return (*i).value.c_str();
// Scans the direct children in order, testing each with HasName(name).
// NOTE(review): presumably returns the first match -- confirm.
281 Kumu::XMLElement::GetChildWithName(const char* name) const
283 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
285 if ( (*i)->HasName(name) )
// Appends to outList every descendant whose HasName(name) is true. A
// matching child is added before its own descendants are searched.
// NOTE(review): the returned reference is presumably outList -- confirm.
293 const Kumu::ElementList&
294 Kumu::XMLElement::GetChildrenWithName(const char* name, ElementList& outList) const
297 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
299 if ( (*i)->HasName(name) )
300 outList.push_back(*i);
// Recurse only into children that themselves have children.
302 if ( ! (*i)->m_ChildList.empty() )
303 (*i)->GetChildrenWithName(name, outList);
// NOTE(review): presumably removes every attribute from m_AttrList --
// confirm against the function body.
311 Kumu::XMLElement::DeleteAttributes()
// Walks m_AttrList and erases entries whose name equals `name`.
318 Kumu::XMLElement::DeleteAttrWithName(const char* name)
321 AttributeList::iterator i = m_AttrList.begin();
323 while ( i != m_AttrList.end() )
325 if ( i->name == std::string(name) )
// The post-increment advances the iterator before the entry is erased.
// NOTE(review): safe only for node-based containers such as std::list --
// confirm the type behind AttributeList.
326 m_AttrList.erase(i++);
// Deletes every child element and empties the child list, working from the
// back of the list.
334 Kumu::XMLElement::DeleteChildren()
336 while ( ! m_ChildList.empty() )
338 delete m_ChildList.back();
339 m_ChildList.pop_back();
// Walks the child list and erases an entry from it.
// NOTE(review): presumably the entry equal to `element`, which is also
// deleted -- confirm against the full body.
345 Kumu::XMLElement::DeleteChild(const XMLElement* element)
349 for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// NOTE(review): erase() invalidates i; the loop must not continue with it.
354 m_ChildList.erase(i);
// Walks the child list and erases an entry from it.
// NOTE(review): presumably the entry equal to `element`, removed without
// deleting the element itself -- confirm against the full body.
363 Kumu::XMLElement::ForgetChild(const XMLElement* element)
367 for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// NOTE(review): erase() invalidates i; the loop must not continue with it.
371 m_ChildList.erase(i);
379 //----------------------------------------------------------------------------------------------------
// State shared by the expat callbacks during one parse: the root element,
// a stack of open elements (Scope) and a namespace map.
384 class ExpatParseContext
386 KM_NO_COPY_CONSTRUCT(ExpatParseContext);
// Elements currently open, innermost on top.
390 std::stack<XMLElement*> Scope;
393 ExpatParseContext(XMLElement* root) : Root(root) {
// Allocated here; the empty destructor below does not release it.
394 Namespaces = new ns_map;
398 ~ExpatParseContext() {}
401 // expat wrapper functions
// expat start-element callback.
//   p     - the ExpatParseContext (cast from the user-data pointer)
//   name  - element name; may carry a namespace part followed by '|'
//   attrs - null-terminated array of alternating attribute names and values
404 xph_start(void* p, const XML_Char* name, const XML_Char** attrs)
406 assert(p); assert(name); assert(attrs);
407 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// Split "<namespace>|<local>" at the '|'; name becomes the local part while
// ns_root keeps pointing at the start of the full string.
410 const char* ns_root = name;
411 const char* local_name = strchr(name, '|');
412 if ( local_name != 0 )
413 name = local_name + 1;
// With an empty scope the root element is used.
// NOTE(review): presumably the else branch adds a child to the current top
// of the scope and pushes it -- confirm.
415 if ( Ctx->Scope.empty() )
417 Ctx->Scope.push(Ctx->Root);
421 Element = Ctx->Scope.top();
422 Ctx->Scope.push(Element->AddChild(name));
425 Element = Ctx->Scope.top();
426 Element->SetName(name);
// When a namespace part was present, key is the text before the '|'.
430 if ( ns_root != name )
431 key.assign(ns_root, name - ns_root - 1);
// Attach the namespace registered under that key, if there is one.
433 ns_map::iterator ni = Ctx->Namespaces->find(key);
434 if ( ni != Ctx->Namespaces->end() )
435 Element->SetNamespace(ni->second);
// Attributes arrive as name/value pairs, hence the step of two. A name
// without '|' is used whole.
438 for ( int i = 0; attrs[i] != 0; i += 2 )
440 if ( ( local_name = strchr(attrs[i], '|') ) == 0 )
441 local_name = attrs[i];
445 Element->SetAttr(local_name, attrs[i+1]);
// expat end-element callback; p is the ExpatParseContext.
// NOTE(review): presumably pops the current element off Ctx->Scope --
// confirm against the full body.
451 xph_end(void* p, const XML_Char* name)
453 assert(p); assert(name);
454 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// expat character-data callback: appends the `len` characters at `data` to
// the body of the element on top of the scope stack.
460 xph_char(void* p, const XML_Char* data, int len)
462 assert(p); assert(data);
463 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// Copy exactly len characters; no terminating NUL is relied upon.
468 tmp_str.assign(data, len);
469 Ctx->Scope.top()->AppendBody(tmp_str);
// expat namespace-declaration callback: records the declaration in the
// context's namespace map, keyed by ns_name.
475 xph_namespace_start(void* p, const XML_Char* ns_prefix, const XML_Char* ns_name)
477 assert(p); assert(ns_name);
478 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// ns_prefix is not asserted above; the null case is checked here.
480 if ( ns_prefix == 0 )
483 ns_map::iterator ni = Ctx->Namespaces->find(ns_name);
// An existing entry whose Name() differs from ns_name is logged.
485 if ( ni != Ctx->Namespaces->end() )
487 if ( ni->second->Name() != std::string(ns_name) )
489 DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix);
// NOTE(review): presumably reached only when no entry exists yet -- confirm.
495 XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name);
496 Ctx->Namespaces->insert(ns_map::value_type(ns_name, Namespace));
502 Kumu::XMLElement::ParseString(const std::string& document)
504 XML_Parser Parser = XML_ParserCreateNS("UTF-8", '|');
508 DefaultLogSink().Error("Error allocating memory for XML parser.\n");
512 ExpatParseContext Ctx(this);
513 XML_SetUserData(Parser, (void*)&Ctx);
514 XML_SetElementHandler(Parser, xph_start, xph_end);
515 XML_SetCharacterDataHandler(Parser, xph_char);
516 XML_SetStartNamespaceDeclHandler(Parser, xph_namespace_start);
518 if ( ! XML_Parse(Parser, document.c_str(), document.size(), 1) )
520 XML_ParserFree(Parser);
521 DefaultLogSink().Error("XML Parse error on line %d: %s\n",
522 XML_GetCurrentLineNumber(Parser),
523 XML_ErrorString(XML_GetErrorCode(Parser)));
527 XML_ParserFree(Parser);
529 if ( ! Ctx.Namespaces->empty() )
530 m_NamespaceOwner = (void*)Ctx.Namespaces;
535 //------------------------------------------------------------------------------------------
// User data for the StringIsXML probe: the parser being driven and a status
// flag that starts out false.
537 struct xph_test_wrapper
542 xph_test_wrapper(XML_Parser p) : Parser(p), Status(false) {}
545 // expat wrapper functions, map callbacks to IASAXHandler
// expat start-element callback for the StringIsXML probe: reaching any
// start tag sets Status to true and stops the parser.
548 xph_test_start(void* p, const XML_Char* name, const XML_Char** attrs)
551 xph_test_wrapper* Wrapper = (xph_test_wrapper*)p;
553 Wrapper->Status = true;
// The second argument (resumable) is false: the parse is abandoned.
554 XML_StopParser(Wrapper->Parser, false);
// Reports whether `document` looks like XML: an expat parser is run over
// the buffer and the result is whether a start tag was reached (see
// xph_test_start).
// NOTE(review): len is presumably replaced by strlen(document) only when
// the caller passes 0 -- confirm.
560 Kumu::StringIsXML(const char* document, ui32_t len)
566 len = strlen(document);
568 XML_Parser Parser = XML_ParserCreate("UTF-8");
572 DefaultLogSink().Error("Error allocating memory for XML parser.\n");
576 xph_test_wrapper Wrapper(Parser);
577 XML_SetUserData(Parser, (void*)&Wrapper);
578 XML_SetStartElementHandler(Parser, xph_test_start);
// The return value of XML_Parse is ignored; only Wrapper.Status matters.
580 XML_Parse(Parser, document, len, 1);
581 XML_ParserFree(Parser);
582 return Wrapper.Status;
587 //----------------------------------------------------------------------------------------------------
// File-scope state for the Xerces back end.
591 static Mutex sg_xerces_init_lock; // protect the xerces initialized
592 static bool sg_xml_init = false; // signal initialization
593 static Mutex sg_coder_lock; // protect the transcoder context
// Shared UTF-8 transcoder; null until initialization has created it.
594 static XMLTranscoder* sg_coder = 0;
// Scratch buffer shared by both conversion directions: used as bytes for
// UTF-8 output and reinterpreted as XMLCh for Xerces output.
595 static const int sg_coder_buf_len = 128 * 1024;
596 static char sg_coder_buf[sg_coder_buf_len + 8];
597 static unsigned char sg_coder_counts[sg_coder_buf_len / sizeof(XMLCh)]; // see XMLTranscoder::transcodeFrom
// XMLCh string constants: "LS" and the encoding label "UTF-8".
599 static const XMLCh sg_LS[] = { chLatin_L, chLatin_S, chNull };
600 static const XMLCh sg_label_UTF_8[] = { chLatin_U, chLatin_T, chLatin_F,
601 chDash, chDigit_8, chNull};
// Xerces initialization: while holding sg_xerces_init_lock, initializes the
// Xerces platform and creates the shared UTF-8 transcoder (sg_coder) with a
// block size of sg_coder_buf_len. Failures are logged.
609 AutoMutex AL(sg_xerces_init_lock);
615 XMLPlatformUtils::Initialize();
618 XMLTransService::Codes ret;
619 sg_coder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(sg_label_UTF_8, ret, sg_coder_buf_len);
// Translate the transcoder-creation status into a readable message.
621 if ( ret != XMLTransService::Ok )
623 const char* message = "Undefined Error";
627 case XMLTransService::UnsupportedEncoding: message = "Unsupported encoding"; break;
628 case XMLTransService::InternalFailure: message = "Internal failure"; break;
629 case XMLTransService::SupportFilesNotFound: message = "Support files not found"; break;
632 DefaultLogSink().Error("Xerces transform initialization error: %s\n", message);
635 catch (const XMLException &e)
// NOTE(review): XMLException::getMessage() returns XMLCh text, but the
// format string uses %s -- confirm whether a transcode step is needed.
637 DefaultLogSink().Error("Xerces initialization error: %s\n", e.getMessage());
// XercesString convenience overload: forwards to the XMLCh* overload with
// in_str.c_str() and returns its result.
645 Kumu::XercesString_to_UTF_8(const Kumu::XercesString& in_str, std::string& out_str) {
646 return XercesString_to_UTF_8(in_str.c_str(), out_str);
651 Kumu::XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str)
655 AutoMutex AL(sg_coder_lock);
656 ui32_t str_len = XMLString::stringLen(in_str);
657 ui32_t read_total = 0;
661 while ( str_len > 0 )
663 ui32_t read_count = 0;
664 ui32_t write_count = sg_coder->transcodeTo(in_str + read_total, str_len,
665 (XMLByte*)sg_coder_buf, sg_coder_buf_len,
666 read_count, XMLTranscoder::UnRep_Throw);
668 out_str.append(sg_coder_buf, write_count);
669 str_len -= read_count;
670 read_total += read_count;
671 assert(str_len >= 0);
// std::string convenience overload: forwards to the const char* overload
// with in_str.c_str() and returns its result.
684 Kumu::UTF_8_to_XercesString(const std::string& in_str, Kumu::XercesString& out_str) {
685 return UTF_8_to_XercesString(in_str.c_str(), out_str);
690 Kumu::UTF_8_to_XercesString(const char* in_str, Kumu::XercesString& out_str)
694 AutoMutex AL(sg_coder_lock);
695 ui32_t str_len = strlen(in_str);
696 ui32_t read_total = 0;
700 while ( str_len > 0 )
702 ui32_t read_count = 0;
703 ui32_t write_count = sg_coder->transcodeFrom((const XMLByte*)(in_str + read_total), str_len,
704 (XMLCh*)sg_coder_buf, sg_coder_buf_len / sizeof(XMLCh),
705 read_count, sg_coder_counts);
707 out_str.append((XMLCh*)sg_coder_buf, write_count * sizeof(XMLCh));
708 str_len -= read_count;
709 read_total += read_count;
710 assert(str_len >= 0);
722 class MyTreeHandler : public HandlerBase
724 ns_map* m_Namespaces;
725 std::stack<XMLElement*> m_Scope;
727 bool m_HasEncodeErrors;
730 MyTreeHandler(XMLElement* root) : m_Namespaces(0), m_Root(root), m_HasEncodeErrors(false)
733 m_Namespaces = new ns_map;
740 bool HasEncodeErrors() const { return m_HasEncodeErrors; }
742 ns_map* TakeNamespaceMap()
744 if ( m_Namespaces == 0 || m_Namespaces->empty() )
747 ns_map* ret = m_Namespaces;
753 void AddNamespace(const char* ns_prefix, const char* ns_name)
758 if ( ns_prefix[0] == ':' )
764 assert(ns_prefix[0] == 0);
768 ns_map::iterator ni = m_Namespaces->find(ns_name);
770 if ( ni != m_Namespaces->end() )
772 if ( ni->second->Name() != std::string(ns_name) )
774 DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix);
780 XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name);
781 m_Namespaces->insert(ns_map::value_type(ns_prefix, Namespace));
784 assert(!m_Namespaces->empty());
788 void startElement(const XMLCh* const x_name,
789 XERCES_CPP_NAMESPACE::AttributeList& attributes)
794 if ( ! XercesString_to_UTF_8(x_name, tx_name) )
795 m_HasEncodeErrors = true;
797 const char* name = tx_name.c_str();
799 const char* ns_root = name;
800 const char* local_name = strchr(name, ':');
802 if ( local_name != 0 )
803 name = local_name + 1;
805 if ( m_Scope.empty() )
807 m_Scope.push(m_Root);
811 Element = m_Scope.top();
812 m_Scope.push(Element->AddChild(name));
815 Element = m_Scope.top();
816 Element->SetName(name);
819 ui32_t a_len = attributes.getLength();
821 for ( ui32_t i = 0; i < a_len; i++)
823 std::string aname, value;
824 if ( ! XercesString_to_UTF_8(attributes.getName(i), aname) )
825 m_HasEncodeErrors = true;
827 if ( ! XercesString_to_UTF_8(attributes.getValue(i), value) )
828 m_HasEncodeErrors = true;
830 const char* x_aname = aname.c_str();
831 const char* x_value = value.c_str();
833 if ( strncmp(x_aname, "xmlns", 5) == 0 )
834 AddNamespace(x_aname+5, x_value);
836 if ( ( local_name = strchr(x_aname, ':') ) == 0 )
837 local_name = x_aname;
841 Element->SetAttr(local_name, x_value);
846 if ( ns_root != name )
847 key.assign(ns_root, name - ns_root - 1);
849 ns_map::iterator ni = m_Namespaces->find(key);
850 if ( ni != m_Namespaces->end() )
851 Element->SetNamespace(ni->second);
854 void endElement(const XMLCh *const name) {
858 void characters(const XMLCh *const chars, const unsigned int length)
863 if ( ! XercesString_to_UTF_8(chars, tmp) )
864 m_HasEncodeErrors = true;
866 m_Scope.top()->AppendBody(tmp);
// Parses `document` with a Xerces SAXParser and builds the element tree
// rooted at this element through a MyTreeHandler. The result is false when
// errorCount is greater than zero.
873 Kumu::XMLElement::ParseString(const std::string& document)
// An empty document is handled up front.
875 if ( document.empty() )
881 SAXParser* parser = new SAXParser();
// Validation is always on; namespace processing is enabled.
883 parser->setValidationScheme(SAXParser::Val_Always);
884 parser->setDoNamespaces(true); // optional
// The same handler object receives document events and error callbacks.
886 MyTreeHandler* docHandler = new MyTreeHandler(this);
887 parser->setDocumentHandler(docHandler);
888 parser->setErrorHandler(docHandler);
// Parse directly from the bytes held by `document`.
892 MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document.c_str()),
893 static_cast<const unsigned int>(document.size()),
896 parser->parse(xmlSource);
// Exceptions are logged. Xerces messages are XMLCh text, so they are
// transcoded for logging and the transcoded copy is released afterwards.
898 catch (const XMLException& e)
900 char* message = XMLString::transcode(e.getMessage());
901 DefaultLogSink().Error("Parser error: %s\n", message);
902 XMLString::release(&message);
905 catch (const SAXParseException& e)
907 char* message = XMLString::transcode(e.getMessage());
908 DefaultLogSink().Error("Parser error: %s at line %d\n", message, e.getLineNumber());
909 XMLString::release(&message);
914 DefaultLogSink().Error("Unexpected XML parser error\n");
// On a clean parse this element takes over the handler's namespace map.
918 if ( errorCount == 0 )
919 m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap();
924 return errorCount > 0 ? false : true;
// Reports whether `document` looks like XML, using Xerces progressive
// parsing (parseFirst followed by parseNext) over the in-memory buffer.
// NOTE(review): len is presumably replaced by strlen(document) only when
// the caller passes 0 -- confirm.
929 Kumu::StringIsXML(const char* document, ui32_t len)
// A null or empty document is handled up front.
931 if ( document == 0 || *document == 0 )
937 len = strlen(document);
945 MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document),
946 static_cast<const unsigned int>(len),
// Start the progressive scan, then try to advance it by one step.
949 if ( parser.parseFirst(xmlSource, token) )
951 if ( parser.parseNext(token) )
965 //----------------------------------------------------------------------------------------------------
967 #if ! defined(HAVE_EXPAT) && ! defined(HAVE_XERCES_C)
// Variant built when neither HAVE_EXPAT nor HAVE_XERCES_C is defined: logs
// that XML parser support is missing.
971 Kumu::XMLElement::ParseString(const std::string& document)
973 DefaultLogSink().Error("Kumu compiled without XML parser support.\n");
// Variant built when neither HAVE_EXPAT nor HAVE_XERCES_C is defined: logs
// that XML parser support is missing.
979 Kumu::StringIsXML(const char* document, ui32_t len)
981 DefaultLogSink().Error("Kumu compiled without XML parser support.\n");