2 Copyright (c) 2005-2010, John Hurst
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 # error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
47 # error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
50 #include <xercesc/util/PlatformUtils.hpp>
51 #include <xercesc/util/XMLString.hpp>
52 #include <xercesc/util/TransService.hpp>
53 #include <xercesc/sax/AttributeList.hpp>
54 #include <xercesc/sax/HandlerBase.hpp>
55 #include <xercesc/sax/ErrorHandler.hpp>
56 #include <xercesc/sax/SAXParseException.hpp>
57 #include <xercesc/parsers/SAXParser.hpp>
58 #include <xercesc/framework/MemBufInputSource.hpp>
59 #include <xercesc/framework/XMLPScanToken.hpp>
62 XERCES_CPP_NAMESPACE_USE
// String type whose code unit is Xerces' XMLCh.
66 typedef std::basic_string<XMLCh> XercesString;
// Transcoding helpers between UTF-8 text and XercesString. Each one writes its
// result through out_str and returns a bool status.
67 bool UTF_8_to_XercesString(const std::string& in_str, XercesString& out_str);
68 bool UTF_8_to_XercesString(const char* in_str, XercesString& out_str);
69 bool XercesString_to_UTF_8(const XercesString& in_str, std::string& out_str);
70 bool XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str);
// Map from a std::string key to XMLNamespace pointers.
// NOTE(review): the expat code path inserts entries keyed by namespace name,
// while the Xerces code path inserts them keyed by prefix - confirm this is intended.
78 class ns_map : public std::map<std::string, XMLNamespace*>
// Iteration starts at the first entry; the loop body is not visible in this
// excerpt (presumably it releases the stored XMLNamespace objects - TODO confirm).
85 ns_map::iterator ni = begin();
// Constructor: the element starts with no namespace and no owned namespace map
// (both pointers are initialized to null).
93 Kumu::XMLElement::XMLElement(const char* name) : m_Namespace(0), m_NamespaceOwner(0)
// Destructor: walks the child list (the per-child action is not visible in this
// excerpt; presumably each child is deleted - TODO confirm) and destroys the
// namespace map if this element owns one.
98 Kumu::XMLElement::~XMLElement()
100 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
// m_NamespaceOwner is cast back to ns_map* so the map is deleted through its real type.
103 delete (ns_map*)m_NamespaceOwner;
// Appends an attribute entry to m_AttrList; the entry's value is taken from `value`.
// NOTE(review): the visible lines show no check for a null `value` and no check
// for an existing attribute with the same name - confirm against the full body.
108 Kumu::XMLElement::SetAttr(const char* name, const char* value)
112 TmpVal.value = value;
114 m_AttrList.push_back(TmpVal);
// Appends an already-constructed element to the end of the child list.
119 Kumu::XMLElement::AddChild(Kumu::XMLElement* element)
121 m_ChildList.push_back(element); // takes possession!
// Allocates a new XMLElement called `name` and appends it to the child list.
127 Kumu::XMLElement::AddChild(const char* name)
129 XMLElement* tmpE = new XMLElement(name);
130 m_ChildList.push_back(tmpE);
// std::string convenience overload: forwards to the const char* overload.
136 Kumu::XMLElement::AddChildWithContent(const char* name, const std::string& value)
138 return AddChildWithContent(name, value.c_str());
143 Kumu::XMLElement::AppendBody(const std::string& value)
150 Kumu::XMLElement::SetBody(const std::string& value)
// Allocates a new child element called `name`, sets its body text to `value`
// and appends it to the child list.
157 Kumu::XMLElement::AddChildWithContent(const char* name, const char* value)
161 XMLElement* tmpE = new XMLElement(name);
162 tmpE->m_Body = value;
163 m_ChildList.push_back(tmpE);
// Allocates a new child element called `name` whose body text is `prefix`
// immediately followed by `value`, and appends it to the child list.
169 Kumu::XMLElement::AddChildWithPrefixedContent(const char* name, const char* prefix, const char* value)
171 XMLElement* tmpE = new XMLElement(name);
172 tmpE->m_Body = prefix;
173 tmpE->m_Body += value;
174 m_ChildList.push_back(tmpE);
180 Kumu::XMLElement::AddComment(const char* value)
// Serializes this element as a document. outbuf is overwritten (assigned, not
// appended to) with the XML declaration, then the element is rendered at depth 0.
189 Kumu::XMLElement::Render(std::string& outbuf) const
191 outbuf = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
192 RenderElement(outbuf, 0);
197 add_spacer(std::string& outbuf, i32_t depth)
// Appends the textual form of this element to outbuf at nesting level `depth`,
// recursing into children with depth + 1.
// NOTE(review): in the visible lines attribute values are appended to outbuf
// as-is; no XML escaping step is visible - confirm against the full body.
205 Kumu::XMLElement::RenderElement(std::string& outbuf, ui32_t depth) const
// spacer before the opening tag (presumably indentation for `depth` - TODO confirm)
207 add_spacer(outbuf, depth);
// one pass over the attribute list, in stored order
213 for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); i++ )
218 outbuf += (*i).value;
224 // body contents and children
225 if ( ! m_ChildList.empty() )
230 if ( m_Body.length() > 0 )
// children are rendered one level deeper than this element
233 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
234 (*i)->RenderElement(outbuf, depth + 1);
// spacer again after the children (presumably before the closing tag - TODO confirm)
236 add_spacer(outbuf, depth);
// no children: only the body text, if any, is considered
238 else if ( m_Body.length() > 0 )
// Tests whether this element's name equals `name`.
250 Kumu::XMLElement::HasName(const char* name) const
// a null or empty `name` is handled separately (the result is not visible in this excerpt)
252 if ( name == 0 || *name == 0 )
255 return (m_Name == name);
260 Kumu::XMLElement::SetName(const char* name)
// Linear search of the attribute list; for the first entry whose name equals
// `name` the value is returned via c_str(), i.e. the pointer refers to storage
// held by that attribute entry. The not-found result is not visible in this excerpt.
268 Kumu::XMLElement::GetAttrWithName(const char* name) const
270 for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); i++ )
272 if ( (*i).name == name )
273 return (*i).value.c_str();
// Searches the direct children (m_ChildList only; no recursion is visible) for
// an element for which HasName(name) is true.
281 Kumu::XMLElement::GetChildWithName(const char* name) const
283 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
285 if ( (*i)->HasName(name) )
// Collects every descendant element for which HasName(name) is true into
// outList. A matching child is appended before its own descendants are searched.
293 const Kumu::ElementList&
294 Kumu::XMLElement::GetChildrenWithName(const char* name, ElementList& outList) const
297 for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
299 if ( (*i)->HasName(name) )
300 outList.push_back(*i);
// recurse only into children that themselves have children
302 if ( ! (*i)->m_ChildList.empty() )
303 (*i)->GetChildrenWithName(name, outList);
311 Kumu::XMLElement::DeleteAttributes()
// Removes attribute entries whose name equals `name`.
// NOTE(review): `name` is converted with std::string(name), so a null pointer
// is not valid input in the visible lines - confirm whether it is checked earlier.
318 Kumu::XMLElement::DeleteAttrWithName(const char* name)
321 AttributeList::iterator i = m_AttrList.begin();
323 while ( i != m_AttrList.end() )
325 if ( i->name == std::string(name) )
// post-increment: the iterator is advanced before the old position is erased
326 m_AttrList.erase(i++);
// Destroys every child element, working from the back of the list until it is empty.
334 Kumu::XMLElement::DeleteChildren()
336 while ( ! m_ChildList.empty() )
// free the element first, then drop its (now dangling) pointer from the list
338 delete m_ChildList.back();
339 m_ChildList.pop_back();
// Scans the child list and erases an entry at the iterator position. The match
// test and the handling of the element itself are not visible in this excerpt
// (presumably the matching child is deleted before the erase - TODO confirm).
345 Kumu::XMLElement::DeleteChild(const XMLElement* element)
349 for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
354 m_ChildList.erase(i);
// Scans the child list and erases an entry at the iterator position. The match
// test is not visible in this excerpt (presumably the element is removed from
// the list without being deleted, unlike DeleteChild - TODO confirm).
363 Kumu::XMLElement::ForgetChild(const XMLElement* element)
367 for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
371 m_ChildList.erase(i);
379 //----------------------------------------------------------------------------------------------------
// State handed to the expat callbacks (as user data) for the duration of one parse.
384 class ExpatParseContext
386 KM_NO_COPY_CONSTRUCT(ExpatParseContext);
// elements pushed by the start-element callback; top() is the element that
// receives new children and character data
390 std::stack<XMLElement*> Scope;
393 ExpatParseContext(XMLElement* root) : Root(root) {
// heap-allocated namespace map; note that the destructor below does not delete it
394 Namespaces = new ns_map;
// intentionally empty: Namespaces is not released here
398 ~ExpatParseContext() {}
401 // expat wrapper functions
// expat start-element callback. `p` is the ExpatParseContext, `name` is the
// element name and `attrs` is a null-terminated array of name/value pairs.
404 xph_start(void* p, const XML_Char* name, const XML_Char** attrs)
406 assert(p); assert(name); assert(attrs);
407 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// A '|' in the name separates a leading namespace part from the local name;
// ns_root keeps the start of the full string, name is moved to the local part.
410 const char* ns_root = name;
411 const char* local_name = strchr(name, '|');
412 if ( local_name != 0 )
413 name = local_name + 1;
// First element seen: the context's root element is pushed on the scope stack.
415 if ( Ctx->Scope.empty() )
417 Ctx->Scope.push(Ctx->Root);
// Otherwise a new child of the current top element is created and pushed.
421 Element = Ctx->Scope.top();
422 Ctx->Scope.push(Element->AddChild(name));
425 Element = Ctx->Scope.top();
426 Element->SetName(name);
// key = the text before the '|' separator (only when a separator was found)
430 if ( ns_root != name )
431 key.assign(ns_root, name - ns_root - 1);
// attach the namespace object registered under that key, if there is one
433 ns_map::iterator ni = Ctx->Namespaces->find(key);
434 if ( ni != Ctx->Namespaces->end() )
435 Element->SetNamespace(ni->second);
// attrs is walked two entries at a time: attrs[i] is a name, attrs[i+1] its value
438 for ( int i = 0; attrs[i] != 0; i += 2 )
// no '|' in the attribute name: use the whole name as the local name
440 if ( ( local_name = strchr(attrs[i], '|') ) == 0 )
441 local_name = attrs[i];
445 Element->SetAttr(local_name, attrs[i+1]);
// expat end-element callback. `p` is the ExpatParseContext. The action taken is
// not visible in this excerpt (presumably the scope stack is popped - TODO confirm).
451 xph_end(void* p, const XML_Char* name)
453 assert(p); assert(name);
454 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// expat character-data callback: appends the received text to the body of the
// element on top of the scope stack.
460 xph_char(void* p, const XML_Char* data, int len)
462 assert(p); assert(data);
463 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// exactly `len` characters are copied; `data` is not treated as null-terminated
468 tmp_str.assign(data, len);
469 Ctx->Scope.top()->AppendBody(tmp_str);
// expat start-namespace-declaration callback: registers the declared namespace
// in the context's namespace map, keyed by the namespace name.
475 xph_namespace_start(void* p, const XML_Char* ns_prefix, const XML_Char* ns_name)
477 assert(p); assert(ns_name);
478 ExpatParseContext* Ctx = (ExpatParseContext*)p;
// a null prefix gets special handling (not visible in this excerpt)
480 if ( ns_prefix == 0 )
483 ns_map::iterator ni = Ctx->Namespaces->find(ns_name);
// already registered: only verify the stored entry's Name() matches ns_name
485 if ( ni != Ctx->Namespaces->end() )
487 if ( ni->second->Name() != std::string(ns_name) )
489 DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix);
// not yet registered: allocate the namespace object and store it in the map
495 XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name);
496 Ctx->Namespaces->insert(ns_map::value_type(ns_name, Namespace));
502 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
504 XML_Parser Parser = XML_ParserCreateNS("UTF-8", '|');
508 DefaultLogSink().Error("Error allocating memory for XML parser.\n");
512 ExpatParseContext Ctx(this);
513 XML_SetUserData(Parser, (void*)&Ctx);
514 XML_SetElementHandler(Parser, xph_start, xph_end);
515 XML_SetCharacterDataHandler(Parser, xph_char);
516 XML_SetStartNamespaceDeclHandler(Parser, xph_namespace_start);
518 if ( ! XML_Parse(Parser, document, doc_len, 1) )
520 XML_ParserFree(Parser);
521 DefaultLogSink().Error("XML Parse error on line %d: %s\n",
522 XML_GetCurrentLineNumber(Parser),
523 XML_ErrorString(XML_GetErrorCode(Parser)));
527 XML_ParserFree(Parser);
529 if ( ! Ctx.Namespaces->empty() )
530 m_NamespaceOwner = (void*)Ctx.Namespaces;
535 //------------------------------------------------------------------------------------------
// User data for the StringIsXML() probe: carries the parser handle and a status flag.
537 struct xph_test_wrapper
// Status starts false; it is set by the start-element probe callback
542 xph_test_wrapper(XML_Parser p) : Parser(p), Status(false) {}
545 // expat wrapper functions, map callbacks to IASAXHandler
// expat start-element callback used by StringIsXML(): records that an element
// was seen and stops the parser.
548 xph_test_start(void* p, const XML_Char* name, const XML_Char** attrs)
551 xph_test_wrapper* Wrapper = (xph_test_wrapper*)p;
553 Wrapper->Status = true;
// second argument false = the stop is not resumable
554 XML_StopParser(Wrapper->Parser, false);
// expat implementation: returns the probe status, which is true only if expat
// reached a start-element callback while parsing `document`.
560 Kumu::StringIsXML(const char* document, ui32_t len)
// length is taken from strlen() here (the guarding condition is not visible in
// this excerpt; presumably when the caller passed len == 0 - TODO confirm)
566 len = strlen(document);
568 XML_Parser Parser = XML_ParserCreate("UTF-8");
572 DefaultLogSink().Error("Error allocating memory for XML parser.\n");
576 xph_test_wrapper Wrapper(Parser);
577 XML_SetUserData(Parser, (void*)&Wrapper);
578 XML_SetStartElementHandler(Parser, xph_test_start);
// the return value of XML_Parse is not used; the result is carried in Wrapper.Status
580 XML_Parse(Parser, document, len, 1);
581 XML_ParserFree(Parser);
582 return Wrapper.Status;
587 //----------------------------------------------------------------------------------------------------
// File-scope state for the Xerces-C code path.
591 static Mutex sg_xerces_init_lock; // protects Xerces initialization
592 static bool sg_xml_init = false; // signal initialization
593 static Mutex sg_coder_lock; // protect the transcoder context
// shared UTF-8 transcoder, created during initialization
594 static XMLTranscoder* sg_coder = 0;
// nominal size in bytes of the shared transcoding buffer (128 * 1024)
595 static const int sg_coder_buf_len = 128 * 1024;
// the buffer is declared 8 bytes larger than its nominal size
596 static char sg_coder_buf[sg_coder_buf_len + 8];
// one entry per XMLCh that fits in the buffer
597 static unsigned char sg_coder_counts[sg_coder_buf_len / sizeof(XMLCh)]; // see XMLTranscoder::transcodeFrom
// the string "LS" as XMLCh
599 static const XMLCh sg_LS[] = { chLatin_L, chLatin_S, chNull };
// the encoding label "UTF-8" as XMLCh
600 static const XMLCh sg_label_UTF_8[] = { chLatin_U, chLatin_T, chLatin_F,
601 chDash, chDigit_8, chNull};
609 AutoMutex AL(sg_xerces_init_lock);
615 XMLPlatformUtils::Initialize();
618 XMLTransService::Codes ret;
619 sg_coder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(sg_label_UTF_8, ret, sg_coder_buf_len);
621 if ( ret != XMLTransService::Ok )
623 const char* message = "Undefined Error";
627 case XMLTransService::UnsupportedEncoding: message = "Unsupported encoding"; break;
628 case XMLTransService::InternalFailure: message = "Internal failure"; break;
629 case XMLTransService::SupportFilesNotFound: message = "Support files not found"; break;
632 DefaultLogSink().Error("Xerces transform initialization error: %s\n", message);
635 catch (const XMLException &e)
637 DefaultLogSink().Error("Xerces initialization error: %s\n", e.getMessage());
// XercesString convenience overload: forwards to the const XMLCh* overload.
645 Kumu::XercesString_to_UTF_8(const Kumu::XercesString& in_str, std::string& out_str) {
646 return XercesString_to_UTF_8(in_str.c_str(), out_str);
// Transcodes the null-terminated XMLCh string `in_str` to UTF-8 and appends the
// bytes to out_str, working through the shared buffer sg_coder_buf one chunk
// at a time while holding sg_coder_lock.
651 Kumu::XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str)
655 AutoMutex AL(sg_coder_lock);
// str_len counts the XMLCh units still to convert; read_total the units already consumed
656 ui32_t str_len = XMLString::stringLen(in_str);
657 ui32_t read_total = 0;
661 while ( str_len > 0 )
// the type of the "units consumed" out-parameter depends on the Xerces major version
663 #if XERCES_VERSION_MAJOR < 3
664 ui32_t read_count = 0;
666 XMLSize_t read_count = 0;
// UnRep_Throw: unrepresentable characters raise an exception instead of being replaced
668 ui32_t write_count = sg_coder->transcodeTo(in_str + read_total, str_len,
669 (XMLByte*)sg_coder_buf, sg_coder_buf_len,
670 read_count, XMLTranscoder::UnRep_Throw);
// write_count is a byte count here, matching the char buffer
672 out_str.append(sg_coder_buf, write_count);
673 str_len -= read_count;
674 read_total += read_count;
// NOTE(review): str_len is unsigned (ui32_t), so this assertion can never fail;
// also, if read_count were ever 0 the loop would make no progress - confirm.
675 assert(str_len >= 0);
// std::string convenience overload: forwards to the const char* overload.
688 Kumu::UTF_8_to_XercesString(const std::string& in_str, Kumu::XercesString& out_str) {
689 return UTF_8_to_XercesString(in_str.c_str(), out_str);
694 Kumu::UTF_8_to_XercesString(const char* in_str, Kumu::XercesString& out_str)
698 AutoMutex AL(sg_coder_lock);
699 ui32_t str_len = strlen(in_str);
700 ui32_t read_total = 0;
704 while ( str_len > 0 )
706 #if XERCES_VERSION_MAJOR < 3
707 ui32_t read_count = 0;
709 XMLSize_t read_count = 0;
711 ui32_t write_count = sg_coder->transcodeFrom((const XMLByte*)(in_str + read_total), str_len,
712 (XMLCh*)sg_coder_buf, sg_coder_buf_len / sizeof(XMLCh),
713 read_count, sg_coder_counts);
715 out_str.append((XMLCh*)sg_coder_buf, write_count * sizeof(XMLCh));
716 str_len -= read_count;
717 read_total += read_count;
718 assert(str_len >= 0);
// SAX handler (derived from Xerces HandlerBase) that builds an XMLElement tree
// under the root element given to the constructor and records the namespace
// declarations it encounters.
730 class MyTreeHandler : public HandlerBase
// namespace table, allocated in the constructor
732 ns_map* m_Namespaces;
// elements pushed by startElement; top() receives new children and character data
733 std::stack<XMLElement*> m_Scope;
// set to true whenever a conversion to UTF-8 reports failure
735 bool m_HasEncodeErrors;
738 MyTreeHandler(XMLElement* root) : m_Namespaces(0), m_Root(root), m_HasEncodeErrors(false)
741 m_Namespaces = new ns_map;
748 bool HasEncodeErrors() const { return m_HasEncodeErrors; }
// Gives the namespace map to the caller. A null or empty map is handled
// separately (that branch's result is not visible in this excerpt).
750 ns_map* TakeNamespaceMap()
752 if ( m_Namespaces == 0 || m_Namespaces->empty() )
755 ns_map* ret = m_Namespaces;
// Registers a namespace declaration. startElement passes the attribute name
// with its leading "xmlns" removed, so ns_prefix is either ":prefix" or empty.
761 void AddNamespace(const char* ns_prefix, const char* ns_name)
766 if ( ns_prefix[0] == ':' )
772 assert(ns_prefix[0] == 0);
// NOTE(review): the lookup below uses ns_name as the key, but new entries are
// inserted under ns_prefix and startElement looks entries up by prefix. The
// find() key looks inconsistent with the insert() key; if an entry for the
// prefix already exists the insert is a no-op and the new XMLNamespace object
// has no owner - confirm.
776 ns_map::iterator ni = m_Namespaces->find(ns_name);
778 if ( ni != m_Namespaces->end() )
780 if ( ni->second->Name() != std::string(ns_name) )
782 DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix);
788 XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name);
789 m_Namespaces->insert(ns_map::value_type(ns_prefix, Namespace));
792 assert(!m_Namespaces->empty());
// SAX start-element callback: creates the element, processes its attributes
// (including xmlns declarations) and attaches the matching namespace.
796 void startElement(const XMLCh* const x_name,
797 XERCES_CPP_NAMESPACE::AttributeList& attributes)
802 if ( ! XercesString_to_UTF_8(x_name, tx_name) )
803 m_HasEncodeErrors = true;
805 const char* name = tx_name.c_str();
// A ':' separates an optional prefix from the local name; ns_root keeps the
// start of the qualified name, name is moved to the local part.
807 const char* ns_root = name;
808 const char* local_name = strchr(name, ':');
810 if ( local_name != 0 )
811 name = local_name + 1;
// First element seen: the root element is pushed on the scope stack.
813 if ( m_Scope.empty() )
815 m_Scope.push(m_Root);
// Otherwise a new child of the current top element is created and pushed.
819 Element = m_Scope.top();
820 m_Scope.push(Element->AddChild(name));
823 Element = m_Scope.top();
824 Element->SetName(name);
827 ui32_t a_len = attributes.getLength();
829 for ( ui32_t i = 0; i < a_len; i++)
831 std::string aname, value;
832 if ( ! XercesString_to_UTF_8(attributes.getName(i), aname) )
833 m_HasEncodeErrors = true;
835 if ( ! XercesString_to_UTF_8(attributes.getValue(i), value) )
836 m_HasEncodeErrors = true;
838 const char* x_aname = aname.c_str();
839 const char* x_value = value.c_str();
// any attribute whose name starts with "xmlns" is treated as a namespace
// declaration; the remainder of the name is passed on as the prefix text
841 if ( strncmp(x_aname, "xmlns", 5) == 0 )
842 AddNamespace(x_aname+5, x_value);
// no ':' in the attribute name: use the whole name as the local name
844 if ( ( local_name = strchr(x_aname, ':') ) == 0 )
845 local_name = x_aname;
849 Element->SetAttr(local_name, x_value);
// key = the prefix text before ':' (only when the element name had a prefix)
854 if ( ns_root != name )
855 key.assign(ns_root, name - ns_root - 1);
857 ns_map::iterator ni = m_Namespaces->find(key);
858 if ( ni != m_Namespaces->end() )
859 Element->SetNamespace(ni->second);
// SAX end-element callback (body not visible in this excerpt).
862 void endElement(const XMLCh *const name) {
// SAX character-data callback: appends the text to the top element's body.
// NOTE(review): `length` is not used in the visible lines; the conversion
// helper measures `chars` with XMLString::stringLen, i.e. it relies on a
// terminating null - confirm that is guaranteed. Also confirm the parameter
// type matches the HandlerBase declaration of the Xerces version in use.
866 void characters(const XMLCh *const chars, const unsigned int length)
871 if ( ! XercesString_to_UTF_8(chars, tmp) )
872 m_HasEncodeErrors = true;
874 m_Scope.top()->AppendBody(tmp);
// Xerces-C implementation: parses doc_len bytes of `document` with a SAX parser
// and builds the element tree through a MyTreeHandler rooted at this element.
// Returns true only when errorCount is zero.
881 Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
// NOTE(review): parser and docHandler are allocated with new; their release is
// not visible in this excerpt - confirm both are deleted on every path.
889 SAXParser* parser = new SAXParser();
891 parser->setValidationScheme(SAXParser::Val_Always);
892 parser->setDoNamespaces(true); // optional
// the same handler object receives both document events and error reports
894 MyTreeHandler* docHandler = new MyTreeHandler(this);
895 parser->setDocumentHandler(docHandler);
896 parser->setErrorHandler(docHandler);
// wrap the caller's buffer as an input source for the parser
900 MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document),
901 static_cast<const unsigned int>(doc_len),
904 parser->parse(xmlSource);
// each handler below logs the failure; the exception message is transcoded to
// a char string for logging and released afterwards
906 catch (const XMLException& e)
908 char* message = XMLString::transcode(e.getMessage());
909 DefaultLogSink().Error("Parser error: %s\n", message);
910 XMLString::release(&message);
913 catch (const SAXParseException& e)
915 char* message = XMLString::transcode(e.getMessage());
916 DefaultLogSink().Error("Parser error: %s at line %d\n", message, e.getLineNumber());
917 XMLString::release(&message);
922 DefaultLogSink().Error("Unexpected XML parser error\n");
// the namespace map is taken over by this element only after an error-free parse
926 if ( errorCount == 0 )
927 m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap();
932 return errorCount > 0 ? false : true;
// Xerces-C implementation of the XML probe; it feeds `document` to the parser
// step by step with parseFirst()/parseNext(). The value returned on each path
// is not visible in this excerpt.
937 Kumu::StringIsXML(const char* document, ui32_t len)
// null or empty input is handled up front
939 if ( document == 0 || *document == 0 )
// length is taken from strlen() here (the guarding condition is not visible;
// presumably when the caller passed len == 0 - TODO confirm)
945 len = strlen(document);
953 MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document),
954 static_cast<const unsigned int>(len),
// start the scan, then advance it by one step using the scan token
957 if ( parser.parseFirst(xmlSource, token) )
959 if ( parser.parseNext(token) )
973 //----------------------------------------------------------------------------------------------------
975 #if ! defined(HAVE_EXPAT) && ! defined(HAVE_XERCES_C)
// Fallback compiled when neither HAVE_EXPAT nor HAVE_XERCES_C is defined:
// logs that no XML parser support is available.
// NOTE(review): this definition takes a const std::string&, whereas the
// parser-backed definitions in this file take (const char*, ui32_t) - confirm
// a matching declaration exists for both forms.
979 Kumu::XMLElement::ParseString(const std::string& document)
981 DefaultLogSink().Error("Kumu compiled without XML parser support.\n");
// Fallback compiled when neither HAVE_EXPAT nor HAVE_XERCES_C is defined:
// logs that no XML parser support is available.
987 Kumu::StringIsXML(const char* document, ui32_t len)
989 DefaultLogSink().Error("Kumu compiled without XML parser support.\n");