X-Git-Url: https://git.carlh.net/gitweb/?a=blobdiff_plain;f=src%2FKM_xml.cpp;h=eb9c25fb5c1f909f34b43e7629c33888076a91af;hb=80490136d3f872d162670e616827033fdd1be09d;hp=03102919b664d17ce1d0ae2d8b9d56fe7d07d32b;hpb=3a3aa48a5a4f7324a9e4c2273d0747d7f58a2813;p=asdcplib.git diff --git a/src/KM_xml.cpp b/src/KM_xml.cpp index 0310291..eb9c25f 100644 --- a/src/KM_xml.cpp +++ b/src/KM_xml.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2005-2006, John Hurst +Copyright (c) 2005-2010, John Hurst All rights reserved. Redistribution and use in source and binary forms, with or without @@ -31,29 +31,60 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include -#ifdef ASDCP_USE_EXPAT +#ifdef HAVE_EXPAT +# ifdef HAVE_XERCES_C +# error "Both HAVE_EXPAT and HAVE_XERCES_C defined" +# endif #include #endif +#ifdef HAVE_XERCES_C +# ifdef HAVE_EXPAT +# error "Both HAVE_EXPAT and HAVE_XERCES_C defined" +# endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +XERCES_CPP_NAMESPACE_USE + +namespace Kumu { + void init_xml_dom(); + typedef std::basic_string XercesString; + bool UTF_8_to_XercesString(const std::string& in_str, XercesString& out_str); + bool UTF_8_to_XercesString(const char* in_str, XercesString& out_str); + bool XercesString_to_UTF_8(const XercesString& in_str, std::string& out_str); + bool XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str); +} + +#endif + using namespace Kumu; class ns_map : public std::map { public: - ns_map() {} ~ns_map() { - ns_map::iterator ni = begin(); - - while (ni != end() ) + while ( ! empty() ) { - // fprintf(stderr, "deleting namespace %s:%s\n", ni->second->Prefix().c_str(), ni->second->Name().c_str()); + ns_map::iterator ni = begin(); delete ni->second; - ni++; + erase(ni); } } }; @@ -69,8 +100,7 @@ Kumu::XMLElement::~XMLElement() for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ ) delete *i; - if ( m_NamespaceOwner != 0 ) - delete (ns_map*)m_NamespaceOwner; + delete (ns_map*)m_NamespaceOwner; } // @@ -84,6 +114,14 @@ Kumu::XMLElement::SetAttr(const char* name, const char* value) m_AttrList.push_back(TmpVal); } +// +Kumu::XMLElement* +Kumu::XMLElement::AddChild(Kumu::XMLElement* element) +{ + m_ChildList.push_back(element); // takes posession! + return element; +} + // Kumu::XMLElement* Kumu::XMLElement::AddChild(const char* name) @@ -107,6 +145,13 @@ Kumu::XMLElement::AppendBody(const std::string& value) m_Body += value; } +// +void +Kumu::XMLElement::SetBody(const std::string& value) +{ + m_Body = value; +} + // Kumu::XMLElement* Kumu::XMLElement::AddChildWithContent(const char* name, const char* value) @@ -141,10 +186,10 @@ Kumu::XMLElement::AddComment(const char* value) // void -Kumu::XMLElement::Render(std::string& outbuf) const +Kumu::XMLElement::Render(std::string& outbuf, const bool& pretty) const { outbuf = "\n"; - RenderElement(outbuf, 0); + RenderElement(outbuf, 0, pretty); } // @@ -157,15 +202,18 @@ add_spacer(std::string& outbuf, i32_t depth) // void -Kumu::XMLElement::RenderElement(std::string& outbuf, ui32_t depth) const +Kumu::XMLElement::RenderElement(std::string& outbuf, const ui32_t& depth, const bool& pretty) const { - add_spacer(outbuf, depth); + if ( pretty ) + { + add_spacer(outbuf, depth); + } outbuf += "<"; outbuf += m_Name; // render attributes - for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); i++ ) + for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); ++i ) { outbuf += " "; outbuf += (*i).name; @@ -183,12 +231,19 @@ Kumu::XMLElement::RenderElement(std::string& outbuf, ui32_t depth) const // render body if ( m_Body.length() > 0 ) - outbuf += m_Body; + { + outbuf += m_Body; + } - for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ ) - (*i)->RenderElement(outbuf, depth + 1); + for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); ++i ) + { + (*i)->RenderElement(outbuf, depth + 1, pretty); + } - add_spacer(outbuf, depth); + if ( pretty ) + { + add_spacer(outbuf, depth); + } } else if ( m_Body.length() > 0 ) { @@ -261,9 +316,93 @@ Kumu::XMLElement::GetChildrenWithName(const char* name, ElementList& outList) co return outList; } +// +void +Kumu::XMLElement::DeleteAttributes() +{ + m_AttrList.clear(); +} + +// +void +Kumu::XMLElement::DeleteAttrWithName(const char* name) +{ + assert(name); + AttributeList::iterator i = m_AttrList.begin(); + + while ( i != m_AttrList.end() ) + { + if ( i->name == std::string(name) ) + m_AttrList.erase(i++); + else + ++i; + } +} + +// +void +Kumu::XMLElement::DeleteChildren() +{ + while ( ! m_ChildList.empty() ) + { + delete m_ChildList.back(); + m_ChildList.pop_back(); + } +} + +// +void +Kumu::XMLElement::DeleteChild(const XMLElement* element) +{ + if ( element != 0 ) + { + for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ ) + { + if ( *i == element ) + { + delete *i; + m_ChildList.erase(i); + return; + } + } + } +} + +// +void +Kumu::XMLElement::ForgetChild(const XMLElement* element) +{ + if ( element != 0 ) + { + for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ ) + { + if ( *i == element ) + { + m_ChildList.erase(i); + return; + } + } + } +} + +// +bool +Kumu::XMLElement::ParseString(const ByteString& document) +{ + return ParseString((const char*)document.RoData(), document.Length()); +} + +// +bool +Kumu::XMLElement::ParseString(const std::string& document) +{ + return ParseString(document.c_str(), document.size()); +} + + //---------------------------------------------------------------------------------------------------- -#ifdef ASDCP_USE_EXPAT +#ifdef HAVE_EXPAT class ExpatParseContext @@ -384,7 +523,7 @@ xph_namespace_start(void* p, const XML_Char* ns_prefix, const XML_Char* ns_name) // bool -Kumu::XMLElement::ParseString(const std::string& document) +Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len) { XML_Parser Parser = XML_ParserCreateNS("UTF-8", '|'); @@ -400,12 +539,12 @@ Kumu::XMLElement::ParseString(const std::string& document) XML_SetCharacterDataHandler(Parser, xph_char); XML_SetStartNamespaceDeclHandler(Parser, xph_namespace_start); - if ( ! XML_Parse(Parser, document.c_str(), document.size(), 1) ) + if ( ! XML_Parse(Parser, document, doc_len, 1) ) { - XML_ParserFree(Parser); DefaultLogSink().Error("XML Parse error on line %d: %s\n", XML_GetCurrentLineNumber(Parser), XML_ErrorString(XML_GetErrorCode(Parser))); + XML_ParserFree(Parser); return false; } @@ -442,7 +581,7 @@ xph_test_start(void* p, const XML_Char* name, const XML_Char** attrs) // bool -Kumu::XMLElement::TestString(const char* document, ui32_t len) +Kumu::StringIsXML(const char* document, ui32_t len) { if ( document == 0 ) return false; @@ -467,27 +606,537 @@ Kumu::XMLElement::TestString(const char* document, ui32_t len) return Wrapper.Status; } -#else // no XML parser support +#endif + +//---------------------------------------------------------------------------------------------------- + +#ifdef HAVE_XERCES_C + +static Mutex sg_xerces_init_lock; // protect the xerces initialized +static bool sg_xml_init = false; // signal initialization +static Mutex sg_coder_lock; // protect the transcoder context +static XMLTranscoder* sg_coder = 0; +static const int sg_coder_buf_len = 128 * 1024; +static char sg_coder_buf[sg_coder_buf_len + 8]; +static unsigned char sg_coder_counts[sg_coder_buf_len / sizeof(XMLCh)]; // see XMLTranscoder::transcodeFrom + +static const XMLCh sg_LS[] = { chLatin_L, chLatin_S, chNull }; +static const XMLCh sg_label_UTF_8[] = { chLatin_U, chLatin_T, chLatin_F, + chDash, chDigit_8, chNull}; + +// +void +Kumu::init_xml_dom() +{ + if ( ! sg_xml_init ) + { + AutoMutex AL(sg_xerces_init_lock); + + if ( ! sg_xml_init ) + { + try + { + XMLPlatformUtils::Initialize(); + sg_xml_init = true; + + XMLTransService::Codes ret; + sg_coder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(sg_label_UTF_8, ret, sg_coder_buf_len); + + if ( ret != XMLTransService::Ok ) + { + const char* message = "Undefined Error"; + + switch ( ret ) + { + case XMLTransService::UnsupportedEncoding: message = "Unsupported encoding"; break; + case XMLTransService::InternalFailure: message = "Internal failure"; break; + case XMLTransService::SupportFilesNotFound: message = "Support files not found"; break; + } + + DefaultLogSink().Error("Xerces transform initialization error: %s\n", message); + } + } + catch (const XMLException &e) + { + DefaultLogSink().Error("Xerces initialization error: %s\n", e.getMessage()); + } + } + } +} // bool -Kumu::XMLElement::ParseString(const std::string& document) +Kumu::XercesString_to_UTF_8(const Kumu::XercesString& in_str, std::string& out_str) { + return XercesString_to_UTF_8(in_str.c_str(), out_str); +} + +// +bool +Kumu::XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str) +{ + assert(in_str); + assert(sg_xml_init); + AutoMutex AL(sg_coder_lock); + ui32_t str_len = XMLString::stringLen(in_str); + ui32_t read_total = 0; + + try + { + while ( str_len > 0 ) + { +#if XERCES_VERSION_MAJOR < 3 + ui32_t read_count = 0; +#else + XMLSize_t read_count = 0; +#endif + ui32_t write_count = sg_coder->transcodeTo(in_str + read_total, str_len, + (XMLByte*)sg_coder_buf, sg_coder_buf_len, + read_count, XMLTranscoder::UnRep_Throw); + + out_str.append(sg_coder_buf, write_count); + str_len -= read_count; + read_total += read_count; + assert(str_len >= 0); + } + } + catch (...) + { + return false; + } + + return true; +} + +// +bool +Kumu::UTF_8_to_XercesString(const std::string& in_str, Kumu::XercesString& out_str) { + return UTF_8_to_XercesString(in_str.c_str(), out_str); +} + +// +bool +Kumu::UTF_8_to_XercesString(const char* in_str, Kumu::XercesString& out_str) +{ + assert(in_str); + assert(sg_xml_init); + AutoMutex AL(sg_coder_lock); + ui32_t str_len = strlen(in_str); + ui32_t read_total = 0; + + try + { + while ( str_len > 0 ) + { +#if XERCES_VERSION_MAJOR < 3 + ui32_t read_count = 0; +#else + XMLSize_t read_count = 0; +#endif + ui32_t write_count = sg_coder->transcodeFrom((const XMLByte*)(in_str + read_total), str_len, + (XMLCh*)sg_coder_buf, sg_coder_buf_len / sizeof(XMLCh), + read_count, sg_coder_counts); + + out_str.append((XMLCh*)sg_coder_buf, write_count * sizeof(XMLCh)); + str_len -= read_count; + read_total += read_count; + assert(str_len >= 0); + } + } + catch (...) + { + return false; + } + + return true; +} + +// +class MyTreeHandler : public HandlerBase +{ + ns_map* m_Namespaces; + std::stack m_Scope; + XMLElement* m_Root; + bool m_HasEncodeErrors; + +public: + MyTreeHandler(XMLElement* root) : m_Namespaces(0), m_Root(root), m_HasEncodeErrors(false) + { + assert(m_Root); + m_Namespaces = new ns_map; + } + + ~MyTreeHandler() { + delete m_Namespaces; + } + + bool HasEncodeErrors() const { return m_HasEncodeErrors; } + + ns_map* TakeNamespaceMap() + { + if ( m_Namespaces == 0 || m_Namespaces->empty() ) + return 0; + + ns_map* ret = m_Namespaces; + m_Namespaces = 0; + return ret; + } + + // + void AddNamespace(const char* ns_prefix, const char* ns_name) + { + assert(ns_prefix); + assert(ns_name); + + if ( ns_prefix[0] == ':' ) + { + ns_prefix++; + } + else + { + assert(ns_prefix[0] == 0); + ns_prefix = ""; + } + + ns_map::iterator ni = m_Namespaces->find(ns_name); + + if ( ni != m_Namespaces->end() ) + { + if ( ni->second->Name() != std::string(ns_name) ) + { + DefaultLogSink().Error("Duplicate prefix: %s\n", ns_prefix); + return; + } + } + else + { + XMLNamespace* Namespace = new XMLNamespace(ns_prefix, ns_name); + m_Namespaces->insert(ns_map::value_type(ns_prefix, Namespace)); + } + + assert(!m_Namespaces->empty()); + } + + // + void startElement(const XMLCh* const x_name, + XERCES_CPP_NAMESPACE::AttributeList& attributes) + { + assert(x_name); + std::string tx_name; + + if ( ! XercesString_to_UTF_8(x_name, tx_name) ) + m_HasEncodeErrors = true; + + const char* name = tx_name.c_str(); + XMLElement* Element; + const char* ns_root = name; + const char* local_name = strchr(name, ':'); + + if ( local_name != 0 ) + name = local_name + 1; + + if ( m_Scope.empty() ) + { + m_Scope.push(m_Root); + } + else + { + Element = m_Scope.top(); + m_Scope.push(Element->AddChild(name)); + } + + Element = m_Scope.top(); + Element->SetName(name); + + // set attributes + ui32_t a_len = attributes.getLength(); + + for ( ui32_t i = 0; i < a_len; i++) + { + std::string aname, value; + if ( ! XercesString_to_UTF_8(attributes.getName(i), aname) ) + m_HasEncodeErrors = true; + + if ( ! XercesString_to_UTF_8(attributes.getValue(i), value) ) + m_HasEncodeErrors = true; + + const char* x_aname = aname.c_str(); + const char* x_value = value.c_str(); + + if ( strncmp(x_aname, "xmlns", 5) == 0 ) + AddNamespace(x_aname+5, x_value); + + if ( ( local_name = strchr(x_aname, ':') ) == 0 ) + local_name = x_aname; + else + local_name++; + + Element->SetAttr(local_name, x_value); + } + + // map the namespace + std::string key; + if ( ns_root != name ) + key.assign(ns_root, name - ns_root - 1); + + ns_map::iterator ni = m_Namespaces->find(key); + if ( ni != m_Namespaces->end() ) + Element->SetNamespace(ni->second); + } + + void endElement(const XMLCh *const name) { + m_Scope.pop(); + } + + void characters(const XMLCh *const chars, const unsigned int length) + { + if ( length > 0 ) + { + std::string tmp; + if ( ! XercesString_to_UTF_8(chars, tmp) ) + m_HasEncodeErrors = true; + + m_Scope.top()->AppendBody(tmp); + } + } +}; + +// +bool +Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len) { - DefaultLogSink().Error("asdcplib compiled without XML parser support.\n"); + if ( doc_len == 0 ) + return false; + + init_xml_dom(); + + int errorCount = 0; + SAXParser* parser = new SAXParser(); + + parser->setValidationScheme(SAXParser::Val_Always); + parser->setDoNamespaces(true); // optional + + MyTreeHandler* docHandler = new MyTreeHandler(this); + parser->setDocumentHandler(docHandler); + parser->setErrorHandler(docHandler); + + try + { + MemBufInputSource xmlSource(reinterpret_cast(document), + static_cast(doc_len), + "pidc_rules_file"); + + parser->parse(xmlSource); + } + catch (const XMLException& e) + { + char* message = XMLString::transcode(e.getMessage()); + DefaultLogSink().Error("Parser error: %s\n", message); + XMLString::release(&message); + errorCount++; + } + catch (const SAXParseException& e) + { + char* message = XMLString::transcode(e.getMessage()); + DefaultLogSink().Error("Parser error: %s at line %d\n", message, e.getLineNumber()); + XMLString::release(&message); + errorCount++; + } + catch (...) + { + DefaultLogSink().Error("Unexpected XML parser error\n"); + errorCount++; + } + + if ( errorCount == 0 ) + m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap(); + + delete parser; + delete docHandler; + + return errorCount > 0 ? false : true; +} + +// +bool +Kumu::StringIsXML(const char* document, ui32_t len) +{ + if ( document == 0 || *document == 0 ) + return false; + + init_xml_dom(); + + if ( len == 0 ) + len = strlen(document); + + SAXParser parser; + XMLPScanToken token; + bool status = false; + + try + { + MemBufInputSource xmlSource(reinterpret_cast(document), + static_cast(len), + "pidc_rules_file"); + + if ( parser.parseFirst(xmlSource, token) ) + { + if ( parser.parseNext(token) ) + status = true; + } + } + catch (...) + { + } + + return status; +} + + +#endif + +//---------------------------------------------------------------------------------------------------- + +#if ! defined(HAVE_EXPAT) && ! defined(HAVE_XERCES_C) + +// +bool +Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len) +{ + DefaultLogSink().Error("Kumu compiled without XML parser support.\n"); return false; } // bool -Kumu::XMLElement::TestString(const char* document, ui32_t len) +Kumu::StringIsXML(const char* document, ui32_t len) { - DefaultLogSink().Error("asdcplib compiled without XML parser support.\n"); + DefaultLogSink().Error("Kumu compiled without XML parser support.\n"); return false; } #endif +//---------------------------------------------------------------------------------------------------- + +// +bool +Kumu::GetXMLDocType(const ByteString& buf, std::string& ns_prefix, std::string& type_name, std::string& namespace_name, + AttributeList& doc_attr_list) +{ + return GetXMLDocType(buf.RoData(), buf.Length(), ns_prefix, type_name, namespace_name, doc_attr_list); +} + +// +bool +Kumu::GetXMLDocType(const std::string& buf, std::string& ns_prefix, std::string& type_name, std::string& namespace_name, + AttributeList& doc_attr_list) +{ + return GetXMLDocType((const byte_t*)buf.c_str(), buf.size(), ns_prefix, type_name, namespace_name, doc_attr_list); +} + +// +bool +Kumu::GetXMLDocType(const byte_t* buf, ui32_t buf_len, std::string& ns_prefix, std::string& type_name, std::string& namespace_name, + AttributeList& doc_attr_list) +{ + assert(buf); + const byte_t *p1 = buf, *p2; + const byte_t *end_p = buf + buf_len; + + while ( p1 < end_p && *p1 ) + { + if ( *p1 == '<' && isalpha(*(p1+1)) ) + { + p2 = ++p1; + + // collect element name + while ( p2 < end_p && *p2 && ! ( isspace(*p2) || *p2 == '>' ) ) + ++p2; + + if ( p2 < end_p ) + { + const byte_t* separator = (byte_t*)strchr(reinterpret_cast(p1), ':'); + if ( separator != 0 && separator < p2 ) + { + ns_prefix.assign(reinterpret_cast(p1), separator - p1); + p1 = separator + 1; + } + + type_name.assign(reinterpret_cast(p1), p2 - p1); + break; + } + } + + p1++; + } + + if ( *p2 == ' ' ) + { + const byte_t *p3 = p2+1; + while ( p3 < end_p && *p3 && *p3 != '>' ) + { + ++p3; + } + + if ( *p3 != '>' ) + { + return false; // not well-formed XML + } + + std::string attr_str; + attr_str.assign(reinterpret_cast(p2+1), p3 - p2 - 1); + + // normalize whitespace so the subesquent split works properly + for ( int j = 0; j < attr_str.length(); ++j ) + { + if ( attr_str[j] != ' ' && isspace(attr_str[j]) ) + { + attr_str[j] = ' '; + } + } + + std::list doc_attr_nvpairs = km_token_split(attr_str, " "); + + std::list::iterator i; + std::map ns_map; + + for ( i = doc_attr_nvpairs.begin(); i != doc_attr_nvpairs.end(); ++i ) + { + // trim leading and trailing whitespace an right-most character, i.e., \" + std::string trimmed = i->substr(i->find_first_not_of(" "), i->find_last_not_of(" ")); + std::list nv_tokens = km_token_split(trimmed, "=\""); + + if ( nv_tokens.size() != 2 ) + { + continue; + } + + NVPair nv_pair; + nv_pair.name = nv_tokens.front(); + nv_pair.value = nv_tokens.back(); + doc_attr_list.push_back(nv_pair); + ns_map.insert(std::map::value_type(nv_pair.name, nv_pair.value)); + } + + std::string doc_ns_name_selector = ns_prefix.empty() ? "xmlns" : "xmlns:"+ns_prefix; + std::map::iterator j = ns_map.find(doc_ns_name_selector); + + if ( j != ns_map.end() ) + { + namespace_name = j->second; + } + } + else if ( *p2 != '>' ) + { + return false; // not well-formed XML + } + + return ! type_name.empty(); +} + + + // // end KM_xml.cpp //