X-Git-Url: https://git.carlh.net/gitweb/?a=blobdiff_plain;f=src%2FKM_xml.cpp;h=eb9c25fb5c1f909f34b43e7629c33888076a91af;hb=80490136d3f872d162670e616827033fdd1be09d;hp=b7b8c09add7a2687cdf9a841979fc313bea2d2df;hpb=c10e0c7be537d3bb949d2c200f508a1b6bab1e0d;p=asdcplib.git diff --git a/src/KM_xml.cpp b/src/KM_xml.cpp index b7b8c09..eb9c25f 100644 --- a/src/KM_xml.cpp +++ b/src/KM_xml.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2005-2008, John Hurst +Copyright (c) 2005-2010, John Hurst All rights reserved. Redistribution and use in source and binary forms, with or without @@ -35,9 +35,6 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -//#undef HAVE_EXPAT -//#define HAVE_XERCES_C - #ifdef HAVE_EXPAT # ifdef HAVE_XERCES_C # error "Both HAVE_EXPAT and HAVE_XERCES_C defined" @@ -52,6 +49,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include @@ -62,6 +60,16 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. XERCES_CPP_NAMESPACE_USE + +namespace Kumu { + void init_xml_dom(); + typedef std::basic_string XercesString; + bool UTF_8_to_XercesString(const std::string& in_str, XercesString& out_str); + bool UTF_8_to_XercesString(const char* in_str, XercesString& out_str); + bool XercesString_to_UTF_8(const XercesString& in_str, std::string& out_str); + bool XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str); +} + #endif using namespace Kumu; @@ -137,6 +145,13 @@ Kumu::XMLElement::AppendBody(const std::string& value) m_Body += value; } +// +void +Kumu::XMLElement::SetBody(const std::string& value) +{ + m_Body = value; +} + // Kumu::XMLElement* Kumu::XMLElement::AddChildWithContent(const char* name, const char* value) @@ -171,10 +186,10 @@ Kumu::XMLElement::AddComment(const char* value) // void -Kumu::XMLElement::Render(std::string& outbuf) const +Kumu::XMLElement::Render(std::string& outbuf, const bool& pretty) const { outbuf = "\n"; - RenderElement(outbuf, 0); + RenderElement(outbuf, 0, pretty); } // @@ -187,15 +202,18 @@ add_spacer(std::string& outbuf, i32_t depth) // void -Kumu::XMLElement::RenderElement(std::string& outbuf, ui32_t depth) const +Kumu::XMLElement::RenderElement(std::string& outbuf, const ui32_t& depth, const bool& pretty) const { - add_spacer(outbuf, depth); + if ( pretty ) + { + add_spacer(outbuf, depth); + } outbuf += "<"; outbuf += m_Name; // render attributes - for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); i++ ) + for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); ++i ) { outbuf += " "; outbuf += (*i).name; @@ -213,12 +231,19 @@ Kumu::XMLElement::RenderElement(std::string& outbuf, ui32_t depth) const // render body if ( m_Body.length() > 0 ) - outbuf += m_Body; + { + outbuf += m_Body; + } - for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ ) - (*i)->RenderElement(outbuf, depth + 1); + for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); ++i ) + { + (*i)->RenderElement(outbuf, depth + 1, pretty); + } - add_spacer(outbuf, depth); + if ( pretty ) + { + add_spacer(outbuf, depth); + } } else if ( m_Body.length() > 0 ) { @@ -291,6 +316,90 @@ Kumu::XMLElement::GetChildrenWithName(const char* name, ElementList& outList) co return outList; } +// +void +Kumu::XMLElement::DeleteAttributes() +{ + m_AttrList.clear(); +} + +// +void +Kumu::XMLElement::DeleteAttrWithName(const char* name) +{ + assert(name); + AttributeList::iterator i = m_AttrList.begin(); + + while ( i != m_AttrList.end() ) + { + if ( i->name == std::string(name) ) + m_AttrList.erase(i++); + else + ++i; + } +} + +// +void +Kumu::XMLElement::DeleteChildren() +{ + while ( ! m_ChildList.empty() ) + { + delete m_ChildList.back(); + m_ChildList.pop_back(); + } +} + +// +void +Kumu::XMLElement::DeleteChild(const XMLElement* element) +{ + if ( element != 0 ) + { + for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ ) + { + if ( *i == element ) + { + delete *i; + m_ChildList.erase(i); + return; + } + } + } +} + +// +void +Kumu::XMLElement::ForgetChild(const XMLElement* element) +{ + if ( element != 0 ) + { + for ( ElementList::iterator i = m_ChildList.begin(); i != m_ChildList.end(); i++ ) + { + if ( *i == element ) + { + m_ChildList.erase(i); + return; + } + } + } +} + +// +bool +Kumu::XMLElement::ParseString(const ByteString& document) +{ + return ParseString((const char*)document.RoData(), document.Length()); +} + +// +bool +Kumu::XMLElement::ParseString(const std::string& document) +{ + return ParseString(document.c_str(), document.size()); +} + + //---------------------------------------------------------------------------------------------------- #ifdef HAVE_EXPAT @@ -414,7 +523,7 @@ xph_namespace_start(void* p, const XML_Char* ns_prefix, const XML_Char* ns_name) // bool -Kumu::XMLElement::ParseString(const std::string& document) +Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len) { XML_Parser Parser = XML_ParserCreateNS("UTF-8", '|'); @@ -430,12 +539,12 @@ Kumu::XMLElement::ParseString(const std::string& document) XML_SetCharacterDataHandler(Parser, xph_char); XML_SetStartNamespaceDeclHandler(Parser, xph_namespace_start); - if ( ! XML_Parse(Parser, document.c_str(), document.size(), 1) ) + if ( ! XML_Parse(Parser, document, doc_len, 1) ) { - XML_ParserFree(Parser); DefaultLogSink().Error("XML Parse error on line %d: %s\n", XML_GetCurrentLineNumber(Parser), XML_ErrorString(XML_GetErrorCode(Parser))); + XML_ParserFree(Parser); return false; } @@ -503,17 +612,25 @@ Kumu::StringIsXML(const char* document, ui32_t len) #ifdef HAVE_XERCES_C -static Mutex sg_Lock; -static bool sg_xml_init = false; +static Mutex sg_xerces_init_lock; // protect the xerces initialized +static bool sg_xml_init = false; // signal initialization +static Mutex sg_coder_lock; // protect the transcoder context +static XMLTranscoder* sg_coder = 0; +static const int sg_coder_buf_len = 128 * 1024; +static char sg_coder_buf[sg_coder_buf_len + 8]; +static unsigned char sg_coder_counts[sg_coder_buf_len / sizeof(XMLCh)]; // see XMLTranscoder::transcodeFrom +static const XMLCh sg_LS[] = { chLatin_L, chLatin_S, chNull }; +static const XMLCh sg_label_UTF_8[] = { chLatin_U, chLatin_T, chLatin_F, + chDash, chDigit_8, chNull}; // void -asdcp_init_xml_dom() +Kumu::init_xml_dom() { if ( ! sg_xml_init ) { - AutoMutex AL(sg_Lock); + AutoMutex AL(sg_xerces_init_lock); if ( ! sg_xml_init ) { @@ -521,6 +638,23 @@ asdcp_init_xml_dom() { XMLPlatformUtils::Initialize(); sg_xml_init = true; + + XMLTransService::Codes ret; + sg_coder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(sg_label_UTF_8, ret, sg_coder_buf_len); + + if ( ret != XMLTransService::Ok ) + { + const char* message = "Undefined Error"; + + switch ( ret ) + { + case XMLTransService::UnsupportedEncoding: message = "Unsupported encoding"; break; + case XMLTransService::InternalFailure: message = "Internal failure"; break; + case XMLTransService::SupportFilesNotFound: message = "Support files not found"; break; + } + + DefaultLogSink().Error("Xerces transform initialization error: %s\n", message); + } } catch (const XMLException &e) { @@ -530,6 +664,91 @@ asdcp_init_xml_dom() } } +// +bool +Kumu::XercesString_to_UTF_8(const Kumu::XercesString& in_str, std::string& out_str) { + return XercesString_to_UTF_8(in_str.c_str(), out_str); +} + +// +bool +Kumu::XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str) +{ + assert(in_str); + assert(sg_xml_init); + AutoMutex AL(sg_coder_lock); + ui32_t str_len = XMLString::stringLen(in_str); + ui32_t read_total = 0; + + try + { + while ( str_len > 0 ) + { +#if XERCES_VERSION_MAJOR < 3 + ui32_t read_count = 0; +#else + XMLSize_t read_count = 0; +#endif + ui32_t write_count = sg_coder->transcodeTo(in_str + read_total, str_len, + (XMLByte*)sg_coder_buf, sg_coder_buf_len, + read_count, XMLTranscoder::UnRep_Throw); + + out_str.append(sg_coder_buf, write_count); + str_len -= read_count; + read_total += read_count; + assert(str_len >= 0); + } + } + catch (...) + { + return false; + } + + return true; +} + +// +bool +Kumu::UTF_8_to_XercesString(const std::string& in_str, Kumu::XercesString& out_str) { + return UTF_8_to_XercesString(in_str.c_str(), out_str); +} + +// +bool +Kumu::UTF_8_to_XercesString(const char* in_str, Kumu::XercesString& out_str) +{ + assert(in_str); + assert(sg_xml_init); + AutoMutex AL(sg_coder_lock); + ui32_t str_len = strlen(in_str); + ui32_t read_total = 0; + + try + { + while ( str_len > 0 ) + { +#if XERCES_VERSION_MAJOR < 3 + ui32_t read_count = 0; +#else + XMLSize_t read_count = 0; +#endif + ui32_t write_count = sg_coder->transcodeFrom((const XMLByte*)(in_str + read_total), str_len, + (XMLCh*)sg_coder_buf, sg_coder_buf_len / sizeof(XMLCh), + read_count, sg_coder_counts); + + out_str.append((XMLCh*)sg_coder_buf, write_count * sizeof(XMLCh)); + str_len -= read_count; + read_total += read_count; + assert(str_len >= 0); + } + } + catch (...) + { + return false; + } + + return true; +} // class MyTreeHandler : public HandlerBase @@ -537,9 +756,11 @@ class MyTreeHandler : public HandlerBase ns_map* m_Namespaces; std::stack m_Scope; XMLElement* m_Root; + bool m_HasEncodeErrors; public: - MyTreeHandler(XMLElement* root) : m_Namespaces(0), m_Root(root) { + MyTreeHandler(XMLElement* root) : m_Namespaces(0), m_Root(root), m_HasEncodeErrors(false) + { assert(m_Root); m_Namespaces = new ns_map; } @@ -548,7 +769,10 @@ public: delete m_Namespaces; } - ns_map* TakeNamespaceMap() { + bool HasEncodeErrors() const { return m_HasEncodeErrors; } + + ns_map* TakeNamespaceMap() + { if ( m_Namespaces == 0 || m_Namespaces->empty() ) return 0; @@ -597,9 +821,12 @@ public: XERCES_CPP_NAMESPACE::AttributeList& attributes) { assert(x_name); + std::string tx_name; - const char* tx_name = XMLString::transcode(x_name); - const char* name = tx_name; + if ( ! XercesString_to_UTF_8(x_name, tx_name) ) + m_HasEncodeErrors = true; + + const char* name = tx_name.c_str(); XMLElement* Element; const char* ns_root = name; const char* local_name = strchr(name, ':'); @@ -625,13 +852,15 @@ public: for ( ui32_t i = 0; i < a_len; i++) { - const XMLCh* aname = attributes.getName(i); - const XMLCh* value = attributes.getValue(i); - assert(aname); - assert(value); + std::string aname, value; + if ( ! XercesString_to_UTF_8(attributes.getName(i), aname) ) + m_HasEncodeErrors = true; + + if ( ! XercesString_to_UTF_8(attributes.getValue(i), value) ) + m_HasEncodeErrors = true; - char* x_aname = XMLString::transcode(aname); - char* x_value = XMLString::transcode(value); + const char* x_aname = aname.c_str(); + const char* x_value = value.c_str(); if ( strncmp(x_aname, "xmlns", 5) == 0 ) AddNamespace(x_aname+5, x_value); @@ -642,9 +871,6 @@ public: local_name++; Element->SetAttr(local_name, x_value); - - XMLString::release(&x_aname); - XMLString::release(&x_value); } // map the namespace @@ -655,8 +881,6 @@ public: ns_map::iterator ni = m_Namespaces->find(key); if ( ni != m_Namespaces->end() ) Element->SetNamespace(ni->second); - - XMLString::release((char**)&tx_name); } void endElement(const XMLCh *const name) { @@ -667,34 +891,38 @@ public: { if ( length > 0 ) { - char* text = XMLString::transcode(chars); - m_Scope.top()->AppendBody(text); - XMLString::release(&text); + std::string tmp; + if ( ! XercesString_to_UTF_8(chars, tmp) ) + m_HasEncodeErrors = true; + + m_Scope.top()->AppendBody(tmp); } } }; // bool -Kumu::XMLElement::ParseString(const std::string& document) +Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len) { - if ( document.empty() ) + if ( doc_len == 0 ) return false; - asdcp_init_xml_dom(); + init_xml_dom(); + int errorCount = 0; SAXParser* parser = new SAXParser(); - parser->setDoValidation(true); + + parser->setValidationScheme(SAXParser::Val_Always); parser->setDoNamespaces(true); // optional MyTreeHandler* docHandler = new MyTreeHandler(this); - ErrorHandler* errHandler = (ErrorHandler*)docHandler; parser->setDocumentHandler(docHandler); + parser->setErrorHandler(docHandler); try { - MemBufInputSource xmlSource(reinterpret_cast(document.c_str()), - static_cast(document.size()), + MemBufInputSource xmlSource(reinterpret_cast(document), + static_cast(doc_len), "pidc_rules_file"); parser->parse(xmlSource); @@ -704,25 +932,28 @@ Kumu::XMLElement::ParseString(const std::string& document) char* message = XMLString::transcode(e.getMessage()); DefaultLogSink().Error("Parser error: %s\n", message); XMLString::release(&message); - return false; + errorCount++; } catch (const SAXParseException& e) { char* message = XMLString::transcode(e.getMessage()); DefaultLogSink().Error("Parser error: %s at line %d\n", message, e.getLineNumber()); XMLString::release(&message); - return false; + errorCount++; } catch (...) { DefaultLogSink().Error("Unexpected XML parser error\n"); - return false; + errorCount++; } - m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap(); + if ( errorCount == 0 ) + m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap(); + delete parser; delete docHandler; - return true; + + return errorCount > 0 ? false : true; } // @@ -732,7 +963,7 @@ Kumu::StringIsXML(const char* document, ui32_t len) if ( document == 0 || *document == 0 ) return false; - asdcp_init_xml_dom(); + init_xml_dom(); if ( len == 0 ) len = strlen(document); @@ -769,9 +1000,9 @@ Kumu::StringIsXML(const char* document, ui32_t len) // bool -Kumu::XMLElement::ParseString(const std::string& document) +Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len) { - DefaultLogSink().Error("asdcplib compiled without XML parser support.\n"); + DefaultLogSink().Error("Kumu compiled without XML parser support.\n"); return false; } @@ -786,6 +1017,126 @@ Kumu::StringIsXML(const char* document, ui32_t len) #endif +//---------------------------------------------------------------------------------------------------- + +// +bool +Kumu::GetXMLDocType(const ByteString& buf, std::string& ns_prefix, std::string& type_name, std::string& namespace_name, + AttributeList& doc_attr_list) +{ + return GetXMLDocType(buf.RoData(), buf.Length(), ns_prefix, type_name, namespace_name, doc_attr_list); +} + +// +bool +Kumu::GetXMLDocType(const std::string& buf, std::string& ns_prefix, std::string& type_name, std::string& namespace_name, + AttributeList& doc_attr_list) +{ + return GetXMLDocType((const byte_t*)buf.c_str(), buf.size(), ns_prefix, type_name, namespace_name, doc_attr_list); +} + +// +bool +Kumu::GetXMLDocType(const byte_t* buf, ui32_t buf_len, std::string& ns_prefix, std::string& type_name, std::string& namespace_name, + AttributeList& doc_attr_list) +{ + assert(buf); + const byte_t *p1 = buf, *p2; + const byte_t *end_p = buf + buf_len; + + while ( p1 < end_p && *p1 ) + { + if ( *p1 == '<' && isalpha(*(p1+1)) ) + { + p2 = ++p1; + + // collect element name + while ( p2 < end_p && *p2 && ! ( isspace(*p2) || *p2 == '>' ) ) + ++p2; + + if ( p2 < end_p ) + { + const byte_t* separator = (byte_t*)strchr(reinterpret_cast(p1), ':'); + if ( separator != 0 && separator < p2 ) + { + ns_prefix.assign(reinterpret_cast(p1), separator - p1); + p1 = separator + 1; + } + + type_name.assign(reinterpret_cast(p1), p2 - p1); + break; + } + } + + p1++; + } + + if ( *p2 == ' ' ) + { + const byte_t *p3 = p2+1; + while ( p3 < end_p && *p3 && *p3 != '>' ) + { + ++p3; + } + + if ( *p3 != '>' ) + { + return false; // not well-formed XML + } + + std::string attr_str; + attr_str.assign(reinterpret_cast(p2+1), p3 - p2 - 1); + + // normalize whitespace so the subesquent split works properly + for ( int j = 0; j < attr_str.length(); ++j ) + { + if ( attr_str[j] != ' ' && isspace(attr_str[j]) ) + { + attr_str[j] = ' '; + } + } + + std::list doc_attr_nvpairs = km_token_split(attr_str, " "); + + std::list::iterator i; + std::map ns_map; + + for ( i = doc_attr_nvpairs.begin(); i != doc_attr_nvpairs.end(); ++i ) + { + // trim leading and trailing whitespace an right-most character, i.e., \" + std::string trimmed = i->substr(i->find_first_not_of(" "), i->find_last_not_of(" ")); + std::list nv_tokens = km_token_split(trimmed, "=\""); + + if ( nv_tokens.size() != 2 ) + { + continue; + } + + NVPair nv_pair; + nv_pair.name = nv_tokens.front(); + nv_pair.value = nv_tokens.back(); + doc_attr_list.push_back(nv_pair); + ns_map.insert(std::map::value_type(nv_pair.name, nv_pair.value)); + } + + std::string doc_ns_name_selector = ns_prefix.empty() ? "xmlns" : "xmlns:"+ns_prefix; + std::map::iterator j = ns_map.find(doc_ns_name_selector); + + if ( j != ns_map.end() ) + { + namespace_name = j->second; + } + } + else if ( *p2 != '>' ) + { + return false; // not well-formed XML + } + + return ! type_name.empty(); +} + + + // // end KM_xml.cpp //