/*
-Copyright (c) 2005-2008, John Hurst
+Copyright (c) 2005-2010, John Hurst
All rights reserved.
Redistribution and use in source and binary forms, with or without
#include <stack>
#include <map>
-//#undef HAVE_EXPAT
-//#define HAVE_XERCES_C
-
#ifdef HAVE_EXPAT
# ifdef HAVE_XERCES_C
# error "Both HAVE_EXPAT and HAVE_XERCES_C defined"
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/XMLString.hpp>
+#include <xercesc/util/TransService.hpp>
#include <xercesc/sax/AttributeList.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/sax/ErrorHandler.hpp>
XERCES_CPP_NAMESPACE_USE
+
+namespace Kumu {
+ void init_xml_dom(); // initializes Xerces once and creates the shared UTF-8 transcoder used by the converters below
+ typedef std::basic_string<XMLCh> XercesString; // owning string of Xerces XMLCh code units
+ bool UTF_8_to_XercesString(const std::string& in_str, XercesString& out_str); // appends to out_str; returns false if transcoding throws
+ bool UTF_8_to_XercesString(const char* in_str, XercesString& out_str); // in_str must be a non-null, NUL-terminated UTF-8 string
+ bool XercesString_to_UTF_8(const XercesString& in_str, std::string& out_str); // appends to out_str; returns false if transcoding throws
+ bool XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str); // in_str must be a non-null, NUL-terminated XMLCh string
+}
+
#endif
using namespace Kumu;
m_Body += value;
}
+// Replaces any existing body text of this element with a copy of `value`.
+void
+Kumu::XMLElement::SetBody(const std::string& value)
+{
+  m_Body.assign(value);
+}
+
//
Kumu::XMLElement*
Kumu::XMLElement::AddChildWithContent(const char* name, const char* value)
//
void
-Kumu::XMLElement::Render(std::string& outbuf) const
+Kumu::XMLElement::Render(std::string& outbuf, const bool& pretty) const
{
outbuf = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
- RenderElement(outbuf, 0);
+ RenderElement(outbuf, 0, pretty);
}
//
//
void
-Kumu::XMLElement::RenderElement(std::string& outbuf, ui32_t depth) const
+Kumu::XMLElement::RenderElement(std::string& outbuf, const ui32_t& depth, const bool& pretty) const
{
- add_spacer(outbuf, depth);
+ if ( pretty )
+ {
+ add_spacer(outbuf, depth);
+ }
outbuf += "<";
outbuf += m_Name;
// render attributes
- for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); i++ )
+ for ( Attr_i i = m_AttrList.begin(); i != m_AttrList.end(); ++i )
{
outbuf += " ";
outbuf += (*i).name;
// render body
if ( m_Body.length() > 0 )
- outbuf += m_Body;
+ {
+ outbuf += m_Body;
+ }
- for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); i++ )
- (*i)->RenderElement(outbuf, depth + 1);
+ for ( Elem_i i = m_ChildList.begin(); i != m_ChildList.end(); ++i )
+ {
+ (*i)->RenderElement(outbuf, depth + 1, pretty);
+ }
- add_spacer(outbuf, depth);
+ if ( pretty )
+ {
+ add_spacer(outbuf, depth);
+ }
}
else if ( m_Body.length() > 0 )
{
return outList;
}
+// Removes every attribute from this element, leaving the attribute list empty.
+void
+Kumu::XMLElement::DeleteAttributes()
+{
+  m_AttrList.erase(m_AttrList.begin(), m_AttrList.end());
+}
+
+// Removes every attribute whose name is exactly equal to `name`
+// (case-sensitive comparison). Attributes with other names are kept.
+//   name - NUL-terminated attribute name; must not be null. A null pointer
+//          trips the assert in debug builds and is ignored otherwise.
+void
+Kumu::XMLElement::DeleteAttrWithName(const char* name)
+{
+  assert(name);
+
+  // assert() is compiled out under NDEBUG; never build a std::string from null
+  if ( name == 0 )
+    return;
+
+  const std::string target(name); // built once instead of once per attribute
+  AttributeList::iterator i = m_AttrList.begin();
+
+  while ( i != m_AttrList.end() )
+    {
+      if ( i->name == target )
+        i = m_AttrList.erase(i); // erase() returns the next valid iterator for any sequence container
+      else
+        ++i;
+    }
+}
+
+// Destroys all child elements and leaves the child list empty.
+// Children are released from the back of the list towards the front; each
+// pointer is removed from the list immediately after its element is deleted.
+void
+Kumu::XMLElement::DeleteChildren()
+{
+  for ( ; ! m_ChildList.empty(); m_ChildList.pop_back() )
+    {
+      delete m_ChildList.back();
+    }
+}
+
+// Deletes the direct child identified by `element` and removes it from the
+// child list. A null pointer, or a pointer that is not in the child list, is
+// ignored. Only the first matching entry is processed.
+void
+Kumu::XMLElement::DeleteChild(const XMLElement* element)
+{
+  if ( element == 0 )
+    return;
+
+  ElementList::iterator pos = m_ChildList.begin();
+
+  // advance to the first entry that is this exact object
+  while ( pos != m_ChildList.end() && *pos != element )
+    ++pos;
+
+  if ( pos != m_ChildList.end() )
+    {
+      delete *pos;
+      m_ChildList.erase(pos);
+    }
+}
+
+// Removes the direct child identified by `element` from the child list
+// without deleting the element itself. A null pointer, or a pointer that is
+// not in the child list, is ignored. Only the first matching entry is removed.
+void
+Kumu::XMLElement::ForgetChild(const XMLElement* element)
+{
+  if ( element == 0 )
+    return;
+
+  ElementList::iterator pos = m_ChildList.begin();
+  const ElementList::iterator last = m_ChildList.end();
+
+  while ( pos != last && *pos != element )
+    ++pos;
+
+  if ( pos != last )
+    m_ChildList.erase(pos);
+}
+
+// Parses the XML document stored in a ByteString by handing its read-only
+// data pointer and its length to the (pointer, length) overload.
+// Returns whatever that overload returns.
+bool
+Kumu::XMLElement::ParseString(const ByteString& document)
+{
+  const char* const doc_text = reinterpret_cast<const char*>(document.RoData());
+  return ParseString(doc_text, document.Length());
+}
+
+// Parses the XML document stored in a std::string by handing its character
+// data and size to the (pointer, length) overload.
+// Returns whatever that overload returns.
+bool
+Kumu::XMLElement::ParseString(const std::string& document)
+{
+  const char* const doc_text = document.c_str();
+  return ParseString(doc_text, document.size());
+}
+
+
//----------------------------------------------------------------------------------------------------
#ifdef HAVE_EXPAT
//
bool
-Kumu::XMLElement::ParseString(const std::string& document)
+Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
{
XML_Parser Parser = XML_ParserCreateNS("UTF-8", '|');
XML_SetCharacterDataHandler(Parser, xph_char);
XML_SetStartNamespaceDeclHandler(Parser, xph_namespace_start);
- if ( ! XML_Parse(Parser, document.c_str(), document.size(), 1) )
+ if ( ! XML_Parse(Parser, document, doc_len, 1) )
{
- XML_ParserFree(Parser);
DefaultLogSink().Error("XML Parse error on line %d: %s\n",
XML_GetCurrentLineNumber(Parser),
XML_ErrorString(XML_GetErrorCode(Parser)));
+ XML_ParserFree(Parser);
return false;
}
#ifdef HAVE_XERCES_C
-static Mutex sg_Lock;
-static bool sg_xml_init = false;
+static Mutex sg_xerces_init_lock; // protect the xerces initialized
+static bool sg_xml_init = false; // signal initialization
+static Mutex sg_coder_lock; // protect the transcoder context
+static XMLTranscoder* sg_coder = 0;
+static const int sg_coder_buf_len = 128 * 1024;
+static char sg_coder_buf[sg_coder_buf_len + 8];
+static unsigned char sg_coder_counts[sg_coder_buf_len / sizeof(XMLCh)]; // see XMLTranscoder::transcodeFrom
+static const XMLCh sg_LS[] = { chLatin_L, chLatin_S, chNull };
+static const XMLCh sg_label_UTF_8[] = { chLatin_U, chLatin_T, chLatin_F,
+ chDash, chDigit_8, chNull};
//
void
-asdcp_init_xml_dom()
+Kumu::init_xml_dom()
{
if ( ! sg_xml_init )
{
- AutoMutex AL(sg_Lock);
+ AutoMutex AL(sg_xerces_init_lock);
if ( ! sg_xml_init )
{
{
XMLPlatformUtils::Initialize();
sg_xml_init = true;
+
+ XMLTransService::Codes ret;
+ sg_coder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(sg_label_UTF_8, ret, sg_coder_buf_len);
+
+ if ( ret != XMLTransService::Ok )
+ {
+ const char* message = "Undefined Error";
+
+ switch ( ret )
+ {
+ case XMLTransService::UnsupportedEncoding: message = "Unsupported encoding"; break;
+ case XMLTransService::InternalFailure: message = "Internal failure"; break;
+ case XMLTransService::SupportFilesNotFound: message = "Support files not found"; break;
+ }
+
+ DefaultLogSink().Error("Xerces transform initialization error: %s\n", message);
+ }
}
catch (const XMLException &e)
{
}
}
+// Convenience overload: converts a XercesString to UTF-8 by passing its
+// NUL-terminated character data to the pointer overload, so conversion stops
+// at the first NUL code unit. The result is appended to out_str.
+bool
+Kumu::XercesString_to_UTF_8(const Kumu::XercesString& in_str, std::string& out_str)
+{
+  const XMLCh* const raw = in_str.c_str();
+  return XercesString_to_UTF_8(raw, out_str);
+}
+
+// Transcodes a NUL-terminated Xerces (XMLCh) string to UTF-8 and appends the
+// result to out_str.
+//   in_str  - NUL-terminated XMLCh string; must not be null
+//   out_str - receives the UTF-8 bytes; existing content is kept and extended
+// Returns true when the whole input was converted. Returns false when the
+// shared transcoder was never created, when the transcoder throws, or when it
+// stops consuming input; out_str may then hold a partial conversion.
+// init_xml_dom() must have been called beforehand. Access to the shared
+// transcoder and its scratch buffer is serialized by sg_coder_lock.
+bool
+Kumu::XercesString_to_UTF_8(const XMLCh* in_str, std::string& out_str)
+{
+  assert(in_str);
+  assert(sg_xml_init);
+  AutoMutex AL(sg_coder_lock);
+
+  // init_xml_dom() only logs a failed transcoder creation, so guard the pointer here
+  if ( sg_coder == 0 )
+    return false;
+
+  ui32_t str_len = XMLString::stringLen(in_str);
+  ui32_t read_total = 0;
+
+  try
+    {
+      // the input is converted in pieces no larger than the scratch buffer
+      while ( str_len > 0 )
+        {
+#if XERCES_VERSION_MAJOR < 3
+          ui32_t read_count = 0;
+#else
+          XMLSize_t read_count = 0;
+#endif
+          ui32_t write_count = sg_coder->transcodeTo(in_str + read_total, str_len,
+                                                     reinterpret_cast<XMLByte*>(sg_coder_buf), sg_coder_buf_len,
+                                                     read_count, XMLTranscoder::UnRep_Throw);
+
+          // No input consumed (e.g. an unpaired surrogate at the end of the
+          // string) would otherwise loop forever; an over-long count would
+          // wrap the unsigned remaining-length counter.
+          if ( read_count == 0 || read_count > str_len )
+            return false;
+
+          out_str.append(sg_coder_buf, write_count);
+          str_len -= read_count;
+          read_total += read_count;
+        }
+    }
+  catch (...)
+    {
+      return false;
+    }
+
+  return true;
+}
+
+// Convenience overload: converts a UTF-8 std::string to a XercesString by
+// passing its NUL-terminated character data to the pointer overload, so
+// conversion stops at the first NUL byte. The result is appended to out_str.
+bool
+Kumu::UTF_8_to_XercesString(const std::string& in_str, Kumu::XercesString& out_str)
+{
+  const char* const raw = in_str.c_str();
+  return UTF_8_to_XercesString(raw, out_str);
+}
+
+// Transcodes a NUL-terminated UTF-8 string to Xerces XMLCh code units and
+// appends the result to out_str.
+//   in_str  - NUL-terminated UTF-8 string; must not be null
+//   out_str - receives the XMLCh code units; existing content is kept and extended
+// Returns true when the whole input was converted. Returns false when the
+// shared transcoder was never created, when the transcoder throws, or when it
+// stops consuming input; out_str may then hold a partial conversion.
+// init_xml_dom() must have been called beforehand. Access to the shared
+// transcoder and its scratch buffers is serialized by sg_coder_lock.
+bool
+Kumu::UTF_8_to_XercesString(const char* in_str, Kumu::XercesString& out_str)
+{
+  assert(in_str);
+  assert(sg_xml_init);
+  AutoMutex AL(sg_coder_lock);
+
+  // init_xml_dom() only logs a failed transcoder creation, so guard the pointer here
+  if ( sg_coder == 0 )
+    return false;
+
+  ui32_t str_len = strlen(in_str);
+  ui32_t read_total = 0;
+
+  try
+    {
+      // the input is converted in pieces no larger than the scratch buffer
+      while ( str_len > 0 )
+        {
+#if XERCES_VERSION_MAJOR < 3
+          ui32_t read_count = 0;
+#else
+          XMLSize_t read_count = 0;
+#endif
+          ui32_t write_count = sg_coder->transcodeFrom(reinterpret_cast<const XMLByte*>(in_str + read_total), str_len,
+                                                       reinterpret_cast<XMLCh*>(sg_coder_buf), sg_coder_buf_len / sizeof(XMLCh),
+                                                       read_count, sg_coder_counts);
+
+          // No input consumed (e.g. a truncated multi-byte sequence at the end
+          // of the string) would otherwise loop forever; an over-long count
+          // would wrap the unsigned remaining-length counter.
+          if ( read_count == 0 || read_count > str_len )
+            return false;
+
+          // write_count is already a count of XMLCh units, which is what
+          // append() expects; scaling it by sizeof(XMLCh) would copy stale
+          // buffer content past the transcoded text.
+          out_str.append(reinterpret_cast<const XMLCh*>(sg_coder_buf), write_count);
+          str_len -= read_count;
+          read_total += read_count;
+        }
+    }
+  catch (...)
+    {
+      return false;
+    }
+
+  return true;
+}
//
class MyTreeHandler : public HandlerBase
ns_map* m_Namespaces;
std::stack<XMLElement*> m_Scope;
XMLElement* m_Root;
+ bool m_HasEncodeErrors;
public:
- MyTreeHandler(XMLElement* root) : m_Namespaces(0), m_Root(root) {
+ MyTreeHandler(XMLElement* root) : m_Namespaces(0), m_Root(root), m_HasEncodeErrors(false)
+ {
assert(m_Root);
m_Namespaces = new ns_map;
}
delete m_Namespaces;
}
- ns_map* TakeNamespaceMap() {
+ bool HasEncodeErrors() const { return m_HasEncodeErrors; }
+
+ ns_map* TakeNamespaceMap()
+ {
if ( m_Namespaces == 0 || m_Namespaces->empty() )
return 0;
XERCES_CPP_NAMESPACE::AttributeList& attributes)
{
assert(x_name);
+ std::string tx_name;
- const char* tx_name = XMLString::transcode(x_name);
- const char* name = tx_name;
+ if ( ! XercesString_to_UTF_8(x_name, tx_name) )
+ m_HasEncodeErrors = true;
+
+ const char* name = tx_name.c_str();
XMLElement* Element;
const char* ns_root = name;
const char* local_name = strchr(name, ':');
for ( ui32_t i = 0; i < a_len; i++)
{
- const XMLCh* aname = attributes.getName(i);
- const XMLCh* value = attributes.getValue(i);
- assert(aname);
- assert(value);
+ std::string aname, value;
+ if ( ! XercesString_to_UTF_8(attributes.getName(i), aname) )
+ m_HasEncodeErrors = true;
+
+ if ( ! XercesString_to_UTF_8(attributes.getValue(i), value) )
+ m_HasEncodeErrors = true;
- char* x_aname = XMLString::transcode(aname);
- char* x_value = XMLString::transcode(value);
+ const char* x_aname = aname.c_str();
+ const char* x_value = value.c_str();
if ( strncmp(x_aname, "xmlns", 5) == 0 )
AddNamespace(x_aname+5, x_value);
local_name++;
Element->SetAttr(local_name, x_value);
-
- XMLString::release(&x_aname);
- XMLString::release(&x_value);
}
// map the namespace
ns_map::iterator ni = m_Namespaces->find(key);
if ( ni != m_Namespaces->end() )
Element->SetNamespace(ni->second);
-
- XMLString::release((char**)&tx_name);
}
void endElement(const XMLCh *const name) {
{
if ( length > 0 )
{
- char* text = XMLString::transcode(chars);
- m_Scope.top()->AppendBody(text);
- XMLString::release(&text);
+ std::string tmp;
+ if ( ! XercesString_to_UTF_8(chars, tmp) )
+ m_HasEncodeErrors = true;
+
+ m_Scope.top()->AppendBody(tmp);
}
}
};
//
bool
-Kumu::XMLElement::ParseString(const std::string& document)
+Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
{
- if ( document.empty() )
+ if ( doc_len == 0 )
return false;
- asdcp_init_xml_dom();
+ init_xml_dom();
+ int errorCount = 0;
SAXParser* parser = new SAXParser();
- parser->setDoValidation(true);
+
+ parser->setValidationScheme(SAXParser::Val_Always);
parser->setDoNamespaces(true); // optional
MyTreeHandler* docHandler = new MyTreeHandler(this);
- ErrorHandler* errHandler = (ErrorHandler*)docHandler;
parser->setDocumentHandler(docHandler);
+ parser->setErrorHandler(docHandler);
try
{
- MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document.c_str()),
- static_cast<const unsigned int>(document.size()),
+ MemBufInputSource xmlSource(reinterpret_cast<const XMLByte*>(document),
+ static_cast<const unsigned int>(doc_len),
"pidc_rules_file");
parser->parse(xmlSource);
char* message = XMLString::transcode(e.getMessage());
DefaultLogSink().Error("Parser error: %s\n", message);
XMLString::release(&message);
- return false;
+ errorCount++;
}
catch (const SAXParseException& e)
{
char* message = XMLString::transcode(e.getMessage());
DefaultLogSink().Error("Parser error: %s at line %d\n", message, e.getLineNumber());
XMLString::release(&message);
- return false;
+ errorCount++;
}
catch (...)
{
DefaultLogSink().Error("Unexpected XML parser error\n");
- return false;
+ errorCount++;
}
- m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap();
+ if ( errorCount == 0 )
+ m_NamespaceOwner = (void*)docHandler->TakeNamespaceMap();
+
delete parser;
delete docHandler;
- return true;
+
+ return errorCount > 0 ? false : true;
}
//
if ( document == 0 || *document == 0 )
return false;
- asdcp_init_xml_dom();
+ init_xml_dom();
if ( len == 0 )
len = strlen(document);
//
bool
-Kumu::XMLElement::ParseString(const std::string& document)
+Kumu::XMLElement::ParseString(const char* document, ui32_t doc_len)
{
- DefaultLogSink().Error("asdcplib compiled without XML parser support.\n");
+ DefaultLogSink().Error("Kumu compiled without XML parser support.\n");
return false;
}
#endif
+//----------------------------------------------------------------------------------------------------
+
+// ByteString convenience overload: forwards the buffer's read-only data
+// pointer and length, together with all output arguments, to the
+// (pointer, length) overload and returns its result.
+bool
+Kumu::GetXMLDocType(const ByteString& buf, std::string& ns_prefix, std::string& type_name, std::string& namespace_name,
+                    AttributeList& doc_attr_list)
+{
+  const byte_t* const data = buf.RoData();
+  const ui32_t data_len = buf.Length();
+  return GetXMLDocType(data, data_len, ns_prefix, type_name, namespace_name, doc_attr_list);
+}
+
+// std::string convenience overload: forwards the string's character data
+// (viewed as bytes) and size, together with all output arguments, to the
+// (pointer, length) overload and returns its result.
+bool
+Kumu::GetXMLDocType(const std::string& buf, std::string& ns_prefix, std::string& type_name, std::string& namespace_name,
+                    AttributeList& doc_attr_list)
+{
+  const byte_t* const data = reinterpret_cast<const byte_t*>(buf.c_str());
+  return GetXMLDocType(data, buf.size(), ns_prefix, type_name, namespace_name, doc_attr_list);
+}
+
+// Lightweight sniffing of an XML document's root element without running a
+// full parser. Scans for the first '<' that is followed by a letter and
+// reports the element's name and attributes.
+//   buf, buf_len   - document bytes; scanning also stops at the first NUL byte
+//   ns_prefix      - set to the text before ':' in the element name, if any
+//   type_name      - set to the element's local name
+//   namespace_name - set to the value of the matching "xmlns" (or
+//                    "xmlns:<prefix>") attribute, if present
+//   doc_attr_list  - every name="value" pair found in the start tag is appended
+// Output arguments are only written when the corresponding item is found and
+// are never cleared, so callers should pass in empty objects.
+// Returns false when no start tag is found, when the tag is not terminated
+// by '>' inside the buffer, or when the resulting type name is empty.
+// Attribute parsing is deliberately simple: the tag text is split on blanks,
+// so attribute values containing whitespace or '>' are not handled.
+bool
+Kumu::GetXMLDocType(const byte_t* buf, ui32_t buf_len, std::string& ns_prefix, std::string& type_name, std::string& namespace_name,
+                    AttributeList& doc_attr_list)
+{
+  assert(buf);
+  const byte_t* p1 = buf;
+  const byte_t* p2 = 0;
+  const byte_t* const end_p = buf + buf_len;
+  bool found_element = false;
+
+  while ( p1 < end_p && *p1 )
+    {
+      // the look-ahead byte must itself be inside the buffer
+      if ( *p1 == '<' && (p1 + 1) < end_p && isalpha(*(p1+1)) )
+        {
+          p2 = ++p1;
+
+          // collect element name
+          while ( p2 < end_p && *p2 && ! ( isspace(*p2) || *p2 == '>' ) )
+            ++p2;
+
+          // The name ran to the end of the buffer, so nothing after this
+          // point can contain a terminator either.
+          if ( p2 >= end_p )
+            return false; // not well-formed XML
+
+          // Look for a prefix separator inside the name only; the buffer is
+          // not guaranteed to be NUL-terminated, so the search is bounded.
+          const byte_t* separator = static_cast<const byte_t*>(memchr(p1, ':', p2 - p1));
+
+          if ( separator != 0 )
+            {
+              ns_prefix.assign(reinterpret_cast<const char*>(p1), separator - p1);
+              p1 = separator + 1;
+            }
+
+          type_name.assign(reinterpret_cast<const char*>(p1), p2 - p1);
+          found_element = true;
+          break;
+        }
+
+      p1++;
+    }
+
+  if ( ! found_element )
+    return false; // no start tag in the buffer
+
+  // p2 now points inside the buffer at the byte that ended the element name.
+  // Any whitespace (not only a blank) may separate the name from attributes.
+  if ( isspace(*p2) )
+    {
+      const byte_t* p3 = p2 + 1;
+
+      while ( p3 < end_p && *p3 && *p3 != '>' )
+        ++p3;
+
+      if ( p3 >= end_p || *p3 != '>' )
+        return false; // not well-formed XML
+
+      std::string attr_str;
+      attr_str.assign(reinterpret_cast<const char*>(p2+1), p3 - p2 - 1);
+
+      // normalize whitespace so the subsequent split works properly
+      for ( std::string::size_type j = 0; j < attr_str.length(); ++j )
+        {
+          // cast first: isspace() is undefined for negative char values
+          if ( attr_str[j] != ' ' && isspace(static_cast<unsigned char>(attr_str[j])) )
+            attr_str[j] = ' ';
+        }
+
+      std::list<std::string> doc_attr_nvpairs = km_token_split(attr_str, " ");
+      std::map<std::string, std::string> attr_by_name;
+
+      for ( std::list<std::string>::iterator i = doc_attr_nvpairs.begin(); i != doc_attr_nvpairs.end(); ++i )
+        {
+          const std::string::size_type first = i->find_first_not_of(" ");
+
+          if ( first == std::string::npos )
+            continue; // empty or all-blank token; substr() would throw
+
+          // Trim surrounding blanks and the right-most character, i.e. the
+          // closing quote: the length passed to substr() is the index of the
+          // last non-blank character, which is one short of the full token.
+          std::string trimmed = i->substr(first, i->find_last_not_of(" "));
+          std::list<std::string> nv_tokens = km_token_split(trimmed, "=\"");
+
+          if ( nv_tokens.size() != 2 )
+            continue; // not a simple name="value" pair
+
+          NVPair nv_pair;
+          nv_pair.name = nv_tokens.front();
+          nv_pair.value = nv_tokens.back();
+          doc_attr_list.push_back(nv_pair);
+          // insert() keeps the first value seen for a repeated name
+          attr_by_name.insert(std::map<std::string,std::string>::value_type(nv_pair.name, nv_pair.value));
+        }
+
+      std::string doc_ns_name_selector = ns_prefix.empty() ? "xmlns" : "xmlns:"+ns_prefix;
+      std::map<std::string,std::string>::iterator j = attr_by_name.find(doc_ns_name_selector);
+
+      if ( j != attr_by_name.end() )
+        namespace_name = j->second;
+    }
+  else if ( *p2 != '>' )
+    {
+      return false; // not well-formed XML
+    }
+
+  return ! type_name.empty();
+}
+
+
+
//
// end KM_xml.cpp
//