Merge pull request #20 from cinecert/htj2c
[asdcplib.git] / src / ST2052_TextParser.cpp
index 312c9d996e174fc03af102e3fb682ba8fe6e6878..305f46ff836b882cda0b3ca7f67400a27b4fe34e 100644 (file)
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2013-2014, John Hurst
+Copyright (c) 2013-2016, John Hurst
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -29,17 +29,198 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-    \brief   AS-DCP library, PCM essence reader and writer implementation
+    \brief   AS-02 library, ST 2052-1 timed-text parser implementation
 */
 
-
 #include "AS_02_internal.h"
 #include "KM_xml.h"
+#include <openssl/sha.h>
 
 using namespace Kumu;
 using namespace ASDCP;
 
 using Kumu::DefaultLogSink;
 
-// TODO: 
-const char* c_dcst_namespace_name = "http://www.smpte-ra.org/schemas/428-7/2007/DCST";
+const char* c_tt_namespace_name = "http://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt";
+
+
+//------------------------------------------------------------------------------------------
+
+// Length, in bytes, of the namespace identifier that is hashed ahead of the
+// name when a name-based resource ID is created (see create_4122_type5_id()).
+int const NS_ID_LENGTH = 16;
+
+// Namespace identifier used when deriving resource IDs from PNG image names.
+// The value is the URL namespace UUID listed in RFC 4122 Appendix C
+// (6ba7b811-9dad-11d1-80b4-00c04fd430c8).
+// The table is read-only, so it is declared const.
+static const byte_t s_png_id_prefix[NS_ID_LENGTH] = {
+  // RFC 4122 type 5
+  // 2067-2 5.4.5 / RFC4122 Appendix C
+  0x6b, 0xa7, 0xb8, 0x11, 0x9d, 0xad, 0x11, 0xd1,
+  0x80, 0xb4, 0x00, 0xc0, 0x4f, 0xd4, 0x30, 0xc8
+};
+
+// Namespace identifier used when deriving resource IDs from font names
+// (b6cc57a0-87e7-4e75-b1c3-3359f3ae8817).
+// The table is read-only, so it is declared const.
+static const byte_t s_font_id_prefix[NS_ID_LENGTH] = {
+  // RFC 4122 type 5
+  // 2067-2 5.4.6
+  0xb6, 0xcc, 0x57, 0xa0, 0x87, 0xe7, 0x4e, 0x75,
+  0xb1, 0xc3, 0x33, 0x59, 0xf3, 0xae, 0x88, 0x17
+};
+
+// Builds a name-based UUID (RFC 4122 version 5) for subject_name.
+//
+//   subject_name  the name to be identified; its bytes are hashed as-is
+//   ns_id         pointer to NS_ID_LENGTH bytes of namespace identifier
+//
+// The SHA-1 digest of ns_id followed by subject_name is truncated to
+// UUID_Length bytes, then the version and variant bits are overwritten.
+static Kumu::UUID
+create_4122_type5_id(const std::string& subject_name, const byte_t* ns_id)
+{
+  const ui32_t digest_length = 20; // size of a SHA-1 digest, in bytes
+  byte_t digest[digest_length];
+
+  // hash the namespace identifier first, then the name
+  SHA_CTX sha_state;
+  SHA1_Init(&sha_state);
+  SHA1_Update(&sha_state, ns_id, NS_ID_LENGTH);
+  SHA1_Update(&sha_state, (byte_t*)subject_name.c_str(), subject_name.size());
+  SHA1_Final(digest, &sha_state);
+
+  // keep the leading UUID_Length bytes of the digest
+  byte_t uuid_value[UUID_Length];
+  memcpy(uuid_value, digest, UUID_Length);
+
+  uuid_value[6] = (uuid_value[6] & 0x0f) | 0x50; // high nibble: UUID version 5 ('digest')
+  uuid_value[8] = (uuid_value[8] & 0x3f) | 0x80; // top two bits: clear bit 6, set bit 7
+  return Kumu::UUID(uuid_value);
+}
+
+// Returns the name-based (type 5) UUID for a PNG image name, computed by
+// create_4122_type5_id() with the s_png_id_prefix namespace identifier.
+// The name is hashed exactly as given; no path or case normalization is
+// applied here.
+Kumu::UUID
+AS_02::TimedText::CreatePNGNameId(const std::string& image_name)
+{
+  return create_4122_type5_id(image_name, s_png_id_prefix);
+}
+
+// Returns the name-based (type 5) UUID for a font name, computed by
+// create_4122_type5_id() with the s_font_id_prefix namespace identifier.
+// The name is hashed exactly as given; no path or case normalization is
+// applied here.
+Kumu::UUID
+AS_02::TimedText::CreateFontNameId(const std::string& font_name)
+{
+  return create_4122_type5_id(font_name, s_font_id_prefix);
+}
+
+// Font family names that the parser accepts without requiring an external
+// font file, together with the mutex that guards access to the set.
+static Kumu::Mutex sg_default_font_family_list_lock;
+static std::set<std::string> sg_default_font_family_list;
+
+// Adds the built-in family names to sg_default_font_family_list while
+// holding the list's mutex.  Names that are already present are left alone,
+// so calling this repeatedly does not change the set.
+static void
+setup_default_font_family_list()
+{
+  static const char* const builtin_families[] = {
+    "default",
+    "monospace",
+    "sansSerif",
+    "serif",
+    "monospaceSansSerif",
+    "monospaceSerif",
+    "proportionalSansSerif",
+    "proportionalSerif"
+  };
+  const size_t family_count = sizeof(builtin_families) / sizeof(builtin_families[0]);
+
+  AutoMutex guard(sg_default_font_family_list_lock);
+  sg_default_font_family_list.insert(builtin_families, builtin_families + family_count);
+}
+
+
+//------------------------------------------------------------------------------------------
+
+
+// Construction and destruction do no work of their own; the resource map is
+// filled by OpenRead().
+AS_02::TimedText::Type5UUIDFilenameResolver::Type5UUIDFilenameResolver() {}
+AS_02::TimedText::Type5UUIDFilenameResolver::~Type5UUIDFilenameResolver() {}
+
+// Leading bytes used by Type5UUIDFilenameResolver::OpenRead() to classify files.
+// PNGMagic is the eight-byte PNG file signature.
+const byte_t PNGMagic[8] = { 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a };
+// 'O' 'T' 'T' 'O' followed by one zero byte.
+// NOTE(review): the sfnt version tag is four bytes; the fifth byte compared
+// here appears to be the high byte of the table count -- confirm that
+// requiring it to be zero is intended.
+const byte_t OpenTypeMagic[5] = { 0x4f, 0x54, 0x54, 0x4f, 0x00 };
+// 0x00010000 followed by one zero byte (same remark about the fifth byte).
+const byte_t TrueTypeMagic[5] = { 0x00, 0x01, 0x00, 0x00, 0x00 };
+
+// Scans a directory and indexes the PNG and font files found in it.
+//
+//   dirname  directory to scan; an empty value selects "."
+//
+// Each regular, non-hidden file is classified by its leading bytes.  PNG
+// files are keyed by CreatePNGNameId(<file name>), OpenType/TrueType files by
+// CreateFontNameId(<file name without extension>).  Files that cannot be
+// opened or read are skipped silently.
+//
+// The map stores the path joined with the scanned directory, so that
+// ResolveRID() opens the right file whatever the process working directory
+// is.  (Storing only the bare entry name made lookups succeed only when the
+// scanned directory was the working directory.)
+//
+// Returns the result of opening the directory; problems with individual
+// files do not affect the return value.
+Result_t
+AS_02::TimedText::Type5UUIDFilenameResolver::OpenRead(const std::string& dirname)
+{
+  DirScannerEx dir_reader;
+  DirectoryEntryType_t ft;
+  std::string next_item;
+  std::string abs_dirname = PathMakeCanonical(dirname);
+  byte_t read_buffer[16];
+
+  if ( abs_dirname.empty() )
+    {
+      abs_dirname = ".";
+    }
+
+  Result_t result = dir_reader.Open(abs_dirname);
+
+  if ( KM_SUCCESS(result) )
+    {
+      while ( KM_SUCCESS(dir_reader.GetNext(next_item, ft)) )
+        {
+          if ( next_item[0] == '.' ) continue; // no hidden files
+          if ( ft != DET_FILE ) continue;      // regular files only
+
+          std::string tmp_path = PathJoin(abs_dirname, next_item);
+          FileReader reader;
+          ui32_t read_count = 0;
+          Result_t read_result = reader.OpenRead(tmp_path);
+
+          if ( KM_SUCCESS(read_result) )
+            {
+              read_result = reader.Read(read_buffer, sizeof(read_buffer), &read_count);
+            }
+
+          if ( ! KM_SUCCESS(read_result) ) continue;
+
+          // Compare only bytes that were actually read from this file; the
+          // rest of read_buffer may still hold data from a previous file.
+          // is it PNG?
+          if ( read_count >= sizeof(PNGMagic)
+               && memcmp(read_buffer, PNGMagic, sizeof(PNGMagic)) == 0 )
+            {
+              UUID asset_id = CreatePNGNameId(PathBasename(next_item));
+              m_ResourceMap.insert(ResourceMap::value_type(asset_id, tmp_path));
+            }
+          // is it a font?
+          else if ( read_count >= sizeof(OpenTypeMagic)
+                    && ( memcmp(read_buffer, OpenTypeMagic, sizeof(OpenTypeMagic)) == 0
+                         || memcmp(read_buffer, TrueTypeMagic, sizeof(TrueTypeMagic)) == 0 ) )
+            {
+              // fonts are identified by their file name without the extension
+              std::string font_root_name = PathSetExtension(next_item, "");
+              UUID asset_id = CreateFontNameId(PathBasename(font_root_name));
+              m_ResourceMap.insert(ResourceMap::value_type(asset_id, tmp_path));
+            }
+        }
+    }
+
+  return result;
+}
+
+// Loads the file registered for a resource ID into FrameBuf.
+//
+//   uuid      resource ID value to look up in the map built by OpenRead()
+//   FrameBuf  receives the complete file contents; its capacity is set to
+//             the file size and its size to the number of bytes read
+//
+// Returns RESULT_NOT_FOUND when the ID is not in the map; otherwise the
+// result of the first failing open / capacity / read step, or of the read
+// itself when every step succeeds.
+Result_t
+AS_02::TimedText::Type5UUIDFilenameResolver::ResolveRID(const byte_t* uuid, ASDCP::TimedText::FrameBuffer& FrameBuf) const
+{
+  Kumu::UUID resource_id(uuid);
+  char hex_buf[64];
+
+  ResourceMap::const_iterator entry = m_ResourceMap.find(resource_id);
+
+  if ( entry == m_ResourceMap.end() )
+    {
+      DefaultLogSink().Debug("Missing timed-text resource \"%s\"\n", resource_id.EncodeHex(hex_buf, 64));
+      return RESULT_NOT_FOUND;
+    }
+
+  FileReader Reader;
+
+  DefaultLogSink().Debug("Retrieving resource %s from file %s\n", resource_id.EncodeHex(hex_buf, 64), entry->second.c_str());
+
+  Result_t result = Reader.OpenRead(entry->second.c_str());
+
+  if ( ! KM_SUCCESS(result) )
+    {
+      return result;
+    }
+
+  // size the frame buffer to hold the whole file
+  ui32_t read_size = Reader.Size();
+  result = FrameBuf.Capacity(read_size);
+
+  if ( ! KM_SUCCESS(result) )
+    {
+      return result;
+    }
+
+  ui32_t read_count = 0;
+  result = Reader.Read(FrameBuf.Data(), read_size, &read_count);
+
+  if ( KM_SUCCESS(result) )
+    {
+      FrameBuf.Size(read_count);
+    }
+
+  return result;
+}
 
 //------------------------------------------------------------------------------------------
 
@@ -57,7 +238,7 @@ public:
   std::string m_Filename;
   std::string m_XMLDoc;
   TimedTextDescriptor  m_TDesc;
-  ASDCP::mem_ptr<ASDCP::TimedText::LocalFilenameResolver> m_DefaultResolver;
+  ASDCP::mem_ptr<ASDCP::TimedText::IResourceResolver> m_DefaultResolver;
 
   h__TextParser() : m_Root("**ParserRoot**")
   {
@@ -70,7 +251,7 @@ public:
   {
     if ( m_DefaultResolver.empty() )
       {
-       ASDCP::TimedText::LocalFilenameResolver *resolver = new ASDCP::TimedText::LocalFilenameResolver;
+       AS_02::TimedText::Type5UUIDFilenameResolver *resolver = new AS_02::TimedText::Type5UUIDFilenameResolver;
        resolver->OpenRead(PathDirname(m_Filename));
        m_DefaultResolver = resolver;
       }
@@ -80,7 +261,7 @@ public:
 
   Result_t OpenRead(const std::string& filename);
   Result_t OpenRead(const std::string& xml_doc, const std::string& filename);
-  Result_t ReadAncillaryResource(const UUID& uuid, ASDCP::TimedText::FrameBuffer& FrameBuf,
+  Result_t ReadAncillaryResource(const byte_t *uuid, ASDCP::TimedText::FrameBuffer& FrameBuf,
                                 const ASDCP::TimedText::IResourceResolver& Resolver) const;
 };
 
@@ -108,31 +289,167 @@ AS_02::TimedText::ST2052_TextParser::h__TextParser::OpenRead(const std::string&
   return OpenRead();
 }
 
+
+
+// Profile designators recognized by h__TextParser::OpenRead(); one of them
+// is stored in m_TDesc.NamespaceName when the document is opened.
+std::string const IMSC1_imageProfile = "http://www.w3.org/ns/ttml/profile/imsc1/image";
+std::string const IMSC1_textProfile = "http://www.w3.org/ns/ttml/profile/imsc1/text";
+
 //
 Result_t
 AS_02::TimedText::ST2052_TextParser::h__TextParser::OpenRead()
 {
+  setup_default_font_family_list();
+
   if ( ! m_Root.ParseString(m_XMLDoc.c_str()) )
-    return RESULT_FORMAT;
+    {
+      DefaultLogSink(). Error("ST 2052-1 document is not well-formed.\n");
+      return RESULT_FORMAT;
+    }
 
   m_TDesc.EncodingName = "UTF-8"; // the XML parser demands UTF-8
   m_TDesc.ResourceList.clear();
   m_TDesc.ContainerDuration = 0;
-  const XMLNamespace* ns = m_Root.Namespace();
+  std::set<std::string>::const_iterator i;
+
+  // Attempt to set the profile from <conformsToStandard>
+  if ( m_TDesc.NamespaceName.empty() )
+    {
+      ElementVisitor conforms_visitor("conformsToStandard");
+      apply_visitor(m_Root, conforms_visitor);
+
+      for ( i = conforms_visitor.value_list.begin(); i != conforms_visitor.value_list.end(); ++i )
+       {
+         if ( *i == IMSC1_imageProfile || *i == IMSC1_textProfile )
+           {
+             m_TDesc.NamespaceName = *i;
+             break;
+           }
+       }
+    }
+
+  // Attempt to set the profile from the use of attribute "profile"
+  if ( m_TDesc.NamespaceName.empty() )
+    {
+      AttributeVisitor profile_visitor("profile");
+      apply_visitor(m_Root, profile_visitor);
+
+      for ( i = profile_visitor.value_list.begin(); i != profile_visitor.value_list.end(); ++i )
+       {
+         if ( *i == IMSC1_imageProfile || *i == IMSC1_textProfile )
+           {
+             m_TDesc.NamespaceName = *i;
+             break;
+           }
+       }
+    }
+
+  // Find image resources for later packaging as GS partitions.
+  // Attempt to set the profile; infer from use of images.
+  AttributeVisitor png_visitor("backgroundImage");
+  apply_visitor(m_Root, png_visitor);
+
+  for ( i = png_visitor.value_list.begin(); i != png_visitor.value_list.end(); ++i )
+    {
+      UUID asset_id = CreatePNGNameId(PathBasename(*i));
+      TimedTextResourceDescriptor png_resource;
+      memcpy(png_resource.ResourceID, asset_id.Value(), UUIDlen);
+      png_resource.Type = ASDCP::TimedText::MT_PNG;
+      m_TDesc.ResourceList.push_back(png_resource);
+      m_ResourceTypes.insert(ResourceTypeMap_t::value_type(UUID(png_resource.ResourceID),
+                                                          ASDCP::TimedText::MT_PNG));
+
+      if ( m_TDesc.NamespaceName.empty() )
+       {
+         m_TDesc.NamespaceName = IMSC1_imageProfile;
+       }
+    }
 
-  if ( ns == 0 )
+  // If images are present and profile is "text" make sure to say something.
+  if ( ! m_ResourceTypes.empty() && m_TDesc.NamespaceName == IMSC1_textProfile )
     {
-      DefaultLogSink(). Warn("Document has no namespace name, assuming %s\n", c_dcst_namespace_name);
-      m_TDesc.NamespaceName = c_dcst_namespace_name;
+      DefaultLogSink().Warn("Unexpected IMSC-1 text profile; document contains images.\n ");
     }
-  else
+  
+  // If all else fails set the profile to "text".
+  if ( m_TDesc.NamespaceName.empty() )
     {
-      m_TDesc.NamespaceName = ns->Name();
+      DefaultLogSink().Warn("Using default IMSC-1 text profile.\n ");
+      m_TDesc.NamespaceName = IMSC1_textProfile;
+    }
+
+  // Find font resources for later packaging as GS partitions.
+  AttributeVisitor font_visitor("fontFamily");
+  apply_visitor(m_Root, font_visitor);
+  char buf[64];
+
+  for ( i = font_visitor.value_list.begin(); i != font_visitor.value_list.end(); ++i )
+    {
+      UUID font_id = CreateFontNameId(PathBasename(*i));
+
+      if ( PathIsFile(font_id.EncodeHex(buf, 64))
+          || PathIsFile(*i+".ttf")
+          || PathIsFile(*i+".otf") )
+       {
+         TimedTextResourceDescriptor font_resource;
+         memcpy(font_resource.ResourceID, font_id.Value(), UUIDlen);
+         font_resource.Type = ASDCP::TimedText::MT_OPENTYPE;
+         m_TDesc.ResourceList.push_back(font_resource);
+         m_ResourceTypes.insert(ResourceTypeMap_t::value_type(UUID(font_resource.ResourceID),
+                                                              ASDCP::TimedText::MT_OPENTYPE));
+       }
+      else
+       {
+         AutoMutex l(sg_default_font_family_list_lock);
+         if ( sg_default_font_family_list.find(*i) == sg_default_font_family_list.end() )
+           {
+             DefaultLogSink(). Error("Unable to locate external font resource \"%s\".\n", i->c_str());
+             return RESULT_FORMAT;
+           }
+       }
     }
 
   return RESULT_OK;
 }
 
+// Loads one ancillary resource through Resolver and labels it with a MIME type.
+//
+//   uuid      resource ID value; it is also recorded on FrameBuf as the asset ID
+//   FrameBuf  receives the resource data and the MIME type
+//   Resolver  object that turns the ID into data
+//
+// Returns RESULT_RANGE when the ID is not present in m_ResourceTypes,
+// otherwise the result of Resolver.ResolveRID().  The MIME type is set only
+// when the resolver succeeds.
+Result_t
+AS_02::TimedText::ST2052_TextParser::h__TextParser::ReadAncillaryResource(const byte_t* uuid, ASDCP::TimedText::FrameBuffer& FrameBuf,
+                                                                         const ASDCP::TimedText::IResourceResolver& Resolver) const
+{
+  FrameBuf.AssetID(uuid);
+  UUID resource_id(uuid);
+  char hex_buf[64];
+
+  ResourceTypeMap_t::const_iterator type_entry = m_ResourceTypes.find(resource_id);
+
+  if ( type_entry == m_ResourceTypes.end() )
+    {
+      DefaultLogSink().Error("Unknown ancillary resource id: %s\n", resource_id.EncodeHex(hex_buf, 64));
+      return RESULT_RANGE;
+    }
+
+  Result_t result = Resolver.ResolveRID(uuid, FrameBuf);
+
+  if ( ! KM_SUCCESS(result) )
+    {
+      return result;
+    }
+
+  // pick the MIME type that matches the recorded resource type
+  const char* mime_type = "application/octet-stream";
+
+  if ( type_entry->second == ASDCP::TimedText::MT_PNG )
+    {
+      mime_type = "image/png";
+    }
+  else if ( type_entry->second == ASDCP::TimedText::MT_OPENTYPE )
+    {
+      mime_type = "application/x-font-opentype";
+    }
+
+  FrameBuf.MIMEType(mime_type);
+  return result;
+}
+
+
 
 //------------------------------------------------------------------------------------------
 
@@ -200,7 +517,13 @@ ASDCP::Result_t
 AS_02::TimedText::ST2052_TextParser::ReadAncillaryResource(const Kumu::UUID& uuid, ASDCP::TimedText::FrameBuffer& FrameBuf,
                                                           const ASDCP::TimedText::IResourceResolver* Resolver) const
 {
-  return RESULT_NOTIMPL;
+  // Without a parser object there is nothing to read from.
+  if ( m_Parser.empty() )
+    return RESULT_INIT;
+
+  // A null Resolver selects the parser's default resolver.
+  // NOTE(review): assumes GetDefaultResolver() never returns null, since the
+  // result is dereferenced below -- confirm.
+  if ( Resolver == 0 )
+    Resolver = m_Parser->GetDefaultResolver();
+
+  // Forward to the parser implementation using the raw ID value.
+  return m_Parser->ReadAncillaryResource(uuid.Value(), FrameBuf, *Resolver);
 }