Rename everything.
[libdcp.git] / src / text_asset.cc
1 /*
2     Copyright (C) 2012-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34
35 /** @file  src/text_asset.cc
36  *  @brief TextAsset class
37  */
38
39
40 #include "compose.hpp"
41 #include "dcp_assert.h"
42 #include "load_font_node.h"
43 #include "raw_convert.h"
44 #include "reel_asset.h"
45 #include "subtitle_image.h"
46 #include "text_asset.h"
47 #include "text_asset_internal.h"
48 #include "text_string.h"
49 #include "util.h"
50 #include "xml.h"
51 #include <asdcp/AS_DCP.h>
52 #include <asdcp/KM_util.h>
53 #include <libxml++/nodes/element.h>
54 #include <boost/algorithm/string.hpp>
55 #include <boost/lexical_cast.hpp>
56 #include <boost/shared_array.hpp>
57 #include <algorithm>
58
59
60 using std::cerr;
61 using std::cout;
62 using std::dynamic_pointer_cast;
63 using std::make_shared;
64 using std::map;
65 using std::pair;
66 using std::shared_ptr;
67 using std::string;
68 using std::vector;
69 using boost::lexical_cast;
70 using boost::optional;
71 using namespace dcp;
72
73
74 TextAsset::TextAsset (boost::filesystem::path file)
75         : Asset (file)
76 {
77
78 }
79
80
81 string
82 string_attribute (xmlpp::Element const * node, string name)
83 {
84         auto a = node->get_attribute (name);
85         if (!a) {
86                 throw XMLError (String::compose ("missing attribute %1", name));
87         }
88         return string (a->get_value ());
89 }
90
91
92 optional<string>
93 optional_string_attribute (xmlpp::Element const * node, string name)
94 {
95         auto a = node->get_attribute (name);
96         if (!a) {
97                 return {};
98         }
99         return string (a->get_value ());
100 }
101
102
103 optional<bool>
104 optional_bool_attribute (xmlpp::Element const * node, string name)
105 {
106         auto s = optional_string_attribute (node, name);
107         if (!s) {
108                 return {};
109         }
110
111         return (s.get() == "1" || s.get() == "yes");
112 }
113
114
115 template <class T>
116 optional<T>
117 optional_number_attribute (xmlpp::Element const * node, string name)
118 {
119         auto s = optional_string_attribute (node, name);
120         if (!s) {
121                 return boost::optional<T> ();
122         }
123
124         std::string t = s.get ();
125         boost::erase_all (t, " ");
126         return raw_convert<T> (t);
127 }
128
129
130 TextAsset::ParseState
131 TextAsset::font_node_state (xmlpp::Element const * node, Standard standard) const
132 {
133         ParseState ps;
134
135         if (standard == Standard::INTEROP) {
136                 ps.font_id = optional_string_attribute (node, "Id");
137         } else {
138                 ps.font_id = optional_string_attribute (node, "ID");
139         }
140         ps.size = optional_number_attribute<int64_t> (node, "Size");
141         ps.aspect_adjust = optional_number_attribute<float> (node, "AspectAdjust");
142         ps.italic = optional_bool_attribute (node, "Italic");
143         ps.bold = optional_string_attribute(node, "Weight").get_value_or("normal") == "bold";
144         if (standard == Standard::INTEROP) {
145                 ps.underline = optional_bool_attribute (node, "Underlined");
146         } else {
147                 ps.underline = optional_bool_attribute (node, "Underline");
148         }
149         auto c = optional_string_attribute (node, "Color");
150         if (c) {
151                 ps.colour = Colour (c.get ());
152         }
153         auto const e = optional_string_attribute (node, "Effect");
154         if (e) {
155                 ps.effect = string_to_effect (e.get ());
156         }
157         c = optional_string_attribute (node, "EffectColor");
158         if (c) {
159                 ps.effect_colour = Colour (c.get ());
160         }
161
162         return ps;
163 }
164
165 void
166 TextAsset::position_align (TextAsset::ParseState& ps, xmlpp::Element const * node) const
167 {
168         auto hp = optional_number_attribute<float> (node, "HPosition");
169         if (!hp) {
170                 hp = optional_number_attribute<float> (node, "Hposition");
171         }
172         if (hp) {
173                 ps.h_position = hp.get () / 100;
174         }
175
176         auto ha = optional_string_attribute (node, "HAlign");
177         if (!ha) {
178                 ha = optional_string_attribute (node, "Halign");
179         }
180         if (ha) {
181                 ps.h_align = string_to_halign (ha.get ());
182         }
183
184         auto vp = optional_number_attribute<float> (node, "VPosition");
185         if (!vp) {
186                 vp = optional_number_attribute<float> (node, "Vposition");
187         }
188         if (vp) {
189                 ps.v_position = vp.get () / 100;
190         }
191
192         auto va = optional_string_attribute (node, "VAlign");
193         if (!va) {
194                 va = optional_string_attribute (node, "Valign");
195         }
196         if (va) {
197                 ps.v_align = string_to_valign (va.get ());
198         }
199
200         auto zp = optional_number_attribute<float>(node, "Zposition");
201         if (zp) {
202                 ps.z_position = zp.get() / 100;
203         }
204 }
205
206
207 TextAsset::ParseState
208 TextAsset::text_node_state (xmlpp::Element const * node) const
209 {
210         ParseState ps;
211
212         position_align (ps, node);
213
214         auto d = optional_string_attribute (node, "Direction");
215         if (d) {
216                 ps.direction = string_to_direction (d.get ());
217         }
218
219         ps.type = ParseState::Type::TEXT;
220
221         return ps;
222 }
223
224
225 TextAsset::ParseState
226 TextAsset::image_node_state (xmlpp::Element const * node) const
227 {
228         ParseState ps;
229
230         position_align (ps, node);
231
232         ps.type = ParseState::Type::IMAGE;
233
234         return ps;
235 }
236
237
238 TextAsset::ParseState
239 TextAsset::subtitle_node_state (xmlpp::Element const * node, optional<int> tcr) const
240 {
241         ParseState ps;
242         ps.in = Time (string_attribute(node, "TimeIn"), tcr);
243         ps.out = Time (string_attribute(node, "TimeOut"), tcr);
244         ps.fade_up_time = fade_time (node, "FadeUpTime", tcr);
245         ps.fade_down_time = fade_time (node, "FadeDownTime", tcr);
246         return ps;
247 }
248
249
250 Time
251 TextAsset::fade_time (xmlpp::Element const * node, string name, optional<int> tcr) const
252 {
253         auto const u = optional_string_attribute(node, name).get_value_or ("");
254         Time t;
255
256         if (u.empty ()) {
257                 t = Time (0, 0, 0, 20, 250);
258         } else if (u.find (":") != string::npos) {
259                 t = Time (u, tcr);
260         } else {
261                 t = Time (0, 0, 0, lexical_cast<int> (u), tcr.get_value_or(250));
262         }
263
264         if (t > Time (0, 0, 8, 0, 250)) {
265                 t = Time (0, 0, 8, 0, 250);
266         }
267
268         return t;
269 }
270
271
272 void
273 TextAsset::parse_texts(xmlpp::Element const * node, vector<ParseState>& state, optional<int> tcr, Standard standard)
274 {
275         if (node->get_name() == "Font") {
276                 state.push_back (font_node_state (node, standard));
277         } else if (node->get_name() == "Subtitle") {
278                 state.push_back (subtitle_node_state (node, tcr));
279         } else if (node->get_name() == "Text") {
280                 state.push_back (text_node_state (node));
281         } else if (node->get_name() == "SubtitleList") {
282                 state.push_back (ParseState ());
283         } else if (node->get_name() == "Image") {
284                 state.push_back (image_node_state (node));
285         } else {
286                 throw XMLError ("unexpected node " + node->get_name());
287         }
288
289         float space_before = 0;
290
291         /* Collect <Ruby>s first */
292         auto get_text_content = [](xmlpp::Element const* element) {
293                 string all_content;
294                 for (auto child: element->get_children()) {
295                         auto content = dynamic_cast<xmlpp::ContentNode const*>(child);
296                         if (content) {
297                                 all_content += content->get_content();
298                         }
299                 }
300                 return all_content;
301         };
302
303         vector<Ruby> rubies;
304         for (auto child: node->get_children()) {
305                 auto element = dynamic_cast<xmlpp::Element const*>(child);
306                 if (element && element->get_name() == "Ruby") {
307                         optional<string> base;
308                         optional<string> annotation;
309                         optional<float> size;
310                         optional<RubyPosition> position;
311                         optional<float> offset;
312                         optional<float> spacing;
313                         optional<float> aspect_adjust;
314                         for (auto ruby_child: element->get_children()) {
315                                 if (auto ruby_element = dynamic_cast<xmlpp::Element const*>(ruby_child)) {
316                                         if (ruby_element->get_name() == "Rb") {
317                                                 base = get_text_content(ruby_element);
318                                         } else if (ruby_element->get_name() == "Rt") {
319                                                 annotation = get_text_content(ruby_element);
320                                                 size = optional_number_attribute<float>(ruby_element, "Size");
321                                                 if (auto position_string = optional_string_attribute(ruby_element, "Position")) {
322                                                         if (*position_string == "before") {
323                                                                 position = RubyPosition::BEFORE;
324                                                         } else if (*position_string == "after") {
325                                                                 position = RubyPosition::AFTER;
326                                                         } else {
327                                                                 DCP_ASSERT(false);
328                                                         }
329                                                 }
330                                                 offset = optional_number_attribute<float>(ruby_element, "Offset");
331                                                 spacing = optional_number_attribute<float>(ruby_element, "Spacing");
332                                                 aspect_adjust = optional_number_attribute<float>(ruby_element, "AspectAdjust");
333                                         }
334                                 }
335                         }
336                         DCP_ASSERT(base);
337                         DCP_ASSERT(annotation);
338                         auto ruby = Ruby{*base, *annotation};
339                         if (size) {
340                                 ruby.size = *size;
341                         }
342                         if (position) {
343                                 ruby.position = *position;
344                         }
345                         if (offset) {
346                                 ruby.offset = *offset;
347                         }
348                         if (spacing) {
349                                 ruby.spacing = *spacing;
350                         }
351                         if (aspect_adjust) {
352                                 ruby.aspect_adjust = *aspect_adjust;
353                         }
354                         rubies.push_back(ruby);
355                 }
356         }
357
358         for (auto i: node->get_children()) {
359
360                 /* Handle actual content e.g. text */
361                 auto const v = dynamic_cast<xmlpp::ContentNode const *>(i);
362                 if (v) {
363                         maybe_add_text(v->get_content(), state, space_before, standard, rubies);
364                         space_before = 0;
365                 }
366
367                 /* Handle other nodes */
368                 auto const e = dynamic_cast<xmlpp::Element const *>(i);
369                 if (e) {
370                         if (e->get_name() == "Space") {
371                                 if (node->get_name() != "Text") {
372                                         throw XMLError ("Space node found outside Text");
373                                 }
374                                 auto size = optional_string_attribute(e, "Size").get_value_or("0.5");
375                                 if (standard == dcp::Standard::INTEROP) {
376                                         boost::replace_all(size, "em", "");
377                                 }
378                                 space_before += raw_convert<float>(size);
379                         } else if (e->get_name() != "Ruby") {
380                                 parse_texts(e, state, tcr, standard);
381                         }
382                 }
383         }
384
385         state.pop_back ();
386 }
387
388
389 void
390 TextAsset::maybe_add_text(
391         string text,
392         vector<ParseState> const & parse_state,
393         float space_before,
394         Standard standard,
395         vector<Ruby> const& rubies
396         )
397 {
398         auto wanted = [](ParseState const& ps) {
399                 return ps.type && (ps.type.get() == ParseState::Type::TEXT || ps.type.get() == ParseState::Type::IMAGE);
400         };
401
402         if (find_if(parse_state.begin(), parse_state.end(), wanted) == parse_state.end()) {
403                 return;
404         }
405
406         ParseState ps;
407         for (auto const& i: parse_state) {
408                 if (i.font_id) {
409                         ps.font_id = i.font_id.get();
410                 }
411                 if (i.size) {
412                         ps.size = i.size.get();
413                 }
414                 if (i.aspect_adjust) {
415                         ps.aspect_adjust = i.aspect_adjust.get();
416                 }
417                 if (i.italic) {
418                         ps.italic = i.italic.get();
419                 }
420                 if (i.bold) {
421                         ps.bold = i.bold.get();
422                 }
423                 if (i.underline) {
424                         ps.underline = i.underline.get();
425                 }
426                 if (i.colour) {
427                         ps.colour = i.colour.get();
428                 }
429                 if (i.effect) {
430                         ps.effect = i.effect.get();
431                 }
432                 if (i.effect_colour) {
433                         ps.effect_colour = i.effect_colour.get();
434                 }
435                 if (i.h_position) {
436                         ps.h_position = i.h_position.get();
437                 }
438                 if (i.h_align) {
439                         ps.h_align = i.h_align.get();
440                 }
441                 if (i.v_position) {
442                         ps.v_position = i.v_position.get();
443                 }
444                 if (i.v_align) {
445                         ps.v_align = i.v_align.get();
446                 }
447                 if (i.z_position) {
448                         ps.z_position = i.z_position.get();
449                 }
450                 if (i.direction) {
451                         ps.direction = i.direction.get();
452                 }
453                 if (i.in) {
454                         ps.in = i.in.get();
455                 }
456                 if (i.out) {
457                         ps.out = i.out.get();
458                 }
459                 if (i.fade_up_time) {
460                         ps.fade_up_time = i.fade_up_time.get();
461                 }
462                 if (i.fade_down_time) {
463                         ps.fade_down_time = i.fade_down_time.get();
464                 }
465                 if (i.type) {
466                         ps.type = i.type.get();
467                 }
468         }
469
470         if (!ps.in || !ps.out) {
471                 /* We're not in a <Text> node; just ignore this content */
472                 return;
473         }
474
475         DCP_ASSERT (ps.type);
476
477         switch (ps.type.get()) {
478         case ParseState::Type::TEXT:
479                 _texts.push_back(
480                         make_shared<TextString>(
481                                 ps.font_id,
482                                 ps.italic.get_value_or (false),
483                                 ps.bold.get_value_or (false),
484                                 ps.underline.get_value_or (false),
485                                 ps.colour.get_value_or (dcp::Colour (255, 255, 255)),
486                                 ps.size.get_value_or (42),
487                                 ps.aspect_adjust.get_value_or (1.0),
488                                 ps.in.get(),
489                                 ps.out.get(),
490                                 ps.h_position.get_value_or(0),
491                                 ps.h_align.get_value_or(HAlign::CENTER),
492                                 ps.v_position.get_value_or(0),
493                                 ps.v_align.get_value_or(VAlign::CENTER),
494                                 ps.z_position.get_value_or(0),
495                                 ps.direction.get_value_or (Direction::LTR),
496                                 text,
497                                 ps.effect.get_value_or (Effect::NONE),
498                                 ps.effect_colour.get_value_or (dcp::Colour (0, 0, 0)),
499                                 ps.fade_up_time.get_value_or(Time()),
500                                 ps.fade_down_time.get_value_or(Time()),
501                                 space_before,
502                                 rubies
503                                 )
504                         );
505                 break;
506         case ParseState::Type::IMAGE:
507         {
508                 switch (standard) {
509                 case Standard::INTEROP:
510                         if (text.size() >= 4) {
511                                 /* Remove file extension */
512                                 text = text.substr(0, text.size() - 4);
513                         }
514                         break;
515                 case Standard::SMPTE:
516                         /* It looks like this urn:uuid: is required, but DoM wasn't expecting it (and not writing it)
517                          * until around 2.15.140 so I guess either:
518                          *   a) it is not (always) used in the field, or
519                          *   b) nobody noticed / complained.
520                          */
521                         if (text.substr(0, 9) == "urn:uuid:") {
522                                 text = text.substr(9);
523                         }
524                         break;
525                 }
526
527                 /* Add a subtitle with no image data and we'll fill that in later */
528                 _texts.push_back(
529                         make_shared<SubtitleImage>(
530                                 ArrayData(),
531                                 text,
532                                 ps.in.get(),
533                                 ps.out.get(),
534                                 ps.h_position.get_value_or(0),
535                                 ps.h_align.get_value_or(HAlign::CENTER),
536                                 ps.v_position.get_value_or(0),
537                                 ps.v_align.get_value_or(VAlign::CENTER),
538                                 ps.z_position.get_value_or(0),
539                                 ps.fade_up_time.get_value_or(Time()),
540                                 ps.fade_down_time.get_value_or(Time())
541                                 )
542                         );
543                 break;
544         }
545         }
546 }
547
548
549 vector<shared_ptr<const Text>>
550 TextAsset::texts() const
551 {
552         vector<shared_ptr<const Text>> s;
553         for (auto i: _texts) {
554                 s.push_back (i);
555         }
556         return s;
557 }
558
559
560 vector<shared_ptr<const Text>>
561 TextAsset::texts_during(Time from, Time to, bool starting) const
562 {
563         vector<shared_ptr<const Text>> s;
564         for (auto i: _texts) {
565                 if ((starting && from <= i->in() && i->in() < to) || (!starting && i->out() >= from && i->in() <= to)) {
566                         s.push_back (i);
567                 }
568         }
569
570         return s;
571 }
572
573
574 void
575 TextAsset::add(shared_ptr<Text> s)
576 {
577         _texts.push_back (s);
578 }
579
580
581 Time
582 TextAsset::latest_text_out() const
583 {
584         Time t;
585         for (auto i: _texts) {
586                 if (i->out() > t) {
587                         t = i->out ();
588                 }
589         }
590
591         return t;
592 }
593
594
595 bool
596 TextAsset::equals(shared_ptr<const Asset> other_asset, EqualityOptions const& options, NoteHandler note) const
597 {
598         if (!Asset::equals (other_asset, options, note)) {
599                 return false;
600         }
601
602         auto other = dynamic_pointer_cast<const TextAsset> (other_asset);
603         if (!other) {
604                 return false;
605         }
606
607         if (_texts.size() != other->_texts.size()) {
608                 note(NoteType::ERROR, String::compose("different number of subtitles / closed captions: %1 vs %2", _texts.size(), other->_texts.size()));
609                 return false;
610         }
611
612         auto i = _texts.begin();
613         auto j = other->_texts.begin();
614
615         while (i != _texts.end()) {
616                 auto string_i = dynamic_pointer_cast<TextString> (*i);
617                 auto string_j = dynamic_pointer_cast<TextString> (*j);
618                 auto image_i = dynamic_pointer_cast<SubtitleImage>(*i);
619                 auto image_j = dynamic_pointer_cast<SubtitleImage>(*j);
620
621                 if ((string_i && !string_j) || (image_i && !image_j)) {
622                         note (NoteType::ERROR, "subtitles differ: string vs. image");
623                         return false;
624                 }
625
626                 if (string_i && !string_i->equals(string_j, options, note)) {
627                         return false;
628                 }
629
630                 if (image_i && !image_i->equals(image_j, options, note)) {
631                         return false;
632                 }
633
634                 ++i;
635                 ++j;
636         }
637
638         return true;
639 }
640
641
642 struct TextSorter
643 {
644         bool operator() (shared_ptr<Text> a, shared_ptr<Text> b) {
645                 if (a->in() != b->in()) {
646                         return a->in() < b->in();
647                 }
648                 if (a->v_align() == VAlign::BOTTOM) {
649                         return a->v_position() > b->v_position();
650                 }
651                 return a->v_position() < b->v_position();
652         }
653 };
654
655
656 void
657 TextAsset::pull_fonts(shared_ptr<order::Part> part)
658 {
659         if (part->children.empty ()) {
660                 return;
661         }
662
663         /* Pull up from children */
664         for (auto i: part->children) {
665                 pull_fonts (i);
666         }
667
668         if (part->parent) {
669                 /* Establish the common font features that each of part's children have;
670                    these features go into part's font.
671                 */
672                 part->font = part->children.front()->font;
673                 for (auto i: part->children) {
674                         part->font.take_intersection (i->font);
675                 }
676
677                 /* Remove common values from part's children's fonts */
678                 for (auto i: part->children) {
679                         i->font.take_difference (part->font);
680                 }
681         }
682
683         /* Merge adjacent children with the same font */
684         auto i = part->children.begin();
685         vector<shared_ptr<order::Part>> merged;
686
687         while (i != part->children.end()) {
688
689                 if ((*i)->font.empty ()) {
690                         merged.push_back (*i);
691                         ++i;
692                 } else {
693                         auto j = i;
694                         ++j;
695                         while (j != part->children.end() && (*i)->font == (*j)->font) {
696                                 ++j;
697                         }
698                         if (std::distance (i, j) == 1) {
699                                 merged.push_back (*i);
700                                 ++i;
701                         } else {
702                                 shared_ptr<order::Part> group (new order::Part (part, (*i)->font));
703                                 for (auto k = i; k != j; ++k) {
704                                         (*k)->font.clear ();
705                                         group->children.push_back (*k);
706                                 }
707                                 merged.push_back (group);
708                                 i = j;
709                         }
710                 }
711         }
712
713         part->children = merged;
714 }
715
716
717 /** @param standard Standard (INTEROP or SMPTE); this is used rather than putting things in the child
718  *  class because the differences between the two are fairly subtle.
719  */
720 void
721 TextAsset::texts_as_xml(xmlpp::Element* xml_root, int time_code_rate, Standard standard) const
722 {
723         auto sorted = _texts;
724         std::stable_sort(sorted.begin(), sorted.end(), TextSorter());
725
726         /* Gather our subtitles into a hierarchy of Text/Text/String objects, writing
727            font information into the bottom level (String) objects.
728         */
729
730         auto root = make_shared<order::Part>(shared_ptr<order::Part>());
731         shared_ptr<order::Subtitle> subtitle;
732         shared_ptr<order::Text> text;
733
734         Time last_in;
735         Time last_out;
736         Time last_fade_up_time;
737         Time last_fade_down_time;
738         HAlign last_h_align;
739         float last_h_position;
740         VAlign last_v_align;
741         float last_v_position;
742         float last_z_position;
743         Direction last_direction;
744
745         for (auto i: sorted) {
746                 if (!subtitle ||
747                     (last_in != i->in() ||
748                      last_out != i->out() ||
749                      last_fade_up_time != i->fade_up_time() ||
750                      last_fade_down_time != i->fade_down_time())
751                         ) {
752
753                         subtitle = make_shared<order::Subtitle>(root, i->in(), i->out(), i->fade_up_time(), i->fade_down_time());
754                         root->children.push_back (subtitle);
755
756                         last_in = i->in ();
757                         last_out = i->out ();
758                         last_fade_up_time = i->fade_up_time ();
759                         last_fade_down_time = i->fade_down_time ();
760                         text.reset ();
761                 }
762
763                 auto is = dynamic_pointer_cast<TextString>(i);
764                 if (is) {
765                         if (!text ||
766                             last_h_align != is->h_align() ||
767                             fabs(last_h_position - is->h_position()) > ALIGN_EPSILON ||
768                             last_v_align != is->v_align() ||
769                             fabs(last_v_position - is->v_position()) > ALIGN_EPSILON ||
770                             fabs(last_z_position - is->z_position()) > ALIGN_EPSILON ||
771                             last_direction != is->direction()
772                                 ) {
773                                 text = make_shared<order::Text>(
774                                         subtitle,
775                                         is->h_align(),
776                                         is->h_position(),
777                                         is->v_align(),
778                                         is->v_position(),
779                                         is->z_position(),
780                                         is->direction(),
781                                         is->rubies()
782                                         );
783                                 subtitle->children.push_back (text);
784
785                                 last_h_align = is->h_align ();
786                                 last_h_position = is->h_position ();
787                                 last_v_align = is->v_align ();
788                                 last_v_position = is->v_position ();
789                                 last_z_position = is->z_position();
790                                 last_direction = is->direction ();
791                         }
792
793                         text->children.push_back (make_shared<order::String>(text, order::Font (is, standard), is->text(), is->space_before()));
794                 }
795
796                 auto ii = dynamic_pointer_cast<SubtitleImage>(i);
797                 if (ii) {
798                         text.reset ();
799                         subtitle->children.push_back (
800                                 make_shared<order::Image>(subtitle, ii->id(), ii->png_image(), ii->h_align(), ii->h_position(), ii->v_align(), ii->v_position(), ii->z_position())
801                                 );
802                 }
803         }
804
805         /* Pull font changes as high up the hierarchy as we can */
806
807         pull_fonts (root);
808
809         /* Write XML */
810
811         order::Context context;
812         context.time_code_rate = time_code_rate;
813         context.standard = standard;
814         context.spot_number = 1;
815
816         root->write_xml (xml_root, context);
817 }
818
819
820 map<string, ArrayData>
821 TextAsset::font_data() const
822 {
823         map<string, ArrayData> out;
824         for (auto const& i: _fonts) {
825                 out[i.load_id] = i.data;
826         }
827         return out;
828 }
829
830
831 map<string, boost::filesystem::path>
832 TextAsset::font_filenames() const
833 {
834         map<string, boost::filesystem::path> out;
835         for (auto const& i: _fonts) {
836                 if (i.file) {
837                         out[i.load_id] = *i.file;
838                 }
839         }
840         return out;
841 }
842
843
844 /** Replace empty IDs in any <LoadFontId> and <Font> tags with
845  *  a dummy string.  Some systems give errors with empty font IDs
846  *  (see DCP-o-matic bug #1689).
847  */
848 void
849 TextAsset::fix_empty_font_ids()
850 {
851         bool have_empty = false;
852         vector<string> ids;
853         for (auto i: load_font_nodes()) {
854                 if (i->id == "") {
855                         have_empty = true;
856                 } else {
857                         ids.push_back (i->id);
858                 }
859         }
860
861         if (!have_empty) {
862                 return;
863         }
864
865         string const empty_id = unique_string (ids, "font");
866
867         for (auto i: load_font_nodes()) {
868                 if (i->id == "") {
869                         i->id = empty_id;
870                 }
871         }
872
873         for (auto i: _texts) {
874                 auto j = dynamic_pointer_cast<TextString> (i);
875                 if (j && j->font() && j->font().get() == "") {
876                         j->set_font (empty_id);
877                 }
878         }
879 }
880
881
namespace {

/** Working state shared by the recursive calls of format_xml_node().
 *  Every member has a defined initial value, so a State is usable whether or not
 *  it is brace-initialised.
 */
struct State
{
        /** Current element nesting depth; each level is written as two spaces of indentation */
        int indent = 0;
        /** The XML text built up so far */
        string xml;
        /** Number of enclosing elements which have asked for formatting to be disabled;
         *  no line breaks or indentation are written while this is non-zero.
         */
        int disable_formatting = 0;
};

}
892
893
894 static
895 void
896 format_xml_node (xmlpp::Node const* node, State& state)
897 {
898         if (auto text_node = dynamic_cast<const xmlpp::TextNode*>(node)) {
899                 string content = text_node->get_content();
900                 boost::replace_all(content, "&", "&amp;");
901                 boost::replace_all(content, "<", "&lt;");
902                 boost::replace_all(content, ">", "&gt;");
903                 state.xml += content;
904         } else if (auto element = dynamic_cast<const xmlpp::Element*>(node)) {
905                 ++state.indent;
906
907                 auto children = element->get_children();
908                 auto const should_disable_formatting =
909                         std::any_of(
910                                 children.begin(), children.end(),
911                                 [](xmlpp::Node const* node) { return static_cast<bool>(dynamic_cast<const xmlpp::ContentNode*>(node)); }
912                                 ) || element->get_name() == "Text";
913
914                 if (!state.disable_formatting) {
915                         state.xml += "\n" + string(state.indent * 2, ' ');
916                 }
917
918                 state.xml += "<" + element->get_name();
919
920                 for (auto attribute: element->get_attributes()) {
921                         state.xml += String::compose(" %1=\"%2\"", attribute->get_name().raw(), attribute->get_value().raw());
922                 }
923
924                 if (children.empty()) {
925                         state.xml += "/>";
926                 } else {
927                         state.xml += ">";
928
929                         if (should_disable_formatting) {
930                                 ++state.disable_formatting;
931                         }
932
933                         for (auto child: children) {
934                                 format_xml_node(child, state);
935                         }
936
937                         if (!state.disable_formatting) {
938                                 state.xml += "\n" + string(state.indent * 2, ' ');
939                         }
940
941                         state.xml += String::compose("</%1>", element->get_name().raw());
942
943                         if (should_disable_formatting) {
944                                 --state.disable_formatting;
945                         }
946                 }
947
948                 --state.indent;
949         }
950 }
951
952
953 /** Format XML much as write_to_string_formatted() would do, except without adding any white space
954  *  to <Text> nodes.  This is an attempt to avoid changing what is actually displayed as subtitles
955  *  while also formatting the XML in such a way as to avoid DoM bug 2205.
956  *
957  *  xml_namespace is an optional namespace for the root node; it would be nicer to set this up with
958  *  set_namespace_declaration in the caller and then to extract it here but I couldn't find a way
959  *  to get all namespaces with the libxml++ API.
960  */
961 string
962 TextAsset::format_xml(xmlpp::Document const& document, optional<pair<string, string>> xml_namespace)
963 {
964         auto root = document.get_root_node();
965
966         State state = {};
967         state.xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<" + root->get_name();
968
969         if (xml_namespace) {
970                 if (xml_namespace->first.empty()) {
971                         state.xml += String::compose(" xmlns=\"%1\"", xml_namespace->second);
972                 } else {
973                         state.xml += String::compose(" xmlns:%1=\"%2\"", xml_namespace->first, xml_namespace->second);
974                 }
975         }
976
977         for (auto attribute: root->get_attributes()) {
978                 state.xml += String::compose(" %1=\"%2\"", attribute->get_name().raw(), attribute->get_value().raw());
979         }
980
981         state.xml += ">";
982
983         for (auto child: document.get_root_node()->get_children()) {
984                 format_xml_node(child, state);
985         }
986
987         state.xml += String::compose("\n</%1>\n", root->get_name().raw());
988
989         return state.xml;
990 }
991
992
993 void
994 TextAsset::ensure_font(string load_id, dcp::ArrayData data)
995 {
996         if (std::find_if(_fonts.begin(), _fonts.end(), [load_id](Font const& font) { return font.load_id == load_id; }) == _fonts.end()) {
997                 add_font(load_id, data);
998         }
999 }
1000