Optimise sorting of image filenames.
authorCarl Hetherington <cth@carlh.net>
Fri, 26 May 2017 23:47:15 +0000 (00:47 +0100)
committerCarl Hetherington <cth@carlh.net>
Fri, 26 May 2017 23:47:15 +0000 (00:47 +0100)
One regression here is that /1/01/1 will be seen as greater than /1/2/1
as the numbers are now coalesced so the comparsion would be 1011 cf 121.

src/lib/image_filename_sorter.cc
src/lib/image_filename_sorter.h
test/image_filename_sorter_test.cc

index 25a8acb3da5455af942c5774af88c0876c1fe3a5..c32b07115245145061bd4c87c206db9c565cb3a0 100644 (file)
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2015-2016 Carl Hetherington <cth@carlh.net>
+    Copyright (C) 2015-2017 Carl Hetherington <cth@carlh.net>
 
     This file is part of DCP-o-matic.
 
 */
 
 #include "image_filename_sorter.h"
-#include <dcp/raw_convert.h>
+#include <dcp/locale_convert.h>
 #include <boost/filesystem.hpp>
 #include <boost/foreach.hpp>
+#include <boost/optional.hpp>
 #include <iostream>
 
 using std::list;
-using dcp::raw_convert;
+using std::string;
+using dcp::locale_convert;
+using boost::optional;
 
 bool
 ImageFilenameSorter::operator() (boost::filesystem::path a, boost::filesystem::path b)
 {
-       std::list<int> na = extract_numbers (a);
-       std::list<int> nb = extract_numbers (b);
-       if (na.empty() || nb.empty()) {
+       optional<int> na = extract_numbers (a);
+       optional<int> nb = extract_numbers (b);
+       if (!na || !nb) {
                return a.string() < b.string();
        }
 
-       if (na.size() != nb.size()) {
-               /* Just use the first one */
-               return na.front() < nb.front();
-       }
-
-       std::list<int>::const_iterator i = na.begin ();
-       std::list<int>::const_iterator j = nb.begin ();
-
-       while (i != na.end()) {
-               if (*i != *j) {
-                       return *i < *j;
-               }
-               ++i;
-               ++j;
-       }
-
-       /* All the same */
-       return false;
-
+       return *na < *nb;
 }
 
-list<int>
+optional<int>
 ImageFilenameSorter::extract_numbers (boost::filesystem::path p)
 {
-       p = p.leaf ();
-
-       std::list<std::string> numbers;
-
-       std::string current;
-       for (size_t i = 0; i < p.string().size(); ++i) {
-               if (isdigit (p.string()[i])) {
-                       current += p.string()[i];
-               } else {
-                       if (!current.empty ()) {
-                               numbers.push_back (current);
-                               current.clear ();
-                       }
+       string numbers;
+       string const ps = p.leaf().string();
+       for (size_t i = 0; i < ps.size(); ++i) {
+               if (isdigit (ps[i])) {
+                       numbers += ps[i];
                }
        }
 
-       if (!current.empty ()) {
-               numbers.push_back (current);
-       }
-
-       std::list<int> numbers_as_int;
-       BOOST_FOREACH (std::string i, numbers) {
-               numbers_as_int.push_back (raw_convert<int> (i));
+       if (numbers.empty ()) {
+               return optional<int> ();
        }
 
-       return numbers_as_int;
+       /* locale_convert is quicker than raw_convert and numbers can only contain
+          things which are isdigit() so locale_convert is fine to use.
+       */
+       return locale_convert<int> (numbers);
 }
index e1932beadb5aa40b893190b89f90c95b851132c8..2a15639cea4a487f8af0c07d1d992ccf748ec0a5 100644 (file)
@@ -19,6 +19,7 @@
 */
 
 #include <boost/filesystem.hpp>
+#include <boost/optional.hpp>
 
 class ImageFilenameSorter
 {
@@ -26,5 +27,5 @@ public:
        bool operator() (boost::filesystem::path a, boost::filesystem::path b);
 
 private:
-       std::list<int> extract_numbers (boost::filesystem::path p);
+       boost::optional<int> extract_numbers (boost::filesystem::path p);
 };
index 113c24d108f2736f1a56b1b6912195acd79d3f0f..f2af9000c0149f4f6e7d8e29c8d0286d68a0b210 100644 (file)
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2015 Carl Hetherington <cth@carlh.net>
+    Copyright (C) 2015-2017 Carl Hetherington <cth@carlh.net>
 
     This file is part of DCP-o-matic.
 
  */
 
 #include "lib/image_filename_sorter.h"
+#include "lib/compose.hpp"
 #include <boost/test/unit_test.hpp>
 
-BOOST_AUTO_TEST_CASE (image_filename_sorter_test)
+using std::random_shuffle;
+using std::sort;
+using std::vector;
+
+BOOST_AUTO_TEST_CASE (image_filename_sorter_test1)
 {
        ImageFilenameSorter x;
        BOOST_CHECK (x ("abc0000000001", "abc0000000002"));
@@ -49,3 +54,17 @@ BOOST_AUTO_TEST_CASE (image_filename_sorter_test)
        BOOST_CHECK (!x ("EWS_DCP_092815_000000.j2c", "EWS_DCP_092815_000000.j2c"));
        BOOST_CHECK (!x ("EWS_DCP_092815_000100.j2c", "EWS_DCP_092815_000000.j2c"));
 }
+
+/** Test a sort of a lot of paths.  Mostly useful for profiling. */
+BOOST_AUTO_TEST_CASE (image_filename_sorter_test2)
+{
+       vector<boost::filesystem::path> paths;
+       for (int i = 0; i < 100000; ++i) {
+               paths.push_back(String::compose("some.filename.with.%1.number.tiff", i));
+       }
+       random_shuffle (paths.begin(), paths.end());
+       sort (paths.begin(), paths.end(), ImageFilenameSorter());
+       for (int i = 0; i < 100000; ++i) {
+               BOOST_CHECK_EQUAL(paths[i].string(), String::compose("some.filename.with.%1.number.tiff", i));
+       }
+}