From fd30397b1a1d0df6b43aedbb205913d2eec09fd0 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Sat, 27 May 2017 00:47:15 +0100 Subject: [PATCH] Optimise sorting of image filenames. One regression here is that /1/01/1 will be seen as greater than /1/2/1 as the numbers are now coalesced so the comparsion would be 1011 cf 121. --- src/lib/image_filename_sorter.cc | 69 ++++++++++-------------------- src/lib/image_filename_sorter.h | 3 +- test/image_filename_sorter_test.cc | 23 +++++++++- 3 files changed, 45 insertions(+), 50 deletions(-) diff --git a/src/lib/image_filename_sorter.cc b/src/lib/image_filename_sorter.cc index 25a8acb3d..c32b07115 100644 --- a/src/lib/image_filename_sorter.cc +++ b/src/lib/image_filename_sorter.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Carl Hetherington + Copyright (C) 2015-2017 Carl Hetherington This file is part of DCP-o-matic. @@ -19,71 +19,46 @@ */ #include "image_filename_sorter.h" -#include +#include #include #include +#include #include using std::list; -using dcp::raw_convert; +using std::string; +using dcp::locale_convert; +using boost::optional; bool ImageFilenameSorter::operator() (boost::filesystem::path a, boost::filesystem::path b) { - std::list na = extract_numbers (a); - std::list nb = extract_numbers (b); - if (na.empty() || nb.empty()) { + optional na = extract_numbers (a); + optional nb = extract_numbers (b); + if (!na || !nb) { return a.string() < b.string(); } - if (na.size() != nb.size()) { - /* Just use the first one */ - return na.front() < nb.front(); - } - - std::list::const_iterator i = na.begin (); - std::list::const_iterator j = nb.begin (); - - while (i != na.end()) { - if (*i != *j) { - return *i < *j; - } - ++i; - ++j; - } - - /* All the same */ - return false; - + return *na < *nb; } -list +optional ImageFilenameSorter::extract_numbers (boost::filesystem::path p) { - p = p.leaf (); - - std::list numbers; - - std::string current; - for (size_t i = 0; i < p.string().size(); ++i) { - if (isdigit (p.string()[i])) { - current += p.string()[i]; - } else { - if (!current.empty ()) { - numbers.push_back (current); - current.clear (); - } + string numbers; + string const ps = p.leaf().string(); + for (size_t i = 0; i < ps.size(); ++i) { + if (isdigit (ps[i])) { + numbers += ps[i]; } } - if (!current.empty ()) { - numbers.push_back (current); - } - - std::list numbers_as_int; - BOOST_FOREACH (std::string i, numbers) { - numbers_as_int.push_back (raw_convert (i)); + if (numbers.empty ()) { + return optional (); } - return numbers_as_int; + /* locale_convert is quicker than raw_convert and numbers can only contain + things which are isdigit() so locale_convert is fine to use. + */ + return locale_convert (numbers); } diff --git a/src/lib/image_filename_sorter.h b/src/lib/image_filename_sorter.h index e1932bead..2a15639ce 100644 --- a/src/lib/image_filename_sorter.h +++ b/src/lib/image_filename_sorter.h @@ -19,6 +19,7 @@ */ #include +#include class ImageFilenameSorter { @@ -26,5 +27,5 @@ public: bool operator() (boost::filesystem::path a, boost::filesystem::path b); private: - std::list extract_numbers (boost::filesystem::path p); + boost::optional extract_numbers (boost::filesystem::path p); }; diff --git a/test/image_filename_sorter_test.cc b/test/image_filename_sorter_test.cc index 113c24d10..f2af9000c 100644 --- a/test/image_filename_sorter_test.cc +++ b/test/image_filename_sorter_test.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2015 Carl Hetherington + Copyright (C) 2015-2017 Carl Hetherington This file is part of DCP-o-matic. @@ -24,9 +24,14 @@ */ #include "lib/image_filename_sorter.h" +#include "lib/compose.hpp" #include -BOOST_AUTO_TEST_CASE (image_filename_sorter_test) +using std::random_shuffle; +using std::sort; +using std::vector; + +BOOST_AUTO_TEST_CASE (image_filename_sorter_test1) { ImageFilenameSorter x; BOOST_CHECK (x ("abc0000000001", "abc0000000002")); @@ -49,3 +54,17 @@ BOOST_AUTO_TEST_CASE (image_filename_sorter_test) BOOST_CHECK (!x ("EWS_DCP_092815_000000.j2c", "EWS_DCP_092815_000000.j2c")); BOOST_CHECK (!x ("EWS_DCP_092815_000100.j2c", "EWS_DCP_092815_000000.j2c")); } + +/** Test a sort of a lot of paths. Mostly useful for profiling. */ +BOOST_AUTO_TEST_CASE (image_filename_sorter_test2) +{ + vector paths; + for (int i = 0; i < 100000; ++i) { + paths.push_back(String::compose("some.filename.with.%1.number.tiff", i)); + } + random_shuffle (paths.begin(), paths.end()); + sort (paths.begin(), paths.end(), ImageFilenameSorter()); + for (int i = 0; i < 100000; ++i) { + BOOST_CHECK_EQUAL(paths[i].string(), String::compose("some.filename.with.%1.number.tiff", i)); + } +} -- 2.30.2