Hack to fix image luminance when cropping subsampled images (#1872).

[dcpomatic.git] / src / lib / image.cc
diff --git a/src/lib/image.cc b/src/lib/image.cc

index d3b193e7bc99a7d17c5c103489118809a3875d33..97aeccd7679e099b02831ae11ce1c9aa210b33b6 100644 (file)
--- a/src/lib/image.cc
+++ b/src/lib/image.cc
@@ -55,6 +55,11 @@ using std::runtime_error;
  using boost::shared_ptr;
  using dcp::Size;
  
+
+/** The memory alignment, in bytes, used for each row of an image if alignment is requested */
+#define ALIGNMENT 64
+
+
  int
  Image::vertical_factor (int n) const
  {
@@ -67,7 +72,7 @@ Image::vertical_factor (int n) const
                 throw PixelFormatError ("line_factor()", _pixel_format);
         }
  
-       return pow (2.0f, d->log2_chroma_h);
+       return lrintf(powf(2.0f, d->log2_chroma_h));
  }
  
  int
@@ -82,7 +87,7 @@ Image::horizontal_factor (int n) const
                 throw PixelFormatError ("sample_size()", _pixel_format);
         }
  
-       return pow (2.0f, d->log2_chroma_w);
+       return lrintf(powf(2.0f, d->log2_chroma_w));
  }
  
  /** @param n Component index.
@@ -117,6 +122,23 @@ Image::planes () const
         return d->nb_components;
  }
  
+
+/** Round a pixel count down to a multiple of (1 << desc->log2_chroma_w).
+ *  @param p Pixel count or offset to round.
+ *  @param desc Pixel format descriptor supplying log2_chroma_w.
+ *  @return p with its lowest log2_chroma_w bits cleared.
+ */
+static
+int
+round_width_for_subsampling (int p, AVPixFmtDescriptor const * desc)
+{
+       int const step = 1 << desc->log2_chroma_w;
+       /* The part of p that does not fit into a whole number of steps */
+       int const remainder = p & (step - 1);
+       return p - remainder;
+}
+
+
+/** Round a pixel count down to a multiple of (1 << desc->log2_chroma_h).
+ *  @param p Pixel count or offset to round.
+ *  @param desc Pixel format descriptor supplying log2_chroma_h.
+ *  @return p with its lowest log2_chroma_h bits cleared.
+ */
+static
+int
+round_height_for_subsampling (int p, AVPixFmtDescriptor const * desc)
+{
+       int const step = 1 << desc->log2_chroma_h;
+       /* The part of p that does not fit into a whole number of steps */
+       int const remainder = p & (step - 1);
+       return p - remainder;
+}
+
+
  /** Crop this image, scale it to `inter_size' and then place it in a black frame of `out_size'.
   *  @param crop Amount to crop by.
   *  @param inter_size Size to scale the cropped image to.
@@ -140,33 +162,34 @@ Image::crop_scale_window (
         DCPOMATIC_ASSERT (out_size.width >= inter_size.width);
         DCPOMATIC_ASSERT (out_size.height >= inter_size.height);
  
-       /* Here's an image of out_size.  Below we may write to it starting at an offset so we get some padding.
-          Hence we want to write in the following pattern:
-
-          block start   write start                                  line end
-          |..(padding)..|<------line-size------------->|..(padding)..|
-          |..(padding)..|<------line-size------------->|..(padding)..|
-          |..(padding)..|<------line-size------------->|..(padding)..|
-
-          where line-size is of the smaller (inter_size) image and the full padded line length is that of
-          out_size.  To get things to work we have to tell FFmpeg that the stride is that of out_size.
-          However some parts of FFmpeg (notably rgb48Toxyz12 in swscale.c) process data for the full
-          specified *stride*.  This does not matter until we get to the last line:
-
-          block start   write start                                  line end
-          |..(padding)..|<------line-size------------->|XXXwrittenXXX|
-          |XXXwrittenXXX|<------line-size------------->|XXXwrittenXXX|
-          |XXXwrittenXXX|<------line-size------------->|XXXwrittenXXXXXXwrittenXXX
-                                                                      ^^^^ out of bounds
+       shared_ptr<Image> out (new Image(out_format, out_size, out_aligned));
+       out->make_black ();
  
-          To get around this, we ask Image to overallocate its buffers by the overrun.
-       */
+       AVPixFmtDescriptor const * in_desc = av_pix_fmt_desc_get (_pixel_format);
+       if (!in_desc) {
+               throw PixelFormatError ("crop_scale_window()", _pixel_format);
+       }
  
-       shared_ptr<Image> out (new Image (out_format, out_size, out_aligned, (out_size.width - inter_size.width) / 2));
-       out->make_black ();
+       /* Round down so that we crop only the number of pixels that is straightforward
+        * considering any subsampling.
+        */
+       Crop rounded_crop(
+               round_width_for_subsampling(crop.left, in_desc),
+               round_width_for_subsampling(crop.right, in_desc),
+               round_height_for_subsampling(crop.top, in_desc),
+               round_height_for_subsampling(crop.bottom, in_desc)
+               );
  
         /* Size of the image after any crop */
-       dcp::Size const cropped_size = crop.apply (size ());
+       dcp::Size const cropped_size = rounded_crop.apply (size());
+
+       /* Hack: if we're not doing quite the crop that we were asked for, and we carry on scaling
+        * to the inter_size we were asked for, there is a small but noticeable wobble in the image
+        * luminance (#1872).  This hack means we will jump in steps of the subsampling distance
+        * in both crop and scale.
+        */
+       inter_size.width = round_width_for_subsampling(inter_size.width, in_desc);
+       inter_size.height = round_width_for_subsampling(inter_size.height, in_desc);
  
         /* Scale context for a scale from cropped_size to inter_size */
         struct SwsContext* scale_context = sws_getContext (
@@ -204,35 +227,27 @@ Image::crop_scale_window (
                 0, 1 << 16, 1 << 16
                 );
  
-       AVPixFmtDescriptor const * in_desc = av_pix_fmt_desc_get (_pixel_format);
-       if (!in_desc) {
-               throw PixelFormatError ("crop_scale_window()", _pixel_format);
-       }
-
         /* Prepare input data pointers with crop */
         uint8_t* scale_in_data[planes()];
         for (int c = 0; c < planes(); ++c) {
-               /* To work out the crop in bytes, start by multiplying
-                  the crop by the (average) bytes per pixel.  Then
-                  round down so that we don't crop a subsampled pixel until
-                  we've cropped all of its Y-channel pixels.
-               */
-               int const x = lrintf (bytes_per_pixel(c) * crop.left) & ~ ((int) in_desc->log2_chroma_w);
-               scale_in_data[c] = data()[c] + x + stride()[c] * (crop.top / vertical_factor(c));
+               int const x = lrintf(bytes_per_pixel(c) * rounded_crop.left);
+               scale_in_data[c] = data()[c] + x + stride()[c] * (rounded_crop.top / vertical_factor(c));
         }
  
-       /* Corner of the image within out_size */
-       Position<int> const corner ((out_size.width - inter_size.width) / 2, (out_size.height - inter_size.height) / 2);
-
         AVPixFmtDescriptor const * out_desc = av_pix_fmt_desc_get (out_format);
         if (!out_desc) {
                 throw PixelFormatError ("crop_scale_window()", out_format);
         }
  
+       /* Corner of the image within out_size */
+       Position<int> const corner (
+               round_width_for_subsampling((out_size.width - inter_size.width) / 2, out_desc),
+               round_height_for_subsampling((out_size.height - inter_size.height) / 2, out_desc)
+               );
+
         uint8_t* scale_out_data[out->planes()];
         for (int c = 0; c < out->planes(); ++c) {
-               /* See the note in the crop loop above */
-               int const x = lrintf (out->bytes_per_pixel(c) * corner.x) & ~ ((int) out_desc->log2_chroma_w);
+               int const x = lrintf(out->bytes_per_pixel(c) * corner.x);
                 scale_out_data[c] = out->data()[c] + x + out->stride()[c] * (corner.y / out->vertical_factor(c));
         }
  
@@ -245,6 +260,14 @@ Image::crop_scale_window (
  
         sws_freeContext (scale_context);
  
+       if (rounded_crop != Crop() && cropped_size == inter_size) {
+               /* We are cropping without any scaling or pixel format conversion, so FFmpeg may have left some
+                  data behind in our image.  Clear it out.  It may get to the point where we should just stop
+                  trying to be clever with cropping.
+               */
+               out->make_part_black (corner.x + cropped_size.width, out_size.width - cropped_size.width);
+       }
+
         return out;
  }
  
@@ -342,6 +365,36 @@ Image::swap_16 (uint16_t v)
         return ((v >> 8) & 0xff) | ((v & 0xff) << 8);
  }
  
+/** Zero a run of bytes in every row of plane 0.
+ *  Only the pixel formats listed in the switch below are handled.
+ *  No bounds checking is done on x or w: the write is x * bpp bytes into each
+ *  row and w * bpp bytes long, whatever the row length is.
+ *  @param x Offset, in pixels, from the start of each row at which to start zeroing.
+ *  @param w Number of pixels to zero in each row.
+ *  @throw PixelFormatError if the image's pixel format is not one of those handled.
+ */
+void
+Image::make_part_black (int x, int w)
+{
+       /* Reject anything we do not know how to handle before touching the data */
+       switch (_pixel_format) {
+       case AV_PIX_FMT_RGB24:
+       case AV_PIX_FMT_ARGB:
+       case AV_PIX_FMT_RGBA:
+       case AV_PIX_FMT_ABGR:
+       case AV_PIX_FMT_BGRA:
+       case AV_PIX_FMT_RGB555LE:
+       case AV_PIX_FMT_RGB48LE:
+       case AV_PIX_FMT_RGB48BE:
+       case AV_PIX_FMT_XYZ12LE:
+               break;
+       default:
+               throw PixelFormatError ("make_part_black()", _pixel_format);
+       }
+
+       int const rows = sample_size(0).height;
+       int const bytes_each = bytes_per_pixel(0);
+       int const row_step = stride()[0];
+       uint8_t* row = data()[0];
+
+       /* Byte offset and length of the run to zero; these are the same for every row */
+       int const start = x * bytes_each;
+       int const length = w * bytes_each;
+
+       for (int done = 0; done < rows; ++done) {
+               memset (row + start, 0, length);
+               row += row_step;
+       }
+}
+
  void
  Image::make_black ()
  {
@@ -814,14 +867,12 @@ Image::bytes_per_pixel (int c) const
   *
   *  @param p Pixel format.
   *  @param s Size in pixels.
- *  @param aligned true to make each row of this image aligned to a 32-byte boundary.
- *  @param extra_pixels Amount of extra "run-off" memory to allocate at the end of each plane in pixels.
+ *  @param aligned true to make each row of this image aligned to an ALIGNMENT-byte boundary.
   */
-Image::Image (AVPixelFormat p, dcp::Size s, bool aligned, int extra_pixels)
+Image::Image (AVPixelFormat p, dcp::Size s, bool aligned)
         : _size (s)
         , _pixel_format (p)
         , _aligned (aligned)
-       , _extra_pixels (extra_pixels)
  {
         allocate ();
  }
@@ -840,7 +891,7 @@ Image::allocate ()
  
         for (int i = 0; i < planes(); ++i) {
                 _line_size[i] = ceil (_size.width * bytes_per_pixel(i));
-               _stride[i] = stride_round_up (i, _line_size, _aligned ? 32 : 1);
+               _stride[i] = stride_round_up (i, _line_size, _aligned ? ALIGNMENT : 1);
  
                 /* The assembler function ff_rgb24ToY_avx (in libswscale/x86/input.asm)
                    uses a 16-byte fetch to read three bytes (R/G/B) of image data.
@@ -853,15 +904,38 @@ Image::allocate ()
  
                    Further to the above, valgrind is now telling me that ff_rgb24ToY_ssse3
                    over-reads by more then _avx.  I can't follow the code to work out how much,
-                  so I'll just over-allocate by 32 bytes and have done with it.  Empirical
+                  so I'll just over-allocate by ALIGNMENT bytes and have done with it.  Empirical
                    testing suggests that it works.
+
+                  In addition to these concerns, we may read/write as much as a whole extra line
+                  at the end of each plane in cases where we are messing with offsets in order to
+                  do pad or crop.  To solve this we over-allocate by an extra _stride[i] bytes.
+
+                  As an example: we may write to images starting at an offset so we get some padding.
+                  Hence we want to write in the following pattern:
+
+                  block start   write start                                  line end
+                  |..(padding)..|<------line-size------------->|..(padding)..|
+                  |..(padding)..|<------line-size------------->|..(padding)..|
+                  |..(padding)..|<------line-size------------->|..(padding)..|
+
+                  where line-size is of the smaller (inter_size) image and the full padded line length is that of
+                  out_size.  To get things to work we have to tell FFmpeg that the stride is that of out_size.
+                  However some parts of FFmpeg (notably rgb48Toxyz12 in swscale.c) process data for the full
+                  specified *stride*.  This does not matter until we get to the last line:
+
+                  block start   write start                                  line end
+                  |..(padding)..|<------line-size------------->|XXXwrittenXXX|
+                  |XXXwrittenXXX|<------line-size------------->|XXXwrittenXXX|
+                  |XXXwrittenXXX|<------line-size------------->|XXXwrittenXXXXXXwrittenXXX
+                                                                              ^^^^ out of bounds
                 */
-               _data[i] = (uint8_t *) wrapped_av_malloc (_stride[i] * sample_size(i).height + _extra_pixels * bytes_per_pixel(i) + 32);
+               _data[i] = (uint8_t *) wrapped_av_malloc (_stride[i] * (sample_size(i).height + 1) + ALIGNMENT);
  #if HAVE_VALGRIND_MEMCHECK_H
                 /* The data between the end of the line size and the stride is undefined but processed by
                    libswscale, causing lots of valgrind errors.  Mark it all defined to quell these errors.
                 */
-               VALGRIND_MAKE_MEM_DEFINED (_data[i], _stride[i] * sample_size(i).height + _extra_pixels * bytes_per_pixel(i) + 32);
+               VALGRIND_MAKE_MEM_DEFINED (_data[i], _stride[i] * (sample_size(i).height + 1) + ALIGNMENT);
  #endif
         }
  }
@@ -871,7 +945,6 @@ Image::Image (Image const & other)
         , _size (other._size)
         , _pixel_format (other._pixel_format)
         , _aligned (other._aligned)
-       , _extra_pixels (other._extra_pixels)
  {
         allocate ();
  
@@ -891,7 +964,6 @@ Image::Image (AVFrame* frame)
         : _size (frame->width, frame->height)
         , _pixel_format (static_cast<AVPixelFormat> (frame->format))
         , _aligned (true)
-       , _extra_pixels (0)
  {
         allocate ();
  
@@ -912,7 +984,6 @@ Image::Image (shared_ptr<const Image> other, bool aligned)
         : _size (other->_size)
         , _pixel_format (other->_pixel_format)
         , _aligned (aligned)
-       , _extra_pixels (other->_extra_pixels)
  {
         allocate ();
  
@@ -954,7 +1025,6 @@ Image::swap (Image & other)
         }
  
         std::swap (_aligned, other._aligned);
-       std::swap (_extra_pixels, other._extra_pixels);
  }
  
  /** Destroy a Image */