X-Git-Url: https://git.carlh.net/gitweb/?a=blobdiff_plain;f=libs%2Fardour%2Fsse_functions_xmm.cc;h=6eac488a253a3af89df5f7044a1e60d07ed126b2;hb=4cab03887ccc311e2b195c81703811c1cb6aeb1b;hp=c1f11c4c8381f620c6baff13d81fec49b85974ce;hpb=c4ac43749048c4c0e0ab3656d39384112a628742;p=ardour.git diff --git a/libs/ardour/sse_functions_xmm.cc b/libs/ardour/sse_functions_xmm.cc index c1f11c4c83..6eac488a25 100644 --- a/libs/ardour/sse_functions_xmm.cc +++ b/libs/ardour/sse_functions_xmm.cc @@ -22,7 +22,7 @@ #include "ardour/types.h" void -x86_sse_find_peaks(const ARDOUR::Sample* buf, ARDOUR::nframes_t nframes, float *min, float *max) +x86_sse_find_peaks(const ARDOUR::Sample* buf, ARDOUR::pframes_t nframes, float *min, float *max) { __m128 current_max, current_min, work; @@ -31,7 +31,7 @@ x86_sse_find_peaks(const ARDOUR::Sample* buf, ARDOUR::nframes_t nframes, float * current_max = _mm_set1_ps(*max); // Work input until "buf" reaches 16 byte alignment - while ( ((unsigned long)buf) % 16 != 0 && nframes > 0) { + while ( ((intptr_t)buf) % 16 != 0 && nframes > 0) { // Load the next float into the work buffer work = _mm_set1_ps(*buf); @@ -45,8 +45,11 @@ x86_sse_find_peaks(const ARDOUR::Sample* buf, ARDOUR::nframes_t nframes, float * // use 64 byte prefetch for quadruple quads while (nframes >= 16) { +#ifdef COMPILER_MSVC + _mm_prefetch(((char*)buf+64), 0); // A total guess! Assumed to be eqivalent to +#else // the line below but waiting to be tested !! __builtin_prefetch(buf+64,0,0); - +#endif work = _mm_load_ps(buf); current_min = _mm_min_ps(current_min, work); current_max = _mm_max_ps(current_max, work);