A similar question has been asked before (301735) but I am reposting
in hopes that someone may be able to provide source code (or can
further optimize the source code below).
We have a need for a bilinear interpolation algorithm that can
interpolate a 2MP (2000x1000) image at an ideal rate of 40-50/s. On a
2GHz DualCore (laptop) with 2GB of RAM, the attached code does 50
successive interpolations in about 3.5-4 seconds (< 15/s).
The code implements the standard bilinear interpolation algorithm for
which there are multiple references to on the web. The above stated
performance is about the best I have been able to achieve with this
Any solution, be it inline assembly or C is acceptable provided it can
perform to meet our needs.
#include "stdafx.h"
#include <math.h>
#include <time.h>
#include <conio.h>
#include <memory>
struct RECT
int Top;
int Bottom;
int Left;
int Right;
template <class T>
void Interpolate(
RECT srcRegionRect,
T* pSrcPixelData,
unsigned int srcWidth,
unsigned int srcHeight,
unsigned int srcBytesPerPixel,
RECT dstRegionRect,
unsigned char* pDstPixelData,
unsigned int dstWidth,
unsigned int dstBytesPerPixel,
bool swapXY,
unsigned char* pLutData,
bool isRGB,
bool isPlanar,
bool IsSigned)
unsigned int dstRegionHeight, dstRegionWidth,
xDstStride, yDstStride,
xDstIncrement, yDstIncrement;
if (swapXY)
dstRegionHeight = abs(dstRegionRect.Right - dstRegionRect.Left) + 1;
dstRegionWidth = abs(dstRegionRect.Bottom - dstRegionRect.Top) + 1;
xDstStride = dstWidth * dstBytesPerPixel;
yDstStride = dstBytesPerPixel;
xDstIncrement = ((dstRegionRect.Bottom - dstRegionRect.Top) < 0 ?
-1: 1) * xDstStride;
yDstIncrement = ((dstRegionRect.Right - dstRegionRect.Left) < 0 ?
-1: 1) * yDstStride;
pDstPixelData += (dstRegionRect.Top * xDstStride) +
(dstRegionRect.Left * yDstStride);
dstRegionHeight = abs(dstRegionRect.Bottom - dstRegionRect.Top) + 1;
dstRegionWidth = abs(dstRegionRect.Right - dstRegionRect.Left) + 1;
xDstStride = dstBytesPerPixel;
yDstStride = dstWidth * dstBytesPerPixel;
xDstIncrement = ((dstRegionRect.Right - dstRegionRect.Left) <
0 ? -1: 1) * xDstStride;
yDstIncrement = ((dstRegionRect.Bottom - dstRegionRect.Top) <
0 ? -1: 1) * yDstStride;
pDstPixelData += (dstRegionRect.Top * yDstStride) +
(dstRegionRect.Left * xDstStride);
int xSrcStride, ySrcStride;
if (isRGB && !isPlanar)
xSrcStride = 3;
ySrcStride = srcWidth * 3;
xSrcStride = 1;
ySrcStride = srcWidth;
int srcNextChannelOffset = 0;
if (isRGB)
if (!isPlanar)
srcNextChannelOffset = 1;
srcNextChannelOffset = srcWidth * srcHeight;
int srcRegionWidth = srcRegionRect.Right - srcRegionRect.Left;
int srcRegionHeight = srcRegionRect.Bottom - srcRegionRect.Top;
int xSrcIncrementDirection = (srcRegionWidth < 0) ? -1 : 1; //set the
int ySrcIncrementDirection = (srcRegionHeight < 0) ? -1 : 1;
srcRegionWidth = srcRegionWidth * xSrcIncrementDirection + 1;
srcRegionHeight = srcRegionHeight * ySrcIncrementDirection + 1;
//remove the sign from w/h and add 1
float srcSlightlyLessThanWidthMinusOne = (float)srcWidth - 1.001f;
float srcSlightlyLessThanHeightMinusOne = (float)srcHeight - 1.001f;
float xRatio = (float)dstRegionWidth / srcRegionWidth * xSrcIncrementDirection;
float yRatio = (float)dstRegionHeight / srcRegionHeight *
for (unsigned int y = 0; y < dstRegionHeight; ++y)
float ySrcCoord = srcRegionRect.Top + y / yRatio;
//a necessary evil, I'm afraid.
//if (ySrcCoord < 0)
// ySrcCoord = 0;
//else if (ySrcCoord > srcSlightlyLessThanHeightMinusOne)
// ySrcCoord = srcSlightlyLessThanHeightMinusOne; //force it to be
just barely before the last pixel.
int ySrcPixel = (int)ySrcCoord;
float dy = ySrcCoord - (float)ySrcPixel;
unsigned char* pRowDstPixelData = pDstPixelData;
T* pRowSrcPixelData = pSrcPixelData + ySrcPixel * ySrcStride;
for (unsigned int x = 0; x < dstRegionWidth; ++x)
float xSrcCoord = srcRegionRect.Left + x / xRatio;
//a necessary evil, I'm afraid.
//if (xSrcCoord < 0)
// xSrcCoord = 0;
//if (xSrcCoord > srcSlightlyLessThanWidthMinusOne)
// xSrcCoord = srcSlightlyLessThanWidthMinusOne; //force it to be
just barely before the last pixel.
int xSrcPixel = (int)xSrcCoord;
float dx = xSrcCoord - (float)xSrcPixel;
T* pSrcPixel00 = pRowSrcPixelData + xSrcPixel;
T* pSrcPixel01 = pSrcPixel00 + 1;
T* pSrcPixel10 = pSrcPixel00 + srcWidth;
T* pSrcPixel11 = pSrcPixel10 + 1;
float yInterpolated1 = (float)(*pSrcPixel00) + (*pSrcPixel10 -
*pSrcPixel00) * dy;
float yInterpolated2 = (float)(*pSrcPixel01) + (*pSrcPixel11 -
*pSrcPixel01) * dy;
T IFinal = (T)(yInterpolated1 + (yInterpolated2 - yInterpolated1) * dx);
unsigned char value = pLutData[IFinal];
pRowDstPixelData[0] = value; //B
pRowDstPixelData[1] = value; //G
pRowDstPixelData[2] = value; //R
pRowDstPixelData[3] = 0xff; //A
pRowDstPixelData += xDstIncrement;
pDstPixelData += yDstIncrement;
int _tmain(int argc, _TCHAR* argv[])
RECT srcrect;
RECT dstrect;
int imageheight = 1900;
int imagewidth = 1000;
srcrect.Top = 0;
srcrect.Left = 0;
srcrect.Bottom = imageheight;
srcrect.Right = imagewidth;
dstrect.Top = 0;
dstrect.Left = 0;
dstrect.Bottom = imageheight;
dstrect.Right = imagewidth;
std::auto_ptr<unsigned short> spimage(new unsigned
short[srcrect.Right * srcrect.Bottom]);
std::auto_ptr<unsigned short> spdstimage(new unsigned short[4 *
dstrect.Right * dstrect.Bottom]);
std::auto_ptr<unsigned char> splut(new unsigned char[(int)(pow(2.0,
sizeof(unsigned short) * 8.0))]);
clock_t start, finish;
double duration;
start = clock();
int nimages = 50;
for (int i = 0; i < nimages; ++i)
Interpolate<unsigned short>( srcrect,
(unsigned short*)spimage.get(),
srcrect.Right + 1,
srcrect.Bottom + 1,
(unsigned char*)spdstimage.get(),
dstrect.Right + 1,
(unsigned char*)splut.get(),
finish = clock();
duration = (double)(finish - start) / CLOCKS_PER_SEC;
printf( "%.4f seconds (%d images)\n", duration, 50);
duration = nimages / duration;
printf( "%.4f f/s\n", duration );
return 0;
} |