oh well.. if any of you remember me from this post :
http://www.hard-light.net/forums/index.php/topic,40950.20.htmlanyways.. its been quite a long time since that post.
i recently began to learn in college, i understand C much better than before. C++ too;
in one of the classes it was boring, and i had an urge to.. make up for what i wrote two years ago for freespace.
i came back home and i started to code a bit.
oh well; i know i said i'll never do software shaders again, but the methods i used back then were.. really badly approach.
*first of all, RGBImage is a class which contains r,g,b matrix array (a matrix array for each color channel), and also w, h (width, height)
*essential functions :
float pitagoras(int a, int b) { return ( sqrt(float(a*a+b*b)));}
float sqr(float a, float b) {return (a*a);}
float dist(int x1, int y1, int x2, int y2) { return (sqrt((float)(((x2-x1)*(x2-x1))+((y2-y1)*(y2-y1)))));}
unsigned char checkcolor(int color)
{
if (color < 0)
return 0;
else if (color > 255)
return 255;
else
return color;
}
*all the tests are performed on a 2ghz pentium 4 computer.
*all the tests are using these two images :


let's begin with the oldies..
i kinda converted the old bloom function to use RGBImage, something was messed up, so its kinda screwed, but i believe the timings didn't change.
this old function uses a bizzare method that makes up 3x3 kernel using glow values for each color to speed up the things and just adds it for every pixel over the same image, so its kinda overwriting it and that is what causes the smooth ugly blurs.
static unsigned char glowtable[255] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,255,255,255,255,255,255,255,255,255};
void drawglowrect(RGBImage &blurImg, int x, int y,int counterlocation, unsigned char glowr, unsigned char glowg, unsigned char glowb)
{
int currecty, currectx;
currecty = y-1;
currectx = x-1;
unsigned char r, g, b;
if (currectx <= 0 || currectx >= blurImg.w || currecty <= 0 || currecty >= blurImg.h) printf("warning!");
blurImg.r[currectx][currecty] = glowtable[glowr];
blurImg.g[currectx][currecty] = glowtable[glowg];
blurImg.b[currectx][currecty] = glowtable[glowb];
while (currecty <= (y+1))
{
++currectx;
if (currectx >= (x+1))
{
currectx = x-1;
++currecty;
}
if (currectx == -1)
{
++currectx;
}
if (currectx == -2)
{
currectx = currectx+2;
}
if (currecty == -2)
{
currecty = currecty+2;
}
if (currecty == -1)
{
++currecty;
}
if ((currectx == x) && (currecty == y))
{
++currectx;
}
//counterlocation = (currecty*640+currectx)*3;
if (currectx <= 0 || currectx >= blurImg.w || currecty <= 0 || currecty >= blurImg.h) printf("warning! %d %d", currectx, currecty);
r = blurImg.r[currectx][currecty];
g = blurImg.g[currectx][currecty];
b = blurImg.b[currectx][currecty];
blurImg.r[currectx][currecty] = checkcolor(((glowtable[glowr]+r)/2));
blurImg.g[currectx][currecty] = checkcolor(((glowtable[glowg]+g)/2));
blurImg.b[currectx][currecty] = checkcolor(((glowtable[glowb]+b)/2));
}
}
void old_bloom(RGBImage &img, int minglow)
{
RGBImage blurImg(img.w, img.h);
Uint32 time1 = SDL_GetTicks();
unsigned char r, g, b; // itay390 wrote : first pass
int coloraverage;
int x = 2;
int y = 2;
while (y < (img.h-3))
{
++x;
if (x == (img.w-2))
{
x = 2;
++y;
}
//counterlocation = (y*640+x)*3;
if (x <= 0 || x >= img.w || y <= 0 || y >= img.h) printf("warning!");
r = img.r[x][y];
g = img.g[x][y];
b = img.b[x][y];
coloraverage = (r+g+b);
if (coloraverage > minglow)
drawglowrect(img, x, y, 0, r, g, b);
}
/*
x = img.w-2; //itay390 wrote : this is another pass. get first the fps high , this isn't worth much right now
y = 2;
counterlocation = 5754;
while (y < img.h-3)
{
--x;
counterlocation = counterlocation-3;
if (x == 0)
{
x = img.w-2;
++y;
counterlocation = counterlocation+3834;
}
//counterlocation = (y*640+x)*3;
r = img.r[x][y];
g = img.g[x][y];
b = img.b[x][y];
coloraverage = (r+g+b);
if (coloraverage > minglow)
drawglowrect(blurImg, x, y, counterlocation, r, g, b);
}*/
Uint32 time2 = SDL_GetTicks();
printf("bloom took %f seconds\n", (time2-time1)/1000.0);
}
test : uses ->
old_bloom(img, 300);
result 1:

bloom took 0.119000 seconds
result 2:

bloom took 0.152000 seconds
under 640x480 : bloom took 0.034000 seconds
function number two, is the old bloom number two. a much slower function which uses a blur layer, instead of using the same image and adding pixels and overwriting whats already inside. the result is much more beautiful, photoshop'ic like. its a good thing that this function does not increase its time by the surrounding size of the samples for each pixel (kernel size) too much.
void collectkernelsum(RGBImage &img, int kernelsize,int minbrightness, int kernelx, int kernely, int *rsum, int *gsum, int *bsum, int *rsumold, int *gsumold, int *bsumold, int *count, bool *notfullkernel, bool *insertthis)
{
int w = img.w;
int h = img.h;
int x1 = kernelx-kernelsize;
int x2 = kernelx+kernelsize;
int y1 = kernely-kernelsize;
int y2 = kernely+kernelsize;
int x = x1;
int y = y1;
unsigned char r,g,b;
*insertthis = 0;
if (*notfullkernel == true)
{
y = y1;
while (y <= y2)
{
r = img.r[x1][y];
g = img.g[x1][y];
b = img.b[x1][y];
if ((r > minbrightness) && (g > minbrightness) && (b > minbrightness))
{
*rsum = *rsum-r;
*gsum = *gsum-g;
*bsum = *bsum-b;
}
++y;
}
y = y1;
while (y <= y2)
{
r = img.r[x2][y];
g = img.g[x2][y];
b = img.b[x2][y];
if ((r > minbrightness) && (g > minbrightness) && (b > minbrightness))
{
*rsum = *rsum+r;
*gsum = *gsum+g;
*bsum = *bsum+b;
*insertthis = 1;
}
++y;
}
}
else
{
if (kernely == kernelsize)
{
*count = 0;
while (y <= y2)
{
++x;
if (x == x2)
{
x = x1;
++y;
}
r = img.r[x][y];
g = img.g[x][y];
b = img.b[x][y];
if ((r > minbrightness) && (g > minbrightness) && (b > minbrightness))
{
*rsum = *rsum+r;
*gsum = *gsum+g;
*bsum = *bsum+b;
*insertthis = 1;
}
++(*count);
}
}
else
{
*rsum = *rsumold;
*gsum = *gsumold;
*bsum = *bsumold;
while (x <= x2)
{
r = img.r[x][y1];
g = img.g[x][y1];
b = img.b[x][y1];
if ((r > minbrightness) && (g > minbrightness) && (b > minbrightness))
{
*rsum = *rsum-r;
*gsum = *gsum-g;
*bsum = *bsum-b;
}
++x;
}
x = x1;
while (x <= x2)
{
r = img.r[x][y2];
g = img.g[x][y2];
b = img.b[x][y2];
if ((r > minbrightness) && (g > minbrightness) && (b > minbrightness))
{
*rsum = *rsum+r;
*gsum = *gsum+g;
*bsum = *bsum+b;
*insertthis = 1;
}
++x;
}
}
*rsumold = *rsum;
*gsumold = *gsum;
*bsumold = *bsum;
}
}
void old_bloom2(RGBImage &img, int kernelsize,int minbrightness, int multiplation)
{
RGBImage blurImg(img.w, img.h);
Uint32 time1 = SDL_GetTicks();
int w = img.w;
int h = img.h;
int layerx = 0;
int layery = 0;
int kernelx = kernelsize;
int kernely = kernelsize;
int x = 0;
int y = 0;
int rsum = 0;
int gsum = 0;
int bsum = 0;
int rsumold = 0;
int gsumold = 0;
int bsumold = 0;
int count = 1;
unsigned char r, g, b;
bool notfullkernel = 0;
bool insertthis = 0;
while (kernely < (h-kernelsize))
{
collectkernelsum(img, kernelsize, minbrightness, kernelx, kernely, &rsum, &gsum, &bsum, &rsumold, &gsumold, &bsumold, &count, ¬fullkernel, &insertthis);
if (insertthis == 1)
{
blurImg.r[kernelx][kernely] = checkcolor((rsum/count)*multiplation);
blurImg.g[kernelx][kernely] = checkcolor((gsum/count)*multiplation);
blurImg.b[kernelx][kernely] = checkcolor((bsum/count)*multiplation);
}
++kernelx;
if (kernelx == (w-kernelsize))
{
kernelx = kernelsize;
++kernely;
}
if (kernelx == kernelsize)
{
notfullkernel = 0;
rsum = 0;
gsum = 0;
bsum = 0;
}
else
notfullkernel = 1;
}
unsigned char r2, g2, b2;
while (y < h)
{
++x;
if (x == w)
{
x = 0;
++y;
}
r2 = blurImg.r[x][y];
g2 = blurImg.g[x][y];
b2 = blurImg.b[x][y];
r = img.r[x][y];
g = img.g[x][y];
b = img.b[x][y];
if (r2 > minbrightness && b2 > minbrightness && g2 > minbrightness)
{
img.r[x][y] = (r + r2)/2;
img.g[x][y] = (g + g2)/2;
img.b[x][y] = (b + b2)/2;
}
else
{
img.r[x][y] = r;
img.g[x][y] = g;
img.b[x][y] = b;
}
}
Uint32 time2 = SDL_GetTicks();
printf("bloom took %f seconds\n", (time2-time1)/1000.0);
}
test : uses ->
old_bloom2(img, 5, 50, 2);
the minimum brightness is set to 50, so it won't apply bloom over all the image, but only on bright parts.
result 1:

bloom took 0.775000 seconds
:O now we can see that this function photoshop'ic look sure pays the price, cause its much slower than the old_bloom function
result 2:

bloom took 0.803000 seconds
a test on 640x480 resolution : bloom took 0.161000 seconds
anyways, let's head to the new bloom functions.
first of all, all of them are with blur layers.
which means, the bloom, is produced by blurring the image on an external layer, and then mixing the image with the original image.
at the first bloom functions, i wanted to try blooming by downsampling an image and then resampling it back to its original resolution. which would create the blurred look.
i imagine some of the functions can be optimized more.
anyways, bloom1 uses downsampling and resampling on a basic way. it makes a pixelated "blurred layer" instead of a smooth bloom. what makes downsampling and resampling better than collecting sum of pixels around an a pixel and then averaging, is that downsampling and resampling works faster when i use less points / more blur, instead the opposite in the sum of pixels around a pixel and average, which its time increases as its getting more blurred.
void bloom1(RGBImage &img, int kernel)
{
RGBImage miniImg(img.w/kernel, img.h/kernel);
RGBImage blurImg(img.w, img.h);
Uint32 time1 = SDL_GetTicks();
int miniImgX = 0, miniImgY = 0;
for (int y = 0; y < img.h; y += kernel)
{
for (int x = 0; x < img.w; x += kernel)
{
if (((img.w-x) > kernel) && ((img.h-y) > kernel))
{
int sumR = 0, sumG = 0, sumB = 0;
for (int kernelCountX = 0, newX = x; kernelCountX != kernel; kernelCountX++, newX++)
for (int kernelCountY = 0, newY = y; kernelCountY != kernel; kernelCountY++, newY++)
{
sumR += img.r[newX][newY];
sumG += img.g[newX][newY];
sumB += img.b[newX][newY];
}
sumR /= kernel*kernel;
sumG /= kernel*kernel;
sumB /= kernel*kernel;
miniImg.r[miniImgX][miniImgY] = sumR;
miniImg.g[miniImgX][miniImgY] = sumG;
miniImg.b[miniImgX][miniImgY] = sumB;
miniImgX++;
}
}
miniImgX = 0;
miniImgY++;
}
for (miniImgX = 0; miniImgX != miniImg.w; miniImgX++)
for (miniImgY = 0; miniImgY != miniImg.h; miniImgY++)
{
for (int x = miniImgX*kernel, kernelCountX = 0; kernelCountX != kernel; kernelCountX++, x++ )
for (int y = miniImgY*kernel, kernelCountY = 0; kernelCountY != kernel; kernelCountY++, y++ )
{
blurImg.r[x][y] = miniImg.r[miniImgX][miniImgY];
blurImg.g[x][y] = miniImg.g[miniImgX][miniImgY];
blurImg.b[x][y] = miniImg.b[miniImgX][miniImgY];
}
}
for (int x = 0; x != img.w; x++)
for (int y = 0; y != img.h; y++)
{
img.r[x][y] = (img.r[x][y]+blurImg.r[x][y])/2;
img.g[x][y] = (img.g[x][y]+blurImg.g[x][y])/2;
img.b[x][y] = (img.b[x][y]+blurImg.b[x][y])/2;
}
Uint32 time2 = SDL_GetTicks();
printf("bloom took %f seconds\n", (time2-time1)/1000.0);
}
test : uses ->
bloom1(img, 5);
result 1 :

bloom took 0.075000 seconds
result 2 :

bloom took 0.075000 seconds
well, i imagine it would look better if the kernel was lower.
hmmm.. let's try setting the kernel size to 3
bloom1(img, 3);
result 3 :

bloom took 0.113000 seconds
well, i guess it doesn't look particularly good, but its pretty fast.
test on 640x480 , with kernel size 3, showed : bloom took 0.019000 seconds
which means allowing around 50 FPS on that resolution.
from here on, the next bloom functions which uses downsampling and resampling are trying to interpolate.
sometimes it works, sometimes it doesn't.
from my concern, bloom2 is a failure.
it trys to interpolate using distance calculations from the 4 nearby pixels of the downsampled image.
but it doesn't really works.
void bloom2(RGBImage &img, int kernel)
{
RGBImage miniImg(img.w/kernel, img.h/kernel);
RGBImage blurImg(img.w, img.h);
Uint32 time1 = SDL_GetTicks();
int miniImgX = 0, miniImgY = 0;
for (int y = 0; y < img.h; y += kernel)
{
for (int x = 0; x < img.w; x += kernel)
{
if (((img.w-x) > kernel) && ((img.h-y) > kernel))
{
int sumR = 0, sumG = 0, sumB = 0;
for (int kernelCountX = 0, newX = x; kernelCountX != kernel; kernelCountX++, newX++)
for (int kernelCountY = 0, newY = y; kernelCountY != kernel; kernelCountY++, newY++)
{
sumR += img.r[newX][newY];
sumG += img.g[newX][newY];
sumB += img.b[newX][newY];
}
sumR /= kernel*kernel;
sumG /= kernel*kernel;
sumB /= kernel*kernel;
miniImg.r[miniImgX][miniImgY] = sumR;
miniImg.g[miniImgX][miniImgY] = sumG;
miniImg.b[miniImgX][miniImgY] = sumB;
miniImgX++;
}
}
miniImgX = 0;
miniImgY++;
}
for (miniImgY = 0; miniImgY != (miniImg.h-1); miniImgY++)
for (miniImgX = 0; miniImgX != (miniImg.w-1); miniImgX++)
{
unsigned char topLeftColorR = miniImg.r[miniImgX][miniImgY];
unsigned char topLeftColorG = miniImg.g[miniImgX][miniImgY];
unsigned char topLeftColorB = miniImg.b[miniImgX][miniImgY];
unsigned char topRightColorR = miniImg.r[miniImgX+1][miniImgY];
unsigned char topRightColorG = miniImg.g[miniImgX+1][miniImgY];
unsigned char topRightColorB = miniImg.b[miniImgX+1][miniImgY];
int precentTopLeft = 100;
int precentTopRight = 0;
int kernelAddition = 100/kernel;
int x, y, kernelCountX, kernelCountY;
for (x = miniImgX*kernel, y = miniImgY*kernel, kernelCountX = 0; kernelCountX != kernel; kernelCountX++, x++ )
{
blurImg.r[x][y] = (topLeftColorR*precentTopLeft+topRightColorR*precentTopRight)/100;
blurImg.g[x][y] = (topLeftColorG*precentTopLeft+topRightColorG*precentTopRight)/100;
blurImg.b[x][y] = (topLeftColorB*precentTopLeft+topRightColorB*precentTopRight)/100;
precentTopLeft -= kernelAddition;
precentTopRight += kernelAddition;
}
unsigned char bottomLeftColorR = miniImg.r[miniImgX][miniImgY+1];
unsigned char bottomLeftColorG = miniImg.g[miniImgX][miniImgY+1];
unsigned char bottomLeftColorB = miniImg.b[miniImgX][miniImgY+1];
unsigned char bottomRightColorR = miniImg.r[miniImgX+1][miniImgY+1];
unsigned char bottomRightColorG = miniImg.g[miniImgX+1][miniImgY+1];
unsigned char bottomRightColorB = miniImg.b[miniImgX+1][miniImgY+1];
int precentBottomLeft = 100;
int precentBottomRight = 0;
for (x = miniImgX*kernel, y = (miniImgY+1)*kernel, kernelCountX = 0; kernelCountX != kernel; kernelCountX++, x++ )
{
blurImg.r[x][y] = (bottomLeftColorR*precentBottomLeft+bottomRightColorR*precentBottomRight)/100;
blurImg.g[x][y] = (bottomLeftColorR*precentBottomLeft+bottomRightColorR*precentBottomRight)/100;
blurImg.b[x][y] = (bottomLeftColorR*precentBottomLeft+bottomRightColorR*precentBottomRight)/100;
precentTopLeft -= kernelAddition;
precentTopRight += kernelAddition;
}
precentTopLeft = 100;
precentBottomLeft = 0;
for (x = miniImgX*kernel, y = miniImgY*kernel, kernelCountY = 0; kernelCountY != kernel; kernelCountY++, y++ )
{
blurImg.r[x][y] = (topLeftColorR*precentTopLeft+bottomLeftColorR*precentBottomLeft)/100;
blurImg.g[x][y] = (topLeftColorG*precentTopLeft+bottomLeftColorG*precentBottomLeft)/100;
blurImg.b[x][y] = (topLeftColorB*precentTopLeft+bottomLeftColorB*precentBottomLeft)/100;
precentTopLeft -= kernelAddition;
precentBottomLeft += kernelAddition;
}
precentTopRight = 100;
precentBottomRight = 0;
for (x = (miniImgX+1)*kernel, y = miniImgY*kernel, kernelCountY = 0; kernelCountY != kernel; kernelCountY++, y++ )
{
blurImg.r[x][y] = (topRightColorR*precentTopRight+bottomRightColorR*precentBottomRight)/100;
blurImg.g[x][y] = (topRightColorG*precentTopRight+bottomRightColorG*precentBottomRight)/100;
blurImg.b[x][y] = (topRightColorB*precentTopRight+bottomRightColorB*precentBottomRight)/100;
precentTopRight -= kernelAddition;
precentBottomRight += kernelAddition;
}
int precentTop, precentBottom, precentRight, precentLeft;
for (x = (miniImgX*kernel+1), kernelCountX = 1, precentLeft = 100, precentRight = 0; kernelCountX != kernel; x++, kernelCountX++, precentLeft -= kernelAddition, precentRight += kernelAddition)
for (y = (miniImgY*kernel+1), kernelCountY = 1, precentTop = 100, precentBottom = 0; kernelCountY != kernel; y++, kernelCountY++, precentTop -= kernelAddition, precentBottom += kernelAddition)
{
blurImg.r[x][y] = ((blurImg.r[miniImgX*kernel+kernelCountX][miniImgY*kernel]*precentTop+
blurImg.r[miniImgX*kernel+kernelCountX][(miniImgY+1)*kernel]*precentBottom)/100+
(blurImg.r[miniImgX*kernel][miniImgY*kernel+kernelCountY]*precentLeft+
blurImg.r[(miniImgX+1)*kernel][miniImgY*kernel+kernelCountY]*precentRight)/100)/2;
blurImg.g[x][y] = ((blurImg.g[miniImgX*kernel+kernelCountX][miniImgY*kernel]*precentTop+
blurImg.g[miniImgX*kernel+kernelCountX][(miniImgY+1)*kernel]*precentBottom)/100+
(blurImg.g[miniImgX*kernel][miniImgY*kernel+kernelCountY]*precentLeft+
blurImg.g[(miniImgX+1)*kernel][miniImgY*kernel+kernelCountY]*precentRight)/100)/2;
blurImg.b[x][y] = ((blurImg.b[miniImgX*kernel+kernelCountX][miniImgY*kernel]*precentTop+
blurImg.b[miniImgX*kernel+kernelCountX][(miniImgY+1)*kernel]*precentBottom)/100+
(blurImg.b[miniImgX*kernel][miniImgY*kernel+kernelCountY]*precentLeft+
blurImg.b[(miniImgX+1)*kernel][miniImgY*kernel+kernelCountY]*precentRight)/100)/2;
/*blurImg.g[x][y] = (blurImg.g[miniImgX*kernel+kernelCountX][miniImgY*kernel]+
blurImg.g[miniImgX*kernel+kernelCountX][(miniImgY+1)*kernel]+
blurImg.g[miniImgX*kernel][miniImgY*kernel+kernelCountY]+
blurImg.g[(miniImgX+1)*kernel][miniImgY*kernel+kernelCountY])/4;
blurImg.b[x][y] = (blurImg.b[miniImgX*kernel+kernelCountX][miniImgY*kernel]+
blurImg.b[miniImgX*kernel+kernelCountX][(miniImgY+1)*kernel]+
blurImg.b[miniImgX*kernel][miniImgY*kernel+kernelCountY]+
blurImg.b[(miniImgX+1)*kernel][miniImgY*kernel+kernelCountY])/4;*/
}
}
for (int x = 0; x != img.w; x++)
for (int y = 0; y != img.h; y++)
{
/*img.r[x][y] = blurImg.r[x][y];
img.g[x][y] = blurImg.g[x][y];
img.b[x][y] = blurImg.b[x][y];*/
img.r[x][y] = (img.r[x][y]+blurImg.r[x][y])/2;
img.g[x][y] = (img.g[x][y]+blurImg.g[x][y])/2;
img.b[x][y] = (img.b[x][y]+blurImg.b[x][y])/2;
}
Uint32 time2 = SDL_GetTicks();
printf("bloom took %f seconds\n", (time2-time1)/1000.0);
}
test : uses ->
bloom2(img, 5);
result 1 :

bloom took 0.171000 seconds
result 2:

bloom took 0.165000 seconds
hmm.. looks a bit like puzzle. and not particularly fast either.
bloom3, trys to interpolate again, using distance calculations. in a bit different way.
void bloom3(RGBImage &img, int kernel)
{
RGBImage miniImg(img.w/kernel, img.h/kernel);
RGBImage blurImg(img.w, img.h);
Uint32 time1 = SDL_GetTicks();
int miniImgX = 0, miniImgY = 0;
for (int y = 0; y < img.h; y += kernel)
{
for (int x = 0; x < img.w; x += kernel)
{
if (((img.w-x) > kernel) && ((img.h-y) > kernel))
{
int sumR = 0, sumG = 0, sumB = 0;
for (int kernelCountX = 0, newX = x; kernelCountX != kernel; kernelCountX++, newX++)
for (int kernelCountY = 0, newY = y; kernelCountY != kernel; kernelCountY++, newY++)
{
sumR += img.r[newX][newY];
sumG += img.g[newX][newY];
sumB += img.b[newX][newY];
}
sumR /= kernel*kernel;
sumG /= kernel*kernel;
sumB /= kernel*kernel;
miniImg.r[miniImgX][miniImgY] = sumR;
miniImg.g[miniImgX][miniImgY] = sumG;
miniImg.b[miniImgX][miniImgY] = sumB;
miniImgX++;
}
}
miniImgX = 0;
miniImgY++;
}
for (miniImgY = 0; miniImgY != (miniImg.h-1); miniImgY++)
for (miniImgX = 0; miniImgX != (miniImg.w-1); miniImgX++)
{
for (int x = miniImgX*kernel, kernelCountX = 0; kernelCountX != kernel; kernelCountX++, x++ )
for (int y = miniImgY*kernel, kernelCountY = 0; kernelCountY != kernel; kernelCountY++, y++ )
{
float distanceTopLeft = ((x-miniImgX*kernel)+(y-miniImgY*kernel))/2;
float distanceTopRight = (((miniImgX+1)*kernel-x)+(y-miniImgY*kernel))/2;
float distanceBottomLeft = ((x-miniImgX*kernel)+((miniImgY+1)*kernel-y))/2;
float distanceBottomRight = (((miniImgX+1)*kernel-x)+((miniImgY+1)*kernel-y))/2;
unsigned char topLeftR = miniImg.r[miniImgX][miniImgY];
unsigned char topLeftG = miniImg.g[miniImgX][miniImgY];
unsigned char topLeftB = miniImg.b[miniImgX][miniImgY];
unsigned char topRightR = miniImg.r[miniImgX+1][miniImgY];
unsigned char topRightG = miniImg.g[miniImgX+1][miniImgY];
unsigned char topRightB = miniImg.b[miniImgX+1][miniImgY];
unsigned char bottomLeftR = miniImg.r[miniImgX][miniImgY+1];
unsigned char bottomLeftG = miniImg.g[miniImgX][miniImgY+1];
unsigned char bottomLeftB = miniImg.b[miniImgX][miniImgY+1];
unsigned char bottomRightR = miniImg.r[miniImgX+1][miniImgY+1];
unsigned char bottomRightG = miniImg.g[miniImgX+1][miniImgY+1];
unsigned char bottomRightB = miniImg.b[miniImgX+1][miniImgY+1];
distanceTopLeft = (1-(distanceTopLeft/(kernel)))/4;
distanceTopRight = (1-(distanceTopRight/(kernel)))/4;
distanceBottomLeft = (1-(distanceBottomLeft/(kernel)))/4;
distanceBottomRight = (1-(distanceBottomRight/(kernel)))/4;
//printf("%f %f %f %f\n", distanceTopLeft, distanceTopRight, distanceBottomLeft, distanceBottomRight);
blurImg.r[x][y] = distanceTopLeft*topLeftR+distanceTopRight*topRightR+distanceBottomLeft*bottomLeftR+distanceBottomRight*bottomRightR;
blurImg.g[x][y] = distanceTopLeft*topLeftG+distanceTopRight*topRightG+distanceBottomLeft*bottomLeftG+distanceBottomRight*bottomRightG;
blurImg.b[x][y] = distanceTopLeft*topLeftB+distanceTopRight*topRightB+distanceBottomLeft*bottomLeftB+distanceBottomRight*bottomRightB;
}
}
for (int x = 0; x != img.w; x++)
for (int y = 0; y != img.h; y++)
{
/*img.r[x][y] = blurImg.r[x][y];
img.g[x][y] = blurImg.g[x][y];
img.b[x][y] = blurImg.b[x][y];*/
img.r[x][y] = (img.r[x][y]+blurImg.r[x][y])/2;
img.g[x][y] = (img.g[x][y]+blurImg.g[x][y])/2;
img.b[x][y] = (img.b[x][y]+blurImg.b[x][y])/2;
}
Uint32 time2 = SDL_GetTicks();
printf("bloom took %f seconds\n", (time2-time1)/1000.0);
}
test: uses ->
bloom3(img, 5);
result 1:

bloom took 0.228000 seconds
result 2:

bloom took 0.225000 seconds
lets try crank up the kernel size to 10
bloom3(img, 10);
result 3:

bloom took 0.199000 seconds
looks a bit pixelated, isn't it?
when i try to do this over 640x480 image, i get : bloom took 0.070000 seconds
around 13FPS in that low resolution, pretty slow, and worse than old_bloom.
but definitely better than bloom2.
bloom4 trys to interpolate again, using a different interpolation attempt based on distance;
void bloom4(RGBImage &img, int kernel)
{
RGBImage miniImg(img.w/kernel, img.h/kernel);
RGBImage blurImg(img.w, img.h);
Uint32 time1 = SDL_GetTicks();
int miniImgX = 0, miniImgY = 0;
for (int y = 0; y < img.h; y += kernel)
{
for (int x = 0; x < img.w; x += kernel)
{
if (((img.w-x) > kernel) && ((img.h-y) > kernel))
{
int sumR = 0, sumG = 0, sumB = 0;
for (int kernelCountX = 0, newX = x; kernelCountX != kernel; kernelCountX++, newX++)
for (int kernelCountY = 0, newY = y; kernelCountY != kernel; kernelCountY++, newY++)
{
sumR += img.r[newX][newY];
sumG += img.g[newX][newY];
sumB += img.b[newX][newY];
}
sumR /= kernel*kernel;
sumG /= kernel*kernel;
sumB /= kernel*kernel;
miniImg.r[miniImgX][miniImgY] = sumR;
miniImg.g[miniImgX][miniImgY] = sumG;
miniImg.b[miniImgX][miniImgY] = sumB;
miniImgX++;
}
}
miniImgX = 0;
miniImgY++;
}
for (miniImgY = 0; miniImgY != (miniImg.h-1); miniImgY++)
for (miniImgX = 0; miniImgX != (miniImg.w-1); miniImgX++)
{
float maximumDistance = pitagoras(kernel,kernel);
float topLeftR = miniImg.r[miniImgX][miniImgY];
float topLeftG = miniImg.g[miniImgX][miniImgY];
float topLeftB = miniImg.b[miniImgX][miniImgY];
float topRightR = miniImg.r[miniImgX+1][miniImgY];
float topRightG = miniImg.g[miniImgX+1][miniImgY];
float topRightB = miniImg.b[miniImgX+1][miniImgY];
float bottomLeftR = miniImg.r[miniImgX][miniImgY+1];
float bottomLeftG = miniImg.g[miniImgX][miniImgY+1];
float bottomLeftB = miniImg.b[miniImgX][miniImgY+1];
float bottomRightR = miniImg.r[miniImgX+1][miniImgY+1];
float bottomRightG = miniImg.g[miniImgX+1][miniImgY+1];
float bottomRightB = miniImg.b[miniImgX+1][miniImgY+1];
for (int x = miniImgX*kernel, kernelCountX = 0; kernelCountX != kernel; kernelCountX++, x++ )
for (int y = miniImgY*kernel, kernelCountY = 0; kernelCountY != kernel; kernelCountY++, y++ )
{
float distanceTopLeft = dist(x, y, miniImgX*kernel, miniImgY*kernel);
float distanceTopRight = dist(x,y, (miniImgX+1)*kernel, miniImgY*kernel);
float distanceBottomLeft = dist(x, y, miniImgX*kernel, (miniImgY+1)*kernel);
float distanceBottomRight = dist(x,y, (miniImgX+1)*kernel, (miniImgY+1)*kernel);
float precentTopLeft = (1-(distanceTopLeft/maximumDistance))/2;
float precentTopRight = (1-(distanceTopRight/maximumDistance))/2;
float precentBottomLeft = (1-(distanceBottomLeft/maximumDistance))/2;
float precentBottomRight = (1-(distanceBottomRight/maximumDistance))/2;
//printf("%f %f %f %f\n", precentTopLeft, precentTopRight, precentBottomLeft, precentBottomRight);
blurImg.r[x][y] = (precentTopLeft*topLeftR)+(precentTopRight*topRightR)+(precentBottomLeft*bottomLeftR)+(precentBottomRight*bottomRightR);
blurImg.g[x][y] = (precentTopLeft*topLeftG)+(precentTopRight*topRightG)+(precentBottomLeft*bottomLeftG)+(precentBottomRight*bottomRightG);
blurImg.b[x][y] = (precentTopLeft*topLeftB)+(precentTopRight*topRightB)+(precentBottomLeft*bottomLeftB)+(precentBottomRight*bottomRightB);
}
}
for (int x = 0; x != img.w; x++)
for (int y = 0; y != img.h; y++)
{
/*img.r[x][y] = blurImg.r[x][y];
img.g[x][y] = blurImg.g[x][y];
img.b[x][y] = blurImg.b[x][y];*/
img.r[x][y] = (img.r[x][y]+blurImg.r[x][y])/2;
img.g[x][y] = (img.g[x][y]+blurImg.g[x][y])/2;
img.b[x][y] = (img.b[x][y]+blurImg.b[x][y])/2;
}
Uint32 time2 = SDL_GetTicks();
printf("bloom took %f seconds\n", (time2-time1)/1000.0);
}
test: used ->
bloom4(img, 5);
result 1 :

bloom took 0.311000 seconds
result 2 :

bloom took 0.312000 seconds
bahhh.. looks like a damn puzzle.
bloom5 is my prime.
it uses interpolation not by distance, but by working on each of the big "pixel" blocks only in the internal, when i DO know how to make an interpolated line affected by two pixels only, i can make interpolated lines for each line of the kernel box, and then i know also how to fill this pixel block/kernel. more inside the algorithm itself.
void bloom5(RGBImage &img, int kernel)
{
RGBImage miniImg(img.w/kernel, img.h/kernel);
RGBImage blurImg(img.w, img.h);
Uint32 time1 = SDL_GetTicks();
int miniImgX = 0, miniImgY = 0;
for (int y = 0; y < img.h; y += kernel)
{
for (int x = 0; x < img.w; x += kernel)
{
if (((img.w-x) > kernel) && ((img.h-y) > kernel))
{
int sumR = 0, sumG = 0, sumB = 0;
for (int kernelCountX = 0, newX = x; kernelCountX != kernel; kernelCountX++, newX++)
for (int kernelCountY = 0, newY = y; kernelCountY != kernel; kernelCountY++, newY++)
{
sumR += img.r[newX][newY];
sumG += img.g[newX][newY];
sumB += img.b[newX][newY];
}
sumR /= kernel*kernel;
sumG /= kernel*kernel;
sumB /= kernel*kernel;
miniImg.r[miniImgX][miniImgY] = sumR;
miniImg.g[miniImgX][miniImgY] = sumG;
miniImg.b[miniImgX][miniImgY] = sumB;
miniImgX++;
}
}
miniImgX = 0;
miniImgY++;
}
int x, y, kernelCountX, kernelCountY;
unsigned char startR, startG, startB, endR, endG, endB, additionR, additionG, additionB;
for (miniImgY = 0; miniImgY != (miniImg.h-1); miniImgY++)
for (miniImgX = 0; miniImgX != (miniImg.w-1); miniImgX++)
{
//draw first vertical
startR = miniImg.r[miniImgX][miniImgY];
startG = miniImg.g[miniImgX][miniImgY];
startB = miniImg.b[miniImgX][miniImgY];
endR = miniImg.r[miniImgX][miniImgY+1];
endG = miniImg.g[miniImgX][miniImgY+1];
endB = miniImg.b[miniImgX][miniImgY+1];
additionR = (endR-startR)/kernel;
additionG = (endG-startG)/kernel;
additionB = (endB-startB)/kernel;
for (x = miniImgX*kernel, kernelCountY = 0, y = miniImgY*kernel; kernelCountY != kernel; y++, kernelCountY++)
{
blurImg.r[x][y] = startR;
blurImg.g[x][y] = startG;
blurImg.b[x][y] = startB;
startR += additionR;
startG += additionG;
startB += additionB;
}
//draw second vertical
startR = miniImg.r[miniImgX+1][miniImgY];
startG = miniImg.g[miniImgX+1][miniImgY];
startB = miniImg.b[miniImgX+1][miniImgY];
endR = miniImg.r[miniImgX+1][miniImgY+1];
endG = miniImg.g[miniImgX+1][miniImgY+1];
endB = miniImg.b[miniImgX+1][miniImgY+1];
additionR = (endR-startR)/kernel;
additionG = (endG-startG)/kernel;
additionB = (endB-startB)/kernel;
for (x = (miniImgX+1)*kernel, kernelCountY = 0, y = miniImgY*kernel; kernelCountY != kernel; y++, kernelCountY++)
{
blurImg.r[x][y] = startR;
blurImg.g[x][y] = startG;
blurImg.b[x][y] = startB;
startR += additionR;
startG += additionG;
startB += additionB;
}
//step 3
for (y = miniImgY*kernel, kernelCountY = 0; kernelCountY != kernel; kernelCountY++, y++)
{
startR = blurImg.r[miniImgX*kernel][y];
startG = blurImg.g[miniImgX*kernel][y];
startB = blurImg.b[miniImgX*kernel][y];
endR = blurImg.r[(miniImgX+1)*kernel][y];
endG = blurImg.g[(miniImgX+1)*kernel][y];
endB = blurImg.b[(miniImgX+1)*kernel][y];
additionR = (endR-startR)/kernel;
additionG = (endG-startG)/kernel;
additionB = (endB-startB)/kernel;
for (x = miniImgX*kernel+1, kernelCountX = 1; kernelCountX != kernel; kernelCountX++, x++)
{
blurImg.r[x][y] = startR;
blurImg.g[x][y] = startG;
blurImg.b[x][y] = startB;
startR += additionR;
startG += additionG;
startB += additionB;
}
}
}
for (int x = 0; x != img.w; x++)
for (int y = 0; y != img.h; y++)
{
/*img.r[x][y] = blurImg.r[x][y];
img.g[x][y] = blurImg.g[x][y];
img.b[x][y] = blurImg.b[x][y];*/
img.r[x][y] = (img.r[x][y]+blurImg.r[x][y])/2;
img.g[x][y] = (img.g[x][y]+blurImg.g[x][y])/2;
img.b[x][y] = (img.b[x][y]+blurImg.b[x][y])/2;
}
Uint32 time2 = SDL_GetTicks();
printf("bloom took %f seconds\n", (time2-time1)/1000.0);
}
test : used ->
bloom5(img, 5);
result 1:

bloom took 0.120000 seconds
result 2:

bloom took 0.130000 seconds
so smooth, and fast too. with only 130ms for 1024x768 image, it can make 640x480 image with : bloom took 0.024000 seconds. which means , around 40FPS in 640x480.
let's crank up the kernel to 10
bloom5(img, 10);
result 3:

bloom took 0.084000 seconds
here it becomes a bit pixelated. i don't know, seems like i'm too dumb in order to know how to make a good interpolation.
there is a formula on wikipedia on how to do a bilinear interpolation, but i don't really care now, cause.. such interpolations would get me times of 150-200miliseconds or so, just like in bloom2/3/4..
perhaps another version of scaling up using resampling equally and doing a blur this way, if i increase the size of the low resolution image by placing a center point in each pixel square, which means (rgb1+rgb2+rgb3+rgb4)/4 of the 4 corners. and then passing the low resolution image again and again.
i don't really know how efficient would it be.
bloom6 leaves the resampling approach, and now heads for getting an average pixel out of a surrounding area.
for each pixel, there is a loop of the surrounding area to get the average, the result - a blurred image. and then mixing the blurred image with the original image, would give bloom.
the problem with that approach - the more blur i want - the more values it has to collect in order to average. which means worse performance.
void bloom6(RGBImage &img, int kernel)
{
RGBImage miniImg(img.w/kernel, img.h/kernel);
RGBImage blurImg(img.w, img.h);
Uint32 time1 = SDL_GetTicks();
int x, y, kernelCountX, kernelCountY, kernelX, kernelY, sumR, sumG, sumB;
for (x = kernel; x != (img.w-kernel); x++)
for (y = kernel; y != (img.h-kernel); y++)
{
sumR = sumG = sumB = 0;
for (kernelX = x-(kernel/2), kernelCountX = 0; kernelCountX != kernel; kernelCountX++, kernelX++)
for (kernelY = y-(kernel/2), kernelCountY = 0; kernelCountY != kernel; kernelCountY++, kernelY++)
{
sumR += img.r[kernelX][kernelY];
sumG += img.g[kernelX][kernelY];
sumB += img.b[kernelX][kernelY];
}
blurImg.r[x][y] = sumR/(kernel*kernel);
blurImg.g[x][y] = sumG/(kernel*kernel);
blurImg.b[x][y] = sumB/(kernel*kernel);
}
for (int x = 0; x != img.w; x++)
for (int y = 0; y != img.h; y++)
{
/*img.r[x][y] = blurImg.r[x][y];
img.g[x][y] = blurImg.g[x][y];
img.b[x][y] = blurImg.b[x][y];*/
img.r[x][y] = (img.r[x][y]+blurImg.r[x][y])/2;
img.g[x][y] = (img.g[x][y]+blurImg.g[x][y])/2;
img.b[x][y] = (img.b[x][y]+blurImg.b[x][y])/2;
}
Uint32 time2 = SDL_GetTicks();
printf("bloom took %f seconds\n", (time2-time1)/1000.0);
}
test : uses ->
bloom6(img, 5);
result 1:

bloom took 0.184000 seconds
result 2:

bloom took 0.184000 seconds
as you can see, it looks better than bloom2/3/4, and works faster.
a test on 640x480 : bloom took 0.067000 seconds, meaning around 14FPS.
but if i try to increase the kernel, lets say.. 10
bloom6(img, 10);
result 3 :

bloom took 0.508000 seconds
oh oh, begins to be slow.
if i try to increase to.. 20
bloom6(img, 20);
result 4 :

bloom took 1.635000 seconds
even the old_bloom2 produces better results than that.
bloom7 is supposed to an optimized version of bloom6.
instead of taking summing all the pixels again and again, 25<= pixels for each pixel, its adding columns and rows and substracting while saving the sum. its supposed to be faster by 2 or so for 5 kernel, and much more for bigger kernels, but unfortunely, something messed up and smells fishy.. sniff sniff. i don't know really why it is so slow, maybe because of all those arrays? anyways, here it is.
void bloom7(RGBImage &img, int kernel)
{
RGBImage miniImg(img.w/kernel, img.h/kernel);
RGBImage blurImg(img.w, img.h);
int *columnSumR = (int *)calloc(img.w, sizeof(int));
int *columnSumG = (int *)calloc(img.w, sizeof(int));
int *columnSumB = (int *)calloc(img.w, sizeof(int));
int *rowSumR = (int *)calloc(img.h, sizeof(int));
int *rowSumG = (int *)calloc(img.h, sizeof(int));
int *rowSumB = (int *)calloc(img.h, sizeof(int));
Uint32 time1 = SDL_GetTicks();
int x, y, kernelCount, kernelX, kernelY, sumXR, sumXG, sumXB, sumYR, sumYG, sumYB, totalSumXR, totalSumXG, totalSumXB, totalSumYR, totalSumYG, totalSumYB, yPlusHalfKernel, yMinusHalfKernel, xPlusHalfKernel, xMinusHalfKernel;
int halfKernel = kernel/2;
int kernelSqr = kernel*kernel;
//initializing
totalSumXR = totalSumXG = totalSumXB = 0;
for (kernelY = 0; kernelY != kernel; kernelY++ )
{
sumXR = sumXG = sumXB = 0;
for (kernelX = 0; kernelX != kernel; kernelX++)
{
sumXR += img.r[kernelX][kernelY];
sumXG += img.g[kernelX][kernelY];
sumXB += img.b[kernelX][kernelY];
}
rowSumR[kernelY] = sumXR;
rowSumG[kernelY] = sumXG;
rowSumB[kernelY] = sumXB;
totalSumXR += sumXR;
totalSumXG += sumXG;
totalSumXB += sumXB;
}
totalSumYR = totalSumXR;
totalSumYG = totalSumXG;
totalSumYB = totalSumXB;
for (kernelX = 0; kernelX != kernel; kernelX++)
{
sumYR = sumYG = sumYB = 0;
for (kernelY = 0; kernelY != kernel; kernelY++)
{
sumYR += img.r[kernelX][kernelY];
sumYG += img.g[kernelX][kernelY];
sumYB += img.b[kernelX][kernelY];
}
columnSumR[kernelX] = sumYR;
columnSumG[kernelX] = sumYG;
columnSumB[kernelX] = sumYB;
}
//running
for (y = halfKernel, yMinusHalfKernel = 0, yPlusHalfKernel = halfKernel+halfKernel; y != (img.h-halfKernel-1); y++, yMinusHalfKernel++, yPlusHalfKernel++)
{
for (x = halfKernel, xMinusHalfKernel = 0, xPlusHalfKernel = halfKernel+halfKernel; x != (img.w-halfKernel-1); x++, xMinusHalfKernel++, xPlusHalfKernel++)
{
blurImg.r[x][y] = checkcolor(totalSumYR/kernelSqr);
blurImg.g[x][y] = checkcolor(totalSumYG/kernelSqr);
blurImg.b[x][y] = checkcolor(totalSumYB/kernelSqr);
sumYR = sumYG = sumYB = 0;
for (kernelY = yMinusHalfKernel, kernelX = xPlusHalfKernel, kernelCount = 0; kernelCount != kernel; kernelCount++, kernelY++)
{
sumYR += img.r[kernelX][kernelY];
sumYG += img.g[kernelX][kernelY];
sumYB += img.b[kernelX][kernelY];
}
columnSumR[xPlusHalfKernel] = sumYR;
columnSumG[xPlusHalfKernel] = sumYG;
columnSumB[xPlusHalfKernel] = sumYB;
totalSumYR += sumYR;
totalSumYG += sumYG;
totalSumYB += sumYB;
totalSumYR -= columnSumR[xMinusHalfKernel];
totalSumYG -= columnSumG[xMinusHalfKernel];
totalSumYB -= columnSumB[xMinusHalfKernel];
}
sumXR = sumXG = sumXB = 0;
for (kernelX = 0, kernelY = yPlusHalfKernel, kernelCount = 0; kernelCount != kernel; kernelCount++, kernelX++)
{
sumXR += img.r[kernelX][kernelY];
sumXG += img.g[kernelX][kernelY];
sumXB += img.b[kernelX][kernelY];
}
rowSumR[yPlusHalfKernel] = sumXR;
rowSumG[yPlusHalfKernel] = sumXG;
rowSumB[yPlusHalfKernel] = sumXB;
totalSumXR += sumXR;
totalSumXG += sumXG;
totalSumXB += sumXB;
totalSumXR -= rowSumR[yMinusHalfKernel];
totalSumXG -= rowSumG[yMinusHalfKernel];
totalSumXB -= rowSumB[yMinusHalfKernel];
totalSumYR = totalSumXR;
totalSumYG = totalSumXG;
totalSumYB = totalSumXB;
}
for (int x = 0; x != img.w; x++)
for (int y = 0; y != img.h; y++)
{
/*img.r[x][y] = blurImg.r[x][y];
img.g[x][y] = blurImg.g[x][y];
img.b[x][y] = blurImg.b[x][y];*/
img.r[x][y] = (img.r[x][y]+blurImg.r[x][y])/2;
img.g[x][y] = (img.g[x][y]+blurImg.g[x][y])/2;
img.b[x][y] = (img.b[x][y]+blurImg.b[x][y])/2;
}
//saveRGBImage(img, 0, 0, img.w, img.h, img.w, img.h);
Uint32 time2 = SDL_GetTicks();
printf("bloom took %f seconds\n", (time2-time1)/1000.0);
free(columnSumR);
free(columnSumG);
free(columnSumB);
free(rowSumR);
free(rowSumG);
free(rowSumB);
}
test : uses ->
bloom7(img, 5);
result 1 :

bloom took 0.519000 seconds
result 2:

bloom took 0.523000 seconds
indeed there is a problem, why doesn't it forget the beam? oh well, maybe i'll try figuring it out and fixing it later.
if i try to do kernel = 10,
bloom7(img, 10);
result 3:

bloom took 0.560000 seconds
almost the same as bloom6. for bigger kernels, this will be faster.
kernel = 20
bloom7(img, 20);
result 4:

bloom took 0.613000 seconds
almost no increase in the time. but it still a bugged code.. pfff.. maybe later i'll try fix it.
well, now i tried to see maybe there's a superfast blur, i tried to check a blur based on the average of 4 points in the kernel square. it may sound good but the result is some kind of a combination rather than a blur.
void bloom8(RGBImage &img, int kernel)
{
RGBImage miniImg(img.w/kernel, img.h/kernel);
RGBImage blurImg(img.w, img.h);
Uint32 time1 = SDL_GetTicks();
int x, y, kernelCountX, kernelCountY, kernelX, kernelY, sumR, sumG, sumB;
int halfKernel = kernel/2;
for (x = kernel; x != (img.w-kernel); x++)
for (y = kernel; y != (img.h-kernel); y++)
{
blurImg.r[x][y] = (img.r[x-halfKernel][y-halfKernel]+img.r[x+halfKernel][y-halfKernel]+img.r[x-halfKernel][y+halfKernel]+img.r[x+halfKernel][y+halfKernel])/4;
blurImg.g[x][y] = (img.g[x-halfKernel][y-halfKernel]+img.g[x+halfKernel][y-halfKernel]+img.g[x-halfKernel][y+halfKernel]+img.g[x+halfKernel][y+halfKernel])/4;
blurImg.b[x][y] = (img.b[x-halfKernel][y-halfKernel]+img.b[x+halfKernel][y-halfKernel]+img.b[x-halfKernel][y+halfKernel]+img.b[x+halfKernel][y+halfKernel])/4 ;
}
for (int x = 0; x != img.w; x++)
for (int y = 0; y != img.h; y++)
{
/*img.r[x][y] = blurImg.r[x][y];
img.g[x][y] = blurImg.g[x][y];
img.b[x][y] = blurImg.b[x][y];*/
img.r[x][y] = (img.r[x][y]+blurImg.r[x][y])/2;
img.g[x][y] = (img.g[x][y]+blurImg.g[x][y])/2;
img.b[x][y] = (img.b[x][y]+blurImg.b[x][y])/2;
}
Uint32 time2 = SDL_GetTicks();
printf("bloom took %f seconds\n", (time2-time1)/1000.0);
}
test : uses ->
bloom8(img, 5);
result 1:

bloom took 0.038000 seconds
result 2 :

bloom took 0.041000 seconds
well, on a first sight it might look good. since im doing this outside freespace, and only applying this on a full image, the text is going over the same procedure too.
however if its not jpeg image you were watching, you would see its not that good. for an example, let me increase the kernel to 10.
bloom8(img, 10);
result 3 :

bloom took 0.040000 seconds
that doesn't look really good.
anyways, in the last bloom function, called bloom9, i tried to see what happens if i get the pixel value for the blur layer if i randomize a pixel within the kernel and assigning the pixel of the blur layer its value.
void bloom9(RGBImage &img, int kernel)
{
RGBImage miniImg(img.w/kernel, img.h/kernel);
RGBImage blurImg(img.w, img.h);
Uint32 time1 = SDL_GetTicks();
int x, y, kernelCountX, kernelCountY, kernelX, kernelY, sumR, sumG, sumB;
int halfKernel = kernel/2;
for (x = kernel; x != (img.w-kernel); x++)
for (y = kernel; y != (img.h-kernel); y++)
{
int randomX = random(-halfKernel, halfKernel);
int randomY = random(-halfKernel, halfKernel);
blurImg.r[x][y] = img.r[x+randomX][y+randomY];
blurImg.g[x][y] = img.g[x+randomX][y+randomY];
blurImg.b[x][y] = img.b[x+randomX][y+randomY];
}
for (int x = 0; x != img.w; x++)
for (int y = 0; y != img.h; y++)
{
/*img.r[x][y] = blurImg.r[x][y];
img.g[x][y] = blurImg.g[x][y];
img.b[x][y] = blurImg.b[x][y];*/
img.r[x][y] = (img.r[x][y]+blurImg.r[x][y])/2;
img.g[x][y] = (img.g[x][y]+blurImg.g[x][y])/2;
img.b[x][y] = (img.b[x][y]+blurImg.b[x][y])/2;
}
Uint32 time2 = SDL_GetTicks();
printf("bloom took %f seconds\n", (time2-time1)/1000.0);
}