View Single Post
Old 11-22-2018, 05:51 PM   #37
wwwmaze
Human being with feelings
 
Join Date: Oct 2009
Posts: 115
Default

For the Gaussian blurs, I think I managed to get the YV12 version to work almost like the RGBA version (not exactly; there are still minor color differences). I had to introduce some additional multiply-add pairs, though, because I couldn't figure out how to modify the coefficients, so there's still room for optimization.

I also fixed the image borders for the bidirectional case.


RGBA version:
Code:
// Gaussian Blur RGBA
// v0.2
//@param1:sigma 'Sigma' 10 0.5 50 25 0.1
//@param2:bidir 'bidirectional' 1 0 1 0 1
//@param8:restore_cs 'preserve colorspace' 1 0 1 0 1

// Index of the video input that gets blurred.
in=0;

// First run only: park lsigma at -1 so the comparison against sigma below
// fails and the coefficients are computed at least once.
initDone==0 ? (
  initDone=1;
  lsigma=-1;
);

// Recursive Gaussian coefficients, rebuilt only when the Sigma parameter
// differs from the value they were last computed for.
sigma!=lsigma ? (
  lsigma=sigma;

  // q is derived from sigma with one formula for 0.5..2.5 and another otherwise.
  (0.5<=sigma && sigma <=2.5) ? (
    q = 3.97156-4.14554*sqrt(1-0.26891*sigma);
  ) : (
    q = 0.98711*sigma-0.96330;
  );
  qsq = q^2;
  qcu = q^3;

  // b0 is the normaliser; b1..b3 weight the three previous outputs.
  b0 = 1.57825 + 2.44413*q + 1.4281*qsq + 0.422205*qcu;
  b1 = 2.44413*q + 2.85619*qsq + 1.26661*qcu;
  b2 = -1.4281*qsq - 1.26661*qcu;
  b3 = 0.422205*qcu;

  // Values read by the per-pixel program: BB scales the current sample,
  // bNb0 scale the N-th previous output.
  BB = 1-(b1+b2+b3)/b0;
  b1b0=b1/b0;
  b2b0=b2/b0;
  b3b0=b3/b0;
);

// Remember the incoming colorspace (restored at the end of the script when
// restore_cs is set), switch to RGBA, then size the output framebuffer (-1)
// to the dimensions input_info() reports for the input in w/h.
oldcs=colorspace;
colorspace='RGBA';
input_info(in,w,h);
gfx_img_resize(-1,w,h);

// Per-pixel program handed to gfx_evalrect() by the pass loop.
//  - _1 is a countdown: when it drops below zero it is reloaded with eval_w-1
//    and the three history taps of every channel (_1x, _2x, _3x) are seeded
//    with the current pixel, so history restarts every eval_w pixels (the
//    caller sets eval_w to the width of the rectangle being processed).
//    NOTE(review): relies on _1 starting at 0 for each gfx_evalrect call -- confirm.
//  - r, g and b are each replaced by BB*x + b1b0*prev1 + b2b0*prev2 + b3b0*prev3,
//    after which that channel's history is shifted by one. Alpha is not written.
//  - History slots: _11/_21/_31 for r, _12/_22/_32 for g, _13/_23/_33 for b.
code="
(_1 -= 1) < 0 ? (
  _1=eval_w-1;
  _31=_21=_11=r; _32=_22=_12=g; _33=_23=_13=b;
);
r=BB*r + b1b0*_11 + b2b0*_21 +b3b0*_31; _31=_21; _21=_11; _11=r;    // r=BB*r + b1b0*lr + b2b0*llr +b3b0*lllr; lllr=llr; llr=lr; lr=r;
g=BB*g + b1b0*_12 + b2b0*_22 +b3b0*_32; _32=_22; _22=_12; _12=g;
b=BB*b + b1b0*_13 + b2b0*_23 +b3b0*_33; _33=_23; _23=_13; _13=b;
";

// Run the blur once, or twice when 'bidirectional' is enabled. The per-pixel
// program only looks at previously processed pixels, so the second pass feeds
// it the first pass's result turned by 180 degrees and un-turns it on the way
// back out.
pass=0;
loop(bidir?2:1,
  pass ? (
    // second pass: copy the main framebuffer (-1), rotated 180 degrees, into
    // work image rotWs2 (w x h) and make that the drawing destination.
    // NOTE(review): source start (w-1,h-1) with deltas -1,0,0,-1 -- confirm
    // argument order against the gfx_deltablit reference.
    gfx_dest = rotWs2 = gfx_img_resize(rotWs2,w,h);
    gfx_deltablit(-1,0,0,w,h,w-1,h-1,-1,0,0,-1);
  ) : (
    // first pass: copy the input into the current destination at full size.
    gfx_blit(in,0,0,0,w,h,0,0,w,h);
  );

  // convolve in x: the program restarts its history every eval_w pixels.
  eval_w = w;
  gfx_evalrect(0,0,w,h, code);

  // copy the x-filtered image into work image rotWs1 (h x w) with the axes
  // swapped (deltas 0,1,1,0); the source is rotWs2 on the second pass and the
  // main framebuffer on the first.
  gfx_dest = rotWs1 = gfx_img_resize(rotWs1,h,w);
  gfx_deltablit(pass?rotWs2:-1,0,0,h,w, 0,0, 0,1,1,0,0,0);

  // convolve in y (now x): same program, history length is now h.
  eval_w = h;
  gfx_evalrect(0,0,h,w, code);

  // copy back to the output framebuffer, swapping the axes again. On the
  // second pass the read starts at (h-1,w-1) with negative deltas, which also
  // undoes the 180 degree turn applied at the top of that pass.
  gfx_dest = -1;
  pass?gfx_deltablit(rotWs1,0,0,w,h,h-1,w-1, 0,-1,-1,0,0,0):
       gfx_deltablit(rotWs1,0,0,w,h,0,0, 0,1,1,0,0,0);
  pass+=1;
);

// Put the original colorspace back if 'preserve colorspace' is on and it
// differs from the one currently set.
restore_cs && oldcs != colorspace ? colorspace=oldcs;
YV12 version:
Code:
// Gaussian Blur YV12
// v0.2
//@param1:sigma 'Sigma' 10 0.5 50 25 0.1
//@param2:bidir 'bidirectional' 1 0 1 0 1

// Index of the video input that gets blurred.
in=0;

// First run only: park lsigma at -1 so the comparison against sigma below
// fails and the coefficients are computed at least once.
initDone==0 ? (
  initDone=1;
  lsigma=-1;
);

// Recursive Gaussian coefficients, rebuilt only when the Sigma parameter
// differs from the value they were last computed for.
sigma!=lsigma ? (
  lsigma=sigma;

  // q is derived from sigma with one formula for 0.5..2.5 and another otherwise.
  (0.5<=sigma && sigma <=2.5) ? (
    q = 3.97156-4.14554*sqrt(1-0.26891*sigma);
  ) : (
    q = 0.98711*sigma-0.96330;
  );
  qsq = q^2;
  qcu = q^3;

  // b0 is the normaliser; b1..b3 weight the three previous outputs.
  b0 = 1.57825 + 2.44413*q + 1.4281*qsq + 0.422205*qcu;
  b1 = 2.44413*q + 2.85619*qsq + 1.26661*qcu;
  b2 = -1.4281*qsq - 1.26661*qcu;
  b3 = 0.422205*qcu;

  // Values read by the per-pixel program: BB scales the current sample,
  // bNb0 scale the N-th previous output.
  BB = 1-(b1+b2+b3)/b0;
  b1b0=b1/b0;
  b2b0=b2/b0;
  b3b0=b3/b0;
);

// Work in YV12, then size the output framebuffer (-1) to the dimensions
// input_info() reports for the input in w/h.
colorspace='YV12';
input_info(in,w,h);
gfx_img_resize(-1,w,h);

// Per-evaluation program handed to gfx_evalrect() by the pass loop. Each
// evaluation sees four luma samples (y1..y4) and one u and one v sample.
//  - _1 is a countdown: when it drops below zero it is reloaded with eval_w-1
//    and all history taps are seeded from the current samples (y1, y3, u, v),
//    so history restarts every eval_w evaluations.
//    NOTE(review): relies on _1 starting at 0 for each gfx_evalrect call -- confirm.
//  - y1 then y2 are filtered in sequence sharing history _11/_21/_31 (y2 uses
//    the freshly filtered y1 as its newest tap); y3 then y4 do the same with
//    _12/_22/_32.
//  - u and v are each run through the recursion twice per evaluation with the
//    same input sample; _90/_91 hold the intermediate result. Presumably this
//    makes the single chroma sample advance the filter as far as the two luma
//    samples do -- TODO confirm.
//  - History slots: _13/_23/_33 for u, _14/_24/_34 for v.
code="
(_1 -= 1) < 0 ? (
  _1=eval_w-1;
  _31=_21=_11=y1; _32=_22=_12=y3; _33=_23=_13=u; _34=_24=_14=v;
);
y1=BB*y1 + b1b0*_11 + b2b0*_21 +b3b0*_31;
y2=BB*y2 + b1b0*y1 + b2b0*_11 +b3b0*_21; _31=_11; _21=y1; _11=y2;
y3=BB*y3 + b1b0*_12 + b2b0*_22 +b3b0*_32;
y4=BB*y4 + b1b0*y3 + b2b0*_12 +b3b0*_22; _32=_12; _22=y3; _12=y4;

_90=BB*u + b1b0*_13 + b2b0*_23 +b3b0*_33;
u=BB*u + b1b0*_90 + b2b0*_13 +b3b0*_23; _33=_13; _23=_90; _13=u;
_91=BB*v + b1b0*_14 + b2b0*_24 +b3b0*_34;
v=BB*v + b1b0*_91 + b2b0*_14 +b3b0*_24; _34=_14; _24=_91; _14=v;
";


// Draw into the output framebuffer, then run the blur once, or twice when
// 'bidirectional' is enabled. The per-evaluation program only looks at
// previously processed samples, so the second pass feeds it the first pass's
// result turned by 180 degrees and un-turns it on the way back out.
gfx_dest=-1;
pass=0;
loop(bidir?2:1,
  pass ? (
    // second pass: copy the main framebuffer (-1), rotated 180 degrees, into
    // work image rotWs2 (w x h) and make that the drawing destination.
    // NOTE(review): source start (w-1,h-1) with deltas -1,0,0,-1 -- confirm
    // argument order against the gfx_deltablit reference.
    gfx_dest = rotWs2 = gfx_img_resize(rotWs2,w,h);
    gfx_deltablit(-1,0,0,w,h,w-1,h-1,-1,0,0,-1);
  ) : (
    // first pass: copy the input into the current destination at full size.
    gfx_blit(in,0,0,0,w,h,0,0,w,h);
  );

  // convolve in x: the program restarts its history every eval_w evaluations.
  // NOTE(review): w/2 presumably because one YV12 evaluation covers two pixels
  // horizontally; an odd w makes eval_w fractional -- confirm behaviour.
  eval_w = w/2;
  gfx_evalrect(0,0,w,h, code);

  // copy the x-filtered image into work image rotWs1 (h x w) with the axes
  // swapped (deltas 0,1,1,0); the source is rotWs2 on the second pass and the
  // main framebuffer on the first.
  gfx_dest = rotWs1 = gfx_img_resize(rotWs1,h,w);
  gfx_deltablit(pass?rotWs2:-1,0,0,h,w, 0,0, 0,1,1,0,0,0);

  // convolve in y (now x): same program, history length is now h/2.
  eval_w = h/2;
  gfx_evalrect(0,0,h,w, code);

  // copy back to the output framebuffer, swapping the axes again. On the
  // second pass the read starts at (h-1,w-1) with negative deltas, which also
  // undoes the 180 degree turn applied at the top of that pass.
  gfx_dest = -1;
  pass?gfx_deltablit(rotWs1,0,0,w,h,h-1,w-1, 0,-1,-1,0,0,0):
       gfx_deltablit(rotWs1,0,0,w,h,0,0, 0,1,1,0,0,0);
  pass+=1;
);
EDIT: Oops, I think the YV12 version now uses about the same CPU resources as the RGBA version, so... not ideal.

EDIT: Updated both versions with a bugfix; see Justin's posts below.
__________________
Video processor programming: (1) How inputs work: https://forum.cockos.com/showthread....26#post2057426 (2) YV12 colorspace: https://forum.cockos.com/showthread....54#post2059354 (3) EEL documentation: https://www.cockos.com/EEL2/

Last edited by wwwmaze; 11-27-2018 at 08:30 AM.
wwwmaze is offline   Reply With Quote