View Single Post
 11-22-2018, 05:51 PM #37 wwwmaze Human being with feelings   Join Date: Oct 2009 Posts: 115
For the Gaussian blurs I think I managed to let the YV12 version work almost (not exactly, there are still minor color differences) like the RGBA version. I had to introduce some additional multi-add pairs though because I couldn't figure out how to modify the coefficients. So there's still room for optimization. I also fixed the image borders for the bidirectional case.
RGBA version:
Code:
```
// Gaussian Blur RGBA
// v0.2
//@param1:sigma 'Sigma' 10 0.5 50 25 0.1
//@param2:bidir 'bidirectional' 1 0 1 0 1
//@param8:restore_cs 'preserve colorspace' 1 0 1 0 1

// Overview: blurs input `in` with a 3rd-order recursive (IIR) filter that is
// run along x, then along y by transposing the image into a work buffer and
// running the same per-pixel code again. With `bidir` set, the whole sequence
// is repeated on a 180-degree-rotated copy of the result.
// NOTE(review): the constants below look like the Young / van Vliet recursive
// Gaussian approximation -- confirm against the paper before changing them.

// Index of the input that input_info() and gfx_blit() read from.
in=0;
// One-time init: lsigma=-1 differs from any sigma the slider can produce, so
// the coefficient block below runs on the first pass through this code.
initDone==0 ? (lsigma=-1; initDone=1;);

// coeffs
// Recomputed only when sigma differs from the value used last time (lsigma).
lsigma!=sigma ? (
  // q: piecewise function of sigma (one formula for 0.5..2.5, another otherwise).
  q = (0.5<=sigma && sigma <=2.5) ? 3.97156-4.14554*sqrt(1-0.26891*sigma) : 0.98711*sigma-0.96330;
  // b0..b3: cubic polynomials in q. `^` is relied on as exponentiation (EEL2).
  b0 = 1.57825 + 2.44413*q + 1.4281*q^2 + 0.422205*q^3;
  b1 = 2.44413*q + 2.85619*q^2 + 1.26661*q^3;
  b2 = -1.4281*q^2 - 1.26661*q^3;
  b3 = 0.422205*q^3;
  // BB weights the current input sample; b1b0/b2b0/b3b0 weight the previous
  // three outputs. These four are the only values the per-pixel code reads.
  BB = 1-(b1+b2+b3)/b0;
  b1b0=b1/b0;
  b2b0=b2/b0;
  b3b0=b3/b0;
  lsigma=sigma;
);

// Remember the incoming colorspace so it can be restored at the end.
oldcs=colorspace;
colorspace='RGBA';
// Read the input's dimensions into w,h and size image -1 (the output
// framebuffer, per the comments further down) to match.
input_info(in,w,h);
gfx_img_resize(-1,w,h);

// Per-pixel code string handed to gfx_evalrect() below.
//  * _1 is a countdown: when it drops below 0 it is reset to eval_w-1 and the
//    filter history is seeded with the current pixel. The caller sets eval_w
//    to the row length, so this presumably fires at the start of every row
//    (assumes gfx_evalrect visits pixels row by row -- TODO confirm).
//  * _1c/_2c/_3c hold the previous / 2nd-previous / 3rd-previous output of
//    channel c (c = 1:r, 2:g, 3:b); they are shifted after each pixel.
//  * The `// r=...` line inside the string is an older, disabled variant.
code="
  (_1 -= 1) < 0 ? (
    _1=eval_w-1;
    _31=_21=_11=r;
    _32=_22=_12=g;
    _33=_23=_13=b;
  );
  r=BB*r + b1b0*_11 + b2b0*_21 +b3b0*_31; _31=_21; _21=_11; _11=r;
  // r=BB*r + b1b0*lr + b2b0*llr +b3b0*lllr; lllr=llr; llr=lr; lr=r;
  g=BB*g + b1b0*_12 + b2b0*_22 +b3b0*_32; _32=_22; _22=_12; _12=g;
  b=BB*b + b1b0*_13 + b2b0*_23 +b3b0*_33; _33=_23; _23=_13; _13=b;
";

// pass counts completed iterations: 0 = first (forward) run, 1 = second run.
pass=0;
// Two iterations when bidir is set, otherwise one.
loop(bidir?2:1,
  pass ? (
    // second pass, use main buffer rotated 180
    // rotWs2 is (re)sized to w x h and becomes the draw destination; the
    // result of the first pass is read from image -1.
    gfx_dest = rotWs2 = gfx_img_resize(rotWs2,w,h);
    gfx_deltablit(-1,0,0,w,h,w-1,h-1,-1,0,0,-1);
  ) : (
    // first pass: copy the input into the current destination unchanged.
    gfx_blit(in,0,0,0,w,h,0,0,w,h);
  );

  // convolve in x
  // eval_w tells the per-pixel code how many evaluations make up one row.
  eval_w = w;
  gfx_evalrect(0,0,w,h, code);

  // rotate to vertical work buffer
  // rotWs1 is h x w; the source is rotWs2 on the second pass, else image -1.
  gfx_dest = rotWs1 = gfx_img_resize(rotWs1,h,w);
  gfx_deltablit(pass?rotWs2:-1,0,0,h,w, 0,0, 0,1,1,0,0,0);

  // convolve in y (now x)
  eval_w = h;
  gfx_evalrect(0,0,h,w, code);

  // rotate back to output framebuffer
  // The second pass uses different deltablit arguments than the first,
  // presumably to also undo the 180-degree rotation applied at its start.
  gfx_dest = -1;
  pass?gfx_deltablit(rotWs1,0,0,w,h,h-1,w-1, 0,-1,-1,0,0,0):
       gfx_deltablit(rotWs1,0,0,w,h,0,0, 0,1,1,0,0,0);
  pass+=1;
);

// Put the original colorspace back if requested and if it actually changed.
restore_cs && oldcs != colorspace ? colorspace=oldcs;
```
YV12 version:
Code:
```
// Gaussian Blur YV12
// v0.2
//@param1:sigma 'Sigma' 10 0.5 50 25 0.1
//@param2:bidir 'bidirectional' 1 0 1 0 1

// Overview: same structure as the RGBA script (recursive filter along x,
// transpose, filter again, transpose back; optional second run on a rotated
// copy), but the per-pixel code works on y1..y4,u,v and the colorspace is
// left at 'YV12' afterwards (there is no restore_cs parameter here).

// Index of the input that input_info() and gfx_blit() read from.
in=0;
// One-time init: lsigma=-1 forces the coefficient block to run the first time.
initDone==0 ? (lsigma=-1; initDone=1;);

// coeffs
// Recomputed only when sigma differs from the value used last time (lsigma).
lsigma!=sigma ? (
  // q: piecewise function of sigma (one formula for 0.5..2.5, another otherwise).
  q = (0.5<=sigma && sigma <=2.5) ? 3.97156-4.14554*sqrt(1-0.26891*sigma) : 0.98711*sigma-0.96330;
  // b0..b3: cubic polynomials in q. `^` is relied on as exponentiation (EEL2).
  b0 = 1.57825 + 2.44413*q + 1.4281*q^2 + 0.422205*q^3;
  b1 = 2.44413*q + 2.85619*q^2 + 1.26661*q^3;
  b2 = -1.4281*q^2 - 1.26661*q^3;
  b3 = 0.422205*q^3;
  // BB weights the current input sample; b1b0/b2b0/b3b0 weight the previous
  // three outputs.
  BB = 1-(b1+b2+b3)/b0;
  b1b0=b1/b0;
  b2b0=b2/b0;
  b3b0=b3/b0;
  lsigma=sigma;
);

colorspace='YV12';
// Read the input's dimensions into w,h and size image -1 to match.
input_info(in,w,h);
gfx_img_resize(-1,w,h);

// Per-pixel code string handed to gfx_evalrect() below.
//  * _1 is the same countdown as in the RGBA script; the caller sets eval_w
//    to half the row length. Presumably each evaluation covers a 2x2 luma
//    group (y1,y2 / y3,y4) sharing one u and one v -- TODO confirm against
//    the YV12 notes linked in the signature.
//  * History slots: _x1 for the y1/y2 pair, _x2 for the y3/y4 pair, _x3 for u,
//    _x4 for v (x = 1,2,3 -> previous, 2nd-previous, 3rd-previous output).
//  * y1 and y2 are filtered back to back (y2 uses the fresh y1 as its most
//    recent output), then the history advances by two samples; same for y3/y4.
//  * u and v: the recursion is stepped twice on the same input value, with the
//    intermediate output kept in _90 / _91, and the history again advances by
//    two. These are the "additional multi-add pairs" mentioned in the post.
code="
  (_1 -= 1) < 0 ? (
    _1=eval_w-1;
    _31=_21=_11=y1;
    _32=_22=_12=y3;
    _33=_23=_13=u;
    _34=_24=_14=v;
  );
  y1=BB*y1 + b1b0*_11 + b2b0*_21 +b3b0*_31;
  y2=BB*y2 + b1b0*y1 + b2b0*_11 +b3b0*_21;
  _31=_11; _21=y1; _11=y2;
  y3=BB*y3 + b1b0*_12 + b2b0*_22 +b3b0*_32;
  y4=BB*y4 + b1b0*y3 + b2b0*_12 +b3b0*_22;
  _32=_12; _22=y3; _12=y4;
  _90=BB*u + b1b0*_13 + b2b0*_23 +b3b0*_33;
  u=BB*u + b1b0*_90 + b2b0*_13 +b3b0*_23;
  _33=_13; _23=_90; _13=u;
  _91=BB*v + b1b0*_14 + b2b0*_24 +b3b0*_34;
  v=BB*v + b1b0*_91 + b2b0*_14 +b3b0*_24;
  _34=_14; _24=_91; _14=v;
";

// Draw into image -1 (the output framebuffer) for the first pass.
gfx_dest=-1;
// pass counts completed iterations: 0 = first (forward) run, 1 = second run.
pass=0;
// Two iterations when bidir is set, otherwise one.
loop(bidir?2:1,
  pass ? (
    // second pass, use main buffer rotated 180
    // rotWs2 is (re)sized to w x h and becomes the draw destination; the
    // result of the first pass is read from image -1.
    gfx_dest = rotWs2 = gfx_img_resize(rotWs2,w,h);
    gfx_deltablit(-1,0,0,w,h,w-1,h-1,-1,0,0,-1);
  ) : (
    // first pass: copy the input into the current destination unchanged.
    gfx_blit(in,0,0,0,w,h,0,0,w,h);
  );

  // convolve in x
  // Half the width: the countdown in `code` is per evaluation, not per pixel.
  eval_w = w/2;
  gfx_evalrect(0,0,w,h, code);

  // rotate to vertical work buffer
  // rotWs1 is h x w; the source is rotWs2 on the second pass, else image -1.
  gfx_dest = rotWs1 = gfx_img_resize(rotWs1,h,w);
  gfx_deltablit(pass?rotWs2:-1,0,0,h,w, 0,0, 0,1,1,0,0,0);

  // convolve in y (now x)
  eval_w = h/2;
  gfx_evalrect(0,0,h,w, code);

  // rotate back to output framebuffer
  // The second pass uses different deltablit arguments than the first,
  // presumably to also undo the 180-degree rotation applied at its start.
  gfx_dest = -1;
  pass?gfx_deltablit(rotWs1,0,0,w,h,h-1,w-1, 0,-1,-1,0,0,0):
       gfx_deltablit(rotWs1,0,0,w,h,0,0, 0,1,1,0,0,0);
  pass+=1;
);
```
EDIT: oops I think the YV12 version now uses the same CPU resources?? so hmmm... not ideal EDIT: updated versions with a bugfix see Justins posts below __________________ Video processor programming: (1) How inputs work: https://forum.cockos.com/showthread....26#post2057426 (2) YV12 colorspace: https://forum.cockos.com/showthread....54#post2059354 (3) EEL documentation: https://www.cockos.com/EEL2/ Last edited by wwwmaze; 11-27-2018 at 08:30 AM.