@@ -28,9 +28,10 @@ float2 _TileMaxOffs;
2828
2929// Maximum blur radius (in pixels)
3030half _MaxBlurRadius;
31+ float _RcpMaxBlurRadius;
3132
3233// Filter parameters/coefficients
33- int _LoopCount;
34+ half _LoopCount;
3435
3536// History buffer for frame blending
3637sampler2D _History1LumaTex;
@@ -79,81 +80,38 @@ half4 FragVelocitySetup(VaryingsDefault i) : SV_Target
7980 // Sample the motion vector.
8081 float2 v = tex2D (_CameraMotionVectorsTexture, i.uv).rg;
8182
82- // Apply the exposure time.
83- v *= _VelocityScale;
84-
85- // Halve the vector and convert it to the pixel space.
86- v = v * 0.5 * _CameraMotionVectorsTexture_TexelSize.zw;
83+ // Apply the exposure time and convert to the pixel space.
84+ v *= (_VelocityScale * 0.5 ) * _CameraMotionVectorsTexture_TexelSize.zw;
8785
8886 // Clamp the vector with the maximum blur radius.
89- float lv = length (v);
90- v *= min (lv, _MaxBlurRadius) / max (lv, 1e-6 );
87+ v /= max (1.0 , length (v) * _RcpMaxBlurRadius);
9188
9289 // Sample the depth of the pixel.
93- float d = SAMPLE_DEPTH_TEXTURE (_CameraDepthTexture, i.uv.xy);
94- half z01 = LinearizeDepth (d);
90+ half d = LinearizeDepth (SAMPLE_DEPTH_TEXTURE (_CameraDepthTexture, i.uv));
9591
9692 // Pack into 10/10/10/2 format.
97- return half4 ((v / _MaxBlurRadius + 1.0 ) / 2.0 , z01 , 0.0 );
93+ return half4 ((v * _RcpMaxBlurRadius + 1.0 ) * 0.5 , d , 0.0 );
9894}
9995
100- // TileMax filter (4 pixels width with normalization)
101- half4 FragTileMax4 (VaryingsDefault i) : SV_Target
96+ // TileMax filter (2 pixel width with normalization): reads the four texels at (+-0.5, +-0.5) texel offsets around i.uv, decodes each rg pair from [0, 1] to [-_MaxBlurRadius, +_MaxBlurRadius] and returns their MaxV in rg (MaxV is defined outside this chunk; presumably it keeps the longer vector).
97+ half4 FragTileMax1 (VaryingsDefault i) : SV_Target
10298{
103- float4 d1 = _MainTex_TexelSize.xyxy * float4 ( 0.5 , 0.5 , 1.5 , 1.5 );
104- float4 d2 = _MainTex_TexelSize.xyxy * float4 (-0.5 , 0.5 , -1.5 , 1.5 );
105-
106- half2 v01 = tex2D (_MainTex, i.uv - d1.zw).rg; // -1.5, -1.5
107- half2 v02 = tex2D (_MainTex, i.uv - d1.xw).rg; // -0.5, -1.5
108- half2 v03 = tex2D (_MainTex, i.uv - d2.xw).rg; // +0.5, -1.5
109- half2 v04 = tex2D (_MainTex, i.uv - d2.zw).rg; // +1.5, -1.5
110-
111- half2 v05 = tex2D (_MainTex, i.uv - d1.zy).rg; // -1.5, -0.5
112- half2 v06 = tex2D (_MainTex, i.uv - d1.xy).rg; // -0.5, -0.5
113- half2 v07 = tex2D (_MainTex, i.uv - d2.xy).rg; // +0.5, -0.5
114- half2 v08 = tex2D (_MainTex, i.uv - d2.zy).rg; // +1.5, -0.5
115-
116- half2 v09 = tex2D (_MainTex, i.uv + d2.zy).rg; // -1.5, +0.5
117- half2 v10 = tex2D (_MainTex, i.uv + d2.xy).rg; // -0.5, +0.5
118- half2 v11 = tex2D (_MainTex, i.uv + d1.xy).rg; // +0.5, +0.5
119- half2 v12 = tex2D (_MainTex, i.uv + d1.zy).rg; // +1.5, +0.5
120-
121- half2 v13 = tex2D (_MainTex, i.uv + d2.zw).rg; // -1.5, +1.5
122- half2 v14 = tex2D (_MainTex, i.uv + d2.xw).rg; // -0.5, +1.5
123- half2 v15 = tex2D (_MainTex, i.uv + d1.xw).rg; // +0.5, +1.5
124- half2 v16 = tex2D (_MainTex, i.uv + d1.zw).rg; // +1.5, +1.5
125-
126- v01 = (v01 * 2.0 - 1.0 ) * _MaxBlurRadius;
127- v02 = (v02 * 2.0 - 1.0 ) * _MaxBlurRadius;
128- v03 = (v03 * 2.0 - 1.0 ) * _MaxBlurRadius;
129- v04 = (v04 * 2.0 - 1.0 ) * _MaxBlurRadius;
130-
131- v05 = (v05 * 2.0 - 1.0 ) * _MaxBlurRadius;
132- v06 = (v06 * 2.0 - 1.0 ) * _MaxBlurRadius;
133- v07 = (v07 * 2.0 - 1.0 ) * _MaxBlurRadius;
134- v08 = (v08 * 2.0 - 1.0 ) * _MaxBlurRadius;
135-
136- v09 = (v09 * 2.0 - 1.0 ) * _MaxBlurRadius;
137- v10 = (v10 * 2.0 - 1.0 ) * _MaxBlurRadius;
138- v11 = (v11 * 2.0 - 1.0 ) * _MaxBlurRadius;
139- v12 = (v12 * 2.0 - 1.0 ) * _MaxBlurRadius;
140-
141- v13 = (v13 * 2.0 - 1.0 ) * _MaxBlurRadius;
142- v14 = (v14 * 2.0 - 1.0 ) * _MaxBlurRadius;
143- v15 = (v15 * 2.0 - 1.0 ) * _MaxBlurRadius;
144- v16 = (v16 * 2.0 - 1.0 ) * _MaxBlurRadius;
145-
146- half2 va = MaxV (MaxV (MaxV (v01, v02), v03), v04);
147- half2 vb = MaxV (MaxV (MaxV (v05, v06), v07), v08);
148- half2 vc = MaxV (MaxV (MaxV (v09, v10), v11), v12);
149- half2 vd = MaxV (MaxV (MaxV (v13, v14), v15), v16);
150-
151- half2 vo = MaxV (MaxV (MaxV (va, vb), vc), vd);
99+ float4 d = _MainTex_TexelSize.xyxy * float4 (-0.5 , -0.5 , 0.5 , 0.5 );
152100
153- return half4 (vo, 0.0 , 0.0 );
101+ half2 v1 = tex2D (_MainTex, i.uv + d.xy).rg;
102+ half2 v2 = tex2D (_MainTex, i.uv + d.zy).rg;
103+ half2 v3 = tex2D (_MainTex, i.uv + d.xw).rg;
104+ half2 v4 = tex2D (_MainTex, i.uv + d.zw).rg;
105+
106+ v1 = (v1 * 2.0 - 1.0 ) * _MaxBlurRadius;
107+ v2 = (v2 * 2.0 - 1.0 ) * _MaxBlurRadius;
108+ v3 = (v3 * 2.0 - 1.0 ) * _MaxBlurRadius;
109+ v4 = (v4 * 2.0 - 1.0 ) * _MaxBlurRadius;
110+
111+ return half4 (MaxV (MaxV (MaxV (v1, v2), v3), v4), 0.0 , 0.0 );
154112}
155113
156- // TileMax filter (2 pixels width)
114+ // TileMax filter (2 pixel width)
157115half4 FragTileMax2 (VaryingsDefault i) : SV_Target
158116{
159117 float4 d = _MainTex_TexelSize.xyxy * float4 (-0.5 , -0.5 , 0.5 , 0.5 );
@@ -163,9 +121,7 @@ half4 FragTileMax2(VaryingsDefault i) : SV_Target
163121 half2 v3 = tex2D (_MainTex, i.uv + d.xw).rg;
164122 half2 v4 = tex2D (_MainTex, i.uv + d.zw).rg;
165123
166- half2 vo = MaxV (MaxV (MaxV (v1, v2), v3), v4);
167-
168- return half4 (vo, 0.0 , 0.0 );
124+ return half4 (MaxV (MaxV (MaxV (v1, v2), v3), v4), 0.0 , 0.0 );
169125}
170126
171127// TileMax filter (variable width)
@@ -215,165 +171,118 @@ half4 FragNeighborMax(VaryingsDefault i) : SV_Target
215171 half2 vb = MaxV (v4, MaxV (v5, v6));
216172 half2 vc = MaxV (v7, MaxV (v8, v9));
217173
218- return half4 (MaxV (va, MaxV (vb, vc)) / cw, 0.0 , 0.0 );
174+ return half4 (MaxV (va, MaxV (vb, vc)) * ( 1.0 / cw) , 0.0 , 0.0 );
219175}
220176
221177// -----------------------------------------------------------------------------
222178// Reconstruction
223179
224- // Strength of the depth filter
225- static const float kDepthFilterCoeff = 15.0 ;
226-
227- // Safer version of vector normalization function
228- half2 SafeNorm (half2 v)
180+ // Returns true when the fractional part of phase / interval exceeds 0.499, i.e. during the second half of every interval-long cycle of phase, and false during the first half.
181+ bool Interval (half phase, half interval)
229182{
230- half l = max (length (v), EPSILON);
231- return v / l * (l >= 0.5 );
183+ return frac (phase / interval) > 0.499 ; // threshold sits just below 0.5 so a phase exactly half way through a cycle (e.g. phase 1, interval 2) counts as true
232184}
233185
234186// Jitter function for tile lookup: returns a UV offset of a quarter of _NeighborMaxTex_TexelSize.xy along the direction (cos a, sin a), where a = GradientNoise (uv + (2, 0)) * UNITY_PI_2.
235187float2 JitterTile (float2 uv)
236188{
237189 float rx, ry;
238190 sincos (GradientNoise (uv + float2 (2.0 , 0.0 )) * UNITY_PI_2, ry, rx);
239- return float2 (rx, ry) * _NeighborMaxTex_TexelSize.xy / 4.0 ;
240- }
241-
242- // Cone shaped interpolation
243- half Cone (half T, half l_V)
244- {
245- return saturate (1.0 - T / l_V);
246- }
247-
248- // Cylinder shaped interpolation
249- half Cylinder (half T, half l_V)
250- {
251- return 1.0 - smoothstep (0.95 * l_V, 1.05 * l_V, T);
252- }
253-
254- // Depth comparison function
255- half CompareDepth (half za, half zb)
256- {
257- return saturate (1.0 - kDepthFilterCoeff * (zb - za) / min (za, zb));
258- }
259-
260- // Lerp and normalization
261- half2 RNMix (half2 a, half2 b, half p)
262- {
263- return SafeNorm (lerp (a, b, saturate (p)));
191+ return float2 (rx, ry) * _NeighborMaxTex_TexelSize.xy * 0.25 ;
264192}
265193
266194// Velocity sampling function: reads _VelocityTex at uv, remaps the stored xy from [0, 1] back to [-_MaxBlurRadius, +_MaxBlurRadius] and passes z through unchanged.
267195half3 SampleVelocity (float2 uv)
268196{
269- half3 v = tex2D (_VelocityTex, uv ).xyz;
197+ half3 v = tex2Dlod (_VelocityTex, float4 (uv, 0.0 , 0.0 ) ).xyz; // explicit mip level 0 (w = 0); presumably chosen so the call is usable inside the dynamic loop of FragReconstruction - confirm
270198 return half3 ((v.xy * 2.0 - 1.0 ) * _MaxBlurRadius, v.z);
271199}
272200
273- // Sample weighting function
274- half SampleWeight ( half2 d_n, half l_v_c, half z_p, half T, float2 S_uv, half w_A)
201+ // Reconstruction filter: blends the center color with samples taken on both sides of the pixel along the jittered NeighborMax vector and an alternate direction, weighting each sample by its velocity length and depth relation to the center; the source alpha is returned unchanged.
202+ half4 FragReconstruction (VaryingsMultitex i) : SV_Target
275203{
276- half3 temp = tex2Dlod (_VelocityTex, float4 (S_uv, 0.0 , 0.0 ));
204+ // Color sample at the center point
205+ const half4 c_p = tex2D (_MainTex, i.uv0);
277206
278- half2 v_S = (temp.xy * 2.0 - 1.0 ) * _MaxBlurRadius;
279- half l_v_S = max (length (v_S), 0.5 );
207+ // Velocity/Depth sample at the center point
208+ const half3 vd_p = SampleVelocity (i.uv1);
209+ const half l_v_p = max (length (vd_p.xy), 0.5 );
210+ const half rcp_d_p = 1.0 / vd_p.z;
280211
281- half z_S = temp.z;
212+ // NeighborMax vector sample at the center point
213+ const half2 v_max = tex2D (_NeighborMaxTex, i.uv1 + JitterTile (i.uv1)).xy;
214+ const half l_v_max = length (v_max);
215+ const half rcp_l_v_max = 1.0 / l_v_max; // NOTE(review): not referenced anywhere else in this function
282216
283- half f = CompareDepth (z_p, z_S);
284- half b = CompareDepth (z_S, z_p) ;
217+ // Escape early if the NeighborMax vector is small enough.
218+ if (l_v_max < 2.0 ) return c_p ;
285219
286- half w_B = abs (dot (v_S / l_v_S, d_n));
220+ // Use V_p as a secondary sampling direction except when it's too small
221+ // compared to V_max. This vector is rescaled to be the length of V_max.
222+ const half2 v_alt = (l_v_p * 2.0 > l_v_max) ? vd_p.xy * (l_v_max / l_v_p) : v_max;
287223
288- half weight = 0.0 ;
289- weight += f * Cone (T, l_v_S) * w_B;
290- weight += b * Cone (T, l_v_c) * w_A;
291- weight += Cylinder (T, min (l_v_S, l_v_c)) * max (w_A, w_B) * 2.0 ;
224+ // Determine the sample count: at most _LoopCount and at most half of |v_max| (assumes _LoopCount >= 1 so that sc is non-zero before the 1.0 / sc below - TODO confirm on the script side).
225+ const half sc = floor (min (_LoopCount, l_v_max * 0.5 ));
292226
293- return weight;
294- }
227+ // Loop variables (starts from the outermost sample)
228+ const half dt = 1.0 / sc;
229+ const half t_offs = (GradientNoise (i.uv0) - 0.5 ) * dt;
230+ half t = 1.0 - dt * 0.5 ;
231+ half count = 0.0 ;
295232
296- // Reconstruction filter
297- half4 FragReconstruction (VaryingsMultitex i) : SV_Target
298- {
299- float2 p = i.uv1 * _ScreenParams.xy;
300- float2 p_uv = i.uv1;
301-
302- // Nonfiltered source color;
303- half4 source = tex2D (_MainTex, i.uv0);
304-
305- // Velocity vector at p.
306- half3 v_c_t = SampleVelocity (p_uv);
307- half2 v_c = v_c_t.xy;
308- half2 v_c_n = SafeNorm (v_c);
309- half l_v_c = max (length (v_c), 0.5 );
310-
311- // NeighborMax vector at p (with small).
312- half2 v_max = tex2D (_NeighborMaxTex, p_uv + JitterTile (p_uv)).xy;
313- half2 v_max_n = SafeNorm (v_max);
314- half l_v_max = length (v_max);
315-
316- // Escape early if the NeighborMax vector is too short.
317- if (l_v_max < 0.5 )
318- return source;
319-
320- // Linearized depth at p.
321- half z_p = v_c_t.z;
322-
323- // A vector perpendicular to v_max.
324- half2 w_p = v_max_n.yx * float2 (-1.0 , 1.0 );
325- if (dot (w_p, v_c) < 0.0 )
326- w_p = -w_p;
327-
328- // Secondary sampling direction.
329- half2 w_c = RNMix (w_p, v_c_n, (l_v_c - 0.5 ) / 1.5 );
330-
331- // The center sample.
332- half sampleCount = _LoopCount * 2.0 ;
333- half totalWeight = sampleCount / (l_v_c * 40.0 );
334- half3 result = source.rgb * totalWeight;
335-
336- // Start from t=-1 + small jitter.
337- // The width of jitter is equivalent to 4 sample steps.
338- half sampleJitter = 4.0 * 2.0 / (sampleCount + 4.0 );
339- half t = -1.0 + GradientNoise (p_uv) * sampleJitter;
340- half dt = (2.0 - sampleJitter) / sampleCount;
341-
342- // Precalculate the w_A parameters.
343- half w_A1 = dot (w_c, v_c_n);
344- half w_A2 = dot (w_c, v_max_n);
345-
346- #ifndef UNROLL_LOOP_COUNT
347- UNITY_LOOP for (int c = 0 ; c < _LoopCount; c++)
348- #else
349- UNITY_UNROLL for (int c = 0 ; c < UNROLL_LOOP_COUNT; c++)
350- #endif
233+ // Background velocity
234+ // This is used for tracking the maximum velocity in the background layer.
235+ half l_v_bg = max (l_v_p, 1.0 );
236+
237+ // Color accumulation
238+ half4 acc = 0.0 ;
239+
240+ UNITY_LOOP while (t > dt * 0.25 )
351241 {
352- // Odd-numbered sample: sample along v_c.
353- {
354- float2 S_uv0 = i.uv0 + t * v_c * _MainTex_TexelSize.xy;
355- float2 S_uv1 = i.uv1 + t * v_c * _VelocityTex_TexelSize.xy;
356- half weight = SampleWeight (v_c_n, l_v_c, z_p, abs (t * l_v_max), S_uv1, w_A1);
242+ // Sampling direction (switched per every two samples)
243+ const half2 v_s = Interval (count, 4.0 ) ? v_alt : v_max;
357244
358- result += tex2Dlod (_MainTex, float4 (S_uv0, 0.0 , 0.0 )).rgb * weight;
359- totalWeight += weight ;
245+ // Sample position (inverted per every sample)
246+ const half t_s = ( Interval (count, 2.0 ) ? -t : t) + t_offs ;
360247
361- t += dt;
362- }
363- // Even-numbered sample: sample along v_max.
364- {
365- float2 S_uv0 = i.uv0 + t * v_max * _MainTex_TexelSize.xy;
366- float2 S_uv1 = i.uv1 + t * v_max * _VelocityTex_TexelSize.xy;
367- half weight = SampleWeight (v_max_n, l_v_c, z_p, abs (t * l_v_max), S_uv1, w_A2);
248+ // Distance to the sample position
249+ const half l_t = l_v_max * abs (t_s);
368250
369- result += tex2Dlod (_MainTex, float4 (S_uv0, 0.0 , 0.0 )).rgb * weight;
370- totalWeight += weight;
251+ // UVs for the sample position
252+ const float2 uv0 = i.uv0 + v_s * t_s * _MainTex_TexelSize.xy;
253+ const float2 uv1 = i.uv1 + v_s * t_s * _VelocityTex_TexelSize.xy;
371254
372- t += dt;
373- }
255+ // Color sample
256+ const half3 c = tex2Dlod (_MainTex, float4 (uv0, 0.0 , 0.0 )).rgb;
257+
258+ // Velocity/Depth sample
259+ const half3 vd = SampleVelocity (uv1);
260+
261+ // Background/Foreground separation
262+ const half fg = saturate ((vd_p.z - vd.z) * 20.0 * rcp_d_p);
263+
264+ // Length of the velocity vector
265+ const half l_v = lerp (l_v_bg, length (vd.xy), fg);
266+
267+ // Sample weight
268+ // (Distance test) * (Spreading out by motion) * (Triangular window)
269+ const half w = saturate (l_v - l_t) / l_v * (1.2 - t);
270+
271+ // Color accumulation
272+ acc += half4 (c, 1.0 ) * w;
273+
274+ // Update the background velocity.
275+ l_v_bg = max (l_v_bg, l_v);
276+
277+ // Advance to the next sample: t steps inward only after every second sample, so each t is visited once at +t and once at -t.
278+ t = Interval (count, 2.0 ) ? t - dt : t;
279+ count += 1.0 ;
374280 }
375281
376- return half4 (result / totalWeight, source.a);
282+ // Add the center sample.
283+ acc += half4 (c_p.rgb, 1.0 ) * (1.2 / (l_v_bg * sc * 2.0 ));
284+
285+ return half4 (acc.rgb / acc.a, c_p.a);
377286}
378287
379288// -----------------------------------------------------------------------------
0 commit comments