Unity-Technologies
diff --git a/‎PostProcessing/Resources/Shaders/Common.cginc‎
Lines changed: 3 additions & 3 deletions b/‎PostProcessing/Resources/Shaders/Common.cginc‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎PostProcessing/Resources/Shaders/MotionBlur.cginc‎
Lines changed: 99 additions & 190 deletions b/‎PostProcessing/Resources/Shaders/MotionBlur.cginc‎
Lines changed: 99 additions & 190 deletions
@@ -76,9 +76,9 @@ inline half4 Pow4(half4 x) { return x * x * x * x; }
 #endif
 
 // Returns the largest vector of v1 and v2
-inline half2 MaxV(half2 v1, half2 v2) { return lerp(v1, v2, dot(v1, v1) < dot(v2, v2)); }
-inline half3 MaxV(half3 v1, half3 v2) { return lerp(v1, v2, dot(v1, v1) < dot(v2, v2)); }
-inline half4 MaxV(half4 v1, half4 v2) { return lerp(v1, v2, dot(v1, v1) < dot(v2, v2)); }
+inline half2 MaxV(half2 v1, half2 v2) { return dot(v1, v1) < dot(v2, v2) ? v2 : v1; }
+inline half3 MaxV(half3 v1, half3 v2) { return dot(v1, v1) < dot(v2, v2) ? v2 : v1; }
+inline half4 MaxV(half4 v1, half4 v2) { return dot(v1, v1) < dot(v2, v2) ? v2 : v1; }
 
 // Clamp HDR value within a safe range
 inline half  SafeHDR(half  c) { return min(c, HALF_MAX); }
 
@@ -28,9 +28,10 @@ float2 _TileMaxOffs;
 
 // Maximum blur radius (in pixels)
 half _MaxBlurRadius;
+float _RcpMaxBlurRadius;
 
 // Filter parameters/coefficients
-int _LoopCount;
+half _LoopCount;
 
 // History buffer for frame blending
 sampler2D _History1LumaTex;
@@ -79,81 +80,38 @@ half4 FragVelocitySetup(VaryingsDefault i) : SV_Target
     // Sample the motion vector.
     float2 v = tex2D(_CameraMotionVectorsTexture, i.uv).rg;
 
-    // Apply the exposure time.
-    v *= _VelocityScale;
-
-    // Halve the vector and convert it to the pixel space.
-    v = v * 0.5 * _CameraMotionVectorsTexture_TexelSize.zw;
+    // Apply the exposure time and convert to the pixel space.
+    v *= (_VelocityScale * 0.5) * _CameraMotionVectorsTexture_TexelSize.zw;
 
     // Clamp the vector with the maximum blur radius.
-    float lv = length(v);
-    v *= min(lv, _MaxBlurRadius) / max(lv, 1e-6);
+    v /= max(1.0, length(v) * _RcpMaxBlurRadius);
 
     // Sample the depth of the pixel.
-    float d = SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, i.uv.xy);
-    half z01 = LinearizeDepth(d);
+    half d = LinearizeDepth(SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, i.uv));
 
     // Pack into 10/10/10/2 format.
-    return half4((v / _MaxBlurRadius + 1.0) / 2.0, z01, 0.0);
+    return half4((v * _RcpMaxBlurRadius + 1.0) * 0.5, d, 0.0);
 }
 
-// TileMax filter (4 pixels width with normalization)
-half4 FragTileMax4(VaryingsDefault i) : SV_Target
+// TileMax filter (2 pixel width with normalization)
+half4 FragTileMax1(VaryingsDefault i) : SV_Target
 {
-    float4 d1 = _MainTex_TexelSize.xyxy * float4( 0.5, 0.5,  1.5, 1.5);
-    float4 d2 = _MainTex_TexelSize.xyxy * float4(-0.5, 0.5, -1.5, 1.5);
-
-    half2 v01 = tex2D(_MainTex, i.uv - d1.zw).rg; // -1.5, -1.5
-    half2 v02 = tex2D(_MainTex, i.uv - d1.xw).rg; // -0.5, -1.5
-    half2 v03 = tex2D(_MainTex, i.uv - d2.xw).rg; // +0.5, -1.5
-    half2 v04 = tex2D(_MainTex, i.uv - d2.zw).rg; // +1.5, -1.5
-
-    half2 v05 = tex2D(_MainTex, i.uv - d1.zy).rg; // -1.5, -0.5
-    half2 v06 = tex2D(_MainTex, i.uv - d1.xy).rg; // -0.5, -0.5
-    half2 v07 = tex2D(_MainTex, i.uv - d2.xy).rg; // +0.5, -0.5
-    half2 v08 = tex2D(_MainTex, i.uv - d2.zy).rg; // +1.5, -0.5
-
-    half2 v09 = tex2D(_MainTex, i.uv + d2.zy).rg; // -1.5, +0.5
-    half2 v10 = tex2D(_MainTex, i.uv + d2.xy).rg; // -0.5, +0.5
-    half2 v11 = tex2D(_MainTex, i.uv + d1.xy).rg; // +0.5, +0.5
-    half2 v12 = tex2D(_MainTex, i.uv + d1.zy).rg; // +1.5, +0.5
-
-    half2 v13 = tex2D(_MainTex, i.uv + d2.zw).rg; // -1.5, +1.5
-    half2 v14 = tex2D(_MainTex, i.uv + d2.xw).rg; // -0.5, +1.5
-    half2 v15 = tex2D(_MainTex, i.uv + d1.xw).rg; // +0.5, +1.5
-    half2 v16 = tex2D(_MainTex, i.uv + d1.zw).rg; // +1.5, +1.5
-
-    v01 = (v01 * 2.0 - 1.0) * _MaxBlurRadius;
-    v02 = (v02 * 2.0 - 1.0) * _MaxBlurRadius;
-    v03 = (v03 * 2.0 - 1.0) * _MaxBlurRadius;
-    v04 = (v04 * 2.0 - 1.0) * _MaxBlurRadius;
-
-    v05 = (v05 * 2.0 - 1.0) * _MaxBlurRadius;
-    v06 = (v06 * 2.0 - 1.0) * _MaxBlurRadius;
-    v07 = (v07 * 2.0 - 1.0) * _MaxBlurRadius;
-    v08 = (v08 * 2.0 - 1.0) * _MaxBlurRadius;
-
-    v09 = (v09 * 2.0 - 1.0) * _MaxBlurRadius;
-    v10 = (v10 * 2.0 - 1.0) * _MaxBlurRadius;
-    v11 = (v11 * 2.0 - 1.0) * _MaxBlurRadius;
-    v12 = (v12 * 2.0 - 1.0) * _MaxBlurRadius;
-
-    v13 = (v13 * 2.0 - 1.0) * _MaxBlurRadius;
-    v14 = (v14 * 2.0 - 1.0) * _MaxBlurRadius;
-    v15 = (v15 * 2.0 - 1.0) * _MaxBlurRadius;
-    v16 = (v16 * 2.0 - 1.0) * _MaxBlurRadius;
-
-    half2 va = MaxV(MaxV(MaxV(v01, v02), v03), v04);
-    half2 vb = MaxV(MaxV(MaxV(v05, v06), v07), v08);
-    half2 vc = MaxV(MaxV(MaxV(v09, v10), v11), v12);
-    half2 vd = MaxV(MaxV(MaxV(v13, v14), v15), v16);
-
-    half2 vo = MaxV(MaxV(MaxV(va, vb), vc), vd);
+    float4 d = _MainTex_TexelSize.xyxy * float4(-0.5, -0.5, 0.5, 0.5);
 
-    return half4(vo, 0.0, 0.0);
+    half2 v1 = tex2D(_MainTex, i.uv + d.xy).rg;
+    half2 v2 = tex2D(_MainTex, i.uv + d.zy).rg;
+    half2 v3 = tex2D(_MainTex, i.uv + d.xw).rg;
+    half2 v4 = tex2D(_MainTex, i.uv + d.zw).rg;
+
+    v1 = (v1 * 2.0 - 1.0) * _MaxBlurRadius;
+    v2 = (v2 * 2.0 - 1.0) * _MaxBlurRadius;
+    v3 = (v3 * 2.0 - 1.0) * _MaxBlurRadius;
+    v4 = (v4 * 2.0 - 1.0) * _MaxBlurRadius;
+
+    return half4(MaxV(MaxV(MaxV(v1, v2), v3), v4), 0.0, 0.0);
 }
 
-// TileMax filter (2 pixels width)
+// TileMax filter (2 pixel width)
 half4 FragTileMax2(VaryingsDefault i) : SV_Target
 {
     float4 d = _MainTex_TexelSize.xyxy * float4(-0.5, -0.5, 0.5, 0.5);
@@ -163,9 +121,7 @@ half4 FragTileMax2(VaryingsDefault i) : SV_Target
     half2 v3 = tex2D(_MainTex, i.uv + d.xw).rg;
     half2 v4 = tex2D(_MainTex, i.uv + d.zw).rg;
 
-    half2 vo = MaxV(MaxV(MaxV(v1, v2), v3), v4);
-
-    return half4(vo, 0.0, 0.0);
+    return half4(MaxV(MaxV(MaxV(v1, v2), v3), v4), 0.0, 0.0);
 }
 
 // TileMax filter (variable width)
@@ -215,165 +171,118 @@ half4 FragNeighborMax(VaryingsDefault i) : SV_Target
     half2 vb = MaxV(v4, MaxV(v5, v6));
     half2 vc = MaxV(v7, MaxV(v8, v9));
 
-    return half4(MaxV(va, MaxV(vb, vc)) / cw, 0.0, 0.0);
+    return half4(MaxV(va, MaxV(vb, vc)) * (1.0 / cw), 0.0, 0.0);
 }
 
 // -----------------------------------------------------------------------------
 // Reconstruction
 
-// Strength of the depth filter
-static const float kDepthFilterCoeff = 15.0;
-
-// Safer version of vector normalization function
-half2 SafeNorm(half2 v)
+// Returns true while phase is in the latter half of each interval-long period.
+bool Interval(half phase, half interval)
 {
-    half l = max(length(v), EPSILON);
-    return v / l * (l >= 0.5);
+    return frac(phase / interval) > 0.499;
 }
 
 // Jitter function for tile lookup
 float2 JitterTile(float2 uv)
 {
     float rx, ry;
     sincos(GradientNoise(uv + float2(2.0, 0.0)) * UNITY_PI_2, ry, rx);
-    return float2(rx, ry) * _NeighborMaxTex_TexelSize.xy / 4.0;
-}
-
-// Cone shaped interpolation
-half Cone(half T, half l_V)
-{
-    return saturate(1.0 - T / l_V);
-}
-
-// Cylinder shaped interpolation
-half Cylinder(half T, half l_V)
-{
-    return 1.0 - smoothstep(0.95 * l_V, 1.05 * l_V, T);
-}
-
-// Depth comparison function
-half CompareDepth(half za, half zb)
-{
-    return saturate(1.0 - kDepthFilterCoeff * (zb - za) / min(za, zb));
-}
-
-// Lerp and normalization
-half2 RNMix(half2 a, half2 b, half p)
-{
-    return SafeNorm(lerp(a, b, saturate(p)));
+    return float2(rx, ry) * _NeighborMaxTex_TexelSize.xy * 0.25;
 }
 
 // Velocity sampling function
 half3 SampleVelocity(float2 uv)
 {
-    half3 v = tex2D(_VelocityTex, uv).xyz;
+    half3 v = tex2Dlod(_VelocityTex, float4(uv, 0.0, 0.0)).xyz;
     return half3((v.xy * 2.0 - 1.0) * _MaxBlurRadius, v.z);
 }
 
-// Sample weighting function
-half SampleWeight(half2 d_n, half l_v_c, half z_p, half T, float2 S_uv, half w_A)
+// Reconstruction filter
+half4 FragReconstruction(VaryingsMultitex i) : SV_Target
 {
-    half3 temp = tex2Dlod(_VelocityTex, float4(S_uv, 0.0, 0.0));
+    // Color sample at the center point
+    const half4 c_p = tex2D(_MainTex, i.uv0);
 
-    half2 v_S = (temp.xy * 2.0 - 1.0) * _MaxBlurRadius;
-    half l_v_S = max(length(v_S), 0.5);
+    // Velocity/Depth sample at the center point
+    const half3 vd_p = SampleVelocity(i.uv1);
+    const half l_v_p = max(length(vd_p.xy), 0.5);
+    const half rcp_d_p = 1.0 / vd_p.z;
 
-    half z_S = temp.z;
+    // NeighborMax vector sample at the center point
+    const half2 v_max = tex2D(_NeighborMaxTex, i.uv1 + JitterTile(i.uv1)).xy;
+    const half l_v_max = length(v_max);
+    const half rcp_l_v_max = 1.0 / l_v_max;
 
-    half f = CompareDepth(z_p, z_S);
-    half b = CompareDepth(z_S, z_p);
+    // Escape early if the NeighborMax vector is small enough.
+    if (l_v_max < 2.0) return c_p;
 
-    half w_B = abs(dot(v_S / l_v_S, d_n));
+    // Use V_p as a secondary sampling direction except when it's too small
+    // compared to V_max. This vector is rescaled to be the length of V_max.
+    const half2 v_alt = (l_v_p * 2.0 > l_v_max) ? vd_p.xy * (l_v_max / l_v_p) : v_max;
 
-    half weight = 0.0;
-    weight += f * Cone(T, l_v_S) * w_B;
-    weight += b * Cone(T, l_v_c) * w_A;
-    weight += Cylinder(T, min(l_v_S, l_v_c)) * max(w_A, w_B) * 2.0;
+    // Determine the sample count.
+    const half sc = floor(min(_LoopCount, l_v_max * 0.5));
 
-    return weight;
-}
+    // Loop variables (starts from the outermost sample)
+    const half dt = 1.0 / sc;
+    const half t_offs = (GradientNoise(i.uv0) - 0.5) * dt;
+    half t = 1.0 - dt * 0.5;
+    half count = 0.0;
 
-// Reconstruction filter
-half4 FragReconstruction(VaryingsMultitex i) : SV_Target
-{
-    float2 p = i.uv1 * _ScreenParams.xy;
-    float2 p_uv = i.uv1;
-
-    // Nonfiltered source color;
-    half4 source = tex2D(_MainTex, i.uv0);
-
-    // Velocity vector at p.
-    half3 v_c_t = SampleVelocity(p_uv);
-    half2 v_c = v_c_t.xy;
-    half2 v_c_n = SafeNorm(v_c);
-    half l_v_c = max(length(v_c), 0.5);
-
-    // NeighborMax vector at p (with small).
-    half2 v_max = tex2D(_NeighborMaxTex, p_uv + JitterTile(p_uv)).xy;
-    half2 v_max_n = SafeNorm(v_max);
-    half l_v_max = length(v_max);
-
-    // Escape early if the NeighborMax vector is too short.
-    if (l_v_max < 0.5)
-        return source;
-
-    // Linearized depth at p.
-    half z_p = v_c_t.z;
-
-    // A vector perpendicular to v_max.
-    half2 w_p = v_max_n.yx * float2(-1.0, 1.0);
-    if (dot(w_p, v_c) < 0.0)
-        w_p = -w_p;
-
-    // Secondary sampling direction.
-    half2 w_c = RNMix(w_p, v_c_n, (l_v_c - 0.5) / 1.5);
-
-    // The center sample.
-    half sampleCount = _LoopCount * 2.0;
-    half totalWeight = sampleCount / (l_v_c * 40.0);
-    half3 result = source.rgb * totalWeight;
-
-    // Start from t=-1 + small jitter.
-    // The width of jitter is equivalent to 4 sample steps.
-    half sampleJitter = 4.0 * 2.0 / (sampleCount + 4.0);
-    half t = -1.0 + GradientNoise(p_uv) * sampleJitter;
-    half dt = (2.0 - sampleJitter) / sampleCount;
-
-    // Precalculate the w_A parameters.
-    half w_A1 = dot(w_c, v_c_n);
-    half w_A2 = dot(w_c, v_max_n);
-
-#ifndef UNROLL_LOOP_COUNT
-    UNITY_LOOP for (int c = 0; c < _LoopCount; c++)
-#else
-    UNITY_UNROLL for (int c = 0; c < UNROLL_LOOP_COUNT; c++)
-#endif
+    // Background velocity
+    // This is used for tracking the maximum velocity in the background layer.
+    half l_v_bg = max(l_v_p, 1.0);
+
+    // Color accumulation
+    half4 acc = 0.0;
+
+    UNITY_LOOP while (t > dt * 0.25)
     {
-        // Odd-numbered sample: sample along v_c.
-        {
-            float2 S_uv0 = i.uv0 + t * v_c * _MainTex_TexelSize.xy;
-            float2 S_uv1 = i.uv1 + t * v_c * _VelocityTex_TexelSize.xy;
-            half weight = SampleWeight(v_c_n, l_v_c, z_p, abs(t * l_v_max), S_uv1, w_A1);
+        // Sampling direction (switched every two samples)
+        const half2 v_s = Interval(count, 4.0) ? v_alt : v_max;
 
-            result += tex2Dlod(_MainTex, float4(S_uv0, 0.0, 0.0)).rgb * weight;
-            totalWeight += weight;
+        // Sample position (sign inverted on every other sample)
+        const half t_s = (Interval(count, 2.0) ? -t : t) + t_offs;
 
-            t += dt;
-        }
-        // Even-numbered sample: sample along v_max.
-        {
-            float2 S_uv0 = i.uv0 + t * v_max * _MainTex_TexelSize.xy;
-            float2 S_uv1 = i.uv1 + t * v_max * _VelocityTex_TexelSize.xy;
-            half weight = SampleWeight(v_max_n, l_v_c, z_p, abs(t * l_v_max), S_uv1, w_A2);
+        // Distance to the sample position
+        const half l_t = l_v_max * abs(t_s);
 
-            result += tex2Dlod(_MainTex, float4(S_uv0, 0.0, 0.0)).rgb * weight;
-            totalWeight += weight;
+        // UVs for the sample position
+        const float2 uv0 = i.uv0 + v_s * t_s * _MainTex_TexelSize.xy;
+        const float2 uv1 = i.uv1 + v_s * t_s * _VelocityTex_TexelSize.xy;
 
-            t += dt;
-        }
+        // Color sample
+        const half3 c = tex2Dlod(_MainTex, float4(uv0, 0.0, 0.0)).rgb;
+
+        // Velocity/Depth sample
+        const half3 vd = SampleVelocity(uv1);
+
+        // Background/Foreground separation
+        const half fg = saturate((vd_p.z - vd.z) * 20.0 * rcp_d_p);
+
+        // Length of the velocity vector
+        const half l_v = lerp(l_v_bg, length(vd.xy), fg);
+
+        // Sample weight
+        // (Distance test) * (Spreading out by motion) * (Triangular window)
+        const half w = saturate(l_v - l_t) / l_v * (1.2 - t);
+
+        // Color accumulation
+        acc += half4(c, 1.0) * w;
+
+        // Update the background velocity.
+        l_v_bg = max(l_v_bg, l_v);
+
+        // Advance to the next sample.
+        t = Interval(count, 2.0) ? t - dt : t;
+        count += 1.0;
     }
 
-    return half4(result / totalWeight, source.a);
+    // Add the center sample.
+    acc += half4(c_p.rgb, 1.0) * (1.2 / (l_v_bg * sc * 2.0));
+
+    return half4(acc.rgb / acc.a, c_p.a);
 }
 
 // -----------------------------------------------------------------------------