diff --git a/src/TensorFlowNET.Core/Gradients/math_grad.cs b/src/TensorFlowNET.Core/Gradients/math_grad.cs
index 22d3c641b..89699d6bc 100644
--- a/src/TensorFlowNET.Core/Gradients/math_grad.cs
+++ b/src/TensorFlowNET.Core/Gradients/math_grad.cs
@@ -840,7 +840,7 @@ public static Tensor[] _PowGrad(Operation op, Tensor[] grads)
         /// <param name="x"></param>
         /// <param name="y"></param>
         /// <returns></returns>
-        private static (Tensor, Tensor, bool)[] SmartBroadcastGradientArgs(Tensor x, Tensor y, Tensor grad)
+        public static (Tensor, Tensor, bool)[] SmartBroadcastGradientArgs(Tensor x, Tensor y, Tensor grad)
         {
             Tensor sx, sy;
             if (x.shape.IsFullyDefined &&
diff --git a/src/TensorFlowNET.Core/Gradients/nn_grad.cs b/src/TensorFlowNET.Core/Gradients/nn_grad.cs
index 15b72f55c..e95163930 100644
--- a/src/TensorFlowNET.Core/Gradients/nn_grad.cs
+++ b/src/TensorFlowNET.Core/Gradients/nn_grad.cs
@@ -15,6 +15,7 @@ limitations under the License.
 ******************************************************************************/
 
 using System;
+using System.Diagnostics;
 using System.Linq;
 using Tensorflow.Operations;
 using static Tensorflow.Binding;
@@ -135,13 +136,35 @@ public static Tensor[] _SquaredDifferenceGrad(Operation op, Tensor[] grads)
         {
             Tensor x = op.inputs[0];
             Tensor y = op.inputs[1];
+            var grad = grads[0]; // incoming gradient; only the first entry of grads is read in this method
             var scale = ops.convert_to_tensor(2.0f, dtype: x.dtype);
-            var x_grad = math_ops.scalar_mul(scale, grads[0]) * (x - y);
-            return new Tensor[]
+            var x_grad = math_ops.scalar_mul(scale, grad) * (x - y); // 2 * grad * (x - y), before any per-input reduction
+            if (math_grad._ShapesFullySpecifiedAndEqual(x, y, grad)) // fast path: skips the reduce_sum/reshape steps below
             {
-                x_grad,
-                -x_grad
-            };
+                return new Tensor[] { x_grad, -x_grad }; // the gradient for y is the negation of the gradient for x
+            }
+            var broadcast_info = math_grad.SmartBroadcastGradientArgs(x, y, grad); // NOTE(review): presumably yields (shape, reduction axes, needs-reduce flag) per input - confirm in math_grad
+            Debug.Assert(broadcast_info.Length == 2); // entry 0 is used for x, entry 1 for y; Debug.Assert is only evaluated in DEBUG builds
+            var (sx, rx, must_reduce_x) = broadcast_info[0];
+            var (sy, ry, must_reduce_y) = broadcast_info[1];
+            Tensor gx, gy;
+            if (must_reduce_x)
+            {
+                gx = array_ops.reshape(math_ops.reduce_sum(x_grad, rx), sx); // sum x_grad over rx, then reshape to sx (presumably x's shape - TODO confirm)
+            }
+            else
+            {
+                gx = x_grad; // no reduction requested for x: pass the gradient through unchanged
+            }
+            if (must_reduce_y)
+            {
+                gy = -array_ops.reshape(math_ops.reduce_sum(x_grad, ry), sy); // same reduction applied with y's entries, then negated
+            }
+            else
+            {
+                gy = -x_grad; // no reduction requested for y: just negate
+            }
+            return new Tensor[] { gx, gy }; // one gradient per op input, in input order (x, y)
         }
 
         /// <summary>