SKaiNET-developers · michalharakal · Jun 28, 2026 · Jun 28, 2026 · Jun 28, 2026
diff --git a/kotlin-js-store/yarn.lock b/kotlin-js-store/yarn.lock
@@ -2110,7 +2110,12 @@ wrappy@1:
   resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f"
   integrity sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==
 
-ws@8.18.3, ws@~8.18.3:
+ws@8.20.1:
+  version "8.20.1"
+  resolved "https://registry.yarnpkg.com/ws/-/ws-8.20.1.tgz#91a9ae2b312ccf98e0a85ec499b48cef45ab0ddb"
+  integrity sha512-It4dO0K5v//JtTXuPkfEOaI3uUN87iYPnqo/ZzqCoG3g8uhA66QUMs/SrM0YK7/NAu+r4LMh/9dq2A7k+rHs+w==
+
+ws@~8.18.3:
   version "8.18.3"
   resolved "https://registry.yarnpkg.com/ws/-/ws-8.18.3.tgz#b56b88abffde62791c639170400c93dcb0c95472"
   integrity sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==

diff --git a/...kends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/tensor/ops/DefaultCpuOps.kt b/...kends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/tensor/ops/DefaultCpuOps.kt
@@ -34,6 +34,7 @@ import sk.ainet.lang.types.FP16
 import sk.ainet.lang.types.FP32
 import sk.ainet.lang.types.Int32
 import sk.ainet.lang.types.Int8
+import kotlin.math.floor
 import kotlin.math.ln
 import kotlin.math.log10 as kmLog10
 import kotlin.math.log2 as kmLog2
@@ -1257,7 +1258,6 @@ public open class DefaultCpuOpsBase(protected val dataFactory: TensorDataFactory
         require(input.rank == 4) { "upsample2d: input must be 4D (N, C, H, W)" }
         val (scaleH, scaleW) = scale
         require(scaleH > 0 && scaleW > 0) { "upsample2d: scale factors must be positive" }
-        require(mode == UpsampleMode.Nearest) { "upsample2d: only Nearest mode is implemented on CPU backend" }
 
         val n = input.shape[0]
         val c = input.shape[1]
@@ -1267,16 +1267,61 @@ public open class DefaultCpuOpsBase(protected val dataFactory: TensorDataFactory
         val outW = inW * scaleW
         val outShape = Shape(n, c, outH, outW)
 
-        val outData = dataFactory.init<T, V>(outShape, input.dtype) { idx ->
-            val oh = idx[2]
-            val ow = idx[3]
-            val ih = oh / scaleH
-            val iw = ow / scaleW
-            input.data.get(idx[0], idx[1], ih, iw)
+        val outData = when (mode) {
+            UpsampleMode.Nearest -> dataFactory.init<T, V>(outShape, input.dtype) { idx ->
+                val oh = idx[2]
+                val ow = idx[3]
+                val ih = oh / scaleH
+                val iw = ow / scaleW
+                input.data.get(idx[0], idx[1], ih, iw)
+            }
+
+            UpsampleMode.Bilinear -> {
+                require(input.dtype == FP32::class || input.dtype == FP16::class) {
+                    "upsample2d: Bilinear mode is only implemented for float dtypes (got ${input.dtype})"
+                }
+                dataFactory.init<T, V>(outShape, input.dtype) { idx ->
+                    val b = idx[0]
+                    val ch = idx[1]
+                    val srcH = sourceCoord(idx[2], scaleH, inH, alignCorners)
+                    val srcW = sourceCoord(idx[3], scaleW, inW, alignCorners)
+                    val ih0 = floor(srcH).toInt().coerceIn(0, inH - 1)
+                    val ih1 = (ih0 + 1).coerceIn(0, inH - 1)
+                    val iw0 = floor(srcW).toInt().coerceIn(0, inW - 1)
+                    val iw1 = (iw0 + 1).coerceIn(0, inW - 1)
+                    val wh = (srcH - ih0).coerceIn(0.0f, 1.0f)
+                    val ww = (srcW - iw0).coerceIn(0.0f, 1.0f)
+                    val v00 = (input.data.get(b, ch, ih0, iw0) as Number).toFloat()
+                    val v01 = (input.data.get(b, ch, ih0, iw1) as Number).toFloat()
+                    val v10 = (input.data.get(b, ch, ih1, iw0) as Number).toFloat()
+                    val v11 = (input.data.get(b, ch, ih1, iw1) as Number).toFloat()
+                    val blend = v00 * (1f - wh) * (1f - ww) +
+                        v01 * (1f - wh) * ww +
+                        v10 * wh * (1f - ww) +
+                        v11 * wh * ww
+                    @Suppress("UNCHECKED_CAST")
+                    (blend as V)
+                }
+            }
         }
         return newTensor(outData, input.dtype, input)
     }
 
+    /**
+     * Translates an output index into its fractional position along the source axis for
+     * upsampling, following the PyTorch convention. When [alignCorners] is false, pixel centers
+     * are used: `(o + 0.5) / scale - 0.5`. When it is true, the endpoints are pinned:
+     * `o * (in - 1) / (out - 1)`. The value can lie outside `[0, in-1]`; the caller must clamp.
+     */
+    private fun sourceCoord(out: Int, scale: Int, inDim: Int, alignCorners: Boolean): Float {
+        if (!alignCorners) {
+            return (out + 0.5f) / scale - 0.5f
+        }
+        val upsampledDim = inDim * scale
+        // A single-sample output has no span to interpolate over; pin it to source index 0.
+        return if (upsampledDim <= 1) 0f else out.toFloat() * (inDim - 1) / (upsampledDim - 1)
+    }
+
     @TensorOp()
     @InProgress("cpu", owner = "team:cpu", issue = "task-ops.md#op-maxpool2d")
     override fun <T : DType, V> maxPool2d(

diff --git a/...-cpu/src/commonTest/kotlin/sk/ainet/sk/ainet/exec/tensor/ops/DefaultCpuOpsUpsampleTest.kt b/...-cpu/src/commonTest/kotlin/sk/ainet/sk/ainet/exec/tensor/ops/DefaultCpuOpsUpsampleTest.kt
@@ -36,4 +36,33 @@ class DefaultCpuOpsUpsampleTest {
             assertEquals(4f, upsampled.data[0, 0, 3, 3])
         }
     }
+
+    @Test
+    fun bilinear_mode_blends_neighbors() {
+        data(ctx) { _ ->
+            // Source rows are [1,2] / [3,4]; upsample 2x bilinearly, PyTorch align_corners=false.
+            val source = tensor<FP32, Float> {
+                shape(1, 1, 2, 2) {
+                    init { idx -> (1 + idx[2] * 2 + idx[3]).toFloat() }
+                }
+            }
+            val result = ops.upsample2d(
+                input = source,
+                scale = 2 to 2,
+                mode = UpsampleMode.Bilinear,
+                alignCorners = false
+            )
+            assertEquals(Shape(1, 1, 4, 4), result.shape)
+
+            // (row, col, expected value). The four output corners clamp to the source
+            // corners; out[1,1] blends with fractions 0.25/0.25 and out[2,2] with 0.75/0.75.
+            val expectations = listOf(
+                Triple(0, 0, 1f), Triple(0, 3, 2f), Triple(3, 0, 3f), Triple(3, 3, 4f),
+                Triple(1, 1, 1.75f), Triple(2, 2, 3.25f),
+            )
+            for ((row, col, want) in expectations) {
+                assertEquals(want, result.data[0, 0, row, col], 1e-5f)
+            }
+        }
+    }
 }
diff --git a/...ile/skainet-compile-dag/src/commonMain/kotlin/sk/ainet/lang/graph/DefaultExecutionTape.kt b/...ile/skainet-compile-dag/src/commonMain/kotlin/sk/ainet/lang/graph/DefaultExecutionTape.kt
@@ -14,6 +14,7 @@ import sk.ainet.tape.GradientTape
 import sk.ainet.tape.RecordedOperation
 import sk.ainet.tape.TapeStack
 import kotlin.math.exp
+import kotlin.math.floor
 import sk.ainet.lang.tensor.ops.AddOperation
 import sk.ainet.lang.tensor.ops.DivideOperation
 import sk.ainet.lang.tensor.ops.MatmulOperation
@@ -817,7 +818,8 @@ public class DefaultGradientTape(
         val input = inputs[0]
         val scale = pair2(attributes["scale"], 1)
         val mode = (attributes["mode"] as? String) ?: "Nearest"
-        return listOf(upsample2dGrad(upstream, input, scale, mode))
+        val alignCorners = (attributes["alignCorners"] as? Boolean) ?: false
+        return listOf(upsample2dGrad(upstream, input, scale, mode, alignCorners))
     }
 
     override fun leakyReluBackward(upstream: Tensor<DType, Any>, output: Tensor<DType, Any>, inputs: List<Tensor<DType, Any>>, attributes: Map<String, Any?>): List<Tensor<DType, Any>?> {
@@ -1761,20 +1763,20 @@ public class DefaultGradientTape(
     }
 
     /**
-     * upsample2d backward (NEAREST only — the CPU forward only supports
-     * Nearest, so the backward mirrors that). For each input position, sum
-     * the upstream gradients of every output position it produced (the
-     * scaleH × scaleW block above-left of [ih*scaleH, iw*scaleW]).
+     * upsample2d backward — the transpose (scatter) of the forward sampler.
+     * Nearest: each input position sums the upstream gradients of every output
+     * position it produced (the scaleH × scaleW block whose top-left corner is
+     * [ih*scaleH, iw*scaleW]). Bilinear: each output gradient is distributed
+     * back to the same 4 source neighbors with the same bilinear weights used
+     * in the forward blend.
      */
     private fun upsample2dGrad(
         upstream: Tensor<DType, Any>,
         input: Tensor<DType, Any>,
         scale: Pair<Int, Int>,
         mode: String,
+        alignCorners: Boolean,
     ): Tensor<DType, Any> {
-        require(mode.equals("Nearest", ignoreCase = true)) {
-            "upsample2dBackward: only Nearest mode implemented (got mode=$mode)"
-        }
         val n = input.shape[0]
         val c = input.shape[1]
         val inH = input.shape[2]
@@ -1784,25 +1786,69 @@ public class DefaultGradientTape(
         val outW = upstream.shape[3]
         val dInput = zerosLike(input)
 
-        for (b in 0 until n) {
-            for (ch in 0 until c) {
-                for (oh in 0 until outH) {
-                    val ih = oh / scaleH
-                    if (ih !in 0 until inH) continue
-                    for (ow in 0 until outW) {
-                        val iw = ow / scaleW
-                        if (iw !in 0 until inW) continue
-                        val gOut = (upstream.data.get(b, ch, oh, ow) as Number).toFloat()
-                        val cur = (dInput.data.get(b, ch, ih, iw) as Number).toFloat()
-                        @Suppress("UNCHECKED_CAST")
-                        dInput.data.set(b, ch, ih, iw, value = (cur + gOut) as Any)
+        fun accumulate(b: Int, ch: Int, ih: Int, iw: Int, delta: Float) {
+            val cur = (dInput.data.get(b, ch, ih, iw) as Number).toFloat()
+            @Suppress("UNCHECKED_CAST")
+            dInput.data.set(b, ch, ih, iw, value = (cur + delta) as Any)
+        }
+
+        when (mode.lowercase()) {
+            "nearest" -> {
+                for (b in 0 until n) {
+                    for (ch in 0 until c) {
+                        for (oh in 0 until outH) {
+                            val ih = oh / scaleH
+                            if (ih !in 0 until inH) continue
+                            for (ow in 0 until outW) {
+                                val iw = ow / scaleW
+                                if (iw !in 0 until inW) continue
+                                val gOut = (upstream.data.get(b, ch, oh, ow) as Number).toFloat()
+                                accumulate(b, ch, ih, iw, gOut)
+                            }
+                        }
+                    }
+                }
+            }
+
+            "bilinear" -> {
+                for (b in 0 until n) {
+                    for (ch in 0 until c) {
+                        for (oh in 0 until outH) {
+                            val srcH = upsampleSourceCoord(oh, scaleH, inH, alignCorners)
+                            val ih0 = floor(srcH).toInt().coerceIn(0, inH - 1)
+                            val ih1 = (ih0 + 1).coerceIn(0, inH - 1)
+                            val wh = (srcH - ih0).coerceIn(0.0f, 1.0f)
+                            for (ow in 0 until outW) {
+                                val srcW = upsampleSourceCoord(ow, scaleW, inW, alignCorners)
+                                val iw0 = floor(srcW).toInt().coerceIn(0, inW - 1)
+                                val iw1 = (iw0 + 1).coerceIn(0, inW - 1)
+                                val ww = (srcW - iw0).coerceIn(0.0f, 1.0f)
+                                val gOut = (upstream.data.get(b, ch, oh, ow) as Number).toFloat()
+                                accumulate(b, ch, ih0, iw0, gOut * (1f - wh) * (1f - ww))
+                                accumulate(b, ch, ih0, iw1, gOut * (1f - wh) * ww)
+                                accumulate(b, ch, ih1, iw0, gOut * wh * (1f - ww))
+                                accumulate(b, ch, ih1, iw1, gOut * wh * ww)
+                            }
+                        }
                     }
                 }
             }
+
+            else -> throw IllegalArgumentException("upsample2dBackward: unsupported mode '$mode'")
         }
         return dInput
     }
 
+    /** Maps an output index to its fractional source coordinate (PyTorch convention); see DefaultCpuOps.sourceCoord. */
+    private fun upsampleSourceCoord(out: Int, scale: Int, inDim: Int, alignCorners: Boolean): Float =
+        when {
+            // Half-pixel centers: may yield values below 0 or above inDim - 1; callers clamp.
+            !alignCorners -> (out + 0.5f) / scale - 0.5f
+            // Degenerate one-sample output: nothing to interpolate across.
+            inDim * scale <= 1 -> 0f
+            else -> out.toFloat() * (inDim - 1) / (inDim * scale - 1)
+        }
+
     private fun <T : DType, V> clampGrad(upstream: Tensor<T, V>, input: Tensor<T, V>, minVal: Float, maxVal: Float): Tensor<T, V> {
         val matchedUpstream = matchShape(upstream, input)
         val gradOut = zerosLike(input)

diff --git a/.../skainet-compile-dag/src/commonTest/kotlin/sk/ainet/exec/autograd/ConvPoolBackwardTest.kt b/.../skainet-compile-dag/src/commonTest/kotlin/sk/ainet/exec/autograd/ConvPoolBackwardTest.kt
@@ -222,4 +222,14 @@ class ConvPoolBackwardTest {
             x.ops.upsample2d(x, scale = 2 to 2, mode = UpsampleMode.Nearest, alignCorners = false)
         }
     }
+
+    @Test
+    fun upsample2d_bilinear_backward_distributes_weights() {
+        // 3x3 input holding a ramp from -1.0 to 1.0 in steps of 0.25.
+        val ramp = FloatArray(9) { i -> (i - 4) * 0.25f }
+        assertGradMatchesFiniteDiff(xShape = Shape(1, 1, 3, 3), x0 = ramp) { _, t ->
+            val tensorOps = t.ops
+            tensorOps.upsample2d(t, scale = 2 to 2, mode = UpsampleMode.Bilinear, alignCorners = false)
+        }
+    }
 }