diff --git a/experiments/mipmap/DiffCoopVec.slang b/experiments/mipmap/DiffCoopVec.slang
new file mode 100644
index 0000000..ec90a76
--- /dev/null
+++ b/experiments/mipmap/DiffCoopVec.slang
@@ -0,0 +1,272 @@
+typealias IReal = __BuiltinFloatingPointType;
+
+// This struct wraps a CoopVec to make it differentiable. This is a temporary
+// workaround until the Slang core library supplies a differentiable CoopVec.
+struct DiffCoopVec<T : IReal, let N : int> : IDifferentiable, IArray<T>, IArithmetic
+{
+    typealias Differential = DiffCoopVec<T, N>;
+
+    static const CoopVecComponentType ComponentType =
+        (T is half) ? CoopVecComponentType.Float16 :
+        (T is float) ? CoopVecComponentType.Float32 :
+        CoopVecComponentType.Float64;
+
+    CoopVec<T, N> cv;
+
+    [BackwardDifferentiable] __init() { this = fill(T(0.0f)); }
+    [BackwardDifferentiable] __init(T x) { this = fill(x); }
+    [BackwardDifferentiable] __init<S : IReal>(S x) { this = fill(x); }
+    [BackwardDifferentiable] __init(This x) { this = x; }
+    [BackwardDifferentiable] __init<S : IReal>(DiffCoopVec<S, N> x) { cv = CoopVec<T, N>(x.cv); }
+    __init(no_diff CoopVec<T, N> x) { cv = x; }
+
+    int getCount()
+    {
+        return N;
+    }
+
+    __subscript(int index) -> T
+    {
+        [BackwardDifferentiable] get { return indexRead(this, index); }
+        [BackwardDifferentiable] set { indexWrite(this, index, newValue); }
+    }
+
+    bool equals(This other) { return cv.equals(other.cv); }
+    bool lessThan(This other) { return cv.lessThan(other.cv); }
+    bool lessThanOrEquals(This other) { return cv.lessThanOrEquals(other.cv); }
+    [BackwardDifferentiable] This add(This other) { return add(this, other); }
+    [BackwardDifferentiable] This sub(This other) { return sub(this, other); }
+    [BackwardDifferentiable] This mul(This other) { return mul(this, other); }
+    [BackwardDifferentiable] This div(This other) { return div(this, other); }
+    [BackwardDifferentiable] This neg() { return neg(this); }
+    This mod(This other) { return This(cv.mod(other.cv)); }
+
+    [BackwardDifferentiable] T[N] toArray() { return toArray(this); }
+    [BackwardDifferentiable] vector<T, N> toVector() { return toVector(this); }
+
+    static Differential dzero() { return Differential(T(0.0f)); }
+    static Differential dadd(Differential a, Differential b) { return a + b; }
+    static Differential dmul<S : __BuiltinRealType>(S factor, Differential d) { return This(__realCast<T>(factor) * d.cv); }
+
+    [BackwardDerivative(fill_bwd)]
+    static This fill<S : IReal>(S x) { return This(CoopVec<T, N>(T(x.toFloat()))); }
+    [BackwardDerivative(cast_bwd)]
+    static This cast<S : IReal>(DiffCoopVec<S, N> x) { return This(CoopVec<T, N>(x.cv)); }
+    [BackwardDerivative(indexRead_bwd)]
+    static T indexRead(This x, int i) { return x.cv[i]; }
+    [BackwardDerivative(indexWrite_bwd)]
+    static void indexWrite(inout This x, int i, T value) { x.cv[i] = value; }
+    [BackwardDerivative(toArray_bwd)]
+    static T[N] toArray(This x)
+    {
+        T result[N];
+        for (int i = 0; i < N; ++i)
+            result[i] = x.cv[i];
+        return result;
+    }
+    [BackwardDerivative(toVector_bwd)]
+    static vector<T, N> toVector(This x)
+    {
+        vector<T, N> result;
+        for (int i = 0; i < N; ++i)
+            result[i] = x.cv[i];
+        return result;
+    }
+    [BackwardDerivative(fromArray_bwd)]
+    static This fromArray(T x[N])
+    {
+        CoopVec<T, N> cv;
+        for (int i = 0; i < N; ++i)
+            cv[i] = x[i];
+        return This(cv);
+    }
+    [BackwardDerivative(fromVector_bwd)]
+    static This fromVector(vector<T, N> x)
+    {
+        CoopVec<T, N> cv;
+        for (int i = 0; i < N; ++i)
+            cv[i] = x[i];
+        return This(cv);
+    }
+    [BackwardDerivative(add_bwd)] static This add(This a, This b) { return This(a.cv.add(b.cv)); }
+    [BackwardDerivative(sub_bwd)] static This sub(This a, This b) { return This(a.cv.sub(b.cv)); }
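+    // The remaining operations follow the same pattern: each primal op names
+    // its reverse-mode rule via [BackwardDerivative(...)], and the *_bwd
+    // companions below write the propagated gradient back through diffPair.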
+    [BackwardDerivative(mul_bwd)] static This mul(This a, This b) { return This(a.cv.mul(b.cv)); }
+    [BackwardDerivative(div_bwd)] static This div(This a, This b) { return This(a.cv.div(b.cv)); }
+    [BackwardDerivative(neg_bwd)] static This neg(This x) { return This(x.cv.neg()); }
+
+    static void fill_bwd<S : IReal>(inout DifferentialPair<S> x, Differential grad)
+    {
+        T dx = T(0.0f);
+        [ForceUnroll]
+        for (int i = 0; i < N; ++i)
+            dx += grad[i];
+
+        x = diffPair(x.p, __slang_noop_cast<DifferentialPair<S>.DifferentialElementType>(S(dx.toFloat())));
+    }
+    static void cast_bwd<S : IReal>(inout DifferentialPair<DiffCoopVec<S, N>> x, Differential grad)
+    {
+        x = diffPair(x.p, DiffCoopVec<S, N>(CoopVec<S, N>(grad.cv)));
+    }
+    static void indexRead_bwd(inout DifferentialPair<This> x, int i, T.Differential grad)
+    {
+        Differential d = dzero();
+        indexWrite(d, i, __slang_noop_cast<T>(grad));
+        x = diffPair(x.p, d);
+    }
+    static void indexWrite_bwd(inout DifferentialPair<This> x, int i, inout DifferentialPair<T> value)
+    {
+        let grad = __slang_noop_cast<T.Differential>(indexRead(x.d, i));
+        value = diffPair(value.p, grad);
+    }
+    static void toArray_bwd(inout DifferentialPair<This> x, T.Differential[N] grad)
+    {
+        Differential dx;
+        for (int i = 0; i < N; ++i)
+            dx.cv[i] = __slang_noop_cast<T>(grad[i]);
+        x = diffPair(x.p, dx);
+    }
+    static void toVector_bwd(inout DifferentialPair<This> x, vector<T, N> grad)
+    {
+        Differential dx;
+        for (int i = 0; i < N; ++i)
+            dx.cv[i] = grad[i];
+        x = diffPair(x.p, dx);
+    }
+    static void fromArray_bwd(inout DifferentialPair<T[N]> x, This grad)
+    {
+        T dx[N];
+        for (int i = 0; i < N; ++i)
+            dx[i] = grad.cv[i];
+        x = diffPair(x.p, __slang_noop_cast<DifferentialPair<T[N]>.DifferentialElementType>(dx));
+    }
+    static void fromVector_bwd(inout DifferentialPair<vector<T, N>> x, This grad)
+    {
+        vector<T, N> dx;
+        for (int i = 0; i < N; ++i)
+            dx[i] = grad.cv[i];
+        x = diffPair(x.p, __slang_noop_cast<DifferentialPair<vector<T, N>>.DifferentialElementType>(dx));
+    }
+    static void add_bwd(inout DifferentialPair<This> a, inout DifferentialPair<This> b, Differential grad)
+    {
+        a = diffPair(a.p, grad);
+        b = diffPair(b.p, grad);
+    }
+    static void sub_bwd(inout DifferentialPair<This> a, inout DifferentialPair<This> b, Differential grad)
+    {
+        a = diffPair(a.p, grad);
+        b = diffPair(b.p, -grad);
+    }
+    static void mul_bwd(inout DifferentialPair<This> a, inout DifferentialPair<This> b, Differential grad)
+    {
+        a = diffPair(a.p, b.p * grad);
+        b = diffPair(b.p, a.p * grad);
+    }
+    static void div_bwd(inout DifferentialPair<This> a, inout DifferentialPair<This> b, Differential grad)
+    {
+        a = diffPair(a.p, grad / b.p);
+        b = diffPair(b.p, (-a.p * grad) / (b.p * b.p));
+    }
+    static void neg_bwd(inout DifferentialPair<This> x, Differential grad)
+    {
+        x = diffPair(x.p, -grad);
+    }
+}
+
+__generic<T : IReal, S : IReal, let N : int>
+[BackwardDifferentiable] DiffCoopVec<S, N> operator +(DiffCoopVec<S, N> lhs, const T rhs) { return lhs + DiffCoopVec<S, N>(rhs); }
+__generic<T : IReal, S : IReal, let N : int>
+[BackwardDifferentiable] DiffCoopVec<S, N> operator -(DiffCoopVec<S, N> lhs, const T rhs) { return lhs - DiffCoopVec<S, N>(rhs); }
+__generic<T : IReal, S : IReal, let N : int>
+[BackwardDifferentiable] DiffCoopVec<S, N> operator /(DiffCoopVec<S, N> lhs, const T rhs) { return lhs / DiffCoopVec<S, N>(rhs); }
+__generic<T : IReal, S : IReal, let N : int>
+[BackwardDifferentiable] DiffCoopVec<S, N> operator +(const T lhs, DiffCoopVec<S, N> rhs) { return DiffCoopVec<S, N>(lhs) + rhs; }
+__generic<T : IReal, S : IReal, let N : int>
+[BackwardDifferentiable] DiffCoopVec<S, N> operator -(const T lhs, DiffCoopVec<S, N> rhs) { return DiffCoopVec<S, N>(lhs) - rhs; }
+__generic<T : IReal, S : IReal, let N : int>
+[BackwardDifferentiable] DiffCoopVec<S, N> operator /(const T lhs, DiffCoopVec<S, N> rhs) { return DiffCoopVec<S, N>(lhs) / rhs; }
+__generic<T : IReal, S : IReal, let N : int>
+[BackwardDerivative(scalarMultiplyR_bwd)] DiffCoopVec<S, N> operator *(DiffCoopVec<S, N> lhs, const T rhs) { return DiffCoopVec<S, N>(lhs.cv * S(rhs.toFloat())); }
+__generic<T : IReal, S : IReal, let N : int>
+[BackwardDerivative(scalarMultiplyL_bwd)] DiffCoopVec<S, N> operator *(const T lhs, DiffCoopVec<S, N> rhs) { return DiffCoopVec<S, N>(S(lhs.toFloat()) * rhs.cv); }
+__generic<T : IReal, S : IReal, let N : int>
+void scalarMultiplyR_bwd(inout DifferentialPair<DiffCoopVec<S, N>> lhs, inout DifferentialPair<T> rhs, DiffCoopVec<S, N> grad)
+{
+    lhs = diffPair(lhs.p, grad * rhs.p);
+    DiffCoopVec<S, N>::fill_bwd(rhs, grad * lhs.p);
+}
+__generic<T : IReal, S : IReal, let N : int>
+void scalarMultiplyL_bwd(inout DifferentialPair<T> lhs, inout DifferentialPair<DiffCoopVec<S, N>> rhs, DiffCoopVec<S, N> grad)
+{
+    scalarMultiplyR_bwd(rhs, lhs, grad);
+}
+
+__generic<T : IReal, let N : int>
+[BackwardDerivative(exp_bwd)]
+DiffCoopVec<T, N> exp(DiffCoopVec<T, N> x)
+{
+    return DiffCoopVec<T, N>(exp(x.cv));
+}
+__generic<T : IReal, let N : int>
+void exp_bwd(inout DifferentialPair<DiffCoopVec<T, N>> x, DiffCoopVec<T, N> grad)
+{
+    // d/dx exp(x) = exp(x)
+    x = diffPair(x.p, grad * exp(x.p));
+}
+
+__generic<T : IReal, let N : int>
+[BackwardDerivative(log_bwd)]
+DiffCoopVec<T, N> log(DiffCoopVec<T, N> x)
+{
+    return DiffCoopVec<T, N>(log(x.cv));
+}
+__generic<T : IReal, let N : int>
+void log_bwd(inout DifferentialPair<DiffCoopVec<T, N>> x, DiffCoopVec<T, N> grad)
+{
+    // d/dx log(x) = 1/x
+    x = diffPair(x.p, grad / x.p);
+}
+
+__generic<T : IReal, let N : int>
+[BackwardDerivative(tanh_bwd)]
+DiffCoopVec<T, N> tanh(DiffCoopVec<T, N> x)
+{
+    return DiffCoopVec<T, N>(tanh(x.cv));
+}
+__generic<T : IReal, let N : int>
+void tanh_bwd(inout DifferentialPair<DiffCoopVec<T, N>> x, DiffCoopVec<T, N> grad)
+{
+    // d/dx tanh(x) = 1 - tanh(x)^2
+    let y = tanh(x.p);
+    x = diffPair(x.p, (1.0f - y * y) * grad);
+}
+
+__generic<T : IReal, let N : int>
+[BackwardDerivative(atan_bwd)]
+DiffCoopVec<T, N> atan(DiffCoopVec<T, N> x)
+{
+    return DiffCoopVec<T, N>(atan(x.cv));
+}
+__generic<T : IReal, let N : int>
+void atan_bwd(inout DifferentialPair<DiffCoopVec<T, N>> x, DiffCoopVec<T, N> grad)
+{
+    // d/dx atan(x) = 1 / (x^2 + 1)
+    x = diffPair(x.p, grad / (x.p * x.p + 1.0f));
+}
+
+__generic<T : IReal, let N : int>
+[BackwardDerivative(max_bwd)]
+DiffCoopVec<T, N> max(DiffCoopVec<T, N> x, DiffCoopVec<T, N> y)
+{
+    return DiffCoopVec<T, N>(max(x.cv, y.cv));
+}
+__generic<T : IReal, let N : int>
+void max_bwd(inout DifferentialPair<DiffCoopVec<T, N>> x, inout DifferentialPair<DiffCoopVec<T, N>> y, DiffCoopVec<T, N> grad)
+{
+    // Route each lane's gradient to whichever input won the max.
+    DiffCoopVec<T, N> gradX, gradY;
+    [ForceUnroll]
+    for (int i = 0; i < N; ++i)
+    {
+        if (x.p[i] > y.p[i])
+            gradX[i] = grad[i];
+        else
+            gradY[i] = grad[i];
+    }
+    x = diffPair(x.p, gradX);
+    y = diffPair(y.p, gradY);
+}
+
+__generic<T : IReal, let N : int>
+[BackwardDerivative(min_bwd)]
+DiffCoopVec<T, N> min(DiffCoopVec<T, N> x, DiffCoopVec<T, N> y)
+{
+    return DiffCoopVec<T, N>(min(x.cv, y.cv));
+}
+__generic<T : IReal, let N : int>
+void min_bwd(inout DifferentialPair<DiffCoopVec<T, N>> x, inout DifferentialPair<DiffCoopVec<T, N>> y, DiffCoopVec<T, N> grad)
+{
+    // Route each lane's gradient to whichever input won the min.
+    DiffCoopVec<T, N> gradX, gradY;
+    [ForceUnroll]
+    for (int i = 0; i < N; ++i)
+    {
+        if (x.p[i] > y.p[i])
+            gradY[i] = grad[i];
+        else
+            gradX[i] = grad[i];
+    }
+    x = diffPair(x.p, gradX);
+    y = diffPair(y.p, gradY);
+}
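+
+// Minimal usage sketch (illustrative, not part of the wrapper's API surface):
+// a scalar function of a DiffCoopVec can be differentiated with bwd_diff just
+// like any other [Differentiable] Slang function.
+//
+// [Differentiable]
+// float sumOfSquares(DiffCoopVec<half, 4> v)
+// {
+//     let s = v * v;
+//     float total = 0.0f;
+//     [ForceUnroll]
+//     for (int i = 0; i < 4; ++i)
+//         total += float(s[i]);
+//     return total;
+// }
+//
+// var pair = diffPair(DiffCoopVec<half, 4>(1.0h));
+// bwd_diff(sumOfSquares)(pair, 1.0f);  // pair.d now holds d(total)/dv = 2*v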
diff --git a/experiments/mipmap/nsc_nw_02_3layers_coopvec.py b/experiments/mipmap/nsc_nw_02_3layers_coopvec.py
new file mode 100644
index 0000000..7adab36
--- /dev/null
+++ b/experiments/mipmap/nsc_nw_02_3layers_coopvec.py
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from app import App
+import slangpy as spy
+import numpy as np
+import json
+
+# Create the app and load the slang module.
+app = App(width=512*3+10*2, height=512, title="Mipmap Example", device_type=spy.DeviceType.vulkan)
+module = spy.Module.load_from_file(app.device, "nsc_nw_02_3layers_coopvec.slang")
+
+# Load the reference image.
+image = spy.Tensor.load_from_image(app.device,
+                                   "slangstars.png", linearize=False)
+
+np.random.seed(0)
+
+class NetworkParameters(spy.InstanceList):
+    def __init__(self, inputs: int, outputs: int):
+        super().__init__(module[f"NetworkParameters<{inputs},{outputs}>"])
+        self.inputs = inputs
+        self.outputs = outputs
+        self.layout = spy.CoopVecMatrixLayout.training_optimal
+
+        # Create initial values of biases and weights.
+        weights_np = np.random.uniform(-0.5, 0.5, (outputs, inputs)).astype(np.float16)
+        biases_np = np.zeros(outputs).astype(np.float16)
+
+        # Convert weights into the coopvec layout for training.
+        desc = app.device.coopvec_create_matrix_desc(outputs, inputs, self.layout, spy.DataType.float16, 0)
+        weight_count = desc.size // 2  # sizeof(half)
+        params_np = np.zeros((weight_count, ), dtype=np.float16)
+        app.device.coopvec_convert_matrix_host(weights_np, params_np, dst_layout=self.layout)
+
+        # Create bias and weight tensors.
+        self.biases = spy.Tensor.zeros(app.device, (outputs, ), dtype='half')
+        self.weights = spy.Tensor.zeros(app.device, (weight_count, ), dtype='half')
+        self.biases.copy_from_numpy(biases_np)
+        self.weights.copy_from_numpy(params_np)
+
+        # Gradients for the biases and weights.
+        self.biases_grad = spy.Tensor.zeros_like(self.biases)
+        self.weights_grad = spy.Tensor.zeros_like(self.weights)
+
+        # Moment buffers for the Adam optimizer.
+        self.m_biases = spy.Tensor.zeros(app.device, self.biases.shape, 'float')
+        self.m_weights = spy.Tensor.zeros(app.device, self.weights.shape, 'float')
+        self.v_biases = spy.Tensor.zeros_like(self.m_biases)
+        self.v_weights = spy.Tensor.zeros_like(self.m_weights)
+
+        self.set_data({
+            'biases': self.biases.storage,
+            'weights': self.weights.storage,
+            'biasGrads': self.biases_grad.storage,
+            'weightGrads': self.weights_grad.storage,
+            '_type': f"NetworkParameters<{inputs},{outputs}>"
+        })
+
+    # Calls the Slang 'optimize1' function for biases and weights.
+    def optimize(self, learning_rate: float, optimize_counter: int):
+        module.optimize1(self.biases, self.biases_grad, self.m_biases, self.v_biases, learning_rate, optimize_counter)
+        module.optimize1(self.weights, self.weights_grad, self.m_weights, self.v_weights, learning_rate, optimize_counter)
+
+    # Converts the opaque training layout back to row-major and returns a JSON-friendly dict.
+    def serialize(self):
+        params_np = self.weights.to_numpy()
+        weights_np = np.zeros((self.outputs, self.inputs), dtype=np.float16)
+        app.device.coopvec_convert_matrix_host(params_np, weights_np, src_layout=self.layout)
+
+        biases_np = self.biases.to_numpy()
+
+        return {
+            'num_inputs': self.inputs,
+            'num_outputs': self.outputs,
+            'weights': weights_np.flatten().tolist(),
+            'biases': biases_np.tolist()
+        }
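+
+# Note: the size of an opaque coopvec matrix layout may include padding, so the
+# weight buffer length must come from coopvec_create_matrix_desc rather than
+# outputs * inputs. A quick sanity check (illustrative only):
+#
+#   desc = app.device.coopvec_create_matrix_desc(
+#       32, 16, spy.CoopVecMatrixLayout.training_optimal, spy.DataType.float16, 0)
+#   assert desc.size >= 32 * 16 * 2  # at least the logical weights, in bytes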
+
+class Network(spy.InstanceList):
+    def __init__(self):
+        super().__init__(module["Network"])
+        self.layer0 = NetworkParameters(16,32)
+        self.layer1 = NetworkParameters(32,32)
+        self.layer2 = NetworkParameters(32,3)
+
+    # Calls 'optimize' on each layer.
+    def optimize(self, learning_rate: float, optimize_counter: int):
+        self.layer0.optimize(learning_rate, optimize_counter)
+        self.layer1.optimize(learning_rate, optimize_counter)
+        self.layer2.optimize(learning_rate, optimize_counter)
+
+    def serialize(self):
+        return {
+            'layers': [
+                self.layer0.serialize(),
+                self.layer1.serialize(),
+                self.layer2.serialize()
+            ]
+        }
+
+if spy.Feature.cooperative_vector not in module.device.features:
+    raise RuntimeError("Device does not support cooperative vector API")
+
+network = Network()
+
+optimize_counter = 0
+
+while app.process_events():
+
+    # Blit the reference image to the screen.
+    offset = 0
+    app.blit(image, size=spy.int2(512), offset=spy.int2(offset,0), tonemap=False, bilinear=True)
+    offset += 512 + 10
+    res = spy.int2(256,256)
+
+    # Evaluate the network for every output pixel.
+    lr_output = spy.Tensor.empty_like(image)
+    module.render(pixel = spy.call_id(),
+                  resolution = res,
+                  network = network,
+                  _result = lr_output)
+
+    # Blit the network output to the screen.
+    app.blit(lr_output, size=spy.int2(512, 512), offset=spy.int2(offset, 0), tonemap=False)
+    offset += 512 + 10
+
+    # Per-pixel squared error between the network output and the reference image.
+    loss_output = spy.Tensor.empty_like(image)
+    module.loss(pixel = spy.call_id(),
+                resolution = res,
+                network = network,
+                reference = image,
+                _result = loss_output)
+
+    # Blit the loss to the screen.
+    app.blit(loss_output, size=spy.int2(512, 512), offset=spy.int2(offset, 0), tonemap=False)
+    offset += 512 + 10
+
+    learning_rate = 0.001
+
+    for i in range(50):
+        # Back-propagate the loss gradients into the per-layer gradient buffers,
+        # then apply one Adam step per parameter.
+        module.calculate_grads(
+            seed = spy.wang_hash(seed=optimize_counter, warmup=2),
+            pixel = spy.call_id(),
+            resolution = res,
+            reference = image,
+            network = network)
+        optimize_counter += 1
+
+        network.optimize(learning_rate, optimize_counter)
+
+    print("Loss:", np.sum(np.abs(loss_output.to_numpy())))
+
+    # Present the window.
+    app.present()
+
+open('weights.json', 'w').write(json.dumps(network.serialize(), indent=4))
diff --git a/experiments/mipmap/nsc_nw_02_3layers_coopvec.slang b/experiments/mipmap/nsc_nw_02_3layers_coopvec.slang
new file mode 100644
index 0000000..7a9300f
--- /dev/null
+++ b/experiments/mipmap/nsc_nw_02_3layers_coopvec.slang
@@ -0,0 +1,152 @@
+import slangpy;
+import DiffCoopVec;
+
+#define PI 3.14159265358979323846f
+
+struct NetworkParameters<let Inputs : int, let Outputs : int>
+{
+    static const CoopVecComponentType ComponentType = CoopVecComponentType.Float16;
+
+    StructuredBuffer<half> weights, biases;
+    RWStructuredBuffer<half> weightGrads, biasGrads;
+
+    [BackwardDerivative(backward)]
+    DiffCoopVec<half, Outputs> forward(DiffCoopVec<half, Inputs> x)
+    {
+        return DiffCoopVec<half, Outputs>(coopVecMatMulAdd<half, Outputs>(
+            x.cv, ComponentType,
+            weights, 0, ComponentType,
+            biases, 0, ComponentType,
+            CoopVecMatrixLayout.TrainingOptimal, false, 0
+        ));
+    }
+
+    void backward(inout DifferentialPair<DiffCoopVec<half, Inputs>> x, DiffCoopVec<half, Outputs> grad)
+    {
+        // Accumulate dW += grad (outer product) x and db += grad into the gradient buffers.
+        coopVecOuterProductAccumulate(grad.cv, x.p.cv, weightGrads, 0, 0, CoopVecMatrixLayout.TrainingOptimal, ComponentType);
+        coopVecReduceSumAccumulate(grad.cv, biasGrads, 0);
+
+        // dX = W^T * grad (the transpose flag is true).
+        let dX = coopVecMatMul<half, Inputs>(grad.cv, ComponentType, weights, 0, ComponentType, CoopVecMatrixLayout.TrainingOptimal, true, 0);
+
+        x = diffPair(x.p, DiffCoopVec<half, Inputs>(dX));
+    }
+}
+
+
+struct Network {
+    NetworkParameters<16, 32> layer0;
+    NetworkParameters<32, 32> layer1;
+    NetworkParameters<32, 3> layer2;
+
+    [Differentiable]
+    float3 eval(no_diff float2 uv)
+    {
+        // Sine/cosine frequency encoding of the UV coordinate: 4 octaves x 4 values = 16 inputs.
+        DiffCoopVec<half, 16> inputs;
+        [ForceUnroll]
+        for (int i = 0; i < 4; ++i) {
+            float scale = float(2 << i);
+            inputs[i * 4 + 0] = half(sin(uv.x * PI * scale));
+            inputs[i * 4 + 1] = half(cos(uv.x * PI * scale));
+            inputs[i * 4 + 2] = half(sin(uv.y * PI * scale));
+            inputs[i * 4 + 3] = half(cos(uv.y * PI * scale));
+        }
+
+        var output0 = layer0.forward(inputs);
+        output0 = leakyReLU(output0);
+        var output1 = layer1.forward(output0);
+        output1 = leakyReLU(output1);
+        var output2 = layer2.forward(output1);
+        output2 = exp(output2);
+        return float3(output2.toVector());
+    }
+}
+
+[Differentiable]
+DiffCoopVec<half, N> activation<let N : int>(DiffCoopVec<half, N> x)
+{
+    return max(x, DiffCoopVec<half, N>(0.0h));
+}
+
+[Differentiable]
+DiffCoopVec<half, N> leakyReLU<let N : int>(DiffCoopVec<half, N> x)
+{
+    // max(x, 0) + 0.01 * min(x, 0) equals x for x >= 0 and 0.01*x otherwise.
+    return max(x, DiffCoopVec<half, N>(0.0h)) + min(x, DiffCoopVec<half, N>(0.0h)) * 0.01h;
+}
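+
+// Gradient sanity check (illustrative sketch): reverse-mode differentiation of
+// leakyReLU should yield slope 1 for positive lanes and 0.01 for negative ones.
+//
+// var input = DiffCoopVec<half, 2>(0.0h);
+// input[0] = 2.0h;   // positive lane -> expect gradient 1.0
+// input[1] = -2.0h;  // negative lane -> expect gradient 0.01
+// var pair = diffPair(input);
+// bwd_diff(leakyReLU<2>)(pair, DiffCoopVec<half, 2>(1.0h));
+// // pair.d[0] == 1.0h, pair.d[1] == 0.01h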
+
+// Render the network output at full resolution for the given pixel.
+[Differentiable]
+float3 render(int2 pixel, int2 resolution, Network network)
+{
+    float2 uv = (float2(pixel) + 0.5f) / float2(resolution);
+    return network.eval(uv);
+}
+
+[Differentiable]
+float3 loss(int2 pixel, int2 resolution, no_diff float3 reference, Network network)
+{
+    float3 color = render(pixel, resolution, network);
+    float3 error = color - reference;
+    return error * error; // Squared error
+}
+
+// Small linear congruential PRNG (not used by the kernels in this file).
+struct LCG
+{
+    uint state;
+
+    __init(uint seed) { state = seed; }
+
+    [mutating]
+    uint next_uint()
+    {
+        const uint A = 1664525u;
+        const uint C = 1013904223u;
+        state = (A * state + C);
+        return state;
+    }
+
+    [mutating]
+    float next_float()
+    {
+        // Convert to float in range [0, 1)
+        return (next_uint() >> 8) * 0x1p-24;
+    }
+};
+
+// One Adam step for a single half-precision parameter, with float moment state.
+void optimize1(inout half primalH, inout half gradH, inout float m_prev, inout float v_prev, float learning_rate, int iteration)
+{
+    // Standard Adam default values.
+    const float ADAM_BETA_1 = 0.9;
+    const float ADAM_BETA_2 = 0.999;
+    const float ADAM_EPSILON = 1e-8;
+
+    float primal = float(primalH);
+    float grad = float(gradH);
+
+    // Guard against non-finite gradients.
+    if (isnan(grad) || isinf(grad))
+        grad = 0.0f;
+
+    // Adam optimization.
+    float gradient2 = grad * grad;
+
+    float m = ADAM_BETA_1 * m_prev + (1.0 - ADAM_BETA_1) * grad;
+    float v = ADAM_BETA_2 * v_prev + (1.0 - ADAM_BETA_2) * gradient2;
+
+    m_prev = m;
+    v_prev = v;
+
+    // Bias-corrected moment estimates.
+    float mHat = m / (1.0f - pow(ADAM_BETA_1, iteration));
+    float vHat = v / (1.0f - pow(ADAM_BETA_2, iteration));
+
+    float update = learning_rate * (mHat / (sqrt(vHat) + ADAM_EPSILON));
+
+    // Apply the update to the parameter and reset the gradient.
+    primal -= update;
+
+    primalH = half(primal);
+    gradH = 0.0h;
+}
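+
+// For reference, optimize1 above implements the standard Adam recurrences,
+// with g = gradient, t = iteration, b1/b2 = the beta constants:
+//   m_t = b1*m_{t-1} + (1-b1)*g,   v_t = b2*v_{t-1} + (1-b2)*g^2
+//   mHat = m_t / (1 - b1^t),       vHat = v_t / (1 - b2^t)
+//   theta_t = theta_{t-1} - lr * mHat / (sqrt(vHat) + eps)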
+
+void calculate_grads(uint seed, int2 pixel, int2 resolution, float3 reference, Network network)
+{
+    // 'seed' is passed from the host but currently unused here.
+    bwd_diff(loss)(pixel, resolution, reference, network, 1.0f);
+}
diff --git a/experiments/mipmap/nsc_nw_inference_only.py b/experiments/mipmap/nsc_nw_inference_only.py
new file mode 100644
index 0000000..d97765e
--- /dev/null
+++ b/experiments/mipmap/nsc_nw_inference_only.py
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from app import App
+import slangpy as spy
+import numpy as np
+import json
+
+# Create the app and load the slang module.
+app = App(width=512*3+10*2, height=512, title="Mipmap Example", device_type=spy.DeviceType.vulkan)
+module = spy.Module.load_from_file(app.device, "nsc_nw_inference_only.slang")
+
+# Load the reference image.
+image = spy.Tensor.load_from_image(app.device,
+                                   "slangstars.png", linearize=False)
+
+class NetworkParameters(spy.InstanceList):
+    def __init__(self, data: dict):
+        inputs, outputs = data['num_inputs'], data['num_outputs']
+        super().__init__(module[f"NetworkParameters<{inputs},{outputs}>"])
+
+        self.inputs = inputs
+        self.outputs = outputs
+        self.layout = spy.CoopVecMatrixLayout.inferencing_optimal
+
+        # Debug: layer dimensions vs serialized weight count.
+        print(outputs, inputs, outputs * inputs, len(data['weights']))
+
+        # Load values of biases and weights.
+        weights_np = np.array(data['weights'], dtype=np.float16).reshape((outputs, inputs))
+        biases_np = np.array(data['biases'], dtype=np.float16)
+
+        # Convert weights into the coopvec layout for inference.
+        desc = app.device.coopvec_create_matrix_desc(self.outputs, self.inputs, self.layout, spy.DataType.float16, 0)
+        weight_count = desc.size // 2  # sizeof(half)
+        params_np = np.zeros((weight_count, ), dtype=np.float16)
+        app.device.coopvec_convert_matrix_host(weights_np, params_np, dst_layout=self.layout)
+
+        self.biases = app.device.create_buffer(struct_size=2, element_count=self.outputs, data=biases_np)
+        self.weights = app.device.create_buffer(struct_size=2, element_count=weight_count, data=params_np)
+
+class Network(spy.InstanceList):
+    def __init__(self, data: dict):
+        super().__init__(module["Network"])
+
+        assert len(data['layers']) == 3
+
+        self.layer0 = NetworkParameters(data['layers'][0])
+        self.layer1 = NetworkParameters(data['layers'][1])
+        self.layer2 = NetworkParameters(data['layers'][2])
+
+
+if spy.Feature.cooperative_vector not in module.device.features:
+    raise RuntimeError("Device does not support cooperative vector API")
+
+trained_weights = json.load(open('weights.json'))
+
+network = Network(trained_weights)
+
+while app.process_events():
+
+    # Blit the reference image to the screen.
+    offset = 0
+    app.blit(image, size=spy.int2(512), offset=spy.int2(offset,0), tonemap=False, bilinear=True)
+    offset += 512 + 10
+    res = spy.int2(256,256)
+
+    # Evaluate the network for every output pixel.
+    lr_output = spy.Tensor.empty_like(image)
+    module.render(pixel = spy.call_id(),
+                  resolution = res,
+                  network = network,
+                  _result = lr_output)
+
+    # Blit the network output to the screen.
+    app.blit(lr_output, size=spy.int2(512, 512), offset=spy.int2(offset, 0), tonemap=False)
+    offset += 512 + 10
+
+    # Per-pixel squared error between the network output and the reference image.
+    loss_output = spy.Tensor.empty_like(image)
+    module.loss(pixel = spy.call_id(),
+                resolution = res,
+                network = network,
+                reference = image,
+                _result = loss_output)
+
+    # Blit the loss to the screen.
+    app.blit(loss_output, size=spy.int2(512, 512), offset=spy.int2(offset, 0), tonemap=False)
+    offset += 512 + 10
+
+    # Present the window.
+    app.present()
diff --git a/experiments/mipmap/nsc_nw_inference_only.slang b/experiments/mipmap/nsc_nw_inference_only.slang
new file mode 100644
index 0000000..ba111a0
--- /dev/null
+++ b/experiments/mipmap/nsc_nw_inference_only.slang
@@ -0,0 +1,72 @@
+import slangpy;
+
+#define PI 3.14159265358979323846f
+
+struct NetworkParameters<let Inputs : int, let Outputs : int>
+{
+    static const CoopVecComponentType ComponentType = CoopVecComponentType.Float16;
+
+    StructuredBuffer<half> weights, biases;
+
+    CoopVec<half, Outputs> forward(CoopVec<half, Inputs> x)
+    {
+        return coopVecMatMulAdd<half, Outputs>(
+            x, ComponentType,
+            weights, 0, ComponentType,
+            biases, 0, ComponentType,
+            CoopVecMatrixLayout.InferencingOptimal, false, 0
+        );
+    }
+}
+
+
+struct Network {
+    NetworkParameters<16, 32> layer0;
+    NetworkParameters<32, 32> layer1;
+    NetworkParameters<32, 3> layer2;
+
+    float3 eval(float2 uv)
+    {
+        // Sine/cosine frequency encoding of the UV coordinate: 4 octaves x 4 values = 16 inputs.
+        CoopVec<half, 16> inputs;
+        [ForceUnroll]
+        for (int i = 0; i < 4; ++i) {
+            float scale = float(2 << i);
+            inputs[i * 4 + 0] = half(sin(uv.x * PI * scale));
+            inputs[i * 4 + 1] = half(cos(uv.x * PI * scale));
+            inputs[i * 4 + 2] = half(sin(uv.y * PI * scale));
+            inputs[i * 4 + 3] = half(cos(uv.y * PI * scale));
+        }
+
+        var output0 = layer0.forward(inputs);
+        output0 = leakyReLU(output0);
+        var output1 = layer1.forward(output0);
+        output1 = leakyReLU(output1);
+        var output2 = layer2.forward(output1);
+        output2 = exp(output2);
+        return float3(output2[0], output2[1], output2[2]);
+    }
+}
+
+CoopVec<half, N> activation<let N : int>(CoopVec<half, N> x)
+{
+    return max(x, CoopVec<half, N>(0.0h));
+}
+
+CoopVec<half, N> leakyReLU<let N : int>(CoopVec<half, N> x)
+{
+    return max(x, CoopVec<half, N>(0.0h)) + min(x, CoopVec<half, N>(0.0h)) * 0.01h;
+}
+
+// Render the network output at full resolution for the given pixel.
+float3 render(int2 pixel, int2 resolution, Network network)
+{
+    float2 uv = (float2(pixel) + 0.5f) / float2(resolution);
+    return network.eval(uv);
+}
+
+float3 loss(int2 pixel, int2 resolution, float3 reference, Network network)
+{
+    float3 color = render(pixel, resolution, network);
+    float3 error = color - reference;
+    return error * error; // Squared error
+}