From 22180578227bfe48598aefbdb11aba443d164086 Mon Sep 17 00:00:00 2001
From: protsenkovi <protsenkovi@gmail.com>
Date: Thu, 13 Jun 2024 09:49:08 +0400
Subject: [PATCH] Add HDBLNet/HDBHNet and SRNetBase, switch UpscaleBlock to GELU

---
 src/common/layers.py   |   4 +-
 src/models/__init__.py |   3 +
 src/models/hdblut.py   |  63 +++++++---
 src/models/hdbnet.py   | 257 +++++++++++++++++++++++++++++++++--------
 src/models/srnet.py    | 239 +++++++++++++++++++++-----------------
 src/train.py           |   4 +-
 6 files changed, 393 insertions(+), 177 deletions(-)

diff --git a/src/common/layers.py b/src/common/layers.py
index 95b3731..1b2e0b7 100644
--- a/src/common/layers.py
+++ b/src/common/layers.py
@@ -44,9 +44,9 @@ class UpscaleBlock(nn.Module):
     
     def forward(self, x):
         x = (x-self.in_bias)/self.in_scale
-        x = torch.relu(self.embed(x))
+        x = torch.nn.functional.gelu(self.embed(x))
         for linear_projection in self.linear_projections:
-            x = torch.cat([x, torch.relu(linear_projection(x))], dim=2)
+            x = torch.cat([x, torch.nn.functional.gelu(linear_projection(x))], dim=2)
         x = self.project_channels(x)
         x = torch.tanh(x)         
         x = x*self.out_scale + self.out_bias
diff --git a/src/models/__init__.py b/src/models/__init__.py
index 020ae1c..b61bbf5 100644
--- a/src/models/__init__.py
+++ b/src/models/__init__.py
@@ -5,6 +5,7 @@ from . import srlut
 from . import sdynet
 from . import hdbnet
 from . import hdblut
+from common import losses
 import torch
 import numpy as np 
 from pathlib import Path
@@ -32,6 +33,8 @@ AVAILABLE_MODELS = {
     'HDBLut': hdblut.HDBLut,
     'HDBLNet': hdbnet.HDBLNet,
     'HDBHNet': hdbnet.HDBHNet,
+    'SRMsbLsbNet': srnet.SRMsbLsbNet,
+    'SRMsbLsbShift2Net': srnet.SRMsbLsbShift2Net,
     'SRMsbLsbR90Net': srnet.SRMsbLsbR90Net,
     'SRMsbLsb4R90Net': srnet.SRMsbLsb4R90Net,
     # 'RCNetCentered_3x3': rcnet.RCNetCentered_3x3, 'RCLutCentered_3x3': rclut.RCLutCentered_3x3,   
diff --git a/src/models/hdblut.py b/src/models/hdblut.py
index 17db7ba..f77ba8d 100644
--- a/src/models/hdblut.py
+++ b/src/models/hdblut.py
@@ -18,17 +18,17 @@ class HDBLut(nn.Module):
         self.scale = scale
         self.quantization_interval = quantization_interval
 
-        self.stage1_3H = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*4 + (2,2)).type(torch.float32))
-        self.stage1_3D = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*4 + (2,2)).type(torch.float32))
-        self.stage1_3B = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*4 + (2,2)).type(torch.float32))
-        self.stage1_2H = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*4 + (2,2)).type(torch.float32))
-        self.stage1_2D = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*4 + (2,2)).type(torch.float32))
+        self.stage1_3H = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*3 + (2,2)).type(torch.float32))
+        self.stage1_3D = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*3 + (2,2)).type(torch.float32))
+        self.stage1_3B = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*3 + (2,2)).type(torch.float32))
+        self.stage1_2H = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*2 + (2,2)).type(torch.float32))
+        self.stage1_2D = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*2 + (2,2)).type(torch.float32))
 
-        self.stage2_3H = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*4 + (2,2)).type(torch.float32))
-        self.stage2_3D = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*4 + (2,2)).type(torch.float32))
-        self.stage2_3B = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*4 + (2,2)).type(torch.float32))
-        self.stage2_2H = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*4 + (2,2)).type(torch.float32))
-        self.stage2_2D = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*4 + (2,2)).type(torch.float32))
+        self.stage2_3H = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*3 + (2,2)).type(torch.float32))
+        self.stage2_3D = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*3 + (2,2)).type(torch.float32))
+        self.stage2_3B = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*3 + (2,2)).type(torch.float32))
+        self.stage2_2H = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*2 + (2,2)).type(torch.float32))
+        self.stage2_2D = nn.Parameter(torch.randint(0, 255, size=(256//quantization_interval+1,)*2 + (2,2)).type(torch.float32))
 
         self._extract_pattern_3H = layers.PercievePattern(receptive_field_idxes=[[0,0],[0,1],[0,2]], center=[0,0], window_size=3)
         self._extract_pattern_3D = layers.PercievePattern(receptive_field_idxes=[[0,0],[1,1],[2,2]], center=[0,0], window_size=3)
@@ -41,9 +41,9 @@ class HDBLut(nn.Module):
         stage1_3H, stage1_3D, stage1_3B, stage1_2H, stage1_2D, 
         stage2_3H, stage2_3D, stage2_3B, stage2_2H, stage2_2D 
     ):   
-        scale = int(stage_lut.shape[-1])
-        quantization_interval = 256//(stage_lut.shape[0]-1)
-        lut_model = HDBLut(quantization_interval=quantization_interval, scale=scale)
+        # quantization_interval = 256//(stage1_3H.shape[0]-1)
+        quantization_interval = 16
+        lut_model = HDBLut(quantization_interval=quantization_interval, scale=4)
         lut_model.stage1_3H = nn.Parameter(torch.tensor(stage1_3H).type(torch.float32))
         lut_model.stage1_3D = nn.Parameter(torch.tensor(stage1_3D).type(torch.float32))
         lut_model.stage1_3B = nn.Parameter(torch.tensor(stage1_3B).type(torch.float32))
@@ -59,15 +59,26 @@ class HDBLut(nn.Module):
 
     def forward_stage(self, x, scale, percieve_pattern, lut):
+        """Upscale x by looking each percieved pattern up in `lut` (plain indexing, no interpolation)."""
         b,c,h,w = x.shape
-        x = percieve_pattern(x)   
-        x = select_index_4dlut_tetrahedral(index=x, lut=lut)
+        x = percieve_pattern(x)
+        # Mixed-radix weights fold each pattern of LUT coordinates into a single flat row index.
+        # Assumes pattern values are integer coordinates in [0, lut.shape[0]) - TODO confirm against callers.
+        index_dims = len(lut.shape)-2
+        shifts = torch.tensor([lut.shape[0]**d for d in range(index_dims)], device=x.device).flip(0).reshape(1,1,index_dims)
+        x = torch.sum(x * shifts, dim=-1)
+        lut = torch.clamp(lut, 0, 255)
+        lut = lut.reshape(-1, scale, scale)
+        x = x.flatten().type(torch.int64)
+        x = lut[x]
+
         x = round_func(x)
         x = x.reshape(b, c, h, w, scale, scale)
+        # (b, c, h, w, s, s) -> (b, c, h, s, w, s): each pixel's s x s patch becomes its own output tile.
         x = x.permute(0,1,2,4,3,5)
         x = x.reshape(b, c, h*scale, w*scale)
         return x
 
-    def forward(self, x):
+    def forward(self, x, config=None):
         b,c,h,w = x.shape
         x = x.reshape(b*c, 1, h, w)
         lsb = x % 16
@@ -75,7 +86,7 @@ class HDBLut(nn.Module):
         output_msb = torch.zeros([b*c, 1, h*2, w*2], dtype=x.dtype, device=x.device)
         output_lsb = torch.zeros([b*c, 1, h*2, w*2], dtype=x.dtype, device=x.device)
         for rotations_count in range(4):
-           rotated_msb = torch.rot90(msb, k=rotations_count, dims=[2, 3])
+           rotated_msb = torch.floor_divide(torch.rot90(msb, k=rotations_count, dims=[2, 3]), 16)
            rotated_lsb = torch.rot90(lsb, k=rotations_count, dims=[2, 3])
            output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3H, self.stage1_3H), k=-rotations_count, dims=[2, 3])
            output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3D, self.stage1_3D), k=-rotations_count, dims=[2, 3])
@@ -84,14 +95,18 @@ class HDBLut(nn.Module):
            output_lsb += torch.rot90(self.forward_stage(rotated_lsb, 2, self._extract_pattern_2D, self.stage1_2D), k=-rotations_count, dims=[2, 3])
         output_msb /= 4*3
         output_lsb /= 4*2
+        print(output_msb.min(), output_msb.max())
+        print(output_lsb.min(), output_lsb.max())
+
         output_msb = output_msb + output_lsb
         x = output_msb
         lsb = x % 16
         msb = x - lsb        
         output_msb = torch.zeros([b*c, 1, h*4, w*4], dtype=x.dtype, device=x.device)
         output_lsb = torch.zeros([b*c, 1, h*4, w*4], dtype=x.dtype, device=x.device)
+        print("STAGE2", msb.min(), msb.max(), lsb.min(), lsb.max())
         for rotations_count in range(4):
-           rotated_msb = torch.rot90(msb, k=rotations_count, dims=[2, 3])
+           rotated_msb = torch.floor_divide(torch.rot90(msb, k=rotations_count, dims=[2, 3]), 16)
            rotated_lsb = torch.rot90(lsb, k=rotations_count, dims=[2, 3])
            output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3H, self.stage2_3H), k=-rotations_count, dims=[2, 3])
            output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3D, self.stage2_3D), k=-rotations_count, dims=[2, 3])
@@ -106,4 +121,14 @@ class HDBLut(nn.Module):
         return x        
     
     def __repr__(self):
-        return f"{self.__class__.__name__}\n  lut size: {self.stage_lut.shape}"
\ No newline at end of file
+        """Return the class name followed by one "<stage> size: <shape>" line per stage LUT."""
+        stage_names = (
+            "stage1_3H", "stage1_3D", "stage1_3B", "stage1_2H", "stage1_2D",
+            "stage2_3H", "stage2_3D", "stage2_3B", "stage2_2H", "stage2_2D",
+        )
+        summary = [f"{self.__class__.__name__}"]
+        for stage_name in stage_names:
+            stage_lut = getattr(self, stage_name)
+            summary.append(f"  {stage_name} size: {stage_lut.shape}")
+        # Every per-stage line is preceded by a newline, so a plain "\n" join yields the full text.
+        return "\n".join(summary)
\ No newline at end of file
diff --git a/src/models/hdbnet.py b/src/models/hdbnet.py
index 01c45e1..94b0334 100644
--- a/src/models/hdbnet.py
+++ b/src/models/hdbnet.py
@@ -5,25 +5,26 @@ import numpy as np
 from common.utils import round_func
 from common import lut
 from pathlib import Path
-# from . import srlut 
+from . import hdblut
 from common import layers
+from itertools import cycle
 
 class HDBNet(nn.Module):
     def __init__(self, hidden_dim = 64, layers_count = 4, scale = 4):
         super(HDBNet, self).__init__()
         assert scale == 4
         self.scale = scale 
-        self.stage1_3H = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
-        self.stage1_3D = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
-        self.stage1_3B = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
-        self.stage1_2H = layers.UpscaleBlock(in_features=2, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
-        self.stage1_2D = layers.UpscaleBlock(in_features=2, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
-
-        self.stage2_3H = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
-        self.stage2_3D = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
-        self.stage2_3B = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
-        self.stage2_2H = layers.UpscaleBlock(in_features=2, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
-        self.stage2_2D = layers.UpscaleBlock(in_features=2, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
+        self.stage1_3H = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2*2)
+        self.stage1_3D = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2*2)
+        self.stage1_3B = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2*2)
+        self.stage1_2H = layers.UpscaleBlock(in_features=2, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2*2)
+        self.stage1_2D = layers.UpscaleBlock(in_features=2, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2*2)
+
+        # self.stage2_3H = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
+        # self.stage2_3D = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
+        # self.stage2_3B = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
+        # self.stage2_2H = layers.UpscaleBlock(in_features=2, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
+        # self.stage2_2D = layers.UpscaleBlock(in_features=2, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=2)
 
         self._extract_pattern_3H = layers.PercievePattern(receptive_field_idxes=[[0,0],[0,1],[0,2]], center=[0,0], window_size=3)
         self._extract_pattern_3D = layers.PercievePattern(receptive_field_idxes=[[0,0],[1,1],[2,2]], center=[0,0], window_size=3)
@@ -41,59 +42,213 @@ class HDBNet(nn.Module):
         x = x.reshape(b, c, h*scale, w*scale)
         return x
             
-    def forward(self, x):
+    def forward(self, x, config=None):
         b,c,h,w = x.shape
         x = x.reshape(b*c, 1, h, w)
         lsb = x % 16
-        msb = x - lsb        
-        output_msb = torch.zeros([b*c, 1, h*2, w*2], dtype=x.dtype, device=x.device)
-        output_lsb = torch.zeros([b*c, 1, h*2, w*2], dtype=x.dtype, device=x.device)
+        msb = x - lsb     
+        output_msb = torch.zeros([b*c, 1, h*2*2, w*2*2], dtype=x.dtype, device=x.device)
+        output_lsb = torch.zeros([b*c, 1, h*2*2, w*2*2], dtype=x.dtype, device=x.device)
         for rotations_count in range(4):
            rotated_msb = torch.rot90(msb, k=rotations_count, dims=[2, 3])
            rotated_lsb = torch.rot90(lsb, k=rotations_count, dims=[2, 3])
-           output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3H, self.stage1_3H), k=-rotations_count, dims=[2, 3])
-           output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3D, self.stage1_3D), k=-rotations_count, dims=[2, 3])
-           output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3B, self.stage1_3B), k=-rotations_count, dims=[2, 3])
-           output_lsb += torch.rot90(self.forward_stage(rotated_lsb, 2, self._extract_pattern_2H, self.stage1_2H), k=-rotations_count, dims=[2, 3])
-           output_lsb += torch.rot90(self.forward_stage(rotated_lsb, 2, self._extract_pattern_2D, self.stage1_2D), k=-rotations_count, dims=[2, 3])
+           output_msb += torch.rot90(self.forward_stage(rotated_msb, 2*2, self._extract_pattern_3H, self.stage1_3H), k=-rotations_count, dims=[2, 3])
+           output_msb += torch.rot90(self.forward_stage(rotated_msb, 2*2, self._extract_pattern_3D, self.stage1_3D), k=-rotations_count, dims=[2, 3])
+           output_msb += torch.rot90(self.forward_stage(rotated_msb, 2*2, self._extract_pattern_3B, self.stage1_3B), k=-rotations_count, dims=[2, 3])
+           output_lsb += torch.rot90(self.forward_stage(rotated_lsb, 2*2, self._extract_pattern_2H, self.stage1_2H), k=-rotations_count, dims=[2, 3])
+           output_lsb += torch.rot90(self.forward_stage(rotated_lsb, 2*2, self._extract_pattern_2D, self.stage1_2D), k=-rotations_count, dims=[2, 3])
         output_msb /= 4*3
         output_lsb /= 4*2
-        output_msb = output_msb + output_lsb
-        x = output_msb
-        lsb = x % 16
-        msb = x - lsb        
-        output_msb = torch.zeros([b*c, 1, h*4, w*4], dtype=x.dtype, device=x.device)
-        output_lsb = torch.zeros([b*c, 1, h*4, w*4], dtype=x.dtype, device=x.device)
-        for rotations_count in range(4):
-           rotated_msb = torch.rot90(msb, k=rotations_count, dims=[2, 3])
-           rotated_lsb = torch.rot90(lsb, k=rotations_count, dims=[2, 3])
-           output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3H, self.stage2_3H), k=-rotations_count, dims=[2, 3])
-           output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3D, self.stage2_3D), k=-rotations_count, dims=[2, 3])
-           output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3B, self.stage2_3B), k=-rotations_count, dims=[2, 3])
-           output_lsb += torch.rot90(self.forward_stage(rotated_lsb, 2, self._extract_pattern_2H, self.stage2_2H), k=-rotations_count, dims=[2, 3])
-           output_lsb += torch.rot90(self.forward_stage(rotated_lsb, 2, self._extract_pattern_2D, self.stage2_2D), k=-rotations_count, dims=[2, 3])
-        output_msb /= 4*3
-        output_lsb /= 4*2
-        output_msb = output_msb + output_lsb
-        x = output_msb
+        output_msb = round_func((output_msb / 255) * 16) * 15
+        output_lsb = (output_lsb / 255) * 15
+        # print(output_msb.min(), output_msb.max(), output_lsb.min(), output_lsb.max())
+        x = output_msb + output_lsb
+        # lsb = x % 16
+        # msb = x - lsb
+
+        # output_msb = torch.zeros([b*c, 1, h*4, w*4], dtype=x.dtype, device=x.device)
+        # output_lsb = torch.zeros([b*c, 1, h*4, w*4], dtype=x.dtype, device=x.device)
+        # for rotations_count in range(4):
+        #    rotated_msb = torch.rot90(msb, k=rotations_count, dims=[2, 3])
+        #    rotated_lsb = torch.rot90(lsb, k=rotations_count, dims=[2, 3])
+        #    output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3H, self.stage2_3H), k=-rotations_count, dims=[2, 3])
+        #    output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3D, self.stage2_3D), k=-rotations_count, dims=[2, 3])
+        #    output_msb += torch.rot90(self.forward_stage(rotated_msb, 2, self._extract_pattern_3B, self.stage2_3B), k=-rotations_count, dims=[2, 3])
+        #    output_lsb += torch.rot90(self.forward_stage(rotated_lsb, 2, self._extract_pattern_2H, self.stage2_2H), k=-rotations_count, dims=[2, 3])
+        #    output_lsb += torch.rot90(self.forward_stage(rotated_lsb, 2, self._extract_pattern_2D, self.stage2_2D), k=-rotations_count, dims=[2, 3])
+        # output_msb /= 4*3
+        # output_lsb /= 4*2
+        # output_msb = round_func((output_msb / 255) * 16) * 15
+        # output_lsb = (output_lsb / 255) * 15
+        # # print(output_msb.min(), output_msb.max(), output_lsb.min(), output_lsb.max())
+        # x = output_msb + output_lsb
         x = x.reshape(b, c, h*self.scale, w*self.scale)
         return x
 
     def get_lut_model(self, quantization_interval=16, batch_size=2**10):
-        stage1_3H = lut.transfer_2x2_input_SxS_output(self.stage1_3H, quantization_interval=quantization_interval, batch_size=batch_size)
-        stage1_3D = lut.transfer_2x2_input_SxS_output(self.stage1_3D, quantization_interval=quantization_interval, batch_size=batch_size)
-        stage1_3B = lut.transfer_2x2_input_SxS_output(self.stage1_3B, quantization_interval=quantization_interval, batch_size=batch_size)
-        stage1_2H = lut.transfer_2x2_input_SxS_output(self.stage1_2H, quantization_interval=quantization_interval, batch_size=batch_size)
-        stage1_2D = lut.transfer_2x2_input_SxS_output(self.stage1_2D, quantization_interval=quantization_interval, batch_size=batch_size)
-
-        stage2_3H = lut.transfer_2x2_input_SxS_output(self.stage2_3H, quantization_interval=quantization_interval, batch_size=batch_size)
-        stage2_3D = lut.transfer_2x2_input_SxS_output(self.stage2_3D, quantization_interval=quantization_interval, batch_size=batch_size)
-        stage2_3B = lut.transfer_2x2_input_SxS_output(self.stage2_3B, quantization_interval=quantization_interval, batch_size=batch_size)
-        stage2_2H = lut.transfer_2x2_input_SxS_output(self.stage2_2H, quantization_interval=quantization_interval, batch_size=batch_size)
-        stage2_2D = lut.transfer_2x2_input_SxS_output(self.stage2_2D, quantization_interval=quantization_interval, batch_size=batch_size)
+        stage1_3H = lut.transfer_3_input_SxS_output(self.stage1_3H, quantization_interval=quantization_interval, batch_size=batch_size)
+        stage1_3D = lut.transfer_3_input_SxS_output(self.stage1_3D, quantization_interval=quantization_interval, batch_size=batch_size)
+        stage1_3B = lut.transfer_3_input_SxS_output(self.stage1_3B, quantization_interval=quantization_interval, batch_size=batch_size)
+        stage1_2H = lut.transfer_2_input_SxS_output(self.stage1_2H, quantization_interval=quantization_interval, batch_size=batch_size)
+        stage1_2D = lut.transfer_2_input_SxS_output(self.stage1_2D, quantization_interval=quantization_interval, batch_size=batch_size)
+
+        stage2_3H = lut.transfer_3_input_SxS_output(self.stage2_3H, quantization_interval=quantization_interval, batch_size=batch_size)
+        stage2_3D = lut.transfer_3_input_SxS_output(self.stage2_3D, quantization_interval=quantization_interval, batch_size=batch_size)
+        stage2_3B = lut.transfer_3_input_SxS_output(self.stage2_3B, quantization_interval=quantization_interval, batch_size=batch_size)
+        stage2_2H = lut.transfer_2_input_SxS_output(self.stage2_2H, quantization_interval=quantization_interval, batch_size=batch_size)
+        stage2_2D = lut.transfer_2_input_SxS_output(self.stage2_2D, quantization_interval=quantization_interval, batch_size=batch_size)
 
         lut_model = hdblut.HDBLut.init_from_numpy(
             stage1_3H, stage1_3D, stage1_3B, stage1_2H, stage1_2D, 
             stage2_3H, stage2_3D, stage2_3B, stage2_2H, stage2_2D
         )
-        return lut_model
\ No newline at end of file
+        return lut_model
+
+    def get_loss_fn(self):
+        def loss_fn(pred, target):
+            return F.mse_loss(pred/255, target/255)
+        return loss_fn
+
+
+class HDBLNet(nn.Module):
+    """Single-stage upscaler that handles x % 16 (lsb) and x - lsb (msb) in separate branches.
+
+    msb is fed to three UpscaleBlocks (3H, 3D, 3B patterns), lsb to one (3L pattern).
+    No get_lut_model is defined here, so LUT export is not available for this class.
+    """
+    def __init__(self, hidden_dim = 64, layers_count = 4, scale = 4):
+        super(HDBLNet, self).__init__()
+        self.scale = scale 
+        # One UpscaleBlock per sampling pattern; in_features=3 matches the 3 indexes of each pattern.
+        self.stage1_3H = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=self.scale)
+        self.stage1_3D = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=self.scale)
+        self.stage1_3B = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=self.scale)
+        self.stage1_3L = layers.UpscaleBlock(in_features=3, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=self.scale)
+
+        # Index pairs sampled by each pattern - presumably [row, col] offsets from the current pixel; verify in PercievePattern.
+        self._extract_pattern_3H = layers.PercievePattern(receptive_field_idxes=[[0,0],[0,1],[0,2]], center=[0,0], window_size=3)
+        self._extract_pattern_3D = layers.PercievePattern(receptive_field_idxes=[[0,0],[1,1],[2,2]], center=[0,0], window_size=3)
+        self._extract_pattern_3B = layers.PercievePattern(receptive_field_idxes=[[0,0],[1,2],[2,1]], center=[0,0], window_size=3)
+        self._extract_pattern_3L = layers.PercievePattern(receptive_field_idxes=[[0,0],[0,1],[1,1]], center=[0,0], window_size=2)
+
+    def forward_stage(self, x, scale, percieve_pattern, stage):
+        """Run `stage` on the patterns extracted from x and tile its output to (b, c, h*scale, w*scale)."""
+        b,c,h,w = x.shape
+        x = percieve_pattern(x)   
+        x = stage(x)
+        x = round_func(x)
+        x = x.reshape(b, c, h, w, scale, scale)
+        # (b, c, h, w, s, s) -> (b, c, h, s, w, s): each pixel's s x s patch becomes its own output tile.
+        x = x.permute(0,1,2,4,3,5)
+        x = x.reshape(b, c, h*scale, w*scale)
+        return x
+            
+    def forward(self, x, config=None):
+        """Upscale x of shape (b, c, h, w) to (b, c, h*scale, w*scale).
+
+        `config` is accepted but not used by this model.
+        """
+        b,c,h,w = x.shape
+        # Fold channels into the batch axis so every channel goes through the same single-channel stages.
+        x = x.reshape(b*c, 1, h, w)
+        lsb = x % 16
+        msb = x - lsb     
+        output_msb = torch.zeros([b*c, 1, h*self.scale, w*self.scale], dtype=x.dtype, device=x.device)
+        output_lsb = torch.zeros([b*c, 1, h*self.scale, w*self.scale], dtype=x.dtype, device=x.device)
+        # Rotation ensemble: each branch sees the input rotated by k*90 degrees; its output is rotated back.
+        for rotations_count in range(4):
+           rotated_msb = torch.rot90(msb, k=rotations_count, dims=[2, 3])
+           rotated_lsb = torch.rot90(lsb, k=rotations_count, dims=[2, 3])
+           output_msb += torch.rot90(self.forward_stage(rotated_msb, self.scale, self._extract_pattern_3H, self.stage1_3H), k=-rotations_count, dims=[2, 3])
+           output_msb += torch.rot90(self.forward_stage(rotated_msb, self.scale, self._extract_pattern_3D, self.stage1_3D), k=-rotations_count, dims=[2, 3])
+           output_msb += torch.rot90(self.forward_stage(rotated_msb, self.scale, self._extract_pattern_3B, self.stage1_3B), k=-rotations_count, dims=[2, 3])
+           output_lsb += torch.rot90(self.forward_stage(rotated_lsb, self.scale, self._extract_pattern_3L, self.stage1_3L), k=-rotations_count, dims=[2, 3])
+        # Average over 4 rotations x 3 msb branches, and over 4 rotations x 1 lsb branch.
+        output_msb /= 4*3
+        output_lsb /= 4
+        # msb: round(v/255*16) * 15, lsb: v * 15/255. NOTE(review): assumes stage outputs span 0..255 - confirm.
+        output_msb = round_func((output_msb / 255) * 16) * 15
+        output_lsb = (output_lsb / 255) * 15
+  
+        x = output_msb + output_lsb
+  
+        x = x.reshape(b, c, h*self.scale, w*self.scale)
+        return x
+
+    def get_loss_fn(self):
+        """Return the training loss: MSE between pred/255 and target/255."""
+        def loss_fn(pred, target):
+            return F.mse_loss(pred/255, target/255)
+        return loss_fn
+
+
+class HDBHNet(nn.Module):
+    """Single-stage upscaler: x % 16 (lsb) and x - lsb (msb) each go through a 2x2-pattern UpscaleBlock.
+
+    forward() cycles through `msb_fns` / `lsb_fns` over the 4 rotations; each list holds one block for now.
+    """
+    def __init__(self, hidden_dim = 64, layers_count = 4, scale = 4):
+        super(HDBHNet, self).__init__()
+        self.scale = scale
+        self.hidden_dim = hidden_dim
+        self.layers_count = layers_count
+
+        def make_block():
+            return layers.UpscaleBlock(in_features=4, hidden_dim=hidden_dim, layers_count=layers_count, upscale_factor=self.scale)
+        self.msb_fns = nn.ModuleList([make_block() for _ in range(1)])
+        self.lsb_fns = nn.ModuleList([make_block() for _ in range(1)])
+        self._extract_pattern_S = layers.PercievePattern(receptive_field_idxes=[[0,0],[0,1],[1,0],[1,1]], center=[0,0], window_size=2)
+
+    def forward_stage(self, x, scale, percieve_pattern, stage):
+        """Run `stage` on the patterns extracted from x and tile its output to (b, c, h*scale, w*scale)."""
+        b,c,h,w = x.shape
+        x = percieve_pattern(x)
+        x = stage(x)
+        x = round_func(x)
+        x = x.reshape(b, c, h, w, scale, scale)
+        x = x.permute(0,1,2,4,3,5)
+        x = x.reshape(b, c, h*scale, w*scale)
+        return x
+
+    def forward(self, x, config=None):
+        """Upscale x (b, c, h, w) to (b, c, h*scale, w*scale); logs output histograms when `config` asks for it."""
+        b,c,h,w = x.shape
+        # Fold channels into the batch axis so every channel goes through the same single-channel stages.
+        x = x.reshape(b*c, 1, h, w)
+        lsb = x % 16
+        msb = x - lsb
+
+        output_msb = torch.zeros([b*c, 1, h*self.scale, w*self.scale], dtype=x.dtype, device=x.device)
+        output_lsb = torch.zeros([b*c, 1, h*self.scale, w*self.scale], dtype=x.dtype, device=x.device)
+        # cycle() repeats the module lists, so lists with fewer than 4 blocks are reused across rotations.
+        for rotations_count, msb_fn, lsb_fn in zip(range(4), cycle(self.msb_fns), cycle(self.lsb_fns)):
+           rotated_msb = torch.rot90(msb, k=rotations_count, dims=[2, 3])
+           rotated_lsb = torch.rot90(lsb, k=rotations_count, dims=[2, 3])
+           output_msb_r = self.forward_stage(rotated_msb, self.scale, self._extract_pattern_S, msb_fn)
+           output_lsb_r = self.forward_stage(rotated_lsb, self.scale, self._extract_pattern_S, lsb_fn)
+           output_msb_r = round_func((output_msb_r / 255)*16) * 15
+           output_lsb_r = (output_lsb_r / 255) * 15
+           output_msb += torch.rot90(output_msb_r, k=-rotations_count, dims=[2, 3])
+           output_lsb += torch.rot90(output_lsb_r, k=-rotations_count, dims=[2, 3])
+        output_msb /= 4
+        output_lsb /= 4
+        if config is not None and config.current_iter % config.display_step == 0:
+            config.writer.add_histogram('output_lsb', output_lsb.detach().cpu().numpy(), config.current_iter)
+            config.writer.add_histogram('output_msb', output_msb.detach().cpu().numpy(), config.current_iter)
+        x = output_msb + output_lsb
+        x = x.reshape(b, c, h*self.scale, w*self.scale)
+        return x
+
+    def get_lut_model(self, quantization_interval=16, batch_size=2**10):
+        raise NotImplementedError
+
+    def get_loss_fn(self):
+        """Return the training loss: FocalFrequencyLoss on pred/255 vs target/255, scaled by 1e8."""
+        fourier_loss_fn = FocalFrequencyLoss()  # NOTE(review): confirm this name is imported in hdbnet.py
+        high_frequency_loss_fn = FourierLoss()  # only referenced by the disabled term `c` below
+        def loss_fn(pred, target):
+            a = fourier_loss_fn(pred/255, target/255) * 1e8
+            # b = F.mse_loss(pred/255, target/255) #* 1e3
+            # c = high_frequency_loss_fn(pred/255, target/255) * 1e6
+            return a #+ b #+ c
+        return loss_fn
\ No newline at end of file
diff --git a/src/models/srnet.py b/src/models/srnet.py
index eb818be..15e5c4b 100644
--- a/src/models/srnet.py
+++ b/src/models/srnet.py
@@ -9,17 +9,10 @@ from . import srlut
 from common import layers
 from common import losses
 
-class SRNet(nn.Module):
-    def __init__(self, hidden_dim = 64, layers_count = 4, scale = 4):
-        super(SRNet, self).__init__()
-        self.scale = scale 
-        self.stage1_S = layers.UpscaleBlock(
-            hidden_dim=hidden_dim, 
-            layers_count=layers_count, 
-            upscale_factor=self.scale
-        )
-        self._extract_pattern_S = layers.PercievePattern(receptive_field_idxes=[[0,0],[0,1],[1,0],[1,1]], center=[0,0], window_size=2)
-
+class SRNetBase(nn.Module):
+    def __init__(self):
+        super(SRNetBase, self).__init__()
+    
     def forward_stage(self, x, scale, percieve_pattern, stage):
         b,c,h,w = x.shape
         x = percieve_pattern(x)   
@@ -29,6 +22,26 @@ class SRNet(nn.Module):
         x = x.permute(0,1,2,4,3,5)
         x = x.reshape(b, c, h*scale, w*scale)
         return x
+
+    def get_lut_model(self, quantization_interval=16, batch_size=2**10):
+        raise NotImplementedError
+
+    def get_loss_fn(self):
+        def loss_fn(pred, target):
+            return F.mse_loss(pred/255, target/255)
+        return loss_fn
+
+
+class SRNet(SRNetBase):
+    def __init__(self, hidden_dim = 64, layers_count = 4, scale = 4):
+        super(SRNet, self).__init__()
+        self.scale = scale 
+        self.stage1_S = layers.UpscaleBlock(
+            hidden_dim=hidden_dim, 
+            layers_count=layers_count, 
+            upscale_factor=self.scale
+        )
+        self._extract_pattern_S = layers.PercievePattern(receptive_field_idxes=[[0,0],[0,1],[1,0],[1,1]], center=[0,0], window_size=2)
             
     def forward(self, x, config=None):
         b,c,h,w = x.shape
@@ -42,12 +55,8 @@ class SRNet(nn.Module):
         lut_model = srlut.SRLut.init_from_numpy(stage_lut)
         return lut_model
 
-    def get_loss_fn(self):
-        def loss_fn(pred, target):
-            return F.mse_loss(pred/255, target/255)
-        return loss_fn
 
-class SRNetY(nn.Module):
+class SRNetY(SRNetBase):
     def __init__(self, hidden_dim = 64, layers_count = 4, scale = 4):
         super(SRNetY, self).__init__()
         self.scale = scale       
@@ -60,16 +69,6 @@ class SRNetY(nn.Module):
         self.rgb_to_ycbcr = layers.RgbToYcbcr()
         self.ycbcr_to_rgb = layers.YcbcrToRgb()
 
-    def forward_stage(self, x, scale, percieve_pattern, stage):
-        b,c,h,w = x.shape
-        x = percieve_pattern(x)   
-        x = stage(x)
-        x = round_func(x)
-        x = x.reshape(b, c, h, w, scale, scale)
-        x = x.permute(0,1,2,4,3,5)
-        x = x.reshape(b, c, h*scale, w*scale)
-        return x
-
     def forward(self, x, config=None):
         b,c,h,w = x.shape
         x = self.rgb_to_ycbcr(x)
@@ -88,12 +87,8 @@ class SRNetY(nn.Module):
         lut_model = srlut.SRLutY.init_from_numpy(stage_lut)
         return lut_model
 
-    def get_loss_fn(self):
-        def loss_fn(pred, target):
-            return F.mse_loss(pred/255, target/255)
-        return loss_fn
 
-class SRNetR90(nn.Module):
+class SRNetR90(SRNetBase):
     def __init__(self, hidden_dim = 64, layers_count = 4, scale = 4):
         super(SRNetR90, self).__init__()
         self.scale = scale       
@@ -104,16 +99,6 @@ class SRNetR90(nn.Module):
         )
         self._extract_pattern_S = layers.PercievePattern(receptive_field_idxes=[[0,0],[0,1],[1,0],[1,1]], center=[0,0], window_size=2)
 
-    def forward_stage(self, x, scale, percieve_pattern, stage):
-        b,c,h,w = x.shape
-        x = percieve_pattern(x)   
-        x = stage(x)
-        x = round_func(x)
-        x = x.reshape(b, c, h, w, scale, scale)
-        x = x.permute(0,1,2,4,3,5)
-        x = x.reshape(b, c, h*scale, w*scale)
-        return x
-
     def forward(self, x, config=None):
         b,c,h,w = x.shape
         x = x.reshape(b*c, 1, h, w)
@@ -131,12 +116,8 @@ class SRNetR90(nn.Module):
         lut_model = srlut.SRLutR90.init_from_numpy(stage_lut)
         return lut_model
 
-    def get_loss_fn(self):
-        def loss_fn(pred, target):
-            return F.mse_loss(pred/255, target/255)
-        return loss_fn
 
-class SRNetR90Y(nn.Module):
+class SRNetR90Y(SRNetBase):
     def __init__(self, hidden_dim = 64, layers_count = 4, scale = 4):
         super(SRNetR90Y, self).__init__()
         self.scale = scale       
@@ -150,16 +131,6 @@ class SRNetR90Y(nn.Module):
         self.rgb_to_ycbcr = layers.RgbToYcbcr()
         self.ycbcr_to_rgb = layers.YcbcrToRgb()
 
-    def forward_stage(self, x, scale, percieve_pattern, stage):
-        b,c,h,w = x.shape
-        x = percieve_pattern(x)   
-        x = stage(x)
-        x = round_func(x)
-        x = x.reshape(b, c, h, w, scale, scale)
-        x = x.permute(0,1,2,4,3,5)
-        x = x.reshape(b, c, h*scale, w*scale)
-        return x
-
     def forward(self, x, config=None):
         b,c,h,w = x.shape
         x = self.rgb_to_ycbcr(x)
@@ -183,15 +154,10 @@ class SRNetR90Y(nn.Module):
         lut_model = srlut.SRLutR90Y.init_from_numpy(stage_lut)
         return lut_model
 
-    def get_loss_fn(self):
-        def loss_fn(pred, target):
-            return F.mse_loss(pred/255, target/255)
-        return loss_fn
-
 
-class SRMsbLsbR90Net(nn.Module):
+class SRMsbLsbNet(SRNetBase):
     def __init__(self, hidden_dim = 64, layers_count = 4, scale = 4):
-        super(SRMsbLsbR90Net, self).__init__()
+        super(SRMsbLsbNet, self).__init__()
         self.scale = scale 
         self.hidden_dim = hidden_dim
         self.layers_count = layers_count
@@ -202,7 +168,7 @@ class SRMsbLsbR90Net(nn.Module):
             layers_count=layers_count,
             upscale_factor=self.scale,
             input_max_value=255,
-            output_max_value=15
+            output_max_value=255
         )
         self.lsb_fn = layers.UpscaleBlock(
             in_features=4,
@@ -210,19 +176,109 @@ class SRMsbLsbR90Net(nn.Module):
             layers_count=layers_count,
             upscale_factor=self.scale,
             input_max_value=15,
-            output_max_value=15
+            output_max_value=255
         )
         self._extract_pattern_S = layers.PercievePattern(receptive_field_idxes=[[0,0],[0,1],[1,0],[1,1]], center=[0,0], window_size=2)
+            
+    def forward(self, x, config=None):
+        b,c,h,w = x.shape
+        x = x.reshape(b*c, 1, h, w)
 
-    def forward_stage(self, x, scale, percieve_pattern, stage):
+        lsb = x % 16  # remainder modulo 16 (the low 4 bits when x holds integer values)
+        msb = x - lsb  # remaining part of x, always a multiple of 16
+
+        output_msb = self.forward_stage(msb, self.scale, self._extract_pattern_S, self.msb_fn)
+        output_lsb = self.forward_stage(lsb, self.scale, self._extract_pattern_S, self.lsb_fn)
+        # Log histograms of both branches every config.display_step iterations (only when config is given).
+        if config is not None and config.current_iter % config.display_step == 0:
+            config.writer.add_histogram('output_lsb', output_lsb.detach().cpu().numpy(), config.current_iter)
+            config.writer.add_histogram('output_msb', output_msb.detach().cpu().numpy(), config.current_iter)
+        x = output_msb + output_lsb
+        x = x.reshape(b, c, h*self.scale, w*self.scale)
+        return x
+
+    def get_lut_model(self, quantization_interval=16, batch_size=2**10):
+        raise NotImplementedError
+
+
+class SRMsbLsbShift2Net(SRNetBase):
+    def __init__(self, hidden_dim = 64, layers_count = 4, scale = 4):
+        super(SRMsbLsbShift2Net, self).__init__()
+        self.scale = scale
+        self.hidden_dim = hidden_dim
+        self.layers_count = layers_count
+        self.count = 4  # number of (msb, lsb) block pairs; forward() shifts pair i's output by i pixels
+        self.msb_fns = nn.ModuleList([layers.UpscaleBlock(
+            in_features=4,
+            hidden_dim=hidden_dim,
+            layers_count=layers_count,
+            upscale_factor=self.scale,
+            input_max_value=255,
+            output_max_value=255
+        ) for _ in range(self.count)])
+        self.lsb_fns = nn.ModuleList([layers.UpscaleBlock(
+            in_features=4,
+            hidden_dim=hidden_dim,
+            layers_count=layers_count,
+            upscale_factor=self.scale,
+            input_max_value=15,
+            output_max_value=255
+        ) for _ in range(self.count)])
+        self._extract_pattern_S = layers.PercievePattern(receptive_field_idxes=[[0,0],[0,1],[1,0],[1,1]], center=[0,0], window_size=2)
+
+    def forward(self, x, config=None):
         b,c,h,w = x.shape
-        x = percieve_pattern(x)   
-        x = stage(x)
-        x = round_func(x)
-        x = x.reshape(b, c, h, w, scale, scale)
-        x = x.permute(0,1,2,4,3,5)
-        x = x.reshape(b, c, h*scale, w*scale)
+        x = x.reshape(b*c, 1, h, w)  # fold channels into the batch axis so every plane is processed alone
+
+        lsb = x % 16  # remainder modulo 16 (the low 4 bits when x holds integer values)
+        msb = x - lsb  # remaining part of x, always a multiple of 16
+        # Pair i's output is replicate-padded by i pixels on the left/top and cropped back, then all pairs are averaged.
+        output_msb = torch.zeros([b*c, 1, h*self.scale, w*self.scale], dtype=x.dtype, device=x.device)
+        output_lsb = torch.zeros([b*c, 1, h*self.scale, w*self.scale], dtype=x.dtype, device=x.device)
+        for i, (msb_fn, lsb_fn) in enumerate(zip(self.msb_fns, self.lsb_fns)):
+            output_msb_s = self.forward_stage(msb, self.scale, self._extract_pattern_S, msb_fn)
+            output_lsb_s = self.forward_stage(lsb, self.scale, self._extract_pattern_S, lsb_fn)
+            output_msb += torch.nn.functional.pad(output_msb_s, [i, 0, i, 0], mode='replicate')[:,:,:h*self.scale,:w*self.scale]
+            output_lsb += torch.nn.functional.pad(output_lsb_s, [i, 0, i, 0], mode='replicate')[:,:,:h*self.scale,:w*self.scale]
+        output_msb /= self.count
+        output_lsb /= self.count
+
+        if config is not None and config.current_iter % config.display_step == 0:
+            config.writer.add_histogram('output_lsb', output_lsb.detach().cpu().numpy(), config.current_iter)
+            config.writer.add_histogram('output_msb', output_msb.detach().cpu().numpy(), config.current_iter)
+
+        x = output_msb + output_lsb
+        x = x.reshape(b, c, h*self.scale, w*self.scale)
         return x
+
+    def get_lut_model(self, quantization_interval=16, batch_size=2**10):
+        raise NotImplementedError
+
+
+class SRMsbLsbR90Net(SRNetBase):
+    def __init__(self, hidden_dim = 64, layers_count = 4, scale = 4):
+        super(SRMsbLsbR90Net, self).__init__()
+        self.scale = scale 
+        self.hidden_dim = hidden_dim
+        self.layers_count = layers_count
+
+        self.msb_fn = layers.UpscaleBlock(
+            in_features=4,
+            hidden_dim=hidden_dim,
+            layers_count=layers_count,
+            upscale_factor=self.scale,
+            input_max_value=255,
+            output_max_value=255
+        )
+        self.lsb_fn = layers.UpscaleBlock(
+            in_features=4,
+            hidden_dim=hidden_dim,
+            layers_count=layers_count,
+            upscale_factor=self.scale,
+            input_max_value=15,
+            output_max_value=255
+        )
+        self._extract_pattern_S = layers.PercievePattern(receptive_field_idxes=[[0,0],[0,1],[1,0],[1,1]], center=[0,0], window_size=2)
             
     def forward(self, x, config=None):
         b,c,h,w = x.shape
@@ -236,10 +292,8 @@ class SRMsbLsbR90Net(nn.Module):
         for rotations_count in range(4):
            rotated_msb = torch.rot90(msb, k=rotations_count, dims=[2, 3])
            rotated_lsb = torch.rot90(lsb, k=rotations_count, dims=[2, 3])
-           output_msb_r = self.forward_stage(rotated_msb, self.scale, self._extract_pattern_S, msb_fn)           
-           output_lsb_r = self.forward_stage(rotated_lsb, self.scale, self._extract_pattern_S, lsb_fn)
-           output_msb_r = round_func(output_msb_r) * 15 
-           output_lsb_r = round_func(output_lsb_r)
+           output_msb_r = self.forward_stage(rotated_msb, self.scale, self._extract_pattern_S, self.msb_fn)           
+           output_lsb_r = self.forward_stage(rotated_lsb, self.scale, self._extract_pattern_S, self.lsb_fn)
            output_msb += torch.rot90(output_msb_r, k=-rotations_count, dims=[2, 3])
            output_lsb += torch.rot90(output_lsb_r, k=-rotations_count, dims=[2, 3])
         output_msb /= 4
@@ -253,15 +307,9 @@ class SRMsbLsbR90Net(nn.Module):
 
     def get_lut_model(self, quantization_interval=16, batch_size=2**10):
         raise NotImplementedError
-    
-    def get_loss_fn(self):
-        fourier_loss_fn = losses.FocalFrequencyLoss()
-        def loss_fn(pred, target):
-            return fourier_loss_fn(pred, target) 
-        return loss_fn
 
 
-class SRMsbLsb4R90Net(nn.Module):
+class SRMsbLsb4R90Net(SRNetBase):
     def __init__(self, hidden_dim = 64, layers_count = 4, scale = 4):
         super(SRMsbLsb4R90Net, self).__init__()
         self.scale = scale 
@@ -274,7 +322,7 @@ class SRMsbLsb4R90Net(nn.Module):
             layers_count=layers_count,
             upscale_factor=self.scale,
             input_max_value=255,
-            output_max_value=15
+            output_max_value=255
         ) for x in range(4)])
         self.lsb_fns = nn.ModuleList([layers.UpscaleBlock(
             in_features=4,
@@ -282,19 +330,9 @@ class SRMsbLsb4R90Net(nn.Module):
             layers_count=layers_count,
             upscale_factor=self.scale,
             input_max_value=15,
-            output_max_value=15
+            output_max_value=255
         ) for x in range(4)])
         self._extract_pattern_S = layers.PercievePattern(receptive_field_idxes=[[0,0],[0,1],[1,0],[1,1]], center=[0,0], window_size=2)
-
-    def forward_stage(self, x, scale, percieve_pattern, stage):
-        b,c,h,w = x.shape
-        x = percieve_pattern(x)   
-        x = stage(x)
-        x = round_func(x)
-        x = x.reshape(b, c, h, w, scale, scale)
-        x = x.permute(0,1,2,4,3,5)
-        x = x.reshape(b, c, h*scale, w*scale)
-        return x
             
     def forward(self, x, config=None):
         b,c,h,w = x.shape
@@ -310,8 +348,6 @@ class SRMsbLsb4R90Net(nn.Module):
            rotated_lsb = torch.rot90(lsb, k=rotations_count, dims=[2, 3])
            output_msb_r = self.forward_stage(rotated_msb, self.scale, self._extract_pattern_S, msb_fn)           
            output_lsb_r = self.forward_stage(rotated_lsb, self.scale, self._extract_pattern_S, lsb_fn)
-           output_msb_r = round_func(output_msb_r) * 15 
-           output_lsb_r = round_func(output_lsb_r)
            output_msb += torch.rot90(output_msb_r, k=-rotations_count, dims=[2, 3])
            output_lsb += torch.rot90(output_lsb_r, k=-rotations_count, dims=[2, 3])
         output_msb /= 4
@@ -320,14 +356,9 @@ class SRMsbLsb4R90Net(nn.Module):
             config.writer.add_histogram('output_lsb', output_lsb.detach().cpu().numpy(), config.current_iter)
             config.writer.add_histogram('output_msb', output_msb.detach().cpu().numpy(), config.current_iter)
         x = output_msb + output_lsb
+        x = x.clamp(0, 255)
         x = x.reshape(b, c, h*self.scale, w*self.scale)
         return x
 
     def get_lut_model(self, quantization_interval=16, batch_size=2**10):
-        raise NotImplementedError
-    
-    def get_loss_fn(self):
-        fourier_loss_fn = losses.FocalFrequencyLoss()
-        def loss_fn(pred, target):
-            return fourier_loss_fn(pred, target) 
-        return loss_fn
\ No newline at end of file
+        raise NotImplementedError
\ No newline at end of file
diff --git a/src/train.py b/src/train.py
index 2eb6fee..fd8461a 100644
--- a/src/train.py
+++ b/src/train.py
@@ -98,6 +98,7 @@ def prepare_experiment_folder(config):
         config.logs_dir.mkdir()
 
 if __name__ == "__main__":
+    # torch.set_float32_matmul_precision('high')
     script_start_time = datetime.now()
 
     config_inst = TrainOptions()
@@ -112,7 +113,8 @@ if __name__ == "__main__":
         if 'lut' in config.model.lower():
             model = AVAILABLE_MODELS[config.model]( quantization_interval = 2**(8-config.quantization_bits), scale = config.scale)
     model = model.to(torch.device(config.device))
-    optimizer = AdamWScheduleFree(model.parameters(), lr=1e-2, betas=(0.9, 0.95))
+    # model = torch.compile(model)
+    optimizer = AdamWScheduleFree(model.parameters(), betas=(0.9, 0.95))
     print(optimizer)
 
     prepare_experiment_folder(config)