diff --git a/exercise3_material/log.txt b/exercise3_material/log.txt
new file mode 100644
index 0000000000000000000000000000000000000000..776ace85a7394e910793eca717d872292ab6978b
--- /dev/null
+++ b/exercise3_material/log.txt
@@ -0,0 +1,24 @@
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 64.0%
+On the Iris dataset, we achieve an accuracy of: 94.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 62.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 60.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 66.0%
+On the Iris dataset, we achieve an accuracy of: 94.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 62.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 64.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
diff --git a/exercise3_material/src_to_implement/Layers/BatchNormalization.py b/exercise3_material/src_to_implement/Layers/BatchNormalization.py
index 430f4134c089cb51efeefe2050922307b6bb9457..de0823b498c9b046f3ed3afa9ed542fbc7e09625 100644
--- a/exercise3_material/src_to_implement/Layers/BatchNormalization.py
+++ b/exercise3_material/src_to_implement/Layers/BatchNormalization.py
@@ -1,155 +1,163 @@
-from . import Base
-import numpy as np
-from . import Helpers
-
-class BatchNormalization(Base.BaseLayer):
-    def __init__(self, channels, alpha=0.8, epsilon=5e-11) -> None:
-        super().__init__()
-        self.trainable = True
-        self.input_tensor = None
-        self.output_tensor = None
-        self.channel_size = channels
-        self.check_conv = 0
-        self.alpha = alpha
-        self.epsilon = epsilon
-        self.weights = None
-        self._optimizer = None
-        self._gradient_weights = None
-        self._bias_optimizer = None
-        self._gradient_bias = None
-        self.weights = None
-        self.bias = None
-        self.mean = None
-        self.variance = None
-        self.batch_size = None
-        self.channels = None
-        self.input_tensor = None
-
-        self.initialize()
-        pass
-
-    def initialize(self):
-        self.weights = np.ones(self.channel_size)
-        self.bias = np.zeros(self.channel_size)
-   
-    def forward(self, input_tensor):
-        self.input_tensor = input_tensor
-        self.check_conv = len(input_tensor.shape) == 4
-           
-
-        # Make sure to use the test mean. The test mean is computed during training time as a moving average. It is then kept fixed during test time.
-
-        if self.check_conv:
-            self.mean = np.mean(self.input_tensor, axis=(0,2,3))
-            self.variance = np.var(self.input_tensor, axis=(0,2,3))
-
-            self.batch_size = input_tensor.shape[0]
-            self.channels = input_tensor.shape[1]
-            if self.testing_phase:
-                x_tilda = (self.input_tensor - self.test_mean.reshape((1, self.channels, 1, 1)))/np.sqrt(self.test_variance.reshape((1, self.channels, 1, 1)) + self.epsilon)
-                
-                
-            else:
-                mean_k = np.mean(self.input_tensor, axis=(0,2,3))
-                variance_k = np.var(self.input_tensor, axis=(0,2,3))
-
-                self.test_mean = self.alpha*self.mean.reshape((1, self.channels, 1, 1)) + (1-self.alpha)*mean_k.reshape(1, self.channels, 1, 1)
-                self.test_variance = self.alpha*self.variance.reshape(1, self.channels, 1, 1) + (1-self.alpha)*variance_k.reshape(1, self.channels, 1, 1)
-
-                # store mean and variance for the next iteration
-
-                self.mean = mean_k
-                self.variance = variance_k
-                x_tilda = (self.input_tensor - self.mean.reshape(1, self.channels, 1, 1)/np.sqrt(self.variance.reshape(1, self.channels, 1, 1)) + self.epsilon)
-            self.output_tensor = self.weights.reshape(1, self.channels, 1, 1) * x_tilda + self.bias.reshape(1, self.channels, 1, 1)
-            
-        else:
-            self.mean = np.mean(self.input_tensor, axis=0)
-            self.variance = np.var(self.input_tensor, axis=0)
-
-            if self.testing_phase:
-                x_tilda = (self.input_tensor - self.test_mean)/np.sqrt(self.test_variance + self.epsilon)
-                
-            else:
-                mean_k = np.mean(input_tensor, axis=0)
-                variance_k = np.var(input_tensor, axis=0)
-
-                self.test_mean = self.alpha*self.mean + (1-self.alpha)*mean_k
-                self.test_variance = self.alpha*self.variance + (1-self.alpha)*variance_k
-
-                # store mean and variance for the next iteration
-
-                self.mean = mean_k
-                self.variance = variance_k
-                x_tilda = (self.input_tensor - self.mean) / np.sqrt(self.variance + self.epsilon)
-            self.output_tensor = self.weights * x_tilda + self.bias
-                
-               
-        return self.output_tensor
-    
-    def backward(self, error_tensor):
-
-        if self.check_conv:
-            self.error_tensor = Helpers.compute_bn_gradients(self.reformat(error_tensor), self.reformat(self.input_tensor), self.weights, self.mean, self.variance, self.epsilon)
-            self.error_tensor = self.reformat(self.error_tensor)
-        else:
-            self.error_tensor = Helpers.compute_bn_gradients(error_tensor, self.input_tensor, self.weights, self.mean, self.variance, self.epsilon)
-            self._gradient_weights = np.sum(self.input_tensor.T@error_tensor, axis=0) #verify this 
-            self.gradient_bias = np.sum(error_tensor, axis=0)
-
-
-        if self._optimizer is not None:
-            self.weights = self.optimizer.calculate_update(self.weights, self._gradient_weights)
-        if self.bias_optimizer:
-            self.bias = self.bias_optimizer.calculate_update(self.bais, self._gradient_bias)
-
-        return error_tensor
-    
-    def reformat(self, tensor):
-        is4D = len(tensor.shape) == 4
-
-        if is4D:
-            b, h, m, n = tensor.shape
-            output_tensor = tensor.reshape((b, h, m*n))
-            output_tensor = np.transpose(output_tensor, (0, 2, 1))
-            b, mn, h = output_tensor.shape
-            output_tensor = output_tensor.reshape((b*mn, h))
-
-        else:
-            b, h, m, n = self.input_tensor.shape
-            output_tensor = tensor.reshape((b, m*n, h))
-            output_tensor = np.transpose(output_tensor, (0, 2, 1))
-            output_tensor = output_tensor.reshape((b, h, m, n))
-        return output_tensor
-    
-    @property
-    def bias_optimizer(self):
-        return self._bias_optimizer
-
-    @bias_optimizer.setter
-    def bias_optimizer(self, value):
-        self._bias_optimizer = value
-
-    @property
-    def optimizer(self):
-        return self._optimizer
-    
-    @optimizer.setter
-    def optimizer(self, value):
-        self._optimizer = value
-
-    @property
-    def gradient_weights(self):
-        return self._gradient_weights
-
-    @gradient_weights.setter
-    def gradient_weights(self, value):
-        self._gradient_weights = value
-
-    @property
-    def gradient_bias(self):
-        return self._gradient_bias
-
-    @gradient_bias.setter
-    def gradient_bias(self, value):
+from . import Base
+import numpy as np
+from . import Helpers
+
+class BatchNormalization(Base.BaseLayer):
+    def __init__(self, channels, alpha=0.8, epsilon=5e-11) -> None:
+        super().__init__()
+        self.trainable = True
+        self.input_tensor = None
+        self.output_tensor = None
+        self.channel_size = channels
+        self.check_conv = 0
+        self.alpha = alpha
+        self.epsilon = epsilon
+        self.weights = None
+        self._optimizer = None
+        self._gradient_weights = None
+        self._bias_optimizer = None
+        self._gradient_bias = None
+        self.weights = None
+        self.bias = None
+        self.mean = 0
+        self.variance = 0
+        self.batch_size = None
+        self.test_mean = 0
+        self.test_variance = 1
+        self.x_tilda = 0
+        
+        self.input_tensor = None
+
+        self.initialize(None, None)
+        pass
+
+    def initialize(self, weights_initializer, bias_initializer):
+        self.weights = np.ones(self.channel_size)
+        self.bias = np.zeros(self.channel_size)
+
+    
+    def forward(self, input_tensor):
+        self.input_tensor = input_tensor
+        
+        self.check_conv = len(input_tensor.shape) == 4
+        
+
+        if self.check_conv:
+            self.mean = np.mean(input_tensor, axis=(0, 2, 3))
+            self.variance = np.var(input_tensor, axis=(0, 2, 3))
+            self.channel_size = input_tensor.shape[1]
+            if self.testing_phase:
+                self.x_tilda =  (self.input_tensor-self.test_mean.reshape((1, self.channel_size, 1, 1)))/(self.test_variance.reshape((1, self.channel_size, 1, 1))+self.epsilon)**0.5
+                return self.weights.reshape((1, self.channel_size, 1, 1)) * self.x_tilda + self.bias.reshape((1, self.channel_size, 1, 1)) 
+                
+            new_mean = np.mean(self.input_tensor, axis=(0, 2, 3))
+            new_var = np.var(self.input_tensor, axis=(0, 2, 3))
+
+            # Make sure to use the test mean. The test mean is computed during training time as a moving average. It is then kept fixed during test time.
+
+            self.test_mean = self.alpha * self.test_mean + (1 - self.alpha) * new_mean.reshape((1, self.channel_size, 1, 1))
+            self.test_variance = self.alpha * self.test_variance + (1 - self.alpha) * new_var.reshape((1, self.channel_size, 1, 1))
+
+            self.mean = new_mean
+            self.variance = new_var
+            
+            self.x_tilda= (self.input_tensor - self.mean.reshape((1, self.channel_size, 1, 1))) / np.sqrt(
+            self.variance.reshape((1, self.channel_size, 1, 1)) + self.epsilon)
+           
+            return self.weights.reshape((1, self.channel_size, 1, 1)) * self.x_tilda + self.bias.reshape((1, self.channel_size, 1, 1))    
+        
+        else: #not convolutional
+            self.mean = np.mean(input_tensor, axis=0)
+            self.variance = np.var(input_tensor, axis=0)
+            if self.testing_phase:
+                self.x_tilda = (input_tensor - self.test_mean) / np.sqrt(self.test_variance + self.epsilon)
+            else:
+                self.test_mean = self.alpha * self.test_mean + (1 - self.alpha) * self.mean
+                self.test_variance = self.alpha * self.test_variance + (1 - self.alpha) * self.variance
+
+                self.x_tilda = (self.input_tensor-self.mean)/np.sqrt(self.variance+self.epsilon)
+            return self.weights*self.x_tilda + self.bias
+
+        
+
+    def backward(self, error_tensor):
+
+        if self.check_conv:
+            self.error_tensor = Helpers.compute_bn_gradients(self.reformat(error_tensor), self.reformat(self.input_tensor), self.weights, self.mean, self.variance, self.epsilon)
+            self.error_tensor = self.reformat(self.error_tensor)
+            self._gradient_weights = np.sum(error_tensor * self.x_tilda, axis=(0, 2, 3))  # dL/dgamma, summed over batch and spatial dims
+            self.gradient_bias = np.sum(error_tensor, axis=(0, 2, 3))  # dL/dbeta
+        else:
+            self.error_tensor = Helpers.compute_bn_gradients(error_tensor, self.input_tensor, self.weights, self.mean, self.variance, self.epsilon)
+            self._gradient_weights = np.sum(error_tensor * self.x_tilda, axis=0)  # dL/dgamma
+            self.gradient_bias = np.sum(error_tensor, axis=0)  # dL/dbeta
+
+
+        if self._optimizer is not None:
+            self.weights = self.optimizer.calculate_update(self.weights, self._gradient_weights)
+        if self.bias_optimizer:
+            self.bias = self.bias_optimizer.calculate_update(self.bias, self._gradient_bias)
+
+        return self.error_tensor
+
+    def reformat(self, tensor):
+
+        if len(tensor.shape) == 4:
+            b, h, m, n = tensor.shape
+            output_tensor = tensor.reshape((b, h, m*n))
+            output_tensor = np.transpose(output_tensor, (0, 2, 1))
+            b, mn, h = output_tensor.shape
+            output_tensor = output_tensor.reshape((b*mn, h))
+
+        else:
+            b, h, m, n = self.input_tensor.shape
+            output_tensor = tensor.reshape((b, m*n, h))
+            output_tensor = np.transpose(output_tensor, (0, 2, 1))
+            output_tensor = output_tensor.reshape((b, h, m, n))
+        return output_tensor
+    
+
+    # all the required setters and getters
+    
+    @property
+    def bias_optimizer(self):
+        return self._bias_optimizer
+
+    @bias_optimizer.setter
+    def bias_optimizer(self, value):
+        self._bias_optimizer = value
+
+    @property
+    def optimizer(self):
+        return self._optimizer
+    
+    @optimizer.setter
+    def optimizer(self, value):
+        self._optimizer = value
+
+    @property
+    def gradient_weights(self):
+        return self._gradient_weights
+
+    @gradient_weights.setter
+    def gradient_weights(self, value):
+        self._gradient_weights = value
+
+    @property
+    def gradient_bias(self):
+        return self._gradient_bias
+
+    @gradient_bias.setter
+    def gradient_bias(self, value):
         self._gradient_bias = value
\ No newline at end of file
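A minimal stand-alone sketch of the phase handling used in the layer above (plain NumPy; function and variable names are illustrative, not the layer's API): during training the batch statistics normalize the input and update a moving average, which is then reused unchanged at test time.

    import numpy as np

    def bn_forward(x, gamma, beta, run_mean, run_var, alpha=0.8, eps=1e-10, training=True):
        # x: (batch, features); gamma/beta: (features,)
        if training:
            mean, var = x.mean(axis=0), x.var(axis=0)
            run_mean = alpha * run_mean + (1 - alpha) * mean   # moving averages, frozen at test time
            run_var = alpha * run_var + (1 - alpha) * var
        else:
            mean, var = run_mean, run_var
        x_tilde = (x - mean) / np.sqrt(var + eps)
        return gamma * x_tilde + beta, run_mean, run_var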
diff --git a/exercise3_material/src_to_implement/Layers/Conv.py b/exercise3_material/src_to_implement/Layers/Conv.py
index f91d7d4247a4df2109cb6b37a32b06945b72af1e..54df6debd6ba311a256c79d34ea8c7fe2549fb0d 100644
--- a/exercise3_material/src_to_implement/Layers/Conv.py
+++ b/exercise3_material/src_to_implement/Layers/Conv.py
@@ -1,182 +1,194 @@
-from . import Base
-from scipy import ndimage
-from scipy import signal
-import numpy as np
-
-#stride_shape - single value or tuple
-#convolution_shape - 1D or 2D conv layer [c, m, n]
-#num_kernels - integer value
-class Conv(Base.BaseLayer):
-    
-    def __init__(self, stride_shape, convolution_shape, num_kernels) -> None:
-        super().__init__()
-        self.trainable = True
-        self._optimizer = None
-        self.weights = None
-        self.bias = None
-        self.gradient_weights = None
-        self.gradient_bias = None
-        self.stride_shape = stride_shape #single value or tuple
-        self.convolution_shape = convolution_shape #filter shape (c,m,n)
-        if len(self.convolution_shape) == 3:
-            self.c = self.convolution_shape[0]
-            self.m = self.convolution_shape[1]
-            self.n = self.convolution_shape[2]
-        else:
-            self.c = self.convolution_shape[0]
-            self.m = self.convolution_shape[1]
-        self.num_kernels = num_kernels
-        self.weights = np.random.uniform(0,1, (self.num_kernels, *convolution_shape))
-        self.bias = np.random.uniform(0,1, (self.num_kernels,))
-        pass
-
-#input shape - [batch, channels, y, x]
-#output shape - [batch, num_kernels, y_o, x_o]
-#y_o = (y + 2p - f)/s + 1
-    def forward(self, input_tensor):
-        self.input_tensor = input_tensor
-        if len(self.stride_shape) == 2:
-            sy = self.stride_shape[0]
-            sx = self.stride_shape[1]
-        else:
-            sy = self.stride_shape[0]
-            sx = self.stride_shape[0]
-
-        batch = input_tensor.shape[0]
-        
-        if len(self.convolution_shape) == 3:
-            y = input_tensor.shape[2]
-            x = input_tensor.shape[3]
-            padding_y = (self.m-1)/2
-            padding_x = (self.n-1)/2
-            self.padding = [padding_y, padding_x] 
-            y_o =  int((y + 2*padding_y - self.m)//sy + 1)
-            x_o =  int((x + 2*padding_x - self.n)//sx + 1)
-            output_shape = (batch, self.num_kernels, y_o, x_o)
-        else:
-            y = input_tensor.shape[2]
-            padding_y = (self.m-1)/2
-            self.padding = [padding_y] 
-            y_o =  int((y + 2*padding_y - self.m)//sy + 1)
-            output_shape = (batch, self.num_kernels, y_o)
-
-        output_tensor = np.zeros(output_shape)
-         
-
-        for ib in range(batch):
-            for ik in range(self.num_kernels):
-                if len(self.convolution_shape) == 3:
-                    output_per_filter = np.zeros((y,x))
-                else:
-                    output_per_filter = np.zeros((y))
-                for ic in range(self.c):
-
-                    output_per_filter += ndimage.convolve(self.input_tensor[ib, ic], self.weights[ik, ic], mode='constant', cval=0)
-                    # output_per_filter += signal.correlate(input_tensor[ib, ic], self.weights[ik, ic], mode='same', method='direct')
-                
-                output_per_filter = output_per_filter[::sy,::sx] if len(self.convolution_shape) == 3 else output_per_filter[::sy] #striding
-                output_tensor[ib, ik] = output_per_filter + self.bias[ik]
-               
-        return output_tensor
-    
-    @property
-    def optimizer(self):
-        return self._optimizer
-    
-    @optimizer.setter
-    def optimizer(self, value):
-        self._optimizer = value
-    
-    @property
-    def gradient_weights(self):
-        return self._gradient_weights
-    
-    @gradient_weights.setter
-    def gradient_weights(self, value):
-        self._gradient_weights = value
-    
-    @property
-    def gradient_bias(self):
-        return self._gradient_bias
-    
-    @gradient_bias.setter
-    def gradient_bias(self, value):
-        self._gradient_bias = value
-
-    def backward(self, error_tensor):
-        error_output = np.zeros_like(self.input_tensor)
-        if len(self.stride_shape) == 2:
-                sy = self.stride_shape[0]
-                sx = self.stride_shape[1]
-        else:
-            sy = self.stride_shape[0]
-            sx = self.stride_shape[0]
-
-        T_weights = self.weights.copy()
-        T_weights = np.transpose(T_weights, axes=(1,0,2,3)) if len(self.convolution_shape) == 3 else np.transpose(T_weights, axes=(1,0,2))
-        batch = self.input_tensor.shape[0]
-        nk, nc = T_weights.shape[:2]
-
-        if len(self.convolution_shape) == 3:
-            y = self.input_tensor.shape[2]
-            x = self.input_tensor.shape[3]
-        else:
-            y = self.input_tensor.shape[2]
-
-        for ib in range(batch):
-            for ik in range(nk):
-                error_per_channel = 0
-                for ic in range(nc):
-                    if len(self.convolution_shape) == 3:
-                        err = np.zeros((y,x))
-                        err[::sy, ::sx] = error_tensor[ib, ic]
-                    else:
-                        err = np.zeros(y)
-                        err[::sy] = error_tensor[ib, ic]
-                    
-                    error_per_channel += ndimage.correlate(err, T_weights[ik, ic], mode='constant', cval=0)
-                    
-                error_output[ib, ik] = error_per_channel
-
-        berror = error_tensor.sum(axis=0)
-        yerror = berror.sum(axis=1)
-        self.gradient_bias = yerror.sum(axis=1) if len(self.convolution_shape)==3 else yerror
-
-        self.gradient_weights=np.zeros_like(self.weights)
-        for ib in range(batch):
-            for ic in range(self.input_tensor.shape[1]):
-                for ik in range(self.num_kernels):
-                    if len(self.convolution_shape)==3:
-                        error = np.zeros((y, x))
-                        error[::sy, ::sx] = error_tensor[ib, ik]
-                        input = np.pad(self.input_tensor[ib, ic],
-                                                    [(int(np.ceil(self.padding[0])), int(np.floor(self.padding[0]))), 
-                                                    (int(np.ceil(self.padding[1])), int(np.floor(self.padding[1])))])                   
-                    else:
-                        error = np.zeros(y)
-                        error[::sy] = error_tensor[ib, ik]
-                        input = np.pad(self.input_tensor[ib, ic], [(int(np.ceil(self.padding[0])), int(np.floor(self.padding[0])))])
-                    buffer = ndimage.correlate(input, error, mode='constant')
-                    expected_output_size = np.array(input.shape) - np.array(error.shape) + 1
-                    buffer = buffer[:expected_output_size[0], :expected_output_size[1]] if len(expected_output_size)==2 else buffer[:expected_output_size[0]]
-
-                    self.gradient_weights[ik, ic] += buffer
-
-        
-        if self._optimizer is not None:
-            self.weights = self.optimizer.calculate_update(self.weights,self.gradient_weights)
-            self.bias = self.optimizer.calculate_update(self.bias,self.gradient_bias)
-        return error_output
-    
-    def initialize(self, weights_initializer, bias_initializer):
-        if len(self.convolution_shape) == 3:
-            fan_in = self.c * self.m * self.n
-            fan_out = self.num_kernels * self.m * self.n
-            self.weights = weights_initializer.initialize((self.num_kernels, self.c, self.m, self.n),fan_in, fan_out)
-            self.bias = bias_initializer.initialize((self.num_kernels,), 1, self.num_kernels)
-        else:
-            fan_in = self.c * self.m
-            fan_out = self.num_kernels * self.m
-            self.weights = weights_initializer.initialize((self.num_kernels, self.c, self.m),fan_in, fan_out)
-            self.bias = bias_initializer.initialize((self.num_kernels,), 1, self.num_kernels)
-        pass
\ No newline at end of file
+from Layers.Base import BaseLayer
+
+import numpy as np
+from scipy import signal
+from functools import reduce
+import operator
+from copy import deepcopy as copy
+
+
+class Conv(BaseLayer):
+    def __init__(self, stride_shape, convolution_shape, num_kernels):
+        super().__init__()
+        self.trainable = True
+        self.stride_shape = (stride_shape[0], stride_shape[0]) if len(stride_shape) == 1 else stride_shape
+        # 1d as [channel,m], 2d as [channel,m,n]
+        self.convolution_shape = convolution_shape
+        self.num_kernels = num_kernels
+        # init weights as uniform random (will be initialized again with initialize method)
+        # shape for 2d conv: (num_kernels, channel, m, n) 
+        self.weights = np.random.uniform(0, 1, (num_kernels, *convolution_shape))
+        # bias shape: number of kernels
+        self.bias = np.random.rand(num_kernels) 
+        
+        # grad parameters
+        self._gradient_weights = None
+        self._gradient_bias = None
+
+        self._optimizer = None
+        self._bias_optimizer = None
+
+        # conv_dim if it is 2d or 1d
+        self.conv_dim = 2 if len(convolution_shape) == 3 else 1
+
+
+    def initialize(self, weights_initializer, bias_initializer):
+        self.weights = weights_initializer.initialize(self.weights.shape,
+                    reduce(operator.mul, self.convolution_shape),
+                    reduce(operator.mul, [self.num_kernels, *self.convolution_shape[1:]]))
+
+        self.bias = bias_initializer.initialize(self.bias.shape, 1,self.num_kernels)
+
+        # keep independent optimizer copies for weights and bias so their internal states do not interfere
+        self._optimizer = copy(self.optimizer)
+        self._bias_optimizer = copy(self.optimizer)
+
+    def forward(self, input_tensor):
+        # if correlation is used in forward, we can use convole in backward
+        # or vice versa
+        # input_tensor shape (b,c,x,y) or (b,c,x)
+        self.input_tensor = input_tensor
+        ishape = input_tensor.shape
+        self.ishape = ishape
+        bsize, c, y, x = ishape if self.conv_dim==2 else (*ishape, None)
+        # kernel spatial extent: (m, n) for 2d, (m, None) for 1d
+        cx, cy = self.convolution_shape[-2:] if self.conv_dim == 2 else (self.convolution_shape[-1], None)
+
+        sh, sw = self.stride_shape
+
+        # new shape of y = (y-ky + 2*p)/sh + 1; y input size, ky kernel size, p padding size, sh stride size
+        #  but we need o/p size same as i/p so p=(ky-1)/2 if sh==1
+        # else we need to derive
+        pad=[(cx-1)/2]
+        out_shape = [int((y-cx+2*pad[0])/sh)+1]
+        if self.conv_dim==2:
+            pad.append((cy-1)/2)
+            out_shape.append(int((x-cy+2*pad[1])/sw)+1)
+        self.pad=pad
+        result = np.zeros((bsize, self.num_kernels, *out_shape))
+
+        # if used correlation in forward, should use convolve in backward 
+        for cb in range(bsize):
+            for ck in range(self.num_kernels):
+                # sum outputs of correlation of this kernel with individual input channel of input
+                kout = np.zeros((y,x)) if x else np.zeros((y))
+                for ch in range(c):
+                    # correlate with this batch's this channel and this kernel's this channel
+                    kout += signal.correlate(input_tensor[cb, ch], self.weights[ck, ch], mode='same', method='direct')
+                  
+                kout = kout[::sh, ::sw] if self.conv_dim==2 else kout[::sh]
+                result[cb, ck] = kout + self.bias[ck]
+
+        return result
+
+
+    def update_parameters(self, error_tensor):
+        # what is the grad of bias in this layer for this batch?
+        # we sum error tensor along axis of B,W,H (if 2d)
+        # B
+        berror = error_tensor.sum(axis=0)
+        # W
+        yerror = berror.sum(axis=1)
+        # H?
+        self._gradient_bias = yerror.sum(axis=1) if self.conv_dim==2 else yerror
+
+        # what is the grad of weights in this layer for this batch?
+        batch_size, channels, y, x = self.ishape if self.conv_dim==2 else (*self.ishape, None)
+        sh, sw = self.stride_shape
+        cx, cy = self.convolution_shape[-2:] if self.conv_dim == 2 else (self.convolution_shape[-1], None)
+
+        self.gradient_weights=np.zeros_like(self.weights)
+        for cb in range(batch_size):
+            for ch in range(channels):
+                for ck in range(self.num_kernels):
+                    if self.conv_dim==2:
+                        error = np.zeros((y, x))
+                        error[::sh, ::sw] = error_tensor[cb, ck]
+                        inp = np.pad(self.input_tensor[cb, ch],
+                                                    [(int(np.ceil(self.pad[0])), int(np.floor(self.pad[0]))), 
+                                                    (int(np.ceil(self.pad[1])), int(np.floor(self.pad[1])))]
+                                                    #  [int(np.ceil(self.pad[0])), int(np.floor(self.pad[1]))]
+                                                     )
+                    else:
+                        error = np.zeros(y)
+                        error[::sh] = error_tensor[cb, ck]
+                        inp = np.pad(self.input_tensor[cb, ch], [(int(np.ceil(self.pad[0])), int(np.floor(self.pad[0])))])
+
+                    self.gradient_weights[ck, ch] += signal.correlate(
+                        inp, error, mode='valid')
+
+        if self.optimizer:
+            self.weights = self._optimizer.calculate_update(self.weights, self._gradient_weights)
+            self.bias = self._bias_optimizer.calculate_update(self.bias, self._gradient_bias)
+
+    def error_this_layer(self, error_tensor):
+        # compute error in this layer
+        gradient=np.zeros_like(self.input_tensor)
+        sh,sw = self.stride_shape
+
+        # input Conv2d weight shape: (num_kernel, channel, w, h), channel is channel of input data
+        # inner Conv2d weight shape: (num_kernel, input_channel, w, h)
+        # input channel is channel from previous layer
+        # while passing error backward, we calculate error cased by this layer's weights 
+        # so transpose weight as : (input_channel, num_kernel, w, h)
+        nweight = self.weights.copy()
+        nweight = np.transpose(nweight, axes=(1,0,2,3)) if self.conv_dim==2 else np.transpose(nweight, axes=(1,0,2))
+        ishape = self.input_tensor.shape
+        y,x = ishape[-2:] if self.conv_dim==2 else (ishape[-1],None)
+
+        bsize = self.input_tensor.shape[0]
+        wk, wc = nweight.shape[:2]
+
+        for cb in range(bsize):
+            for ck in range(wk):
+                grad = 0
+                for c in range(wc):
+                    if self.conv_dim==2:
+                        err = np.zeros((y,x))
+                        err[::sh, ::sw] = error_tensor[cb, c]
+                    else:
+                        err = np.zeros(y)
+                        err[::sh] = error_tensor[cb, c]
+                    # we used correlate on forward, use convolve now
+                    grad += signal.convolve(err, nweight[ck, c], mode='same', method='direct')
+                    
+                gradient[cb, ck] = grad
+        return gradient
+
+    def backward(self, error_tensor):
+        # update weights/bias from this batch's gradients, then pass the error back to the previous layer
+        self.update_parameters(error_tensor)
+        gradient = self.error_this_layer(error_tensor)
+        return gradient
+
+    @property
+    def gradient_weights(self):
+        return self._gradient_weights
+
+    @gradient_weights.setter
+    def gradient_weights(self, value):
+        self._gradient_weights = value
+
+    @property
+    def gradient_bias(self):
+        return self._gradient_bias
+
+    @gradient_bias.setter
+    def gradient_bias(self, value):
+        self._gradient_bias = value
+
+    @property
+    def optimizer(self):
+        return self._optimizer
+
+    @optimizer.setter
+    def optimizer(self, value):
+        self._optimizer = value
+
+    @property
+    def bias_optimizer(self):
+        return self._bias_optimizer
+
+    @bias_optimizer.setter
+    def bias_optimizer(self, value):
+        self._bias_optimizer = value
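A small self-contained check of the forward path used above (shapes and values are illustrative, not taken from the exercise's tests): correlating one channel with mode='same' and then slicing with the stride reproduces the usual output-size formula (y - m + 2p)/s + 1 with p = (m - 1)/2.

    import numpy as np
    from scipy import signal

    x = np.random.rand(8, 8)        # one input channel
    w = np.random.rand(3, 3)        # one kernel channel
    sh, sw = 2, 2
    same = signal.correlate(x, w, mode='same', method='direct')  # zero-padded to keep the 8x8 extent
    out = same[::sh, ::sw]
    # (8 - 3 + 2*1)//2 + 1 = 4 in both dimensions
    assert out.shape == (4, 4)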
diff --git a/exercise3_material/src_to_implement/Layers/Conv_o.py b/exercise3_material/src_to_implement/Layers/Conv_o.py
new file mode 100644
index 0000000000000000000000000000000000000000..f91d7d4247a4df2109cb6b37a32b06945b72af1e
--- /dev/null
+++ b/exercise3_material/src_to_implement/Layers/Conv_o.py
@@ -0,0 +1,182 @@
+from . import Base
+from scipy import ndimage
+from scipy import signal
+import numpy as np
+
+#stride_shape - single value or tuple
+#convolution_shape - 1D or 2D conv layer [c, m, n]
+#num_kernels - integer value
+class Conv(Base.BaseLayer):
+    
+    def __init__(self, stride_shape, convolution_shape, num_kernels) -> None:
+        super().__init__()
+        self.trainable = True
+        self._optimizer = None
+        self.weights = None
+        self.bias = None
+        self.gradient_weights = None
+        self.gradient_bias = None
+        self.stride_shape = stride_shape #single value or tuple
+        self.convolution_shape = convolution_shape #filter shape (c,m,n)
+        if len(self.convolution_shape) == 3:
+            self.c = self.convolution_shape[0]
+            self.m = self.convolution_shape[1]
+            self.n = self.convolution_shape[2]
+        else:
+            self.c = self.convolution_shape[0]
+            self.m = self.convolution_shape[1]
+        self.num_kernels = num_kernels
+        self.weights = np.random.uniform(0,1, (self.num_kernels, *convolution_shape))
+        self.bias = np.random.uniform(0,1, (self.num_kernels,))
+        pass
+
+#input shape - [batch, channels, y, x]
+#output shape - [batch, num_kernels, y_o, x_o]
+#y_o = (y + 2p - f)/s + 1
+    def forward(self, input_tensor):
+        self.input_tensor = input_tensor
+        if len(self.stride_shape) == 2:
+            sy = self.stride_shape[0]
+            sx = self.stride_shape[1]
+        else:
+            sy = self.stride_shape[0]
+            sx = self.stride_shape[0]
+
+        batch = input_tensor.shape[0]
+        
+        if len(self.convolution_shape) == 3:
+            y = input_tensor.shape[2]
+            x = input_tensor.shape[3]
+            padding_y = (self.m-1)/2
+            padding_x = (self.n-1)/2
+            self.padding = [padding_y, padding_x] 
+            y_o =  int((y + 2*padding_y - self.m)//sy + 1)
+            x_o =  int((x + 2*padding_x - self.n)//sx + 1)
+            output_shape = (batch, self.num_kernels, y_o, x_o)
+        else:
+            y = input_tensor.shape[2]
+            padding_y = (self.m-1)/2
+            self.padding = [padding_y] 
+            y_o =  int((y + 2*padding_y - self.m)//sy + 1)
+            output_shape = (batch, self.num_kernels, y_o)
+
+        output_tensor = np.zeros(output_shape)
+         
+
+        for ib in range(batch):
+            for ik in range(self.num_kernels):
+                if len(self.convolution_shape) == 3:
+                    output_per_filter = np.zeros((y,x))
+                else:
+                    output_per_filter = np.zeros((y))
+                for ic in range(self.c):
+
+                    output_per_filter += ndimage.convolve(self.input_tensor[ib, ic], self.weights[ik, ic], mode='constant', cval=0)
+                    # output_per_filter += signal.correlate(input_tensor[ib, ic], self.weights[ik, ic], mode='same', method='direct')
+                
+                output_per_filter = output_per_filter[::sy,::sx] if len(self.convolution_shape) == 3 else output_per_filter[::sy] #striding
+                output_tensor[ib, ik] = output_per_filter + self.bias[ik]
+               
+        return output_tensor
+    
+    @property
+    def optimizer(self):
+        return self._optimizer
+    
+    @optimizer.setter
+    def optimizer(self, value):
+        self._optimizer = value
+    
+    @property
+    def gradient_weights(self):
+        return self._gradient_weights
+    
+    @gradient_weights.setter
+    def gradient_weights(self, value):
+        self._gradient_weights = value
+    
+    @property
+    def gradient_bias(self):
+        return self._gradient_bias
+    
+    @gradient_bias.setter
+    def gradient_bias(self, value):
+        self._gradient_bias = value
+
+    def backward(self, error_tensor):
+        error_output = np.zeros_like(self.input_tensor)
+        if len(self.stride_shape) == 2:
+                sy = self.stride_shape[0]
+                sx = self.stride_shape[1]
+        else:
+            sy = self.stride_shape[0]
+            sx = self.stride_shape[0]
+
+        T_weights = self.weights.copy()
+        T_weights = np.transpose(T_weights, axes=(1,0,2,3)) if len(self.convolution_shape) == 3 else np.transpose(T_weights, axes=(1,0,2))
+        batch = self.input_tensor.shape[0]
+        nk, nc = T_weights.shape[:2]
+
+        if len(self.convolution_shape) == 3:
+            y = self.input_tensor.shape[2]
+            x = self.input_tensor.shape[3]
+        else:
+            y = self.input_tensor.shape[2]
+
+        for ib in range(batch):
+            for ik in range(nk):
+                error_per_channel = 0
+                for ic in range(nc):
+                    if len(self.convolution_shape) == 3:
+                        err = np.zeros((y,x))
+                        err[::sy, ::sx] = error_tensor[ib, ic]
+                    else:
+                        err = np.zeros(y)
+                        err[::sy] = error_tensor[ib, ic]
+                    
+                    error_per_channel += ndimage.correlate(err, T_weights[ik, ic], mode='constant', cval=0)
+                    
+                error_output[ib, ik] = error_per_channel
+
+        berror = error_tensor.sum(axis=0)
+        yerror = berror.sum(axis=1)
+        self.gradient_bias = yerror.sum(axis=1) if len(self.convolution_shape)==3 else yerror
+
+        self.gradient_weights=np.zeros_like(self.weights)
+        for ib in range(batch):
+            for ic in range(self.input_tensor.shape[1]):
+                for ik in range(self.num_kernels):
+                    if len(self.convolution_shape)==3:
+                        error = np.zeros((y, x))
+                        error[::sy, ::sx] = error_tensor[ib, ik]
+                        input = np.pad(self.input_tensor[ib, ic],
+                                                    [(int(np.ceil(self.padding[0])), int(np.floor(self.padding[0]))), 
+                                                    (int(np.ceil(self.padding[1])), int(np.floor(self.padding[1])))])                   
+                    else:
+                        error = np.zeros(y)
+                        error[::sy] = error_tensor[ib, ik]
+                        input = np.pad(self.input_tensor[ib, ic], [(int(np.ceil(self.padding[0])), int(np.floor(self.padding[0])))])
+                    buffer = ndimage.correlate(input, error, mode='constant')
+                    expected_output_size = np.array(input.shape) - np.array(error.shape) + 1
+                    buffer = buffer[:expected_output_size[0], :expected_output_size[1]] if len(expected_output_size)==2 else buffer[:expected_output_size[0]]
+
+                    self.gradient_weights[ik, ic] += buffer
+
+        
+        if self._optimizer is not None:
+            self.weights = self.optimizer.calculate_update(self.weights,self.gradient_weights)
+            self.bias = self.optimizer.calculate_update(self.bias,self.gradient_bias)
+        return error_output
+    
+    def initialize(self, weights_initializer, bias_initializer):
+        if len(self.convolution_shape) == 3:
+            fan_in = self.c * self.m * self.n
+            fan_out = self.num_kernels * self.m * self.n
+            self.weights = weights_initializer.initialize((self.num_kernels, self.c, self.m, self.n),fan_in, fan_out)
+            self.bias = bias_initializer.initialize((self.num_kernels,), 1, self.num_kernels)
+        else:
+            fan_in = self.c * self.m
+            fan_out = self.num_kernels * self.m
+            self.weights = weights_initializer.initialize((self.num_kernels, self.c, self.m),fan_in, fan_out)
+            self.bias = bias_initializer.initialize((self.num_kernels,), 1, self.num_kernels)
+        pass
\ No newline at end of file
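Conv_o.py preserves the earlier ndimage-based implementation for reference. One detail worth keeping in mind when comparing it with the new Conv.py: convolution is correlation with a flipped kernel, so ndimage.convolve and a correlation only coincide for symmetric kernels. A quick illustrative check (not part of the exercise code):

    import numpy as np
    from scipy import ndimage

    x = np.arange(25, dtype=float).reshape(5, 5)
    k = np.random.rand(3, 3)
    conv = ndimage.convolve(x, k, mode='constant', cval=0)
    corr = ndimage.correlate(x, np.flip(k), mode='constant', cval=0)
    assert np.allclose(conv, corr)   # convolution == correlation with the flipped kernel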
diff --git a/exercise3_material/src_to_implement/Layers/RNN.py b/exercise3_material/src_to_implement/Layers/RNN.py
index bea912131f4f172a9f7b9942d64e3a63b9ece781..e658f667a3ffefb2b9d7a84f4503f9474900cfe6 100644
--- a/exercise3_material/src_to_implement/Layers/RNN.py
+++ b/exercise3_material/src_to_implement/Layers/RNN.py
@@ -115,9 +115,6 @@ class RNN(Base.BaseLayer):
             self.output_FCLayer.weights = self.optimizer.calculate_update(self.output_FCLayer.weights, self.output_FCLayer_gradient_weights)
             self.weights = self.optimizer.calculate_update(self.weights, self.gradient_weights)
             
-
-
-
         return gradient_inputs
     
 
diff --git a/exercise3_material/src_to_implement/NeuralNetwork.py b/exercise3_material/src_to_implement/NeuralNetwork.py
index 9f49bdfdc23bef823bab390ce90a55cbfc1f9a6a..2dcb23e6bc33028ec2432116418cce8cea4b7e9d 100644
--- a/exercise3_material/src_to_implement/NeuralNetwork.py
+++ b/exercise3_material/src_to_implement/NeuralNetwork.py
@@ -1,60 +1,94 @@
-import copy
-
-class NeuralNetwork:
-    def __init__(self, optimizer, weights_initializer, bias_initializer) -> None:
-        self.optimizer = optimizer
-        self.loss = []
-        self.layers = [] 
-        self.data_layer = None
-        self.loss_layer = None
-        self.weights_initializer = weights_initializer
-        self.bias_initializer = bias_initializer
-        pass
-        
-    def forward(self):
-        loss_regularizer = 0
-        self.input_tensor, self.label_tensor = self.data_layer.next()
-        for layer in self.layers:
-            self.input_tensor = layer.forward(self.input_tensor)
-            if self.optimizer.regularizer is not None:
-                loss_regularizer += self.optimizer.regularizer.norm(layer.weights)
-        loss = self.loss_layer.forward(self.input_tensor+loss_regularizer, self.label_tensor)
-        return loss
-    
-    def backward(self):
-        error = self.loss_layer.backward(self.label_tensor)
-        for layer in reversed(self.layers):
-            error = layer.backward(error)
-        pass
-
-    def append_layer(self, layer):
-        if layer.trainable == True:
-            opti = copy.deepcopy(self.optimizer)
-            layer.optimizer = opti
-            layer.initialize(self.weights_initializer, self.bias_initializer)
-        self.layers.append(layer)
-
-    def train(self, iterations):
-        self.testing_phase = False
-        for _ in range(iterations):
-            loss = self.forward()
-            self.loss.append(loss)
-            self.backward()
-    
-    def test(self, input_tensor):
-        self.data_layer = input_tensor
-        for layer in self.layers:
-            self.data_layer = layer.forward(self.data_layer)
-        return self.data_layer
-    
-    @property
-    def phase(self):
-        return self.phase
-    
-    @phase.setter
-    def phase(self, value):
-        self.phase = value
-        pass
-
-    def norm(self, weights):
-        return self.loss_layer.norm(weights)
\ No newline at end of file
+from copy import deepcopy
+
+def save(filename, net):
+    import pickle
+    # exclude the data layer from the pickle and restore it afterwards
+    dlayer = net.data_layer
+    net.__setstate__({'data_layer': None})
+    with open(filename, 'wb') as f:
+        pickle.dump(net, f)
+    net.__setstate__({'data_layer': dlayer})
+    
+
+def load(filename, data_layer):
+    import pickle
+    with open(filename, 'rb') as f:
+        net = pickle.load(f)
+        net.__setstate__({'data_layer': data_layer})
+        
+    return net
+
+class NeuralNetwork:
+    def __init__(self, optimizer, weights_initializer, bias_initializer) -> None:
+        self.optimizer = optimizer
+        self.loss = []
+        self.layers=[]
+        self.data_layer = None
+        self.loss_layer = None
+        self.weights_initializer = weights_initializer
+        self.bias_initializer = bias_initializer
+
+        self._phase = None
+
+    def __getstate__(self):
+        return self.__dict__.copy()
+    
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        return self.__dict__.copy()
+
+    @property
+    def phase(self):
+        return self._phase
+    
+    @phase.setter
+    def phase(self, value):
+        self._phase = value
+
+    def forward(self):
+        inp,op = self.data_layer.next()
+        self.label = op
+        regularization_loss = 0
+        for layer in self.layers:
+            inp = layer.forward(inp)
+            try:
+                regularization_loss += self.optimizer.regularizer.norm(layer.weights)
+            except:
+                pass
+
+        self.pred = self.loss_layer.forward(inp + regularization_loss, op)
+        return self.pred
+    
+    def backward(self):
+        loss = self.loss_layer.backward(self.label)
+        for layer in self.layers[::-1]:
+            loss = layer.backward(loss)
+
+    
+    def append_layer(self, layer):
+        if layer.trainable:
+            layer.optimizer = deepcopy(self.optimizer)
+            layer.initialize(self.weights_initializer, self.bias_initializer)
+
+        self.layers.append(layer)
+
+    def train(self, iterations):
+        # training phase: dropout stays active and batch norm updates its moving averages
+        for layer in self.layers:
+            layer.testing_phase = False
+        for i in range(iterations):
+            loss = self.forward()
+            self.backward()
+            self.loss.append(loss)
+
+    def test(self, input_tensor):
+        # testing phase: dropout is disabled and batch norm uses its moving averages
+        for layer in self.layers:
+            layer.testing_phase = True
+        inp = input_tensor
+        for layer in self.layers:
+            inp = layer.forward(inp)
+        return inp
+
+
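A brief usage sketch of the save/load helpers added above (the file name, network object, and input tensor are placeholders): the data layer is detached before pickling and re-attached on load, so only the trained layers travel with the checkpoint.

    from NeuralNetwork import save, load

    # net is an already trained NeuralNetwork instance, data is its data layer
    save('checkpoint.pkl', net)              # pickles the network without its data layer
    restored = load('checkpoint.pkl', data)  # re-attaches a data layer to the unpickled network
    predictions = restored.test(some_input_tensor)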
diff --git a/exercise3_material/src_to_implement/NeuralNetwork_o.py b/exercise3_material/src_to_implement/NeuralNetwork_o.py
new file mode 100644
index 0000000000000000000000000000000000000000..a38d72bcf725d7f5f7567fc7495778c8e804139b
--- /dev/null
+++ b/exercise3_material/src_to_implement/NeuralNetwork_o.py
@@ -0,0 +1,65 @@
+import copy
+
+class NeuralNetwork:
+    def __init__(self, optimizer, weights_initializer, bias_initializer) -> None:
+        self.optimizer = optimizer
+        self.loss = []
+        self.layers = [] 
+        self.data_layer = None
+        self.loss_layer = None
+        self.weights_initializer = weights_initializer
+        self.bias_initializer = bias_initializer
+        self._phase = None
+        
+    def forward(self):
+        loss_regularizer = 0
+        self.input_tensor, self.label_tensor = self.data_layer.next()
+        for layer in self.layers:
+            self.input_tensor = layer.forward(self.input_tensor)
+            #if self.optimizer.regularizer is not None:
+            try:
+                if layer.trainable:
+                    loss_regularizer += self.optimizer.regularizer.norm(layer.weights)
+            except:
+                pass
+            
+        loss = self.loss_layer.forward(self.input_tensor, self.label_tensor)
+        return loss + loss_regularizer
+    
+    def backward(self):
+        error = self.loss_layer.backward(self.label_tensor)
+        for layer in reversed(self.layers):
+            error = layer.backward(error)
+        pass
+
+    def append_layer(self, layer):
+        if layer.trainable == True:
+            opti = copy.deepcopy(self.optimizer)
+            layer.optimizer = opti
+            layer.initialize(self.weights_initializer, self.bias_initializer)
+        self.layers.append(layer)
+
+    def train(self, iterations):
+        self.testing_phase = False
+        for _ in range(iterations):
+            loss = self.forward()
+            self.loss.append(loss)
+            self.backward()
+    
+    def test(self, input_tensor):
+        self.data_layer = input_tensor
+        for layer in self.layers:
+            self.data_layer = layer.forward(self.data_layer)
+        return self.data_layer
+    
+    @property
+    def phase(self):
+        return self._phase
+    
+    @phase.setter
+    def phase(self, value):
+        self._phase = value
+
+    def norm(self, weights):
+        return self.loss_layer.norm(weights)
\ No newline at end of file
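For reference, the regularized objective assembled in NeuralNetwork_o.forward is the data loss plus the sum of each trainable layer's weight norm. A minimal sketch with a squared-L2 penalty (the regularization strength, shapes, and loss value are illustrative):

    import numpy as np

    lam = 4e-4                                                 # illustrative regularization strength
    weights = [np.random.randn(4, 3), np.random.randn(3, 3)]   # stand-ins for the trainable layers' weights
    data_loss = 0.42                                           # stand-in for the cross-entropy term

    reg_loss = sum(lam * np.sum(w ** 2) for w in weights)      # squared-L2 penalty per layer
    total_loss = data_loss + reg_loss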
diff --git a/exercise3_material/src_to_implement/log.txt b/exercise3_material/src_to_implement/log.txt
index 178c80c75b56c3786c1607b31b9494e09e82f255..022ddba76e5edbca27547451f4b9543919f1f8bc 100644
--- a/exercise3_material/src_to_implement/log.txt
+++ b/exercise3_material/src_to_implement/log.txt
@@ -10,3 +10,124 @@ On the Iris dataset, we achieve an accuracy of: 94.0%
 On the Iris dataset, we achieve an accuracy of: 98.0%
 On the Iris dataset using Dropout, we achieve an accuracy of: 54.0%
 On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 66.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 57.99999999999999%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 66.0%
+On the Iris dataset, we achieve an accuracy of: 94.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 94.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 57.99999999999999%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 94.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 64.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 62.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the UCI ML hand-written digits dataset using Batch_norm and L2 we achieve an accuracy of: 97.32888146911519%
+On the UCI ML hand-written digits dataset using Batch_norm we achieve an accuracy of: 97.66277128547578%
+On the UCI ML hand-written digits dataset using ADAM we achieve an accuracy of: 97.32888146911519%
+On the UCI ML hand-written digits dataset using L1_regularizer we achieve an accuracy of: 87.31218697829716%
+On the UCI ML hand-written digits dataset using L2_regularizer we achieve an accuracy of: 65.4424040066778%
+On the UCI ML hand-written digits dataset using Dropout we achieve an accuracy of: 90.15025041736226%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 84.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 62.0%
+On the Iris dataset, we achieve an accuracy of: 94.0%
+On the UCI ML hand-written digits dataset using Batch_norm and L2 we achieve an accuracy of: 95.99332220367279%
+On the UCI ML hand-written digits dataset using Batch_norm we achieve an accuracy of: 98.49749582637729%
+On the UCI ML hand-written digits dataset using ADAM we achieve an accuracy of: 95.99332220367279%
+On the UCI ML hand-written digits dataset using L1_regularizer we achieve an accuracy of: 84.97495826377296%
+On the UCI ML hand-written digits dataset using L2_regularizer we achieve an accuracy of: 86.64440734557596%
+On the UCI ML hand-written digits dataset using Dropout we achieve an accuracy of: 86.47746243739566%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 86.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 60.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the UCI ML hand-written digits dataset using Batch_norm and L2 we achieve an accuracy of: 97.16193656093489%
+On the UCI ML hand-written digits dataset using Batch_norm we achieve an accuracy of: 97.8297161936561%
+On the UCI ML hand-written digits dataset using ADAM we achieve an accuracy of: 96.16026711185309%
+On the UCI ML hand-written digits dataset using L1_regularizer we achieve an accuracy of: 76.29382303839732%
+On the UCI ML hand-written digits dataset using L2_regularizer we achieve an accuracy of: 85.30884808013356%
+On the UCI ML hand-written digits dataset using Dropout we achieve an accuracy of: 87.31218697829716%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 60.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 54.0%
+On the Iris dataset, we achieve an accuracy of: 94.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 68.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 76.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 57.99999999999999%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 60.0%
+On the Iris dataset, we achieve an accuracy of: 94.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 60.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 98.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the UCI ML hand-written digits dataset using Batch_norm and L2 we achieve an accuracy of: 96.661101836394%
+On the UCI ML hand-written digits dataset using Batch_norm we achieve an accuracy of: 97.8297161936561%
+On the UCI ML hand-written digits dataset using ADAM we achieve an accuracy of: 96.4941569282137%
+On the UCI ML hand-written digits dataset using L1_regularizer we achieve an accuracy of: 86.81135225375625%
+On the UCI ML hand-written digits dataset using L2_regularizer we achieve an accuracy of: 78.79799666110183%
+On the UCI ML hand-written digits dataset using Dropout we achieve an accuracy of: 94.49081803005008%
+On the Iris dataset, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 98.0%
+On the Iris dataset using Batchnorm, we achieve an accuracy of: 98.0%
+On the Iris dataset using Dropout, we achieve an accuracy of: 96.0%
+On the Iris dataset, we achieve an accuracy of: 96.0%