diff --git a/.DS_Store b/.DS_Store index fa8005c2b30ab41ea97aadce479a53c5ccd08a7d..eb933bd1cee7ec0edb0283b1ebf9beb2da7c01a4 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/exercise3_material/.DS_Store b/exercise3_material/.DS_Store index d3df6722b35ef6e33cce6e83464c26a32b62a855..fecaaf70e24e232810e41e67d501e67ce1c9db53 100644 Binary files a/exercise3_material/.DS_Store and b/exercise3_material/.DS_Store differ diff --git a/exercise3_material/log.txt b/exercise3_material/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..776ace85a7394e910793eca717d872292ab6978b --- /dev/null +++ b/exercise3_material/log.txt @@ -0,0 +1,24 @@ +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 64.0% +On the Iris dataset, we achieve an accuracy of: 94.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 62.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 60.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 66.0% +On the Iris dataset, we achieve an accuracy of: 94.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 62.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 64.0% +On the Iris dataset, we achieve an accuracy of: 96.0% diff --git a/exercise3_material/src_to_implement/.DS_Store b/exercise3_material/src_to_implement/.DS_Store index b768b11c944364b0e7124b9168311c0faddd8700..8df3b1a748f61f823bd55186f9eaf2a352391ce3 100644 Binary files a/exercise3_material/src_to_implement/.DS_Store and b/exercise3_material/src_to_implement/.DS_Store differ diff --git a/exercise3_material/src_to_implement/Layers/BatchNormalization.py b/exercise3_material/src_to_implement/Layers/BatchNormalization.py index 430f4134c089cb51efeefe2050922307b6bb9457..de0823b498c9b046f3ed3afa9ed542fbc7e09625 100644 --- a/exercise3_material/src_to_implement/Layers/BatchNormalization.py +++ b/exercise3_material/src_to_implement/Layers/BatchNormalization.py @@ -1,155 +1,163 @@ -from . import Base -import numpy as np -from . 
import Helpers - -class BatchNormalization(Base.BaseLayer): - def __init__(self, channels, alpha=0.8, epsilon=5e-11) -> None: - super().__init__() - self.trainable = True - self.input_tensor = None - self.output_tensor = None - self.channel_size = channels - self.check_conv = 0 - self.alpha = alpha - self.epsilon = epsilon - self.weights = None - self._optimizer = None - self._gradient_weights = None - self._bias_optimizer = None - self._gradient_bias = None - self.weights = None - self.bias = None - self.mean = None - self.variance = None - self.batch_size = None - self.channels = None - self.input_tensor = None - - self.initialize() - pass - - def initialize(self): - self.weights = np.ones(self.channel_size) - self.bias = np.zeros(self.channel_size) - - def forward(self, input_tensor): - self.input_tensor = input_tensor - self.check_conv = len(input_tensor.shape) == 4 - - - # Make sure to use the test mean. The test mean is computed during training time as a moving average. It is then kept fixed during test time. - - if self.check_conv: - self.mean = np.mean(self.input_tensor, axis=(0,2,3)) - self.variance = np.var(self.input_tensor, axis=(0,2,3)) - - self.batch_size = input_tensor.shape[0] - self.channels = input_tensor.shape[1] - if self.testing_phase: - x_tilda = (self.input_tensor - self.test_mean.reshape((1, self.channels, 1, 1)))/np.sqrt(self.test_variance.reshape((1, self.channels, 1, 1)) + self.epsilon) - - - else: - mean_k = np.mean(self.input_tensor, axis=(0,2,3)) - variance_k = np.var(self.input_tensor, axis=(0,2,3)) - - self.test_mean = self.alpha*self.mean.reshape((1, self.channels, 1, 1)) + (1-self.alpha)*mean_k.reshape(1, self.channels, 1, 1) - self.test_variance = self.alpha*self.variance.reshape(1, self.channels, 1, 1) + (1-self.alpha)*variance_k.reshape(1, self.channels, 1, 1) - - # store mean and variance for the next iteration - - self.mean = mean_k - self.variance = variance_k - x_tilda = (self.input_tensor - self.mean.reshape(1, self.channels, 1, 1)/np.sqrt(self.variance.reshape(1, self.channels, 1, 1)) + self.epsilon) - self.output_tensor = self.weights.reshape(1, self.channels, 1, 1) * x_tilda + self.bias.reshape(1, self.channels, 1, 1) - - else: - self.mean = np.mean(self.input_tensor, axis=0) - self.variance = np.var(self.input_tensor, axis=0) - - if self.testing_phase: - x_tilda = (self.input_tensor - self.test_mean)/np.sqrt(self.test_variance + self.epsilon) - - else: - mean_k = np.mean(input_tensor, axis=0) - variance_k = np.var(input_tensor, axis=0) - - self.test_mean = self.alpha*self.mean + (1-self.alpha)*mean_k - self.test_variance = self.alpha*self.variance + (1-self.alpha)*variance_k - - # store mean and variance for the next iteration - - self.mean = mean_k - self.variance = variance_k - x_tilda = (self.input_tensor - self.mean) / np.sqrt(self.variance + self.epsilon) - self.output_tensor = self.weights * x_tilda + self.bias - - - return self.output_tensor - - def backward(self, error_tensor): - - if self.check_conv: - self.error_tensor = Helpers.compute_bn_gradients(self.reformat(error_tensor), self.reformat(self.input_tensor), self.weights, self.mean, self.variance, self.epsilon) - self.error_tensor = self.reformat(self.error_tensor) - else: - self.error_tensor = Helpers.compute_bn_gradients(error_tensor, self.input_tensor, self.weights, self.mean, self.variance, self.epsilon) - self._gradient_weights = np.sum(self.input_tensor.T@error_tensor, axis=0) #verify this - self.gradient_bias = np.sum(error_tensor, axis=0) - - - if self._optimizer is not 
None: - self.weights = self.optimizer.calculate_update(self.weights, self._gradient_weights) - if self.bias_optimizer: - self.bias = self.bias_optimizer.calculate_update(self.bais, self._gradient_bias) - - return error_tensor - - def reformat(self, tensor): - is4D = len(tensor.shape) == 4 - - if is4D: - b, h, m, n = tensor.shape - output_tensor = tensor.reshape((b, h, m*n)) - output_tensor = np.transpose(output_tensor, (0, 2, 1)) - b, mn, h = output_tensor.shape - output_tensor = output_tensor.reshape((b*mn, h)) - - else: - b, h, m, n = self.input_tensor.shape - output_tensor = tensor.reshape((b, m*n, h)) - output_tensor = np.transpose(output_tensor, (0, 2, 1)) - output_tensor = output_tensor.reshape((b, h, m, n)) - return output_tensor - - @property - def bias_optimizer(self): - return self._bias_optimizer - - @bias_optimizer.setter - def bias_optimizer(self, value): - self._bias_optimizer = value - - @property - def optimizer(self): - return self._optimizer - - @optimizer.setter - def optimizer(self, value): - self._optimizer = value - - @property - def gradient_weights(self): - return self._gradient_weights - - @gradient_weights.setter - def gradient_weights(self, value): - self._gradient_weights = value - - @property - def gradient_bias(self): - return self._gradient_bias - - @gradient_bias.setter - def gradient_bias(self, value): +from . import Base +import numpy as np +from . import Helpers + +class BatchNormalization(Base.BaseLayer): + def __init__(self, channels, alpha=0.8, epsilon=5e-11) -> None: + super().__init__() + self.trainable = True + self.input_tensor = None + self.output_tensor = None + self.channel_size = channels + self.check_conv = 0 + self.alpha = alpha + self.epsilon = epsilon + self.weights = None + self._optimizer = None + self._gradient_weights = None + self._bias_optimizer = None + self._gradient_bias = None + self.weights = None + self.bias = None + self.mean = 0 + self.variance = 0 + self.batch_size = None + self.test_mean = 0 + self.test_variance = 1 + self.x_tilda = 0 + + self.input_tensor = None + + self.initialize(None, None) + pass + + def initialize(self, weights_initializer, bias_initializer): + self.weights = np.ones(self.channel_size) + self.bias = np.zeros(self.channel_size) + + + def forward(self, input_tensor): + self.input_tensor = input_tensor + + if len (input_tensor.shape) == 4: + self.check_conv = True + else: + self.check_conv = False + + + if self.check_conv: + self.mean = np.mean(input_tensor, axis=(0, 2, 3)) + self.variance = np.var(input_tensor, axis=(0, 2, 3)) + self.channel_size = input_tensor.shape[1] + if self.testing_phase: + self.x_tilda = (self.input_tensor-self.test_mean.reshape((1, self.channel_size, 1, 1)))/(self.test_variance.reshape((1, self.channel_size, 1, 1))+self.epsilon)**0.5 + return self.weights.reshape((1, self.channel_size, 1, 1)) * self.x_tilda + self.bias.reshape((1, self.channel_size, 1, 1)) + + new_mean = np.mean(self.input_tensor, axis=(0, 2, 3)) + new_var = np.var(self.input_tensor, axis=(0, 2, 3)) + + # Make sure to use the test mean. The test mean is computed during training time as a moving average. It is then kept fixed during test time. 
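# Reference note (illustrative names, not identifiers from this file): the moving-average
# update for the test statistics conventionally has the form
#     running_mean = alpha * running_mean + (1 - alpha) * batch_mean
#     running_var  = alpha * running_var  + (1 - alpha) * batch_var
# and the running estimates are then reused unchanged whenever testing_phase is set.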
+
+            self.test_mean = self.alpha * self.mean.reshape((1, self.channel_size, 1, 1)) + (1 - self.alpha) * new_mean.reshape((1, self.channel_size, 1, 1))
+            self.test_variance = self.alpha * self.variance.reshape((1, self.channel_size, 1, 1)) + (1 - self.alpha) * new_var.reshape((1, self.channel_size, 1, 1))
+
+            self.mean = new_mean
+            self.variance = new_var
+
+            self.x_tilda = (self.input_tensor - self.mean.reshape((1, self.channel_size, 1, 1))) / np.sqrt(self.variance.reshape((1, self.channel_size, 1, 1)) + self.epsilon)
+
+            return self.weights.reshape((1, self.channel_size, 1, 1)) * self.x_tilda + self.bias.reshape((1, self.channel_size, 1, 1))
+
+        else:  # not convolutional
+            self.mean = np.mean(input_tensor, axis=0)
+            self.variance = np.var(input_tensor, axis=0)
+            if self.testing_phase:
+                self.x_tilda = (input_tensor - self.test_mean) / np.sqrt(self.test_variance + self.epsilon)
+            else:
+                self.test_mean = self.alpha * self.mean + (1 - self.alpha) * self.mean
+                self.test_variance = self.alpha * self.variance + (1 - self.alpha) * self.variance
+
+                self.x_tilda = (self.input_tensor - self.mean) / np.sqrt(self.variance + self.epsilon)
+            return self.weights * self.x_tilda + self.bias
+
+    def backward(self, error_tensor):
+
+        if self.check_conv:
+            self.error_tensor = Helpers.compute_bn_gradients(self.reformat(error_tensor), self.reformat(self.input_tensor), self.weights, self.mean, self.variance, self.epsilon)
+            self.error_tensor = self.reformat(self.error_tensor)
+            self._gradient_weights = np.sum(error_tensor * self.x_tilda, axis=(0, 2, 3))  # verify this
+            self.gradient_bias = np.sum(error_tensor, axis=(0, 2, 3))
+        else:
+            self.error_tensor = Helpers.compute_bn_gradients(error_tensor, self.input_tensor, self.weights, self.mean, self.variance, self.epsilon)
+            self._gradient_weights = np.sum(error_tensor * self.x_tilda, axis=0)  # verify this
+            self.gradient_bias = np.sum(error_tensor, axis=0)
+
+        if self._optimizer is not None:
+            self.weights = self.optimizer.calculate_update(self.weights, self._gradient_weights)
+        if self.bias_optimizer:
+            self.bias = self.bias_optimizer.calculate_update(self.bias, self._gradient_bias)
+
+        return self.error_tensor
+
+    def reformat(self, tensor):
+
+        if len(tensor.shape) == 4:
+            b = tensor.shape[0]
+            h = tensor.shape[1]
+            m = tensor.shape[2]
+            n = tensor.shape[3]
+            output_tensor = tensor.reshape((b, h, m * n))
+            output_tensor = np.transpose(output_tensor, (0, 2, 1))
+            b, mn, h = output_tensor.shape
+            output_tensor = output_tensor.reshape((b * mn, h))
+
+        else:
+            b = self.input_tensor.shape[0]
+            h = self.input_tensor.shape[1]
+            m = self.input_tensor.shape[2]
+            n = self.input_tensor.shape[3]
+            output_tensor = tensor.reshape((b, m * n, h))
+            output_tensor = np.transpose(output_tensor, (0, 2, 1))
+            output_tensor = output_tensor.reshape((b, h, m, n))
+        return output_tensor
+
+    # all the required setters and getters
+
+    @property
+    def bias_optimizer(self):
+        return self._bias_optimizer
+
+    @bias_optimizer.setter
+    def bias_optimizer(self, value):
+        self._bias_optimizer = value
+
+    @property
+    def optimizer(self):
+        return self._optimizer
+
+    @optimizer.setter
+    def optimizer(self, value):
+        self._optimizer = value
+
+    @property
+    def gradient_weights(self):
+        return self._gradient_weights
+
+    @gradient_weights.setter
+    def gradient_weights(self, value):
+        self._gradient_weights = value
+
+    @property
+    def gradient_bias(self):
+        return self._gradient_bias
+
+    @gradient_bias.setter
+    def gradient_bias(self, value):
+        self._gradient_bias = value
\ No newline at end of file
diff --git
a/exercise3_material/src_to_implement/Layers/Conv.py b/exercise3_material/src_to_implement/Layers/Conv.py index f91d7d4247a4df2109cb6b37a32b06945b72af1e..54df6debd6ba311a256c79d34ea8c7fe2549fb0d 100644 --- a/exercise3_material/src_to_implement/Layers/Conv.py +++ b/exercise3_material/src_to_implement/Layers/Conv.py @@ -1,182 +1,194 @@ -from . import Base -from scipy import ndimage -from scipy import signal -import numpy as np - -#stride_shape - single value or tuple -#convolution_shape - 1D or 2D conv layer [c, m, n] -#num_kernels - integer value -class Conv(Base.BaseLayer): - - def __init__(self, stride_shape, convolution_shape, num_kernels) -> None: - super().__init__() - self.trainable = True - self._optimizer = None - self.weights = None - self.bias = None - self.gradient_weights = None - self.gradient_bias = None - self.stride_shape = stride_shape #single value or tuple - self.convolution_shape = convolution_shape #filter shape (c,m,n) - if len(self.convolution_shape) == 3: - self.c = self.convolution_shape[0] - self.m = self.convolution_shape[1] - self.n = self.convolution_shape[2] - else: - self.c = self.convolution_shape[0] - self.m = self.convolution_shape[1] - self.num_kernels = num_kernels - self.weights = np.random.uniform(0,1, (self.num_kernels, *convolution_shape)) - self.bias = np.random.uniform(0,1, (self.num_kernels,)) - pass - -#input shape - [batch, channels, y, x] -#output shape - [batch, num_kernels, y_o, x_o] -#y_o = (y + 2p - f)/s + 1 - def forward(self, input_tensor): - self.input_tensor = input_tensor - if len(self.stride_shape) == 2: - sy = self.stride_shape[0] - sx = self.stride_shape[1] - else: - sy = self.stride_shape[0] - sx = self.stride_shape[0] - - batch = input_tensor.shape[0] - - if len(self.convolution_shape) == 3: - y = input_tensor.shape[2] - x = input_tensor.shape[3] - padding_y = (self.m-1)/2 - padding_x = (self.n-1)/2 - self.padding = [padding_y, padding_x] - y_o = int((y + 2*padding_y - self.m)//sy + 1) - x_o = int((x + 2*padding_x - self.n)//sx + 1) - output_shape = (batch, self.num_kernels, y_o, x_o) - else: - y = input_tensor.shape[2] - padding_y = (self.m-1)/2 - self.padding = [padding_y] - y_o = int((y + 2*padding_y - self.m)//sy + 1) - output_shape = (batch, self.num_kernels, y_o) - - output_tensor = np.zeros(output_shape) - - - for ib in range(batch): - for ik in range(self.num_kernels): - if len(self.convolution_shape) == 3: - output_per_filter = np.zeros((y,x)) - else: - output_per_filter = np.zeros((y)) - for ic in range(self.c): - - output_per_filter += ndimage.convolve(self.input_tensor[ib, ic], self.weights[ik, ic], mode='constant', cval=0) - # output_per_filter += signal.correlate(input_tensor[ib, ic], self.weights[ik, ic], mode='same', method='direct') - - output_per_filter = output_per_filter[::sy,::sx] if len(self.convolution_shape) == 3 else output_per_filter[::sy] #striding - output_tensor[ib, ik] = output_per_filter + self.bias[ik] - - return output_tensor - - @property - def optimizer(self): - return self._optimizer - - @optimizer.setter - def optimizer(self, value): - self._optimizer = value - - @property - def gradient_weights(self): - return self._gradient_weights - - @gradient_weights.setter - def gradient_weights(self, value): - self._gradient_weights = value - - @property - def gradient_bias(self): - return self._gradient_bias - - @gradient_bias.setter - def gradient_bias(self, value): - self._gradient_bias = value - - def backward(self, error_tensor): - error_output = np.zeros_like(self.input_tensor) - if 
len(self.stride_shape) == 2: - sy = self.stride_shape[0] - sx = self.stride_shape[1] - else: - sy = self.stride_shape[0] - sx = self.stride_shape[0] - - T_weights = self.weights.copy() - T_weights = np.transpose(T_weights, axes=(1,0,2,3)) if len(self.convolution_shape) == 3 else np.transpose(T_weights, axes=(1,0,2)) - batch = self.input_tensor.shape[0] - nk, nc = T_weights.shape[:2] - - if len(self.convolution_shape) == 3: - y = self.input_tensor.shape[2] - x = self.input_tensor.shape[3] - else: - y = self.input_tensor.shape[2] - - for ib in range(batch): - for ik in range(nk): - error_per_channel = 0 - for ic in range(nc): - if len(self.convolution_shape) == 3: - err = np.zeros((y,x)) - err[::sy, ::sx] = error_tensor[ib, ic] - else: - err = np.zeros(y) - err[::sy] = error_tensor[ib, ic] - - error_per_channel += ndimage.correlate(err, T_weights[ik, ic], mode='constant', cval=0) - - error_output[ib, ik] = error_per_channel - - berror = error_tensor.sum(axis=0) - yerror = berror.sum(axis=1) - self.gradient_bias = yerror.sum(axis=1) if len(self.convolution_shape)==3 else yerror - - self.gradient_weights=np.zeros_like(self.weights) - for ib in range(batch): - for ic in range(self.input_tensor.shape[1]): - for ik in range(self.num_kernels): - if len(self.convolution_shape)==3: - error = np.zeros((y, x)) - error[::sy, ::sx] = error_tensor[ib, ik] - input = np.pad(self.input_tensor[ib, ic], - [(int(np.ceil(self.padding[0])), int(np.floor(self.padding[0]))), - (int(np.ceil(self.padding[1])), int(np.floor(self.padding[1])))]) - else: - error = np.zeros(y) - error[::sy] = error_tensor[ib, ik] - input = np.pad(self.input_tensor[ib, ic], [(int(np.ceil(self.padding[0])), int(np.floor(self.padding[0])))]) - buffer = ndimage.correlate(input, error, mode='constant') - expected_output_size = np.array(input.shape) - np.array(error.shape) + 1 - buffer = buffer[:expected_output_size[0], :expected_output_size[1]] if len(expected_output_size)==2 else buffer[:expected_output_size[0]] - - self.gradient_weights[ik, ic] += buffer - - - if self._optimizer is not None: - self.weights = self.optimizer.calculate_update(self.weights,self.gradient_weights) - self.bias = self.optimizer.calculate_update(self.bias,self.gradient_bias) - return error_output - - def initialize(self, weights_initializer, bias_initializer): - if len(self.convolution_shape) == 3: - fan_in = self.c * self.m * self.n - fan_out = self.num_kernels * self.m * self.n - self.weights = weights_initializer.initialize((self.num_kernels, self.c, self.m, self.n),fan_in, fan_out) - self.bias = bias_initializer.initialize((self.num_kernels,), 1, self.num_kernels) - else: - fan_in = self.c * self.m - fan_out = self.num_kernels * self.m - self.weights = weights_initializer.initialize((self.num_kernels, self.c, self.m),fan_in, fan_out) - self.bias = bias_initializer.initialize((self.num_kernels,), 1, self.num_kernels) - pass \ No newline at end of file +from Layers.Base import BaseLayer + +import numpy as np +from scipy import signal +from functools import reduce +import operator +from copy import deepcopy as copy + + +class Conv(BaseLayer): + def __init__(self, stride_shape, convolution_shape, num_kernels): + super().__init__() + self.trainable = True + self.stride_shape = (stride_shape[0], stride_shape[0]) if len(stride_shape) == 1 else stride_shape + # 1d as [channel,m], 2d as [channel,m,n] + self.convolution_shape = convolution_shape + self.num_kernels = num_kernels + # init weights as uniform random (will be initialized again with initialize method) + # 
shape for 2d conv: (num_kernels, channel, m, n) + self.weights = np.random.uniform(0, 1, (num_kernels, *convolution_shape)) + # bias shape: number of kernels + self.bias = np.random.rand(num_kernels) + + # grad parameters + self._gradient_weights = None + self._gradient_bias = None + + self._optimizer = None + self._bias_optimizer = None + + # conv_dim if it is 2d or 1d + self.conv_dim = 2 if len(convolution_shape) == 3 else 1 + + + def initialize(self, weights_initializer, bias_initializer): + self.weights = weights_initializer.initialize(self.weights.shape, + reduce(operator.mul, self.convolution_shape), + reduce(operator.mul, [self.num_kernels, *self.convolution_shape[1:]])) + + self.bias = bias_initializer.initialize(self.bias.shape, 1,self.num_kernels) + + self._optimizer=copy(self.optimizer) + self._bias_optimizer=copy(self.optimizer) + + def forward(self, input_tensor): + # if correlation is used in forward, we can use convole in backward + # or vice versa + # input_tensor shape (b,c,x,y) or (b,c,x) + self.input_tensor = input_tensor + ishape = input_tensor.shape + self.ishape = ishape + bsize, c, y, x = ishape if self.conv_dim==2 else (*ishape, None) + cx,cy = self.convolution_shape[-2:] + + sh, sw = self.stride_shape + + # new shape of y = (y-ky + 2*p)/sh + 1; y input size, ky kernel size, p padding size, sh stride size + # but we need o/p size same as i/p so p=(ky-1)/2 if sh==1 + # else we need to derive + pad=[(cx-1)/2] + out_shape = [int((y-cx+2*pad[0])/sh)+1] + if self.conv_dim==2: + pad.append((cy-1)/2) + out_shape.append(int((x-cy+2*pad[1])/sw)+1) + self.pad=pad + result = np.zeros((bsize, self.num_kernels, *out_shape)) + + # if used correlation in forward, should use convolve in backward + for cb in range(bsize): + for ck in range(self.num_kernels): + # sum outputs of correlation of this kernel with individual input channel of input + kout = np.zeros((y,x)) if x else np.zeros((y)) + for ch in range(c): + # correlate with this batch's this channel and this kernel's this channel + kout += signal.correlate(input_tensor[cb, ch], self.weights[ck, ch], mode='same', method='direct') + + kout = kout[::sh, ::sw] if self.conv_dim==2 else kout[::sh] + result[cb, ck] = kout + self.bias[ck] + + return result + + + def update_parameters(self, error_tensor): + # what is the grad of bias in this layer for this batch? + # we sum error tensor along axis of B,W,H (if 2d) + # B + berror = error_tensor.sum(axis=0) + # W + yerror = berror.sum(axis=1) + # H? + self._gradient_bias = yerror.sum(axis=1) if self.conv_dim==2 else yerror + + # what is the grad of weights in this layer for this batch? 
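For orientation, a minimal self-contained sketch of the relation the loop below accumulates per sample, kernel and channel (illustrative array names and sizes, 2-D case, stride 1): the weight gradient is the valid cross-correlation of the padded input channel with that kernel's error map.

    import numpy as np
    from scipy import signal

    inp = np.random.randn(8, 8)      # one input channel of one sample
    err = np.random.randn(8, 8)      # error map of one kernel (stride 1, 'same' padding)
    pad = 1                          # (kernel_size - 1) // 2 for a 3x3 kernel
    grad_w = signal.correlate(np.pad(inp, pad), err, mode='valid')
    print(grad_w.shape)              # (3, 3): gradient w.r.t. one 3x3 kernel slice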
+ batch_size, channels, y, x = self.ishape if self.conv_dim==2 else (*self.ishape, None) + sh, sw = self.stride_shape + cx, cy = self.convolution_shape[-2:] + + self.gradient_weights=np.zeros_like(self.weights) + for cb in range(batch_size): + for ch in range(channels): + for ck in range(self.num_kernels): + if self.conv_dim==2: + error = np.zeros((y, x)) + error[::sh, ::sw] = error_tensor[cb, ck] + inp = np.pad(self.input_tensor[cb, ch], + [(int(np.ceil(self.pad[0])), int(np.floor(self.pad[0]))), + (int(np.ceil(self.pad[1])), int(np.floor(self.pad[1])))] + # [int(np.ceil(self.pad[0])), int(np.floor(self.pad[1]))] + ) + else: + error = np.zeros(y) + error[::sh] = error_tensor[cb, ck] + inp = np.pad(self.input_tensor[cb, ch], [(int(np.ceil(self.pad[0])), int(np.floor(self.pad[0])))]) + + self.gradient_weights[ck, ch] += signal.correlate( + inp, error, mode='valid') + + if self.optimizer: + self.weights = self._optimizer.calculate_update(self.weights, self._gradient_weights) + self.bias = self._bias_optimizer.calculate_update(self.bias, self._gradient_bias) + + def error_this_layer(self, error_tensor): + # compute error in this layer + gradient=np.zeros_like(self.input_tensor) + sh,sw = self.stride_shape + + # input Conv2d weight shape: (num_kernel, channel, w, h), channel is channel of input data + # inner Conv2d weight shape: (num_kernel, input_channel, w, h) + # input channel is channel from previous layer + # while passing error backward, we calculate error cased by this layer's weights + # so transpose weight as : (input_channel, num_kernel, w, h) + nweight = self.weights.copy() + nweight = np.transpose(nweight, axes=(1,0,2,3)) if self.conv_dim==2 else np.transpose(nweight, axes=(1,0,2)) + ishape = self.input_tensor.shape + y,x = ishape[-2:] if self.conv_dim==2 else (ishape[-1],None) + + bsize = self.input_tensor.shape[0] + wk, wc = nweight.shape[:2] + + for cb in range(bsize): + for ck in range(wk): + grad = 0 + for c in range(wc): + if self.conv_dim==2: + err = np.zeros((y,x)) + err[::sh, ::sw] = error_tensor[cb, c] + else: + err = np.zeros(y) + err[::sh] = error_tensor[cb, ck] + # we used correlate on forward, use convolve now + grad += signal.convolve(err, nweight[ck, c], mode='same', method='direct') + + gradient[cb, ck] = grad + return gradient + + def backward(self, error_tensor): + self.update_parameters(error_tensor) + gradient = self.error_this_layer(error_tensor) + + + + return gradient + + @property + def gradient_weights(self): + return self._gradient_weights + + @gradient_weights.setter + def gradient_weights(self, value): + self._gradient_weights = value + + @property + def gradient_bias(self): + return self._gradient_bias + + @gradient_bias.setter + def gradient_bias(self, value): + self._gradient_bias = value + + @property + def optimizer(self): + return self._optimizer + + @optimizer.setter + def optimizer(self, value): + self._optimizer = value + + @property + def bias_optimizer(self): + return self._bias_optimizer + + @bias_optimizer.setter + def bias_optimizer(self, value): + self._bias_optimizer = value diff --git a/exercise3_material/src_to_implement/Layers/Conv_o.py b/exercise3_material/src_to_implement/Layers/Conv_o.py new file mode 100644 index 0000000000000000000000000000000000000000..f91d7d4247a4df2109cb6b37a32b06945b72af1e --- /dev/null +++ b/exercise3_material/src_to_implement/Layers/Conv_o.py @@ -0,0 +1,182 @@ +from . 
import Base +from scipy import ndimage +from scipy import signal +import numpy as np + +#stride_shape - single value or tuple +#convolution_shape - 1D or 2D conv layer [c, m, n] +#num_kernels - integer value +class Conv(Base.BaseLayer): + + def __init__(self, stride_shape, convolution_shape, num_kernels) -> None: + super().__init__() + self.trainable = True + self._optimizer = None + self.weights = None + self.bias = None + self.gradient_weights = None + self.gradient_bias = None + self.stride_shape = stride_shape #single value or tuple + self.convolution_shape = convolution_shape #filter shape (c,m,n) + if len(self.convolution_shape) == 3: + self.c = self.convolution_shape[0] + self.m = self.convolution_shape[1] + self.n = self.convolution_shape[2] + else: + self.c = self.convolution_shape[0] + self.m = self.convolution_shape[1] + self.num_kernels = num_kernels + self.weights = np.random.uniform(0,1, (self.num_kernels, *convolution_shape)) + self.bias = np.random.uniform(0,1, (self.num_kernels,)) + pass + +#input shape - [batch, channels, y, x] +#output shape - [batch, num_kernels, y_o, x_o] +#y_o = (y + 2p - f)/s + 1 + def forward(self, input_tensor): + self.input_tensor = input_tensor + if len(self.stride_shape) == 2: + sy = self.stride_shape[0] + sx = self.stride_shape[1] + else: + sy = self.stride_shape[0] + sx = self.stride_shape[0] + + batch = input_tensor.shape[0] + + if len(self.convolution_shape) == 3: + y = input_tensor.shape[2] + x = input_tensor.shape[3] + padding_y = (self.m-1)/2 + padding_x = (self.n-1)/2 + self.padding = [padding_y, padding_x] + y_o = int((y + 2*padding_y - self.m)//sy + 1) + x_o = int((x + 2*padding_x - self.n)//sx + 1) + output_shape = (batch, self.num_kernels, y_o, x_o) + else: + y = input_tensor.shape[2] + padding_y = (self.m-1)/2 + self.padding = [padding_y] + y_o = int((y + 2*padding_y - self.m)//sy + 1) + output_shape = (batch, self.num_kernels, y_o) + + output_tensor = np.zeros(output_shape) + + + for ib in range(batch): + for ik in range(self.num_kernels): + if len(self.convolution_shape) == 3: + output_per_filter = np.zeros((y,x)) + else: + output_per_filter = np.zeros((y)) + for ic in range(self.c): + + output_per_filter += ndimage.convolve(self.input_tensor[ib, ic], self.weights[ik, ic], mode='constant', cval=0) + # output_per_filter += signal.correlate(input_tensor[ib, ic], self.weights[ik, ic], mode='same', method='direct') + + output_per_filter = output_per_filter[::sy,::sx] if len(self.convolution_shape) == 3 else output_per_filter[::sy] #striding + output_tensor[ib, ik] = output_per_filter + self.bias[ik] + + return output_tensor + + @property + def optimizer(self): + return self._optimizer + + @optimizer.setter + def optimizer(self, value): + self._optimizer = value + + @property + def gradient_weights(self): + return self._gradient_weights + + @gradient_weights.setter + def gradient_weights(self, value): + self._gradient_weights = value + + @property + def gradient_bias(self): + return self._gradient_bias + + @gradient_bias.setter + def gradient_bias(self, value): + self._gradient_bias = value + + def backward(self, error_tensor): + error_output = np.zeros_like(self.input_tensor) + if len(self.stride_shape) == 2: + sy = self.stride_shape[0] + sx = self.stride_shape[1] + else: + sy = self.stride_shape[0] + sx = self.stride_shape[0] + + T_weights = self.weights.copy() + T_weights = np.transpose(T_weights, axes=(1,0,2,3)) if len(self.convolution_shape) == 3 else np.transpose(T_weights, axes=(1,0,2)) + batch = self.input_tensor.shape[0] + nk, 
nc = T_weights.shape[:2] + + if len(self.convolution_shape) == 3: + y = self.input_tensor.shape[2] + x = self.input_tensor.shape[3] + else: + y = self.input_tensor.shape[2] + + for ib in range(batch): + for ik in range(nk): + error_per_channel = 0 + for ic in range(nc): + if len(self.convolution_shape) == 3: + err = np.zeros((y,x)) + err[::sy, ::sx] = error_tensor[ib, ic] + else: + err = np.zeros(y) + err[::sy] = error_tensor[ib, ic] + + error_per_channel += ndimage.correlate(err, T_weights[ik, ic], mode='constant', cval=0) + + error_output[ib, ik] = error_per_channel + + berror = error_tensor.sum(axis=0) + yerror = berror.sum(axis=1) + self.gradient_bias = yerror.sum(axis=1) if len(self.convolution_shape)==3 else yerror + + self.gradient_weights=np.zeros_like(self.weights) + for ib in range(batch): + for ic in range(self.input_tensor.shape[1]): + for ik in range(self.num_kernels): + if len(self.convolution_shape)==3: + error = np.zeros((y, x)) + error[::sy, ::sx] = error_tensor[ib, ik] + input = np.pad(self.input_tensor[ib, ic], + [(int(np.ceil(self.padding[0])), int(np.floor(self.padding[0]))), + (int(np.ceil(self.padding[1])), int(np.floor(self.padding[1])))]) + else: + error = np.zeros(y) + error[::sy] = error_tensor[ib, ik] + input = np.pad(self.input_tensor[ib, ic], [(int(np.ceil(self.padding[0])), int(np.floor(self.padding[0])))]) + buffer = ndimage.correlate(input, error, mode='constant') + expected_output_size = np.array(input.shape) - np.array(error.shape) + 1 + buffer = buffer[:expected_output_size[0], :expected_output_size[1]] if len(expected_output_size)==2 else buffer[:expected_output_size[0]] + + self.gradient_weights[ik, ic] += buffer + + + if self._optimizer is not None: + self.weights = self.optimizer.calculate_update(self.weights,self.gradient_weights) + self.bias = self.optimizer.calculate_update(self.bias,self.gradient_bias) + return error_output + + def initialize(self, weights_initializer, bias_initializer): + if len(self.convolution_shape) == 3: + fan_in = self.c * self.m * self.n + fan_out = self.num_kernels * self.m * self.n + self.weights = weights_initializer.initialize((self.num_kernels, self.c, self.m, self.n),fan_in, fan_out) + self.bias = bias_initializer.initialize((self.num_kernels,), 1, self.num_kernels) + else: + fan_in = self.c * self.m + fan_out = self.num_kernels * self.m + self.weights = weights_initializer.initialize((self.num_kernels, self.c, self.m),fan_in, fan_out) + self.bias = bias_initializer.initialize((self.num_kernels,), 1, self.num_kernels) + pass \ No newline at end of file diff --git a/exercise3_material/src_to_implement/Layers/RNN.py b/exercise3_material/src_to_implement/Layers/RNN.py index bea912131f4f172a9f7b9942d64e3a63b9ece781..e658f667a3ffefb2b9d7a84f4503f9474900cfe6 100644 --- a/exercise3_material/src_to_implement/Layers/RNN.py +++ b/exercise3_material/src_to_implement/Layers/RNN.py @@ -115,9 +115,6 @@ class RNN(Base.BaseLayer): self.output_FCLayer.weights = self.optimizer.calculate_update(self.output_FCLayer.weights, self.output_FCLayer_gradient_weights) self.weights = self.optimizer.calculate_update(self.weights, self.gradient_weights) - - - return gradient_inputs diff --git a/exercise3_material/src_to_implement/Layers/__pycache__/BatchNormalization.cpython-310.pyc b/exercise3_material/src_to_implement/Layers/__pycache__/BatchNormalization.cpython-310.pyc index df046cec3a85fe0fa9ac9477cea153c3145e6d13..2d221ce5bd067573fc841d5538c7e8d1204c36a0 100644 Binary files 
a/exercise3_material/src_to_implement/Layers/__pycache__/BatchNormalization.cpython-310.pyc and b/exercise3_material/src_to_implement/Layers/__pycache__/BatchNormalization.cpython-310.pyc differ diff --git a/exercise3_material/src_to_implement/Layers/__pycache__/Conv.cpython-310.pyc b/exercise3_material/src_to_implement/Layers/__pycache__/Conv.cpython-310.pyc index 0a697e15a8ca2081823f22581d6ef79816c4fd8e..a4bc5324642d817258d596a07bc35c3f93005e25 100644 Binary files a/exercise3_material/src_to_implement/Layers/__pycache__/Conv.cpython-310.pyc and b/exercise3_material/src_to_implement/Layers/__pycache__/Conv.cpython-310.pyc differ diff --git a/exercise3_material/src_to_implement/Layers/__pycache__/Dropout.cpython-310.pyc b/exercise3_material/src_to_implement/Layers/__pycache__/Dropout.cpython-310.pyc index 55e7d8444d71d682326b5c82307a0416ab676b94..610ffb0a518f1921d2a3fb8c5dcce245df293fef 100644 Binary files a/exercise3_material/src_to_implement/Layers/__pycache__/Dropout.cpython-310.pyc and b/exercise3_material/src_to_implement/Layers/__pycache__/Dropout.cpython-310.pyc differ diff --git a/exercise3_material/src_to_implement/Layers/__pycache__/RNN.cpython-310.pyc b/exercise3_material/src_to_implement/Layers/__pycache__/RNN.cpython-310.pyc index d53a7162a2e4421e4084c7102e4f7af747776eb7..72c1568e112f4d122c9fa09557b1cfef22ec0701 100644 Binary files a/exercise3_material/src_to_implement/Layers/__pycache__/RNN.cpython-310.pyc and b/exercise3_material/src_to_implement/Layers/__pycache__/RNN.cpython-310.pyc differ diff --git a/exercise3_material/src_to_implement/NeuralNetwork.py b/exercise3_material/src_to_implement/NeuralNetwork.py index 9f49bdfdc23bef823bab390ce90a55cbfc1f9a6a..2dcb23e6bc33028ec2432116418cce8cea4b7e9d 100644 --- a/exercise3_material/src_to_implement/NeuralNetwork.py +++ b/exercise3_material/src_to_implement/NeuralNetwork.py @@ -1,60 +1,94 @@ -import copy - -class NeuralNetwork: - def __init__(self, optimizer, weights_initializer, bias_initializer) -> None: - self.optimizer = optimizer - self.loss = [] - self.layers = [] - self.data_layer = None - self.loss_layer = None - self.weights_initializer = weights_initializer - self.bias_initializer = bias_initializer - pass - - def forward(self): - loss_regularizer = 0 - self.input_tensor, self.label_tensor = self.data_layer.next() - for layer in self.layers: - self.input_tensor = layer.forward(self.input_tensor) - if self.optimizer.regularizer is not None: - loss_regularizer += self.optimizer.regularizer.norm(layer.weights) - loss = self.loss_layer.forward(self.input_tensor+loss_regularizer, self.label_tensor) - return loss - - def backward(self): - error = self.loss_layer.backward(self.label_tensor) - for layer in reversed(self.layers): - error = layer.backward(error) - pass - - def append_layer(self, layer): - if layer.trainable == True: - opti = copy.deepcopy(self.optimizer) - layer.optimizer = opti - layer.initialize(self.weights_initializer, self.bias_initializer) - self.layers.append(layer) - - def train(self, iterations): - self.testing_phase = False - for _ in range(iterations): - loss = self.forward() - self.loss.append(loss) - self.backward() - - def test(self, input_tensor): - self.data_layer = input_tensor - for layer in self.layers: - self.data_layer = layer.forward(self.data_layer) - return self.data_layer - - @property - def phase(self): - return self.phase - - @phase.setter - def phase(self, value): - self.phase = value - pass - - def norm(self, weights): - return self.loss_layer.norm(weights) \ No newline at end of 
file +from copy import deepcopy + +def save(filename, net): + import pickle + nnet=net + dlayer = nnet.data_layer + nnet.__setstate__({'data_layer': None}) + + with open(filename, 'wb') as f: + pickle.dump(nnet, f) + nnet.__setstate__({'data_layer': dlayer}) + + +def load(filename, data_layer): + import pickle + with open(filename, 'rb') as f: + net = pickle.load(f) + net.__setstate__({'data_layer': data_layer}) + + return net + +class NeuralNetwork: + def __init__(self, optimizer, weights_initializer, bias_initializer) -> None: + self.optimizer = optimizer + self.loss = [] + self.layers=[] + self.data_layer = None + self.loss_layer = None + self.weights_initializer = weights_initializer + self.bias_initializer = bias_initializer + + self._phase = None + + def __getstate__(self): + return self.__dict__.copy() + + def __setstate__(self, state): + self.__dict__.update(state) + return self.__dict__.copy() + + @property + def phase(self): + return self._phase + + @phase.setter + def phase(self, value): + self._phase = value + + def forward(self): + inp,op = self.data_layer.next() + self.label = op + regularization_loss = 0 + # print(inp) + for layer in self.layers: + inp = layer.forward(inp) + try: + regularization_loss += self.optimizer.regularizer.norm(layer.weights) + except: + pass + layer.testing_phase = True + + # inp = self.loss_layer.forward(inp, self.label) + self.pred=self.loss_layer.forward(inp+regularization_loss, op) + return self.pred + + def backward(self): + # loss = self.loss_layer.forward(self.pred, self.label) + loss = self.loss_layer.backward(self.label) + for layer in self.layers[::-1]: + loss = layer.backward(loss) + + + def append_layer(self, layer): + if layer.trainable: + layer.optimizer = deepcopy(self.optimizer) + layer.initialize(self.weights_initializer, self.bias_initializer) + + self.layers.append(layer) + + def train(self, iterations): + for i in range(iterations): + loss = self.forward() + self.backward() + self.loss.append(loss) + + def test(self, input_tensor): + inp = input_tensor #self.data_layer.next() + # print(inp.shape) + for layer in self.layers: + inp = layer.forward(inp) + # print(layer) + return inp + + diff --git a/exercise3_material/src_to_implement/NeuralNetwork_o.py b/exercise3_material/src_to_implement/NeuralNetwork_o.py new file mode 100644 index 0000000000000000000000000000000000000000..a38d72bcf725d7f5f7567fc7495778c8e804139b --- /dev/null +++ b/exercise3_material/src_to_implement/NeuralNetwork_o.py @@ -0,0 +1,65 @@ +import copy + +class NeuralNetwork: + def __init__(self, optimizer, weights_initializer, bias_initializer) -> None: + self.optimizer = optimizer + self.loss = [] + self.layers = [] + self.data_layer = None + self.loss_layer = None + self.weights_initializer = weights_initializer + self.bias_initializer = bias_initializer + pass + + def forward(self): + loss_regularizer = 0 + self.input_tensor, self.label_tensor = self.data_layer.next() + for layer in self.layers: + self.input_tensor = layer.forward(self.input_tensor) + #if self.optimizer.regularizer is not None: + try: + if layer.trainable: + loss_regularizer += self.optimizer.regularizer.norm(layer.weights) + except: + pass + + loss = self.loss_layer.forward(self.input_tensor, self.label_tensor) + return loss + loss_regularizer + + def backward(self): + error = self.loss_layer.backward(self.label_tensor) + for layer in reversed(self.layers): + error = layer.backward(error) + pass + + def append_layer(self, layer): + if layer.trainable == True: + opti = 
copy.deepcopy(self.optimizer) + layer.optimizer = opti + layer.initialize(self.weights_initializer, self.bias_initializer) + self.layers.append(layer) + + def train(self, iterations): + self.testing_phase = False + for _ in range(iterations): + loss = self.forward() + self.loss.append(loss) + self.backward() + + def test(self, input_tensor): + self.data_layer = input_tensor + for layer in self.layers: + self.data_layer = layer.forward(self.data_layer) + return self.data_layer + + @property + def phase(self): + return self.phase + + @phase.setter + def phase(self, value): + self.phase = value + pass + + def norm(self, weights): + return self.loss_layer.norm(weights) \ No newline at end of file diff --git a/exercise3_material/src_to_implement/__pycache__/NeuralNetwork.cpython-310.pyc b/exercise3_material/src_to_implement/__pycache__/NeuralNetwork.cpython-310.pyc index 56626f6b19db93890ac32aee063ad04b3cbcbccc..8582be15085fbe6387611208a5ad7b01636b789c 100644 Binary files a/exercise3_material/src_to_implement/__pycache__/NeuralNetwork.cpython-310.pyc and b/exercise3_material/src_to_implement/__pycache__/NeuralNetwork.cpython-310.pyc differ diff --git a/exercise3_material/src_to_implement/log.txt b/exercise3_material/src_to_implement/log.txt index 178c80c75b56c3786c1607b31b9494e09e82f255..022ddba76e5edbca27547451f4b9543919f1f8bc 100644 --- a/exercise3_material/src_to_implement/log.txt +++ b/exercise3_material/src_to_implement/log.txt @@ -10,3 +10,124 @@ On the Iris dataset, we achieve an accuracy of: 94.0% On the Iris dataset, we achieve an accuracy of: 98.0% On the Iris dataset using Dropout, we achieve an accuracy of: 54.0% On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 66.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 57.99999999999999% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 66.0% +On the Iris dataset, we achieve an accuracy of: 94.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 94.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 57.99999999999999% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 94.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris 
dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 64.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 62.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the UCI ML hand-written digits dataset using Batch_norm and L2 we achieve an accuracy of: 97.32888146911519% +On the UCI ML hand-written digits dataset using Batch_norm we achieve an accuracy of: 97.66277128547578% +On the UCI ML hand-written digits dataset using ADAM we achieve an accuracy of: 97.32888146911519% +On the UCI ML hand-written digits dataset using L1_regularizer we achieve an accuracy of: 87.31218697829716% +On the UCI ML hand-written digits dataset using L2_regularizer we achieve an accuracy of: 65.4424040066778% +On the UCI ML hand-written digits dataset using Dropout we achieve an accuracy of: 90.15025041736226% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy of: 84.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 62.0% +On the Iris dataset, we achieve an accuracy of: 94.0% +On the UCI ML hand-written digits dataset using Batch_norm and L2 we achieve an accuracy of: 95.99332220367279% +On the UCI ML hand-written digits dataset using Batch_norm we achieve an accuracy of: 98.49749582637729% +On the UCI ML hand-written digits dataset using ADAM we achieve an accuracy of: 95.99332220367279% +On the UCI ML hand-written digits dataset using L1_regularizer we achieve an accuracy of: 84.97495826377296% +On the UCI ML hand-written digits dataset using L2_regularizer we achieve an accuracy of: 86.64440734557596% +On the UCI ML hand-written digits dataset using Dropout we achieve an accuracy of: 86.47746243739566% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy of: 86.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 60.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the UCI ML hand-written digits dataset using Batch_norm and L2 we achieve an accuracy of: 97.16193656093489% +On the UCI ML hand-written digits dataset using Batch_norm we achieve an accuracy of: 97.8297161936561% +On the UCI ML hand-written digits dataset using ADAM we achieve an accuracy of: 96.16026711185309% +On the UCI ML hand-written digits dataset using L1_regularizer we achieve an accuracy of: 76.29382303839732% +On the UCI ML hand-written digits dataset using L2_regularizer we achieve an accuracy of: 85.30884808013356% +On the UCI ML hand-written digits dataset using Dropout we achieve an accuracy of: 87.31218697829716% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy of: 88.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 60.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy 
of: 88.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 54.0% +On the Iris dataset, we achieve an accuracy of: 94.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 68.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 76.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 57.99999999999999% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 60.0% +On the Iris dataset, we achieve an accuracy of: 94.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 60.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 98.0% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the UCI ML hand-written digits dataset using Batch_norm and L2 we achieve an accuracy of: 96.661101836394% +On the UCI ML hand-written digits dataset using Batch_norm we achieve an accuracy of: 97.8297161936561% +On the UCI ML hand-written digits dataset using ADAM we achieve an accuracy of: 96.4941569282137% +On the UCI ML hand-written digits dataset using L1_regularizer we achieve an accuracy of: 86.81135225375625% +On the UCI ML hand-written digits dataset using L2_regularizer we achieve an accuracy of: 78.79799666110183% +On the UCI ML hand-written digits dataset using Dropout we achieve an accuracy of: 94.49081803005008% +On the Iris dataset, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 98.0% +On the Iris dataset using Batchnorm, we achieve an accuracy of: 98.0% +On the Iris dataset using Dropout, we achieve an accuracy of: 96.0% +On the Iris dataset, we achieve an accuracy of: 96.0%