// Source: GitLab commit 028af350 ("Upload New File"), authored by Gabriel Falk; parent 1e8dbc37.
#include <chrono>
#include <iostream>
#include <cuda_runtime.h>
#include <cuda_profiler_api.h>
#include <device_launch_parameters.h>
#include <assert.h>
#include <stdio.h>
#include "tensor.h"
/*__global__ void conv1Kernel(){
int i = threadIdx.x;
printf("Thread i: %d\n", i);
__syncthreads();
}
*/
// Placeholder for the first average-pooling stage.
// Planned mapping: input 28x28x6xN -> output 14x14x6xN (not implemented yet).
//
// Current behaviour: every thread prints its threadIdx.x through device-side
// printf and then waits at a block-wide barrier. `data` and `res` are never
// read or written. Only threadIdx.x is consulted, so the printed value is the
// thread's index within its block, not a global index.
__global__ void pool1Kernel(float* data, float* res){
    const int tid = threadIdx.x;
    printf("Thread i: %d\n", tid);
    // Reached unconditionally by every thread of the block.
    __syncthreads();
}
/*
__global__ void conv2Kernel(){
int i = threadIdx.x;
printf("Thread i: %d\n", i);
__syncthreads();
}
__global__ void pool2Kernel(){
int i = threadIdx.x;
printf("Thread i: %d\n", i);
__syncthreads();
}
__global__ void conv3Kernel(){
int i = threadIdx.x;
printf("Thread i: %d\n", i);
__syncthreads();
}
__global__ void fully1Kernel(){
int i = threadIdx.x;
printf("Thread i: %d\n", i);
__syncthreads();
}
__global__ void fully2Kernel(){
int i = threadIdx.x;
printf("Thread i: %d\n", i);
__syncthreads();
}
*/
// Host entry point for the LeNet-5 pipeline.
// Planned input: image tensor, 4D (32x32xM), ...
//
// Current state: only device selection is live; the rest of the pipeline is a
// disabled draft kept below as comments.
//
// Returns the status of cudaSetDevice(0). On failure a diagnostic is written
// to stderr before returning.
cudaError_t lenet5Cuda(){
    // Setup (plain float pointers for now).
    //int N = 128;

    // Not const: the disabled draft below reassigns cudaStatus.
    cudaError_t cudaStatus = cudaSetDevice(0);
    const bool deviceSelected = (cudaStatus == cudaSuccess);
    if (!deviceSelected) {
        fputs("lenet5Cuda: cudaSetDevice failed! Do you have a CUDA-capable GPU installed?\n", stderr);
        return cudaStatus;
    }

    // ---- Disabled draft of the remaining pipeline (kept for reference) ----
    // NOTE(review) before re-enabling:
    //   (1) M and N are both used but neither is defined here (the only
    //       candidate is the commented-out `int N = 128` above);
    //   (2) each cudaMalloc overwrites cudaStatus without it being checked;
    //   (3) the check after the launch never queries cudaGetLastError(), so it
    //       tests the result of the last cudaMalloc instead of the launch;
    //   (4) none of the buffers are released with cudaFree.
    //
    // // Memory for processed data after each step
    // float *dev_images, *dev_conv1, *dev_pool1, *dev_conv2, *dev_pool2, *dev_conv3, *dev_fully1, *dev_fully2;
    // // Memory for the weights used by each step
    // // Counts: 156, 2416, 48120, 10164, 850;
    // float *dev_w_conv1, *dev_w_conv2, *dev_w_conv3, *dev_w_fully1, *dev_w_fully2;
    // cudaStatus = cudaMalloc((void**)&dev_images, 32*32*M*sizeof(float));
    // cudaStatus = cudaMalloc((void**)&dev_conv1, 28*28*6*N*sizeof(float));
    // cudaStatus = cudaMalloc((void**)&dev_pool1, 14*14*6*N*sizeof(float));
    // cudaStatus = cudaMalloc((void**)&dev_conv2, 10*10*16*N*sizeof(float));
    // cudaStatus = cudaMalloc((void**)&dev_pool2, 5*5*16*N*sizeof(float));
    // cudaStatus = cudaMalloc((void**)&dev_conv3, 120*N*sizeof(float));
    // cudaStatus = cudaMalloc((void**)&dev_fully1, 84*N*sizeof(float));
    // cudaStatus = cudaMalloc((void**)&dev_fully2, 10*M*sizeof(float));
    // cudaStatus = cudaMalloc((void**)&dev_w_conv1, (5*5*6+6)*sizeof(float));
    // cudaStatus = cudaMalloc((void**)&dev_w_conv2, (5*5*6*16+16)*sizeof(float));
    // cudaStatus = cudaMalloc((void**)&dev_w_conv3, (5*5*16*120+120)*sizeof(float));
    // cudaStatus = cudaMalloc((void**)&dev_w_fully1, (120*84+84)*sizeof(float));
    // cudaStatus = cudaMalloc((void**)&dev_w_fully2, (84*10+10)*sizeof(float));
    //
    // // Input 32x32xN; N = number of images
    // // ->Conv2D(tanh)->
    // // Output 28x28x6xN; N = number of images
    //
    // // Input 28x28x6xN; N = number of images
    // // ->Avg-Pooling(2x2)->
    // pool1Kernel<<<1, 10>>>(dev_conv1, dev_pool1);
    // if (cudaStatus != cudaSuccess) {
    //     fprintf(stderr, "pool1Cuda: pool1 launch failed: %s\n", cudaGetErrorString(cudaStatus));
    //     return cudaStatus;
    // }
    // // Output 14x14x6xN; N = number of images
    //
    // // Input 14x14x6xN; N = number of images
    // // ->Conv2D(tanh)->
    // // Output 10x10x16xN; N = number of images
    //
    // // Input 10x10x16xN; N = number of images
    // // ->Avg-Pooling(2x2)->
    // // Output 5x5x16xN; N = number of images
    //
    // // Input 5x5x16xN; N = number of images
    // // ->Conv2D(tanh)->
    // // Output 120xN; N = number of images
    //
    // // Input 120xN; N = number of images
    // // ->FullyConnected->
    // // Output 84xN; N = number of images
    //
    // // Input 84xN; N = number of images
    // // ->FullyConnected->
    // // Output 10xN; N = number of images
    // ---- End of disabled draft ----

    // Device selection succeeded; cudaStatus is cudaSuccess here.
    return cudaStatus;
}
// (End of file. Trailing GitLab page text -- loading indicator and comment prompt -- was not part of the source.)