Skip to content
Snippets Groups Projects
Commit d95166ab authored by Gabriel Falk
Browse files

Upload New File

parent 7a9bfbd3
Branches
No related tags found
No related merge requests found
main.cpp 0 → 100644
#include <chrono>
#include <iostream>
#include <math.h>
#include <assert.h>
#include <cuda_runtime.h>
#include "cudaKernels.h"
#include "matrix.h"
#include "test.h"
//#include "cudaLenet5.h"
//#include <onnxruntime_cxx_api.h>
//#include "tensor.h"
/**
 * Dispatches to one of the CUDA host wrappers and returns a freshly created
 * result matrix.
 *
 * Modes:
 *   0: average pooling of src1 (src2 is not used)
 *   1: matrix x vector -> vector
 *   2: matrix x matrix (tiled)
 *   3: 2D convolution of src1 with filter src2
 *   4: 2D convolution of src1 with filter src2, FFT variant
 *
 * @param src1 first operand (input matrix)
 * @param src2 second operand (vector, matrix or filter, depending on mode)
 * @param mode operation selector, see the list above
 * @return the matrix created for the result; for an unknown mode,
 *         createMx(0, 0) is returned and no wrapper is called.
 *
 * NOTE(review): the convolution wrappers receive only src2.sizeX as the
 * filter size, so they presumably expect square filters -- confirm.
 */
matrix cudaInterface(matrix src1, matrix src2, int mode){
    // Timing slot handed to the wrappers that accept one. It lives on the
    // stack: the previous version malloc'ed it on every call and never freed
    // it, leaking memory on each invocation. The value is not reported here.
    double elapsed = 0.0;

    switch(mode){
        case 0: {
            matrix dst = createMx(src1.sizeX/2, src1.sizeY/2);
            avgPoolCuda(dst.head, src1.head, src1.sizeX, src1.sizeY, &elapsed);
            return dst;
        }
        case 1: {
            matrix dst = createMx(src2.sizeX, src1.sizeY);
            vecMulCuda(src1.head, src2.head, dst.head, src1.sizeX, src1.sizeY);
            return dst;
        }
        case 2: {
            matrix dst = createMx(src2.sizeX, src1.sizeY);
            matMulTileCuda(src1.head, src2.head, dst.head, src1.sizeX, src1.sizeY, src2.sizeX, src2.sizeY);
            return dst;
        }
        case 3: {
            // Output size of a convolution without padding: input - filter + 1.
            matrix dst = createMx(src1.sizeX - src2.sizeX + 1, src1.sizeY - src2.sizeY + 1);
            conv2dCuda(src1.head, src2.head, dst.head, src1.sizeX, src1.sizeY, src2.sizeX, &elapsed);
            return dst;
        }
        case 4: {
            matrix dst = createMx(src1.sizeX - src2.sizeX + 1, src1.sizeY - src2.sizeY + 1);
            conv2dFFTCuda(src1.head, src2.head, dst.head, src1.sizeX, src1.sizeY, src2.sizeX, &elapsed);
            return dst;
        }
        default:
            // Unknown mode: hand back an empty matrix.
            return createMx(0, 0);
    }
}
/**
 * CPU reference for a single Winograd convolution tile, built from the
 * matrix helpers (transpose, mulMx, pointMulMx):
 *
 *     Z = A^T * [ (G * filter * G^T) (.) (B^T * input * B) ] * A
 *
 * where (.) is the product computed by pointMulMx.
 *
 * @param input  input tile
 * @param filter convolution filter
 * @param A      output transform matrix
 * @param B      input transform matrix
 * @param G      filter transform matrix
 * @return the transformed result Z
 */
matrix winogradCPU(matrix input, matrix filter, matrix A, matrix B, matrix G){
    // Input transform: B^T * input * B
    matrix inputLeft = mulMx(transpose(B), input);
    matrix inputTransformed = mulMx(inputLeft, B);

    // Filter transform: G * filter * G^T
    matrix filterLeft = mulMx(G, filter);
    matrix filterTransformed = mulMx(filterLeft, transpose(G));

    // Combine both transformed operands (filter first, as pointMulMx(V, U)).
    matrix product = pointMulMx(filterTransformed, inputTransformed);

    // Output transform: A^T * product * A
    matrix outputLeft = mulMx(transpose(A), product);
    return mulMx(outputLeft, A);
}
// Program entry point. The only active statement is the call to test()
// (presumably declared in test.h -- confirm). Every block comment below is a
// disabled manual experiment that prints a CPU reference result next to the
// result of the corresponding GPU path; enable one at a time for debugging.
int main() {
// Run the timing measurements.
test();
/*
// Experiment: matrix x vector on the GPU (cudaInterface mode 1) vs. CPU mulMx.
matrix w = getWmat();
matrix i = getImat();
// Print the original operands
printf("W:\n");
printMx(w);
printf("I:\n");
printMx(i);
// CPU reference
matrix c = mulMx(w, i);
printf("CPU:\n");
printMx(c);
// GPU result
matrix d = cudaInterface(w, i, 1);
printf("GPU:\n");
printMx(d);
*/
/*
// Experiment: 16 x 16 matrix product, timed CPU mulMx vs. GPU (cudaInterface mode 2).
matrix t = getTmat();
// Print the original operand
printf("T:\n");
printMx(t);
// CPU reference, timed with std::chrono
auto start = std::chrono::high_resolution_clock::now();
matrix c = mulMx(t, t);
auto finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed = finish - start;
printf("CPU:\n");
std::cout << "Elapsed time: " << elapsed.count() << " s\n";
printMx(c);
// GPU result
printf("GPU:\n");
matrix d = cudaInterface(t, t, 2);
printMx(d);
*/
/*
// Experiment: 2D convolution, timed CPU conv2d vs. GPU (cudaInterface mode 3).
matrix x = getHmat();
matrix f = getF2mat();
printf("X:\n");
printMx(x);
printf("F:\n");
printMx(f);
printf("CPU:\n");
auto start = std::chrono::high_resolution_clock::now();
matrix c = conv2d(x, f);
auto finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed = finish - start;
std::cout << "Elapsed time: " << elapsed.count() << " s\n";
printMx(c);
printf("GPU:\n");
matrix d = cudaInterface(x, f, 3);
printMx(d);
// End of test
*/
// Experiment: 2D convolution, CPU conv2d vs. the FFT GPU path (cudaInterface mode 4).
/*
matrix ff = get5x5fracmat();
matrix x = get7x7onesmat();
printf("FF:\n");
printMx(ff);
printf("X:\n");
printMx(x);
printf("CPU:\n");
//auto start = std::chrono::high_resolution_clock::now();
matrix c = conv2d(x, ff);
//auto finish = std::chrono::high_resolution_clock::now();
//std::chrono::duration<double> elapsed = finish - start;
//std::cout << "Elapsed time: " << elapsed.count() << " s\n";
printMx(c);
printf("GPU:\n");
matrix d = cudaInterface(x, ff, 4);
printMx(d);
*/
/*
// Experiment: Winograd convolution on the GPU (conv2dWinoCuda) vs. CPU conv2d.
// NOTE(review): ff, x, B, G and A are declared twice below (a 5x5-filter set
// followed by a 3x3-filter set); this block will not compile if enabled
// as-is -- keep only one of the two sets.
matrix ff = get5x5fracmat();
matrix x = get6x6stdmat();
matrix B = get6x6WiB();
matrix G = get5x6WiG();
matrix A = get2x6WiA();
matrix ff = get3x3mat();
matrix x = get6x6stdmat();
matrix B = get4x4WiB();
matrix G = get3x4WiG();
matrix A = get2x4WiA();
printf("FF:\n");
printMx(ff);
printf("X:\n");
printMx(x);
printf("B:\n");
printMx(B);
printf("G:\n");
printMx(G);
printf("A:\n");
printMx(A);
printf("Wino:\n");
// CPU Winograd reference (disabled)
//matrix Z = winogradCPU(x, ff, A, B, G);
//printMx(Z);
printf("STD:\n");
matrix c = conv2d(x, ff);
printMx(c);
// NOTE(review): this malloc'ed timing slot is never freed in this snippet.
double* time = (double*) malloc(sizeof(double));
matrix dst = createMx(x.sizeX-ff.sizeX+1, x.sizeY-ff.sizeY+1);
conv2dWinoCuda(x.head, ff.head, dst.head, x.sizeX, x.sizeY, ff.sizeX, time);
printf("WinoGPU:\n");
printMx(dst);
*/
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment