Skip to content
Snippets Groups Projects
Select Git revision
  • cactus_stack_devel
  • master default protected
  • io_uring_set_taskrun_flags
  • steal-all-io-on-hint
  • ripripgrepv4
  • io-sleep-strategies-msg-ring
  • improve-echoclient
  • improve-even-pulse
  • waitfd-sleep-strategy2
  • ripripgrepv2.5-io-sleep
  • ripripgrepv3-io-sleep
  • ripripgrepv3
  • ripripgrepv2
  • du
  • select2
  • linked-echoserver
  • cancel-callbacks-simple
  • cancel-callbacks
  • callback-fsearch-alternative
  • callback-fsearch
  • fsearch-track-outfile-offset
21 results

matmul.cpp

Blame
  • Forked from Lehrstuhl für Informatik 4 (Systemsoftware) / manycore / emper
    Source project has a limited visibility.
    matmul.cpp 3.14 KiB
    #include <stdio.h>
    #include <stdlib.h>
    #include "test.h"
    
    /*
     * Global size parameter, initialised to 2048; it is not static, so it has
     * external linkage.
     * NOTE(review): presumably the matrix dimension -- confirm where it is used.
     * The compute00..compute11 helpers below take a parameter that is also
     * named n, which shadows this global inside those functions.
     */
    int n = 2048;
    
    /*
     * File-local data pointers. a and b are plain float pointers, c is a
     * pointer to float pointers (indexed as c[i][j] by multiply()).
     * NOTE(review): where these are allocated and initialised is not visible
     * in this part of the file.
     */
    static float *  a;
    static float *  b;
    static float ** c;
    
    /*
     * Forward declaration of compute(), needed because the compute00..compute11
     * helpers call it before its definition. The parameter list mirrors those
     * helpers: (a, ai, aj, b, bi, bj, c, ci, cj, n).
     * NOTE(review): `fibril` is not defined in this file's visible part;
     * presumably a macro supplied through "test.h" -- confirm.
     */
    fibril static void compute(float *, int, int, float *, int, int,
        float **, int, int, int);
    
    /*
     * Runs the two compute() calls whose destination in c is (ci, cj).
     *
     * The first call pairs the a-offsets (ai, aj) with the b-offsets (bi, bj);
     * the second pairs (ai, aj + n) with (bi + n, bj). The calls are issued one
     * after the other, in that order, and both receive the same n.
     *
     * NOTE(review): n presumably is the half-size of the block being split,
     * which would make this the (0, 0) quadrant of the result -- confirm
     * against compute().
     */
    static void compute00(float * a, int ai, int aj, float * b, int bi, int bj,
        float ** c, int ci, int cj, int n)
    {
      const int aj_shifted = aj + n;
      const int bi_shifted = bi + n;

      compute(a, ai, aj, b, bi, bj, c, ci, cj, n);
      compute(a, ai, aj_shifted, b, bi_shifted, bj, c, ci, cj, n);
    }
    
    /*
     * Runs the two compute() calls whose destination in c is (ci, cj + n).
     *
     * The first call pairs the a-offsets (ai, aj) with the b-offsets
     * (bi, bj + n); the second pairs (ai, aj + n) with (bi + n, bj + n). The
     * calls are issued one after the other, in that order, and both receive
     * the same n.
     *
     * NOTE(review): n presumably is the half-size of the block being split,
     * which would make this the (0, 1) quadrant of the result -- confirm
     * against compute().
     */
    static void compute01(float * a, int ai, int aj, float * b, int bi, int bj,
        float ** c, int ci, int cj, int n)
    {
      const int bj_shifted = bj + n;
      const int cj_shifted = cj + n;

      compute(a, ai, aj, b, bi, bj_shifted, c, ci, cj_shifted, n);

      const int aj_shifted = aj + n;
      const int bi_shifted = bi + n;

      compute(a, ai, aj_shifted, b, bi_shifted, bj_shifted, c, ci, cj_shifted, n);
    }
    
    /*
     * Runs the two compute() calls whose destination in c is (ci + n, cj).
     *
     * The first call pairs the a-offsets (ai + n, aj) with the b-offsets
     * (bi, bj); the second pairs (ai + n, aj + n) with (bi + n, bj). The calls
     * are issued one after the other, in that order, and both receive the
     * same n.
     *
     * NOTE(review): n presumably is the half-size of the block being split,
     * which would make this the (1, 0) quadrant of the result -- confirm
     * against compute().
     */
    static void compute10(float * a, int ai, int aj, float * b, int bi, int bj,
        float ** c, int ci, int cj, int n)
    {
      const int ai_shifted = ai + n;
      const int ci_shifted = ci + n;

      compute(a, ai_shifted, aj, b, bi, bj, c, ci_shifted, cj, n);

      const int aj_shifted = aj + n;
      const int bi_shifted = bi + n;

      compute(a, ai_shifted, aj_shifted, b, bi_shifted, bj, c, ci_shifted, cj, n);
    }
    
    /*
     * Runs the two compute() calls whose destination in c is (ci + n, cj + n).
     *
     * The first call pairs the a-offsets (ai + n, aj) with the b-offsets
     * (bi, bj + n); the second pairs (ai + n, aj + n) with (bi + n, bj + n).
     * The calls are issued one after the other, in that order, and both
     * receive the same n.
     *
     * NOTE(review): n presumably is the half-size of the block being split,
     * which would make this the (1, 1) quadrant of the result -- confirm
     * against compute().
     */
    static void compute11(float * a, int ai, int aj, float * b, int bi, int bj,
        float ** c, int ci, int cj, int n)
    {
      const int ai_shifted = ai + n;
      const int bj_shifted = bj + n;
      const int ci_shifted = ci + n;
      const int cj_shifted = cj + n;

      compute(a, ai_shifted, aj, b, bi, bj_shifted, c, ci_shifted, cj_shifted, n);

      const int aj_shifted = aj + n;
      const int bi_shifted = bi + n;

      compute(a, ai_shifted, aj_shifted, b, bi_shifted, bj_shifted,
          c, ci_shifted, cj_shifted, n);
    }
    
    /*
     * Adds a 2x2 product into c.
     *
     * Reads four values from a (at ai + aj, ai + aj + 1, ai + 1 + aj and
     * ai + 1 + aj + 1) and the four values at the analogous offsets of b,
     * forms four sums of two products each, and adds them to c[ci][cj],
     * c[ci][cj + 1], c[ci + 1][cj] and c[ci + 1][cj + 1].
     *
     * a and b are only read; the rows c[ci] and c[ci + 1] are updated in place.
     * Each sum starts from 0.0F and the products are added in a fixed order, so
     * the floating-point result does not depend on how this is written.
     *
     * NOTE(review): a and b are indexed as flat arrays with ai + aj, without a
     * row stride, so offset (ai + 1, aj) and offset (ai, aj + 1) address the
     * same element -- confirm that this indexing is intended.
     */
    static void multiply(float * a, int ai, int aj, float * b, int bi, int bj,
        float ** c, int ci, int cj)
    {
      /* Base offsets of the two "rows" taken from each input. */
      const int a_lo = ai;
      const int a_hi = ai + 1;
      const int b_lo = bi;
      const int b_hi = bi + 1;

      /* Operands, named <matrix><row><column> within the 2x2 tile. */
      const float a00 = a[a_lo + aj];
      const float a01 = a[a_lo + aj + 1];
      const float a10 = a[a_hi + aj];
      const float a11 = a[a_hi + aj + 1];

      const float b00 = b[b_lo + bj];
      const float b01 = b[b_lo + bj + 1];
      const float b10 = b[b_hi + bj];
      const float b11 = b[b_hi + bj + 1];

      /* One accumulator per output cell; each gets two products, in order. */
      float acc00 = 0.0F;
      acc00 += a00 * b00;
      acc00 += a01 * b10;

      float acc01 = 0.0F;
      acc01 += a00 * b01;
      acc01 += a01 * b11;

      float acc10 = 0.0F;
      acc10 += a10 * b00;
      acc10 += a11 * b10;

      float acc11 = 0.0F;
      acc11 += a10 * b01;
      acc11 += a11 * b11;

      /* Fold the tile into c, upper row first. */
      float * out_row = c[ci];
      out_row[cj]     += acc00;
      out_row[cj + 1] += acc01;

      out_row = c[ci + 1];
      out_row[cj]     += acc10;
      out_row[cj + 1] += acc11;
    }