#include <cmath>
#include <maca_cooperative_groups.h>
#include <maca_reduce.h>
#include <maca_fp16.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
//#include <__clang_maca_mma_functions.h>
#include "mctlass/arch/wmma.h"
using namespace mxmaca;
// WMMA kernel: 16x16x16 matrix multiplication
//__device__ void wmma_ker(half *a, half *b, float *c) {
/**
 * Multiply one 16x16x16 tile with the WMMA fragment API and store the product.
 *
 * The matrix_a fragment is loaded from `a` as col_major and the matrix_b
 * fragment from `b` as row_major, both with a leading dimension of 16.
 * The accumulator fragment is zero-filled before the multiply, so the value
 * stored to `c` is the plain product with nothing added to it. The result is
 * stored row-major with a leading dimension of 16.
 *
 * @param a source of the half-precision matrix_a tile
 * @param b source of the half-precision matrix_b tile
 * @param c destination of the float accumulator tile
 *
 * NOTE(review): the *_sync calls presumably require every lane of the
 * calling warp/wavefront to participate -- confirm against the MACA docs.
 * NOTE(review): the build log kept in this file reports `wmma` as undeclared
 * in the host compilation pass -- confirm that the header declaring
 * mxmaca::wmma is visible to that pass.
 */
__host__ __device__ void wmma_ker(half *a, half *b, float *c) {
    // Edge length shared by M, N, K and by every leading dimension below.
    constexpr int kTile = 16;

    // Left operand: column-major tile read from `a`.
    wmma::fragment<wmma::matrix_a, kTile, kTile, kTile, half, wmma::col_major> lhs_tile;
    wmma::load_matrix_sync(lhs_tile, a, kTile);

    // Right operand: row-major tile read from `b`.
    wmma::fragment<wmma::matrix_b, kTile, kTile, kTile, half, wmma::row_major> rhs_tile;
    wmma::load_matrix_sync(rhs_tile, b, kTile);

    // Accumulator starts at zero so the result is the product alone.
    wmma::fragment<wmma::accumulator, kTile, kTile, kTile, float> acc_tile;
    wmma::fill_fragment(acc_tile, 0.0f);

    // acc_tile = lhs_tile * rhs_tile + acc_tile
    wmma::mma_sync(acc_tile, lhs_tile, rhs_tile, acc_tile);

    // Write the float result back in row-major order.
    wmma::store_matrix_sync(c, acc_tile, kTile, wmma::mem_row_major);
}
// Host entry point. It performs no work and reports success to the caller.
int main() {
    return 0;
}
root@master2:/opt/caosiyuan/share/gitee.com/maca-samples/0_Introduction/asyncExec# make
/opt/maca/mxgpu_llvm/bin/mxcc -x maca -offload-arch native asyncExec.cpp -o asyncExec --maca-path=/opt/maca
asyncExec.cpp:39:17: error: expected namespace name
39 | using namespace mxmaca;
| ^
asyncExec.cpp:44:5: error: use of undeclared identifier 'wmma'
44 | wmma::fragment<wmma::matrix_a, 16, 16, 16, half, wmma::col_major> a_frag;
| ^
asyncExec.cpp:44:20: error: use of undeclared identifier 'wmma'
44 | wmma::fragment<wmma::matrix_a, 16, 16, 16, half, wmma::col_major> a_frag;
| ^
asyncExec.cpp:45:5: error: use of undeclared identifier 'wmma'
45 | wmma::fragment<wmma::matrix_b, 16, 16, 16, half, wmma::row_major> b_frag;
| ^
asyncExec.cpp:45:20: error: use of undeclared identifier 'wmma'
45 | wmma::fragment<wmma::matrix_b, 16, 16, 16, half, wmma::row_major> b_frag;
| ^
asyncExec.cpp:46:5: error: use of undeclared identifier 'wmma'
46 | wmma::fragment<wmma::accumulator, 16, 16, 16, float> c_frag;
| ^
asyncExec.cpp:46:20: error: use of undeclared identifier 'wmma'
46 | wmma::fragment<wmma::accumulator, 16, 16, 16, float> c_frag;
| ^
asyncExec.cpp:48:5: error: use of undeclared identifier 'wmma'
48 | wmma::fill_fragment(c_frag, 0.0f);
| ^
asyncExec.cpp:50:5: error: use of undeclared identifier 'wmma'
50 | wmma::load_matrix_sync(a_frag, a, 16);
| ^
asyncExec.cpp:51:5: error: use of undeclared identifier 'wmma'
51 | wmma::load_matrix_sync(b_frag, b, 16);
| ^
asyncExec.cpp:52:5: error: use of undeclared identifier 'wmma'
52 | wmma::mma_sync(c_frag, a_frag, b_frag, c_frag);
| ^
asyncExec.cpp:54:5: error: use of undeclared identifier 'wmma'
54 | wmma::store_matrix_sync(c, c_frag, 16, wmma::mem_row_major);
| ^
asyncExec.cpp:54:44: error: use of undeclared identifier 'wmma'
54 | wmma::store_matrix_sync(c, c_frag, 16, wmma::mem_row_major);
| ^
13 errors generated when compiling for host.
make: *** [Makefile:44: asyncExec] Error 1
# cat Makefile
# Location of the MACA Toolkit
MACA_PATH ?= /opt/maca
# Compiler
MXCC = $(MACA_PATH)/mxgpu_llvm/bin/mxcc
# Internal flags
MXCCFLAGS := -x maca
#ARCHFLAGS := -offload-arch native
ARCHFLAGS := -offload-arch native
#ARCHFLAGS := -offload-arch mx1
################################################################################
# Target rules
all: asyncExec
asyncExec: asyncExec.cpp
$(MXCC) $(MXCCFLAGS) $(ARCHFLAGS) $< -o $@ --maca-path=$(MACA_PATH)
run: asyncExec
./asyncExec
clean:
rm -rf asyncExec asyncExec.o tdump.mem tdump.txt