6. Fortran
6.1. 调用MXMACA
Fortran是科学计算中常用的编程语言。自Fortran 2003标准起,可以通过内置的iso_c_binding模块实现Fortran与C语言的互相调用。用户可以借助该模块调用C接口,从而在Fortran程序中使用沐曦通用GPU。
假设要调用函数名为A的核函数,先实现一层C语言的wrapper函数,再通过iso_c_binding实现Fortran调用C wrapper的接口(形参为指针类型),最终达到Fortran调用沐曦通用GPU进行加速计算的目的。
对于类似 mcMalloc、mcMemcpy 这样的接口,也可以实现一套Fortran的API接口,如此在Fortran程序中也可以实现数据分配、传输、释放等操作。
而在Fortran中可以使用 integer(c_intptr_t) 类型来表示device上的数据指针。调用大致流程如下图所示:
图 6.1 Fortran调用C语言中MXMACA核函数示意图
6.2. 代码示例
Fortran调用MXMACA C实现向量加法,代码示例如下所示。示例中,将首先创建3个数组a、b、c,在GPU中计算c=a+b后,将c的结果传递到host上。
C代码example.maca:
#include <mc_runtime.h>

// Element-wise vector addition kernel: C[i] = A[i] + B[i] for 0 <= i < N.
// Each thread handles at most one element; threads whose global index is
// outside [0, N) do nothing, so the grid may be larger than N.
__global__ void VectorAdd(float *A, float *B, float *C, int N)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < N)
        C[i] = A[i] + B[i];
}

// Fortran-callable wrapper that launches VectorAdd.
// Every argument arrives by reference (Fortran default), so the device
// pointers are passed as float** and the length as int*.
extern "C" void VectorAdd_C(float **A, float **B, float **C, int *N)
{
    // Fixed block size; the number of blocks is derived from N so that the
    // launch is not limited to what fits in a single block.
    const int threadsPerBlock = 256;

    // Nothing to compute, and a launch with zero blocks would be invalid.
    if (*N <= 0)
        return;

    // Round up so that the last, possibly partial, block is included.
    const int blocksPerGrid = (*N + threadsPerBlock - 1) / threadsPerBlock;
    VectorAdd<<<blocksPerGrid, threadsPerBlock>>>(*A, *B, *C, *N);
}

// Allocate (*n) * (*elemSize) bytes on the device and store the resulting
// device pointer in *devPtr. On failure *devPtr is set to null so that the
// Fortran caller sees a zero handle instead of an unspecified value.
extern "C" void maca_malloc_C(int *n, size_t *elemSize, void **devPtr)
{
    const size_t bytes = static_cast<size_t>(*n) * (*elemSize);
    if (mcMalloc(devPtr, bytes) != mcSuccess)
        *devPtr = nullptr;
}

// Release the device allocation referenced by *devPtr.
extern "C" void maca_free_C(void **devPtr)
{
    mcFree(*devPtr);
}

// Copy (*n) * (*size) bytes from host buffer src to device buffer *dst.
extern "C" void maca_memcpyhtod_C(void **dst, void *src, int *n, size_t *size)
{
    const size_t bytes = static_cast<size_t>(*n) * (*size);
    mcMemcpy(*dst, src, bytes, mcMemcpyHostToDevice);
}

// Copy (*n) * (*size) bytes from device buffer *src to host buffer dst.
extern "C" void maca_memcpydtoh_C(void *dst, void **src, int *n, size_t *size)
{
    const size_t bytes = static_cast<size_t>(*n) * (*size);
    mcMemcpy(dst, *src, bytes, mcMemcpyDeviceToHost);
}
Fortran接口模块(与主程序同在main.f90中,需位于主程序之前):
!> Explicit interfaces to the C wrapper routines (VectorAdd_C, maca_malloc_C,
!> maca_free_C, maca_memcpyhtod_C, maca_memcpydtoh_C).
!>
!> Device pointers are represented on the Fortran side as integer(c_intptr_t)
!> handles. Arguments without the value attribute are passed by reference, so
!> the C side receives a pointer to each of them.
module maca_interface
  use, intrinsic :: iso_c_binding
  implicit none

  interface

    !> Launch the vector-addition kernel on device buffers A, B and C of
    !> length N (binds to VectorAdd_C).
    subroutine VectorAdd(A, B, C, N) bind(c, name="VectorAdd_C")
      import :: c_intptr_t, c_int
      integer(c_intptr_t), intent(in) :: A
      integer(c_intptr_t), intent(in) :: B
      integer(c_intptr_t), intent(in) :: C
      integer(c_int),      intent(in) :: N
    end subroutine VectorAdd

    !> Allocate n elements of elemSize bytes each on the device and return
    !> the device pointer handle in devPtr (binds to maca_malloc_C).
    subroutine maca_malloc(n, elemSize, devPtr) bind(c, name="maca_malloc_C")
      import :: c_intptr_t, c_int, c_size_t
      integer(c_int),      intent(in)  :: n
      integer(c_size_t),   intent(in)  :: elemSize
      integer(c_intptr_t), intent(out) :: devPtr
    end subroutine maca_malloc

    !> Release the device allocation referenced by devPtr
    !> (binds to maca_free_C).
    subroutine maca_free(devPtr) bind(c, name="maca_free_C")
      import :: c_intptr_t
      integer(c_intptr_t), intent(in) :: devPtr
    end subroutine maca_free

    !> Copy n elements of thesize bytes each from host address src to the
    !> device buffer dst (binds to maca_memcpyhtod_C).
    subroutine maca_memcpyhtod(dst, src, n, thesize) bind(c, name="maca_memcpyhtod_C")
      import :: c_intptr_t, c_ptr, c_int, c_size_t
      integer(c_intptr_t), intent(in) :: dst
      type(c_ptr), value              :: src   ! host address, passed by value
      integer(c_int),      intent(in) :: n
      integer(c_size_t),   intent(in) :: thesize
    end subroutine maca_memcpyhtod

    !> Copy n elements of thesize bytes each from the device buffer src to
    !> host address dst (binds to maca_memcpydtoh_C).
    subroutine maca_memcpydtoh(dst, src, n, thesize) bind(c, name="maca_memcpydtoh_C")
      import :: c_intptr_t, c_ptr, c_int, c_size_t
      type(c_ptr), value              :: dst   ! host address, passed by value
      integer(c_intptr_t), intent(in) :: src
      integer(c_int),      intent(in) :: n
      integer(c_size_t),   intent(in) :: thesize
    end subroutine maca_memcpydtoh

  end interface

end module maca_interface
Fortran主程序main.f90:
!> Vector-addition example: computes c = a + b on the device through the
!> maca_interface wrappers and prints the first three elements of c.
program main
  use maca_interface, only: VectorAdd, maca_malloc, maca_free, &
                            maca_memcpyhtod, maca_memcpydtoh
  use, intrinsic :: iso_c_binding, only: c_float, c_int, c_size_t, &
                                         c_intptr_t, c_loc, c_sizeof
  implicit none

  ! Kinds are taken from iso_c_binding so they match the C side
  ! (int for the length, float for the data).
  integer(c_int), parameter :: N = 1024
  real(c_float), target     :: a(N), b(N), c(N)   ! target: required by c_loc
  integer(c_intptr_t)       :: a_d, b_d, c_d      ! device pointer handles
  integer(c_size_t)         :: elem_bytes         ! size of one element in bytes

  a = 1.0_c_float
  b = 2.0_c_float
  elem_bytes = c_sizeof(a(1))

  ! Allocate the three device buffers.
  call maca_malloc(N, elem_bytes, a_d)
  call maca_malloc(N, elem_bytes, b_d)
  call maca_malloc(N, elem_bytes, c_d)

  ! Upload the inputs, run the kernel, download the result.
  call maca_memcpyhtod(a_d, c_loc(a), N, elem_bytes)
  call maca_memcpyhtod(b_d, c_loc(b), N, elem_bytes)
  call VectorAdd(a_d, b_d, c_d, N)
  call maca_memcpydtoh(c_loc(c), c_d, N, elem_bytes)

  write(*,*) c(1:3)

  ! Release the device buffers.
  call maca_free(a_d)
  call maca_free(b_d)
  call maca_free(c_d)
end program main
编译:
$ mxcc -fPIC -c example.maca
$ gfortran -c main.f90
$ gfortran -o example example.o main.o -L${MACA_ROOT}/lib -lmcruntime -lmccompiler
执行:
$ ./example
   3.00000000       3.00000000       3.00000000
可以得到数组c的值都为3。