6. Fortran

6.1. 调用MXMACA

Fortran是科学计算中常用的编程语言。自Fortran 2003标准起,语言内置的iso_c_binding模块支持Fortran与C语言互相调用。用户可以通过该模块调用C接口,从而在Fortran程序中使用沐曦通用GPU。

假设要调用函数名为A的核函数,先实现一层C语言的wrapper函数,再通过iso_c_binding实现Fortran调用C wrapper的接口(形参为指针类型),最终达到Fortran调用沐曦通用GPU进行加速计算的目的。

对于类似 mcMalloc、mcMemcpy 这样的接口,也可以实现一套Fortran的API接口,如此在Fortran程序中也可以实现数据分配、传输、释放等操作。在Fortran中可以使用 integer(c_intptr_t) 类型来表示device上的数据指针。调用大致流程如下图所示:

../_images/image6.png

图 6.1 Fortran调用C语言中MXMACA核函数示意图

6.2. 代码示例

Fortran调用MXMACA C实现向量加法,代码示例如下所示。示例中,首先创建3个数组a、b、c,在GPU中计算c=a+b后,再将c的结果传回host。

  1. C代码example.maca

    #include <mc_runtime.h>
    // Element-wise vector addition kernel: C[idx] = A[idx] + B[idx] for idx < N.
    // Each thread handles the one element selected by its global thread index.
    __global__ void VectorAdd(float *A, float *B, float *C, int N)
    {
        const int idx = blockIdx.x * blockDim.x + threadIdx.x;
        // Threads whose index falls past the end of the vector have nothing to do.
        if (idx >= N)
        {
            return;
        }
        C[idx] = A[idx] + B[idx];
    }
    
    // C-linkage wrapper that launches the VectorAdd kernel.
    // Every argument is passed by reference: A, B and C each point to a device
    // pointer, and N points to the element count.
    extern "C" void VectorAdd_C(float **A, float **B, float **C, int *N)
    {
        const int count = *N;
        // Nothing to compute for an empty vector; this also avoids a launch
        // with a zero-sized grid.
        if (count <= 0)
        {
            return;
        }

        // Spread the work over several blocks instead of one block of N threads,
        // so N is not capped by the per-block thread limit. The kernel's
        // `idx < N` guard covers the partially filled last block.
        const int threadsPerBlock = 256;
        const int blocksPerGrid = (count - 1) / threadsPerBlock + 1;
        VectorAdd<<<blocksPerGrid, threadsPerBlock>>>(*A, *B, *C, count);
    }
    
    // Allocates (*n) * (*elemSize) bytes of device memory and stores the
    // resulting device pointer in *devPtr.
    // When *n is zero or negative no allocation is attempted and *devPtr is null.
    extern "C" void maca_malloc_C(int *n, size_t *elemSize, void **devPtr)
    {
        // Start from a null handle so a skipped request leaves a well-defined value.
        *devPtr = 0;
        // A negative count would wrap to a huge value once converted to size_t.
        if (*n <= 0)
        {
            return;
        }
        mcMalloc(devPtr, (size_t)(*n) * (*elemSize));
    }
    
    // Releases the device allocation whose pointer is stored in *devPtr.
    // The handle itself is not modified.
    extern "C" void maca_free_C(void **devPtr)
    {
        void *deviceBlock = *devPtr;
        mcFree(deviceBlock);
    }
    
    // Copies (*n) * (*size) bytes from the host buffer src to the device
    // pointer stored in *dst.
    extern "C" void maca_memcpyhtod_C(void **dst, void *src, int *n, size_t *size)
    {
        const size_t byteCount = (*n) * (*size);
        void *deviceDst = *dst;
        mcMemcpy(deviceDst, src, byteCount, mcMemcpyHostToDevice);
    }
    
    // Copies (*n) * (*size) bytes from the device pointer stored in *src to the
    // host buffer dst.
    extern "C" void maca_memcpydtoh_C(void *dst, void **src, int *n, size_t *size)
    {
        const size_t byteCount = (*n) * (*size);
        void *deviceSrc = *src;
        mcMemcpy(dst, deviceSrc, byteCount, mcMemcpyDeviceToHost);
    }
    
  2. Fortran接口模块main.f90(与下面的主程序位于同一文件中,且须写在主程序之前)

    !> Fortran bindings for the C wrapper routines VectorAdd_C, maca_malloc_C,
    !! maca_free_C, maca_memcpyhtod_C and maca_memcpydtoh_C.
    !! Device pointers are carried as integer(c_intptr_t) handles and passed by
    !! reference, which matches the pointer-to-pointer parameters on the C side.
    module maca_interface
        use, intrinsic :: iso_c_binding
        implicit none

    interface
        !> Launch the vector-add kernel: C = A + B over N elements.
        !! A, B and C are device-pointer handles.
        subroutine VectorAdd(A, B, C, N) &
            bind(c,name="VectorAdd_C")
            import :: c_intptr_t, c_int
            integer(c_intptr_t), intent(in) :: A
            integer(c_intptr_t), intent(in) :: B
            integer(c_intptr_t), intent(in) :: C
            integer(c_int), intent(in)      :: N
        end subroutine VectorAdd

        !> Allocate n * elemSize bytes on the device; the device pointer is
        !! returned in devPtr.
        subroutine maca_malloc(n, elemSize, devPtr) &
            bind(c,name="maca_malloc_C")
            import :: c_int, c_size_t, c_intptr_t
            integer(c_int), intent(in)       :: n
            integer(c_size_t), intent(in)    :: elemSize
            integer(c_intptr_t), intent(out) :: devPtr
        end subroutine maca_malloc

        !> Free the device allocation referred to by devPtr.
        subroutine maca_free(devPtr) &
            bind(c,name="maca_free_C")
            import :: c_intptr_t
            integer(c_intptr_t), intent(in) :: devPtr
        end subroutine maca_free

        !> Copy n * thesize bytes from the host address src to the device
        !! allocation referred to by dst.
        subroutine maca_memcpyhtod(dst, src, n, thesize) &
            bind(c,name="maca_memcpyhtod_C")
            import :: c_intptr_t, c_ptr, c_int, c_size_t
            integer(c_intptr_t), intent(in)    :: dst
            type(c_ptr), value, intent(in)     :: src
            integer(c_int), intent(in)         :: n
            integer(c_size_t), intent(in)      :: thesize
        end subroutine maca_memcpyhtod

        !> Copy n * thesize bytes from the device allocation referred to by src
        !! to the host address dst.
        subroutine maca_memcpydtoh( dst, src, n, thesize) &
            bind(c,name="maca_memcpydtoh_C")
            import :: c_intptr_t, c_ptr, c_int, c_size_t
            type(c_ptr), value, intent(in)     :: dst
            integer(c_intptr_t), intent(in)    :: src
            integer(c_int), intent(in)         :: n
            integer(c_size_t), intent(in)      :: thesize
        end subroutine maca_memcpydtoh

    end interface
    end module maca_interface
    
  3. Fortran主程序main.f90

    !> Adds two host vectors on the GPU through the maca_interface wrappers and
    !! prints the first three elements of the result.
    program main
        use maca_interface
        use, intrinsic :: iso_c_binding, only: c_float, c_int, c_intptr_t, &
                                               c_size_t, c_loc, c_sizeof
        implicit none
        ! Interoperable kinds so the data layout matches C `int` / `float`.
        integer(c_int), parameter :: N = 1024
        real(c_float), target     :: a(N), b(N), c(N)
        ! Device-pointer handles for the three vectors.
        integer(c_intptr_t)       :: a_d, b_d, c_d
        ! Size in bytes of one vector element (c_sizeof already yields c_size_t).
        integer(c_size_t)         :: elem_size

        a = 1.0_c_float
        b = 2.0_c_float
        elem_size = c_sizeof(a(1))

        ! Device buffers for the inputs and the result.
        call maca_malloc(N, elem_size, a_d)
        call maca_malloc(N, elem_size, b_d)
        call maca_malloc(N, elem_size, c_d)

        ! Upload the inputs.
        call maca_memcpyhtod(a_d, c_loc(a), N, elem_size)
        call maca_memcpyhtod(b_d, c_loc(b), N, elem_size)

        call VectorAdd(a_d, b_d, c_d, N)

        ! Download the result into the host array c.
        call maca_memcpydtoh(c_loc(c), c_d, N, elem_size)

        write(*,*) c(1:3)
        call maca_free(a_d)
        call maca_free(b_d)
        call maca_free(c_d)

    end program main
    
  4. 编译:

    $ mxcc -fPIC -c example.maca
    $ gfortran -c main.f90
    $ gfortran -o example example.o main.o -L${MACA_ROOT}/lib -lmcruntime -lmccompiler
    
  5. 执行:

    $ ./example
    3 3 3
    

可以得到数组c的值都为3。