// modified saxpy example from https://devblogs.nvidia.com/parallelforall/easy-introduction-cuda-c-and-c/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/*
 * saxpy: single-precision "a*x plus y", computed in place.
 *
 * For every i in [0, n) stores a*x[i] + y[i] back into y[i].
 *
 *   n - number of elements to process; n <= 0 leaves y untouched.
 *   a - scalar multiplier applied to each element of x.
 *   x - input vector, read only, at least n elements.
 *   y - input/output vector, at least n elements. Declared `restrict`:
 *       the caller must not pass a y that overlaps x.
 *
 * Why `restrict`: without it the compiler has to assume x and y may alias,
 * sees a possible loop-carried dependence, and may refuse to parallelise the
 * kernels region. The qualifier sits on the parameter itself, so the function
 * type stays compatible with existing prototypes and callers.
 */
void saxpy(int n, float a, float *x, float *restrict y)
{
    /* Let the OpenACC compiler turn the loop below into an accelerator kernel. */
    #pragma acc kernels
    for (int i = 0; i < n; ++i) {
        y[i] = a * x[i] + y[i];
    }
}
/*
 * Driver: fills x with 1.0 and y with 2.0, runs saxpy with a = 2.0 and
 * reports the largest absolute deviation of y from the expected value 4.0.
 *
 * Returns 0 on success, EXIT_FAILURE if the host buffers cannot be allocated.
 */
int main(void)
{
    const int N = 1 << 20;

    float *x = malloc((size_t)N * sizeof *x);
    float *y = malloc((size_t)N * sizeof *y);
    if (x == NULL || y == NULL) {
        fprintf(stderr, "Failed to allocate host buffers\n");
        free(x); /* free(NULL) is a no-op, so this is safe for either failure */
        free(y);
        return EXIT_FAILURE;
    }

    for (int i = 0; i < N; i++) {
        x[i] = 1.0f;
        y[i] = 2.0f;
    }

    saxpy(N, 2.0f, x, y);

    /*
     * Use the float functions fabsf/fmaxf. The integer abs() would truncate
     * the difference to int first, so any error smaller than 1.0 would be
     * reported as 0.
     */
    float maxError = 0.0f;
    for (int i = 0; i < N; i++) {
        maxError = fmaxf(maxError, fabsf(y[i] - 4.0f));
    }
    printf("Max error: %f\n", maxError);

    free(x);
    free(y);
    return 0;
}
To compile an OpenACC program, please load the following modules:
$ module load pgi/19 cuda/9.2
$ module list
Currently Loaded Modules:
1) slurm/20.02 2) binutils/2.32 3) libstdcxx/4 4) pgi/19 5) cuda-supplement/9.2 6) cuda/9.2
You only need to load `pgi/19` and `cuda/9.2`; the other modules will be loaded automatically.
You can still compile it manually:
$ pgcc -acc simple_acc.c -o simple_acc
The `-acc` flag instructs the compiler to build the code as an OpenACC program. Please do not forget it: without the flag the compilation may still succeed, but the compiler will ignore all OpenACC directives.
For a larger project, we recommend using a Makefile to manage the build process:
# Makefile for the simple_acc OpenACC example.
# Sources are discovered under src/ and objects are written to build/,
# mirroring the source tree layout.

# Simple (:=) assignment so the `find` below runs once at parse time
# instead of every time SRCS/OBJS is expanded.
EXT := c
SRCS := $(shell find src -name '*.$(EXT)')
OBJS := $(SRCS:src/%.$(EXT)=build/%.o)
BIN := simple_acc

CC := pgcc
LD := pgcc
# -acc is passed to both the compile and the link step.
CFLAGS := -acc -O2
LDFLAGS := -acc

# `all` and `clean` are commands, not files; declaring them phony keeps them
# working even if a file with the same name appears in the directory.
.PHONY: all clean

all: $(BIN)

$(BIN): $(OBJS)
	$(LD) $(LDFLAGS) $(OBJS) -o $@

build/%.o: src/%.$(EXT)
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) -c $< -o $@

# Removes the whole build/ tree (including objects in subdirectories) and the
# binary; $(RM) is `rm -f`, so cleaning an already clean tree is not an error.
clean:
	$(RM) -r build $(BIN)
Since OpenACC requires GPU resources, the submission procedure is identical to a CUDA program. Please read the CUDA Programming section for details.
#!/bin/bash
# Slurm batch script for the simple_acc example.
# Requests one task with 4 CPUs and one GPU on the "gpu" partition and writes
# the job's output to the file "output".
#SBATCH --partition=gpu
#SBATCH --gres=gpu:1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=4
#SBATCH --job-name=simple_acc
#SBATCH --output=output

# Set up the runtime environment.
# NOTE(review): the binary is built with pgi/19, but gcc/4 is loaded here;
# confirm this is sufficient for the runtime libraries on this cluster.
module load gcc/4 cuda/9.2

# Run the executable from the submission directory.
./simple_acc
Submitting and reviewing your job follows the same procedure as described in the simple MPI program section. Please read the MPI Programming section for details.