diff --git a/Makefile.am b/Makefile.am index ba3157fe..a60d0411 100644 --- a/Makefile.am +++ b/Makefile.am @@ -56,8 +56,8 @@ if WITH_CHECK_PROGS SUBDIRS += t endif -if ENABLE_ACC -SUBDIRS += tools/libfrencutils_acc -SUBDIRS += tools/fregrid_acc -SUBDIRS += t_acc +if ENABLE_GPU +SUBDIRS += tools/libfrencutils_gpu +SUBDIRS += tools/fregrid_gpu +SUBDIRS += t_gpu endif diff --git a/configure.ac b/configure.ac index 64d8088b..2119668d 100644 --- a/configure.ac +++ b/configure.ac @@ -89,13 +89,13 @@ AM_CONDITIONAL([WITH_MPI_TESTS], [test x"$build_mpi" = x"yes" -a -z "$SKIP_MPI_T AC_ARG_ENABLE([acc], [AS_HELP_STRING([--enable-acc], [Builds with OpenACC. This will result in a second executable for fregrid, fregrid_gpu.(default no)])]) -AS_IF([test ${enable_acc:-no} = yes], - [enable_acc=yes], - [enable_acc=no]) +AS_IF([test ${enable_gpu:-no} = yes], + [enable_gpu=yes], + [enable_gpu=no]) # check compile flags -AS_IF([test ${enable_acc} = yes], +AS_IF([test ${enable_gpu} = yes], [GX_OPENACC_FLAGS()]) -AM_CONDITIONAL([ENABLE_ACC], [test "$enable_acc" = "yes"]) +AM_CONDITIONAL([ENABLE_GPU], [test "$enable_gpu" = "yes"]) AC_CHECK_FUNCS(gettid, [], []) @@ -135,7 +135,7 @@ else fi # Check if openacc.h exists -if test "$enable_acc" = yes ; then +if test "$enable_gpu" = yes ; then AC_CHECK_HEADERS([openacc.h], [], [AC_MSG_ERROR(["Cannot find OpenACC header file"])] ) AC_MSG_WARN(Currently only NVIDIA compilers are supported to compile with OpenACC in FRE-NCTOOLS) fi @@ -206,11 +206,11 @@ AC_CONFIG_FILES([Makefile postprocessing/split_ncvars/Makefile postprocessing/timavg/Makefile tools/libfrencutils/Makefile - tools/libfrencutils_acc/Makefile + tools/libfrencutils_gpu/Makefile tools/check_mask/Makefile tools/cubic_utils/Makefile tools/fregrid/Makefile - tools/fregrid_acc/Makefile + tools/fregrid_gpu/Makefile tools/make_coupler_mosaic/Makefile tools/make_hgrid/Makefile tools/make_land_domain/Makefile @@ -232,11 +232,11 @@ AC_CONFIG_FILES([Makefile tools/simple_hydrog/rmvpr/Makefile 
tools/simple_hydrog/libfmslite/Makefile t/Makefile - t_acc/Makefile - t_acc/test_read_remap_file/Makefile - t_acc/test_get_grid_cell_struct/Makefile - t_acc/test_get_upbound_nxcells_2dx2d/Makefile - t_acc/test_get_interp_order1/Makefile + t_gpu/Makefile + t_gpu/test_read_remap_file/Makefile + t_gpu/test_get_grid_cell_struct/Makefile + t_gpu/test_get_upbound_nxcells_2dx2d/Makefile + t_gpu/test_get_interp_order1/Makefile ]) diff --git a/t_acc/Makefile.am b/t_gpu/Makefile.am similarity index 100% rename from t_acc/Makefile.am rename to t_gpu/Makefile.am diff --git a/t_acc/test_get_grid_cell_struct/Makefile.am b/t_gpu/test_get_grid_cell_struct/Makefile.am similarity index 84% rename from t_acc/test_get_grid_cell_struct/Makefile.am rename to t_gpu/test_get_grid_cell_struct/Makefile.am index 0c368d47..5b85b7c7 100644 --- a/t_acc/test_get_grid_cell_struct/Makefile.am +++ b/t_gpu/test_get_grid_cell_struct/Makefile.am @@ -20,14 +20,14 @@ check_PROGRAMS = test_get_grid_cell_struct AM_CFLAGS = $(NETCDF_CFLAGS) \ - -I$(top_srcdir)/tools/fregrid_acc \ + -I$(top_srcdir)/tools/fregrid_gpu \ -I$(top_srcdir)/tools/libfrencutils \ - -I$(top_srcdir)/tools/libfrencutils_acc -acc + -I$(top_srcdir)/tools/libfrencutils_gpu -acc LDADD = $(NETCDF_LDFLAGS) $(NETCDF_LIBS) $(RPATH_FLAGS) \ - $(top_builddir)/tools/fregrid_acc/interp_utils_acc.o \ + $(top_builddir)/tools/fregrid_gpu/interp_utils_gpu.o \ $(top_builddir)/tools/libfrencutils/libfrencutils.a \ - $(top_builddir)/tools/libfrencutils_acc/libfrencutils_acc.a + $(top_builddir)/tools/libfrencutils_gpu/libfrencutils_gpu.a test_get_grid_cell_struct_SOURCES = test_get_grid_cell_struct.c diff --git a/t_acc/test_get_grid_cell_struct/test_get_grid_cell_struct.c b/t_gpu/test_get_grid_cell_struct/test_get_grid_cell_struct.c similarity index 97% rename from t_acc/test_get_grid_cell_struct/test_get_grid_cell_struct.c rename to t_gpu/test_get_grid_cell_struct/test_get_grid_cell_struct.c index 927bf13e..ec609517 100644 --- 
a/t_acc/test_get_grid_cell_struct/test_get_grid_cell_struct.c +++ b/t_gpu/test_get_grid_cell_struct/test_get_grid_cell_struct.c @@ -18,7 +18,7 @@ * . **********************************************************************/ -// This test tests the function test_get_grid_cell_struct used in fregrid_acc. +// This test tests the function test_get_grid_cell_struct used in fregrid_gpu. // Properties of each grid cell in a smple made-up grid with no poles are computed // on the device. This test ensures that the data transfer between the host and device // and computations have executed on the device as expected. @@ -26,10 +26,10 @@ #include #include #include -#include "create_xgrid_utils_acc.h" -#include "interp_utils_acc.h" +#include "create_xgrid_utils_gpu.h" +#include "interp_utils_gpu.h" #include "parameters.h" -#include "globals_acc.h" +#include "globals_gpu.h" #define NLON 36 // 36 cells in lon direction (36+1 grid points in the lon direction for each lat point) #define NLAT 4 // 4 cells in lat direction ( 4+1 grid points in the lat direction for each lon point) @@ -79,10 +79,10 @@ int main(){ } // copy grid to device - copy_grid_to_device_acc((NLON+1)*(NLAT+1), grid.latc, grid.lonc); + copy_grid_to_device_gpu((NLON+1)*(NLAT+1), grid.latc, grid.lonc); // get grid_cells - get_grid_cell_struct_acc( NLON, NLAT, &grid, &grid_cells); + get_grid_cell_struct_gpu( NLON, NLAT, &grid, &grid_cells); // get answers get_answers(grid.lonc, grid.latc, &answers); diff --git a/t_acc/test_get_interp_order1/Makefile.am b/t_gpu/test_get_interp_order1/Makefile.am similarity index 80% rename from t_acc/test_get_interp_order1/Makefile.am rename to t_gpu/test_get_interp_order1/Makefile.am index 88b0f3e5..ae074681 100644 --- a/t_acc/test_get_interp_order1/Makefile.am +++ b/t_gpu/test_get_interp_order1/Makefile.am @@ -20,15 +20,15 @@ check_PROGRAMS = test_get_interp_order1 AM_CFLAGS = $(NETCDF_CFLAGS) \ - -I$(top_srcdir)/tools/fregrid_acc \ + -I$(top_srcdir)/tools/fregrid_gpu \ 
-I$(top_srcdir)/tools/libfrencutils \ - -I$(top_srcdir)/tools/libfrencutils_acc -acc + -I$(top_srcdir)/tools/libfrencutils_gpu -acc LDADD = $(NETCDF_LDFLAGS) $(NETCDF_LIBS) $(RPATH_FLAGS) \ - $(top_builddir)/tools/fregrid_acc/interp_utils_acc.o \ - $(top_builddir)/tools/fregrid_acc/conserve_interp_acc.o \ + $(top_builddir)/tools/fregrid_gpu/interp_utils_gpu.o \ + $(top_builddir)/tools/fregrid_gpu/conserve_interp_gpu.o \ $(top_builddir)/tools/libfrencutils/libfrencutils.a \ - $(top_builddir)/tools/libfrencutils_acc/libfrencutils_acc.a + $(top_builddir)/tools/libfrencutils_gpu/libfrencutils_gpu.a test_get_interp_order1_SOURCES = test_get_interp_order1.c diff --git a/t_acc/test_get_interp_order1/test_get_interp_order1.c b/t_gpu/test_get_interp_order1/test_get_interp_order1.c similarity index 88% rename from t_acc/test_get_interp_order1/test_get_interp_order1.c rename to t_gpu/test_get_interp_order1/test_get_interp_order1.c index 05278205..23726eff 100644 --- a/t_acc/test_get_interp_order1/test_get_interp_order1.c +++ b/t_gpu/test_get_interp_order1/test_get_interp_order1.c @@ -18,7 +18,7 @@ * . **********************************************************************/ -// This test tests the function create_xgrid_2dx2d_order2_acc for a simple +// This test tests the function create_xgrid_2dx2d_order2_gpu for a simple // case where the input grid is identical to the output grid. The exchange // grid is identical to the input/output grid. Only the parent indices corresponding // to each exchange cell are checked. 
@@ -26,11 +26,11 @@ #include #include #include -#include "globals_acc.h" -#include "interp_utils_acc.h" -#include "conserve_interp_acc.h" -#include "create_xgrid_utils_acc.h" -#include "create_xgrid_acc.h" +#include "globals_gpu.h" +#include "interp_utils_gpu.h" +#include "conserve_interp_gpu.h" +#include "create_xgrid_utils_gpu.h" +#include "create_xgrid_gpu.h" void generate_input_is_same_as_output_grids(Grid_config *input_grid, Grid_config *output_grid, Interp_per_input_tile *interp_answers); @@ -152,30 +152,30 @@ int run_tests(Grid_config *input_grid, Grid_config *output_grid, Interp_per_inpu Grid_cells_struct_config output_grid_cells; //copy grid to device - copy_grid_to_device_acc(ngridpts_input, input_grid->latc, input_grid->lonc); - copy_grid_to_device_acc(ngridpts_output, output_grid->latc, output_grid->lonc); + copy_grid_to_device_gpu(ngridpts_input, input_grid->latc, input_grid->lonc); + copy_grid_to_device_gpu(ngridpts_output, output_grid->latc, output_grid->lonc); //get mask to skip input cells in creating interp - get_input_grid_mask_acc(ncells_input, &input_grid_mask); + get_input_grid_mask_gpu(ncells_input, &input_grid_mask); //get output grid cell info - get_grid_cell_struct_acc(nlon_output_cells, nlat_output_cells, output_grid, &output_grid_cells); + get_grid_cell_struct_gpu(nlon_output_cells, nlat_output_cells, output_grid, &output_grid_cells); //get bounding index - get_bounding_indices_acc(nlon_output_cells, nlat_output_cells, nlon_input_cells, nlat_input_cells, + get_bounding_indices_gpu(nlon_output_cells, nlat_output_cells, nlon_input_cells, nlat_input_cells, output_grid->latc, input_grid->latc, &jlat_overlap_starts, &jlat_overlap_ends); //malloc and create arrays - create_upbound_nxcells_arrays_on_device_acc(ncells_input, &approx_nxcells_per_ij1, &ij2_start, &ij2_end); + create_upbound_nxcells_arrays_on_device_gpu(ncells_input, &approx_nxcells_per_ij1, &ij2_start, &ij2_end); - upbound_nxcells = get_upbound_nxcells_2dx2d_acc(input_grid->nxc, 
input_grid->nyc, output_grid->nxc, output_grid->nyc, + upbound_nxcells = get_upbound_nxcells_2dx2d_gpu(input_grid->nxc, input_grid->nyc, output_grid->nxc, output_grid->nyc, jlat_overlap_starts, jlat_overlap_ends, input_grid->lonc, input_grid->latc, output_grid->lonc, output_grid->latc, input_grid_mask, &output_grid_cells, approx_nxcells_per_ij1, ij2_start, ij2_end); - nxcells = create_xgrid_2dx2d_order1_acc( nlon_input_cells, nlat_input_cells, + nxcells = create_xgrid_2dx2d_order1_gpu( nlon_input_cells, nlat_input_cells, nlon_output_cells, nlat_output_cells, jlat_overlap_starts, jlat_overlap_ends, input_grid->lonc, input_grid->latc, @@ -184,9 +184,9 @@ int run_tests(Grid_config *input_grid, Grid_config *output_grid, Interp_per_inpu approx_nxcells_per_ij1, ij2_start, ij2_end, interp); - free_grid_cell_struct_acc(ncells_output, &output_grid_cells); - free_upbound_nxcells_arrays_acc(ncells_input, &approx_nxcells_per_ij1, &ij2_start, &ij2_end); - free_input_grid_mask_acc(ncells_input, &input_grid_mask); + free_grid_cell_struct_gpu(ncells_output, &output_grid_cells); + free_upbound_nxcells_arrays_gpu(ncells_input, &approx_nxcells_per_ij1, &ij2_start, &ij2_end); + free_input_grid_mask_gpu(ncells_input, &input_grid_mask); return 0; diff --git a/t_acc/test_get_upbound_nxcells_2dx2d/Makefile.am b/t_gpu/test_get_upbound_nxcells_2dx2d/Makefile.am similarity index 80% rename from t_acc/test_get_upbound_nxcells_2dx2d/Makefile.am rename to t_gpu/test_get_upbound_nxcells_2dx2d/Makefile.am index f361dffb..7c5cc8af 100644 --- a/t_acc/test_get_upbound_nxcells_2dx2d/Makefile.am +++ b/t_gpu/test_get_upbound_nxcells_2dx2d/Makefile.am @@ -20,15 +20,15 @@ check_PROGRAMS = test_get_upbound_nxcells_2dx2d AM_CFLAGS = $(NETCDF_CFLAGS) \ - -I$(top_srcdir)/tools/fregrid_acc \ + -I$(top_srcdir)/tools/fregrid_gpu \ -I$(top_srcdir)/tools/libfrencutils \ - -I$(top_srcdir)/tools/libfrencutils_acc -acc + -I$(top_srcdir)/tools/libfrencutils_gpu -acc LDADD = $(NETCDF_LDFLAGS) $(NETCDF_LIBS) 
$(RPATH_FLAGS) \ - $(top_builddir)/tools/fregrid_acc/conserve_interp_acc.o \ - $(top_builddir)/tools/fregrid_acc/interp_utils_acc.o \ + $(top_builddir)/tools/fregrid_gpu/conserve_interp_gpu.o \ + $(top_builddir)/tools/fregrid_gpu/interp_utils_gpu.o \ $(top_builddir)/tools/libfrencutils/libfrencutils.a \ - $(top_builddir)/tools/libfrencutils_acc/libfrencutils_acc.a + $(top_builddir)/tools/libfrencutils_gpu/libfrencutils_gpu.a test_get_upbound_nxcells_2dx2d_SOURCES = test_get_upbound_nxcells_2dx2d.c diff --git a/t_acc/test_get_upbound_nxcells_2dx2d/test_get_upbound_nxcells_2dx2d.c b/t_gpu/test_get_upbound_nxcells_2dx2d/test_get_upbound_nxcells_2dx2d.c similarity index 94% rename from t_acc/test_get_upbound_nxcells_2dx2d/test_get_upbound_nxcells_2dx2d.c rename to t_gpu/test_get_upbound_nxcells_2dx2d/test_get_upbound_nxcells_2dx2d.c index 972e1921..3c12a84e 100644 --- a/t_acc/test_get_upbound_nxcells_2dx2d/test_get_upbound_nxcells_2dx2d.c +++ b/t_gpu/test_get_upbound_nxcells_2dx2d/test_get_upbound_nxcells_2dx2d.c @@ -28,11 +28,11 @@ #include #include #include -#include "globals_acc.h" -#include "conserve_interp_acc.h" -#include "interp_utils_acc.h" -#include "create_xgrid_utils_acc.h" -#include "create_xgrid_acc.h" +#include "globals_gpu.h" +#include "conserve_interp_gpu.h" +#include "interp_utils_gpu.h" +#include "create_xgrid_utils_gpu.h" +#include "create_xgrid_gpu.h" typedef struct { int ncells_input; @@ -322,33 +322,33 @@ int run_tests(Grid_config *input_grid, Grid_config *output_grid, Grid_cells_stru double *input_grid_mask; //copy grid to device - copy_grid_to_device_acc(ngridpts_input, input_grid->latc, input_grid->lonc); - copy_grid_to_device_acc(ngridpts_output, output_grid->latc, output_grid->lonc); + copy_grid_to_device_gpu(ngridpts_input, input_grid->latc, input_grid->lonc); + copy_grid_to_device_gpu(ngridpts_output, output_grid->latc, output_grid->lonc); //get mask to skip input cells in creating xgrid - get_input_grid_mask_acc(ncells_input, 
&input_grid_mask); + get_input_grid_mask_gpu(ncells_input, &input_grid_mask); if( ! acc_is_present(input_grid_mask, ncells_input*sizeof(double)) ) { printf("INPUT_GRID_MASK IS NOT ON DEVICE!"); exit(1); } //get output grid cell info - get_grid_cell_struct_acc(nlon_output_cells, nlat_output_cells, output_grid, output_grid_cells); + get_grid_cell_struct_gpu(nlon_output_cells, nlat_output_cells, output_grid, output_grid_cells); //get bounding indices - get_bounding_indices_acc(nlon_output_cells, nlat_output_cells, nlon_input_cells, nlat_input_cells, + get_bounding_indices_gpu(nlon_output_cells, nlat_output_cells, nlon_input_cells, nlat_input_cells, output_grid->latc, input_grid->latc, &jlat_overlap_starts, &jlat_overlap_ends); if(jlat_overlap_starts != 0) printf("SMETHING IS WRONG WITH JLAT_OVERLAP_STARTS %d\n", jlat_overlap_starts); if(jlat_overlap_ends != nlat_input_cells-1) // this is what was in the original. printf("SOMETHING IS WRONG WITH JLAT_OVERLAP_ENDS %d %d\n", jlat_overlap_ends, nlat_input_cells-1); //malloc and create arrays - create_upbound_nxcells_arrays_on_device_acc(ncells_input, &p_approx_nxcells_per_ij1, &p_ij2_start, &p_ij2_end); + create_upbound_nxcells_arrays_on_device_gpu(ncells_input, &p_approx_nxcells_per_ij1, &p_ij2_start, &p_ij2_end); // check to ensure all data have been transferred/created to device check_data_on_device(input_grid, output_grid, p_approx_nxcells_per_ij1, p_ij2_start, p_ij2_end, output_grid_cells); - *upbound_nxcells = get_upbound_nxcells_2dx2d_acc(input_grid->nxc, input_grid->nyc, output_grid->nxc, output_grid->nyc, + *upbound_nxcells = get_upbound_nxcells_2dx2d_gpu(input_grid->nxc, input_grid->nyc, output_grid->nxc, output_grid->nyc, jlat_overlap_starts, jlat_overlap_ends, input_grid->lonc, input_grid->latc, output_grid->lonc, output_grid->latc, input_grid_mask, output_grid_cells, @@ -358,7 +358,7 @@ int run_tests(Grid_config *input_grid, Grid_config *output_grid, Grid_cells_stru *ij2_start = p_ij2_start; *ij2_end = 
p_ij2_end; - free_input_grid_mask_acc(ncells_input, &input_grid_mask); + free_input_grid_mask_gpu(ncells_input, &input_grid_mask); return 0; @@ -372,8 +372,8 @@ void cleanup_test(Answers *answers, Grid_config *input_grid, Grid_config *output int ncells_output = output_grid->nxc * output_grid->nyc; int ncells_input = input_grid->nxc * input_grid->nyc; - free_grid_cell_struct_acc(ncells_output, output_grid_cells); - free_upbound_nxcells_arrays_acc(ncells_input, &approx_nxcells_per_ij1, &ij2_start, &ij2_end); + free_grid_cell_struct_gpu(ncells_output, output_grid_cells); + free_upbound_nxcells_arrays_gpu(ncells_input, &approx_nxcells_per_ij1, &ij2_start, &ij2_end); } diff --git a/t_acc/test_read_remap_file/Makefile.am b/t_gpu/test_read_remap_file/Makefile.am similarity index 83% rename from t_acc/test_read_remap_file/Makefile.am rename to t_gpu/test_read_remap_file/Makefile.am index d0646388..c58875c1 100644 --- a/t_acc/test_read_remap_file/Makefile.am +++ b/t_gpu/test_read_remap_file/Makefile.am @@ -21,14 +21,14 @@ check_PROGRAMS = test_read_remap_file AM_CFLAGS = $(NETCDF_CFLAGS) \ -I$(top_srcdir)/tools/libfrencutils \ - -I$(top_srcdir)/tools/libfrencutils_acc \ - -I$(top_srcdir)/tools/fregrid_acc -acc + -I$(top_srcdir)/tools/libfrencutils_gpu \ + -I$(top_srcdir)/tools/fregrid_gpu -acc LDADD = $(NETCDF_LDFLAGS) $(NETCDF_LIBS) $(RPATH_FLAGS) \ - $(top_builddir)/tools/fregrid_acc/conserve_interp_acc.o \ - $(top_builddir)/tools/fregrid_acc/interp_utils_acc.o \ + $(top_builddir)/tools/fregrid_gpu/conserve_interp_gpu.o \ + $(top_builddir)/tools/fregrid_gpu/interp_utils_gpu.o \ $(top_builddir)/tools/libfrencutils/libfrencutils.a \ - $(top_builddir)/tools/libfrencutils_acc/libfrencutils_acc.a + $(top_builddir)/tools/libfrencutils_gpu/libfrencutils_gpu.a test_read_remap_file_SOURCES = test_read_remap_file.c diff --git a/t_acc/test_read_remap_file/test_make_remap_file_conserve.py b/t_gpu/test_read_remap_file/test_make_remap_file_conserve.py similarity index 100% rename from 
t_acc/test_read_remap_file/test_make_remap_file_conserve.py rename to t_gpu/test_read_remap_file/test_make_remap_file_conserve.py diff --git a/t_acc/test_read_remap_file/test_read_remap_file.c b/t_gpu/test_read_remap_file/test_read_remap_file.c similarity index 79% rename from t_acc/test_read_remap_file/test_read_remap_file.c rename to t_gpu/test_read_remap_file/test_read_remap_file.c index fc8a5969..b15bc210 100644 --- a/t_acc/test_read_remap_file/test_read_remap_file.c +++ b/t_gpu/test_read_remap_file/test_read_remap_file.c @@ -18,11 +18,11 @@ * . **********************************************************************/ -// This test tests function read_remap_file_acc to read in a made-up -// remap file. It ensures the correct initialization of the interp_acc struct. +// This test tests function read_remap_file_gpu to read in a made-up +// remap file. It ensures the correct initialization of the interp_gpu struct. // The remap file is generated with the python script // test_make_remap_file_conserve.py that uses the xarray module. This -// test also tests the function copy_interp_to_device_acc which copies interp_acc +// test also tests the function copy_interp_to_device_gpu which copies interp_gpu // to device. 
#include @@ -32,9 +32,9 @@ #include #include #include -#include "conserve_interp_acc.h" -#include "interp_utils_acc.h" -#include "globals_acc.h" +#include "conserve_interp_gpu.h" +#include "interp_utils_gpu.h" +#include "globals_gpu.h" #define INPUT_GRID_NTILES 6 #define OUTPUT_GRID_NTILES 2 @@ -74,9 +74,9 @@ char remap_files2[OUTPUT_GRID_NTILES][30] = { "remap_conserve2.tile1.nc", "remap void read_all_answers(Answers *answers, int myinterp_method); void read_ianswers( FILE *myfile, int *nxcells, int **ianswer); void read_ranswers( FILE *myfile, int *nxcells, double **ianswer); -void check_answers_on_device(Answers *answers, Interp_config_acc *interp_acc, int myinterp_method); -void check_answers_on_host(Answers *answers, Interp_config_acc *interp_acc, int myinterp_method); -void reset_interp_acc_on_host( Interp_config_acc *interp_acc, Answers *answers, int myinterp_method ); +void check_answers_on_device(Answers *answers, Interp_config_gpu *interp_gpu, int myinterp_method); +void check_answers_on_host(Answers *answers, Interp_config_gpu *interp_gpu, int myinterp_method); +void reset_interp_gpu_on_host( Interp_config_gpu *interp_gpu, Answers *answers, int myinterp_method ); void check_ianswers(int n, int *answers, int *checkme, int host_or_device); void check_ranswers(int n, double *answers, double *checkme, int host_or_device); void error(char *error_message); @@ -84,7 +84,7 @@ void error(char *error_message); // start program int main(int argc, char *argv[]) { - Interp_config_acc interp_acc[OUTPUT_GRID_NTILES]; + Interp_config_gpu interp_gpu[OUTPUT_GRID_NTILES]; Grid_config input_grid[INPUT_GRID_NTILES], output_grid[OUTPUT_GRID_NTILES]; Answers answers[OUTPUT_GRID_NTILES]; @@ -107,10 +107,10 @@ int main(int argc, char *argv[]) { // assign remap files for(int n=0 ; n. 
#*********************************************************************** -bin_PROGRAMS = fregrid_acc +bin_PROGRAMS = fregrid_gpu AM_CFLAGS = -I$(top_srcdir)/tools/libfrencutils $(NETCDF_CFLAGS) \ - -I$(top_srcdir)/tools/libfrencutils_acc \ + -I$(top_srcdir)/tools/libfrencutils_gpu \ -I$(top_srcdir)/tools/fregrid \ $(OPENACC_CFLAGS) LDADD = $(NETCDF_LDFLAGS) $(NETCDF_LIBS) $(RPATH_FLAGS) -fregrid_acc_SOURCES = conserve_interp_acc.c \ - conserve_interp_acc.h \ - interp_utils_acc.c \ - interp_utils_acc.h \ - fregrid_utils_acc.c \ - fregrid_utils_acc.h \ - fregrid_acc.c +fregrid_gpu_SOURCES = conserve_interp_gpu.c \ + conserve_interp_gpu.h \ + interp_utils_gpu.c \ + interp_utils_gpu.h \ + fregrid_utils_gpu.c \ + fregrid_utils_gpu.h \ + fregrid_gpu.c -fregrid_acc_LDADD = $(top_builddir)/tools/fregrid/fregrid_util.o \ +fregrid_gpu_LDADD = $(top_builddir)/tools/fregrid/fregrid_util.o \ $(top_builddir)/tools/fregrid/conserve_interp.o \ $(top_builddir)/tools/fregrid/bilinear_interp.o \ $(top_builddir)/tools/libfrencutils/libfrencutils.a \ - $(top_builddir)/tools/libfrencutils_acc/libfrencutils_acc.a \ + $(top_builddir)/tools/libfrencutils_gpu/libfrencutils_gpu.a \ $(LDADD) diff --git a/tools/fregrid_acc/conserve_interp_acc.c b/tools/fregrid_gpu/conserve_interp_gpu.c similarity index 83% rename from tools/fregrid_acc/conserve_interp_acc.c rename to tools/fregrid_gpu/conserve_interp_gpu.c index 6e3d96aa..e8426067 100644 --- a/tools/fregrid_acc/conserve_interp_acc.c +++ b/tools/fregrid_gpu/conserve_interp_gpu.c @@ -23,12 +23,12 @@ #include #include #include -#include "globals_acc.h" -#include "conserve_interp_acc.h" -#include "interp_utils_acc.h" -#include "create_xgrid_acc.h" -#include "create_xgrid_utils_acc.h" -#include "general_utils_acc.h" +#include "globals_gpu.h" +#include "conserve_interp_gpu.h" +#include "interp_utils_gpu.h" +#include "create_xgrid_gpu.h" +#include "create_xgrid_utils_gpu.h" +#include "general_utils_gpu.h" #include "mpp.h" #include "mpp_io.h" #include 
"read_mosaic.h" @@ -37,13 +37,13 @@ void setup_conserve_interp Setup the interpolation weight for conservative interpolation *******************************************************************************/ -void setup_conserve_interp_acc(int ntiles_input_grid, Grid_config *input_grid, int ntiles_output_grid, - Grid_config *output_grid, Interp_config_acc *interp_acc, unsigned int opcode) +void setup_conserve_interp_gpu(int ntiles_input_grid, Grid_config *input_grid, int ntiles_output_grid, + Grid_config *output_grid, Interp_config_gpu *interp_gpu, unsigned int opcode) { if( opcode & READ) { - read_remap_file_acc(ntiles_input_grid, ntiles_output_grid, output_grid, input_grid, interp_acc, opcode); - copy_interp_to_device_acc(ntiles_input_grid, ntiles_output_grid, interp_acc, opcode); + read_remap_file_gpu(ntiles_input_grid, ntiles_output_grid, output_grid, input_grid, interp_gpu, opcode); + copy_interp_to_device_gpu(ntiles_input_grid, ntiles_output_grid, interp_gpu, opcode); return; } @@ -56,11 +56,11 @@ void setup_conserve_interp_acc(int ntiles_input_grid, Grid_config *input_grid, i Grid_cells_struct_config output_grid_cells; - interp_acc[otile].nxcells = 0; + interp_gpu[otile].nxcells = 0; - copy_grid_to_device_acc(ngridpts_output_grid, output_grid[otile].latc, output_grid[otile].lonc); + copy_grid_to_device_gpu(ngridpts_output_grid, output_grid[otile].latc, output_grid[otile].lonc); - get_grid_cell_struct_acc( nlon_output_cells, nlat_output_cells, output_grid+otile, &output_grid_cells ); + get_grid_cell_struct_gpu( nlon_output_cells, nlat_output_cells, output_grid+otile, &output_grid_cells ); for(int itile=0; itilenxcells; + Interp_config_gpu *p_interp_gpu = interp_gpu+otile; + int nxcells=p_interp_gpu->nxcells; int nlon_input_cells, ii; size_t start[4] = {0,0,0,0}, nwrite[4] = {1, 1, 1, 1}; int *data_int=NULL; double *data_double=NULL; - int fid = mpp_open( interp_acc[otile].remap_file, MPP_WRITE); + int fid = mpp_open( interp_gpu[otile].remap_file, MPP_WRITE); 
int dim_string = mpp_def_dim(fid, "string", STRING); int dim_ncells = mpp_def_dim(fid, "ncells", nxcells); int dim_two = mpp_def_dim(fid, "two", 2); @@ -292,7 +292,7 @@ void write_remap_file(const int ntiles_output_grid, const int ntiles_input_grid, //update data on host for(int itile=0 ; itileinput_tile+itile; + Interp_per_input_tile *p_interp_for_itile = p_interp_gpu->input_tile+itile; int itile_nxcells = p_interp_for_itile->nxcells; #pragma acc update host( p_interp_for_itile->input_parent_cell_index[:itile_nxcells], \ p_interp_for_itile->output_parent_cell_index[:itile_nxcells], \ @@ -304,7 +304,7 @@ void write_remap_file(const int ntiles_output_grid, const int ntiles_input_grid, //input tile ii = 0; for( int itile=0 ; itileinput_tile+itile; + Interp_per_input_tile *p_interp_for_itile = p_interp_gpu->input_tile+itile; int itile_nxcells = p_interp_for_itile->nxcells; for( int i=0 ; iinput_tile+itile; + Interp_per_input_tile *p_interp_for_itile = p_interp_gpu->input_tile+itile; int itile_nxcells = p_interp_for_itile->nxcells; nlon_input_cells = input_grid[itile].nxc; for( int i=0 ; iinput_tile+itile; + Interp_per_input_tile *p_interp_for_itile = p_interp_gpu->input_tile+itile; int itile_nxcells = p_interp_for_itile->nxcells; for( int i=0 ; ioutput_parent_cell_index[i]%nlon_input_cells+1; @@ -344,7 +344,7 @@ void write_remap_file(const int ntiles_output_grid, const int ntiles_input_grid, // j (y, lat) indices of input parent ii=0; for( int itile=0 ; itileinput_tile+itile; + Interp_per_input_tile *p_interp_for_itile = p_interp_gpu->input_tile+itile; int itile_nxcells = p_interp_for_itile->nxcells; nlon_input_cells = input_grid[itile].nxc; for( int i=0 ; iinput_tile+itile; + Interp_per_input_tile *p_interp_for_itile = p_interp_gpu->input_tile+itile; int itile_nxcells = p_interp_for_itile->nxcells; for( int i=0 ; ioutput_parent_cell_index[i]/nlon_input_cells+1; @@ -370,7 +370,7 @@ void write_remap_file(const int ntiles_output_grid, const int ntiles_input_grid, // 
exchange cell area ii=0; for( int itile=0 ; itileinput_tile+itile; + Interp_per_input_tile *p_interp_for_itile = p_interp_gpu->input_tile+itile; int itile_nxcells = p_interp_for_itile->nxcells; for( int i=0 ; ixcell_area[i]; @@ -382,7 +382,7 @@ void write_remap_file(const int ntiles_output_grid, const int ntiles_input_grid, if(opcode & CONSERVE_ORDER2) { ii=0; start[1] = 0 ; for( int itile=0 ; itileinput_tile+itile; + Interp_per_input_tile *p_interp_for_itile = p_interp_gpu->input_tile+itile; int itile_nxcells = p_interp_for_itile->nxcells; for( int i=0 ; idcentroid_lon[i]; @@ -393,7 +393,7 @@ void write_remap_file(const int ntiles_output_grid, const int ntiles_input_grid, ii=0; start[1] = 1 ; for( int itile=0 ; itileinput_tile+itile; + Interp_per_input_tile *p_interp_for_itile = p_interp_gpu->input_tile+itile; int itile_nxcells = p_interp_for_itile->nxcells; for( int i=0 ; idcentroid_lat[i]; @@ -413,7 +413,7 @@ void write_remap_file(const int ntiles_output_grid, const int ntiles_input_grid, } void check_area_conservation(const int ntiles_output_grid, const int ntiles_input_grid, Grid_config *output_grid, - Interp_config_acc *interp_acc) + Interp_config_gpu *interp_gpu) { for(int otile=0; otilenxcells; for(int i=0; ioutput_parent_cell_index[i]; @@ -463,7 +463,7 @@ void check_area_conservation(const int ntiles_output_grid, const int ntiles_inpu void do_scalar_conserve_interp( ) doing conservative interpolation *******************************************************************************/ -void do_scalar_conserve_interp_acc(Interp_config_acc *interp_acc, int varid, int ntiles_input_grid, const Grid_config *input_grid, +void do_scalar_conserve_interp_gpu(Interp_config_gpu *interp_gpu, int varid, int ntiles_input_grid, const Grid_config *input_grid, int ntiles_output_grid, const Grid_config *output_grid, const Field_config *field_in, Field_config *field_out, unsigned int opcode) { @@ -505,10 +505,10 @@ void do_scalar_conserve_interp_acc(Interp_config_acc *interp_acc, 
int varid, int input_grid+itile, input_area_weight); if(opcode & CONSERVE_ORDER1) - interp_data_order1(output_grid+otile, input_grid+itile, interp_acc[otile].input_tile+itile, + interp_data_order1(output_grid+otile, input_grid+itile, interp_gpu[otile].input_tile+itile, input_area_weight, field_in[itile].data, p_fieldout_data, out_area, out_miss, missing); if(opcode & CONSERVE_ORDER2) - interp_data_order2(output_grid+otile, input_grid+itile, interp_acc[otile].input_tile+itile, + interp_data_order2(output_grid+otile, input_grid+itile, interp_gpu[otile].input_tile+itile, input_area_weight, field_in[itile].data, p_fieldout_data, out_area, out_miss, field_in[itile].grad_mask, field_in[itile].grad_y, field_in[itile].grad_x, missing); @@ -558,20 +558,20 @@ void do_scalar_conserve_interp_acc(Interp_config_acc *interp_acc, int varid, int for(int itile=0 ; itilenxcells; -#pragma acc parallel loop present(minterp_acc->output_parent_cell_index[:ixcells], \ - minterp_acc->input_parent_cell_index[:ixcells], \ - minterp_acc->xcell_area[:ixcells], \ + int ixcells = minterp_gpu->nxcells; +#pragma acc parallel loop present(minterp_gpu->output_parent_cell_index[:ixcells], \ + minterp_gpu->input_parent_cell_index[:ixcells], \ + minterp_gpu->xcell_area[:ixcells], \ out_area[:ncells_output_grid])\ copyin(p_fieldin_area[:ncells_input_grid],\ p_gridin_area[:ncells_input_grid]) for(int ix=0; ixoutput_parent_cell_index[ix]; - int ij1 = minterp_acc->input_parent_cell_index[ix]; - double area = minterp_acc->xcell_area[ix]; + int ij2 = minterp_gpu->output_parent_cell_index[ix]; + int ij1 = minterp_gpu->input_parent_cell_index[ix]; + double area = minterp_gpu->xcell_area[ix]; if(cell_measures ) out_area[ij2] += (area*p_fieldin_area[ij1]/p_gridin_area[ij1]); else out_area[ij2] += area; } @@ -670,18 +670,18 @@ void get_input_area_weight(const int weights_exist, const int cell_measures, con } void interp_data_order1( const Grid_config *output_grid, const Grid_config *input_grid, - 
Interp_per_input_tile *minterp_acc, double *input_area_weight, double *fieldin_data, + Interp_per_input_tile *minterp_gpu, double *input_area_weight, double *fieldin_data, double *fieldout_data, double *out_area, int *out_miss, double missing) { - int nxcells = minterp_acc->nxcells; + int nxcells = minterp_gpu->nxcells; int ncells_input_grid = input_grid->nxc * input_grid->nyc; int ncells_output_grid = output_grid->nxc * output_grid->nyc; -#pragma acc data present(minterp_acc[:1], \ - minterp_acc->input_parent_cell_index[:nxcells], \ - minterp_acc->output_parent_cell_index[:nxcells], \ - minterp_acc->xcell_area[:nxcells], \ +#pragma acc data present(minterp_gpu[:1], \ + minterp_gpu->input_parent_cell_index[:nxcells], \ + minterp_gpu->output_parent_cell_index[:nxcells], \ + minterp_gpu->xcell_area[:nxcells], \ input_area_weight[:ncells_input_grid], \ fieldout_data[:ncells_output_grid], \ out_area[:ncells_output_grid], \ @@ -689,9 +689,9 @@ void interp_data_order1( const Grid_config *output_grid, const Grid_config *inpu copyin(fieldin_data[:ncells_input_grid]) #pragma acc parallel loop for(int ix=0; ixinput_parent_cell_index[ix]; - int ij2 = minterp_acc->output_parent_cell_index[ix]; - double area = minterp_acc->xcell_area[ix]; + int ij1 = minterp_gpu->input_parent_cell_index[ix]; + int ij2 = minterp_gpu->output_parent_cell_index[ix]; + double area = minterp_gpu->xcell_area[ix]; if( fieldin_data[ij1] == missing ) continue; @@ -706,12 +706,12 @@ void interp_data_order1( const Grid_config *output_grid, const Grid_config *inpu } void interp_data_order2( const Grid_config *output_grid, const Grid_config *input_grid, - Interp_per_input_tile *minterp_acc, double *input_area_weight, double *fieldin_data, + Interp_per_input_tile *minterp_gpu, double *input_area_weight, double *fieldin_data, double *fieldout_data, double *out_area, int *out_miss, int *grad_mask, double *grad_y, double *grad_x, double missing) { - int nxcells = minterp_acc->nxcells; + int nxcells = 
minterp_gpu->nxcells; int n_halo_cells = 2; int input_nlon_cells = input_grid->nxc; int input_nlat_cells = input_grid->nyc; @@ -721,10 +721,10 @@ void interp_data_order2( const Grid_config *output_grid, const Grid_config *inpu int output_nlon_cells = output_grid->nxc; int ncells_output_grid = output_nlon_cells * (output_grid->nyc); -#pragma acc data present( minterp_acc[:1], \ - minterp_acc->input_parent_cell_index[:nxcells], \ - minterp_acc->output_parent_cell_index[:nxcells], \ - minterp_acc->xcell_area[:nxcells], \ +#pragma acc data present( minterp_gpu[:1], \ + minterp_gpu->input_parent_cell_index[:nxcells], \ + minterp_gpu->output_parent_cell_index[:nxcells], \ + minterp_gpu->xcell_area[:nxcells], \ input_area_weight[:ncells_input_grid], \ fieldout_data[:ncells_output_grid], \ out_area[:ncells_output_grid], \ @@ -734,11 +734,11 @@ void interp_data_order2( const Grid_config *output_grid, const Grid_config *inpu grad_x[:ncells_input_grid], grad_y[:ncells_input_grid]) #pragma acc parallel loop for(int ix=0; ixinput_parent_cell_index[ix]; - int ij2 = minterp_acc->output_parent_cell_index[ix]; - double area = minterp_acc->xcell_area[ix]; - double dx = minterp_acc->dcentroid_lon[ix]; - double dy = minterp_acc->dcentroid_lat[ix]; + int ij1 = minterp_gpu->input_parent_cell_index[ix]; + int ij2 = minterp_gpu->output_parent_cell_index[ix]; + double area = minterp_gpu->xcell_area[ix]; + double dx = minterp_gpu->dcentroid_lon[ix]; + double dy = minterp_gpu->dcentroid_lat[ix]; int i1 = ij1%input_nlon_cells; int j1=ij1/input_nlon_cells; @@ -762,7 +762,7 @@ void get_bounding_indices gets indices for kat that overlap with the ref_grid_lat TODO: THIS FUNCTION NEEDS A UNIT TEST *******************************************************************************/ -void get_bounding_indices_acc(const int ref_nlon_cells, const int ref_nlat_cells, +void get_bounding_indices_gpu(const int ref_nlon_cells, const int ref_nlat_cells, const int nlon_cells, const int nlat_cells, const double 
*ref_grid_lat, const double *grid_lat, int *overlap_starts_here_index, int *overlap_ends_here_index) @@ -775,8 +775,8 @@ void get_bounding_indices_acc(const int ref_nlon_cells, const int ref_nlat_cells int nlat_gridpts = nlat_cells+1; int nlon_gridpts = nlon_cells+1; - ref_min_lat = minval_double_acc((ref_nlat_cells+1)*(ref_nlon_cells+1), ref_grid_lat); - ref_max_lat = maxval_double_acc((ref_nlat_cells+1)*(ref_nlon_cells+1), ref_grid_lat); + ref_min_lat = minval_double_gpu((ref_nlat_cells+1)*(ref_nlon_cells+1), ref_grid_lat); + ref_max_lat = maxval_double_gpu((ref_nlat_cells+1)*(ref_nlon_cells+1), ref_grid_lat); #pragma acc parallel loop collapse(2) present(grid_lat[:nlat_gridpts]) \ copyin(ref_min_lat, ref_max_lat) \ @@ -797,7 +797,7 @@ void get_bounding_indices_acc(const int ref_nlon_cells, const int ref_nlat_cells } -void create_interp_acc_itile_arrays_on_device_acc(const int nxcells, const unsigned int opcode, +void create_interp_gpu_itile_arrays_on_device_gpu(const int nxcells, const unsigned int opcode, Interp_per_input_tile *interp_per_itile) { diff --git a/tools/fregrid_acc/conserve_interp_acc.h b/tools/fregrid_gpu/conserve_interp_gpu.h similarity index 80% rename from tools/fregrid_acc/conserve_interp_acc.h rename to tools/fregrid_gpu/conserve_interp_gpu.h index a5020c6e..1f8ee4e6 100644 --- a/tools/fregrid_acc/conserve_interp_acc.h +++ b/tools/fregrid_gpu/conserve_interp_gpu.h @@ -17,27 +17,27 @@ * License along with FRE-NCTools. If not, see * . 
**********************************************************************/ -#ifndef CONSERVE_INTERP_ACC_H_ -#define CONSERVE_INTERP_ACC_H_ +#ifndef CONSERVE_INTERP_GPU_H_ +#define CONSERVE_INTERP_GPU_H_ -#include "globals_acc.h" +#include "globals_gpu.h" -void setup_conserve_interp_acc(int ntiles_in, Grid_config *grid_in, int ntiles_out, - Grid_config *grid_out, Interp_config_acc *interp_acc, unsigned int opcode); +void setup_conserve_interp_gpu(int ntiles_in, Grid_config *grid_in, int ntiles_out, + Grid_config *grid_out, Interp_config_gpu *interp_gpu, unsigned int opcode); -void do_scalar_conserve_interp_acc(Interp_config_acc *interp_acc, int varid, int ntiles_in, const Grid_config *grid_in, +void do_scalar_conserve_interp_gpu(Interp_config_gpu *interp_gpu, int varid, int ntiles_in, const Grid_config *grid_in, int ntiles_out, const Grid_config *grid_out, const Field_config *field_in, Field_config *field_out, unsigned int opcode); -void read_remap_file_acc(int ntiles_input_grid, int ntiles_output_grid, +void read_remap_file_gpu(int ntiles_input_grid, int ntiles_output_grid, Grid_config *output_grid, Grid_config *input_grid, - Interp_config_acc *interp_acc, unsigned int opcode); + Interp_config_gpu *interp_gpu, unsigned int opcode); void write_remap_file(const int ntiles_out, const int ntiles_in, Grid_config *output_grid, - Grid_config *input_grid, Interp_config_acc *interp_acc, unsigned int opcode); + Grid_config *input_grid, Interp_config_gpu *interp_gpu, unsigned int opcode); void check_area_conservation(const int ntiles_output_grid, const int ntiles_input_grid, Grid_config *output_grid, - Interp_config_acc *interp_acc); + Interp_config_gpu *interp_gpu); void get_input_area_weight(const int weights_exist, const int cell_measures, const int cell_methods, const Field_config *field_in, const Grid_config *input_grid, @@ -52,12 +52,12 @@ void interp_data_order2( const Grid_config *output_grid, const Grid_config *inpu double *fieldout_data, double *out_area, int 
*out_miss, int *grad_mask, double *grad_y, double *grad_x, double missing); -void get_bounding_indices_acc(const int ref_nlon_cells, const int ref_nlat_cells, +void get_bounding_indices_gpu(const int ref_nlon_cells, const int ref_nlat_cells, const int nlon_cells, const int nlat_cells, const double *ref_grid_lat, const double *grid_lat, int *overlap_starts_here_index, int *overlap_ends_here_index); -void create_interp_acc_itile_arrays_on_device_acc(const int nxcells, const unsigned int opcode, +void create_interp_gpu_itile_arrays_on_device_gpu(const int nxcells, const unsigned int opcode, Interp_per_input_tile *interp_per_itile); #endif diff --git a/tools/fregrid_acc/fregrid_acc.c b/tools/fregrid_gpu/fregrid_gpu.c similarity index 98% rename from tools/fregrid_acc/fregrid_acc.c rename to tools/fregrid_gpu/fregrid_gpu.c index fe1bbf3e..f2e3aaf8 100644 --- a/tools/fregrid_acc/fregrid_acc.c +++ b/tools/fregrid_gpu/fregrid_gpu.c @@ -49,15 +49,15 @@ #include #include #include -#include "globals_acc.h" +#include "globals_gpu.h" #include "read_mosaic.h" #include "mpp_io.h" #include "mpp.h" #include "mosaic_util.h" -#include "conserve_interp_acc.h" +#include "conserve_interp_gpu.h" #include "bilinear_interp.h" #include "fregrid_util.h" -#include "fregrid_utils_acc.h" +#include "fregrid_utils_gpu.h" char *usage[] = { "", @@ -346,7 +346,7 @@ int main(int argc, char* argv[]) File_config *file2_in = NULL; /* store input file information */ File_config *file2_out = NULL; /* store output file information */ Bound_config *bound_T = NULL; /* store halo update information for T-cell*/ - Interp_config_acc *interp_acc = NULL; /* store remapping information */ + Interp_config_gpu *interp_gpu = NULL; /* store remapping information */ int save_weight_only = 0; double time_get_in_grid=0, time_get_out_grid=0, time_get_input=0; @@ -685,11 +685,11 @@ int main(int argc, char* argv[]) grid_out = (Grid_config *)malloc(ntiles_out*sizeof(Grid_config)); bound_T = (Bound_config *)malloc(ntiles_in 
*sizeof(Bound_config)); //If statement will be removed once bilinear is on the GPU - if( opcode & BILINEAR ) interp_acc = (Interp_config_acc *)malloc(ntiles_out*sizeof(Interp_config_acc)); + if( opcode & BILINEAR ) interp_gpu = (Interp_config_gpu *)malloc(ntiles_out*sizeof(Interp_config_gpu)); else { - interp_acc = (Interp_config_acc *)malloc(ntiles_out*sizeof(Interp_config_acc)); + interp_gpu = (Interp_config_gpu *)malloc(ntiles_out*sizeof(Interp_config_gpu)); for(int i=0 ; i 1) { start[0] = m; mpp_get_var_value_block(fid, vid, start, nread, tilename); if(strlen(str1) + strlen(tilename) > STRING -5) mpp_error("set_output_remap_file(fregrid_util): length of str1 + " "length of tilename should be no greater than STRING-5"); - sprintf(interp_acc[m].remap_file, "%s.%s.nc", str1, tilename); + sprintf(interp_gpu[m].remap_file, "%s.%s.nc", str1, tilename); } else - sprintf(interp_acc[m].remap_file, "%s.nc", str1); - /* check interp_acc file to be read (=1) or write ( = 2) */ - if(!save_weight_only && mpp_file_exist(interp_acc[m].remap_file)) { + sprintf(interp_gpu[m].remap_file, "%s.nc", str1); + /* check interp_gpu file to be read (=1) or write ( = 2) */ + if(!save_weight_only && mpp_file_exist(interp_gpu[m].remap_file)) { (*opcode) |= READ; - interp_acc[m].file_exist = 1; + interp_gpu[m].file_exist = 1; } } diff --git a/tools/fregrid_acc/fregrid_utils_acc.h b/tools/fregrid_gpu/fregrid_utils_gpu.h similarity index 85% rename from tools/fregrid_acc/fregrid_utils_acc.h rename to tools/fregrid_gpu/fregrid_utils_gpu.h index 5bc8fc5c..7814d795 100644 --- a/tools/fregrid_acc/fregrid_utils_acc.h +++ b/tools/fregrid_gpu/fregrid_utils_gpu.h @@ -17,10 +17,10 @@ * License along with FRE-NCTools. If not, see * . 
**********************************************************************/ -#ifndef FREGRID_UTILS_ACC_H -#define FREGRID_UTILS_ACC_H +#ifndef FREGRID_UTILS_GPU_H +#define FREGRID_UTILS_GPU_H -void set_remap_file_acc( int ntiles, const char *mosaic_file, const char *remap_file, Interp_config_acc *interp_acc, +void set_remap_file_gpu( int ntiles, const char *mosaic_file, const char *remap_file, Interp_config_gpu *interp_gpu, unsigned int *opcode, int save_weight_only); #endif diff --git a/tools/fregrid_acc/interp_utils_acc.c b/tools/fregrid_gpu/interp_utils_gpu.c similarity index 76% rename from tools/fregrid_acc/interp_utils_acc.c rename to tools/fregrid_gpu/interp_utils_gpu.c index bf4420d8..54015ece 100644 --- a/tools/fregrid_acc/interp_utils_acc.c +++ b/tools/fregrid_gpu/interp_utils_gpu.c @@ -20,21 +20,21 @@ #include #include #include -#include "globals_acc.h" -#include "general_utils_acc.h" +#include "globals_gpu.h" +#include "general_utils_gpu.h" /******************************************************************************* void copy_grid_to_device( const int itile, Grid_config *grid ) Copies lat lon coordinates to device *******************************************************************************/ -void copy_grid_to_device_acc( const int npoints, const double *lat, const double *lon ) +void copy_grid_to_device_gpu( const int npoints, const double *lat, const double *lon ) { #pragma acc enter data copyin(lon[:npoints], lat[:npoints]) } -void delete_grid_from_device_acc( const int npoints, const double *lat, const double *lon ) +void delete_grid_from_device_gpu( const int npoints, const double *lat, const double *lon ) { #pragma acc exit data delete(lat[:npoints], lon[:npoints]) @@ -45,24 +45,24 @@ void delete_grid_from_device_acc( const int npoints, const double *lat, const do void copy_interp_to_device( Interp_config *interp ) Copies the interp struct to device *******************************************************************************/ -void 
copy_interp_to_device_acc( const int ntiles_in, const int ntiles_out, const Interp_config_acc *interp_acc, +void copy_interp_to_device_gpu( const int ntiles_in, const int ntiles_out, const Interp_config_gpu *interp_gpu, const unsigned int opcode ) { -#pragma acc enter data copyin(interp_acc[:ntiles_out]) +#pragma acc enter data copyin(interp_gpu[:ntiles_out]) for(int otile=0 ; otile. **********************************************************************/ -#ifndef FREGRID_UTILS_ACC_H_ -#define FREGRID_UTILS_ACC_H_ +#ifndef FREGRID_UTILS_GPU_H_ +#define FREGRID_UTILS_GPU_H_ -#include "globals_acc.h" +#include "globals_gpu.h" -void copy_grid_to_device_acc( const int npoints, const double *lat, const double *lon ); +void copy_grid_to_device_gpu( const int npoints, const double *lat, const double *lon ); -void delete_grid_from_device_acc( const int npoints, const double *lat, const double *lon ); +void delete_grid_from_device_gpu( const int npoints, const double *lat, const double *lon ); -void copy_interp_to_device_acc( const int ntiles_in, const int ntiles_out, const Interp_config_acc *interp_acc, +void copy_interp_to_device_gpu( const int ntiles_in, const int ntiles_out, const Interp_config_gpu *interp_gpu, const unsigned int opcode ); -void get_input_grid_mask_acc(const int mask_size, double **input_grid_mask); +void get_input_grid_mask_gpu(const int mask_size, double **input_grid_mask); -void free_input_grid_mask_acc(const int mask_size, double **input_grid_mask); +void free_input_grid_mask_gpu(const int mask_size, double **input_grid_mask); -void create_interp_per_intile_arrays_on_device_acc(const int nxcells, const unsigned int opcode, +void create_interp_per_intile_arrays_on_device_gpu(const int nxcells, const unsigned int opcode, Interp_per_input_tile *interp_per_itile); #endif diff --git a/tools/libfrencutils_acc/Makefile.am b/tools/libfrencutils_gpu/Makefile.am similarity index 70% rename from tools/libfrencutils_acc/Makefile.am rename to 
tools/libfrencutils_gpu/Makefile.am index 835fd5ea..022900fe 100644 --- a/tools/libfrencutils_acc/Makefile.am +++ b/tools/libfrencutils_gpu/Makefile.am @@ -17,15 +17,15 @@ # License along with FRE-NCTools. If not, see # . #*********************************************************************** -noinst_LIBRARIES = libfrencutils_acc.a +noinst_LIBRARIES = libfrencutils_gpu.a AM_CFLAGS = $(NETCDF_CFLAGS) $(OPENACC_CFLAGS) -I$(top_srcdir)/tools/libfrencutils -libfrencutils_acc_a_SOURCES = create_xgrid_acc.c \ - create_xgrid_acc.h \ - create_xgrid_utils_acc.c \ - create_xgrid_utils_acc.h \ - general_utils_acc.c \ - general_utils_acc.h +libfrencutils_gpu_a_SOURCES = create_xgrid_gpu.c \ + create_xgrid_gpu.h \ + create_xgrid_utils_gpu.c \ + create_xgrid_utils_gpu.h \ + general_utils_gpu.c \ + general_utils_gpu.h -libfrencutils_acc_a_LIBADD = $(top_builddir)/tools/libfrencutils/libfrencutils.a +libfrencutils_gpu_a_LIBADD = $(top_builddir)/tools/libfrencutils/libfrencutils.a diff --git a/tools/libfrencutils_acc/create_xgrid_acc.c b/tools/libfrencutils_gpu/create_xgrid_gpu.c similarity index 89% rename from tools/libfrencutils_acc/create_xgrid_acc.c rename to tools/libfrencutils_gpu/create_xgrid_gpu.c index f3ddae83..c4faa352 100644 --- a/tools/libfrencutils_acc/create_xgrid_acc.c +++ b/tools/libfrencutils_gpu/create_xgrid_gpu.c @@ -21,17 +21,17 @@ #include #include #include -#include "general_utils_acc.h" -#include "create_xgrid_acc.h" -#include "create_xgrid_utils_acc.h" -#include "globals_acc.h" +#include "general_utils_gpu.h" +#include "create_xgrid_gpu.h" +#include "create_xgrid_utils_gpu.h" +#include "globals_gpu.h" /******************************************************************************* -void get_upbound_nxcells_2dx2d_acc +void get_upbound_nxcells_2dx2d_gpu This function computes the upperbound to nxgrid. 
This upper bound will be used to malloc arrays used in create_xgrid *******************************************************************************/ -int get_upbound_nxcells_2dx2d_acc(const int nlon_input_cells, const int nlat_input_cells, +int get_upbound_nxcells_2dx2d_gpu(const int nlon_input_cells, const int nlat_input_cells, const int nlon_output_cells, const int nlat_output_cells, const int jlat_overlap_starts, const int jlat_overlap_ends, const double *input_grid_lon, const double *input_grid_lat, @@ -70,15 +70,15 @@ int get_upbound_nxcells_2dx2d_acc(const int nlon_input_cells, const int nlat_in int ij2_min=output_grid_ncells, ij2_max=0; double input_cell_lon_vertices[MV], input_cell_lat_vertices[MV]; - get_cell_vertices_acc(ij1, nlon_input_cells, input_grid_lon, input_grid_lat, + get_cell_vertices_gpu(ij1, nlon_input_cells, input_grid_lon, input_grid_lat, input_cell_lon_vertices, input_cell_lat_vertices); - double input_cell_lat_min = minval_double_acc(4, input_cell_lat_vertices); - double input_cell_lat_max = maxval_double_acc(4, input_cell_lat_vertices); - int nvertices = fix_lon_acc(input_cell_lon_vertices, input_cell_lat_vertices, 4, M_PI); - double input_cell_lon_min = minval_double_acc(nvertices, input_cell_lon_vertices); - double input_cell_lon_max = maxval_double_acc(nvertices, input_cell_lon_vertices); - double input_cell_lon_cent = avgval_double_acc(nvertices, input_cell_lon_vertices); + double input_cell_lat_min = minval_double_gpu(4, input_cell_lat_vertices); + double input_cell_lat_max = maxval_double_gpu(4, input_cell_lat_vertices); + int nvertices = fix_lon_gpu(input_cell_lon_vertices, input_cell_lat_vertices, 4, M_PI); + double input_cell_lon_min = minval_double_gpu(nvertices, input_cell_lon_vertices); + double input_cell_lon_max = maxval_double_gpu(nvertices, input_cell_lon_vertices); + double input_cell_lon_cent = avgval_double_gpu(nvertices, input_cell_lon_vertices); approx_xcells_per_ij1[ij1]=0; @@ -132,7 +132,7 @@ int 
get_upbound_nxcells_2dx2d_acc(const int nlon_input_cells, const int nlat_in conservative interpolation. nlon_input_cells,ninput_grid_lat,nlon_output_cells,nlat_output_cells are the size of the grid cell and input_grid_lon,input_grid_lat, output_grid_lon,output_grid_lat are geographic grid location of grid cell bounds. *******************************************************************************/ -int create_xgrid_2dx2d_order1_acc(const int nlon_input_cells, const int nlat_input_cells, +int create_xgrid_2dx2d_order1_gpu(const int nlon_input_cells, const int nlat_input_cells, const int nlon_output_cells, const int nlat_output_cells, const int jlat_overlap_starts, const int jlat_overlap_ends, const double *input_grid_lon, const double *input_grid_lat, @@ -189,15 +189,15 @@ int create_xgrid_2dx2d_order1_acc(const int nlon_input_cells, const int nlat_in double input_cell_lon_vertices[MV], input_cell_lat_vertices[MV]; int approx_nxcells_b4_ij1=0, ixcell=0; - get_cell_vertices_acc(ij1, nlon_input_cells, input_grid_lon, input_grid_lat, + get_cell_vertices_gpu(ij1, nlon_input_cells, input_grid_lon, input_grid_lat, input_cell_lon_vertices, input_cell_lat_vertices); - double input_cell_lat_min = minval_double_acc(4, input_cell_lat_vertices); - double input_cell_lat_max = maxval_double_acc(4, input_cell_lat_vertices); - int nvertices1 = fix_lon_acc(input_cell_lon_vertices, input_cell_lat_vertices, 4, M_PI); - double input_cell_lon_min = minval_double_acc(nvertices1, input_cell_lon_vertices); - double input_cell_lon_max = maxval_double_acc(nvertices1, input_cell_lon_vertices); - double input_cell_lon_cent = avgval_double_acc(nvertices1, input_cell_lon_vertices); - double input_cell_area = poly_area_acc(input_cell_lon_vertices, input_cell_lat_vertices, nvertices1); + double input_cell_lat_min = minval_double_gpu(4, input_cell_lat_vertices); + double input_cell_lat_max = maxval_double_gpu(4, input_cell_lat_vertices); + int nvertices1 = fix_lon_gpu(input_cell_lon_vertices, 
input_cell_lat_vertices, 4, M_PI); + double input_cell_lon_min = minval_double_gpu(nvertices1, input_cell_lon_vertices); + double input_cell_lon_max = maxval_double_gpu(nvertices1, input_cell_lon_vertices); + double input_cell_lon_cent = avgval_double_gpu(nvertices1, input_cell_lon_vertices); + double input_cell_area = poly_area_gpu(input_cell_lon_vertices, input_cell_lat_vertices, nvertices1); #pragma acc loop seq for(int i=1; i<=ij1 ; i++) approx_nxcells_b4_ij1 += approx_nxcells_per_ij1[i-1]; @@ -239,10 +239,10 @@ int create_xgrid_2dx2d_order1_acc(const int nlon_input_cells, const int nlat_in if(output_cell_lon_min >= input_cell_lon_max ) continue; if(output_cell_lon_max <= input_cell_lon_min ) continue; - if ( (xvertices = clip_2dx2d_acc( input_cell_lon_vertices, input_cell_lat_vertices, nvertices1, + if ( (xvertices = clip_2dx2d_gpu( input_cell_lon_vertices, input_cell_lat_vertices, nvertices1, output_cell_lon_vertices, output_cell_lat_vertices, nvertices2, xcell_lon_vertices, xcell_lat_vertices)) > 0 ){ - double xcell_area = poly_area_acc(xcell_lon_vertices, xcell_lat_vertices, xvertices); + double xcell_area = poly_area_gpu(xcell_lon_vertices, xcell_lat_vertices, xvertices); if( xcell_area/min(input_cell_area, output_cell_area) > AREA_RATIO_THRESH ) { store_xcell_area[approx_nxcells_b4_ij1+ixcell] = xcell_area; parent_input_index[approx_nxcells_b4_ij1+ixcell] = ij1; @@ -256,7 +256,7 @@ int create_xgrid_2dx2d_order1_acc(const int nlon_input_cells, const int nlat_in } } - copy_data_to_interp_on_device_acc(nxcells, input_grid_ncells, upbound_nxcells, nxcells_per_ij1, + copy_data_to_interp_on_device_gpu(nxcells, input_grid_ncells, upbound_nxcells, nxcells_per_ij1, &xcell_dclon, &xcell_dclat, approx_nxcells_per_ij1, parent_input_index, parent_output_index, store_xcell_area, interp_for_itile); @@ -280,7 +280,7 @@ int create_xgrid_2dx2d_order1_acc(const int nlon_input_cells, const int nlat_in void create_xgrid_2DX2D_order2 This routine generate exchange grids 
between two grids for the second order *******************************************************************************/ -int create_xgrid_2dx2d_order2_acc(const int nlon_input_cells, const int nlat_input_cells, +int create_xgrid_2dx2d_order2_gpu(const int nlon_input_cells, const int nlat_input_cells, const int nlon_output_cells, const int nlat_output_cells, const int jlat_overlap_starts, const int jlat_overlap_ends, const double *input_grid_lon, const double *input_grid_lat, @@ -350,15 +350,15 @@ int create_xgrid_2dx2d_order2_acc(const int nlon_input_cells, const int nlat_in double summed_input_clat_ij1=0.; int approx_nxcells_b4_ij1=0, ixcell=0; - get_cell_vertices_acc(ij1, nlon_input_cells, input_grid_lon, input_grid_lat, + get_cell_vertices_gpu(ij1, nlon_input_cells, input_grid_lon, input_grid_lat, input_cell_lon_vertices, input_cell_lat_vertices); - double input_cell_lat_min = minval_double_acc(4, input_cell_lat_vertices); - double input_cell_lat_max = maxval_double_acc(4, input_cell_lat_vertices); - int nvertices1 = fix_lon_acc(input_cell_lon_vertices, input_cell_lat_vertices, 4, M_PI); - double input_cell_lon_min = minval_double_acc(nvertices1, input_cell_lon_vertices); - double input_cell_lon_max = maxval_double_acc(nvertices1, input_cell_lon_vertices); - double input_cell_lon_cent = avgval_double_acc(nvertices1, input_cell_lon_vertices); - double input_cell_area = poly_area_acc(input_cell_lon_vertices, input_cell_lat_vertices, nvertices1); + double input_cell_lat_min = minval_double_gpu(4, input_cell_lat_vertices); + double input_cell_lat_max = maxval_double_gpu(4, input_cell_lat_vertices); + int nvertices1 = fix_lon_gpu(input_cell_lon_vertices, input_cell_lat_vertices, 4, M_PI); + double input_cell_lon_min = minval_double_gpu(nvertices1, input_cell_lon_vertices); + double input_cell_lon_max = maxval_double_gpu(nvertices1, input_cell_lon_vertices); + double input_cell_lon_cent = avgval_double_gpu(nvertices1, input_cell_lon_vertices); + double input_cell_area 
= poly_area_gpu(input_cell_lon_vertices, input_cell_lat_vertices, nvertices1); #pragma acc loop seq for(int i=1; i<=ij1 ; i++) approx_nxcells_b4_ij1 += approx_nxcells_per_ij1[i-1]; @@ -400,17 +400,17 @@ int create_xgrid_2dx2d_order2_acc(const int nlon_input_cells, const int nlat_in if(output_cell_lon_min >= input_cell_lon_max ) continue; if(output_cell_lon_max <= input_cell_lon_min ) continue; - if ( (xvertices = clip_2dx2d_acc( input_cell_lon_vertices, input_cell_lat_vertices, nvertices1, + if ( (xvertices = clip_2dx2d_gpu( input_cell_lon_vertices, input_cell_lat_vertices, nvertices1, output_cell_lon_vertices, output_cell_lat_vertices, nvertices2, xcell_lon_vertices, xcell_lat_vertices)) > 0 ){ - double xcell_area = poly_area_acc(xcell_lon_vertices, xcell_lat_vertices, xvertices); + double xcell_area = poly_area_gpu(xcell_lon_vertices, xcell_lat_vertices, xvertices); if( xcell_area/min(input_cell_area, output_cell_area) > AREA_RATIO_THRESH ) { double xcell_clon, xcell_clat; store_xcell_area[approx_nxcells_b4_ij1+ixcell] = xcell_area; parent_input_index[approx_nxcells_b4_ij1+ixcell] = ij1; parent_output_index[approx_nxcells_b4_ij1+ixcell] = ij2; - poly_ctrlon_acc(xcell_lon_vertices, xcell_lat_vertices, xvertices, input_cell_lon_cent, &xcell_clon); - poly_ctrlat_acc(xcell_lon_vertices, xcell_lat_vertices, xvertices, &xcell_clat); + poly_ctrlon_gpu(xcell_lon_vertices, xcell_lat_vertices, xvertices, input_cell_lon_cent, &xcell_clon); + poly_ctrlat_gpu(xcell_lon_vertices, xcell_lat_vertices, xvertices, &xcell_clat); store_xcell_dclon[approx_nxcells_b4_ij1+ixcell] = xcell_clon/xcell_area; store_xcell_dclat[approx_nxcells_b4_ij1+ixcell] = xcell_clat/xcell_area; summed_input_area_ij1 += xcell_area; @@ -431,7 +431,7 @@ int create_xgrid_2dx2d_order2_acc(const int nlon_input_cells, const int nlat_in } } - copy_data_to_interp_on_device_acc(nxcells, input_grid_ncells, upbound_nxcells, nxcells_per_ij1, + copy_data_to_interp_on_device_gpu(nxcells, input_grid_ncells, 
upbound_nxcells, nxcells_per_ij1, store_xcell_dclon, store_xcell_dclat, approx_nxcells_per_ij1, parent_input_index, parent_output_index, store_xcell_area, interp_for_itile); @@ -452,11 +452,11 @@ int create_xgrid_2dx2d_order2_acc(const int nlon_input_cells, const int nlat_in double readin_area = readin_input_area[ij1]; if(fabs(input_area - readin_area)/readin_area > AREA_RATIO) { double x[4], y[4], input_cell_lon_cent; - get_cell_vertices_acc(ij1, nlon_input_cells, input_grid_lon, input_grid_lat, x, y); - int n = fix_lon_acc(x, y, 4, M_PI); - input_cell_lon_cent = avgval_double_acc(n, x); - poly_ctrlon_acc(x, y, n, input_cell_lon_cent, &input_clon); - poly_ctrlat_acc(x, y, n, &input_clat); + get_cell_vertices_gpu(ij1, nlon_input_cells, input_grid_lon, input_grid_lat, x, y); + int n = fix_lon_gpu(x, y, 4, M_PI); + input_cell_lon_cent = avgval_double_gpu(n, x); + poly_ctrlon_gpu(x, y, n, input_cell_lon_cent, &input_clon); + poly_ctrlat_gpu(x, y, n, &input_clat); input_area = readin_area; } interp_for_itile->dcentroid_lon[ix] -= input_clon/input_area; @@ -487,7 +487,7 @@ int create_xgrid_2dx2d_order2_acc(const int nlon_input_cells, const int nlat_in };/* get_xgrid_2Dx2D_order2 */ -int create_xgrid_great_circle_acc(const int *nlon_input_cells, const int *nlat_input_cells, +int create_xgrid_great_circle_gpu(const int *nlon_input_cells, const int *nlat_input_cells, const int *nlon_output_cells, const int *nlat_output_cells, const double *input_grid_lon, const double *input_grid_lat, const double *output_grid_lon, const double *output_grid_lat, @@ -525,13 +525,13 @@ int create_xgrid_great_circle_acc(const int *nlon_input_cells, const int *nlat_i y2 = (double *)malloc(nx2p*ny2p*sizeof(double)); z2 = (double *)malloc(nx2p*ny2p*sizeof(double)); - latlon2xyz_acc(nx1p*ny1p, input_grid_lon, input_grid_lat, x1, y1, z1); - latlon2xyz_acc(nx2p*ny2p, output_grid_lon, output_grid_lat, x2, y2, z2); + latlon2xyz_gpu(nx1p*ny1p, input_grid_lon, input_grid_lat, x1, y1, z1); + 
latlon2xyz_gpu(nx2p*ny2p, output_grid_lon, output_grid_lat, x2, y2, z2); area1 = (double *)malloc(nx1*ny1*sizeof(double)); area2 = (double *)malloc(nx2*ny2*sizeof(double)); - get_grid_great_circle_area_acc(nlon_input_cells, nlat_input_cells, input_grid_lon, input_grid_lat, area1); - get_grid_great_circle_area_acc(nlon_output_cells, nlat_output_cells, output_grid_lon, output_grid_lat, area2); + get_grid_great_circle_area_gpu(nlon_input_cells, nlat_input_cells, input_grid_lon, input_grid_lat, area1); + get_grid_great_circle_area_gpu(nlon_output_cells, nlat_output_cells, output_grid_lon, output_grid_lat, area2); n1_in = 4; n2_in = 4; @@ -556,9 +556,9 @@ int create_xgrid_great_circle_acc(const int *nlon_input_cells, const int *nlat_i x2_in[2] = x2[n2]; y2_in[2] = y2[n2]; z2_in[2] = z2[n2]; x2_in[3] = x2[n3]; y2_in[3] = y2[n3]; z2_in[3] = z2[n3]; - if ( (n_out = clip_2dx2d_great_circle_acc( x1_in, y1_in, z1_in, n1_in, x2_in, y2_in, z2_in, n2_in, + if ( (n_out = clip_2dx2d_great_circle_gpu( x1_in, y1_in, z1_in, n1_in, x2_in, y2_in, z2_in, n2_in, x_out, y_out, z_out)) > 0) { - xarea = great_circle_area_acc( n_out, x_out, y_out, z_out ) ; + xarea = great_circle_area_gpu( n_out, x_out, y_out, z_out ) ; min_area = min(area1[j1*nx1+i1], area2[j2*nx2+i2]); if( xarea/min_area > AREA_RATIO_THRESH ) { #ifdef debug_test_create_xgrid diff --git a/tools/libfrencutils_acc/create_xgrid_acc.h b/tools/libfrencutils_gpu/create_xgrid_gpu.h similarity index 91% rename from tools/libfrencutils_acc/create_xgrid_acc.h rename to tools/libfrencutils_gpu/create_xgrid_gpu.h index 6047b1bc..f28a52de 100644 --- a/tools/libfrencutils_acc/create_xgrid_acc.h +++ b/tools/libfrencutils_gpu/create_xgrid_gpu.h @@ -17,12 +17,12 @@ * License along with FRE-NCTools. If not, see * . 
**********************************************************************/ -#ifndef CREATE_XGRID_ACC_H_ -#define CREATE_XGRID_ACC_H_ +#ifndef CREATE_XGRID_GPU_H_ +#define CREATE_XGRID_GPU_H_ -#include "globals_acc.h" +#include "globals_gpu.h" -int get_upbound_nxcells_2dx2d_acc(const int nlon_input_cells, const int nlat_input_cells, +int get_upbound_nxcells_2dx2d_gpu(const int nlon_input_cells, const int nlat_input_cells, const int nlon_output_cells, const int nlat_output_cells, const int jlat_overlap_starts, const int jlat_overlap_ends, const double *input_grid_lon, const double *input_grid_lat, @@ -31,7 +31,7 @@ int get_upbound_nxcells_2dx2d_acc(const int nlon_input_cells, const int nlat_in const Grid_cells_struct_config *output_grid_cells, int *approx_nxcells_per_ij1, int *ij2_start, int *ij2_end); -int create_xgrid_2dx2d_order1_acc(const int nlon_input_cells, const int nlat_input_cells, +int create_xgrid_2dx2d_order1_gpu(const int nlon_input_cells, const int nlat_input_cells, const int nlon_output_cells, const int nlat_output_cells, const int jlat_overlap_starts, const int jlat_overlap_ends, const double *input_grid_lon, const double *input_grid_lat, @@ -41,7 +41,7 @@ int create_xgrid_2dx2d_order1_acc(const int nlon_input_cells, const int nlat_in int *approx_nxcells_per_ij1, int *ij2_start, int *ij2_end, Interp_per_input_tile *interp_for_input_tile); -int create_xgrid_2dx2d_order2_acc(const int nlon_input_cells, const int nlat_input_cells, +int create_xgrid_2dx2d_order2_gpu(const int nlon_input_cells, const int nlat_input_cells, const int nlon_output_cells, const int nlat_output_cells, const int jlat_overlap_starts, const int jlat_overlap_ends, const double *input_grid_lon, const double *input_grid_lat, @@ -51,7 +51,7 @@ int create_xgrid_2dx2d_order2_acc(const int nlon_input_cells, const int nlat_in int *approx_nxcells_per_ij1, int *ij2_start, int *ij2_end, Interp_per_input_tile *interp_for_input_tile, double *readin_input_area); -int 
create_xgrid_great_circle_acc(const int *nlon_input_cells, const int *nlat_input_cells, +int create_xgrid_great_circle_gpu(const int *nlon_input_cells, const int *nlat_input_cells, const int *nlon_output_cells, const int *nlat_output_cells, const double *input_grid_lon, const double *input_grid_lat, const double *output_grid_lon, const double *output_grid_lat, diff --git a/tools/libfrencutils_acc/create_xgrid_utils_acc.c b/tools/libfrencutils_gpu/create_xgrid_utils_gpu.c similarity index 82% rename from tools/libfrencutils_acc/create_xgrid_utils_acc.c rename to tools/libfrencutils_gpu/create_xgrid_utils_gpu.c index ee262ec2..706c4cb9 100644 --- a/tools/libfrencutils_acc/create_xgrid_utils_acc.c +++ b/tools/libfrencutils_gpu/create_xgrid_utils_gpu.c @@ -21,15 +21,15 @@ #include #include #include -#include "general_utils_acc.h" -#include "create_xgrid_utils_acc.h" -#include "globals_acc.h" +#include "general_utils_gpu.h" +#include "create_xgrid_utils_gpu.h" +#include "globals_gpu.h" /******************************************************************************* void get_grid_area(const int *nlon, const int *nlat, const double *lon, const double *lat, const double *area) return the grid area. 
*******************************************************************************/ -void get_grid_area_acc(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area) +void get_grid_area_gpu(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area) { int nx, ny, nxp, i, j, n_in; double x_in[20], y_in[20]; @@ -47,18 +47,18 @@ void get_grid_area_acc(const int *nlon, const int *nlat, const double *lon, cons y_in[1] = lat[j*nxp+i+1]; y_in[2] = lat[(j+1)*nxp+i+1]; y_in[3] = lat[(j+1)*nxp+i]; - n_in = fix_lon_acc(x_in, y_in, 4, M_PI); - area[j*nx+i] = poly_area_acc(x_in, y_in, n_in); + n_in = fix_lon_gpu(x_in, y_in, 4, M_PI); + area[j*nx+i] = poly_area_gpu(x_in, y_in, n_in); } }; /* get_grid_area */ -void get_grid_great_circle_area_acc(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area) +void get_grid_great_circle_area_gpu(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area) { int nx, ny, nxp, nyp, i, j, n_in; int n0, n1, n2, n3; double x_in[20], y_in[20], z_in[20]; - struct Node_acc *grid=NULL; + struct Node_gpu *grid=NULL; double *x=NULL, *y=NULL, *z=NULL; @@ -71,7 +71,7 @@ void get_grid_great_circle_area_acc(const int *nlon, const int *nlat, const doub y = (double *)malloc(nxp*nyp*sizeof(double)); z = (double *)malloc(nxp*nyp*sizeof(double)); - latlon2xyz_acc(nxp*nyp, lon, lat, x, y, z); + latlon2xyz_gpu(nxp*nyp, lon, lat, x, y, z); for(j=0; j and should not parallel to the line between and may need to consider truncation error */ @@ -269,19 +269,19 @@ int clip_2dx2d_acc(const double lon1_in[], const double lat1_in[], int n1_in, the more expensive of the computatioin. When the value is close to 0, some small exchange grid might be lost. Suggest to use value 0.05 for C48. 
*******************************************************************************/ -int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], const double z1_in[], int n1_in, +int clip_2dx2d_great_circle_gpu(const double x1_in[], const double y1_in[], const double z1_in[], int n1_in, const double x2_in[], const double y2_in[], const double z2_in [], int n2_in, double x_out[], double y_out[], double z_out[]) { - struct Node_acc *subjList=NULL; - struct Node_acc *clipList=NULL; - struct Node_acc *grid1List=NULL; - struct Node_acc *grid2List=NULL; - struct Node_acc *intersectList=NULL; - struct Node_acc *polyList=NULL; - struct Node_acc *curList=NULL; - struct Node_acc *firstIntersect=NULL, *curIntersect=NULL; - struct Node_acc *temp1=NULL, *temp2=NULL, *temp=NULL; + struct Node_gpu *subjList=NULL; + struct Node_gpu *clipList=NULL; + struct Node_gpu *grid1List=NULL; + struct Node_gpu *grid2List=NULL; + struct Node_gpu *intersectList=NULL; + struct Node_gpu *polyList=NULL; + struct Node_gpu *curList=NULL; + struct Node_gpu *firstIntersect=NULL, *curIntersect=NULL; + struct Node_gpu *temp1=NULL, *temp2=NULL, *temp=NULL; int i1, i2, i1p, i2p, i2p2, npts1, npts2; int nintersect, n_out; @@ -299,39 +299,39 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons /* first check the min and max of (x1_in, y1_in, z1_in) with (x2_in, y2_in, z2_in) */ - min_x1 = minval_double_acc(n1_in, x1_in); - max_x2 = maxval_double_acc(n2_in, x2_in); + min_x1 = minval_double_gpu(n1_in, x1_in); + max_x2 = maxval_double_gpu(n2_in, x2_in); if(min_x1 >= max_x2+RANGE_CHECK_CRITERIA) return 0; - max_x1 = maxval_double_acc(n1_in, x1_in); - min_x2 = minval_double_acc(n2_in, x2_in); + max_x1 = maxval_double_gpu(n1_in, x1_in); + min_x2 = minval_double_gpu(n2_in, x2_in); if(min_x2 >= max_x1+RANGE_CHECK_CRITERIA) return 0; - min_y1 = minval_double_acc(n1_in, y1_in); - max_y2 = maxval_double_acc(n2_in, y2_in); + min_y1 = minval_double_gpu(n1_in, y1_in); + max_y2 
= maxval_double_gpu(n2_in, y2_in); if(min_y1 >= max_y2+RANGE_CHECK_CRITERIA) return 0; - max_y1 = maxval_double_acc(n1_in, y1_in); - min_y2 = minval_double_acc(n2_in, y2_in); + max_y1 = maxval_double_gpu(n1_in, y1_in); + min_y2 = minval_double_gpu(n2_in, y2_in); if(min_y2 >= max_y1+RANGE_CHECK_CRITERIA) return 0; - min_z1 = minval_double_acc(n1_in, z1_in); - max_z2 = maxval_double_acc(n2_in, z2_in); + min_z1 = minval_double_gpu(n1_in, z1_in); + max_z2 = maxval_double_gpu(n2_in, z2_in); if(min_z1 >= max_z2+RANGE_CHECK_CRITERIA) return 0; - max_z1 = maxval_double_acc(n1_in, z1_in); - min_z2 = minval_double_acc(n2_in, z2_in); + max_z1 = maxval_double_gpu(n1_in, z1_in); + min_z2 = minval_double_gpu(n2_in, z2_in); if(min_z2 >= max_z1+RANGE_CHECK_CRITERIA) return 0; - rewindList_acc(); + rewindList_gpu(); - grid1List = getNext_acc(); - grid2List = getNext_acc(); - intersectList = getNext_acc(); - polyList = getNext_acc(); + grid1List = getNext_gpu(); + grid2List = getNext_gpu(); + intersectList = getNext_gpu(); + polyList = getNext_gpu(); /* insert points into SubjList and ClipList */ - for(i1=0; i1isInside = 1; else temp->isInside = 0; - temp = getNextNode_acc(temp); + temp = getNextNode_gpu(temp); } /* check if grid2List is inside grid1List */ temp = grid2List; while(temp) { - if(insidePolygon_acc(temp, grid1List)) + if(insidePolygon_gpu(temp, grid1List)) temp->isInside = 1; else temp->isInside = 0; - temp = getNextNode_acc(temp); + temp = getNextNode_gpu(temp); } /* make sure the grid box is clockwise */ /*make sure each polygon is convex, which is equivalent that the great_circle_area is positive */ - if( gridArea_acc(grid1List) <= 0 ) + if( gridArea_gpu(grid1List) <= 0 ) printf("ERROR create_xgrid.c(clip_2dx2d_great_circle): grid box 1 is not convex\n"); - if( gridArea_acc(grid2List) <= 0 ) + if( gridArea_gpu(grid2List) <= 0 ) printf("ERROR create_xgrid.c(clip_2dx2d_great_circle): grid box 2 is not convex\n"); /* get the coordinates from grid1List and grid2List. 
@@ -371,17 +371,17 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons temp = grid1List; for(i1=0; i1Next_acc; + getCoordinates_gpu(temp, pt1[i1]); + temp = temp->Next_gpu; } temp = grid2List; for(i2=0; i2Next_acc; + getCoordinates_gpu(temp, pt2[i2]); + temp = temp->Next_gpu; } - firstIntersect=getNext_acc(); - curIntersect = getNext_acc(); + firstIntersect=getNext_gpu(); + curIntersect = getNext_gpu(); /* first find all the intersection points */ nintersect = 0; @@ -395,7 +395,7 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons p2_0 = pt2[i2]; p2_1 = pt2[i2p]; p2_2 = pt2[i2p2]; - if( line_intersect_2D_3D_acc(p1_0, p1_1, p2_0, p2_1, p2_2, intersect, &u1, &u2, &inbound) ) { + if( line_intersect_2D_3D_gpu(p1_0, p1_1, p2_0, p2_1, p2_2, intersect, &u1, &u2, &inbound) ) { int n_prev, n_cur; int is_in_subj, is_in_clip; @@ -404,14 +404,14 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons /* add the intersection into intersetList, The intersection might already be in intersectList and will be taken care addIntersect */ - if(addIntersect_acc(intersectList, intersect[0], intersect[1], intersect[2], 1, u1, u2, inbound, i1, i1p, i2, i2p)) { + if(addIntersect_gpu(intersectList, intersect[0], intersect[1], intersect[2], 1, u1, u2, inbound, i1, i1p, i2, i2p)) { /* add the intersection into the grid1List */ if(u1 == 1) { - insertIntersect_acc(grid1List, intersect[0], intersect[1], intersect[2], 0.0, u2, inbound, p1_1[0], p1_1[1], p1_1[2]); + insertIntersect_gpu(grid1List, intersect[0], intersect[1], intersect[2], 0.0, u2, inbound, p1_1[0], p1_1[1], p1_1[2]); } else - insertIntersect_acc(grid1List, intersect[0], intersect[1], intersect[2], u1, u2, inbound, p1_0[0], p1_0[1], p1_0[2]); + insertIntersect_gpu(grid1List, intersect[0], intersect[1], intersect[2], u1, u2, inbound, p1_0[0], p1_0[1], p1_0[2]); /* when u1 == 0 or 1, need to adjust the vertice to intersect value for roundoff 
error */ if(u1==1) { p1_1[0] = intersect[0]; @@ -425,9 +425,9 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons } /* add the intersection into the grid2List */ if(u2==1) - insertIntersect_acc(grid2List, intersect[0], intersect[1], intersect[2], 0.0, u1, 0, p2_1[0], p2_1[1], p2_1[2]); + insertIntersect_gpu(grid2List, intersect[0], intersect[1], intersect[2], 0.0, u1, 0, p2_1[0], p2_1[1], p2_1[2]); else - insertIntersect_acc(grid2List, intersect[0], intersect[1], intersect[2], u2, u1, 0, p2_0[0], p2_0[1], p2_0[2]); + insertIntersect_gpu(grid2List, intersect[0], intersect[1], intersect[2], u2, u1, 0, p2_0[0], p2_0[1], p2_0[2]); /* when u2 == 0 or 1, need to adjust the vertice to intersect value for roundoff error */ if(u2==1) { p2_1[0] = intersect[0]; @@ -452,9 +452,9 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons has_inbound = 0; /* loop through intersectList to see if there is any has inbound=1 or 2 */ temp = intersectList; - nintersect = length_acc(intersectList); + nintersect = length_gpu(intersectList); if(nintersect > 1) { - getFirstInbound_acc(intersectList, firstIntersect); + getFirstInbound_gpu(intersectList, firstIntersect); if(firstIntersect->initialized) { has_inbound = 1; } @@ -462,8 +462,8 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons /* when has_inbound == 0, get the grid1List and grid2List */ if( !has_inbound && nintersect > 1) { - setInbound_acc(intersectList, grid1List); - getFirstInbound_acc(intersectList, firstIntersect); + setInbound_gpu(intersectList, grid1List); + getFirstInbound_gpu(intersectList, firstIntersect); if(firstIntersect->initialized) has_inbound = 1; } @@ -472,20 +472,20 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons if(has_inbound) { maxiter1 = nintersect; - temp1 = getNode_acc(grid1List, *firstIntersect); + temp1 = getNode_gpu(grid1List, *firstIntersect); if( temp1 == NULL) { double 
lon[10], lat[10]; int i; - xyz2latlon_acc(n1_in, x1_in, y1_in, z1_in, lon, lat); + xyz2latlon_gpu(n1_in, x1_in, y1_in, z1_in, lon, lat); for(i=0; i< n1_in; i++) printf("lon1 = %g, lat1 = %g\n", lon[i]*R2D, lat[i]*R2D); printf("\n"); - xyz2latlon_acc(n2_in, x2_in, y2_in, z2_in, lon, lat); + xyz2latlon_gpu(n2_in, x2_in, y2_in, z2_in, lon, lat); for(i=0; i< n2_in; i++) printf("lon2 = %g, lat2 = %g\n", lon[i]*R2D, lat[i]*R2D); printf("\n"); printf("ERROR firstIntersect is not in the grid1List\n"); } - addNode_acc(polyList, *firstIntersect); + addNode_gpu(polyList, *firstIntersect); nintersect--; /* Loop over the grid1List and grid2List to find again the firstIntersect */ @@ -495,17 +495,17 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons /* Loop through curList to find the next intersection, the loop will end when come back to firstIntersect */ - copyNode_acc(curIntersect, *firstIntersect); + copyNode_gpu(curIntersect, *firstIntersect); iter1 = 0; found1 = 0; while( iter1 < maxiter1 ) { /* find the curIntersect in curList and get the next intersection points */ - temp1 = getNode_acc(curList, *curIntersect); - temp2 = temp1->Next_acc; + temp1 = getNode_gpu(curList, *curIntersect); + temp2 = temp1->Next_gpu; if( temp2 == NULL ) temp2 = curList; - maxiter2 = length_acc(curList); + maxiter2 = length_gpu(curList); found2 = 0; iter2 = 0; /* Loop until find the next intersection */ @@ -513,16 +513,16 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons int temp2IsIntersect; temp2IsIntersect = 0; - if( isIntersect_acc( *temp2 ) ) { /* copy the point and switch to the grid2List */ - struct Node_acc *temp3; + if( isIntersect_gpu( *temp2 ) ) { /* copy the point and switch to the grid2List */ + struct Node_gpu *temp3; /* first check if temp2 is the firstIntersect */ - if( sameNode_acc( *temp2, *firstIntersect) ) { + if( sameNode_gpu( *temp2, *firstIntersect) ) { found1 = 1; break; } - temp3 = temp2->Next_acc; + 
temp3 = temp2->Next_gpu; if( temp3 == NULL) temp3 = curList; if( temp3 == NULL) printf("ERROR creat_xgrid.c: temp3 can not be NULL\n"); found2 = 1; @@ -530,19 +530,19 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons need to keep on curList */ temp2IsIntersect = 1; - if( isIntersect_acc(*temp3) || (temp3->isInside == 1) ) found2 = 0; + if( isIntersect_gpu(*temp3) || (temp3->isInside == 1) ) found2 = 0; } if(found2) { - copyNode_acc(curIntersect, *temp2); + copyNode_gpu(curIntersect, *temp2); break; } else { - addNode_acc(polyList, *temp2); + addNode_gpu(polyList, *temp2); if(temp2IsIntersect) { nintersect--; } } - temp2 = temp2->Next_acc; + temp2 = temp2->Next_gpu; if( temp2 == NULL ) temp2 = curList; iter2 ++; } // while( iter2Next_acc; + getCoordinate_gpu(*temp1, x_out+n_out, y_out+n_out, z_out+n_out); + temp1 = temp1->Next_gpu; n_out++; } @@ -599,16 +599,16 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons if(temp->intersect != 1) { if( temp->isInside == 1) n1in2++; } - temp = getNextNode_acc(temp); + temp = getNextNode_gpu(temp); } if(npts1==n1in2) { /* grid1 is inside grid2 */ n_out = npts1; n = 0; temp = grid1List; while( temp ) { - getCoordinate_acc(*temp, &x_out[n], &y_out[n], &z_out[n]); + getCoordinate_gpu(*temp, &x_out[n], &y_out[n], &z_out[n]); n++; - temp = getNextNode_acc(temp); + temp = getNextNode_gpu(temp); } } if(n_out>0) return n_out; @@ -624,7 +624,7 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons if(temp->intersect != 1) { if( temp->isInside == 1) n2in1++; } - temp = getNextNode_acc(temp); + temp = getNextNode_gpu(temp); } if(npts2==n2in1) { /* grid2 is inside grid1 */ @@ -632,9 +632,9 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons n = 0; temp = grid2List; while( temp ) { - getCoordinate_acc(*temp, &x_out[n], &y_out[n], &z_out[n]); + getCoordinate_gpu(*temp, &x_out[n], &y_out[n], &z_out[n]); n++; - temp = 
getNextNode_acc(temp); + temp = getNextNode_gpu(temp); } } @@ -643,7 +643,7 @@ int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], cons return n_out; } -void get_grid_cell_struct_acc( const int nlon, const int nlat, const Grid_config *output_grid, +void get_grid_cell_struct_gpu( const int nlon, const int nlat, const Grid_config *output_grid, Grid_cells_struct_config *grid_cells ) { @@ -677,20 +677,20 @@ void get_grid_cell_struct_acc( const int nlon, const int nlat, const Grid_config int nvertices; double lon_vertices[MV], lat_vertices[MV]; - get_cell_vertices_acc( icell, nlon, lon, lat, lon_vertices, lat_vertices ); + get_cell_vertices_gpu( icell, nlon, lon, lat, lon_vertices, lat_vertices ); - grid_cells->lat_min[icell] = minval_double_acc(4, lat_vertices); - grid_cells->lat_max[icell] = maxval_double_acc(4, lat_vertices); + grid_cells->lat_min[icell] = minval_double_gpu(4, lat_vertices); + grid_cells->lat_max[icell] = maxval_double_gpu(4, lat_vertices); - nvertices = fix_lon_acc(lon_vertices, lat_vertices, 4, M_PI); + nvertices = fix_lon_gpu(lon_vertices, lat_vertices, 4, M_PI); grid_cells->nvertices[icell] = nvertices; if(nvertices>MAX_V) printf("ERROR get_cell_minmaxavg_latlons: number of cell vertices is greater than MAX_V\n"); - grid_cells->lon_min[icell] = minval_double_acc(nvertices, lon_vertices); - grid_cells->lon_max[icell] = maxval_double_acc(nvertices, lon_vertices); - grid_cells->lon_cent[icell] = avgval_double_acc(nvertices, lon_vertices); + grid_cells->lon_min[icell] = minval_double_gpu(nvertices, lon_vertices); + grid_cells->lon_max[icell] = maxval_double_gpu(nvertices, lon_vertices); + grid_cells->lon_cent[icell] = avgval_double_gpu(nvertices, lon_vertices); - grid_cells->area[icell] = poly_area_acc(lon_vertices, lat_vertices, nvertices); + grid_cells->area[icell] = poly_area_gpu(lon_vertices, lat_vertices, nvertices); for(int ivertex=0 ; ivertexlon_vertices[MAX_V*icell+ivertex] = lon_vertices[ivertex]; @@ -700,7 +700,7 @@ 
void get_grid_cell_struct_acc( const int nlon, const int nlat, const Grid_config } -void free_grid_cell_struct_acc( const int ncells, Grid_cells_struct_config *grid_cells) +void free_grid_cell_struct_gpu( const int ncells, Grid_cells_struct_config *grid_cells) { #pragma acc exit data delete( grid_cells->lon_vertices, \ @@ -726,7 +726,7 @@ void free_grid_cell_struct_acc( const int ncells, Grid_cells_struct_config *grid } -void get_cell_vertices_acc( const int icell, const int nlon, const double *lon, const double *lat, double *x, double *y ) +void get_cell_vertices_gpu( const int icell, const int nlon, const double *lon, const double *lat, double *x, double *y ) { int i, j; @@ -746,7 +746,7 @@ void get_cell_vertices_acc( const int icell, const int nlon, const double *lon, } -void create_upbound_nxcells_arrays_on_device_acc(const int n, int **approx_nxcells_per_ij1, +void create_upbound_nxcells_arrays_on_device_gpu(const int n, int **approx_nxcells_per_ij1, int **ij2_start, int **ij2_end) { @@ -768,7 +768,7 @@ void create_upbound_nxcells_arrays_on_device_acc(const int n, int **approx_nxcel } -void free_upbound_nxcells_arrays_acc( const int n, int **approx_nxcells_per_ij1, +void free_upbound_nxcells_arrays_gpu( const int n, int **approx_nxcells_per_ij1, int **ij2_start, int **ij2_end) { int *p_approx_nxcells_per_ij1; @@ -788,7 +788,7 @@ void free_upbound_nxcells_arrays_acc( const int n, int **approx_nxcells_per_ij1, free(*ij2_end) ; *ij2_end = NULL; } -void copy_data_to_interp_on_device_acc(const int nxcells, const int input_ncells, const int upbound_nxcells, +void copy_data_to_interp_on_device_gpu(const int nxcells, const int input_ncells, const int upbound_nxcells, int *xcells_per_ij1, double *xcell_dclon, double *xcell_dclat, int *approx_xcells_per_ij1, int *parent_input_index, int *parent_output_index, double *xcell_areas, Interp_per_input_tile *interp_for_input_tile) diff --git a/tools/libfrencutils_acc/create_xgrid_utils_acc.h 
b/tools/libfrencutils_gpu/create_xgrid_utils_gpu.h similarity index 73% rename from tools/libfrencutils_acc/create_xgrid_utils_acc.h rename to tools/libfrencutils_gpu/create_xgrid_utils_gpu.h index d5c310a5..09568130 100644 --- a/tools/libfrencutils_acc/create_xgrid_utils_acc.h +++ b/tools/libfrencutils_gpu/create_xgrid_utils_gpu.h @@ -17,48 +17,48 @@ * License along with FRE-NCTools. If not, see * . **********************************************************************/ -#ifndef CREATE_XGRID_UTILS_ACC_H_ -#define CREATE_XGRID_UTILS_ACC_H_ +#ifndef CREATE_XGRID_UTILS_GPU_H_ +#define CREATE_XGRID_UTILS_GPU_H_ -#include "globals_acc.h" +#include "globals_gpu.h" #define MV 50 /* this value is small compare to earth area */ -void get_grid_area_acc(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area); +void get_grid_area_gpu(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area); -void get_grid_great_circle_area_acc(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area); +void get_grid_great_circle_area_gpu(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area); #pragma acc routine seq -void poly_ctrlon_acc(const double *x, const double *y, int n, double clon_in, double *crtlon); +void poly_ctrlon_gpu(const double *x, const double *y, int n, double clon_in, double *crtlon); #pragma acc routine seq -void poly_ctrlat_acc(const double *x, const double *y, int n, double *crtlat); +void poly_ctrlat_gpu(const double *x, const double *y, int n, double *crtlat); #pragma acc routine seq -int clip_2dx2d_acc(const double lon1_in[], const double lat1_in[], int n1_in, +int clip_2dx2d_gpu(const double lon1_in[], const double lat1_in[], int n1_in, const double lon2_in[], const double lat2_in[], int n2_in, double lon_out[], double lat_out[]); -int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], const double z1_in[], int n1_in, +int 
clip_2dx2d_great_circle_gpu(const double x1_in[], const double y1_in[], const double z1_in[], int n1_in, const double x2_in[], const double y2_in[], const double z2_in [], int n2_in, double x_out[], double y_out[], double z_out[]); -void get_grid_cell_struct_acc( const int nlon, const int nlat, const Grid_config *output_grid, +void get_grid_cell_struct_gpu( const int nlon, const int nlat, const Grid_config *output_grid, Grid_cells_struct_config *grid_cells); -void free_grid_cell_struct_acc( const int ncells, Grid_cells_struct_config *grid_cells); +void free_grid_cell_struct_gpu( const int ncells, Grid_cells_struct_config *grid_cells); #pragma acc routine seq -void get_cell_vertices_acc( const int ij, const int nlon, const double *lon, const double *lat, double *x, double *y ); +void get_cell_vertices_gpu( const int ij, const int nlon, const double *lon, const double *lat, double *x, double *y ); -void create_upbound_nxcells_arrays_on_device_acc(const int n, int **approx_nxcells_per_ij1, +void create_upbound_nxcells_arrays_on_device_gpu(const int n, int **approx_nxcells_per_ij1, int **ij2_start, int **ij2_end); -void free_upbound_nxcells_arrays_acc( const int n, int **approx_nxcells_per_ij1, +void free_upbound_nxcells_arrays_gpu( const int n, int **approx_nxcells_per_ij1, int **ij2_start, int **ij2_end); -void copy_data_to_xgrid_on_device_acc(const int nxcells, const int input_ncells, const int upbound_nxcells, +void copy_data_to_xgrid_on_device_gpu(const int nxcells, const int input_ncells, const int upbound_nxcells, int *xcells_per_ij1, double *xcell_clon, double *xcell_clat, int *approx_xcells_per_ij1, int *parent_input_indices, int *parent_output_indices, double *xcell_areas, Interp_per_input_tile *interp_for_input_tile); diff --git a/tools/libfrencutils_acc/general_utils_acc.c b/tools/libfrencutils_gpu/general_utils_gpu.c similarity index 79% rename from tools/libfrencutils_acc/general_utils_acc.c rename to tools/libfrencutils_gpu/general_utils_gpu.c index 
ffe8ac8e..eb23b67c 100644 --- a/tools/libfrencutils_acc/general_utils_acc.c +++ b/tools/libfrencutils_gpu/general_utils_gpu.c @@ -25,29 +25,29 @@ #include #include #include -#include "general_utils_acc.h" -#include "globals_acc.h" +#include "general_utils_gpu.h" +#include "globals_gpu.h" -const double from_pole_threshold_rad_acc = 0.0174533; // 1.0 deg +const double from_pole_threshold_rad_gpu = 0.0174533; // 1.0 deg //reproduce siena was removed -int rotate_poly_flag_acc = 0; -double the_rotation_matrix_acc[3][3] = { 0 }; +int rotate_poly_flag_gpu = 0; +double the_rotation_matrix_gpu[3][3] = { 0 }; #pragma acc routine seq -void set_rotate_poly_true_acc(void){ - rotate_poly_flag_acc = 1; - set_the_rotation_matrix_acc(); +void set_rotate_poly_true_gpu(void){ + rotate_poly_flag_gpu = 1; + set_the_rotation_matrix_gpu(); } -struct Node_acc *nodeList_acc=NULL; -int curListPos_acc=0; +struct Node_gpu *nodeList_gpu=NULL; +int curListPos_gpu=0; -#pragma acc declare copyin(from_pole_threshold_rad_acc) -#pragma acc declare copyin(rotate_poly_flag_acc) -#pragma acc declare copyin(nodeList_acc) -#pragma acc declare copyin(curListPos_acc) -#pragma acc declare copyin(the_rotation_matrix_acc) +#pragma acc declare copyin(from_pole_threshold_rad_gpu) +#pragma acc declare copyin(rotate_poly_flag_gpu) +#pragma acc declare copyin(nodeList_gpu) +#pragma acc declare copyin(curListPos_gpu) +#pragma acc declare copyin(the_rotation_matrix_gpu) /********************************************************************* @@ -63,7 +63,7 @@ int curListPos_acc=0; array: array of data points (must be monotonically increasing) ia : size of array. 
********************************************************************/ -int nearest_index_acc(double value, const double *array, int ia) +int nearest_index_gpu(double value, const double *array, int ia) { int index, i; int keep_going; @@ -94,7 +94,7 @@ int nearest_index_acc(double value, const double *array, int ia) double maxval_double(int size, double *data) get the maximum value of double array *******************************************************************************/ -double maxval_double_acc(int size, const double *data) +double maxval_double_gpu(int size, const double *data) { int n; double maxval; @@ -113,7 +113,7 @@ double maxval_double_acc(int size, const double *data) double minval_double(int size, double *data) get the minimum value of double array *******************************************************************************/ -double minval_double_acc(int size, const double *data) +double minval_double_gpu(int size, const double *data) { int n; double minval; @@ -131,7 +131,7 @@ double minval_double_acc(int size, const double *data) double avgval_double(int size, double *data) get the average value of double array *******************************************************************************/ -double avgval_double_acc(int size, const double *data) +double avgval_double_gpu(int size, const double *data) { int n; double avgval; @@ -149,7 +149,7 @@ double avgval_double_acc(int size, const double *data) void latlon2xyz Routine to map (lon, lat) to (x,y,z) ******************************************************************************/ -void latlon2xyz_acc(int size, const double *lon, const double *lat, double *x, double *y, double *z) +void latlon2xyz_gpu(int size, const double *lon, const double *lat, double *x, double *y, double *z) { int n; @@ -165,7 +165,7 @@ void latlon2xyz_acc(int size, const double *lon, const double *lat, double *x, d void xyz2laton(np, p, xs, ys) Transfer cartesian coordinates to spherical coordinates 
----------------------------------------------------------*/ -void xyz2latlon_acc( int np, const double *x, const double *y, const double *z, double *lon, double *lat) +void xyz2latlon_gpu( int np, const double *x, const double *y, const double *z, double *lon, double *lat) { double xx, yy, zz; @@ -292,7 +292,7 @@ void xyz2latlon_acc( int np, const double *x, const double *y, const double *z, So, I2=\integral dx sin(arctan(tan(lat0)*cos(x-lon0))) can be shown to give an accurate estimate of grid cell areas surrounding the pole without a bump. ----------------------------------------------------------------------------*/ -double poly_area_main_acc(const double x[], const double y[], int n) { +double poly_area_main_gpu(const double x[], const double y[], int n) { double area = 0.0; int i; @@ -346,7 +346,7 @@ double poly_area_main_acc(const double x[], const double y[], int n) { TODO: The tiling error reported by make_coupler mosaic may be non-zero when using this feature. This may be developped in the future. */ -double poly_area_acc(const double xo[], const double yo[], int n) { +double poly_area_gpu(const double xo[], const double yo[], int n) { double area_pa = 0.0; double area_par = 0.0; int pole = 0; @@ -355,14 +355,14 @@ double poly_area_acc(const double xo[], const double yo[], int n) { double xr[8]; // rotated lon double yr[8]; // rotated lat - if (rotate_poly_flag_acc == 0) { - area_pa = poly_area_main_acc(xo, yo, n); + if (rotate_poly_flag_gpu == 0) { + area_pa = poly_area_main_gpu(xo, yo, n); return area_pa; } else { // anything near enough to the pole gets rotated - pole = is_near_pole_acc(yo, n); - crosses = crosses_pole_acc(xo, n); + pole = is_near_pole_gpu(yo, n); + crosses = crosses_pole_gpu(xo, n); if (crosses == 1 && pole == 0) { printf("ERROR poly_area: crosses == 1 && pole == 0\n"); } @@ -371,15 +371,15 @@ double poly_area_acc(const double xo[], const double yo[], int n) { if (n > 8) { printf("ERROR poly_area: n > 8. 
n=%d,n\n"); } - rotate_poly_acc(xo, yo, n, xr, yr); + rotate_poly_gpu(xo, yo, n, xr, yr); - int pole2 = is_near_pole_acc(yr, n); + int pole2 = is_near_pole_gpu(yr, n); if (pole2 == 1) { printf("ERROR poly_area: pole2 == 1\n"); } - area_par = poly_area_main_acc(xr, yr, n); + area_par = poly_area_main_gpu(xr, yr, n); } else { - area_pa = poly_area_main_acc(xo, yo, n); + area_pa = poly_area_main_gpu(xo, yo, n); } if (pole == 1) { @@ -390,7 +390,7 @@ double poly_area_acc(const double xo[], const double yo[], int n) { } } -int delete_vtx_acc(double x[], double y[], int n, int n_del) +int delete_vtx_gpu(double x[], double y[], int n, int n_del) { for (;n_del=HPI-TOLERANCE) pole = 1; if (0&&pole) { printf("fixing pole cell\n"); - v_print_acc(x, y, nn); + v_print_gpu(x, y, nn); printf("---------"); } @@ -440,11 +440,11 @@ int fix_lon_acc(double x[], double y[], int n, double tlon) int im=(i+nn-1)%nn, ip=(i+1)%nn; if (y[im]==y[i] && y[ip]==y[i]) { - nn = delete_vtx_acc(x, y, nn, i); + nn = delete_vtx_gpu(x, y, nn, i); i--; } else if (y[im]!=y[i] && y[ip]!=y[i]) { - nn = insert_vtx_acc(x, y, nn, i, x[i], y[i]); + nn = insert_vtx_gpu(x, y, nn, i, x[i], y[i]); i++; } } @@ -468,8 +468,8 @@ int fix_lon_acc(double x[], double y[], int n, double tlon) double x2=x[i]; double ypole= HPI; if(y[i]<0.0) ypole = -HPI ; - nn = insert_vtx_acc(x, y, nn, i, x2, ypole); - nn = insert_vtx_acc(x, y, nn, i, x1, ypole); + nn = insert_vtx_gpu(x, y, nn, i, x2, ypole); + nn = insert_vtx_gpu(x, y, nn, i, x1, ypole); break; } } @@ -487,8 +487,8 @@ int fix_lon_acc(double x[], double y[], int n, double tlon) else if (dx > M_PI) for (i=0;i MAXNODELIST) printf("ERROR getNext_acc: curListPos_acc >= MAXNODELIST\n"); + temp = nodeList_gpu+curListPos_gpu; + curListPos_gpu++; + if(curListPos_gpu > MAXNODELIST) printf("ERROR getNext_gpu: curListPos_gpu >= MAXNODELIST\n"); return (temp); } -void initNode_acc(struct Node_acc *node) +void initNode_gpu(struct Node_gpu *node) { node->x = 0; node->y = 0; @@ -718,15 
+718,15 @@ void initNode_acc(struct Node_acc *node) node->intersect = 0; node->inbound = 0; node->isInside = 0; - node->Next_acc = NULL; + node->Next_gpu = NULL; node->initialized=0; } -void addEnd_acc(struct Node_acc *list, double x, double y, double z, int intersect, double u, int inbound, int inside) +void addEnd_gpu(struct Node_gpu *list, double x, double y, double z, int intersect, double u, int inbound, int inside) { - struct Node_acc *temp=NULL; + struct Node_gpu *temp=NULL; if(list == NULL) printf("ERROR addEnd: list is NULL\n"); @@ -735,16 +735,16 @@ void addEnd_acc(struct Node_acc *list, double x, double y, double z, int interse /* (x,y,z) might already in the list when intersect is true and u=0 or 1 */ temp = list; while (temp) { - if(samePoint_acc(temp->x, temp->y, temp->z, x, y, z)) return; - temp=temp->Next_acc; + if(samePoint_gpu(temp->x, temp->y, temp->z, x, y, z)) return; + temp=temp->Next_gpu; } temp = list; - while(temp->Next_acc) - temp=temp->Next_acc; + while(temp->Next_gpu) + temp=temp->Next_gpu; /* Append at the end of the list. 
*/ - temp->Next_acc = getNext_acc(); - temp = temp->Next_acc; + temp->Next_gpu = getNext_gpu(); + temp = temp->Next_gpu; } else { temp = list; @@ -762,13 +762,13 @@ void addEnd_acc(struct Node_acc *list, double x, double y, double z, int interse /* return 1 if the point (x,y,z) is added in the list, return 0 if it is already in the list */ -int addIntersect_acc(struct Node_acc *list, double x, double y, double z, int intersect, double u1, double u2, int inbound, +int addIntersect_gpu(struct Node_gpu *list, double x, double y, double z, int intersect, double u1, double u2, int inbound, int is1, int ie1, int is2, int ie2) { double u1_cur, u2_cur; int i1_cur, i2_cur; - struct Node_acc *temp=NULL; + struct Node_gpu *temp=NULL; if(list == NULL) printf("ERROR addEnd: list is NULL\n"); @@ -791,13 +791,13 @@ int addIntersect_acc(struct Node_acc *list, double x, double y, double z, int in while(temp) { if( temp->u == u1_cur && temp->subj_index == i1_cur) return 0; if( temp->u_clip == u2_cur && temp->clip_index == i2_cur) return 0; - if( !temp->Next_acc ) break; - temp=temp->Next_acc; + if( !temp->Next_gpu ) break; + temp=temp->Next_gpu; } /* Append at the end of the list. 
*/ - temp->Next_acc = getNext_acc(); - temp = temp->Next_acc; + temp->Next_gpu = getNext_gpu(); + temp = temp->Next_gpu; } else { temp = list; @@ -819,9 +819,9 @@ int addIntersect_acc(struct Node_acc *list, double x, double y, double z, int in } -int length_acc(struct Node_acc *list) +int length_gpu(struct Node_gpu *list) { - struct Node_acc *cur_ptr=NULL; + struct Node_gpu *cur_ptr=NULL; int count=0; cur_ptr=list; @@ -829,14 +829,14 @@ int length_acc(struct Node_acc *list) while(cur_ptr) { if(cur_ptr->initialized ==0) break; - cur_ptr=cur_ptr->Next_acc; + cur_ptr=cur_ptr->Next_gpu; count++; } return(count); } /* two points are the same if there are close enough */ -int samePoint_acc(double x1, double y1, double z1, double x2, double y2, double z2) +int samePoint_gpu(double x1, double y1, double z1, double x2, double y2, double z2) { if( fabs(x1-x2) > EPSLN10 || fabs(y1-y2) > EPSLN10 || fabs(z1-z2) > EPSLN10 ) return 0; @@ -846,7 +846,7 @@ int samePoint_acc(double x1, double y1, double z1, double x2, double y2, double -int sameNode_acc(struct Node_acc node1, struct Node_acc node2) +int sameNode_gpu(struct Node_gpu node1, struct Node_gpu node2) { if( node1.x == node2.x && node1.y == node2.y && node1.z==node2.z ) return 1; @@ -855,37 +855,37 @@ int sameNode_acc(struct Node_acc node1, struct Node_acc node2) } -void addNode_acc(struct Node_acc *list, struct Node_acc inNode_acc) +void addNode_gpu(struct Node_gpu *list, struct Node_gpu inNode_gpu) { - addEnd_acc(list, inNode_acc.x, inNode_acc.y, inNode_acc.z, inNode_acc.intersect, inNode_acc.u, inNode_acc.inbound, inNode_acc.isInside); + addEnd_gpu(list, inNode_gpu.x, inNode_gpu.y, inNode_gpu.z, inNode_gpu.intersect, inNode_gpu.u, inNode_gpu.inbound, inNode_gpu.isInside); } -struct Node_acc *getNode_acc(struct Node_acc *list, struct Node_acc inNode_acc) +struct Node_gpu *getNode_gpu(struct Node_gpu *list, struct Node_gpu inNode_gpu) { - struct Node_acc *thisNode_acc=NULL; - struct Node_acc *temp=NULL; + struct Node_gpu 
*thisNode_gpu=NULL; + struct Node_gpu *temp=NULL; temp = list; while( temp ) { - if( sameNode_acc( *temp, inNode_acc ) ) { - thisNode_acc = temp; + if( sameNode_gpu( *temp, inNode_gpu ) ) { + thisNode_gpu = temp; temp = NULL; break; } - temp = temp->Next_acc; + temp = temp->Next_gpu; } - return thisNode_acc; + return thisNode_gpu; } -struct Node_acc *getNextNode_acc(struct Node_acc *list) +struct Node_gpu *getNextNode_gpu(struct Node_gpu *list) { - return list->Next_acc; + return list->Next_gpu; } -void copyNode_acc(struct Node_acc *node_out, struct Node_acc node_in) +void copyNode_gpu(struct Node_gpu *node_out, struct Node_gpu node_in) { node_out->x = node_in.x; @@ -894,30 +894,30 @@ void copyNode_acc(struct Node_acc *node_out, struct Node_acc node_in) node_out->u = node_in.u; node_out->intersect = node_in.intersect; node_out->inbound = node_in.inbound; - node_out->Next_acc = NULL; + node_out->Next_gpu = NULL; node_out->initialized = node_in.initialized; node_out->isInside = node_in.isInside; } -void printNode_acc(struct Node_acc *list, char *str) +void printNode_gpu(struct Node_gpu *list, char *str) { - struct Node_acc *temp; + struct Node_gpu *temp; - if(list == NULL) printf("ERROR printNode_acc: list is NULL\n"); + if(list == NULL) printf("ERROR printNode_gpu: list is NULL\n"); if(str) printf(" %s \n", str); temp = list; while(temp) { if(temp->initialized ==0) break; printf(" (x, y, z, interset, inbound, isInside) = (%19.15f,%19.15f,%19.15f,%d,%d,%d)\n", temp->x, temp->y, temp->z, temp->intersect, temp->inbound, temp->isInside); - temp = temp->Next_acc; + temp = temp->Next_gpu; } printf("\n"); } -int intersectInList_acc(struct Node_acc *list, double x, double y, double z) +int intersectInList_gpu(struct Node_gpu *list, double x, double y, double z) { - struct Node_acc *temp; + struct Node_gpu *temp; int found=0; temp = list; @@ -927,7 +927,7 @@ int intersectInList_acc(struct Node_acc *list, double x, double y, double z) found = 1; break; } - 
temp=temp->Next_acc; + temp=temp->Next_gpu; } if (!found) printf("ERROR intersectInList: point (x,y,z) is not found in the list\n"); if( temp->intersect == 2 ) @@ -942,11 +942,11 @@ int intersectInList_acc(struct Node_acc *list, double x, double y, double z) after (x2,y2,z2) is an intersection, if u is greater than the u value of the intersection, insert after, otherwise insert before */ -void insertIntersect_acc(struct Node_acc *list, double x, double y, double z, double u1, double u2, int inbound, +void insertIntersect_gpu(struct Node_gpu *list, double x, double y, double z, double u1, double u2, int inbound, double x2, double y2, double z2) { - struct Node_acc *temp1=NULL, *temp2=NULL; - struct Node_acc *temp; + struct Node_gpu *temp1=NULL, *temp2=NULL; + struct Node_gpu *temp; double u_cur; int found=0; @@ -957,7 +957,7 @@ void insertIntersect_acc(struct Node_acc *list, double x, double y, double z, do found = 1; break; } - temp1=temp1->Next_acc; + temp1=temp1->Next_gpu; } if (!found) printf("ERROR inserAfter: point (x,y,z) is not found in the list\n"); @@ -965,7 +965,7 @@ void insertIntersect_acc(struct Node_acc *list, double x, double y, double z, do u_cur = u1; if(u1 == 1) { u_cur = 0; - temp1 = temp1->Next_acc; + temp1 = temp1->Next_gpu; if(!temp1) temp1 = list; } if(u_cur==0) { @@ -980,12 +980,12 @@ void insertIntersect_acc(struct Node_acc *list, double x, double y, double z, do /* when u2 != 0 and u2 !=1, can decide if one end of the point is outside depending on inbound value */ if(u2 != 0 && u2 != 1) { - if(inbound == 1) { /* goes outside, then temp1->Next_acc is an outside point */ + if(inbound == 1) { /* goes outside, then temp1->Next_gpu is an outside point */ /* find the next non-intersect point */ - temp2 = temp1->Next_acc; + temp2 = temp1->Next_gpu; if(!temp2) temp2 = list; while(temp2->intersect) { - temp2=temp2->Next_acc; + temp2=temp2->Next_gpu; if(!temp2) temp2 = list; } @@ -996,7 +996,7 @@ void insertIntersect_acc(struct Node_acc *list, 
double x, double y, double z, do } } - temp2 = temp1->Next_acc; + temp2 = temp1->Next_gpu; while ( temp2 ) { if( temp2->intersect == 1 ) { if( temp2->u > u_cur ) { @@ -1006,11 +1006,11 @@ void insertIntersect_acc(struct Node_acc *list, double x, double y, double z, do else break; temp1 = temp2; - temp2 = temp2->Next_acc; + temp2 = temp2->Next_gpu; } /* assign value */ - temp = getNext_acc(); + temp = getNext_gpu(); temp->x = x; temp->y = y; temp->z = z; @@ -1019,14 +1019,14 @@ void insertIntersect_acc(struct Node_acc *list, double x, double y, double z, do temp->inbound = inbound; temp->isInside = 1; temp->initialized = 1; - temp1->Next_acc = temp; - temp->Next_acc = temp2; + temp1->Next_gpu = temp; + temp->Next_gpu = temp2; } -double gridArea_acc(struct Node_acc *grid) { +double gridArea_gpu(struct Node_gpu *grid) { double x[20], y[20], z[20]; - struct Node_acc *temp=NULL; + struct Node_gpu *temp=NULL; double area; int n; @@ -1037,35 +1037,35 @@ double gridArea_acc(struct Node_acc *grid) { y[n] = temp->y; z[n] = temp->z; n++; - temp = temp->Next_acc; + temp = temp->Next_gpu; } - area = great_circle_area_acc(n, x, y, z); + area = great_circle_area_gpu(n, x, y, z); return area; } -int isIntersect_acc(struct Node_acc node) { +int isIntersect_gpu(struct Node_gpu node) { return node.intersect; } -int getInbound_acc( struct Node_acc node ) +int getInbound_gpu( struct Node_gpu node ) { return node.inbound; } -struct Node_acc *getLast_acc(struct Node_acc *list) +struct Node_gpu *getLast_gpu(struct Node_gpu *list) { - struct Node_acc *temp1; + struct Node_gpu *temp1; temp1 = list; if( temp1 ) { - while( temp1->Next_acc ) { - temp1 = temp1->Next_acc; + while( temp1->Next_gpu ) { + temp1 = temp1->Next_gpu; } } @@ -1073,24 +1073,24 @@ struct Node_acc *getLast_acc(struct Node_acc *list) } -int getFirstInbound_acc( struct Node_acc *list, struct Node_acc *nodeOut) +int getFirstInbound_gpu( struct Node_gpu *list, struct Node_gpu *nodeOut) { - struct Node_acc *temp=NULL; + struct 
Node_gpu *temp=NULL; temp=list; while(temp) { if( temp->inbound == 2 ) { - copyNode_acc(nodeOut, *temp); + copyNode_gpu(nodeOut, *temp); return 1; } - temp=temp->Next_acc; + temp=temp->Next_gpu; } return 0; } -void getCoordinate_acc(struct Node_acc node, double *x, double *y, double *z) +void getCoordinate_gpu(struct Node_gpu node, double *x, double *y, double *z) { @@ -1100,7 +1100,7 @@ void getCoordinate_acc(struct Node_acc node, double *x, double *y, double *z) } -void getCoordinates_acc(struct Node_acc *node, double *p) +void getCoordinates_gpu(struct Node_gpu *node, double *p) { @@ -1110,7 +1110,7 @@ void getCoordinates_acc(struct Node_acc *node, double *p) } -void setCoordinate_acc(struct Node_acc *node, double x, double y, double z) +void setCoordinate_gpu(struct Node_gpu *node, double x, double y, double z) { @@ -1124,16 +1124,16 @@ void setCoordinate_acc(struct Node_acc *node, double x, double y, double z) this will also set some inbound value of the points in list1 */ -void setInbound_acc(struct Node_acc *interList, struct Node_acc *list) +void setInbound_gpu(struct Node_gpu *interList, struct Node_gpu *list) { - struct Node_acc *temp1=NULL, *temp=NULL; - struct Node_acc *temp1_prev=NULL, *temp1_next=NULL; + struct Node_gpu *temp1=NULL, *temp=NULL; + struct Node_gpu *temp1_prev=NULL, *temp1_next=NULL; int prev_is_inside, next_is_inside; /* for each point in interList, search through list to decide the inbound value the interList point */ /* For each inbound point, the prev node should be outside and the next is inside. 
*/ - if(length_acc(interList) == 0) return; + if(length_gpu(interList) == 0) return; temp = interList; @@ -1145,14 +1145,14 @@ void setInbound_acc(struct Node_acc *interList, struct Node_acc *list) temp1_prev = NULL; temp1_next = NULL; while(temp1) { - if(sameNode_acc(*temp1, *temp)) { - if(!temp1_prev) temp1_prev = getLast_acc(list); - temp1_next = temp1->Next_acc; + if(sameNode_gpu(*temp1, *temp)) { + if(!temp1_prev) temp1_prev = getLast_gpu(list); + temp1_next = temp1->Next_gpu; if(!temp1_next) temp1_next = list; break; } temp1_prev = temp1; - temp1 = temp1->Next_acc; + temp1 = temp1->Next_gpu; } if(!temp1_next) printf("ERROR from create_xgrid.c: temp is not in list1\n"); if( temp1_prev->isInside == 0 && temp1_next->isInside == 1) @@ -1160,11 +1160,11 @@ void setInbound_acc(struct Node_acc *interList, struct Node_acc *list) else temp->inbound = 1; } - temp=temp->Next_acc; + temp=temp->Next_gpu; } } -int isInside_acc(struct Node_acc *node) { +int isInside_gpu(struct Node_gpu *node) { if(node->isInside == -1) printf("ERROR from mosaic_util.c: node->isInside is not set\n"); return(node->isInside); @@ -1174,12 +1174,12 @@ int isInside_acc(struct Node_acc *node) { /* #define debug_test_create_xgrid */ /* check if node is inside polygon list or not */ - int insidePolygon_acc( struct Node_acc *node, struct Node_acc *list) + int insidePolygon_gpu( struct Node_gpu *node, struct Node_gpu *list) { int i, ip, is_inside; double pnt0[3], pnt1[3], pnt2[3]; double anglesum; - struct Node_acc *p1=NULL, *p2=NULL; + struct Node_gpu *p1=NULL, *p2=NULL; anglesum = 0; @@ -1188,7 +1188,7 @@ int isInside_acc(struct Node_acc *node) { pnt0[2] = node->z; p1 = list; - p2 = list->Next_acc; + p2 = list->Next_gpu; is_inside = 0; @@ -1199,10 +1199,10 @@ int isInside_acc(struct Node_acc *node) { pnt2[0] = p2->x; pnt2[1] = p2->y; pnt2[2] = p2->z; - if(samePoint_acc(pnt0[0], pnt0[1], pnt0[2], pnt1[0], pnt1[1], pnt1[2])) return 1; - anglesum += spherical_angle_acc(pnt0, pnt2, pnt1); - p1 = 
p1->Next_acc; - p2 = p2->Next_acc; + if(samePoint_gpu(pnt0[0], pnt0[1], pnt0[2], pnt1[0], pnt1[1], pnt1[2])) return 1; + anglesum += spherical_angle_gpu(pnt0, pnt2, pnt1); + p1 = p1->Next_gpu; + p2 = p2->Next_gpu; if(p2==NULL)p2 = list; } @@ -1215,7 +1215,7 @@ int isInside_acc(struct Node_acc *node) { } -int inside_a_polygon_acc(double *lon1, double *lat1, int *npts, double *lon2, double *lat2) +int inside_a_polygon_gpu(double *lon1, double *lat1, int *npts, double *lon2, double *lat2) { double x2[20], y2[20], z2[20]; @@ -1223,37 +1223,37 @@ int inside_a_polygon_acc(double *lon1, double *lat1, int *npts, double *lon2, do double min_x2, max_x2, min_y2, max_y2, min_z2, max_z2; int isinside, i; - struct Node_acc *grid1=NULL, *grid2=NULL; + struct Node_gpu *grid1=NULL, *grid2=NULL; /* first convert to cartesian grid */ - latlon2xyz_acc(*npts, lon2, lat2, x2, y2, z2); - latlon2xyz_acc(1, lon1, lat1, &x1, &y1, &z1); + latlon2xyz_gpu(*npts, lon2, lat2, x2, y2, z2); + latlon2xyz_gpu(1, lon1, lat1, &x1, &y1, &z1); - max_x2 = maxval_double_acc(*npts, x2); + max_x2 = maxval_double_gpu(*npts, x2); if(x1 >= max_x2+RANGE_CHECK_CRITERIA) return 0; - min_x2 = minval_double_acc(*npts, x2); + min_x2 = minval_double_gpu(*npts, x2); if(min_x2 >= x1+RANGE_CHECK_CRITERIA) return 0; - max_y2 = maxval_double_acc(*npts, y2); + max_y2 = maxval_double_gpu(*npts, y2); if(y1 >= max_y2+RANGE_CHECK_CRITERIA) return 0; - min_y2 = minval_double_acc(*npts, y2); + min_y2 = minval_double_gpu(*npts, y2); if(min_y2 >= y1+RANGE_CHECK_CRITERIA) return 0; - max_z2 = maxval_double_acc(*npts, z2); + max_z2 = maxval_double_gpu(*npts, z2); if(z1 >= max_z2+RANGE_CHECK_CRITERIA) return 0; - min_z2 = minval_double_acc(*npts, z2); + min_z2 = minval_double_gpu(*npts, z2); if(min_z2 >= z1+RANGE_CHECK_CRITERIA) return 0; - /* add x2,y2,z2 to a Node_acc */ - rewindList_acc(); - grid1 = getNext_acc(); - grid2 = getNext_acc(); + /* add x2,y2,z2 to a Node_gpu */ + rewindList_gpu(); + grid1 = getNext_gpu(); + grid2 = 
getNext_gpu(); - addEnd_acc(grid1, x1, y1, z1, 0, 0, 0, -1); - for(i=0; i<*npts; i++) addEnd_acc(grid2, x2[i], y2[i], z2[i], 0, 0, 0, -1); + addEnd_gpu(grid1, x1, y1, z1, 0, 0, 0, -1); + for(i=0; i<*npts; i++) addEnd_gpu(grid2, x2[i], y2[i], z2[i], 0, 0, 0, -1); - isinside = insidePolygon_acc(grid1, grid2); + isinside = insidePolygon_gpu(grid1, grid2); return isinside; @@ -1264,10 +1264,10 @@ int inside_a_polygon_acc(double *lon1, double *lat1, int *npts, double *lon2, do within a threshold from the CGS poles (i.e. near +- Pi/2). Otherwise returns 0. */ -int is_near_pole_acc(const double y[], int n) { +int is_near_pole_gpu(const double y[], int n) { int pole = 0; for (int i = 0; i < n; i++) { - if ((fabs(y[i]) + from_pole_threshold_rad_acc) > M_PI_2 ) { + if ((fabs(y[i]) + from_pole_threshold_rad_gpu) > M_PI_2 ) { pole = 1; break; } @@ -1280,7 +1280,7 @@ int is_near_pole_acc(const double y[], int n) { sides of a pole. i.e. if the longitudes are seperated by about Pi. Note, for realistic data (not huge polygons), if crosses_pole() reutrns 1, so should is_near_pole(). */ -int crosses_pole_acc(const double x[] , int n) { +int crosses_pole_gpu(const double x[] , int n) { int has_cl = 0; for (int i = 0; i < n; i++) { int im = (i + n - 1) % n; @@ -1295,14 +1295,14 @@ int crosses_pole_acc(const double x[] , int n) { } /* - Set the_rotation_matrix_acc global variable. + Set the_rotation_matrix_gpu global variable. The rotation is 45 degrees and about the vector with orign at earths center and the direction <0,1,1>/SQRT(2). I.e. a big rotation away from the pole if what is being rotaed is near a pole. For rotation matricies formulas and examples, see F.S.Hill, Computer Graphics Using OpenGL, @nd ed., Chapter 5.3. 
*/ -void set_the_rotation_matrix_acc() { +void set_the_rotation_matrix_gpu() { double is2 = 1.0 /M_SQRT2; double m00 = 0; @@ -1315,14 +1315,14 @@ void set_the_rotation_matrix_acc() { for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { - the_rotation_matrix_acc[i][j] = m[i][j]; + the_rotation_matrix_gpu[i][j] = m[i][j]; } } -#pragma acc data update device(the_rotation_matrix_acc[:3][:3]) +#pragma acc data update device(the_rotation_matrix_gpu[:3][:3]) } /* Rotate point given the passed in rotation matrix */ -void rotate_point_acc(double rv[], double rmat [][3]) { +void rotate_point_gpu(double rv[], double rmat [][3]) { double v[3]; for (int i = 0; i < 3; i++) { @@ -1338,19 +1338,19 @@ void rotate_point_acc(double rv[], double rmat [][3]) { /* Rotate polygon defined by x[], y[] points and store in xr[], yr[]*/ -void rotate_poly_acc(const double x[], const double y[], const int n, double xr[], double yr[]) { +void rotate_poly_gpu(const double x[], const double y[], const int n, double xr[], double yr[]) { double sv[2]; //a rotated lat/lon double rv[3]; //rotated xyz point for (int i = 0; i < n; i++) { - latlon2xyz_acc(1, &x[i], &y[i], &rv[0], &rv[1], &rv[2]); - rotate_point_acc(rv, the_rotation_matrix_acc); - xyz2latlon_acc(1, &rv[0], &rv[1], &rv[2], &sv[0], &sv[1]); + latlon2xyz_gpu(1, &x[i], &y[i], &rv[0], &rv[1], &rv[2]); + rotate_point_gpu(rv, the_rotation_matrix_gpu); + xyz2latlon_gpu(1, &rv[0], &rv[1], &rv[2], &sv[0], &sv[1]); xr[i] = sv[0]; yr[i] = sv[1]; } } -void pimod_acc(double x[],int nn) +void pimod_gpu(double x[],int nn) { for (int i=0;i* > 0, outside, otherwise inside. inner product value = 0 also treate as inside. 
*******************************************************************************/ -int inside_edge_acc(double x0, double y0, double x1, double y1, double x, double y) +int inside_edge_gpu(double x0, double y0, double x1, double y1, double x, double y) { const double SMALL = 1.e-12; double product; @@ -1384,7 +1384,7 @@ int inside_edge_acc(double x0, double y0, double x1, double y1, double x, double returns true if the lines could be intersected, false otherwise. inbound means the direction of (a1,a2) go inside or outside of (q1,q2,q3) */ -int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, double *q3, +int line_intersect_2D_3D_gpu(double *a1, double *a2, double *q1, double *q2, double *q3, double *intersect, double *u_a, double *u_q, int *inbound){ /* Do this intersection by reprsenting the line a1 to a2 as a plane through the @@ -1403,7 +1403,7 @@ int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, dou *inbound = 0; /* first check if any vertices are the same */ - if(samePoint_acc(a1[0], a1[1], a1[2], q1[0], q1[1], q1[2])) { + if(samePoint_gpu(a1[0], a1[1], a1[2], q1[0], q1[1], q1[2])) { *u_a = 0; *u_q = 0; intersect[0] = a1[0]; @@ -1411,7 +1411,7 @@ int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, dou intersect[2] = a1[2]; return 1; } - else if (samePoint_acc(a1[0], a1[1], a1[2], q2[0], q2[1], q2[2])) { + else if (samePoint_gpu(a1[0], a1[1], a1[2], q2[0], q2[1], q2[2])) { *u_a = 0; *u_q = 1; intersect[0] = a1[0]; @@ -1419,7 +1419,7 @@ int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, dou intersect[2] = a1[2]; return 1; } - else if(samePoint_acc(a2[0], a2[1], a2[2], q1[0], q1[1], q1[2])) { + else if(samePoint_gpu(a2[0], a2[1], a2[2], q1[0], q1[1], q1[2])) { *u_a = 1; *u_q = 0; intersect[0] = a2[0]; @@ -1427,7 +1427,7 @@ int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, dou intersect[2] = a2[2]; return 1; } - else if 
(samePoint_acc(a2[0], a2[1], a2[2], q2[0], q2[1], q2[2])) { + else if (samePoint_gpu(a2[0], a2[1], a2[2], q2[0], q2[1], q2[2])) { *u_a = 1; *u_q = 1; intersect[0] = a2[0]; @@ -1448,7 +1448,7 @@ int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, dou plane[7]=0.0; plane[8]=0.0; /* Intersect the segment with the plane */ - is_inter1 = intersect_tri_with_line_acc(plane, a1, a2, plane_p, u_a); + is_inter1 = intersect_tri_with_line_gpu(plane, a1, a2, plane_p, u_a); if(!is_inter1) return 0; @@ -1470,7 +1470,7 @@ int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, dou plane[8]=0.0; /* Intersect the segment with the plane */ - is_inter2 = intersect_tri_with_line_acc(plane, q1, q2, plane_p, u_q); + is_inter2 = intersect_tri_with_line_gpu(plane, q1, q2, plane_p, u_q); if(!is_inter2) return 0; @@ -1483,10 +1483,10 @@ int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, dou u =*u_a; /* The two planes are coincidental */ - vect_cross_acc(a1, a2, c1); - vect_cross_acc(q1, q2, c2); - vect_cross_acc(c1, c2, c3); - coincident = metric_acc(c3); + vect_cross_gpu(a1, a2, c1); + vect_cross_gpu(q1, q2, c2); + vect_cross_gpu(c1, c2, c3); + coincident = metric_gpu(c3); if(fabs(coincident) < EPSLN30) return 0; @@ -1495,7 +1495,7 @@ int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, dou intersect[1]=a1[1] + u*(a2[1]-a1[1]); intersect[2]=a1[2] + u*(a2[2]-a1[2]); - norm = metric_acc( intersect ); + norm = metric_gpu( intersect ); for(i = 0; i < 3; i ++) intersect[i] /= norm; /* when u_q =0 or u_q =1, the following could not decide the inbound value */ @@ -1510,10 +1510,10 @@ int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, dou v2[1] = q3[1]-q2[1]; v2[2] = q3[2]-q2[2]; - vect_cross_acc(v1, v2, c1); - vect_cross_acc(v1, p1, c2); + vect_cross_gpu(v1, v2, c1); + vect_cross_gpu(v1, p1, c2); - sense = dot_acc(c1, c2); + sense = dot_gpu(c1, c2); *inbound = 1; if(sense > 0) 
*inbound = 2; /* v1 going into v2 in CCW sense */ } diff --git a/tools/libfrencutils_acc/general_utils_acc.h b/tools/libfrencutils_gpu/general_utils_gpu.h similarity index 55% rename from tools/libfrencutils_acc/general_utils_acc.h rename to tools/libfrencutils_gpu/general_utils_gpu.h index abb0e4ea..02a2341c 100644 --- a/tools/libfrencutils_acc/general_utils_acc.h +++ b/tools/libfrencutils_gpu/general_utils_gpu.h @@ -25,7 +25,7 @@ #define min(a,b) (ab ? a:b) -struct Node_acc{ +struct Node_gpu{ double x, y, z, u, u_clip; int intersect; /* indicate if this point is an intersection, 0 = no, 1= yes, 2=both intersect and vertices */ int inbound; /* -1 uninitialized, 0 coincident, 1 outbound, 2 inbound */ @@ -33,171 +33,171 @@ struct Node_acc{ int isInside; /* = 1 means one point is inside the other polygon, 0 is not, -1 undecided. */ int subj_index; /* the index of subject point that an intersection follow. */ int clip_index; /* the index of clip point that an intersection follow */ - struct Node_acc *Next_acc; + struct Node_gpu *Next_gpu; }; #pragma acc routine seq -int nearest_index_acc(double value, const double *array, int ia); +int nearest_index_gpu(double value, const double *array, int ia); #pragma acc routine seq -int lon_fix_acc(double *x, double *y, int n_in, double tlon); +int lon_fix_gpu(double *x, double *y, int n_in, double tlon); #pragma acc routine seq -double minval_double_acc(int size, const double *data); +double minval_double_gpu(int size, const double *data); #pragma acc routine seq -double maxval_double_acc(int size, const double *data); +double maxval_double_gpu(int size, const double *data); #pragma acc routine seq -double avgval_double_acc(int size, const double *data); +double avgval_double_gpu(int size, const double *data); #pragma acc routine seq -void latlon2xyz_acc(int size, const double *lon, const double *lat, double *x, double *y, double *z); +void latlon2xyz_gpu(int size, const double *lon, const double *lat, double *x, double *y, 
double *z); #pragma acc routine seq -void xyz2latlon_acc(int size, const double *x, const double *y, const double *z, double *lon, double *lat); +void xyz2latlon_gpu(int size, const double *x, const double *y, const double *z, double *lon, double *lat); #pragma acc routine seq -double poly_area_acc(const double lon[], const double lat[], int n); +double poly_area_gpu(const double lon[], const double lat[], int n); #pragma acc routine seq -int fix_lon_acc(double lon[], double lat[], int n, double tlon); +int fix_lon_gpu(double lon[], double lat[], int n, double tlon); #pragma acc routine seq -double spherical_angle_acc(const double *v1, const double *v2, const double *v3); +double spherical_angle_gpu(const double *v1, const double *v2, const double *v3); #pragma acc routine seq -void vect_cross_acc(const double *p1, const double *p2, double *e ); +void vect_cross_gpu(const double *p1, const double *p2, double *e ); #pragma acc routine seq -double dot_acc(const double *p1, const double *p2); +double dot_gpu(const double *p1, const double *p2); #pragma acc routine seq -double metric_acc(const double *p) ; +double metric_gpu(const double *p) ; #pragma acc routine seq -int intersect_tri_with_line_acc(const double *plane, const double *l1, const double *l2, double *p, double *t); +int intersect_tri_with_line_gpu(const double *plane, const double *l1, const double *l2, double *p, double *t); #pragma acc routine seq -void mult_acc(double m[], double v[], double out_v[]); +void mult_gpu(double m[], double v[], double out_v[]); #pragma acc routine seq -int invert_matrix_3x3_acc(double m[], double m_inv[]); +int invert_matrix_3x3_gpu(double m[], double m_inv[]); #pragma acc routine seq -double great_circle_area_acc(int n, const double *x, const double *y, const double *z); +double great_circle_area_gpu(int n, const double *x, const double *y, const double *z); #pragma acc routine seq -int insidePolygon_acc(struct Node_acc *node, struct Node_acc *list ); +int 
insidePolygon_gpu(struct Node_gpu *node, struct Node_gpu *list ); #pragma acc routine seq -int inside_a_polygon_acc( double *lon1, double *lat1, int *npts, double *lon2, double *lat2); +int inside_a_polygon_gpu( double *lon1, double *lat1, int *npts, double *lon2, double *lat2); #pragma acc routine seq -void rewindList_acc(void); +void rewindList_gpu(void); #pragma acc routine seq -struct Node_acc *getNext_acc(); +struct Node_gpu *getNext_gpu(); #pragma acc routine seq -void initNode_acc(struct Node_acc *node); +void initNode_gpu(struct Node_gpu *node); #pragma acc routine seq -void addEnd_acc(struct Node_acc *list, double x, double y, double z, int intersect, double u, int inbound, int inside); +void addEnd_gpu(struct Node_gpu *list, double x, double y, double z, int intersect, double u, int inbound, int inside); #pragma acc routine seq -int addIntersect_acc(struct Node_acc *list, double x, double y, double z, int intersect, double u1, double u2, +int addIntersect_gpu(struct Node_gpu *list, double x, double y, double z, int intersect, double u1, double u2, int inbound, int is1, int ie1, int is2, int ie2); #pragma acc routine seq -void insertIntersect_acc(struct Node_acc *list, double x, double y, double z, double u1, double u2, int inbound, +void insertIntersect_gpu(struct Node_gpu *list, double x, double y, double z, double u1, double u2, int inbound, double x2, double y2, double z2); #pragma acc routine seq -int length_acc(struct Node_acc *list); +int length_gpu(struct Node_gpu *list); #pragma acc routine seq -int samePoint_acc(double x1, double y1, double z1, double x2, double y2, double z2); +int samePoint_gpu(double x1, double y1, double z1, double x2, double y2, double z2); #pragma acc routine seq -int sameNode_acc(struct Node_acc node1, struct Node_acc node2); +int sameNode_gpu(struct Node_gpu node1, struct Node_gpu node2); #pragma acc routine seq -void addNode_acc(struct Node_acc *list, struct Node_acc nodeIn); +void addNode_gpu(struct Node_gpu *list, 
struct Node_gpu nodeIn); #pragma acc routine seq -struct Node_acc *getNode_acc(struct Node_acc *list, struct Node_acc inNode_acc); +struct Node_gpu *getNode_gpu(struct Node_gpu *list, struct Node_gpu inNode_gpu); #pragma acc routine seq -struct Node_acc *getNextNode_acc(struct Node_acc *list); +struct Node_gpu *getNextNode_gpu(struct Node_gpu *list); #pragma acc routine seq -void copyNode_acc(struct Node_acc *node_out, struct Node_acc node_in); +void copyNode_gpu(struct Node_gpu *node_out, struct Node_gpu node_in); #pragma acc routine seq -void printNode_acc(struct Node_acc *list, char *str); +void printNode_gpu(struct Node_gpu *list, char *str); #pragma acc routine seq -int intersectInList_acc(struct Node_acc *list, double x, double y, double z); +int intersectInList_gpu(struct Node_gpu *list, double x, double y, double z); #pragma acc routine seq -void insertAfter_acc(struct Node_acc *list, double x, double y, double z, int intersect, double u, int inbound, +void insertAfter_gpu(struct Node_gpu *list, double x, double y, double z, int intersect, double u, int inbound, double x2, double y2, double z2); #pragma acc routine seq -double gridArea_acc(struct Node_acc *grid); +double gridArea_gpu(struct Node_gpu *grid); #pragma acc routine seq -int isIntersect_acc(struct Node_acc node); +int isIntersect_gpu(struct Node_gpu node); #pragma acc routine seq -int getInbound_acc( struct Node_acc node ); +int getInbound_gpu( struct Node_gpu node ); #pragma acc routine seq -struct Node_acc *getLast_acc(struct Node_acc *list); +struct Node_gpu *getLast_gpu(struct Node_gpu *list); #pragma acc routine seq -int getFirstInbound_acc( struct Node_acc *list, struct Node_acc *nodeOut); +int getFirstInbound_gpu( struct Node_gpu *list, struct Node_gpu *nodeOut); #pragma acc routine seq -void getCoordinate_acc(struct Node_acc node, double *x, double *y, double *z); +void getCoordinate_gpu(struct Node_gpu node, double *x, double *y, double *z); #pragma acc routine seq -void 
getCoordinates_acc(struct Node_acc *node, double *p); +void getCoordinates_gpu(struct Node_gpu *node, double *p); #pragma acc routine seq -void setCoordinate_acc(struct Node_acc *node, double x, double y, double z); +void setCoordinate_gpu(struct Node_gpu *node, double x, double y, double z); #pragma acc routine seq -void setInbound_acc(struct Node_acc *interList, struct Node_acc *list); +void setInbound_gpu(struct Node_gpu *interList, struct Node_gpu *list); #pragma acc routine seq -int isInside_acc(struct Node_acc *node); +int isInside_gpu(struct Node_gpu *node); #pragma acc routine seq -void set_rotate_poly_true_acc(void); +void set_rotate_poly_true_gpu(void); #pragma acc routine seq -int is_near_pole_acc(const double y[], int n); +int is_near_pole_gpu(const double y[], int n); #pragma acc routine seq -int crosses_pole_acc(const double x[], int n); +int crosses_pole_gpu(const double x[], int n); #pragma acc routine seq -void rotate_point_acc( double rv[], double rmat [][3]); +void rotate_point_gpu( double rv[], double rmat [][3]); #pragma acc routine seq -void rotate_poly_acc(const double x[], const double y[], const int n, double xr[], double yr[]); +void rotate_poly_gpu(const double x[], const double y[], const int n, double xr[], double yr[]); #pragma acc routine seq -void set_the_rotation_matrix_acc(); +void set_the_rotation_matrix_gpu(); #pragma acc routine seq -void pimod_acc(double x[],int nn); +void pimod_gpu(double x[],int nn); #pragma acc routine seq -int inside_edge_acc(double x0, double y0, double x1, double y1, double x, double y); +int inside_edge_gpu(double x0, double y0, double x1, double y1, double x, double y); #pragma acc routine seq -int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, double *q3, +int line_intersect_2D_3D_gpu(double *a1, double *a2, double *q1, double *q2, double *q3, double *intersect, double *u_a, double *u_q, int *inbound); diff --git a/tools/libfrencutils_acc/globals_acc.h 
b/tools/libfrencutils_gpu/globals_gpu.h similarity index 96% rename from tools/libfrencutils_acc/globals_acc.h rename to tools/libfrencutils_gpu/globals_gpu.h index d0391eae..b65784b6 100644 --- a/tools/libfrencutils_acc/globals_acc.h +++ b/tools/libfrencutils_gpu/globals_gpu.h @@ -17,8 +17,8 @@ * License along with FRE-NCTools. If not, see * <http://www.gnu.org/licenses/>. **********************************************************************/ -#ifndef GLOBALS_ACC_H_ -#define GLOBALS_ACC_H_ +#ifndef GLOBALS_GPU_H_ +#define GLOBALS_GPU_H_ #include "globals.h" #include "parameters.h" @@ -47,7 +47,7 @@ typedef struct { int *index; char remap_file[STRING]; int file_exist; -} Interp_config_acc; +} Interp_config_gpu; typedef struct { double *lon_min; diff --git a/tools/libfrencutils_acc/parameters.h b/tools/libfrencutils_gpu/parameters.h similarity index 100% rename from tools/libfrencutils_acc/parameters.h rename to tools/libfrencutils_gpu/parameters.h