diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index affcd031..c9deb9ba 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -128,6 +128,10 @@ if (KOKKOS) add_executable(qr_test test_qr_solve.cpp) target_link_libraries(qr_test ${LINKING_LIBRARIES}) + include_directories(pointcloud) + add_subdirectory(pointcloud) + + if (Matar_ENABLE_TRILINOS) add_executable(anndistributed ann_distributed.cpp) target_link_libraries(anndistributed ${LINKING_LIBRARIES}) diff --git a/examples/laplaceMPI/CMakeLists.txt b/examples/laplaceMPI/CMakeLists.txt index d9d4ec6c..5b114927 100644 --- a/examples/laplaceMPI/CMakeLists.txt +++ b/examples/laplaceMPI/CMakeLists.txt @@ -4,10 +4,10 @@ if (KOKKOS) #find_package(Kokkos REQUIRED) #new find_package(MPI REQUIRED) - #add_executable(laplace_mpi laplace_mpi.cpp) + add_executable(laplace_mpi laplace_mpi.cpp) #add_executable(laplace_mpi simple_mpi.cpp) #add_executable(laplace_mpi mpi_mesh_test.cpp) - add_executable(laplace_mpi simple_halo.cpp) + #add_executable(laplace_mpi simple_halo.cpp) add_definitions(-DHAVE_MPI=1) add_definitions(-DHAVE_KOKKOS=1) diff --git a/examples/pointcloud/CMakeLists.txt b/examples/pointcloud/CMakeLists.txt new file mode 100644 index 00000000..aafb20e9 --- /dev/null +++ b/examples/pointcloud/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.18) + + +if (KOKKOS) + #find_package(Kokkos REQUIRED) #new + + add_definitions(-DHAVE_KOKKOS=1) + if (CUDA) + add_definitions(-DHAVE_CUDA=1) + elseif (HIP) + add_definitions(-DHAVE_HIP=1) + elseif (OPENMP) + add_definitions(-DHAVE_OPENMP=1) + elseif (THREADS) + add_definitions(-DHAVE_THREADS=1) + endif() + + + add_executable(pointcloud-gbl pointcloud-gbl.cpp) + add_executable(pointcloud-rk pointcloud-rk.cpp) + + target_link_libraries(pointcloud-gbl ${LINKING_LIBRARIES}) + target_link_libraries(pointcloud-rk ${LINKING_LIBRARIES}) +endif(KOKKOS) diff --git a/examples/pointcloud/graphics-reader.h b/examples/pointcloud/graphics-reader.h new file mode 100755 index 00000000..0b1eb3cb --- /dev/null +++ b/examples/pointcloud/graphics-reader.h @@ -0,0 +1,796 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ + +// ----------------------------------------------- +// routines to read a graphics file +// +// ----------------------------------------------- + +#include +#include +#include +#include +#include +#include + +#include "matar.h" + +using namespace mtr; + + + +// checks to see if a path exists +bool DoesPathExist(const std::string &s) +{ + struct stat buffer; + return (stat (s.c_str(), &buffer) == 0); +} + +// Code from stackover flow for string delimiter parsing +std::vector split (std::string s, std::string delimiter) { + size_t pos_start = 0, pos_end, delim_len = delimiter.length(); + std::string token; + std::vector res; + + while ((pos_end = s.find (delimiter, pos_start)) != std::string::npos) { + token = s.substr (pos_start, pos_end - pos_start); + pos_start = pos_end + delim_len; + res.push_back (token); + } + + res.push_back (s.substr (pos_start)); + return res; + +} // end of split + + +// retrieves multiple values between [ ] +std::vector extract_list(std::string str) { + + // replace '[' with a space and ']' with a space + std::replace(str.begin(), str.end(), '[', ' '); + std::replace(str.begin(), str.end(), ']', ' '); + + std::vector str_values; + std::vector values; + + // exact the str values into a vector + str_values = split(str, ","); + + // convert the text values into double values + for (auto &word : str_values) { + values.push_back( atof(word.c_str()) ); + } // end for + + return values; + +} // end of extract_list + + + +// from stack overflow on removing blanks in string +template +T remove_if(T beg, T end, P pred) +{ + T dest = beg; + for (T itr = beg;itr != end; ++itr) + if (!pred(*itr)) + *(dest++) = *itr; + return dest; +} + + + + + + + +// This function reads a VTK file +//-------------------------------------------------------- +// +/* + vtk mesh node ordering + 7--------6 + /| /| + / | / | + 4--------5 | + | | | | + | | | | + | 3-----|--2 + | / | / + |/ |/ + 0--------1 + + in the i,j,k format node order is + 0 = (i , j , k ) + 1 = (i+1, j , k ) + 2 = (i+1, j+1, k ) + 3 = (i , j+1, k ) + 4 = (i , j , k+1) + 5 = (i+1, j , k+1) + 6 = (i+1, j+1, k+1) + 7 = (i , j+1, k+1) + + The marching cubes ordering is + (i , j , k ) = 0 + (i+1, j , k ) = 1 + (i+1, j , k+1) = 5 + (i , j , k+1) = 4 + (i , j+1, k ) = 3 + (i+1, j+1, k ) = 2 + (i+1, j+1, k+1) = 6 + (i , j+1, k+1) = 7 + + */ +void readVTK(char * filename, + CArray &pt_coords, + CArray &elem_point_list, + CArray &pt_values, + int &num_points, + int &num_elems, + int &num_points_in_elem, + bool test_vtk_read, + bool verbose_vtk) +{ + + int i; // used for writing information to file + int point_id; // the global id for the point + int elem_id; // the global id for the elem + int this_point; // a local id for a point in a elem (0:7 for a Hexahedral elem) + + int num_dims = 3; + + + std::string token; + + bool found = false; + + std::ifstream in; // FILE *in; + in.open(filename); + + + // look for POINTS + i = 0; + while (found==false) { + std::string str; + std::string delimiter = " "; + std::getline(in, str); + std::vector v = split (str, delimiter); + + // looking for the following text: + // POINTS %d float + if(v[0] == "POINTS"){ + num_points = std::stoi(v[1]); + printf("Num nodes read in %d\n", num_points); + + found=true; + } // end if + + + if (i>1000){ + printf("ERROR: Failed to find POINTS \n"); + break; + } // end if + + i++; + } // end while + + + // allocate memory for point coords and values + pt_coords = CArray (num_points,3); + pt_values = CArray (num_points); + + + // read the point coordinates + for (point_id=0; point_id v = split (str, delimiter); + + for (int dim=0; dim<3; dim++){ + pt_coords(point_id,dim) = std::stod(v[dim]); // double + //if num_dims=2 skip the 3rd value + + // printing all the mesh coordinates + if (verbose_vtk) printf(" %f ", pt_coords(point_id,dim)); + } + if (verbose_vtk) printf("\n"); // printing a space for readability + + } // end for points + found=false; + + + if (verbose_vtk)printf("\n"); + if (verbose_vtk) printf("looking for CELLS \n"); + + // look for CELLS + i = 0; + while (found==false) { + std::string str; + std::getline(in, str); + + std::string delimiter = " "; + std::vector v = split (str, delimiter); + + // looking for the following text: + // CELLS num_elems size + if(v[0] == "CELLS"){ + num_elems = std::stoi(v[1]); + printf("Num elements read in %d\n", num_elems); + + found=true; + } // end if + + + if (i>1000){ + printf("ERROR: Failed to find CELLS \n"); + break; + } // end if + + i++; + } // end while + + + + // allocate memomry for points in each element + elem_point_list = CArray (num_elems,8); // 8 points in a hex + + + // read the point ids in the element + for (elem_id=0; elem_id v = split (str, delimiter); + num_points_in_elem = std::stoi(v[0]); + + for (this_point=0; this_point v = split (str, delimiter); + + // looking for the following text: + // CELLS num_elems size + if(v[0] == "CELL_TYPES"){ + + std::getline(in, str); + elem_type = std::stoi(str); + + found=true; + } // end if + + + if (i>1000){ + printf("ERROR: Failed to find elem_TYPE \n"); + break; + } // end if + + i++; + } // end while + + if (verbose_vtk) printf("elem type = %d \n", elem_type); + // elem types: + // linear hex = 12, linear quad = 9 + found=false; + + // verify mesh has hexahedral elements, which have 8 nodes + if(num_points_in_elem != 8) { + printf("wrong elem type of %d \n", elem_type); + } + + + + // look for the point_var in the POINT_DATA heading + i = 0; + while (found==false) { + std::string str; + std::string delimiter = " "; + std::getline(in, str); + std::vector v = split (str, delimiter); + + // looking for the following text: + // POINT_DATA num_points + if(v[1] == "point_var"){ + + std::getline(in, str); // read next line -- its LOOKUP_TABLE + + // + for(int point_id=0; point_id10000000){ + printf("ERROR: Failed to find point_var in POINT_DATA \n"); + break; + } // end if + + i++; + } // end while + + + found=false; + + + + // testing the file read by painting a part + if(test_vtk_read == true){ + for(int node_id = 0; node_id &pt_coords, + CArray &elem_point_list, + CArray &pt_values, + int &num_points, + int &num_elems, + int &num_points_in_elem, + bool test_tecplot_read, + bool verbose_vtk) +{ + + int i; // used for writing information to file + int point_id; // the global id for the point + int elem_id; // the global id for the elem + + int num_dims = 3; + + + std::string token; + + bool found = false; + + std::ifstream in; // FILE *in; + in.open(filename); + + + // look for POINTS + i = 0; // lines in the file + while (found==false) { + std::string str; + std::string delimiter = ","; + std::getline(in, str); + std::vector v = split (str, delimiter); + + bool found_nodes = false; + bool found_elems = false; + + // loop over the parsed text stored in v and + // am now looking for the following text: + // NODES %d float + for (int text=0; text words = split (v[text], delimiter_words); + + + + for(int a_word=0; a_word=2) found=true; + + if (i>1000){ + printf("ERROR: Failed to find NODES and ELEMENTS \n"); + break; + } // end if + + i++; + } // end while + + + // allocate memory for point coords and values + pt_coords = CArray (num_points,3); + pt_values = CArray (num_points); + + + printf("starting the x,y,z point read and density value \n"); + + // read the point coordinates + for (point_id=0; point_id v = split (str, delimiter); + + int column = 0; + for (int text=0; text 0 && column==3){ + pt_values(point_id) = std::stod(v[text]); // double + + if (verbose_vtk) printf(" %f \n", pt_values(point_id)); + + break; // exit after reading the density on this line + } // end if column is density + + + // if there is text, then it is a number + if(v[text].size() > 0 && column<3){ + + + pt_coords(point_id,column) = std::stod(v[text]); // double + //if num_dims=2 skip the 3rd value + + // printing all the mesh coordinates + if (verbose_vtk) printf(" %f ", pt_coords(point_id,column)); + + column++; // this will make the column = 3 after all dims are saved + } // end if to save coordinates + + + } // end for over the partitioned test + + } // end for points + found=false; + + + + + // allocate memomry for points in each element + num_points_in_elem = 8; + elem_point_list = CArray (num_elems,8); // 8 points in a hex + + + // read the point ids in the element + if (verbose_vtk) printf("Reading the nodes in the element\n"); + for (elem_id=0; elem_id v = split (str, delimiter); + + + int column = 0; + for (int text=0; text 0 && column<8){ + + + elem_point_list(elem_id,column) = std::stod(v[text]) - 1; // Fortran convention + //if num_dims=2 skip the 3rd value + + // printing all the mesh coordinates + // printing details on nodes in the element + if (verbose_vtk) printf(" %d ", elem_point_list(elem_id,column) ); + + column++; // this will make the column = 8 after all node values are saved + } // end if to save points in this element + + + } // end loop over the text + + } // end for + found=false; + + if (verbose_vtk) printf("\n"); // printing a space for readability + + + + + // testing the file read by painting a part + if(test_tecplot_read == true){ + for(int node_id = 0; node_id &pt_coords, + CArray &elem_point_list, + int num_points, + int num_elems, + int num_points_in_elem, + CArray &pt_values) +{ + + int GraphicsNumber = 0; + double Time = 0.0; + + + int i; // used for writing information to file + int point_id; // the global id for the point + int elem_id; // the global id for the elem + int this_point; // a local id for a point in a elem (0:7 for a Hexahedral elem) + + + FILE *out[20]; // the output files that are written to + char name[100]; // char string + + + + std::string directory = "vtk"; + bool path = DoesPathExist(directory); + + // Create the folders for the ensight files + if (path==false) { + i=system("mkdir vtk"); + } + + + + + /* + --------------------------------------------------------------------------- + Write the Geometry file + --------------------------------------------------------------------------- + */ + + + snprintf(name, sizeof(name), "vtk/mesh.vtk"); // mesh file + + + out[0]=fopen(name,"w"); + + + fprintf(out[0],"# vtk DataFile Version 2.0\n"); // part 2 + fprintf(out[0],"Mesh for Fierro\n"); // part 2 + fprintf(out[0],"ASCII \n"); // part 3 + fprintf(out[0],"DATASET UNSTRUCTURED_GRID\n\n"); // part 4 + + fprintf(out[0],"POINTS %d float\n", num_points); + + + // write all components of the point coordinates + for (point_id=0; point_id +#include +#include +#include +#include + + +#include "matar.h" + +#include "lu_solver.hpp" + + +using namespace mtr; + + +// ----------------------------------------------- +// inputs: + + +// the number of nodes in the mesh +const double dx = 0.01; // resolution +const double dy = 0.01; // resolution +const double dz = 0.01; // resolution + + +// the mesh dimensions +// length of the domain is 5 for crazy shape and 1 for sphere +const double XMax = 1.0; +const double YMax = 1.0; +const double ZMax = 1.0; + +const double X0 = 0.0; +const double Y0 = 0.0; +const double Z0 = 0.0; + + +const double isoLevel=0.0; // contour to extract + + +// +// ----------------------------------------------- + + +std::tuple< + CArray, // normal + CArray, CArray, CArray, // v1X, v1Y, v1Z + CArray, CArray, CArray, // v2X, v2Y, v2Z + CArray, CArray, CArray, // v3X, v3Y, v3Z + size_t // n_facets +> +binary_stl_reader(const std::string& path) +{ + std::ifstream in(path, std::ios::binary | std::ios::ate); + if (!in) { std::perror("open"); std::exit(EXIT_FAILURE); } + + const std::streamoff filesize = in.tellg(); + if (filesize < 100) { + std::cerr << "ERROR: File too small to be a valid STL\n"; + std::exit(EXIT_FAILURE); + } + in.seekg(0); + + // ---- check if ASCII ------------------------------------------------- + char magic[6] = { 0 }; + in.read(magic, 5); // read first 5 chars + in.seekg(0); // rewind + if (std::strncmp(magic, "solid", 5) == 0) { + std::cerr + << "ERROR: \"" << path + << "\" looks like an **ASCII** STL (starts with \"solid\").\n" + << "Re‑export it as *binary* or implement an ASCII parser.\n"; + std::exit(EXIT_FAILURE); // or call ascii_stl_reader(); + } + + // ---- read 80‑byte header + nominal facet count ---------------------- + char header[80]; in.read(header, 80); + size_t n_facets_nominal; in.read(reinterpret_cast(&n_facets_nominal), 4); + + // ---- compute expected count from file size to sanity‑check ---------- + // binary facet record = 50 bytes (12×4 + 12×4 + 12×4 + 2) + const size_t n_facets_from_size = + static_cast((filesize - 84) / 50); + + size_t n_facets = n_facets_nominal; + if (n_facets_nominal != n_facets_from_size) { + std::cout << "WARNING: facet count in header (" << n_facets_nominal + << ") disagrees with file size (" << n_facets_from_size + << "). Using size‑derived value.\n"; + n_facets = n_facets_from_size; + } + std::cout << "STL facets: " << n_facets << '\n'; + + // ---- allocate MATAR arrays ----------------------------------------- + CArray normal(n_facets, 3); + CArray v1X(n_facets), v1Y(n_facets), v1Z(n_facets); + CArray v2X(n_facets), v2Y(n_facets), v2Z(n_facets); + CArray v3X(n_facets), v3Y(n_facets), v3Z(n_facets); + + // ---- read facet records -------------------------------------------- + float nrm[3], v1[3], v2[3], v3[3]; + for (unsigned int i = 0; i < n_facets; ++i) { + in.read(reinterpret_cast(nrm), 12); + in.read(reinterpret_cast(v1), 12); + in.read(reinterpret_cast(v2), 12); + in.read(reinterpret_cast(v3), 12); + in.ignore(2); // attribute byte count + + for (int d = 0; d < 3; ++d) normal(i, d) = nrm[d]; + v1X(i) = v1[0]; v1Y(i) = v1[1]; v1Z(i) = v1[2]; + v2X(i) = v2[0]; v2Y(i) = v2[1]; v2Z(i) = v2[2]; + v3X(i) = v3[0]; v3Y(i) = v3[1]; v3Z(i) = v3[2]; + } + return { normal,v1X,v1Y,v1Z,v2X,v2Y,v2Z,v3X,v3Y,v3Z,n_facets }; +} + + + + + +// a vector type with 3 components +struct vec_t{ + double x; + double y; + double z; + + // default constructor + vec_t (){}; + + // overloaded constructor + vec_t(double x_in, double y_in, double z_in){ + x = x_in; + y = y_in; + z = z_in; + }; + +}; // end vec_t + + +// a triangle data type +struct triangle_t { + + vec_t normal; // surface normal + + vec_t p[3]; // three nodes with x,y,z coords + + // default constructor + triangle_t(){}; + + // overloaded constructor + triangle_t (vec_t p_in[3]) + { + p[0]=p_in[0]; + p[1]=p_in[1]; + p[2]=p_in[2]; + }; + +}; // end triangle_t + + +// calculate the surface normal of a triangle +KOKKOS_INLINE_FUNCTION +void calc_normal(triangle_t *triangle){ + + //A = p1 - p0; + //B = p2 - p0; + vec_t A; + A.x = triangle->p[1].x - triangle->p[0].x; + A.y = triangle->p[1].y - triangle->p[0].y; + A.z = triangle->p[1].z - triangle->p[0].z; + + vec_t B; + B.x = triangle->p[2].x - triangle->p[0].x; + B.y = triangle->p[2].y - triangle->p[0].y; + B.z = triangle->p[2].z - triangle->p[0].z; + + vec_t N; + N.x = A.y * B.z - A.z * B.y; + N.y = A.z * B.x - A.x * B.z; + N.z = A.x * B.y - A.y * B.x; + + double mag; + mag = sqrt(N.x*N.x + N.y*N.y + N.z*N.z); + + // save the unit normal + triangle->normal.x = N.x/mag; + triangle->normal.y = N.y/mag; + triangle->normal.z = N.z/mag; + +} // end normal + + +// cross prodcut +vec_t cross(const vec_t &a, const vec_t &b) { + return {a.y*b.z - a.z*b.y, + a.z*b.x - a.x*b.z, + a.x*b.y - a.y*b.x}; +} + +double dot(const vec_t &a, const vec_t &b) { + return a.x*b.x + a.y*b.y + a.z*b.z; +} + + +// calculate the volume of a tet with this triangular face +double compute_volume(const triangle_t &triangle) { + // triangle.p[0] is the first vec_t, being node 0 + // ... + // triangle.p[1] is the third vec_t, being node 2 + double volume = dot(triangle.p[0], cross(triangle.p[1], triangle.p[2])) / 6.0; + + return volume; +} + +struct gridcell_t { + + vec_t* p; + double* val; + + // default constructor + gridcell_t(){}; + + // overloaded constructor + gridcell_t (vec_t p_in[8], double val_in[8]) + { + p=p_in; + val=val_in; + }; + +}; // end gridcell_t + + +/* + Linearly interpolate the position where an isosurface cuts + an edge between two vertices, each with their own scalar value +*/ +KOKKOS_INLINE_FUNCTION +vec_t VertexInterp(double isolevel, vec_t p1, vec_t p2, double valp1, double valp2) +{ + double mu; + vec_t p; + + if (fabs(isolevel-valp1) < 0.00001) + return(p1); + if (fabs(isolevel-valp2) < 0.00001) + return(p2); + if (fabs(valp1-valp2) < 0.00001) + return(p1); + mu = (isolevel - valp1) / (valp2 - valp1); + p.x = p1.x + mu * (p2.x - p1.x); + p.y = p1.y + mu * (p2.y - p1.y); + p.z = p1.z + mu * (p2.z - p1.z); + + return(p); +} + +/* + Given a grid cell and an isolevel, calculate the triangular + facets required to represent the isosurface through the cell. + Return the number of triangular facets, the array "triangles" + will be loaded up with the vertices at most 5 triangular facets. + 0 will be returned if the grid cell is either totally above + of totally below the isolevel. +*/ +KOKKOS_INLINE_FUNCTION +int Polygonise(gridcell_t grid, double isolevel, triangle_t *triangles) +{ + + int i,ntriang; + int cubeindex; + vec_t vertlist[12]; + + int edgeTable[256]={ + 0x0 , 0x109, 0x203, 0x30a, 0x406, 0x50f, 0x605, 0x70c, + 0x80c, 0x905, 0xa0f, 0xb06, 0xc0a, 0xd03, 0xe09, 0xf00, + 0x190, 0x99 , 0x393, 0x29a, 0x596, 0x49f, 0x795, 0x69c, + 0x99c, 0x895, 0xb9f, 0xa96, 0xd9a, 0xc93, 0xf99, 0xe90, + 0x230, 0x339, 0x33 , 0x13a, 0x636, 0x73f, 0x435, 0x53c, + 0xa3c, 0xb35, 0x83f, 0x936, 0xe3a, 0xf33, 0xc39, 0xd30, + 0x3a0, 0x2a9, 0x1a3, 0xaa , 0x7a6, 0x6af, 0x5a5, 0x4ac, + 0xbac, 0xaa5, 0x9af, 0x8a6, 0xfaa, 0xea3, 0xda9, 0xca0, + 0x460, 0x569, 0x663, 0x76a, 0x66 , 0x16f, 0x265, 0x36c, + 0xc6c, 0xd65, 0xe6f, 0xf66, 0x86a, 0x963, 0xa69, 0xb60, + 0x5f0, 0x4f9, 0x7f3, 0x6fa, 0x1f6, 0xff , 0x3f5, 0x2fc, + 0xdfc, 0xcf5, 0xfff, 0xef6, 0x9fa, 0x8f3, 0xbf9, 0xaf0, + 0x650, 0x759, 0x453, 0x55a, 0x256, 0x35f, 0x55 , 0x15c, + 0xe5c, 0xf55, 0xc5f, 0xd56, 0xa5a, 0xb53, 0x859, 0x950, + 0x7c0, 0x6c9, 0x5c3, 0x4ca, 0x3c6, 0x2cf, 0x1c5, 0xcc , + 0xfcc, 0xec5, 0xdcf, 0xcc6, 0xbca, 0xac3, 0x9c9, 0x8c0, + 0x8c0, 0x9c9, 0xac3, 0xbca, 0xcc6, 0xdcf, 0xec5, 0xfcc, + 0xcc , 0x1c5, 0x2cf, 0x3c6, 0x4ca, 0x5c3, 0x6c9, 0x7c0, + 0x950, 0x859, 0xb53, 0xa5a, 0xd56, 0xc5f, 0xf55, 0xe5c, + 0x15c, 0x55 , 0x35f, 0x256, 0x55a, 0x453, 0x759, 0x650, + 0xaf0, 0xbf9, 0x8f3, 0x9fa, 0xef6, 0xfff, 0xcf5, 0xdfc, + 0x2fc, 0x3f5, 0xff , 0x1f6, 0x6fa, 0x7f3, 0x4f9, 0x5f0, + 0xb60, 0xa69, 0x963, 0x86a, 0xf66, 0xe6f, 0xd65, 0xc6c, + 0x36c, 0x265, 0x16f, 0x66 , 0x76a, 0x663, 0x569, 0x460, + 0xca0, 0xda9, 0xea3, 0xfaa, 0x8a6, 0x9af, 0xaa5, 0xbac, + 0x4ac, 0x5a5, 0x6af, 0x7a6, 0xaa , 0x1a3, 0x2a9, 0x3a0, + 0xd30, 0xc39, 0xf33, 0xe3a, 0x936, 0x83f, 0xb35, 0xa3c, + 0x53c, 0x435, 0x73f, 0x636, 0x13a, 0x33 , 0x339, 0x230, + 0xe90, 0xf99, 0xc93, 0xd9a, 0xa96, 0xb9f, 0x895, 0x99c, + 0x69c, 0x795, 0x49f, 0x596, 0x29a, 0x393, 0x99 , 0x190, + 0xf00, 0xe09, 0xd03, 0xc0a, 0xb06, 0xa0f, 0x905, 0x80c, + 0x70c, 0x605, 0x50f, 0x406, 0x30a, 0x203, 0x109, 0x0 }; + + int triTable[256][16] = + {{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 8, 3, 9, 8, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 2, 10, 0, 2, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 8, 3, 2, 10, 8, 10, 9, 8, -1, -1, -1, -1, -1, -1, -1}, + {3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 11, 2, 8, 11, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 11, 2, 1, 9, 11, 9, 8, 11, -1, -1, -1, -1, -1, -1, -1}, + {3, 10, 1, 11, 10, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 10, 1, 0, 8, 10, 8, 11, 10, -1, -1, -1, -1, -1, -1, -1}, + {3, 9, 0, 3, 11, 9, 11, 10, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 3, 0, 7, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 1, 9, 4, 7, 1, 7, 3, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 4, 7, 3, 0, 4, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1}, + {9, 2, 10, 9, 0, 2, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, + {2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, -1, -1, -1, -1}, + {8, 4, 7, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 4, 7, 11, 2, 4, 2, 0, 4, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 1, 8, 4, 7, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, + {4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, -1, -1, -1, -1}, + {3, 10, 1, 3, 11, 10, 7, 8, 4, -1, -1, -1, -1, -1, -1, -1}, + {1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, -1, -1, -1, -1}, + {4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, -1, -1, -1, -1}, + {4, 7, 11, 4, 11, 9, 9, 11, 10, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 5, 4, 1, 5, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 5, 4, 8, 3, 5, 3, 1, 5, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 1, 2, 10, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, + {5, 2, 10, 5, 4, 2, 4, 0, 2, -1, -1, -1, -1, -1, -1, -1}, + {2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, -1, -1, -1, -1}, + {9, 5, 4, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 11, 2, 0, 8, 11, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, + {0, 5, 4, 0, 1, 5, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, + {2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, -1, -1, -1, -1}, + {10, 3, 11, 10, 1, 3, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, -1, -1, -1, -1}, + {5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, -1, -1, -1, -1}, + {5, 4, 8, 5, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1}, + {9, 7, 8, 5, 7, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 3, 0, 9, 5, 3, 5, 7, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 7, 8, 0, 1, 7, 1, 5, 7, -1, -1, -1, -1, -1, -1, -1}, + {1, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 7, 8, 9, 5, 7, 10, 1, 2, -1, -1, -1, -1, -1, -1, -1}, + {10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, -1, -1, -1, -1}, + {8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, -1, -1, -1, -1}, + {2, 10, 5, 2, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1}, + {7, 9, 5, 7, 8, 9, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, -1, -1, -1, -1}, + {2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, -1, -1, -1, -1}, + {11, 2, 1, 11, 1, 7, 7, 1, 5, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, -1, -1, -1, -1}, + {5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, -1}, + {11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, -1}, + {11, 10, 5, 7, 11, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 1, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 8, 3, 1, 9, 8, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 5, 2, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 5, 1, 2, 6, 3, 0, 8, -1, -1, -1, -1, -1, -1, -1}, + {9, 6, 5, 9, 0, 6, 0, 2, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, -1, -1, -1, -1}, + {2, 3, 11, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 0, 8, 11, 2, 0, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, -1, -1, -1, -1}, + {6, 3, 11, 6, 5, 3, 5, 1, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, -1, -1, -1, -1}, + {3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, -1, -1, -1, -1}, + {6, 5, 9, 6, 9, 11, 11, 9, 8, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 3, 0, 4, 7, 3, 6, 5, 10, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 5, 10, 6, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, + {10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, -1, -1, -1, -1}, + {6, 1, 2, 6, 5, 1, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, -1, -1, -1, -1}, + {8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, -1, -1, -1, -1}, + {7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, -1}, + {3, 11, 2, 7, 8, 4, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, -1, -1, -1, -1}, + {0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1}, + {9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, -1}, + {8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, -1, -1, -1, -1}, + {5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, -1}, + {0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, -1}, + {6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, -1, -1, -1, -1}, + {10, 4, 9, 6, 4, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 10, 6, 4, 9, 10, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1}, + {10, 0, 1, 10, 6, 0, 6, 4, 0, -1, -1, -1, -1, -1, -1, -1}, + {8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, -1, -1, -1, -1}, + {1, 4, 9, 1, 2, 4, 2, 6, 4, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, -1, -1, -1, -1}, + {0, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 3, 2, 8, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1}, + {10, 4, 9, 10, 6, 4, 11, 2, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, -1, -1, -1, -1}, + {3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, -1, -1, -1, -1}, + {6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, -1}, + {9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, -1, -1, -1, -1}, + {8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, -1}, + {3, 11, 6, 3, 6, 0, 0, 6, 4, -1, -1, -1, -1, -1, -1, -1}, + {6, 4, 8, 11, 6, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 10, 6, 7, 8, 10, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1}, + {0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, -1, -1, -1, -1}, + {10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, -1, -1, -1, -1}, + {10, 6, 7, 10, 7, 1, 1, 7, 3, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, -1, -1, -1, -1}, + {2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, -1}, + {7, 8, 0, 7, 0, 6, 6, 0, 2, -1, -1, -1, -1, -1, -1, -1}, + {7, 3, 2, 6, 7, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, -1, -1, -1, -1}, + {2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, -1}, + {1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, -1}, + {11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, -1, -1, -1, -1}, + {8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, -1}, + {0, 9, 1, 11, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, -1, -1, -1, -1}, + {7, 11, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 1, 9, 8, 3, 1, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, + {10, 1, 2, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 3, 0, 8, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, + {2, 9, 0, 2, 10, 9, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, + {6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, -1, -1, -1, -1}, + {7, 2, 3, 6, 2, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 0, 8, 7, 6, 0, 6, 2, 0, -1, -1, -1, -1, -1, -1, -1}, + {2, 7, 6, 2, 3, 7, 0, 1, 9, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, -1, -1, -1, -1}, + {10, 7, 6, 10, 1, 7, 1, 3, 7, -1, -1, -1, -1, -1, -1, -1}, + {10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, -1, -1, -1, -1}, + {0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, -1, -1, -1, -1}, + {7, 6, 10, 7, 10, 8, 8, 10, 9, -1, -1, -1, -1, -1, -1, -1}, + {6, 8, 4, 11, 8, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 6, 11, 3, 0, 6, 0, 4, 6, -1, -1, -1, -1, -1, -1, -1}, + {8, 6, 11, 8, 4, 6, 9, 0, 1, -1, -1, -1, -1, -1, -1, -1}, + {9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, -1, -1, -1, -1}, + {6, 8, 4, 6, 11, 8, 2, 10, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, -1, -1, -1, -1}, + {4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, -1, -1, -1, -1}, + {10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, -1}, + {8, 2, 3, 8, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1}, + {0, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, -1, -1, -1, -1}, + {1, 9, 4, 1, 4, 2, 2, 4, 6, -1, -1, -1, -1, -1, -1, -1}, + {8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, -1, -1, -1, -1}, + {10, 1, 0, 10, 0, 6, 6, 0, 4, -1, -1, -1, -1, -1, -1, -1}, + {4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, -1}, + {10, 9, 4, 6, 10, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 5, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 4, 9, 5, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 0, 1, 5, 4, 0, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, + {11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, -1, -1, -1, -1}, + {9, 5, 4, 10, 1, 2, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, + {6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, -1, -1, -1, -1}, + {7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, -1, -1, -1, -1}, + {3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, -1}, + {7, 2, 3, 7, 6, 2, 5, 4, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, -1, -1, -1, -1}, + {3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, -1, -1, -1, -1}, + {6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, -1}, + {9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, -1, -1, -1, -1}, + {1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, -1}, + {4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, -1}, + {7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, -1, -1, -1, -1}, + {6, 9, 5, 6, 11, 9, 11, 8, 9, -1, -1, -1, -1, -1, -1, -1}, + {3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, -1, -1, -1, -1}, + {0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, -1, -1, -1, -1}, + {6, 11, 3, 6, 3, 5, 5, 3, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, -1, -1, -1, -1}, + {0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, -1}, + {11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, -1}, + {6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, -1, -1, -1, -1}, + {5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, -1, -1, -1, -1}, + {9, 5, 6, 9, 6, 0, 0, 6, 2, -1, -1, -1, -1, -1, -1, -1}, + {1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, -1}, + {1, 5, 6, 2, 1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, -1}, + {10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, -1, -1, -1, -1}, + {0, 3, 8, 5, 6, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {10, 5, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 5, 10, 7, 5, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 5, 10, 11, 7, 5, 8, 3, 0, -1, -1, -1, -1, -1, -1, -1}, + {5, 11, 7, 5, 10, 11, 1, 9, 0, -1, -1, -1, -1, -1, -1, -1}, + {10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, -1, -1, -1, -1}, + {11, 1, 2, 11, 7, 1, 7, 5, 1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, -1, -1, -1, -1}, + {9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, -1, -1, -1, -1}, + {7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, -1}, + {2, 5, 10, 2, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1}, + {8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, -1, -1, -1, -1}, + {9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, -1, -1, -1, -1}, + {9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, -1}, + {1, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 7, 0, 7, 1, 1, 7, 5, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 3, 9, 3, 5, 5, 3, 7, -1, -1, -1, -1, -1, -1, -1}, + {9, 8, 7, 5, 9, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {5, 8, 4, 5, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1}, + {5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, -1, -1, -1, -1}, + {0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, -1, -1, -1, -1}, + {10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, -1}, + {2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, -1, -1, -1, -1}, + {0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, -1}, + {0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, -1}, + {9, 4, 5, 2, 11, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, -1, -1, -1, -1}, + {5, 10, 2, 5, 2, 4, 4, 2, 0, -1, -1, -1, -1, -1, -1, -1}, + {3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, -1}, + {5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, -1, -1, -1, -1}, + {8, 4, 5, 8, 5, 3, 3, 5, 1, -1, -1, -1, -1, -1, -1, -1}, + {0, 4, 5, 1, 0, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, -1, -1, -1, -1}, + {9, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 11, 7, 4, 9, 11, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, -1, -1, -1, -1}, + {1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, -1, -1, -1, -1}, + {3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, -1}, + {4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, -1, -1, -1, -1}, + {9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, -1}, + {11, 7, 4, 11, 4, 2, 2, 4, 0, -1, -1, -1, -1, -1, -1, -1}, + {11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, -1, -1, -1, -1}, + {2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, -1, -1, -1, -1}, + {9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, -1}, + {3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, -1}, + {1, 10, 2, 8, 7, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 1, 4, 1, 7, 7, 1, 3, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, -1, -1, -1, -1}, + {4, 0, 3, 7, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 8, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 9, 3, 9, 11, 11, 9, 10, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 10, 0, 10, 8, 8, 10, 11, -1, -1, -1, -1, -1, -1, -1}, + {3, 1, 10, 11, 3, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 11, 1, 11, 9, 9, 11, 8, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, -1, -1, -1, -1}, + {0, 2, 11, 8, 0, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 2, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 8, 2, 8, 10, 10, 8, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 10, 2, 0, 9, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, -1, -1, -1, -1}, + {1, 10, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 3, 8, 9, 1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 9, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}; + + + /* + Determine the index into the edge table which + tells us which vertices are inside of the surface + */ + cubeindex = 0; + + if (grid.val[0] < isolevel) cubeindex |= 1; + if (grid.val[1] < isolevel) cubeindex |= 2; + if (grid.val[2] < isolevel) cubeindex |= 4; + if (grid.val[3] < isolevel) cubeindex |= 8; + if (grid.val[4] < isolevel) cubeindex |= 16; + if (grid.val[5] < isolevel) cubeindex |= 32; + if (grid.val[6] < isolevel) cubeindex |= 64; + if (grid.val[7] < isolevel) cubeindex |= 128; + + + + /* Cube is entirely in/out of the surface */ + if (edgeTable[cubeindex] == 0) + return(0); + + /* Find the vertices where the surface intersects the cube */ + if (edgeTable[cubeindex] & 1) + vertlist[0] = + VertexInterp(isolevel,grid.p[0],grid.p[1],grid.val[0],grid.val[1]); + if (edgeTable[cubeindex] & 2) + vertlist[1] = + VertexInterp(isolevel,grid.p[1],grid.p[2],grid.val[1],grid.val[2]); + if (edgeTable[cubeindex] & 4) + vertlist[2] = + VertexInterp(isolevel,grid.p[2],grid.p[3],grid.val[2],grid.val[3]); + if (edgeTable[cubeindex] & 8) + vertlist[3] = + VertexInterp(isolevel,grid.p[3],grid.p[0],grid.val[3],grid.val[0]); + if (edgeTable[cubeindex] & 16) + vertlist[4] = + VertexInterp(isolevel,grid.p[4],grid.p[5],grid.val[4],grid.val[5]); + if (edgeTable[cubeindex] & 32) + vertlist[5] = + VertexInterp(isolevel,grid.p[5],grid.p[6],grid.val[5],grid.val[6]); + if (edgeTable[cubeindex] & 64) + vertlist[6] = + VertexInterp(isolevel,grid.p[6],grid.p[7],grid.val[6],grid.val[7]); + if (edgeTable[cubeindex] & 128) + vertlist[7] = + VertexInterp(isolevel,grid.p[7],grid.p[4],grid.val[7],grid.val[4]); + if (edgeTable[cubeindex] & 256) + vertlist[8] = + VertexInterp(isolevel,grid.p[0],grid.p[4],grid.val[0],grid.val[4]); + if (edgeTable[cubeindex] & 512) + vertlist[9] = + VertexInterp(isolevel,grid.p[1],grid.p[5],grid.val[1],grid.val[5]); + if (edgeTable[cubeindex] & 1024) + vertlist[10] = + VertexInterp(isolevel,grid.p[2],grid.p[6],grid.val[2],grid.val[6]); + if (edgeTable[cubeindex] & 2048) + vertlist[11] = + VertexInterp(isolevel,grid.p[3],grid.p[7],grid.val[3],grid.val[7]); + + /* Create the triangle */ + ntriang = 0; + for (i=0; triTable[cubeindex][i]!=-1; i+=3) { + + triangles[ntriang].p[0] = vertlist[triTable[cubeindex][i ]]; + triangles[ntriang].p[1] = vertlist[triTable[cubeindex][i+1]]; + triangles[ntriang].p[2] = vertlist[triTable[cubeindex][i+2]]; + + ntriang++; + } // end for i + + return(ntriang); +} + + +// Gaussian function part of the RBF +// rbf = p(x) + lambda_j*exp(-(x - xj)*(x - xj)) +KOKKOS_FUNCTION +double Gaussian_fcn(double xi_value[3], double xj_value[3]){ + + double diff_sqrd = 0.0; + for(size_t dim=0; dim<3; dim++){ + diff_sqrd += (xi_value[dim] - xj_value[dim])*(xi_value[dim] - xj_value[dim]); + } + return exp(-diff_sqrd); +} + +// biharmonic function part of the RBF +// rbf = p(x) + lambda_j*sqrt((x - xj)*(x - xj)) +KOKKOS_FUNCTION +double biharmonic_fcn(double xi_value[3], double xj_value[3]){ + + double diff_sqrd = 0.0; + for(size_t dim=0; dim<3; dim++){ + diff_sqrd += (xi_value[dim] - xj_value[dim])*(xi_value[dim] - xj_value[dim]); + } + return sqrt(diff_sqrd); +} + + + +int main(int argc, char *argv[]) +{ + Kokkos::initialize(argc, argv); + { + + printf("Pointcloud reconstruction \n\n"); + + if(argc==1){ + printf("Please supply an STL file for testing the point cloud surface reconstruction code \n"); + return 0; + } + + std::string filename = argv[1]; + + auto [normal_host, + v1X_host, v1Y_host, v1Z_host, + v2X_host, v2Y_host, v2Z_host, + v3X_host, v3Y_host, v3Z_host, + num_inp_triangles_host] = binary_stl_reader(filename); + + // Warning on C++ support: + // At this time with C++, the contents from a tuple cannot + // be used inside a lambda function. The parallel loops use + // lambda functions. To overcome this C++ limitation, all + // contents in the tuple will be copied or pointed to (Using + // a MATAR dual view) allowing the data to be used in parallel. + const size_t num_inp_triangles = num_inp_triangles_host; + DViewCArrayKokkos normal(&normal_host(0,0), num_inp_triangles, 3); + DViewCArrayKokkos v1X(&v1X_host(0),num_inp_triangles); + DViewCArrayKokkos v1Y(&v1Y_host(0),num_inp_triangles); + DViewCArrayKokkos v1Z(&v1Z_host(0),num_inp_triangles); + DViewCArrayKokkos v2X(&v2X_host(0),num_inp_triangles); + DViewCArrayKokkos v2Y(&v2Y_host(0),num_inp_triangles); + DViewCArrayKokkos v2Z(&v2Z_host(0),num_inp_triangles); + DViewCArrayKokkos v3X(&v3X_host(0),num_inp_triangles); + DViewCArrayKokkos v3Y(&v3Y_host(0),num_inp_triangles); + DViewCArrayKokkos v3Z(&v3Z_host(0),num_inp_triangles); + + normal.update_device(); + v1X.update_device(); + v1Y.update_device(); + v1Z.update_device(); + v2X.update_device(); + v2Y.update_device(); + v2Z.update_device(); + v3X.update_device(); + v3Y.update_device(); + v3Z.update_device(); + + + // define mesh spacing, it is used to create a mesh + + double LX = (XMax - X0); // length in x-dir + double LY = (YMax - Y0); + double LZ = (ZMax - Z0); + + // the number of nodes in the mesh + int num_pt_x = (int)( LX/dx ) + 1; // there must be at least 2 nodes + int num_pt_y = (int)( LY/dy ) + 1; // there must be at least 2 nodes + int num_pt_z = (int)( LZ/dz ) + 1; // there must be at least 2 nodes + + + // mesh coordinates + DCArrayKokkos x(num_pt_x, "pt_x"); + DCArrayKokkos y(num_pt_y, "pt_y"); + DCArrayKokkos z(num_pt_z, "pt_z"); + + // small distance for moving in the +/- normal directions + double epsilon = 0.1*fmin(fmin(dx, dy), dz); + + + // function with isosurface that we want extracted + DCArrayKokkos gridValues (num_pt_x,num_pt_y,num_pt_z, "grid_values"); + + + // define the triangles of extracted surface + const size_t num_elems = (num_pt_x-1)*(num_pt_y-1)*(num_pt_z-1); + DCArrayKokkos all_mesh_surf_triangles(num_elems, 5, "mesh_surf_tris"); // max of 5 per elem + DCArrayKokkos num_triangles_in_elem(num_elems, "num_tris_in_elem"); + num_triangles_in_elem.set_values(0); + + + printf("Creating point cloud data from STL file \n\n"); + + // define a point cloud + size_t num_points = num_inp_triangles*3; // 1 point per triangle plus 2 more in the +/- directions + DCArrayKokkos point_positions(num_points, 3, "point_positions"); + DCArrayKokkos point_signed_distance(num_points, "point_sign_distance"); // this is f in the journal paper + + //for(size_t tri=0; tri M_matrix(N,N,"M_matrix"); + DCArrayKokkos b_vector(N, "b_vector"); + + // ----------------------- + // assemble system matrix + // Mx = B + // _ _ _ _ _ _ + // |A P| |lambda| |f| + // |P^T 0| *| c | = |0| + // - - - - - - + // + // ----------------------- + + // initializing to zero + M_matrix.set_values(0.0); + + // assemble M + FOR_ALL(i, 0, num_points, + j, 0, num_points, { + + // this is the A matrix part of M + M_matrix(i, j) = Gaussian_fcn(&point_positions(i,0), &point_positions(j,0)); + }); + + FOR_ALL(i, 0, num_points, { + // Polynomial basis: [1, x, y, z] + // this the P matrix part of M + M_matrix(i, num_points+0) = 1.0; + M_matrix(i, num_points+1) = point_positions(i,0); // x coord + M_matrix(i, num_points+2) = point_positions(i,1); // y coord + M_matrix(i, num_points+3) = point_positions(i,2); // z coord + }); // end for i + + FOR_ALL(j, 0, num_points, { + // this the P^T matrix part of M + M_matrix(num_points+0, j) = 1.0; + M_matrix(num_points+1, j) = point_positions(j,0); // x coord + M_matrix(num_points+2, j) = point_positions(j,1); // y coord + M_matrix(num_points+3, j) = point_positions(j,2); // z coord + }); // end for j + + // adding the zeros in the bottom right corner + // for (size_t i = num_points; i < num_points+Pn; ++i) { + // for (size_t j = num_points; j < num_points+Pn; ++j) { + // M_matrix(i, j) = 0.0; + // } // end for j + // } // end for i + + //for(size_t i = num_points; i < num_points+Pn; ++i){ + FOR_ALL(i, num_points, N, { + b_vector(i) = 0.0; + }); // end for + + // assemble RHS vector, b + //for (size_t i = 0; i < num_points; ++i) { + FOR_ALL(i, 0, num_points, { + b_vector(i) = point_signed_distance(i); + }); // end for + + + M_matrix.update_host(); + b_vector.update_host(); + + + + // ---------------------------------- + // Solve for x in Mx=b + // ---------------------------------- + + // checking by printing + printf("matrix = \n"); + RUN({ + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < N; ++j) { + printf("%f , ", M_matrix(i, j)); + } // end for j + printf("\n"); + } // end for i + + printf("\n"); + printf("b = \n"); + for (size_t i = 0; i < N; ++i) { + printf("%f \n", b_vector(i)); + } + }); + + DCArrayKokkos perm (N, "perm"); + perm.set_values(0); + CArrayKokkos vv(N, "vv"); + + // used for LU problem + + int singular = 0; + int parity = 0; + + // Get the LU decomposition of the Mass Matrix + singular = LU_decompose_host(M_matrix, perm, vv, parity); // matrix is returned as the LU matrix + if(singular==0){ + printf("ERROR: matrix to fit point cloud surface is singluar \n"); + return 0; + } + // BUG in LU decompose + + // RUN({ + // int singular_d = 0; + // int parity_d = 0; + // singular_d = LU_decompose(M_matrix, perm, vv, parity_d); // M is returned as the LU matrix + // if(singular_d==0){ + // printf("ERROR: matrix is singluar \n"); + // } + // }); + + LU_backsub_host(M_matrix, perm, b_vector); // note: answer is sent back in b_vector + + // RUN({ + // LU_backsub(M_matrix, perm, b_vector); // note: answer is sent back in b + // }); + + RUN({ + //lambda coefficients for radial basis function, slice out only lambda values and polynomial values + auto lambda = ViewCArrayKokkos (&b_vector(0), num_points); + auto coefs = ViewCArrayKokkos (&b_vector(num_points), Pn); + printf("coeffs = \n"); + for(size_t i=0; i (&b_vector(0), num_points); + // coefs = ViewCArrayKokkos (&b_vector(num_points), Pn); + + // evaluate the polynomial part + gridValues(i,j,k) = b_vector(0+num_points) + + b_vector(1+num_points)*x(i) + + b_vector(2+num_points)*y(j) + + b_vector(3+num_points)*z(k); + + + for (size_t point=0; point // for timing + +#include +#include +#include +#include +#include + +#include // For rand() and srand() + + +#include "matar.h" + +#include "lu_solver.hpp" + + +#include // for unorded map testing + + + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +using namespace mtr; + +const double PI = 3.14159265358979323846; + +// ----------------------------------------------- +// inputs: + + +#define P2 // P1 or P2 +#define CUBIC_SPLINE // CUBIC_SPLINE or GAUSS kernel + +bool RAND_CLOUD = true; // RAND_CLOUD or uniform + +const size_t num_1d_x = 5; +const size_t num_1d_y = 5; +const size_t num_1d_z = 5; + +const double h_kernel = 2.0/5.; // it is always 2/num_1d_x +const double num_points_fit = 27; // minimum to P2 fit on structured mesh is 3x3x3 + +const size_t num_points = num_1d_x*num_1d_y*num_1d_z; + +// the bin sizes for finding neighboring points +const double bin_dx = 0.05; // bins in x +const double bin_dy = 0.05; // bins in y +const double bin_dz = 0.05; // bins in z + +const double X0 = 0.0; // origin +const double Y0 = 0.0; +const double Z0 = 0.0; + +// length of the domain +const double LX = 1.0; // length in x-dir +const double LY = 1.0; +const double LZ = 1.0; + + +bool check_maps = false; // CPU only!!!! + +// +// ----------------------------------------------- + + + +struct bin_keys_t{ + size_t i,j,k; +}; + +KOKKOS_INLINE_FUNCTION +size_t get_gid(size_t i, size_t j, size_t k, size_t num_x, size_t num_y){ + return i + (j + k*num_y)*num_x; +} + +KOKKOS_INLINE_FUNCTION +bin_keys_t get_bin_keys(const double x_pt, + const double y_pt, + const double z_pt){ + + + double i_dbl = fmax(0, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // x = ih + X0 + dx_bin*0.5 + double j_dbl = fmax(0, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10)); + double k_dbl = fmax(0, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10)); + + bin_keys_t bin_keys; // save i,j,k to the bin keys + + // get the integer for the bins + bin_keys.i = (size_t)i_dbl; + bin_keys.j = (size_t)j_dbl; + bin_keys.k = (size_t)k_dbl; + + return bin_keys; + +} // end function + +KOKKOS_INLINE_FUNCTION +size_t get_bin_gid(const double x_pt, + const double y_pt, + const double z_pt, + const size_t num_bins_x, + const size_t num_bins_y, + const size_t num_bins_z){ + + + double i_dbl = fmin(num_bins_x-1, fmax(0.0, round((x_pt - X0)/bin_dx - 1.0e-8))); // x = ih + X0 + double j_dbl = fmin(num_bins_y-1, fmax(0.0, round((y_pt - Y0)/bin_dy - 1.0e-8))); + double k_dbl = fmin(num_bins_z-1, fmax(0.0, round((z_pt - Z0)/bin_dz - 1.0e-8))); + + // get the integers for the bins + size_t i = (size_t)i_dbl; + size_t j = (size_t)j_dbl; + size_t k = (size_t)k_dbl; + + // get the 1D index for this bin + return get_gid(i, j, k, num_bins_x, num_bins_y); + +} // end function + + + + +#if defined(CUBIC_SPLINE) + +KOKKOS_FUNCTION +double kernel(const double r[3], double h) { + + double diff_sqrd = 0.0; + for(size_t dim=0; dim<3; dim++){ + diff_sqrd += r[dim]*r[dim]; + } // dim + + const double radius = sqrt(diff_sqrd); + const double q = radius/h; + const double alpha = 2.0/(3.0*h); + if (q < 0.0) return 0.0; // defensive + if (q < 1.0) return (alpha * (1.0 - 1.5*q*q + 0.75*q*q*q)); + if (q < 2.0) return (alpha * 0.25 * pow(2.0 - q, 3)); + + return 0.0; +} + + +KOKKOS_FUNCTION +// derivative dW/dx_i = - dW/dr where r = xj-xi +void grad_kernel(double *grad_W, const double r[3], const double h, const bool derivative_wrt_i) { + + double diff_sqrd = 0.0; + for(size_t dim=0; dim<3; dim++){ + diff_sqrd += r[dim]*r[dim]; + } // dim + + const double radius = sqrt(diff_sqrd); + const double q = radius/h; + + double df_dq = 0.0; // derivative of the dimensionless kernel shape function f(q) + if (q < 1.0) { + // f(q) = 1 - 1.5 q^2 + 0.75 q^3 + // f'(q) = -3 q + 2.25 q^2 + df_dq = -3.0 * q + 2.25 * q * q; + } else if (q < 2.0) { + // f(q) = 0.25 (2 - q)^3 + // f'(q) = 0.25 * 3 (2 - q)^2 * (-1) = -0.75 (2 - q)^2 + const double two_minus_q = 2.0 - q; + df_dq = -0.75 * two_minus_q * two_minus_q; + } else { + df_dq = 0.0; + } + + const double alpha = 2.0/(3.0*h); + const double dW_dr = alpha*(df_dq / h); + // grad W = dW/dr * (rij / radius) + const double invr = 1.0 /(radius + 1e-16); + + double drdx = -1.0; // default is derivative with respect to i + if(derivative_wrt_i == false){ + drdx = 1.0; // derivative with respect to j + } // end if + + for (size_t dim=0; dim<3; ++dim) { + grad_W[dim] = dW_dr * r[dim] * drdx * invr; + } + + return; +} + +#else + +// Gaussian function part of the RBF +// rbf = exp(-(xj - x)*(xj - x)/h) +KOKKOS_FUNCTION +double kernel(const double r[3], const double h){ + + double diff_sqrd = 0.0; + + for(size_t dim=0; dim<3; dim++){ + diff_sqrd += r[dim]*r[dim]; + } // dim + + double norm = 1.0; // / (h * h * h * pow(PI, 1.5)); + return norm * exp(-diff_sqrd / (h * h)); +} // end of function + + + +// Gradient Gaussian function +// d/dx rbf = d/dx (exp(-(xj - xi)*(xj - x)/hi^2) +KOKKOS_FUNCTION +void grad_kernel(double *grad_W, const double r[3], const double h, const bool derivative_wrt_i){ + + double diff_sqrd = 0.0; + + for(size_t dim=0; dim<3; dim++){ + diff_sqrd += r[dim]*r[dim]; + } // dim + + double drdx = -1; + if(derivative_wrt_i == false){ + drdx = 1.0; // derivative with respect to j + } // end if + + const double rbf = kernel(r, h); + + // gradient + for (size_t dim=0; dim<3; ++dim) { + grad_W[dim] = -2.0/(h*h)*r[dim]*rbf*drdx; + } + + return; +} // end of function + +#endif + +// Gaussian function part of the RBF, symmeterized +// rbf = 0.5*(exp(-(xj - xi)*(xj - xi)/hi^2) + exp(-(xi - xj)*(xi - xj)/hj^2)) +KOKKOS_FUNCTION +double kernel_syn(const double r[3], const double hi, const double hj){ + + double diff_sqrd = 0.0; + + for(size_t dim=0; dim<3; dim++){ + diff_sqrd += r[dim]*r[dim]; + } // dim + + const double Wi = exp(-diff_sqrd/(hi*hi)); // use kernel func call + const double Wj = exp(-diff_sqrd/(hj*hj)); + + return 0.5*(Wi + Wj); +} // end of function + + +// d/dx rbf = d/dx ( 0.5(exp(-(xj - xi)*(xj - x)/hi^2) + exp(-(xi - xj)*(xi - xj)/hj^2)) ) +KOKKOS_FUNCTION +void grad_kernel_sym(double *gradW, const double r[3], const double hi, const double hj) { + double diff_sqrd = 0.0; + for (size_t dim=0; dim<3; ++dim){ + diff_sqrd += r[dim]*r[dim]; + } + + const double drdxi = -1; + + double Wi = exp(-diff_sqrd/(hi*hi)); + double Wj = exp(-diff_sqrd/(hj*hj)); + + double dWi = -2.0/ (hi*hi) * Wi*drdxi; // it uses xj - xi so a minus one + double dWj = -2.0/ (hj*hj) * Wj; // it uses xi - xj so it has a +1 for drdxi + + for (size_t dim=0; dim<3; ++dim) { + gradW[dim] = 0.5 * (dWi * r[dim] - dWj * r[dim]); // second term using -r + } +} + +#if defined(P2) + // Polynomial basis up to quadratic in 3D (10 terms) + const size_t num_poly_basis = 10; + KOKKOS_INLINE_FUNCTION + void poly_basis(const double r[3], double *p) { + + p[0] = 1.0; + p[1] = r[0]; + p[2] = r[1]; + p[3] = r[2]; + p[4] = r[0] * r[0]; + p[5] = r[0] * r[1]; + p[6] = r[0] * r[2]; + p[7] = r[1] * r[1]; + p[8] = r[1] * r[2]; + p[9] = r[2] * r[2]; + + // for high-order will use (x^a y^b z^c) + + return; + } // end function + + + KOKKOS_INLINE_FUNCTION + void grad_poly_basis(const double r[3], double (*grad_p)[3], bool derivative_wrt_i) { + + // definition, r = r_j - r_i + + double drdx = -1.0; // default is derivative with respect to i + double drdy = -1.0; // default is derivative with respect to i + double drdz = -1.0; // default is derivative with respect to i + if(derivative_wrt_i == false){ + drdx = 1.0; // derivative with respect to j + drdy = 1.0; // derivative with respect to j + drdz = 1.0; // derivative with respect to j + } // end if + + grad_p[0][0] = 0.0; + grad_p[1][0] = drdx; + grad_p[2][0] = 0.0; + grad_p[3][0] = 0.0; + grad_p[4][0] = 2.0*r[0]*drdx; + grad_p[5][0] = r[1]*drdx; + grad_p[6][0] = r[2]*drdx; + grad_p[7][0] = 0.0; + grad_p[8][0] = 0.0; + grad_p[9][0] = 0.0; + + // for high-order will use (x^a y^b z^c) + + grad_p[0][1] = 0.0; + grad_p[1][1] = 0.0; + grad_p[2][1] = drdy; + grad_p[3][1] = 0.0; + grad_p[4][1] = 0.0; + grad_p[5][1] = r[0]*drdy; + grad_p[6][1] = 0.0; + grad_p[7][1] = 2.0*r[1]*drdy; + grad_p[8][1] = r[2]*drdy; + grad_p[9][1] = 0.0; + + // for high-order will use (x^a y^b z^c) + + grad_p[0][2] = 0.0; + grad_p[1][2] = 0.0; + grad_p[2][2] = 0.0; + grad_p[3][2] = drdz; + grad_p[4][2] = 0.0; + grad_p[5][2] = 0.0; + grad_p[6][2] = r[0]*drdz; + grad_p[7][2] = 0.0; + grad_p[8][2] = r[1]*drdz; + grad_p[9][2] = 2.0*r[2]*drdz; + + // for high-order will use (x^a y^b z^c) + + return; + } // end function +#else + // Polynomial basis up to quadratic in 3D (10 terms) + const size_t num_poly_basis = 4; + KOKKOS_INLINE_FUNCTION + void poly_basis(const double r[3], double *p) { + + p[0] = 1.0; + p[1] = r[0]; + p[2] = r[1]; + p[3] = r[2]; + + return; + } // end function + + + KOKKOS_INLINE_FUNCTION + void grad_poly_basis(const double r[3], double (*grad_p)[3], bool derivative_wrt_i) { + + double drdx = -1.0; // default is derivative with respect to i + double drdy = -1.0; // default is derivative with respect to i + double drdz = -1.0; // default is derivative with respect to i + if(derivative_wrt_i == false){ + drdx = 1.0; // derivative with respect to j + drdy = 1.0; // derivative with respect to j + drdz = 1.0; // derivative with respect to j + } // end if + + grad_p[0][0] = 0.0; + grad_p[1][0] = drdx; + grad_p[2][0] = 0.0; + grad_p[3][0] = 0.0; + + grad_p[0][1] = 0.0; + grad_p[1][1] = 0.0; + grad_p[2][1] = drdy; + grad_p[3][1] = 0.0; + + + grad_p[0][2] = 0.0; + grad_p[1][2] = 0.0; + grad_p[2][2] = 0.0; + grad_p[3][2] = drdz; + + return; + } // end function +#endif + + +void calc_basis_functions( + size_t point_gid, + const DCArrayKokkos & x, + const DCArrayKokkos points_num_neighbors, + const DRaggedRightArrayKokkos points_in_point, + const DCArrayKokkos & vol, + const CArrayKokkos & p_coeffs, + const DRaggedRightArrayKokkos & basis, + const double h) +{ + + //--------------------------------------------- + // walk over the neighboring points + //--------------------------------------------- + + FOR_ALL(neighbor_point_lid, 0, points_num_neighbors(point_gid), { + + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + + double p[num_poly_basis]; // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz] + double r[3]; // vecx_j - vecx_i + r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i + r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i + r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i + + double W = kernel(r, h); + poly_basis(r,p); + + double correction = 0.0; + for (size_t a = 0; a < num_poly_basis; ++a){ + correction += p_coeffs(point_gid,a) * p[a]; + } // end for a + + basis(point_gid, neighbor_point_lid) = W * correction; + + }); // neighbor_point_lid + + return; + +} // end function + + +// grad_C = -M^-1 * grad_M * C +// grad_M[a][b][i] = V (P[a] * grad_p[i][b] W + grad_p[i][a] P[b] W + P[a] P[b] grad_W[i]) +// -M^-1[d][k]*grad_M[k][b][i]*C[b] = [d][i] +// p[d] grad_C[d][i] +void calc_basis_and_grad_basis_functions( + const DCArrayKokkos & x, + const DCArrayKokkos points_num_neighbors, + const DRaggedRightArrayKokkos points_in_point, + const DCArrayKokkos & vol, + const CArrayKokkos & p_coeffs, + const CArrayKokkos & M_inv, + const DRaggedRightArrayKokkos & basis, + const DRaggedRightArrayKokkos & grad_basis, + const double h, + const bool derivative_wrt_i) +{ + + // dir + + // actual number of points + size_t num_points = x.dims(0); + + // loop over all nodes in the problem + FOR_ALL(point_gid, 0, num_points, { + + // -------------- + // Step 1: assemble grad M, the gradient of the moment matrix + + double grad_m[num_poly_basis][num_poly_basis][3]; + + for(size_t a=0;a& x, + const DCArrayKokkos points_num_neighbors, + const DRaggedRightArrayKokkos points_in_point, + const DCArrayKokkos & vol, + const CArrayKokkos & p_coeffs, + const CArrayKokkos & M_inv, + double h) +{ + + // actual number of points + size_t num_points = x.dims(0); + + + // loop over all nodes in the problem + FOR_ALL(point_gid, 0, num_points, { + + double M_1D[num_poly_basis*num_poly_basis]; + ViewCArrayKokkos M(&M_1D[0], num_poly_basis, num_poly_basis); + M.set_values(0.0); + + // values in rhs after this function will be accessed as p_coeffs(i,0:N) + ViewCArrayKokkos rhs (&p_coeffs(point_gid,0), num_poly_basis); + rhs.set_values(0.0); + rhs(0) = 1.0; // enforce reproduction of constant 1, everything else is = 0 + + double p[num_poly_basis]; // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz] + double r[3]; // vecx_j - vecx_i + + //--------------------------------------------- + // walk over the neighboring points + //--------------------------------------------- + + for (size_t neighbor_point_lid=0; neighbor_point_lid perm (&perm_1D[0], num_poly_basis); + for (size_t a = 0; a < num_poly_basis; ++a) { + perm(a)= 0; + } // end a + + double vv_1D[num_poly_basis]; + ViewCArrayKokkos vv(&vv_1D[0], num_poly_basis); + + // used for LU problem + int singular = 0; + int parity = 0; + singular = LU_decompose(M, perm, vv, parity); // M is returned as the LU matrix + if(singular==0){ + printf("ERROR: matrix is singluar \n"); + } + + + // -------------------------------------------------- + // things needed for gradient of the basis function + double col_1D[num_poly_basis]; + ViewCArrayKokkos col(&col_1D[0], num_poly_basis); + + // making a view, inverting only the matrix at point i + ViewCArrayKokkos M_inv_point(&M_inv(point_gid,0,0), num_poly_basis,num_poly_basis); + + LU_invert(M, // input matrix + perm, // permutations + M_inv_point, // inverse matrix at point gid + col); // tmp array + // ------------------------------------------------- + + // solve for p_coefs + LU_backsub(M, perm, rhs); // note: answer is sent back in rhs + + }); // end parallel loop + + + return; +} // end function + + + + + +int main(int argc, char *argv[]) +{ + Kokkos::initialize(argc, argv); + { + + printf("Pointcloud Reproducing Kernels \n\n"); + + + // define a point cloud + DCArrayKokkos point_positions(num_points, 3, "point_positions"); + DCArrayKokkos point_values(num_points, "point_values"); + + DCArrayKokkos vol(num_points); + vol.set_values(0.0); + + // point locations + if(RAND_CLOUD){ + srand(static_cast(time(0))); // Seed the random number generator + for(size_t i=0; i(rand())/static_cast(RAND_MAX); + point_positions.host(i, 1) = Y0 + LY*static_cast(rand())/static_cast(RAND_MAX); + point_positions.host(i, 2) = Z0 + LZ*static_cast(rand())/static_cast(RAND_MAX); + } + vol.set_values(1.0); + } + else { + + double dx = (LX-X0)/((double)num_1d_x - 1); + double dy = (LY-Y0)/((double)num_1d_y - 1); + double dz = (LZ-Z0)/((double)num_1d_z - 1); + + size_t point_gid = 0; + for(size_t k=0; k(i)*dx; + point_positions.host(point_gid, 1) = Y0 + static_cast(j)*dy; + point_positions.host(point_gid, 2) = Z0 + static_cast(k)*dz; + point_gid++; + } // end i + } // end j + } // end k + + + + const double elem_dx = LX/((double)num_1d_x); + const double elem_dy = LY/((double)num_1d_y); + const double elem_dz = LZ/((double)num_1d_z); + const double elem_vol = elem_dx*elem_dy*elem_dz; + + const size_t num_cells_1d_x = num_1d_x-1; + const size_t num_cells_1d_y = num_1d_y-1; + const size_t num_cells_1d_z = num_1d_z-1; + + FOR_ALL(k,0,num_cells_1d_z, + j,0,num_cells_1d_y, + i,0,num_cells_1d_x,{ + + for (int kcount=k; kcount<=k+1; kcount++){ + for (int jcount=j; jcount<=j+1; jcount++){ + for (int icount=i; icount<=i+1; icount++){ + size_t point_gid = get_gid(icount, jcount, kcount, num_1d_x, num_1d_y); + Kokkos::atomic_add(&vol(point_gid), elem_vol*0.25); + } // end i + } // end j + } // end k + + }); // end parallel over k,j,i + + + } // end if + vol.update_host(); + point_positions.update_device(); + Kokkos::fence(); + + // point values + FOR_ALL(i, 0, num_points, { + + //printf("point location at i=%d is (%f, %f, %f) \n", i, point_positions(i, 0), point_positions(i, 1), point_positions(i, 2)); + point_values(i) = sqrt(point_positions(i, 0)*point_positions(i, 0) + + point_positions(i, 1)*point_positions(i, 1) + + point_positions(i, 2)*point_positions(i, 2)); + + }); // end parallel for tri's in the file + point_values.update_host(); + Kokkos::fence(); + //printf("\n"); + + + // ---------------------------- + // Make bins here + // ---------------------------- + + printf("making bins \n"); + + // the number of nodes in the mesh + size_t num_bins_x = (size_t)( round( (LX - X0)/bin_dx) + 1 ); + size_t num_bins_y = (size_t)( round( (LY - Y0)/bin_dy) + 1 ); + size_t num_bins_z = (size_t)( round( (LZ - Z0)/bin_dz) + 1 ); + // bin_dx = (LX-X0)/(num_bins_x - 1); + // bin_dy = (LY-Y0)/(num_bins_y - 1); + // bin_dz = (LZ-Z0)/(num_bins_z - 1); + + + size_t num_bins = num_bins_x*num_bins_y*num_bins_z; + printf("num bins x=%zu, y=%zu, z=%zu \n", num_bins_x, num_bins_y, num_bins_z); + + // bins and their connectivity to each other and points + DCArrayKokkos keys_in_bin(num_bins, "keys_in_bin"); // mapping from gid to (i,j,k) + DCArrayKokkos num_points_in_bin(num_bins, "num_bins"); + num_points_in_bin.set_values(0); + DRaggedRightArrayKokkos points_in_bin; // allocated later + + + // connectivity from points to bins + DCArrayKokkos points_bin_gid(num_points, "points_in_gid"); + CArrayKokkos points_bin_lid_storage(num_points, "bin_lid_storage"); // only used to create storage + DCArrayKokkos points_bin_stencil(num_points, 6, "bin_stencil"); // how imin,imax,jmin,jmax,kmin,kmax range for bins in stencil + DCArrayKokkos points_num_neighbors(num_points, "num_neighbors"); + + printf("Starting timers \n\n"); + + + // start timer + auto time_1 = std::chrono::high_resolution_clock::now(); + + // build reverse mapping between gid and i,j,k + FOR_ALL(i, 0, num_bins_x, + j, 0, num_bins_y, + k, 0, num_bins_z, { + + + // get bin gid for this i,j,k + size_t bin_gid = get_gid(i, j, k, num_bins_x, num_bins_y); + + // the i,j,k for this bin + bin_keys_t bin_keys; + bin_keys.i = i; + bin_keys.j = j; + bin_keys.k = k; + + // save mapping from bin_gid to bin_keys i,j,k + keys_in_bin(bin_gid) = bin_keys; + + }); + Kokkos::fence(); + keys_in_bin.update_host(); + + // end timer + auto time_2 = std::chrono::high_resolution_clock::now(); + + + // ------------------------------------------------------------------- + // below here, these routine must be called every time particles move + // ------------------------------------------------------------------- + + // start timer + auto time_3 = std::chrono::high_resolution_clock::now(); + + printf("building neighbor point list \n"); + + // save bin id to points + FOR_ALL(point_gid, 0, num_points, { + + // get the 1D index for this bin + size_t bin_gid = get_bin_gid(point_positions(point_gid,0), + point_positions(point_gid,1), + point_positions(point_gid,2), + num_bins_x, + num_bins_y, + num_bins_z); + + size_t storage_lid = Kokkos::atomic_fetch_add(&num_points_in_bin(bin_gid), 1); + points_bin_gid(point_gid) = bin_gid; // the id of the bin + points_bin_lid_storage(point_gid) = storage_lid; // the storage place in the bin + + }); // end for all + Kokkos::fence(); + points_bin_gid.update_host(); + num_points_in_bin.update_host(); + + + // allocate points in bin connectivity + points_in_bin = DRaggedRightArrayKokkos (num_points_in_bin, "num_points_in_bin"); + + // save points in bin + FOR_ALL(point_gid, 0, num_points, { + + // get bin gid + size_t bin_gid = points_bin_gid(point_gid); + + // get it's storage location in the ragged right compressed storage + size_t storage_lid = points_bin_lid_storage(point_gid); + + // save the point to this bin + points_in_bin(bin_gid, storage_lid) = point_gid; + + }); // end for all + + + // for(size_t bin_gid=0; bin_gid 0){ + // size_t i = keys_in_bin(bin_gid).i; + // size_t j = keys_in_bin(bin_gid).j; + // size_t k = keys_in_bin(bin_gid).k; + + // double bin_x = ((double)i)*bin_dx; + // double bin_y = ((double)j)*bin_dy; + // double bin_z = ((double)k)*bin_dz; + // //printf("num points in bin = %zu, bin keys = (%zu, %zu, %zu), bin x = (%f, %f, %f) \n", + // // num_points_in_bin.host(bin_gid), i, j, k, bin_x, bin_y, bin_z); + // } + // } // end for + + + + // ------------------------------------------------ + // Find the neighbors around each point using bins + // ------------------------------------------------ + + FOR_ALL(point_gid, 0, num_points, { + + // get bin gid + size_t bin_gid = points_bin_gid(point_gid); + + // get i,j,k for this bin + bin_keys_t bin_keys = keys_in_bin(bin_gid); + // printf(" keys = %zu, %zu, %zu, bin size = %zu, %zu, %zu \n", + // bin_keys.i, bin_keys.j, bin_keys.k, + // num_bins_x, num_bins_y, num_bins_z); + + // loop over neighboring bins + size_t num_points_found; + + // establish the stencil size to get enough particles + for(int stencil=1; stencil<100000; stencil++){ + + num_points_found = 0; + + const int i = bin_keys.i; + const int j = bin_keys.j; + const int k = bin_keys.k; + + const int imin = MAX(0, i-stencil); + const int imax = MIN(num_bins_x-1, i+stencil); + + const int jmin = MAX(0, j-stencil); + const int jmax = MIN(num_bins_y-1, j+stencil); + + const int kmin = MAX(0, k-stencil); + const int kmax = MIN(num_bins_z-1, k+stencil); + + for (int kcount=kmin; kcount<=kmax; kcount++){ + for (int jcount=jmin; jcount<=jmax; jcount++) { + for (int icount=imin; icount<=imax; icount++){ + + // get bin neighbor gid + size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y); + num_points_found += num_points_in_bin(neighbor_bin_gid); + + } // end for kcount + } // end for jcount + } // end for icount + + // the min number of points required to solve the system is num_poly_basis+1, was 2*num_poly_basis + if (num_points_found >= num_points_fit || num_points_found==num_points){ + + const double x_pt_middle = bin_dx*((double)i) + X0; + const double y_pt_middle = bin_dy*((double)j) + Y0; + const double z_pt_middle = bin_dz*((double)k) + Z0; + + const double x_pt_minus = bin_dx*((double)imin) + X0; + const double y_pt_minus = bin_dy*((double)jmin) + Y0; + const double z_pt_minus = bin_dz*((double)kmin) + Z0; + + const double x_pt_plus = bin_dx*((double)imax) + X0; + const double y_pt_plus = bin_dy*((double)jmax) + Y0; + const double z_pt_plus = bin_dz*((double)kmax) + Z0; + + const double dist_minus = sqrt( (x_pt_minus - x_pt_middle)*(x_pt_minus - x_pt_middle) + + (y_pt_minus - y_pt_middle)*(y_pt_minus - y_pt_middle) + + (z_pt_minus - z_pt_middle)*(z_pt_minus - z_pt_middle) ); + + const double dist_plus = sqrt( (x_pt_plus - x_pt_middle)*(x_pt_plus - x_pt_middle) + + (y_pt_plus - y_pt_middle)*(y_pt_plus - y_pt_middle) + + (z_pt_plus - z_pt_middle)*(z_pt_plus - z_pt_middle) ); + + //printf("h = %f, dist_m = %f, dist_p = %f, num_points=%zu, imin = %d, imax = %d, jmin = %d, jmax = %d, kmin = %d, kmax = %d, \n", + // h_kernel, dist_minus, dist_plus, num_points_found, imin, imax, jmin, jmax, kmin, kmax); + + // only exit when we exceed kernel distance + if (dist_minus >= h_kernel || dist_plus >= h_kernel || num_points_found==num_points){ + + //printf("exiting \n\n"); + + points_bin_stencil(point_gid,0) = imin; + points_bin_stencil(point_gid,1) = imax; + points_bin_stencil(point_gid,2) = jmin; + points_bin_stencil(point_gid,3) = jmax; + points_bin_stencil(point_gid,4) = kmin; + points_bin_stencil(point_gid,5) = kmax; + + points_num_neighbors(point_gid) = num_points_found; // including node_i in the list of neighbors + points_num_neighbors(point_gid) = num_points_found - 1; // the -1 is because we counted point i as a neighbor + + break; + } + // else increase stencil size + + + } // end of check + + } // end for stencil + + + }); // end for all + Kokkos::fence(); + points_bin_stencil.update_host(); + + + + // account for stencels not overlapping, fixing assymetry in points connectivity + FOR_ALL(point_gid, 0, num_points, { + + // get bin gid for this point + size_t bin_gid = points_bin_gid(point_gid); + + // get i,j,k for this bin + bin_keys_t bin_keys = keys_in_bin(bin_gid); + + const int i = bin_keys.i; + const int j = bin_keys.j; + const int k = bin_keys.k; + + // walk over the stencil to get neighbors of this bin + const int imin = points_bin_stencil(point_gid,0); + const int imax = points_bin_stencil(point_gid,1); + const int jmin = points_bin_stencil(point_gid,2); + const int jmax = points_bin_stencil(point_gid,3); + const int kmin = points_bin_stencil(point_gid,4); + const int kmax = points_bin_stencil(point_gid,5); + + // loop over my bin stencil + for (int kcount=kmin; kcount<=kmax; kcount++){ + for (int jcount=jmin; jcount<=jmax; jcount++) { + for (int icount=imin; icount<=imax; icount++){ + + // get bin neighbor gid + size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y); + + // save the points in this bin + for(size_t neighbor_pt_lid=0; neighbor_pt_lid= neighbor_imin && i <= neighbor_imax) && + (j >= neighbor_jmin && j <= neighbor_jmax) && + (k >= neighbor_kmin && k <= neighbor_kmax); + + if(!inside){ + Kokkos::atomic_increment(&points_num_neighbors(neighbor_point_gid)); + // the other stencil didn't see my point because it was smaller, now it does see it + } + + } // neighbor_point_lid + + } // end for kcount + } // end for jcount + } // end for icount + + }); // end for all + Kokkos::fence(); + points_num_neighbors.update_host(); + + // allocate memory for points in point + DRaggedRightArrayKokkos points_in_point(points_num_neighbors, "points_in_point"); + points_num_neighbors.set_values(0); // this is a num saved counter now + + // --------------------- + // Save the neighbors + // --------------------- + + // find neighbors using bins + FOR_ALL(point_gid, 0, num_points, { + + // get bin gid for this point + size_t bin_gid = points_bin_gid(point_gid); + + // get i,j,k for this bin + bin_keys_t bin_keys = keys_in_bin(bin_gid); + + const int i = bin_keys.i; + const int j = bin_keys.j; + const int k = bin_keys.k; + + // walk over the stencil to get neighbors + int imin = points_bin_stencil(point_gid,0); + int imax = points_bin_stencil(point_gid,1); + int jmin = points_bin_stencil(point_gid,2); + int jmax = points_bin_stencil(point_gid,3); + int kmin = points_bin_stencil(point_gid,4); + int kmax = points_bin_stencil(point_gid,5); + + + for (int kcount=kmin; kcount<=kmax; kcount++){ + for (int jcount=jmin; jcount<=jmax; jcount++) { + for (int icount=imin; icount<=imax; icount++){ + + // get bin neighbor gid + size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y); + + // save the points in this bin + for(size_t neighbor_pt_lid=0; neighbor_pt_lid= neighbor_imin && i <= neighbor_imax) && + (j >= neighbor_jmin && j <= neighbor_jmax) && + (k >= neighbor_kmin && k <= neighbor_kmax); + + if(!inside){ + + size_t num_saved_neighbor = Kokkos::atomic_fetch_add(&points_num_neighbors(neighbor_point_gid), 1); + points_in_point(neighbor_point_gid, num_saved_neighbor) = point_gid; + // the other stencil didn't see my point because it was smaller, now it does see it + + } // end if + + } // end if neighbor != point_gid + + } // neighbor_point_lid + + } // end for kcount + } // end for jcount + } // end for icount + + }); // end for all + Kokkos::fence(); + points_in_point.update_host(); + + + // build the reverse map + DRaggedRightArrayKokkos reverse_neighbor_lid(points_num_neighbors); + + FOR_ALL(point_gid, 0, num_points, { + + for(int neighbor_point_lid = 0; neighbor_point_lid p_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point + + + + CArrayKokkos M_inv(num_points, num_poly_basis, num_poly_basis); + CArrayKokkos grad_M(num_points, num_poly_basis, num_poly_basis); + + DRaggedRightArrayKokkos basis(points_num_neighbors); // reproducing kernel basis (num_points, num_neighbors) + DRaggedRightArrayKokkos grad_basis_i(points_num_neighbors,3); // grad kernel basis j with respect to i (num_points, num_neighbors) + DRaggedRightArrayKokkos grad_basis_j(points_num_neighbors,3); // grad kernel basis i with respect to j (num_points, num_neighbors) + + + + double h = h_kernel; // kernel width + + + printf("building reproducing kernel coefficients \n"); + + // build coefficients on basis functions + calc_p_coefficients(point_positions, + points_num_neighbors, + points_in_point, + vol, + p_coeffs, + M_inv, + h); + + + calc_basis_and_grad_basis_functions( + point_positions, + points_num_neighbors, + points_in_point, + vol, + p_coeffs, + M_inv, + basis, + grad_basis_i, + h, + true); + + calc_basis_and_grad_basis_functions( + point_positions, + points_num_neighbors, + points_in_point, + vol, + p_coeffs, + M_inv, + basis, + grad_basis_j, + h, + false); + + // end timer + auto time_6 = std::chrono::high_resolution_clock::now(); + + + // ----------------- + // Timers + // ----------------- + printf("\n"); + std::chrono::duration ms = time_2 - time_1; + std::cout << "runtime to create bins = " << ms.count() << "ms\n\n"; + + ms = time_4 - time_3; + std::cout << "runtime to find and save neighbors = " << ms.count() << "ms\n\n"; + + ms = time_6 - time_5; + std::cout << "runtime to calculate basis and grad basis = " << ms.count() << "ms\n\n"; + + + + printf("Checking gradients at points \n\n"); + + // performing checks on p_coeffs, basis, and grad_basis + double partion_unity; + double partion_unity_lcl; + + double linear_preserving; + double linear_preserving_lcl; + + double quadratic_preserving; + double quadratic_preserving_lcl; + + double grad_x_p0; + double grad_x_p0_lcl; + double grad_y_p0; + double grad_y_p0_lcl; + double grad_z_p0; + double grad_z_p0_lcl; + + double grad_x_p1; + double grad_x_p1_lcl; + double grad_y_p1; + double grad_y_p1_lcl; + double grad_z_p1; + double grad_z_p1_lcl; + + double grad_x_p2; + double grad_x_p2_lcl; + double grad_y_p2; + double grad_y_p2_lcl; + double grad_z_p2; + double grad_z_p2_lcl; + + // loop over the particles in the domain + for(size_t point_gid=0; point_gid1e-13) + printf("partition unity = %f, ", partion_unity); + + if(fabs(linear_preserving-point_positions(point_gid,0))>1e-13) + printf("linear fcn error = %f, ", fabs(linear_preserving-point_positions(point_gid,0))); + + #if defined(P2) + if(fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0))>1e-13) + printf("quadratic fcn error = %f at i=%zu \n", fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0)), point_gid); + #endif + + // ----------------- + // gradient checks + // ----------------- + + // Sum(grad) = [0]; + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p0_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_x_p0_lcl += grad_basis_i(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid); + }, grad_x_p0); + + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p0_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_y_p0_lcl += grad_basis_i(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid); + }, grad_y_p0); + + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p0_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_z_p0_lcl += grad_basis_i(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid); + }, grad_z_p0); + + const double grad_check_P0 = fabs(grad_x_p0)+fabs(grad_y_p0)+fabs(grad_z_p0); + if(0.333*fabs(grad_check_P0)>1e-8) + printf("error in grad(P0) = %f, %f, %f, \n", grad_x_p0, grad_y_p0, grad_z_p0); + + + // Sum(grad(P1)) = [1]; + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p1_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_x_p1_lcl += grad_basis_i(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0); + }, grad_x_p1); + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p1_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_y_p1_lcl += grad_basis_i(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1); + }, grad_y_p1); + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p1_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_z_p1_lcl += grad_basis_i(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2); + }, grad_z_p1); + + const double grad_check_P1 = fabs(grad_x_p1 - 1.0)+fabs(grad_y_p1 - 1.0)+fabs(grad_z_p1 - 1.0); + if(0.333*fabs(grad_check_P1)>1e-8){ + printf("error in grad(P1) = %f, %f, %f, \n", + fabs(grad_x_p1 - 1.0), + fabs(grad_y_p1 - 1.0), + fabs(grad_z_p1 - 1.0)); + } + + + #if defined(P2) + // Sum(grad(P2)) = [2]; + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p2_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_x_p2_lcl += grad_basis_i(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0)*point_positions(neighbor_point_gid,0); + }, grad_x_p2); + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p2_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_y_p2_lcl += grad_basis_i(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1)*point_positions(neighbor_point_gid,1); + }, grad_y_p2); + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p2_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_z_p2_lcl += grad_basis_i(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2)*point_positions(neighbor_point_gid,2); + }, grad_z_p2); + + const double grad_check_P2 = fabs(grad_x_p2-2.0*point_positions(point_gid,0)) + fabs(grad_y_p2-2.0*point_positions(point_gid,1)) + fabs(grad_z_p2-2.0*point_positions(point_gid,2)); + if(0.333*fabs(grad_check_P2)>1e-8){ + printf("error in grad(P2) = %f, %f, %f, \n", + fabs(grad_x_p2-2.0*point_positions(point_gid,0)), + fabs(grad_y_p2-2.0*point_positions(point_gid,1)), + fabs(grad_z_p2-2.0*point_positions(point_gid,2))); + } + #endif + + } // end for point gid + + + if(check_maps){ + size_t bad = 0; + for (size_t i=0; i50) break; + } else { + size_t check = points_in_point(j, rev); + if (check != i) { + printf("WRONG reverse: i=%zu j=%zu rev=%zu check=%zu\n", i, j, rev, check); + ++bad; + if (bad>50) break; + } + } + } + if (bad>50) break; + } + if (bad==0) printf("reverse map OK\n"); + + + for (size_t i=0; i seen; + for (size_t lid=0; lid u(num_points, 3); + FOR_ALL(i, 0, num_points, { + u(i, 0) = point_positions(i, 0); + u(i, 1) = point_positions(i, 1); + u(i, 2) = point_positions(i, 2); + }); + u.update_device(); + + DCArrayKokkos div(num_points); + div.set_values(0.0); + + DCArrayKokkos div_fd(num_points); + div_fd.set_values(0.0); + + FOR_ALL(i_gid, 0, num_points, { + + for(size_t j_lid = 0; j_lid 1e-8){ + printf("div(u) = %f at point %zu, error = %g, vol = %f\n", div.host(point_gid), point_gid, error, vol.host(point_gid)); + } + } // end for point_gid + + for(size_t point_gid=0; point_gid 1e-8){ + printf("div_fd(u) = %f at point %zu, error = %g\n", div_fd.host(point_gid), point_gid, error); + } + } // end for point_gid + + + double conserve_check; + double conserve_check_lcl; + FOR_REDUCE_SUM(point_gid, 0, num_points, + conserve_check_lcl, { + + for(size_t neighbor_point_lid = 0; neighbor_point_lid div_sinx(num_points); + div_sinx.set_values(0.0); + + DCArrayKokkos div_fd_sinx(num_points); + div_fd_sinx.set_values(0.0); + + FOR_ALL(i_gid, 0, num_points, { + + for(size_t j_lid = 0; j_lid &A, // matrix A passed in and is sent out in LU decomp format + const ViewCArrayKokkos &perm, // permutations + const ViewCArrayKokkos &vv, + int &parity) { // parity (+1 or -1) + + const int n = A.dims(0); // size of matrix + + parity = 1; + + // helper variables + double temp; + + // search for the largest element in each row; save the scaling in the + // temporary array vv and return zero if the matrix is singular + for(size_t i = 0; i < n; i++) { + + double big = 0.; + for(size_t j = 0; j < n; j++){ + if((temp=fabs(A(i,j))) > big){ + big=temp; + } + } + + if(big == 0.0) return(0); + + vv(i) = big; + } + + // the main loop for the Crout's algorithm + for(size_t j = 0; j < n; j++) { + + // this is the part a) of the algorithm except for i==j + for(size_t i=0;ij + pivot search + for(size_t i = j; i < n; i++) { + + double sum = A(i,j); + + for(size_t k=0; k= big){ + big = temp; + imax = i; + } + } // end for i + + // interchange rows, if needed, change parity and the scale factor + if(imax != j) { + + for(size_t k = 0; k < n; k++){ + temp = A(imax,k); + A(imax,k) = A(j,k); + A(j,k) = temp; + } + + parity = -(parity); + vv(imax) = vv(j); + } + + // store the index + perm(j) = imax; + // if the pivot element is zero, the matrix is singular but for some + // applications a tiny number is desirable instead + + if(A(j,j) == 0.0){ + A(j,j) = TINY; + } + // finally, divide by the pivot element + + if(j &A, // input matrix A in LU decomp format + const ViewCArrayKokkos &perm, // permutations + const ViewCArrayKokkos &b){ // RHS and is answer x to Ax=B + + const int n = A.dims(0); // size of matrix + + int ii = -1; + + + // First step of backsubstitution; the only wrinkle is to unscramble + // the permutation order. Note: the algorithm is optimized for a + // possibility of large amount of zeroes in b + + for(size_t i = 0; i < n; i++) { + + size_t ip = perm(i); + + double sum = b(ip); + b(ip) = b(i); + + if(ii >= 0){ + for(size_t j = ii; j0){ + ii=i; // a nonzero element encounted + } + + b(i) = sum; + } // end loop i + + // the second step + for(int i=n-1; i>=0; i--) { + + double sum = b(i); + for(size_t j=i+1; j &A, // input matrix + ViewCArrayKokkos &perm, // permutations + ViewCArrayKokkos &inv_mat, // inverse matrix + ViewCArrayKokkos &col) { // tmp array + + const size_t n = A.dims(0); // size of matrix + + + for(size_t j = 0; j < n; j++){ + + for(size_t i = 0; i < n; i++){ + col(i) = 0.0; + } // end for i + + col(j) = 1.0; + LU_backsub(A, perm, col); + + for(size_t i = 0; i < n; i++){ + inv_mat(i,j) = col(i); + } // end for i + + } // end for j + + return; + +} // end function + +// ----------------------- +// LU determinant function +// Input: A filled in LUPDecompose; N - dimension. +// Output: determinate of original A matrix +// ----------------------- +KOKKOS_INLINE_FUNCTION +double LU_determinant( + ViewCArrayKokkos &A, // input matrix + const int parity){ // parity (+1 0r -1) + + const int n = A.dims(0); // size of matrix + + double res = (double)(parity); + + for(size_t j=0; j