From 4499a1bb28034cff9f49f9a7749e6c87d69d7253 Mon Sep 17 00:00:00 2001 From: Nathaniel Morgan Date: Thu, 14 Aug 2025 16:29:35 -0600 Subject: [PATCH 01/23] pointcloud example --- examples/CMakeLists.txt | 4 + examples/pointcloud/CMakeLists.txt | 21 + examples/pointcloud/graphics-reader.h | 759 ++++++++++++++++ examples/pointcloud/icosahedron_bin.stl | Bin 0 -> 1084 bytes examples/pointcloud/pointcloud.cpp | 1101 +++++++++++++++++++++++ 5 files changed, 1885 insertions(+) create mode 100644 examples/pointcloud/CMakeLists.txt create mode 100755 examples/pointcloud/graphics-reader.h create mode 100644 examples/pointcloud/icosahedron_bin.stl create mode 100755 examples/pointcloud/pointcloud.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index affcd031..c9deb9ba 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -128,6 +128,10 @@ if (KOKKOS) add_executable(qr_test test_qr_solve.cpp) target_link_libraries(qr_test ${LINKING_LIBRARIES}) + include_directories(pointcloud) + add_subdirectory(pointcloud) + + if (Matar_ENABLE_TRILINOS) add_executable(anndistributed ann_distributed.cpp) target_link_libraries(anndistributed ${LINKING_LIBRARIES}) diff --git a/examples/pointcloud/CMakeLists.txt b/examples/pointcloud/CMakeLists.txt new file mode 100644 index 00000000..bc6f7668 --- /dev/null +++ b/examples/pointcloud/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.18) + + +if (KOKKOS) + #find_package(Kokkos REQUIRED) #new + + add_definitions(-DHAVE_KOKKOS=1) + if (CUDA) + add_definitions(-DHAVE_CUDA=1) + elseif (HIP) + add_definitions(-DHAVE_HIP=1) + elseif (OPENMP) + add_definitions(-DHAVE_OPENMP=1) + elseif (THREADS) + add_definitions(-DHAVE_THREADS=1) + endif() + + + add_executable(pointcloud pointcloud.cpp) + target_link_libraries(pointcloud ${LINKING_LIBRARIES}) +endif(KOKKOS) diff --git a/examples/pointcloud/graphics-reader.h b/examples/pointcloud/graphics-reader.h new file mode 100755 index 00000000..7081de6b --- /dev/null +++ b/examples/pointcloud/graphics-reader.h @@ -0,0 +1,759 @@ +// ----------------------------------------------- +// routines to read a graphics file +// +// ----------------------------------------------- + +#include +#include +#include +#include +#include +#include + +#include "matar.h" + +using namespace mtr; + + + +// checks to see if a path exists +bool DoesPathExist(const std::string &s) +{ + struct stat buffer; + return (stat (s.c_str(), &buffer) == 0); +} + +// Code from stackover flow for string delimiter parsing +std::vector split (std::string s, std::string delimiter) { + size_t pos_start = 0, pos_end, delim_len = delimiter.length(); + std::string token; + std::vector res; + + while ((pos_end = s.find (delimiter, pos_start)) != std::string::npos) { + token = s.substr (pos_start, pos_end - pos_start); + pos_start = pos_end + delim_len; + res.push_back (token); + } + + res.push_back (s.substr (pos_start)); + return res; + +} // end of split + + +// retrieves multiple values between [ ] +std::vector extract_list(std::string str) { + + // replace '[' with a space and ']' with a space + std::replace(str.begin(), str.end(), '[', ' '); + std::replace(str.begin(), str.end(), ']', ' '); + + std::vector str_values; + std::vector values; + + // exact the str values into a vector + str_values = split(str, ","); + + // convert the text values into double values + for (auto &word : str_values) { + values.push_back( atof(word.c_str()) ); + } // end for + + return values; + +} // end of extract_list + + + +// from stack overflow on removing blanks in string +template +T remove_if(T beg, T end, P pred) +{ + T dest = beg; + for (T itr = beg;itr != end; ++itr) + if (!pred(*itr)) + *(dest++) = *itr; + return dest; +} + + + + + + + +// This function reads a VTK file +//-------------------------------------------------------- +// +/* + vtk mesh node ordering + 7--------6 + /| /| + / | / | + 4--------5 | + | | | | + | | | | + | 3-----|--2 + | / | / + |/ |/ + 0--------1 + + in the i,j,k format node order is + 0 = (i , j , k ) + 1 = (i+1, j , k ) + 2 = (i+1, j+1, k ) + 3 = (i , j+1, k ) + 4 = (i , j , k+1) + 5 = (i+1, j , k+1) + 6 = (i+1, j+1, k+1) + 7 = (i , j+1, k+1) + + The marching cubes ordering is + (i , j , k ) = 0 + (i+1, j , k ) = 1 + (i+1, j , k+1) = 5 + (i , j , k+1) = 4 + (i , j+1, k ) = 3 + (i+1, j+1, k ) = 2 + (i+1, j+1, k+1) = 6 + (i , j+1, k+1) = 7 + + */ +void readVTK(char * filename, + CArray &pt_coords, + CArray &elem_point_list, + CArray &pt_values, + int &num_points, + int &num_elems, + int &num_points_in_elem, + bool test_vtk_read, + bool verbose_vtk) +{ + + int i; // used for writing information to file + int point_id; // the global id for the point + int elem_id; // the global id for the elem + int this_point; // a local id for a point in a elem (0:7 for a Hexahedral elem) + + int num_dims = 3; + + + std::string token; + + bool found = false; + + std::ifstream in; // FILE *in; + in.open(filename); + + + // look for POINTS + i = 0; + while (found==false) { + std::string str; + std::string delimiter = " "; + std::getline(in, str); + std::vector v = split (str, delimiter); + + // looking for the following text: + // POINTS %d float + if(v[0] == "POINTS"){ + num_points = std::stoi(v[1]); + printf("Num nodes read in %d\n", num_points); + + found=true; + } // end if + + + if (i>1000){ + printf("ERROR: Failed to find POINTS \n"); + break; + } // end if + + i++; + } // end while + + + // allocate memory for point coords and values + pt_coords = CArray (num_points,3); + pt_values = CArray (num_points); + + + // read the point coordinates + for (point_id=0; point_id v = split (str, delimiter); + + for (int dim=0; dim<3; dim++){ + pt_coords(point_id,dim) = std::stod(v[dim]); // double + //if num_dims=2 skip the 3rd value + + // printing all the mesh coordinates + if (verbose_vtk) printf(" %f ", pt_coords(point_id,dim)); + } + if (verbose_vtk) printf("\n"); // printing a space for readability + + } // end for points + found=false; + + + if (verbose_vtk)printf("\n"); + if (verbose_vtk) printf("looking for CELLS \n"); + + // look for CELLS + i = 0; + while (found==false) { + std::string str; + std::getline(in, str); + + std::string delimiter = " "; + std::vector v = split (str, delimiter); + + // looking for the following text: + // CELLS num_elems size + if(v[0] == "CELLS"){ + num_elems = std::stoi(v[1]); + printf("Num elements read in %d\n", num_elems); + + found=true; + } // end if + + + if (i>1000){ + printf("ERROR: Failed to find CELLS \n"); + break; + } // end if + + i++; + } // end while + + + + // allocate memomry for points in each element + elem_point_list = CArray (num_elems,8); // 8 points in a hex + + + // read the point ids in the element + for (elem_id=0; elem_id v = split (str, delimiter); + num_points_in_elem = std::stoi(v[0]); + + for (this_point=0; this_point v = split (str, delimiter); + + // looking for the following text: + // CELLS num_elems size + if(v[0] == "CELL_TYPES"){ + + std::getline(in, str); + elem_type = std::stoi(str); + + found=true; + } // end if + + + if (i>1000){ + printf("ERROR: Failed to find elem_TYPE \n"); + break; + } // end if + + i++; + } // end while + + if (verbose_vtk) printf("elem type = %d \n", elem_type); + // elem types: + // linear hex = 12, linear quad = 9 + found=false; + + // verify mesh has hexahedral elements, which have 8 nodes + if(num_points_in_elem != 8) { + printf("wrong elem type of %d \n", elem_type); + } + + + + // look for the point_var in the POINT_DATA heading + i = 0; + while (found==false) { + std::string str; + std::string delimiter = " "; + std::getline(in, str); + std::vector v = split (str, delimiter); + + // looking for the following text: + // POINT_DATA num_points + if(v[1] == "point_var"){ + + std::getline(in, str); // read next line -- its LOOKUP_TABLE + + // + for(int point_id=0; point_id10000000){ + printf("ERROR: Failed to find point_var in POINT_DATA \n"); + break; + } // end if + + i++; + } // end while + + + found=false; + + + + // testing the file read by painting a part + if(test_vtk_read == true){ + for(int node_id = 0; node_id &pt_coords, + CArray &elem_point_list, + CArray &pt_values, + int &num_points, + int &num_elems, + int &num_points_in_elem, + bool test_tecplot_read, + bool verbose_vtk) +{ + + int i; // used for writing information to file + int point_id; // the global id for the point + int elem_id; // the global id for the elem + + int num_dims = 3; + + + std::string token; + + bool found = false; + + std::ifstream in; // FILE *in; + in.open(filename); + + + // look for POINTS + i = 0; // lines in the file + while (found==false) { + std::string str; + std::string delimiter = ","; + std::getline(in, str); + std::vector v = split (str, delimiter); + + bool found_nodes = false; + bool found_elems = false; + + // loop over the parsed text stored in v and + // am now looking for the following text: + // NODES %d float + for (int text=0; text words = split (v[text], delimiter_words); + + + + for(int a_word=0; a_word=2) found=true; + + if (i>1000){ + printf("ERROR: Failed to find NODES and ELEMENTS \n"); + break; + } // end if + + i++; + } // end while + + + // allocate memory for point coords and values + pt_coords = CArray (num_points,3); + pt_values = CArray (num_points); + + + printf("starting the x,y,z point read and density value \n"); + + // read the point coordinates + for (point_id=0; point_id v = split (str, delimiter); + + int column = 0; + for (int text=0; text 0 && column==3){ + pt_values(point_id) = std::stod(v[text]); // double + + if (verbose_vtk) printf(" %f \n", pt_values(point_id)); + + break; // exit after reading the density on this line + } // end if column is density + + + // if there is text, then it is a number + if(v[text].size() > 0 && column<3){ + + + pt_coords(point_id,column) = std::stod(v[text]); // double + //if num_dims=2 skip the 3rd value + + // printing all the mesh coordinates + if (verbose_vtk) printf(" %f ", pt_coords(point_id,column)); + + column++; // this will make the column = 3 after all dims are saved + } // end if to save coordinates + + + } // end for over the partitioned test + + } // end for points + found=false; + + + + + // allocate memomry for points in each element + num_points_in_elem = 8; + elem_point_list = CArray (num_elems,8); // 8 points in a hex + + + // read the point ids in the element + if (verbose_vtk) printf("Reading the nodes in the element\n"); + for (elem_id=0; elem_id v = split (str, delimiter); + + + int column = 0; + for (int text=0; text 0 && column<8){ + + + elem_point_list(elem_id,column) = std::stod(v[text]) - 1; // Fortran convention + //if num_dims=2 skip the 3rd value + + // printing all the mesh coordinates + // printing details on nodes in the element + if (verbose_vtk) printf(" %d ", elem_point_list(elem_id,column) ); + + column++; // this will make the column = 8 after all node values are saved + } // end if to save points in this element + + + } // end loop over the text + + } // end for + found=false; + + if (verbose_vtk) printf("\n"); // printing a space for readability + + + + + // testing the file read by painting a part + if(test_tecplot_read == true){ + for(int node_id = 0; node_id &pt_coords, + CArray &elem_point_list, + int num_points, + int num_elems, + int num_points_in_elem, + CArray &pt_values) +{ + + int GraphicsNumber = 0; + double Time = 0.0; + + + int i; // used for writing information to file + int point_id; // the global id for the point + int elem_id; // the global id for the elem + int this_point; // a local id for a point in a elem (0:7 for a Hexahedral elem) + + + FILE *out[20]; // the output files that are written to + char name[100]; // char string + + + + std::string directory = "vtk"; + bool path = DoesPathExist(directory); + + // Create the folders for the ensight files + if (path==false) { + i=system("mkdir vtk"); + } + + + + + /* + --------------------------------------------------------------------------- + Write the Geometry file + --------------------------------------------------------------------------- + */ + + + snprintf(name, sizeof(name), "vtk/mesh.vtk"); // mesh file + + + out[0]=fopen(name,"w"); + + + fprintf(out[0],"# vtk DataFile Version 2.0\n"); // part 2 + fprintf(out[0],"Mesh for Fierro\n"); // part 2 + fprintf(out[0],"ASCII \n"); // part 3 + fprintf(out[0],"DATASET UNSTRUCTURED_GRID\n\n"); // part 4 + + fprintf(out[0],"POINTS %d float\n", num_points); + + + // write all components of the point coordinates + for (point_id=0; point_idrvnWfmQJjwBJdMutNq#lSM@e>ZIUWw9 z<^5)NyLgyyH`VNJStX$LzEkJi;`PMKC@^N9+amf}m2zDE{k%^x4{ok?kGYOHw~v44 z#oPDClo^HwiqK%oS2GUG=%JJq>XItY3`x|WOd;Z$-$M~Yf=r6tPH^XNJ{)N>dno;2x}B%wUwQ5!deGg+Q5h z)UX)?@8lS;j!r)?oAA FxqppTS~UOw literal 0 HcmV?d00001 diff --git a/examples/pointcloud/pointcloud.cpp b/examples/pointcloud/pointcloud.cpp new file mode 100755 index 00000000..963bffc1 --- /dev/null +++ b/examples/pointcloud/pointcloud.cpp @@ -0,0 +1,1101 @@ +// ----------------------------------------------- +// pointcloud reconstrution in C++ +// credit to Andrew Morgan and Nathaniel Morgan +// +// To run the code with an external mesh file: +// ./a.out graphics-file +// +// Requires the matar.h and macros.h libraries from +// the github LANL/MATAR/src folder +// +// The following rountines in this code came from: +// https://paulbourke.net/geometry/polygonise/ +// - Polygonise +// - VertexInterp +// +// +// The surface reconstruction method is from +// Reconstruction and Representation of 3D Objects with Radial BasisFunctions +// J. Carr, R. Beatson, ..., T. Evans +// https://www.cs.jhu.edu/~misha/Fall05/Papers/carr01.pdf +// ----------------------------------------------- +#include +#include +#include +#include +#include + + +#include "matar.h" + +#include "lu_solver.hpp" + + +using namespace mtr; + + +// ----------------------------------------------- +// inputs: + + +// the number of nodes in the mesh +const double dx = 0.015; // resolution +const double dy = 0.015; // resolution +const double dz = 0.015; // resolution + + +// the mesh dimensions +// length of the domain is 5 for crazy shape and 1 for sphere +const double XMax = 1.0; +const double YMax = 1.0; +const double ZMax = 1.0; + +const double X0 = 0.0; +const double Y0 = 0.0; +const double Z0 = 0.0; + + +const double isoLevel=0.0; // contour to extract + + +// +// ----------------------------------------------- + + +std::tuple< + CArray, // normal + CArray, CArray, CArray, // v1X, v1Y, v1Z + CArray, CArray, CArray, // v2X, v2Y, v2Z + CArray, CArray, CArray, // v3X, v3Y, v3Z + size_t // n_facets +> +binary_stl_reader(const std::string& path) +{ + std::ifstream in(path, std::ios::binary | std::ios::ate); + if (!in) { std::perror("open"); std::exit(EXIT_FAILURE); } + + const std::streamoff filesize = in.tellg(); + if (filesize < 100) { + std::cerr << "ERROR: File too small to be a valid STL\n"; + std::exit(EXIT_FAILURE); + } + in.seekg(0); + + // ---- check if ASCII ------------------------------------------------- + char magic[6] = { 0 }; + in.read(magic, 5); // read first 5 chars + in.seekg(0); // rewind + if (std::strncmp(magic, "solid", 5) == 0) { + std::cerr + << "ERROR: \"" << path + << "\" looks like an **ASCII** STL (starts with \"solid\").\n" + << "Re‑export it as *binary* or implement an ASCII parser.\n"; + std::exit(EXIT_FAILURE); // or call ascii_stl_reader(); + } + + // ---- read 80‑byte header + nominal facet count ---------------------- + char header[80]; in.read(header, 80); + size_t n_facets_nominal; in.read(reinterpret_cast(&n_facets_nominal), 4); + + // ---- compute expected count from file size to sanity‑check ---------- + // binary facet record = 50 bytes (12×4 + 12×4 + 12×4 + 2) + const size_t n_facets_from_size = + static_cast((filesize - 84) / 50); + + size_t n_facets = n_facets_nominal; + if (n_facets_nominal != n_facets_from_size) { + std::cout << "WARNING: facet count in header (" << n_facets_nominal + << ") disagrees with file size (" << n_facets_from_size + << "). Using size‑derived value.\n"; + n_facets = n_facets_from_size; + } + std::cout << "STL facets: " << n_facets << '\n'; + + // ---- allocate MATAR arrays ----------------------------------------- + CArray normal(n_facets, 3); + CArray v1X(n_facets), v1Y(n_facets), v1Z(n_facets); + CArray v2X(n_facets), v2Y(n_facets), v2Z(n_facets); + CArray v3X(n_facets), v3Y(n_facets), v3Z(n_facets); + + // ---- read facet records -------------------------------------------- + float nrm[3], v1[3], v2[3], v3[3]; + for (unsigned int i = 0; i < n_facets; ++i) { + in.read(reinterpret_cast(nrm), 12); + in.read(reinterpret_cast(v1), 12); + in.read(reinterpret_cast(v2), 12); + in.read(reinterpret_cast(v3), 12); + in.ignore(2); // attribute byte count + + for (int d = 0; d < 3; ++d) normal(i, d) = nrm[d]; + v1X(i) = v1[0]; v1Y(i) = v1[1]; v1Z(i) = v1[2]; + v2X(i) = v2[0]; v2Y(i) = v2[1]; v2Z(i) = v2[2]; + v3X(i) = v3[0]; v3Y(i) = v3[1]; v3Z(i) = v3[2]; + } + return { normal,v1X,v1Y,v1Z,v2X,v2Y,v2Z,v3X,v3Y,v3Z,n_facets }; +} + + + + + +// a vector type with 3 components +struct vec_t{ + double x; + double y; + double z; + + // default constructor + vec_t (){}; + + // overloaded constructor + vec_t(double x_in, double y_in, double z_in){ + x = x_in; + y = y_in; + z = z_in; + }; + +}; // end vec_t + + +// a triangle data type +struct triangle_t { + + vec_t normal; // surface normal + + vec_t p[3]; // three nodes with x,y,z coords + + // default constructor + triangle_t(){}; + + // overloaded constructor + triangle_t (vec_t p_in[3]) + { + p[0]=p_in[0]; + p[1]=p_in[1]; + p[2]=p_in[2]; + }; + +}; // end triangle_t + + +// calculate the surface normal of a triangle +KOKKOS_INLINE_FUNCTION +void calc_normal(triangle_t *triangle){ + + //A = p1 - p0; + //B = p2 - p0; + vec_t A; + A.x = triangle->p[1].x - triangle->p[0].x; + A.y = triangle->p[1].y - triangle->p[0].y; + A.z = triangle->p[1].z - triangle->p[0].z; + + vec_t B; + B.x = triangle->p[2].x - triangle->p[0].x; + B.y = triangle->p[2].y - triangle->p[0].y; + B.z = triangle->p[2].z - triangle->p[0].z; + + vec_t N; + N.x = A.y * B.z - A.z * B.y; + N.y = A.z * B.x - A.x * B.z; + N.z = A.x * B.y - A.y * B.x; + + double mag; + mag = sqrt(N.x*N.x + N.y*N.y + N.z*N.z); + + // save the unit normal + triangle->normal.x = N.x/mag; + triangle->normal.y = N.y/mag; + triangle->normal.z = N.z/mag; + +} // end normal + + +struct gridcell_t { + + vec_t* p; + double* val; + + // default constructor + gridcell_t(){}; + + // overloaded constructor + gridcell_t (vec_t p_in[8], double val_in[8]) + { + p=p_in; + val=val_in; + }; + +}; // end gridcell_t + + +/* + Linearly interpolate the position where an isosurface cuts + an edge between two vertices, each with their own scalar value +*/ +KOKKOS_INLINE_FUNCTION +vec_t VertexInterp(double isolevel, vec_t p1, vec_t p2, double valp1, double valp2) +{ + double mu; + vec_t p; + + if (fabs(isolevel-valp1) < 0.00001) + return(p1); + if (fabs(isolevel-valp2) < 0.00001) + return(p2); + if (fabs(valp1-valp2) < 0.00001) + return(p1); + mu = (isolevel - valp1) / (valp2 - valp1); + p.x = p1.x + mu * (p2.x - p1.x); + p.y = p1.y + mu * (p2.y - p1.y); + p.z = p1.z + mu * (p2.z - p1.z); + + return(p); +} + +/* + Given a grid cell and an isolevel, calculate the triangular + facets required to represent the isosurface through the cell. + Return the number of triangular facets, the array "triangles" + will be loaded up with the vertices at most 5 triangular facets. + 0 will be returned if the grid cell is either totally above + of totally below the isolevel. +*/ +KOKKOS_INLINE_FUNCTION +int Polygonise(gridcell_t grid, double isolevel, triangle_t *triangles) +{ + + int i,ntriang; + int cubeindex; + vec_t vertlist[12]; + + int edgeTable[256]={ + 0x0 , 0x109, 0x203, 0x30a, 0x406, 0x50f, 0x605, 0x70c, + 0x80c, 0x905, 0xa0f, 0xb06, 0xc0a, 0xd03, 0xe09, 0xf00, + 0x190, 0x99 , 0x393, 0x29a, 0x596, 0x49f, 0x795, 0x69c, + 0x99c, 0x895, 0xb9f, 0xa96, 0xd9a, 0xc93, 0xf99, 0xe90, + 0x230, 0x339, 0x33 , 0x13a, 0x636, 0x73f, 0x435, 0x53c, + 0xa3c, 0xb35, 0x83f, 0x936, 0xe3a, 0xf33, 0xc39, 0xd30, + 0x3a0, 0x2a9, 0x1a3, 0xaa , 0x7a6, 0x6af, 0x5a5, 0x4ac, + 0xbac, 0xaa5, 0x9af, 0x8a6, 0xfaa, 0xea3, 0xda9, 0xca0, + 0x460, 0x569, 0x663, 0x76a, 0x66 , 0x16f, 0x265, 0x36c, + 0xc6c, 0xd65, 0xe6f, 0xf66, 0x86a, 0x963, 0xa69, 0xb60, + 0x5f0, 0x4f9, 0x7f3, 0x6fa, 0x1f6, 0xff , 0x3f5, 0x2fc, + 0xdfc, 0xcf5, 0xfff, 0xef6, 0x9fa, 0x8f3, 0xbf9, 0xaf0, + 0x650, 0x759, 0x453, 0x55a, 0x256, 0x35f, 0x55 , 0x15c, + 0xe5c, 0xf55, 0xc5f, 0xd56, 0xa5a, 0xb53, 0x859, 0x950, + 0x7c0, 0x6c9, 0x5c3, 0x4ca, 0x3c6, 0x2cf, 0x1c5, 0xcc , + 0xfcc, 0xec5, 0xdcf, 0xcc6, 0xbca, 0xac3, 0x9c9, 0x8c0, + 0x8c0, 0x9c9, 0xac3, 0xbca, 0xcc6, 0xdcf, 0xec5, 0xfcc, + 0xcc , 0x1c5, 0x2cf, 0x3c6, 0x4ca, 0x5c3, 0x6c9, 0x7c0, + 0x950, 0x859, 0xb53, 0xa5a, 0xd56, 0xc5f, 0xf55, 0xe5c, + 0x15c, 0x55 , 0x35f, 0x256, 0x55a, 0x453, 0x759, 0x650, + 0xaf0, 0xbf9, 0x8f3, 0x9fa, 0xef6, 0xfff, 0xcf5, 0xdfc, + 0x2fc, 0x3f5, 0xff , 0x1f6, 0x6fa, 0x7f3, 0x4f9, 0x5f0, + 0xb60, 0xa69, 0x963, 0x86a, 0xf66, 0xe6f, 0xd65, 0xc6c, + 0x36c, 0x265, 0x16f, 0x66 , 0x76a, 0x663, 0x569, 0x460, + 0xca0, 0xda9, 0xea3, 0xfaa, 0x8a6, 0x9af, 0xaa5, 0xbac, + 0x4ac, 0x5a5, 0x6af, 0x7a6, 0xaa , 0x1a3, 0x2a9, 0x3a0, + 0xd30, 0xc39, 0xf33, 0xe3a, 0x936, 0x83f, 0xb35, 0xa3c, + 0x53c, 0x435, 0x73f, 0x636, 0x13a, 0x33 , 0x339, 0x230, + 0xe90, 0xf99, 0xc93, 0xd9a, 0xa96, 0xb9f, 0x895, 0x99c, + 0x69c, 0x795, 0x49f, 0x596, 0x29a, 0x393, 0x99 , 0x190, + 0xf00, 0xe09, 0xd03, 0xc0a, 0xb06, 0xa0f, 0x905, 0x80c, + 0x70c, 0x605, 0x50f, 0x406, 0x30a, 0x203, 0x109, 0x0 }; + + int triTable[256][16] = + {{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 8, 3, 9, 8, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 2, 10, 0, 2, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 8, 3, 2, 10, 8, 10, 9, 8, -1, -1, -1, -1, -1, -1, -1}, + {3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 11, 2, 8, 11, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 11, 2, 1, 9, 11, 9, 8, 11, -1, -1, -1, -1, -1, -1, -1}, + {3, 10, 1, 11, 10, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 10, 1, 0, 8, 10, 8, 11, 10, -1, -1, -1, -1, -1, -1, -1}, + {3, 9, 0, 3, 11, 9, 11, 10, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 3, 0, 7, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 1, 9, 4, 7, 1, 7, 3, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 4, 7, 3, 0, 4, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1}, + {9, 2, 10, 9, 0, 2, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, + {2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, -1, -1, -1, -1}, + {8, 4, 7, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 4, 7, 11, 2, 4, 2, 0, 4, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 1, 8, 4, 7, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, + {4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, -1, -1, -1, -1}, + {3, 10, 1, 3, 11, 10, 7, 8, 4, -1, -1, -1, -1, -1, -1, -1}, + {1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, -1, -1, -1, -1}, + {4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, -1, -1, -1, -1}, + {4, 7, 11, 4, 11, 9, 9, 11, 10, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 5, 4, 1, 5, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 5, 4, 8, 3, 5, 3, 1, 5, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 1, 2, 10, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, + {5, 2, 10, 5, 4, 2, 4, 0, 2, -1, -1, -1, -1, -1, -1, -1}, + {2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, -1, -1, -1, -1}, + {9, 5, 4, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 11, 2, 0, 8, 11, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, + {0, 5, 4, 0, 1, 5, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, + {2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, -1, -1, -1, -1}, + {10, 3, 11, 10, 1, 3, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, -1, -1, -1, -1}, + {5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, -1, -1, -1, -1}, + {5, 4, 8, 5, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1}, + {9, 7, 8, 5, 7, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 3, 0, 9, 5, 3, 5, 7, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 7, 8, 0, 1, 7, 1, 5, 7, -1, -1, -1, -1, -1, -1, -1}, + {1, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 7, 8, 9, 5, 7, 10, 1, 2, -1, -1, -1, -1, -1, -1, -1}, + {10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, -1, -1, -1, -1}, + {8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, -1, -1, -1, -1}, + {2, 10, 5, 2, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1}, + {7, 9, 5, 7, 8, 9, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, -1, -1, -1, -1}, + {2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, -1, -1, -1, -1}, + {11, 2, 1, 11, 1, 7, 7, 1, 5, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, -1, -1, -1, -1}, + {5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, -1}, + {11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, -1}, + {11, 10, 5, 7, 11, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 1, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 8, 3, 1, 9, 8, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 5, 2, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 5, 1, 2, 6, 3, 0, 8, -1, -1, -1, -1, -1, -1, -1}, + {9, 6, 5, 9, 0, 6, 0, 2, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, -1, -1, -1, -1}, + {2, 3, 11, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 0, 8, 11, 2, 0, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, -1, -1, -1, -1}, + {6, 3, 11, 6, 5, 3, 5, 1, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, -1, -1, -1, -1}, + {3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, -1, -1, -1, -1}, + {6, 5, 9, 6, 9, 11, 11, 9, 8, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 3, 0, 4, 7, 3, 6, 5, 10, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 5, 10, 6, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, + {10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, -1, -1, -1, -1}, + {6, 1, 2, 6, 5, 1, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, -1, -1, -1, -1}, + {8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, -1, -1, -1, -1}, + {7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, -1}, + {3, 11, 2, 7, 8, 4, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, -1, -1, -1, -1}, + {0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1}, + {9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, -1}, + {8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, -1, -1, -1, -1}, + {5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, -1}, + {0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, -1}, + {6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, -1, -1, -1, -1}, + {10, 4, 9, 6, 4, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 10, 6, 4, 9, 10, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1}, + {10, 0, 1, 10, 6, 0, 6, 4, 0, -1, -1, -1, -1, -1, -1, -1}, + {8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, -1, -1, -1, -1}, + {1, 4, 9, 1, 2, 4, 2, 6, 4, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, -1, -1, -1, -1}, + {0, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 3, 2, 8, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1}, + {10, 4, 9, 10, 6, 4, 11, 2, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, -1, -1, -1, -1}, + {3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, -1, -1, -1, -1}, + {6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, -1}, + {9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, -1, -1, -1, -1}, + {8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, -1}, + {3, 11, 6, 3, 6, 0, 0, 6, 4, -1, -1, -1, -1, -1, -1, -1}, + {6, 4, 8, 11, 6, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 10, 6, 7, 8, 10, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1}, + {0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, -1, -1, -1, -1}, + {10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, -1, -1, -1, -1}, + {10, 6, 7, 10, 7, 1, 1, 7, 3, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, -1, -1, -1, -1}, + {2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, -1}, + {7, 8, 0, 7, 0, 6, 6, 0, 2, -1, -1, -1, -1, -1, -1, -1}, + {7, 3, 2, 6, 7, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, -1, -1, -1, -1}, + {2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, -1}, + {1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, -1}, + {11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, -1, -1, -1, -1}, + {8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, -1}, + {0, 9, 1, 11, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, -1, -1, -1, -1}, + {7, 11, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 1, 9, 8, 3, 1, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, + {10, 1, 2, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 3, 0, 8, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, + {2, 9, 0, 2, 10, 9, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, + {6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, -1, -1, -1, -1}, + {7, 2, 3, 6, 2, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 0, 8, 7, 6, 0, 6, 2, 0, -1, -1, -1, -1, -1, -1, -1}, + {2, 7, 6, 2, 3, 7, 0, 1, 9, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, -1, -1, -1, -1}, + {10, 7, 6, 10, 1, 7, 1, 3, 7, -1, -1, -1, -1, -1, -1, -1}, + {10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, -1, -1, -1, -1}, + {0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, -1, -1, -1, -1}, + {7, 6, 10, 7, 10, 8, 8, 10, 9, -1, -1, -1, -1, -1, -1, -1}, + {6, 8, 4, 11, 8, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 6, 11, 3, 0, 6, 0, 4, 6, -1, -1, -1, -1, -1, -1, -1}, + {8, 6, 11, 8, 4, 6, 9, 0, 1, -1, -1, -1, -1, -1, -1, -1}, + {9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, -1, -1, -1, -1}, + {6, 8, 4, 6, 11, 8, 2, 10, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, -1, -1, -1, -1}, + {4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, -1, -1, -1, -1}, + {10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, -1}, + {8, 2, 3, 8, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1}, + {0, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, -1, -1, -1, -1}, + {1, 9, 4, 1, 4, 2, 2, 4, 6, -1, -1, -1, -1, -1, -1, -1}, + {8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, -1, -1, -1, -1}, + {10, 1, 0, 10, 0, 6, 6, 0, 4, -1, -1, -1, -1, -1, -1, -1}, + {4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, -1}, + {10, 9, 4, 6, 10, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 5, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 4, 9, 5, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 0, 1, 5, 4, 0, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, + {11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, -1, -1, -1, -1}, + {9, 5, 4, 10, 1, 2, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, + {6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, -1, -1, -1, -1}, + {7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, -1, -1, -1, -1}, + {3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, -1}, + {7, 2, 3, 7, 6, 2, 5, 4, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, -1, -1, -1, -1}, + {3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, -1, -1, -1, -1}, + {6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, -1}, + {9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, -1, -1, -1, -1}, + {1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, -1}, + {4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, -1}, + {7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, -1, -1, -1, -1}, + {6, 9, 5, 6, 11, 9, 11, 8, 9, -1, -1, -1, -1, -1, -1, -1}, + {3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, -1, -1, -1, -1}, + {0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, -1, -1, -1, -1}, + {6, 11, 3, 6, 3, 5, 5, 3, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, -1, -1, -1, -1}, + {0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, -1}, + {11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, -1}, + {6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, -1, -1, -1, -1}, + {5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, -1, -1, -1, -1}, + {9, 5, 6, 9, 6, 0, 0, 6, 2, -1, -1, -1, -1, -1, -1, -1}, + {1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, -1}, + {1, 5, 6, 2, 1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, -1}, + {10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, -1, -1, -1, -1}, + {0, 3, 8, 5, 6, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {10, 5, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 5, 10, 7, 5, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 5, 10, 11, 7, 5, 8, 3, 0, -1, -1, -1, -1, -1, -1, -1}, + {5, 11, 7, 5, 10, 11, 1, 9, 0, -1, -1, -1, -1, -1, -1, -1}, + {10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, -1, -1, -1, -1}, + {11, 1, 2, 11, 7, 1, 7, 5, 1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, -1, -1, -1, -1}, + {9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, -1, -1, -1, -1}, + {7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, -1}, + {2, 5, 10, 2, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1}, + {8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, -1, -1, -1, -1}, + {9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, -1, -1, -1, -1}, + {9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, -1}, + {1, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 7, 0, 7, 1, 1, 7, 5, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 3, 9, 3, 5, 5, 3, 7, -1, -1, -1, -1, -1, -1, -1}, + {9, 8, 7, 5, 9, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {5, 8, 4, 5, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1}, + {5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, -1, -1, -1, -1}, + {0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, -1, -1, -1, -1}, + {10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, -1}, + {2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, -1, -1, -1, -1}, + {0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, -1}, + {0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, -1}, + {9, 4, 5, 2, 11, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, -1, -1, -1, -1}, + {5, 10, 2, 5, 2, 4, 4, 2, 0, -1, -1, -1, -1, -1, -1, -1}, + {3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, -1}, + {5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, -1, -1, -1, -1}, + {8, 4, 5, 8, 5, 3, 3, 5, 1, -1, -1, -1, -1, -1, -1, -1}, + {0, 4, 5, 1, 0, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, -1, -1, -1, -1}, + {9, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 11, 7, 4, 9, 11, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, -1, -1, -1, -1}, + {1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, -1, -1, -1, -1}, + {3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, -1}, + {4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, -1, -1, -1, -1}, + {9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, -1}, + {11, 7, 4, 11, 4, 2, 2, 4, 0, -1, -1, -1, -1, -1, -1, -1}, + {11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, -1, -1, -1, -1}, + {2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, -1, -1, -1, -1}, + {9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, -1}, + {3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, -1}, + {1, 10, 2, 8, 7, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 1, 4, 1, 7, 7, 1, 3, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, -1, -1, -1, -1}, + {4, 0, 3, 7, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 8, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 9, 3, 9, 11, 11, 9, 10, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 10, 0, 10, 8, 8, 10, 11, -1, -1, -1, -1, -1, -1, -1}, + {3, 1, 10, 11, 3, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 11, 1, 11, 9, 9, 11, 8, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, -1, -1, -1, -1}, + {0, 2, 11, 8, 0, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 2, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 8, 2, 8, 10, 10, 8, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 10, 2, 0, 9, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, -1, -1, -1, -1}, + {1, 10, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 3, 8, 9, 1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 9, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}; + + + /* + Determine the index into the edge table which + tells us which vertices are inside of the surface + */ + cubeindex = 0; + + if (grid.val[0] < isolevel) cubeindex |= 1; + if (grid.val[1] < isolevel) cubeindex |= 2; + if (grid.val[2] < isolevel) cubeindex |= 4; + if (grid.val[3] < isolevel) cubeindex |= 8; + if (grid.val[4] < isolevel) cubeindex |= 16; + if (grid.val[5] < isolevel) cubeindex |= 32; + if (grid.val[6] < isolevel) cubeindex |= 64; + if (grid.val[7] < isolevel) cubeindex |= 128; + + + + /* Cube is entirely in/out of the surface */ + if (edgeTable[cubeindex] == 0) + return(0); + + /* Find the vertices where the surface intersects the cube */ + if (edgeTable[cubeindex] & 1) + vertlist[0] = + VertexInterp(isolevel,grid.p[0],grid.p[1],grid.val[0],grid.val[1]); + if (edgeTable[cubeindex] & 2) + vertlist[1] = + VertexInterp(isolevel,grid.p[1],grid.p[2],grid.val[1],grid.val[2]); + if (edgeTable[cubeindex] & 4) + vertlist[2] = + VertexInterp(isolevel,grid.p[2],grid.p[3],grid.val[2],grid.val[3]); + if (edgeTable[cubeindex] & 8) + vertlist[3] = + VertexInterp(isolevel,grid.p[3],grid.p[0],grid.val[3],grid.val[0]); + if (edgeTable[cubeindex] & 16) + vertlist[4] = + VertexInterp(isolevel,grid.p[4],grid.p[5],grid.val[4],grid.val[5]); + if (edgeTable[cubeindex] & 32) + vertlist[5] = + VertexInterp(isolevel,grid.p[5],grid.p[6],grid.val[5],grid.val[6]); + if (edgeTable[cubeindex] & 64) + vertlist[6] = + VertexInterp(isolevel,grid.p[6],grid.p[7],grid.val[6],grid.val[7]); + if (edgeTable[cubeindex] & 128) + vertlist[7] = + VertexInterp(isolevel,grid.p[7],grid.p[4],grid.val[7],grid.val[4]); + if (edgeTable[cubeindex] & 256) + vertlist[8] = + VertexInterp(isolevel,grid.p[0],grid.p[4],grid.val[0],grid.val[4]); + if (edgeTable[cubeindex] & 512) + vertlist[9] = + VertexInterp(isolevel,grid.p[1],grid.p[5],grid.val[1],grid.val[5]); + if (edgeTable[cubeindex] & 1024) + vertlist[10] = + VertexInterp(isolevel,grid.p[2],grid.p[6],grid.val[2],grid.val[6]); + if (edgeTable[cubeindex] & 2048) + vertlist[11] = + VertexInterp(isolevel,grid.p[3],grid.p[7],grid.val[3],grid.val[7]); + + /* Create the triangle */ + ntriang = 0; + for (i=0; triTable[cubeindex][i]!=-1; i+=3) { + + triangles[ntriang].p[0] = vertlist[triTable[cubeindex][i ]]; + triangles[ntriang].p[1] = vertlist[triTable[cubeindex][i+1]]; + triangles[ntriang].p[2] = vertlist[triTable[cubeindex][i+2]]; + + ntriang++; + } // end for i + + return(ntriang); +} + + +// Gaussian function part of the RBF +// rbf = p(x) + lambda_j*exp(-(x - xj)*(x - xj)) +KOKKOS_FUNCTION +double Gaussian_fcn(double xi_value[3], double xj_value[3]){ + + double diff_sqrd = 0.0; + for(size_t dim=0; dim<3; dim++){ + diff_sqrd += (xi_value[dim] - xj_value[dim])*(xi_value[dim] - xj_value[dim]); + } + return exp(-diff_sqrd); +} + +// biharmonic function part of the RBF +// rbf = p(x) + lambda_j*sqrt((x - xj)*(x - xj)) +KOKKOS_FUNCTION +double biharmonic_fcn(double xi_value[3], double xj_value[3]){ + + double diff_sqrd = 0.0; + for(size_t dim=0; dim<3; dim++){ + diff_sqrd += (xi_value[dim] - xj_value[dim])*(xi_value[dim] - xj_value[dim]); + } + return sqrt(diff_sqrd); +} + + + +int main(int argc, char *argv[]) +{ + Kokkos::initialize(argc, argv); + { + + printf("Pointcloud reconstruction \n\n"); + + if(argc==1){ + printf("Please supply an STL file for testing the point cloud surface reconstruction code \n"); + return 0; + } + + std::string filename = argv[1]; + + auto [normal_host, + v1X_host, v1Y_host, v1Z_host, + v2X_host, v2Y_host, v2Z_host, + v3X_host, v3Y_host, v3Z_host, + num_inp_triangles_host] = binary_stl_reader(filename); + + // Warning on C++ support: + // At this time with C++, the contents from a tuple cannot + // be used inside a lambda function. The parallel loops use + // lambda functions. To overcome this C++ limitation, all + // contents in the tuple will be copied or pointed to (Using + // a MATAR dual view) allowing the data to be used in parallel. + const size_t num_inp_triangles = num_inp_triangles_host; + DViewCArrayKokkos normal(&normal_host(0,0), num_inp_triangles, 3); + DViewCArrayKokkos v1X(&v1X_host(0),num_inp_triangles); + DViewCArrayKokkos v1Y(&v1Y_host(0),num_inp_triangles); + DViewCArrayKokkos v1Z(&v1Z_host(0),num_inp_triangles); + DViewCArrayKokkos v2X(&v2X_host(0),num_inp_triangles); + DViewCArrayKokkos v2Y(&v2Y_host(0),num_inp_triangles); + DViewCArrayKokkos v2Z(&v2Z_host(0),num_inp_triangles); + DViewCArrayKokkos v3X(&v3X_host(0),num_inp_triangles); + DViewCArrayKokkos v3Y(&v3Y_host(0),num_inp_triangles); + DViewCArrayKokkos v3Z(&v3Z_host(0),num_inp_triangles); + + normal.update_device(); + v1X.update_device(); + v1Y.update_device(); + v1Z.update_device(); + v2X.update_device(); + v2Y.update_device(); + v2Z.update_device(); + v3X.update_device(); + v3Y.update_device(); + v3Z.update_device(); + + + // define mesh spacing, it is used to create a mesh + + double LX = (XMax - X0); // length in x-dir + double LY = (YMax - Y0); + double LZ = (ZMax - Z0); + + // the number of nodes in the mesh + int num_pt_x = (int)( LX/dx ) + 1; // there must be at least 2 nodes + int num_pt_y = (int)( LY/dy ) + 1; // there must be at least 2 nodes + int num_pt_z = (int)( LZ/dz ) + 1; // there must be at least 2 nodes + + + // mesh coordinates + DCArrayKokkos x(num_pt_x, "pt_x"); + DCArrayKokkos y(num_pt_y, "pt_y"); + DCArrayKokkos z(num_pt_z, "pt_z"); + + // small distance for moving in the +/- normal directions + double epsilon = 0.1*fmin(fmin(dx, dy), dz); + + + // function with isosurface that we want extracted + DCArrayKokkos gridValues (num_pt_x,num_pt_y,num_pt_z, "grid_values"); + + + // define the triangles of extracted surface + const size_t num_elems = (num_pt_x-1)*(num_pt_y-1)*(num_pt_z-1); + DCArrayKokkos all_mesh_surf_triangles(num_elems, 5, "mesh_surf_tris"); // max of 5 per elem + DCArrayKokkos num_triangles_in_elem(num_elems, "num_tris_in_elem"); + num_triangles_in_elem.set_values(0); + + + printf("Creating point cloud data from STL file \n\n"); + + // define a point cloud + size_t num_points = num_inp_triangles*3; // 1 point per triangle plus 2 more in the +/- directions + DCArrayKokkos point_positions(num_points, 3, "point_positions"); + DCArrayKokkos point_signed_distance(num_points, "point_sign_distance"); // this is f in the journal paper + + //for(size_t tri=0; tri M_matrix(N,N,"M_matrix"); + DCArrayKokkos b_vector(N, "b_vector"); + + // ----------------------- + // assemble system matrix + // Mx = B + // _ _ _ _ _ _ + // |A P| |lambda| |f| + // |P^T 0| *| c | = |0| + // - - - - - - + // + // ----------------------- + + // initializing to zero + M_matrix.set_values(0.0); + + // assemble M + FOR_ALL(i, 0, num_points, + j, 0, num_points, { + + // this is the A matrix part of M + M_matrix(i, j) = Gaussian_fcn(&point_positions(i,0), &point_positions(j,0)); + }); + + FOR_ALL(i, 0, num_points, { + // Polynomial basis: [1, x, y, z] + // this the P matrix part of M + M_matrix(i, num_points+0) = 1.0; + M_matrix(i, num_points+1) = point_positions(i,0); // x coord + M_matrix(i, num_points+2) = point_positions(i,1); // y coord + M_matrix(i, num_points+3) = point_positions(i,2); // z coord + }); // end for i + + FOR_ALL(j, 0, num_points, { + // this the P^T matrix part of M + M_matrix(num_points+0, j) = 1.0; + M_matrix(num_points+1, j) = point_positions(j,0); // x coord + M_matrix(num_points+2, j) = point_positions(j,1); // y coord + M_matrix(num_points+3, j) = point_positions(j,2); // z coord + }); // end for j + + // adding the zeros in the bottom right corner + // for (size_t i = num_points; i < num_points+Pn; ++i) { + // for (size_t j = num_points; j < num_points+Pn; ++j) { + // M_matrix(i, j) = 0.0; + // } // end for j + // } // end for i + + //for(size_t i = num_points; i < num_points+Pn; ++i){ + FOR_ALL(i, num_points, N, { + b_vector(i) = 0.0; + }); // end for + + // assemble RHS vector, b + //for (size_t i = 0; i < num_points; ++i) { + FOR_ALL(i, 0, num_points, { + b_vector(i) = point_signed_distance(i); + }); // end for + + + M_matrix.update_host(); + b_vector.update_host(); + + + + // ---------------------------------- + // Solve for x in Mx=b + // ---------------------------------- + + // checking by printing + printf("matrix = \n"); + RUN({ + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < N; ++j) { + printf("%f , ", M_matrix(i, j)); + } // end for j + printf("\n"); + } // end for i + + printf("\n"); + printf("b = \n"); + for (size_t i = 0; i < N; ++i) { + printf("%f \n", b_vector(i)); + } + }); + + DCArrayKokkos perm (N, "perm"); + perm.set_values(0); + CArrayKokkos vv(N, "vv"); + + // used for LU problem + + int singular = 0; + int parity = 0; + + // Get the LU decomposition of the Mass Matrix + singular = LU_decompose_host(M_matrix, perm, vv, parity); // matrix is returned as the LU matrix + if(singular==0){ + printf("ERROR: matrix to fit point cloud surface is singluar \n"); + return 0; + } + // BUG in LU decompose + + // RUN({ + // int singular_d = 0; + // int parity_d = 0; + // singular_d = LU_decompose(M_matrix, perm, vv, parity_d); // M is returned as the LU matrix + // if(singular_d==0){ + // printf("ERROR: matrix is singluar \n"); + // } + // }); + + LU_backsub_host(M_matrix, perm, b_vector); // note: answer is sent back in b_vector + + // RUN({ + // LU_backsub(M_matrix, perm, b_vector); // note: answer is sent back in b + // }); + + RUN({ + //lambda coefficients for radial basis function, slice out only lambda values and polynomial values + auto lambda = ViewCArrayKokkos (&b_vector(0), num_points); + auto coefs = ViewCArrayKokkos (&b_vector(num_points), Pn); + printf("coeffs = \n"); + for(size_t i=0; i (&b_vector(0), num_points); + // coefs = ViewCArrayKokkos (&b_vector(num_points), Pn); + + // evaluate the polynomial part + gridValues(i,j,k) = b_vector(0+num_points) + + b_vector(1+num_points)*x(i) + + b_vector(2+num_points)*y(j) + + b_vector(3+num_points)*z(k); + + + for (size_t point=0; point Date: Fri, 15 Aug 2025 11:56:20 -0600 Subject: [PATCH 02/23] added volume calculation --- examples/pointcloud/graphics-reader.h | 37 +++++++++++ examples/pointcloud/pointcloud.cpp | 90 +++++++++++++++++++++++++-- 2 files changed, 123 insertions(+), 4 deletions(-) diff --git a/examples/pointcloud/graphics-reader.h b/examples/pointcloud/graphics-reader.h index 7081de6b..0b1eb3cb 100755 --- a/examples/pointcloud/graphics-reader.h +++ b/examples/pointcloud/graphics-reader.h @@ -1,3 +1,40 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ + // ----------------------------------------------- // routines to read a graphics file // diff --git a/examples/pointcloud/pointcloud.cpp b/examples/pointcloud/pointcloud.cpp index 963bffc1..442a8dc2 100755 --- a/examples/pointcloud/pointcloud.cpp +++ b/examples/pointcloud/pointcloud.cpp @@ -1,3 +1,40 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ + // ----------------------------------------------- // pointcloud reconstrution in C++ // credit to Andrew Morgan and Nathaniel Morgan @@ -39,9 +76,9 @@ using namespace mtr; // the number of nodes in the mesh -const double dx = 0.015; // resolution -const double dy = 0.015; // resolution -const double dz = 0.015; // resolution +const double dx = 0.01; // resolution +const double dy = 0.01; // resolution +const double dz = 0.01; // resolution // the mesh dimensions @@ -210,6 +247,28 @@ void calc_normal(triangle_t *triangle){ } // end normal +// cross prodcut +vec_t cross(const vec_t &a, const vec_t &b) { + return {a.y*b.z - a.z*b.y, + a.z*b.x - a.x*b.z, + a.x*b.y - a.y*b.x}; +} + +double dot(const vec_t &a, const vec_t &b) { + return a.x*b.x + a.y*b.y + a.z*b.z; +} + + +// calculate the volume of a tet with this triangular face +double compute_volume(const triangle_t &triangle) { + // triangle.p[0] is the first vec_t, being node 0 + // ... + // triangle.p[1] is the third vec_t, being node 2 + double volume = dot(triangle.p[0], cross(triangle.p[1], triangle.p[2])) / 6.0; + + return volume; +} + struct gridcell_t { vec_t* p; @@ -1040,7 +1099,30 @@ int main(int argc, char *argv[]) printf("Marching cubes finished \n\n"); - + + + // -------------------------------------------------- + // volume calculation + // -------------------------------------------------- + double volume = 0.0; + double vol_lcl = 0.0; + FOR_REDUCE_SUM(elem_gid, 0, num_elems, + vol_lcl, { + + for (size_t tri = 0; tri < num_triangles_in_elem(elem_gid); tri++){ + vol_lcl += compute_volume(all_mesh_surf_triangles(elem_gid,tri)); + } + + }, volume); + volume = fabs(volume); + + double radius = 0.794651/2.0; // radius of constructured part, based on a small mesh size + double PI = 3.14159265358979323846264338327950288419716939937510; + double vol_exact = 4.0/3.0*PI*radius*radius*radius; + printf("volume = %f, and `exact' sphere volume = %f \n", volume, vol_exact); + + // 0.262744 at 0.001 mesh size + // -------------------------------------------------- // Export STL file using results from marching cubes From eda38f70fdae992bba358ad8e233ea95c5465b58 Mon Sep 17 00:00:00 2001 From: Nathaniel Morgan Date: Thu, 4 Sep 2025 19:36:14 -0600 Subject: [PATCH 03/23] WIP: rk --- examples/laplaceMPI/CMakeLists.txt | 4 +- examples/pointcloud/CMakeLists.txt | 7 +- .../{pointcloud.cpp => pointcloud-gbl.cpp} | 0 examples/pointcloud/pointcloud-rk.cpp | 1197 +++++++++++++++++ solvers/lu_solver.hpp | 220 +++ 5 files changed, 1424 insertions(+), 4 deletions(-) rename examples/pointcloud/{pointcloud.cpp => pointcloud-gbl.cpp} (100%) create mode 100755 examples/pointcloud/pointcloud-rk.cpp diff --git a/examples/laplaceMPI/CMakeLists.txt b/examples/laplaceMPI/CMakeLists.txt index d9d4ec6c..5b114927 100644 --- a/examples/laplaceMPI/CMakeLists.txt +++ b/examples/laplaceMPI/CMakeLists.txt @@ -4,10 +4,10 @@ if (KOKKOS) #find_package(Kokkos REQUIRED) #new find_package(MPI REQUIRED) - #add_executable(laplace_mpi laplace_mpi.cpp) + add_executable(laplace_mpi laplace_mpi.cpp) #add_executable(laplace_mpi simple_mpi.cpp) #add_executable(laplace_mpi mpi_mesh_test.cpp) - add_executable(laplace_mpi simple_halo.cpp) + #add_executable(laplace_mpi simple_halo.cpp) add_definitions(-DHAVE_MPI=1) add_definitions(-DHAVE_KOKKOS=1) diff --git a/examples/pointcloud/CMakeLists.txt b/examples/pointcloud/CMakeLists.txt index bc6f7668..aafb20e9 100644 --- a/examples/pointcloud/CMakeLists.txt +++ b/examples/pointcloud/CMakeLists.txt @@ -16,6 +16,9 @@ if (KOKKOS) endif() - add_executable(pointcloud pointcloud.cpp) - target_link_libraries(pointcloud ${LINKING_LIBRARIES}) + add_executable(pointcloud-gbl pointcloud-gbl.cpp) + add_executable(pointcloud-rk pointcloud-rk.cpp) + + target_link_libraries(pointcloud-gbl ${LINKING_LIBRARIES}) + target_link_libraries(pointcloud-rk ${LINKING_LIBRARIES}) endif(KOKKOS) diff --git a/examples/pointcloud/pointcloud.cpp b/examples/pointcloud/pointcloud-gbl.cpp similarity index 100% rename from examples/pointcloud/pointcloud.cpp rename to examples/pointcloud/pointcloud-gbl.cpp diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp new file mode 100755 index 00000000..9aae3661 --- /dev/null +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -0,0 +1,1197 @@ +/********************************************************************************************** + © 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ + +// ----------------------------------------------- +// pointcloud reconstrution in C++ +// credit to Andrew Morgan and Nathaniel Morgan +// +// To run the code with an external mesh file: +// ./a.out graphics-file +// +// Requires the matar.h and macros.h libraries from +// the github LANL/MATAR/src folder +// +// The following rountines in this code came from: +// https://paulbourke.net/geometry/polygonise/ +// - Polygonise +// - VertexInterp +// +// +// The surface reconstruction method is from +// Reconstruction and Representation of 3D Objects with Radial BasisFunctions +// J. Carr, R. Beatson, ..., T. Evans +// https://www.cs.jhu.edu/~misha/Fall05/Papers/carr01.pdf +// ----------------------------------------------- +#include +#include +#include +#include +#include + + +#include "matar.h" + +#include "lu_solver.hpp" + + +using namespace mtr; + +const double PI = 3.14159265358979323846; + +// ----------------------------------------------- +// inputs: + + +// the number of nodes in the mesh +const double dx = 0.1; // resolution +const double dy = 0.1; // resolution +const double dz = 0.1; // resolution + + +// the mesh dimensions +// length of the domain is 5 for crazy shape and 1 for sphere +const double XMax = 1.0; +const double YMax = 1.0; +const double ZMax = 1.0; + +const double X0 = 0.0; +const double Y0 = 0.0; +const double Z0 = 0.0; + + +const double isoLevel=0.0; // contour to extract + + +// +// ----------------------------------------------- + + +std::tuple< + CArray, // normal + CArray, CArray, CArray, // v1X, v1Y, v1Z + CArray, CArray, CArray, // v2X, v2Y, v2Z + CArray, CArray, CArray, // v3X, v3Y, v3Z + size_t // n_facets +> +binary_stl_reader(const std::string& path) +{ + std::ifstream in(path, std::ios::binary | std::ios::ate); + if (!in) { std::perror("open"); std::exit(EXIT_FAILURE); } + + const std::streamoff filesize = in.tellg(); + if (filesize < 100) { + std::cerr << "ERROR: File too small to be a valid STL\n"; + std::exit(EXIT_FAILURE); + } + in.seekg(0); + + // ---- check if ASCII ------------------------------------------------- + char magic[6] = { 0 }; + in.read(magic, 5); // read first 5 chars + in.seekg(0); // rewind + if (std::strncmp(magic, "solid", 5) == 0) { + std::cerr + << "ERROR: \"" << path + << "\" looks like an **ASCII** STL (starts with \"solid\").\n" + << "Re‑export it as *binary* or implement an ASCII parser.\n"; + std::exit(EXIT_FAILURE); // or call ascii_stl_reader(); + } + + // ---- read 80‑byte header + nominal facet count ---------------------- + char header[80]; in.read(header, 80); + size_t n_facets_nominal; in.read(reinterpret_cast(&n_facets_nominal), 4); + + // ---- compute expected count from file size to sanity‑check ---------- + // binary facet record = 50 bytes (12×4 + 12×4 + 12×4 + 2) + const size_t n_facets_from_size = + static_cast((filesize - 84) / 50); + + size_t n_facets = n_facets_nominal; + if (n_facets_nominal != n_facets_from_size) { + std::cout << "WARNING: facet count in header (" << n_facets_nominal + << ") disagrees with file size (" << n_facets_from_size + << "). Using size‑derived value.\n"; + n_facets = n_facets_from_size; + } + std::cout << "STL facets: " << n_facets << '\n'; + + // ---- allocate MATAR arrays ----------------------------------------- + CArray normal(n_facets, 3); + CArray v1X(n_facets), v1Y(n_facets), v1Z(n_facets); + CArray v2X(n_facets), v2Y(n_facets), v2Z(n_facets); + CArray v3X(n_facets), v3Y(n_facets), v3Z(n_facets); + + // ---- read facet records -------------------------------------------- + float nrm[3], v1[3], v2[3], v3[3]; + for (unsigned int i = 0; i < n_facets; ++i) { + in.read(reinterpret_cast(nrm), 12); + in.read(reinterpret_cast(v1), 12); + in.read(reinterpret_cast(v2), 12); + in.read(reinterpret_cast(v3), 12); + in.ignore(2); // attribute byte count + + for (int d = 0; d < 3; ++d) normal(i, d) = nrm[d]; + v1X(i) = v1[0]; v1Y(i) = v1[1]; v1Z(i) = v1[2]; + v2X(i) = v2[0]; v2Y(i) = v2[1]; v2Z(i) = v2[2]; + v3X(i) = v3[0]; v3Y(i) = v3[1]; v3Z(i) = v3[2]; + } + return { normal,v1X,v1Y,v1Z,v2X,v2Y,v2Z,v3X,v3Y,v3Z,n_facets }; +} + + + + + +// a vector type with 3 components +struct vec_t{ + double x; + double y; + double z; + + // default constructor + vec_t (){}; + + // overloaded constructor + vec_t(double x_in, double y_in, double z_in){ + x = x_in; + y = y_in; + z = z_in; + }; + +}; // end vec_t + + +// a triangle data type +struct triangle_t { + + vec_t normal; // surface normal + + vec_t p[3]; // three nodes with x,y,z coords + + // default constructor + triangle_t(){}; + + // overloaded constructor + triangle_t (vec_t p_in[3]) + { + p[0]=p_in[0]; + p[1]=p_in[1]; + p[2]=p_in[2]; + }; + +}; // end triangle_t + + +// calculate the surface normal of a triangle +KOKKOS_INLINE_FUNCTION +void calc_normal(triangle_t *triangle){ + + //A = p1 - p0; + //B = p2 - p0; + vec_t A; + A.x = triangle->p[1].x - triangle->p[0].x; + A.y = triangle->p[1].y - triangle->p[0].y; + A.z = triangle->p[1].z - triangle->p[0].z; + + vec_t B; + B.x = triangle->p[2].x - triangle->p[0].x; + B.y = triangle->p[2].y - triangle->p[0].y; + B.z = triangle->p[2].z - triangle->p[0].z; + + vec_t N; + N.x = A.y * B.z - A.z * B.y; + N.y = A.z * B.x - A.x * B.z; + N.z = A.x * B.y - A.y * B.x; + + double mag; + mag = sqrt(N.x*N.x + N.y*N.y + N.z*N.z); + + // save the unit normal + triangle->normal.x = N.x/mag; + triangle->normal.y = N.y/mag; + triangle->normal.z = N.z/mag; + +} // end normal + + +// cross prodcut +vec_t cross(const vec_t &a, const vec_t &b) { + return {a.y*b.z - a.z*b.y, + a.z*b.x - a.x*b.z, + a.x*b.y - a.y*b.x}; +} + +double dot(const vec_t &a, const vec_t &b) { + return a.x*b.x + a.y*b.y + a.z*b.z; +} + + +// calculate the volume of a tet with this triangular face +double compute_volume(const triangle_t &triangle) { + // triangle.p[0] is the first vec_t, being node 0 + // ... + // triangle.p[1] is the third vec_t, being node 2 + double volume = dot(triangle.p[0], cross(triangle.p[1], triangle.p[2])) / 6.0; + + return volume; +} + +struct gridcell_t { + + vec_t* p; + double* val; + + // default constructor + gridcell_t(){}; + + // overloaded constructor + gridcell_t (vec_t p_in[8], double val_in[8]) + { + p=p_in; + val=val_in; + }; + +}; // end gridcell_t + + +/* + Linearly interpolate the position where an isosurface cuts + an edge between two vertices, each with their own scalar value +*/ +KOKKOS_INLINE_FUNCTION +vec_t VertexInterp(double isolevel, vec_t p1, vec_t p2, double valp1, double valp2) +{ + double mu; + vec_t p; + + if (fabs(isolevel-valp1) < 0.00001) + return(p1); + if (fabs(isolevel-valp2) < 0.00001) + return(p2); + if (fabs(valp1-valp2) < 0.00001) + return(p1); + mu = (isolevel - valp1) / (valp2 - valp1); + p.x = p1.x + mu * (p2.x - p1.x); + p.y = p1.y + mu * (p2.y - p1.y); + p.z = p1.z + mu * (p2.z - p1.z); + + return(p); +} + +/* + Given a grid cell and an isolevel, calculate the triangular + facets required to represent the isosurface through the cell. + Return the number of triangular facets, the array "triangles" + will be loaded up with the vertices at most 5 triangular facets. + 0 will be returned if the grid cell is either totally above + of totally below the isolevel. +*/ +KOKKOS_INLINE_FUNCTION +int Polygonise(gridcell_t grid, double isolevel, triangle_t *triangles) +{ + + int i,ntriang; + int cubeindex; + vec_t vertlist[12]; + + int edgeTable[256]={ + 0x0 , 0x109, 0x203, 0x30a, 0x406, 0x50f, 0x605, 0x70c, + 0x80c, 0x905, 0xa0f, 0xb06, 0xc0a, 0xd03, 0xe09, 0xf00, + 0x190, 0x99 , 0x393, 0x29a, 0x596, 0x49f, 0x795, 0x69c, + 0x99c, 0x895, 0xb9f, 0xa96, 0xd9a, 0xc93, 0xf99, 0xe90, + 0x230, 0x339, 0x33 , 0x13a, 0x636, 0x73f, 0x435, 0x53c, + 0xa3c, 0xb35, 0x83f, 0x936, 0xe3a, 0xf33, 0xc39, 0xd30, + 0x3a0, 0x2a9, 0x1a3, 0xaa , 0x7a6, 0x6af, 0x5a5, 0x4ac, + 0xbac, 0xaa5, 0x9af, 0x8a6, 0xfaa, 0xea3, 0xda9, 0xca0, + 0x460, 0x569, 0x663, 0x76a, 0x66 , 0x16f, 0x265, 0x36c, + 0xc6c, 0xd65, 0xe6f, 0xf66, 0x86a, 0x963, 0xa69, 0xb60, + 0x5f0, 0x4f9, 0x7f3, 0x6fa, 0x1f6, 0xff , 0x3f5, 0x2fc, + 0xdfc, 0xcf5, 0xfff, 0xef6, 0x9fa, 0x8f3, 0xbf9, 0xaf0, + 0x650, 0x759, 0x453, 0x55a, 0x256, 0x35f, 0x55 , 0x15c, + 0xe5c, 0xf55, 0xc5f, 0xd56, 0xa5a, 0xb53, 0x859, 0x950, + 0x7c0, 0x6c9, 0x5c3, 0x4ca, 0x3c6, 0x2cf, 0x1c5, 0xcc , + 0xfcc, 0xec5, 0xdcf, 0xcc6, 0xbca, 0xac3, 0x9c9, 0x8c0, + 0x8c0, 0x9c9, 0xac3, 0xbca, 0xcc6, 0xdcf, 0xec5, 0xfcc, + 0xcc , 0x1c5, 0x2cf, 0x3c6, 0x4ca, 0x5c3, 0x6c9, 0x7c0, + 0x950, 0x859, 0xb53, 0xa5a, 0xd56, 0xc5f, 0xf55, 0xe5c, + 0x15c, 0x55 , 0x35f, 0x256, 0x55a, 0x453, 0x759, 0x650, + 0xaf0, 0xbf9, 0x8f3, 0x9fa, 0xef6, 0xfff, 0xcf5, 0xdfc, + 0x2fc, 0x3f5, 0xff , 0x1f6, 0x6fa, 0x7f3, 0x4f9, 0x5f0, + 0xb60, 0xa69, 0x963, 0x86a, 0xf66, 0xe6f, 0xd65, 0xc6c, + 0x36c, 0x265, 0x16f, 0x66 , 0x76a, 0x663, 0x569, 0x460, + 0xca0, 0xda9, 0xea3, 0xfaa, 0x8a6, 0x9af, 0xaa5, 0xbac, + 0x4ac, 0x5a5, 0x6af, 0x7a6, 0xaa , 0x1a3, 0x2a9, 0x3a0, + 0xd30, 0xc39, 0xf33, 0xe3a, 0x936, 0x83f, 0xb35, 0xa3c, + 0x53c, 0x435, 0x73f, 0x636, 0x13a, 0x33 , 0x339, 0x230, + 0xe90, 0xf99, 0xc93, 0xd9a, 0xa96, 0xb9f, 0x895, 0x99c, + 0x69c, 0x795, 0x49f, 0x596, 0x29a, 0x393, 0x99 , 0x190, + 0xf00, 0xe09, 0xd03, 0xc0a, 0xb06, 0xa0f, 0x905, 0x80c, + 0x70c, 0x605, 0x50f, 0x406, 0x30a, 0x203, 0x109, 0x0 }; + + int triTable[256][16] = + {{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 8, 3, 9, 8, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 2, 10, 0, 2, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 8, 3, 2, 10, 8, 10, 9, 8, -1, -1, -1, -1, -1, -1, -1}, + {3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 11, 2, 8, 11, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 11, 2, 1, 9, 11, 9, 8, 11, -1, -1, -1, -1, -1, -1, -1}, + {3, 10, 1, 11, 10, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 10, 1, 0, 8, 10, 8, 11, 10, -1, -1, -1, -1, -1, -1, -1}, + {3, 9, 0, 3, 11, 9, 11, 10, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 3, 0, 7, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 1, 9, 4, 7, 1, 7, 3, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 4, 7, 3, 0, 4, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1}, + {9, 2, 10, 9, 0, 2, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, + {2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, -1, -1, -1, -1}, + {8, 4, 7, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 4, 7, 11, 2, 4, 2, 0, 4, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 1, 8, 4, 7, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, + {4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, -1, -1, -1, -1}, + {3, 10, 1, 3, 11, 10, 7, 8, 4, -1, -1, -1, -1, -1, -1, -1}, + {1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, -1, -1, -1, -1}, + {4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, -1, -1, -1, -1}, + {4, 7, 11, 4, 11, 9, 9, 11, 10, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 5, 4, 1, 5, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 5, 4, 8, 3, 5, 3, 1, 5, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 1, 2, 10, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, + {5, 2, 10, 5, 4, 2, 4, 0, 2, -1, -1, -1, -1, -1, -1, -1}, + {2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, -1, -1, -1, -1}, + {9, 5, 4, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 11, 2, 0, 8, 11, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, + {0, 5, 4, 0, 1, 5, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, + {2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, -1, -1, -1, -1}, + {10, 3, 11, 10, 1, 3, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, -1, -1, -1, -1}, + {5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, -1, -1, -1, -1}, + {5, 4, 8, 5, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1}, + {9, 7, 8, 5, 7, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 3, 0, 9, 5, 3, 5, 7, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 7, 8, 0, 1, 7, 1, 5, 7, -1, -1, -1, -1, -1, -1, -1}, + {1, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 7, 8, 9, 5, 7, 10, 1, 2, -1, -1, -1, -1, -1, -1, -1}, + {10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, -1, -1, -1, -1}, + {8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, -1, -1, -1, -1}, + {2, 10, 5, 2, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1}, + {7, 9, 5, 7, 8, 9, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, -1, -1, -1, -1}, + {2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, -1, -1, -1, -1}, + {11, 2, 1, 11, 1, 7, 7, 1, 5, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, -1, -1, -1, -1}, + {5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, -1}, + {11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, -1}, + {11, 10, 5, 7, 11, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 1, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 8, 3, 1, 9, 8, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 5, 2, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 5, 1, 2, 6, 3, 0, 8, -1, -1, -1, -1, -1, -1, -1}, + {9, 6, 5, 9, 0, 6, 0, 2, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, -1, -1, -1, -1}, + {2, 3, 11, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 0, 8, 11, 2, 0, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, -1, -1, -1, -1}, + {6, 3, 11, 6, 5, 3, 5, 1, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, -1, -1, -1, -1}, + {3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, -1, -1, -1, -1}, + {6, 5, 9, 6, 9, 11, 11, 9, 8, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 3, 0, 4, 7, 3, 6, 5, 10, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 5, 10, 6, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, + {10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, -1, -1, -1, -1}, + {6, 1, 2, 6, 5, 1, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, -1, -1, -1, -1}, + {8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, -1, -1, -1, -1}, + {7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, -1}, + {3, 11, 2, 7, 8, 4, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, -1, -1, -1, -1}, + {0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1}, + {9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, -1}, + {8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, -1, -1, -1, -1}, + {5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, -1}, + {0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, -1}, + {6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, -1, -1, -1, -1}, + {10, 4, 9, 6, 4, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 10, 6, 4, 9, 10, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1}, + {10, 0, 1, 10, 6, 0, 6, 4, 0, -1, -1, -1, -1, -1, -1, -1}, + {8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, -1, -1, -1, -1}, + {1, 4, 9, 1, 2, 4, 2, 6, 4, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, -1, -1, -1, -1}, + {0, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 3, 2, 8, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1}, + {10, 4, 9, 10, 6, 4, 11, 2, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, -1, -1, -1, -1}, + {3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, -1, -1, -1, -1}, + {6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, -1}, + {9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, -1, -1, -1, -1}, + {8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, -1}, + {3, 11, 6, 3, 6, 0, 0, 6, 4, -1, -1, -1, -1, -1, -1, -1}, + {6, 4, 8, 11, 6, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 10, 6, 7, 8, 10, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1}, + {0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, -1, -1, -1, -1}, + {10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, -1, -1, -1, -1}, + {10, 6, 7, 10, 7, 1, 1, 7, 3, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, -1, -1, -1, -1}, + {2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, -1}, + {7, 8, 0, 7, 0, 6, 6, 0, 2, -1, -1, -1, -1, -1, -1, -1}, + {7, 3, 2, 6, 7, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, -1, -1, -1, -1}, + {2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, -1}, + {1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, -1}, + {11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, -1, -1, -1, -1}, + {8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, -1}, + {0, 9, 1, 11, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, -1, -1, -1, -1}, + {7, 11, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 1, 9, 8, 3, 1, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, + {10, 1, 2, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 3, 0, 8, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, + {2, 9, 0, 2, 10, 9, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, + {6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, -1, -1, -1, -1}, + {7, 2, 3, 6, 2, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 0, 8, 7, 6, 0, 6, 2, 0, -1, -1, -1, -1, -1, -1, -1}, + {2, 7, 6, 2, 3, 7, 0, 1, 9, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, -1, -1, -1, -1}, + {10, 7, 6, 10, 1, 7, 1, 3, 7, -1, -1, -1, -1, -1, -1, -1}, + {10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, -1, -1, -1, -1}, + {0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, -1, -1, -1, -1}, + {7, 6, 10, 7, 10, 8, 8, 10, 9, -1, -1, -1, -1, -1, -1, -1}, + {6, 8, 4, 11, 8, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 6, 11, 3, 0, 6, 0, 4, 6, -1, -1, -1, -1, -1, -1, -1}, + {8, 6, 11, 8, 4, 6, 9, 0, 1, -1, -1, -1, -1, -1, -1, -1}, + {9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, -1, -1, -1, -1}, + {6, 8, 4, 6, 11, 8, 2, 10, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, -1, -1, -1, -1}, + {4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, -1, -1, -1, -1}, + {10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, -1}, + {8, 2, 3, 8, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1}, + {0, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, -1, -1, -1, -1}, + {1, 9, 4, 1, 4, 2, 2, 4, 6, -1, -1, -1, -1, -1, -1, -1}, + {8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, -1, -1, -1, -1}, + {10, 1, 0, 10, 0, 6, 6, 0, 4, -1, -1, -1, -1, -1, -1, -1}, + {4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, -1}, + {10, 9, 4, 6, 10, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 5, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 4, 9, 5, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 0, 1, 5, 4, 0, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, + {11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, -1, -1, -1, -1}, + {9, 5, 4, 10, 1, 2, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, + {6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, -1, -1, -1, -1}, + {7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, -1, -1, -1, -1}, + {3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, -1}, + {7, 2, 3, 7, 6, 2, 5, 4, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, -1, -1, -1, -1}, + {3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, -1, -1, -1, -1}, + {6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, -1}, + {9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, -1, -1, -1, -1}, + {1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, -1}, + {4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, -1}, + {7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, -1, -1, -1, -1}, + {6, 9, 5, 6, 11, 9, 11, 8, 9, -1, -1, -1, -1, -1, -1, -1}, + {3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, -1, -1, -1, -1}, + {0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, -1, -1, -1, -1}, + {6, 11, 3, 6, 3, 5, 5, 3, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, -1, -1, -1, -1}, + {0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, -1}, + {11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, -1}, + {6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, -1, -1, -1, -1}, + {5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, -1, -1, -1, -1}, + {9, 5, 6, 9, 6, 0, 0, 6, 2, -1, -1, -1, -1, -1, -1, -1}, + {1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, -1}, + {1, 5, 6, 2, 1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, -1}, + {10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, -1, -1, -1, -1}, + {0, 3, 8, 5, 6, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {10, 5, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 5, 10, 7, 5, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 5, 10, 11, 7, 5, 8, 3, 0, -1, -1, -1, -1, -1, -1, -1}, + {5, 11, 7, 5, 10, 11, 1, 9, 0, -1, -1, -1, -1, -1, -1, -1}, + {10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, -1, -1, -1, -1}, + {11, 1, 2, 11, 7, 1, 7, 5, 1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, -1, -1, -1, -1}, + {9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, -1, -1, -1, -1}, + {7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, -1}, + {2, 5, 10, 2, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1}, + {8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, -1, -1, -1, -1}, + {9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, -1, -1, -1, -1}, + {9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, -1}, + {1, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 7, 0, 7, 1, 1, 7, 5, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 3, 9, 3, 5, 5, 3, 7, -1, -1, -1, -1, -1, -1, -1}, + {9, 8, 7, 5, 9, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {5, 8, 4, 5, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1}, + {5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, -1, -1, -1, -1}, + {0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, -1, -1, -1, -1}, + {10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, -1}, + {2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, -1, -1, -1, -1}, + {0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, -1}, + {0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, -1}, + {9, 4, 5, 2, 11, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, -1, -1, -1, -1}, + {5, 10, 2, 5, 2, 4, 4, 2, 0, -1, -1, -1, -1, -1, -1, -1}, + {3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, -1}, + {5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, -1, -1, -1, -1}, + {8, 4, 5, 8, 5, 3, 3, 5, 1, -1, -1, -1, -1, -1, -1, -1}, + {0, 4, 5, 1, 0, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, -1, -1, -1, -1}, + {9, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 11, 7, 4, 9, 11, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, -1, -1, -1, -1}, + {1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, -1, -1, -1, -1}, + {3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, -1}, + {4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, -1, -1, -1, -1}, + {9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, -1}, + {11, 7, 4, 11, 4, 2, 2, 4, 0, -1, -1, -1, -1, -1, -1, -1}, + {11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, -1, -1, -1, -1}, + {2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, -1, -1, -1, -1}, + {9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, -1}, + {3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, -1}, + {1, 10, 2, 8, 7, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 1, 4, 1, 7, 7, 1, 3, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, -1, -1, -1, -1}, + {4, 0, 3, 7, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 8, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 9, 3, 9, 11, 11, 9, 10, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 10, 0, 10, 8, 8, 10, 11, -1, -1, -1, -1, -1, -1, -1}, + {3, 1, 10, 11, 3, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 11, 1, 11, 9, 9, 11, 8, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, -1, -1, -1, -1}, + {0, 2, 11, 8, 0, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 2, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 8, 2, 8, 10, 10, 8, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 10, 2, 0, 9, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, -1, -1, -1, -1}, + {1, 10, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 3, 8, 9, 1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 9, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}; + + + /* + Determine the index into the edge table which + tells us which vertices are inside of the surface + */ + cubeindex = 0; + + if (grid.val[0] < isolevel) cubeindex |= 1; + if (grid.val[1] < isolevel) cubeindex |= 2; + if (grid.val[2] < isolevel) cubeindex |= 4; + if (grid.val[3] < isolevel) cubeindex |= 8; + if (grid.val[4] < isolevel) cubeindex |= 16; + if (grid.val[5] < isolevel) cubeindex |= 32; + if (grid.val[6] < isolevel) cubeindex |= 64; + if (grid.val[7] < isolevel) cubeindex |= 128; + + + + /* Cube is entirely in/out of the surface */ + if (edgeTable[cubeindex] == 0) + return(0); + + /* Find the vertices where the surface intersects the cube */ + if (edgeTable[cubeindex] & 1) + vertlist[0] = + VertexInterp(isolevel,grid.p[0],grid.p[1],grid.val[0],grid.val[1]); + if (edgeTable[cubeindex] & 2) + vertlist[1] = + VertexInterp(isolevel,grid.p[1],grid.p[2],grid.val[1],grid.val[2]); + if (edgeTable[cubeindex] & 4) + vertlist[2] = + VertexInterp(isolevel,grid.p[2],grid.p[3],grid.val[2],grid.val[3]); + if (edgeTable[cubeindex] & 8) + vertlist[3] = + VertexInterp(isolevel,grid.p[3],grid.p[0],grid.val[3],grid.val[0]); + if (edgeTable[cubeindex] & 16) + vertlist[4] = + VertexInterp(isolevel,grid.p[4],grid.p[5],grid.val[4],grid.val[5]); + if (edgeTable[cubeindex] & 32) + vertlist[5] = + VertexInterp(isolevel,grid.p[5],grid.p[6],grid.val[5],grid.val[6]); + if (edgeTable[cubeindex] & 64) + vertlist[6] = + VertexInterp(isolevel,grid.p[6],grid.p[7],grid.val[6],grid.val[7]); + if (edgeTable[cubeindex] & 128) + vertlist[7] = + VertexInterp(isolevel,grid.p[7],grid.p[4],grid.val[7],grid.val[4]); + if (edgeTable[cubeindex] & 256) + vertlist[8] = + VertexInterp(isolevel,grid.p[0],grid.p[4],grid.val[0],grid.val[4]); + if (edgeTable[cubeindex] & 512) + vertlist[9] = + VertexInterp(isolevel,grid.p[1],grid.p[5],grid.val[1],grid.val[5]); + if (edgeTable[cubeindex] & 1024) + vertlist[10] = + VertexInterp(isolevel,grid.p[2],grid.p[6],grid.val[2],grid.val[6]); + if (edgeTable[cubeindex] & 2048) + vertlist[11] = + VertexInterp(isolevel,grid.p[3],grid.p[7],grid.val[3],grid.val[7]); + + /* Create the triangle */ + ntriang = 0; + for (i=0; triTable[cubeindex][i]!=-1; i+=3) { + + triangles[ntriang].p[0] = vertlist[triTable[cubeindex][i ]]; + triangles[ntriang].p[1] = vertlist[triTable[cubeindex][i+1]]; + triangles[ntriang].p[2] = vertlist[triTable[cubeindex][i+2]]; + + ntriang++; + } // end for i + + return(ntriang); +} + + +// Gaussian function part of the RBF +// rbf = exp(-(x - xj)*(x - xj)/h) +KOKKOS_FUNCTION +double kernel(const double r[3], const double h){ + + double diff_sqrd = 0.0; + + for(size_t dim=0; dim<3; dim++){ + diff_sqrd += r[dim]*r[dim]; + } // dim + + return exp(-diff_sqrd/(h*h)); +} // end of function + + +// Polynomial basis up to quadratic in 3D (10 terms) +const size_t num_poly_basis = 10; +KOKKOS_INLINE_FUNCTION +void poly_basis(const double r[3], double *p) { + + p[0] = 1.0; + p[1] = r[0]; + p[2] = r[1]; + p[3] = r[2]; + p[4] = r[0] * r[0]; + p[5] = r[0] * r[1]; + p[6] = r[0] * r[2]; + p[7] = r[1] * r[1]; + p[8] = r[1] * r[2]; + p[9] = r[2] * r[2]; + + // for high-order will use (x^a y^b z^c) + + return; +} // end function + + +void compute_shape_functions( + size_t i, + const DCArrayKokkos & x, + const CArrayKokkos & vol, + const CArrayKokkos & rk_coeffs, + const CArrayKokkos & rk_basis, + const double h) +{ + + // global num_points at this time, make it num_points in neighborhood + size_t num_points_neighborhood = x.dims(0); // will come from hash bins + + // loop over all neighbors around point i + FOR_ALL(j, 0, num_points_neighborhood, { + + double p[num_poly_basis]; // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz] + double r[3]; // vecx_j - vecx_i + r[0] = x(j,0) - x(i,0); // x_j-x_i + r[1] = x(j,1) - x(i,1); // y_j-y_i + r[2] = x(j,2) - x(i,2); // z_j-z_i + + double W = kernel(r, h); + poly_basis(r,p); + + double correction = 0.0; + for (size_t a = 0; a < num_poly_basis; ++a){ + correction += rk_coeffs(i,a) * p[a]; + } // end a + + rk_basis(i,j) = W * correction; + }); + + + return; +} // end function + + + +// Build reproducing kernel coefficients for one particle +void build_rk_coefficients( + const DCArrayKokkos & x, + const CArrayKokkos & vol, + const CArrayKokkos & rk_coeffs, + double h) +{ + + // global num_points at this time, make it num_points in neighborhood + size_t num_points_neighborhood = x.dims(0); // will come from hash bins + + // actual number of points + size_t num_points = x.dims(0); + + + // loop over all nodes in the problem + FOR_ALL(i, 0, num_points, { + + double M_1D[num_poly_basis*num_poly_basis]; + ViewCArrayKokkos M(&M_1D[0], num_poly_basis, num_poly_basis); + + // values in rhs after this function will be accessed as rk_coeffs(i,0:N) + ViewCArrayKokkos rhs (&rk_coeffs(i,0), num_poly_basis); + rhs(0) = 1.0; // enforce reproduction of constant 1, everything else is = 0 + + double p[num_poly_basis]; // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz] + double r[3]; // vecx_j - vecx_i + + // loop over all nodes around point i + for (size_t j = 0; j < num_points_neighborhood; ++j) { + + r[0] = x(j,0) - x(i,0); // x_j-x_i + r[1] = x(j,1) - x(i,1); // y_j-y_i + r[2] = x(j,2) - x(i,2); // z_j-z_i + + double W = kernel(r, h); + poly_basis(r,p); + + // assemble matrix + for (size_t a = 0; a < num_poly_basis; ++a) { + for (size_t b = 0; b < num_poly_basis; ++b) { + M(a,b) += vol(j) * W * p[a] * p[b]; + printf("M(a,b) = %f \n", M(a,b)); + } // end for b + } // for a + + } // end for point neighbors j + + // ------------- + // solve Ax=B + // ------------- + + size_t perm_1D[num_poly_basis]; + ViewCArrayKokkos perm (&perm_1D[0], num_poly_basis); + for (size_t a = 0; a < num_poly_basis; ++a) { + perm(a)= 0; + } // end a + + double vv_1D[num_poly_basis]; + ViewCArrayKokkos vv(&vv_1D[0], num_poly_basis); + + // used for LU problem + int singular = 0; + int parity = 0; + singular = LU_decompose(M, perm, vv, parity); // M is returned as the LU matrix + if(singular==0){ + printf("ERROR: matrix is singluar \n"); + } + + LU_backsub(M, perm, rhs); // note: answer is sent back in rhs + + }); // end parallel loop + + + return; +} // end function + + + + +int main(int argc, char *argv[]) +{ + Kokkos::initialize(argc, argv); + { + + printf("Pointcloud reconstruction \n\n"); + + if(argc==1){ + printf("Please supply an STL file for testing the point cloud surface reconstruction code \n"); + return 0; + } + + std::string filename = argv[1]; + + auto [normal_host, + v1X_host, v1Y_host, v1Z_host, + v2X_host, v2Y_host, v2Z_host, + v3X_host, v3Y_host, v3Z_host, + num_inp_triangles_host] = binary_stl_reader(filename); + + // Warning on C++ support: + // At this time with C++, the contents from a tuple cannot + // be used inside a lambda function. The parallel loops use + // lambda functions. To overcome this C++ limitation, all + // contents in the tuple will be copied or pointed to (Using + // a MATAR dual view) allowing the data to be used in parallel. + const size_t num_inp_triangles = num_inp_triangles_host; + DViewCArrayKokkos normal(&normal_host(0,0), num_inp_triangles, 3); + DViewCArrayKokkos v1X(&v1X_host(0),num_inp_triangles); + DViewCArrayKokkos v1Y(&v1Y_host(0),num_inp_triangles); + DViewCArrayKokkos v1Z(&v1Z_host(0),num_inp_triangles); + DViewCArrayKokkos v2X(&v2X_host(0),num_inp_triangles); + DViewCArrayKokkos v2Y(&v2Y_host(0),num_inp_triangles); + DViewCArrayKokkos v2Z(&v2Z_host(0),num_inp_triangles); + DViewCArrayKokkos v3X(&v3X_host(0),num_inp_triangles); + DViewCArrayKokkos v3Y(&v3Y_host(0),num_inp_triangles); + DViewCArrayKokkos v3Z(&v3Z_host(0),num_inp_triangles); + + normal.update_device(); + v1X.update_device(); + v1Y.update_device(); + v1Z.update_device(); + v2X.update_device(); + v2Y.update_device(); + v2Z.update_device(); + v3X.update_device(); + v3Y.update_device(); + v3Z.update_device(); + + + // define mesh spacing, it is used to create a mesh + + double LX = (XMax - X0); // length in x-dir + double LY = (YMax - Y0); + double LZ = (ZMax - Z0); + + // the number of nodes in the mesh + int num_pt_x = (int)( LX/dx ) + 1; // there must be at least 2 nodes + int num_pt_y = (int)( LY/dy ) + 1; // there must be at least 2 nodes + int num_pt_z = (int)( LZ/dz ) + 1; // there must be at least 2 nodes + + + // mesh coordinates + DCArrayKokkos x(num_pt_x, "pt_x"); + DCArrayKokkos y(num_pt_y, "pt_y"); + DCArrayKokkos z(num_pt_z, "pt_z"); + + // small distance for moving in the +/- normal directions + double epsilon = 0.1*fmin(fmin(dx, dy), dz); + + + // function with isosurface that we want extracted + DCArrayKokkos gridValues (num_pt_x,num_pt_y,num_pt_z, "grid_values"); + + + // define the triangles of extracted surface + const size_t num_elems = (num_pt_x-1)*(num_pt_y-1)*(num_pt_z-1); + DCArrayKokkos all_mesh_surf_triangles(num_elems, 5, "mesh_surf_tris"); // max of 5 per elem + DCArrayKokkos num_triangles_in_elem(num_elems, "num_tris_in_elem"); + num_triangles_in_elem.set_values(0); + + + printf("Creating point cloud data from STL file \n\n"); + + // define a point cloud + size_t num_points = num_inp_triangles*3; // 1 point per triangle plus 2 more in the +/- directions + DCArrayKokkos point_positions(num_points, 3, "point_positions"); + DCArrayKokkos point_signed_distance(num_points, "point_sign_distance"); // this is f in the journal paper + + // 1 point per triangle at this time, thus a loop over tris + FOR_ALL(tri, 0, num_inp_triangles, { + // point on surface + point_positions(tri, 0) = 1.0/3.0*((double)v1X(tri) + (double)v2X(tri) + (double)v3X(tri)); + point_positions(tri, 1) = 1.0/3.0*((double)v1Y(tri) + (double)v2Y(tri) + (double)v3Y(tri)); + point_positions(tri, 2) = 1.0/3.0*((double)v1Z(tri) + (double)v2Z(tri) + (double)v3Z(tri)); + + point_signed_distance(tri) = 0.0; + + // off surface +normal + point_positions(tri+num_inp_triangles, 0) = point_positions(tri, 0) + epsilon*(double)normal(tri, 0); + point_positions(tri+num_inp_triangles, 1) = point_positions(tri, 1) + epsilon*(double)normal(tri, 1); + point_positions(tri+num_inp_triangles, 2) = point_positions(tri, 2) + epsilon*(double)normal(tri, 2); + + point_signed_distance(tri+num_inp_triangles) = epsilon; + + // off surface -normal + point_positions(tri+2*num_inp_triangles, 0) = point_positions(tri, 0) - epsilon*(double)normal(tri, 0); + point_positions(tri+2*num_inp_triangles, 1) = point_positions(tri, 1) - epsilon*(double)normal(tri, 1); + point_positions(tri+2*num_inp_triangles, 2) = point_positions(tri, 2) - epsilon*(double)normal(tri, 2); + + point_signed_distance(tri+2*num_inp_triangles) = -epsilon; + + }); // end parallel for tri's in the file + + + // ---------------------------- + // Reconstruct surface here + // ---------------------------- + + printf("Reconstructing surface using point cloud data \n\n"); + + // assuming all point neighbors contribute, will change to a hash bins + const size_t num_points_neighborhood = num_points; + + CArrayKokkos rk_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point + CArrayKokkos rk_basis(num_points, num_points); // reproducing kernel basis + CArrayKokkos vol(num_points); + vol.set_values(1.0); + + double h = 1.0; + + + printf("building rk coefficients \n"); + + // build coefficients on basis functions + build_rk_coefficients(point_positions, vol, rk_coeffs, h); + + // build basis functions + for(size_t i=0; i (&b_vector(0), num_points); + // coefs = ViewCArrayKokkos (&b_vector(num_points), Pn); + + + for (size_t point=0; point &A, // matrix A passed in and is sent out in LU decomp format + const ViewCArrayKokkos &perm, // permutations + const ViewCArrayKokkos &vv, + int &parity) { // parity (+1 or -1) + + const int n = A.dims(0); // size of matrix + + parity = 1; + + // helper variables + double temp; + + // search for the largest element in each row; save the scaling in the + // temporary array vv and return zero if the matrix is singular + for(size_t i = 0; i < n; i++) { + + double big = 0.; + for(size_t j = 0; j < n; j++){ + if((temp=fabs(A(i,j))) > big){ + big=temp; + } + } + + if(big == 0.0) return(0); + + vv(i) = big; + } + + // the main loop for the Crout's algorithm + for(size_t j = 0; j < n; j++) { + + // this is the part a) of the algorithm except for i==j + for(size_t i=0;ij + pivot search + for(size_t i = j; i < n; i++) { + + double sum = A(i,j); + + for(size_t k=0; k= big){ + big = temp; + imax = i; + } + } // end for i + + // interchange rows, if needed, change parity and the scale factor + if(imax != j) { + + for(size_t k = 0; k < n; k++){ + temp = A(imax,k); + A(imax,k) = A(j,k); + A(j,k) = temp; + } + + parity = -(parity); + vv(imax) = vv(j); + } + + // store the index + perm(j) = imax; + // if the pivot element is zero, the matrix is singular but for some + // applications a tiny number is desirable instead + + if(A(j,j) == 0.0){ + A(j,j) = TINY; + } + // finally, divide by the pivot element + + if(j &A, // input matrix A in LU decomp format + const ViewCArrayKokkos &perm, // permutations + const ViewCArrayKokkos &b){ // RHS and is answer x to Ax=B + + const int n = A.dims(0); // size of matrix + + int ii = -1; + + + // First step of backsubstitution; the only wrinkle is to unscramble + // the permutation order. Note: the algorithm is optimized for a + // possibility of large amount of zeroes in b + + for(size_t i = 0; i < n; i++) { + + size_t ip = perm(i); + + double sum = b(ip); + b(ip) = b(i); + + if(ii >= 0){ + for(size_t j = ii; j0){ + ii=i; // a nonzero element encounted + } + + b(i) = sum; + } // end loop i + + // the second step + for(int i=n-1; i>=0; i--) { + + double sum = b(i); + for(size_t j=i+1; j &A, // input matrix + ViewCArrayKokkos &perm, // permutations + ViewCArrayKokkos &inv_mat, // inverse matrix + ViewCArrayKokkos &col) { // tmp array + + const size_t n = A.dims(0); // size of matrix + + + for(size_t j = 0; j < n; j++){ + + for(size_t i = 0; i < n; i++){ + col(i) = 0.0; + } // end for i + + col(j) = 1.0; + LU_backsub(A, perm, col); + + for(size_t i = 0; i < n; i++){ + inv_mat(i,j) = col(i); + } // end for i + + } // end for j + + return; + +} // end function + +// ----------------------- +// LU determinant function +// Input: A filled in LUPDecompose; N - dimension. +// Output: determinate of original A matrix +// ----------------------- +KOKKOS_INLINE_FUNCTION +double LU_determinant( + ViewCArrayKokkos &A, // input matrix + const int parity){ // parity (+1 0r -1) + + const int n = A.dims(0); // size of matrix + + double res = (double)(parity); + + for(size_t j=0; j Date: Tue, 9 Sep 2025 09:19:06 -0600 Subject: [PATCH 04/23] WIP: rk basis functions give partion of unity --- examples/pointcloud/pointcloud-rk.cpp | 194 ++++++++++++++++++++------ 1 file changed, 150 insertions(+), 44 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index 9aae3661..ca950309 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -77,9 +77,15 @@ const double PI = 3.14159265358979323846; // the number of nodes in the mesh -const double dx = 0.1; // resolution -const double dy = 0.1; // resolution -const double dz = 0.1; // resolution +const double dx = 0.1; // resolution to build STL file +const double dy = 0.1; // resolution to build STL file +const double dz = 0.1; // resolution to build STL file + + +// the bin sizes for finding neighboring points +const double bin_dx = 0.5; // 2 bins in x +const double bin_dy = 0.5; // 2 bins in y +const double bin_dz = 0.5; // 2 bins in z // the mesh dimensions @@ -92,6 +98,10 @@ const double X0 = 0.0; const double Y0 = 0.0; const double Z0 = 0.0; +const double LX = (XMax - X0); // length in x-dir +const double LY = (YMax - Y0); +const double LZ = (ZMax - Z0); + const double isoLevel=0.0; // contour to extract @@ -695,6 +705,27 @@ int Polygonise(gridcell_t grid, double isolevel, triangle_t *triangles) } +struct bin_ijk_t{ + size_t i, j, k; +}; + + +bin_ijk_t get_bin_ijk(const double x_pt, const double y_pt, const double z_pt){ + + bin_ijk_t bin_ijk; + + double i_dbl = fmax(1.0e-15, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // x = ih + X0 + dx_bin*0.5 + double j_dbl = fmax(1.0e-15, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10)); + double k_dbl = fmax(1.0e-15, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10)); + + // get the integers for the bins + bin_ijk.i = (size_t)i_dbl; + bin_ijk.j = (size_t)j_dbl; + bin_ijk.k = (size_t)k_dbl; + + return bin_ijk; +} // end function + // Gaussian function part of the RBF // rbf = exp(-(x - xj)*(x - xj)/h) KOKKOS_FUNCTION @@ -734,6 +765,9 @@ void poly_basis(const double r[3], double *p) { void compute_shape_functions( size_t i, + const double xpt, + const double ypt, + const double zpt, const DCArrayKokkos & x, const CArrayKokkos & vol, const CArrayKokkos & rk_coeffs, @@ -749,9 +783,9 @@ void compute_shape_functions( double p[num_poly_basis]; // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz] double r[3]; // vecx_j - vecx_i - r[0] = x(j,0) - x(i,0); // x_j-x_i - r[1] = x(j,1) - x(i,1); // y_j-y_i - r[2] = x(j,2) - x(i,2); // z_j-z_i + r[0] = x(j,0) - xpt; // x_j-x_i + r[1] = x(j,1) - ypt; // y_j-y_i + r[2] = x(j,2) - zpt; // z_j-z_i double W = kernel(r, h); poly_basis(r,p); @@ -790,9 +824,11 @@ void build_rk_coefficients( double M_1D[num_poly_basis*num_poly_basis]; ViewCArrayKokkos M(&M_1D[0], num_poly_basis, num_poly_basis); + M.set_values(0.0); // values in rhs after this function will be accessed as rk_coeffs(i,0:N) ViewCArrayKokkos rhs (&rk_coeffs(i,0), num_poly_basis); + rhs.set_values(0.0); rhs(0) = 1.0; // enforce reproduction of constant 1, everything else is = 0 double p[num_poly_basis]; // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz] @@ -809,10 +845,10 @@ void build_rk_coefficients( poly_basis(r,p); // assemble matrix + for (size_t a = 0; a < num_poly_basis; ++a) { for (size_t b = 0; b < num_poly_basis; ++b) { M(a,b) += vol(j) * W * p[a] * p[b]; - printf("M(a,b) = %f \n", M(a,b)); } // end for b } // for a @@ -850,6 +886,7 @@ void build_rk_coefficients( + int main(int argc, char *argv[]) { Kokkos::initialize(argc, argv); @@ -900,37 +937,10 @@ int main(int argc, char *argv[]) v3Z.update_device(); - // define mesh spacing, it is used to create a mesh - - double LX = (XMax - X0); // length in x-dir - double LY = (YMax - Y0); - double LZ = (ZMax - Z0); - - // the number of nodes in the mesh - int num_pt_x = (int)( LX/dx ) + 1; // there must be at least 2 nodes - int num_pt_y = (int)( LY/dy ) + 1; // there must be at least 2 nodes - int num_pt_z = (int)( LZ/dz ) + 1; // there must be at least 2 nodes - - // mesh coordinates - DCArrayKokkos x(num_pt_x, "pt_x"); - DCArrayKokkos y(num_pt_y, "pt_y"); - DCArrayKokkos z(num_pt_z, "pt_z"); - // small distance for moving in the +/- normal directions double epsilon = 0.1*fmin(fmin(dx, dy), dz); - - // function with isosurface that we want extracted - DCArrayKokkos gridValues (num_pt_x,num_pt_y,num_pt_z, "grid_values"); - - - // define the triangles of extracted surface - const size_t num_elems = (num_pt_x-1)*(num_pt_y-1)*(num_pt_z-1); - DCArrayKokkos all_mesh_surf_triangles(num_elems, 5, "mesh_surf_tris"); // max of 5 per elem - DCArrayKokkos num_triangles_in_elem(num_elems, "num_tris_in_elem"); - num_triangles_in_elem.set_values(0); - printf("Creating point cloud data from STL file \n\n"); @@ -965,6 +975,55 @@ int main(int argc, char *argv[]) }); // end parallel for tri's in the file + + // ---------------------------- + // Make bins here + // ---------------------------- + + // the number of nodes in the mesh + size_t num_bins_x = (size_t)( round(LX/bin_dx) ); + size_t num_bins_y = (size_t)( round(LY/bin_dy) ); + size_t num_bins_z = (size_t)( round(LZ/bin_dz) ); + + + + size_t num_bins = num_bins_x*num_bins_y*num_bins_z; + +printf("num bins = %zu \n", num_bins); + + + DCArrayKokkos num_points_in_bin(num_bins); + num_points_in_bin.set_values(0); + DCArrayKokkos points_bin_id(num_points); + DCArrayKokkos points_bin_id_storage(num_points); + + FOR_ALL(pt_id, 0, num_points, { + + // get i,j,k indices of the bins + bin_ijk_t bin_ijk = get_bin_ijk(point_positions(pt_id,0), + point_positions(pt_id,1), + point_positions(pt_id,2)); + + // get the 1D index + size_t bin_id = bin_ijk.i + (bin_ijk.j + bin_ijk.k*num_bins_y)*num_bins_x; + + size_t storage_place = Kokkos::atomic_fetch_add(&num_points_in_bin(bin_id), 1); + points_bin_id(pt_id) = bin_id; // the id of the bin + points_bin_id_storage(pt_id) = storage_place; // the storage place in the bin + + }); // end for all + + DRaggedRightArrayKokkos points_in_bin(num_points_in_bin); + + FOR_ALL(pt_id, 0, num_points, { + + size_t bin_id = points_bin_id(pt_id); + size_t storage_place = points_bin_id_storage(pt_id); + points_in_bin(bin_id, storage_place) = pt_id; + + }); // end for all + + // ---------------------------- // Reconstruct surface here // ---------------------------- @@ -989,15 +1048,53 @@ int main(int argc, char *argv[]) // build basis functions for(size_t i=0; i x(num_pt_x, "pt_x"); + DCArrayKokkos y(num_pt_y, "pt_y"); + DCArrayKokkos z(num_pt_z, "pt_z"); + + + // function with isosurface that we want extracted + DCArrayKokkos gridValues (num_pt_x,num_pt_y,num_pt_z, "grid_values"); + + + // define the triangles of extracted surface + const size_t num_elems = (num_pt_x-1)*(num_pt_y-1)*(num_pt_z-1); + DCArrayKokkos all_mesh_surf_triangles(num_elems, 5, "mesh_surf_tris"); // max of 5 per elem + DCArrayKokkos num_triangles_in_elem(num_elems, "num_tris_in_elem"); + num_triangles_in_elem.set_values(0); + printf("Evaluating surf function on mesh \n"); FOR_ALL(i, 0, num_pt_x, { x(i) = dx*(double)i + X0;; @@ -1019,15 +1116,24 @@ int main(int argc, char *argv[]) x_point[0] = x(i); x_point[1] = y(j); x_point[2] = z(k); + + gridValues(i,j,k) = 0.0; - // lambda coefficients for radial basis function, slice out only lambda values and polynomial values - // lambda = ViewCArrayKokkos (&b_vector(0), num_points); - // coefs = ViewCArrayKokkos (&b_vector(num_points), Pn); + // get i,j,k indices of the bins + bin_ijk_t bin_ijk = get_bin_ijk(x_point[0], + x_point[1], + x_point[2]); + + // get the 1D index + size_t bin_id = bin_ijk.i + (bin_ijk.j + bin_ijk.k*num_bins_y)*num_bins_x; + + size_t point_i = points_in_bin(bin_id, 0); // get the first point in this bin + for (size_t point_j=0; point_j Date: Tue, 9 Sep 2025 10:01:49 -0600 Subject: [PATCH 05/23] WIP: fixed bug in rk_basis evaluation --- examples/pointcloud/pointcloud-rk.cpp | 59 ++++++++++++++++++++------- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index ca950309..cda69090 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -795,7 +795,7 @@ void compute_shape_functions( correction += rk_coeffs(i,a) * p[a]; } // end a - rk_basis(i,j) = W * correction; + rk_basis(j) = W * correction; }); @@ -1034,7 +1034,7 @@ printf("num bins = %zu \n", num_bins); const size_t num_points_neighborhood = num_points; CArrayKokkos rk_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point - CArrayKokkos rk_basis(num_points, num_points); // reproducing kernel basis + CArrayKokkos rk_basis(num_points); // reproducing kernel basis, should have size num_points_neighborhood CArrayKokkos vol(num_points); vol.set_values(1.0); @@ -1046,24 +1046,30 @@ printf("num bins = %zu \n", num_bins); // build coefficients on basis functions build_rk_coefficients(point_positions, vol, rk_coeffs, h); - // build basis functions - for(size_t i=0; i Date: Tue, 9 Sep 2025 16:49:58 -0600 Subject: [PATCH 06/23] working rkpm with random particles --- examples/pointcloud/pointcloud-rk.cpp | 1012 ++----------------------- 1 file changed, 46 insertions(+), 966 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index cda69090..6fd003cf 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -36,32 +36,18 @@ **********************************************************************************************/ // ----------------------------------------------- -// pointcloud reconstrution in C++ -// credit to Andrew Morgan and Nathaniel Morgan -// -// To run the code with an external mesh file: -// ./a.out graphics-file -// -// Requires the matar.h and macros.h libraries from -// the github LANL/MATAR/src folder -// -// The following rountines in this code came from: -// https://paulbourke.net/geometry/polygonise/ -// - Polygonise -// - VertexInterp -// -// -// The surface reconstruction method is from -// Reconstruction and Representation of 3D Objects with Radial BasisFunctions -// J. Carr, R. Beatson, ..., T. Evans -// https://www.cs.jhu.edu/~misha/Fall05/Papers/carr01.pdf +// pointcloud reproducing kernels in C++ +// Nathaniel Morgan // ----------------------------------------------- + #include #include #include #include #include +#include // For rand() and srand() + #include "matar.h" @@ -75,634 +61,26 @@ const double PI = 3.14159265358979323846; // ----------------------------------------------- // inputs: - -// the number of nodes in the mesh -const double dx = 0.1; // resolution to build STL file -const double dy = 0.1; // resolution to build STL file -const double dz = 0.1; // resolution to build STL file - +const size_t num_points = 101; // the bin sizes for finding neighboring points const double bin_dx = 0.5; // 2 bins in x const double bin_dy = 0.5; // 2 bins in y const double bin_dz = 0.5; // 2 bins in z +const double X0 = 0.0; // origin +const double Y0 = 0.0; +const double Z0 = 0.0; -// the mesh dimensions -// length of the domain is 5 for crazy shape and 1 for sphere -const double XMax = 1.0; -const double YMax = 1.0; -const double ZMax = 1.0; - -const double X0 = 0.0; -const double Y0 = 0.0; -const double Z0 = 0.0; - -const double LX = (XMax - X0); // length in x-dir -const double LY = (YMax - Y0); -const double LZ = (ZMax - Z0); - - -const double isoLevel=0.0; // contour to extract - +// length of the domain +const double LX = 1.0; // length in x-dir +const double LY = 1.0; +const double LZ = 1.0; // // ----------------------------------------------- -std::tuple< - CArray, // normal - CArray, CArray, CArray, // v1X, v1Y, v1Z - CArray, CArray, CArray, // v2X, v2Y, v2Z - CArray, CArray, CArray, // v3X, v3Y, v3Z - size_t // n_facets -> -binary_stl_reader(const std::string& path) -{ - std::ifstream in(path, std::ios::binary | std::ios::ate); - if (!in) { std::perror("open"); std::exit(EXIT_FAILURE); } - - const std::streamoff filesize = in.tellg(); - if (filesize < 100) { - std::cerr << "ERROR: File too small to be a valid STL\n"; - std::exit(EXIT_FAILURE); - } - in.seekg(0); - - // ---- check if ASCII ------------------------------------------------- - char magic[6] = { 0 }; - in.read(magic, 5); // read first 5 chars - in.seekg(0); // rewind - if (std::strncmp(magic, "solid", 5) == 0) { - std::cerr - << "ERROR: \"" << path - << "\" looks like an **ASCII** STL (starts with \"solid\").\n" - << "Re‑export it as *binary* or implement an ASCII parser.\n"; - std::exit(EXIT_FAILURE); // or call ascii_stl_reader(); - } - - // ---- read 80‑byte header + nominal facet count ---------------------- - char header[80]; in.read(header, 80); - size_t n_facets_nominal; in.read(reinterpret_cast(&n_facets_nominal), 4); - - // ---- compute expected count from file size to sanity‑check ---------- - // binary facet record = 50 bytes (12×4 + 12×4 + 12×4 + 2) - const size_t n_facets_from_size = - static_cast((filesize - 84) / 50); - - size_t n_facets = n_facets_nominal; - if (n_facets_nominal != n_facets_from_size) { - std::cout << "WARNING: facet count in header (" << n_facets_nominal - << ") disagrees with file size (" << n_facets_from_size - << "). Using size‑derived value.\n"; - n_facets = n_facets_from_size; - } - std::cout << "STL facets: " << n_facets << '\n'; - - // ---- allocate MATAR arrays ----------------------------------------- - CArray normal(n_facets, 3); - CArray v1X(n_facets), v1Y(n_facets), v1Z(n_facets); - CArray v2X(n_facets), v2Y(n_facets), v2Z(n_facets); - CArray v3X(n_facets), v3Y(n_facets), v3Z(n_facets); - - // ---- read facet records -------------------------------------------- - float nrm[3], v1[3], v2[3], v3[3]; - for (unsigned int i = 0; i < n_facets; ++i) { - in.read(reinterpret_cast(nrm), 12); - in.read(reinterpret_cast(v1), 12); - in.read(reinterpret_cast(v2), 12); - in.read(reinterpret_cast(v3), 12); - in.ignore(2); // attribute byte count - - for (int d = 0; d < 3; ++d) normal(i, d) = nrm[d]; - v1X(i) = v1[0]; v1Y(i) = v1[1]; v1Z(i) = v1[2]; - v2X(i) = v2[0]; v2Y(i) = v2[1]; v2Z(i) = v2[2]; - v3X(i) = v3[0]; v3Y(i) = v3[1]; v3Z(i) = v3[2]; - } - return { normal,v1X,v1Y,v1Z,v2X,v2Y,v2Z,v3X,v3Y,v3Z,n_facets }; -} - - - - - -// a vector type with 3 components -struct vec_t{ - double x; - double y; - double z; - - // default constructor - vec_t (){}; - - // overloaded constructor - vec_t(double x_in, double y_in, double z_in){ - x = x_in; - y = y_in; - z = z_in; - }; - -}; // end vec_t - - -// a triangle data type -struct triangle_t { - - vec_t normal; // surface normal - - vec_t p[3]; // three nodes with x,y,z coords - - // default constructor - triangle_t(){}; - - // overloaded constructor - triangle_t (vec_t p_in[3]) - { - p[0]=p_in[0]; - p[1]=p_in[1]; - p[2]=p_in[2]; - }; - -}; // end triangle_t - - -// calculate the surface normal of a triangle -KOKKOS_INLINE_FUNCTION -void calc_normal(triangle_t *triangle){ - - //A = p1 - p0; - //B = p2 - p0; - vec_t A; - A.x = triangle->p[1].x - triangle->p[0].x; - A.y = triangle->p[1].y - triangle->p[0].y; - A.z = triangle->p[1].z - triangle->p[0].z; - - vec_t B; - B.x = triangle->p[2].x - triangle->p[0].x; - B.y = triangle->p[2].y - triangle->p[0].y; - B.z = triangle->p[2].z - triangle->p[0].z; - - vec_t N; - N.x = A.y * B.z - A.z * B.y; - N.y = A.z * B.x - A.x * B.z; - N.z = A.x * B.y - A.y * B.x; - - double mag; - mag = sqrt(N.x*N.x + N.y*N.y + N.z*N.z); - - // save the unit normal - triangle->normal.x = N.x/mag; - triangle->normal.y = N.y/mag; - triangle->normal.z = N.z/mag; - -} // end normal - - -// cross prodcut -vec_t cross(const vec_t &a, const vec_t &b) { - return {a.y*b.z - a.z*b.y, - a.z*b.x - a.x*b.z, - a.x*b.y - a.y*b.x}; -} - -double dot(const vec_t &a, const vec_t &b) { - return a.x*b.x + a.y*b.y + a.z*b.z; -} - - -// calculate the volume of a tet with this triangular face -double compute_volume(const triangle_t &triangle) { - // triangle.p[0] is the first vec_t, being node 0 - // ... - // triangle.p[1] is the third vec_t, being node 2 - double volume = dot(triangle.p[0], cross(triangle.p[1], triangle.p[2])) / 6.0; - - return volume; -} - -struct gridcell_t { - - vec_t* p; - double* val; - - // default constructor - gridcell_t(){}; - - // overloaded constructor - gridcell_t (vec_t p_in[8], double val_in[8]) - { - p=p_in; - val=val_in; - }; - -}; // end gridcell_t - - -/* - Linearly interpolate the position where an isosurface cuts - an edge between two vertices, each with their own scalar value -*/ -KOKKOS_INLINE_FUNCTION -vec_t VertexInterp(double isolevel, vec_t p1, vec_t p2, double valp1, double valp2) -{ - double mu; - vec_t p; - - if (fabs(isolevel-valp1) < 0.00001) - return(p1); - if (fabs(isolevel-valp2) < 0.00001) - return(p2); - if (fabs(valp1-valp2) < 0.00001) - return(p1); - mu = (isolevel - valp1) / (valp2 - valp1); - p.x = p1.x + mu * (p2.x - p1.x); - p.y = p1.y + mu * (p2.y - p1.y); - p.z = p1.z + mu * (p2.z - p1.z); - - return(p); -} - -/* - Given a grid cell and an isolevel, calculate the triangular - facets required to represent the isosurface through the cell. - Return the number of triangular facets, the array "triangles" - will be loaded up with the vertices at most 5 triangular facets. - 0 will be returned if the grid cell is either totally above - of totally below the isolevel. -*/ -KOKKOS_INLINE_FUNCTION -int Polygonise(gridcell_t grid, double isolevel, triangle_t *triangles) -{ - - int i,ntriang; - int cubeindex; - vec_t vertlist[12]; - - int edgeTable[256]={ - 0x0 , 0x109, 0x203, 0x30a, 0x406, 0x50f, 0x605, 0x70c, - 0x80c, 0x905, 0xa0f, 0xb06, 0xc0a, 0xd03, 0xe09, 0xf00, - 0x190, 0x99 , 0x393, 0x29a, 0x596, 0x49f, 0x795, 0x69c, - 0x99c, 0x895, 0xb9f, 0xa96, 0xd9a, 0xc93, 0xf99, 0xe90, - 0x230, 0x339, 0x33 , 0x13a, 0x636, 0x73f, 0x435, 0x53c, - 0xa3c, 0xb35, 0x83f, 0x936, 0xe3a, 0xf33, 0xc39, 0xd30, - 0x3a0, 0x2a9, 0x1a3, 0xaa , 0x7a6, 0x6af, 0x5a5, 0x4ac, - 0xbac, 0xaa5, 0x9af, 0x8a6, 0xfaa, 0xea3, 0xda9, 0xca0, - 0x460, 0x569, 0x663, 0x76a, 0x66 , 0x16f, 0x265, 0x36c, - 0xc6c, 0xd65, 0xe6f, 0xf66, 0x86a, 0x963, 0xa69, 0xb60, - 0x5f0, 0x4f9, 0x7f3, 0x6fa, 0x1f6, 0xff , 0x3f5, 0x2fc, - 0xdfc, 0xcf5, 0xfff, 0xef6, 0x9fa, 0x8f3, 0xbf9, 0xaf0, - 0x650, 0x759, 0x453, 0x55a, 0x256, 0x35f, 0x55 , 0x15c, - 0xe5c, 0xf55, 0xc5f, 0xd56, 0xa5a, 0xb53, 0x859, 0x950, - 0x7c0, 0x6c9, 0x5c3, 0x4ca, 0x3c6, 0x2cf, 0x1c5, 0xcc , - 0xfcc, 0xec5, 0xdcf, 0xcc6, 0xbca, 0xac3, 0x9c9, 0x8c0, - 0x8c0, 0x9c9, 0xac3, 0xbca, 0xcc6, 0xdcf, 0xec5, 0xfcc, - 0xcc , 0x1c5, 0x2cf, 0x3c6, 0x4ca, 0x5c3, 0x6c9, 0x7c0, - 0x950, 0x859, 0xb53, 0xa5a, 0xd56, 0xc5f, 0xf55, 0xe5c, - 0x15c, 0x55 , 0x35f, 0x256, 0x55a, 0x453, 0x759, 0x650, - 0xaf0, 0xbf9, 0x8f3, 0x9fa, 0xef6, 0xfff, 0xcf5, 0xdfc, - 0x2fc, 0x3f5, 0xff , 0x1f6, 0x6fa, 0x7f3, 0x4f9, 0x5f0, - 0xb60, 0xa69, 0x963, 0x86a, 0xf66, 0xe6f, 0xd65, 0xc6c, - 0x36c, 0x265, 0x16f, 0x66 , 0x76a, 0x663, 0x569, 0x460, - 0xca0, 0xda9, 0xea3, 0xfaa, 0x8a6, 0x9af, 0xaa5, 0xbac, - 0x4ac, 0x5a5, 0x6af, 0x7a6, 0xaa , 0x1a3, 0x2a9, 0x3a0, - 0xd30, 0xc39, 0xf33, 0xe3a, 0x936, 0x83f, 0xb35, 0xa3c, - 0x53c, 0x435, 0x73f, 0x636, 0x13a, 0x33 , 0x339, 0x230, - 0xe90, 0xf99, 0xc93, 0xd9a, 0xa96, 0xb9f, 0x895, 0x99c, - 0x69c, 0x795, 0x49f, 0x596, 0x29a, 0x393, 0x99 , 0x190, - 0xf00, 0xe09, 0xd03, 0xc0a, 0xb06, 0xa0f, 0x905, 0x80c, - 0x70c, 0x605, 0x50f, 0x406, 0x30a, 0x203, 0x109, 0x0 }; - - int triTable[256][16] = - {{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {1, 8, 3, 9, 8, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 8, 3, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {9, 2, 10, 0, 2, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {2, 8, 3, 2, 10, 8, 10, 9, 8, -1, -1, -1, -1, -1, -1, -1}, - {3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 11, 2, 8, 11, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {1, 9, 0, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {1, 11, 2, 1, 9, 11, 9, 8, 11, -1, -1, -1, -1, -1, -1, -1}, - {3, 10, 1, 11, 10, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 10, 1, 0, 8, 10, 8, 11, 10, -1, -1, -1, -1, -1, -1, -1}, - {3, 9, 0, 3, 11, 9, 11, 10, 9, -1, -1, -1, -1, -1, -1, -1}, - {9, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {4, 3, 0, 7, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 1, 9, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {4, 1, 9, 4, 7, 1, 7, 3, 1, -1, -1, -1, -1, -1, -1, -1}, - {1, 2, 10, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {3, 4, 7, 3, 0, 4, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1}, - {9, 2, 10, 9, 0, 2, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, - {2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, -1, -1, -1, -1}, - {8, 4, 7, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {11, 4, 7, 11, 2, 4, 2, 0, 4, -1, -1, -1, -1, -1, -1, -1}, - {9, 0, 1, 8, 4, 7, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, - {4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, -1, -1, -1, -1}, - {3, 10, 1, 3, 11, 10, 7, 8, 4, -1, -1, -1, -1, -1, -1, -1}, - {1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, -1, -1, -1, -1}, - {4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, -1, -1, -1, -1}, - {4, 7, 11, 4, 11, 9, 9, 11, 10, -1, -1, -1, -1, -1, -1, -1}, - {9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {9, 5, 4, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 5, 4, 1, 5, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {8, 5, 4, 8, 3, 5, 3, 1, 5, -1, -1, -1, -1, -1, -1, -1}, - {1, 2, 10, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {3, 0, 8, 1, 2, 10, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, - {5, 2, 10, 5, 4, 2, 4, 0, 2, -1, -1, -1, -1, -1, -1, -1}, - {2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, -1, -1, -1, -1}, - {9, 5, 4, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 11, 2, 0, 8, 11, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, - {0, 5, 4, 0, 1, 5, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, - {2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, -1, -1, -1, -1}, - {10, 3, 11, 10, 1, 3, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1}, - {4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, -1, -1, -1, -1}, - {5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, -1, -1, -1, -1}, - {5, 4, 8, 5, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1}, - {9, 7, 8, 5, 7, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {9, 3, 0, 9, 5, 3, 5, 7, 3, -1, -1, -1, -1, -1, -1, -1}, - {0, 7, 8, 0, 1, 7, 1, 5, 7, -1, -1, -1, -1, -1, -1, -1}, - {1, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {9, 7, 8, 9, 5, 7, 10, 1, 2, -1, -1, -1, -1, -1, -1, -1}, - {10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, -1, -1, -1, -1}, - {8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, -1, -1, -1, -1}, - {2, 10, 5, 2, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1}, - {7, 9, 5, 7, 8, 9, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1}, - {9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, -1, -1, -1, -1}, - {2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, -1, -1, -1, -1}, - {11, 2, 1, 11, 1, 7, 7, 1, 5, -1, -1, -1, -1, -1, -1, -1}, - {9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, -1, -1, -1, -1}, - {5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, -1}, - {11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, -1}, - {11, 10, 5, 7, 11, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 8, 3, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {9, 0, 1, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {1, 8, 3, 1, 9, 8, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, - {1, 6, 5, 2, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {1, 6, 5, 1, 2, 6, 3, 0, 8, -1, -1, -1, -1, -1, -1, -1}, - {9, 6, 5, 9, 0, 6, 0, 2, 6, -1, -1, -1, -1, -1, -1, -1}, - {5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, -1, -1, -1, -1}, - {2, 3, 11, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {11, 0, 8, 11, 2, 0, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, - {0, 1, 9, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, - {5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, -1, -1, -1, -1}, - {6, 3, 11, 6, 5, 3, 5, 1, 3, -1, -1, -1, -1, -1, -1, -1}, - {0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, -1, -1, -1, -1}, - {3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, -1, -1, -1, -1}, - {6, 5, 9, 6, 9, 11, 11, 9, 8, -1, -1, -1, -1, -1, -1, -1}, - {5, 10, 6, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {4, 3, 0, 4, 7, 3, 6, 5, 10, -1, -1, -1, -1, -1, -1, -1}, - {1, 9, 0, 5, 10, 6, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, - {10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, -1, -1, -1, -1}, - {6, 1, 2, 6, 5, 1, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1}, - {1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, -1, -1, -1, -1}, - {8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, -1, -1, -1, -1}, - {7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, -1}, - {3, 11, 2, 7, 8, 4, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, - {5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, -1, -1, -1, -1}, - {0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1}, - {9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, -1}, - {8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, -1, -1, -1, -1}, - {5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, -1}, - {0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, -1}, - {6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, -1, -1, -1, -1}, - {10, 4, 9, 6, 4, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {4, 10, 6, 4, 9, 10, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1}, - {10, 0, 1, 10, 6, 0, 6, 4, 0, -1, -1, -1, -1, -1, -1, -1}, - {8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, -1, -1, -1, -1}, - {1, 4, 9, 1, 2, 4, 2, 6, 4, -1, -1, -1, -1, -1, -1, -1}, - {3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, -1, -1, -1, -1}, - {0, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {8, 3, 2, 8, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1}, - {10, 4, 9, 10, 6, 4, 11, 2, 3, -1, -1, -1, -1, -1, -1, -1}, - {0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, -1, -1, -1, -1}, - {3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, -1, -1, -1, -1}, - {6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, -1}, - {9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, -1, -1, -1, -1}, - {8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, -1}, - {3, 11, 6, 3, 6, 0, 0, 6, 4, -1, -1, -1, -1, -1, -1, -1}, - {6, 4, 8, 11, 6, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {7, 10, 6, 7, 8, 10, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1}, - {0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, -1, -1, -1, -1}, - {10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, -1, -1, -1, -1}, - {10, 6, 7, 10, 7, 1, 1, 7, 3, -1, -1, -1, -1, -1, -1, -1}, - {1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, -1, -1, -1, -1}, - {2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, -1}, - {7, 8, 0, 7, 0, 6, 6, 0, 2, -1, -1, -1, -1, -1, -1, -1}, - {7, 3, 2, 6, 7, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, -1, -1, -1, -1}, - {2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, -1}, - {1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, -1}, - {11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, -1, -1, -1, -1}, - {8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, -1}, - {0, 9, 1, 11, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, -1, -1, -1, -1}, - {7, 11, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {3, 0, 8, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 1, 9, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {8, 1, 9, 8, 3, 1, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, - {10, 1, 2, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {1, 2, 10, 3, 0, 8, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, - {2, 9, 0, 2, 10, 9, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, - {6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, -1, -1, -1, -1}, - {7, 2, 3, 6, 2, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {7, 0, 8, 7, 6, 0, 6, 2, 0, -1, -1, -1, -1, -1, -1, -1}, - {2, 7, 6, 2, 3, 7, 0, 1, 9, -1, -1, -1, -1, -1, -1, -1}, - {1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, -1, -1, -1, -1}, - {10, 7, 6, 10, 1, 7, 1, 3, 7, -1, -1, -1, -1, -1, -1, -1}, - {10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, -1, -1, -1, -1}, - {0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, -1, -1, -1, -1}, - {7, 6, 10, 7, 10, 8, 8, 10, 9, -1, -1, -1, -1, -1, -1, -1}, - {6, 8, 4, 11, 8, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {3, 6, 11, 3, 0, 6, 0, 4, 6, -1, -1, -1, -1, -1, -1, -1}, - {8, 6, 11, 8, 4, 6, 9, 0, 1, -1, -1, -1, -1, -1, -1, -1}, - {9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, -1, -1, -1, -1}, - {6, 8, 4, 6, 11, 8, 2, 10, 1, -1, -1, -1, -1, -1, -1, -1}, - {1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, -1, -1, -1, -1}, - {4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, -1, -1, -1, -1}, - {10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, -1}, - {8, 2, 3, 8, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1}, - {0, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, -1, -1, -1, -1}, - {1, 9, 4, 1, 4, 2, 2, 4, 6, -1, -1, -1, -1, -1, -1, -1}, - {8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, -1, -1, -1, -1}, - {10, 1, 0, 10, 0, 6, 6, 0, 4, -1, -1, -1, -1, -1, -1, -1}, - {4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, -1}, - {10, 9, 4, 6, 10, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {4, 9, 5, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 8, 3, 4, 9, 5, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, - {5, 0, 1, 5, 4, 0, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, - {11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, -1, -1, -1, -1}, - {9, 5, 4, 10, 1, 2, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, - {6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, -1, -1, -1, -1}, - {7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, -1, -1, -1, -1}, - {3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, -1}, - {7, 2, 3, 7, 6, 2, 5, 4, 9, -1, -1, -1, -1, -1, -1, -1}, - {9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, -1, -1, -1, -1}, - {3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, -1, -1, -1, -1}, - {6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, -1}, - {9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, -1, -1, -1, -1}, - {1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, -1}, - {4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, -1}, - {7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, -1, -1, -1, -1}, - {6, 9, 5, 6, 11, 9, 11, 8, 9, -1, -1, -1, -1, -1, -1, -1}, - {3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, -1, -1, -1, -1}, - {0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, -1, -1, -1, -1}, - {6, 11, 3, 6, 3, 5, 5, 3, 1, -1, -1, -1, -1, -1, -1, -1}, - {1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, -1, -1, -1, -1}, - {0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, -1}, - {11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, -1}, - {6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, -1, -1, -1, -1}, - {5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, -1, -1, -1, -1}, - {9, 5, 6, 9, 6, 0, 0, 6, 2, -1, -1, -1, -1, -1, -1, -1}, - {1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, -1}, - {1, 5, 6, 2, 1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, -1}, - {10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, -1, -1, -1, -1}, - {0, 3, 8, 5, 6, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {10, 5, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {11, 5, 10, 7, 5, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {11, 5, 10, 11, 7, 5, 8, 3, 0, -1, -1, -1, -1, -1, -1, -1}, - {5, 11, 7, 5, 10, 11, 1, 9, 0, -1, -1, -1, -1, -1, -1, -1}, - {10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, -1, -1, -1, -1}, - {11, 1, 2, 11, 7, 1, 7, 5, 1, -1, -1, -1, -1, -1, -1, -1}, - {0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, -1, -1, -1, -1}, - {9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, -1, -1, -1, -1}, - {7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, -1}, - {2, 5, 10, 2, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1}, - {8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, -1, -1, -1, -1}, - {9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, -1, -1, -1, -1}, - {9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, -1}, - {1, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 8, 7, 0, 7, 1, 1, 7, 5, -1, -1, -1, -1, -1, -1, -1}, - {9, 0, 3, 9, 3, 5, 5, 3, 7, -1, -1, -1, -1, -1, -1, -1}, - {9, 8, 7, 5, 9, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {5, 8, 4, 5, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1}, - {5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, -1, -1, -1, -1}, - {0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, -1, -1, -1, -1}, - {10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, -1}, - {2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, -1, -1, -1, -1}, - {0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, -1}, - {0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, -1}, - {9, 4, 5, 2, 11, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, -1, -1, -1, -1}, - {5, 10, 2, 5, 2, 4, 4, 2, 0, -1, -1, -1, -1, -1, -1, -1}, - {3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, -1}, - {5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, -1, -1, -1, -1}, - {8, 4, 5, 8, 5, 3, 3, 5, 1, -1, -1, -1, -1, -1, -1, -1}, - {0, 4, 5, 1, 0, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, -1, -1, -1, -1}, - {9, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {4, 11, 7, 4, 9, 11, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1}, - {0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, -1, -1, -1, -1}, - {1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, -1, -1, -1, -1}, - {3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, -1}, - {4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, -1, -1, -1, -1}, - {9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, -1}, - {11, 7, 4, 11, 4, 2, 2, 4, 0, -1, -1, -1, -1, -1, -1, -1}, - {11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, -1, -1, -1, -1}, - {2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, -1, -1, -1, -1}, - {9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, -1}, - {3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, -1}, - {1, 10, 2, 8, 7, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {4, 9, 1, 4, 1, 7, 7, 1, 3, -1, -1, -1, -1, -1, -1, -1}, - {4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, -1, -1, -1, -1}, - {4, 0, 3, 7, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {4, 8, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {9, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {3, 0, 9, 3, 9, 11, 11, 9, 10, -1, -1, -1, -1, -1, -1, -1}, - {0, 1, 10, 0, 10, 8, 8, 10, 11, -1, -1, -1, -1, -1, -1, -1}, - {3, 1, 10, 11, 3, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {1, 2, 11, 1, 11, 9, 9, 11, 8, -1, -1, -1, -1, -1, -1, -1}, - {3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, -1, -1, -1, -1}, - {0, 2, 11, 8, 0, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {3, 2, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {2, 3, 8, 2, 8, 10, 10, 8, 9, -1, -1, -1, -1, -1, -1, -1}, - {9, 10, 2, 0, 9, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, -1, -1, -1, -1}, - {1, 10, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {1, 3, 8, 9, 1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 9, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}; - - - /* - Determine the index into the edge table which - tells us which vertices are inside of the surface - */ - cubeindex = 0; - - if (grid.val[0] < isolevel) cubeindex |= 1; - if (grid.val[1] < isolevel) cubeindex |= 2; - if (grid.val[2] < isolevel) cubeindex |= 4; - if (grid.val[3] < isolevel) cubeindex |= 8; - if (grid.val[4] < isolevel) cubeindex |= 16; - if (grid.val[5] < isolevel) cubeindex |= 32; - if (grid.val[6] < isolevel) cubeindex |= 64; - if (grid.val[7] < isolevel) cubeindex |= 128; - - - - /* Cube is entirely in/out of the surface */ - if (edgeTable[cubeindex] == 0) - return(0); - - /* Find the vertices where the surface intersects the cube */ - if (edgeTable[cubeindex] & 1) - vertlist[0] = - VertexInterp(isolevel,grid.p[0],grid.p[1],grid.val[0],grid.val[1]); - if (edgeTable[cubeindex] & 2) - vertlist[1] = - VertexInterp(isolevel,grid.p[1],grid.p[2],grid.val[1],grid.val[2]); - if (edgeTable[cubeindex] & 4) - vertlist[2] = - VertexInterp(isolevel,grid.p[2],grid.p[3],grid.val[2],grid.val[3]); - if (edgeTable[cubeindex] & 8) - vertlist[3] = - VertexInterp(isolevel,grid.p[3],grid.p[0],grid.val[3],grid.val[0]); - if (edgeTable[cubeindex] & 16) - vertlist[4] = - VertexInterp(isolevel,grid.p[4],grid.p[5],grid.val[4],grid.val[5]); - if (edgeTable[cubeindex] & 32) - vertlist[5] = - VertexInterp(isolevel,grid.p[5],grid.p[6],grid.val[5],grid.val[6]); - if (edgeTable[cubeindex] & 64) - vertlist[6] = - VertexInterp(isolevel,grid.p[6],grid.p[7],grid.val[6],grid.val[7]); - if (edgeTable[cubeindex] & 128) - vertlist[7] = - VertexInterp(isolevel,grid.p[7],grid.p[4],grid.val[7],grid.val[4]); - if (edgeTable[cubeindex] & 256) - vertlist[8] = - VertexInterp(isolevel,grid.p[0],grid.p[4],grid.val[0],grid.val[4]); - if (edgeTable[cubeindex] & 512) - vertlist[9] = - VertexInterp(isolevel,grid.p[1],grid.p[5],grid.val[1],grid.val[5]); - if (edgeTable[cubeindex] & 1024) - vertlist[10] = - VertexInterp(isolevel,grid.p[2],grid.p[6],grid.val[2],grid.val[6]); - if (edgeTable[cubeindex] & 2048) - vertlist[11] = - VertexInterp(isolevel,grid.p[3],grid.p[7],grid.val[3],grid.val[7]); - - /* Create the triangle */ - ntriang = 0; - for (i=0; triTable[cubeindex][i]!=-1; i+=3) { - - triangles[ntriang].p[0] = vertlist[triTable[cubeindex][i ]]; - triangles[ntriang].p[1] = vertlist[triTable[cubeindex][i+1]]; - triangles[ntriang].p[2] = vertlist[triTable[cubeindex][i+2]]; - - ntriang++; - } // end for i - - return(ntriang); -} struct bin_ijk_t{ @@ -765,9 +143,6 @@ void poly_basis(const double r[3], double *p) { void compute_shape_functions( size_t i, - const double xpt, - const double ypt, - const double zpt, const DCArrayKokkos & x, const CArrayKokkos & vol, const CArrayKokkos & rk_coeffs, @@ -783,9 +158,9 @@ void compute_shape_functions( double p[num_poly_basis]; // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz] double r[3]; // vecx_j - vecx_i - r[0] = x(j,0) - xpt; // x_j-x_i - r[1] = x(j,1) - ypt; // y_j-y_i - r[2] = x(j,2) - zpt; // z_j-z_i + r[0] = x(j,0) - x(i,0); // x_j-x_i + r[1] = x(j,1) - x(i,1); // y_j-y_i + r[2] = x(j,2) - x(i,2); // z_j-z_i double W = kernel(r, h); poly_basis(r,p); @@ -793,7 +168,7 @@ void compute_shape_functions( double correction = 0.0; for (size_t a = 0; a < num_poly_basis; ++a){ correction += rk_coeffs(i,a) * p[a]; - } // end a + } // end for a rk_basis(j) = W * correction; }); @@ -804,7 +179,7 @@ void compute_shape_functions( -// Build reproducing kernel coefficients for one particle +// Build reproducing kernel coefficients for all particles in the domain void build_rk_coefficients( const DCArrayKokkos & x, const CArrayKokkos & vol, @@ -892,88 +267,33 @@ int main(int argc, char *argv[]) Kokkos::initialize(argc, argv); { - printf("Pointcloud reconstruction \n\n"); + printf("Pointcloud Reproducing Kernels \n\n"); - if(argc==1){ - printf("Please supply an STL file for testing the point cloud surface reconstruction code \n"); - return 0; - } - - std::string filename = argv[1]; - - auto [normal_host, - v1X_host, v1Y_host, v1Z_host, - v2X_host, v2Y_host, v2Z_host, - v3X_host, v3Y_host, v3Z_host, - num_inp_triangles_host] = binary_stl_reader(filename); - - // Warning on C++ support: - // At this time with C++, the contents from a tuple cannot - // be used inside a lambda function. The parallel loops use - // lambda functions. To overcome this C++ limitation, all - // contents in the tuple will be copied or pointed to (Using - // a MATAR dual view) allowing the data to be used in parallel. - const size_t num_inp_triangles = num_inp_triangles_host; - DViewCArrayKokkos normal(&normal_host(0,0), num_inp_triangles, 3); - DViewCArrayKokkos v1X(&v1X_host(0),num_inp_triangles); - DViewCArrayKokkos v1Y(&v1Y_host(0),num_inp_triangles); - DViewCArrayKokkos v1Z(&v1Z_host(0),num_inp_triangles); - DViewCArrayKokkos v2X(&v2X_host(0),num_inp_triangles); - DViewCArrayKokkos v2Y(&v2Y_host(0),num_inp_triangles); - DViewCArrayKokkos v2Z(&v2Z_host(0),num_inp_triangles); - DViewCArrayKokkos v3X(&v3X_host(0),num_inp_triangles); - DViewCArrayKokkos v3Y(&v3Y_host(0),num_inp_triangles); - DViewCArrayKokkos v3Z(&v3Z_host(0),num_inp_triangles); - - normal.update_device(); - v1X.update_device(); - v1Y.update_device(); - v1Z.update_device(); - v2X.update_device(); - v2Y.update_device(); - v2Z.update_device(); - v3X.update_device(); - v3Y.update_device(); - v3Z.update_device(); - - - - // small distance for moving in the +/- normal directions - double epsilon = 0.1*fmin(fmin(dx, dy), dz); - - - printf("Creating point cloud data from STL file \n\n"); // define a point cloud - size_t num_points = num_inp_triangles*3; // 1 point per triangle plus 2 more in the +/- directions DCArrayKokkos point_positions(num_points, 3, "point_positions"); - DCArrayKokkos point_signed_distance(num_points, "point_sign_distance"); // this is f in the journal paper - - // 1 point per triangle at this time, thus a loop over tris - FOR_ALL(tri, 0, num_inp_triangles, { - // point on surface - point_positions(tri, 0) = 1.0/3.0*((double)v1X(tri) + (double)v2X(tri) + (double)v3X(tri)); - point_positions(tri, 1) = 1.0/3.0*((double)v1Y(tri) + (double)v2Y(tri) + (double)v3Y(tri)); - point_positions(tri, 2) = 1.0/3.0*((double)v1Z(tri) + (double)v2Z(tri) + (double)v3Z(tri)); + DCArrayKokkos point_values(num_points, "point_values"); - point_signed_distance(tri) = 0.0; - - // off surface +normal - point_positions(tri+num_inp_triangles, 0) = point_positions(tri, 0) + epsilon*(double)normal(tri, 0); - point_positions(tri+num_inp_triangles, 1) = point_positions(tri, 1) + epsilon*(double)normal(tri, 1); - point_positions(tri+num_inp_triangles, 2) = point_positions(tri, 2) + epsilon*(double)normal(tri, 2); - - point_signed_distance(tri+num_inp_triangles) = epsilon; + // point locations + srand(static_cast(time(0))); // Seed the random number generator + for(size_t i=0; i(rand())/static_cast(RAND_MAX); + point_positions.host(i, 1) = Y0 + LY*static_cast(rand())/static_cast(RAND_MAX); + point_positions.host(i, 2) = Z0 + LZ*static_cast(rand())/static_cast(RAND_MAX); + } + point_positions.update_device(); + Kokkos::fence(); - // off surface -normal - point_positions(tri+2*num_inp_triangles, 0) = point_positions(tri, 0) - epsilon*(double)normal(tri, 0); - point_positions(tri+2*num_inp_triangles, 1) = point_positions(tri, 1) - epsilon*(double)normal(tri, 1); - point_positions(tri+2*num_inp_triangles, 2) = point_positions(tri, 2) - epsilon*(double)normal(tri, 2); + // point values + FOR_ALL(i, 0, num_points, { - point_signed_distance(tri+2*num_inp_triangles) = -epsilon; + printf("point location at i=%d is (%f, %f, %f) \n", i, point_positions(i, 0), point_positions(i, 1), point_positions(i, 2)); + point_values(i) = sqrt(point_positions(i, 0)*point_positions(i, 0) + + point_positions(i, 1)*point_positions(i, 1) + + point_positions(i, 2)*point_positions(i, 2)); }); // end parallel for tri's in the file - + printf("\n"); // ---------------------------- @@ -989,8 +309,7 @@ int main(int argc, char *argv[]) size_t num_bins = num_bins_x*num_bins_y*num_bins_z; -printf("num bins = %zu \n", num_bins); - + //printf("num bins = %zu \n", num_bins); DCArrayKokkos num_points_in_bin(num_bins); num_points_in_bin.set_values(0); @@ -1052,11 +371,13 @@ printf("num bins = %zu \n", num_bins); double partion_unity; double partion_unity_lcl; + double linear_preserving; + double linear_preserving_lcl; + for(size_t i=0; i x(num_pt_x, "pt_x"); - DCArrayKokkos y(num_pt_y, "pt_y"); - DCArrayKokkos z(num_pt_z, "pt_z"); - - - // function with isosurface that we want extracted - DCArrayKokkos gridValues (num_pt_x,num_pt_y,num_pt_z, "grid_values"); - - - // define the triangles of extracted surface - const size_t num_elems = (num_pt_x-1)*(num_pt_y-1)*(num_pt_z-1); - DCArrayKokkos all_mesh_surf_triangles(num_elems, 5, "mesh_surf_tris"); // max of 5 per elem - DCArrayKokkos num_triangles_in_elem(num_elems, "num_tris_in_elem"); - num_triangles_in_elem.set_values(0); - - printf("Evaluating surf function on mesh \n"); - FOR_ALL(i, 0, num_pt_x, { - x(i) = dx*(double)i + X0;; - }); - FOR_ALL(j, 0, num_pt_y, { - y(j) = dy*(double)j + Y0; - }); - FOR_ALL(k, 0, num_pt_z, { - z(k) = dz*(double)k + Z0; - }); - Kokkos::fence(); - - // save mesh coordinates of the nodes - FOR_ALL(k, 0, num_pt_z, - j, 0, num_pt_y, - i, 0, num_pt_x, { - - double x_point[3]; - x_point[0] = x(i); - x_point[1] = y(j); - x_point[2] = z(k); - - gridValues(i,j,k) = 0.0; - - // get i,j,k indices of the bins - bin_ijk_t bin_ijk = get_bin_ijk(x_point[0], - x_point[1], - x_point[2]); - - // get the 1D index - size_t bin_id = bin_ijk.i + (bin_ijk.j + bin_ijk.k*num_bins_y)*num_bins_x; - - size_t point_i; // the closest point - double dist_i = 1.e16; - - - // find the closest point to the evaluation location - for (size_t point_lid=0; point_lid Date: Tue, 9 Sep 2025 17:03:14 -0600 Subject: [PATCH 07/23] added vtk writer of particles --- examples/pointcloud/pointcloud-rk.cpp | 30 +++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index 6fd003cf..4c7faca7 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -293,6 +293,8 @@ int main(int argc, char *argv[]) point_positions(i, 2)*point_positions(i, 2)); }); // end parallel for tri's in the file + point_values.update_host(); + Kokkos::fence(); printf("\n"); @@ -393,22 +395,46 @@ int main(int argc, char *argv[]) linear_preserving_lcl += rk_basis(j)*vol(j)*point_positions(j,0); }, linear_preserving); - printf("partition unity = %f, ", partion_unity); + printf("partition unity = %f, ", partion_unity); printf("linear preserving error = %f at i=%zu \n", fabs(linear_preserving-point_positions(i,0)), i); } // end for i + printf("Writing VTK Graphics File \n\n"); + + std::ofstream out("cloud.vtk"); + + out << "# vtk DataFile Version 3.0\n"; + out << "3D point cloud\n"; + out << "ASCII\n"; + out << "DATASET POLYDATA\n"; + out << "POINTS " << num_points << " float\n"; + for (size_t pt_id = 0; pt_id < num_points; ++pt_id) { + out << point_positions.host(pt_id,0) << " " + << point_positions.host(pt_id,1) << " " + << point_positions.host(pt_id,2) << "\n"; + } + + out << "\nPOINT_DATA " << num_points << "\n"; + out << "SCALARS field float 1\n"; + out << "LOOKUP_TABLE default\n"; + for (size_t pt_id = 0; pt_id < num_points; ++pt_id) { + out << point_values.host(pt_id) << "\n"; + } + printf("Finished \n\n"); - } // end of kokkos scope + } // end of kokkos scope Kokkos::finalize(); + + return 0; } // end main From dbf96a2ba9b8b17954e3b4b55a959bd3775e0cdc Mon Sep 17 00:00:00 2001 From: Nathaniel Morgan Date: Thu, 11 Sep 2025 11:01:09 -0600 Subject: [PATCH 08/23] added binning to make local neighbors --- examples/pointcloud/pointcloud-rk.cpp | 402 ++++++++++++++++++++------ 1 file changed, 317 insertions(+), 85 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index 4c7faca7..3f86e704 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -53,6 +53,8 @@ #include "lu_solver.hpp" +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) using namespace mtr; @@ -64,9 +66,9 @@ const double PI = 3.14159265358979323846; const size_t num_points = 101; // the bin sizes for finding neighboring points -const double bin_dx = 0.5; // 2 bins in x -const double bin_dy = 0.5; // 2 bins in y -const double bin_dz = 0.5; // 2 bins in z +const double bin_dx = 0.05; // bins in x +const double bin_dy = 0.05; // bins in y +const double bin_dz = 0.05; // bins in z const double X0 = 0.0; // origin const double Y0 = 0.0; @@ -82,26 +84,56 @@ const double LZ = 1.0; - -struct bin_ijk_t{ - size_t i, j, k; +struct bin_keys_t{ + size_t i,j,k; }; +KOKKOS_INLINE_FUNCTION +size_t get_gid(size_t i, size_t j, size_t k, size_t num_x, size_t num_y){ + return i + (j + k*num_y)*num_x; +} -bin_ijk_t get_bin_ijk(const double x_pt, const double y_pt, const double z_pt){ +KOKKOS_INLINE_FUNCTION +bin_keys_t get_bin_keys(const double x_pt, + const double y_pt, + const double z_pt){ + + + double i_dbl = fmax(1.0e-15, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // x = ih + X0 + dx_bin*0.5 + double j_dbl = fmax(1.0e-15, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10)); + double k_dbl = fmax(1.0e-15, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10)); + + bin_keys_t bin_keys; // save i,j,k to the bin keys + + // get the integer for the bins + bin_keys.i = (size_t)i_dbl; + bin_keys.j = (size_t)j_dbl; + bin_keys.k = (size_t)k_dbl; + + return bin_keys; + +} // end function + +KOKKOS_INLINE_FUNCTION +size_t get_bin_gid(const double x_pt, + const double y_pt, + const double z_pt, + const size_t num_bins_x, + const size_t num_bins_y){ - bin_ijk_t bin_ijk; double i_dbl = fmax(1.0e-15, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // x = ih + X0 + dx_bin*0.5 double j_dbl = fmax(1.0e-15, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10)); double k_dbl = fmax(1.0e-15, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10)); // get the integers for the bins - bin_ijk.i = (size_t)i_dbl; - bin_ijk.j = (size_t)j_dbl; - bin_ijk.k = (size_t)k_dbl; + size_t i = (size_t)i_dbl; + size_t j = (size_t)j_dbl; + size_t k = (size_t)k_dbl; - return bin_ijk; + // get the 1D index for this bin + return get_gid(i, j, k, num_bins_x, num_bins_y); + } // end function // Gaussian function part of the RBF @@ -142,39 +174,46 @@ void poly_basis(const double r[3], double *p) { void compute_shape_functions( - size_t i, + size_t point_gid, const DCArrayKokkos & x, + const DCArrayKokkos points_num_neighbors, + const DRaggedRightArrayKokkos points_in_point, const CArrayKokkos & vol, const CArrayKokkos & rk_coeffs, - const CArrayKokkos & rk_basis, + const DRaggedRightArrayKokkos & rk_basis, const double h) { - // global num_points at this time, make it num_points in neighborhood - size_t num_points_neighborhood = x.dims(0); // will come from hash bins + //--------------------------------------------- + // walk over the neighboring points + //--------------------------------------------- + + FOR_ALL(neighbor_point_lid, 0, points_num_neighbors(point_gid), { - // loop over all neighbors around point i - FOR_ALL(j, 0, num_points_neighborhood, { + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); double p[num_poly_basis]; // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz] double r[3]; // vecx_j - vecx_i - r[0] = x(j,0) - x(i,0); // x_j-x_i - r[1] = x(j,1) - x(i,1); // y_j-y_i - r[2] = x(j,2) - x(i,2); // z_j-z_i + r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i + r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i + r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i double W = kernel(r, h); poly_basis(r,p); double correction = 0.0; for (size_t a = 0; a < num_poly_basis; ++a){ - correction += rk_coeffs(i,a) * p[a]; + correction += rk_coeffs(point_gid,a) * p[a]; } // end for a - rk_basis(j) = W * correction; - }); + rk_basis(point_gid, neighbor_point_lid) = W * correction; + + }); // neighbor_point_lid + return; + } // end function @@ -182,39 +221,44 @@ void compute_shape_functions( // Build reproducing kernel coefficients for all particles in the domain void build_rk_coefficients( const DCArrayKokkos & x, + const DCArrayKokkos points_num_neighbors, + const DRaggedRightArrayKokkos points_in_point, const CArrayKokkos & vol, const CArrayKokkos & rk_coeffs, double h) { - // global num_points at this time, make it num_points in neighborhood - size_t num_points_neighborhood = x.dims(0); // will come from hash bins - // actual number of points size_t num_points = x.dims(0); // loop over all nodes in the problem - FOR_ALL(i, 0, num_points, { + FOR_ALL(point_gid, 0, num_points, { double M_1D[num_poly_basis*num_poly_basis]; ViewCArrayKokkos M(&M_1D[0], num_poly_basis, num_poly_basis); M.set_values(0.0); // values in rhs after this function will be accessed as rk_coeffs(i,0:N) - ViewCArrayKokkos rhs (&rk_coeffs(i,0), num_poly_basis); + ViewCArrayKokkos rhs (&rk_coeffs(point_gid,0), num_poly_basis); rhs.set_values(0.0); rhs(0) = 1.0; // enforce reproduction of constant 1, everything else is = 0 double p[num_poly_basis]; // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz] double r[3]; // vecx_j - vecx_i - // loop over all nodes around point i - for (size_t j = 0; j < num_points_neighborhood; ++j) { - - r[0] = x(j,0) - x(i,0); // x_j-x_i - r[1] = x(j,1) - x(i,1); // y_j-y_i - r[2] = x(j,2) - x(i,2); // z_j-z_i + + //--------------------------------------------- + // walk over the neighboring points + //--------------------------------------------- + + for (size_t neighbor_point_lid=0; neighbor_point_lid keys_in_bin(num_bins, "keys_in_bin"); // mapping from gid to (i,j,k) + DCArrayKokkos num_points_in_bin(num_bins, "num_bins"); + num_points_in_bin.set_values(0); + DRaggedRightArrayKokkos points_in_bin; // allocated later - size_t num_bins = num_bins_x*num_bins_y*num_bins_z; + // connectivity from points to bins + DCArrayKokkos points_bin_gid(num_points, "points_in_gid"); + CArrayKokkos points_bin_lid_storage(num_points, "bin_lid_storage"); // only used to create storage + DCArrayKokkos points_bin_stencil(num_points, "bin_stencil"); // how many bins needed for a particle + DCArrayKokkos points_num_neighbors(num_points, "num_neighbors"); + + // build reverse mapping between gid and i,j,k + FOR_ALL(i, 0, num_bins_x, + j, 0, num_bins_y, + k, 0, num_bins_z, { + - //printf("num bins = %zu \n", num_bins); + // get bin gid for this i,j,k + size_t bin_gid = get_gid(i, j, k, num_bins_x, num_bins_y); - DCArrayKokkos num_points_in_bin(num_bins); - num_points_in_bin.set_values(0); - DCArrayKokkos points_bin_id(num_points); - DCArrayKokkos points_bin_id_storage(num_points); - - FOR_ALL(pt_id, 0, num_points, { + // the i,j,k for this bin + bin_keys_t bin_keys; + bin_keys.i = i; + bin_keys.j = j; + bin_keys.k = k; + + // save mapping from bin_gid to bin_keys i,j,k + keys_in_bin(bin_gid) = bin_keys; - // get i,j,k indices of the bins - bin_ijk_t bin_ijk = get_bin_ijk(point_positions(pt_id,0), - point_positions(pt_id,1), - point_positions(pt_id,2)); + }); + Kokkos::fence(); + keys_in_bin.update_host(); + + + // ------------------------------------------------------------------- + // below here, these routine must be called every time particles move + // ------------------------------------------------------------------- - // get the 1D index - size_t bin_id = bin_ijk.i + (bin_ijk.j + bin_ijk.k*num_bins_y)*num_bins_x; - - size_t storage_place = Kokkos::atomic_fetch_add(&num_points_in_bin(bin_id), 1); - points_bin_id(pt_id) = bin_id; // the id of the bin - points_bin_id_storage(pt_id) = storage_place; // the storage place in the bin + // save bin id to points + FOR_ALL(point_gid, 0, num_points, { + + // get the 1D index for this bin + size_t bin_gid = get_bin_gid(point_positions(point_gid,0), + point_positions(point_gid,1), + point_positions(point_gid,2), + num_bins_x, + num_bins_y); + + size_t storage_lid = Kokkos::atomic_fetch_add(&num_points_in_bin(bin_gid), 1); + points_bin_gid(point_gid) = bin_gid; // the id of the bin + points_bin_lid_storage(point_gid) = storage_lid; // the storage place in the bin }); // end for all + Kokkos::fence(); + points_bin_gid.update_host(); + num_points_in_bin.update_host(); - DRaggedRightArrayKokkos points_in_bin(num_points_in_bin); - FOR_ALL(pt_id, 0, num_points, { + // allocate points in bin connectivity + points_in_bin = DRaggedRightArrayKokkos (num_points_in_bin, "num_points_in_bin"); - size_t bin_id = points_bin_id(pt_id); - size_t storage_place = points_bin_id_storage(pt_id); - points_in_bin(bin_id, storage_place) = pt_id; + // save points in bin + FOR_ALL(point_gid, 0, num_points, { + + // get bin gid + size_t bin_gid = points_bin_gid(point_gid); + + // get it's storage location in the ragged right compressed storage + size_t storage_lid = points_bin_lid_storage(point_gid); + + // save the point to this bin + points_in_bin(bin_gid, storage_lid) = point_gid; }); // end for all + + + + // ------------------------------------------------ + // Find the neighbors around each point using bins + // ------------------------------------------------ + FOR_ALL(point_gid, 0, num_points, { - // ---------------------------- - // Reconstruct surface here - // ---------------------------- + // get bin gid + size_t bin_gid = points_bin_gid(point_gid); + + // get i,j,k for this bin + bin_keys_t bin_keys = keys_in_bin(bin_gid); + // printf(" keys = %zu, %zu, %zu, bin size = %zu, %zu, %zu \n", + // bin_keys.i, bin_keys.j, bin_keys.k, + // num_bins_x, num_bins_y, num_bins_z); + + // loop over neighboring bins + size_t num_points_found; + + // establish the stencil size to get enough particles + for(int stencil=1; stencil<1000; stencil++){ + + num_points_found = 0; + + const int i = bin_keys.i; + const int j = bin_keys.j; + const int k = bin_keys.k; + + const int imin = MAX(0, i-stencil); + const int imax = MIN(num_bins_x-1, i+stencil); + + const int jmin = MAX(0, j-stencil); + const int jmax = MIN(num_bins_y-1, j+stencil); + + const int kmin = MAX(0, k-stencil); + const int kmax = MIN(num_bins_z-1, k+stencil); + + for (int icount=imin; icount<=imax; icount++){ + for (int jcount=jmin; jcount<=jmax; jcount++) { + for (int kcount=kmin; kcount<=kmax; kcount++){ + + // get bin neighbor gid + size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y); + num_points_found += num_points_in_bin(neighbor_bin_gid); + + } // end for kcount + } // end for jcount + } // end for icount + + // the min number of points required to solve the system is num_poly_basis+1 + if (num_points_found > num_poly_basis+5){ + + points_bin_stencil(point_gid) = stencil; + points_num_neighbors(point_gid) = num_points_found; // key for allocations + printf("neighbors found = %zu \n", num_points_found); + break; + } + + } // end for stencil + + printf("num_pts_in_bin = %zu, neighbors found = %zu, Stencil size = %zu, bin keys = %zu, %zu, %zu \n", + num_points_in_bin(bin_gid), + points_num_neighbors(point_gid), + points_bin_stencil(point_gid), + bin_keys.i, + bin_keys.j, + bin_keys.k); + + }); // end for all + Kokkos::fence(); + points_bin_stencil.update_host(); + points_num_neighbors.update_host(); + + // allocate memory for points in point + DRaggedRightArrayKokkos points_in_point(points_num_neighbors, "points_in_point"); + + // --------------------- + // Save the neighbors + // --------------------- - printf("Reconstructing surface using point cloud data \n\n"); + // find my neighbors using bins + FOR_ALL(point_gid, 0, num_points, { - // assuming all point neighbors contribute, will change to a hash bins - const size_t num_points_neighborhood = num_points; + // get bin gid for this point + size_t bin_gid = points_bin_gid(point_gid); + + // get i,j,k for this bin + bin_keys_t bin_keys = keys_in_bin(bin_gid); - CArrayKokkos rk_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point - CArrayKokkos rk_basis(num_points); // reproducing kernel basis, should have size num_points_neighborhood + const int i = bin_keys.i; + const int j = bin_keys.j; + const int k = bin_keys.k; + + // walk over the stencil to get neighbors + const int stencil = points_bin_stencil(point_gid); + + const int imin = MAX(0, i-stencil); + const int imax = MIN(num_bins_x-1, i+stencil); + + const int jmin = MAX(0, j-stencil); + const int jmax = MIN(num_bins_y-1, j+stencil); + + const int kmin = MAX(0, k-stencil); + const int kmax = MIN(num_bins_z-1, k+stencil); + + size_t num_saved = 0; + size_t num_points_found = 0; + + for (int icount=imin; icount<=imax; icount++){ + for (int jcount=jmin; jcount<=jmax; jcount++) { + for (int kcount=kmin; kcount<=kmax; kcount++){ + + // get bin neighbor gid + size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y); + num_points_found += num_points_in_bin(neighbor_bin_gid); + + // save the points in this bin + for(size_t neighbor_pt_lid=0; neighbor_pt_lid rk_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point + DRaggedRightArrayKokkos rk_basis(points_num_neighbors); // reproducing kernel basis (num_points, num_neighbors) CArrayKokkos vol(num_points); vol.set_values(1.0); double h = 1.0; - printf("building rk coefficients \n"); + printf("building reproducing kernel coefficients \n"); // build coefficients on basis functions - build_rk_coefficients(point_positions, vol, rk_coeffs, h); + build_rk_coefficients(point_positions, + points_num_neighbors, + points_in_point, + vol, + rk_coeffs, + h); - // performing checks on rk_coeffs double partion_unity; double partion_unity_lcl; @@ -376,29 +603,34 @@ int main(int argc, char *argv[]) double linear_preserving; double linear_preserving_lcl; - for(size_t i=0; i Date: Thu, 11 Sep 2025 13:18:19 -0600 Subject: [PATCH 09/23] added Q2 test --- examples/pointcloud/pointcloud-rk.cpp | 28 +++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index 3f86e704..eafcc38d 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -485,19 +485,11 @@ int main(int argc, char *argv[]) points_bin_stencil(point_gid) = stencil; points_num_neighbors(point_gid) = num_points_found; // key for allocations - printf("neighbors found = %zu \n", num_points_found); break; } } // end for stencil - printf("num_pts_in_bin = %zu, neighbors found = %zu, Stencil size = %zu, bin keys = %zu, %zu, %zu \n", - num_points_in_bin(bin_gid), - points_num_neighbors(point_gid), - points_bin_stencil(point_gid), - bin_keys.i, - bin_keys.j, - bin_keys.k); }); // end for all Kokkos::fence(); @@ -551,9 +543,7 @@ int main(int argc, char *argv[]) for(size_t neighbor_pt_lid=0; neighbor_pt_lid Date: Thu, 11 Sep 2025 16:27:48 -0600 Subject: [PATCH 10/23] WIP: grad of basis fcns --- examples/pointcloud/pointcloud-rk.cpp | 220 ++++++++++++++++++++++---- 1 file changed, 193 insertions(+), 27 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index eafcc38d..78f6f481 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -136,8 +136,9 @@ size_t get_bin_gid(const double x_pt, } // end function + // Gaussian function part of the RBF -// rbf = exp(-(x - xj)*(x - xj)/h) +// rbf = exp(-(xj - x)*(xj - x)/h) KOKKOS_FUNCTION double kernel(const double r[3], const double h){ @@ -151,6 +152,28 @@ double kernel(const double r[3], const double h){ } // end of function +// Gradient Gaussian function +// rbf = exp(-(xj - x)*(xj - x)/h) +KOKKOS_FUNCTION +void grad_kernel(const double r[3], const double h, double *grad_W){ + + double diff_sqrd = 0.0; + + for(size_t dim=0; dim<3; dim++){ + diff_sqrd += r[dim]*r[dim]; + } // dim + + const double rbf = exp(-diff_sqrd/(h*h)); + + // gradient + grad_W[0] = 2.0/h*r[0]*rbf; + grad_W[1] = 2.0/h*r[1]*rbf; + grad_W[2] = 2.0/h*r[2]*rbf; + + return; +} // end of function + + // Polynomial basis up to quadratic in 3D (10 terms) const size_t num_poly_basis = 10; KOKKOS_INLINE_FUNCTION @@ -173,14 +196,66 @@ void poly_basis(const double r[3], double *p) { } // end function -void compute_shape_functions( +KOKKOS_INLINE_FUNCTION +void grad_poly_basis(const double r[3], double (*grad_p)[num_poly_basis]) { + + const double drdx = -1.0; + + grad_p[0][0] = 0.0; + grad_p[0][1] = drdx; + grad_p[0][2] = 0.0; + grad_p[0][3] = 0.0; + grad_p[0][4] = 2.0*r[0]*drdx; + grad_p[0][5] = r[1]*drdx; + grad_p[0][6] = r[2]*drdx; + grad_p[0][7] = 0.0; + grad_p[0][8] = 0.0; + grad_p[0][9] = 0.0; + + // for high-order will use (x^a y^b z^c) + + const double drdy = -1.0; + + grad_p[1][0] = 0.0; + grad_p[1][1] = 0.0; + grad_p[1][2] = drdy; + grad_p[1][3] = 0.0; + grad_p[1][4] = 0.0; + grad_p[1][5] = r[0]*drdy; + grad_p[1][6] = 0.0; + grad_p[1][7] = 2.0*r[1]*drdy; + grad_p[1][8] = r[2]*drdy; + grad_p[1][9] = 0.0; + + // for high-order will use (x^a y^b z^c) + + const double drdz = -1.0; + + grad_p[2][0] = 0.0; + grad_p[2][1] = 0.0; + grad_p[2][2] = 0.0; + grad_p[2][3] = drdz; + grad_p[2][4] = 0.0; + grad_p[2][5] = 0.0; + grad_p[2][6] = r[0]*drdz; + grad_p[2][7] = 0.0; + grad_p[2][8] = r[1]*drdz; + grad_p[2][9] = 2.0*r[2]*drdz; + + // for high-order will use (x^a y^b z^c) + + return; +} // end function + + +void calc_shape_functions( size_t point_gid, const DCArrayKokkos & x, const DCArrayKokkos points_num_neighbors, const DRaggedRightArrayKokkos points_in_point, const CArrayKokkos & vol, - const CArrayKokkos & rk_coeffs, - const DRaggedRightArrayKokkos & rk_basis, + const CArrayKokkos & p_coeffs, + const DRaggedRightArrayKokkos & basis, const double h) { @@ -203,14 +278,84 @@ void compute_shape_functions( double correction = 0.0; for (size_t a = 0; a < num_poly_basis; ++a){ - correction += rk_coeffs(point_gid,a) * p[a]; + correction += p_coeffs(point_gid,a) * p[a]; } // end for a - rk_basis(point_gid, neighbor_point_lid) = W * correction; + basis(point_gid, neighbor_point_lid) = W * correction; }); // neighbor_point_lid + return; + +} // end function + + + +void calc_grad_shape_functions( + size_t point_gid, + const DCArrayKokkos & x, + const DCArrayKokkos points_num_neighbors, + const DRaggedRightArrayKokkos points_in_point, + const CArrayKokkos & vol, + const CArrayKokkos & p_coeffs, + const DRaggedRightArrayKokkos & basis, + const DRaggedRightArrayKokkos & grad_basis, + const double h) +{ + + //--------------------------------------------- + // walk over the neighboring points + //--------------------------------------------- + + FOR_ALL(neighbor_point_lid, 0, points_num_neighbors(point_gid), { + + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + + double p[num_poly_basis]; // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz] + double grad_p[3][num_poly_basis]; // matrix holding grad polynomial basis + double r[3]; // vecx_j - vecx_i + r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i + r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i + r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i + + double W = kernel(r, h); + poly_basis(r,p); + + double correction = 0.0; + for (size_t a = 0; a < num_poly_basis; ++a){ + correction += p_coeffs(point_gid,a) * p[a]; + } // end for a + + basis(point_gid, neighbor_point_lid) = W * correction; + + // --- gradient --- + double grad_W[3]; + grad_kernel(r, h, grad_W); + grad_poly_basis(r, grad_p); + + double term1_x = 0.0; + double term1_y = 0.0; + double term1_z = 0.0; + + double term2_x = 0.0; + double term2_y = 0.0; + double term2_z = 0.0; + + for (size_t a = 0; a < num_poly_basis; ++a){ + term1_x += grad_p[0][a] * p_coeffs(point_gid,a); + term1_y += grad_p[1][a] * p_coeffs(point_gid,a); + term1_z += grad_p[2][a] * p_coeffs(point_gid,a); + } // end for a + term1_x *= W; + term1_y *= W; + term1_z *= W; + + term2_x = correction*grad_W[0]; + term2_y = correction*grad_W[1]; + term2_z = correction*grad_W[2]; + + }); // neighbor_point_lid return; @@ -218,13 +363,14 @@ void compute_shape_functions( -// Build reproducing kernel coefficients for all particles in the domain -void build_rk_coefficients( +// Build reproducing kernel poly coefficients for all particles in the domain +void calc_p_coefficients( const DCArrayKokkos & x, const DCArrayKokkos points_num_neighbors, const DRaggedRightArrayKokkos points_in_point, const CArrayKokkos & vol, - const CArrayKokkos & rk_coeffs, + const CArrayKokkos & p_coeffs, + const CArrayKokkos & M_inv, double h) { @@ -239,15 +385,14 @@ void build_rk_coefficients( ViewCArrayKokkos M(&M_1D[0], num_poly_basis, num_poly_basis); M.set_values(0.0); - // values in rhs after this function will be accessed as rk_coeffs(i,0:N) - ViewCArrayKokkos rhs (&rk_coeffs(point_gid,0), num_poly_basis); + // values in rhs after this function will be accessed as p_coeffs(i,0:N) + ViewCArrayKokkos rhs (&p_coeffs(point_gid,0), num_poly_basis); rhs.set_values(0.0); rhs(0) = 1.0; // enforce reproduction of constant 1, everything else is = 0 double p[num_poly_basis]; // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz] double r[3]; // vecx_j - vecx_i - //--------------------------------------------- // walk over the neighboring points //--------------------------------------------- @@ -295,6 +440,22 @@ void build_rk_coefficients( printf("ERROR: matrix is singluar \n"); } + + // -------------------------------------------------- + // things needed for gradient of the basis function + double col_1D[num_poly_basis]; + ViewCArrayKokkos col(&col_1D[0], num_poly_basis); + + // making a view, inverting only the matrix at point i + ViewCArrayKokkos M_inv_pt(&M_inv(point_gid,0,0), num_poly_basis,num_poly_basis); + + LU_invert(M, // input matrix + perm, // permutations + M_inv_pt, // inverse matrix at point gid + col); // tmp array + // ------------------------------------------------- + + // solve for p_coefs LU_backsub(M, perm, rhs); // note: answer is sent back in rhs }); // end parallel loop @@ -567,26 +728,31 @@ int main(int argc, char *argv[]) printf("Reconstructing basis using point cloud data \n\n"); - CArrayKokkos rk_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point - DRaggedRightArrayKokkos rk_basis(points_num_neighbors); // reproducing kernel basis (num_points, num_neighbors) + CArrayKokkos p_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point CArrayKokkos vol(num_points); vol.set_values(1.0); - double h = 1.0; + CArrayKokkos M_inv(num_points, num_poly_basis, num_poly_basis); + CArrayKokkos grad_M(num_points, num_poly_basis, num_poly_basis); + + DRaggedRightArrayKokkos basis(points_num_neighbors); // reproducing kernel basis (num_points, num_neighbors) + DRaggedRightArrayKokkos grad_basis(points_num_neighbors,3); // reproducing kernel basis (num_points, num_neighbors) + double h = 1.0; + printf("building reproducing kernel coefficients \n"); // build coefficients on basis functions - build_rk_coefficients(point_positions, + calc_p_coefficients(point_positions, points_num_neighbors, points_in_point, vol, - rk_coeffs, + p_coeffs, + M_inv, h); - - // performing checks on rk_coeffs + // performing checks on p_coeffs double partion_unity; double partion_unity_lcl; @@ -600,18 +766,18 @@ int main(int argc, char *argv[]) for(size_t point_gid=0; point_gid Date: Sat, 13 Sep 2025 13:05:59 -0600 Subject: [PATCH 11/23] Added gradient of basis --- examples/pointcloud/pointcloud-rk.cpp | 221 +++++++++++++++++++++----- 1 file changed, 177 insertions(+), 44 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index 78f6f481..45bcbb3e 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -166,9 +166,9 @@ void grad_kernel(const double r[3], const double h, double *grad_W){ const double rbf = exp(-diff_sqrd/(h*h)); // gradient - grad_W[0] = 2.0/h*r[0]*rbf; - grad_W[1] = 2.0/h*r[1]*rbf; - grad_W[2] = 2.0/h*r[2]*rbf; + grad_W[0] = 2.0/(h*h)*r[0]*rbf; + grad_W[1] = 2.0/(h*h)*r[1]*rbf; + grad_W[2] = 2.0/(h*h)*r[2]*rbf; return; } // end of function @@ -248,7 +248,7 @@ void grad_poly_basis(const double r[3], double (*grad_p)[num_poly_basis]) { } // end function -void calc_shape_functions( +void calc_basis_functions( size_t point_gid, const DCArrayKokkos & x, const DCArrayKokkos points_num_neighbors, @@ -290,29 +290,66 @@ void calc_shape_functions( } // end function - -void calc_grad_shape_functions( +// grad_C = -M^-1 * grad_M * C +// grad_M[a][b][i] = V (P[a] * grad_p[i][b] W + grad_p[i][a] P[b] W + P[a] P[b] grad_W[i]) +// -M^-1[d][k]*grad_M[k][b][i]*C[b] = [d][i] +// p[d] grad_C[d][i] +void calc_basis_and_grad_basis_functions( size_t point_gid, const DCArrayKokkos & x, const DCArrayKokkos points_num_neighbors, const DRaggedRightArrayKokkos points_in_point, const CArrayKokkos & vol, const CArrayKokkos & p_coeffs, + const CArrayKokkos & M_inv, const DRaggedRightArrayKokkos & basis, const DRaggedRightArrayKokkos & grad_basis, const double h) { - //--------------------------------------------- - // walk over the neighboring points - //--------------------------------------------- + // -------------- + // Step 1: assemble grad M, the gradient of the moment matrix + + FArrayKokkos grad_m(3,num_poly_basis,num_poly_basis); + grad_m.set_values(0.0); + // walk over the neighboring points FOR_ALL(neighbor_point_lid, 0, points_num_neighbors(point_gid), { size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); - double p[num_poly_basis]; // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz] - double grad_p[3][num_poly_basis]; // matrix holding grad polynomial basis + double r[3]; // vecx_j - vecx_i + r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i + r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i + r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i + + double W = kernel(r,h); + double grad_W[3]; grad_kernel(r,h,grad_W); + + double p[num_poly_basis]; poly_basis(r,p); + double grad_p[3][num_poly_basis]; grad_poly_basis(r,grad_p); + + double Vj = vol(neighbor_point_gid); + + // grad_M = V (P grad_p W + grad_p P W + P P grad_W) + for(size_t a=0;a col(&col_1D[0], num_poly_basis); // making a view, inverting only the matrix at point i - ViewCArrayKokkos M_inv_pt(&M_inv(point_gid,0,0), num_poly_basis,num_poly_basis); + ViewCArrayKokkos M_inv_point(&M_inv(point_gid,0,0), num_poly_basis,num_poly_basis); LU_invert(M, // input matrix perm, // permutations - M_inv_pt, // inverse matrix at point gid + M_inv_point, // inverse matrix at point gid col); // tmp array // ------------------------------------------------- @@ -745,12 +821,12 @@ int main(int argc, char *argv[]) // build coefficients on basis functions calc_p_coefficients(point_positions, - points_num_neighbors, - points_in_point, - vol, - p_coeffs, - M_inv, - h); + points_num_neighbors, + points_in_point, + vol, + p_coeffs, + M_inv, + h); // performing checks on p_coeffs double partion_unity; @@ -762,17 +838,50 @@ int main(int argc, char *argv[]) double quadratic_preserving; double quadratic_preserving_lcl; + double grad_x_p0; + double grad_x_p0_lcl; + double grad_y_p0; + double grad_y_p0_lcl; + double grad_z_p0; + double grad_z_p0_lcl; + + double grad_x_p1; + double grad_x_p1_lcl; + double grad_y_p1; + double grad_y_p1_lcl; + double grad_z_p1; + double grad_z_p1_lcl; + + double grad_x_p2; + double grad_x_p2_lcl; + double grad_y_p2; + double grad_y_p2_lcl; + double grad_z_p2; + double grad_z_p2_lcl; + // loop over the particles in the domain for(size_t point_gid=0; point_gid Date: Sat, 13 Sep 2025 14:00:42 -0600 Subject: [PATCH 12/23] added P0, P1, and P2 gradient tests --- examples/pointcloud/pointcloud-rk.cpp | 43 +++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index 45bcbb3e..ec1d6ff4 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -933,6 +933,49 @@ int main(int argc, char *argv[]) printf("grad(P0) = %f, %f, %f, \n", grad_x_p0, grad_y_p0, grad_z_p0); + // Sum(grad(P1)) = [1]; + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p1_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_x_p1_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0); + }, grad_x_p1); + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p1_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_y_p1_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1); + }, grad_y_p1); + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p1_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_z_p1_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2); + }, grad_z_p1); + printf("error in grad(P1) = %f, %f, %f, \n", + fabs(grad_x_p1 - 1.0), + fabs(grad_y_p1 - 1.0), + fabs(grad_z_p1 - 1.0)); + + + // Sum(grad(P2)) = [2]; + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p2_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_x_p2_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0)*point_positions(neighbor_point_gid,0); + }, grad_x_p2); + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p2_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_y_p2_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1)*point_positions(neighbor_point_gid,1); + }, grad_y_p2); + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p2_lcl, { + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + grad_z_p2_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2)*point_positions(neighbor_point_gid,2); + }, grad_z_p2); + printf("error in grad(P2) = %f, %f, %f, \n", + fabs(grad_x_p2-2.0*point_positions(point_gid,0)), + fabs(grad_y_p2-2.0*point_positions(point_gid,1)), + fabs(grad_z_p2-2.0*point_positions(point_gid,2))); + } // end for point gid From 66c1b28cabfec8dd179d03d01e821eb4b648b4a1 Mon Sep 17 00:00:00 2001 From: Nathaniel Morgan Date: Sat, 13 Sep 2025 15:46:19 -0600 Subject: [PATCH 13/23] more parallism and DOP implementation --- examples/pointcloud/pointcloud-rk.cpp | 301 +++++++++++++------------- 1 file changed, 156 insertions(+), 145 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index ec1d6ff4..d42c2e92 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -197,50 +197,50 @@ void poly_basis(const double r[3], double *p) { KOKKOS_INLINE_FUNCTION -void grad_poly_basis(const double r[3], double (*grad_p)[num_poly_basis]) { +void grad_poly_basis(const double r[3], double (*grad_p)[3]) { const double drdx = -1.0; grad_p[0][0] = 0.0; - grad_p[0][1] = drdx; - grad_p[0][2] = 0.0; - grad_p[0][3] = 0.0; - grad_p[0][4] = 2.0*r[0]*drdx; - grad_p[0][5] = r[1]*drdx; - grad_p[0][6] = r[2]*drdx; - grad_p[0][7] = 0.0; - grad_p[0][8] = 0.0; - grad_p[0][9] = 0.0; + grad_p[1][0] = drdx; + grad_p[2][0] = 0.0; + grad_p[3][0] = 0.0; + grad_p[4][0] = 2.0*r[0]*drdx; + grad_p[5][0] = r[1]*drdx; + grad_p[6][0] = r[2]*drdx; + grad_p[7][0] = 0.0; + grad_p[8][0] = 0.0; + grad_p[9][0] = 0.0; // for high-order will use (x^a y^b z^c) const double drdy = -1.0; - grad_p[1][0] = 0.0; + grad_p[0][1] = 0.0; grad_p[1][1] = 0.0; - grad_p[1][2] = drdy; - grad_p[1][3] = 0.0; - grad_p[1][4] = 0.0; - grad_p[1][5] = r[0]*drdy; - grad_p[1][6] = 0.0; - grad_p[1][7] = 2.0*r[1]*drdy; - grad_p[1][8] = r[2]*drdy; - grad_p[1][9] = 0.0; + grad_p[2][1] = drdy; + grad_p[3][1] = 0.0; + grad_p[4][1] = 0.0; + grad_p[5][1] = r[0]*drdy; + grad_p[6][1] = 0.0; + grad_p[7][1] = 2.0*r[1]*drdy; + grad_p[8][1] = r[2]*drdy; + grad_p[9][1] = 0.0; // for high-order will use (x^a y^b z^c) const double drdz = -1.0; - grad_p[2][0] = 0.0; - grad_p[2][1] = 0.0; + grad_p[0][2] = 0.0; + grad_p[1][2] = 0.0; grad_p[2][2] = 0.0; - grad_p[2][3] = drdz; - grad_p[2][4] = 0.0; - grad_p[2][5] = 0.0; - grad_p[2][6] = r[0]*drdz; - grad_p[2][7] = 0.0; - grad_p[2][8] = r[1]*drdz; - grad_p[2][9] = 2.0*r[2]*drdz; + grad_p[3][2] = drdz; + grad_p[4][2] = 0.0; + grad_p[5][2] = 0.0; + grad_p[6][2] = r[0]*drdz; + grad_p[7][2] = 0.0; + grad_p[8][2] = r[1]*drdz; + grad_p[9][2] = 2.0*r[2]*drdz; // for high-order will use (x^a y^b z^c) @@ -295,7 +295,6 @@ void calc_basis_functions( // -M^-1[d][k]*grad_M[k][b][i]*C[b] = [d][i] // p[d] grad_C[d][i] void calc_basis_and_grad_basis_functions( - size_t point_gid, const DCArrayKokkos & x, const DCArrayKokkos points_num_neighbors, const DRaggedRightArrayKokkos points_in_point, @@ -307,131 +306,150 @@ void calc_basis_and_grad_basis_functions( const double h) { - // -------------- - // Step 1: assemble grad M, the gradient of the moment matrix + // actual number of points + size_t num_points = x.dims(0); + + // loop over all nodes in the problem + FOR_ALL(point_gid, 0, num_points, { - FArrayKokkos grad_m(3,num_poly_basis,num_poly_basis); - grad_m.set_values(0.0); + // -------------- + // Step 1: assemble grad M, the gradient of the moment matrix - // walk over the neighboring points - FOR_ALL(neighbor_point_lid, 0, points_num_neighbors(point_gid), { + double grad_m[num_poly_basis][num_poly_basis][3]; - size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + for(size_t a=0;a Date: Sat, 13 Sep 2025 16:04:32 -0600 Subject: [PATCH 14/23] added timers and increased particle count --- examples/pointcloud/pointcloud-rk.cpp | 35 ++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index d42c2e92..9098d82c 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -40,6 +40,8 @@ // Nathaniel Morgan // ----------------------------------------------- +#include // for timing + #include #include #include @@ -63,7 +65,7 @@ const double PI = 3.14159265358979323846; // ----------------------------------------------- // inputs: -const size_t num_points = 101; +const size_t num_points = 10001; // the bin sizes for finding neighboring points const double bin_dx = 0.05; // bins in x @@ -621,6 +623,9 @@ int main(int argc, char *argv[]) DCArrayKokkos points_bin_stencil(num_points, "bin_stencil"); // how many bins needed for a particle DCArrayKokkos points_num_neighbors(num_points, "num_neighbors"); + // start timer + auto time_1 = std::chrono::high_resolution_clock::now(); + // build reverse mapping between gid and i,j,k FOR_ALL(i, 0, num_bins_x, j, 0, num_bins_y, @@ -643,11 +648,17 @@ int main(int argc, char *argv[]) Kokkos::fence(); keys_in_bin.update_host(); + // end timer + auto time_2 = std::chrono::high_resolution_clock::now(); + // ------------------------------------------------------------------- // below here, these routine must be called every time particles move // ------------------------------------------------------------------- + // start timer + auto time_3 = std::chrono::high_resolution_clock::now(); + // save bin id to points FOR_ALL(point_gid, 0, num_points, { @@ -814,6 +825,9 @@ int main(int argc, char *argv[]) points_in_point.update_host(); + // end timer + auto time_4 = std::chrono::high_resolution_clock::now(); + // ---------------------------------------- // Find basis that reconstructs polynomial @@ -821,6 +835,7 @@ int main(int argc, char *argv[]) printf("Reconstructing basis using point cloud data \n\n"); + auto time_5 = std::chrono::high_resolution_clock::now(); CArrayKokkos p_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point CArrayKokkos vol(num_points); @@ -858,6 +873,9 @@ int main(int argc, char *argv[]) grad_basis, h); + // end timer + auto time_6 = std::chrono::high_resolution_clock::now(); + // performing checks on p_coeffs, basis, and grad_basis double partion_unity; @@ -988,7 +1006,18 @@ int main(int argc, char *argv[]) } // end for point gid - ////// + ////// timers /// + + std::chrono::duration ms = time_2 - time_1; + std::cout << "runtime to create bins = " << ms.count() << "ms\n\n"; + + ms = time_4 - time_3; + std::cout << "runtime to find and save neighbors = " << ms.count() << "ms\n\n"; + + ms = time_6 - time_5; + std::cout << "runtime to calculate basis and grad basis = " << ms.count() << "ms\n\n"; + + printf("Writing VTK Graphics File \n\n"); @@ -1012,7 +1041,7 @@ int main(int argc, char *argv[]) out << point_values.host(point_gid) << "\n"; } - + printf("Finished \n\n"); From 7a9993154844374368159fee2508e5d5c20659e3 Mon Sep 17 00:00:00 2001 From: Nathaniel Morgan Date: Thu, 18 Sep 2025 16:14:30 -0600 Subject: [PATCH 15/23] built maps and reverse maps between points --- examples/pointcloud/pointcloud-rk.cpp | 370 ++++++++++++++++++++++---- 1 file changed, 325 insertions(+), 45 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index 9098d82c..952d24a7 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -55,6 +55,11 @@ #include "lu_solver.hpp" + +#include // for unorded map testing + + + #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? (a) : (b)) @@ -65,7 +70,7 @@ const double PI = 3.14159265358979323846; // ----------------------------------------------- // inputs: -const size_t num_points = 10001; +const size_t num_points = 31; // the bin sizes for finding neighboring points const double bin_dx = 0.05; // bins in x @@ -81,6 +86,8 @@ const double LX = 1.0; // length in x-dir const double LY = 1.0; const double LZ = 1.0; +bool check_maps = false; // CPU only!!!! + // // ----------------------------------------------- @@ -589,7 +596,7 @@ int main(int argc, char *argv[]) // point values FOR_ALL(i, 0, num_points, { - printf("point location at i=%d is (%f, %f, %f) \n", i, point_positions(i, 0), point_positions(i, 1), point_positions(i, 2)); + //printf("point location at i=%d is (%f, %f, %f) \n", i, point_positions(i, 0), point_positions(i, 1), point_positions(i, 2)); point_values(i) = sqrt(point_positions(i, 0)*point_positions(i, 0) + point_positions(i, 1)*point_positions(i, 1) + point_positions(i, 2)*point_positions(i, 2)); @@ -597,7 +604,7 @@ int main(int argc, char *argv[]) }); // end parallel for tri's in the file point_values.update_host(); Kokkos::fence(); - printf("\n"); + //printf("\n"); // ---------------------------- @@ -620,9 +627,11 @@ int main(int argc, char *argv[]) // connectivity from points to bins DCArrayKokkos points_bin_gid(num_points, "points_in_gid"); CArrayKokkos points_bin_lid_storage(num_points, "bin_lid_storage"); // only used to create storage - DCArrayKokkos points_bin_stencil(num_points, "bin_stencil"); // how many bins needed for a particle + DCArrayKokkos points_bin_stencil(num_points, 6, "bin_stencil"); // how imin,imax,jmin,jmax,kmin,kmax range for bins in stencil DCArrayKokkos points_num_neighbors(num_points, "num_neighbors"); + printf("Starting timers \n\n"); + // start timer auto time_1 = std::chrono::high_resolution_clock::now(); @@ -717,7 +726,7 @@ int main(int argc, char *argv[]) size_t num_points_found; // establish the stencil size to get enough particles - for(int stencil=1; stencil<1000; stencil++){ + for(int stencil=1; stencil<100000; stencil++){ num_points_found = 0; @@ -747,10 +756,18 @@ int main(int argc, char *argv[]) } // end for icount // the min number of points required to solve the system is num_poly_basis+1 - if (num_points_found > num_poly_basis+5){ + if (num_points_found > num_poly_basis+5 || num_points_found==num_points){ + + points_bin_stencil(point_gid,0) = imin; + points_bin_stencil(point_gid,1) = imax; + points_bin_stencil(point_gid,2) = jmin; + points_bin_stencil(point_gid,3) = jmax; + points_bin_stencil(point_gid,4) = kmin; + points_bin_stencil(point_gid,5) = kmax; + + points_num_neighbors(point_gid) = num_points_found; // including node_i in the list of neighbors + //points_num_neighbors(point_gid) = num_points_found - 1; // the -1 is because counted point i as a neighbor - points_bin_stencil(point_gid) = stencil; - points_num_neighbors(point_gid) = num_points_found; // key for allocations break; } @@ -760,16 +777,81 @@ int main(int argc, char *argv[]) }); // end for all Kokkos::fence(); points_bin_stencil.update_host(); + + + + // account for stencels not overlapping, fixing assymetry in points connectivity + FOR_ALL(point_gid, 0, num_points, { + + // get bin gid for this point + size_t bin_gid = points_bin_gid(point_gid); + + // get i,j,k for this bin + bin_keys_t bin_keys = keys_in_bin(bin_gid); + + const int i = bin_keys.i; + const int j = bin_keys.j; + const int k = bin_keys.k; + + // walk over the stencil to get neighbors of this bin + const int imin = points_bin_stencil(point_gid,0); + const int imax = points_bin_stencil(point_gid,1); + const int jmin = points_bin_stencil(point_gid,2); + const int jmax = points_bin_stencil(point_gid,3); + const int kmin = points_bin_stencil(point_gid,4); + const int kmax = points_bin_stencil(point_gid,5); + + // loop over my bin stencil + for (int icount=imin; icount<=imax; icount++){ + for (int jcount=jmin; jcount<=jmax; jcount++) { + for (int kcount=kmin; kcount<=kmax; kcount++){ + + // get bin neighbor gid + size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y); + + // save the points in this bin + for(size_t neighbor_pt_lid=0; neighbor_pt_lid= neighbor_imin && i <= neighbor_imax) && + (j >= neighbor_jmin && j <= neighbor_jmax) && + (k >= neighbor_kmin && k <= neighbor_kmax); + + if(!inside){ + Kokkos::atomic_increment(&points_num_neighbors(neighbor_point_gid)); + // the other stencil didn't see my point because it was smaller, now it does see it + } + + } // neighbor_point_lid + + } // end for kcount + } // end for jcount + } // end for icount + + }); // end for all + Kokkos::fence(); points_num_neighbors.update_host(); // allocate memory for points in point DRaggedRightArrayKokkos points_in_point(points_num_neighbors, "points_in_point"); + points_num_neighbors.set_values(0); // this is a num saved counter now // --------------------- // Save the neighbors // --------------------- - // find my neighbors using bins + // find neighbors using bins FOR_ALL(point_gid, 0, num_points, { // get bin gid for this point @@ -783,19 +865,13 @@ int main(int argc, char *argv[]) const int k = bin_keys.k; // walk over the stencil to get neighbors - const int stencil = points_bin_stencil(point_gid); - - const int imin = MAX(0, i-stencil); - const int imax = MIN(num_bins_x-1, i+stencil); - - const int jmin = MAX(0, j-stencil); - const int jmax = MIN(num_bins_y-1, j+stencil); - - const int kmin = MAX(0, k-stencil); - const int kmax = MIN(num_bins_z-1, k+stencil); + int imin = points_bin_stencil(point_gid,0); + int imax = points_bin_stencil(point_gid,1); + int jmin = points_bin_stencil(point_gid,2); + int jmax = points_bin_stencil(point_gid,3); + int kmin = points_bin_stencil(point_gid,4); + int kmax = points_bin_stencil(point_gid,5); - size_t num_saved = 0; - size_t num_points_found = 0; for (int icount=imin; icount<=imax; icount++){ for (int jcount=jmin; jcount<=jmax; jcount++) { @@ -803,16 +879,45 @@ int main(int argc, char *argv[]) // get bin neighbor gid size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y); - num_points_found += num_points_in_bin(neighbor_bin_gid); // save the points in this bin for(size_t neighbor_pt_lid=0; neighbor_pt_lid= neighbor_imin && i <= neighbor_imax) && + (j >= neighbor_jmin && j <= neighbor_jmax) && + (k >= neighbor_kmin && k <= neighbor_kmax); + + if(!inside){ + + size_t num_saved_neighbor = Kokkos::atomic_fetch_add(&points_num_neighbors(neighbor_point_gid), 1); + points_in_point(neighbor_point_gid, num_saved_neighbor) = point_gid; + // the other stencil didn't see my point because it was smaller, now it does see it + + } // end if + + //} // end if neighbor != point_gid } // neighbor_point_lid @@ -825,6 +930,36 @@ int main(int argc, char *argv[]) points_in_point.update_host(); + // build the reverse map + DRaggedRightArrayKokkos reverse_neighbor_lid(points_num_neighbors); + reverse_neighbor_lid.set_values(-1); + + FOR_ALL(point_gid, 0, num_points, { + + for(int neighbor_point_lid = 0; neighbor_point_lid basis(points_num_neighbors); // reproducing kernel basis (num_points, num_neighbors) DRaggedRightArrayKokkos grad_basis(points_num_neighbors,3); // reproducing kernel basis (num_points, num_neighbors) + double h = 1.0; @@ -877,6 +1013,23 @@ int main(int argc, char *argv[]) auto time_6 = std::chrono::high_resolution_clock::now(); + // ----------------- + // Timers + // ----------------- + printf("\n"); + std::chrono::duration ms = time_2 - time_1; + std::cout << "runtime to create bins = " << ms.count() << "ms\n\n"; + + ms = time_4 - time_3; + std::cout << "runtime to find and save neighbors = " << ms.count() << "ms\n\n"; + + ms = time_6 - time_5; + std::cout << "runtime to calculate basis and grad basis = " << ms.count() << "ms\n\n"; + + + + printf("Checking gradients at points \n\n"); + // performing checks on p_coeffs, basis, and grad_basis double partion_unity; double partion_unity_lcl; @@ -913,7 +1066,9 @@ int main(int argc, char *argv[]) // partition of unity FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), partion_unity_lcl, { - partion_unity_lcl += basis(point_gid,neighbor_point_lid)*vol(neighbor_point_lid); + // get the point gid for this neighboring + size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); + partion_unity_lcl += basis(point_gid,neighbor_point_lid)*vol(neighbor_point_gid); }, partion_unity); @@ -932,9 +1087,14 @@ int main(int argc, char *argv[]) quadratic_preserving_lcl += basis(point_gid,neighbor_point_lid)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0)*point_positions(neighbor_point_gid,0); }, quadratic_preserving); - printf("partition unity = %f, ", partion_unity); - printf("linear fcn error = %f, ", fabs(linear_preserving-point_positions(point_gid,0))); - printf("quadratic fcn error = %f at i=%zu \n", fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0)), point_gid); + if(fabs(partion_unity-1.0)>1e-13) + printf("partition unity = %f, ", partion_unity); + + if(fabs(linear_preserving-point_positions(point_gid,0))>1e-13) + printf("linear fcn error = %f, ", fabs(linear_preserving-point_positions(point_gid,0))); + + if(fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0))>1e-13) + printf("quadratic fcn error = %f at i=%zu \n", fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0)), point_gid); // ----------------- @@ -957,7 +1117,10 @@ int main(int argc, char *argv[]) size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); grad_z_p0_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid); }, grad_z_p0); - printf("error in grad(P0) = %f, %f, %f, \n", grad_x_p0, grad_y_p0, grad_z_p0); + + const double grad_check_P0 = fabs(grad_x_p0)+fabs(grad_y_p0)+fabs(grad_z_p0); + if(0.333*fabs(grad_check_P0)>1e-8) + printf("error in grad(P0) = %f, %f, %f, \n", grad_x_p0, grad_y_p0, grad_z_p0); // Sum(grad(P1)) = [1]; @@ -976,10 +1139,14 @@ int main(int argc, char *argv[]) size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); grad_z_p1_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2); }, grad_z_p1); - printf("error in grad(P1) = %f, %f, %f, \n", - fabs(grad_x_p1 - 1.0), - fabs(grad_y_p1 - 1.0), - fabs(grad_z_p1 - 1.0)); + + const double grad_check_P1 = fabs(grad_x_p1 - 1.0)+fabs(grad_y_p1 - 1.0)+fabs(grad_z_p1 - 1.0); + if(0.333*fabs(grad_check_P1)>1e-8){ + printf("error in grad(P1) = %f, %f, %f, \n", + fabs(grad_x_p1 - 1.0), + fabs(grad_y_p1 - 1.0), + fabs(grad_z_p1 - 1.0)); + } // Sum(grad(P2)) = [2]; @@ -998,24 +1165,137 @@ int main(int argc, char *argv[]) size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); grad_z_p2_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2)*point_positions(neighbor_point_gid,2); }, grad_z_p2); - printf("error in grad(P2) = %f, %f, %f, \n", - fabs(grad_x_p2-2.0*point_positions(point_gid,0)), - fabs(grad_y_p2-2.0*point_positions(point_gid,1)), - fabs(grad_z_p2-2.0*point_positions(point_gid,2))); + + const double grad_check_P2 = fabs(grad_x_p2-2.0*point_positions(point_gid,0)) + fabs(grad_y_p2-2.0*point_positions(point_gid,1)) + fabs(grad_z_p2-2.0*point_positions(point_gid,2)); + if(0.333*fabs(grad_check_P2)>1e-8){ + printf("error in grad(P2) = %f, %f, %f, \n", + fabs(grad_x_p2-2.0*point_positions(point_gid,0)), + fabs(grad_y_p2-2.0*point_positions(point_gid,1)), + fabs(grad_z_p2-2.0*point_positions(point_gid,2))); + } } // end for point gid - ////// timers /// + if(check_maps){ + size_t bad = 0; + for (size_t i=0; i50) break; + } else { + size_t check = points_in_point(j, rev); + if (check != i) { + printf("WRONG reverse: i=%zu j=%zu rev=%zu check=%zu\n", i, j, rev, check); + ++bad; + if (bad>50) break; + } + } + } + if (bad>50) break; + } + if (bad==0) printf("reverse map OK\n"); + + + for (size_t i=0; i seen; + for (size_t lid=0; lid div(num_points); + // div.set_values(0.0); + + // FOR_ALL(point_gid, 0, num_points, { + + // for(size_t neighbor_point_lid = 0; neighbor_point_lid ms = time_2 - time_1; - std::cout << "runtime to create bins = " << ms.count() << "ms\n\n"; - ms = time_4 - time_3; - std::cout << "runtime to find and save neighbors = " << ms.count() << "ms\n\n"; - ms = time_6 - time_5; - std::cout << "runtime to calculate basis and grad basis = " << ms.count() << "ms\n\n"; From c77e0d5f6eaefc99bbe520906ebc7df32dc21c12 Mon Sep 17 00:00:00 2001 From: Nathaniel Morgan Date: Tue, 23 Sep 2025 17:35:11 -0600 Subject: [PATCH 16/23] rk conservative gradient test --- examples/pointcloud/pointcloud-rk.cpp | 391 +++++++++++++++++++++----- 1 file changed, 328 insertions(+), 63 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index 952d24a7..f0122013 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -70,7 +70,15 @@ const double PI = 3.14159265358979323846; // ----------------------------------------------- // inputs: -const size_t num_points = 31; + +const size_t num_1d_x = 4; +const size_t num_1d_y = 4; +const size_t num_1d_z = 4; + +const double h_kernel = 1.5/4.; +const double num_points_fit = 30; + +const size_t num_points = num_1d_x*num_1d_y*num_1d_z; // the bin sizes for finding neighboring points const double bin_dx = 0.05; // bins in x @@ -146,6 +154,64 @@ size_t get_bin_gid(const double x_pt, } // end function + +KOKKOS_FUNCTION +double kernel_bs(const double r[3], double h) { + + double xij = 0.0; + for(size_t dim=0; dim<3; dim++){ + xij += r[dim]*r[dim]; + } // dim + + double q = sqrt(xij)/h; + double alpha = 2.0/(3.0*h); + if (q < 0.0) return 0.0; // defensive + if (q < 1.0) return (alpha * (1.0 - 1.5*q*q + 0.75*q*q*q)); + if (q < 2.0) return (alpha * 0.25 * pow(2.0 - q, 3)); + + return 0.0; +} + + +KOKKOS_FUNCTION +// derivative dW/dx_i = - dW/dr where r = xj-xi +void grad_kernel_bs(double *grad_W, const double r[3], const double h) { + + double xij = 0.0; + for(size_t dim=0; dim<3; dim++){ + xij += r[dim]*r[dim]; + } // dim + // sqrt(xij) = radius + + const double radius = sqrt(xij); + const double q = radius/h; + + double df_dq = 0.0; // derivative of the dimensionless kernel shape function f(q) + if (q < 1.0) { + // f(q) = 1 - 1.5 q^2 + 0.75 q^3 + // f'(q) = -3 q + 2.25 q^2 + df_dq = -3.0 * q + 2.25 * q * q; + } else if (q < 2.0) { + // f(q) = 0.25 (2 - q)^3 + // f'(q) = 0.25 * 3 (2 - q)^2 * (-1) = -0.75 (2 - q)^2 + const double two_minus_q = 2.0 - q; + df_dq = -0.75 * two_minus_q * two_minus_q; + } else { + df_dq = 0.0; + } + + const double dW_dr = (df_dq / h); + // grad W = dW/dr * (rij / radius) + const double invr = 1.0 / radius; + + for (size_t dim=0; dim<3; ++dim) { + grad_W[dim] = dW_dr * r[dim] * invr; + } + + return; +} + + // Gaussian function part of the RBF // rbf = exp(-(xj - x)*(xj - x)/h) KOKKOS_FUNCTION @@ -157,14 +223,33 @@ double kernel(const double r[3], const double h){ diff_sqrd += r[dim]*r[dim]; } // dim - return exp(-diff_sqrd/(h*h)); + double norm = 1.0 / (h * h * h * pow(PI, 1.5)); + return norm * exp(-diff_sqrd / (h * h)); +} // end of function + + +// Gaussian function part of the RBF, symmeterized +// rbf = 0.5*(exp(-(xj - xi)*(xj - xi)/hi^2) + exp(-(xi - xj)*(xi - xj)/hj^2)) +KOKKOS_FUNCTION +double kernel_syn(const double r[3], const double hi, const double hj){ + + double diff_sqrd = 0.0; + + for(size_t dim=0; dim<3; dim++){ + diff_sqrd += r[dim]*r[dim]; + } // dim + + const double Wi = exp(-diff_sqrd/(hi*hi)); // use kernel func call + const double Wj = exp(-diff_sqrd/(hj*hj)); + + return 0.5*(Wi + Wj); } // end of function // Gradient Gaussian function -// rbf = exp(-(xj - x)*(xj - x)/h) +// d/dx rbf = d/dx (exp(-(xj - xi)*(xj - x)/hi^2) KOKKOS_FUNCTION -void grad_kernel(const double r[3], const double h, double *grad_W){ +void grad_kernel(double *grad_W, const double r[3], const double h){ double diff_sqrd = 0.0; @@ -172,16 +257,38 @@ void grad_kernel(const double r[3], const double h, double *grad_W){ diff_sqrd += r[dim]*r[dim]; } // dim - const double rbf = exp(-diff_sqrd/(h*h)); + const double drdxi = -1; + const double rbf = kernel(r, h); // gradient - grad_W[0] = 2.0/(h*h)*r[0]*rbf; - grad_W[1] = 2.0/(h*h)*r[1]*rbf; - grad_W[2] = 2.0/(h*h)*r[2]*rbf; + for (size_t dim=0; dim<3; ++dim) { + grad_W[dim] = -2.0/(h*h)*r[dim]*rbf*drdxi; + } return; } // end of function +// d/dx rbf = d/dx ( 0.5(exp(-(xj - xi)*(xj - x)/hi^2) + exp(-(xi - xj)*(xi - xj)/hj^2)) ) +KOKKOS_FUNCTION +void grad_kernel_sym(double *gradW, const double r[3], const double hi, const double hj) { + double diff_sqrd = 0.0; + for (size_t dim=0; dim<3; ++dim){ + diff_sqrd += r[dim]*r[dim]; + } + + const double drdxi = -1; + + double Wi = exp(-diff_sqrd/(hi*hi)); + double Wj = exp(-diff_sqrd/(hj*hj)); + + double dWi = -2.0/ (hi*hi) * Wi*drdxi; // it uses xj - xi so a minus one + double dWj = -2.0/ (hj*hj) * Wj; // it uses xi - xj so it has a +1 for drdxi + + for (size_t dim=0; dim<3; ++dim) { + gradW[dim] = 0.5 * (dWi * r[dim] - dWj * r[dim]); // second term using -r + } +} + // Polynomial basis up to quadratic in 3D (10 terms) const size_t num_poly_basis = 10; @@ -262,7 +369,7 @@ void calc_basis_functions( const DCArrayKokkos & x, const DCArrayKokkos points_num_neighbors, const DRaggedRightArrayKokkos points_in_point, - const CArrayKokkos & vol, + const DCArrayKokkos & vol, const CArrayKokkos & p_coeffs, const DRaggedRightArrayKokkos & basis, const double h) @@ -307,7 +414,7 @@ void calc_basis_and_grad_basis_functions( const DCArrayKokkos & x, const DCArrayKokkos points_num_neighbors, const DRaggedRightArrayKokkos points_in_point, - const CArrayKokkos & vol, + const DCArrayKokkos & vol, const CArrayKokkos & p_coeffs, const CArrayKokkos & M_inv, const DRaggedRightArrayKokkos & basis, @@ -346,7 +453,7 @@ void calc_basis_and_grad_basis_functions( double W = kernel(r,h); double grad_W[3]; - grad_kernel(r,h,grad_W); + grad_kernel(grad_W,r,h); double p[num_poly_basis]; poly_basis(r,p); @@ -387,7 +494,7 @@ void calc_basis_and_grad_basis_functions( poly_basis(r,p); double grad_W[3]; - grad_kernel(r, h, grad_W); + grad_kernel(grad_W, r, h); double grad_p[num_poly_basis][3]; // matrix holding grad polynomial basis grad_poly_basis(r, grad_p); @@ -471,7 +578,7 @@ void calc_p_coefficients( const DCArrayKokkos & x, const DCArrayKokkos points_num_neighbors, const DRaggedRightArrayKokkos points_in_point, - const CArrayKokkos & vol, + const DCArrayKokkos & vol, const CArrayKokkos & p_coeffs, const CArrayKokkos & M_inv, double h) @@ -584,12 +691,34 @@ int main(int argc, char *argv[]) DCArrayKokkos point_values(num_points, "point_values"); // point locations + if(false){ srand(static_cast(time(0))); // Seed the random number generator - for(size_t i=0; i(rand())/static_cast(RAND_MAX); - point_positions.host(i, 1) = Y0 + LY*static_cast(rand())/static_cast(RAND_MAX); - point_positions.host(i, 2) = Z0 + LZ*static_cast(rand())/static_cast(RAND_MAX); + for(size_t i=0; i(rand())/static_cast(RAND_MAX); + point_positions.host(i, 1) = Y0 + LY*static_cast(rand())/static_cast(RAND_MAX); + point_positions.host(i, 2) = Z0 + LZ*static_cast(rand())/static_cast(RAND_MAX); + } } + else { + + double dx = LX/((double)num_1d_x); + double dy = LY/((double)num_1d_y); + double dz = LZ/((double)num_1d_z); + + size_t point_gid = 0; + for(size_t k=0; k(i)*dx; + point_positions.host(point_gid, 1) = Y0 + static_cast(j)*dy; + point_positions.host(point_gid, 2) = Z0 + static_cast(k)*dz; + point_gid++; + } // end i + } // end j + } // end k + + } // end if + point_positions.update_device(); Kokkos::fence(); @@ -742,10 +871,10 @@ int main(int argc, char *argv[]) const int kmin = MAX(0, k-stencil); const int kmax = MIN(num_bins_z-1, k+stencil); - - for (int icount=imin; icount<=imax; icount++){ + + for (int kcount=kmin; kcount<=kmax; kcount++){ for (int jcount=jmin; jcount<=jmax; jcount++) { - for (int kcount=kmin; kcount<=kmax; kcount++){ + for (int icount=imin; icount<=imax; icount++){ // get bin neighbor gid size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y); @@ -755,8 +884,8 @@ int main(int argc, char *argv[]) } // end for jcount } // end for icount - // the min number of points required to solve the system is num_poly_basis+1 - if (num_points_found > num_poly_basis+5 || num_points_found==num_points){ + // the min number of points required to solve the system is num_poly_basis+1, was 2*num_poly_basis + if (num_points_found > num_points_fit || num_points_found==num_points){ points_bin_stencil(point_gid,0) = imin; points_bin_stencil(point_gid,1) = imax; @@ -802,9 +931,9 @@ int main(int argc, char *argv[]) const int kmax = points_bin_stencil(point_gid,5); // loop over my bin stencil - for (int icount=imin; icount<=imax; icount++){ + for (int kcount=kmin; kcount<=kmax; kcount++){ for (int jcount=jmin; jcount<=jmax; jcount++) { - for (int kcount=kmin; kcount<=kmax; kcount++){ + for (int icount=imin; icount<=imax; icount++){ // get bin neighbor gid size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y); @@ -873,9 +1002,9 @@ int main(int argc, char *argv[]) int kmax = points_bin_stencil(point_gid,5); - for (int icount=imin; icount<=imax; icount++){ + for (int kcount=kmin; kcount<=kmax; kcount++){ for (int jcount=jmin; jcount<=jmax; jcount++) { - for (int kcount=kmin; kcount<=kmax; kcount++){ + for (int icount=imin; icount<=imax; icount++){ // get bin neighbor gid size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y); @@ -973,8 +1102,36 @@ int main(int argc, char *argv[]) auto time_5 = std::chrono::high_resolution_clock::now(); CArrayKokkos p_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point - CArrayKokkos vol(num_points); - vol.set_values(1.0); + + + DCArrayKokkos vol(num_points); + vol.set_values(0.0); + + const double dx = LX/((double)num_1d_x); + const double dy = LY/((double)num_1d_y); + const double dz = LZ/((double)num_1d_z); + const double elem_vol = dx*dy*dz; + + const size_t num_cells_1d_x = num_1d_x-1; + const size_t num_cells_1d_y = num_1d_y-1; + const size_t num_cells_1d_z = num_1d_z-1; + + FOR_ALL(k,0,num_cells_1d_z, + j,0,num_cells_1d_y, + i,0,num_cells_1d_x,{ + + for (int kcount=k; kcount<=k+1; kcount++){ + for (int jcount=j; jcount<=j+1; jcount++){ + for (int icount=i; icount<=i+1; icount++){ + size_t point_gid = get_gid(icount, jcount, kcount, num_1d_x, num_1d_y); + Kokkos::atomic_add(&vol(point_gid), elem_vol*0.25); + } // end i + } // end j + } // end k + + }); // end parallel over k,j,i + vol.update_host(); + CArrayKokkos M_inv(num_points, num_poly_basis, num_poly_basis); CArrayKokkos grad_M(num_points, num_poly_basis, num_poly_basis); @@ -984,7 +1141,8 @@ int main(int argc, char *argv[]) - double h = 1.0; + double h = h_kernel; // kernel width + printf("building reproducing kernel coefficients \n"); @@ -1107,11 +1265,13 @@ int main(int argc, char *argv[]) size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); grad_x_p0_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid); }, grad_x_p0); + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p0_lcl, { // get the point gid for this neighboring size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); grad_y_p0_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid); }, grad_y_p0); + FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p0_lcl, { // get the point gid for this neighboring size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); @@ -1216,45 +1376,150 @@ int main(int argc, char *argv[]) // ------------------- // Anti-sym gradient // ------------------- - - // DCArrayKokkos div(num_points); - // div.set_values(0.0); + printf("Testing divergence of vector field u = (x, y, z) \n\n"); - // FOR_ALL(point_gid, 0, num_points, { - - // for(size_t neighbor_point_lid = 0; neighbor_point_lid u(num_points, 3); + FOR_ALL(i, 0, num_points, { + u(i, 0) = point_positions(i, 0); + u(i, 1) = point_positions(i, 1); + u(i, 2) = point_positions(i, 2); + }); + u.update_device(); + + DCArrayKokkos div(num_points); + div.set_values(0.0); + + DCArrayKokkos div_fd(num_points); + div_fd.set_values(0.0); + + FOR_ALL(i_gid, 0, num_points, { + for(size_t j_lid = 0; j_lid 1e-8){ + printf("div(u) = %f at point %zu, error = %g, vol = %f\n", div.host(point_gid), point_gid, error, vol.host(point_gid)); + //} + } // end for point_gid + + for(size_t point_gid=0; point_gid 1e-8){ + printf("div_fd(u) = %f at point %zu, error = %g\n", div_fd.host(point_gid), point_gid, error); + } + } // end for point_gid + + /* + DCArrayKokkos div(num_points); + div.set_values(0.0); + + FOR_ALL(i_gid, 0, num_points, { - // // get the point gid for this neighbor - // size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); - - // // get the local id of my neighbor that matches my point_gid - // size_t neighbor_lid = reverse_neighbor_lid(point_gid, neighbor_point_lid); - - // if(point_gid != points_in_point(neighbor_point_gid, neighbor_lid)){ - // printf("CHECK: point i = %d, reverse map point i = %zu for j = %zu \n", point_gid, points_in_point(neighbor_point_gid, neighbor_lid), neighbor_point_gid); - // } - - // double dot_prod = 0.0; - // for (size_t dim=0; dim<3; dim++){ - // dot_prod += (grad_basis(point_gid,neighbor_point_lid,dim) - grad_basis(neighbor_point_gid,neighbor_lid,dim))* - // (point_positions(neighbor_point_gid,dim) - point_positions(point_gid,dim)); - // } - // div(point_gid) += vol(point_gid)*vol(neighbor_point_gid)*0.5*dot_prod; + for(size_t j_lid = 0; j_lid Date: Fri, 26 Sep 2025 12:47:54 -0600 Subject: [PATCH 17/23] WIP anti-symmetric gradient, added b-splid kernel and P1 P2 switch --- examples/pointcloud/pointcloud-rk.cpp | 423 +++++++++++++------------- 1 file changed, 216 insertions(+), 207 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index f0122013..07390ca8 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -71,12 +71,15 @@ const double PI = 3.14159265358979323846; // inputs: -const size_t num_1d_x = 4; -const size_t num_1d_y = 4; -const size_t num_1d_z = 4; +#define P1 // P1 or P2 -const double h_kernel = 1.5/4.; -const double num_points_fit = 30; + +const size_t num_1d_x = 3; +const size_t num_1d_y = 3; +const size_t num_1d_z = 3; + +const double h_kernel = 2/3.; +const double num_points_fit = 27; // minimum to fit is 3x3x3 const size_t num_points = num_1d_x*num_1d_y*num_1d_z; @@ -116,9 +119,9 @@ bin_keys_t get_bin_keys(const double x_pt, const double z_pt){ - double i_dbl = fmax(1.0e-15, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // x = ih + X0 + dx_bin*0.5 - double j_dbl = fmax(1.0e-15, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10)); - double k_dbl = fmax(1.0e-15, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10)); + double i_dbl = fmax(0, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // x = ih + X0 + dx_bin*0.5 + double j_dbl = fmax(0, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10)); + double k_dbl = fmax(0, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10)); bin_keys_t bin_keys; // save i,j,k to the bin keys @@ -136,12 +139,13 @@ size_t get_bin_gid(const double x_pt, const double y_pt, const double z_pt, const size_t num_bins_x, - const size_t num_bins_y){ + const size_t num_bins_y, + const size_t num_bins_z){ - double i_dbl = fmax(1.0e-15, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // x = ih + X0 + dx_bin*0.5 - double j_dbl = fmax(1.0e-15, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10)); - double k_dbl = fmax(1.0e-15, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10)); + double i_dbl = fmin(num_bins_x-1, fmax(0.0, round((x_pt - X0)/bin_dx - 1.0e-8))); // x = ih + X0 + double j_dbl = fmin(num_bins_y-1, fmax(0.0, round((y_pt - Y0)/bin_dy - 1.0e-8))); + double k_dbl = fmin(num_bins_z-1, fmax(0.0, round((z_pt - Z0)/bin_dz - 1.0e-8))); // get the integers for the bins size_t i = (size_t)i_dbl; @@ -156,15 +160,16 @@ size_t get_bin_gid(const double x_pt, KOKKOS_FUNCTION -double kernel_bs(const double r[3], double h) { +double kernel(const double r[3], double h) { - double xij = 0.0; + double diff_sqrd = 0.0; for(size_t dim=0; dim<3; dim++){ - xij += r[dim]*r[dim]; + diff_sqrd += r[dim]*r[dim]; } // dim - double q = sqrt(xij)/h; - double alpha = 2.0/(3.0*h); + const double radius = sqrt(diff_sqrd); + const double q = radius/h; + const double alpha = 2.0/(3.0*h); if (q < 0.0) return 0.0; // defensive if (q < 1.0) return (alpha * (1.0 - 1.5*q*q + 0.75*q*q*q)); if (q < 2.0) return (alpha * 0.25 * pow(2.0 - q, 3)); @@ -175,15 +180,14 @@ double kernel_bs(const double r[3], double h) { KOKKOS_FUNCTION // derivative dW/dx_i = - dW/dr where r = xj-xi -void grad_kernel_bs(double *grad_W, const double r[3], const double h) { +void grad_kernel(double *grad_W, const double r[3], const double h) { - double xij = 0.0; + double diff_sqrd = 0.0; for(size_t dim=0; dim<3; dim++){ - xij += r[dim]*r[dim]; + diff_sqrd += r[dim]*r[dim]; } // dim - // sqrt(xij) = radius - const double radius = sqrt(xij); + const double radius = sqrt(diff_sqrd); const double q = radius/h; double df_dq = 0.0; // derivative of the dimensionless kernel shape function f(q) @@ -202,7 +206,7 @@ void grad_kernel_bs(double *grad_W, const double r[3], const double h) { const double dW_dr = (df_dq / h); // grad W = dW/dr * (rij / radius) - const double invr = 1.0 / radius; + const double invr = 1.0 /(radius + 1e-16); for (size_t dim=0; dim<3; ++dim) { grad_W[dim] = dW_dr * r[dim] * invr; @@ -215,7 +219,7 @@ void grad_kernel_bs(double *grad_W, const double r[3], const double h) { // Gaussian function part of the RBF // rbf = exp(-(xj - x)*(xj - x)/h) KOKKOS_FUNCTION -double kernel(const double r[3], const double h){ +double kernel_g(const double r[3], const double h){ double diff_sqrd = 0.0; @@ -223,7 +227,7 @@ double kernel(const double r[3], const double h){ diff_sqrd += r[dim]*r[dim]; } // dim - double norm = 1.0 / (h * h * h * pow(PI, 1.5)); + double norm = 1.0; // / (h * h * h * pow(PI, 1.5)); return norm * exp(-diff_sqrd / (h * h)); } // end of function @@ -249,7 +253,7 @@ double kernel_syn(const double r[3], const double hi, const double hj){ // Gradient Gaussian function // d/dx rbf = d/dx (exp(-(xj - xi)*(xj - x)/hi^2) KOKKOS_FUNCTION -void grad_kernel(double *grad_W, const double r[3], const double h){ +void grad_kernel_g(double *grad_W, const double r[3], const double h){ double diff_sqrd = 0.0; @@ -289,79 +293,121 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do } } +#if defined(P2) + // Polynomial basis up to quadratic in 3D (10 terms) + const size_t num_poly_basis = 10; + KOKKOS_INLINE_FUNCTION + void poly_basis(const double r[3], double *p) { -// Polynomial basis up to quadratic in 3D (10 terms) -const size_t num_poly_basis = 10; -KOKKOS_INLINE_FUNCTION -void poly_basis(const double r[3], double *p) { + p[0] = 1.0; + p[1] = r[0]; + p[2] = r[1]; + p[3] = r[2]; + p[4] = r[0] * r[0]; + p[5] = r[0] * r[1]; + p[6] = r[0] * r[2]; + p[7] = r[1] * r[1]; + p[8] = r[1] * r[2]; + p[9] = r[2] * r[2]; - p[0] = 1.0; - p[1] = r[0]; - p[2] = r[1]; - p[3] = r[2]; - p[4] = r[0] * r[0]; - p[5] = r[0] * r[1]; - p[6] = r[0] * r[2]; - p[7] = r[1] * r[1]; - p[8] = r[1] * r[2]; - p[9] = r[2] * r[2]; + // for high-order will use (x^a y^b z^c) - // for high-order will use (x^a y^b z^c) + return; + } // end function - return; -} // end function + KOKKOS_INLINE_FUNCTION + void grad_poly_basis(const double r[3], double (*grad_p)[3]) { + + const double drdx = -1.0; + + grad_p[0][0] = 0.0; + grad_p[1][0] = drdx; + grad_p[2][0] = 0.0; + grad_p[3][0] = 0.0; + grad_p[4][0] = 2.0*r[0]*drdx; + grad_p[5][0] = r[1]*drdx; + grad_p[6][0] = r[2]*drdx; + grad_p[7][0] = 0.0; + grad_p[8][0] = 0.0; + grad_p[9][0] = 0.0; + + // for high-order will use (x^a y^b z^c) + + const double drdy = -1.0; + + grad_p[0][1] = 0.0; + grad_p[1][1] = 0.0; + grad_p[2][1] = drdy; + grad_p[3][1] = 0.0; + grad_p[4][1] = 0.0; + grad_p[5][1] = r[0]*drdy; + grad_p[6][1] = 0.0; + grad_p[7][1] = 2.0*r[1]*drdy; + grad_p[8][1] = r[2]*drdy; + grad_p[9][1] = 0.0; + + // for high-order will use (x^a y^b z^c) + + const double drdz = -1.0; + + grad_p[0][2] = 0.0; + grad_p[1][2] = 0.0; + grad_p[2][2] = 0.0; + grad_p[3][2] = drdz; + grad_p[4][2] = 0.0; + grad_p[5][2] = 0.0; + grad_p[6][2] = r[0]*drdz; + grad_p[7][2] = 0.0; + grad_p[8][2] = r[1]*drdz; + grad_p[9][2] = 2.0*r[2]*drdz; + + // for high-order will use (x^a y^b z^c) + + return; + } // end function +#else + // Polynomial basis up to quadratic in 3D (10 terms) + const size_t num_poly_basis = 4; + KOKKOS_INLINE_FUNCTION + void poly_basis(const double r[3], double *p) { + + p[0] = 1.0; + p[1] = r[0]; + p[2] = r[1]; + p[3] = r[2]; + + return; + } // end function + + + KOKKOS_INLINE_FUNCTION + void grad_poly_basis(const double r[3], double (*grad_p)[3]) { + + const double drdx = -1.0; + + grad_p[0][0] = 0.0; + grad_p[1][0] = drdx; + grad_p[2][0] = 0.0; + grad_p[3][0] = 0.0; -KOKKOS_INLINE_FUNCTION -void grad_poly_basis(const double r[3], double (*grad_p)[3]) { - - const double drdx = -1.0; - - grad_p[0][0] = 0.0; - grad_p[1][0] = drdx; - grad_p[2][0] = 0.0; - grad_p[3][0] = 0.0; - grad_p[4][0] = 2.0*r[0]*drdx; - grad_p[5][0] = r[1]*drdx; - grad_p[6][0] = r[2]*drdx; - grad_p[7][0] = 0.0; - grad_p[8][0] = 0.0; - grad_p[9][0] = 0.0; - - // for high-order will use (x^a y^b z^c) - - const double drdy = -1.0; - - grad_p[0][1] = 0.0; - grad_p[1][1] = 0.0; - grad_p[2][1] = drdy; - grad_p[3][1] = 0.0; - grad_p[4][1] = 0.0; - grad_p[5][1] = r[0]*drdy; - grad_p[6][1] = 0.0; - grad_p[7][1] = 2.0*r[1]*drdy; - grad_p[8][1] = r[2]*drdy; - grad_p[9][1] = 0.0; - - // for high-order will use (x^a y^b z^c) - - const double drdz = -1.0; - - grad_p[0][2] = 0.0; - grad_p[1][2] = 0.0; - grad_p[2][2] = 0.0; - grad_p[3][2] = drdz; - grad_p[4][2] = 0.0; - grad_p[5][2] = 0.0; - grad_p[6][2] = r[0]*drdz; - grad_p[7][2] = 0.0; - grad_p[8][2] = r[1]*drdz; - grad_p[9][2] = 2.0*r[2]*drdz; - - // for high-order will use (x^a y^b z^c) + const double drdy = -1.0; - return; -} // end function + grad_p[0][1] = 0.0; + grad_p[1][1] = 0.0; + grad_p[2][1] = drdy; + grad_p[3][1] = 0.0; + + const double drdz = -1.0; + + grad_p[0][2] = 0.0; + grad_p[1][2] = 0.0; + grad_p[2][2] = 0.0; + grad_p[3][2] = drdz; + + return; + } // end function +#endif void calc_basis_functions( @@ -452,8 +498,10 @@ void calc_basis_and_grad_basis_functions( r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i double W = kernel(r,h); + //printf("kernel = %f \n", W); double grad_W[3]; grad_kernel(grad_W,r,h); + //printf("grad kernel = %f, %f, %f \n", grad_W[0], grad_W[1], grad_W[2]); double p[num_poly_basis]; poly_basis(r,p); @@ -701,9 +749,9 @@ int main(int argc, char *argv[]) } else { - double dx = LX/((double)num_1d_x); - double dy = LY/((double)num_1d_y); - double dz = LZ/((double)num_1d_z); + double dx = (LX-X0)/((double)num_1d_x - 1); + double dy = (LY-Y0)/((double)num_1d_y - 1); + double dz = (LZ-Z0)/((double)num_1d_z - 1); size_t point_gid = 0; for(size_t k=0; k keys_in_bin(num_bins, "keys_in_bin"); // mapping from gid to (i,j,k) @@ -805,7 +859,8 @@ int main(int argc, char *argv[]) point_positions(point_gid,1), point_positions(point_gid,2), num_bins_x, - num_bins_y); + num_bins_y, + num_bins_z); size_t storage_lid = Kokkos::atomic_fetch_add(&num_points_in_bin(bin_gid), 1); points_bin_gid(point_gid) = bin_gid; // the id of the bin @@ -835,6 +890,21 @@ int main(int argc, char *argv[]) }); // end for all + for(size_t bin_gid=0; bin_gid 0){ + size_t i = keys_in_bin(bin_gid).i; + size_t j = keys_in_bin(bin_gid).j; + size_t k = keys_in_bin(bin_gid).k; + + double bin_x = ((double)i)*bin_dx; + double bin_y = ((double)j)*bin_dy; + double bin_z = ((double)k)*bin_dz; + printf("num points in bin = %zu, bin keys = (%zu, %zu, %zu), bin x = (%f, %f, %f) \n", + num_points_in_bin.host(bin_gid), i, j, k, bin_x, bin_y, bin_z); + } + } // end for + + // ------------------------------------------------ // Find the neighbors around each point using bins @@ -885,20 +955,52 @@ int main(int argc, char *argv[]) } // end for icount // the min number of points required to solve the system is num_poly_basis+1, was 2*num_poly_basis - if (num_points_found > num_points_fit || num_points_found==num_points){ + if (num_points_found >= num_points_fit || num_points_found==num_points){ - points_bin_stencil(point_gid,0) = imin; - points_bin_stencil(point_gid,1) = imax; - points_bin_stencil(point_gid,2) = jmin; - points_bin_stencil(point_gid,3) = jmax; - points_bin_stencil(point_gid,4) = kmin; - points_bin_stencil(point_gid,5) = kmax; + const double x_pt_middle = bin_dx*((double)i) + X0; + const double y_pt_middle = bin_dy*((double)j) + Y0; + const double z_pt_middle = bin_dz*((double)k) + Z0; - points_num_neighbors(point_gid) = num_points_found; // including node_i in the list of neighbors - //points_num_neighbors(point_gid) = num_points_found - 1; // the -1 is because counted point i as a neighbor + const double x_pt_minus = bin_dx*((double)imin) + X0; + const double y_pt_minus = bin_dy*((double)jmin) + Y0; + const double z_pt_minus = bin_dz*((double)kmin) + Z0; + + const double x_pt_plus = bin_dx*((double)imax) + X0; + const double y_pt_plus = bin_dy*((double)jmax) + Y0; + const double z_pt_plus = bin_dz*((double)kmax) + Z0; - break; - } + const double dist_minus = sqrt( (x_pt_minus - x_pt_middle)*(x_pt_minus - x_pt_middle) + + (y_pt_minus - y_pt_middle)*(y_pt_minus - y_pt_middle) + + (z_pt_minus - z_pt_middle)*(z_pt_minus - z_pt_middle) ); + + const double dist_plus = sqrt( (x_pt_plus - x_pt_middle)*(x_pt_plus - x_pt_middle) + + (y_pt_plus - y_pt_middle)*(y_pt_plus - y_pt_middle) + + (z_pt_plus - z_pt_middle)*(z_pt_plus - z_pt_middle) ); + + printf("h = %f, dist_m = %f, dist_p = %f, num_points=%zu, imin = %d, imax = %d, jmin = %d, jmax = %d, kmin = %d, kmax = %d, \n", + h_kernel, dist_minus, dist_plus, num_points_found, imin, imax, jmin, jmax, kmin, kmax); + + // only exit when we exceed kernel distance + if (dist_minus >= h_kernel || dist_plus >= h_kernel || num_points_found==num_points){ + + //printf("exiting \n\n"); + + points_bin_stencil(point_gid,0) = imin; + points_bin_stencil(point_gid,1) = imax; + points_bin_stencil(point_gid,2) = jmin; + points_bin_stencil(point_gid,3) = jmax; + points_bin_stencil(point_gid,4) = kmin; + points_bin_stencil(point_gid,5) = kmax; + + points_num_neighbors(point_gid) = num_points_found; // including node_i in the list of neighbors + points_num_neighbors(point_gid) = num_points_found - 1; // the -1 is because we counted point i as a neighbor + + break; + } + // else increase stencil size + + + } // end of check } // end for stencil @@ -1015,9 +1117,7 @@ int main(int argc, char *argv[]) size_t neighbor_point_gid = points_in_bin(neighbor_bin_gid, neighbor_pt_lid); // make sure its a neighbor - //if(neighbor_point_gid != point_gid){ - - // I am including point_i in the neighbor list + if(neighbor_point_gid != point_gid){ // save the neighbor size_t num_saved = Kokkos::atomic_fetch_add(&points_num_neighbors(point_gid), 1); @@ -1046,7 +1146,7 @@ int main(int argc, char *argv[]) } // end if - //} // end if neighbor != point_gid + } // end if neighbor != point_gid } // neighbor_point_lid @@ -1411,7 +1511,7 @@ int main(int argc, char *argv[]) // conservative mesh-free FE double contrib = 0.0; for (int dim=0; dim<3; ++dim) { - contrib += 0.5*(g_ij[dim] - g_ji[dim]) * (u(j_gid, dim) - u(i_gid, dim)); + contrib += 0.5*(g_ij[dim] - g_ji[dim]) * (u(j_gid, dim) + u(i_gid, dim)); } div(i_gid) += vol(i_gid) * vol(j_gid) * contrib; @@ -1424,7 +1524,7 @@ int main(int argc, char *argv[]) } div(i_gid) /= vol(i_gid); - //div_fd(i_gid) /= vol(i_gid); // finite difference doesn't have the V_i on the right side, so no division + // remember: finite difference doesn't have the V_i on the right side, so no division }); div.update_host(); div_fd.update_host(); @@ -1444,97 +1544,6 @@ int main(int argc, char *argv[]) } } // end for point_gid - /* - DCArrayKokkos div(num_points); - div.set_values(0.0); - - FOR_ALL(i_gid, 0, num_points, { - - for(size_t j_lid = 0; j_lid Date: Fri, 26 Sep 2025 16:06:50 -0600 Subject: [PATCH 18/23] WIP: added grad_i and grad_j --- examples/pointcloud/pointcloud-rk.cpp | 156 +++++++++++++++++--------- 1 file changed, 102 insertions(+), 54 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index 07390ca8..04fea617 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -71,7 +71,8 @@ const double PI = 3.14159265358979323846; // inputs: -#define P1 // P1 or P2 +#define P2 // P1 or P2 +#define CUBIC_SPLINE // CUBIC_SPLINE or GUASS const size_t num_1d_x = 3; @@ -159,6 +160,9 @@ size_t get_bin_gid(const double x_pt, + +#if defined(CUBIC_SPLINE) + KOKKOS_FUNCTION double kernel(const double r[3], double h) { @@ -180,7 +184,7 @@ double kernel(const double r[3], double h) { KOKKOS_FUNCTION // derivative dW/dx_i = - dW/dr where r = xj-xi -void grad_kernel(double *grad_W, const double r[3], const double h) { +void grad_kernel(double *grad_W, const double r[3], const double h, const bool derviative_wrt_i) { double diff_sqrd = 0.0; for(size_t dim=0; dim<3; dim++){ @@ -204,22 +208,29 @@ void grad_kernel(double *grad_W, const double r[3], const double h) { df_dq = 0.0; } - const double dW_dr = (df_dq / h); + const double alpha = 2.0/(3.0*h); + const double dW_dr = alpha*(df_dq / h); // grad W = dW/dr * (rij / radius) const double invr = 1.0 /(radius + 1e-16); + double drdx = -1.0; // default is derivative with respect to i + if(derviative_wrt_i == false){ + drdx = 1.0; // derivative with respect to j + } // end if + for (size_t dim=0; dim<3; ++dim) { - grad_W[dim] = dW_dr * r[dim] * invr; + grad_W[dim] = dW_dr * r[dim] * drdx * invr; } return; } +#else // Gaussian function part of the RBF // rbf = exp(-(xj - x)*(xj - x)/h) KOKKOS_FUNCTION -double kernel_g(const double r[3], const double h){ +double kernel(const double r[3], const double h){ double diff_sqrd = 0.0; @@ -232,10 +243,11 @@ double kernel_g(const double r[3], const double h){ } // end of function -// Gaussian function part of the RBF, symmeterized -// rbf = 0.5*(exp(-(xj - xi)*(xj - xi)/hi^2) + exp(-(xi - xj)*(xi - xj)/hj^2)) + +// Gradient Gaussian function +// d/dx rbf = d/dx (exp(-(xj - xi)*(xj - x)/hi^2) KOKKOS_FUNCTION -double kernel_syn(const double r[3], const double hi, const double hj){ +void grad_kernel(double *grad_W, const double r[3], const double h, const bool derviative_wrt_i){ double diff_sqrd = 0.0; @@ -243,17 +255,27 @@ double kernel_syn(const double r[3], const double hi, const double hj){ diff_sqrd += r[dim]*r[dim]; } // dim - const double Wi = exp(-diff_sqrd/(hi*hi)); // use kernel func call - const double Wj = exp(-diff_sqrd/(hj*hj)); + double drdx = -1; + if(derviative_wrt_i == false){ + drdx = 1.0; // derivative with respect to j + } // end if - return 0.5*(Wi + Wj); + const double rbf = kernel(r, h); + + // gradient + for (size_t dim=0; dim<3; ++dim) { + grad_W[dim] = -2.0/(h*h)*r[dim]*rbf*drdx; + } + + return; } // end of function +#endif -// Gradient Gaussian function -// d/dx rbf = d/dx (exp(-(xj - xi)*(xj - x)/hi^2) +// Gaussian function part of the RBF, symmeterized +// rbf = 0.5*(exp(-(xj - xi)*(xj - xi)/hi^2) + exp(-(xi - xj)*(xi - xj)/hj^2)) KOKKOS_FUNCTION -void grad_kernel_g(double *grad_W, const double r[3], const double h){ +double kernel_syn(const double r[3], const double hi, const double hj){ double diff_sqrd = 0.0; @@ -261,17 +283,13 @@ void grad_kernel_g(double *grad_W, const double r[3], const double h){ diff_sqrd += r[dim]*r[dim]; } // dim - const double drdxi = -1; - const double rbf = kernel(r, h); - - // gradient - for (size_t dim=0; dim<3; ++dim) { - grad_W[dim] = -2.0/(h*h)*r[dim]*rbf*drdxi; - } + const double Wi = exp(-diff_sqrd/(hi*hi)); // use kernel func call + const double Wj = exp(-diff_sqrd/(hj*hj)); - return; + return 0.5*(Wi + Wj); } // end of function + // d/dx rbf = d/dx ( 0.5(exp(-(xj - xi)*(xj - x)/hi^2) + exp(-(xi - xj)*(xi - xj)/hj^2)) ) KOKKOS_FUNCTION void grad_kernel_sym(double *gradW, const double r[3], const double hi, const double hj) { @@ -317,9 +335,18 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do KOKKOS_INLINE_FUNCTION - void grad_poly_basis(const double r[3], double (*grad_p)[3]) { + void grad_poly_basis(const double r[3], double (*grad_p)[3], bool derviative_wrt_i) { - const double drdx = -1.0; + // default r = r_j - r_i + + double drdx = -1.0; // default is derivative with respect to i + double drdy = -1.0; // default is derivative with respect to i + double drdz = -1.0; // default is derivative with respect to i + if(derviative_wrt_i == false){ + drdx = 1.0; // derivative with respect to j + drdy = 1.0; // derivative with respect to j + drdz = 1.0; // derivative with respect to j + } // end if grad_p[0][0] = 0.0; grad_p[1][0] = drdx; @@ -334,8 +361,6 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do // for high-order will use (x^a y^b z^c) - const double drdy = -1.0; - grad_p[0][1] = 0.0; grad_p[1][1] = 0.0; grad_p[2][1] = drdy; @@ -349,8 +374,6 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do // for high-order will use (x^a y^b z^c) - const double drdz = -1.0; - grad_p[0][2] = 0.0; grad_p[1][2] = 0.0; grad_p[2][2] = 0.0; @@ -382,23 +405,27 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do KOKKOS_INLINE_FUNCTION - void grad_poly_basis(const double r[3], double (*grad_p)[3]) { + void grad_poly_basis(const double r[3], double (*grad_p)[3], size_t eval_point) { - const double drdx = -1.0; + double drdx = -1.0; // default is derivative with respect to i + double drdy = -1.0; // default is derivative with respect to i + double drdz = -1.0; // default is derivative with respect to i + if(derviative_wrt_i == false){ + drdx = 1.0; // derivative with respect to j + drdy = 1.0; // derivative with respect to j + drdz = 1.0; // derivative with respect to j + } // end if grad_p[0][0] = 0.0; grad_p[1][0] = drdx; grad_p[2][0] = 0.0; grad_p[3][0] = 0.0; - const double drdy = -1.0; - grad_p[0][1] = 0.0; grad_p[1][1] = 0.0; grad_p[2][1] = drdy; grad_p[3][1] = 0.0; - const double drdz = -1.0; grad_p[0][2] = 0.0; grad_p[1][2] = 0.0; @@ -465,9 +492,12 @@ void calc_basis_and_grad_basis_functions( const CArrayKokkos & M_inv, const DRaggedRightArrayKokkos & basis, const DRaggedRightArrayKokkos & grad_basis, - const double h) + const double h, + const bool derviative_wrt_i) { + // dir + // actual number of points size_t num_points = x.dims(0); @@ -497,17 +527,19 @@ void calc_basis_and_grad_basis_functions( r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i + + double W = kernel(r,h); //printf("kernel = %f \n", W); double grad_W[3]; - grad_kernel(grad_W,r,h); + grad_kernel(grad_W,r,h,derviative_wrt_i); //printf("grad kernel = %f, %f, %f \n", grad_W[0], grad_W[1], grad_W[2]); double p[num_poly_basis]; poly_basis(r,p); double grad_p[num_poly_basis][3]; - grad_poly_basis(r,grad_p); + grad_poly_basis(r, grad_p, derviative_wrt_i); double Vj = vol(neighbor_point_gid); @@ -542,10 +574,10 @@ void calc_basis_and_grad_basis_functions( poly_basis(r,p); double grad_W[3]; - grad_kernel(grad_W, r, h); + grad_kernel(grad_W, r, h, derviative_wrt_i); double grad_p[num_poly_basis][3]; // matrix holding grad polynomial basis - grad_poly_basis(r, grad_p); + grad_poly_basis(r, grad_p, derviative_wrt_i); // double correction = 0.0; @@ -1237,7 +1269,8 @@ int main(int argc, char *argv[]) CArrayKokkos grad_M(num_points, num_poly_basis, num_poly_basis); DRaggedRightArrayKokkos basis(points_num_neighbors); // reproducing kernel basis (num_points, num_neighbors) - DRaggedRightArrayKokkos grad_basis(points_num_neighbors,3); // reproducing kernel basis (num_points, num_neighbors) + DRaggedRightArrayKokkos grad_basis_i(points_num_neighbors,3); // grad kernel basis j with respect to i (num_points, num_neighbors) + DRaggedRightArrayKokkos grad_basis_j(points_num_neighbors,3); // grad kernel basis i with respect to j (num_points, num_neighbors) @@ -1264,8 +1297,21 @@ int main(int argc, char *argv[]) p_coeffs, M_inv, basis, - grad_basis, - h); + grad_basis_i, + h, + true); + + calc_basis_and_grad_basis_functions( + point_positions, + points_num_neighbors, + points_in_point, + vol, + p_coeffs, + M_inv, + basis, + grad_basis_j, + h, + false); // end timer auto time_6 = std::chrono::high_resolution_clock::now(); @@ -1363,19 +1409,19 @@ int main(int argc, char *argv[]) FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p0_lcl, { // get the point gid for this neighboring size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); - grad_x_p0_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid); + grad_x_p0_lcl += grad_basis_i(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid); }, grad_x_p0); FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p0_lcl, { // get the point gid for this neighboring size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); - grad_y_p0_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid); + grad_y_p0_lcl += grad_basis_i(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid); }, grad_y_p0); FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p0_lcl, { // get the point gid for this neighboring size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); - grad_z_p0_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid); + grad_z_p0_lcl += grad_basis_i(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid); }, grad_z_p0); const double grad_check_P0 = fabs(grad_x_p0)+fabs(grad_y_p0)+fabs(grad_z_p0); @@ -1387,17 +1433,17 @@ int main(int argc, char *argv[]) FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p1_lcl, { // get the point gid for this neighboring size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); - grad_x_p1_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0); + grad_x_p1_lcl += grad_basis_i(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0); }, grad_x_p1); FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p1_lcl, { // get the point gid for this neighboring size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); - grad_y_p1_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1); + grad_y_p1_lcl += grad_basis_i(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1); }, grad_y_p1); FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p1_lcl, { // get the point gid for this neighboring size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); - grad_z_p1_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2); + grad_z_p1_lcl += grad_basis_i(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2); }, grad_z_p1); const double grad_check_P1 = fabs(grad_x_p1 - 1.0)+fabs(grad_y_p1 - 1.0)+fabs(grad_z_p1 - 1.0); @@ -1413,17 +1459,17 @@ int main(int argc, char *argv[]) FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p2_lcl, { // get the point gid for this neighboring size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); - grad_x_p2_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0)*point_positions(neighbor_point_gid,0); + grad_x_p2_lcl += grad_basis_i(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0)*point_positions(neighbor_point_gid,0); }, grad_x_p2); FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p2_lcl, { // get the point gid for this neighboring size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); - grad_y_p2_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1)*point_positions(neighbor_point_gid,1); + grad_y_p2_lcl += grad_basis_i(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1)*point_positions(neighbor_point_gid,1); }, grad_y_p2); FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p2_lcl, { // get the point gid for this neighboring size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); - grad_z_p2_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2)*point_positions(neighbor_point_gid,2); + grad_z_p2_lcl += grad_basis_i(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2)*point_positions(neighbor_point_gid,2); }, grad_z_p2); const double grad_check_P2 = fabs(grad_x_p2-2.0*point_positions(point_gid,0)) + fabs(grad_y_p2-2.0*point_positions(point_gid,1)) + fabs(grad_z_p2-2.0*point_positions(point_gid,2)); @@ -1493,6 +1539,7 @@ int main(int argc, char *argv[]) div_fd.set_values(0.0); FOR_ALL(i_gid, 0, num_points, { + for(size_t j_lid = 0; j_lid Date: Sat, 27 Sep 2025 10:59:06 -0600 Subject: [PATCH 19/23] WIP: anti-symmetric gradient --- examples/pointcloud/pointcloud-rk.cpp | 121 ++++++++++++++------------ 1 file changed, 66 insertions(+), 55 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index 04fea617..3958886d 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -73,7 +73,7 @@ const double PI = 3.14159265358979323846; #define P2 // P1 or P2 #define CUBIC_SPLINE // CUBIC_SPLINE or GUASS - +bool RAND_CLOUD = true; // RAND_CLOUD or uniform const size_t num_1d_x = 3; const size_t num_1d_y = 3; @@ -184,7 +184,7 @@ double kernel(const double r[3], double h) { KOKKOS_FUNCTION // derivative dW/dx_i = - dW/dr where r = xj-xi -void grad_kernel(double *grad_W, const double r[3], const double h, const bool derviative_wrt_i) { +void grad_kernel(double *grad_W, const double r[3], const double h, const bool derivative_wrt_i) { double diff_sqrd = 0.0; for(size_t dim=0; dim<3; dim++){ @@ -214,7 +214,7 @@ void grad_kernel(double *grad_W, const double r[3], const double h, const bool d const double invr = 1.0 /(radius + 1e-16); double drdx = -1.0; // default is derivative with respect to i - if(derviative_wrt_i == false){ + if(derivative_wrt_i == false){ drdx = 1.0; // derivative with respect to j } // end if @@ -247,7 +247,7 @@ double kernel(const double r[3], const double h){ // Gradient Gaussian function // d/dx rbf = d/dx (exp(-(xj - xi)*(xj - x)/hi^2) KOKKOS_FUNCTION -void grad_kernel(double *grad_W, const double r[3], const double h, const bool derviative_wrt_i){ +void grad_kernel(double *grad_W, const double r[3], const double h, const bool derivative_wrt_i){ double diff_sqrd = 0.0; @@ -256,7 +256,7 @@ void grad_kernel(double *grad_W, const double r[3], const double h, const bool d } // dim double drdx = -1; - if(derviative_wrt_i == false){ + if(derivative_wrt_i == false){ drdx = 1.0; // derivative with respect to j } // end if @@ -335,14 +335,14 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do KOKKOS_INLINE_FUNCTION - void grad_poly_basis(const double r[3], double (*grad_p)[3], bool derviative_wrt_i) { + void grad_poly_basis(const double r[3], double (*grad_p)[3], bool derivative_wrt_i) { - // default r = r_j - r_i + // definition, r = r_j - r_i double drdx = -1.0; // default is derivative with respect to i double drdy = -1.0; // default is derivative with respect to i double drdz = -1.0; // default is derivative with respect to i - if(derviative_wrt_i == false){ + if(derivative_wrt_i == false){ drdx = 1.0; // derivative with respect to j drdy = 1.0; // derivative with respect to j drdz = 1.0; // derivative with respect to j @@ -410,7 +410,7 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do double drdx = -1.0; // default is derivative with respect to i double drdy = -1.0; // default is derivative with respect to i double drdz = -1.0; // default is derivative with respect to i - if(derviative_wrt_i == false){ + if(derivative_wrt_i == false){ drdx = 1.0; // derivative with respect to j drdy = 1.0; // derivative with respect to j drdz = 1.0; // derivative with respect to j @@ -493,7 +493,7 @@ void calc_basis_and_grad_basis_functions( const DRaggedRightArrayKokkos & basis, const DRaggedRightArrayKokkos & grad_basis, const double h, - const bool derviative_wrt_i) + const bool derivative_wrt_i) { // dir @@ -532,14 +532,14 @@ void calc_basis_and_grad_basis_functions( double W = kernel(r,h); //printf("kernel = %f \n", W); double grad_W[3]; - grad_kernel(grad_W,r,h,derviative_wrt_i); + grad_kernel(grad_W,r,h,derivative_wrt_i); //printf("grad kernel = %f, %f, %f \n", grad_W[0], grad_W[1], grad_W[2]); double p[num_poly_basis]; poly_basis(r,p); double grad_p[num_poly_basis][3]; - grad_poly_basis(r, grad_p, derviative_wrt_i); + grad_poly_basis(r, grad_p, derivative_wrt_i); double Vj = vol(neighbor_point_gid); @@ -574,10 +574,10 @@ void calc_basis_and_grad_basis_functions( poly_basis(r,p); double grad_W[3]; - grad_kernel(grad_W, r, h, derviative_wrt_i); + grad_kernel(grad_W, r, h, derivative_wrt_i); double grad_p[num_poly_basis][3]; // matrix holding grad polynomial basis - grad_poly_basis(r, grad_p, derviative_wrt_i); + grad_poly_basis(r, grad_p, derivative_wrt_i); // double correction = 0.0; @@ -770,14 +770,18 @@ int main(int argc, char *argv[]) DCArrayKokkos point_positions(num_points, 3, "point_positions"); DCArrayKokkos point_values(num_points, "point_values"); + DCArrayKokkos vol(num_points); + vol.set_values(0.0); + // point locations - if(false){ + if(RAND_CLOUD){ srand(static_cast(time(0))); // Seed the random number generator for(size_t i=0; i(rand())/static_cast(RAND_MAX); point_positions.host(i, 1) = Y0 + LY*static_cast(rand())/static_cast(RAND_MAX); point_positions.host(i, 2) = Z0 + LZ*static_cast(rand())/static_cast(RAND_MAX); } + vol.set_values(1.0); } else { @@ -797,8 +801,35 @@ int main(int argc, char *argv[]) } // end j } // end k - } // end if + + const double elem_dx = LX/((double)num_1d_x); + const double elem_dy = LY/((double)num_1d_y); + const double elem_dz = LZ/((double)num_1d_z); + const double elem_vol = elem_dx*elem_dy*elem_dz; + + const size_t num_cells_1d_x = num_1d_x-1; + const size_t num_cells_1d_y = num_1d_y-1; + const size_t num_cells_1d_z = num_1d_z-1; + + FOR_ALL(k,0,num_cells_1d_z, + j,0,num_cells_1d_y, + i,0,num_cells_1d_x,{ + + for (int kcount=k; kcount<=k+1; kcount++){ + for (int jcount=j; jcount<=j+1; jcount++){ + for (int icount=i; icount<=i+1; icount++){ + size_t point_gid = get_gid(icount, jcount, kcount, num_1d_x, num_1d_y); + Kokkos::atomic_add(&vol(point_gid), elem_vol*0.25); + } // end i + } // end j + } // end k + + }); // end parallel over k,j,i + + + } // end if + vol.update_host(); point_positions.update_device(); Kokkos::fence(); @@ -931,8 +962,8 @@ int main(int argc, char *argv[]) double bin_x = ((double)i)*bin_dx; double bin_y = ((double)j)*bin_dy; double bin_z = ((double)k)*bin_dz; - printf("num points in bin = %zu, bin keys = (%zu, %zu, %zu), bin x = (%f, %f, %f) \n", - num_points_in_bin.host(bin_gid), i, j, k, bin_x, bin_y, bin_z); + //printf("num points in bin = %zu, bin keys = (%zu, %zu, %zu), bin x = (%f, %f, %f) \n", + // num_points_in_bin.host(bin_gid), i, j, k, bin_x, bin_y, bin_z); } } // end for @@ -1009,8 +1040,8 @@ int main(int argc, char *argv[]) (y_pt_plus - y_pt_middle)*(y_pt_plus - y_pt_middle) + (z_pt_plus - z_pt_middle)*(z_pt_plus - z_pt_middle) ); - printf("h = %f, dist_m = %f, dist_p = %f, num_points=%zu, imin = %d, imax = %d, jmin = %d, jmax = %d, kmin = %d, kmax = %d, \n", - h_kernel, dist_minus, dist_plus, num_points_found, imin, imax, jmin, jmax, kmin, kmax); + //printf("h = %f, dist_m = %f, dist_p = %f, num_points=%zu, imin = %d, imax = %d, jmin = %d, jmax = %d, kmin = %d, kmax = %d, \n", + // h_kernel, dist_minus, dist_plus, num_points_found, imin, imax, jmin, jmax, kmin, kmax); // only exit when we exceed kernel distance if (dist_minus >= h_kernel || dist_plus >= h_kernel || num_points_found==num_points){ @@ -1235,34 +1266,6 @@ int main(int argc, char *argv[]) CArrayKokkos p_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point - - DCArrayKokkos vol(num_points); - vol.set_values(0.0); - - const double dx = LX/((double)num_1d_x); - const double dy = LY/((double)num_1d_y); - const double dz = LZ/((double)num_1d_z); - const double elem_vol = dx*dy*dz; - - const size_t num_cells_1d_x = num_1d_x-1; - const size_t num_cells_1d_y = num_1d_y-1; - const size_t num_cells_1d_z = num_1d_z-1; - - FOR_ALL(k,0,num_cells_1d_z, - j,0,num_cells_1d_y, - i,0,num_cells_1d_x,{ - - for (int kcount=k; kcount<=k+1; kcount++){ - for (int jcount=j; jcount<=j+1; jcount++){ - for (int icount=i; icount<=i+1; icount++){ - size_t point_gid = get_gid(icount, jcount, kcount, num_1d_x, num_1d_y); - Kokkos::atomic_add(&vol(point_gid), elem_vol*0.25); - } // end i - } // end j - } // end k - - }); // end parallel over k,j,i - vol.update_host(); CArrayKokkos M_inv(num_points, num_poly_basis, num_poly_basis); @@ -1540,20 +1543,26 @@ int main(int argc, char *argv[]) FOR_ALL(i_gid, 0, num_points, { - for(size_t j_lid = 0; j_lid 1e-8){ + if(error > 1e-8){ printf("div(u) = %f at point %zu, error = %g, vol = %f\n", div.host(point_gid), point_gid, error, vol.host(point_gid)); - //} + } } // end for point_gid for(size_t point_gid=0; point_gid Date: Sat, 27 Sep 2025 11:36:07 -0600 Subject: [PATCH 20/23] added divergence sin(x) test --- examples/pointcloud/pointcloud-rk.cpp | 101 +++++++++++++++++++++++--- 1 file changed, 90 insertions(+), 11 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index 3958886d..b482c3c6 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -72,14 +72,14 @@ const double PI = 3.14159265358979323846; #define P2 // P1 or P2 -#define CUBIC_SPLINE // CUBIC_SPLINE or GUASS +#define CUBIC_SPLINE // CUBIC_SPLINE or GAUSS kernel bool RAND_CLOUD = true; // RAND_CLOUD or uniform -const size_t num_1d_x = 3; -const size_t num_1d_y = 3; -const size_t num_1d_z = 3; +const size_t num_1d_x = 5; +const size_t num_1d_y = 5; +const size_t num_1d_z = 5; -const double h_kernel = 2/3.; +const double h_kernel = 2/5.; const double num_points_fit = 27; // minimum to fit is 3x3x3 const size_t num_points = num_1d_x*num_1d_y*num_1d_z; @@ -1602,8 +1602,6 @@ int main(int argc, char *argv[]) } // end for point_gid - - double conserve_check; double conserve_check_lcl; FOR_REDUCE_SUM(point_gid, 0, num_points, @@ -1615,11 +1613,14 @@ int main(int argc, char *argv[]) size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid); // get the local id of my neighbor that matches my point_gid - //size_t neighbor_lid = reverse_neighbor_lid(point_gid, neighbor_point_lid); + size_t neighbor_lid = reverse_neighbor_lid(point_gid, neighbor_point_lid); for (size_t dim=0; dim<3; dim++){ + // from point i conserve_check_lcl += 0.5*(grad_basis_i(point_gid,neighbor_point_lid,dim) - grad_basis_j(point_gid,neighbor_point_lid,dim)); - //conserve_check_lcl += 0.5*(grad_basis_i(point_gid,neighbor_point_lid,dim) - grad_basis_j(neighbor_point_gid,neighbor_lid,dim)); + + // from neighbor point + conserve_check_lcl += 0.5*(grad_basis_i(neighbor_point_gid,neighbor_lid,dim) - grad_basis_j(neighbor_point_gid,neighbor_lid,dim)); } } @@ -1628,6 +1629,72 @@ int main(int argc, char *argv[]) + printf("Testing sin(u) of vector field u = (x, y, z) \n\n"); + + FOR_ALL(i, 0, num_points, { + u(i, 0) = sin(point_positions(i, 0)); + u(i, 1) = sin(point_positions(i, 1)); + u(i, 2) = sin(point_positions(i, 2)); + }); + u.update_device(); + + DCArrayKokkos div_sinx(num_points); + div_sinx.set_values(0.0); + + DCArrayKokkos div_fd_sinx(num_points); + div_fd_sinx.set_values(0.0); + + FOR_ALL(i_gid, 0, num_points, { + + for(size_t j_lid = 0; j_lid Date: Sat, 27 Sep 2025 12:46:26 -0600 Subject: [PATCH 21/23] cleaned up prints --- examples/pointcloud/pointcloud-rk.cpp | 48 +++++++++++++++------------ 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index b482c3c6..0b7c15c3 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -79,8 +79,8 @@ const size_t num_1d_x = 5; const size_t num_1d_y = 5; const size_t num_1d_z = 5; -const double h_kernel = 2/5.; -const double num_points_fit = 27; // minimum to fit is 3x3x3 +const double h_kernel = 2.0/5.; +const double num_points_fit = 27; // minimum to fit on structured mesh is 3x3x3 const size_t num_points = num_1d_x*num_1d_y*num_1d_z; @@ -775,14 +775,14 @@ int main(int argc, char *argv[]) // point locations if(RAND_CLOUD){ - srand(static_cast(time(0))); // Seed the random number generator - for(size_t i=0; i(rand())/static_cast(RAND_MAX); - point_positions.host(i, 1) = Y0 + LY*static_cast(rand())/static_cast(RAND_MAX); - point_positions.host(i, 2) = Z0 + LZ*static_cast(rand())/static_cast(RAND_MAX); + srand(static_cast(time(0))); // Seed the random number generator + for(size_t i=0; i(rand())/static_cast(RAND_MAX); + point_positions.host(i, 1) = Y0 + LY*static_cast(rand())/static_cast(RAND_MAX); + point_positions.host(i, 2) = Z0 + LZ*static_cast(rand())/static_cast(RAND_MAX); + } + vol.set_values(1.0); } - vol.set_values(1.0); - } else { double dx = (LX-X0)/((double)num_1d_x - 1); @@ -850,6 +850,8 @@ int main(int argc, char *argv[]) // ---------------------------- // Make bins here // ---------------------------- + + printf("making bins \n"); // the number of nodes in the mesh size_t num_bins_x = (size_t)( round( (LX - X0)/bin_dx) + 1 ); @@ -878,6 +880,7 @@ int main(int argc, char *argv[]) printf("Starting timers \n\n"); + // start timer auto time_1 = std::chrono::high_resolution_clock::now(); @@ -914,6 +917,8 @@ int main(int argc, char *argv[]) // start timer auto time_3 = std::chrono::high_resolution_clock::now(); + printf("building neighbor point list \n"); + // save bin id to points FOR_ALL(point_gid, 0, num_points, { @@ -953,19 +958,19 @@ int main(int argc, char *argv[]) }); // end for all - for(size_t bin_gid=0; bin_gid 0){ - size_t i = keys_in_bin(bin_gid).i; - size_t j = keys_in_bin(bin_gid).j; - size_t k = keys_in_bin(bin_gid).k; + // for(size_t bin_gid=0; bin_gid 0){ + // size_t i = keys_in_bin(bin_gid).i; + // size_t j = keys_in_bin(bin_gid).j; + // size_t k = keys_in_bin(bin_gid).k; - double bin_x = ((double)i)*bin_dx; - double bin_y = ((double)j)*bin_dy; - double bin_z = ((double)k)*bin_dz; - //printf("num points in bin = %zu, bin keys = (%zu, %zu, %zu), bin x = (%f, %f, %f) \n", - // num_points_in_bin.host(bin_gid), i, j, k, bin_x, bin_y, bin_z); - } - } // end for + // double bin_x = ((double)i)*bin_dx; + // double bin_y = ((double)j)*bin_dy; + // double bin_z = ((double)k)*bin_dz; + // //printf("num points in bin = %zu, bin keys = (%zu, %zu, %zu), bin x = (%f, %f, %f) \n", + // // num_points_in_bin.host(bin_gid), i, j, k, bin_x, bin_y, bin_z); + // } + // } // end for @@ -1255,6 +1260,7 @@ int main(int argc, char *argv[]) // end timer auto time_4 = std::chrono::high_resolution_clock::now(); + printf("done building neighbor point list \n"); // ---------------------------------------- // Find basis that reconstructs polynomial From 3bfb736817088e6fd6599140d5d7dbdb2f90f58b Mon Sep 17 00:00:00 2001 From: Nathaniel Morgan Date: Sat, 27 Sep 2025 16:57:17 -0600 Subject: [PATCH 22/23] fixed P1 option --- examples/pointcloud/pointcloud-rk.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index 0b7c15c3..b03fae88 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -405,7 +405,7 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do KOKKOS_INLINE_FUNCTION - void grad_poly_basis(const double r[3], double (*grad_p)[3], size_t eval_point) { + void grad_poly_basis(const double r[3], double (*grad_p)[3], bool derivative_wrt_i) { double drdx = -1.0; // default is derivative with respect to i double drdy = -1.0; // default is derivative with respect to i @@ -1228,8 +1228,7 @@ int main(int argc, char *argv[]) // build the reverse map - DRaggedRightArrayKokkos reverse_neighbor_lid(points_num_neighbors); - reverse_neighbor_lid.set_values(-1); + DRaggedRightArrayKokkos reverse_neighbor_lid(points_num_neighbors); FOR_ALL(point_gid, 0, num_points, { From 3a89cb914d9ec6294f1b837f6f85e34b34143ff0 Mon Sep 17 00:00:00 2001 From: Nathaniel Morgan Date: Sat, 27 Sep 2025 17:20:42 -0600 Subject: [PATCH 23/23] only run P2 tests with P2 --- examples/pointcloud/pointcloud-rk.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp index b03fae88..54fd49c9 100755 --- a/examples/pointcloud/pointcloud-rk.cpp +++ b/examples/pointcloud/pointcloud-rk.cpp @@ -73,14 +73,15 @@ const double PI = 3.14159265358979323846; #define P2 // P1 or P2 #define CUBIC_SPLINE // CUBIC_SPLINE or GAUSS kernel + bool RAND_CLOUD = true; // RAND_CLOUD or uniform const size_t num_1d_x = 5; const size_t num_1d_y = 5; const size_t num_1d_z = 5; -const double h_kernel = 2.0/5.; -const double num_points_fit = 27; // minimum to fit on structured mesh is 3x3x3 +const double h_kernel = 2.0/5.; // it is always 2/num_1d_x +const double num_points_fit = 27; // minimum to P2 fit on structured mesh is 3x3x3 const size_t num_points = num_1d_x*num_1d_y*num_1d_z; @@ -98,6 +99,7 @@ const double LX = 1.0; // length in x-dir const double LY = 1.0; const double LZ = 1.0; + bool check_maps = false; // CPU only!!!! // @@ -1405,9 +1407,10 @@ int main(int argc, char *argv[]) if(fabs(linear_preserving-point_positions(point_gid,0))>1e-13) printf("linear fcn error = %f, ", fabs(linear_preserving-point_positions(point_gid,0))); + #if defined(P2) if(fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0))>1e-13) printf("quadratic fcn error = %f at i=%zu \n", fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0)), point_gid); - + #endif // ----------------- // gradient checks @@ -1463,6 +1466,7 @@ int main(int argc, char *argv[]) } + #if defined(P2) // Sum(grad(P2)) = [2]; FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p2_lcl, { // get the point gid for this neighboring @@ -1487,6 +1491,7 @@ int main(int argc, char *argv[]) fabs(grad_y_p2-2.0*point_positions(point_gid,1)), fabs(grad_z_p2-2.0*point_positions(point_gid,2))); } + #endif } // end for point gid