From 4499a1bb28034cff9f49f9a7749e6c87d69d7253 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@syrupcastle.lanl.gov>
Date: Thu, 14 Aug 2025 16:29:35 -0600
Subject: [PATCH 01/23] pointcloud example

---
 examples/CMakeLists.txt                 |    4 +
 examples/pointcloud/CMakeLists.txt      |   21 +
 examples/pointcloud/graphics-reader.h   |  759 ++++++++++++++++
 examples/pointcloud/icosahedron_bin.stl |  Bin 0 -> 1084 bytes
 examples/pointcloud/pointcloud.cpp      | 1101 +++++++++++++++++++++++
 5 files changed, 1885 insertions(+)
 create mode 100644 examples/pointcloud/CMakeLists.txt
 create mode 100755 examples/pointcloud/graphics-reader.h
 create mode 100644 examples/pointcloud/icosahedron_bin.stl
 create mode 100755 examples/pointcloud/pointcloud.cpp
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index affcd031..c9deb9ba 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -128,6 +128,10 @@ if (KOKKOS)
   add_executable(qr_test test_qr_solve.cpp)
   target_link_libraries(qr_test ${LINKING_LIBRARIES})
 
+  include_directories(pointcloud)
+  add_subdirectory(pointcloud)
+
+
   if (Matar_ENABLE_TRILINOS)
     add_executable(anndistributed ann_distributed.cpp)
     target_link_libraries(anndistributed ${LINKING_LIBRARIES})
diff --git a/examples/pointcloud/CMakeLists.txt b/examples/pointcloud/CMakeLists.txt
new file mode 100644
index 00000000..bc6f7668
--- /dev/null
+++ b/examples/pointcloud/CMakeLists.txt
@@ -0,0 +1,21 @@
+cmake_minimum_required(VERSION 3.18)
+
+# Builds the pointcloud example executable; nothing is configured unless KOKKOS is set.
+if (KOKKOS)
+  #find_package(Kokkos REQUIRED) #new
+  # Compile definitions: HAVE_KOKKOS always, plus one for the first backend option set below.
+  add_definitions(-DHAVE_KOKKOS=1)
+  if (CUDA)
+    add_definitions(-DHAVE_CUDA=1)
+  elseif (HIP)
+    add_definitions(-DHAVE_HIP=1)
+  elseif (OPENMP)
+    add_definitions(-DHAVE_OPENMP=1)
+  elseif (THREADS)
+    add_definitions(-DHAVE_THREADS=1)
+  endif()
+
+  # LINKING_LIBRARIES is not defined in this file; it must come from the including scope.
+  add_executable(pointcloud pointcloud.cpp)
+  target_link_libraries(pointcloud ${LINKING_LIBRARIES})
+endif(KOKKOS)
diff --git a/examples/pointcloud/graphics-reader.h b/examples/pointcloud/graphics-reader.h
new file mode 100755
index 00000000..7081de6b
--- /dev/null
+++ b/examples/pointcloud/graphics-reader.h
@@ -0,0 +1,759 @@
+// -----------------------------------------------
+// routines to read a graphics file
+//
+// -----------------------------------------------
+
+#include <iostream>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fstream>
+#include <cmath>
+#include <vector>
+
+#include "matar.h"
+
+using namespace mtr;
+
+
+
+// Returns true when stat() succeeds on the path s
+bool DoesPathExist(const std::string &s)
+{
+    struct stat path_info;   // filled in by stat(); its contents are not inspected
+    return 0 == stat(s.c_str(), &path_info);
+}
+
+// Splits s at every occurrence of delimiter and returns the pieces in order
+// (adapted from Stack Overflow). Adjacent delimiters yield empty pieces and the
+// result always holds at least one entry. An empty delimiter returns s unsplit.
+std::vector<std::string> split (std::string s, std::string delimiter) {
+    // find("") matches without advancing, so an empty delimiter would loop forever
+    if (delimiter.empty()) return std::vector<std::string>(1, s);
+
+    std::vector<std::string> res;
+    size_t pos_start = 0, pos_end;
+    while ((pos_end = s.find (delimiter, pos_start)) != std::string::npos) {
+        res.push_back (s.substr (pos_start, pos_end - pos_start));
+        pos_start = pos_end + delimiter.length();
+    }
+    res.push_back (s.substr (pos_start));
+    return res;
+} // end of split
+
+
+// Parses a bracketed, comma-separated list such as "[1.0, 2.5, 3]" into doubles.
+// Each comma-separated piece is converted with atof, so a piece that is not a
+// number (including an empty piece) contributes 0.0.
+std::vector<double> extract_list(std::string str) {
+
+    // blank out both bracket characters so only numbers and commas remain
+    for (char &ch : str) {
+        if (ch == '[' || ch == ']') ch = ' ';
+    }
+
+    // break the text apart at the commas
+    const std::vector<std::string> pieces = split(str, ",");
+
+    // convert every piece to a double, preserving order
+    std::vector<double> values;
+    for (size_t n = 0; n < pieces.size(); n++) {
+        values.push_back( atof(pieces[n].c_str()) );
+    }
+
+    return values;
+}  // end of extract_list
+
+
+
+// keeps, in place and in order, the elements pred rejects; returns the new end (from stack overflow)
+template<typename T, typename P>
+T remove_if(T beg, T end, P pred)
+{
+    T keep = beg;
+    for (; beg != end; ++beg) {
+        if (!pred(*beg)) { *keep = *beg; ++keep; }
+    }
+    return keep;
+}
+
+
+
+
+
+
+
+// This function reads a VTK file
+//--------------------------------------------------------
+//
+/*
+ vtk mesh node ordering
+          7--------6
+         /|       /|
+        / |      / |
+       4--------5  |
+       |  |     |  |
+       |  |     |  |
+       |  3-----|--2
+       | /      | /
+       |/       |/
+       0--------1
+ 
+ in the i,j,k format node order is
+ 0 = (i  , j  , k  )
+ 1 = (i+1, j  , k  )
+ 2 = (i+1, j+1, k  )
+ 3 = (i  , j+1, k  )
+ 4 = (i  , j  , k+1)
+ 5 = (i+1, j  , k+1)
+ 6 = (i+1, j+1, k+1)
+ 7 = (i  , j+1, k+1)
+ 
+ The marching cubes ordering is
+ (i  , j  , k  ) = 0
+ (i+1, j  , k  ) = 1
+ (i+1, j  , k+1) = 5
+ (i  , j  , k+1) = 4
+ (i  , j+1, k  ) = 3
+ (i+1, j+1, k  ) = 2
+ (i+1, j+1, k+1) = 6
+ (i  , j+1, k+1) = 7
+
+ */
+void readVTK(char * filename,
+             CArray <double> &pt_coords,
+             CArray <int> &elem_point_list,
+             CArray <double> &pt_values,
+             int &num_points,
+             int &num_elems,
+             int &num_points_in_elem,
+             bool test_vtk_read,
+             bool verbose_vtk)
+{
+    // Reads an ASCII legacy VTK file: the POINTS and CELLS sections, the first entry
+    // of CELL_TYPES and the scalar named point_var, expecting one point, one cell or
+    // one value per line. A missing file, missing POINTS/CELLS keyword or malformed
+    // line prints an ERROR and returns early; std::stoi/std::stod throw on bad numbers.
+    int i;           // number of lines scanned while searching for a keyword
+    int point_id;    // the global id for the point
+    int elem_id;     // the global id for the elem
+    int this_point;   // a local id for a point in a elem (0:7 for a Hexahedral elem)
+
+    // give the outputs defined values so an early return never leaves them unset
+    num_points = 0; num_elems = 0; num_points_in_elem = 0;
+    bool found = false;
+
+    std::ifstream in(filename);
+    if (!in) {
+        printf("ERROR: Failed to open file %s \n", filename); return;
+    }
+    // look for POINTS
+    i = 0;
+    while (found==false) {
+        std::string str;
+        std::string delimiter = " ";
+        std::getline(in, str);
+        std::vector<std::string> v = split (str, delimiter);
+
+        // looking for the following text:
+        //      POINTS %d float
+        if(v.size() > 1 && v[0] == "POINTS"){
+            num_points = std::stoi(v[1]);
+            printf("Num nodes read in %d\n", num_points);
+
+            found=true;
+        } // end if
+
+        // give up at the end of the file or after too many lines
+        if (!found && (i>1000 || !in)){
+            printf("ERROR: Failed to find POINTS \n");
+            return;  // the arrays cannot be sized without the point count
+        } // end if
+
+        i++;
+    } // end while
+
+
+    // allocate memory for point coords and values
+    pt_coords = CArray <double> (num_points,3);
+    pt_values = CArray <double> (num_points);
+
+
+    // read the point coordinates
+    for (point_id=0; point_id<num_points; point_id++){
+
+        std::string str;
+        std::getline(in, str);
+
+        std::string delimiter = " ";
+        std::vector<std::string> v = split (str, delimiter);
+        if (v.size() < 3) { printf("ERROR: Failed to read point %d \n", point_id); return; }
+        for (int dim=0; dim<3; dim++){
+            pt_coords(point_id,dim) = std::stod(v[dim]); // double
+            //if num_dims=2 skip the 3rd value
+
+            // printing all the mesh coordinates
+            if (verbose_vtk) printf(" %f ", pt_coords(point_id,dim));
+        }
+        if (verbose_vtk) printf("\n"); // printing a space for readability
+
+    } // end for points
+    found=false;
+
+
+    if (verbose_vtk)printf("\n");
+    if (verbose_vtk) printf("looking for CELLS \n");
+
+    // look for CELLS
+    i = 0;
+    while (found==false) {
+        std::string str;
+        std::getline(in, str);
+
+        std::string delimiter = " ";
+        std::vector<std::string> v = split (str, delimiter);
+
+        // looking for the following text:
+        //      CELLS num_elems size
+        if(v.size() > 1 && v[0] == "CELLS"){
+            num_elems = std::stoi(v[1]);
+            printf("Num elements read in %d\n", num_elems);
+
+            found=true;
+        } // end if
+
+        // give up at the end of the file or after too many lines
+        if (!found && (i>1000 || !in)){
+            printf("ERROR: Failed to find CELLS \n");
+            return;  // the connectivity cannot be sized without the cell count
+        } // end if
+
+        i++;
+    } // end while
+
+
+
+    // allocate memory for points in each element
+    elem_point_list = CArray <int> (num_elems,8);   // 8 points in a hex
+
+
+    // read the point ids in the element
+    for (elem_id=0; elem_id<num_elems; elem_id++) {
+        std::string str;
+        std::getline(in, str);
+        std::string delimiter = " ";
+        std::vector<std::string> v = split (str, delimiter);
+        // reject a missing line, or a count that would overrun the 8 columns or the tokens
+        if (v[0].empty()) { printf("ERROR: Failed to read element %d \n", elem_id); return; }
+        num_points_in_elem = std::stoi(v[0]);
+        if (num_points_in_elem < 0 || num_points_in_elem > 8 ||
+            v.size() <= static_cast<size_t>(num_points_in_elem)) {
+            printf("ERROR: Failed to read element %d \n", elem_id); return;
+        }
+        for (this_point=0; this_point<num_points_in_elem; this_point++){
+            elem_point_list(elem_id,this_point) = std::stod(v[this_point+1]);
+            if (verbose_vtk) printf(" %d ", elem_point_list(elem_id,this_point) ); // nodes in the element
+        }
+        if (verbose_vtk) printf("\n"); // printing a space for readability
+    } // end for
+    found=false;
+
+    if (verbose_vtk) printf("\n"); // printing a space for readability
+
+
+    // look for CELL_TYPES
+    i = 0;
+    int elem_type = 0;
+    while (found==false) {
+        std::string str;
+        std::string delimiter = " ";
+        std::getline(in, str);
+        std::vector<std::string> v = split (str, delimiter);
+
+        // looking for the following text:
+        //      CELL_TYPES num_elems
+        if(v[0] == "CELL_TYPES"){
+            // only the type of the first cell, on the next line, is read
+            std::getline(in, str);
+            elem_type = std::stoi(str);
+
+            found=true;
+        } // end if
+
+        // give up at the end of the file or after too many lines
+        if (!found && (i>1000 || !in)){
+            printf("ERROR: Failed to find elem_TYPE \n");
+            break;
+        } // end if
+
+        i++;
+    } // end while
+
+    if (verbose_vtk) printf("elem type = %d \n", elem_type);
+    // elem types:
+    // linear hex = 12, linear quad = 9
+    found=false;
+
+    // verify mesh has hexahedral elements, which have 8 nodes
+    if(num_points_in_elem != 8) {
+        printf("wrong elem type of %d \n", elem_type);
+    }
+
+
+
+    // look for the point_var in the POINT_DATA heading
+    i = 0;
+    while (found==false) {
+        std::string str;
+        std::string delimiter = " ";
+        std::getline(in, str);
+        std::vector<std::string> v = split (str, delimiter);
+
+        // looking for the following text:
+        //      SCALARS point_var float 1
+        if(v.size() > 1 && v[1] == "point_var"){
+
+            std::getline(in, str);  // read next line -- its LOOKUP_TABLE
+
+            // one value per line; std::stod keeps the fractional part
+            for(int point_id=0; point_id<num_points; point_id++){
+                std::getline(in, str);
+                pt_values(point_id) = std::stod(str);
+
+
+                // printing the node values in the mesh
+                if (verbose_vtk) printf("%f \n", pt_values(point_id) );
+            }
+            if (verbose_vtk) printf("\n"); // printing a blank space
+
+            found=true;
+
+        } // end if
+
+        // give up at the end of the file or after too many lines
+        if (!found && (i>10000000 || !in)){
+            printf("ERROR: Failed to find point_var in POINT_DATA \n");
+            break;
+        } // end if
+
+        i++;
+    } // end while
+
+
+    found=false;
+
+
+
+    // testing the file read by painting a part
+    if(test_vtk_read == true){
+        for(int node_id = 0; node_id<num_points; node_id++){
+
+            double x = pt_coords(node_id,0);
+            double y = pt_coords(node_id,1);
+            double z = pt_coords(node_id,2);
+
+            // a simple sphere
+            pt_values(node_id) = sqrt(x*x + y*y + z*z) - 0.5;
+        }
+    } // end of test=true
+
+
+    printf("Finished reading mesh \n\n");
+
+    in.close();
+
+}
+
+
+// an array to convert the marching cubes id order to the vtk id order
+const int marching_cubes_2_vtk[8] =
+{
+    0,
+    1,
+    5,
+    4,
+    3,
+    2,
+    6,
+    7
+};
+
+
+
+void readTechPlot(char * filename,
+             CArray <double> &pt_coords,
+             CArray <int> &elem_point_list,
+             CArray <double> &pt_values,
+             int &num_points,
+             int &num_elems,
+             int &num_points_in_elem,
+             bool test_tecplot_read,
+             bool verbose_vtk)
+{
+    // Reads an ASCII Tecplot-style mesh: a header of at most three lines that holds
+    // NODES=<n> and ELEMENTS=<n>, then one line per node with x y z and a value,
+    // then one line per element with eight 1-based node ids. A missing file or
+    // header count prints an ERROR and returns early; std::stoi/std::stod throw
+    // on text that is not a number.
+    int i;           // number of header lines consumed so far
+    int point_id;    // the global id for the point
+    int elem_id;     // the global id for the elem
+
+    // give the outputs defined values so an early return never leaves them unset
+    num_points = 0; num_elems = 0; num_points_in_elem = 0;
+    bool found = false, have_nodes = false, have_elems = false;
+    std::ifstream in(filename);
+    if (!in) {
+        printf("ERROR: Failed to open file %s \n", filename); return;
+    }
+    // look for POINTS
+    i = 0;  // lines in the file
+    while (found==false) {
+        std::string str;
+        std::string delimiter = ",";
+        std::getline(in, str);
+        std::vector<std::string> v = split (str, delimiter);
+
+        bool found_nodes = false;
+        bool found_elems = false;
+
+        // loop over the parsed text stored in v and
+        // am now looking for the following text:
+        //      NODES=%d, ELEMENTS=%d
+        for (size_t text=0; text<v.size(); text++){
+
+
+            std::string delimiter_words = "=";
+            std::vector<std::string> words = split (v[text], delimiter_words);
+
+
+
+            for(size_t a_word=0; a_word<words.size(); a_word++){
+
+                // erase extra spaces from the text, the remaining text are names and numbers
+                words[a_word].erase(std::remove_if(words[a_word].begin(), words[a_word].end(),
+                    [](char c) { return std::isspace(static_cast<unsigned char>(c)); } ),
+                                  words[a_word].end());
+
+                // a keyword only counts when a value follows the '='
+                if(words[a_word] == "NODES" && a_word+1 < words.size()){
+                    num_points = std::stoi(words[a_word+1]);
+                    printf("Num nodes to read in %d\n", num_points);
+
+                    found_nodes = have_nodes = true;
+                } // end if
+
+                if(words[a_word] == "ELEMENTS" && a_word+1 < words.size()){
+                    num_elems = std::stoi(words[a_word+1]);
+                    printf("Num elements to read in %d\n", num_elems);
+
+                    found_elems = have_elems = true;
+                }
+
+                if(found_nodes == true && found_elems == true){
+                    found = true;
+                }
+            } // end loop over all the words in the text within a vector
+
+        } // end for loop over text in the line
+
+        if (i>=2) found=true;  // the header is assumed to span at most three lines
+
+        i++;
+    } // end while
+
+    // without both counts the arrays below cannot be sized
+    if (!have_nodes || !have_elems) {
+        printf("ERROR: Failed to find NODES and ELEMENTS \n");
+        num_points = 0; num_elems = 0;
+        return;
+    }
+    // allocate memory for point coords and values
+    pt_coords = CArray <double> (num_points,3);
+    pt_values = CArray <double> (num_points);
+
+
+    printf("starting the x,y,z point read and density value \n");
+
+    // read the point coordinates
+    for (point_id=0; point_id<num_points; point_id++){
+
+        std::string str;
+        std::getline(in, str);
+
+        std::string delimiter = " ";
+        std::vector<std::string> v = split (str, delimiter);
+
+        int column = 0;
+        for (size_t text=0; text<v.size(); text++){
+
+            // erase extra spaces from the text, the remaining text is a number
+            v[text].erase(std::remove_if(v[text].begin(), v[text].end(),
+                [](char c) { return std::isspace(static_cast<unsigned char>(c)); } ),
+                          v[text].end());
+
+
+            // column numbering starts at 0
+            if(v[text].size() > 0 && column==3){
+                pt_values(point_id) = std::stod(v[text]); // double
+
+                if (verbose_vtk) printf("    %f \n", pt_values(point_id));
+
+                break; // exit after reading the density on this line
+            } // end if column is density
+
+
+            // if there is text, then it is a number
+            if(v[text].size() > 0 && column<3){
+
+
+                pt_coords(point_id,column) = std::stod(v[text]); // double
+                //if num_dims=2 skip the 3rd value
+
+                // printing all the mesh coordinates
+                if (verbose_vtk) printf(" %f ", pt_coords(point_id,column));
+
+                column++; // this will make the column = 3 after all dims are saved
+            } // end if to save coordinates
+
+
+        } // end for over the partitioned text
+
+    } // end for points
+    found=false;
+
+
+
+
+    // allocate memory for points in each element
+    num_points_in_elem = 8;
+    elem_point_list = CArray <int> (num_elems,8);   // 8 points in a hex
+
+
+    // read the point ids in the element
+    if (verbose_vtk) printf("Reading the nodes in the element\n");
+    for (elem_id=0; elem_id<num_elems; elem_id++) {
+
+        std::string str;
+        std::getline(in, str);
+
+        std::string delimiter = " ";
+        std::vector<std::string> v = split (str, delimiter);
+
+
+        int column = 0;
+        for (size_t text=0; text<v.size(); text++){
+
+            // erase extra spaces from the text, the remaining text is a number
+            v[text].erase(std::remove_if(v[text].begin(), v[text].end(),
+                [](char c) { return std::isspace(static_cast<unsigned char>(c)); } ),
+                          v[text].end());
+
+
+
+            if(column==8){
+                if (verbose_vtk) printf("\n"); // printing a space for readability
+
+                break;  // exit after reading the last node on this line
+            }
+
+            // if there is text, then it is a number
+            if(v[text].size() > 0 && column<8){
+
+
+                elem_point_list(elem_id,column) = std::stod(v[text]) - 1; // Fortran convention
+                //if num_dims=2 skip the 3rd value
+
+                // printing all the mesh coordinates
+                // printing details on nodes in the element
+                if (verbose_vtk) printf(" %d ", elem_point_list(elem_id,column) );
+
+                column++; // this will make the column = 8 after all node values are saved
+            } // end if to save points in this element
+
+
+        } // end loop over the text
+
+    } // end for
+    found=false;
+
+    if (verbose_vtk) printf("\n"); // printing a space for readability
+
+
+
+
+    // testing the file read by painting a part
+    if(test_tecplot_read == true){
+        for(int node_id = 0; node_id<num_points; node_id++){
+
+            // the test also scales the coordinates by 100 before painting
+            pt_coords(node_id,0) *= 100;
+            pt_coords(node_id,1) *= 100;
+            pt_coords(node_id,2) *= 100;
+
+            double x = pt_coords(node_id,0);
+            double y = pt_coords(node_id,1);
+            double z = pt_coords(node_id,2);
+
+            // a simple sphere
+            pt_values(node_id) = sqrt(x*x + y*y + z*z) - 0.5;
+        }
+    } // end of test=true
+
+
+
+    printf("Finished reading mesh \n\n");
+
+    in.close();
+
+}
+
+
+
+
+
+// -------------------------------------------------------
+// This function writes out the data to a VTK file
+//--------------------------------------------------------
+//
+void VTK(CArray <double> &pt_coords,
+         CArray <int> &elem_point_list,
+         int num_points,
+         int num_elems,
+         int num_points_in_elem,
+         CArray <double> &pt_values)
+{
+    // Writes the mesh and its nodal values to vtk/mesh.vtk as an ASCII legacy VTK
+    // unstructured grid, creating the vtk directory when it is missing. Every cell
+    // is tagged as a linear hexahedron and two constant cell scalars are appended.
+    // If the file cannot be opened an ERROR is printed and nothing is written.
+
+    int i;           // receives the exit status of the mkdir command
+    int point_id;    // the global id for the point
+    int elem_id;     // the global id for the elem
+    int this_point;   // a local id for a point in a elem (0:7 for a Hexahedral elem)
+
+
+    FILE *out;       // the output file that is written to
+    char name[100];  // char string
+
+
+
+    std::string directory = "vtk";
+    bool path = DoesPathExist(directory);
+
+    // Create the folder for the vtk files
+    if (path==false) {
+        i=system("mkdir vtk");
+    }
+
+
+
+
+    /*
+     ---------------------------------------------------------------------------
+     Write the Geometry file
+     ---------------------------------------------------------------------------
+     */
+
+
+    snprintf(name, sizeof(name), "vtk/mesh.vtk");  // mesh file
+
+
+    out=fopen(name,"w");
+    if (!out) { printf("ERROR: Failed to open %s for writing \n", name); return; }
+
+    fprintf(out,"# vtk DataFile Version 2.0\n");  // part 1
+    fprintf(out,"Mesh for Fierro\n");             // part 2
+    fprintf(out,"ASCII \n");                      // part 3
+    fprintf(out,"DATASET UNSTRUCTURED_GRID\n\n"); // part 4
+
+    fprintf(out,"POINTS %d float\n", num_points);
+
+
+    // write all components of the point coordinates
+    for (point_id=0; point_id<num_points; point_id++){
+        fprintf(out,
+                "%f %f %f\n",
+                pt_coords(point_id,0),
+                pt_coords(point_id,1),
+                pt_coords(point_id,2));
+    } // end for
+
+    /*
+     ---------------------------------------------------------------------------
+     Write the elems
+     ---------------------------------------------------------------------------
+     */
+    fprintf(out,"\n");
+    fprintf(out,"CELLS %d %d\n", num_elems, num_elems+num_elems*num_points_in_elem);  // size=all printed values
+
+    // write all global point numbers for this elem
+    for (elem_id=0; elem_id<num_elems; elem_id++) {
+
+        fprintf(out,"%d ", num_points_in_elem); // num points in this elem
+        for (this_point=0; this_point<num_points_in_elem; this_point++){
+            fprintf(out,"%d ", elem_point_list(elem_id,this_point));
+        }
+        fprintf(out,"\n");
+
+    } // end for
+
+    fprintf(out,"\n");
+    fprintf(out,"CELL_TYPES %d \n", num_elems);
+    // elem types:
+    // linear hex = 12, linear quad = 9
+    // element types: https://vtk.org/doc/nightly/html/vtkCellType_8h_source.html
+    // element types: https://kitware.github.io/vtk-js/api/Common_DataModel_CellTypes.html
+    // vtk format: https://www.kitware.com//modeling-arbitrary-order-lagrange-finite-elements-in-the-visualization-toolkit/
+    for (elem_id=0; elem_id<num_elems; elem_id++) {
+        fprintf(out,"%d \n", 12); // linear hex is type 12
+    }
+
+
+    /*
+     ---------------------------------------------------------------------------
+     Write the nodal variable file
+     ---------------------------------------------------------------------------
+     */
+    fprintf(out,"\n");
+    fprintf(out,"POINT_DATA %d \n", num_points);
+    fprintf(out,"SCALARS point_var float 1\n"); // the 1 is number of scalar components [1:4]
+    fprintf(out,"LOOKUP_TABLE default\n");
+    for (point_id=0; point_id<num_points; point_id++) {
+        // the nodal scalar comes straight from pt_values
+        fprintf(out,"%f\n",pt_values(point_id));
+    }
+
+    /*
+     ---------------------------------------------------------------------------
+     Write the vector variables to file
+     ---------------------------------------------------------------------------
+     */
+
+
+    /*
+     ---------------------------------------------------------------------------
+     Write the scalar elem variable to file
+     ---------------------------------------------------------------------------
+     */
+    fprintf(out,"\n");
+    fprintf(out,"CELL_DATA %d \n", num_elems);
+    fprintf(out,"SCALARS elem_var float 1\n"); // the 1 is number of scalar components [1:4]
+    fprintf(out,"LOOKUP_TABLE default\n");
+    for (elem_id=0; elem_id<num_elems; elem_id++) {
+        double var=1;
+        fprintf(out,"%f\n",var);
+    }
+
+    fprintf(out,"\n");
+    fprintf(out,"SCALARS elem_var2 float 1\n"); // the 1 is number of scalar components [1:4]
+    fprintf(out,"LOOKUP_TABLE default\n");
+    for (elem_id=0; elem_id<num_elems; elem_id++) {
+        double var=10;
+        fprintf(out,"%f\n",var);
+    }
+
+    fclose(out);
+
+}
+
diff --git a/examples/pointcloud/icosahedron_bin.stl b/examples/pointcloud/icosahedron_bin.stl
new file mode 100644
index 0000000000000000000000000000000000000000..edb31f4bbd3560068b5d950f0bbc169be1ca70f2
GIT binary patch
literal 1084
zcmZuwJ5Iwu5Zw#VaR8U0ux0FoK&03OC5MQBASF`bpacrKNOYWlQ*eXqCAa}C5<L~n
zoAubwRxGXdGw;p3H@mK$*N-pp?r9Yb@+3>rvnWfmQJjwBJdMutNq#lSM@e>ZIUWw9
z<^5)NyLgyyH`VNJStX$LzEkJi;`PMKC@^N9+amf}m2zDE{k%^x4{ok?kGYOHw~v44
z#oPDClo^HwiqK%oS2GUG=%JJq>XItY3`x|WOd;Z$-$M~<F@sUI4xY?4=3`3W!@3a|
zlK+7G@FG*=vYXeHZGAyP6<B*l*&0X;+dD@!yqyAJ_4q8T$%khYdomv(Q_awO2g-69
zD!64Ik{7PE!^}WE<OH+pLn3%3>Yf=r6tPH^XNJ{)N>dno;2x}B%wUwQ5!deGg+Q5h
z)U<a|?aem-hxY22b}+4fp@#SXS_f-qwlRVa@u2S({W2vX61r>X)?@8lS;j!r)?oAA
FxqppTS~UOw

literal 0
HcmV?d00001

diff --git a/examples/pointcloud/pointcloud.cpp b/examples/pointcloud/pointcloud.cpp
new file mode 100755
index 00000000..963bffc1
--- /dev/null
+++ b/examples/pointcloud/pointcloud.cpp
@@ -0,0 +1,1101 @@
+// -----------------------------------------------
+// pointcloud reconstruction in C++
+//  credit to Andrew Morgan and Nathaniel Morgan
+//
+// To run the code with an external mesh file:
+//    ./a.out graphics-file
+//
+// Requires the matar.h and macros.h libraries from
+// the github LANL/MATAR/src folder
+//
+// The following routines in this code came from:
+//   https://paulbourke.net/geometry/polygonise/
+//   - Polygonise
+//   - VertexInterp
+//
+//
+// The surface reconstruction method is from
+//   Reconstruction and Representation of 3D Objects with Radial Basis Functions
+//    J. Carr, R. Beatson, ..., T. Evans
+//    https://www.cs.jhu.edu/~misha/Fall05/Papers/carr01.pdf   
+// -----------------------------------------------
+#include <stdio.h>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <tuple>
+
+#include "matar.h"
+#include "lu_solver.hpp"
+using namespace mtr;
+
+
+// -----------------------------------------------
+// inputs:
+
+
+// the mesh resolution in each coordinate direction
+const double dx = 0.015; // resolution
+const double dy = 0.015; // resolution
+const double dz = 0.015; // resolution
+
+
+// the mesh dimensions
+// length of the domain is 5 for crazy shape and 1 for sphere
+const double XMax = 1.0; 
+const double YMax = 1.0; 
+const double ZMax = 1.0; 
+// presumably the lower corner of the domain -- TODO confirm against the mesh-building code
+const double X0 = 0.0; 
+const double Y0 = 0.0; 
+const double Z0 = 0.0; 
+
+// scalar value of the surface to extract
+const double isoLevel=0.0; // contour to extract
+
+
+//
+// -----------------------------------------------
+// Reads a binary STL file into per-facet normal and vertex arrays plus the facet count,
+// exiting the process if the file cannot be opened, is under 100 bytes, or looks ASCII.
+std::tuple<
+    CArray<float>,   // normal
+    CArray<float>, CArray<float>, CArray<float>,   // v1X, v1Y, v1Z
+    CArray<float>, CArray<float>, CArray<float>,   // v2X, v2Y, v2Z
+    CArray<float>, CArray<float>, CArray<float>,   // v3X, v3Y, v3Z
+    size_t // n_facets
+>
+binary_stl_reader(const std::string& path)
+{
+    std::ifstream in(path, std::ios::binary | std::ios::ate);
+    if (!in) { std::perror("open"); std::exit(EXIT_FAILURE); }
+
+    const std::streamoff filesize = in.tellg();
+    if (filesize < 100) {
+        std::cerr << "ERROR: File too small to be a valid STL\n";
+        std::exit(EXIT_FAILURE);
+    }
+    in.seekg(0);
+
+    // ---- check if ASCII -------------------------------------------------
+    char magic[6] = { 0 };
+    in.read(magic, 5);          // read first 5 chars
+    in.seekg(0);               // rewind
+    if (std::strncmp(magic, "solid", 5) == 0) {
+        std::cerr
+            << "ERROR: \"" << path
+            << "\" looks like an **ASCII** STL (starts with \"solid\").\n"
+            << "Re‑export it as *binary* or implement an ASCII parser.\n";
+        std::exit(EXIT_FAILURE);        // or call ascii_stl_reader();
+    }
+
+    // ---- read 80-byte header + nominal facet count ----------------------
+    char header[80];                in.read(header, 80);
+    std::uint32_t n_facets_nominal = 0;  in.read(reinterpret_cast<char*>(&n_facets_nominal), 4);
+    // (the count field is 4 bytes wide, so it is read into a 4-byte integer)
+    // ---- compute expected count from file size to sanity-check ----------
+    // binary facet record = 50 bytes (normal + 3 vertices at 12 bytes each, + 2)
+    const size_t n_facets_from_size =
+        static_cast<size_t>((filesize - 84) / 50);
+
+    size_t n_facets = n_facets_nominal;
+    if (n_facets_nominal != n_facets_from_size) {
+        std::cout << "WARNING: facet count in header (" << n_facets_nominal
+            << ") disagrees with file size (" << n_facets_from_size
+            << ").  Using size‑derived value.\n";
+        n_facets = n_facets_from_size;
+    }
+    std::cout << "STL facets: " << n_facets << '\n';
+
+    // ---- allocate MATAR arrays -----------------------------------------
+    CArray<float> normal(n_facets, 3);
+    CArray<float> v1X(n_facets), v1Y(n_facets), v1Z(n_facets);
+    CArray<float> v2X(n_facets), v2Y(n_facets), v2Z(n_facets);
+    CArray<float> v3X(n_facets), v3Y(n_facets), v3Z(n_facets);
+
+    // ---- read facet records --------------------------------------------
+    float nrm[3], v1[3], v2[3], v3[3];
+    for (size_t i = 0; i < n_facets; ++i) {
+        in.read(reinterpret_cast<char*>(nrm), 12);
+        in.read(reinterpret_cast<char*>(v1), 12);
+        in.read(reinterpret_cast<char*>(v2), 12);
+        in.read(reinterpret_cast<char*>(v3), 12);
+        in.ignore(2);                        // attribute byte count
+
+        for (int d = 0; d < 3; ++d) normal(i, d) = nrm[d];
+        v1X(i) = v1[0]; v1Y(i) = v1[1]; v1Z(i) = v1[2];
+        v2X(i) = v2[0]; v2Y(i) = v2[1]; v2Z(i) = v2[2];
+        v3X(i) = v3[0]; v3Y(i) = v3[1]; v3Z(i) = v3[2];
+    }
+    return { normal,v1X,v1Y,v1Z,v2X,v2Y,v2Z,v3X,v3Y,v3Z,n_facets };
+}
+
+
+
+
+
+// a vector type with 3 components (plain x, y, z coordinates)
+struct vec_t{
+    double x;
+    double y;
+    double z;
+
+    // default constructor: the zero vector, so no component is ever read
+    // before it has been given a value
+    vec_t () : x(0.0), y(0.0), z(0.0) {}
+
+    // overloaded constructor: component-wise initialization
+    vec_t(double x_in, double y_in, double z_in)
+        : x(x_in),
+          y(y_in),
+          z(z_in) {}
+
+}; // end vec_t
+
+
+// a triangle data type: three corner points plus a surface normal
+struct triangle_t {
+
+    vec_t normal; // surface normal
+
+    vec_t p[3];   // three nodes with x,y,z coords
+
+    // default constructor
+    triangle_t(){}
+
+    // overloaded constructor: copies the three corner points, the normal is not set here
+    triangle_t (vec_t p_in[3])
+    {
+        for (int corner = 0; corner < 3; corner++) {
+            p[corner] = p_in[corner];
+        }
+    }
+
+}; // end triangle_t
+
+
+// calculate the unit surface normal of a triangle and store it in triangle->normal
+KOKKOS_INLINE_FUNCTION
+void calc_normal(triangle_t *triangle){
+
+    // edge vectors leaving corner 0:
+    //   A = p1 - p0,  B = p2 - p0
+    vec_t A;
+    A.x = triangle->p[1].x - triangle->p[0].x;
+    A.y = triangle->p[1].y - triangle->p[0].y;
+    A.z = triangle->p[1].z - triangle->p[0].z;
+
+    vec_t B;
+    B.x = triangle->p[2].x - triangle->p[0].x;
+    B.y = triangle->p[2].y - triangle->p[0].y;
+    B.z = triangle->p[2].z - triangle->p[0].z;
+
+    vec_t N;   // N = A x B, perpendicular to the triangle
+    N.x = A.y * B.z - A.z * B.y;
+    N.y = A.z * B.x - A.x * B.z;
+    N.z = A.x * B.y - A.y * B.x;
+
+    const double mag = sqrt(N.x*N.x + N.y*N.y + N.z*N.z);
+    // a degenerate triangle (repeated or collinear corners) gives mag == 0;
+    // store a zero normal for it rather than dividing by zero
+    const bool degenerate = !(mag > 0.0);
+    triangle->normal.x = degenerate ? 0.0 : N.x/mag;
+    triangle->normal.y = degenerate ? 0.0 : N.y/mag;
+    triangle->normal.z = degenerate ? 0.0 : N.z/mag;
+
+} // end normal
+
+
+struct gridcell_t {
+    // non-owning pointers to the corner positions and corner values of one cell
+    vec_t* p;
+    double* val;
+
+    // default constructor: null pointers until the cell is bound to data
+    gridcell_t() : p(nullptr), val(nullptr) {}
+
+    // overloaded constructor: stores the two pointers, nothing is copied
+    gridcell_t (vec_t p_in[8], double val_in[8])
+        : p(p_in),
+          val(val_in)
+    {
+    }
+
+}; // end gridcell_t
+
+
+/*
+   Find the point on the edge p1-p2 where a scalar that varies linearly
+   from valp1 (at p1) to valp2 (at p2) equals isolevel
+*/
+KOKKOS_INLINE_FUNCTION
+vec_t VertexInterp(double isolevel, vec_t p1, vec_t p2, double valp1, double valp2)
+{
+   const double tol = 0.00001; // absolute tolerance used by every check below
+
+   // an end point that already sits on the isolevel is returned as is (p1 is tested first)
+   if (fabs(isolevel-valp1) < tol) return p1;
+   if (fabs(isolevel-valp2) < tol) return p2;
+
+   // nearly constant along the edge: return p1 rather than divide by ~0
+   if (fabs(valp1-valp2) < tol) return p1;
+
+   const double frac = (isolevel - valp1) / (valp2 - valp1);
+   vec_t cut;
+   cut.x = p1.x + frac * (p2.x - p1.x);
+   cut.y = p1.y + frac * (p2.y - p1.y);
+   cut.z = p1.z + frac * (p2.z - p1.z);
+   return cut;
+}
+
+/*
+   Given a grid cell and an isolevel, calculate the triangular
+   facets required to represent the isosurface through the cell.
+   Returns the number of triangular facets (0 to 5) and loads their
+   vertices into "triangles", which must have room for 5 entries.
+   Only p[0..2] of each facet is written; the normal is left untouched.
+   0 is returned when no cell edge is cut (cell entirely above or below).
+*/
+KOKKOS_INLINE_FUNCTION
+int Polygonise(gridcell_t grid, double isolevel, triangle_t *triangles)
+{
+    // NOTE(review): edgeTable/triTable are non-const locals, presumably rebuilt on each call -- consider const
+    int i,ntriang;
+    int cubeindex;
+    vec_t vertlist[12]; // cut point on each of the 12 cell edges (only cut edges are filled in)
+    // edgeTable[cubeindex] is a 12-bit mask: bit e is set when cell edge e is cut by the isosurface
+    int edgeTable[256]={
+        0x0  , 0x109, 0x203, 0x30a, 0x406, 0x50f, 0x605, 0x70c,
+        0x80c, 0x905, 0xa0f, 0xb06, 0xc0a, 0xd03, 0xe09, 0xf00,
+        0x190, 0x99 , 0x393, 0x29a, 0x596, 0x49f, 0x795, 0x69c,
+        0x99c, 0x895, 0xb9f, 0xa96, 0xd9a, 0xc93, 0xf99, 0xe90,
+        0x230, 0x339, 0x33 , 0x13a, 0x636, 0x73f, 0x435, 0x53c,
+        0xa3c, 0xb35, 0x83f, 0x936, 0xe3a, 0xf33, 0xc39, 0xd30,
+        0x3a0, 0x2a9, 0x1a3, 0xaa , 0x7a6, 0x6af, 0x5a5, 0x4ac,
+        0xbac, 0xaa5, 0x9af, 0x8a6, 0xfaa, 0xea3, 0xda9, 0xca0,
+        0x460, 0x569, 0x663, 0x76a, 0x66 , 0x16f, 0x265, 0x36c,
+        0xc6c, 0xd65, 0xe6f, 0xf66, 0x86a, 0x963, 0xa69, 0xb60,
+        0x5f0, 0x4f9, 0x7f3, 0x6fa, 0x1f6, 0xff , 0x3f5, 0x2fc,
+        0xdfc, 0xcf5, 0xfff, 0xef6, 0x9fa, 0x8f3, 0xbf9, 0xaf0,
+        0x650, 0x759, 0x453, 0x55a, 0x256, 0x35f, 0x55 , 0x15c,
+        0xe5c, 0xf55, 0xc5f, 0xd56, 0xa5a, 0xb53, 0x859, 0x950,
+        0x7c0, 0x6c9, 0x5c3, 0x4ca, 0x3c6, 0x2cf, 0x1c5, 0xcc ,
+        0xfcc, 0xec5, 0xdcf, 0xcc6, 0xbca, 0xac3, 0x9c9, 0x8c0,
+        0x8c0, 0x9c9, 0xac3, 0xbca, 0xcc6, 0xdcf, 0xec5, 0xfcc,
+        0xcc , 0x1c5, 0x2cf, 0x3c6, 0x4ca, 0x5c3, 0x6c9, 0x7c0,
+        0x950, 0x859, 0xb53, 0xa5a, 0xd56, 0xc5f, 0xf55, 0xe5c,
+        0x15c, 0x55 , 0x35f, 0x256, 0x55a, 0x453, 0x759, 0x650,
+        0xaf0, 0xbf9, 0x8f3, 0x9fa, 0xef6, 0xfff, 0xcf5, 0xdfc,
+        0x2fc, 0x3f5, 0xff , 0x1f6, 0x6fa, 0x7f3, 0x4f9, 0x5f0,
+        0xb60, 0xa69, 0x963, 0x86a, 0xf66, 0xe6f, 0xd65, 0xc6c,
+        0x36c, 0x265, 0x16f, 0x66 , 0x76a, 0x663, 0x569, 0x460,
+        0xca0, 0xda9, 0xea3, 0xfaa, 0x8a6, 0x9af, 0xaa5, 0xbac,
+        0x4ac, 0x5a5, 0x6af, 0x7a6, 0xaa , 0x1a3, 0x2a9, 0x3a0,
+        0xd30, 0xc39, 0xf33, 0xe3a, 0x936, 0x83f, 0xb35, 0xa3c,
+        0x53c, 0x435, 0x73f, 0x636, 0x13a, 0x33 , 0x339, 0x230,
+        0xe90, 0xf99, 0xc93, 0xd9a, 0xa96, 0xb9f, 0x895, 0x99c,
+        0x69c, 0x795, 0x49f, 0x596, 0x29a, 0x393, 0x99 , 0x190,
+        0xf00, 0xe09, 0xd03, 0xc0a, 0xb06, 0xa0f, 0x905, 0x80c,
+        0x70c, 0x605, 0x50f, 0x406, 0x30a, 0x203, 0x109, 0x0   };
+    // triTable[cubeindex] lists cut-edge indices three at a time (one facet per triple); -1 ends the row
+    int triTable[256][16] =
+    {{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 8, 3, 9, 8, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 3, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 2, 10, 0, 2, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {2, 8, 3, 2, 10, 8, 10, 9, 8, -1, -1, -1, -1, -1, -1, -1},
+        {3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 11, 2, 8, 11, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 9, 0, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 11, 2, 1, 9, 11, 9, 8, 11, -1, -1, -1, -1, -1, -1, -1},
+        {3, 10, 1, 11, 10, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 10, 1, 0, 8, 10, 8, 11, 10, -1, -1, -1, -1, -1, -1, -1},
+        {3, 9, 0, 3, 11, 9, 11, 10, 9, -1, -1, -1, -1, -1, -1, -1},
+        {9, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 3, 0, 7, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 1, 9, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 1, 9, 4, 7, 1, 7, 3, 1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 10, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {3, 4, 7, 3, 0, 4, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1},
+        {9, 2, 10, 9, 0, 2, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1},
+        {2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, -1, -1, -1, -1},
+        {8, 4, 7, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {11, 4, 7, 11, 2, 4, 2, 0, 4, -1, -1, -1, -1, -1, -1, -1},
+        {9, 0, 1, 8, 4, 7, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1},
+        {4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, -1, -1, -1, -1},
+        {3, 10, 1, 3, 11, 10, 7, 8, 4, -1, -1, -1, -1, -1, -1, -1},
+        {1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, -1, -1, -1, -1},
+        {4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, -1, -1, -1, -1},
+        {4, 7, 11, 4, 11, 9, 9, 11, 10, -1, -1, -1, -1, -1, -1, -1},
+        {9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 5, 4, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 5, 4, 1, 5, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {8, 5, 4, 8, 3, 5, 3, 1, 5, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 10, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {3, 0, 8, 1, 2, 10, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1},
+        {5, 2, 10, 5, 4, 2, 4, 0, 2, -1, -1, -1, -1, -1, -1, -1},
+        {2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, -1, -1, -1, -1},
+        {9, 5, 4, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 11, 2, 0, 8, 11, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1},
+        {0, 5, 4, 0, 1, 5, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1},
+        {2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, -1, -1, -1, -1},
+        {10, 3, 11, 10, 1, 3, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1},
+        {4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, -1, -1, -1, -1},
+        {5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, -1, -1, -1, -1},
+        {5, 4, 8, 5, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1},
+        {9, 7, 8, 5, 7, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 3, 0, 9, 5, 3, 5, 7, 3, -1, -1, -1, -1, -1, -1, -1},
+        {0, 7, 8, 0, 1, 7, 1, 5, 7, -1, -1, -1, -1, -1, -1, -1},
+        {1, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 7, 8, 9, 5, 7, 10, 1, 2, -1, -1, -1, -1, -1, -1, -1},
+        {10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, -1, -1, -1, -1},
+        {8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, -1, -1, -1, -1},
+        {2, 10, 5, 2, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1},
+        {7, 9, 5, 7, 8, 9, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1},
+        {9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, -1, -1, -1, -1},
+        {2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, -1, -1, -1, -1},
+        {11, 2, 1, 11, 1, 7, 7, 1, 5, -1, -1, -1, -1, -1, -1, -1},
+        {9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, -1, -1, -1, -1},
+        {5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, -1},
+        {11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, -1},
+        {11, 10, 5, 7, 11, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 3, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 0, 1, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 8, 3, 1, 9, 8, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1},
+        {1, 6, 5, 2, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 6, 5, 1, 2, 6, 3, 0, 8, -1, -1, -1, -1, -1, -1, -1},
+        {9, 6, 5, 9, 0, 6, 0, 2, 6, -1, -1, -1, -1, -1, -1, -1},
+        {5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, -1, -1, -1, -1},
+        {2, 3, 11, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {11, 0, 8, 11, 2, 0, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1},
+        {0, 1, 9, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1},
+        {5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, -1, -1, -1, -1},
+        {6, 3, 11, 6, 5, 3, 5, 1, 3, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, -1, -1, -1, -1},
+        {3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, -1, -1, -1, -1},
+        {6, 5, 9, 6, 9, 11, 11, 9, 8, -1, -1, -1, -1, -1, -1, -1},
+        {5, 10, 6, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 3, 0, 4, 7, 3, 6, 5, 10, -1, -1, -1, -1, -1, -1, -1},
+        {1, 9, 0, 5, 10, 6, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1},
+        {10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, -1, -1, -1, -1},
+        {6, 1, 2, 6, 5, 1, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, -1, -1, -1, -1},
+        {8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, -1, -1, -1, -1},
+        {7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, -1},
+        {3, 11, 2, 7, 8, 4, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1},
+        {5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, -1, -1, -1, -1},
+        {0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1},
+        {9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, -1},
+        {8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, -1, -1, -1, -1},
+        {5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, -1},
+        {0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, -1},
+        {6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, -1, -1, -1, -1},
+        {10, 4, 9, 6, 4, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 10, 6, 4, 9, 10, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1},
+        {10, 0, 1, 10, 6, 0, 6, 4, 0, -1, -1, -1, -1, -1, -1, -1},
+        {8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, -1, -1, -1, -1},
+        {1, 4, 9, 1, 2, 4, 2, 6, 4, -1, -1, -1, -1, -1, -1, -1},
+        {3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, -1, -1, -1, -1},
+        {0, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {8, 3, 2, 8, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1},
+        {10, 4, 9, 10, 6, 4, 11, 2, 3, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, -1, -1, -1, -1},
+        {3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, -1, -1, -1, -1},
+        {6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, -1},
+        {9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, -1, -1, -1, -1},
+        {8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, -1},
+        {3, 11, 6, 3, 6, 0, 0, 6, 4, -1, -1, -1, -1, -1, -1, -1},
+        {6, 4, 8, 11, 6, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {7, 10, 6, 7, 8, 10, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1},
+        {0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, -1, -1, -1, -1},
+        {10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, -1, -1, -1, -1},
+        {10, 6, 7, 10, 7, 1, 1, 7, 3, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, -1, -1, -1, -1},
+        {2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, -1},
+        {7, 8, 0, 7, 0, 6, 6, 0, 2, -1, -1, -1, -1, -1, -1, -1},
+        {7, 3, 2, 6, 7, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, -1, -1, -1, -1},
+        {2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, -1},
+        {1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, -1},
+        {11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, -1, -1, -1, -1},
+        {8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, -1},
+        {0, 9, 1, 11, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, -1, -1, -1, -1},
+        {7, 11, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {3, 0, 8, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 1, 9, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {8, 1, 9, 8, 3, 1, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1},
+        {10, 1, 2, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 10, 3, 0, 8, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1},
+        {2, 9, 0, 2, 10, 9, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1},
+        {6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, -1, -1, -1, -1},
+        {7, 2, 3, 6, 2, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {7, 0, 8, 7, 6, 0, 6, 2, 0, -1, -1, -1, -1, -1, -1, -1},
+        {2, 7, 6, 2, 3, 7, 0, 1, 9, -1, -1, -1, -1, -1, -1, -1},
+        {1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, -1, -1, -1, -1},
+        {10, 7, 6, 10, 1, 7, 1, 3, 7, -1, -1, -1, -1, -1, -1, -1},
+        {10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, -1, -1, -1, -1},
+        {0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, -1, -1, -1, -1},
+        {7, 6, 10, 7, 10, 8, 8, 10, 9, -1, -1, -1, -1, -1, -1, -1},
+        {6, 8, 4, 11, 8, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {3, 6, 11, 3, 0, 6, 0, 4, 6, -1, -1, -1, -1, -1, -1, -1},
+        {8, 6, 11, 8, 4, 6, 9, 0, 1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, -1, -1, -1, -1},
+        {6, 8, 4, 6, 11, 8, 2, 10, 1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, -1, -1, -1, -1},
+        {4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, -1, -1, -1, -1},
+        {10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, -1},
+        {8, 2, 3, 8, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1},
+        {0, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, -1, -1, -1, -1},
+        {1, 9, 4, 1, 4, 2, 2, 4, 6, -1, -1, -1, -1, -1, -1, -1},
+        {8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, -1, -1, -1, -1},
+        {10, 1, 0, 10, 0, 6, 6, 0, 4, -1, -1, -1, -1, -1, -1, -1},
+        {4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, -1},
+        {10, 9, 4, 6, 10, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 9, 5, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 3, 4, 9, 5, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1},
+        {5, 0, 1, 5, 4, 0, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1},
+        {11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, -1, -1, -1, -1},
+        {9, 5, 4, 10, 1, 2, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1},
+        {6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, -1, -1, -1, -1},
+        {7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, -1, -1, -1, -1},
+        {3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, -1},
+        {7, 2, 3, 7, 6, 2, 5, 4, 9, -1, -1, -1, -1, -1, -1, -1},
+        {9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, -1, -1, -1, -1},
+        {3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, -1, -1, -1, -1},
+        {6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, -1},
+        {9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, -1, -1, -1, -1},
+        {1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, -1},
+        {4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, -1},
+        {7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, -1, -1, -1, -1},
+        {6, 9, 5, 6, 11, 9, 11, 8, 9, -1, -1, -1, -1, -1, -1, -1},
+        {3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, -1, -1, -1, -1},
+        {0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, -1, -1, -1, -1},
+        {6, 11, 3, 6, 3, 5, 5, 3, 1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, -1, -1, -1, -1},
+        {0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, -1},
+        {11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, -1},
+        {6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, -1, -1, -1, -1},
+        {5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, -1, -1, -1, -1},
+        {9, 5, 6, 9, 6, 0, 0, 6, 2, -1, -1, -1, -1, -1, -1, -1},
+        {1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, -1},
+        {1, 5, 6, 2, 1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, -1},
+        {10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, -1, -1, -1, -1},
+        {0, 3, 8, 5, 6, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {10, 5, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {11, 5, 10, 7, 5, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {11, 5, 10, 11, 7, 5, 8, 3, 0, -1, -1, -1, -1, -1, -1, -1},
+        {5, 11, 7, 5, 10, 11, 1, 9, 0, -1, -1, -1, -1, -1, -1, -1},
+        {10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, -1, -1, -1, -1},
+        {11, 1, 2, 11, 7, 1, 7, 5, 1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, -1, -1, -1, -1},
+        {9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, -1, -1, -1, -1},
+        {7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, -1},
+        {2, 5, 10, 2, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1},
+        {8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, -1, -1, -1, -1},
+        {9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, -1, -1, -1, -1},
+        {9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, -1},
+        {1, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 7, 0, 7, 1, 1, 7, 5, -1, -1, -1, -1, -1, -1, -1},
+        {9, 0, 3, 9, 3, 5, 5, 3, 7, -1, -1, -1, -1, -1, -1, -1},
+        {9, 8, 7, 5, 9, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {5, 8, 4, 5, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1},
+        {5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, -1, -1, -1, -1},
+        {0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, -1, -1, -1, -1},
+        {10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, -1},
+        {2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, -1, -1, -1, -1},
+        {0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, -1},
+        {0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, -1},
+        {9, 4, 5, 2, 11, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, -1, -1, -1, -1},
+        {5, 10, 2, 5, 2, 4, 4, 2, 0, -1, -1, -1, -1, -1, -1, -1},
+        {3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, -1},
+        {5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, -1, -1, -1, -1},
+        {8, 4, 5, 8, 5, 3, 3, 5, 1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 4, 5, 1, 0, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, -1, -1, -1, -1},
+        {9, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 11, 7, 4, 9, 11, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, -1, -1, -1, -1},
+        {1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, -1, -1, -1, -1},
+        {3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, -1},
+        {4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, -1, -1, -1, -1},
+        {9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, -1},
+        {11, 7, 4, 11, 4, 2, 2, 4, 0, -1, -1, -1, -1, -1, -1, -1},
+        {11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, -1, -1, -1, -1},
+        {2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, -1, -1, -1, -1},
+        {9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, -1},
+        {3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, -1},
+        {1, 10, 2, 8, 7, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 9, 1, 4, 1, 7, 7, 1, 3, -1, -1, -1, -1, -1, -1, -1},
+        {4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, -1, -1, -1, -1},
+        {4, 0, 3, 7, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 8, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {3, 0, 9, 3, 9, 11, 11, 9, 10, -1, -1, -1, -1, -1, -1, -1},
+        {0, 1, 10, 0, 10, 8, 8, 10, 11, -1, -1, -1, -1, -1, -1, -1},
+        {3, 1, 10, 11, 3, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 11, 1, 11, 9, 9, 11, 8, -1, -1, -1, -1, -1, -1, -1},
+        {3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, -1, -1, -1, -1},
+        {0, 2, 11, 8, 0, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {3, 2, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {2, 3, 8, 2, 8, 10, 10, 8, 9, -1, -1, -1, -1, -1, -1, -1},
+        {9, 10, 2, 0, 9, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, -1, -1, -1, -1},
+        {1, 10, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 3, 8, 9, 1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 9, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}};
+
+    
+    /*
+      Build the 8-bit table index: bit k is set when the scalar
+      value at cell corner k is below the isolevel
+     */
+    cubeindex = 0;
+
+    if (grid.val[0] < isolevel) cubeindex |= 1;
+    if (grid.val[1] < isolevel) cubeindex |= 2;
+    if (grid.val[2] < isolevel) cubeindex |= 4;
+    if (grid.val[3] < isolevel) cubeindex |= 8;
+    if (grid.val[4] < isolevel) cubeindex |= 16;
+    if (grid.val[5] < isolevel) cubeindex |= 32;
+    if (grid.val[6] < isolevel) cubeindex |= 64;
+    if (grid.val[7] < isolevel) cubeindex |= 128;
+    
+    
+    
+    /* No edge is cut: cube is entirely in/out of the surface */
+    if (edgeTable[cubeindex] == 0)
+        return(0);
+    
+    /* Find the vertices where the surface intersects the cube; vertlist[e] is only set for cut edges */
+    if (edgeTable[cubeindex] & 1)
+        vertlist[0] =
+         VertexInterp(isolevel,grid.p[0],grid.p[1],grid.val[0],grid.val[1]);
+    if (edgeTable[cubeindex] & 2)
+        vertlist[1] =
+         VertexInterp(isolevel,grid.p[1],grid.p[2],grid.val[1],grid.val[2]);
+    if (edgeTable[cubeindex] & 4)
+        vertlist[2] =
+         VertexInterp(isolevel,grid.p[2],grid.p[3],grid.val[2],grid.val[3]);
+    if (edgeTable[cubeindex] & 8)
+        vertlist[3] =
+         VertexInterp(isolevel,grid.p[3],grid.p[0],grid.val[3],grid.val[0]);
+    if (edgeTable[cubeindex] & 16)
+        vertlist[4] =
+         VertexInterp(isolevel,grid.p[4],grid.p[5],grid.val[4],grid.val[5]);
+    if (edgeTable[cubeindex] & 32)
+        vertlist[5] =
+         VertexInterp(isolevel,grid.p[5],grid.p[6],grid.val[5],grid.val[6]);
+    if (edgeTable[cubeindex] & 64)
+        vertlist[6] =
+         VertexInterp(isolevel,grid.p[6],grid.p[7],grid.val[6],grid.val[7]);
+    if (edgeTable[cubeindex] & 128)
+        vertlist[7] =
+         VertexInterp(isolevel,grid.p[7],grid.p[4],grid.val[7],grid.val[4]);
+    if (edgeTable[cubeindex] & 256)
+        vertlist[8] =
+         VertexInterp(isolevel,grid.p[0],grid.p[4],grid.val[0],grid.val[4]);
+    if (edgeTable[cubeindex] & 512)
+        vertlist[9] =
+         VertexInterp(isolevel,grid.p[1],grid.p[5],grid.val[1],grid.val[5]);
+    if (edgeTable[cubeindex] & 1024)
+        vertlist[10] =
+         VertexInterp(isolevel,grid.p[2],grid.p[6],grid.val[2],grid.val[6]);
+    if (edgeTable[cubeindex] & 2048)
+        vertlist[11] =
+         VertexInterp(isolevel,grid.p[3],grid.p[7],grid.val[3],grid.val[7]);
+    
+    /* Create the triangles: each triple of edge indices in the row picks three cut points */
+    ntriang = 0;
+    for (i=0; triTable[cubeindex][i]!=-1; i+=3) {
+        
+        triangles[ntriang].p[0] = vertlist[triTable[cubeindex][i  ]];
+        triangles[ntriang].p[1] = vertlist[triTable[cubeindex][i+1]];
+        triangles[ntriang].p[2] = vertlist[triTable[cubeindex][i+2]];
+        
+        ntriang++;
+    } // end for i
+    
+    return(ntriang);
+}
+
+
+// Gaussian function part of the RBF: exp(-|xi - xj|^2)
+// rbf = p(x) + lambda_j*exp(-(x - xj)*(x - xj))
+KOKKOS_FUNCTION
+double Gaussian_fcn(double xi_value[3], double xj_value[3]){
+    double r2 = 0.0; // squared distance between the two points
+    for (size_t d = 0; d != 3; ++d) {
+        const double delta = xi_value[d] - xj_value[d];
+        r2 += delta*delta;
+    }
+    return exp(-r2);
+}
+
+// biharmonic function part of the RBF: the distance |xi - xj|
+// rbf = p(x) + lambda_j*sqrt((x - xj)*(x - xj))
+KOKKOS_FUNCTION
+double biharmonic_fcn(double xi_value[3], double xj_value[3]){
+    // per-coordinate offsets between the two points
+    const double dx = xi_value[0] - xj_value[0];
+    const double dy = xi_value[1] - xj_value[1];
+    const double dz = xi_value[2] - xj_value[2];
+
+    return sqrt(dx*dx + dy*dy + dz*dz);
+}
+
+
+
+int main(int argc, char *argv[])
+{
+    Kokkos::initialize(argc, argv);
+    {  
+
+        printf("Pointcloud reconstruction \n\n");
+
+        if(argc==1){
+            printf("Please supply an STL file for testing the point cloud surface reconstruction code \n");
+            return 0;
+        }
+        
+        std::string filename = argv[1];
+
+        auto [normal_host, 
+              v1X_host, v1Y_host, v1Z_host, 
+              v2X_host, v2Y_host, v2Z_host, 
+              v3X_host, v3Y_host, v3Z_host, 
+              num_inp_triangles_host] = binary_stl_reader(filename);
+        
+        // Warning on C++ support:
+        // At this time with C++, the contents from a tuple cannot 
+        // be used inside a lambda function.  The parallel loops use 
+        // lambda functions. To overcome this C++ limitation, all 
+        // contents in the tuple will be copied or pointed to (Using 
+        // a MATAR dual view) allowing the data to be used in parallel.
+        const size_t num_inp_triangles = num_inp_triangles_host;
+        DViewCArrayKokkos <float> normal(&normal_host(0,0), num_inp_triangles, 3);
+        DViewCArrayKokkos <float> v1X(&v1X_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v1Y(&v1Y_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v1Z(&v1Z_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v2X(&v2X_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v2Y(&v2Y_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v2Z(&v2Z_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v3X(&v3X_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v3Y(&v3Y_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v3Z(&v3Z_host(0),num_inp_triangles);
+
+        normal.update_device(); 
+        v1X.update_device(); 
+        v1Y.update_device(); 
+        v1Z.update_device(); 
+        v2X.update_device(); 
+        v2Y.update_device(); 
+        v2Z.update_device(); 
+        v3X.update_device(); 
+        v3Y.update_device(); 
+        v3Z.update_device();
+        
+        
+        // define mesh spacing, it is used to create a mesh
+            
+        double LX = (XMax - X0);   // length in x-dir
+        double LY = (YMax - Y0);
+        double LZ = (ZMax - Z0);
+        
+        // the number of nodes in the mesh
+        int num_pt_x = (int)( LX/dx ) + 1;  // there must be at least 2 nodes
+        int num_pt_y = (int)( LY/dy ) + 1;  // there must be at least 2 nodes
+        int num_pt_z = (int)( LZ/dz ) + 1;  // there must be at least 2 nodes
+        
+        
+        // mesh coordinates
+        DCArrayKokkos <double> x(num_pt_x, "pt_x");
+        DCArrayKokkos <double> y(num_pt_y, "pt_y");
+        DCArrayKokkos <double> z(num_pt_z, "pt_z");
+
+        // small distance for moving in the +/- normal directions 
+        double epsilon = 0.1*fmin(fmin(dx, dy), dz);
+
+        
+        // function with isosurface that we want extracted
+        DCArrayKokkos <double> gridValues (num_pt_x,num_pt_y,num_pt_z, "grid_values");
+        
+
+        // define the triangles of extracted surface
+        const size_t num_elems = (num_pt_x-1)*(num_pt_y-1)*(num_pt_z-1);
+        DCArrayKokkos <triangle_t> all_mesh_surf_triangles(num_elems, 5, "mesh_surf_tris"); // max of 5 per elem
+        DCArrayKokkos <size_t> num_triangles_in_elem(num_elems, "num_tris_in_elem");
+        num_triangles_in_elem.set_values(0);
+
+
+        printf("Creating point cloud data from STL file \n\n");
+
+        // define a point cloud
+        size_t num_points = num_inp_triangles*3; // 1 point per triangle plus 2 more in the +/- directions
+        DCArrayKokkos <double> point_positions(num_points, 3, "point_positions");
+        DCArrayKokkos <double> point_signed_distance(num_points, "point_sign_distance"); // this is f in the journal paper
+
+        //for(size_t tri=0; tri<num_inp_triangles; tri++){
+        FOR_ALL(tri, 0, num_inp_triangles, {
+            // point on surface
+            point_positions(tri, 0) =  1.0/3.0*((double)v1X(tri) + (double)v2X(tri) + (double)v3X(tri));
+            point_positions(tri, 1) =  1.0/3.0*((double)v1Y(tri) + (double)v2Y(tri) + (double)v3Y(tri));
+            point_positions(tri, 2) =  1.0/3.0*((double)v1Z(tri) + (double)v2Z(tri) + (double)v3Z(tri));
+
+            point_signed_distance(tri) = 0.0;
+
+            // off surface +normal
+            point_positions(tri+num_inp_triangles, 0) =  point_positions(tri, 0) + epsilon*(double)normal(tri, 0);
+            point_positions(tri+num_inp_triangles, 1) =  point_positions(tri, 1) + epsilon*(double)normal(tri, 1);
+            point_positions(tri+num_inp_triangles, 2) =  point_positions(tri, 2) + epsilon*(double)normal(tri, 2);
+
+            point_signed_distance(tri+num_inp_triangles) = epsilon;
+
+            // off surface -normal
+            point_positions(tri+2*num_inp_triangles, 0) =  point_positions(tri, 0) - epsilon*(double)normal(tri, 0);
+            point_positions(tri+2*num_inp_triangles, 1) =  point_positions(tri, 1) - epsilon*(double)normal(tri, 1);
+            point_positions(tri+2*num_inp_triangles, 2) =  point_positions(tri, 2) - epsilon*(double)normal(tri, 2);
+
+            point_signed_distance(tri+2*num_inp_triangles) = -epsilon;
+
+        }); // end parallel for tri's in the file
+
+
+        // ----------------------------
+        // Reconstruct surface here
+        // ----------------------------
+
+        printf("Reconstructing surface using point cloud data \n\n");
+
+        size_t Pn = 4; // For p(x) = c0 + c1*x + c2*y + c3*z 
+        const size_t N = num_points+Pn;
+        DCArrayKokkos <double> M_matrix(N,N,"M_matrix");
+        DCArrayKokkos <double> b_vector(N, "b_vector");
+
+        // -----------------------
+        // assemble system matrix
+        //    Mx = B
+        // _     _  _      _   _ _
+        // |A   P|  |lambda|   |f|
+        // |P^T 0| *| c    | = |0|
+        // -     -  -      -   - -
+        //
+        // -----------------------
+
+        // initializing to zero
+        M_matrix.set_values(0.0);
+
+        // assemble M
+        FOR_ALL(i, 0, num_points, 
+                j, 0, num_points, {
+
+                // this is the A matrix part of M
+                M_matrix(i, j) = Gaussian_fcn(&point_positions(i,0), &point_positions(j,0));
+        });
+
+        FOR_ALL(i, 0, num_points, {
+            // Polynomial basis: [1, x, y, z]
+            // this is the P matrix part of M
+            M_matrix(i, num_points+0) = 1.0;
+            M_matrix(i, num_points+1) = point_positions(i,0); // x coord
+            M_matrix(i, num_points+2) = point_positions(i,1); // y coord
+            M_matrix(i, num_points+3) = point_positions(i,2); // z coord
+        }); // end for i
+
+        FOR_ALL(j, 0, num_points, {
+            // this is the P^T matrix part of M
+            M_matrix(num_points+0, j) = 1.0;
+            M_matrix(num_points+1, j) = point_positions(j,0); // x coord
+            M_matrix(num_points+2, j) = point_positions(j,1); // y coord
+            M_matrix(num_points+3, j) = point_positions(j,2); // z coord
+        }); // end for j
+
+        // adding the zeros in the bottom right corner
+        // for (size_t i = num_points; i < num_points+Pn; ++i) {
+        //     for (size_t j = num_points; j < num_points+Pn; ++j) {
+        //         M_matrix(i, j) = 0.0;
+        //     } // end for j
+        // } // end for i
+
+        //for(size_t i = num_points; i < num_points+Pn; ++i){
+        FOR_ALL(i, num_points, N, {
+            b_vector(i) = 0.0;
+        }); // end for
+
+        // assemble RHS vector, b
+        //for (size_t i = 0; i < num_points; ++i) {
+        FOR_ALL(i, 0, num_points, {
+            b_vector(i) = point_signed_distance(i);
+        }); // end for
+        
+
+        M_matrix.update_host();
+        b_vector.update_host();
+
+
+
+        // ----------------------------------
+        // Solve for x in Mx=b
+        // ----------------------------------
+
+        // checking by printing
+        printf("matrix = \n");
+        RUN({
+            for (size_t i = 0; i < N; ++i) {
+                for (size_t j = 0; j < N; ++j) {
+                    printf("%f , ", M_matrix(i, j));
+                } // end for j
+                printf("\n");
+            } // end for i
+
+            printf("\n");
+            printf("b = \n");
+            for (size_t i = 0; i < N; ++i) {
+                printf("%f \n", b_vector(i));
+            }
+        });
+
+        DCArrayKokkos <size_t> perm (N, "perm");
+        perm.set_values(0);
+        CArrayKokkos <double> vv(N, "vv");
+        
+        // used for LU problem
+
+        int singular = 0;
+        int parity = 0;
+        
+        // Get the LU decomposition of the Mass Matrix
+        singular = LU_decompose_host(M_matrix, perm, vv, parity);  // matrix is returned as the LU matrix  
+        if(singular==0){
+            printf("ERROR: matrix to fit point cloud surface is singluar \n");
+            return 0;
+        }
+        // BUG in LU decompose
+
+        // RUN({
+        //     int singular_d = 0; 
+        //     int parity_d = 0;
+        //     singular_d = LU_decompose(M_matrix, perm, vv, parity_d);  // M is returned as the LU matrix  
+        //     if(singular_d==0){
+        //         printf("ERROR: matrix is singluar \n");
+        //     }
+        // });
+        
+        LU_backsub_host(M_matrix, perm, b_vector);  // note: answer is sent back in b_vector
+
+        // RUN({
+        //     LU_backsub(M_matrix, perm, b_vector);  // note: answer is sent back in b
+        // });
+
+        RUN({
+            //lambda coefficients for radial basis function, slice out only lambda values and polynomial values
+            auto lambda = ViewCArrayKokkos <double> (&b_vector(0), num_points);
+            auto coefs  = ViewCArrayKokkos <double> (&b_vector(num_points), Pn);
+            printf("coeffs = \n");
+            for(size_t i=0; i<Pn; i++){
+                printf("%f \n", coefs(i));
+            }
+            printf("lambda = \n");
+            for(size_t i=0; i<num_points; i++){
+                printf("%f \n", lambda(i));
+            }
+            printf("perm = \n");
+            for(size_t i=0; i<N; i++){
+                printf("%zu \n", perm(i));
+            }
+        });
+
+        // ----------------------------------
+        // Evaluate surface function on mesh
+        // ----------------------------------
+
+        printf("Evaluating surf function on mesh \n");
+        FOR_ALL(i, 0, num_pt_x, {
+            x(i) = dx*(double)i + X0;;
+        });
+        FOR_ALL(j, 0, num_pt_y, {
+            y(j) = dy*(double)j + Y0;
+        });
+        FOR_ALL(k, 0, num_pt_z, {
+            z(k) = dz*(double)k + Z0;
+        });
+        Kokkos::fence();
+
+        // save mesh coordinates of the nodes
+        FOR_ALL(k, 0, num_pt_z, 
+                j, 0, num_pt_y,
+                i, 0, num_pt_x, {
+
+                    double x_point[3];
+                    x_point[0] = x(i);
+                    x_point[1] = y(j);
+                    x_point[2] = z(k);
+                    
+                    // lambda coefficients for radial basis function, slice out only lambda values and polynomial values
+                    // lambda = ViewCArrayKokkos <double> (&b_vector(0), num_points);
+                    // coefs  = ViewCArrayKokkos <double> (&b_vector(num_points), Pn);
+
+                    // evaluate the polynomial part
+                    gridValues(i,j,k) = b_vector(0+num_points) + 
+                                        b_vector(1+num_points)*x(i) + 
+                                        b_vector(2+num_points)*y(j) + 
+                                        b_vector(3+num_points)*z(k);
+
+
+                    for (size_t point=0; point<num_points; point++){
+                        gridValues(i,j,k) += b_vector(point)*Gaussian_fcn(&x_point[0], &point_positions(point,0));
+                    } // end for points      
+        
+        }); // end parallel over k,j,i
+
+        x.update_host();
+        y.update_host();
+        z.update_host();
+        gridValues.update_host();
+        
+        
+        
+        
+        // ------------------------------------
+        // Use marching cubes to build surface
+        // ------------------------------------
+
+        printf("Running marching cubes algorithm\n");
+        
+        FOR_ALL(k, 0, num_pt_z-1,
+                j, 0, num_pt_y-1,
+                i, 0, num_pt_x-1, {
+
+                    // elem gid
+                    size_t elem_gid = i + j*(num_pt_x-1) + k*(num_pt_x-1)*(num_pt_y-1);
+        
+                    // extract the x,y,z node coords
+                    // using the index ordering for the cell
+                    vec_t xyzs [8];
+                    xyzs[0] = vec_t(x(i  ), y(j  ), z(k  ));
+                    xyzs[1] = vec_t(x(i+1), y(j  ), z(k  ));
+                    xyzs[2] = vec_t(x(i+1), y(j  ), z(k+1));
+                    xyzs[3] = vec_t(x(i  ), y(j  ), z(k+1));
+                    xyzs[4] = vec_t(x(i  ), y(j+1), z(k  ));
+                    xyzs[5] = vec_t(x(i+1), y(j+1), z(k  ));
+                    xyzs[6] = vec_t(x(i+1), y(j+1), z(k+1));
+                    xyzs[7] = vec_t(x(i  ), y(j+1), z(k+1));
+        
+        
+                    // extract the values at the nodes
+                    // using the index ordering for the cell
+                    double vals [8];
+                    vals[0] = gridValues(i  ,j  ,k  );
+                    vals[1] = gridValues(i+1,j  ,k  );
+                    vals[2] = gridValues(i+1,j  ,k+1);
+                    vals[3] = gridValues(i  ,j  ,k+1);
+                    vals[4] = gridValues(i  ,j+1,k  );
+                    vals[5] = gridValues(i+1,j+1,k  );
+                    vals[6] = gridValues(i+1,j+1,k+1);
+                    vals[7] = gridValues(i  ,j+1,k+1);
+        
+        
+                    // details of the cell, save coords and the values at the nodes
+                    gridcell_t cell(xyzs, vals);
+        
+        
+                    // the most triangles in a cell is 5
+                    triangle_t triangles[5];
+                    num_triangles_in_elem(elem_gid) = Polygonise(cell, isoLevel, triangles);
+        
+                    // save the triangles
+                    for (size_t tri = 0; tri < num_triangles_in_elem(elem_gid); tri++)
+                    {
+                        all_mesh_surf_triangles(elem_gid,tri) = triangles[tri];
+                    } // end for tri
+
+        });  // end parallel for k,j,i
+
+        
+        // calculate the normal vector of triangles
+        FOR_ALL(elem_gid, 0, num_elems, {
+            for (size_t tri = 0; tri < num_triangles_in_elem(elem_gid); tri++){
+                calc_normal(&all_mesh_surf_triangles(elem_gid, tri));
+            }
+        }); // end loop over triangles
+
+        all_mesh_surf_triangles.update_host();
+        num_triangles_in_elem.update_host();
+
+        
+        printf("Marching cubes finished \n\n");
+        
+        
+
+        // --------------------------------------------------
+        // Export STL file using results from marching cubes
+        // --------------------------------------------------
+
+        printf("Exporting STL file for a 3D printer\n");
+        
+        
+        // export triangles as STL file
+        
+        FILE * myfile;
+        myfile=fopen("surface.stl","w");
+        fprintf(myfile,"solid points \n");
+        // a serial file write
+        for(size_t elem_gid=0; elem_gid<num_elems; elem_gid++){
+            for (size_t tri = 0; tri < num_triangles_in_elem.host(elem_gid); tri++){
+        
+                fprintf(myfile,"facet normal %f %f %f\n",
+                        all_mesh_surf_triangles.host(elem_gid,tri).normal.x,
+                        all_mesh_surf_triangles.host(elem_gid,tri).normal.y,
+                        all_mesh_surf_triangles.host(elem_gid,tri).normal.z);
+                
+                fprintf(myfile,"outer loop \n");
+                
+                fprintf(myfile,"vertex %f %f %f\n",
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[0].x,
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[0].y,
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[0].z);
+                
+                fprintf(myfile,"vertex %f %f %f\n",
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[1].x,
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[1].y,
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[1].z);
+                
+                fprintf(myfile,"vertex %f %f %f\n",
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[2].x,
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[2].y,
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[2].z);
+                fprintf(myfile,"endloop \n");
+                fprintf(myfile,"endfacet \n");
+            }   
+        } // end loop over triangles
+        fprintf(myfile,"endsolid points \n");
+        
+        fclose(myfile);
+            
+    
+        printf("Finished \n\n");
+
+    } // end of kokkos scope
+
+
+
+    Kokkos::finalize();
+
+    return 0;
+    
+} // end main

From 9631c99312adbd5adbe471b664d890e76fd678f3 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@syrupcastle.lanl.gov>
Date: Fri, 15 Aug 2025 11:56:20 -0600
Subject: [PATCH 02/23] added volume calculation

---
 examples/pointcloud/graphics-reader.h | 37 +++++++++++
 examples/pointcloud/pointcloud.cpp    | 90 +++++++++++++++++++++++++--
 2 files changed, 123 insertions(+), 4 deletions(-)

diff --git a/examples/pointcloud/graphics-reader.h b/examples/pointcloud/graphics-reader.h
index 7081de6b..0b1eb3cb 100755
--- a/examples/pointcloud/graphics-reader.h
+++ b/examples/pointcloud/graphics-reader.h
@@ -1,3 +1,40 @@
+/**********************************************************************************************
+ © 2020. Triad National Security, LLC. All rights reserved.
+ This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
+ National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
+ Department of Energy/National Nuclear Security Administration. All rights in the program are
+ reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear
+ Security Administration. The Government is granted for itself and others acting on its behalf a
+ nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare
+ derivative works, distribute copies to the public, perform publicly and display publicly, and
+ to permit others to do so.
+ This program is open source under the BSD-3 License.
+ Redistribution and use in source and binary forms, with or without modification, are permitted
+ provided that the following conditions are met:
+ 
+ 1.  Redistributions of source code must retain the above copyright notice, this list of
+ conditions and the following disclaimer.
+ 
+ 2.  Redistributions in binary form must reproduce the above copyright notice, this list of
+ conditions and the following disclaimer in the documentation and/or other materials
+ provided with the distribution.
+ 
+ 3.  Neither the name of the copyright holder nor the names of its contributors may be used
+ to endorse or promote products derived from this software without specific prior
+ written permission.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ **********************************************************************************************/
+ 
 // -----------------------------------------------
 // routines to read a graphics file
 //
diff --git a/examples/pointcloud/pointcloud.cpp b/examples/pointcloud/pointcloud.cpp
index 963bffc1..442a8dc2 100755
--- a/examples/pointcloud/pointcloud.cpp
+++ b/examples/pointcloud/pointcloud.cpp
@@ -1,3 +1,40 @@
+/**********************************************************************************************
+ © 2020. Triad National Security, LLC. All rights reserved.
+ This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
+ National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
+ Department of Energy/National Nuclear Security Administration. All rights in the program are
+ reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear
+ Security Administration. The Government is granted for itself and others acting on its behalf a
+ nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare
+ derivative works, distribute copies to the public, perform publicly and display publicly, and
+ to permit others to do so.
+ This program is open source under the BSD-3 License.
+ Redistribution and use in source and binary forms, with or without modification, are permitted
+ provided that the following conditions are met:
+ 
+ 1.  Redistributions of source code must retain the above copyright notice, this list of
+ conditions and the following disclaimer.
+ 
+ 2.  Redistributions in binary form must reproduce the above copyright notice, this list of
+ conditions and the following disclaimer in the documentation and/or other materials
+ provided with the distribution.
+ 
+ 3.  Neither the name of the copyright holder nor the names of its contributors may be used
+ to endorse or promote products derived from this software without specific prior
+ written permission.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ **********************************************************************************************/
+
 // -----------------------------------------------
 // pointcloud reconstrution in C++
 //  credit to Andrew Morgan and Nathaniel Morgan
@@ -39,9 +76,9 @@ using namespace mtr;
 
 
 // the number of nodes in the mesh
-const double dx = 0.015; // resolution
-const double dy = 0.015; // resolution
-const double dz = 0.015; // resolution
+const double dx = 0.01; // resolution
+const double dy = 0.01; // resolution
+const double dz = 0.01; // resolution
 
 
 // the mesh dimensions
@@ -210,6 +247,28 @@ void calc_normal(triangle_t *triangle){
 } // end normal
 
 
+// cross product a x b (device-callable, like calc_normal)
+KOKKOS_INLINE_FUNCTION
+vec_t cross(const vec_t &a, const vec_t &b) {
+    return vec_t(a.y*b.z - a.z*b.y,
+                 a.z*b.x - a.x*b.z,
+                 a.x*b.y - a.y*b.x);
+}
+
+// dot product a . b
+KOKKOS_INLINE_FUNCTION
+double dot(const vec_t &a, const vec_t &b) {
+    return a.x*b.x + a.y*b.y + a.z*b.z;
+}
+
+// Signed volume of the tet formed by the origin and the triangle's nodes
+// p[0], p[1], p[2]: p0 . (p1 x p2) / 6. The sign follows the node ordering.
+// Marked device-callable because main sums it inside a FOR_REDUCE_SUM kernel.
+KOKKOS_INLINE_FUNCTION
+double compute_volume(const triangle_t &triangle) {
+    return dot(triangle.p[0], cross(triangle.p[1], triangle.p[2])) / 6.0;
+}
+
 struct gridcell_t {
     
     vec_t* p;
@@ -1040,7 +1099,30 @@ int main(int argc, char *argv[])
         
         printf("Marching cubes finished \n\n");
         
-        
+
+
+        // --------------------------------------------------
+        // volume calculation
+        // --------------------------------------------------
+        double volume = 0.0;
+        double vol_lcl = 0.0;
+        FOR_REDUCE_SUM(elem_gid, 0, num_elems, 
+                       vol_lcl, {
+
+            for (size_t tri = 0; tri < num_triangles_in_elem(elem_gid); tri++){
+                vol_lcl += compute_volume(all_mesh_surf_triangles(elem_gid,tri)); 
+            }
+
+        }, volume);
+        volume = fabs(volume);
+
+        double radius =  0.794651/2.0; // radius of constructured part, based on a small mesh size
+        double PI = 3.14159265358979323846264338327950288419716939937510;
+        double vol_exact = 4.0/3.0*PI*radius*radius*radius;
+        printf("volume = %f, and `exact' sphere volume = %f \n", volume, vol_exact);
+
+        // 0.262744 at 0.001 mesh size  
+
 
         // --------------------------------------------------
         // Export STL file using results from marching cubes

From eda38f70fdae992bba358ad8e233ea95c5465b58 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@syrupcastle.lanl.gov>
Date: Thu, 4 Sep 2025 19:36:14 -0600
Subject: [PATCH 03/23] WIP: rk

---
 examples/laplaceMPI/CMakeLists.txt            |    4 +-
 examples/pointcloud/CMakeLists.txt            |    7 +-
 .../{pointcloud.cpp => pointcloud-gbl.cpp}    |    0
 examples/pointcloud/pointcloud-rk.cpp         | 1197 +++++++++++++++++
 solvers/lu_solver.hpp                         |  220 +++
 5 files changed, 1424 insertions(+), 4 deletions(-)
 rename examples/pointcloud/{pointcloud.cpp => pointcloud-gbl.cpp} (100%)
 create mode 100755 examples/pointcloud/pointcloud-rk.cpp

diff --git a/examples/laplaceMPI/CMakeLists.txt b/examples/laplaceMPI/CMakeLists.txt
index d9d4ec6c..5b114927 100644
--- a/examples/laplaceMPI/CMakeLists.txt
+++ b/examples/laplaceMPI/CMakeLists.txt
@@ -4,10 +4,10 @@ if (KOKKOS)
   #find_package(Kokkos REQUIRED) #new
   find_package(MPI REQUIRED)
   
-  #add_executable(laplace_mpi laplace_mpi.cpp)
+  add_executable(laplace_mpi laplace_mpi.cpp)
   #add_executable(laplace_mpi simple_mpi.cpp)
   #add_executable(laplace_mpi mpi_mesh_test.cpp)
-  add_executable(laplace_mpi simple_halo.cpp)
+  #add_executable(laplace_mpi simple_halo.cpp)
   add_definitions(-DHAVE_MPI=1)
 
   add_definitions(-DHAVE_KOKKOS=1)
diff --git a/examples/pointcloud/CMakeLists.txt b/examples/pointcloud/CMakeLists.txt
index bc6f7668..aafb20e9 100644
--- a/examples/pointcloud/CMakeLists.txt
+++ b/examples/pointcloud/CMakeLists.txt
@@ -16,6 +16,9 @@ if (KOKKOS)
   endif()
 
 
-  add_executable(pointcloud pointcloud.cpp)
-  target_link_libraries(pointcloud ${LINKING_LIBRARIES})
+  add_executable(pointcloud-gbl pointcloud-gbl.cpp)
+  add_executable(pointcloud-rk pointcloud-rk.cpp)
+
+  target_link_libraries(pointcloud-gbl ${LINKING_LIBRARIES})
+  target_link_libraries(pointcloud-rk ${LINKING_LIBRARIES})
 endif(KOKKOS)
diff --git a/examples/pointcloud/pointcloud.cpp b/examples/pointcloud/pointcloud-gbl.cpp
similarity index 100%
rename from examples/pointcloud/pointcloud.cpp
rename to examples/pointcloud/pointcloud-gbl.cpp
diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
new file mode 100755
index 00000000..9aae3661
--- /dev/null
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -0,0 +1,1197 @@
+/**********************************************************************************************
+ © 2020. Triad National Security, LLC. All rights reserved.
+ This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
+ National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
+ Department of Energy/National Nuclear Security Administration. All rights in the program are
+ reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear
+ Security Administration. The Government is granted for itself and others acting on its behalf a
+ nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare
+ derivative works, distribute copies to the public, perform publicly and display publicly, and
+ to permit others to do so.
+ This program is open source under the BSD-3 License.
+ Redistribution and use in source and binary forms, with or without modification, are permitted
+ provided that the following conditions are met:
+ 
+ 1.  Redistributions of source code must retain the above copyright notice, this list of
+ conditions and the following disclaimer.
+ 
+ 2.  Redistributions in binary form must reproduce the above copyright notice, this list of
+ conditions and the following disclaimer in the documentation and/or other materials
+ provided with the distribution.
+ 
+ 3.  Neither the name of the copyright holder nor the names of its contributors may be used
+ to endorse or promote products derived from this software without specific prior
+ written permission.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ **********************************************************************************************/
+
+// -----------------------------------------------
+// pointcloud reconstruction in C++
+//  credit to Andrew Morgan and Nathaniel Morgan
+//
+// To run the code with an external mesh file:
+//    ./a.out graphics-file
+//
+// Requires the matar.h and macros.h libraries from
+// the github LANL/MATAR/src folder
+//
+// The following routines in this code came from:
+//   https://paulbourke.net/geometry/polygonise/
+//   - Polygonise
+//   - VertexInterp
+//
+//
+// The surface reconstruction method is from
+//   Reconstruction and Representation of 3D Objects with Radial Basis Functions
+//    J. Carr, R. Beatson, ..., T. Evans
+//    https://www.cs.jhu.edu/~misha/Fall05/Papers/carr01.pdf   
+// -----------------------------------------------
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <stdio.h>
+#include <cmath>
+#include <cstdint>
+
+#include "matar.h"
+
+#include "lu_solver.hpp"
+
+
+using namespace mtr;
+
+const double PI = 3.14159265358979323846;
+
+// -----------------------------------------------
+// inputs:
+
+
+// mesh resolution in each coordinate direction
+const double dx = 0.1; // resolution
+const double dy = 0.1; // resolution
+const double dz = 0.1; // resolution
+
+
+// the mesh dimensions
+// length of the domain is 5 for crazy shape and 1 for sphere
+const double XMax = 1.0; 
+const double YMax = 1.0; 
+const double ZMax = 1.0; 
+// NOTE(review): presumably the coordinates of the mesh origin -- confirm in main
+const double X0 = 0.0; 
+const double Y0 = 0.0; 
+const double Z0 = 0.0; 
+
+
+const double isoLevel=0.0; // contour to extract
+
+
+//
+// -----------------------------------------------
+
+
+// Read a binary STL file and return (normal(n_facets,3), v1X,v1Y,v1Z, v2X,v2Y,v2Z,
+// v3X,v3Y,v3Z, n_facets). Exits if the file cannot be opened, is under 100 bytes,
+// or starts with "solid" (taken to be ASCII); a header/file-size count mismatch only warns.
+std::tuple<
+    CArray<float>,   // normal
+    CArray<float>, CArray<float>, CArray<float>,   // v1X, v1Y, v1Z
+    CArray<float>, CArray<float>, CArray<float>,   // v2X, v2Y, v2Z
+    CArray<float>, CArray<float>, CArray<float>,   // v3X, v3Y, v3Z
+    size_t // n_facets
+>
+binary_stl_reader(const std::string& path)
+{
+    std::ifstream in(path, std::ios::binary | std::ios::ate); // ate: tellg() gives the size
+    if (!in) { std::perror("open"); std::exit(EXIT_FAILURE); }
+    const std::streamoff filesize = in.tellg();
+    if (filesize < 100) {
+        std::cerr << "ERROR: File too small to be a valid STL\n";
+        std::exit(EXIT_FAILURE);
+    }
+    in.seekg(0);
+
+    // ---- check if ASCII -------------------------------------------------
+    char magic[6] = { 0 };
+    in.read(magic, 5);          // read first 5 chars
+    in.seekg(0);               // rewind
+    if (std::strncmp(magic, "solid", 5) == 0) {
+        std::cerr << "ERROR: \"" << path
+            << "\" looks like an **ASCII** STL (starts with \"solid\").\n"
+            << "Re‑export it as *binary* or implement an ASCII parser.\n";
+        std::exit(EXIT_FAILURE);        // or call ascii_stl_reader();
+    }
+
+    // ---- read 80-byte header + nominal facet count ----------------------
+    char header[80];                in.read(header, 80);
+    // the count is a 4-byte field; read it into a zeroed 4-byte integer
+    std::uint32_t n_facets_nominal = 0;
+    in.read(reinterpret_cast<char*>(&n_facets_nominal), sizeof(n_facets_nominal));
+
+    // ---- compute expected count from file size to sanity-check ----------
+    // binary facet record = 50 bytes (normal 12 + 3 vertices x 12 + attribute 2)
+    const size_t n_facets_from_size = static_cast<size_t>((filesize - 84) / 50);
+    size_t n_facets = n_facets_nominal;
+    if (n_facets_nominal != n_facets_from_size) {
+        std::cout << "WARNING: facet count in header (" << n_facets_nominal
+            << ") disagrees with file size (" << n_facets_from_size
+            << ").  Using size‑derived value.\n";
+        n_facets = n_facets_from_size;
+    }
+    std::cout << "STL facets: " << n_facets << '\n';
+
+    // ---- allocate MATAR arrays -----------------------------------------
+    CArray<float> normal(n_facets, 3);
+    CArray<float> v1X(n_facets), v1Y(n_facets), v1Z(n_facets);
+    CArray<float> v2X(n_facets), v2Y(n_facets), v2Z(n_facets);
+    CArray<float> v3X(n_facets), v3Y(n_facets), v3Z(n_facets);
+
+    // ---- read facet records --------------------------------------------
+    float nrm[3], v1[3], v2[3], v3[3];
+    for (size_t i = 0; i < n_facets; ++i) {
+        in.read(reinterpret_cast<char*>(nrm), 12);
+        in.read(reinterpret_cast<char*>(v1), 12);
+        in.read(reinterpret_cast<char*>(v2), 12);
+        in.read(reinterpret_cast<char*>(v3), 12);
+        in.ignore(2);                        // attribute byte count
+        for (int d = 0; d < 3; ++d) normal(i, d) = nrm[d];
+        v1X(i) = v1[0]; v1Y(i) = v1[1]; v1Z(i) = v1[2];
+        v2X(i) = v2[0]; v2Y(i) = v2[1]; v2Z(i) = v2[2];
+        v3X(i) = v3[0]; v3Y(i) = v3[1]; v3Z(i) = v3[2];
+    }
+    return { normal,v1X,v1Y,v1Z,v2X,v2Y,v2Z,v3X,v3Y,v3Z,n_facets };
+}
+
+
+
+
+
+// A 3-component vector of doubles (x, y, z).
+struct vec_t{
+    double x = 0.0;
+    double y = 0.0;
+    double z = 0.0;
+
+    // default constructor: zero vector. Device-callable because vec_t is
+    // constructed inside KOKKOS_INLINE_FUNCTION routines (e.g. calc_normal).
+    KOKKOS_INLINE_FUNCTION
+    vec_t() {}
+
+    // component-wise constructor
+    KOKKOS_INLINE_FUNCTION
+    vec_t(double x_in, double y_in, double z_in)
+        : x(x_in), y(y_in), z(z_in) {}
+
+}; // end vec_t
+
+
+// One surface facet: three corner nodes plus a normal vector
+struct triangle_t {
+
+    vec_t normal; // facet normal; not set by either constructor
+
+    vec_t p[3];   // corner nodes, each holding x,y,z coords
+
+    // construct with default-constructed normal and nodes
+    triangle_t() {}
+
+    // construct from three nodes, copying p_in[0..2]; normal stays default
+    triangle_t(vec_t p_in[3])
+    {
+        for (int node = 0; node < 3; ++node) {
+            p[node] = p_in[node];
+        }
+    }
+
+}; // end triangle_t
+
+
+// Compute the unit surface normal of a triangle and store it in triangle->normal.
+// The normal is (p1 - p0) x (p2 - p0) scaled to unit length. A degenerate
+// (zero-area) triangle has a zero-length cross product; its normal is set to
+// (0,0,0) rather than dividing by zero and storing NaNs.
+KOKKOS_INLINE_FUNCTION
+void calc_normal(triangle_t *triangle){
+
+    // edge vectors leaving node 0: A = p1 - p0, B = p2 - p0
+    const double Ax = triangle->p[1].x - triangle->p[0].x;
+    const double Ay = triangle->p[1].y - triangle->p[0].y;
+    const double Az = triangle->p[1].z - triangle->p[0].z;
+
+    const double Bx = triangle->p[2].x - triangle->p[0].x;
+    const double By = triangle->p[2].y - triangle->p[0].y;
+    const double Bz = triangle->p[2].z - triangle->p[0].z;
+
+    // N = A x B
+    const double Nx = Ay * Bz - Az * By;
+    const double Ny = Az * Bx - Ax * Bz;
+    const double Nz = Ax * By - Ay * Bx;
+
+    const double mag = sqrt(Nx*Nx + Ny*Ny + Nz*Nz);
+    const bool has_area = (mag > 0.0); // false for a degenerate triangle
+
+    // save the unit normal (zero vector when the triangle has no area)
+    triangle->normal.x = has_area ? Nx/mag : 0.0;
+    triangle->normal.y = has_area ? Ny/mag : 0.0;
+    triangle->normal.z = has_area ? Nz/mag : 0.0;
+
+} // end normal
+
+
+// cross product a x b (device-callable, like calc_normal)
+KOKKOS_INLINE_FUNCTION
+vec_t cross(const vec_t &a, const vec_t &b) {
+    return vec_t(a.y*b.z - a.z*b.y,
+                 a.z*b.x - a.x*b.z,
+                 a.x*b.y - a.y*b.x);
+}
+
+// dot product a . b
+KOKKOS_INLINE_FUNCTION
+double dot(const vec_t &a, const vec_t &b) {
+    return a.x*b.x + a.y*b.y + a.z*b.z;
+}
+
+// Signed volume of the tet formed by the origin and the triangle's nodes
+// p[0], p[1], p[2]: p0 . (p1 x p2) / 6. The sign follows the node ordering.
+// Marked device-callable so it can be summed inside a Kokkos reduction kernel.
+KOKKOS_INLINE_FUNCTION
+double compute_volume(const triangle_t &triangle) {
+    return dot(triangle.p[0], cross(triangle.p[1], triangle.p[2])) / 6.0;
+}
+
+// A marching-cubes cell: pointers to the cell's node coordinates and to the
+// scalar values at those nodes. Only the pointers are stored, nothing is copied.
+struct gridcell_t {
+    vec_t*  p;    // node coordinates
+    double* val;  // node values
+
+    // default constructor; both pointers are left unset
+    gridcell_t() {}
+
+    // overloaded constructor: keep the addresses of the caller's arrays
+    gridcell_t(vec_t p_in[8], double val_in[8])
+        : p(p_in),
+          val(val_in)
+    {}
+
+}; // end gridcell_t
+
+
+/*
+   Locate where the isosurface crosses the edge p1-p2 by linear interpolation
+   of the end values valp1, valp2. Values within 1e-5 snap to an end point.
+*/
+KOKKOS_INLINE_FUNCTION
+vec_t VertexInterp(double isolevel, vec_t p1, vec_t p2, double valp1, double valp2)
+{
+   const double tol = 0.00001;
+
+   // surface passes through p1
+   if (fabs(isolevel-valp1) < tol) return p1;
+   // surface passes through p2
+   if (fabs(isolevel-valp2) < tol) return p2;
+   // both ends carry (nearly) the same value: avoid dividing by ~0
+   if (fabs(valp1-valp2) < tol) return p1;
+
+   // fraction of the way from p1 to p2 at which the value equals isolevel
+   const double frac = (isolevel - valp1) / (valp2 - valp1);
+
+   return vec_t(p1.x + frac * (p2.x - p1.x),
+                p1.y + frac * (p2.y - p1.y),
+                p1.z + frac * (p2.z - p1.z));
+}
+
+/*
+   Given a grid cell and an isolevel, calculate the triangular
+   facets required to represent the isosurface through the cell.
+   Return the number of triangular facets, the array "triangles"
+   will be loaded up with the vertices at most 5 triangular facets.
+    0 will be returned if the grid cell is either totally above
+   or totally below the isolevel.
+*/
+KOKKOS_INLINE_FUNCTION
+int Polygonise(gridcell_t grid, double isolevel, triangle_t *triangles)
+{
+    // Both lookup tables are read-only, hence const. triangles needs room for 5 facets (longest triTable row).
+    int i,ntriang;
+    int cubeindex;
+    vec_t vertlist[12];
+    // edgeTable[cubeindex]: 12-bit mask, bit e is set when cube edge e is cut by the isosurface
+    const int edgeTable[256]={
+        0x0  , 0x109, 0x203, 0x30a, 0x406, 0x50f, 0x605, 0x70c,
+        0x80c, 0x905, 0xa0f, 0xb06, 0xc0a, 0xd03, 0xe09, 0xf00,
+        0x190, 0x99 , 0x393, 0x29a, 0x596, 0x49f, 0x795, 0x69c,
+        0x99c, 0x895, 0xb9f, 0xa96, 0xd9a, 0xc93, 0xf99, 0xe90,
+        0x230, 0x339, 0x33 , 0x13a, 0x636, 0x73f, 0x435, 0x53c,
+        0xa3c, 0xb35, 0x83f, 0x936, 0xe3a, 0xf33, 0xc39, 0xd30,
+        0x3a0, 0x2a9, 0x1a3, 0xaa , 0x7a6, 0x6af, 0x5a5, 0x4ac,
+        0xbac, 0xaa5, 0x9af, 0x8a6, 0xfaa, 0xea3, 0xda9, 0xca0,
+        0x460, 0x569, 0x663, 0x76a, 0x66 , 0x16f, 0x265, 0x36c,
+        0xc6c, 0xd65, 0xe6f, 0xf66, 0x86a, 0x963, 0xa69, 0xb60,
+        0x5f0, 0x4f9, 0x7f3, 0x6fa, 0x1f6, 0xff , 0x3f5, 0x2fc,
+        0xdfc, 0xcf5, 0xfff, 0xef6, 0x9fa, 0x8f3, 0xbf9, 0xaf0,
+        0x650, 0x759, 0x453, 0x55a, 0x256, 0x35f, 0x55 , 0x15c,
+        0xe5c, 0xf55, 0xc5f, 0xd56, 0xa5a, 0xb53, 0x859, 0x950,
+        0x7c0, 0x6c9, 0x5c3, 0x4ca, 0x3c6, 0x2cf, 0x1c5, 0xcc ,
+        0xfcc, 0xec5, 0xdcf, 0xcc6, 0xbca, 0xac3, 0x9c9, 0x8c0,
+        0x8c0, 0x9c9, 0xac3, 0xbca, 0xcc6, 0xdcf, 0xec5, 0xfcc,
+        0xcc , 0x1c5, 0x2cf, 0x3c6, 0x4ca, 0x5c3, 0x6c9, 0x7c0,
+        0x950, 0x859, 0xb53, 0xa5a, 0xd56, 0xc5f, 0xf55, 0xe5c,
+        0x15c, 0x55 , 0x35f, 0x256, 0x55a, 0x453, 0x759, 0x650,
+        0xaf0, 0xbf9, 0x8f3, 0x9fa, 0xef6, 0xfff, 0xcf5, 0xdfc,
+        0x2fc, 0x3f5, 0xff , 0x1f6, 0x6fa, 0x7f3, 0x4f9, 0x5f0,
+        0xb60, 0xa69, 0x963, 0x86a, 0xf66, 0xe6f, 0xd65, 0xc6c,
+        0x36c, 0x265, 0x16f, 0x66 , 0x76a, 0x663, 0x569, 0x460,
+        0xca0, 0xda9, 0xea3, 0xfaa, 0x8a6, 0x9af, 0xaa5, 0xbac,
+        0x4ac, 0x5a5, 0x6af, 0x7a6, 0xaa , 0x1a3, 0x2a9, 0x3a0,
+        0xd30, 0xc39, 0xf33, 0xe3a, 0x936, 0x83f, 0xb35, 0xa3c,
+        0x53c, 0x435, 0x73f, 0x636, 0x13a, 0x33 , 0x339, 0x230,
+        0xe90, 0xf99, 0xc93, 0xd9a, 0xa96, 0xb9f, 0x895, 0x99c,
+        0x69c, 0x795, 0x49f, 0x596, 0x29a, 0x393, 0x99 , 0x190,
+        0xf00, 0xe09, 0xd03, 0xc0a, 0xb06, 0xa0f, 0x905, 0x80c,
+        0x70c, 0x605, 0x50f, 0x406, 0x30a, 0x203, 0x109, 0x0   };
+    // triTable[cubeindex]: cut-edge indices in groups of three (one facet each), terminated by -1
+    const int triTable[256][16] =
+    {{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 8, 3, 9, 8, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 3, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 2, 10, 0, 2, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {2, 8, 3, 2, 10, 8, 10, 9, 8, -1, -1, -1, -1, -1, -1, -1},
+        {3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 11, 2, 8, 11, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 9, 0, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 11, 2, 1, 9, 11, 9, 8, 11, -1, -1, -1, -1, -1, -1, -1},
+        {3, 10, 1, 11, 10, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 10, 1, 0, 8, 10, 8, 11, 10, -1, -1, -1, -1, -1, -1, -1},
+        {3, 9, 0, 3, 11, 9, 11, 10, 9, -1, -1, -1, -1, -1, -1, -1},
+        {9, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 3, 0, 7, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 1, 9, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 1, 9, 4, 7, 1, 7, 3, 1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 10, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {3, 4, 7, 3, 0, 4, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1},
+        {9, 2, 10, 9, 0, 2, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1},
+        {2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, -1, -1, -1, -1},
+        {8, 4, 7, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {11, 4, 7, 11, 2, 4, 2, 0, 4, -1, -1, -1, -1, -1, -1, -1},
+        {9, 0, 1, 8, 4, 7, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1},
+        {4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, -1, -1, -1, -1},
+        {3, 10, 1, 3, 11, 10, 7, 8, 4, -1, -1, -1, -1, -1, -1, -1},
+        {1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, -1, -1, -1, -1},
+        {4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, -1, -1, -1, -1},
+        {4, 7, 11, 4, 11, 9, 9, 11, 10, -1, -1, -1, -1, -1, -1, -1},
+        {9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 5, 4, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 5, 4, 1, 5, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {8, 5, 4, 8, 3, 5, 3, 1, 5, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 10, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {3, 0, 8, 1, 2, 10, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1},
+        {5, 2, 10, 5, 4, 2, 4, 0, 2, -1, -1, -1, -1, -1, -1, -1},
+        {2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, -1, -1, -1, -1},
+        {9, 5, 4, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 11, 2, 0, 8, 11, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1},
+        {0, 5, 4, 0, 1, 5, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1},
+        {2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, -1, -1, -1, -1},
+        {10, 3, 11, 10, 1, 3, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1},
+        {4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, -1, -1, -1, -1},
+        {5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, -1, -1, -1, -1},
+        {5, 4, 8, 5, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1},
+        {9, 7, 8, 5, 7, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 3, 0, 9, 5, 3, 5, 7, 3, -1, -1, -1, -1, -1, -1, -1},
+        {0, 7, 8, 0, 1, 7, 1, 5, 7, -1, -1, -1, -1, -1, -1, -1},
+        {1, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 7, 8, 9, 5, 7, 10, 1, 2, -1, -1, -1, -1, -1, -1, -1},
+        {10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, -1, -1, -1, -1},
+        {8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, -1, -1, -1, -1},
+        {2, 10, 5, 2, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1},
+        {7, 9, 5, 7, 8, 9, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1},
+        {9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, -1, -1, -1, -1},
+        {2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, -1, -1, -1, -1},
+        {11, 2, 1, 11, 1, 7, 7, 1, 5, -1, -1, -1, -1, -1, -1, -1},
+        {9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, -1, -1, -1, -1},
+        {5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, -1},
+        {11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, -1},
+        {11, 10, 5, 7, 11, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 3, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 0, 1, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 8, 3, 1, 9, 8, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1},
+        {1, 6, 5, 2, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 6, 5, 1, 2, 6, 3, 0, 8, -1, -1, -1, -1, -1, -1, -1},
+        {9, 6, 5, 9, 0, 6, 0, 2, 6, -1, -1, -1, -1, -1, -1, -1},
+        {5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, -1, -1, -1, -1},
+        {2, 3, 11, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {11, 0, 8, 11, 2, 0, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1},
+        {0, 1, 9, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1},
+        {5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, -1, -1, -1, -1},
+        {6, 3, 11, 6, 5, 3, 5, 1, 3, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, -1, -1, -1, -1},
+        {3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, -1, -1, -1, -1},
+        {6, 5, 9, 6, 9, 11, 11, 9, 8, -1, -1, -1, -1, -1, -1, -1},
+        {5, 10, 6, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 3, 0, 4, 7, 3, 6, 5, 10, -1, -1, -1, -1, -1, -1, -1},
+        {1, 9, 0, 5, 10, 6, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1},
+        {10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, -1, -1, -1, -1},
+        {6, 1, 2, 6, 5, 1, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, -1, -1, -1, -1},
+        {8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, -1, -1, -1, -1},
+        {7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, -1},
+        {3, 11, 2, 7, 8, 4, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1},
+        {5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, -1, -1, -1, -1},
+        {0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1},
+        {9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, -1},
+        {8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, -1, -1, -1, -1},
+        {5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, -1},
+        {0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, -1},
+        {6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, -1, -1, -1, -1},
+        {10, 4, 9, 6, 4, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 10, 6, 4, 9, 10, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1},
+        {10, 0, 1, 10, 6, 0, 6, 4, 0, -1, -1, -1, -1, -1, -1, -1},
+        {8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, -1, -1, -1, -1},
+        {1, 4, 9, 1, 2, 4, 2, 6, 4, -1, -1, -1, -1, -1, -1, -1},
+        {3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, -1, -1, -1, -1},
+        {0, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {8, 3, 2, 8, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1},
+        {10, 4, 9, 10, 6, 4, 11, 2, 3, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, -1, -1, -1, -1},
+        {3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, -1, -1, -1, -1},
+        {6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, -1},
+        {9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, -1, -1, -1, -1},
+        {8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, -1},
+        {3, 11, 6, 3, 6, 0, 0, 6, 4, -1, -1, -1, -1, -1, -1, -1},
+        {6, 4, 8, 11, 6, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {7, 10, 6, 7, 8, 10, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1},
+        {0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, -1, -1, -1, -1},
+        {10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, -1, -1, -1, -1},
+        {10, 6, 7, 10, 7, 1, 1, 7, 3, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, -1, -1, -1, -1},
+        {2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, -1},
+        {7, 8, 0, 7, 0, 6, 6, 0, 2, -1, -1, -1, -1, -1, -1, -1},
+        {7, 3, 2, 6, 7, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, -1, -1, -1, -1},
+        {2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, -1},
+        {1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, -1},
+        {11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, -1, -1, -1, -1},
+        {8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, -1},
+        {0, 9, 1, 11, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, -1, -1, -1, -1},
+        {7, 11, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {3, 0, 8, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 1, 9, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {8, 1, 9, 8, 3, 1, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1},
+        {10, 1, 2, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 10, 3, 0, 8, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1},
+        {2, 9, 0, 2, 10, 9, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1},
+        {6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, -1, -1, -1, -1},
+        {7, 2, 3, 6, 2, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {7, 0, 8, 7, 6, 0, 6, 2, 0, -1, -1, -1, -1, -1, -1, -1},
+        {2, 7, 6, 2, 3, 7, 0, 1, 9, -1, -1, -1, -1, -1, -1, -1},
+        {1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, -1, -1, -1, -1},
+        {10, 7, 6, 10, 1, 7, 1, 3, 7, -1, -1, -1, -1, -1, -1, -1},
+        {10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, -1, -1, -1, -1},
+        {0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, -1, -1, -1, -1},
+        {7, 6, 10, 7, 10, 8, 8, 10, 9, -1, -1, -1, -1, -1, -1, -1},
+        {6, 8, 4, 11, 8, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {3, 6, 11, 3, 0, 6, 0, 4, 6, -1, -1, -1, -1, -1, -1, -1},
+        {8, 6, 11, 8, 4, 6, 9, 0, 1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, -1, -1, -1, -1},
+        {6, 8, 4, 6, 11, 8, 2, 10, 1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, -1, -1, -1, -1},
+        {4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, -1, -1, -1, -1},
+        {10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, -1},
+        {8, 2, 3, 8, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1},
+        {0, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, -1, -1, -1, -1},
+        {1, 9, 4, 1, 4, 2, 2, 4, 6, -1, -1, -1, -1, -1, -1, -1},
+        {8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, -1, -1, -1, -1},
+        {10, 1, 0, 10, 0, 6, 6, 0, 4, -1, -1, -1, -1, -1, -1, -1},
+        {4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, -1},
+        {10, 9, 4, 6, 10, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 9, 5, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 3, 4, 9, 5, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1},
+        {5, 0, 1, 5, 4, 0, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1},
+        {11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, -1, -1, -1, -1},
+        {9, 5, 4, 10, 1, 2, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1},
+        {6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, -1, -1, -1, -1},
+        {7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, -1, -1, -1, -1},
+        {3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, -1},
+        {7, 2, 3, 7, 6, 2, 5, 4, 9, -1, -1, -1, -1, -1, -1, -1},
+        {9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, -1, -1, -1, -1},
+        {3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, -1, -1, -1, -1},
+        {6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, -1},
+        {9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, -1, -1, -1, -1},
+        {1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, -1},
+        {4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, -1},
+        {7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, -1, -1, -1, -1},
+        {6, 9, 5, 6, 11, 9, 11, 8, 9, -1, -1, -1, -1, -1, -1, -1},
+        {3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, -1, -1, -1, -1},
+        {0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, -1, -1, -1, -1},
+        {6, 11, 3, 6, 3, 5, 5, 3, 1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, -1, -1, -1, -1},
+        {0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, -1},
+        {11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, -1},
+        {6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, -1, -1, -1, -1},
+        {5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, -1, -1, -1, -1},
+        {9, 5, 6, 9, 6, 0, 0, 6, 2, -1, -1, -1, -1, -1, -1, -1},
+        {1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, -1},
+        {1, 5, 6, 2, 1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, -1},
+        {10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, -1, -1, -1, -1},
+        {0, 3, 8, 5, 6, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {10, 5, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {11, 5, 10, 7, 5, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {11, 5, 10, 11, 7, 5, 8, 3, 0, -1, -1, -1, -1, -1, -1, -1},
+        {5, 11, 7, 5, 10, 11, 1, 9, 0, -1, -1, -1, -1, -1, -1, -1},
+        {10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, -1, -1, -1, -1},
+        {11, 1, 2, 11, 7, 1, 7, 5, 1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, -1, -1, -1, -1},
+        {9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, -1, -1, -1, -1},
+        {7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, -1},
+        {2, 5, 10, 2, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1},
+        {8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, -1, -1, -1, -1},
+        {9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, -1, -1, -1, -1},
+        {9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, -1},
+        {1, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 7, 0, 7, 1, 1, 7, 5, -1, -1, -1, -1, -1, -1, -1},
+        {9, 0, 3, 9, 3, 5, 5, 3, 7, -1, -1, -1, -1, -1, -1, -1},
+        {9, 8, 7, 5, 9, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {5, 8, 4, 5, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1},
+        {5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, -1, -1, -1, -1},
+        {0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, -1, -1, -1, -1},
+        {10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, -1},
+        {2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, -1, -1, -1, -1},
+        {0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, -1},
+        {0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, -1},
+        {9, 4, 5, 2, 11, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, -1, -1, -1, -1},
+        {5, 10, 2, 5, 2, 4, 4, 2, 0, -1, -1, -1, -1, -1, -1, -1},
+        {3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, -1},
+        {5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, -1, -1, -1, -1},
+        {8, 4, 5, 8, 5, 3, 3, 5, 1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 4, 5, 1, 0, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, -1, -1, -1, -1},
+        {9, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 11, 7, 4, 9, 11, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1},
+        {0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, -1, -1, -1, -1},
+        {1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, -1, -1, -1, -1},
+        {3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, -1},
+        {4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, -1, -1, -1, -1},
+        {9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, -1},
+        {11, 7, 4, 11, 4, 2, 2, 4, 0, -1, -1, -1, -1, -1, -1, -1},
+        {11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, -1, -1, -1, -1},
+        {2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, -1, -1, -1, -1},
+        {9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, -1},
+        {3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, -1},
+        {1, 10, 2, 8, 7, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 9, 1, 4, 1, 7, 7, 1, 3, -1, -1, -1, -1, -1, -1, -1},
+        {4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, -1, -1, -1, -1},
+        {4, 0, 3, 7, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {4, 8, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {9, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {3, 0, 9, 3, 9, 11, 11, 9, 10, -1, -1, -1, -1, -1, -1, -1},
+        {0, 1, 10, 0, 10, 8, 8, 10, 11, -1, -1, -1, -1, -1, -1, -1},
+        {3, 1, 10, 11, 3, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 2, 11, 1, 11, 9, 9, 11, 8, -1, -1, -1, -1, -1, -1, -1},
+        {3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, -1, -1, -1, -1},
+        {0, 2, 11, 8, 0, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {3, 2, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {2, 3, 8, 2, 8, 10, 10, 8, 9, -1, -1, -1, -1, -1, -1, -1},
+        {9, 10, 2, 0, 9, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, -1, -1, -1, -1},
+        {1, 10, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {1, 3, 8, 9, 1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 9, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}};
+
+    
+    /*
+      Determine the index into the edge table which
+      tells us which vertices are inside of the surface
+     */
+    cubeindex = 0;
+    // bit v of cubeindex is set when the scalar at corner v is below the isolevel
+    if (grid.val[0] < isolevel) cubeindex |= 1;
+    if (grid.val[1] < isolevel) cubeindex |= 2;
+    if (grid.val[2] < isolevel) cubeindex |= 4;
+    if (grid.val[3] < isolevel) cubeindex |= 8;
+    if (grid.val[4] < isolevel) cubeindex |= 16;
+    if (grid.val[5] < isolevel) cubeindex |= 32;
+    if (grid.val[6] < isolevel) cubeindex |= 64;
+    if (grid.val[7] < isolevel) cubeindex |= 128;
+    
+    // look the edge mask up once; every test below reuses it
+    const int edges = edgeTable[cubeindex];
+    /* Cube is entirely in/out of the surface */
+    if (edges == 0)
+        return(0);
+    
+    /* Find the vertices where the surface intersects the cube */
+    if (edges & 1)
+        vertlist[0] =
+         VertexInterp(isolevel,grid.p[0],grid.p[1],grid.val[0],grid.val[1]);
+    if (edges & 2)
+        vertlist[1] =
+         VertexInterp(isolevel,grid.p[1],grid.p[2],grid.val[1],grid.val[2]);
+    if (edges & 4)
+        vertlist[2] =
+         VertexInterp(isolevel,grid.p[2],grid.p[3],grid.val[2],grid.val[3]);
+    if (edges & 8)
+        vertlist[3] =
+         VertexInterp(isolevel,grid.p[3],grid.p[0],grid.val[3],grid.val[0]);
+    if (edges & 16)
+        vertlist[4] =
+         VertexInterp(isolevel,grid.p[4],grid.p[5],grid.val[4],grid.val[5]);
+    if (edges & 32)
+        vertlist[5] =
+         VertexInterp(isolevel,grid.p[5],grid.p[6],grid.val[5],grid.val[6]);
+    if (edges & 64)
+        vertlist[6] =
+         VertexInterp(isolevel,grid.p[6],grid.p[7],grid.val[6],grid.val[7]);
+    if (edges & 128)
+        vertlist[7] =
+         VertexInterp(isolevel,grid.p[7],grid.p[4],grid.val[7],grid.val[4]);
+    if (edges & 256)
+        vertlist[8] =
+         VertexInterp(isolevel,grid.p[0],grid.p[4],grid.val[0],grid.val[4]);
+    if (edges & 512)
+        vertlist[9] =
+         VertexInterp(isolevel,grid.p[1],grid.p[5],grid.val[1],grid.val[5]);
+    if (edges & 1024)
+        vertlist[10] =
+         VertexInterp(isolevel,grid.p[2],grid.p[6],grid.val[2],grid.val[6]);
+    if (edges & 2048)
+        vertlist[11] =
+         VertexInterp(isolevel,grid.p[3],grid.p[7],grid.val[3],grid.val[7]);
+    
+    /* Create the triangle */
+    ntriang = 0;
+    for (i=0; triTable[cubeindex][i]!=-1; i+=3) {
+        // each group of three table entries names the cut edges whose crossings form one facet
+        triangles[ntriang].p[0] = vertlist[triTable[cubeindex][i  ]];
+        triangles[ntriang].p[1] = vertlist[triTable[cubeindex][i+1]];
+        triangles[ntriang].p[2] = vertlist[triTable[cubeindex][i+2]];
+        
+        ntriang++;
+    } // end for i
+    
+    return(ntriang);
+}
+
+
+// Gaussian weight used by the RBF
+// w = exp(-|r|^2 / h^2), where r = x - xj is the separation vector
+KOKKOS_FUNCTION
+double kernel(const double r[3], const double h){
+
+    // squared length of r, summed in the order x, y, z
+    const double xx = r[0]*r[0];
+    const double yy = r[1]*r[1];
+    const double zz = r[2]*r[2];
+    const double dist2 = xx + yy + zz;
+
+    return exp(-dist2/(h*h));
+} // end of function
+
+
+// Number of monomials in the 3D polynomial basis up to quadratic order
+const size_t num_poly_basis = 10;
+// Fills p[0..9] with the monomials of r = (x, y, z): 1, x, y, z, x^2, xy, xz, y^2, yz, z^2
+KOKKOS_INLINE_FUNCTION
+void poly_basis(const double r[3], double *p) {
+    // name the three components once
+    const double rx = r[0];
+    const double ry = r[1];
+    const double rz = r[2];
+
+    // constant and linear terms
+    p[0] = 1.0;   p[1] = rx;   p[2] = ry;   p[3] = rz;
+
+    // quadratic terms
+    p[4] = rx * rx;   p[5] = rx * ry;   p[6] = rx * rz;
+    p[7] = ry * ry;   p[8] = ry * rz;
+    p[9] = rz * rz;
+
+    // for high-order will use (x^a y^b z^c)
+} // end function
+
+
+void compute_shape_functions(
+    size_t i,
+    const DCArrayKokkos <double>& x,
+    const CArrayKokkos <double>& vol,
+    const CArrayKokkos <double>& rk_coeffs,
+    const CArrayKokkos <double>& rk_basis,
+    const double h)
+{
+    // Fills row i of rk_basis: rk_basis(i,j) = kernel weight * (rk_coeffs(i,:) . polynomial basis),
+    // both evaluated at the offset x_j - x_i. vol is not used here.
+    // every point is treated as a neighbor for now; the count will come from hash bins
+    const size_t neighbor_count = x.dims(0);
+
+    // one parallel iteration per neighbor j of point i
+    FOR_ALL(j, 0, neighbor_count, {
+        // offset from point i to point j
+        double offset[3];
+        for (size_t dim = 0; dim < 3; ++dim) {
+            offset[dim] = x(j,dim) - x(i,dim);
+        } // end dim
+
+        // monomials of the offset: [1, x, y, z, x^2, xy, xz, y^2, yz, z^2]
+        double basis[num_poly_basis];
+        poly_basis(offset,basis);
+
+        // dot product of the point-i coefficients with the basis
+        double poly_sum = 0.0;
+        for (size_t term = 0; term < num_poly_basis; ++term){
+            poly_sum += rk_coeffs(i,term) * basis[term];
+        } // end term
+
+        rk_basis(i,j) = kernel(offset, h) * poly_sum;
+    });
+} // end function
+} // end function
+
+
+
+// Build the reproducing kernel coefficients rk_coeffs(i,:) of every particle i
+void build_rk_coefficients(
+    const DCArrayKokkos <double>& x,
+    const CArrayKokkos <double>& vol,
+    const CArrayKokkos <double>& rk_coeffs,
+    double h)
+{
+    // For each point i: assemble M = sum_j vol(j) * W(x_j - x_i) * p p^T, then solve M c = [1,0,...,0].
+    // global num_points at this time, make it num_points in neighborhood
+    size_t num_points_neighborhood = x.dims(0); // will come from hash bins
+
+    // actual number of points
+    size_t num_points = x.dims(0);
+
+    // loop over all nodes in the problem
+    FOR_ALL(i, 0, num_points, {
+        // moment matrix; it is accumulated with += below, so it has to start from zero
+        double M_1D[num_poly_basis*num_poly_basis];
+        for (size_t a = 0; a < num_poly_basis*num_poly_basis; ++a) {
+            M_1D[a] = 0.0;
+        } // end a
+        ViewCArrayKokkos <double> M(&M_1D[0], num_poly_basis, num_poly_basis);
+
+        // values in rhs after this function will be accessed as rk_coeffs(i,0:N)
+        ViewCArrayKokkos <double> rhs (&rk_coeffs(i,0), num_poly_basis);
+        // enforce reproduction of constant 1: rhs = [1, 0, ..., 0], written out in full
+        for (size_t a = 0; a < num_poly_basis; ++a) {
+            rhs(a) = 0.0;
+        } // end a
+        rhs(0) = 1.0;
+
+        double p[num_poly_basis];    // polynomial basis [1, x, y, z, x^2, xy, xz, y^2, yz, z^2]
+        double r[3];    // vecx_j - vecx_i
+
+        // loop over all nodes around point i
+        for (size_t j = 0; j < num_points_neighborhood; ++j) {
+
+            r[0] = x(j,0) - x(i,0); // x_j-x_i
+            r[1] = x(j,1) - x(i,1); // y_j-y_i
+            r[2] = x(j,2) - x(i,2); // z_j-z_i
+
+            poly_basis(r,p);
+            const double weight = vol(j) * kernel(r, h);   // invariant over (a,b)
+
+            // assemble matrix
+            for (size_t a = 0; a < num_poly_basis; ++a) {
+                for (size_t b = 0; b < num_poly_basis; ++b) {
+                    M(a,b) += weight * p[a] * p[b];
+                } // end for b
+            } // for a
+        } // end for point neighbors j
+
+        // -------------
+        // solve Ax=B
+        // -------------
+        size_t perm_1D[num_poly_basis];
+        ViewCArrayKokkos <size_t> perm (&perm_1D[0], num_poly_basis);
+        for (size_t a = 0; a < num_poly_basis; ++a) {
+            perm(a)= 0;
+        } // end a
+
+        double vv_1D[num_poly_basis];
+        ViewCArrayKokkos <double> vv(&vv_1D[0], num_poly_basis);
+
+        // used for LU problem
+        int parity = 0;
+        const int singular = LU_decompose(M, perm, vv, parity);  // M is returned as the LU matrix
+        if(singular==0){
+            // a return of 0 is treated as "singular"; skip the solve so rhs keeps [1, 0, ..., 0]
+            printf("ERROR: matrix is singluar \n");
+        }
+        else {
+            LU_backsub(M, perm, rhs);  // note: answer is sent back in rhs
+        }
+    }); // end parallel loop
+} // end function
+
+
+
+
+int main(int argc, char *argv[])
+{
+    Kokkos::initialize(argc, argv);
+    {  
+
+        printf("Pointcloud reconstruction \n\n");
+
+        if(argc==1){
+            printf("Please supply an STL file for testing the point cloud surface reconstruction code \n");
+            return 0;
+        }
+        
+        std::string filename = argv[1];
+
+        auto [normal_host, 
+              v1X_host, v1Y_host, v1Z_host, 
+              v2X_host, v2Y_host, v2Z_host, 
+              v3X_host, v3Y_host, v3Z_host, 
+              num_inp_triangles_host] = binary_stl_reader(filename);
+        
+        // Warning on C++ support:
+        // At this time with C++, the contents from a tuple cannot 
+        // be used inside a lambda function.  The parallel loops use 
+        // lambda functions. To overcome this C++ limitation, all 
+        // contents in the tuple will be copied or pointed to (Using 
+        // a MATAR dual view) allowing the data to be used in parallel.
+        const size_t num_inp_triangles = num_inp_triangles_host;
+        DViewCArrayKokkos <float> normal(&normal_host(0,0), num_inp_triangles, 3);
+        DViewCArrayKokkos <float> v1X(&v1X_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v1Y(&v1Y_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v1Z(&v1Z_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v2X(&v2X_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v2Y(&v2Y_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v2Z(&v2Z_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v3X(&v3X_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v3Y(&v3Y_host(0),num_inp_triangles); 
+        DViewCArrayKokkos <float> v3Z(&v3Z_host(0),num_inp_triangles);
+
+        normal.update_device(); 
+        v1X.update_device(); 
+        v1Y.update_device(); 
+        v1Z.update_device(); 
+        v2X.update_device(); 
+        v2Y.update_device(); 
+        v2Z.update_device(); 
+        v3X.update_device(); 
+        v3Y.update_device(); 
+        v3Z.update_device();
+        
+        
+        // define mesh spacing, it is used to create a mesh
+            
+        double LX = (XMax - X0);   // length in x-dir
+        double LY = (YMax - Y0);
+        double LZ = (ZMax - Z0);
+        
+        // the number of nodes in the mesh
+        int num_pt_x = (int)( LX/dx ) + 1;  // there must be at least 2 nodes
+        int num_pt_y = (int)( LY/dy ) + 1;  // there must be at least 2 nodes
+        int num_pt_z = (int)( LZ/dz ) + 1;  // there must be at least 2 nodes
+        
+        
+        // mesh coordinates
+        DCArrayKokkos <double> x(num_pt_x, "pt_x");
+        DCArrayKokkos <double> y(num_pt_y, "pt_y");
+        DCArrayKokkos <double> z(num_pt_z, "pt_z");
+
+        // small distance for moving in the +/- normal directions 
+        double epsilon = 0.1*fmin(fmin(dx, dy), dz);
+
+        
+        // function with isosurface that we want extracted
+        DCArrayKokkos <double> gridValues (num_pt_x,num_pt_y,num_pt_z, "grid_values");
+        
+
+        // define the triangles of extracted surface
+        const size_t num_elems = (num_pt_x-1)*(num_pt_y-1)*(num_pt_z-1);
+        DCArrayKokkos <triangle_t> all_mesh_surf_triangles(num_elems, 5, "mesh_surf_tris"); // max of 5 per elem
+        DCArrayKokkos <size_t> num_triangles_in_elem(num_elems, "num_tris_in_elem");
+        num_triangles_in_elem.set_values(0);
+
+
+        printf("Creating point cloud data from STL file \n\n");
+
+        // define a point cloud
+        size_t num_points = num_inp_triangles*3; // 1 point per triangle plus 2 more in the +/- directions
+        DCArrayKokkos <double> point_positions(num_points, 3, "point_positions");
+        DCArrayKokkos <double> point_signed_distance(num_points, "point_sign_distance"); // this is f in the journal paper
+
+        // 1 point per triangle at this time, thus a loop over tris
+        FOR_ALL(tri, 0, num_inp_triangles, {
+            // point on surface
+            point_positions(tri, 0) =  1.0/3.0*((double)v1X(tri) + (double)v2X(tri) + (double)v3X(tri));
+            point_positions(tri, 1) =  1.0/3.0*((double)v1Y(tri) + (double)v2Y(tri) + (double)v3Y(tri));
+            point_positions(tri, 2) =  1.0/3.0*((double)v1Z(tri) + (double)v2Z(tri) + (double)v3Z(tri));
+
+            point_signed_distance(tri) = 0.0;
+
+            // off surface +normal
+            point_positions(tri+num_inp_triangles, 0) =  point_positions(tri, 0) + epsilon*(double)normal(tri, 0);
+            point_positions(tri+num_inp_triangles, 1) =  point_positions(tri, 1) + epsilon*(double)normal(tri, 1);
+            point_positions(tri+num_inp_triangles, 2) =  point_positions(tri, 2) + epsilon*(double)normal(tri, 2);
+
+            point_signed_distance(tri+num_inp_triangles) = epsilon;
+
+            // off surface -normal
+            point_positions(tri+2*num_inp_triangles, 0) =  point_positions(tri, 0) - epsilon*(double)normal(tri, 0);
+            point_positions(tri+2*num_inp_triangles, 1) =  point_positions(tri, 1) - epsilon*(double)normal(tri, 1);
+            point_positions(tri+2*num_inp_triangles, 2) =  point_positions(tri, 2) - epsilon*(double)normal(tri, 2);
+
+            point_signed_distance(tri+2*num_inp_triangles) = -epsilon;
+
+        }); // end parallel for tri's in the file
+
+
+        // ----------------------------
+        // Reconstruct surface here
+        // ----------------------------
+
+        printf("Reconstructing surface using point cloud data \n\n");
+
+        // assuming all point neighbors contribute, will change to a hash bins
+        const size_t num_points_neighborhood = num_points;
+
+        CArrayKokkos <double> rk_coeffs(num_points, num_poly_basis);  // reproducing kernel coefficients at each point
+        CArrayKokkos <double> rk_basis(num_points, num_points);       // reproducing kernel basis
+        CArrayKokkos <double> vol(num_points);
+        vol.set_values(1.0);
+
+        double h = 1.0;
+
+
+        printf("building rk coefficients \n");
+
+        // build coefficients on basis functions
+        build_rk_coefficients(point_positions, vol, rk_coeffs, h);
+
+        // build basis functions
+        for(size_t i=0; i<num_points; i++){
+            compute_shape_functions(i, point_positions, vol, rk_coeffs, rk_basis, h);
+        } // end for i
+
+
+
+        // ----------------------------------
+        // Evaluate surface function on mesh
+        // ----------------------------------
+/*
+        printf("Evaluating surf function on mesh \n");
+        FOR_ALL(i, 0, num_pt_x, {
+            x(i) = dx*(double)i + X0;;
+        });
+        FOR_ALL(j, 0, num_pt_y, {
+            y(j) = dy*(double)j + Y0;
+        });
+        FOR_ALL(k, 0, num_pt_z, {
+            z(k) = dz*(double)k + Z0;
+        });
+        Kokkos::fence();
+
+        // save mesh coordinates of the nodes
+        FOR_ALL(k, 0, num_pt_z, 
+                j, 0, num_pt_y,
+                i, 0, num_pt_x, {
+
+                    double x_point[3];
+                    x_point[0] = x(i);
+                    x_point[1] = y(j);
+                    x_point[2] = z(k);
+                    
+                    // lambda coefficients for radial basis function, slice out only lambda values and polynomial values
+                    // lambda = ViewCArrayKokkos <double> (&b_vector(0), num_points);
+                    // coefs  = ViewCArrayKokkos <double> (&b_vector(num_points), Pn);
+
+
+                    for (size_t point=0; point<num_points_neighborhood; point++){
+                        gridValues(i,j,k) += ;
+                    } // end for points      
+        
+        }); // end parallel over k,j,i
+
+        x.update_host();
+        y.update_host();
+        z.update_host();
+        gridValues.update_host();
+        
+        
+        
+        
+        // ------------------------------------
+        // Use marching cubes to build surface
+        // ------------------------------------
+
+        printf("Running marching cubes algorithm\n");
+        
+        FOR_ALL(k, 0, num_pt_z-1,
+                j, 0, num_pt_y-1,
+                i, 0, num_pt_x-1, {
+
+                    // elem gid
+                    size_t elem_gid = i + j*(num_pt_x-1) + k*(num_pt_x-1)*(num_pt_y-1);
+        
+                    // extract the x,y,z node coords
+                    // using the index ordering for the cell
+                    vec_t xyzs [8];
+                    xyzs[0] = vec_t(x(i  ), y(j  ), z(k  ));
+                    xyzs[1] = vec_t(x(i+1), y(j  ), z(k  ));
+                    xyzs[2] = vec_t(x(i+1), y(j  ), z(k+1));
+                    xyzs[3] = vec_t(x(i  ), y(j  ), z(k+1));
+                    xyzs[4] = vec_t(x(i  ), y(j+1), z(k  ));
+                    xyzs[5] = vec_t(x(i+1), y(j+1), z(k  ));
+                    xyzs[6] = vec_t(x(i+1), y(j+1), z(k+1));
+                    xyzs[7] = vec_t(x(i  ), y(j+1), z(k+1));
+        
+        
+                    // extract the values at the nodes
+                    // using the index ordering for the cell
+                    double vals [8];
+                    vals[0] = gridValues(i  ,j  ,k  );
+                    vals[1] = gridValues(i+1,j  ,k  );
+                    vals[2] = gridValues(i+1,j  ,k+1);
+                    vals[3] = gridValues(i  ,j  ,k+1);
+                    vals[4] = gridValues(i  ,j+1,k  );
+                    vals[5] = gridValues(i+1,j+1,k  );
+                    vals[6] = gridValues(i+1,j+1,k+1);
+                    vals[7] = gridValues(i  ,j+1,k+1);
+        
+        
+                    // details of the cell, save coords and the values at the nodes
+                    gridcell_t cell(xyzs, vals);
+        
+        
+                    // the most triangles in a cell is 5
+                    triangle_t triangles[5];
+                    num_triangles_in_elem(elem_gid) = Polygonise(cell, isoLevel, triangles);
+        
+                    // save the triangles
+                    for (size_t tri = 0; tri < num_triangles_in_elem(elem_gid); tri++)
+                    {
+                        all_mesh_surf_triangles(elem_gid,tri) = triangles[tri];
+                    } // end for tri
+
+        });  // end parallel for k,j,i
+
+        
+        // calculate the normal vector of triangles
+        FOR_ALL(elem_gid, 0, num_elems, {
+            for (size_t tri = 0; tri < num_triangles_in_elem(elem_gid); tri++){
+                calc_normal(&all_mesh_surf_triangles(elem_gid, tri));
+            }
+        }); // end loop over triangles
+
+        all_mesh_surf_triangles.update_host();
+        num_triangles_in_elem.update_host();
+
+        
+        printf("Marching cubes finished \n\n");
+        
+
+
+        // --------------------------------------------------
+        // volume calculation
+        // --------------------------------------------------
+        double volume = 0.0;
+        double vol_lcl = 0.0;
+        FOR_REDUCE_SUM(elem_gid, 0, num_elems, 
+                       vol_lcl, {
+
+            for (size_t tri = 0; tri < num_triangles_in_elem(elem_gid); tri++){
+                vol_lcl += compute_volume(all_mesh_surf_triangles(elem_gid,tri)); 
+            }
+
+        }, volume);
+        volume = fabs(volume);
+
+        double radius =  0.794651/2.0; // radius of constructured part, based on a small mesh size
+        double PI = 3.14159265358979323846264338327950288419716939937510;
+        double vol_exact = 4.0/3.0*PI*radius*radius*radius;
+        printf("volume = %f, and `exact' sphere volume = %f \n", volume, vol_exact);
+
+        // 0.262744 at 0.001 mesh size  
+
+
+        // --------------------------------------------------
+        // Export STL file using results from marching cubes
+        // --------------------------------------------------
+
+        printf("Exporting STL file for a 3D printer\n");
+        
+        
+        // export triangles as STL file
+        
+        FILE * myfile;
+        myfile=fopen("surface.stl","w");
+        fprintf(myfile,"solid points \n");
+        // a serial file write
+        for(size_t elem_gid=0; elem_gid<num_elems; elem_gid++){
+            for (size_t tri = 0; tri < num_triangles_in_elem.host(elem_gid); tri++){
+        
+                fprintf(myfile,"facet normal %f %f %f\n",
+                        all_mesh_surf_triangles.host(elem_gid,tri).normal.x,
+                        all_mesh_surf_triangles.host(elem_gid,tri).normal.y,
+                        all_mesh_surf_triangles.host(elem_gid,tri).normal.z);
+                
+                fprintf(myfile,"outer loop \n");
+                
+                fprintf(myfile,"vertex %f %f %f\n",
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[0].x,
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[0].y,
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[0].z);
+                
+                fprintf(myfile,"vertex %f %f %f\n",
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[1].x,
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[1].y,
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[1].z);
+                
+                fprintf(myfile,"vertex %f %f %f\n",
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[2].x,
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[2].y,
+                        all_mesh_surf_triangles.host(elem_gid,tri).p[2].z);
+                fprintf(myfile,"endloop \n");
+                fprintf(myfile,"endfacet \n");
+            }   
+        } // end loop over triangles
+        fprintf(myfile,"endsolid points \n");
+        
+        fclose(myfile);
+            
+    
+        printf("Finished \n\n");
+*/
+
+
+
+
+
+    } // end of kokkos scope
+
+
+
+    Kokkos::finalize();
+
+    return 0;
+    
+} // end main
diff --git a/solvers/lu_solver.hpp b/solvers/lu_solver.hpp
index f9b611e8..fe095cc7 100644
--- a/solvers/lu_solver.hpp
+++ b/solvers/lu_solver.hpp
@@ -269,6 +269,226 @@ double LU_determinant(
 
 
 
+/////////// ViewKokkos Versions //////////
+
+
+// LU-decompose the square matrix A in place (Crout's algorithm with a row-scaled pivot search); returns 0 if a row of A is all zeros, otherwise 1
+KOKKOS_INLINE_FUNCTION
+int LU_decompose(
+    const ViewCArrayKokkos <double> &A, // matrix A passed in and is sent out in LU decomp format
+    const ViewCArrayKokkos <size_t> &perm,  // output: perm(j) is the row that was swapped into row j at step j
+    const ViewCArrayKokkos <double> &vv,    // scratch: per-row scale factors, overwritten
+    int &parity) {                 // output: +1 or -1, the sign flips once per row interchange
+    // declared inline because this definition lives in a header and may be included from several translation units
+    const size_t n = A.dims(0);  // size of matrix
+
+    parity = 1;
+
+    // helper variables
+    double temp;
+
+    // search for the largest element in each row; save the scaling in the
+    // temporary array vv and return zero if the matrix is singular
+    for(size_t i = 0; i < n; i++) {
+
+        double big = 0.;
+        for(size_t j = 0; j < n; j++){
+            if((temp=fabs(A(i,j))) > big){
+                big=temp;
+            }
+        }
+
+        if(big == 0.0) return(0);
+        // NOTE(review): Numerical Recipes stores 1.0/big as the row scale; confirm that storing big is intended
+        vv(i) = big;
+    }
+
+    // the main loop for the Crout's algorithm
+    for(size_t j = 0; j < n; j++) {
+
+        // this is the part a) of the algorithm except for i==j
+        for(size_t i=0;i<j;i++) {
+
+            double sum=A(i,j);
+
+            for(size_t k=0;k<i;k++){
+                sum -= A(i,k)*A(k,j);
+            }
+
+            A(i,j) = sum;
+        }
+
+        // initialize for the search for the largest pivot element
+        double big = 0.;
+        size_t imax = j;
+
+
+        // this is the part a) for i==j and part b) for i>j + pivot search
+        for(size_t i = j; i < n; i++) {
+
+            double sum = A(i,j);
+
+            for(size_t k=0; k<j; k++){
+                sum -= A(i,k)*A(k,j);
+            }
+
+            A(i,j) = sum;
+
+            // is the figure of merit for the pivot better than the best so far?
+            if((temp = vv(i)*fabs(sum)) >= big){
+                big = temp;
+                imax = i;
+            }
+        } // end for i
+
+        // interchange rows, if needed, change parity and the scale factor
+        if(imax != j) {
+
+            for(size_t k = 0; k < n; k++){
+                temp = A(imax,k);
+                A(imax,k) = A(j,k);
+                A(j,k) = temp;
+            }
+
+            parity = -(parity);
+            vv(imax) = vv(j);
+        }
+
+        // store the index of the row that was swapped into row j
+        perm(j) = imax;
+        // if the pivot element is zero, the matrix is singular but for some
+        // applications a tiny number is desirable instead
+
+        if(A(j,j) == 0.0){
+            A(j,j) = TINY;
+        }
+        // finally, divide the sub-diagonal part of column j by the pivot element
+
+        if(j<n-1) {
+
+            temp=1./A(j,j);
+            for(size_t i = j+1; i < n; i++){
+                A(i,j)*=temp;
+            } // end for i
+        } // end if j
+
+    } // end for j
+
+    return(1);
+} // end function
+
+
+
+// ---------------------------------------------------------------------
+// LU back substitution: solves A x = b in place, with A in LU decomp format
+// ---------------------------------------------------------------------
+
+// callable from device code; declared inline because the definition lives in a header
+KOKKOS_INLINE_FUNCTION
+void LU_backsub(
+    const ViewCArrayKokkos <double> &A,     // input matrix A in LU decomp format
+    const ViewCArrayKokkos <size_t> &perm,  // permutations
+    const ViewCArrayKokkos <double> &b){          // in: right-hand side, out: solution x of Ax=b
+
+        const int n = A.dims(0);    // size of matrix
+
+        int ii = -1;                // index of the first nonzero entry seen so far, -1 while none has been seen
+
+
+        // First step of backsubstitution; the only wrinkle is to unscramble
+        // the permutation order. Note: the algorithm is optimized for a
+        // possibility of large amount of zeroes in b
+
+        for(size_t i = 0; i < n; i++) {
+
+            size_t ip = perm(i);
+
+            double sum = b(ip);
+            b(ip) = b(i);
+
+            if(ii >= 0){
+                for(size_t j = ii; j<i; j++){
+                    sum -= A(i,j)*b(j);
+                }
+            }
+            else if(sum != 0.0){
+                ii=i;  // a nonzero element encountered (either sign), later rows must start subtracting from here
+            }
+
+            b(i) = sum;
+        } // end loop i
+
+        // the second step: solve the upper-triangular system from the last row upward
+        for(int i=n-1; i>=0; i--) {
+
+            double sum = b(i);
+            for(size_t j=i+1; j<n; j++){
+                sum-=A(i,j)*b(j);
+            } // end j
+
+            b(i)=sum/A(i,i);
+        } // end loop i
+
+} // end function
+
+
+// ---------------------------------------------------------------------------
+// LU_invert: build inv_mat column by column; column c is the LU_backsub result for unit vector e_c
+// ---------------------------------------------------------------------------
+KOKKOS_INLINE_FUNCTION
+void LU_invert(
+    ViewCArrayKokkos <double> &A,       // matrix handed to LU_backsub
+    ViewCArrayKokkos <size_t> &perm,    // permutations handed to LU_backsub
+    ViewCArrayKokkos <double> &inv_mat, // output, written as inv_mat(row, column)
+    ViewCArrayKokkos <double> &col) {   // scratch vector, overwritten for every column
+
+    // matrix dimension, taken from the first extent of A
+    const size_t num_rows = A.dims(0);
+
+    size_t column = 0;
+    while (column < num_rows) {
+
+        // load the unit vector for this column into the scratch array
+        for (size_t row = 0; row < num_rows; ++row) {
+            col(row) = (row == column) ? 1.0 : 0.0;
+        }
+
+        // solve in place; col then holds one column of the result
+        LU_backsub(A, perm, col);
+
+        for (size_t row = 0; row < num_rows; ++row) {
+            inv_mat(row, column) = col(row);
+        }
+
+        ++column;
+    } // end while column
+} // end function
+
+// -----------------------
+// LU determinant function
+//  Input:  A in LU decomp format, and the parity of the row interchanges
+//  Output: parity times the product of the diagonal entries of A
+// -----------------------
+KOKKOS_INLINE_FUNCTION
+double LU_determinant(
+    ViewCArrayKokkos <double> &A,  // matrix whose diagonal entries are multiplied together
+    const int parity){          // sign factor (+1 or -1)
+
+    const int n = A.dims(0);    // number of diagonal entries
+    // start from the sign and fold in each diagonal entry in turn
+    double det = static_cast<double>(parity);
+    size_t diag = 0;
+    while (diag < n) {
+        det *= A(diag, diag);
+        ++diag;
+    } // end while diag
+
+    return det;
+} // end function
+
+
+
+
 // ============================================
 //  GPU kernals
 // ============================================

From b537c3a580483094aebebf868831b35904cf371e Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@syrupcastle.lanl.gov>
Date: Tue, 9 Sep 2025 09:19:06 -0600
Subject: [PATCH 04/23] WIP: rk basis functions give partition of unity

---
 examples/pointcloud/pointcloud-rk.cpp | 194 ++++++++++++++++++++------
 1 file changed, 150 insertions(+), 44 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index 9aae3661..ca950309 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -77,9 +77,15 @@ const double PI = 3.14159265358979323846;
 
 
 // the number of nodes in the mesh
-const double dx = 0.1; // resolution
-const double dy = 0.1; // resolution
-const double dz = 0.1; // resolution
+const double dx = 0.1; // resolution to build STL file
+const double dy = 0.1; // resolution to build STL file
+const double dz = 0.1; // resolution to build STL file
+
+
+// the bin sizes for finding neighboring points
+const double bin_dx = 0.5; // 2 bins in x
+const double bin_dy = 0.5; // 2 bins in y
+const double bin_dz = 0.5; // 2 bins in z
 
 
 // the mesh dimensions
@@ -92,6 +98,10 @@ const double X0 = 0.0;
 const double Y0 = 0.0; 
 const double Z0 = 0.0; 
 
+const double LX = (XMax - X0);   // length in x-dir
+const double LY = (YMax - Y0);
+const double LZ = (ZMax - Z0);
+
 
 const double isoLevel=0.0; // contour to extract
 
@@ -695,6 +705,27 @@ int Polygonise(gridcell_t grid, double isolevel, triangle_t *triangles)
 }
 
 
+struct bin_ijk_t{   // (i,j,k) indices of a bin, as returned by get_bin_ijk
+    size_t i, j, k;
+};
+// Map a point to the (i,j,k) bin with the nearest center; indices are clamped at 0 from below only (no upper clamp). Called inside FOR_ALL, so it carries the Kokkos function annotation.
+KOKKOS_FUNCTION
+bin_ijk_t get_bin_ijk(const double x_pt, const double y_pt, const double z_pt){
+    // bin centers sit at x = i*bin_dx + X0 + 0.5*bin_dx; the 1.0e-10 shift sends exact ties to the lower bin
+    bin_ijk_t bin_ijk;
+
+    double i_dbl = fmax(1.0e-15, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // fmax keeps the value non-negative before the cast below
+    double j_dbl = fmax(1.0e-15, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10));
+    double k_dbl = fmax(1.0e-15, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10));
+
+    // get the integers for the bins (the 1.0e-15 floor truncates to 0)
+    bin_ijk.i = static_cast<size_t>(i_dbl);
+    bin_ijk.j = static_cast<size_t>(j_dbl);
+    bin_ijk.k = static_cast<size_t>(k_dbl);
+
+    return bin_ijk;
+} // end function
+
 // Gaussian function part of the RBF
 // rbf = exp(-(x - xj)*(x - xj)/h)
 KOKKOS_FUNCTION
@@ -734,6 +765,9 @@ void poly_basis(const double r[3], double *p) {
 
 void compute_shape_functions(
     size_t i,
+    const double xpt,
+    const double ypt,
+    const double zpt,
     const DCArrayKokkos <double>& x,
     const CArrayKokkos <double>& vol,
     const CArrayKokkos <double>& rk_coeffs,
@@ -749,9 +783,9 @@ void compute_shape_functions(
 
         double p[num_poly_basis];    // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz]
         double r[3];    // vecx_j - vecx_i
-        r[0] = x(j,0) - x(i,0); // x_j-x_i
-        r[1] = x(j,1) - x(i,1); // y_j-y_i
-        r[2] = x(j,2) - x(i,2); // z_j-z_i
+        r[0] = x(j,0) - xpt; // x_j-x_i
+        r[1] = x(j,1) - ypt; // y_j-y_i
+        r[2] = x(j,2) - zpt; // z_j-z_i
 
         double W = kernel(r, h);
         poly_basis(r,p);
@@ -790,9 +824,11 @@ void build_rk_coefficients(
 
         double M_1D[num_poly_basis*num_poly_basis]; 
         ViewCArrayKokkos <double> M(&M_1D[0], num_poly_basis, num_poly_basis);
+        M.set_values(0.0);
 
         // values in rhs after this function will be accessed as rk_coeffs(i,0:N)
         ViewCArrayKokkos <double> rhs (&rk_coeffs(i,0), num_poly_basis);
+        rhs.set_values(0.0);
         rhs(0) = 1.0;   // enforce reproduction of constant 1, everything else is = 0
 
         double p[num_poly_basis];    // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz]
@@ -809,10 +845,10 @@ void build_rk_coefficients(
             poly_basis(r,p);
 
             // assemble matrix
+
             for (size_t a = 0; a < num_poly_basis; ++a) {
                 for (size_t b = 0; b < num_poly_basis; ++b) {
                     M(a,b) += vol(j) * W * p[a] * p[b]; 
-                    printf("M(a,b) = %f \n", M(a,b));
                 } // end for b
             } // for a
 
@@ -850,6 +886,7 @@ void build_rk_coefficients(
 
 
 
+
 int main(int argc, char *argv[])
 {
     Kokkos::initialize(argc, argv);
@@ -900,37 +937,10 @@ int main(int argc, char *argv[])
         v3Z.update_device();
         
         
-        // define mesh spacing, it is used to create a mesh
-            
-        double LX = (XMax - X0);   // length in x-dir
-        double LY = (YMax - Y0);
-        double LZ = (ZMax - Z0);
-        
-        // the number of nodes in the mesh
-        int num_pt_x = (int)( LX/dx ) + 1;  // there must be at least 2 nodes
-        int num_pt_y = (int)( LY/dy ) + 1;  // there must be at least 2 nodes
-        int num_pt_z = (int)( LZ/dz ) + 1;  // there must be at least 2 nodes
-        
         
-        // mesh coordinates
-        DCArrayKokkos <double> x(num_pt_x, "pt_x");
-        DCArrayKokkos <double> y(num_pt_y, "pt_y");
-        DCArrayKokkos <double> z(num_pt_z, "pt_z");
-
         // small distance for moving in the +/- normal directions 
         double epsilon = 0.1*fmin(fmin(dx, dy), dz);
 
-        
-        // function with isosurface that we want extracted
-        DCArrayKokkos <double> gridValues (num_pt_x,num_pt_y,num_pt_z, "grid_values");
-        
-
-        // define the triangles of extracted surface
-        const size_t num_elems = (num_pt_x-1)*(num_pt_y-1)*(num_pt_z-1);
-        DCArrayKokkos <triangle_t> all_mesh_surf_triangles(num_elems, 5, "mesh_surf_tris"); // max of 5 per elem
-        DCArrayKokkos <size_t> num_triangles_in_elem(num_elems, "num_tris_in_elem");
-        num_triangles_in_elem.set_values(0);
-
 
         printf("Creating point cloud data from STL file \n\n");
 
@@ -965,6 +975,55 @@ int main(int argc, char *argv[])
         }); // end parallel for tri's in the file
 
 
+
+        // ----------------------------
+        // Make bins here
+        // ----------------------------
+        
+        // the number of nodes in the mesh
+        size_t num_bins_x = (size_t)( round(LX/bin_dx) );  
+        size_t num_bins_y = (size_t)( round(LY/bin_dy) );  
+        size_t num_bins_z = (size_t)( round(LZ/bin_dz) );  
+
+        
+
+        size_t num_bins = num_bins_x*num_bins_y*num_bins_z;
+
+printf("num bins = %zu \n", num_bins);
+
+
+        DCArrayKokkos <size_t> num_points_in_bin(num_bins);
+        num_points_in_bin.set_values(0);
+        DCArrayKokkos <size_t> points_bin_id(num_points);
+        DCArrayKokkos <size_t> points_bin_id_storage(num_points);
+        
+        FOR_ALL(pt_id, 0, num_points, {
+
+            // get i,j,k indices of the bins
+            bin_ijk_t bin_ijk = get_bin_ijk(point_positions(pt_id,0), 
+                                            point_positions(pt_id,1), 
+                                            point_positions(pt_id,2));
+
+            // get the 1D index
+            size_t bin_id = bin_ijk.i + (bin_ijk.j + bin_ijk.k*num_bins_y)*num_bins_x;
+          
+            size_t storage_place = Kokkos::atomic_fetch_add(&num_points_in_bin(bin_id), 1);
+            points_bin_id(pt_id) = bin_id; // the id of the bin
+            points_bin_id_storage(pt_id) = storage_place; // the storage place in the bin
+
+        }); // end for all
+
+        DRaggedRightArrayKokkos <size_t> points_in_bin(num_points_in_bin);
+
+        FOR_ALL(pt_id, 0, num_points, {
+
+            size_t bin_id = points_bin_id(pt_id);
+            size_t storage_place = points_bin_id_storage(pt_id);
+            points_in_bin(bin_id, storage_place) = pt_id;
+
+        }); // end for all
+        
+
         // ----------------------------
         // Reconstruct surface here
         // ----------------------------
@@ -989,15 +1048,53 @@ int main(int argc, char *argv[])
 
         // build basis functions
         for(size_t i=0; i<num_points; i++){
-            compute_shape_functions(i, point_positions, vol, rk_coeffs, rk_basis, h);
+            compute_shape_functions(i, point_positions(i,0), point_positions(i,1), point_positions(i,2), point_positions, vol, rk_coeffs, rk_basis, h);
         } // end for i
 
+        
+        // performing checks on rk_coeffs
+        double partion_unity;
+        double partion_unity_lcl;
+
+        for(size_t i=0; i<num_points; i++){
+
+            FOR_REDUCE_SUM(j, 0, num_points, partion_unity_lcl, {
+                partion_unity_lcl += rk_basis(j, i)*vol(j);
+            }, partion_unity);
+
+            printf("partition unity = %f, at i=%zu \n", partion_unity, i);
+        }
 
 
         // ----------------------------------
         // Evaluate surface function on mesh
         // ----------------------------------
-/*
+
+        // define mesh spacing, it is used to create a mesh
+            
+        
+        // the number of nodes in the mesh
+        int num_pt_x = (int)( LX/dx ) + 1;  // there must be at least 2 nodes
+        int num_pt_y = (int)( LY/dy ) + 1;  // there must be at least 2 nodes
+        int num_pt_z = (int)( LZ/dz ) + 1;  // there must be at least 2 nodes
+        
+        
+        // mesh coordinates
+        DCArrayKokkos <double> x(num_pt_x, "pt_x");
+        DCArrayKokkos <double> y(num_pt_y, "pt_y");
+        DCArrayKokkos <double> z(num_pt_z, "pt_z");
+
+        
+        // function with isosurface that we want extracted
+        DCArrayKokkos <double> gridValues (num_pt_x,num_pt_y,num_pt_z, "grid_values");
+        
+
+        // define the triangles of extracted surface
+        const size_t num_elems = (num_pt_x-1)*(num_pt_y-1)*(num_pt_z-1);
+        DCArrayKokkos <triangle_t> all_mesh_surf_triangles(num_elems, 5, "mesh_surf_tris"); // max of 5 per elem
+        DCArrayKokkos <size_t> num_triangles_in_elem(num_elems, "num_tris_in_elem");
+        num_triangles_in_elem.set_values(0);
+
         printf("Evaluating surf function on mesh \n");
         FOR_ALL(i, 0, num_pt_x, {
             x(i) = dx*(double)i + X0;;
@@ -1019,15 +1116,24 @@ int main(int argc, char *argv[])
                     x_point[0] = x(i);
                     x_point[1] = y(j);
                     x_point[2] = z(k);
+
+                    gridValues(i,j,k) = 0.0;
                     
-                    // lambda coefficients for radial basis function, slice out only lambda values and polynomial values
-                    // lambda = ViewCArrayKokkos <double> (&b_vector(0), num_points);
-                    // coefs  = ViewCArrayKokkos <double> (&b_vector(num_points), Pn);
+                    // get i,j,k indices of the bins
+                    bin_ijk_t bin_ijk = get_bin_ijk(x_point[0], 
+                                                    x_point[1], 
+                                                    x_point[2]);
+
+                    // get the 1D index
+                    size_t bin_id = bin_ijk.i + (bin_ijk.j + bin_ijk.k*num_bins_y)*num_bins_x;
+
+                    size_t point_i = points_in_bin(bin_id, 0); // get the first point in this bin
 
+                    for (size_t point_j=0; point_j<num_points_neighborhood; point_j++){
+                        // BUG HERE: need to evaluate basis at gridpoints. WARNING WARNING WARNING
+                        gridValues(i,j,k) += rk_basis(point_j, point_i)*point_signed_distance(point_j)*vol(point_j);
+                    } // end for points     
 
-                    for (size_t point=0; point<num_points_neighborhood; point++){
-                        gridValues(i,j,k) += ;
-                    } // end for points      
         
         }); // end parallel over k,j,i
 
@@ -1037,7 +1143,7 @@ int main(int argc, char *argv[])
         gridValues.update_host();
         
         
-        
+     
         
         // ------------------------------------
         // Use marching cubes to build surface
@@ -1180,7 +1286,7 @@ int main(int argc, char *argv[])
             
     
         printf("Finished \n\n");
-*/
+
 
 
 

From 1c99ac5d2a445f00d366e5b68eba589338a08daf Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@syrupcastle.lanl.gov>
Date: Tue, 9 Sep 2025 10:01:49 -0600
Subject: [PATCH 05/23] WIP: fixed bug in rk_basis evaluation

---
 examples/pointcloud/pointcloud-rk.cpp | 59 ++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 14 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index ca950309..cda69090 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -795,7 +795,7 @@ void compute_shape_functions(
             correction += rk_coeffs(i,a) * p[a];
         } // end a
 
-        rk_basis(i,j) = W * correction;
+        rk_basis(j) = W * correction;
     });
 
 
@@ -1034,7 +1034,7 @@ printf("num bins = %zu \n", num_bins);
         const size_t num_points_neighborhood = num_points;
 
         CArrayKokkos <double> rk_coeffs(num_points, num_poly_basis);  // reproducing kernel coefficients at each point
-        CArrayKokkos <double> rk_basis(num_points, num_points);       // reproducing kernel basis
+        CArrayKokkos <double> rk_basis(num_points);       // reproducing kernel basis, should have size num_points_neighborhood
         CArrayKokkos <double> vol(num_points);
         vol.set_values(1.0);
 
@@ -1046,24 +1046,30 @@ printf("num bins = %zu \n", num_bins);
         // build coefficients on basis functions
         build_rk_coefficients(point_positions, vol, rk_coeffs, h);
 
-        // build basis functions
-        for(size_t i=0; i<num_points; i++){
-            compute_shape_functions(i, point_positions(i,0), point_positions(i,1), point_positions(i,2), point_positions, vol, rk_coeffs, rk_basis, h);
-        } // end for i
-
+        
         
         // performing checks on rk_coeffs
         double partion_unity;
         double partion_unity_lcl;
 
         for(size_t i=0; i<num_points; i++){
-
-            FOR_REDUCE_SUM(j, 0, num_points, partion_unity_lcl, {
-                partion_unity_lcl += rk_basis(j, i)*vol(j);
+            
+            // build basis functions at point i
+            compute_shape_functions(i, 
+                                    point_positions(i,0), point_positions(i,1), point_positions(i,2), 
+                                    point_positions, 
+                                    vol, 
+                                    rk_coeffs, 
+                                    rk_basis, 
+                                    h);
+
+            FOR_REDUCE_SUM(j, 0, num_points_neighborhood, partion_unity_lcl, {
+                partion_unity_lcl += rk_basis(j)*vol(j);
             }, partion_unity);
 
             printf("partition unity = %f, at i=%zu \n", partion_unity, i);
-        }
+
+        } // end for i
 
 
         // ----------------------------------
@@ -1127,11 +1133,36 @@ printf("num bins = %zu \n", num_bins);
                     // get the 1D index
                     size_t bin_id = bin_ijk.i + (bin_ijk.j + bin_ijk.k*num_bins_y)*num_bins_x;
 
-                    size_t point_i = points_in_bin(bin_id, 0); // get the first point in this bin
+                    size_t point_i = 0; // the closest point; stays 0 if this bin holds no points
+                    double dist_i = 1.e16;
+
+                    // NOTE(review): only the bin containing the grid point is searched; a nearer point may sit in a neighboring bin
+                    // find the closest point to the evaluation location
+                    for (size_t point_lid=0; point_lid<num_points_in_bin(bin_id); point_lid++){
+                        // get the point id
+                        size_t pt_id = points_in_bin(bin_id, point_lid);
+
+                        // calculate the distance between this point and evaluation location
+                        double dist = (point_positions(pt_id,0) - x(i))*(point_positions(pt_id,0) - x(i))+
+                                      (point_positions(pt_id,1) - y(j))*(point_positions(pt_id,1) - y(j))+ 
+                                      (point_positions(pt_id,2) - z(k))*(point_positions(pt_id,2) - z(k));
+                        dist = sqrt(dist);
+                        if(fabs(dist)<fabs(dist_i)){
+                            point_i = pt_id;
+                            dist_i = dist;
+                        }
+                    }
 
                     for (size_t point_j=0; point_j<num_points_neighborhood; point_j++){
-                        // BUG HERE: need to evaluate basis at gridpoints. WARNING WARNING WARNING
-                        gridValues(i,j,k) += rk_basis(point_j, point_i)*point_signed_distance(point_j)*vol(point_j);
+                        // evaluate basis at the grid point (x(i), y(j), z(k)) using the coefficients of closest point_i
+                        compute_shape_functions(point_i, 
+                                                x(i), y(j), z(k), 
+                                                point_positions, 
+                                                vol, 
+                                                rk_coeffs, 
+                                                rk_basis, 
+                                                h);
+                        gridValues(i,j,k) += rk_basis(point_j)*point_signed_distance(point_j)*vol(point_j);
                     } // end for points     
 
         

From 58bb756da2f021a1d9c0764aa20c15e3447ba8aa Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@syrupcastle.lanl.gov>
Date: Tue, 9 Sep 2025 16:49:58 -0600
Subject: [PATCH 06/23] working rkpm with random particles

---
 examples/pointcloud/pointcloud-rk.cpp | 1012 ++-----------------------
 1 file changed, 46 insertions(+), 966 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index cda69090..6fd003cf 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -36,32 +36,18 @@
  **********************************************************************************************/
 
 // -----------------------------------------------
-// pointcloud reconstrution in C++
-//  credit to Andrew Morgan and Nathaniel Morgan
-//
-// To run the code with an external mesh file:
-//    ./a.out graphics-file
-//
-// Requires the matar.h and macros.h libraries from
-// the github LANL/MATAR/src folder
-//
-// The following rountines in this code came from:
-//   https://paulbourke.net/geometry/polygonise/
-//   - Polygonise
-//   - VertexInterp
-//
-//
-// The surface reconstruction method is from
-//   Reconstruction and Representation of 3D Objects with Radial BasisFunctions
-//    J. Carr, R. Beatson, ..., T. Evans
-//    https://www.cs.jhu.edu/~misha/Fall05/Papers/carr01.pdf   
+// pointcloud reproducing kernels in C++
+//  Nathaniel Morgan
 // -----------------------------------------------
+
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <stdio.h>
 #include <cmath>
 
+#include <cstdlib> // For rand() and srand()
+
 
 #include "matar.h"
 
@@ -75,634 +61,26 @@ const double PI = 3.14159265358979323846;
 // -----------------------------------------------
 // inputs:
 
-
-// the number of nodes in the mesh
-const double dx = 0.1; // resolution to build STL file
-const double dy = 0.1; // resolution to build STL file
-const double dz = 0.1; // resolution to build STL file
-
+const size_t num_points = 101;
 
 // the bin sizes for finding neighboring points
 const double bin_dx = 0.5; // 2 bins in x
 const double bin_dy = 0.5; // 2 bins in y
 const double bin_dz = 0.5; // 2 bins in z
 
+const double X0 = 0.0;   // origin
+const double Y0 = 0.0;
+const double Z0 = 0.0;
 
-// the mesh dimensions
-// length of the domain is 5 for crazy shape and 1 for sphere
-const double XMax = 1.0; 
-const double YMax = 1.0; 
-const double ZMax = 1.0; 
-
-const double X0 = 0.0; 
-const double Y0 = 0.0; 
-const double Z0 = 0.0; 
-
-const double LX = (XMax - X0);   // length in x-dir
-const double LY = (YMax - Y0);
-const double LZ = (ZMax - Z0);
-
-
-const double isoLevel=0.0; // contour to extract
-
+// length of the domain 
+const double LX = 1.0;   // length in x-dir
+const double LY = 1.0;
+const double LZ = 1.0;
 
 //
 // -----------------------------------------------
 
 
-std::tuple<
-    CArray<float>,   // normal
-    CArray<float>, CArray<float>, CArray<float>,   // v1X, v1Y, v1Z
-    CArray<float>, CArray<float>, CArray<float>,   // v2X, v2Y, v2Z
-    CArray<float>, CArray<float>, CArray<float>,   // v3X, v3Y, v3Z
-    size_t // n_facets
->
-binary_stl_reader(const std::string& path)
-{
-    std::ifstream in(path, std::ios::binary | std::ios::ate);
-    if (!in) { std::perror("open"); std::exit(EXIT_FAILURE); }
-
-    const std::streamoff filesize = in.tellg();
-    if (filesize < 100) {
-        std::cerr << "ERROR: File too small to be a valid STL\n";
-        std::exit(EXIT_FAILURE);
-    }
-    in.seekg(0);
-
-    // ---- check if ASCII -------------------------------------------------
-    char magic[6] = { 0 };
-    in.read(magic, 5);          // read first 5 chars
-    in.seekg(0);               // rewind
-    if (std::strncmp(magic, "solid", 5) == 0) {
-        std::cerr
-            << "ERROR: \"" << path
-            << "\" looks like an **ASCII** STL (starts with \"solid\").\n"
-            << "Re‑export it as *binary* or implement an ASCII parser.\n";
-        std::exit(EXIT_FAILURE);        // or call ascii_stl_reader();
-    }
-
-    // ---- read 80‑byte header + nominal facet count ----------------------
-    char header[80];                in.read(header, 80);
-    size_t n_facets_nominal;  in.read(reinterpret_cast<char*>(&n_facets_nominal), 4);
-
-    // ---- compute expected count from file size to sanity‑check ----------
-    // binary facet record = 50 bytes (12×4 + 12×4 + 12×4 + 2)
-    const size_t n_facets_from_size =
-        static_cast<size_t>((filesize - 84) / 50);
-
-    size_t n_facets = n_facets_nominal;
-    if (n_facets_nominal != n_facets_from_size) {
-        std::cout << "WARNING: facet count in header (" << n_facets_nominal
-            << ") disagrees with file size (" << n_facets_from_size
-            << ").  Using size‑derived value.\n";
-        n_facets = n_facets_from_size;
-    }
-    std::cout << "STL facets: " << n_facets << '\n';
-
-    // ---- allocate MATAR arrays -----------------------------------------
-    CArray<float> normal(n_facets, 3);
-    CArray<float> v1X(n_facets), v1Y(n_facets), v1Z(n_facets);
-    CArray<float> v2X(n_facets), v2Y(n_facets), v2Z(n_facets);
-    CArray<float> v3X(n_facets), v3Y(n_facets), v3Z(n_facets);
-
-    // ---- read facet records --------------------------------------------
-    float nrm[3], v1[3], v2[3], v3[3];
-    for (unsigned int i = 0; i < n_facets; ++i) {
-        in.read(reinterpret_cast<char*>(nrm), 12);
-        in.read(reinterpret_cast<char*>(v1), 12);
-        in.read(reinterpret_cast<char*>(v2), 12);
-        in.read(reinterpret_cast<char*>(v3), 12);
-        in.ignore(2);                        // attribute byte count
-
-        for (int d = 0; d < 3; ++d) normal(i, d) = nrm[d];
-        v1X(i) = v1[0]; v1Y(i) = v1[1]; v1Z(i) = v1[2];
-        v2X(i) = v2[0]; v2Y(i) = v2[1]; v2Z(i) = v2[2];
-        v3X(i) = v3[0]; v3Y(i) = v3[1]; v3Z(i) = v3[2];
-    }
-    return { normal,v1X,v1Y,v1Z,v2X,v2Y,v2Z,v3X,v3Y,v3Z,n_facets };
-}
-
-
-
-
-
-// a vector type with 3 components
-struct vec_t{
-    double x;
-    double y;
-    double z;
-    
-    // default constructor
-    vec_t (){};
-    
-    // overloaded constructor
-    vec_t(double x_in, double y_in, double z_in){
-        x = x_in;
-        y = y_in;
-        z = z_in;
-    };
-    
-}; // end vec_t
-
-
-// a triangle data type
-struct triangle_t {
-    
-    vec_t normal; // surface normal
-    
-    vec_t p[3];   // three nodes with x,y,z coords
-    
-    // default constructor
-    triangle_t(){};
-    
-    // overloaded constructor
-    triangle_t (vec_t p_in[3])
-    {
-        p[0]=p_in[0];
-        p[1]=p_in[1];
-        p[2]=p_in[2];
-    };
-    
-}; // end triangle_t
-
-
-// calculate the surface normal of a triangle
-KOKKOS_INLINE_FUNCTION
-void calc_normal(triangle_t *triangle){
-    
-    //A = p1 - p0;
-    //B = p2 - p0;
-    vec_t A;
-    A.x = triangle->p[1].x - triangle->p[0].x;
-    A.y = triangle->p[1].y - triangle->p[0].y;
-    A.z = triangle->p[1].z - triangle->p[0].z;
-    
-    vec_t B;
-    B.x = triangle->p[2].x - triangle->p[0].x;
-    B.y = triangle->p[2].y - triangle->p[0].y;
-    B.z = triangle->p[2].z - triangle->p[0].z;
-    
-    vec_t N;
-    N.x = A.y * B.z - A.z * B.y;
-    N.y = A.z * B.x - A.x * B.z;
-    N.z = A.x * B.y - A.y * B.x;
-    
-    double mag;
-    mag = sqrt(N.x*N.x + N.y*N.y + N.z*N.z);
-    
-    // save the unit normal
-    triangle->normal.x = N.x/mag;
-    triangle->normal.y = N.y/mag;
-    triangle->normal.z = N.z/mag;
-    
-} // end normal
-
-
-// cross prodcut
-vec_t cross(const vec_t &a, const vec_t &b) {
-    return {a.y*b.z - a.z*b.y,
-            a.z*b.x - a.x*b.z,
-            a.x*b.y - a.y*b.x};
-}
-
-double dot(const vec_t &a, const vec_t &b) {
-    return a.x*b.x + a.y*b.y + a.z*b.z;
-}
-
-
-// calculate the volume of a tet with this triangular face
-double compute_volume(const triangle_t &triangle) {
-    // triangle.p[0] is the first vec_t, being node 0
-    // ...
-    // triangle.p[1] is the third vec_t, being node 2
-    double volume = dot(triangle.p[0], cross(triangle.p[1], triangle.p[2])) / 6.0;
-
-    return volume;
-}
-
-struct gridcell_t {
-    
-    vec_t* p;
-    double* val;
-    
-    // default constructor
-    gridcell_t(){};
-    
-    // overloaded constructor
-    gridcell_t (vec_t p_in[8], double val_in[8])
-    {
-        p=p_in;
-        val=val_in;
-    };
-    
-}; // end gridcell_t
-
-
-/*
-   Linearly interpolate the position where an isosurface cuts
-   an edge between two vertices, each with their own scalar value
-*/
-KOKKOS_INLINE_FUNCTION
-vec_t VertexInterp(double isolevel, vec_t p1, vec_t p2, double valp1, double valp2)
-{
-   double mu;
-   vec_t p;
-
-   if (fabs(isolevel-valp1) < 0.00001)
-      return(p1);
-   if (fabs(isolevel-valp2) < 0.00001)
-      return(p2);
-   if (fabs(valp1-valp2) < 0.00001)
-      return(p1);
-   mu = (isolevel - valp1) / (valp2 - valp1);
-   p.x = p1.x + mu * (p2.x - p1.x);
-   p.y = p1.y + mu * (p2.y - p1.y);
-   p.z = p1.z + mu * (p2.z - p1.z);
-
-   return(p);
-}
-
-/*
-   Given a grid cell and an isolevel, calculate the triangular
-   facets required to represent the isosurface through the cell.
-   Return the number of triangular facets, the array "triangles"
-   will be loaded up with the vertices at most 5 triangular facets.
-    0 will be returned if the grid cell is either totally above
-   of totally below the isolevel.
-*/
-KOKKOS_INLINE_FUNCTION
-int Polygonise(gridcell_t grid, double isolevel, triangle_t *triangles)
-{
-    
-    int i,ntriang;
-    int cubeindex;
-    vec_t vertlist[12];
-
-    int edgeTable[256]={
-        0x0  , 0x109, 0x203, 0x30a, 0x406, 0x50f, 0x605, 0x70c,
-        0x80c, 0x905, 0xa0f, 0xb06, 0xc0a, 0xd03, 0xe09, 0xf00,
-        0x190, 0x99 , 0x393, 0x29a, 0x596, 0x49f, 0x795, 0x69c,
-        0x99c, 0x895, 0xb9f, 0xa96, 0xd9a, 0xc93, 0xf99, 0xe90,
-        0x230, 0x339, 0x33 , 0x13a, 0x636, 0x73f, 0x435, 0x53c,
-        0xa3c, 0xb35, 0x83f, 0x936, 0xe3a, 0xf33, 0xc39, 0xd30,
-        0x3a0, 0x2a9, 0x1a3, 0xaa , 0x7a6, 0x6af, 0x5a5, 0x4ac,
-        0xbac, 0xaa5, 0x9af, 0x8a6, 0xfaa, 0xea3, 0xda9, 0xca0,
-        0x460, 0x569, 0x663, 0x76a, 0x66 , 0x16f, 0x265, 0x36c,
-        0xc6c, 0xd65, 0xe6f, 0xf66, 0x86a, 0x963, 0xa69, 0xb60,
-        0x5f0, 0x4f9, 0x7f3, 0x6fa, 0x1f6, 0xff , 0x3f5, 0x2fc,
-        0xdfc, 0xcf5, 0xfff, 0xef6, 0x9fa, 0x8f3, 0xbf9, 0xaf0,
-        0x650, 0x759, 0x453, 0x55a, 0x256, 0x35f, 0x55 , 0x15c,
-        0xe5c, 0xf55, 0xc5f, 0xd56, 0xa5a, 0xb53, 0x859, 0x950,
-        0x7c0, 0x6c9, 0x5c3, 0x4ca, 0x3c6, 0x2cf, 0x1c5, 0xcc ,
-        0xfcc, 0xec5, 0xdcf, 0xcc6, 0xbca, 0xac3, 0x9c9, 0x8c0,
-        0x8c0, 0x9c9, 0xac3, 0xbca, 0xcc6, 0xdcf, 0xec5, 0xfcc,
-        0xcc , 0x1c5, 0x2cf, 0x3c6, 0x4ca, 0x5c3, 0x6c9, 0x7c0,
-        0x950, 0x859, 0xb53, 0xa5a, 0xd56, 0xc5f, 0xf55, 0xe5c,
-        0x15c, 0x55 , 0x35f, 0x256, 0x55a, 0x453, 0x759, 0x650,
-        0xaf0, 0xbf9, 0x8f3, 0x9fa, 0xef6, 0xfff, 0xcf5, 0xdfc,
-        0x2fc, 0x3f5, 0xff , 0x1f6, 0x6fa, 0x7f3, 0x4f9, 0x5f0,
-        0xb60, 0xa69, 0x963, 0x86a, 0xf66, 0xe6f, 0xd65, 0xc6c,
-        0x36c, 0x265, 0x16f, 0x66 , 0x76a, 0x663, 0x569, 0x460,
-        0xca0, 0xda9, 0xea3, 0xfaa, 0x8a6, 0x9af, 0xaa5, 0xbac,
-        0x4ac, 0x5a5, 0x6af, 0x7a6, 0xaa , 0x1a3, 0x2a9, 0x3a0,
-        0xd30, 0xc39, 0xf33, 0xe3a, 0x936, 0x83f, 0xb35, 0xa3c,
-        0x53c, 0x435, 0x73f, 0x636, 0x13a, 0x33 , 0x339, 0x230,
-        0xe90, 0xf99, 0xc93, 0xd9a, 0xa96, 0xb9f, 0x895, 0x99c,
-        0x69c, 0x795, 0x49f, 0x596, 0x29a, 0x393, 0x99 , 0x190,
-        0xf00, 0xe09, 0xd03, 0xc0a, 0xb06, 0xa0f, 0x905, 0x80c,
-        0x70c, 0x605, 0x50f, 0x406, 0x30a, 0x203, 0x109, 0x0   };
-    
-    int triTable[256][16] =
-    {{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 8, 3, 9, 8, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 8, 3, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {9, 2, 10, 0, 2, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {2, 8, 3, 2, 10, 8, 10, 9, 8, -1, -1, -1, -1, -1, -1, -1},
-        {3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 11, 2, 8, 11, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 9, 0, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 11, 2, 1, 9, 11, 9, 8, 11, -1, -1, -1, -1, -1, -1, -1},
-        {3, 10, 1, 11, 10, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 10, 1, 0, 8, 10, 8, 11, 10, -1, -1, -1, -1, -1, -1, -1},
-        {3, 9, 0, 3, 11, 9, 11, 10, 9, -1, -1, -1, -1, -1, -1, -1},
-        {9, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {4, 3, 0, 7, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 1, 9, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {4, 1, 9, 4, 7, 1, 7, 3, 1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 2, 10, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {3, 4, 7, 3, 0, 4, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1},
-        {9, 2, 10, 9, 0, 2, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1},
-        {2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, -1, -1, -1, -1},
-        {8, 4, 7, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {11, 4, 7, 11, 2, 4, 2, 0, 4, -1, -1, -1, -1, -1, -1, -1},
-        {9, 0, 1, 8, 4, 7, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1},
-        {4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, -1, -1, -1, -1},
-        {3, 10, 1, 3, 11, 10, 7, 8, 4, -1, -1, -1, -1, -1, -1, -1},
-        {1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, -1, -1, -1, -1},
-        {4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, -1, -1, -1, -1},
-        {4, 7, 11, 4, 11, 9, 9, 11, 10, -1, -1, -1, -1, -1, -1, -1},
-        {9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {9, 5, 4, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 5, 4, 1, 5, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {8, 5, 4, 8, 3, 5, 3, 1, 5, -1, -1, -1, -1, -1, -1, -1},
-        {1, 2, 10, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {3, 0, 8, 1, 2, 10, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1},
-        {5, 2, 10, 5, 4, 2, 4, 0, 2, -1, -1, -1, -1, -1, -1, -1},
-        {2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, -1, -1, -1, -1},
-        {9, 5, 4, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 11, 2, 0, 8, 11, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1},
-        {0, 5, 4, 0, 1, 5, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1},
-        {2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, -1, -1, -1, -1},
-        {10, 3, 11, 10, 1, 3, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1},
-        {4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, -1, -1, -1, -1},
-        {5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, -1, -1, -1, -1},
-        {5, 4, 8, 5, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1},
-        {9, 7, 8, 5, 7, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {9, 3, 0, 9, 5, 3, 5, 7, 3, -1, -1, -1, -1, -1, -1, -1},
-        {0, 7, 8, 0, 1, 7, 1, 5, 7, -1, -1, -1, -1, -1, -1, -1},
-        {1, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {9, 7, 8, 9, 5, 7, 10, 1, 2, -1, -1, -1, -1, -1, -1, -1},
-        {10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, -1, -1, -1, -1},
-        {8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, -1, -1, -1, -1},
-        {2, 10, 5, 2, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1},
-        {7, 9, 5, 7, 8, 9, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1},
-        {9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, -1, -1, -1, -1},
-        {2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, -1, -1, -1, -1},
-        {11, 2, 1, 11, 1, 7, 7, 1, 5, -1, -1, -1, -1, -1, -1, -1},
-        {9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, -1, -1, -1, -1},
-        {5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, -1},
-        {11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, -1},
-        {11, 10, 5, 7, 11, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 8, 3, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {9, 0, 1, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 8, 3, 1, 9, 8, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1},
-        {1, 6, 5, 2, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 6, 5, 1, 2, 6, 3, 0, 8, -1, -1, -1, -1, -1, -1, -1},
-        {9, 6, 5, 9, 0, 6, 0, 2, 6, -1, -1, -1, -1, -1, -1, -1},
-        {5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, -1, -1, -1, -1},
-        {2, 3, 11, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {11, 0, 8, 11, 2, 0, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1},
-        {0, 1, 9, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1},
-        {5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, -1, -1, -1, -1},
-        {6, 3, 11, 6, 5, 3, 5, 1, 3, -1, -1, -1, -1, -1, -1, -1},
-        {0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, -1, -1, -1, -1},
-        {3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, -1, -1, -1, -1},
-        {6, 5, 9, 6, 9, 11, 11, 9, 8, -1, -1, -1, -1, -1, -1, -1},
-        {5, 10, 6, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {4, 3, 0, 4, 7, 3, 6, 5, 10, -1, -1, -1, -1, -1, -1, -1},
-        {1, 9, 0, 5, 10, 6, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1},
-        {10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, -1, -1, -1, -1},
-        {6, 1, 2, 6, 5, 1, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1},
-        {1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, -1, -1, -1, -1},
-        {8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, -1, -1, -1, -1},
-        {7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, -1},
-        {3, 11, 2, 7, 8, 4, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1},
-        {5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, -1, -1, -1, -1},
-        {0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1},
-        {9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, -1},
-        {8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, -1, -1, -1, -1},
-        {5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, -1},
-        {0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, -1},
-        {6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, -1, -1, -1, -1},
-        {10, 4, 9, 6, 4, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {4, 10, 6, 4, 9, 10, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1},
-        {10, 0, 1, 10, 6, 0, 6, 4, 0, -1, -1, -1, -1, -1, -1, -1},
-        {8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, -1, -1, -1, -1},
-        {1, 4, 9, 1, 2, 4, 2, 6, 4, -1, -1, -1, -1, -1, -1, -1},
-        {3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, -1, -1, -1, -1},
-        {0, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {8, 3, 2, 8, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1},
-        {10, 4, 9, 10, 6, 4, 11, 2, 3, -1, -1, -1, -1, -1, -1, -1},
-        {0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, -1, -1, -1, -1},
-        {3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, -1, -1, -1, -1},
-        {6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, -1},
-        {9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, -1, -1, -1, -1},
-        {8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, -1},
-        {3, 11, 6, 3, 6, 0, 0, 6, 4, -1, -1, -1, -1, -1, -1, -1},
-        {6, 4, 8, 11, 6, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {7, 10, 6, 7, 8, 10, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1},
-        {0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, -1, -1, -1, -1},
-        {10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, -1, -1, -1, -1},
-        {10, 6, 7, 10, 7, 1, 1, 7, 3, -1, -1, -1, -1, -1, -1, -1},
-        {1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, -1, -1, -1, -1},
-        {2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, -1},
-        {7, 8, 0, 7, 0, 6, 6, 0, 2, -1, -1, -1, -1, -1, -1, -1},
-        {7, 3, 2, 6, 7, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, -1, -1, -1, -1},
-        {2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, -1},
-        {1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, -1},
-        {11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, -1, -1, -1, -1},
-        {8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, -1},
-        {0, 9, 1, 11, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, -1, -1, -1, -1},
-        {7, 11, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {3, 0, 8, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 1, 9, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {8, 1, 9, 8, 3, 1, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1},
-        {10, 1, 2, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 2, 10, 3, 0, 8, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1},
-        {2, 9, 0, 2, 10, 9, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1},
-        {6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, -1, -1, -1, -1},
-        {7, 2, 3, 6, 2, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {7, 0, 8, 7, 6, 0, 6, 2, 0, -1, -1, -1, -1, -1, -1, -1},
-        {2, 7, 6, 2, 3, 7, 0, 1, 9, -1, -1, -1, -1, -1, -1, -1},
-        {1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, -1, -1, -1, -1},
-        {10, 7, 6, 10, 1, 7, 1, 3, 7, -1, -1, -1, -1, -1, -1, -1},
-        {10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, -1, -1, -1, -1},
-        {0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, -1, -1, -1, -1},
-        {7, 6, 10, 7, 10, 8, 8, 10, 9, -1, -1, -1, -1, -1, -1, -1},
-        {6, 8, 4, 11, 8, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {3, 6, 11, 3, 0, 6, 0, 4, 6, -1, -1, -1, -1, -1, -1, -1},
-        {8, 6, 11, 8, 4, 6, 9, 0, 1, -1, -1, -1, -1, -1, -1, -1},
-        {9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, -1, -1, -1, -1},
-        {6, 8, 4, 6, 11, 8, 2, 10, 1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, -1, -1, -1, -1},
-        {4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, -1, -1, -1, -1},
-        {10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, -1},
-        {8, 2, 3, 8, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1},
-        {0, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, -1, -1, -1, -1},
-        {1, 9, 4, 1, 4, 2, 2, 4, 6, -1, -1, -1, -1, -1, -1, -1},
-        {8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, -1, -1, -1, -1},
-        {10, 1, 0, 10, 0, 6, 6, 0, 4, -1, -1, -1, -1, -1, -1, -1},
-        {4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, -1},
-        {10, 9, 4, 6, 10, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {4, 9, 5, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 8, 3, 4, 9, 5, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1},
-        {5, 0, 1, 5, 4, 0, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1},
-        {11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, -1, -1, -1, -1},
-        {9, 5, 4, 10, 1, 2, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1},
-        {6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, -1, -1, -1, -1},
-        {7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, -1, -1, -1, -1},
-        {3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, -1},
-        {7, 2, 3, 7, 6, 2, 5, 4, 9, -1, -1, -1, -1, -1, -1, -1},
-        {9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, -1, -1, -1, -1},
-        {3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, -1, -1, -1, -1},
-        {6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, -1},
-        {9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, -1, -1, -1, -1},
-        {1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, -1},
-        {4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, -1},
-        {7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, -1, -1, -1, -1},
-        {6, 9, 5, 6, 11, 9, 11, 8, 9, -1, -1, -1, -1, -1, -1, -1},
-        {3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, -1, -1, -1, -1},
-        {0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, -1, -1, -1, -1},
-        {6, 11, 3, 6, 3, 5, 5, 3, 1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, -1, -1, -1, -1},
-        {0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, -1},
-        {11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, -1},
-        {6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, -1, -1, -1, -1},
-        {5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, -1, -1, -1, -1},
-        {9, 5, 6, 9, 6, 0, 0, 6, 2, -1, -1, -1, -1, -1, -1, -1},
-        {1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, -1},
-        {1, 5, 6, 2, 1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, -1},
-        {10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, -1, -1, -1, -1},
-        {0, 3, 8, 5, 6, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {10, 5, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {11, 5, 10, 7, 5, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {11, 5, 10, 11, 7, 5, 8, 3, 0, -1, -1, -1, -1, -1, -1, -1},
-        {5, 11, 7, 5, 10, 11, 1, 9, 0, -1, -1, -1, -1, -1, -1, -1},
-        {10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, -1, -1, -1, -1},
-        {11, 1, 2, 11, 7, 1, 7, 5, 1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, -1, -1, -1, -1},
-        {9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, -1, -1, -1, -1},
-        {7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, -1},
-        {2, 5, 10, 2, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1},
-        {8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, -1, -1, -1, -1},
-        {9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, -1, -1, -1, -1},
-        {9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, -1},
-        {1, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 8, 7, 0, 7, 1, 1, 7, 5, -1, -1, -1, -1, -1, -1, -1},
-        {9, 0, 3, 9, 3, 5, 5, 3, 7, -1, -1, -1, -1, -1, -1, -1},
-        {9, 8, 7, 5, 9, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {5, 8, 4, 5, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1},
-        {5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, -1, -1, -1, -1},
-        {0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, -1, -1, -1, -1},
-        {10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, -1},
-        {2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, -1, -1, -1, -1},
-        {0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, -1},
-        {0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, -1},
-        {9, 4, 5, 2, 11, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, -1, -1, -1, -1},
-        {5, 10, 2, 5, 2, 4, 4, 2, 0, -1, -1, -1, -1, -1, -1, -1},
-        {3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, -1},
-        {5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, -1, -1, -1, -1},
-        {8, 4, 5, 8, 5, 3, 3, 5, 1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 4, 5, 1, 0, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, -1, -1, -1, -1},
-        {9, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {4, 11, 7, 4, 9, 11, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1},
-        {0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, -1, -1, -1, -1},
-        {1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, -1, -1, -1, -1},
-        {3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, -1},
-        {4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, -1, -1, -1, -1},
-        {9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, -1},
-        {11, 7, 4, 11, 4, 2, 2, 4, 0, -1, -1, -1, -1, -1, -1, -1},
-        {11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, -1, -1, -1, -1},
-        {2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, -1, -1, -1, -1},
-        {9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, -1},
-        {3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, -1},
-        {1, 10, 2, 8, 7, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {4, 9, 1, 4, 1, 7, 7, 1, 3, -1, -1, -1, -1, -1, -1, -1},
-        {4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, -1, -1, -1, -1},
-        {4, 0, 3, 7, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {4, 8, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {9, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {3, 0, 9, 3, 9, 11, 11, 9, 10, -1, -1, -1, -1, -1, -1, -1},
-        {0, 1, 10, 0, 10, 8, 8, 10, 11, -1, -1, -1, -1, -1, -1, -1},
-        {3, 1, 10, 11, 3, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 2, 11, 1, 11, 9, 9, 11, 8, -1, -1, -1, -1, -1, -1, -1},
-        {3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, -1, -1, -1, -1},
-        {0, 2, 11, 8, 0, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {3, 2, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {2, 3, 8, 2, 8, 10, 10, 8, 9, -1, -1, -1, -1, -1, -1, -1},
-        {9, 10, 2, 0, 9, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, -1, -1, -1, -1},
-        {1, 10, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {1, 3, 8, 9, 1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 9, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
-        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}};
-
-    
-    /*
-      Determine the index into the edge table which
-      tells us which vertices are inside of the surface
-     */
-    cubeindex = 0;
-
-    if (grid.val[0] < isolevel) cubeindex |= 1;
-    if (grid.val[1] < isolevel) cubeindex |= 2;
-    if (grid.val[2] < isolevel) cubeindex |= 4;
-    if (grid.val[3] < isolevel) cubeindex |= 8;
-    if (grid.val[4] < isolevel) cubeindex |= 16;
-    if (grid.val[5] < isolevel) cubeindex |= 32;
-    if (grid.val[6] < isolevel) cubeindex |= 64;
-    if (grid.val[7] < isolevel) cubeindex |= 128;
-    
-    
-    
-    /* Cube is entirely in/out of the surface */
-    if (edgeTable[cubeindex] == 0)
-        return(0);
-    
-    /* Find the vertices where the surface intersects the cube */
-    if (edgeTable[cubeindex] & 1)
-        vertlist[0] =
-         VertexInterp(isolevel,grid.p[0],grid.p[1],grid.val[0],grid.val[1]);
-    if (edgeTable[cubeindex] & 2)
-        vertlist[1] =
-         VertexInterp(isolevel,grid.p[1],grid.p[2],grid.val[1],grid.val[2]);
-    if (edgeTable[cubeindex] & 4)
-        vertlist[2] =
-         VertexInterp(isolevel,grid.p[2],grid.p[3],grid.val[2],grid.val[3]);
-    if (edgeTable[cubeindex] & 8)
-        vertlist[3] =
-         VertexInterp(isolevel,grid.p[3],grid.p[0],grid.val[3],grid.val[0]);
-    if (edgeTable[cubeindex] & 16)
-        vertlist[4] =
-         VertexInterp(isolevel,grid.p[4],grid.p[5],grid.val[4],grid.val[5]);
-    if (edgeTable[cubeindex] & 32)
-        vertlist[5] =
-         VertexInterp(isolevel,grid.p[5],grid.p[6],grid.val[5],grid.val[6]);
-    if (edgeTable[cubeindex] & 64)
-        vertlist[6] =
-         VertexInterp(isolevel,grid.p[6],grid.p[7],grid.val[6],grid.val[7]);
-    if (edgeTable[cubeindex] & 128)
-        vertlist[7] =
-         VertexInterp(isolevel,grid.p[7],grid.p[4],grid.val[7],grid.val[4]);
-    if (edgeTable[cubeindex] & 256)
-        vertlist[8] =
-         VertexInterp(isolevel,grid.p[0],grid.p[4],grid.val[0],grid.val[4]);
-    if (edgeTable[cubeindex] & 512)
-        vertlist[9] =
-         VertexInterp(isolevel,grid.p[1],grid.p[5],grid.val[1],grid.val[5]);
-    if (edgeTable[cubeindex] & 1024)
-        vertlist[10] =
-         VertexInterp(isolevel,grid.p[2],grid.p[6],grid.val[2],grid.val[6]);
-    if (edgeTable[cubeindex] & 2048)
-        vertlist[11] =
-         VertexInterp(isolevel,grid.p[3],grid.p[7],grid.val[3],grid.val[7]);
-    
-    /* Create the triangle */
-    ntriang = 0;
-    for (i=0; triTable[cubeindex][i]!=-1; i+=3) {
-        
-        triangles[ntriang].p[0] = vertlist[triTable[cubeindex][i  ]];
-        triangles[ntriang].p[1] = vertlist[triTable[cubeindex][i+1]];
-        triangles[ntriang].p[2] = vertlist[triTable[cubeindex][i+2]];
-        
-        ntriang++;
-    } // end for i
-    
-    return(ntriang);
-}
 
 
 struct bin_ijk_t{
@@ -765,9 +143,6 @@ void poly_basis(const double r[3], double *p) {
 
 void compute_shape_functions(
     size_t i,
-    const double xpt,
-    const double ypt,
-    const double zpt,
     const DCArrayKokkos <double>& x,
     const CArrayKokkos <double>& vol,
     const CArrayKokkos <double>& rk_coeffs,
@@ -783,9 +158,9 @@ void compute_shape_functions(
 
         double p[num_poly_basis];    // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz]
         double r[3];    // vecx_j - vecx_i
-        r[0] = x(j,0) - xpt; // x_j-x_i
-        r[1] = x(j,1) - ypt; // y_j-y_i
-        r[2] = x(j,2) - zpt; // z_j-z_i
+        r[0] = x(j,0) - x(i,0); // x_j-x_i
+        r[1] = x(j,1) - x(i,1); // y_j-y_i
+        r[2] = x(j,2) - x(i,2); // z_j-z_i
 
         double W = kernel(r, h);
         poly_basis(r,p);
@@ -793,7 +168,7 @@ void compute_shape_functions(
         double correction = 0.0;
         for (size_t a = 0; a < num_poly_basis; ++a){
             correction += rk_coeffs(i,a) * p[a];
-        } // end a
+        } // end for a
 
         rk_basis(j) = W * correction;
     });
@@ -804,7 +179,7 @@ void compute_shape_functions(
 
 
 
-// Build reproducing kernel coefficients for one particle
+// Build reproducing kernel coefficients for all particles in the domain
 void build_rk_coefficients(
     const DCArrayKokkos <double>& x,
     const CArrayKokkos <double>& vol,
@@ -892,88 +267,33 @@ int main(int argc, char *argv[])
     Kokkos::initialize(argc, argv);
     {  
 
-        printf("Pointcloud reconstruction \n\n");
+        printf("Pointcloud Reproducing Kernels \n\n");
 
-        if(argc==1){
-            printf("Please supply an STL file for testing the point cloud surface reconstruction code \n");
-            return 0;
-        }
-        
-        std::string filename = argv[1];
-
-        auto [normal_host, 
-              v1X_host, v1Y_host, v1Z_host, 
-              v2X_host, v2Y_host, v2Z_host, 
-              v3X_host, v3Y_host, v3Z_host, 
-              num_inp_triangles_host] = binary_stl_reader(filename);
-        
-        // Warning on C++ support:
-        // At this time with C++, the contents from a tuple cannot 
-        // be used inside a lambda function.  The parallel loops use 
-        // lambda functions. To overcome this C++ limitation, all 
-        // contents in the tuple will be copied or pointed to (Using 
-        // a MATAR dual view) allowing the data to be used in parallel.
-        const size_t num_inp_triangles = num_inp_triangles_host;
-        DViewCArrayKokkos <float> normal(&normal_host(0,0), num_inp_triangles, 3);
-        DViewCArrayKokkos <float> v1X(&v1X_host(0),num_inp_triangles); 
-        DViewCArrayKokkos <float> v1Y(&v1Y_host(0),num_inp_triangles); 
-        DViewCArrayKokkos <float> v1Z(&v1Z_host(0),num_inp_triangles); 
-        DViewCArrayKokkos <float> v2X(&v2X_host(0),num_inp_triangles); 
-        DViewCArrayKokkos <float> v2Y(&v2Y_host(0),num_inp_triangles); 
-        DViewCArrayKokkos <float> v2Z(&v2Z_host(0),num_inp_triangles); 
-        DViewCArrayKokkos <float> v3X(&v3X_host(0),num_inp_triangles); 
-        DViewCArrayKokkos <float> v3Y(&v3Y_host(0),num_inp_triangles); 
-        DViewCArrayKokkos <float> v3Z(&v3Z_host(0),num_inp_triangles);
-
-        normal.update_device(); 
-        v1X.update_device(); 
-        v1Y.update_device(); 
-        v1Z.update_device(); 
-        v2X.update_device(); 
-        v2Y.update_device(); 
-        v2Z.update_device(); 
-        v3X.update_device(); 
-        v3Y.update_device(); 
-        v3Z.update_device();
-        
-        
-        
-        // small distance for moving in the +/- normal directions 
-        double epsilon = 0.1*fmin(fmin(dx, dy), dz);
-
-
-        printf("Creating point cloud data from STL file \n\n");
 
         // define a point cloud
-        size_t num_points = num_inp_triangles*3; // 1 point per triangle plus 2 more in the +/- directions
         DCArrayKokkos <double> point_positions(num_points, 3, "point_positions");
-        DCArrayKokkos <double> point_signed_distance(num_points, "point_sign_distance"); // this is f in the journal paper
-
-        // 1 point per triangle at this time, thus a loop over tris
-        FOR_ALL(tri, 0, num_inp_triangles, {
-            // point on surface
-            point_positions(tri, 0) =  1.0/3.0*((double)v1X(tri) + (double)v2X(tri) + (double)v3X(tri));
-            point_positions(tri, 1) =  1.0/3.0*((double)v1Y(tri) + (double)v2Y(tri) + (double)v3Y(tri));
-            point_positions(tri, 2) =  1.0/3.0*((double)v1Z(tri) + (double)v2Z(tri) + (double)v3Z(tri));
+        DCArrayKokkos <double> point_values(num_points, "point_values"); 
 
-            point_signed_distance(tri) = 0.0;
-
-            // off surface +normal
-            point_positions(tri+num_inp_triangles, 0) =  point_positions(tri, 0) + epsilon*(double)normal(tri, 0);
-            point_positions(tri+num_inp_triangles, 1) =  point_positions(tri, 1) + epsilon*(double)normal(tri, 1);
-            point_positions(tri+num_inp_triangles, 2) =  point_positions(tri, 2) + epsilon*(double)normal(tri, 2);
-
-            point_signed_distance(tri+num_inp_triangles) = epsilon;
+        // point locations
+        srand(static_cast<unsigned int>(time(0))); // Seed the random number generator
+        for(size_t i=0; i<num_points; i++){
+            point_positions.host(i, 0) = X0 + LX*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
+            point_positions.host(i, 1) = Y0 + LY*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
+            point_positions.host(i, 2) = Z0 + LZ*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
+        }
+        point_positions.update_device();
+        Kokkos::fence();
 
-            // off surface -normal
-            point_positions(tri+2*num_inp_triangles, 0) =  point_positions(tri, 0) - epsilon*(double)normal(tri, 0);
-            point_positions(tri+2*num_inp_triangles, 1) =  point_positions(tri, 1) - epsilon*(double)normal(tri, 1);
-            point_positions(tri+2*num_inp_triangles, 2) =  point_positions(tri, 2) - epsilon*(double)normal(tri, 2);
+        // point values
+        FOR_ALL(i, 0, num_points, {
 
-            point_signed_distance(tri+2*num_inp_triangles) = -epsilon;
+            printf("point location at i=%d is (%f, %f, %f) \n", i, point_positions(i, 0), point_positions(i, 1), point_positions(i, 2));
+            point_values(i) = sqrt(point_positions(i, 0)*point_positions(i, 0) + 
+                                   point_positions(i, 1)*point_positions(i, 1) +
+                                   point_positions(i, 2)*point_positions(i, 2));
 
         }); // end parallel for tri's in the file
-
+        printf("\n");
 
 
         // ----------------------------
@@ -989,8 +309,7 @@ int main(int argc, char *argv[])
 
         size_t num_bins = num_bins_x*num_bins_y*num_bins_z;
 
-printf("num bins = %zu \n", num_bins);
-
+        //printf("num bins = %zu \n", num_bins);
 
         DCArrayKokkos <size_t> num_points_in_bin(num_bins);
         num_points_in_bin.set_values(0);
@@ -1052,11 +371,13 @@ printf("num bins = %zu \n", num_bins);
         double partion_unity;
         double partion_unity_lcl;
 
+        double linear_preserving;
+        double linear_preserving_lcl;
+
         for(size_t i=0; i<num_points; i++){
             
             // build basis functions at point i
             compute_shape_functions(i, 
-                                    point_positions(i,0), point_positions(i,1), point_positions(i,2), 
                                     point_positions, 
                                     vol, 
                                     rk_coeffs, 
@@ -1066,263 +387,22 @@ printf("num bins = %zu \n", num_bins);
             FOR_REDUCE_SUM(j, 0, num_points_neighborhood, partion_unity_lcl, {
                 partion_unity_lcl += rk_basis(j)*vol(j);
             }, partion_unity);
-
-            printf("partition unity = %f, at i=%zu \n", partion_unity, i);
-
-        } // end for i
-
-
-        // ----------------------------------
-        // Evaluate surface function on mesh
-        // ----------------------------------
-
-        // define mesh spacing, it is used to create a mesh
             
-        
-        // the number of nodes in the mesh
-        int num_pt_x = (int)( LX/dx ) + 1;  // there must be at least 2 nodes
-        int num_pt_y = (int)( LY/dy ) + 1;  // there must be at least 2 nodes
-        int num_pt_z = (int)( LZ/dz ) + 1;  // there must be at least 2 nodes
-        
-        
-        // mesh coordinates
-        DCArrayKokkos <double> x(num_pt_x, "pt_x");
-        DCArrayKokkos <double> y(num_pt_y, "pt_y");
-        DCArrayKokkos <double> z(num_pt_z, "pt_z");
-
-        
-        // function with isosurface that we want extracted
-        DCArrayKokkos <double> gridValues (num_pt_x,num_pt_y,num_pt_z, "grid_values");
-        
-
-        // define the triangles of extracted surface
-        const size_t num_elems = (num_pt_x-1)*(num_pt_y-1)*(num_pt_z-1);
-        DCArrayKokkos <triangle_t> all_mesh_surf_triangles(num_elems, 5, "mesh_surf_tris"); // max of 5 per elem
-        DCArrayKokkos <size_t> num_triangles_in_elem(num_elems, "num_tris_in_elem");
-        num_triangles_in_elem.set_values(0);
-
-        printf("Evaluating surf function on mesh \n");
-        FOR_ALL(i, 0, num_pt_x, {
-            x(i) = dx*(double)i + X0;;
-        });
-        FOR_ALL(j, 0, num_pt_y, {
-            y(j) = dy*(double)j + Y0;
-        });
-        FOR_ALL(k, 0, num_pt_z, {
-            z(k) = dz*(double)k + Z0;
-        });
-        Kokkos::fence();
-
-        // save mesh coordinates of the nodes
-        FOR_ALL(k, 0, num_pt_z, 
-                j, 0, num_pt_y,
-                i, 0, num_pt_x, {
-
-                    double x_point[3];
-                    x_point[0] = x(i);
-                    x_point[1] = y(j);
-                    x_point[2] = z(k);
-
-                    gridValues(i,j,k) = 0.0;
-                    
-                    // get i,j,k indices of the bins
-                    bin_ijk_t bin_ijk = get_bin_ijk(x_point[0], 
-                                                    x_point[1], 
-                                                    x_point[2]);
-
-                    // get the 1D index
-                    size_t bin_id = bin_ijk.i + (bin_ijk.j + bin_ijk.k*num_bins_y)*num_bins_x;
-
-                    size_t point_i; // the closest point 
-                    double dist_i = 1.e16;
-
-
-                    // find the closest point to the evaluation location
-                    for (size_t point_lid=0; point_lid<num_points_in_bin(bin_id); point_lid++){
-                        // get the point id
-                        size_t pt_id = points_in_bin(bin_id, point_lid);
-
-                        // calculate the distance between this point and evaluation location
-                        double dist = (point_positions(pt_id,0) - x(i))*(point_positions(pt_id,0) - x(i))+
-                                      (point_positions(pt_id,1) - y(j))*(point_positions(pt_id,1) - y(j))+ 
-                                      (point_positions(pt_id,2) - z(k))*(point_positions(pt_id,2) - z(k));
-                        dist = sqrt(dist);
-                        if(fabs(dist)<fabs(dist_i)){
-                            point_i = pt_id;
-                            dist_i = dist;
-                        }
-                    }
-
-                    for (size_t point_j=0; point_j<num_points_neighborhood; point_j++){
-                        // evaluate basis at gridpoint using closest point_i
-                        compute_shape_functions(point_i, 
-                                                x(i), y(i), z(i), 
-                                                point_positions, 
-                                                vol, 
-                                                rk_coeffs, 
-                                                rk_basis, 
-                                                h);
-                        gridValues(i,j,k) += rk_basis(point_j)*point_signed_distance(point_j)*vol(point_j);
-                    } // end for points     
-
-        
-        }); // end parallel over k,j,i
-
-        x.update_host();
-        y.update_host();
-        z.update_host();
-        gridValues.update_host();
-        
-        
-     
-        
-        // ------------------------------------
-        // Use marching cubes to build surface
-        // ------------------------------------
-
-        printf("Running marching cubes algorithm\n");
-        
-        FOR_ALL(k, 0, num_pt_z-1,
-                j, 0, num_pt_y-1,
-                i, 0, num_pt_x-1, {
-
-                    // elem gid
-                    size_t elem_gid = i + j*(num_pt_x-1) + k*(num_pt_x-1)*(num_pt_y-1);
-        
-                    // extract the x,y,z node coords
-                    // using the index ordering for the cell
-                    vec_t xyzs [8];
-                    xyzs[0] = vec_t(x(i  ), y(j  ), z(k  ));
-                    xyzs[1] = vec_t(x(i+1), y(j  ), z(k  ));
-                    xyzs[2] = vec_t(x(i+1), y(j  ), z(k+1));
-                    xyzs[3] = vec_t(x(i  ), y(j  ), z(k+1));
-                    xyzs[4] = vec_t(x(i  ), y(j+1), z(k  ));
-                    xyzs[5] = vec_t(x(i+1), y(j+1), z(k  ));
-                    xyzs[6] = vec_t(x(i+1), y(j+1), z(k+1));
-                    xyzs[7] = vec_t(x(i  ), y(j+1), z(k+1));
-        
-        
-                    // extract the values at the nodes
-                    // using the index ordering for the cell
-                    double vals [8];
-                    vals[0] = gridValues(i  ,j  ,k  );
-                    vals[1] = gridValues(i+1,j  ,k  );
-                    vals[2] = gridValues(i+1,j  ,k+1);
-                    vals[3] = gridValues(i  ,j  ,k+1);
-                    vals[4] = gridValues(i  ,j+1,k  );
-                    vals[5] = gridValues(i+1,j+1,k  );
-                    vals[6] = gridValues(i+1,j+1,k+1);
-                    vals[7] = gridValues(i  ,j+1,k+1);
-        
-        
-                    // details of the cell, save coords and the values at the nodes
-                    gridcell_t cell(xyzs, vals);
-        
-        
-                    // the most triangles in a cell is 5
-                    triangle_t triangles[5];
-                    num_triangles_in_elem(elem_gid) = Polygonise(cell, isoLevel, triangles);
-        
-                    // save the triangles
-                    for (size_t tri = 0; tri < num_triangles_in_elem(elem_gid); tri++)
-                    {
-                        all_mesh_surf_triangles(elem_gid,tri) = triangles[tri];
-                    } // end for tri
-
-        });  // end parallel for k,j,i
-
-        
-        // calculate the normal vector of triangles
-        FOR_ALL(elem_gid, 0, num_elems, {
-            for (size_t tri = 0; tri < num_triangles_in_elem(elem_gid); tri++){
-                calc_normal(&all_mesh_surf_triangles(elem_gid, tri));
-            }
-        }); // end loop over triangles
-
-        all_mesh_surf_triangles.update_host();
-        num_triangles_in_elem.update_host();
-
-        
-        printf("Marching cubes finished \n\n");
-        
-
-
-        // --------------------------------------------------
-        // volume calculation
-        // --------------------------------------------------
-        double volume = 0.0;
-        double vol_lcl = 0.0;
-        FOR_REDUCE_SUM(elem_gid, 0, num_elems, 
-                       vol_lcl, {
-
-            for (size_t tri = 0; tri < num_triangles_in_elem(elem_gid); tri++){
-                vol_lcl += compute_volume(all_mesh_surf_triangles(elem_gid,tri)); 
-            }
 
-        }, volume);
-        volume = fabs(volume);
+            FOR_REDUCE_SUM(j, 0, num_points_neighborhood, linear_preserving_lcl, {
+                linear_preserving_lcl += rk_basis(j)*vol(j)*point_positions(j,0);
+            }, linear_preserving);
 
-        double radius =  0.794651/2.0; // radius of constructured part, based on a small mesh size
-        double PI = 3.14159265358979323846264338327950288419716939937510;
-        double vol_exact = 4.0/3.0*PI*radius*radius*radius;
-        printf("volume = %f, and `exact' sphere volume = %f \n", volume, vol_exact);
-
-        // 0.262744 at 0.001 mesh size  
+            printf("partition unity = %f,  ", partion_unity);
+            printf("linear preserving error = %f at i=%zu \n", fabs(linear_preserving-point_positions(i,0)), i);
 
+        } // end for i
 
-        // --------------------------------------------------
-        // Export STL file using results from marching cubes
-        // --------------------------------------------------
 
-        printf("Exporting STL file for a 3D printer\n");
-        
-        
-        // export triangles as STL file
-        
-        FILE * myfile;
-        myfile=fopen("surface.stl","w");
-        fprintf(myfile,"solid points \n");
-        // a serial file write
-        for(size_t elem_gid=0; elem_gid<num_elems; elem_gid++){
-            for (size_t tri = 0; tri < num_triangles_in_elem.host(elem_gid); tri++){
-        
-                fprintf(myfile,"facet normal %f %f %f\n",
-                        all_mesh_surf_triangles.host(elem_gid,tri).normal.x,
-                        all_mesh_surf_triangles.host(elem_gid,tri).normal.y,
-                        all_mesh_surf_triangles.host(elem_gid,tri).normal.z);
-                
-                fprintf(myfile,"outer loop \n");
-                
-                fprintf(myfile,"vertex %f %f %f\n",
-                        all_mesh_surf_triangles.host(elem_gid,tri).p[0].x,
-                        all_mesh_surf_triangles.host(elem_gid,tri).p[0].y,
-                        all_mesh_surf_triangles.host(elem_gid,tri).p[0].z);
-                
-                fprintf(myfile,"vertex %f %f %f\n",
-                        all_mesh_surf_triangles.host(elem_gid,tri).p[1].x,
-                        all_mesh_surf_triangles.host(elem_gid,tri).p[1].y,
-                        all_mesh_surf_triangles.host(elem_gid,tri).p[1].z);
-                
-                fprintf(myfile,"vertex %f %f %f\n",
-                        all_mesh_surf_triangles.host(elem_gid,tri).p[2].x,
-                        all_mesh_surf_triangles.host(elem_gid,tri).p[2].y,
-                        all_mesh_surf_triangles.host(elem_gid,tri).p[2].z);
-                fprintf(myfile,"endloop \n");
-                fprintf(myfile,"endfacet \n");
-            }   
-        } // end loop over triangles
-        fprintf(myfile,"endsolid points \n");
-        
-        fclose(myfile);
-            
     
         printf("Finished \n\n");
 
 
-
-
-
-
     } // end of kokkos scope
 
 

From 03cb262ef6848ac87b5e977e20831964e8842867 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@syrupcastle.lanl.gov>
Date: Tue, 9 Sep 2025 17:03:14 -0600
Subject: [PATCH 07/23] added vtk writer of particles

---
 examples/pointcloud/pointcloud-rk.cpp | 30 +++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index 6fd003cf..4c7faca7 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -293,6 +293,8 @@ int main(int argc, char *argv[])
                                    point_positions(i, 2)*point_positions(i, 2));
 
         }); // end parallel for tri's in the file
+        point_values.update_host();
+        Kokkos::fence();
         printf("\n");
 
 
@@ -393,22 +395,46 @@ int main(int argc, char *argv[])
                 linear_preserving_lcl += rk_basis(j)*vol(j)*point_positions(j,0);
             }, linear_preserving);
 
-            printf("partition unity = %f,  ", partion_unity);
+            printf("partition unity = %f, ", partion_unity);
             printf("linear preserving error = %f at i=%zu \n", fabs(linear_preserving-point_positions(i,0)), i);
 
         } // end for i
 
 
+        printf("Writing VTK Graphics File \n\n");
+
+        std::ofstream out("cloud.vtk");
+
+        out << "# vtk DataFile Version 3.0\n";
+        out << "3D point cloud\n";
+        out << "ASCII\n";
+        out << "DATASET POLYDATA\n";
+        out << "POINTS " << num_points << " float\n";
+        for (size_t pt_id = 0; pt_id < num_points; ++pt_id) {
+            out << point_positions.host(pt_id,0) << " " 
+                << point_positions.host(pt_id,1) << " " 
+                << point_positions.host(pt_id,2) << "\n";
+        }
+
+        out << "\nPOINT_DATA " << num_points << "\n";
+        out << "SCALARS field float 1\n";
+        out << "LOOKUP_TABLE default\n";
+        for (size_t pt_id = 0; pt_id < num_points; ++pt_id) {
+            out << point_values.host(pt_id) << "\n";
+        }
+
     
         printf("Finished \n\n");
 
 
-    } // end of kokkos scope
 
+    } // end of kokkos scope
 
 
     Kokkos::finalize();
 
+
+
     return 0;
     
 } // end main

From dbf96a2ba9b8b17954e3b4b55a959bd3775e0cdc Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@syrupcastle.lanl.gov>
Date: Thu, 11 Sep 2025 11:01:09 -0600
Subject: [PATCH 08/23] added binning to make local neighbors

---
 examples/pointcloud/pointcloud-rk.cpp | 402 ++++++++++++++++++++------
 1 file changed, 317 insertions(+), 85 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index 4c7faca7..3f86e704 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -53,6 +53,8 @@
 
 #include "lu_solver.hpp"
 
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
 
 using namespace mtr;
 
@@ -64,9 +66,9 @@ const double PI = 3.14159265358979323846;
 const size_t num_points = 101;
 
 // the bin sizes for finding neighboring points
-const double bin_dx = 0.5; // 2 bins in x
-const double bin_dy = 0.5; // 2 bins in y
-const double bin_dz = 0.5; // 2 bins in z
+const double bin_dx = 0.05; // bins in x
+const double bin_dy = 0.05; // bins in y
+const double bin_dz = 0.05; // bins in z
 
 const double X0 = 0.0;   // origin
 const double Y0 = 0.0;
@@ -82,26 +84,56 @@ const double LZ = 1.0;
 
 
 
-
-struct bin_ijk_t{
-    size_t i, j, k;
+struct bin_keys_t{
+    size_t i,j,k;
 };
 
+KOKKOS_INLINE_FUNCTION
+size_t get_gid(size_t i, size_t j, size_t k, size_t num_x, size_t num_y){
+    return i + (j + k*num_y)*num_x;
+}
 
-bin_ijk_t get_bin_ijk(const double x_pt, const double y_pt, const double z_pt){
+KOKKOS_INLINE_FUNCTION
+bin_keys_t get_bin_keys(const double x_pt, 
+                        const double y_pt, 
+                        const double z_pt){
+            
+
+    double i_dbl = fmax(1.0e-15, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // x = ih + X0 + dx_bin*0.5
+    double j_dbl = fmax(1.0e-15, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10));
+    double k_dbl = fmax(1.0e-15, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10));
+
+    bin_keys_t bin_keys; // save i,j,k to the bin keys
+
+    // get the integer for the bins
+    bin_keys.i = (size_t)i_dbl;
+    bin_keys.j = (size_t)j_dbl;
+    bin_keys.k = (size_t)k_dbl;
+
+    return bin_keys;
+
+} // end function
+
+KOKKOS_INLINE_FUNCTION
+size_t get_bin_gid(const double x_pt, 
+                   const double y_pt, 
+                   const double z_pt, 
+                   const size_t num_bins_x,
+                   const size_t num_bins_y){
             
-    bin_ijk_t bin_ijk;
 
     double i_dbl = fmax(1.0e-15, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // x = ih + X0 + dx_bin*0.5
     double j_dbl = fmax(1.0e-15, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10));
     double k_dbl = fmax(1.0e-15, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10));
 
     // get the integers for the bins
-    bin_ijk.i = (size_t)i_dbl;
-    bin_ijk.j = (size_t)j_dbl;
-    bin_ijk.k = (size_t)k_dbl;
+    size_t i = (size_t)i_dbl;
+    size_t j = (size_t)j_dbl;
+    size_t k = (size_t)k_dbl;
     
-    return bin_ijk;
+    // get the 1D index for this bin                               
+    return get_gid(i, j, k, num_bins_x, num_bins_y);
+
 } // end function
 
 // Gaussian function part of the RBF
@@ -142,39 +174,46 @@ void poly_basis(const double r[3], double *p) {
 
 
 void compute_shape_functions(
-    size_t i,
+    size_t point_gid,
     const DCArrayKokkos <double>& x,
+    const DCArrayKokkos <size_t> points_num_neighbors, 
+    const DRaggedRightArrayKokkos <size_t> points_in_point,
     const CArrayKokkos <double>& vol,
     const CArrayKokkos <double>& rk_coeffs,
-    const CArrayKokkos <double>& rk_basis,
+    const DRaggedRightArrayKokkos <double>& rk_basis,
     const double h)
 {
 
-    // global num_points at this time, make it num_points in neighborhood
-    size_t num_points_neighborhood = x.dims(0); // will come from hash bins
+    //---------------------------------------------
+    // walk over the neighboring points 
+    //---------------------------------------------
+
+    FOR_ALL(neighbor_point_lid, 0, points_num_neighbors(point_gid), {
 
-    // loop over all neighbors around point i
-    FOR_ALL(j, 0, num_points_neighborhood, {
+        size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
 
         double p[num_poly_basis];    // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz]
         double r[3];    // vecx_j - vecx_i
-        r[0] = x(j,0) - x(i,0); // x_j-x_i
-        r[1] = x(j,1) - x(i,1); // y_j-y_i
-        r[2] = x(j,2) - x(i,2); // z_j-z_i
+        r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i
+        r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i
+        r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i
 
         double W = kernel(r, h);
         poly_basis(r,p);
 
         double correction = 0.0;
         for (size_t a = 0; a < num_poly_basis; ++a){
-            correction += rk_coeffs(i,a) * p[a];
+            correction += rk_coeffs(point_gid,a) * p[a];
         } // end for a
 
-        rk_basis(j) = W * correction;
-    });
+        rk_basis(point_gid, neighbor_point_lid) = W * correction;
+
+    }); // neighbor_point_lid
+
 
 
     return;
+    
 } // end function
 
 
@@ -182,39 +221,44 @@ void compute_shape_functions(
 // Build reproducing kernel coefficients for all particles in the domain
 void build_rk_coefficients(
     const DCArrayKokkos <double>& x,
+    const DCArrayKokkos <size_t> points_num_neighbors, 
+    const DRaggedRightArrayKokkos <size_t> points_in_point,
     const CArrayKokkos <double>& vol,
     const CArrayKokkos <double>& rk_coeffs,
     double h)
 {
 
-    // global num_points at this time, make it num_points in neighborhood
-    size_t num_points_neighborhood = x.dims(0); // will come from hash bins
-
     // actual number of points
     size_t num_points = x.dims(0);
 
     
     // loop over all nodes in the problem
-    FOR_ALL(i, 0, num_points, {
+    FOR_ALL(point_gid, 0, num_points, {
 
         double M_1D[num_poly_basis*num_poly_basis]; 
         ViewCArrayKokkos <double> M(&M_1D[0], num_poly_basis, num_poly_basis);
         M.set_values(0.0);
 
         // values in rhs after this function will be accessed as rk_coeffs(i,0:N)
-        ViewCArrayKokkos <double> rhs (&rk_coeffs(i,0), num_poly_basis);
+        ViewCArrayKokkos <double> rhs (&rk_coeffs(point_gid,0), num_poly_basis);
         rhs.set_values(0.0);
         rhs(0) = 1.0;   // enforce reproduction of constant 1, everything else is = 0
 
         double p[num_poly_basis];    // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz]
         double r[3];    // vecx_j - vecx_i
 
-        // loop over all nodes around point i
-        for (size_t j = 0; j < num_points_neighborhood; ++j) {
-           
-            r[0] = x(j,0) - x(i,0); // x_j-x_i
-            r[1] = x(j,1) - x(i,1); // y_j-y_i
-            r[2] = x(j,2) - x(i,2); // z_j-z_i
+
+        //---------------------------------------------
+        // walk over the neighboring points
+        //---------------------------------------------
+
+        for (size_t neighbor_point_lid=0; neighbor_point_lid<points_num_neighbors(point_gid); neighbor_point_lid++){
+
+            size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+
+            r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i
+            r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i
+            r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i
 
             double W = kernel(r, h);
             poly_basis(r,p);
@@ -223,11 +267,12 @@ void build_rk_coefficients(
 
             for (size_t a = 0; a < num_poly_basis; ++a) {
                 for (size_t b = 0; b < num_poly_basis; ++b) {
-                    M(a,b) += vol(j) * W * p[a] * p[b]; 
+                    M(a,b) += vol(neighbor_point_gid) * W * p[a] * p[b]; 
                 } // end for b
             } // for a
 
-        } // end for point neighbors j
+        } // neighbor_point_lid
+
     
         // -------------
         // solve Ax=B
@@ -306,69 +351,251 @@ int main(int argc, char *argv[])
         size_t num_bins_x = (size_t)( round(LX/bin_dx) );  
         size_t num_bins_y = (size_t)( round(LY/bin_dy) );  
         size_t num_bins_z = (size_t)( round(LZ/bin_dz) );  
+        size_t num_bins = num_bins_x*num_bins_y*num_bins_z;
 
+        // bins and their connectivity to each other and points
+        DCArrayKokkos <bin_keys_t> keys_in_bin(num_bins, "keys_in_bin"); // mapping from gid to (i,j,k)
+        DCArrayKokkos <size_t> num_points_in_bin(num_bins, "num_bins");
+        num_points_in_bin.set_values(0);
+        DRaggedRightArrayKokkos <size_t> points_in_bin; // allocated later
         
 
-        size_t num_bins = num_bins_x*num_bins_y*num_bins_z;
+        // connectivity from points to bins
+        DCArrayKokkos <size_t> points_bin_gid(num_points, "points_in_gid");
+        CArrayKokkos <size_t>  points_bin_lid_storage(num_points, "bin_lid_storage");  // only used to create storage
+        DCArrayKokkos <size_t> points_bin_stencil(num_points, "bin_stencil"); // how many bins needed for a particle
+        DCArrayKokkos <size_t> points_num_neighbors(num_points, "num_neighbors");
+        
+        // build reverse mapping between gid and i,j,k
+        FOR_ALL(i, 0, num_bins_x,
+                j, 0, num_bins_y,
+                k, 0, num_bins_z, {
+            
 
-        //printf("num bins = %zu \n", num_bins);
+            // get bin gid for this i,j,k
+            size_t bin_gid = get_gid(i, j, k, num_bins_x, num_bins_y);
 
-        DCArrayKokkos <size_t> num_points_in_bin(num_bins);
-        num_points_in_bin.set_values(0);
-        DCArrayKokkos <size_t> points_bin_id(num_points);
-        DCArrayKokkos <size_t> points_bin_id_storage(num_points);
-        
-        FOR_ALL(pt_id, 0, num_points, {
+            // the i,j,k for this bin
+            bin_keys_t bin_keys;
+            bin_keys.i = i;
+            bin_keys.j = j;
+            bin_keys.k = k;
+
+            // save mapping from bin_gid to bin_keys i,j,k
+            keys_in_bin(bin_gid) = bin_keys;
 
-            // get i,j,k indices of the bins
-            bin_ijk_t bin_ijk = get_bin_ijk(point_positions(pt_id,0), 
-                                            point_positions(pt_id,1), 
-                                            point_positions(pt_id,2));
+        });
+        Kokkos::fence();
+        keys_in_bin.update_host();
+
+
+        // -------------------------------------------------------------------
+        // below here, these routine must be called every time particles move
+        // -------------------------------------------------------------------
 
-            // get the 1D index
-            size_t bin_id = bin_ijk.i + (bin_ijk.j + bin_ijk.k*num_bins_y)*num_bins_x;
-          
-            size_t storage_place = Kokkos::atomic_fetch_add(&num_points_in_bin(bin_id), 1);
-            points_bin_id(pt_id) = bin_id; // the id of the bin
-            points_bin_id_storage(pt_id) = storage_place; // the storage place in the bin
+        // save bin id to points
+        FOR_ALL(point_gid, 0, num_points, {
+
+            // get the 1D index for this bin
+            size_t bin_gid = get_bin_gid(point_positions(point_gid,0), 
+                                         point_positions(point_gid,1), 
+                                         point_positions(point_gid,2),
+                                         num_bins_x, 
+                                         num_bins_y);
+
+            size_t storage_lid = Kokkos::atomic_fetch_add(&num_points_in_bin(bin_gid), 1);
+            points_bin_gid(point_gid) = bin_gid; // the id of the bin
+            points_bin_lid_storage(point_gid) = storage_lid; // the storage place in the bin
 
         }); // end for all
+        Kokkos::fence();
+        points_bin_gid.update_host();
+        num_points_in_bin.update_host();
 
-        DRaggedRightArrayKokkos <size_t> points_in_bin(num_points_in_bin);
 
-        FOR_ALL(pt_id, 0, num_points, {
+        // allocate points in bin connectivity
+        points_in_bin = DRaggedRightArrayKokkos <size_t> (num_points_in_bin, "num_points_in_bin");
 
-            size_t bin_id = points_bin_id(pt_id);
-            size_t storage_place = points_bin_id_storage(pt_id);
-            points_in_bin(bin_id, storage_place) = pt_id;
+        // save points in bin
+        FOR_ALL(point_gid, 0, num_points, {
+
+            // get bin gid
+            size_t bin_gid = points_bin_gid(point_gid);
+
+            // get it's storage location in the ragged right compressed storage
+            size_t storage_lid = points_bin_lid_storage(point_gid);
+
+            // save the point to this bin
+            points_in_bin(bin_gid, storage_lid) = point_gid;
 
         }); // end for all
+
+
+
+        // ------------------------------------------------
+        // Find the neighbors around each point using bins
+        // ------------------------------------------------
         
+        FOR_ALL(point_gid, 0, num_points, {
 
-        // ----------------------------
-        // Reconstruct surface here
-        // ----------------------------
+            // get bin gid
+            size_t bin_gid = points_bin_gid(point_gid);
+            
+            // get i,j,k for this bin
+            bin_keys_t bin_keys = keys_in_bin(bin_gid);
+            // printf(" keys = %zu, %zu, %zu, bin size = %zu, %zu, %zu \n", 
+            //     bin_keys.i, bin_keys.j, bin_keys.k,
+            //     num_bins_x, num_bins_y, num_bins_z);
+
+            // loop over neighboring bins
+            size_t num_points_found;
+
+            // establish the stencil size to get enough particles
+            for(int stencil=1; stencil<1000; stencil++){
+
+                num_points_found = 0;
+
+                const int i = bin_keys.i;
+                const int j = bin_keys.j;
+                const int k = bin_keys.k;
+
+                const int imin = MAX(0, i-stencil);
+                const int imax = MIN(num_bins_x-1, i+stencil);
+
+                const int jmin = MAX(0, j-stencil);
+                const int jmax = MIN(num_bins_y-1, j+stencil);
+
+                const int kmin = MAX(0, k-stencil);
+                const int kmax = MIN(num_bins_z-1, k+stencil);
+
+                for (int icount=imin; icount<=imax; icount++){
+                    for (int jcount=jmin; jcount<=jmax; jcount++) {
+                        for (int kcount=kmin; kcount<=kmax; kcount++){
+
+                            // get bin neighbor gid 
+                            size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y);
+                            num_points_found += num_points_in_bin(neighbor_bin_gid);
+
+                        } // end for kcount
+                    } // end for jcount
+                } // end for icount
+
+                // the min number of points required to solve the system is num_poly_basis+1
+                if (num_points_found > num_poly_basis+5){
+
+                    points_bin_stencil(point_gid) = stencil;
+                    points_num_neighbors(point_gid) = num_points_found; // key for allocations
+                    printf("neighbors found = %zu \n", num_points_found);
+                    break;
+                }
+                
+            } // end for stencil
+
+            printf("num_pts_in_bin = %zu, neighbors found = %zu, Stencil size = %zu, bin keys = %zu, %zu, %zu \n", 
+                   num_points_in_bin(bin_gid),
+                   points_num_neighbors(point_gid),
+                   points_bin_stencil(point_gid),
+                   bin_keys.i,
+                   bin_keys.j,
+                   bin_keys.k);
+
+        }); // end for all
+        Kokkos::fence();
+        points_bin_stencil.update_host();
+        points_num_neighbors.update_host();
+        
+        // allocate memory for points in point
+        DRaggedRightArrayKokkos <size_t> points_in_point(points_num_neighbors, "points_in_point");
+
+        // ---------------------
+        // Save the neighbors
+        // ---------------------
 
-        printf("Reconstructing surface using point cloud data \n\n");
+        // find my neighbors using bins
+        FOR_ALL(point_gid, 0, num_points, {
 
-        // assuming all point neighbors contribute, will change to a hash bins
-        const size_t num_points_neighborhood = num_points;
+            // get bin gid for this point
+            size_t bin_gid = points_bin_gid(point_gid);
+                    
+            // get i,j,k for this bin
+            bin_keys_t bin_keys = keys_in_bin(bin_gid);
 
-        CArrayKokkos <double> rk_coeffs(num_points, num_poly_basis);  // reproducing kernel coefficients at each point
-        CArrayKokkos <double> rk_basis(num_points);       // reproducing kernel basis, should have size num_points_neighborhood
+            const int i = bin_keys.i;
+            const int j = bin_keys.j;
+            const int k = bin_keys.k;
+
+            // walk over the stencil to get neighbors
+            const int stencil = points_bin_stencil(point_gid);
+
+            const int imin = MAX(0, i-stencil);
+            const int imax = MIN(num_bins_x-1, i+stencil);
+
+            const int jmin = MAX(0, j-stencil);
+            const int jmax = MIN(num_bins_y-1, j+stencil);
+
+            const int kmin = MAX(0, k-stencil);
+            const int kmax = MIN(num_bins_z-1, k+stencil);
+
+            size_t num_saved = 0;
+            size_t num_points_found = 0;
+
+            for (int icount=imin; icount<=imax; icount++){
+                for (int jcount=jmin; jcount<=jmax; jcount++) {
+                    for (int kcount=kmin; kcount<=kmax; kcount++){
+
+                        // get bin neighbor gid 
+                        size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y);
+                        num_points_found += num_points_in_bin(neighbor_bin_gid);
+
+                        // save the points in this bin
+                        for(size_t neighbor_pt_lid=0; neighbor_pt_lid<num_points_in_bin(neighbor_bin_gid); neighbor_pt_lid++){
+
+                            size_t neighbor_point_gid = points_in_bin(neighbor_bin_gid, neighbor_pt_lid);
+                            
+                            printf("num saved = %zu,  points_num_neighbors = %zu, num_points_found = %zu \n", 
+                                num_saved, points_num_neighbors(point_gid), num_points_found);
+                            points_in_point(point_gid, num_saved) = neighbor_point_gid;
+                            
+                            num_saved++;
+
+                        } // neighbor_point_lid
+
+                    } // end for kcount
+                } // end for jcount
+            } // end for icount        
+
+        }); // end for all
+        Kokkos::fence();
+        points_in_point.update_host();
+
+
+
+        // ----------------------------------------
+        // Find basis that reconstructs polynomial 
+        // ----------------------------------------
+
+        printf("Reconstructing basis using point cloud data \n\n");
+
+
+        CArrayKokkos <double> rk_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point
+        DRaggedRightArrayKokkos <double> rk_basis(points_num_neighbors);   // reproducing kernel basis (num_points, num_neighbors)
         CArrayKokkos <double> vol(num_points);
         vol.set_values(1.0);
 
         double h = 1.0;
 
 
-        printf("building rk coefficients \n");
+        printf("building reproducing kernel coefficients \n");
 
         // build coefficients on basis functions
-        build_rk_coefficients(point_positions, vol, rk_coeffs, h);
+        build_rk_coefficients(point_positions, 
+                              points_num_neighbors, 
+                              points_in_point, 
+                              vol, 
+                              rk_coeffs, 
+                              h);
 
         
-        
         // performing checks on rk_coeffs
         double partion_unity;
         double partion_unity_lcl;
@@ -376,29 +603,34 @@ int main(int argc, char *argv[])
         double linear_preserving;
         double linear_preserving_lcl;
 
-        for(size_t i=0; i<num_points; i++){
+        // loop over the particles in the domain
+        for(size_t point_gid=0; point_gid<num_points; point_gid++){
             
             // build basis functions at point i
-            compute_shape_functions(i, 
+            compute_shape_functions(point_gid, 
                                     point_positions, 
+                                    points_num_neighbors, 
+                                    points_in_point, 
                                     vol, 
                                     rk_coeffs, 
                                     rk_basis, 
                                     h);
 
-            FOR_REDUCE_SUM(j, 0, num_points_neighborhood, partion_unity_lcl, {
-                partion_unity_lcl += rk_basis(j)*vol(j);
+            FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), partion_unity_lcl, {
+                partion_unity_lcl += rk_basis(point_gid,neighbor_point_lid)*vol(neighbor_point_lid);
             }, partion_unity);
             
 
-            FOR_REDUCE_SUM(j, 0, num_points_neighborhood, linear_preserving_lcl, {
-                linear_preserving_lcl += rk_basis(j)*vol(j)*point_positions(j,0);
+            FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), linear_preserving_lcl, {
+                // get the point gid for this neighboring
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                linear_preserving_lcl += rk_basis(point_gid,neighbor_point_lid)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0);
             }, linear_preserving);
 
             printf("partition unity = %f, ", partion_unity);
-            printf("linear preserving error = %f at i=%zu \n", fabs(linear_preserving-point_positions(i,0)), i);
+            printf("linear preserving error = %f at i=%zu \n", fabs(linear_preserving-point_positions(point_gid,0)), point_gid);
 
-        } // end for i
+        } // end for point gid
 
 
         printf("Writing VTK Graphics File \n\n");
@@ -410,17 +642,17 @@ int main(int argc, char *argv[])
         out << "ASCII\n";
         out << "DATASET POLYDATA\n";
         out << "POINTS " << num_points << " float\n";
-        for (size_t pt_id = 0; pt_id < num_points; ++pt_id) {
-            out << point_positions.host(pt_id,0) << " " 
-                << point_positions.host(pt_id,1) << " " 
-                << point_positions.host(pt_id,2) << "\n";
+        for (size_t point_gid = 0; point_gid < num_points; ++point_gid) {
+            out << point_positions.host(point_gid,0) << " " 
+                << point_positions.host(point_gid,1) << " " 
+                << point_positions.host(point_gid,2) << "\n";
         }
 
         out << "\nPOINT_DATA " << num_points << "\n";
         out << "SCALARS field float 1\n";
         out << "LOOKUP_TABLE default\n";
-        for (size_t pt_id = 0; pt_id < num_points; ++pt_id) {
-            out << point_values.host(pt_id) << "\n";
+        for (size_t point_gid = 0; point_gid < num_points; ++point_gid) {
+            out << point_values.host(point_gid) << "\n";
         }
 
     

From b0b7909ef2d68bc1ede2af31d4c80f7d38e4f366 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@syrupcastle.lanl.gov>
Date: Thu, 11 Sep 2025 13:18:19 -0600
Subject: [PATCH 09/23] added Q2 test

---
 examples/pointcloud/pointcloud-rk.cpp | 28 +++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index 3f86e704..eafcc38d 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -485,19 +485,11 @@ int main(int argc, char *argv[])
 
                     points_bin_stencil(point_gid) = stencil;
                     points_num_neighbors(point_gid) = num_points_found; // key for allocations
-                    printf("neighbors found = %zu \n", num_points_found);
                     break;
                 }
                 
             } // end for stencil
 
-            printf("num_pts_in_bin = %zu, neighbors found = %zu, Stencil size = %zu, bin keys = %zu, %zu, %zu \n", 
-                   num_points_in_bin(bin_gid),
-                   points_num_neighbors(point_gid),
-                   points_bin_stencil(point_gid),
-                   bin_keys.i,
-                   bin_keys.j,
-                   bin_keys.k);
 
         }); // end for all
         Kokkos::fence();
@@ -551,9 +543,7 @@ int main(int argc, char *argv[])
                         for(size_t neighbor_pt_lid=0; neighbor_pt_lid<num_points_in_bin(neighbor_bin_gid); neighbor_pt_lid++){
 
                             size_t neighbor_point_gid = points_in_bin(neighbor_bin_gid, neighbor_pt_lid);
-                            
-                            printf("num saved = %zu,  points_num_neighbors = %zu, num_points_found = %zu \n", 
-                                num_saved, points_num_neighbors(point_gid), num_points_found);
+
                             points_in_point(point_gid, num_saved) = neighbor_point_gid;
                             
                             num_saved++;
@@ -603,6 +593,9 @@ int main(int argc, char *argv[])
         double linear_preserving;
         double linear_preserving_lcl;
 
+        double quadratic_preserving;
+        double quadratic_preserving_lcl;
+
         // loop over the particles in the domain
         for(size_t point_gid=0; point_gid<num_points; point_gid++){
             
@@ -616,19 +609,30 @@ int main(int argc, char *argv[])
                                     rk_basis, 
                                     h);
 
+            // partition of unity
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), partion_unity_lcl, {
                 partion_unity_lcl += rk_basis(point_gid,neighbor_point_lid)*vol(neighbor_point_lid);
             }, partion_unity);
             
 
+            // linear reproducing
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), linear_preserving_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
                 linear_preserving_lcl += rk_basis(point_gid,neighbor_point_lid)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0);
             }, linear_preserving);
 
+
+            // quadratic reproducing
+            FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), quadratic_preserving_lcl, {
+                // get the point gid for this neighboring
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                quadratic_preserving_lcl += rk_basis(point_gid,neighbor_point_lid)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0)*point_positions(neighbor_point_gid,0);
+            }, quadratic_preserving);
+
             printf("partition unity = %f, ", partion_unity);
-            printf("linear preserving error = %f at i=%zu \n", fabs(linear_preserving-point_positions(point_gid,0)), point_gid);
+            printf("linear preserving error = %f, ", fabs(linear_preserving-point_positions(point_gid,0)));
+            printf("quadratic preserving error = %f at i=%zu \n", fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0)), point_gid);
 
         } // end for point gid
 

From a6348cbb48ec1ce471a2083c81001256e624b051 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@syrupcastle.lanl.gov>
Date: Thu, 11 Sep 2025 16:27:48 -0600
Subject: [PATCH 10/23] WIP: grad of basis fcns

---
 examples/pointcloud/pointcloud-rk.cpp | 220 ++++++++++++++++++++++----
 1 file changed, 193 insertions(+), 27 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index eafcc38d..78f6f481 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -136,8 +136,9 @@ size_t get_bin_gid(const double x_pt,
 
 } // end function
 
+
 // Gaussian function part of the RBF
-// rbf = exp(-(x - xj)*(x - xj)/h)
+// rbf = exp(-(xj - x)*(xj - x)/h)
 KOKKOS_FUNCTION
 double kernel(const double r[3], const double h){
 
@@ -151,6 +152,28 @@ double kernel(const double r[3], const double h){
 } // end of function
 
 
+// Gradient Gaussian function
+// rbf = exp(-(xj - x)*(xj - x)/h)
+KOKKOS_FUNCTION
+void grad_kernel(const double r[3], const double h, double *grad_W){
+
+    double diff_sqrd = 0.0;
+
+    for(size_t dim=0; dim<3; dim++){
+        diff_sqrd += r[dim]*r[dim];
+    } // dim
+
+    const double rbf = exp(-diff_sqrd/(h*h));
+
+    // gradient
+    grad_W[0] = 2.0/h*r[0]*rbf; 
+    grad_W[1] = 2.0/h*r[1]*rbf; 
+    grad_W[2] = 2.0/h*r[2]*rbf;
+
+    return;
+} // end of function
+
+
 // Polynomial basis up to quadratic in 3D (10 terms)
 const size_t num_poly_basis = 10;
 KOKKOS_INLINE_FUNCTION
@@ -173,14 +196,66 @@ void poly_basis(const double r[3], double *p) {
 } // end function
 
 
-void compute_shape_functions(
+KOKKOS_INLINE_FUNCTION
+void grad_poly_basis(const double r[3], double (*grad_p)[num_poly_basis]) {
+    
+    const double drdx = -1.0;
+
+    grad_p[0][0] = 0.0;
+    grad_p[0][1] = drdx;
+    grad_p[0][2] = 0.0;
+    grad_p[0][3] = 0.0;
+    grad_p[0][4] = 2.0*r[0]*drdx;
+    grad_p[0][5] = r[1]*drdx;
+    grad_p[0][6] = r[2]*drdx;
+    grad_p[0][7] = 0.0;
+    grad_p[0][8] = 0.0;
+    grad_p[0][9] = 0.0;
+
+    // for high-order will use (x^a y^b z^c)
+
+    const double drdy = -1.0;
+
+    grad_p[1][0] = 0.0;
+    grad_p[1][1] = 0.0;
+    grad_p[1][2] = drdy;
+    grad_p[1][3] = 0.0;
+    grad_p[1][4] = 0.0;
+    grad_p[1][5] = r[0]*drdy;
+    grad_p[1][6] = 0.0;
+    grad_p[1][7] = 2.0*r[1]*drdy;
+    grad_p[1][8] = r[2]*drdy;
+    grad_p[1][9] = 0.0;
+
+    // for high-order will use (x^a y^b z^c)
+
+    const double drdz = -1.0;
+
+    grad_p[2][0] = 0.0;
+    grad_p[2][1] = 0.0;
+    grad_p[2][2] = 0.0;
+    grad_p[2][3] = drdz;
+    grad_p[2][4] = 0.0;
+    grad_p[2][5] = 0.0;
+    grad_p[2][6] = r[0]*drdz;
+    grad_p[2][7] = 0.0;
+    grad_p[2][8] = r[1]*drdz;
+    grad_p[2][9] = 2.0*r[2]*drdz;
+
+    // for high-order will use (x^a y^b z^c)
+
+    return;
+} // end function
+
+
+void calc_shape_functions(
     size_t point_gid,
     const DCArrayKokkos <double>& x,
     const DCArrayKokkos <size_t> points_num_neighbors, 
     const DRaggedRightArrayKokkos <size_t> points_in_point,
     const CArrayKokkos <double>& vol,
-    const CArrayKokkos <double>& rk_coeffs,
-    const DRaggedRightArrayKokkos <double>& rk_basis,
+    const CArrayKokkos <double>& p_coeffs,
+    const DRaggedRightArrayKokkos <double>& basis,
     const double h)
 {
 
@@ -203,14 +278,84 @@ void compute_shape_functions(
 
         double correction = 0.0;
         for (size_t a = 0; a < num_poly_basis; ++a){
-            correction += rk_coeffs(point_gid,a) * p[a];
+            correction += p_coeffs(point_gid,a) * p[a];
         } // end for a
 
-        rk_basis(point_gid, neighbor_point_lid) = W * correction;
+        basis(point_gid, neighbor_point_lid) = W * correction;
 
     }); // neighbor_point_lid
 
+    return;
+    
+} // end function
+
+
+
+void calc_grad_shape_functions(
+    size_t point_gid,
+    const DCArrayKokkos <double>& x,
+    const DCArrayKokkos <size_t> points_num_neighbors, 
+    const DRaggedRightArrayKokkos <size_t> points_in_point,
+    const CArrayKokkos <double>& vol,
+    const CArrayKokkos <double>& p_coeffs,
+    const DRaggedRightArrayKokkos <double>& basis,
+    const DRaggedRightArrayKokkos <double>& grad_basis,
+    const double h)
+{
+
+    //---------------------------------------------
+    // walk over the neighboring points 
+    //---------------------------------------------
+
+    FOR_ALL(neighbor_point_lid, 0, points_num_neighbors(point_gid), {
+
+        size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+
+        double p[num_poly_basis];    // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz]
+        double grad_p[3][num_poly_basis]; // matrix holding grad polynomial basis
 
+        double r[3];    // vecx_j - vecx_i
+        r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i
+        r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i
+        r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i
+
+        double W = kernel(r, h);
+        poly_basis(r,p);
+
+        double correction = 0.0;
+        for (size_t a = 0; a < num_poly_basis; ++a){
+            correction += p_coeffs(point_gid,a) * p[a];
+        } // end for a
+
+        basis(point_gid, neighbor_point_lid) = W * correction;
+
+        // --- gradient ---
+        double grad_W[3];
+        grad_kernel(r, h, grad_W);
+        grad_poly_basis(r, grad_p);
+
+        double term1_x = 0.0;
+        double term1_y = 0.0;
+        double term1_z = 0.0;
+
+        double term2_x = 0.0;
+        double term2_y = 0.0;
+        double term2_z = 0.0;
+
+        for (size_t a = 0; a < num_poly_basis; ++a){
+            term1_x += grad_p[0][a] * p_coeffs(point_gid,a);
+            term1_y += grad_p[1][a] * p_coeffs(point_gid,a);
+            term1_z += grad_p[2][a] * p_coeffs(point_gid,a);
+        } // end for a
+        term1_x *= W;
+        term1_y *= W;
+        term1_z *= W;
+
+        term2_x = correction*grad_W[0];
+        term2_y = correction*grad_W[1];
+        term2_z = correction*grad_W[2];
+
+    }); // neighbor_point_lid
 
     return;
     
@@ -218,13 +363,14 @@ void compute_shape_functions(
 
 
 
-// Build reproducing kernel coefficients for all particles in the domain
-void build_rk_coefficients(
+// Build reproducing kernel poly coefficients for all particles in the domain
+void calc_p_coefficients(
     const DCArrayKokkos <double>& x,
     const DCArrayKokkos <size_t> points_num_neighbors, 
     const DRaggedRightArrayKokkos <size_t> points_in_point,
     const CArrayKokkos <double>& vol,
-    const CArrayKokkos <double>& rk_coeffs,
+    const CArrayKokkos <double>& p_coeffs,
+    const CArrayKokkos <double>& M_inv,
     double h)
 {
 
@@ -239,15 +385,14 @@ void build_rk_coefficients(
         ViewCArrayKokkos <double> M(&M_1D[0], num_poly_basis, num_poly_basis);
         M.set_values(0.0);
 
-        // values in rhs after this function will be accessed as rk_coeffs(i,0:N)
-        ViewCArrayKokkos <double> rhs (&rk_coeffs(point_gid,0), num_poly_basis);
+        // values in rhs after this function will be accessed as p_coeffs(i,0:N)
+        ViewCArrayKokkos <double> rhs (&p_coeffs(point_gid,0), num_poly_basis);
         rhs.set_values(0.0);
         rhs(0) = 1.0;   // enforce reproduction of constant 1, everything else is = 0
 
         double p[num_poly_basis];    // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz]
         double r[3];    // vecx_j - vecx_i
 
-
         //---------------------------------------------
         // walk over the neighboring points
         //---------------------------------------------
@@ -295,6 +440,22 @@ void build_rk_coefficients(
             printf("ERROR: matrix is singluar \n");
         }
 
+
+        // --------------------------------------------------
+        // things needed for gradient of the basis function
+        double col_1D[num_poly_basis];
+        ViewCArrayKokkos <double> col(&col_1D[0], num_poly_basis);
+
+        // making a view, inverting only the matrix at point i
+        ViewCArrayKokkos <double> M_inv_pt(&M_inv(point_gid,0,0), num_poly_basis,num_poly_basis);
+
+        LU_invert(M,        // input matrix
+                  perm,     // permutations
+                  M_inv_pt, // inverse matrix at point gid
+                  col);     // tmp array
+        // -------------------------------------------------
+        
+        // solve for p_coefs
         LU_backsub(M, perm, rhs);  // note: answer is sent back in rhs
 
     }); // end parallel loop
@@ -567,26 +728,31 @@ int main(int argc, char *argv[])
         printf("Reconstructing basis using point cloud data \n\n");
 
 
-        CArrayKokkos <double> rk_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point
-        DRaggedRightArrayKokkos <double> rk_basis(points_num_neighbors);   // reproducing kernel basis (num_points, num_neighbors)
+        CArrayKokkos <double> p_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point
         CArrayKokkos <double> vol(num_points);
         vol.set_values(1.0);
 
-        double h = 1.0;
+        CArrayKokkos <double> M_inv(num_points, num_poly_basis, num_poly_basis);
+        CArrayKokkos <double> grad_M(num_points, num_poly_basis, num_poly_basis);
+        
+        DRaggedRightArrayKokkos <double> basis(points_num_neighbors);        // reproducing kernel basis (num_points, num_neighbors)
+        DRaggedRightArrayKokkos <double> grad_basis(points_num_neighbors,3); // reproducing kernel basis (num_points, num_neighbors)
 
 
+        double h = 1.0;
+
         printf("building reproducing kernel coefficients \n");
 
         // build coefficients on basis functions
-        build_rk_coefficients(point_positions, 
+        calc_p_coefficients(point_positions, 
                               points_num_neighbors, 
                               points_in_point, 
                               vol, 
-                              rk_coeffs, 
+                              p_coeffs, 
+                              M_inv,
                               h);
-
         
-        // performing checks on rk_coeffs
+        // performing checks on p_coeffs
         double partion_unity;
         double partion_unity_lcl;
 
@@ -600,18 +766,18 @@ int main(int argc, char *argv[])
         for(size_t point_gid=0; point_gid<num_points; point_gid++){
             
             // build basis functions at point i
-            compute_shape_functions(point_gid, 
+            calc_shape_functions(point_gid, 
                                     point_positions, 
                                     points_num_neighbors, 
                                     points_in_point, 
                                     vol, 
-                                    rk_coeffs, 
-                                    rk_basis, 
+                                    p_coeffs, 
+                                    basis, 
                                     h);
 
             // partition of unity
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), partion_unity_lcl, {
-                partion_unity_lcl += rk_basis(point_gid,neighbor_point_lid)*vol(neighbor_point_lid);
+                partion_unity_lcl += basis(point_gid,neighbor_point_lid)*vol(neighbor_point_lid);
             }, partion_unity);
             
 
@@ -619,7 +785,7 @@ int main(int argc, char *argv[])
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), linear_preserving_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
-                linear_preserving_lcl += rk_basis(point_gid,neighbor_point_lid)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0);
+                linear_preserving_lcl += basis(point_gid,neighbor_point_lid)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0);
             }, linear_preserving);
 
 
@@ -627,12 +793,12 @@ int main(int argc, char *argv[])
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), quadratic_preserving_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
-                quadratic_preserving_lcl += rk_basis(point_gid,neighbor_point_lid)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0)*point_positions(neighbor_point_gid,0);
+                quadratic_preserving_lcl += basis(point_gid,neighbor_point_lid)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0)*point_positions(neighbor_point_gid,0);
             }, quadratic_preserving);
 
             printf("partition unity = %f, ", partion_unity);
-            printf("linear preserving error = %f, ", fabs(linear_preserving-point_positions(point_gid,0)));
-            printf("quadratic preserving error = %f at i=%zu \n", fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0)), point_gid);
+            printf("linear fcn error = %f, ", fabs(linear_preserving-point_positions(point_gid,0)));
+            printf("quadratic fcn error = %f at i=%zu \n", fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0)), point_gid);
 
         } // end for point gid
 

From 2816a2d2d99ba80df5995db2a60787acd7dfb7b8 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Sat, 13 Sep 2025 13:05:59 -0600
Subject: [PATCH 11/23] Added gradient of basis

---
 examples/pointcloud/pointcloud-rk.cpp | 221 +++++++++++++++++++++-----
 1 file changed, 177 insertions(+), 44 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index 78f6f481..45bcbb3e 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -166,9 +166,9 @@ void grad_kernel(const double r[3], const double h, double *grad_W){
     const double rbf = exp(-diff_sqrd/(h*h));
 
     // gradient
-    grad_W[0] = 2.0/h*r[0]*rbf; 
-    grad_W[1] = 2.0/h*r[1]*rbf; 
-    grad_W[2] = 2.0/h*r[2]*rbf;
+    grad_W[0] = 2.0/(h*h)*r[0]*rbf; 
+    grad_W[1] = 2.0/(h*h)*r[1]*rbf; 
+    grad_W[2] = 2.0/(h*h)*r[2]*rbf;
 
     return;
 } // end of function
@@ -248,7 +248,7 @@ void grad_poly_basis(const double r[3], double (*grad_p)[num_poly_basis]) {
 } // end function
 
 
-void calc_shape_functions(
+void calc_basis_functions(
     size_t point_gid,
     const DCArrayKokkos <double>& x,
     const DCArrayKokkos <size_t> points_num_neighbors, 
@@ -290,29 +290,66 @@ void calc_shape_functions(
 } // end function
 
 
-
-void calc_grad_shape_functions(
+// grad_C = -M^-1 * grad_M * C
+// grad_M[a][b][i] = V (P[a] * grad_p[i][b] W + grad_p[i][a] P[b] W + P[a] P[b] grad_W[i])
+// -M^-1[d][k]*grad_M[k][b][i]*C[b] = [d][i]
+// p[d] grad_C[d][i]
+void calc_basis_and_grad_basis_functions(
     size_t point_gid,
     const DCArrayKokkos <double>& x,
     const DCArrayKokkos <size_t> points_num_neighbors, 
     const DRaggedRightArrayKokkos <size_t> points_in_point,
     const CArrayKokkos <double>& vol,
     const CArrayKokkos <double>& p_coeffs,
+    const CArrayKokkos <double>& M_inv,
     const DRaggedRightArrayKokkos <double>& basis,
     const DRaggedRightArrayKokkos <double>& grad_basis,
     const double h)
 {
 
-    //---------------------------------------------
-    // walk over the neighboring points 
-    //---------------------------------------------
+    // --------------
+    // Step 1: assemble grad M, the gradient of the moment matrix
+
+    FArrayKokkos <double> grad_m(3,num_poly_basis,num_poly_basis);
+    grad_m.set_values(0.0);
 
+    // walk over the neighboring points 
     FOR_ALL(neighbor_point_lid, 0, points_num_neighbors(point_gid), {
 
         size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
 
-        double p[num_poly_basis];    // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz]
-        double grad_p[3][num_poly_basis]; // matrix holding grad polynomial basis
+        double r[3];    // vecx_j - vecx_i
+        r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i
+        r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i
+        r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i
+
+        double W = kernel(r,h);
+        double grad_W[3]; grad_kernel(r,h,grad_W);
+
+        double p[num_poly_basis]; poly_basis(r,p);
+        double grad_p[3][num_poly_basis]; grad_poly_basis(r,grad_p);
+
+        double Vj = vol(neighbor_point_gid);
+
+        // grad_M = V (P grad_p W + grad_p P W + P P grad_W)
+        for(size_t a=0;a<num_poly_basis;++a){
+            for(size_t b=0;b<num_poly_basis;++b){
+                Kokkos::atomic_add(&grad_m(0,a,b), Vj * ( p[a]*grad_p[0][b]*W + grad_p[0][a]*p[b]*W + p[a]*p[b]*grad_W[0] ));
+                Kokkos::atomic_add(&grad_m(1,a,b), Vj * ( p[a]*grad_p[1][b]*W + grad_p[1][a]*p[b]*W + p[a]*p[b]*grad_W[1] ));
+                Kokkos::atomic_add(&grad_m(2,a,b), Vj * ( p[a]*grad_p[2][b]*W + grad_p[2][a]*p[b]*W + p[a]*p[b]*grad_W[2] ));
+            } // end for b
+        } // end for a
+
+    }); // end parallel loop over neighboring points
+
+
+    // -----------
+    // Step 2: calculate basis and grad basis
+
+    // walk over the neighboring points 
+    FOR_ALL(neighbor_point_lid, 0, points_num_neighbors(point_gid), {
+
+        size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
 
         double r[3];    // vecx_j - vecx_i
         r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i
@@ -320,8 +357,17 @@ void calc_grad_shape_functions(
         r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i
 
         double W = kernel(r, h);
+
+        double p[num_poly_basis];    // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz]
         poly_basis(r,p);
+        
+        double grad_W[3];
+        grad_kernel(r, h, grad_W);
+
+        double grad_p[3][num_poly_basis]; // matrix holding grad polynomial basis
+        grad_poly_basis(r, grad_p);
 
+        // 
         double correction = 0.0;
         for (size_t a = 0; a < num_poly_basis; ++a){
             correction += p_coeffs(point_gid,a) * p[a];
@@ -329,31 +375,61 @@ void calc_grad_shape_functions(
 
         basis(point_gid, neighbor_point_lid) = W * correction;
 
-        // --- gradient ---
-        double grad_W[3];
-        grad_kernel(r, h, grad_W);
-        grad_poly_basis(r, grad_p);
 
-        double term1_x = 0.0;
-        double term1_y = 0.0;
-        double term1_z = 0.0;
+        // --- gradient contributions ---
 
-        double term2_x = 0.0;
-        double term2_y = 0.0;
-        double term2_z = 0.0;
+        // term from grad poly
+        double term_x = 0.0;
+        double term_y = 0.0;
+        double term_z = 0.0;
 
+        // calc the grad of poly term
         for (size_t a = 0; a < num_poly_basis; ++a){
-            term1_x += grad_p[0][a] * p_coeffs(point_gid,a);
-            term1_y += grad_p[1][a] * p_coeffs(point_gid,a);
-            term1_z += grad_p[2][a] * p_coeffs(point_gid,a);
+            term_x += grad_p[0][a] * p_coeffs(point_gid,a);
+            term_y += grad_p[1][a] * p_coeffs(point_gid,a);
+            term_z += grad_p[2][a] * p_coeffs(point_gid,a);
         } // end for a
-        term1_x *= W;
-        term1_y *= W;
-        term1_z *= W;
 
-        term2_x = correction*grad_W[0];
-        term2_y = correction*grad_W[1];
-        term2_z = correction*grad_W[2];
+        // saving the grad poly term plus the grad kernel term to the grad basis
+        grad_basis(point_gid,neighbor_point_lid,0) = term_x*W + correction*grad_W[0];
+        grad_basis(point_gid,neighbor_point_lid,1) = term_y*W + correction*grad_W[1];
+        grad_basis(point_gid,neighbor_point_lid,2) = term_z*W + correction*grad_W[2];
+
+
+        // --- gradient of correction coefficients (grad_C) ---
+
+        // the last contirubtion to grad_basis is from grad_C
+        // sum_a p[a]*W*grad_C[i][a]
+        double grad_C[3][num_poly_basis];
+
+
+        for (size_t a = 0; a < num_poly_basis; ++a){
+            grad_C[0][a] = 0.0;
+            grad_C[1][a] = 0.0;
+            grad_C[2][a] = 0.0;
+        } // end for 1
+
+
+        // grad_C = -M^-1 * grad_M * C
+        for (size_t d = 0; d < num_poly_basis; ++d) {
+            for (size_t b = 0; b < num_poly_basis; ++b) {
+                double Cb = p_coeffs(point_gid,b);
+                for (size_t k = 0; k < num_poly_basis; ++k) {
+                    grad_C[0][d] -= M_inv(point_gid,d,k) * grad_m(0,k,b) * Cb;
+                    grad_C[1][d] -= M_inv(point_gid,d,k) * grad_m(1,k,b) * Cb;
+                    grad_C[2][d] -= M_inv(point_gid,d,k) * grad_m(2,k,b) * Cb;
+                } // end for k
+            } // end for b
+        } // end for d
+
+
+        // adding grad_C[i][d]* p[d]*W to the grad basis function
+        for (size_t d = 0; d < num_poly_basis; ++d){
+            grad_basis(point_gid,neighbor_point_lid,0) += grad_C[0][d]*p[d]*W;
+            grad_basis(point_gid,neighbor_point_lid,1) += grad_C[1][d]*p[d]*W;
+            grad_basis(point_gid,neighbor_point_lid,2) += grad_C[2][d]*p[d]*W;
+        } // end for d
+
 
     }); // neighbor_point_lid
 
@@ -447,11 +523,11 @@ void calc_p_coefficients(
         ViewCArrayKokkos <double> col(&col_1D[0], num_poly_basis);
 
         // making a view, inverting only the matrix at point i
-        ViewCArrayKokkos <double> M_inv_pt(&M_inv(point_gid,0,0), num_poly_basis,num_poly_basis);
+        ViewCArrayKokkos <double> M_inv_point(&M_inv(point_gid,0,0), num_poly_basis,num_poly_basis);
 
         LU_invert(M,        // input matrix
                   perm,     // permutations
-                  M_inv_pt, // inverse matrix at point gid
+                  M_inv_point, // inverse matrix at point gid
                   col);     // tmp array
         // -------------------------------------------------
         
@@ -745,12 +821,12 @@ int main(int argc, char *argv[])
 
         // build coefficients on basis functions
         calc_p_coefficients(point_positions, 
-                              points_num_neighbors, 
-                              points_in_point, 
-                              vol, 
-                              p_coeffs, 
-                              M_inv,
-                              h);
+                            points_num_neighbors, 
+                            points_in_point, 
+                            vol, 
+                            p_coeffs, 
+                            M_inv,
+                            h);
         
         // performing checks on p_coeffs
         double partion_unity;
@@ -762,17 +838,50 @@ int main(int argc, char *argv[])
         double quadratic_preserving;
         double quadratic_preserving_lcl;
 
+        double grad_x_p0; 
+        double grad_x_p0_lcl; 
+        double grad_y_p0; 
+        double grad_y_p0_lcl; 
+        double grad_z_p0; 
+        double grad_z_p0_lcl; 
+
+        double grad_x_p1; 
+        double grad_x_p1_lcl; 
+        double grad_y_p1; 
+        double grad_y_p1_lcl; 
+        double grad_z_p1; 
+        double grad_z_p1_lcl; 
+
+        double grad_x_p2; 
+        double grad_x_p2_lcl; 
+        double grad_y_p2; 
+        double grad_y_p2_lcl; 
+        double grad_z_p2; 
+        double grad_z_p2_lcl; 
+
         // loop over the particles in the domain
         for(size_t point_gid=0; point_gid<num_points; point_gid++){
             
             // build basis functions at point i
-            calc_shape_functions(point_gid, 
-                                    point_positions, 
+            // calc_basis_functions(point_gid, 
+            //                      point_positions, 
+            //                      points_num_neighbors, 
+            //                      points_in_point, 
+            //                      vol, 
+            //                      p_coeffs, 
+            //                      basis, 
+            //                      h);
+
+            calc_basis_and_grad_basis_functions(
+                                    point_gid,
+                                    point_positions,
                                     points_num_neighbors, 
-                                    points_in_point, 
-                                    vol, 
-                                    p_coeffs, 
-                                    basis, 
+                                    points_in_point,
+                                    vol,
+                                    p_coeffs,
+                                    M_inv,
+                                    basis,
+                                    grad_basis,
                                     h);
 
             // partition of unity
@@ -800,6 +909,30 @@ int main(int argc, char *argv[])
             printf("linear fcn error = %f, ", fabs(linear_preserving-point_positions(point_gid,0)));
             printf("quadratic fcn error = %f at i=%zu \n", fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0)), point_gid);
 
+
+            // -----------------
+            // gradient checks
+            // -----------------
+
+            // Sum(grad) = [0]; 
+            FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p0_lcl, {
+                // get the point gid for this neighboring
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                grad_x_p0_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid);
+            }, grad_x_p0);
+            FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p0_lcl, {
+                // get the point gid for this neighboring
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                grad_y_p0_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid);
+            }, grad_y_p0);
+            FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p0_lcl, {
+                // get the point gid for this neighboring
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                grad_z_p0_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid);
+            }, grad_z_p0);
+             printf("grad(P0) = %f, %f, %f, \n", grad_x_p0, grad_y_p0, grad_z_p0);
+
+
         } // end for point gid
 
 

From 8a2172163764c2852bc1afd48d7681f477cd8c80 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Sat, 13 Sep 2025 14:00:42 -0600
Subject: [PATCH 12/23] added P0, P1, and P2 gradient tests

---
 examples/pointcloud/pointcloud-rk.cpp | 43 +++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index 45bcbb3e..ec1d6ff4 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -933,6 +933,49 @@ int main(int argc, char *argv[])
              printf("grad(P0) = %f, %f, %f, \n", grad_x_p0, grad_y_p0, grad_z_p0);
 
 
+            // Sum(grad(P1)) = [1]; 
+            FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p1_lcl, {
+                // get the point gid for this neighboring
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                grad_x_p1_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0);
+            }, grad_x_p1);
+            FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p1_lcl, {
+                // get the point gid for this neighboring
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                grad_y_p1_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1);
+            }, grad_y_p1);
+            FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p1_lcl, {
+                // get the point gid for this neighboring
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                grad_z_p1_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2);
+            }, grad_z_p1);
+             printf("error in grad(P1) = %f, %f, %f, \n", 
+                fabs(grad_x_p1 - 1.0), 
+                fabs(grad_y_p1 - 1.0), 
+                fabs(grad_z_p1 - 1.0));
+
+
+            // Sum(grad(P2)) = [2]; 
+            FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p2_lcl, {
+                // get the point gid for this neighboring
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                grad_x_p2_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0)*point_positions(neighbor_point_gid,0);
+            }, grad_x_p2);
+            FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p2_lcl, {
+                // get the point gid for this neighboring
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                grad_y_p2_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1)*point_positions(neighbor_point_gid,1);
+            }, grad_y_p2);
+            FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p2_lcl, {
+                // get the point gid for this neighboring
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                grad_z_p2_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2)*point_positions(neighbor_point_gid,2);
+            }, grad_z_p2);
+            printf("error in grad(P2) = %f, %f, %f, \n", 
+                    fabs(grad_x_p2-2.0*point_positions(point_gid,0)), 
+                    fabs(grad_y_p2-2.0*point_positions(point_gid,1)), 
+                    fabs(grad_z_p2-2.0*point_positions(point_gid,2)));
+
         } // end for point gid
 
 

From 66c1b28cabfec8dd179d03d01e821eb4b648b4a1 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Sat, 13 Sep 2025 15:46:19 -0600
Subject: [PATCH 13/23] more parallelism and DOP implementation

---
 examples/pointcloud/pointcloud-rk.cpp | 301 +++++++++++++-------------
 1 file changed, 156 insertions(+), 145 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index ec1d6ff4..d42c2e92 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -197,50 +197,50 @@ void poly_basis(const double r[3], double *p) {
 
 
 KOKKOS_INLINE_FUNCTION
-void grad_poly_basis(const double r[3], double (*grad_p)[num_poly_basis]) {
+void grad_poly_basis(const double r[3], double (*grad_p)[3]) {
     
     const double drdx = -1.0;
 
     grad_p[0][0] = 0.0;
-    grad_p[0][1] = drdx;
-    grad_p[0][2] = 0.0;
-    grad_p[0][3] = 0.0;
-    grad_p[0][4] = 2.0*r[0]*drdx;
-    grad_p[0][5] = r[1]*drdx;
-    grad_p[0][6] = r[2]*drdx;
-    grad_p[0][7] = 0.0;
-    grad_p[0][8] = 0.0;
-    grad_p[0][9] = 0.0;
+    grad_p[1][0] = drdx;
+    grad_p[2][0] = 0.0;
+    grad_p[3][0] = 0.0;
+    grad_p[4][0] = 2.0*r[0]*drdx;
+    grad_p[5][0] = r[1]*drdx;
+    grad_p[6][0] = r[2]*drdx;
+    grad_p[7][0] = 0.0;
+    grad_p[8][0] = 0.0;
+    grad_p[9][0] = 0.0;
 
     // for high-order will use (x^a y^b z^c)
 
     const double drdy = -1.0;
 
-    grad_p[1][0] = 0.0;
+    grad_p[0][1] = 0.0;
     grad_p[1][1] = 0.0;
-    grad_p[1][2] = drdy;
-    grad_p[1][3] = 0.0;
-    grad_p[1][4] = 0.0;
-    grad_p[1][5] = r[0]*drdy;
-    grad_p[1][6] = 0.0;
-    grad_p[1][7] = 2.0*r[1]*drdy;
-    grad_p[1][8] = r[2]*drdy;
-    grad_p[1][9] = 0.0;
+    grad_p[2][1] = drdy;
+    grad_p[3][1] = 0.0;
+    grad_p[4][1] = 0.0;
+    grad_p[5][1] = r[0]*drdy;
+    grad_p[6][1] = 0.0;
+    grad_p[7][1] = 2.0*r[1]*drdy;
+    grad_p[8][1] = r[2]*drdy;
+    grad_p[9][1] = 0.0;
 
     // for high-order will use (x^a y^b z^c)
 
     const double drdz = -1.0;
 
-    grad_p[2][0] = 0.0;
-    grad_p[2][1] = 0.0;
+    grad_p[0][2] = 0.0;
+    grad_p[1][2] = 0.0;
     grad_p[2][2] = 0.0;
-    grad_p[2][3] = drdz;
-    grad_p[2][4] = 0.0;
-    grad_p[2][5] = 0.0;
-    grad_p[2][6] = r[0]*drdz;
-    grad_p[2][7] = 0.0;
-    grad_p[2][8] = r[1]*drdz;
-    grad_p[2][9] = 2.0*r[2]*drdz;
+    grad_p[3][2] = drdz;
+    grad_p[4][2] = 0.0;
+    grad_p[5][2] = 0.0;
+    grad_p[6][2] = r[0]*drdz;
+    grad_p[7][2] = 0.0;
+    grad_p[8][2] = r[1]*drdz;
+    grad_p[9][2] = 2.0*r[2]*drdz;
 
     // for high-order will use (x^a y^b z^c)
 
@@ -295,7 +295,6 @@ void calc_basis_functions(
 // -M^-1[d][k]*grad_M[k][b][i]*C[b] = [d][i]
 // p[d] grad_C[d][i]
 void calc_basis_and_grad_basis_functions(
-    size_t point_gid,
     const DCArrayKokkos <double>& x,
     const DCArrayKokkos <size_t> points_num_neighbors, 
     const DRaggedRightArrayKokkos <size_t> points_in_point,
@@ -307,131 +306,150 @@ void calc_basis_and_grad_basis_functions(
     const double h)
 {
 
-    // --------------
-    // Step 1: assemble grad M, the gradient of the moment matrix
+    // actual number of points
+    size_t num_points = x.dims(0);
+    
+    // loop over all nodes in the problem
+    FOR_ALL(point_gid, 0, num_points, {
 
-    FArrayKokkos <double> grad_m(3,num_poly_basis,num_poly_basis);
-    grad_m.set_values(0.0);
+        // --------------
+        // Step 1: assemble grad M, the gradient of the moment matrix
 
-    // walk over the neighboring points 
-    FOR_ALL(neighbor_point_lid, 0, points_num_neighbors(point_gid), {
+        double grad_m[num_poly_basis][num_poly_basis][3];
 
-        size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+        for(size_t a=0;a<num_poly_basis;++a){
+            for(size_t b=0;b<num_poly_basis;++b){
+                for(size_t dim=0;dim<3;++dim){
+                    grad_m[a][b][dim] = 0.0;
+                }
+            } // end for b
+        } // end for a
 
-        double r[3];    // vecx_j - vecx_i
-        r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i
-        r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i
-        r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i
+        // walk over the neighboring points 
+        for(size_t neighbor_point_lid=0; neighbor_point_lid<points_num_neighbors(point_gid); neighbor_point_lid++){
 
-        double W = kernel(r,h);
-        double grad_W[3]; grad_kernel(r,h,grad_W);
+            size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
 
-        double p[num_poly_basis]; poly_basis(r,p);
-        double grad_p[3][num_poly_basis]; grad_poly_basis(r,grad_p);
+            double r[3];    // vecx_j - vecx_i
+            r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i
+            r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i
+            r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i
 
-        double Vj = vol(neighbor_point_gid);
+            double W = kernel(r,h);
+            double grad_W[3]; 
+            grad_kernel(r,h,grad_W);
 
-        // grad_M = V (P grad_p W + grad_p P W + P P grad_W)
-        for(size_t a=0;a<num_poly_basis;++a){
-            for(size_t b=0;b<num_poly_basis;++b){
-                Kokkos::atomic_add(&grad_m(0,a,b), Vj * ( p[a]*grad_p[0][b]*W + grad_p[0][a]*p[b]*W + p[a]*p[b]*grad_W[0] ));
-                Kokkos::atomic_add(&grad_m(1,a,b), Vj * ( p[a]*grad_p[1][b]*W + grad_p[1][a]*p[b]*W + p[a]*p[b]*grad_W[1] ));
-                Kokkos::atomic_add(&grad_m(2,a,b), Vj * ( p[a]*grad_p[2][b]*W + grad_p[2][a]*p[b]*W + p[a]*p[b]*grad_W[2] ));
-            } // end for b
-        } // end for a
+            double p[num_poly_basis]; 
+            poly_basis(r,p);
 
-    }); // end parallel loop over neighboring points
+            double grad_p[num_poly_basis][3]; 
+            grad_poly_basis(r,grad_p);
 
+            double Vj = vol(neighbor_point_gid);
 
-    // -----------
-    // Step 2: calculate basis and grad basis
+            // grad_M = V (P grad_p W + grad_p P W + P P grad_W)
+            for(size_t a=0;a<num_poly_basis;++a){
+                for(size_t b=0;b<num_poly_basis;++b){
+                    for(size_t dim=0;dim<3;++dim){
+                        grad_m[a][b][dim] += Vj * ( p[a]*grad_p[b][dim]*W + grad_p[a][dim]*p[b]*W + p[a]*p[b]*grad_W[dim] );
+                    }
+                } // end for b
+            } // end for a
 
-    // walk over the neighboring points 
-    FOR_ALL(neighbor_point_lid, 0, points_num_neighbors(point_gid), {
+        } // end for loop over neighboring points
 
-        size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
 
-        double r[3];    // vecx_j - vecx_i
-        r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i
-        r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i
-        r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i
+        // -----------
+        // Step 2: calculate basis and grad basis
 
-        double W = kernel(r, h);
+        // walk over the neighboring points 
+        for(size_t neighbor_point_lid=0; neighbor_point_lid<points_num_neighbors(point_gid); neighbor_point_lid++){
 
-        double p[num_poly_basis];    // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz]
-        poly_basis(r,p);
-        
-        double grad_W[3];
-        grad_kernel(r, h, grad_W);
+            size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
 
-        double grad_p[3][num_poly_basis]; // matrix holding grad polynomial basis
-        grad_poly_basis(r, grad_p);
+            double r[3];    // vecx_j - vecx_i
+            r[0] = x(neighbor_point_gid,0) - x(point_gid,0); // x_j-x_i
+            r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i
+            r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i
 
-        // 
-        double correction = 0.0;
-        for (size_t a = 0; a < num_poly_basis; ++a){
-            correction += p_coeffs(point_gid,a) * p[a];
-        } // end for a
+            double W = kernel(r, h);
 
-        basis(point_gid, neighbor_point_lid) = W * correction;
+            double p[num_poly_basis];    // array holding polynomial basis [x, y, z, x^2, y^2, ... , yz]
+            poly_basis(r,p);
+            
+            double grad_W[3];
+            grad_kernel(r, h, grad_W);
 
+            double grad_p[num_poly_basis][3]; // matrix holding grad polynomial basis
+            grad_poly_basis(r, grad_p);
 
-        // --- gradient contributions ---
+            // 
+            double correction = 0.0;
+            for (size_t a = 0; a < num_poly_basis; ++a){
+                correction += p_coeffs(point_gid,a) * p[a];
+            } // end for a
 
-        // term from grad poly
-        double term_x = 0.0;
-        double term_y = 0.0;
-        double term_z = 0.0;
+            basis(point_gid, neighbor_point_lid) = W * correction;
 
-        // calc the grad of poly term
-        for (size_t a = 0; a < num_poly_basis; ++a){
-            term_x += grad_p[0][a] * p_coeffs(point_gid,a);
-            term_y += grad_p[1][a] * p_coeffs(point_gid,a);
-            term_z += grad_p[2][a] * p_coeffs(point_gid,a);
-        } // end for a
 
-        // saving the grad poly term plus the grad kernel term to the grad basis
-        grad_basis(point_gid,neighbor_point_lid,0) = term_x*W + correction*grad_W[0];
-        grad_basis(point_gid,neighbor_point_lid,1) = term_y*W + correction*grad_W[1];
-        grad_basis(point_gid,neighbor_point_lid,2) = term_z*W + correction*grad_W[2];
+            // --- gradient contributions ---
 
+            // term from grad poly
+            double term[3]; 
+            for(size_t dim=0;dim<3;++dim){
+                term[dim] = 0.0;
+            }
 
-        // --- gradient of correction coefficients (grad_C) ---
+            // calc the grad of poly term
+            for(size_t dim=0;dim<3;++dim){
+                for (size_t a = 0; a < num_poly_basis; ++a){
+                    term[dim] += grad_p[a][dim] * p_coeffs(point_gid,a);
+                } // end for a
+            } // end for dim
 
-        // the last contirubtion to grad_basis is from grad_C
-        // sum_a p[a]*W*grad_C[i][a]
-        double grad_C[3][num_poly_basis];
+            // saving the grad poly term plus the grad kernel term to the grad basis
+            for(size_t dim=0;dim<3;++dim){
+                grad_basis(point_gid,neighbor_point_lid,dim) = term[dim]*W + correction*grad_W[dim];
+            }
 
+            // --- gradient of correction coefficients (grad_C) ---
 
-        for (size_t a = 0; a < num_poly_basis; ++a){
-            grad_C[0][a] = 0.0;
-            grad_C[1][a] = 0.0;
-            grad_C[2][a] = 0.0;
-        } // end for 1
-
-
-        // grad_C = -M^-1 * grad_M * C
-        for (size_t d = 0; d < num_poly_basis; ++d) {
-            for (size_t b = 0; b < num_poly_basis; ++b) {
-                double Cb = p_coeffs(point_gid,b);
-                for (size_t k = 0; k < num_poly_basis; ++k) {
-                    grad_C[0][d] -= M_inv(point_gid,d,k) * grad_m(0,k,b) * Cb;
-                    grad_C[1][d] -= M_inv(point_gid,d,k) * grad_m(1,k,b) * Cb;
-                    grad_C[2][d] -= M_inv(point_gid,d,k) * grad_m(2,k,b) * Cb;
-                } // end for k
-            } // end for b
-        } // end for d
+            // the last contirubtion to grad_basis is from grad_C
+            // sum_a p[a]*W*grad_C[i][a]
+            double grad_C[num_poly_basis][3];
 
+            
+            for (size_t a = 0; a < num_poly_basis; ++a){
+                for(size_t dim=0;dim<3;++dim){
+                    grad_C[a][dim]= 0.0;
+                }
+            } // end for 1
+            
 
-        // adding grad_C[i][d]* p[d]*W to the grad basis function
-        for (size_t d = 0; d < num_poly_basis; ++d){
-            grad_basis(point_gid,neighbor_point_lid,0) += grad_C[0][d]*p[d]*W;
-            grad_basis(point_gid,neighbor_point_lid,1) += grad_C[1][d]*p[d]*W;
-            grad_basis(point_gid,neighbor_point_lid,2) += grad_C[2][d]*p[d]*W;
-        } // end for d
+            // grad_C = -M^-1 * grad_M * C
+            for (size_t d = 0; d < num_poly_basis; ++d) {
+                for (size_t b = 0; b < num_poly_basis; ++b) {
+                    double Cb = p_coeffs(point_gid,b);
+                    for (size_t k = 0; k < num_poly_basis; ++k) {
+                        for(size_t dim=0;dim<3;++dim){
+                            grad_C[d][dim] -= M_inv(point_gid,d,k) * grad_m[k][b][dim] * Cb;
+                        } // end dim
+                    } // end for k
+                } // end for b
+            } // end for d
 
 
-    }); // neighbor_point_lid
+            // adding grad_C[i][d]* p[d]*W to the grad basis function
+            for (size_t d = 0; d < num_poly_basis; ++d){
+                for(size_t dim=0;dim<3;++dim){
+                    grad_basis(point_gid,neighbor_point_lid,dim) += grad_C[d][dim]*p[d]*W;
+                } // end dim
+            } // end for d
+
+
+        } // end for over neighbor_point_lid
+
+    }); // end parallel loop over all points 
 
     return;
     
@@ -828,7 +846,20 @@ int main(int argc, char *argv[])
                             M_inv,
                             h);
         
-        // performing checks on p_coeffs
+        
+        calc_basis_and_grad_basis_functions(
+                                    point_positions,
+                                    points_num_neighbors, 
+                                    points_in_point,
+                                    vol,
+                                    p_coeffs,
+                                    M_inv,
+                                    basis,
+                                    grad_basis,
+                                    h);
+
+
+        // performing checks on p_coeffs, basis, and grad_basis
         double partion_unity;
         double partion_unity_lcl;
 
@@ -861,28 +892,6 @@ int main(int argc, char *argv[])
 
         // loop over the particles in the domain
         for(size_t point_gid=0; point_gid<num_points; point_gid++){
-            
-            // build basis functions at point i
-            // calc_basis_functions(point_gid, 
-            //                      point_positions, 
-            //                      points_num_neighbors, 
-            //                      points_in_point, 
-            //                      vol, 
-            //                      p_coeffs, 
-            //                      basis, 
-            //                      h);
-
-            calc_basis_and_grad_basis_functions(
-                                    point_gid,
-                                    point_positions,
-                                    points_num_neighbors, 
-                                    points_in_point,
-                                    vol,
-                                    p_coeffs,
-                                    M_inv,
-                                    basis,
-                                    grad_basis,
-                                    h);
 
             // partition of unity
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), partion_unity_lcl, {
@@ -930,7 +939,7 @@ int main(int argc, char *argv[])
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
                 grad_z_p0_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid);
             }, grad_z_p0);
-             printf("grad(P0) = %f, %f, %f, \n", grad_x_p0, grad_y_p0, grad_z_p0);
+             printf("error in grad(P0) = %f, %f, %f, \n", grad_x_p0, grad_y_p0, grad_z_p0);
 
 
             // Sum(grad(P1)) = [1]; 
@@ -979,6 +988,8 @@ int main(int argc, char *argv[])
         } // end for point gid
 
 
+        //////
+
         printf("Writing VTK Graphics File \n\n");
 
         std::ofstream out("cloud.vtk");

From 349c9a49d17b848f3b44f43b5246823da9871104 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Sat, 13 Sep 2025 16:04:32 -0600
Subject: [PATCH 14/23] added timers and increased particle count

---
 examples/pointcloud/pointcloud-rk.cpp | 35 ++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index d42c2e92..9098d82c 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -40,6 +40,8 @@
 //  Nathaniel Morgan
 // -----------------------------------------------
 
+#include <chrono>   // for timing
+
 #include <fstream>
 #include <iostream>
 #include <string>
@@ -63,7 +65,7 @@ const double PI = 3.14159265358979323846;
 // -----------------------------------------------
 // inputs:
 
-const size_t num_points = 101;
+const size_t num_points = 10001;
 
 // the bin sizes for finding neighboring points
 const double bin_dx = 0.05; // bins in x
@@ -621,6 +623,9 @@ int main(int argc, char *argv[])
         DCArrayKokkos <size_t> points_bin_stencil(num_points, "bin_stencil"); // how many bins needed for a particle
         DCArrayKokkos <size_t> points_num_neighbors(num_points, "num_neighbors");
         
+        // start timer
+        auto time_1 = std::chrono::high_resolution_clock::now();
+
         // build reverse mapping between gid and i,j,k
         FOR_ALL(i, 0, num_bins_x,
                 j, 0, num_bins_y,
@@ -643,11 +648,17 @@ int main(int argc, char *argv[])
         Kokkos::fence();
         keys_in_bin.update_host();
 
+        // end timer
+        auto time_2 = std::chrono::high_resolution_clock::now();
+
 
         // -------------------------------------------------------------------
         // below here, these routine must be called every time particles move
         // -------------------------------------------------------------------
 
+        // start timer
+        auto time_3 = std::chrono::high_resolution_clock::now();
+
         // save bin id to points
         FOR_ALL(point_gid, 0, num_points, {
 
@@ -814,6 +825,9 @@ int main(int argc, char *argv[])
         points_in_point.update_host();
 
 
+        // end timer
+        auto time_4 = std::chrono::high_resolution_clock::now();
+
 
         // ----------------------------------------
         // Find basis that reconstructs polynomial 
@@ -821,6 +835,7 @@ int main(int argc, char *argv[])
 
         printf("Reconstructing basis using point cloud data \n\n");
 
+        auto time_5 = std::chrono::high_resolution_clock::now();
 
         CArrayKokkos <double> p_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point
         CArrayKokkos <double> vol(num_points);
@@ -858,6 +873,9 @@ int main(int argc, char *argv[])
                                     grad_basis,
                                     h);
 
+        // end timer
+        auto time_6 = std::chrono::high_resolution_clock::now();
+
 
         // performing checks on p_coeffs, basis, and grad_basis
         double partion_unity;
@@ -988,7 +1006,18 @@ int main(int argc, char *argv[])
         } // end for point gid
 
 
-        //////
+        ////// timers ///
+
+        std::chrono::duration <double, std::milli> ms = time_2 - time_1;
+        std::cout << "runtime to create bins = " << ms.count() << "ms\n\n";
+
+        ms = time_4 - time_3;
+        std::cout << "runtime to find and save neighbors = " << ms.count() << "ms\n\n";
+
+        ms = time_6 - time_5;
+        std::cout << "runtime to calculate basis and grad basis = " << ms.count() << "ms\n\n";
+
+
 
         printf("Writing VTK Graphics File \n\n");
 
@@ -1012,7 +1041,7 @@ int main(int argc, char *argv[])
             out << point_values.host(point_gid) << "\n";
         }
 
-    
+
         printf("Finished \n\n");
 
 

From 7a9993154844374368159fee2508e5d5c20659e3 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Thu, 18 Sep 2025 16:14:30 -0600
Subject: [PATCH 15/23] built maps and reverse maps between points

---
 examples/pointcloud/pointcloud-rk.cpp | 370 ++++++++++++++++++++++----
 1 file changed, 325 insertions(+), 45 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index 9098d82c..952d24a7 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -55,6 +55,11 @@
 
 #include "lu_solver.hpp"
 
+
+#include <set> // for unorded map testing 
+
+
+
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 
@@ -65,7 +70,7 @@ const double PI = 3.14159265358979323846;
 // -----------------------------------------------
 // inputs:
 
-const size_t num_points = 10001;
+const size_t num_points = 31;
 
 // the bin sizes for finding neighboring points
 const double bin_dx = 0.05; // bins in x
@@ -81,6 +86,8 @@ const double LX = 1.0;   // length in x-dir
 const double LY = 1.0;
 const double LZ = 1.0;
 
+bool check_maps = false; // CPU only!!!!
+
 //
 // -----------------------------------------------
 
@@ -589,7 +596,7 @@ int main(int argc, char *argv[])
         // point values
         FOR_ALL(i, 0, num_points, {
 
-            printf("point location at i=%d is (%f, %f, %f) \n", i, point_positions(i, 0), point_positions(i, 1), point_positions(i, 2));
+            //printf("point location at i=%d is (%f, %f, %f) \n", i, point_positions(i, 0), point_positions(i, 1), point_positions(i, 2));
             point_values(i) = sqrt(point_positions(i, 0)*point_positions(i, 0) + 
                                    point_positions(i, 1)*point_positions(i, 1) +
                                    point_positions(i, 2)*point_positions(i, 2));
@@ -597,7 +604,7 @@ int main(int argc, char *argv[])
         }); // end parallel for tri's in the file
         point_values.update_host();
         Kokkos::fence();
-        printf("\n");
+        //printf("\n");
 
 
         // ----------------------------
@@ -620,9 +627,11 @@ int main(int argc, char *argv[])
         // connectivity from points to bins
         DCArrayKokkos <size_t> points_bin_gid(num_points, "points_in_gid");
         CArrayKokkos <size_t>  points_bin_lid_storage(num_points, "bin_lid_storage");  // only used to create storage
-        DCArrayKokkos <size_t> points_bin_stencil(num_points, "bin_stencil"); // how many bins needed for a particle
+        DCArrayKokkos <int> points_bin_stencil(num_points, 6, "bin_stencil");   // how imin,imax,jmin,jmax,kmin,kmax range for bins in stencil
         DCArrayKokkos <size_t> points_num_neighbors(num_points, "num_neighbors");
         
+        printf("Starting timers \n\n");
+
         // start timer
         auto time_1 = std::chrono::high_resolution_clock::now();
 
@@ -717,7 +726,7 @@ int main(int argc, char *argv[])
             size_t num_points_found;
 
             // establish the stencil size to get enough particles
-            for(int stencil=1; stencil<1000; stencil++){
+            for(int stencil=1; stencil<100000; stencil++){
 
                 num_points_found = 0;
 
@@ -747,10 +756,18 @@ int main(int argc, char *argv[])
                 } // end for icount
 
                 // the min number of points required to solve the system is num_poly_basis+1
-                if (num_points_found > num_poly_basis+5){
+                if (num_points_found > num_poly_basis+5 || num_points_found==num_points){
+
+                    points_bin_stencil(point_gid,0) = imin;
+                    points_bin_stencil(point_gid,1) = imax;
+                    points_bin_stencil(point_gid,2) = jmin;
+                    points_bin_stencil(point_gid,3) = jmax;
+                    points_bin_stencil(point_gid,4) = kmin;
+                    points_bin_stencil(point_gid,5) = kmax;
+
+                    points_num_neighbors(point_gid) = num_points_found; // including node_i in the list of neighbors
+                    //points_num_neighbors(point_gid) = num_points_found - 1; // the -1 is because counted point i as a neighbor
 
-                    points_bin_stencil(point_gid) = stencil;
-                    points_num_neighbors(point_gid) = num_points_found; // key for allocations
                     break;
                 }
                 
@@ -760,16 +777,81 @@ int main(int argc, char *argv[])
         }); // end for all
         Kokkos::fence();
         points_bin_stencil.update_host();
+
+
+
+        // account for stencils not overlapping, fixing asymmetry in points connectivity
+        FOR_ALL(point_gid, 0, num_points, {
+
+            // get bin gid for this point
+            size_t bin_gid = points_bin_gid(point_gid);
+                    
+            // get i,j,k for this bin
+            bin_keys_t bin_keys = keys_in_bin(bin_gid);
+
+            const int i = bin_keys.i;
+            const int j = bin_keys.j;
+            const int k = bin_keys.k;
+
+            // walk over the stencil to get neighbors of this bin
+            const int imin = points_bin_stencil(point_gid,0);
+            const int imax = points_bin_stencil(point_gid,1);
+            const int jmin = points_bin_stencil(point_gid,2);
+            const int jmax = points_bin_stencil(point_gid,3);
+            const int kmin = points_bin_stencil(point_gid,4);
+            const int kmax = points_bin_stencil(point_gid,5);
+
+            // loop over my bin stencil
+            for (int icount=imin; icount<=imax; icount++){
+                for (int jcount=jmin; jcount<=jmax; jcount++) {
+                    for (int kcount=kmin; kcount<=kmax; kcount++){
+
+                        // get bin neighbor gid 
+                        size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y);
+
+                        // save the points in this bin
+                        for(size_t neighbor_pt_lid=0; neighbor_pt_lid<num_points_in_bin(neighbor_bin_gid); neighbor_pt_lid++){
+
+                            size_t neighbor_point_gid = points_in_bin(neighbor_bin_gid, neighbor_pt_lid);
+
+                            // check whether the neighbor's stencil covers the bin of point_gid; if not, increment that neighbor's count
+                            const int neighbor_imin = points_bin_stencil(neighbor_point_gid,0);
+                            const int neighbor_imax = points_bin_stencil(neighbor_point_gid,1);
+                            const int neighbor_jmin = points_bin_stencil(neighbor_point_gid,2);
+                            const int neighbor_jmax = points_bin_stencil(neighbor_point_gid,3);
+                            const int neighbor_kmin = points_bin_stencil(neighbor_point_gid,4);
+                            const int neighbor_kmax = points_bin_stencil(neighbor_point_gid,5);
+                            
+                            // i,j,k is the bin where point_gid lives
+                            bool inside =
+                                (i >= neighbor_imin && i <= neighbor_imax) &&
+                                (j >= neighbor_jmin && j <= neighbor_jmax) &&
+                                (k >= neighbor_kmin && k <= neighbor_kmax);
+
+                            if(!inside){
+                                Kokkos::atomic_increment(&points_num_neighbors(neighbor_point_gid)); 
+                                // the other stencil didn't see my point because it was smaller, now it does see it
+                            }
+
+                        } // neighbor_point_lid
+
+                    } // end for kcount
+                } // end for jcount
+            } // end for icount        
+
+        }); // end for all
+        Kokkos::fence();
         points_num_neighbors.update_host();
         
         // allocate memory for points in point
         DRaggedRightArrayKokkos <size_t> points_in_point(points_num_neighbors, "points_in_point");
+        points_num_neighbors.set_values(0);  // this is a num saved counter now
 
         // ---------------------
         // Save the neighbors
         // ---------------------
 
-        // find my neighbors using bins
+        // find neighbors using bins
         FOR_ALL(point_gid, 0, num_points, {
 
             // get bin gid for this point
@@ -783,19 +865,13 @@ int main(int argc, char *argv[])
             const int k = bin_keys.k;
 
             // walk over the stencil to get neighbors
-            const int stencil = points_bin_stencil(point_gid);
-
-            const int imin = MAX(0, i-stencil);
-            const int imax = MIN(num_bins_x-1, i+stencil);
-
-            const int jmin = MAX(0, j-stencil);
-            const int jmax = MIN(num_bins_y-1, j+stencil);
-
-            const int kmin = MAX(0, k-stencil);
-            const int kmax = MIN(num_bins_z-1, k+stencil);
+            int imin = points_bin_stencil(point_gid,0);
+            int imax = points_bin_stencil(point_gid,1);
+            int jmin = points_bin_stencil(point_gid,2);
+            int jmax = points_bin_stencil(point_gid,3);
+            int kmin = points_bin_stencil(point_gid,4);
+            int kmax = points_bin_stencil(point_gid,5);
 
-            size_t num_saved = 0;
-            size_t num_points_found = 0;
 
             for (int icount=imin; icount<=imax; icount++){
                 for (int jcount=jmin; jcount<=jmax; jcount++) {
@@ -803,16 +879,45 @@ int main(int argc, char *argv[])
 
                         // get bin neighbor gid 
                         size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y);
-                        num_points_found += num_points_in_bin(neighbor_bin_gid);
 
                         // save the points in this bin
                         for(size_t neighbor_pt_lid=0; neighbor_pt_lid<num_points_in_bin(neighbor_bin_gid); neighbor_pt_lid++){
 
                             size_t neighbor_point_gid = points_in_bin(neighbor_bin_gid, neighbor_pt_lid);
-
-                            points_in_point(point_gid, num_saved) = neighbor_point_gid;
                             
-                            num_saved++;
+                            // make sure its a neighbor
+                            //if(neighbor_point_gid != point_gid){
+
+                            // I am including point_i in the neighbor list
+
+                                // save the neighbor
+                                size_t num_saved = Kokkos::atomic_fetch_add(&points_num_neighbors(point_gid), 1);
+                                points_in_point(point_gid, num_saved) = neighbor_point_gid;
+                                
+                                
+                                // if point j's stencil did not see point i, then save i to j's list
+                                const int neighbor_imin = points_bin_stencil(neighbor_point_gid,0);
+                                const int neighbor_imax = points_bin_stencil(neighbor_point_gid,1);
+                                const int neighbor_jmin = points_bin_stencil(neighbor_point_gid,2);
+                                const int neighbor_jmax = points_bin_stencil(neighbor_point_gid,3);
+                                const int neighbor_kmin = points_bin_stencil(neighbor_point_gid,4);
+                                const int neighbor_kmax = points_bin_stencil(neighbor_point_gid,5);
+
+                                // i,j,k is the bin where point_gid lives
+                                bool inside =
+                                    (i >= neighbor_imin && i <= neighbor_imax) &&
+                                    (j >= neighbor_jmin && j <= neighbor_jmax) &&
+                                    (k >= neighbor_kmin && k <= neighbor_kmax);
+
+                                if(!inside){
+
+                                    size_t num_saved_neighbor = Kokkos::atomic_fetch_add(&points_num_neighbors(neighbor_point_gid), 1);
+                                    points_in_point(neighbor_point_gid, num_saved_neighbor) = point_gid;
+                                    // the other stencil didn't see my point because it was smaller, now it does see it
+
+                                } // end if
+
+                            //} // end if neighbor != point_gid
 
                         } // neighbor_point_lid
 
@@ -825,6 +930,36 @@ int main(int argc, char *argv[])
         points_in_point.update_host();
 
 
+        // build the reverse map
+        DRaggedRightArrayKokkos <int> reverse_neighbor_lid(points_num_neighbors); 
+        reverse_neighbor_lid.set_values(-1);
+
+        FOR_ALL(point_gid, 0, num_points, {
+                
+            for(int neighbor_point_lid = 0; neighbor_point_lid<points_num_neighbors(point_gid); neighbor_point_lid++){
+                
+                // get the point gid for this neighbor
+                int neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                
+                // loop over the neighbors of my neighbor
+                size_t found = 0;
+                for(int j_lid = 0; j_lid<points_num_neighbors(neighbor_point_gid); j_lid++){
+
+                    // get the neighboring point gid of my neighbor
+                    int j_point_gid = points_in_point(neighbor_point_gid, j_lid);
+                    if (point_gid == j_point_gid){
+                        reverse_neighbor_lid(point_gid, neighbor_point_lid) = j_lid;
+                        found = 1;
+                        //printf("found \n");
+                        break;
+                    }
+                } // end loop over j's neighboring points
+                if(found==0)printf("reverse map for i=%d and j=%d pair not found \n", point_gid, neighbor_point_gid);
+            } // end loop over i's neighboring points
+                
+        });
+
+
         // end timer
         auto time_4 = std::chrono::high_resolution_clock::now();
 
@@ -846,6 +981,7 @@ int main(int argc, char *argv[])
         
         DRaggedRightArrayKokkos <double> basis(points_num_neighbors);        // reproducing kernel basis (num_points, num_neighbors)
         DRaggedRightArrayKokkos <double> grad_basis(points_num_neighbors,3); // reproducing kernel basis (num_points, num_neighbors)
+        
 
 
         double h = 1.0;
@@ -877,6 +1013,23 @@ int main(int argc, char *argv[])
         auto time_6 = std::chrono::high_resolution_clock::now();
 
 
+        // -----------------
+        //  Timers
+        // -----------------
+        printf("\n");
+        std::chrono::duration <double, std::milli> ms = time_2 - time_1;
+        std::cout << "runtime to create bins = " << ms.count() << "ms\n\n";
+
+        ms = time_4 - time_3;
+        std::cout << "runtime to find and save neighbors = " << ms.count() << "ms\n\n";
+
+        ms = time_6 - time_5;
+        std::cout << "runtime to calculate basis and grad basis = " << ms.count() << "ms\n\n";
+
+
+
+        printf("Checking gradients at points \n\n");
+
         // performing checks on p_coeffs, basis, and grad_basis
         double partion_unity;
         double partion_unity_lcl;
@@ -913,7 +1066,9 @@ int main(int argc, char *argv[])
 
             // partition of unity
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), partion_unity_lcl, {
-                partion_unity_lcl += basis(point_gid,neighbor_point_lid)*vol(neighbor_point_lid);
+                // get the point gid for this neighbor
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+                partion_unity_lcl += basis(point_gid,neighbor_point_lid)*vol(neighbor_point_gid);
             }, partion_unity);
             
 
@@ -932,9 +1087,14 @@ int main(int argc, char *argv[])
                 quadratic_preserving_lcl += basis(point_gid,neighbor_point_lid)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0)*point_positions(neighbor_point_gid,0);
             }, quadratic_preserving);
 
-            printf("partition unity = %f, ", partion_unity);
-            printf("linear fcn error = %f, ", fabs(linear_preserving-point_positions(point_gid,0)));
-            printf("quadratic fcn error = %f at i=%zu \n", fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0)), point_gid);
+            if(fabs(partion_unity-1.0)>1e-13)
+                printf("partition unity = %f, ", partion_unity);
+
+            if(fabs(linear_preserving-point_positions(point_gid,0))>1e-13)
+                printf("linear fcn error = %f, ", fabs(linear_preserving-point_positions(point_gid,0)));
+
+            if(fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0))>1e-13)
+                printf("quadratic fcn error = %f at i=%zu \n", fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0)), point_gid);
 
 
             // -----------------
@@ -957,7 +1117,10 @@ int main(int argc, char *argv[])
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
                 grad_z_p0_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid);
             }, grad_z_p0);
-             printf("error in grad(P0) = %f, %f, %f, \n", grad_x_p0, grad_y_p0, grad_z_p0);
+
+            const double grad_check_P0 = fabs(grad_x_p0)+fabs(grad_y_p0)+fabs(grad_z_p0);
+            if(0.333*fabs(grad_check_P0)>1e-8)
+                printf("error in grad(P0) = %f, %f, %f, \n", grad_x_p0, grad_y_p0, grad_z_p0);
 
 
             // Sum(grad(P1)) = [1]; 
@@ -976,10 +1139,14 @@ int main(int argc, char *argv[])
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
                 grad_z_p1_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2);
             }, grad_z_p1);
-             printf("error in grad(P1) = %f, %f, %f, \n", 
-                fabs(grad_x_p1 - 1.0), 
-                fabs(grad_y_p1 - 1.0), 
-                fabs(grad_z_p1 - 1.0));
+
+            const double grad_check_P1 = fabs(grad_x_p1 - 1.0)+fabs(grad_y_p1 - 1.0)+fabs(grad_z_p1 - 1.0);
+            if(0.333*fabs(grad_check_P1)>1e-8){
+                printf("error in grad(P1) = %f, %f, %f, \n", 
+                    fabs(grad_x_p1 - 1.0), 
+                    fabs(grad_y_p1 - 1.0), 
+                    fabs(grad_z_p1 - 1.0));
+            }
 
 
             // Sum(grad(P2)) = [2]; 
@@ -998,24 +1165,137 @@ int main(int argc, char *argv[])
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
                 grad_z_p2_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2)*point_positions(neighbor_point_gid,2);
             }, grad_z_p2);
-            printf("error in grad(P2) = %f, %f, %f, \n", 
-                    fabs(grad_x_p2-2.0*point_positions(point_gid,0)), 
-                    fabs(grad_y_p2-2.0*point_positions(point_gid,1)), 
-                    fabs(grad_z_p2-2.0*point_positions(point_gid,2)));
+
+            const double grad_check_P2 = fabs(grad_x_p2-2.0*point_positions(point_gid,0)) + fabs(grad_y_p2-2.0*point_positions(point_gid,1)) + fabs(grad_z_p2-2.0*point_positions(point_gid,2));
+            if(0.333*fabs(grad_check_P2)>1e-8){
+                printf("error in grad(P2) = %f, %f, %f, \n", 
+                        fabs(grad_x_p2-2.0*point_positions(point_gid,0)), 
+                        fabs(grad_y_p2-2.0*point_positions(point_gid,1)), 
+                        fabs(grad_z_p2-2.0*point_positions(point_gid,2)));
+            }
 
         } // end for point gid
 
 
-        ////// timers ///
+        if(check_maps){
+            size_t bad = 0;
+            for (size_t i=0; i<num_points; ++i) {
+                for (size_t lid=0; lid<points_num_neighbors(i); ++lid) {
+                    size_t j = points_in_point(i, lid);
+                    size_t rev = reverse_neighbor_lid(i, lid);
+                    if (rev == SIZE_MAX) {
+                    printf("MISSING reverse for pair (%zu,%zu)\n", i, j); ++bad;
+                    if (bad>50) break;
+                    } else {
+                    size_t check = points_in_point(j, rev);
+                    if (check != i) {
+                        printf("WRONG reverse: i=%zu j=%zu rev=%zu check=%zu\n", i, j, rev, check);
+                        ++bad;
+                        if (bad>50) break;
+                    }
+                    }
+                }
+                if (bad>50) break;
+            }
+            if (bad==0) printf("reverse map OK\n");
+
+
+            for (size_t i=0; i<num_points; ++i) {
+                std::set<size_t> seen;
+                for (size_t lid=0; lid<points_num_neighbors(i); ++lid) {
+                    size_t j = points_in_point(i, lid);
+                    //if (j == i) printf("SELF neighbor found at i=%zu lid=%zu\n", i, lid);
+                    if (!seen.insert(j).second) printf("DUPLICATE neighbor %zu in list of %zu\n", j, i);
+                }
+            }
+        }
+
+
+        printf("Building anti-symmetric gradient \n\n");
+
+        // -------------------
+        // Anti-sym gradient
+        // -------------------
+        
+        // DCArrayKokkos <double> div(num_points);
+        // div.set_values(0.0);
+
+        // FOR_ALL(point_gid, 0, num_points, {
+                
+        //     for(size_t neighbor_point_lid = 0; neighbor_point_lid<points_num_neighbors(point_gid); neighbor_point_lid++){
+                
+        //         // get the point gid for this neighbor
+        //         size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+
+        //         // get the local id of my neighbor that matches my point_gid
+        //         size_t neighbor_lid = reverse_neighbor_lid(point_gid, neighbor_point_lid);
+
+        //         if(point_gid != points_in_point(neighbor_point_gid, neighbor_lid)){
+        //             printf("CHECK: point i = %d, reverse map point i = %zu for j = %zu \n", point_gid, points_in_point(neighbor_point_gid, neighbor_lid), neighbor_point_gid);
+        //         }
+
+        //         double dot_prod = 0.0;
+        //         for (size_t dim=0; dim<3; dim++){                  
+        //             dot_prod += (grad_basis(point_gid,neighbor_point_lid,dim) - grad_basis(neighbor_point_gid,neighbor_lid,dim))*
+        //                         (point_positions(neighbor_point_gid,dim) - point_positions(point_gid,dim)); 
+        //         }
+        //         div(point_gid) += vol(point_gid)*vol(neighbor_point_gid)*0.5*dot_prod;
+                
+        //     } // end neighbors
+
+        // }); // end parallel over points
+        // Kokkos::fence();
+
+        // FOR_ALL(point_gid, 0, num_points, {
+        //     div(point_gid) /= vol(point_gid);
+        // });
+        // div.update_host();
+
+        // for(size_t point_gid=0; point_gid<num_points; point_gid++){
+        //     printf("div = %f at point %zu \n", div.host(point_gid), point_gid);
+        // }
+
+
+        // other coding 
+        // dot_prod = 0.0;
+        // for (size_t dim=0; dim<3; dim++) {
+        //     dot_prod += grad_basis(point_gid,neighbor_point_lid,dim) *
+        //                 (point_positions(neighbor_point_gid,dim)- point_positions(point_gid,dim));
+        // }
+
+        // // contribution to i
+        // Kokkos::atomic_add(&div(point_gid),
+        //     0.5 * vol(point_gid) * vol(neighbor_point_gid) * dot_prod);
+
+        // // // contribution to j
+        // Kokkos::atomic_add(&div(neighbor_point_gid),
+        //     -0.5 * vol(point_gid) * vol(neighbor_point_gid) * dot_prod);
+
+
+        double conserve_check;
+        double conserve_check_lcl;
+        FOR_REDUCE_SUM(point_gid, 0, num_points, 
+                       conserve_check_lcl, {
+
+            for(size_t neighbor_point_lid = 0; neighbor_point_lid<points_num_neighbors(point_gid); neighbor_point_lid++){
+                
+                // get the point gid for this neighbor
+                size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
+
+                // get the local id of my neighbor that matches my point_gid
+                size_t neighbor_lid = reverse_neighbor_lid(point_gid, neighbor_point_lid);
+
+                for (size_t dim=0; dim<3; dim++){
+                    conserve_check_lcl += grad_basis(point_gid,neighbor_point_lid,dim) - grad_basis(neighbor_point_gid,neighbor_lid,dim);
+                }
+            }
+
+        }, conserve_check);
+        printf("conservation = %f \n", conserve_check);
+
 
-        std::chrono::duration <double, std::milli> ms = time_2 - time_1;
-        std::cout << "runtime to create bins = " << ms.count() << "ms\n\n";
 
-        ms = time_4 - time_3;
-        std::cout << "runtime to find and save neighbors = " << ms.count() << "ms\n\n";
 
-        ms = time_6 - time_5;
-        std::cout << "runtime to calculate basis and grad basis = " << ms.count() << "ms\n\n";
 
 
 

From c77e0d5f6eaefc99bbe520906ebc7df32dc21c12 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Tue, 23 Sep 2025 17:35:11 -0600
Subject: [PATCH 16/23] rk conservative gradient test

---
 examples/pointcloud/pointcloud-rk.cpp | 391 +++++++++++++++++++++-----
 1 file changed, 328 insertions(+), 63 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index 952d24a7..f0122013 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -70,7 +70,15 @@ const double PI = 3.14159265358979323846;
 // -----------------------------------------------
 // inputs:
 
-const size_t num_points = 31;
+
+const size_t num_1d_x = 4;
+const size_t num_1d_y = 4;
+const size_t num_1d_z = 4;
+
+const double h_kernel = 1.5/4.;
+const double num_points_fit = 30;
+
+const size_t num_points = num_1d_x*num_1d_y*num_1d_z;
 
 // the bin sizes for finding neighboring points
 const double bin_dx = 0.05; // bins in x
@@ -146,6 +154,64 @@ size_t get_bin_gid(const double x_pt,
 } // end function
 
 
+
+KOKKOS_FUNCTION
+double kernel_bs(const double r[3], double h) {
+    
+    double xij = 0.0;
+    for(size_t dim=0; dim<3; dim++){
+        xij += r[dim]*r[dim];
+    } // dim
+
+    double q = sqrt(xij)/h;
+    double alpha = 2.0/(3.0*h);
+    if (q < 0.0) return 0.0; // defensive
+    if (q < 1.0) return (alpha * (1.0 - 1.5*q*q + 0.75*q*q*q));
+    if (q < 2.0) return (alpha * 0.25 * pow(2.0 - q, 3));
+
+    return 0.0;
+}
+
+
+KOKKOS_FUNCTION
+// derivative dW/dx_i = - dW/dr where r = xj-xi
+void grad_kernel_bs(double *grad_W, const double r[3], const double h) {
+
+    double xij = 0.0;
+    for(size_t dim=0; dim<3; dim++){
+        xij += r[dim]*r[dim];
+    } // dim
+    // sqrt(xij) = radius
+
+    const double radius = sqrt(xij);
+    const double q = radius/h;
+
+    double df_dq = 0.0; // derivative of the dimensionless kernel shape function f(q)
+    if (q < 1.0) {
+        // f(q) = 1 - 1.5 q^2 + 0.75 q^3
+        // f'(q) = -3 q + 2.25 q^2
+        df_dq = -3.0 * q + 2.25 * q * q;
+    } else if (q < 2.0) {
+        // f(q) = 0.25 (2 - q)^3
+        // f'(q) = 0.25 * 3 (2 - q)^2 * (-1) = -0.75 (2 - q)^2
+        const double two_minus_q = 2.0 - q;
+        df_dq = -0.75 * two_minus_q * two_minus_q;
+    } else {
+        df_dq = 0.0;
+    }
+
+    const double dW_dr = (df_dq / h);
+    // grad W = dW/dr * (rij / radius)
+    const double invr = 1.0 / radius;
+
+    for (size_t dim=0; dim<3; ++dim) {
+        grad_W[dim] = dW_dr * r[dim] * invr;
+    }
+
+    return;
+}
+
+
 // Gaussian function part of the RBF
 // rbf = exp(-(xj - x)*(xj - x)/h)
 KOKKOS_FUNCTION
@@ -157,14 +223,33 @@ double kernel(const double r[3], const double h){
         diff_sqrd += r[dim]*r[dim];
     } // dim
 
-    return exp(-diff_sqrd/(h*h));
+    double norm = 1.0 / (h * h * h * pow(PI, 1.5));
+    return norm * exp(-diff_sqrd / (h * h));
+} // end of function
+
+
+// Gaussian function part of the RBF, symmeterized
+// rbf = 0.5*(exp(-(xj - xi)*(xj - xi)/hi^2) + exp(-(xi - xj)*(xi - xj)/hj^2))
+KOKKOS_FUNCTION
+double kernel_syn(const double r[3], const double hi, const double hj){
+
+    double diff_sqrd = 0.0;
+
+    for(size_t dim=0; dim<3; dim++){
+        diff_sqrd += r[dim]*r[dim];
+    } // dim
+
+    const double Wi = exp(-diff_sqrd/(hi*hi)); // use kernel func call
+    const double Wj = exp(-diff_sqrd/(hj*hj));
+
+    return 0.5*(Wi + Wj);
 } // end of function
 
 
 // Gradient Gaussian function
-// rbf = exp(-(xj - x)*(xj - x)/h)
+// d/dx rbf = d/dx (exp(-(xj - xi)*(xj - x)/hi^2) 
 KOKKOS_FUNCTION
-void grad_kernel(const double r[3], const double h, double *grad_W){
+void grad_kernel(double *grad_W, const double r[3], const double h){
 
     double diff_sqrd = 0.0;
 
@@ -172,16 +257,38 @@ void grad_kernel(const double r[3], const double h, double *grad_W){
         diff_sqrd += r[dim]*r[dim];
     } // dim
 
-    const double rbf = exp(-diff_sqrd/(h*h));
+    const double drdxi = -1;
+    const double rbf = kernel(r, h);
 
     // gradient
-    grad_W[0] = 2.0/(h*h)*r[0]*rbf; 
-    grad_W[1] = 2.0/(h*h)*r[1]*rbf; 
-    grad_W[2] = 2.0/(h*h)*r[2]*rbf;
+    for (size_t dim=0; dim<3; ++dim) {
+        grad_W[dim] = -2.0/(h*h)*r[dim]*rbf*drdxi; 
+    }
 
     return;
 } // end of function
 
+// d/dx rbf = d/dx ( 0.5(exp(-(xj - xi)*(xj - x)/hi^2) + exp(-(xi - xj)*(xi - xj)/hj^2)) ) 
+KOKKOS_FUNCTION
+void grad_kernel_sym(double *gradW, const double r[3], const double hi, const double hj) {
+    double diff_sqrd = 0.0;
+    for (size_t dim=0; dim<3; ++dim){
+        diff_sqrd += r[dim]*r[dim];
+    }
+
+    const double drdxi = -1;
+
+    double Wi = exp(-diff_sqrd/(hi*hi));
+    double Wj = exp(-diff_sqrd/(hj*hj));
+
+    double dWi = -2.0/ (hi*hi) * Wi*drdxi; // it uses xj - xi so a minus one
+    double dWj = -2.0/ (hj*hj) * Wj; // it uses xi - xj so it has a +1 for drdxi
+
+    for (size_t dim=0; dim<3; ++dim) {
+        gradW[dim] = 0.5 * (dWi * r[dim] - dWj * r[dim]); // second term using -r
+    }
+}
+
 
 // Polynomial basis up to quadratic in 3D (10 terms)
 const size_t num_poly_basis = 10;
@@ -262,7 +369,7 @@ void calc_basis_functions(
     const DCArrayKokkos <double>& x,
     const DCArrayKokkos <size_t> points_num_neighbors, 
     const DRaggedRightArrayKokkos <size_t> points_in_point,
-    const CArrayKokkos <double>& vol,
+    const DCArrayKokkos <double>& vol,
     const CArrayKokkos <double>& p_coeffs,
     const DRaggedRightArrayKokkos <double>& basis,
     const double h)
@@ -307,7 +414,7 @@ void calc_basis_and_grad_basis_functions(
     const DCArrayKokkos <double>& x,
     const DCArrayKokkos <size_t> points_num_neighbors, 
     const DRaggedRightArrayKokkos <size_t> points_in_point,
-    const CArrayKokkos <double>& vol,
+    const DCArrayKokkos <double>& vol,
     const CArrayKokkos <double>& p_coeffs,
     const CArrayKokkos <double>& M_inv,
     const DRaggedRightArrayKokkos <double>& basis,
@@ -346,7 +453,7 @@ void calc_basis_and_grad_basis_functions(
 
             double W = kernel(r,h);
             double grad_W[3]; 
-            grad_kernel(r,h,grad_W);
+            grad_kernel(grad_W,r,h);
 
             double p[num_poly_basis]; 
             poly_basis(r,p);
@@ -387,7 +494,7 @@ void calc_basis_and_grad_basis_functions(
             poly_basis(r,p);
             
             double grad_W[3];
-            grad_kernel(r, h, grad_W);
+            grad_kernel(grad_W, r, h);
 
             double grad_p[num_poly_basis][3]; // matrix holding grad polynomial basis
             grad_poly_basis(r, grad_p);
@@ -471,7 +578,7 @@ void calc_p_coefficients(
     const DCArrayKokkos <double>& x,
     const DCArrayKokkos <size_t> points_num_neighbors, 
     const DRaggedRightArrayKokkos <size_t> points_in_point,
-    const CArrayKokkos <double>& vol,
+    const DCArrayKokkos <double>& vol,
     const CArrayKokkos <double>& p_coeffs,
     const CArrayKokkos <double>& M_inv,
     double h)
@@ -584,12 +691,34 @@ int main(int argc, char *argv[])
         DCArrayKokkos <double> point_values(num_points, "point_values"); 
 
         // point locations
+        if(false){
         srand(static_cast<unsigned int>(time(0))); // Seed the random number generator
-        for(size_t i=0; i<num_points; i++){
-            point_positions.host(i, 0) = X0 + LX*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
-            point_positions.host(i, 1) = Y0 + LY*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
-            point_positions.host(i, 2) = Z0 + LZ*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
+            for(size_t i=0; i<num_points; i++){
+                point_positions.host(i, 0) = X0 + LX*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
+                point_positions.host(i, 1) = Y0 + LY*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
+                point_positions.host(i, 2) = Z0 + LZ*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
+            }
         }
+        else {
+
+            double dx = LX/((double)num_1d_x);
+            double dy = LY/((double)num_1d_y);
+            double dz = LZ/((double)num_1d_z);
+
+            size_t point_gid = 0;  
+            for(size_t k=0; k<num_1d_z; k++){
+                for(size_t j=0; j<num_1d_y; j++){
+                    for(size_t i=0; i<num_1d_x; i++){
+                        point_positions.host(point_gid, 0) = X0 + static_cast<double>(i)*dx;
+                        point_positions.host(point_gid, 1) = Y0 + static_cast<double>(j)*dy;
+                        point_positions.host(point_gid, 2) = Z0 + static_cast<double>(k)*dz;
+                        point_gid++;
+                    } // end i
+                } // end j
+            } // end k
+
+        } // end if
+
         point_positions.update_device();
         Kokkos::fence();
 
@@ -742,10 +871,10 @@ int main(int argc, char *argv[])
 
                 const int kmin = MAX(0, k-stencil);
                 const int kmax = MIN(num_bins_z-1, k+stencil);
-
-                for (int icount=imin; icount<=imax; icount++){
+                    
+                for (int kcount=kmin; kcount<=kmax; kcount++){
                     for (int jcount=jmin; jcount<=jmax; jcount++) {
-                        for (int kcount=kmin; kcount<=kmax; kcount++){
+                        for (int icount=imin; icount<=imax; icount++){
 
                             // get bin neighbor gid 
                             size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y);
@@ -755,8 +884,8 @@ int main(int argc, char *argv[])
                     } // end for jcount
                 } // end for icount
 
-                // the min number of points required to solve the system is num_poly_basis+1
-                if (num_points_found > num_poly_basis+5 || num_points_found==num_points){
+                // the min number of points required to solve the system is num_poly_basis+1, was 2*num_poly_basis
+                if (num_points_found > num_points_fit  || num_points_found==num_points){
 
                     points_bin_stencil(point_gid,0) = imin;
                     points_bin_stencil(point_gid,1) = imax;
@@ -802,9 +931,9 @@ int main(int argc, char *argv[])
             const int kmax = points_bin_stencil(point_gid,5);
 
             // loop over my bin stencil
-            for (int icount=imin; icount<=imax; icount++){
+            for (int kcount=kmin; kcount<=kmax; kcount++){
                 for (int jcount=jmin; jcount<=jmax; jcount++) {
-                    for (int kcount=kmin; kcount<=kmax; kcount++){
+                    for (int icount=imin; icount<=imax; icount++){
 
                         // get bin neighbor gid 
                         size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y);
@@ -873,9 +1002,9 @@ int main(int argc, char *argv[])
             int kmax = points_bin_stencil(point_gid,5);
 
 
-            for (int icount=imin; icount<=imax; icount++){
+            for (int kcount=kmin; kcount<=kmax; kcount++){
                 for (int jcount=jmin; jcount<=jmax; jcount++) {
-                    for (int kcount=kmin; kcount<=kmax; kcount++){
+                    for (int icount=imin; icount<=imax; icount++){
 
                         // get bin neighbor gid 
                         size_t neighbor_bin_gid = get_gid(icount, jcount, kcount, num_bins_x, num_bins_y);
@@ -973,8 +1102,36 @@ int main(int argc, char *argv[])
         auto time_5 = std::chrono::high_resolution_clock::now();
 
         CArrayKokkos <double> p_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point
-        CArrayKokkos <double> vol(num_points);
-        vol.set_values(1.0);
+        
+        
+        DCArrayKokkos <double> vol(num_points);
+        vol.set_values(0.0);
+
+        const double dx = LX/((double)num_1d_x);
+        const double dy = LY/((double)num_1d_y);
+        const double dz = LZ/((double)num_1d_z); 
+        const double elem_vol = dx*dy*dz;
+
+        const size_t num_cells_1d_x = num_1d_x-1;
+        const size_t num_cells_1d_y = num_1d_y-1;
+        const size_t num_cells_1d_z = num_1d_z-1;
+
+        FOR_ALL(k,0,num_cells_1d_z,
+                j,0,num_cells_1d_y,
+                i,0,num_cells_1d_x,{
+
+            for (int kcount=k; kcount<=k+1; kcount++){
+                for (int jcount=j; jcount<=j+1; jcount++){
+                    for (int icount=i; icount<=i+1; icount++){
+                        size_t point_gid = get_gid(icount, jcount, kcount, num_1d_x, num_1d_y);
+                        Kokkos::atomic_add(&vol(point_gid), elem_vol*0.25);
+                    } // end i
+                } // end j
+            } // end k
+                    
+        }); // end parallel over k,j,i 
+        vol.update_host();
+
 
         CArrayKokkos <double> M_inv(num_points, num_poly_basis, num_poly_basis);
         CArrayKokkos <double> grad_M(num_points, num_poly_basis, num_poly_basis);
@@ -984,7 +1141,8 @@ int main(int argc, char *argv[])
         
 
 
-        double h = 1.0;
+        double h = h_kernel; // kernel width
+
 
         printf("building reproducing kernel coefficients \n");
 
@@ -1107,11 +1265,13 @@ int main(int argc, char *argv[])
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
                 grad_x_p0_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid);
             }, grad_x_p0);
+
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p0_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
                 grad_y_p0_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid);
             }, grad_y_p0);
+
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p0_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
@@ -1216,45 +1376,150 @@ int main(int argc, char *argv[])
         // -------------------
         // Anti-sym gradient
         // -------------------
-        
-        // DCArrayKokkos <double> div(num_points);
-        // div.set_values(0.0);
+        printf("Testing divergence of vector field u = (x, y, z) \n\n");
 
-        // FOR_ALL(point_gid, 0, num_points, {
-                
-        //     for(size_t neighbor_point_lid = 0; neighbor_point_lid<points_num_neighbors(point_gid); neighbor_point_lid++){
+        DCArrayKokkos <double> u(num_points, 3);
+        FOR_ALL(i, 0, num_points, {
+            u(i, 0) = point_positions(i, 0);        
+            u(i, 1) = point_positions(i, 1);
+            u(i, 2) = point_positions(i, 2);
+        });
+        u.update_device();
+
+        DCArrayKokkos <double> div(num_points);
+        div.set_values(0.0);
+
+        DCArrayKokkos <double> div_fd(num_points);
+        div_fd.set_values(0.0);
+
+        FOR_ALL(i_gid, 0, num_points, {
+            for(size_t j_lid = 0; j_lid<points_num_neighbors(i_gid); j_lid++){                
+                size_t j_gid = points_in_point(i_gid, j_lid);
+                size_t i_lid = reverse_neighbor_lid(i_gid, j_lid);
+
+                if(i_gid != points_in_point(j_gid, i_lid)){
+                    printf("CHECK: point i = %d, reverse map point i = %zu for j = %zu \n", i_gid, points_in_point(j_gid, i_lid), j_gid);
+                }
+
+                double g_ij[3];
+                double g_ji[3];
+                for (int dim=0; dim<3; ++dim) {
+                    g_ij[dim] = grad_basis(i_gid,j_lid,dim);
+                    g_ji[dim] = grad_basis(j_gid,i_lid,dim);
+                }
+
+                // conservative mesh-free FE
+                double contrib = 0.0;
+                for (int dim=0; dim<3; ++dim) {
+                    contrib += 0.5*(g_ij[dim] - g_ji[dim]) * (u(j_gid, dim) - u(i_gid, dim));
+                }
+                div(i_gid) += vol(i_gid) * vol(j_gid) * contrib;
+
+                // finite difference
+                contrib = 0.0;
+                for (int dim=0; dim<3; ++dim) {
+                    contrib += g_ij[dim]*u(j_gid, dim)*vol(j_gid);
+                }
+                div_fd(i_gid) += contrib;
+
+            }
+            div(i_gid) /= vol(i_gid);
+            //div_fd(i_gid) /= vol(i_gid);  // finite difference doesn't have the V_i on the right side, so no division
+        });
+        div.update_host();
+        div_fd.update_host();
+
+
+        for(size_t point_gid=0; point_gid<num_points; point_gid++){
+            double error = fabs(div.host(point_gid) - 3.0);
+            //if(error > 1e-8){
+                printf("div(u) = %f at point %zu, error = %g, vol = %f\n", div.host(point_gid), point_gid, error, vol.host(point_gid));
+            //}
+        } // end for point_gid
+
+        for(size_t point_gid=0; point_gid<num_points; point_gid++){
+            double error = fabs(div_fd.host(point_gid) - 3.0);
+            if(error > 1e-8){
+                printf("div_fd(u) = %f at point %zu, error = %g\n", div_fd.host(point_gid), point_gid, error);
+            }
+        } // end for point_gid
+
+ /*       
+        DCArrayKokkos <double> div(num_points);
+        div.set_values(0.0);
+
+        FOR_ALL(i_gid, 0, num_points, {
                 
-        //         // get the point gid for this neighbor
-        //         size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
-
-        //         // get the local id of my neighbor that matches my point_gid
-        //         size_t neighbor_lid = reverse_neighbor_lid(point_gid, neighbor_point_lid);
-
-        //         if(point_gid != points_in_point(neighbor_point_gid, neighbor_lid)){
-        //             printf("CHECK: point i = %d, reverse map point i = %zu for j = %zu \n", point_gid, points_in_point(neighbor_point_gid, neighbor_lid), neighbor_point_gid);
-        //         }
-
-        //         double dot_prod = 0.0;
-        //         for (size_t dim=0; dim<3; dim++){                  
-        //             dot_prod += (grad_basis(point_gid,neighbor_point_lid,dim) - grad_basis(neighbor_point_gid,neighbor_lid,dim))*
-        //                         (point_positions(neighbor_point_gid,dim) - point_positions(point_gid,dim)); 
-        //         }
-        //         div(point_gid) += vol(point_gid)*vol(neighbor_point_gid)*0.5*dot_prod;
+            for(size_t j_lid = 0; j_lid<points_num_neighbors(i_gid); j_lid++){                
+
+                // get the point gid for this neighbor
+                size_t j_gid = points_in_point(i_gid, j_lid);
+
+                // get the local id of my neighbor that matches my point_gid
+                size_t i_lid = reverse_neighbor_lid(i_gid, j_lid);
+
+                if(i_gid != points_in_point(j_gid, i_lid)){
+                    printf("CHECK: point i = %d, reverse map point i = %zu for j = %zu \n", i_gid, points_in_point(j_gid, i_lid), j_gid);
+                }
+
+
+                double g_ij[3];
+                double g_ji[3];
+                for (int dim=0; dim<3; ++dim) {
+                    g_ij[dim] = grad_basis(i_gid,j_lid,dim);
+                    g_ji[dim] = grad_basis(j_gid,i_lid,dim);
+                }
+
+                double Delta[3];
+                for (int dim=0; dim<3; ++dim) {
+                    Delta[dim] = g_ij[dim] - g_ji[dim];
+                }
+
+                double pair_dot = 0.0;
+                for (int dim=0; dim<3; ++dim){
+                    pair_dot += 0.5*(point_positions(i_gid,dim) + point_positions(j_gid,dim)) * Delta[dim];
+                }
+
+                div(i_gid) += vol(i_gid) * vol(j_gid) * pair_dot;
+
+                // // contribution to i
+                // Kokkos::atomic_add(&div(i_gid), contrib); 
+
+                // // // contribution to j
+                // Kokkos::atomic_add(&div(j_gid), -contrib);
+
+
+                // checks 
+                double Delta_norm = 0.0;
+                for (int d=0; d<3; ++d){ 
+                    Delta_norm += (g_ij[d]-g_ji[d])*(g_ij[d]-g_ji[d]);
+                } // end for
+
+                printf("pair %d,%zu: |Delta|=%g  g_ij=(%g,%g,%g)  g_ji=(%g,%g,%g)\n", i_gid, j_gid, sqrt(Delta_norm),
+                    g_ij[0],g_ij[1],g_ij[2], g_ji[0],g_ji[1],g_ji[2]);
+
                 
-        //     } // end neighbors
+            } // end neighbors
 
-        // }); // end parallel over points
-        // Kokkos::fence();
+        }); // end parallel over points
+        Kokkos::fence();
 
-        // FOR_ALL(point_gid, 0, num_points, {
-        //     div(point_gid) /= vol(point_gid);
-        // });
-        // div.update_host();
+        FOR_ALL(point_gid, 0, num_points, {
+            div(point_gid) /= vol(point_gid);
+        });
+        div.update_host();
 
-        // for(size_t point_gid=0; point_gid<num_points; point_gid++){
-        //     printf("div = %f at point %zu \n", div.host(point_gid), point_gid);
-        // }
+        for(size_t point_gid=0; point_gid<num_points; point_gid++){
+            printf("div = %f at point %zu \n", div.host(point_gid), point_gid);
+        }
 
+*/
+                // double dot_prod = 0.0;
+                // for (size_t dim=0; dim<3; dim++){                  
+                //     dot_prod += (grad_basis(point_gid,neighbor_point_lid,dim) - grad_basis(neighbor_point_gid,neighbor_lid,dim))*
+                //                 (point_positions(neighbor_point_gid,dim) - point_positions(point_gid,dim)); 
+                // }
+                // div(point_gid) += vol(point_gid)*vol(neighbor_point_gid)*0.5*dot_prod;
 
         // other coding 
         // dot_prod = 0.0;
@@ -1286,7 +1551,7 @@ int main(int argc, char *argv[])
                 size_t neighbor_lid = reverse_neighbor_lid(point_gid, neighbor_point_lid);
 
                 for (size_t dim=0; dim<3; dim++){
-                    conserve_check_lcl += grad_basis(point_gid,neighbor_point_lid,dim) - grad_basis(neighbor_point_gid,neighbor_lid,dim);
+                    conserve_check_lcl += 0.5*(grad_basis(point_gid,neighbor_point_lid,dim) - grad_basis(neighbor_point_gid,neighbor_lid,dim));
                 }
             }
 
@@ -1318,7 +1583,7 @@ int main(int argc, char *argv[])
         out << "SCALARS field float 1\n";
         out << "LOOKUP_TABLE default\n";
         for (size_t point_gid = 0; point_gid < num_points; ++point_gid) {
-            out << point_values.host(point_gid) << "\n";
+            out << div.host(point_gid) << "\n";
         }
 
 

From 67fc1a96cea5d608dbd76c9182a075a1aaf8cefe Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Fri, 26 Sep 2025 12:47:54 -0600
Subject: [PATCH 17/23] WIP anti-symmetric gradient, added b-spline kernel and
 P1 P2 switch

---
 examples/pointcloud/pointcloud-rk.cpp | 423 +++++++++++++-------------
 1 file changed, 216 insertions(+), 207 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index f0122013..07390ca8 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -71,12 +71,15 @@ const double PI = 3.14159265358979323846;
 // inputs:
 
 
-const size_t num_1d_x = 4;
-const size_t num_1d_y = 4;
-const size_t num_1d_z = 4;
+#define P1 // P1 or P2
 
-const double h_kernel = 1.5/4.;
-const double num_points_fit = 30;
+
+const size_t num_1d_x = 3;
+const size_t num_1d_y = 3;
+const size_t num_1d_z = 3;
+
+const double h_kernel = 2/3.;
+const double num_points_fit = 27; // minimum to fit is 3x3x3
 
 const size_t num_points = num_1d_x*num_1d_y*num_1d_z;
 
@@ -116,9 +119,9 @@ bin_keys_t get_bin_keys(const double x_pt,
                         const double z_pt){
             
 
-    double i_dbl = fmax(1.0e-15, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // x = ih + X0 + dx_bin*0.5
-    double j_dbl = fmax(1.0e-15, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10));
-    double k_dbl = fmax(1.0e-15, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10));
+    double i_dbl = fmax(0, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // x = ih + X0 + dx_bin*0.5
+    double j_dbl = fmax(0, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10));
+    double k_dbl = fmax(0, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10));
 
     bin_keys_t bin_keys; // save i,j,k to the bin keys
 
@@ -136,12 +139,13 @@ size_t get_bin_gid(const double x_pt,
                    const double y_pt, 
                    const double z_pt, 
                    const size_t num_bins_x,
-                   const size_t num_bins_y){
+                   const size_t num_bins_y,
+                   const size_t num_bins_z){
             
 
-    double i_dbl = fmax(1.0e-15, round((x_pt - X0 - bin_dx*0.5)/bin_dx - 1.0e-10)); // x = ih + X0 + dx_bin*0.5
-    double j_dbl = fmax(1.0e-15, round((y_pt - Y0 - bin_dy*0.5)/bin_dy - 1.0e-10));
-    double k_dbl = fmax(1.0e-15, round((z_pt - Z0 - bin_dz*0.5)/bin_dz - 1.0e-10));
+    double i_dbl = fmin(num_bins_x-1, fmax(0.0, round((x_pt - X0)/bin_dx - 1.0e-8))); // x = ih + X0
+    double j_dbl = fmin(num_bins_y-1, fmax(0.0, round((y_pt - Y0)/bin_dy - 1.0e-8)));
+    double k_dbl = fmin(num_bins_z-1, fmax(0.0, round((z_pt - Z0)/bin_dz - 1.0e-8)));
 
     // get the integers for the bins
     size_t i = (size_t)i_dbl;
@@ -156,15 +160,16 @@ size_t get_bin_gid(const double x_pt,
 
 
 KOKKOS_FUNCTION
-double kernel_bs(const double r[3], double h) {
+double kernel(const double r[3], double h) {
     
-    double xij = 0.0;
+    double diff_sqrd = 0.0;
     for(size_t dim=0; dim<3; dim++){
-        xij += r[dim]*r[dim];
+        diff_sqrd += r[dim]*r[dim];
     } // dim
 
-    double q = sqrt(xij)/h;
-    double alpha = 2.0/(3.0*h);
+    const double radius = sqrt(diff_sqrd);
+    const double q = radius/h;
+    const double alpha = 2.0/(3.0*h);
     if (q < 0.0) return 0.0; // defensive
     if (q < 1.0) return (alpha * (1.0 - 1.5*q*q + 0.75*q*q*q));
     if (q < 2.0) return (alpha * 0.25 * pow(2.0 - q, 3));
@@ -175,15 +180,14 @@ double kernel_bs(const double r[3], double h) {
 
 KOKKOS_FUNCTION
 // derivative dW/dx_i = - dW/dr where r = xj-xi
-void grad_kernel_bs(double *grad_W, const double r[3], const double h) {
+void grad_kernel(double *grad_W, const double r[3], const double h) {
 
-    double xij = 0.0;
+    double diff_sqrd = 0.0;
     for(size_t dim=0; dim<3; dim++){
-        xij += r[dim]*r[dim];
+        diff_sqrd += r[dim]*r[dim];
     } // dim
-    // sqrt(xij) = radius
 
-    const double radius = sqrt(xij);
+    const double radius = sqrt(diff_sqrd);
     const double q = radius/h;
 
     double df_dq = 0.0; // derivative of the dimensionless kernel shape function f(q)
@@ -202,7 +206,7 @@ void grad_kernel_bs(double *grad_W, const double r[3], const double h) {
 
     const double dW_dr = (df_dq / h);
     // grad W = dW/dr * (rij / radius)
-    const double invr = 1.0 / radius;
+    const double invr = 1.0 /(radius + 1e-16);
 
     for (size_t dim=0; dim<3; ++dim) {
         grad_W[dim] = dW_dr * r[dim] * invr;
@@ -215,7 +219,7 @@ void grad_kernel_bs(double *grad_W, const double r[3], const double h) {
 // Gaussian function part of the RBF
 // rbf = exp(-(xj - x)*(xj - x)/h)
 KOKKOS_FUNCTION
-double kernel(const double r[3], const double h){
+double kernel_g(const double r[3], const double h){
 
     double diff_sqrd = 0.0;
 
@@ -223,7 +227,7 @@ double kernel(const double r[3], const double h){
         diff_sqrd += r[dim]*r[dim];
     } // dim
 
-    double norm = 1.0 / (h * h * h * pow(PI, 1.5));
+    double norm = 1.0; // / (h * h * h * pow(PI, 1.5));
     return norm * exp(-diff_sqrd / (h * h));
 } // end of function
 
@@ -249,7 +253,7 @@ double kernel_syn(const double r[3], const double hi, const double hj){
 // Gradient Gaussian function
 // d/dx rbf = d/dx (exp(-(xj - xi)*(xj - x)/hi^2) 
 KOKKOS_FUNCTION
-void grad_kernel(double *grad_W, const double r[3], const double h){
+void grad_kernel_g(double *grad_W, const double r[3], const double h){
 
     double diff_sqrd = 0.0;
 
@@ -289,79 +293,121 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do
     }
 }
 
+#if defined(P2)
+    // Polynomial basis up to quadratic in 3D (10 terms)
+    const size_t num_poly_basis = 10;
+    KOKKOS_INLINE_FUNCTION
+    void poly_basis(const double r[3], double *p) {
 
-// Polynomial basis up to quadratic in 3D (10 terms)
-const size_t num_poly_basis = 10;
-KOKKOS_INLINE_FUNCTION
-void poly_basis(const double r[3], double *p) {
+        p[0] = 1.0;
+        p[1] = r[0];
+        p[2] = r[1];
+        p[3] = r[2];
+        p[4] = r[0] * r[0];
+        p[5] = r[0] * r[1];
+        p[6] = r[0] * r[2];
+        p[7] = r[1] * r[1];
+        p[8] = r[1] * r[2];
+        p[9] = r[2] * r[2];
 
-    p[0] = 1.0;
-    p[1] = r[0];
-    p[2] = r[1];
-    p[3] = r[2];
-    p[4] = r[0] * r[0];
-    p[5] = r[0] * r[1];
-    p[6] = r[0] * r[2];
-    p[7] = r[1] * r[1];
-    p[8] = r[1] * r[2];
-    p[9] = r[2] * r[2];
+        // for high-order will use (x^a y^b z^c)
 
-    // for high-order will use (x^a y^b z^c)
+        return;
+    } // end function
 
-    return;
-} // end function
 
+    KOKKOS_INLINE_FUNCTION
+    void grad_poly_basis(const double r[3], double (*grad_p)[3]) {
+        
+        const double drdx = -1.0;
+
+        grad_p[0][0] = 0.0;
+        grad_p[1][0] = drdx;
+        grad_p[2][0] = 0.0;
+        grad_p[3][0] = 0.0;
+        grad_p[4][0] = 2.0*r[0]*drdx;
+        grad_p[5][0] = r[1]*drdx;
+        grad_p[6][0] = r[2]*drdx;
+        grad_p[7][0] = 0.0;
+        grad_p[8][0] = 0.0;
+        grad_p[9][0] = 0.0;
+
+        // for high-order will use (x^a y^b z^c)
+
+        const double drdy = -1.0;
+
+        grad_p[0][1] = 0.0;
+        grad_p[1][1] = 0.0;
+        grad_p[2][1] = drdy;
+        grad_p[3][1] = 0.0;
+        grad_p[4][1] = 0.0;
+        grad_p[5][1] = r[0]*drdy;
+        grad_p[6][1] = 0.0;
+        grad_p[7][1] = 2.0*r[1]*drdy;
+        grad_p[8][1] = r[2]*drdy;
+        grad_p[9][1] = 0.0;
+
+        // for high-order will use (x^a y^b z^c)
+
+        const double drdz = -1.0;
+
+        grad_p[0][2] = 0.0;
+        grad_p[1][2] = 0.0;
+        grad_p[2][2] = 0.0;
+        grad_p[3][2] = drdz;
+        grad_p[4][2] = 0.0;
+        grad_p[5][2] = 0.0;
+        grad_p[6][2] = r[0]*drdz;
+        grad_p[7][2] = 0.0;
+        grad_p[8][2] = r[1]*drdz;
+        grad_p[9][2] = 2.0*r[2]*drdz;
+
+        // for high-order will use (x^a y^b z^c)
+
+        return;
+    } // end function
+#else
+    // Polynomial basis up to quadratic in 3D (10 terms)
+    const size_t num_poly_basis = 4;
+    KOKKOS_INLINE_FUNCTION
+    void poly_basis(const double r[3], double *p) {
+
+        p[0] = 1.0;
+        p[1] = r[0];
+        p[2] = r[1];
+        p[3] = r[2];
+
+        return;
+    } // end function
+
+
+    KOKKOS_INLINE_FUNCTION
+    void grad_poly_basis(const double r[3], double (*grad_p)[3]) {
+        
+        const double drdx = -1.0;
+
+        grad_p[0][0] = 0.0;
+        grad_p[1][0] = drdx;
+        grad_p[2][0] = 0.0;
+        grad_p[3][0] = 0.0;
 
-KOKKOS_INLINE_FUNCTION
-void grad_poly_basis(const double r[3], double (*grad_p)[3]) {
-    
-    const double drdx = -1.0;
-
-    grad_p[0][0] = 0.0;
-    grad_p[1][0] = drdx;
-    grad_p[2][0] = 0.0;
-    grad_p[3][0] = 0.0;
-    grad_p[4][0] = 2.0*r[0]*drdx;
-    grad_p[5][0] = r[1]*drdx;
-    grad_p[6][0] = r[2]*drdx;
-    grad_p[7][0] = 0.0;
-    grad_p[8][0] = 0.0;
-    grad_p[9][0] = 0.0;
-
-    // for high-order will use (x^a y^b z^c)
-
-    const double drdy = -1.0;
-
-    grad_p[0][1] = 0.0;
-    grad_p[1][1] = 0.0;
-    grad_p[2][1] = drdy;
-    grad_p[3][1] = 0.0;
-    grad_p[4][1] = 0.0;
-    grad_p[5][1] = r[0]*drdy;
-    grad_p[6][1] = 0.0;
-    grad_p[7][1] = 2.0*r[1]*drdy;
-    grad_p[8][1] = r[2]*drdy;
-    grad_p[9][1] = 0.0;
-
-    // for high-order will use (x^a y^b z^c)
-
-    const double drdz = -1.0;
-
-    grad_p[0][2] = 0.0;
-    grad_p[1][2] = 0.0;
-    grad_p[2][2] = 0.0;
-    grad_p[3][2] = drdz;
-    grad_p[4][2] = 0.0;
-    grad_p[5][2] = 0.0;
-    grad_p[6][2] = r[0]*drdz;
-    grad_p[7][2] = 0.0;
-    grad_p[8][2] = r[1]*drdz;
-    grad_p[9][2] = 2.0*r[2]*drdz;
-
-    // for high-order will use (x^a y^b z^c)
+        const double drdy = -1.0;
 
-    return;
-} // end function
+        grad_p[0][1] = 0.0;
+        grad_p[1][1] = 0.0;
+        grad_p[2][1] = drdy;
+        grad_p[3][1] = 0.0;
+
+        const double drdz = -1.0;
+
+        grad_p[0][2] = 0.0;
+        grad_p[1][2] = 0.0;
+        grad_p[2][2] = 0.0;
+        grad_p[3][2] = drdz;
+
+        return;
+    } // end function
+#endif
 
 
 void calc_basis_functions(
@@ -452,8 +498,10 @@ void calc_basis_and_grad_basis_functions(
             r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i
 
             double W = kernel(r,h);
+            //printf("kernel = %f \n", W);
             double grad_W[3]; 
             grad_kernel(grad_W,r,h);
+            //printf("grad kernel = %f, %f, %f \n", grad_W[0], grad_W[1], grad_W[2]);
 
             double p[num_poly_basis]; 
             poly_basis(r,p);
@@ -701,9 +749,9 @@ int main(int argc, char *argv[])
         }
         else {
 
-            double dx = LX/((double)num_1d_x);
-            double dy = LY/((double)num_1d_y);
-            double dz = LZ/((double)num_1d_z);
+            double dx = (LX-X0)/((double)num_1d_x - 1);
+            double dy = (LY-Y0)/((double)num_1d_y - 1);
+            double dz = (LZ-Z0)/((double)num_1d_z - 1);
 
             size_t point_gid = 0;  
             for(size_t k=0; k<num_1d_z; k++){
@@ -741,10 +789,16 @@ int main(int argc, char *argv[])
         // ----------------------------
         
         // the number of nodes in the mesh
-        size_t num_bins_x = (size_t)( round(LX/bin_dx) );  
-        size_t num_bins_y = (size_t)( round(LY/bin_dy) );  
-        size_t num_bins_z = (size_t)( round(LZ/bin_dz) );  
+        size_t num_bins_x = (size_t)( round( (LX - X0)/bin_dx) + 1 );  
+        size_t num_bins_y = (size_t)( round( (LY - Y0)/bin_dy) + 1 );  
+        size_t num_bins_z = (size_t)( round( (LZ - Z0)/bin_dz) + 1 );  
+        //  bin_dx = (LX-X0)/(num_bins_x - 1);
+        //  bin_dy = (LY-Y0)/(num_bins_y - 1);
+        //  bin_dz = (LZ-Z0)/(num_bins_z - 1);
+
+
         size_t num_bins = num_bins_x*num_bins_y*num_bins_z;
+        printf("num bins x=%zu, y=%zu, z=%zu \n", num_bins_x, num_bins_y, num_bins_z);
 
         // bins and their connectivity to each other and points
         DCArrayKokkos <bin_keys_t> keys_in_bin(num_bins, "keys_in_bin"); // mapping from gid to (i,j,k)
@@ -805,7 +859,8 @@ int main(int argc, char *argv[])
                                          point_positions(point_gid,1), 
                                          point_positions(point_gid,2),
                                          num_bins_x, 
-                                         num_bins_y);
+                                         num_bins_y,
+                                         num_bins_z);
 
             size_t storage_lid = Kokkos::atomic_fetch_add(&num_points_in_bin(bin_gid), 1);
             points_bin_gid(point_gid) = bin_gid; // the id of the bin
@@ -835,6 +890,21 @@ int main(int argc, char *argv[])
         }); // end for all
 
 
+        for(size_t bin_gid=0; bin_gid<num_bins; bin_gid++){ // diagnostic pass: print every bin that holds at least one point
+            if(num_points_in_bin.host(bin_gid) > 0){
+                size_t i = keys_in_bin(bin_gid).i; // NOTE(review): read through operator() while the count uses .host() -- confirm this accessor is valid in host code
+                size_t j = keys_in_bin(bin_gid).j; 
+                size_t k = keys_in_bin(bin_gid).k;
+                // NOTE(review): the stencil search forms bin coordinates as bin_dx*i + X0; no origin offset is added here -- confirm intended
+                double bin_x = ((double)i)*bin_dx;
+                double bin_y = ((double)j)*bin_dy;
+                double bin_z = ((double)k)*bin_dz;
+                printf("num points in bin = %zu, bin keys = (%zu, %zu, %zu), bin x = (%f, %f, %f) \n", 
+                    num_points_in_bin.host(bin_gid), i, j, k, bin_x, bin_y, bin_z);
+            }
+        } // end for
+
+
 
         // ------------------------------------------------
         // Find the neighbors around each point using bins
@@ -885,20 +955,52 @@ int main(int argc, char *argv[])
                 } // end for icount
 
                 // the min number of points required to solve the system is num_poly_basis+1, was 2*num_poly_basis
-                if (num_points_found > num_points_fit  || num_points_found==num_points){
+                if (num_points_found >= num_points_fit  || num_points_found==num_points){
 
-                    points_bin_stencil(point_gid,0) = imin;
-                    points_bin_stencil(point_gid,1) = imax;
-                    points_bin_stencil(point_gid,2) = jmin;
-                    points_bin_stencil(point_gid,3) = jmax;
-                    points_bin_stencil(point_gid,4) = kmin;
-                    points_bin_stencil(point_gid,5) = kmax;
+                    const double x_pt_middle = bin_dx*((double)i) + X0; 
+                    const double y_pt_middle = bin_dy*((double)j) + Y0; 
+                    const double z_pt_middle = bin_dz*((double)k) + Z0; 
 
-                    points_num_neighbors(point_gid) = num_points_found; // including node_i in the list of neighbors
-                    //points_num_neighbors(point_gid) = num_points_found - 1; // the -1 is because counted point i as a neighbor
+                    const double x_pt_minus = bin_dx*((double)imin) + X0; 
+                    const double y_pt_minus = bin_dy*((double)jmin) + Y0; 
+                    const double z_pt_minus = bin_dz*((double)kmin) + Z0; 
+                    
+                    const double x_pt_plus = bin_dx*((double)imax) + X0; 
+                    const double y_pt_plus = bin_dy*((double)jmax) + Y0; 
+                    const double z_pt_plus = bin_dz*((double)kmax) + Z0; 
 
-                    break;
-                }
+                    const double dist_minus = sqrt( (x_pt_minus - x_pt_middle)*(x_pt_minus - x_pt_middle) +
+                                                    (y_pt_minus - y_pt_middle)*(y_pt_minus - y_pt_middle) +
+                                                    (z_pt_minus - z_pt_middle)*(z_pt_minus - z_pt_middle) );
+
+                    const double dist_plus = sqrt( (x_pt_plus - x_pt_middle)*(x_pt_plus - x_pt_middle) +
+                                                   (y_pt_plus - y_pt_middle)*(y_pt_plus - y_pt_middle) +
+                                                   (z_pt_plus - z_pt_middle)*(z_pt_plus - z_pt_middle) );
+
+                    printf("h = %f, dist_m = %f, dist_p = %f, num_points=%zu, imin = %d, imax = %d, jmin = %d,  jmax = %d, kmin = %d,  kmax = %d, \n", 
+                             h_kernel, dist_minus, dist_plus, num_points_found, imin, imax, jmin, jmax, kmin, kmax);
+
+                    // accept the stencil once either corner distance reaches h_kernel, or once every point has been found
+                    if (dist_minus >= h_kernel || dist_plus >= h_kernel || num_points_found==num_points){
+
+                        //printf("exiting \n\n");
+                        // record the accepted bin-index range in the order imin, imax, jmin, jmax, kmin, kmax
+                        points_bin_stencil(point_gid,0) = imin;
+                        points_bin_stencil(point_gid,1) = imax;
+                        points_bin_stencil(point_gid,2) = jmin;
+                        points_bin_stencil(point_gid,3) = jmax;
+                        points_bin_stencil(point_gid,4) = kmin;
+                        points_bin_stencil(point_gid,5) = kmax;
+
+                        points_num_neighbors(point_gid) = num_points_found; // NOTE(review): dead store -- overwritten by the next statement
+                        points_num_neighbors(point_gid) = num_points_found - 1; // value kept: the -1 removes point i, which the search counted as its own neighbor
+
+                        break; // stop growing the stencil for this point
+                    }
+                    // else increase stencil size
+
+
+                } // end of check
                 
             } // end for stencil
 
@@ -1015,9 +1117,7 @@ int main(int argc, char *argv[])
                             size_t neighbor_point_gid = points_in_bin(neighbor_bin_gid, neighbor_pt_lid);
                             
                             // make sure its a neighbor
-                            //if(neighbor_point_gid != point_gid){
-
-                            // I am including point_i in the neighbor list
+                            if(neighbor_point_gid != point_gid){
 
                                 // save the neighbor
                                 size_t num_saved = Kokkos::atomic_fetch_add(&points_num_neighbors(point_gid), 1);
@@ -1046,7 +1146,7 @@ int main(int argc, char *argv[])
 
                                 } // end if
 
-                            //} // end if neighbor != point_gid
+                            } // end if neighbor != point_gid
 
                         } // neighbor_point_lid
 
@@ -1411,7 +1511,7 @@ int main(int argc, char *argv[])
                 // conservative mesh-free FE
                 double contrib = 0.0;
                 for (int dim=0; dim<3; ++dim) {
-                    contrib += 0.5*(g_ij[dim] - g_ji[dim]) * (u(j_gid, dim) - u(i_gid, dim));
+                    contrib += 0.5*(g_ij[dim] - g_ji[dim]) * (u(j_gid, dim) + u(i_gid, dim));
                 }
                 div(i_gid) += vol(i_gid) * vol(j_gid) * contrib;
 
@@ -1424,7 +1524,7 @@ int main(int argc, char *argv[])
 
             }
             div(i_gid) /= vol(i_gid);
-            //div_fd(i_gid) /= vol(i_gid);  // finite difference doesn't have the V_i on the right side, so no division
+            // remember: finite difference doesn't have the V_i on the right side, so no division
         });
         div.update_host();
         div_fd.update_host();
@@ -1444,97 +1544,6 @@ int main(int argc, char *argv[])
             }
         } // end for point_gid
 
- /*       
-        DCArrayKokkos <double> div(num_points);
-        div.set_values(0.0);
-
-        FOR_ALL(i_gid, 0, num_points, {
-                
-            for(size_t j_lid = 0; j_lid<points_num_neighbors(i_gid); j_lid++){                
-
-                // get the point gid for this neighbor
-                size_t j_gid = points_in_point(i_gid, j_lid);
-
-                // get the local id of my neighbor that matches my point_gid
-                size_t i_lid = reverse_neighbor_lid(i_gid, j_lid);
-
-                if(i_gid != points_in_point(j_gid, i_lid)){
-                    printf("CHECK: point i = %d, reverse map point i = %zu for j = %zu \n", i_gid, points_in_point(j_gid, i_lid), j_gid);
-                }
-
-
-                double g_ij[3];
-                double g_ji[3];
-                for (int dim=0; dim<3; ++dim) {
-                    g_ij[dim] = grad_basis(i_gid,j_lid,dim);
-                    g_ji[dim] = grad_basis(j_gid,i_lid,dim);
-                }
-
-                double Delta[3];
-                for (int dim=0; dim<3; ++dim) {
-                    Delta[dim] = g_ij[dim] - g_ji[dim];
-                }
-
-                double pair_dot = 0.0;
-                for (int dim=0; dim<3; ++dim){
-                    pair_dot += 0.5*(point_positions(i_gid,dim) + point_positions(j_gid,dim)) * Delta[dim];
-                }
-
-                div(i_gid) += vol(i_gid) * vol(j_gid) * pair_dot;
-
-                // // contribution to i
-                // Kokkos::atomic_add(&div(i_gid), contrib); 
-
-                // // // contribution to j
-                // Kokkos::atomic_add(&div(j_gid), -contrib);
-
-
-                // checks 
-                double Delta_norm = 0.0;
-                for (int d=0; d<3; ++d){ 
-                    Delta_norm += (g_ij[d]-g_ji[d])*(g_ij[d]-g_ji[d]);
-                } // end for
-
-                printf("pair %d,%zu: |Delta|=%g  g_ij=(%g,%g,%g)  g_ji=(%g,%g,%g)\n", i_gid, j_gid, sqrt(Delta_norm),
-                    g_ij[0],g_ij[1],g_ij[2], g_ji[0],g_ji[1],g_ji[2]);
-
-                
-            } // end neighbors
-
-        }); // end parallel over points
-        Kokkos::fence();
-
-        FOR_ALL(point_gid, 0, num_points, {
-            div(point_gid) /= vol(point_gid);
-        });
-        div.update_host();
-
-        for(size_t point_gid=0; point_gid<num_points; point_gid++){
-            printf("div = %f at point %zu \n", div.host(point_gid), point_gid);
-        }
-
-*/
-                // double dot_prod = 0.0;
-                // for (size_t dim=0; dim<3; dim++){                  
-                //     dot_prod += (grad_basis(point_gid,neighbor_point_lid,dim) - grad_basis(neighbor_point_gid,neighbor_lid,dim))*
-                //                 (point_positions(neighbor_point_gid,dim) - point_positions(point_gid,dim)); 
-                // }
-                // div(point_gid) += vol(point_gid)*vol(neighbor_point_gid)*0.5*dot_prod;
-
-        // other coding 
-        // dot_prod = 0.0;
-        // for (size_t dim=0; dim<3; dim++) {
-        //     dot_prod += grad_basis(point_gid,neighbor_point_lid,dim) *
-        //                 (point_positions(neighbor_point_gid,dim)- point_positions(point_gid,dim));
-        // }
-
-        // // contribution to i
-        // Kokkos::atomic_add(&div(point_gid),
-        //     0.5 * vol(point_gid) * vol(neighbor_point_gid) * dot_prod);
-
-        // // // contribution to j
-        // Kokkos::atomic_add(&div(neighbor_point_gid),
-        //     -0.5 * vol(point_gid) * vol(neighbor_point_gid) * dot_prod);
 
 
         double conserve_check;

From 7b7c5a606833b6937143b16a6ced96f58ffdd83d Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Fri, 26 Sep 2025 16:06:50 -0600
Subject: [PATCH 18/23] WIP: added grad_i and grad_j

---
 examples/pointcloud/pointcloud-rk.cpp | 156 +++++++++++++++++---------
 1 file changed, 102 insertions(+), 54 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index 07390ca8..04fea617 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -71,7 +71,8 @@ const double PI = 3.14159265358979323846;
 // inputs:
 
 
-#define P1 // P1 or P2
+#define P2 // P1 or P2
+#define CUBIC_SPLINE // CUBIC_SPLINE or GUASS
 
 
 const size_t num_1d_x = 3;
@@ -159,6 +160,9 @@ size_t get_bin_gid(const double x_pt,
 
 
 
+
+#if defined(CUBIC_SPLINE)
+
 KOKKOS_FUNCTION
 double kernel(const double r[3], double h) {
     
@@ -180,7 +184,7 @@ double kernel(const double r[3], double h) {
 
 KOKKOS_FUNCTION
 // derivative dW/dx_i = - dW/dr where r = xj-xi
-void grad_kernel(double *grad_W, const double r[3], const double h) {
+void grad_kernel(double *grad_W, const double r[3], const double h, const bool derviative_wrt_i) {
 
     double diff_sqrd = 0.0;
     for(size_t dim=0; dim<3; dim++){
@@ -204,22 +208,29 @@ void grad_kernel(double *grad_W, const double r[3], const double h) {
         df_dq = 0.0;
     }
 
-    const double dW_dr = (df_dq / h);
+    const double alpha = 2.0/(3.0*h);
+    const double dW_dr = alpha*(df_dq / h);
     // grad W = dW/dr * (rij / radius)
     const double invr = 1.0 /(radius + 1e-16);
 
+    double drdx = -1.0;  // default is derivative with respect to i
+    if(derviative_wrt_i == false){
+        drdx = 1.0;  // derivative with respect to j
+    } // end if
+
     for (size_t dim=0; dim<3; ++dim) {
-        grad_W[dim] = dW_dr * r[dim] * invr;
+        grad_W[dim] = dW_dr * r[dim] * drdx * invr;
     }
 
     return;
 }
 
+#else 
 
 // Gaussian function part of the RBF
 // rbf = exp(-(xj - x)*(xj - x)/h)
 KOKKOS_FUNCTION
-double kernel_g(const double r[3], const double h){
+double kernel(const double r[3], const double h){
 
     double diff_sqrd = 0.0;
 
@@ -232,10 +243,11 @@ double kernel_g(const double r[3], const double h){
 } // end of function
 
 
-// Gaussian function part of the RBF, symmeterized
-// rbf = 0.5*(exp(-(xj - xi)*(xj - xi)/hi^2) + exp(-(xi - xj)*(xi - xj)/hj^2))
+
+// Gradient Gaussian function
+// d/dx rbf = d/dx (exp(-(xj - xi)*(xj - x)/hi^2) 
 KOKKOS_FUNCTION
-double kernel_syn(const double r[3], const double hi, const double hj){
+void grad_kernel(double *grad_W, const double r[3], const double h, const bool derviative_wrt_i){
 
     double diff_sqrd = 0.0;
 
@@ -243,17 +255,27 @@ double kernel_syn(const double r[3], const double hi, const double hj){
         diff_sqrd += r[dim]*r[dim];
     } // dim
 
-    const double Wi = exp(-diff_sqrd/(hi*hi)); // use kernel func call
-    const double Wj = exp(-diff_sqrd/(hj*hj));
+    double drdx = -1;
+    if(derviative_wrt_i == false){
+        drdx = 1.0;  // derivative with respect to j
+    } // end if
 
-    return 0.5*(Wi + Wj);
+    const double rbf = kernel(r, h);
+
+    // gradient
+    for (size_t dim=0; dim<3; ++dim) {
+        grad_W[dim] = -2.0/(h*h)*r[dim]*rbf*drdx; 
+    }
+
+    return;
 } // end of function
 
+#endif
 
-// Gradient Gaussian function
-// d/dx rbf = d/dx (exp(-(xj - xi)*(xj - x)/hi^2) 
+// Gaussian function part of the RBF, symmeterized
+// rbf = 0.5*(exp(-(xj - xi)*(xj - xi)/hi^2) + exp(-(xi - xj)*(xi - xj)/hj^2))
 KOKKOS_FUNCTION
-void grad_kernel_g(double *grad_W, const double r[3], const double h){
+double kernel_syn(const double r[3], const double hi, const double hj){
 
     double diff_sqrd = 0.0;
 
@@ -261,17 +283,13 @@ void grad_kernel_g(double *grad_W, const double r[3], const double h){
         diff_sqrd += r[dim]*r[dim];
     } // dim
 
-    const double drdxi = -1;
-    const double rbf = kernel(r, h);
-
-    // gradient
-    for (size_t dim=0; dim<3; ++dim) {
-        grad_W[dim] = -2.0/(h*h)*r[dim]*rbf*drdxi; 
-    }
+    const double Wi = exp(-diff_sqrd/(hi*hi)); // use kernel func call
+    const double Wj = exp(-diff_sqrd/(hj*hj));
 
-    return;
+    return 0.5*(Wi + Wj);
 } // end of function
 
+
 // d/dx rbf = d/dx ( 0.5(exp(-(xj - xi)*(xj - x)/hi^2) + exp(-(xi - xj)*(xi - xj)/hj^2)) ) 
 KOKKOS_FUNCTION
 void grad_kernel_sym(double *gradW, const double r[3], const double hi, const double hj) {
@@ -317,9 +335,18 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do
 
 
     KOKKOS_INLINE_FUNCTION
-    void grad_poly_basis(const double r[3], double (*grad_p)[3]) {
+    void grad_poly_basis(const double r[3], double (*grad_p)[3], bool derviative_wrt_i) {
         
-        const double drdx = -1.0;
+        // default r = r_j - r_i
+
+        double drdx = -1.0;  // default is derivative with respect to i
+        double drdy = -1.0;  // default is derivative with respect to i
+        double drdz = -1.0;  // default is derivative with respect to i
+        if(derviative_wrt_i == false){
+            drdx = 1.0;  // derivative with respect to j
+            drdy = 1.0;  // derivative with respect to j
+            drdz = 1.0;  // derivative with respect to j
+        } // end if
 
         grad_p[0][0] = 0.0;
         grad_p[1][0] = drdx;
@@ -334,8 +361,6 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do
 
         // for high-order will use (x^a y^b z^c)
 
-        const double drdy = -1.0;
-
         grad_p[0][1] = 0.0;
         grad_p[1][1] = 0.0;
         grad_p[2][1] = drdy;
@@ -349,8 +374,6 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do
 
         // for high-order will use (x^a y^b z^c)
 
-        const double drdz = -1.0;
-
         grad_p[0][2] = 0.0;
         grad_p[1][2] = 0.0;
         grad_p[2][2] = 0.0;
@@ -382,23 +405,27 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do
 
 
     KOKKOS_INLINE_FUNCTION
-    void grad_poly_basis(const double r[3], double (*grad_p)[3]) {
+    void grad_poly_basis(const double r[3], double (*grad_p)[3], size_t eval_point) { // NOTE(review): the body tests derviative_wrt_i, but the third parameter here is named eval_point (size_t); the other grad_poly_basis variant in this file takes bool derviative_wrt_i -- confirm and align
         
-        const double drdx = -1.0;
+        double drdx = -1.0;  // default is derivative with respect to i
+        double drdy = -1.0;  // default is derivative with respect to i
+        double drdz = -1.0;  // default is derivative with respect to i
+        if(derviative_wrt_i == false){
+            drdx = 1.0;  // derivative with respect to j
+            drdy = 1.0;  // derivative with respect to j
+            drdz = 1.0;  // derivative with respect to j
+        } // end if
 
         grad_p[0][0] = 0.0;
         grad_p[1][0] = drdx;
         grad_p[2][0] = 0.0;
         grad_p[3][0] = 0.0;
 
-        const double drdy = -1.0;
-
         grad_p[0][1] = 0.0;
         grad_p[1][1] = 0.0;
         grad_p[2][1] = drdy;
         grad_p[3][1] = 0.0;
 
-        const double drdz = -1.0;
 
         grad_p[0][2] = 0.0;
         grad_p[1][2] = 0.0;
@@ -465,9 +492,12 @@ void calc_basis_and_grad_basis_functions(
     const CArrayKokkos <double>& M_inv,
     const DRaggedRightArrayKokkos <double>& basis,
     const DRaggedRightArrayKokkos <double>& grad_basis,
-    const double h)
+    const double h,
+    const bool derviative_wrt_i)
 {
 
+    // dir 
+
     // actual number of points
     size_t num_points = x.dims(0);
     
@@ -497,17 +527,19 @@ void calc_basis_and_grad_basis_functions(
             r[1] = x(neighbor_point_gid,1) - x(point_gid,1); // y_j-y_i
             r[2] = x(neighbor_point_gid,2) - x(point_gid,2); // z_j-z_i
 
+
+
             double W = kernel(r,h);
             //printf("kernel = %f \n", W);
             double grad_W[3]; 
-            grad_kernel(grad_W,r,h);
+            grad_kernel(grad_W,r,h,derviative_wrt_i);
             //printf("grad kernel = %f, %f, %f \n", grad_W[0], grad_W[1], grad_W[2]);
 
             double p[num_poly_basis]; 
             poly_basis(r,p);
 
             double grad_p[num_poly_basis][3]; 
-            grad_poly_basis(r,grad_p);
+            grad_poly_basis(r, grad_p, derviative_wrt_i);
 
             double Vj = vol(neighbor_point_gid);
 
@@ -542,10 +574,10 @@ void calc_basis_and_grad_basis_functions(
             poly_basis(r,p);
             
             double grad_W[3];
-            grad_kernel(grad_W, r, h);
+            grad_kernel(grad_W, r, h, derviative_wrt_i);
 
             double grad_p[num_poly_basis][3]; // matrix holding grad polynomial basis
-            grad_poly_basis(r, grad_p);
+            grad_poly_basis(r, grad_p, derviative_wrt_i);
 
             // 
             double correction = 0.0;
@@ -1237,7 +1269,8 @@ int main(int argc, char *argv[])
         CArrayKokkos <double> grad_M(num_points, num_poly_basis, num_poly_basis);
         
         DRaggedRightArrayKokkos <double> basis(points_num_neighbors);        // reproducing kernel basis (num_points, num_neighbors)
-        DRaggedRightArrayKokkos <double> grad_basis(points_num_neighbors,3); // reproducing kernel basis (num_points, num_neighbors)
+        DRaggedRightArrayKokkos <double> grad_basis_i(points_num_neighbors,3); // grad kernel basis j with respect to i (num_points, num_neighbors)
+        DRaggedRightArrayKokkos <double> grad_basis_j(points_num_neighbors,3); // grad kernel basis i with respect to j (num_points, num_neighbors)
         
 
 
@@ -1264,8 +1297,21 @@ int main(int argc, char *argv[])
                                     p_coeffs,
                                     M_inv,
                                     basis,
-                                    grad_basis,
-                                    h);
+                                    grad_basis_i,
+                                    h,
+                                    true);
+        
+        calc_basis_and_grad_basis_functions(
+                                    point_positions,
+                                    points_num_neighbors, 
+                                    points_in_point,
+                                    vol,
+                                    p_coeffs,
+                                    M_inv,
+                                    basis,
+                                    grad_basis_j,
+                                    h,
+                                    false);
 
         // end timer
         auto time_6 = std::chrono::high_resolution_clock::now();
@@ -1363,19 +1409,19 @@ int main(int argc, char *argv[])
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p0_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
-                grad_x_p0_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid);
+                grad_x_p0_lcl += grad_basis_i(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid);
             }, grad_x_p0);
 
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p0_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
-                grad_y_p0_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid);
+                grad_y_p0_lcl += grad_basis_i(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid);
             }, grad_y_p0);
 
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p0_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
-                grad_z_p0_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid);
+                grad_z_p0_lcl += grad_basis_i(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid);
             }, grad_z_p0);
 
             const double grad_check_P0 = fabs(grad_x_p0)+fabs(grad_y_p0)+fabs(grad_z_p0);
@@ -1387,17 +1433,17 @@ int main(int argc, char *argv[])
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p1_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
-                grad_x_p1_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0);
+                grad_x_p1_lcl += grad_basis_i(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0);
             }, grad_x_p1);
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p1_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
-                grad_y_p1_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1);
+                grad_y_p1_lcl += grad_basis_i(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1);
             }, grad_y_p1);
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p1_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
-                grad_z_p1_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2);
+                grad_z_p1_lcl += grad_basis_i(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2);
             }, grad_z_p1);
 
             const double grad_check_P1 = fabs(grad_x_p1 - 1.0)+fabs(grad_y_p1 - 1.0)+fabs(grad_z_p1 - 1.0);
@@ -1413,17 +1459,17 @@ int main(int argc, char *argv[])
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p2_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
-                grad_x_p2_lcl += grad_basis(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0)*point_positions(neighbor_point_gid,0);
+                grad_x_p2_lcl += grad_basis_i(point_gid,neighbor_point_lid,0)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,0)*point_positions(neighbor_point_gid,0);
             }, grad_x_p2);
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_y_p2_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
-                grad_y_p2_lcl += grad_basis(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1)*point_positions(neighbor_point_gid,1);
+                grad_y_p2_lcl += grad_basis_i(point_gid,neighbor_point_lid,1)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,1)*point_positions(neighbor_point_gid,1);
             }, grad_y_p2);
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_z_p2_lcl, {
                 // get the point gid for this neighboring
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
-                grad_z_p2_lcl += grad_basis(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2)*point_positions(neighbor_point_gid,2);
+                grad_z_p2_lcl += grad_basis_i(point_gid,neighbor_point_lid,2)*vol(neighbor_point_gid)*point_positions(neighbor_point_gid,2)*point_positions(neighbor_point_gid,2);
             }, grad_z_p2);
 
             const double grad_check_P2 = fabs(grad_x_p2-2.0*point_positions(point_gid,0)) + fabs(grad_y_p2-2.0*point_positions(point_gid,1)) + fabs(grad_z_p2-2.0*point_positions(point_gid,2));
@@ -1493,6 +1539,7 @@ int main(int argc, char *argv[])
         div_fd.set_values(0.0);
 
         FOR_ALL(i_gid, 0, num_points, {
+
             for(size_t j_lid = 0; j_lid<points_num_neighbors(i_gid); j_lid++){                
                 size_t j_gid = points_in_point(i_gid, j_lid);
                 size_t i_lid = reverse_neighbor_lid(i_gid, j_lid);
@@ -1504,8 +1551,8 @@ int main(int argc, char *argv[])
                 double g_ij[3];
                 double g_ji[3];
                 for (int dim=0; dim<3; ++dim) {
-                    g_ij[dim] = grad_basis(i_gid,j_lid,dim);
-                    g_ji[dim] = grad_basis(j_gid,i_lid,dim);
+                    g_ij[dim] = grad_basis_i(i_gid,j_lid,dim);
+                    g_ji[dim] = grad_basis_j(j_gid,i_lid,dim);
                 }
 
                 // conservative mesh-free FE
@@ -1522,7 +1569,8 @@ int main(int argc, char *argv[])
                 }
                 div_fd(i_gid) += contrib;
 
-            }
+            } // end loop over neighbors
+
             div(i_gid) /= vol(i_gid);
             // remember: finite difference doesn't have the V_i on the right side, so no division
         });
@@ -1560,7 +1608,7 @@ int main(int argc, char *argv[])
                 size_t neighbor_lid = reverse_neighbor_lid(point_gid, neighbor_point_lid);
 
                 for (size_t dim=0; dim<3; dim++){
-                    conserve_check_lcl += 0.5*(grad_basis(point_gid,neighbor_point_lid,dim) - grad_basis(neighbor_point_gid,neighbor_lid,dim));
+                    conserve_check_lcl += 0.5*(grad_basis_i(point_gid,neighbor_point_lid,dim) - grad_basis_j(neighbor_point_gid,neighbor_lid,dim));
                 }
             }
 

From e09e7849668949fda13e5fd33688f46feffd4862 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Sat, 27 Sep 2025 10:59:06 -0600
Subject: [PATCH 19/23] WIP: anti-symmetric gradient

---
 examples/pointcloud/pointcloud-rk.cpp | 121 ++++++++++++++------------
 1 file changed, 66 insertions(+), 55 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index 04fea617..3958886d 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -73,7 +73,7 @@ const double PI = 3.14159265358979323846;
 
 #define P2 // P1 or P2
 #define CUBIC_SPLINE // CUBIC_SPLINE or GUASS
-
+bool RAND_CLOUD = true; // RAND_CLOUD or uniform
 
 const size_t num_1d_x = 3;
 const size_t num_1d_y = 3;
@@ -184,7 +184,7 @@ double kernel(const double r[3], double h) {
 
 KOKKOS_FUNCTION
 // derivative dW/dx_i = - dW/dr where r = xj-xi
-void grad_kernel(double *grad_W, const double r[3], const double h, const bool derviative_wrt_i) {
+void grad_kernel(double *grad_W, const double r[3], const double h, const bool derivative_wrt_i) {
 
     double diff_sqrd = 0.0;
     for(size_t dim=0; dim<3; dim++){
@@ -214,7 +214,7 @@ void grad_kernel(double *grad_W, const double r[3], const double h, const bool d
     const double invr = 1.0 /(radius + 1e-16);
 
     double drdx = -1.0;  // default is derivative with respect to i
-    if(derviative_wrt_i == false){
+    if(derivative_wrt_i == false){
         drdx = 1.0;  // derivative with respect to j
     } // end if
 
@@ -247,7 +247,7 @@ double kernel(const double r[3], const double h){
 // Gradient Gaussian function
 // d/dx rbf = d/dx (exp(-(xj - xi)*(xj - x)/hi^2) 
 KOKKOS_FUNCTION
-void grad_kernel(double *grad_W, const double r[3], const double h, const bool derviative_wrt_i){
+void grad_kernel(double *grad_W, const double r[3], const double h, const bool derivative_wrt_i){
 
     double diff_sqrd = 0.0;
 
@@ -256,7 +256,7 @@ void grad_kernel(double *grad_W, const double r[3], const double h, const bool d
     } // dim
 
     double drdx = -1;
-    if(derviative_wrt_i == false){
+    if(derivative_wrt_i == false){
         drdx = 1.0;  // derivative with respect to j
     } // end if
 
@@ -335,14 +335,14 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do
 
 
     KOKKOS_INLINE_FUNCTION
-    void grad_poly_basis(const double r[3], double (*grad_p)[3], bool derviative_wrt_i) {
+    void grad_poly_basis(const double r[3], double (*grad_p)[3], bool derivative_wrt_i) {
         
-        // default r = r_j - r_i
+        // definition, r = r_j - r_i
 
         double drdx = -1.0;  // default is derivative with respect to i
         double drdy = -1.0;  // default is derivative with respect to i
         double drdz = -1.0;  // default is derivative with respect to i
-        if(derviative_wrt_i == false){
+        if(derivative_wrt_i == false){
             drdx = 1.0;  // derivative with respect to j
             drdy = 1.0;  // derivative with respect to j
             drdz = 1.0;  // derivative with respect to j
@@ -410,7 +410,7 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do
         double drdx = -1.0;  // default is derivative with respect to i
         double drdy = -1.0;  // default is derivative with respect to i
         double drdz = -1.0;  // default is derivative with respect to i
-        if(derviative_wrt_i == false){
+        if(derivative_wrt_i == false){
             drdx = 1.0;  // derivative with respect to j
             drdy = 1.0;  // derivative with respect to j
             drdz = 1.0;  // derivative with respect to j
@@ -493,7 +493,7 @@ void calc_basis_and_grad_basis_functions(
     const DRaggedRightArrayKokkos <double>& basis,
     const DRaggedRightArrayKokkos <double>& grad_basis,
     const double h,
-    const bool derviative_wrt_i)
+    const bool derivative_wrt_i)
 {
 
     // dir 
@@ -532,14 +532,14 @@ void calc_basis_and_grad_basis_functions(
             double W = kernel(r,h);
             //printf("kernel = %f \n", W);
             double grad_W[3]; 
-            grad_kernel(grad_W,r,h,derviative_wrt_i);
+            grad_kernel(grad_W,r,h,derivative_wrt_i);
             //printf("grad kernel = %f, %f, %f \n", grad_W[0], grad_W[1], grad_W[2]);
 
             double p[num_poly_basis]; 
             poly_basis(r,p);
 
             double grad_p[num_poly_basis][3]; 
-            grad_poly_basis(r, grad_p, derviative_wrt_i);
+            grad_poly_basis(r, grad_p, derivative_wrt_i);
 
             double Vj = vol(neighbor_point_gid);
 
@@ -574,10 +574,10 @@ void calc_basis_and_grad_basis_functions(
             poly_basis(r,p);
             
             double grad_W[3];
-            grad_kernel(grad_W, r, h, derviative_wrt_i);
+            grad_kernel(grad_W, r, h, derivative_wrt_i);
 
             double grad_p[num_poly_basis][3]; // matrix holding grad polynomial basis
-            grad_poly_basis(r, grad_p, derviative_wrt_i);
+            grad_poly_basis(r, grad_p, derivative_wrt_i);
 
             // 
             double correction = 0.0;
@@ -770,14 +770,18 @@ int main(int argc, char *argv[])
         DCArrayKokkos <double> point_positions(num_points, 3, "point_positions");
         DCArrayKokkos <double> point_values(num_points, "point_values"); 
 
+        DCArrayKokkos <double> vol(num_points);
+        vol.set_values(0.0);
+
         // point locations
-        if(false){
+        if(RAND_CLOUD){
         srand(static_cast<unsigned int>(time(0))); // Seed the random number generator
             for(size_t i=0; i<num_points; i++){
                 point_positions.host(i, 0) = X0 + LX*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
                 point_positions.host(i, 1) = Y0 + LY*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
                 point_positions.host(i, 2) = Z0 + LZ*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
             }
+            vol.set_values(1.0);
         }
         else {
 
@@ -797,8 +801,35 @@ int main(int argc, char *argv[])
                 } // end j
             } // end k
 
-        } // end if
 
+
+            const double elem_dx = LX/((double)num_1d_x); // NOTE(review): divides by the point count num_1d_x, while the cell count below is num_1d_x-1 -- confirm intended
+            const double elem_dy = LY/((double)num_1d_y);
+            const double elem_dz = LZ/((double)num_1d_z); 
+            const double elem_vol = elem_dx*elem_dy*elem_dz; // volume attributed to a single cell
+
+            const size_t num_cells_1d_x = num_1d_x-1; // one fewer cell than points in each direction
+            const size_t num_cells_1d_y = num_1d_y-1;
+            const size_t num_cells_1d_z = num_1d_z-1;
+
+            FOR_ALL(k,0,num_cells_1d_z, // loop over cells; each cell adds a share of its volume to its 8 corner points
+                    j,0,num_cells_1d_y,
+                    i,0,num_cells_1d_x,{
+
+                for (int kcount=k; kcount<=k+1; kcount++){
+                    for (int jcount=j; jcount<=j+1; jcount++){
+                        for (int icount=i; icount<=i+1; icount++){
+                            size_t point_gid = get_gid(icount, jcount, kcount, num_1d_x, num_1d_y); // global id of this corner point
+                            Kokkos::atomic_add(&vol(point_gid), elem_vol*0.25); // atomic because neighboring cells add to the same point. NOTE(review): 8 corners x 0.25 sums to 2*elem_vol per cell -- confirm the weight
+                        } // end i
+                    } // end j
+                } // end k
+                        
+            }); // end parallel over k,j,i 
+            
+
+        } // end if
+        vol.update_host();
         point_positions.update_device();
         Kokkos::fence();
 
@@ -931,8 +962,8 @@ int main(int argc, char *argv[])
                 double bin_x = ((double)i)*bin_dx;
                 double bin_y = ((double)j)*bin_dy;
                 double bin_z = ((double)k)*bin_dz;
-                printf("num points in bin = %zu, bin keys = (%zu, %zu, %zu), bin x = (%f, %f, %f) \n", 
-                    num_points_in_bin.host(bin_gid), i, j, k, bin_x, bin_y, bin_z);
+                //printf("num points in bin = %zu, bin keys = (%zu, %zu, %zu), bin x = (%f, %f, %f) \n", 
+                //    num_points_in_bin.host(bin_gid), i, j, k, bin_x, bin_y, bin_z);
             }
         } // end for
 
@@ -1009,8 +1040,8 @@ int main(int argc, char *argv[])
                                                    (y_pt_plus - y_pt_middle)*(y_pt_plus - y_pt_middle) +
                                                    (z_pt_plus - z_pt_middle)*(z_pt_plus - z_pt_middle) );
 
-                    printf("h = %f, dist_m = %f, dist_p = %f, num_points=%zu, imin = %d, imax = %d, jmin = %d,  jmax = %d, kmin = %d,  kmax = %d, \n", 
-                             h_kernel, dist_minus, dist_plus, num_points_found, imin, imax, jmin, jmax, kmin, kmax);
+                    //printf("h = %f, dist_m = %f, dist_p = %f, num_points=%zu, imin = %d, imax = %d, jmin = %d,  jmax = %d, kmin = %d,  kmax = %d, \n", 
+                    //         h_kernel, dist_minus, dist_plus, num_points_found, imin, imax, jmin, jmax, kmin, kmax);
 
                     // only exit when we exceed kernel distance
                     if (dist_minus >= h_kernel || dist_plus >= h_kernel || num_points_found==num_points){
@@ -1235,34 +1266,6 @@ int main(int argc, char *argv[])
 
         CArrayKokkos <double> p_coeffs(num_points, num_poly_basis); // reproducing kernel coefficients at each point
         
-        
-        DCArrayKokkos <double> vol(num_points);
-        vol.set_values(0.0);
-
-        const double dx = LX/((double)num_1d_x);
-        const double dy = LY/((double)num_1d_y);
-        const double dz = LZ/((double)num_1d_z); 
-        const double elem_vol = dx*dy*dz;
-
-        const size_t num_cells_1d_x = num_1d_x-1;
-        const size_t num_cells_1d_y = num_1d_y-1;
-        const size_t num_cells_1d_z = num_1d_z-1;
-
-        FOR_ALL(k,0,num_cells_1d_z,
-                j,0,num_cells_1d_y,
-                i,0,num_cells_1d_x,{
-
-            for (int kcount=k; kcount<=k+1; kcount++){
-                for (int jcount=j; jcount<=j+1; jcount++){
-                    for (int icount=i; icount<=i+1; icount++){
-                        size_t point_gid = get_gid(icount, jcount, kcount, num_1d_x, num_1d_y);
-                        Kokkos::atomic_add(&vol(point_gid), elem_vol*0.25);
-                    } // end i
-                } // end j
-            } // end k
-                    
-        }); // end parallel over k,j,i 
-        vol.update_host();
 
 
         CArrayKokkos <double> M_inv(num_points, num_poly_basis, num_poly_basis);
@@ -1540,20 +1543,26 @@ int main(int argc, char *argv[])
 
         FOR_ALL(i_gid, 0, num_points, {
 
-            for(size_t j_lid = 0; j_lid<points_num_neighbors(i_gid); j_lid++){                
+            for(size_t j_lid = 0; j_lid<points_num_neighbors(i_gid); j_lid++){     
+
                 size_t j_gid = points_in_point(i_gid, j_lid);
                 size_t i_lid = reverse_neighbor_lid(i_gid, j_lid);
 
                 if(i_gid != points_in_point(j_gid, i_lid)){
                     printf("CHECK: point i = %d, reverse map point i = %zu for j = %zu \n", i_gid, points_in_point(j_gid, i_lid), j_gid);
                 }
+                //printf("map check: edge points (%d,%zu), rev from j = %zu using i_lid = %zu \n", i_gid, j_gid,  points_in_point(j_gid, i_lid), i_lid);
 
                 double g_ij[3];
                 double g_ji[3];
+                double sum[3];
                 for (int dim=0; dim<3; ++dim) {
                     g_ij[dim] = grad_basis_i(i_gid,j_lid,dim);
-                    g_ji[dim] = grad_basis_j(j_gid,i_lid,dim);
+                    g_ji[dim] = grad_basis_j(i_gid,j_lid,dim);
+                    sum[dim] = g_ij[dim] + g_ji[dim];
                 }
+                double norm = sqrt(sum[0]*sum[0] + sum[1]*sum[1] + sum[2]*sum[2]);
+                //printf("errors: norm=%f, g_ij = (%f, %f, %f), g_ji = (%f, %f, %f) \n", norm, g_ij[0], g_ij[1], g_ij[2], g_ji[0], g_ji[1], g_ji[2]);
 
                 // conservative mesh-free FE
                 double contrib = 0.0;
@@ -1580,9 +1589,9 @@ int main(int argc, char *argv[])
 
         for(size_t point_gid=0; point_gid<num_points; point_gid++){
             double error = fabs(div.host(point_gid) - 3.0);
-            //if(error > 1e-8){
+            if(error > 1e-8){
                 printf("div(u) = %f at point %zu, error = %g, vol = %f\n", div.host(point_gid), point_gid, error, vol.host(point_gid));
-            //}
+            }
         } // end for point_gid
 
         for(size_t point_gid=0; point_gid<num_points; point_gid++){
@@ -1594,6 +1603,7 @@ int main(int argc, char *argv[])
 
 
 
+
         double conserve_check;
         double conserve_check_lcl;
         FOR_REDUCE_SUM(point_gid, 0, num_points, 
@@ -1605,15 +1615,16 @@ int main(int argc, char *argv[])
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
 
                 // get the local id of my neighbor that matches my point_gid
-                size_t neighbor_lid = reverse_neighbor_lid(point_gid, neighbor_point_lid);
+                //size_t neighbor_lid = reverse_neighbor_lid(point_gid, neighbor_point_lid);
 
                 for (size_t dim=0; dim<3; dim++){
-                    conserve_check_lcl += 0.5*(grad_basis_i(point_gid,neighbor_point_lid,dim) - grad_basis_j(neighbor_point_gid,neighbor_lid,dim));
+                    conserve_check_lcl += 0.5*(grad_basis_i(point_gid,neighbor_point_lid,dim) - grad_basis_j(point_gid,neighbor_point_lid,dim));
+                    //conserve_check_lcl += 0.5*(grad_basis_i(point_gid,neighbor_point_lid,dim) - grad_basis_j(neighbor_point_gid,neighbor_lid,dim));
                 }
             }
 
         }, conserve_check);
-        printf("conservation = %f \n", conserve_check);
+        printf("conservation error = %f \n\n", conserve_check);
 
 
 

From 331cf0d60752aee99abd340f66a69d98b5ee222d Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Sat, 27 Sep 2025 11:36:07 -0600
Subject: [PATCH 20/23] added divergence sin(x) test

---
 examples/pointcloud/pointcloud-rk.cpp | 101 +++++++++++++++++++++++---
 1 file changed, 90 insertions(+), 11 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index 3958886d..b482c3c6 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -72,14 +72,14 @@ const double PI = 3.14159265358979323846;
 
 
 #define P2 // P1 or P2
-#define CUBIC_SPLINE // CUBIC_SPLINE or GUASS
+#define CUBIC_SPLINE // CUBIC_SPLINE or GAUSS kernel
 bool RAND_CLOUD = true; // RAND_CLOUD or uniform
 
-const size_t num_1d_x = 3;
-const size_t num_1d_y = 3;
-const size_t num_1d_z = 3;
+const size_t num_1d_x = 5;
+const size_t num_1d_y = 5;
+const size_t num_1d_z = 5;
 
-const double h_kernel = 2/3.;
+const double h_kernel = 2/5.;
 const double num_points_fit = 27; // minimum to fit is 3x3x3
 
 const size_t num_points = num_1d_x*num_1d_y*num_1d_z;
@@ -1602,8 +1602,6 @@ int main(int argc, char *argv[])
         } // end for point_gid
 
 
-
-
         double conserve_check;
         double conserve_check_lcl;
         FOR_REDUCE_SUM(point_gid, 0, num_points, 
@@ -1615,11 +1613,14 @@ int main(int argc, char *argv[])
                 size_t neighbor_point_gid = points_in_point(point_gid, neighbor_point_lid);
 
                 // get the local id of my neighbor that matches my point_gid
-                //size_t neighbor_lid = reverse_neighbor_lid(point_gid, neighbor_point_lid);
+                size_t neighbor_lid = reverse_neighbor_lid(point_gid, neighbor_point_lid);
 
                 for (size_t dim=0; dim<3; dim++){
+                    // from point i
                     conserve_check_lcl += 0.5*(grad_basis_i(point_gid,neighbor_point_lid,dim) - grad_basis_j(point_gid,neighbor_point_lid,dim));
-                    //conserve_check_lcl += 0.5*(grad_basis_i(point_gid,neighbor_point_lid,dim) - grad_basis_j(neighbor_point_gid,neighbor_lid,dim));
+
+                    // from neighbor point
+                    conserve_check_lcl += 0.5*(grad_basis_i(neighbor_point_gid,neighbor_lid,dim) - grad_basis_j(neighbor_point_gid,neighbor_lid,dim));
                 }
             }
 
@@ -1628,6 +1629,72 @@ int main(int argc, char *argv[])
 
 
 
+        printf("Testing sin(u) of vector field u = (x, y, z) \n\n");
+
+        FOR_ALL(i, 0, num_points, { // set u = (sin(x), sin(y), sin(z)) at every point, written through the device accessor
+            u(i, 0) = sin(point_positions(i, 0));
+            u(i, 1) = sin(point_positions(i, 1));
+            u(i, 2) = sin(point_positions(i, 2));
+        });
+        u.update_host(); // u was just written on the device; update_device() here would copy the old host values over it
+
+        DCArrayKokkos <double> div_sinx(num_points);
+        div_sinx.set_values(0.0);
+
+        DCArrayKokkos <double> div_fd_sinx(num_points);
+        div_fd_sinx.set_values(0.0);
+
+        FOR_ALL(i_gid, 0, num_points, {
+
+            for(size_t j_lid = 0; j_lid<points_num_neighbors(i_gid); j_lid++){     
+
+                size_t j_gid = points_in_point(i_gid, j_lid);
+                size_t i_lid = reverse_neighbor_lid(i_gid, j_lid);
+
+                if(i_gid != points_in_point(j_gid, i_lid)){
+                    printf("CHECK: point i = %d, reverse map point i = %zu for j = %zu \n", i_gid, points_in_point(j_gid, i_lid), j_gid);
+                }
+                //printf("map check: edge points (%d,%zu), rev from j = %zu using i_lid = %zu \n", i_gid, j_gid,  points_in_point(j_gid, i_lid), i_lid);
+
+                double g_ij[3];
+                double g_ji[3];
+                double sum[3];
+                for (int dim=0; dim<3; ++dim) {
+                    g_ij[dim] = grad_basis_i(i_gid,j_lid,dim);
+                    g_ji[dim] = grad_basis_j(i_gid,j_lid,dim);
+                    sum[dim] = g_ij[dim] + g_ji[dim];
+                }
+                double norm = sqrt(sum[0]*sum[0] + sum[1]*sum[1] + sum[2]*sum[2]);
+                //printf("errors: norm=%f, g_ij = (%f, %f, %f), g_ji = (%f, %f, %f) \n", norm, g_ij[0], g_ij[1], g_ij[2], g_ji[0], g_ji[1], g_ji[2]);
+
+                // conservative mesh-free FE
+                double contrib = 0.0;
+                for (int dim=0; dim<3; ++dim) {
+                    contrib += 0.5*(g_ij[dim] - g_ji[dim]) * (u(j_gid, dim) + u(i_gid, dim));
+                }
+                div_sinx(i_gid) += vol(i_gid) * vol(j_gid) * contrib;
+
+                // finite difference
+                contrib = 0.0;
+                for (int dim=0; dim<3; ++dim) {
+                    contrib += g_ij[dim]*u(j_gid, dim)*vol(j_gid);
+                }
+                div_fd_sinx(i_gid) += contrib;
+
+            } // end loop over neighbors
+
+            div_sinx(i_gid) /= vol(i_gid);
+            // remember: finite difference doesn't have the V_i on the right side, so no division
+        });
+        div_sinx.update_host();
+        div_fd_sinx.update_host();
+
+
+
+       
+
+
+
 
 
 
@@ -1648,12 +1715,24 @@ int main(int argc, char *argv[])
         }
 
         out << "\nPOINT_DATA " << num_points << "\n";
-        out << "SCALARS field float 1\n";
+        out << "SCALARS error_div(x) float 1\n";
         out << "LOOKUP_TABLE default\n";
         for (size_t point_gid = 0; point_gid < num_points; ++point_gid) {
-            out << div.host(point_gid) << "\n";
+            out << div.host(point_gid)-3 << "\n";
         }
 
+        out << "SCALARS error_div(sin(x)) float 1\n";
+        out << "LOOKUP_TABLE default\n";
+        for (size_t point_gid = 0; point_gid < num_points; ++point_gid) {
+            // vec[0] = sin(x), dvec[0]/dx = cos(x)
+            // vec[1] = sin(y), dvec[1]/dy = cos(y)
+            // vec[2] = sin(z), dvec[2]/dz = cos(z)
+            double val0 = cos(point_positions.host(point_gid,0));
+            double val1 = cos(point_positions.host(point_gid,1));
+            double val2 = cos(point_positions.host(point_gid,2));
+            double exact_div = val0 + val1 + val2;
+            out << (div_sinx.host(point_gid) - exact_div) << "\n";
+        }
 
         printf("Finished \n\n");
 

From 9e877955dfba6f8766041fb7317950351355b99f Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Sat, 27 Sep 2025 12:46:26 -0600
Subject: [PATCH 21/23] cleaned up prints

---
 examples/pointcloud/pointcloud-rk.cpp | 48 +++++++++++++++------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index b482c3c6..0b7c15c3 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -79,8 +79,8 @@ const size_t num_1d_x = 5;
 const size_t num_1d_y = 5;
 const size_t num_1d_z = 5;
 
-const double h_kernel = 2/5.;
-const double num_points_fit = 27; // minimum to fit is 3x3x3
+const double h_kernel = 2.0/5.;
+const double num_points_fit = 27; // minimum to fit on structured mesh is 3x3x3
 
 const size_t num_points = num_1d_x*num_1d_y*num_1d_z;
 
@@ -775,14 +775,14 @@ int main(int argc, char *argv[])
 
         // point locations
         if(RAND_CLOUD){
-        srand(static_cast<unsigned int>(time(0))); // Seed the random number generator
-            for(size_t i=0; i<num_points; i++){
-                point_positions.host(i, 0) = X0 + LX*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
-                point_positions.host(i, 1) = Y0 + LY*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
-                point_positions.host(i, 2) = Z0 + LZ*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
+            srand(static_cast<unsigned int>(time(0))); // Seed the random number generator
+                for(size_t i=0; i<num_points; i++){
+                    point_positions.host(i, 0) = X0 + LX*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
+                    point_positions.host(i, 1) = Y0 + LY*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
+                    point_positions.host(i, 2) = Z0 + LZ*static_cast<double>(rand())/static_cast<double>(RAND_MAX);
+                }
+                vol.set_values(1.0);
             }
-            vol.set_values(1.0);
-        }
         else {
 
             double dx = (LX-X0)/((double)num_1d_x - 1);
@@ -850,6 +850,8 @@ int main(int argc, char *argv[])
         // ----------------------------
         // Make bins here
         // ----------------------------
+
+        printf("making bins \n");
         
         // the number of nodes in the mesh
         size_t num_bins_x = (size_t)( round( (LX - X0)/bin_dx) + 1 );  
@@ -878,6 +880,7 @@ int main(int argc, char *argv[])
         
         printf("Starting timers \n\n");
 
+
         // start timer
         auto time_1 = std::chrono::high_resolution_clock::now();
 
@@ -914,6 +917,8 @@ int main(int argc, char *argv[])
         // start timer
         auto time_3 = std::chrono::high_resolution_clock::now();
 
+        printf("building neighbor point list \n");
+
         // save bin id to points
         FOR_ALL(point_gid, 0, num_points, {
 
@@ -953,19 +958,19 @@ int main(int argc, char *argv[])
         }); // end for all
 
 
-        for(size_t bin_gid=0; bin_gid<num_bins; bin_gid++){
-            if(num_points_in_bin.host(bin_gid) > 0){
-                size_t i = keys_in_bin(bin_gid).i;
-                size_t j = keys_in_bin(bin_gid).j; 
-                size_t k = keys_in_bin(bin_gid).k;
+        // for(size_t bin_gid=0; bin_gid<num_bins; bin_gid++){
+        //     if(num_points_in_bin.host(bin_gid) > 0){
+        //         size_t i = keys_in_bin(bin_gid).i;
+        //         size_t j = keys_in_bin(bin_gid).j; 
+        //         size_t k = keys_in_bin(bin_gid).k;
 
-                double bin_x = ((double)i)*bin_dx;
-                double bin_y = ((double)j)*bin_dy;
-                double bin_z = ((double)k)*bin_dz;
-                //printf("num points in bin = %zu, bin keys = (%zu, %zu, %zu), bin x = (%f, %f, %f) \n", 
-                //    num_points_in_bin.host(bin_gid), i, j, k, bin_x, bin_y, bin_z);
-            }
-        } // end for
+        //         double bin_x = ((double)i)*bin_dx;
+        //         double bin_y = ((double)j)*bin_dy;
+        //         double bin_z = ((double)k)*bin_dz;
+        //         //printf("num points in bin = %zu, bin keys = (%zu, %zu, %zu), bin x = (%f, %f, %f) \n", 
+        //         //    num_points_in_bin.host(bin_gid), i, j, k, bin_x, bin_y, bin_z);
+        //     }
+        // } // end for
 
 
 
@@ -1255,6 +1260,7 @@ int main(int argc, char *argv[])
         // end timer
         auto time_4 = std::chrono::high_resolution_clock::now();
 
+        printf("done building neighbor point list \n");
 
         // ----------------------------------------
         // Find basis that reconstructs polynomial 

From 3bfb736817088e6fd6599140d5d7dbdb2f90f58b Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Sat, 27 Sep 2025 16:57:17 -0600
Subject: [PATCH 22/23] fixed P1 option

---
 examples/pointcloud/pointcloud-rk.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index 0b7c15c3..b03fae88 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -405,7 +405,7 @@ void grad_kernel_sym(double *gradW, const double r[3], const double hi, const do
 
 
     KOKKOS_INLINE_FUNCTION
-    void grad_poly_basis(const double r[3], double (*grad_p)[3], size_t eval_point) {
+    void grad_poly_basis(const double r[3], double (*grad_p)[3], bool derivative_wrt_i) {
         
         double drdx = -1.0;  // default is derivative with respect to i
         double drdy = -1.0;  // default is derivative with respect to i
@@ -1228,8 +1228,7 @@ int main(int argc, char *argv[])
 
 
         // build the reverse map
-        DRaggedRightArrayKokkos <int> reverse_neighbor_lid(points_num_neighbors); 
-        reverse_neighbor_lid.set_values(-1);
+        DRaggedRightArrayKokkos <size_t> reverse_neighbor_lid(points_num_neighbors); 
 
         FOR_ALL(point_gid, 0, num_points, {
                 

From 3a89cb914d9ec6294f1b837f6f85e34b34143ff0 Mon Sep 17 00:00:00 2001
From: Nathaniel Morgan <nmorgan@pn2303395.lanl.gov>
Date: Sat, 27 Sep 2025 17:20:42 -0600
Subject: [PATCH 23/23] only run P2 tests with P2

---
 examples/pointcloud/pointcloud-rk.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/examples/pointcloud/pointcloud-rk.cpp b/examples/pointcloud/pointcloud-rk.cpp
index b03fae88..54fd49c9 100755
--- a/examples/pointcloud/pointcloud-rk.cpp
+++ b/examples/pointcloud/pointcloud-rk.cpp
@@ -73,14 +73,15 @@ const double PI = 3.14159265358979323846;
 
 #define P2 // P1 or P2
 #define CUBIC_SPLINE // CUBIC_SPLINE or GAUSS kernel
+
 bool RAND_CLOUD = true; // RAND_CLOUD or uniform
 
 const size_t num_1d_x = 5;
 const size_t num_1d_y = 5;
 const size_t num_1d_z = 5;
 
-const double h_kernel = 2.0/5.;
-const double num_points_fit = 27; // minimum to fit on structured mesh is 3x3x3
+const double h_kernel = 2.0/static_cast<double>(num_1d_x); // kernel length h; it is always 2/num_1d_x, so compute it from the point count
+const double num_points_fit = 27; // minimum to P2 fit on structured mesh is 3x3x3
 
 const size_t num_points = num_1d_x*num_1d_y*num_1d_z;
 
@@ -98,6 +99,7 @@ const double LX = 1.0;   // length in x-dir
 const double LY = 1.0;
 const double LZ = 1.0;
 
+
 bool check_maps = false; // CPU only!!!!
 
 //
@@ -1405,9 +1407,10 @@ int main(int argc, char *argv[])
             if(fabs(linear_preserving-point_positions(point_gid,0))>1e-13)
                 printf("linear fcn error = %f, ", fabs(linear_preserving-point_positions(point_gid,0)));
 
+        #if defined(P2)
             if(fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0))>1e-13)
                 printf("quadratic fcn error = %f at i=%zu \n", fabs(quadratic_preserving-point_positions(point_gid,0)*point_positions(point_gid,0)), point_gid);
-
+        #endif
 
             // -----------------
             // gradient checks
@@ -1463,6 +1466,7 @@ int main(int argc, char *argv[])
             }
 
 
+        #if defined(P2)
             // Sum(grad(P2)) = [2]; 
             FOR_REDUCE_SUM(neighbor_point_lid, 0, points_num_neighbors.host(point_gid), grad_x_p2_lcl, {
                 // get the point gid for this neighboring
@@ -1487,6 +1491,7 @@ int main(int argc, char *argv[])
                         fabs(grad_y_p2-2.0*point_positions(point_gid,1)), 
                         fabs(grad_z_p2-2.0*point_positions(point_gid,2)));
             }
+        #endif
 
         } // end for point gid