UWB-Biocomputing · AndrewBMadison · Mar 12, 2025 · Jan 31, 2025 · Jan 31, 2025 · Feb 4, 2025
diff --git a/.gitignore b/.gitignore
@@ -97,3 +97,6 @@ Testing/RegressionTesting/TestOutput/*.xml
 Testing/RegressionTesting/TestOutput/*.h5
 Testing/UnitTesting/TestOutput/*.xml
 Testing/UnitTesting/TestOutput/*.h5
+
+# Machine Specific build script
+build.sh
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -8,7 +8,7 @@ cmake_minimum_required(VERSION 3.12)
 #
 #You can also pass this flag when running cmake from the command line like this:
 #
-#cmake..- D ENABLE_CUDA = YES
+#cmake -D ENABLE_CUDA=YES ..
 #
 #"YES" / GPU choice only available if CUDA library is installed and the GPU is CUDA capable.
 ############################################################################################
@@ -21,17 +21,24 @@ if(NOT PERFORMANCE_METRICS)
         set(PERFORMANCE_METRICS NO)
 endif()
 
-#CONDITIONAL FLAG to turn on the Gprof profiler(                                                   \
-#   Gprof is a performance analysis tool for Unix applications)
-#Steps to run Gprof
-#Step 01 : set(GPROF YES) below
-#Step 02 : Compile and run the simulation on CPU or GPU as usual
-#Step 03 : Run the generated gmon.out file from the build directory and save the output in an txt  \
-#      file to improve readability                                                                  \
-#If using CPU - "~/Graphitti/build$ gprof cgraphitti gmon.out > analysis_test.txt"
-#If using GPU - "~/Graphitti/build$ gprof ggraphitti gmon.out > analysis_test.txt" 
-if(NOT GPROF)
-        set(GPROF NO)
+############################################################################################
+#CONDITIONAL FLAG to change target architecture for the GPU simulator from the default
+#
+#You can pass this flag when running cmake from the command line like this, setting TARGET_ARCH
+#to your desired architecture:
+#
+#cmake -D ENABLE_CUDA=YES -D TARGET_ARCH=70 ..
+#
+#"YES" / GPU choice only available if CUDA library is installed and the GPU is CUDA capable.
+#If no TARGET_ARCH is passed in, it defaults to 37 (compute capability 3.7, the Kepler architecture)
+############################################################################################
+if(NOT DEFINED TARGET_ARCH)
+        set(TARGET_ARCH 37)
+endif()
+
+#CONDITIONAL FLAG to turn on the validation mode
+if(NOT VALIDATION_MODE)
+        set(VALIDATION_MODE NO)
 endif()
 
 #Creates the Graphitti project with the correct languages, depending on if using GPU or not
@@ -45,31 +52,112 @@ if(ENABLE_CUDA)
         add_compile_definitions(USE_GPU)
 #Specify the CUDA architecture / gencode that will be targeted
         ### Set gencode and architecture variables to the correct values for your specific NVIDIA hardware
-        set(CMAKE_CUDA_ARCHITECTURES 37)        
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode=arch=compute_37,code=sm_37)
+        set(CMAKE_CUDA_ARCHITECTURES ${TARGET_ARCH})
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode=arch=compute_${TARGET_ARCH},code=sm_${TARGET_ARCH})        
+        message(STATUS "Using CUDA architecture: ${TARGET_ARCH}")
 
 else()
         message("\n----Generating Makefile for Graphitti CPU version----")
         project(Graphitti LANGUAGES CXX C)
 endif()
 
+# -----------------------------------------------------------------------------
+# Build Type Configuration
+#
+# CMake supports different build types controlling optimization, debugging, and profiling:
+#
+#   - Debug         : No optimizations (`-O0`), includes debug symbols (`-g`).
+#   - Release       : Optimized build (`-O3`), removes debug symbols.
+#   - RelWithDebInfo: Optimized (`-O2`) but keeps debug symbols (`-g`) for profiling.
+#   - Profiling     : Custom build type (defined in this project) that enables:
+#                     - CPU profiling via `-pg` (GPROF)
+#                     - CUDA profiling via `-lineinfo` (for Nsight Compute)
+#
+# Selecting a Build Type:
+#   - By default, CMake does NOT set a build type for single-config generators.
+#   - If no build type is specified, this script defaults to "Release" for performance.
+#   - You can explicitly set the build type when configuring CMake:
+#
+#       cmake -S . -B build -DCMAKE_BUILD_TYPE=Debug      # Debug mode
+#       cmake -S . -B build -DCMAKE_BUILD_TYPE=Release    # Release mode
+#       cmake -S . -B build -DCMAKE_BUILD_TYPE=Profiling  # Profiling mode 
+#       
+#       If you don't want to pass in the build type flag, you can edit this file and add...
+#               set(CMAKE_BUILD_TYPE "Debug") or whichever build type you want
+# -----------------------------------------------------------------------------
+set(CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo;Profiling" CACHE STRING "Supported build types" FORCE)
+
+# Ensure single-config generators use a valid default
+if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the build type." FORCE)
+endif()
+
+# Set flags for all build types
+set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
+# We should consider using the -DNDEBUG flag for release code, it disables assert() calls and is higher performance
+set(CMAKE_CXX_FLAGS_RELEASE "-O3")
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g")
+
+# Define a custom build type: "Profiling" (CMake applies the *_PROFILING variables to that type only)
+set(CMAKE_CXX_FLAGS_PROFILING "-pg -O2")
+set(CMAKE_EXE_LINKER_FLAGS_PROFILING "-pg")
+set(CMAKE_SHARED_LINKER_FLAGS_PROFILING "-pg")
+
+# Apply the correct flags based on the selected build type
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_DEBUG}")
+    if(ENABLE_CUDA)
+        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G")   
+    endif()
+elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_RELEASE}")
+    if(ENABLE_CUDA)
+        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3")
+    endif()
+elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
+elseif(CMAKE_BUILD_TYPE STREQUAL "Profiling")
+    message(STATUS "Profiling build enabled: Adding -pg (GPROF)")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_PROFILING}")
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS_PROFILING}")
+    if(ENABLE_CUDA)
+        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo")   
+#       set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo -Xptxas=-v")   
+    endif()
+endif()
+
+
+#Gprof is a performance analysis tool for Unix applications.
+#Steps to run Gprof
+#Step 01 : Set the build configuration to Profiling by passing -DCMAKE_BUILD_TYPE=Profiling
+#Step 02 : Compile and run the simulation on CPU or GPU as usual
+#Step 03 : Run gprof on the generated gmon.out file from the build directory and save the output
+#      in a txt file to improve readability
+#If using CPU - "~/Graphitti/build$ gprof cgraphitti gmon.out > analysis_test.txt"
+#If using GPU - "~/Graphitti/build$ gprof ggraphitti gmon.out > analysis_test.txt"
+
+
+# Print build type for verification
+message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}")
+message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
+
+message(STATUS "ENABLE_CUDA: ${ENABLE_CUDA}")
+if(ENABLE_CUDA)
+        message(STATUS "CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
+endif()
+
+
 #Setting the base version to C++ 17
 set(CMAKE_CXX_STANDARD 17)
 
-#set(DEBUG_MODE YES) for debugging, no optimization
-#set(DEBUG_MODE NO) for production code, -O3 optimization enabled
-set(DEBUG_MODE NO)
-
 if(PERFORMANCE_METRICS)
         message("-- Setting PEREFORMANCE_METRICS: ON")
         add_definitions(-DPERFORMANCE_METRICS)
 endif()
 
-if(GPROF)
-        message("-- Setting GPROF: ON")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg")
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pg")
-        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -pg")
+if(VALIDATION_MODE)
+        message("-- Setting VALIDATION_MODE: ON")
+        add_definitions(-DVALIDATION_MODE)
 endif()
 
 #HDF5 Support, finds HDF5 package for C and C++ and links the hdf5 libraries to the executable     \
@@ -116,11 +204,6 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
 #Set extra warning flags
 #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
 
-if (NOT DEBUG_MODE)
-        message("-- Setting Optimization flag: O3")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
-endif()
-
 #define TIXML_USE_STL as a preproccersser macro to use the C++ standard library with TinyXML
 add_compile_definitions(TIXML_USE_STL)
 message("-- Setting Compile Definition: TIMXL_USE_STL")
@@ -282,9 +365,33 @@ add_library(RNG STATIC ${RNG_Source})
 
 
 # Create Utils library
-file(GLOB Utils_Source  Simulator/Utils/*.cpp Simulator/Utils/*.h)
+file(GLOB Utils_Source Simulator/Utils/*.cpp Simulator/Utils/*.h)
 list(REMOVE_ITEM Utils_Source "${CMAKE_CURRENT_SOURCE_DIR}/Simulator/Utils/Factory.cpp")
-add_library(Utils  ${Utils_Source})
+
+# NvtxHelper.cpp includes CUDA and NVTX headers, so it can only be compiled when both are
+# available. NVTX is therefore searched for only in a CUDA-enabled Profiling build.
+if(CMAKE_BUILD_TYPE STREQUAL "Profiling" AND ENABLE_CUDA)
+        find_library(NVTX_LIBRARY nvToolsExt)
+        if(NVTX_LIBRARY)
+                message(STATUS "Found NVTX: ${NVTX_LIBRARY} included in Profiling")
+                add_compile_definitions(ENABLE_NVTX)
+        else()
+                message(STATUS "NVTX library not found! Not included in Profiling.")
+        endif()
+endif()
+
+# Drop the NVTX helper in every other case (CPU build, non-Profiling build, NVTX not found)
+if(NOT NVTX_LIBRARY)
+        list(REMOVE_ITEM Utils_Source "${CMAKE_CURRENT_SOURCE_DIR}/Simulator/Utils/NvtxHelper.cpp")
+endif()
+
+# Always create the Utils library (even if NVTX and CUDA are missing)
+add_library(Utils ${Utils_Source})
+
+# Only link NVTX if it was found
+if(NVTX_LIBRARY)
+        target_link_libraries(Utils PRIVATE ${NVTX_LIBRARY})
+endif()
 
 
 # Used to locate and run other CMakeLists.txt files from Third Party resources for further compilation of the project.
@@ -352,6 +459,9 @@ endif()
 # ------ TESTS EXECUTABLE ------
 # Add the file that contains main (RunTests.cpp) and all test files. GoogleTest will only recognize them if they are
 # included in the executable.
+target_compile_options(gtest PRIVATE -Wno-error=maybe-uninitialized)
+target_compile_options(gtest_main PRIVATE -Wno-error=maybe-uninitialized)
+
 add_executable(tests
         Testing/RunTests.cpp
         Testing/UnitTesting/OperationManagerTests.cpp
@@ -426,3 +536,7 @@ target_link_libraries(serialSecondHalfTest combinedLib)
 unset(ENABLE_CUDA CACHE)
 unset(PERFORMANCE_METRICS CACHE)
 unset(GPROF CACHE)
+unset(CMAKE_BUILD_TYPE CACHE)
+unset(NVTX_LIBRARY CACHE)
+unset(TARGET_ARCH CACHE)
+unset(VALIDATION_MODE CACHE)
diff --git a/Simulator/Core/GPUModel.cpp b/Simulator/Core/GPUModel.cpp
@@ -12,7 +12,10 @@
 #include "AllVertices.h"
 #include "Connections.h"
 #include "Global.h"
-
+#ifdef VALIDATION_MODE
+   #include "AllIFNeurons.h"
+   #include "OperationManager.h"
+#endif
 #ifdef PERFORMANCE_METRICS
 float g_time;
 cudaEvent_t start, stop;
@@ -144,8 +147,21 @@ void GPUModel::advance()
    AllVertices &vertices = layout_->getVertices();
    AllEdges &edges = connections_->getEdges();
 
+#ifdef VALIDATION_MODE
+   // Validation mode: draw the noise on the host from noiseRNG and upload it to randNoise_d
+   // instead of calling normalMTGPU().
+   int verts = Simulator::getInstance().getTotalVertices();
+   std::vector<float> randNoise_h(verts);
+   // Filled from the last index down to the first, consuming one noiseRNG value per vertex.
+   for (int i = verts - 1; i >= 0; i--) {
+      randNoise_h[i] = (*noiseRNG)();
+   }
+   // Check the copy with HANDLE_ERROR, like the other validation transfers in this function.
+   HANDLE_ERROR(cudaMemcpy(randNoise_d, randNoise_h.data(), verts * sizeof(float),
+                           cudaMemcpyHostToDevice));
+#else
    normalMTGPU(randNoise_d);
-
+#endif
+//LOG4CPLUS_DEBUG(vertexLogger_, "Index: " << index << " Vm: " << Vm);
 #ifdef PERFORMANCE_METRICS
    cudaLapTime(t_gpu_rndGeneration);
    cudaStartTimer();
@@ -155,7 +171,41 @@ void GPUModel::advance()
    // Advance vertices ------------->
    vertices.advanceVertices(edges, allVerticesDevice_, allEdgesDevice_, randNoise_d,
                             edgeIndexMapDevice_);
+#ifdef VALIDATION_MODE
+   //(AllIFNeuronsDeviceProperties *)allVerticesDevice,
+   log4cplus::Logger vertexLogger_ = log4cplus::Logger::getInstance(LOG4CPLUS_TEXT("vertex"));
+   std::vector<float> sp_h(verts);
+   std::vector<float> vm_h(verts);
+   std::vector<float> Inoise_h(verts);
+   AllIFNeuronsDeviceProperties validationNeurons;
+   HANDLE_ERROR(cudaMemcpy((void *)&validationNeurons, allVerticesDevice_,
+                           sizeof(AllIFNeuronsDeviceProperties), cudaMemcpyDeviceToHost));
+   HANDLE_ERROR(cudaMemcpy(sp_h.data(), validationNeurons.spValidation_, verts * sizeof(float),
+                           cudaMemcpyDeviceToHost));
+   HANDLE_ERROR(cudaMemcpy(vm_h.data(), validationNeurons.Vm_, verts * sizeof(float),
+                           cudaMemcpyDeviceToHost));
+   HANDLE_ERROR(cudaMemcpy(Inoise_h.data(), validationNeurons.Inoise_, verts * sizeof(float),
+                           cudaMemcpyDeviceToHost));
 
+   for (int i = verts - 1; i >= 0; i--) {
+      LOG4CPLUS_DEBUG(vertexLogger_, endl
+                                        << "Advance Index[" << i << "] :: Noise = "
+                                        << randNoise_h[i] << "\tVm: " << vm_h[i] << endl
+                                        << "\tsp = " << sp_h[i] << endl
+                                        << "\tInoise = " << Inoise_h[i] << endl);
+   }
+#endif
+//LOG4CPLUS_DEBUG(vertexLogger_, "ADVANCE NEURON LIF[" << index << "] :: Noise = " << noise);
+//LOG4CPLUS_DEBUG(vertexLogger_, "Index: " << index << " Vm: " << Vm);
+// LOG4CPLUS_DEBUG(vertexLogger_, "NEURON[" << index << "] {" << endl
+//                                          << "\tVm = " << Vm << endl
+//                                          << "\tVthresh = " << Vthresh << endl
+//                                          << "\tsummationPoint = " << summationPoint << endl
+//                                          << "\tI0 = " << I0 << endl
+//                                          << "\tInoise = " << Inoise << endl
+//                                          << "\tC1 = " << C1 << endl
+//                                          << "\tC2 = " << C2 << endl
+//                                          << "}" << endl);
 #ifdef PERFORMANCE_METRICS
    cudaLapTime(t_gpu_advanceNeurons);
    cudaStartTimer();

diff --git a/Simulator/Core/GPUModel.h b/Simulator/Core/GPUModel.h
@@ -24,6 +24,11 @@
 #include "AllEdges.h"
 #include "AllVertices.h"
 
+#ifdef VALIDATION_MODE
+   #include <fstream>
+   #include <iostream>
+#endif   // VALIDATION_MODE
+
 #ifdef __CUDACC__
    #include "Book.h"
 #endif

diff --git a/Simulator/Core/Simulator.cpp b/Simulator/Core/Simulator.cpp
@@ -31,6 +31,7 @@ Simulator::Simulator()
 
    consoleLogger_ = log4cplus::Logger::getInstance(LOG4CPLUS_TEXT("console"));
    fileLogger_ = log4cplus::Logger::getInstance(LOG4CPLUS_TEXT("file"));
+   fileLogger_.setLogLevel(log4cplus::DEBUG_LOG_LEVEL);
    edgeLogger_ = log4cplus::Logger::getInstance(LOG4CPLUS_TEXT("edge"));
    workbenchLogger_ = log4cplus::Logger::getInstance(LOG4CPLUS_TEXT("workbench"));
 

diff --git a/Simulator/Utils/NvtxHelper.cpp b/Simulator/Utils/NvtxHelper.cpp
@@ -0,0 +1,31 @@
+/**
+ * @file NvtxHelper.cpp
+ * 
+ * @ingroup Simulator/Utils
+ * 
+ * @brief Helper functions to enable nvtx profiling
+ * When ENABLE_NVTX is false the functions are replaced with blank inline functions which are removed by the compiler
+ * This file is meant to be compiled into the Utils library only for Profiling builds with ENABLE_CUDA=YES where the NVTX library was found
+ */
+
+#include "NvtxHelper.h"
+#include <cuda_runtime.h>
+#include <nvToolsExt.h>
+
+void nvtxPushColor(const std::string &name, Color pColor)
+{
+   // Describe the range: an ASCII label taken from `name`, colored with ARGB value `pColor`
+   nvtxEventAttributes_t rangeAttrs = {};
+   rangeAttrs.version = NVTX_VERSION;
+   rangeAttrs.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+   rangeAttrs.messageType = NVTX_MESSAGE_TYPE_ASCII;
+   rangeAttrs.message.ascii = name.c_str();
+   rangeAttrs.colorType = NVTX_COLOR_ARGB;
+   rangeAttrs.color = static_cast<uint32_t>(pColor);
+   nvtxRangePushEx(&rangeAttrs);   // pushes the range described above onto the NVTX range stack
+}
+
+void nvtxPop()
+{
+   nvtxRangePop();   // thin wrapper: forwards to NVTX and ignores any value it returns
+}