From b01af62ad6771aafa61b9f26f851e80e69b0ae68 Mon Sep 17 00:00:00 2001 From: Andrew Date: Thu, 30 Jan 2025 22:23:00 -0800 Subject: [PATCH 01/18] Added conditional flag TARGET_ARCH and updated quickstart --- CMakeLists.txt | 22 +++++++++++++++++++--- docs/User/quickstart.md | 9 +++++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 99d2ee131..5dc78838d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ cmake_minimum_required(VERSION 3.12) # #You can also pass this flag when running cmake from the command line like this: # -#cmake..- D ENABLE_CUDA = YES +#cmake -D ENABLE_CUDA=YES .. # #"YES" / GPU choice only available if CUDA library is installed and the GPU is CUDA capable. ############################################################################################ @@ -34,6 +34,21 @@ if(NOT GPROF) set(GPROF NO) endif() +############################################################################################ +#CONDITIONAL FLAG to change target architecture for the GPU simulator from the default +# +#You can pass this flag when running cmake from the command line like this, setting TARGET_ARCH \ +# to your desired architecture: \ +# +#cmake -D ENABLE_CUDA=YES -D TARGET_ARCH=70 .. +# +#"YES" / GPU choice only available if CUDA library is installed and the GPU is CUDA capable. +#If no TARGET_ARCH is passed in then it will default to 37 which is the kepler architecture +############################################################################################ +if(NOT DEFINED TARGET_ARCH) + set(TARGET_ARCH 37) +endif() + #Creates the Graphitti project with the correct languages, depending on if using GPU or not #If using CUDA, also verify the CUDA package and set the required CUDA variables if(ENABLE_CUDA) @@ -45,8 +60,9 @@ if(ENABLE_CUDA) add_compile_definitions(USE_GPU) #Specify the CUDA architecture / gencode that will be targeted ### Set gencode and architecture variables to the correct values for your specific NVIDIA hardware - set(CMAKE_CUDA_ARCHITECTURES 37) - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode=arch=compute_37,code=sm_37) + set(CMAKE_CUDA_ARCHITECTURES ${TARGET_ARCH}) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode=arch=compute_${TARGET_ARCH},code=sm_${TARGET_ARCH}) + message(STATUS "Using CUDA architecture: ${TARGET_ARCH}") else() message("\n----Generating Makefile for Graphitti CPU version----") diff --git a/docs/User/quickstart.md b/docs/User/quickstart.md index 06e447067..a429f1ce7 100644 --- a/docs/User/quickstart.md +++ b/docs/User/quickstart.md @@ -16,11 +16,16 @@ As a quick start and sanity test, let's run a small, prepackaged simulation to m $ make $ ./tests ``` - To compile the GPU version set the variable `ENABLE_CUDA` to `YES` in the `CMakeLists.txt` + To compile the GPU version, use the cmake conditional flag ENABLE_CUDA, setting it to YES ```shell - set(ENABLE_CUDA YES) + cmake -D ENABLE_CUDA=YES .. ``` + By default, the target CUDA architecture is set to 37 which is the kepler architecture + To target a different architecture use the cmake conditional flag TARGET_ARCH setting it to your desired architecture + ```shell + cmake -D ENABLE_CUDA=YES -D TARGET_ARCH=70 .. + ``` 3. Unless you have the necessary **HDF5** libraries installed please only use XML recorders only. - HDF5 is useful for making the data analysis easier for Matlab, which has native HDF5 support, after a simulation - especially a very long one; but it is fine to use the default XML output. From 035135545e58667108e03261fdca31fa17d8165e Mon Sep 17 00:00:00 2001 From: Andrew Date: Thu, 30 Jan 2025 23:34:58 -0800 Subject: [PATCH 02/18] updated StudentSetup --- docs/Developer/StudentSetup.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/Developer/StudentSetup.md b/docs/Developer/StudentSetup.md index fbad9d460..ff2f69d9e 100644 --- a/docs/Developer/StudentSetup.md +++ b/docs/Developer/StudentSetup.md @@ -93,6 +93,34 @@ The following options are available: high-performance GPU version has been compiled (`ggraphitti`). - `-v, --version`: Outputs the current git commit ID and exits. +## Running the GPU version of the simulator (ggraphitti)] +1. To run: + + ``` + $ cd build + + $ cmake -D ENABLE_CUDA=YES .. + ``` + + This will generate a makefile. Then type: + + ``` + $ make + ``` + + You can then run a selection of sample tests to ensure the simulator is able to run: + + ``` + $ ./tests + ``` + + You can run ggraphitti on various test files: + + ``` + $ ./ggraphitti -c ../configfiles/test-small-connected.xml + ``` + + ## Using Visual Studio Code 1. Install the [C/C++ extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode.cpptools) for IntelliSense and debugging. We recommend setting the IntelliSense Cache Size setting to 10 MB. The default size is 5120 MB, which can result in VSC consuming all of your allotted space on the CSS Linux Machines. From 28d8808dc9ab92847091afd450e8f44a86ea1fa0 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 4 Feb 2025 11:55:25 -0800 Subject: [PATCH 03/18] Added cuda flags and new build options to cmakelists. --- .gitignore | 3 ++ CMakeLists.txt | 131 +++++++++++++++++++++++++++++++++++-------------- 2 files changed, 98 insertions(+), 36 deletions(-) diff --git a/.gitignore b/.gitignore index 8e5e3b4cb..3ec5cc21e 100644 --- a/.gitignore +++ b/.gitignore @@ -97,3 +97,6 @@ Testing/RegressionTesting/TestOutput/*.xml Testing/RegressionTesting/TestOutput/*.h5 Testing/UnitTesting/TestOutput/*.xml Testing/UnitTesting/TestOutput/*.h5 + +# Machine Specific build script +build.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 5dc78838d..e230a5797 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,23 +16,6 @@ if(NOT ENABLE_CUDA) set(ENABLE_CUDA NO) endif() -#CONDITIONAL FLAG to turn on the performance metrics -if(NOT PERFORMANCE_METRICS) - set(PERFORMANCE_METRICS NO) -endif() - -#CONDITIONAL FLAG to turn on the Gprof profiler( \ -# Gprof is a performance analysis tool for Unix applications) -#Steps to run Gprof -#Step 01 : set(GPROF YES) below -#Step 02 : Compile and run the simulation on CPU or GPU as usual -#Step 03 : Run the generated gmon.out file from the build directory and save the output in an txt \ -# file to improve readability \ -#If using CPU - "~/Graphitti/build$ gprof cgraphitti gmon.out > analysis_test.txt" -#If using GPU - "~/Graphitti/build$ gprof ggraphitti gmon.out > analysis_test.txt" -if(NOT GPROF) - set(GPROF NO) -endif() ############################################################################################ #CONDITIONAL FLAG to change target architecture for the GPU simulator from the default @@ -54,14 +37,11 @@ endif() if(ENABLE_CUDA) message("\n----Generating Makefile for Graphitti GPU version----") project(Graphitti LANGUAGES CXX CUDA C) -#Verify CUDA package is present - find_Package(CUDA REQUIRED) #Set the USE_GPU preprocessor macro so that GPU code will be compiled. add_compile_definitions(USE_GPU) #Specify the CUDA architecture / gencode that will be targeted ### Set gencode and architecture variables to the correct values for your specific NVIDIA hardware set(CMAKE_CUDA_ARCHITECTURES ${TARGET_ARCH}) - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode=arch=compute_${TARGET_ARCH},code=sm_${TARGET_ARCH}) message(STATUS "Using CUDA architecture: ${TARGET_ARCH}") else() @@ -69,25 +49,106 @@ else() project(Graphitti LANGUAGES CXX C) endif() +#CONDITIONAL FLAG to turn on the performance metrics +if(NOT PERFORMANCE_METRICS) + set(PERFORMANCE_METRICS NO) +endif() + + +# ----------------------------------------------------------------------------- +# Build Type Configuration +# +# CMake support for different build types controling optimization, debugging and profiling: +# +# - Debug : No optimizations (`-O0`), includes debug symbols (`-g`). +# - Release : Optimized build (`-O3`), removes debug symbols. +# - RelWithDebInfo: Optimized (`-O2`) but keeps debug symbols (`-g`) for profiling. +# - Profiling : Custom build type (defined in this project) that enables: +# - CPU profiling via `-pg` (GPROF) +# - CUDA profiling via `-lineinfo` (for Nsight Compute) +# +# Selecting a Build Type: +# - By default, CMake does NOT set a build type for single-config generators. +# - If no build type is specified, this script defaults to "Release" for performance. +# - You can explicitly set the build type when configuring CMake: +# +# cmake -S . -B build -DCMAKE_BUILD_TYPE=Debug # Debug mode +# cmake -S . -B build -DCMAKE_BUILD_TYPE=Release # Release mode +# cmake -S . -B build -DCMAKE_BUILD_TYPE=Profiling # Profiling mode +# +# If you don't want to pass in the build type flag, you can edit this file and add... +# set(CMAKE_BUILD_TYPE "Debug") or whichever build type you want +# ----------------------------------------------------------------------------- +set(CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo;MinSizeRel;Profiling" CACHE STRING "Supported build types" FORCE) + +# Ensure single-config generators use a valid default +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the build type." FORCE) +endif() + +# Set flags for all build types +set(CMAKE_CXX_FLAGS_DEBUG "-g -O0") +# We should consider using the -DNDEBUG flag for release code, it disables assert() calls and is higher performance +set(CMAKE_CXX_FLAGS_RELEASE "-O3") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g") + +# Define a custom build type: "Profiling" +set(CMAKE_CXX_FLAGS_PROFILING "-pg -O2") +set(CMAKE_EXE_LINKER_FLAGS_PROFILING "-pg") +set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -pg") + +# Apply the correct flags based on the selected build type +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_DEBUG}") + if(ENABLE_CUDA) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") + endif() +elseif(CMAKE_BUILD_TYPE STREQUAL "Release") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_RELEASE}") + if(ENABLE_CUDA) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3") + endif() +elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") +elseif(CMAKE_BUILD_TYPE STREQUAL "Profiling") + message(STATUS "Profiling build enabled: Adding -pg (GPROF)") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_PROFILING}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS_PROFILING}") + if(ENABLE_CUDA) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") +# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo -Xptxas=-v") + endif() +endif() + + +# Gprof is a performance analysis tool for Unix applications) +#Steps to run Gprof +#Step 01 : set build configuration to Profiling ... -DCMAKE_BUILD_TYPE=Profiling +#Step 02 : Compile and run the simulation on CPU or GPU as usual +#Step 03 : Run the generated gmon.out file from the build directory and save the output in an txt \ +# file to improve readability \ +#If using CPU - "~/Graphitti/build$ gprof cgraphitti gmon.out > analysis_test.txt" +#If using GPU - "~/Graphitti/build$ gprof ggraphitti gmon.out > analysis_test.txt" + + +# Print build type for verification +message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}") +message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") + +message(STATUS "ENABLE_CUDA: ${ENABLE_CUDA}") +if(ENABLE_CUDA) + message(STATUS "CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}") +endif() + + #Setting the base version to C++ 17 set(CMAKE_CXX_STANDARD 17) -#set(DEBUG_MODE YES) for debugging, no optimization -#set(DEBUG_MODE NO) for production code, -O3 optimization enabled -set(DEBUG_MODE NO) - if(PERFORMANCE_METRICS) message("-- Setting PEREFORMANCE_METRICS: ON") add_definitions(-DPERFORMANCE_METRICS) endif() -if(GPROF) - message("-- Setting GPROF: ON") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pg") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -pg") -endif() - #HDF5 Support, finds HDF5 package for C and C++ and links the hdf5 libraries to the executable \ # later in the file. find_package(HDF5 COMPONENTS C CXX) @@ -132,11 +193,6 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) #Set extra warning flags #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") -if (NOT DEBUG_MODE) - message("-- Setting Optimization flag: O3") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") -endif() - #define TIXML_USE_STL as a preproccersser macro to use the C++ standard library with TinyXML add_compile_definitions(TIXML_USE_STL) message("-- Setting Compile Definition: TIMXL_USE_STL") @@ -368,6 +424,9 @@ endif() # ------ TESTS EXECUTABLE ------ # Add the file that contains main (RunTests.cpp) and all test files. GoogleTest will only recognize them if they are # included in the executable. +target_compile_options(gtest PRIVATE -Wno-error=maybe-uninitialized) +target_compile_options(gtest_main PRIVATE -Wno-error=maybe-uninitialized) + add_executable(tests Testing/RunTests.cpp Testing/UnitTesting/OperationManagerTests.cpp From 8de52334198fa70ef178e9a3c71d642144de9100 Mon Sep 17 00:00:00 2001 From: Andrew Blake Madison Date: Fri, 7 Feb 2025 12:07:07 -0800 Subject: [PATCH 04/18] fix --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e230a5797..2dd507a4e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,7 @@ endif() ############################################################################################ if(NOT DEFINED TARGET_ARCH) set(TARGET_ARCH 37) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode=arch=compute_37,code=sm_37) endif() #Creates the Graphitti project with the correct languages, depending on if using GPU or not @@ -41,7 +42,8 @@ if(ENABLE_CUDA) add_compile_definitions(USE_GPU) #Specify the CUDA architecture / gencode that will be targeted ### Set gencode and architecture variables to the correct values for your specific NVIDIA hardware - set(CMAKE_CUDA_ARCHITECTURES ${TARGET_ARCH}) + set(CMAKE_CUDA_ARCHITECTURES ${TARGET_ARCH}) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode=arch=compute_${TARGET_ARCH},code=sm_${TARGET_ARCH}) message(STATUS "Using CUDA architecture: ${TARGET_ARCH}") else() From 4beaabaf3efb8dbe58cf1836ccc52f0c06b1d0a6 Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 7 Feb 2025 13:13:04 -0800 Subject: [PATCH 05/18] fix --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2dd507a4e..69fd9fe43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,7 +30,6 @@ endif() ############################################################################################ if(NOT DEFINED TARGET_ARCH) set(TARGET_ARCH 37) - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode=arch=compute_37,code=sm_37) endif() #Creates the Graphitti project with the correct languages, depending on if using GPU or not From 424f6c2ece5fe730b08f65c33913c419a81e5d36 Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 7 Feb 2025 13:35:00 -0800 Subject: [PATCH 06/18] revert changes to support legacy cuda --- CMakeLists.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 69fd9fe43..1d611e01e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,10 @@ if(NOT ENABLE_CUDA) set(ENABLE_CUDA NO) endif() +#CONDITIONAL FLAG to turn on the performance metrics +if(NOT PERFORMANCE_METRICS) + set(PERFORMANCE_METRICS NO) +endif() ############################################################################################ #CONDITIONAL FLAG to change target architecture for the GPU simulator from the default @@ -37,6 +41,8 @@ endif() if(ENABLE_CUDA) message("\n----Generating Makefile for Graphitti GPU version----") project(Graphitti LANGUAGES CXX CUDA C) +#Verify CUDA package is present + find_Package(CUDA REQUIRED) #Set the USE_GPU preprocessor macro so that GPU code will be compiled. add_compile_definitions(USE_GPU) #Specify the CUDA architecture / gencode that will be targeted @@ -80,7 +86,7 @@ endif() # If you don't want to pass in the build type flag, you can edit this file and add... # set(CMAKE_BUILD_TYPE "Debug") or whichever build type you want # ----------------------------------------------------------------------------- -set(CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo;MinSizeRel;Profiling" CACHE STRING "Supported build types" FORCE) +set(CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo;Profiling" CACHE STRING "Supported build types" FORCE) # Ensure single-config generators use a valid default if(NOT CMAKE_BUILD_TYPE) From 920b649117e2a150477744efdf128b952692d044 Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 7 Feb 2025 13:36:52 -0800 Subject: [PATCH 07/18] cleanup --- CMakeLists.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d611e01e..9a2b9902d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,12 +56,6 @@ else() project(Graphitti LANGUAGES CXX C) endif() -#CONDITIONAL FLAG to turn on the performance metrics -if(NOT PERFORMANCE_METRICS) - set(PERFORMANCE_METRICS NO) -endif() - - # ----------------------------------------------------------------------------- # Build Type Configuration # From 08b49923841d30a8d659943b0966d59fe6a02c2e Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 7 Feb 2025 13:57:17 -0800 Subject: [PATCH 08/18] added documentation for using clang integration with vscode --- docs/Developer/codingConventions.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/Developer/codingConventions.md b/docs/Developer/codingConventions.md index 05fb68934..95d679523 100644 --- a/docs/Developer/codingConventions.md +++ b/docs/Developer/codingConventions.md @@ -80,6 +80,23 @@ clang-format -i fileName * If you want to make changes to the clang-format file options themselves, then visit the [clang-format options online documentation](https://clang.llvm.org/docs/ClangFormatStyleOptions.html) +### running clang-format inside VSCode + +If you want to set clang as the default formatter in VSCode and automatically run clang-format on files as they're saved, you can add this to your settings.json + +```json +{ + "editor.defaultFormatter": "xaver.clang-format", + "clang-format.style": "file", + "clang-format.executable": "/usr/bin/clang-format", + "editor.formatOnSave": true, + "files.associations": { + "ostream": "cpp" + } +} +``` + +Change the file path to wherever you have installed clang-format --------- [<< Go back to the Developer Documentation page](index.md) From f0ae7271aaac1c1a19e75685141581f4f707496d Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 7 Feb 2025 14:50:52 -0800 Subject: [PATCH 09/18] added nvtx support --- CMakeLists.txt | 28 ++++++++++++++++++++++++++-- Simulator/Core/GPUModel.cpp | 3 +++ Simulator/Utils/NvtxHelper.cpp | 21 +++++++++++++++++++++ Simulator/Utils/NvtxHelper.h | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 Simulator/Utils/NvtxHelper.cpp create mode 100644 Simulator/Utils/NvtxHelper.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a2b9902d..5db1de924 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -355,9 +355,33 @@ add_library(RNG STATIC ${RNG_Source}) # Create Utils library -file(GLOB Utils_Source Simulator/Utils/*.cpp Simulator/Utils/*.h) +file(GLOB Utils_Source Simulator/Utils/*.cpp Simulator/Utils/*.h) list(REMOVE_ITEM Utils_Source "${CMAKE_CURRENT_SOURCE_DIR}/Simulator/Utils/Factory.cpp") -add_library(Utils ${Utils_Source}) + +if(CMAKE_BUILD_TYPE STREQUAL "Profiling") + if(ENABLE_CUDA) +# Find NVTX Library + find_library(NVTX_LIBRARY nvToolsExt) + if(NVTX_LIBRARY) + message(STATUS "Found NVTX: ${NVTX_LIBRARY} included in Profiling") + add_compile_definitions(ENABLE_NVTX) + else() + message(STATUS "NVTX library not found! Not included in Profiling.") + list(REMOVE_ITEM Utils_Source "${CMAKE_CURRENT_SOURCE_DIR}/Simulator/Utils/NvtxHelper.cpp") + endif() + endif() + +else() + list(REMOVE_ITEM Utils_Source "${CMAKE_CURRENT_SOURCE_DIR}/Simulator/Utils/NvtxHelper.cpp") +endif() + +# Always create the Utils library (even if NVTX and CUDA are missing) +add_library(Utils ${Utils_Source}) + +# Only link NVTX if it was found +if(NVTX_LIBRARY) + target_link_libraries(Utils PRIVATE ${NVTX_LIBRARY}) +endif() # Used to locate and run other CMakeLists.txt files from Third Party resources for further compilation of the project. diff --git a/Simulator/Core/GPUModel.cpp b/Simulator/Core/GPUModel.cpp index 2085c3928..c3ad697f4 100644 --- a/Simulator/Core/GPUModel.cpp +++ b/Simulator/Core/GPUModel.cpp @@ -13,6 +13,7 @@ #include "AllVertices.h" #include "Connections.h" #include "Global.h" +#include "NvtxHelper.h" #ifdef PERFORMANCE_METRICS float g_time; @@ -186,9 +187,11 @@ void GPUModel::calcSummationPoint() int blocksPerGrid = (Simulator::getInstance().getTotalVertices() + threadsPerBlock - 1) / threadsPerBlock; + nvtxPushColor("calcSummation", GREEN); calcSummationPointDevice<<>>( Simulator::getInstance().getTotalVertices(), allVerticesDevice_, synapseIndexMapDevice_, allEdgesDevice_); + nvtxPop(); } /// Update the connection of all the Neurons and Synapses of the simulation. diff --git a/Simulator/Utils/NvtxHelper.cpp b/Simulator/Utils/NvtxHelper.cpp new file mode 100644 index 000000000..3f4903abd --- /dev/null +++ b/Simulator/Utils/NvtxHelper.cpp @@ -0,0 +1,21 @@ +#include "NvtxHelper.h" +#include +#include + +void nvtxPushColor(const std::string &name, uint32_t color) +{ + nvtxEventAttributes_t eventAttrib = {}; + eventAttrib.version = NVTX_VERSION; + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + eventAttrib.colorType = NVTX_COLOR_ARGB; + eventAttrib.color = color; + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + eventAttrib.message.ascii = name.c_str(); + + nvtxRangePushEx(&eventAttrib); +} + +void nvtxPop() +{ + nvtxRangePop(); +} \ No newline at end of file diff --git a/Simulator/Utils/NvtxHelper.h b/Simulator/Utils/NvtxHelper.h new file mode 100644 index 000000000..3911f0f68 --- /dev/null +++ b/Simulator/Utils/NvtxHelper.h @@ -0,0 +1,34 @@ +#ifndef NVTX_HELPER_H +#define NVTX_HELPER_H + +#include +#include + +// Define NVTX colors (ARGB format) +#define RED 0xFFFF0000 // Red +#define GREEN 0xFF00FF00 // Green +#define BLUE 0xFF0000FF // Blue +#define YELLOW 0xFFFFFF00 // Yellow +#define ORANGE 0xFFFFA500 // Orange +#define PURPLE 0xFF800080 // Purple + +#ifdef ENABLE_NVTX + +// Function to push an NVTX range with a given name and color +void nvtxPushColor(const std::string &name, uint32_t color); + +// Function to pop the most recent NVTX range +void nvtxPop(); + +#else +inline void nvtxPushColor(const std::string &, uint32_t) +{ +} +inline void nvtxPop() +{ +} + +#endif // ENABLE_NVTX + + +#endif // NVTX_HELPER_H \ No newline at end of file From dbc37b3fa3491629abf35ba2768faafa5f5b5047 Mon Sep 17 00:00:00 2001 From: Andrew Date: Mon, 10 Feb 2025 14:29:24 -0800 Subject: [PATCH 10/18] changed defines to enum class --- Simulator/Core/GPUModel.cpp | 3 --- Simulator/Utils/NvtxHelper.cpp | 14 ++++++++++++-- Simulator/Utils/NvtxHelper.h | 27 +++++++++++++++++++-------- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/Simulator/Core/GPUModel.cpp b/Simulator/Core/GPUModel.cpp index c3ad697f4..2085c3928 100644 --- a/Simulator/Core/GPUModel.cpp +++ b/Simulator/Core/GPUModel.cpp @@ -13,7 +13,6 @@ #include "AllVertices.h" #include "Connections.h" #include "Global.h" -#include "NvtxHelper.h" #ifdef PERFORMANCE_METRICS float g_time; @@ -187,11 +186,9 @@ void GPUModel::calcSummationPoint() int blocksPerGrid = (Simulator::getInstance().getTotalVertices() + threadsPerBlock - 1) / threadsPerBlock; - nvtxPushColor("calcSummation", GREEN); calcSummationPointDevice<<>>( Simulator::getInstance().getTotalVertices(), allVerticesDevice_, synapseIndexMapDevice_, allEdgesDevice_); - nvtxPop(); } /// Update the connection of all the Neurons and Synapses of the simulation. diff --git a/Simulator/Utils/NvtxHelper.cpp b/Simulator/Utils/NvtxHelper.cpp index 3f4903abd..b067cd500 100644 --- a/Simulator/Utils/NvtxHelper.cpp +++ b/Simulator/Utils/NvtxHelper.cpp @@ -1,14 +1,24 @@ +/** + * @file NvtxHelper.cpp + * + * @ingroup Simulator/Utils + * + * @brief Helper functions to enable nvtx profiling + * When ENABLE_NVTX is false the functions are replaced with blank inline functions which are removed by the compiler + * This file is only included in the utils library when ENABLE_CUDA=YES + */ + #include "NvtxHelper.h" #include #include -void nvtxPushColor(const std::string &name, uint32_t color) +void nvtxPushColor(const std::string &name, Color pColor) { nvtxEventAttributes_t eventAttrib = {}; eventAttrib.version = NVTX_VERSION; eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; eventAttrib.colorType = NVTX_COLOR_ARGB; - eventAttrib.color = color; + eventAttrib.color = static_cast(pColor); eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; eventAttrib.message.ascii = name.c_str(); diff --git a/Simulator/Utils/NvtxHelper.h b/Simulator/Utils/NvtxHelper.h index 3911f0f68..487b9d755 100644 --- a/Simulator/Utils/NvtxHelper.h +++ b/Simulator/Utils/NvtxHelper.h @@ -1,3 +1,12 @@ +/** + * @file NvtxHelper.h + * + * @ingroup Simulator/Utils + * + * @brief Helper functions to enable nvtx profiling + * When ENABLE_NVTX is false the functions are replaced with blank inline functions which are removed by the compiler + */ + #ifndef NVTX_HELPER_H #define NVTX_HELPER_H @@ -5,23 +14,25 @@ #include // Define NVTX colors (ARGB format) -#define RED 0xFFFF0000 // Red -#define GREEN 0xFF00FF00 // Green -#define BLUE 0xFF0000FF // Blue -#define YELLOW 0xFFFFFF00 // Yellow -#define ORANGE 0xFFFFA500 // Orange -#define PURPLE 0xFF800080 // Purple +enum class Color : std::uint32_t { + RED = 0xFFFF0000, + GREEN = 0xFF00FF00, + BLUE = 0xFF0000FF, + YELLOW = 0xFFFFFF00, + ORANGE = 0xFFFFA500, + PURPLE = 0xFF800080 +}; #ifdef ENABLE_NVTX // Function to push an NVTX range with a given name and color -void nvtxPushColor(const std::string &name, uint32_t color); +void nvtxPushColor(const std::string &name, Color pColor); // Function to pop the most recent NVTX range void nvtxPop(); #else -inline void nvtxPushColor(const std::string &, uint32_t) +inline void nvtxPushColor(const std::string &, Color) { } inline void nvtxPop() From bb6a77ddb489f1fc94a33970921ee03bce7e79e6 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 11 Feb 2025 11:52:22 -0800 Subject: [PATCH 11/18] implemented validation mode --- CMakeLists.txt | 3 +++ Simulator/Core/GPUModel.cpp | 13 ++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5db1de924..9ac4b2941 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -526,3 +526,6 @@ target_link_libraries(serialSecondHalfTest combinedLib) unset(ENABLE_CUDA CACHE) unset(PERFORMANCE_METRICS CACHE) unset(GPROF CACHE) +unset(CMAKE_BUILD_TYPE CACHE) +unset(NVTX_LIBRARY CACHE) +unset(TARGET_ARCH CACHE) \ No newline at end of file diff --git a/Simulator/Core/GPUModel.cpp b/Simulator/Core/GPUModel.cpp index 2085c3928..ffc61f9b0 100644 --- a/Simulator/Core/GPUModel.cpp +++ b/Simulator/Core/GPUModel.cpp @@ -144,7 +144,18 @@ void GPUModel::advance() AllVertices &neurons = layout_->getVertices(); AllEdges &synapses = connections_->getEdges(); - normalMTGPU(randNoise_d); + //#ifdef VALIDATION_MODE + int verts = Simulator::getInstance().getTotalVertices(); + std::vector randNoise_h(verts); + for (int i = 0; i < verts; i++) { + randNoise_h[i] = noiseRNG->rand(); + } + cudaMemcpy(randNoise_d, randNoise_h.data(), verts * sizeof(float), cudaMemcpyHostToDevice); + //#endif // VALIDATION_MODE + + //#else VALIDATION_MODE + // normalMTGPU(randNoise_d); + //#endif #ifdef PERFORMANCE_METRICS cudaLapTime(t_gpu_rndGeneration); From 87af4df08fd2b543e8fb01649602ace976cdf2d9 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 11 Feb 2025 11:53:25 -0800 Subject: [PATCH 12/18] clear new flags from cache --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5db1de924..9ac4b2941 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -526,3 +526,6 @@ target_link_libraries(serialSecondHalfTest combinedLib) unset(ENABLE_CUDA CACHE) unset(PERFORMANCE_METRICS CACHE) unset(GPROF CACHE) +unset(CMAKE_BUILD_TYPE CACHE) +unset(NVTX_LIBRARY CACHE) +unset(TARGET_ARCH CACHE) \ No newline at end of file From 9ab3af6462507d2a7b318bce67ba15e038d40b46 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 11 Feb 2025 12:13:37 -0800 Subject: [PATCH 13/18] test --- Simulator/Core/GPUModel.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Simulator/Core/GPUModel.cpp b/Simulator/Core/GPUModel.cpp index ffc61f9b0..bc1d275d1 100644 --- a/Simulator/Core/GPUModel.cpp +++ b/Simulator/Core/GPUModel.cpp @@ -145,16 +145,16 @@ void GPUModel::advance() AllEdges &synapses = connections_->getEdges(); //#ifdef VALIDATION_MODE - int verts = Simulator::getInstance().getTotalVertices(); - std::vector randNoise_h(verts); - for (int i = 0; i < verts; i++) { - randNoise_h[i] = noiseRNG->rand(); - } - cudaMemcpy(randNoise_d, randNoise_h.data(), verts * sizeof(float), cudaMemcpyHostToDevice); + // int verts = Simulator::getInstance().getTotalVertices(); + // std::vector randNoise_h(verts); + // for (int i = 0; i < verts; i++) { + // randNoise_h[i] = noiseRNG->rand(); + // } + // cudaMemcpy(randNoise_d, randNoise_h.data(), verts * sizeof(float), cudaMemcpyHostToDevice); //#endif // VALIDATION_MODE //#else VALIDATION_MODE - // normalMTGPU(randNoise_d); + normalMTGPU(randNoise_d); //#endif #ifdef PERFORMANCE_METRICS From 8f1b4a8fd78131e3ce5caaa1bbcab5eed3ac54b8 Mon Sep 17 00:00:00 2001 From: Andrew Date: Thu, 27 Feb 2025 18:41:20 -0800 Subject: [PATCH 14/18] Validation mode logging implementation --- CMakeLists.txt | 15 ++++- Simulator/Core/GPUModel.cpp | 63 ++++++++++++++++---- Simulator/Core/GPUModel.h | 5 ++ Simulator/Core/Simulator.cpp | 1 + Simulator/Vertices/AllVertices.cpp | 1 + Simulator/Vertices/AllVertices.h | 3 + Simulator/Vertices/Neuro/AllIFNeurons_d.cpp | 6 ++ Simulator/Vertices/Neuro/AllIZHNeurons.cpp | 18 ++++-- Simulator/Vertices/Neuro/AllLIFNeurons.cpp | 43 ++++++++----- Simulator/Vertices/Neuro/AllLIFNeurons.h | 3 +- Simulator/Vertices/Neuro/AllLIFNeurons_d.cpp | 4 +- 11 files changed, 127 insertions(+), 35 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ac4b2941..995c34067 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,11 @@ if(NOT DEFINED TARGET_ARCH) set(TARGET_ARCH 37) endif() +#CONDITIONAL FLAG to turn on the validation mode +if(NOT VALIDATION_MODE) + set(VALIDATION_MODE NO) +endif() + #Creates the Graphitti project with the correct languages, depending on if using GPU or not #If using CUDA, also verify the CUDA package and set the required CUDA variables if(ENABLE_CUDA) @@ -102,7 +107,7 @@ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -pg") if(CMAKE_BUILD_TYPE STREQUAL "Debug") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_DEBUG}") if(ENABLE_CUDA) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G") endif() elseif(CMAKE_BUILD_TYPE STREQUAL "Release") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_RELEASE}") @@ -150,6 +155,11 @@ if(PERFORMANCE_METRICS) add_definitions(-DPERFORMANCE_METRICS) endif() +if(VALIDATION_MODE) + message("-- Setting VALIDATION_MODE: ON") + add_definitions(-DVALIDATION_MODE) +endif() + #HDF5 Support, finds HDF5 package for C and C++ and links the hdf5 libraries to the executable \ # later in the file. find_package(HDF5 COMPONENTS C CXX) @@ -528,4 +538,5 @@ unset(PERFORMANCE_METRICS CACHE) unset(GPROF CACHE) unset(CMAKE_BUILD_TYPE CACHE) unset(NVTX_LIBRARY CACHE) -unset(TARGET_ARCH CACHE) \ No newline at end of file +unset(TARGET_ARCH CACHE) +unset(VALIDATION_MODE CACHE) \ No newline at end of file diff --git a/Simulator/Core/GPUModel.cpp b/Simulator/Core/GPUModel.cpp index bc1d275d1..d8c1cc533 100644 --- a/Simulator/Core/GPUModel.cpp +++ b/Simulator/Core/GPUModel.cpp @@ -13,7 +13,10 @@ #include "AllVertices.h" #include "Connections.h" #include "Global.h" - +#ifdef VALIDATION_MODE + #include "AllIFNeurons.h" + #include "OperationManager.h" +#endif #ifdef PERFORMANCE_METRICS float g_time; cudaEvent_t start, stop; @@ -144,19 +147,21 @@ void GPUModel::advance() AllVertices &neurons = layout_->getVertices(); AllEdges &synapses = connections_->getEdges(); - //#ifdef VALIDATION_MODE - // int verts = Simulator::getInstance().getTotalVertices(); - // std::vector randNoise_h(verts); +#ifdef VALIDATION_MODE + int verts = Simulator::getInstance().getTotalVertices(); + std::vector randNoise_h(verts); + for (int i = verts - 1; i >= 0; i--) { + randNoise_h[i] = (*noiseRNG)(); + } + //static int testNumbers = 0; // for (int i = 0; i < verts; i++) { - // randNoise_h[i] = noiseRNG->rand(); + // outFile << "index: " << i << " " << randNoise_h[i] << endl; // } - // cudaMemcpy(randNoise_d, randNoise_h.data(), verts * sizeof(float), cudaMemcpyHostToDevice); - //#endif // VALIDATION_MODE - - //#else VALIDATION_MODE + cudaMemcpy(randNoise_d, randNoise_h.data(), verts * sizeof(float), cudaMemcpyHostToDevice); +#else normalMTGPU(randNoise_d); - //#endif - +#endif +//LOG4CPLUS_DEBUG(vertexLogger_, "Index: " << index << " Vm: " << Vm); #ifdef PERFORMANCE_METRICS cudaLapTime(t_gpu_rndGeneration); cudaStartTimer(); @@ -167,7 +172,43 @@ void GPUModel::advance() dynamic_cast(neurons).advanceVertices(connections_->getEdges(), allVerticesDevice_, allEdgesDevice_, randNoise_d, synapseIndexMapDevice_); +#ifdef VALIDATION_MODE + //(AllIFNeuronsDeviceProperties *)allVerticesDevice, + log4cplus::Logger vertexLogger_ = log4cplus::Logger::getInstance(LOG4CPLUS_TEXT("vertex")); + std::vector sp_h(verts); + std::vector vm_h(verts); + std::vector Inoise_h(verts); + HANDLE_ERROR(cudaMemcpy(sp_h.data(), allVerticesDevice_->summationPoints_, verts * sizeof(float), + cudaMemcpyDeviceToHost)); + HANDLE_ERROR( + cudaMemcpy(sp_h.data(), randNoise_d, verts * sizeof(float), cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaMemcpy(vm_h.data(), ((AllIFNeuronsDeviceProperties *)(allVerticesDevice_))->Vm_, + verts * sizeof(float), cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaMemcpy(Inoise_h.data(), + ((AllIFNeuronsDeviceProperties *)(allVerticesDevice_))->Inoise_, + verts * sizeof(float), cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaMemcpy(sp_h.data(), allVerticesDevice_->spValidation_, verts * sizeof(float), + cudaMemcpyDeviceToHost)); + for (int i = 0; i < verts; i++) { + LOG4CPLUS_DEBUG(vertexLogger_, "CUDA advance Index[ " + << i << "] :: Noise = " << randNoise_h[i] + << "\tVm: " << vm_h[i] << endl + << "\tsp = " << sp_h[i] << endl + << "\tInoise = " << Inoise_h[i] << endl); + } +#endif +//LOG4CPLUS_DEBUG(vertexLogger_, "ADVANCE NEURON LIF[" << index << "] :: Noise = " << noise); +//LOG4CPLUS_DEBUG(vertexLogger_, "Index: " << index << " Vm: " << Vm); +// LOG4CPLUS_DEBUG(vertexLogger_, "NEURON[" << index << "] {" << endl +// << "\tVm = " << Vm << endl +// << "\tVthresh = " << Vthresh << endl +// << "\tsummationPoint = " << summationPoint << endl +// << "\tI0 = " << I0 << endl +// << "\tInoise = " << Inoise << endl +// << "\tC1 = " << C1 << endl +// << "\tC2 = " << C2 << endl +// << "}" << endl); #ifdef PERFORMANCE_METRICS cudaLapTime(t_gpu_advanceNeurons); cudaStartTimer(); diff --git a/Simulator/Core/GPUModel.h b/Simulator/Core/GPUModel.h index fc5254d68..5dc857128 100644 --- a/Simulator/Core/GPUModel.h +++ b/Simulator/Core/GPUModel.h @@ -38,6 +38,11 @@ #include "AllSpikingNeurons.h" #include "AllSpikingSynapses.h" +#ifdef VALIDATION_MODE + #include + #include +#endif // VALIDATION_MODE + #ifdef __CUDACC__ #include "Book.h" #endif diff --git a/Simulator/Core/Simulator.cpp b/Simulator/Core/Simulator.cpp index 1e5fae4c4..e4a42d9df 100644 --- a/Simulator/Core/Simulator.cpp +++ b/Simulator/Core/Simulator.cpp @@ -31,6 +31,7 @@ Simulator::Simulator() consoleLogger_ = log4cplus::Logger::getInstance(LOG4CPLUS_TEXT("console")); fileLogger_ = log4cplus::Logger::getInstance(LOG4CPLUS_TEXT("file")); + fileLogger_.setLogLevel(log4cplus::DEBUG_LOG_LEVEL); edgeLogger_ = log4cplus::Logger::getInstance(LOG4CPLUS_TEXT("edge")); workbenchLogger_ = log4cplus::Logger::getInstance(LOG4CPLUS_TEXT("workbench")); diff --git a/Simulator/Vertices/AllVertices.cpp b/Simulator/Vertices/AllVertices.cpp index c8d907666..c97165541 100644 --- a/Simulator/Vertices/AllVertices.cpp +++ b/Simulator/Vertices/AllVertices.cpp @@ -25,6 +25,7 @@ AllVertices::AllVertices() : size_(0) // Get a copy of the file and vertex logger to use log4cplus macros to print to debug files fileLogger_ = log4cplus::Logger::getInstance(LOG4CPLUS_TEXT("file")); vertexLogger_ = log4cplus::Logger::getInstance(LOG4CPLUS_TEXT("vertex")); + vertexLogger_.setLogLevel(log4cplus::DEBUG_LOG_LEVEL); } /// Setup the internal structure of the class (allocate memories). diff --git a/Simulator/Vertices/AllVertices.h b/Simulator/Vertices/AllVertices.h index b929dbd0b..4309406cf 100644 --- a/Simulator/Vertices/AllVertices.h +++ b/Simulator/Vertices/AllVertices.h @@ -155,6 +155,9 @@ struct AllVerticesDeviceProperties { /// On the next advance cycle, vertices add the values stored in their corresponding /// summation points to their Vm and resets the summation points to zero BGFLOAT *summationPoints_; + #ifdef VALIDATION_MODE + BGFLOAT *spValidation_; + #endif }; #endif // defined(USE_GPU) diff --git a/Simulator/Vertices/Neuro/AllIFNeurons_d.cpp b/Simulator/Vertices/Neuro/AllIFNeurons_d.cpp index ec9c1341d..8553b0152 100644 --- a/Simulator/Vertices/Neuro/AllIFNeurons_d.cpp +++ b/Simulator/Vertices/Neuro/AllIFNeurons_d.cpp @@ -50,6 +50,9 @@ void AllIFNeurons::allocDeviceStruct(AllIFNeuronsDeviceProperties &allVerticesDe HANDLE_ERROR( cudaMalloc((void **)&allVerticesDevice.numStepsInRefractoryPeriod_, count * sizeof(int))); HANDLE_ERROR(cudaMalloc((void **)&allVerticesDevice.summationPoints_, count * sizeof(BGFLOAT))); +#ifdef VALIDATION_MODE + HANDLE_ERROR(cudaMalloc((void **)&allVerticesDevice.spValidation_, count * sizeof(BGFLOAT))); +#endif HANDLE_ERROR(cudaMalloc((void **)&allVerticesDevice.spikeHistory_, count * sizeof(uint64_t *))); uint64_t *pSpikeHistory[count]; @@ -107,6 +110,9 @@ void AllIFNeurons::deleteDeviceStruct(AllIFNeuronsDeviceProperties &allVerticesD HANDLE_ERROR(cudaFree(allVerticesDevice.hasFired_)); HANDLE_ERROR(cudaFree(allVerticesDevice.numStepsInRefractoryPeriod_)); HANDLE_ERROR(cudaFree(allVerticesDevice.summationPoints_)); +#ifdef VALIDATION_MODE + HANDLE_ERROR(cudaFree(allVerticesDevice.spValidation_)); +#endif HANDLE_ERROR(cudaFree(allVerticesDevice.spikeHistory_)); } diff --git a/Simulator/Vertices/Neuro/AllIZHNeurons.cpp b/Simulator/Vertices/Neuro/AllIZHNeurons.cpp index 8f38f4cf0..50d368658 100644 --- a/Simulator/Vertices/Neuro/AllIZHNeurons.cpp +++ b/Simulator/Vertices/Neuro/AllIZHNeurons.cpp @@ -224,17 +224,27 @@ void AllIZHNeurons::advanceNeuron(int index) BGFLOAT &u = this->u_[index]; if (nStepsInRefr > 0) { - // is neuron refractory? + // is neuron refractory? + #ifdef VALIDATION_MODE + BGFLOAT noise = (*noiseRNG)(); + LOG4CPLUS_DEBUG(vertexLogger_, "REFRACTORY NEURON IZH[" << index << "] :: Noise = " << noise); + #endif --nStepsInRefr; } else if (Vm >= Vthresh) { - // should it fire? + // should it fire? + #ifdef VALIDATION_MODE + BGFLOAT noise = (*noiseRNG)(); + LOG4CPLUS_DEBUG(vertexLogger_, "FIRE NEURON IZH[" << index << "] :: Noise = " << noise); + #endif fire(index); } else { summationPoint += I0; // add IO // add noise BGFLOAT noise = (*noiseRNG)(); - // Happens really often, causes drastic slow down - // DEBUG_MID(cout << "ADVANCE NEURON[" << index << "] :: noise = " << noise << endl;) + // Happens really often, causes drastic slow down + #ifdef VALIDATION_MODE + LOG4CPLUS_DEBUG(vertexLogger_, "ADVANCE NEURON IZH[" << index << "] :: Noise = " << noise); + #endif summationPoint += noise * Inoise; // add noise BGFLOAT Vint = Vm * 1000; diff --git a/Simulator/Vertices/Neuro/AllLIFNeurons.cpp b/Simulator/Vertices/Neuro/AllLIFNeurons.cpp index 8fde11da2..e770592a9 100644 --- a/Simulator/Vertices/Neuro/AllLIFNeurons.cpp +++ b/Simulator/Vertices/Neuro/AllLIFNeurons.cpp @@ -33,33 +33,46 @@ void AllLIFNeurons::advanceNeuron(int index) int &nStepsInRefr = this->numStepsInRefractoryPeriod_[index]; if (nStepsInRefr > 0) { - // is neuron refractory? + // is neuron refractory? + #ifdef VALIDATION_MODE + BGFLOAT noise = (*noiseRNG)(); + LOG4CPLUS_DEBUG(vertexLogger_, "neuron refractory LIF[" << index << "] :: Noise = " << noise); + #endif --nStepsInRefr; } else if (Vm >= Vthresh) { - // should it fire? + // should it fire? + #ifdef VALIDATION_MODE + BGFLOAT noise = (*noiseRNG)(); + LOG4CPLUS_DEBUG(vertexLogger_, "FIRE NEURON LIF[" << index << "] :: Noise = " << noise); + #endif fire(index); } else { summationPoint += I0; // add IO // add noise BGFLOAT noise = (*noiseRNG)(); - //LOG4CPLUS_DEBUG(vertexLogger_, "ADVANCE NEURON[" << index << "] :: Noise = " << noise); + #ifdef VALIDATION_MODE + LOG4CPLUS_DEBUG(vertexLogger_, "ADVANCE NEURON LIF[" << index << "] :: Noise = " << noise); + #endif summationPoint += noise * Inoise; // add noise Vm = C1 * Vm + C2 * summationPoint; // decay Vm and add inputs } - // clear synaptic input for next time step - summationPoint = 0; // Causes a huge slowdown since it's printed so frequently - // LOG4CPLUS_DEBUG(vertexLogger_, "Index: " << index << " Vm: " << Vm); - // LOG4CPLUS_DEBUG(vertexLogger_, "NEURON[" << index << "] {" << endl - // << "\tVm = " << Vm << endl - // << "\tVthresh = " << Vthresh << endl - // << "\tsummationPoint = " << summationPoint << endl - // << "\tI0 = " << I0 << endl - // << "\tInoise = " << Inoise << endl - // << "\tC1 = " << C1 << endl - // << "\tC2 = " << C2 << endl - // << "}" << endl); + #ifdef VALIDATION_MODE + LOG4CPLUS_DEBUG(vertexLogger_, "Index: " << index << " Vm: " << Vm); + LOG4CPLUS_DEBUG(vertexLogger_, "NEURON[" << index << "] {" << endl + << "\tVm = " << Vm << endl + << "\tVthresh = " << Vthresh << endl + << "\tsummationPoint = " << summationPoint << endl + << "\tI0 = " << I0 << endl + << "\tInoise = " << Inoise << endl + << "\tC1 = " << C1 << endl + << "\tC2 = " << C2 << endl + << "}" << endl); + #endif + + // clear synaptic input for next time step + summationPoint = 0; } /// Fire the selected Neuron and calculate the result. diff --git a/Simulator/Vertices/Neuro/AllLIFNeurons.h b/Simulator/Vertices/Neuro/AllLIFNeurons.h index 63bde6618..a4007c50e 100644 --- a/Simulator/Vertices/Neuro/AllLIFNeurons.h +++ b/Simulator/Vertices/Neuro/AllLIFNeurons.h @@ -113,7 +113,7 @@ class AllLIFNeurons : public AllIFNeurons { /// @param edgeIndexMapDevice GPU address of the EdgeIndexMap on device memory. virtual void advanceVertices(AllEdges &synapses, void *allVerticesDevice, void *allEdgesDevice, float randNoise[], EdgeIndexMapDevice *edgeIndexMapDevice) override; -#else // !defined(USE_GPU) +#else // !defined(USE_GPU) protected: /// Helper for #advanceNeuron. Updates state of a single neuron. /// @@ -124,7 +124,6 @@ class AllLIFNeurons : public AllIFNeurons { /// /// @param index Index of the neuron to fire. virtual void fire(int index); - #endif // defined(USE_GPU) }; diff --git a/Simulator/Vertices/Neuro/AllLIFNeurons_d.cpp b/Simulator/Vertices/Neuro/AllLIFNeurons_d.cpp index 295352a50..25fcdf3be 100644 --- a/Simulator/Vertices/Neuro/AllLIFNeurons_d.cpp +++ b/Simulator/Vertices/Neuro/AllLIFNeurons_d.cpp @@ -181,7 +181,9 @@ __global__ void advanceLIFNeuronsDevice(int totalVertices, int maxEdges, int max vm = allVerticesDevice->C1_[idx] * r_vm + allVerticesDevice->C2_[idx] * (r_sp); // decay Vm and add inputs } - +#ifdef VALIDATION_MODE + allVerticesDevice->spValidation_[idx] = r_sp; +#endif // clear synaptic input for next time step sp = 0; } From fb5bb3fe9247b1f9bec1055f6c68da6cfff42dec Mon Sep 17 00:00:00 2001 From: Andrew Date: Thu, 27 Feb 2025 21:13:42 -0800 Subject: [PATCH 15/18] fixed cudaMemcpy bugs --- Simulator/Core/GPUModel.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/Simulator/Core/GPUModel.cpp b/Simulator/Core/GPUModel.cpp index d8c1cc533..f3a9f1dfc 100644 --- a/Simulator/Core/GPUModel.cpp +++ b/Simulator/Core/GPUModel.cpp @@ -178,16 +178,14 @@ void GPUModel::advance() std::vector sp_h(verts); std::vector vm_h(verts); std::vector Inoise_h(verts); - HANDLE_ERROR(cudaMemcpy(sp_h.data(), allVerticesDevice_->summationPoints_, verts * sizeof(float), + AllIFNeuronsDeviceProperties validationNeurons; + HANDLE_ERROR(cudaMemcpy((void *)&validationNeurons, allVerticesDevice_, + sizeof(AllIFNeuronsDeviceProperties), cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaMemcpy(sp_h.data(), validationNeurons.spValidation_, verts * sizeof(float), cudaMemcpyDeviceToHost)); - HANDLE_ERROR( - cudaMemcpy(sp_h.data(), randNoise_d, verts * sizeof(float), cudaMemcpyDeviceToHost)); - HANDLE_ERROR(cudaMemcpy(vm_h.data(), ((AllIFNeuronsDeviceProperties *)(allVerticesDevice_))->Vm_, - verts * sizeof(float), cudaMemcpyDeviceToHost)); - HANDLE_ERROR(cudaMemcpy(Inoise_h.data(), - ((AllIFNeuronsDeviceProperties *)(allVerticesDevice_))->Inoise_, - verts * sizeof(float), cudaMemcpyDeviceToHost)); - HANDLE_ERROR(cudaMemcpy(sp_h.data(), allVerticesDevice_->spValidation_, verts * sizeof(float), + HANDLE_ERROR(cudaMemcpy(vm_h.data(), validationNeurons.Vm_, verts * sizeof(float), + cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaMemcpy(Inoise_h.data(), validationNeurons.Inoise_, verts * sizeof(float), cudaMemcpyDeviceToHost)); for (int i = 0; i < verts; i++) { From 7fe76a7d75837ca8263cfed1d9f7c995090420a9 Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 28 Feb 2025 02:45:51 -0800 Subject: [PATCH 16/18] increased max file size for vertices log file and logged the validation vertices from the gpu in reverse order to match the reverse order operation of advancing vertices --- Simulator/Core/GPUModel.cpp | 2 +- build/RuntimeFiles/log4cplus_configure.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Simulator/Core/GPUModel.cpp b/Simulator/Core/GPUModel.cpp index f3a9f1dfc..43ec5fe79 100644 --- a/Simulator/Core/GPUModel.cpp +++ b/Simulator/Core/GPUModel.cpp @@ -188,7 +188,7 @@ void GPUModel::advance() HANDLE_ERROR(cudaMemcpy(Inoise_h.data(), validationNeurons.Inoise_, verts * sizeof(float), cudaMemcpyDeviceToHost)); - for (int i = 0; i < verts; i++) { + for (int i = verts - 1; i >= 0; i--) { LOG4CPLUS_DEBUG(vertexLogger_, "CUDA advance Index[ " << i << "] :: Noise = " << randNoise_h[i] << "\tVm: " << vm_h[i] << endl diff --git a/build/RuntimeFiles/log4cplus_configure.ini b/build/RuntimeFiles/log4cplus_configure.ini index 50566749a..47e15b211 100644 --- a/build/RuntimeFiles/log4cplus_configure.ini +++ b/build/RuntimeFiles/log4cplus_configure.ini @@ -38,7 +38,7 @@ log4cplus.appender.MyFileAppender.layout.ConversionPattern=[%-5p][%D{%Y/%m/%d %H #VertexFileAppender log4cplus.appender.VertexFileAppender=log4cplus::RollingFileAppender log4cplus.appender.VertexFileAppender.File=Output/Debug/vertices.txt -log4cplus.appender.VertexFileAppender.MaxFileSize=16MB +log4cplus.appender.VertexFileAppender.MaxFileSize=32MB log4cplus.appender.VertexFileAppender.MaxBackupIndex=1 log4cplus.appender.VertexFileAppender.layout=log4cplus::PatternLayout log4cplus.appender.VertexFileAppender.layout.ConversionPattern=[%-5p][%D{%Y/%m/%d %H:%M:%S:%q}][%-l] %m%n From 0c2164ff61aeb5a8243886d6fa5cad27a555426f Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 28 Feb 2025 11:00:46 -0800 Subject: [PATCH 17/18] matched log information in cgraphitti validation mode with ggraphitti validation mode to allow for easier comparison --- Simulator/Core/GPUModel.cpp | 6 ++-- Simulator/Vertices/Neuro/AllLIFNeurons.cpp | 38 +++++++++++++--------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/Simulator/Core/GPUModel.cpp b/Simulator/Core/GPUModel.cpp index 43ec5fe79..10e74a4e5 100644 --- a/Simulator/Core/GPUModel.cpp +++ b/Simulator/Core/GPUModel.cpp @@ -189,9 +189,9 @@ void GPUModel::advance() cudaMemcpyDeviceToHost)); for (int i = verts - 1; i >= 0; i--) { - LOG4CPLUS_DEBUG(vertexLogger_, "CUDA advance Index[ " - << i << "] :: Noise = " << randNoise_h[i] - << "\tVm: " << vm_h[i] << endl + LOG4CPLUS_DEBUG(vertexLogger_, endl + << "Advance Index[" << i << "] :: Noise = " + << randNoise_h[i] << "\tVm: " << vm_h[i] << endl << "\tsp = " << sp_h[i] << endl << "\tInoise = " << Inoise_h[i] << endl); } diff --git a/Simulator/Vertices/Neuro/AllLIFNeurons.cpp b/Simulator/Vertices/Neuro/AllLIFNeurons.cpp index e770592a9..752a9f8ad 100644 --- a/Simulator/Vertices/Neuro/AllLIFNeurons.cpp +++ b/Simulator/Vertices/Neuro/AllLIFNeurons.cpp @@ -32,26 +32,27 @@ void AllLIFNeurons::advanceNeuron(int index) BGFLOAT &C2 = this->C2_[index]; int &nStepsInRefr = this->numStepsInRefractoryPeriod_[index]; + BGFLOAT noise; if (nStepsInRefr > 0) { // is neuron refractory? #ifdef VALIDATION_MODE - BGFLOAT noise = (*noiseRNG)(); - LOG4CPLUS_DEBUG(vertexLogger_, "neuron refractory LIF[" << index << "] :: Noise = " << noise); + noise = (*noiseRNG)(); + //LOG4CPLUS_DEBUG(vertexLogger_, "neuron refractory LIF[" << index << "] :: Noise = " << noise); #endif --nStepsInRefr; } else if (Vm >= Vthresh) { // should it fire? #ifdef VALIDATION_MODE - BGFLOAT noise = (*noiseRNG)(); - LOG4CPLUS_DEBUG(vertexLogger_, "FIRE NEURON LIF[" << index << "] :: Noise = " << noise); + noise = (*noiseRNG)(); + //LOG4CPLUS_DEBUG(vertexLogger_, "FIRE NEURON LIF[" << index << "] :: Noise = " << noise); #endif fire(index); } else { summationPoint += I0; // add IO // add noise - BGFLOAT noise = (*noiseRNG)(); + noise = (*noiseRNG)(); #ifdef VALIDATION_MODE - LOG4CPLUS_DEBUG(vertexLogger_, "ADVANCE NEURON LIF[" << index << "] :: Noise = " << noise); + //LOG4CPLUS_DEBUG(vertexLogger_, "ADVANCE NEURON LIF[" << index << "] :: Noise = " << noise); #endif summationPoint += noise * Inoise; // add noise Vm = C1 * Vm + C2 * summationPoint; // decay Vm and add inputs @@ -59,16 +60,21 @@ void AllLIFNeurons::advanceNeuron(int index) // Causes a huge slowdown since it's printed so frequently #ifdef VALIDATION_MODE - LOG4CPLUS_DEBUG(vertexLogger_, "Index: " << index << " Vm: " << Vm); - LOG4CPLUS_DEBUG(vertexLogger_, "NEURON[" << index << "] {" << endl - << "\tVm = " << Vm << endl - << "\tVthresh = " << Vthresh << endl - << "\tsummationPoint = " << summationPoint << endl - << "\tI0 = " << I0 << endl - << "\tInoise = " << Inoise << endl - << "\tC1 = " << C1 << endl - << "\tC2 = " << C2 << endl - << "}" << endl); + LOG4CPLUS_DEBUG(vertexLogger_, endl + << "Advance Index[" << index << "] :: Noise = " << noise + << "\tVm: " << Vm << endl + << "\tsp = " << summationPoint << endl + << "\tInoise = " << Inoise << endl); + // LOG4CPLUS_DEBUG(vertexLogger_, "Index: " << index << " Vm: " << Vm); + // LOG4CPLUS_DEBUG(vertexLogger_, "NEURON[" << index << "] {" << endl + // << "\tVm = " << Vm << endl + // << "\tVthresh = " << Vthresh << endl + // << "\tsummationPoint = " << summationPoint << endl + // << "\tI0 = " << I0 << endl + // << "\tInoise = " << Inoise << endl + // << "\tC1 = " << C1 << endl + // << "\tC2 = " << C2 << endl + // << "}" << endl); #endif // clear synaptic input for next time step From be054b05fde455af4d2b2f9c36fbcd0784f88314 Mon Sep 17 00:00:00 2001 From: Andrew Date: Thu, 6 Mar 2025 18:32:11 -0800 Subject: [PATCH 18/18] updated summation point to reflect the changes Nicolas made --- Simulator/Vertices/Neuro/AllSpikingNeurons.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Simulator/Vertices/Neuro/AllSpikingNeurons.h b/Simulator/Vertices/Neuro/AllSpikingNeurons.h index f9938b4e4..675370a26 100644 --- a/Simulator/Vertices/Neuro/AllSpikingNeurons.h +++ b/Simulator/Vertices/Neuro/AllSpikingNeurons.h @@ -159,6 +159,9 @@ struct AllSpikingNeuronsDeviceProperties : public AllVerticesDeviceProperties { /// On the next advance cycle, vertices add the values stored in their corresponding /// summation points to their Vm and resets the summation points to zero BGFLOAT *summationPoints_; + #ifdef VALIDATION_MODE + BGFLOAT *spValidation_; + #endif }; #endif // defined(USE_GPU)