Commit 1cb50c8

Dev (#249)

* a bit readme
* try ncnn on raspberry pi3
* typo

1 parent 42eb265

File tree: 4 files changed, +69 -47 lines

ncnn/CMakeLists.txt

Lines changed: 2 additions & 1 deletion

````diff
@@ -7,9 +7,10 @@ set(CMAKE_CXX_FLAGS "-std=c++14 -O2")
 
 set (ncnn_DIR ${NCNN_ROOT}/lib/cmake/ncnn)
 find_package(OpenCV REQUIRED)
+find_package(OpenMP REQUIRED)
 find_package(ncnn REQUIRED)
 
 
 add_executable(segment segment.cpp)
 target_include_directories(segment PUBLIC ${OpenCV_INCLUDE_DIRS})
-target_link_libraries(segment ${OpenCV_LIBRARIES} ncnn)
+target_link_libraries(segment ${OpenCV_LIBRARIES} ncnn OpenMP::OpenMP_CXX)
````
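The new `find_package(OpenMP REQUIRED)` and the `OpenMP::OpenMP_CXX` link target are what let the `#pragma omp parallel for` and `omp.h` calls added to `segment.cpp` (below) compile and link with OpenMP enabled. A minimal sketch of that pattern, in a hypothetical stand-alone file `omp_check.cpp` that is not part of the repo:

```cpp
// omp_check.cpp -- hypothetical sanity check, not part of this commit.
// Linking against OpenMP::OpenMP_CXX (or compiling with -fopenmp) enables
// both the pragma and the omp_* runtime calls that segment.cpp now uses.
#include <omp.h>
#include <cstdio>

int main() {
    long sum = 0;
    // Same construct segment.cpp uses for its per-row post-processing loop.
    #pragma omp parallel for reduction(+:sum)
    for (int i = 0; i < 1000; ++i) {
        sum += i;
    }
    std::printf("threads available: %d, sum = %ld\n", omp_get_max_threads(), sum);
    return 0;
}
```

Without the OpenMP flags the pragma is ignored and the `omp_*` calls fail to link, which is presumably why the package is marked `REQUIRED` here.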

ncnn/README.md

Lines changed: 26 additions & 22 deletions

````diff
@@ -1,14 +1,10 @@
 
 ### My platform
 
-* ubuntu 18.04
-* Intel(R) Xeon(R) Gold 6240 CPU @ 2.60GHz
-* cmake 3.17.1
-* opencv built from source
+* raspberry pi 3b
+* armv8 4core cpu, 1G Memroy
+* 2022-04-04-raspios-bullseye-armhf-lite.img
 
-### NOTE
-
-Though this demo runs on x86 platform, you can also use it on mobile platforms. NCNN is better optimized on mobile platforms.
 
 
 ### Install ncnn
@@ -19,48 +15,56 @@ $ python -m pip install onnx-simplifier
 ```
 
 #### 2. build ncnn
-Just follow the ncnn official tutoral of [build-for-linux](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux) to install ncnn:
+Just follow the ncnn official tutoral of [build-for-linux](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux) to install ncnn. Following steps are all carried out on my raspberry pi:
 
 **step 1:** install dependencies
 ```
-# apt install build-essential git libprotobuf-dev protobuf-compiler
+$ sudo apt install build-essential git cmake libprotobuf-dev protobuf-compiler libopencv-dev
 ```
 
 **step 2:** (optional) install vulkan
 
-**step 3:** install opencv from source
-
-**step 4:** build
-I am using commit `9391fae741a1fb8d58cdfdc92878a5e9800f8567`, and I have not tested over newer commits.
+**step 3:** build
+I am using commit `5725c028c0980efd`, and I have not tested over other commits.
 ```
 $ git clone https://github.com/Tencent/ncnn.git
 $ cd ncnn
+$ git reset --hard 5725c028c0980efd
 $ git submodule update --init
 $ mkdir -p build
-$ cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc.toolchain.cmake ..
-$ make -j
+$ cmake -DCMAKE_BUILD_TYPE=Release -DNCNN_VULKAN=OFF -DNCNN_BUILD_TOOLS=ON -DCMAKE_TOOLCHAIN_FILE=../toolchains/pi3.toolchain.cmake ..
+$ make -j2
 $ make install
 ```
 
 ### Convert model, build and run the demo
 
 #### 1. convert pytorch model to ncnn model via onnx
+On your training platform:
 ```
 $ cd BiSeNet/
 $ python tools/export_onnx.py --aux-mode eval --config configs/bisenetv2_city.py --weight-path /path/to/your/model.pth --outpath ./model_v2.onnx
 $ python -m onnxsim model_v2.onnx model_v2_sim.onnx
+```
+
+Then copy your `model_v2_sim.onnx` from training platform to raspberry device.
+
+On raspberry device:
+```
 $ /path/to/ncnn/build/tools/onnx/onnx2ncnn model_v2_sim.onnx model_v2_sim.param model_v2_sim.bin
-$ mkdir -p ncnn/moidels
-$ mv model_v2_sim.param ncnn/models
-$ mv model_v2_sim.bin ncnn/models
+$ cd BiSeNet/ncnn/
+$ mkdir -p models
+$ mv model_v2_sim.param models/
+$ mv model_v2_sim.bin models/
 ```
 
 #### 2. compile demo code
+On raspberry device:
 ```
-mkdir -p ncnn/build
-cd ncnn/build
-cmake .. -DNCNN_ROOT=/path/to/ncnn/build/install
-make
+$ mkdir -p BiSeNet/ncnn/build
+$ cd BiSeNet/ncnn/build
+$ cmake .. -DNCNN_ROOT=/path/to/ncnn/build/install
+$ make
 ```
 
 #### 3. run demo
````
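Once `onnx2ncnn` has produced `model_v2_sim.param` and `model_v2_sim.bin` under `models/`, it can be worth checking on the Pi that the pair actually loads before building the full demo. A hedged sketch (hypothetical file `check_model.cpp`, not part of this commit; it assumes an ncnn build recent enough to expose `input_names()`/`output_names()`):

```cpp
// check_model.cpp -- hypothetical helper, not part of this commit.
// Loads the converted param/bin pair and lists its input/output blobs.
// Adjust the include path to match your ncnn install layout.
#include <ncnn/net.h>
#include <cstdio>

int main() {
    ncnn::Net net;
    // load_param/load_model return 0 on success.
    if (net.load_param("models/model_v2_sim.param") != 0) return 1;
    if (net.load_model("models/model_v2_sim.bin") != 0) return 1;

    for (const char* name : net.input_names())
        std::printf("input blob:  %s\n", name);
    for (const char* name : net.output_names())
        std::printf("output blob: %s\n", name);
    return 0;
}
```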

ncnn/segment.cpp

Lines changed: 33 additions & 16 deletions

````diff
@@ -5,11 +5,13 @@
 #include <opencv2/core/core.hpp>
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
+#include <omp.h>
 
 #include <iostream>
 #include <random>
 #include <algorithm>
 #include <stdio.h>
+#include <string>
 #include <vector>
 
 
@@ -29,7 +31,15 @@ int main(int argc, char** argv) {
 
 
 void inference() {
-    bool use_fp16 = false;
+    int nthreads = 4;
+    string mod_param = "../models/model_v2_sim.param";
+    string mod_model = "../models/model_v2_sim.bin";
+    int oH{512}, oW{1024}, n_classes{19};
+    float mean[3] = {0.3257f, 0.3690f, 0.3223f};
+    float var[3] = {0.2112f, 0.2148f, 0.2115f};
+    string impth = "../../example.png";
+    string savepth = "out.png";
+
     // load model
     ncnn::Net mod;
 #if NCNN_VULKAN
@@ -41,30 +51,32 @@ void inference() {
     mod.opt.use_vulkan_compute = 1;
     mod.set_vulkan_device(1);
 #endif
-    mod.load_param("../models/model_v2_sim.param");
-    mod.load_model("../models/model_v2_sim.bin");
-    mod.opt.use_fp16_packed = use_fp16;
-    mod.opt.use_fp16_storage = use_fp16;
-    mod.opt.use_fp16_arithmetic = use_fp16;
+    mod.load_param(mod_param.c_str());
+    mod.load_model(mod_model.c_str());
+    // ncnn enable fp16 by default, so we do not need these options
+    // int8 depends on the model itself, so we do not set here
+    // bool use_fp16 = false;
+    // mod.opt.use_fp16_packed = use_fp16;
+    // mod.opt.use_fp16_storage = use_fp16;
+    // mod.opt.use_fp16_arithmetic = use_fp16;
 
     // load image, and copy to ncnn mat
-    int oH{1024}, oW{2048}, n_classes{19};
-    float mean[3] = {0.3257f, 0.3690f, 0.3223f};
-    float var[3] = {0.2112f, 0.2148f, 0.2115f};
-    cv::Mat im = cv::imread("../../example.png");
+    cv::Mat im = cv::imread(impth);
     if (im.empty()) {
         fprintf(stderr, "cv::imread failed\n");
         return;
     }
+
     ncnn::Mat inp = ncnn::Mat::from_pixels_resize(
         im.data, ncnn::Mat::PIXEL_BGR, im.cols, im.rows, oW, oH);
     for (float &el : mean) el *= 255.;
-    for (float &el : var) el = 1. / (255. * el);
+    for (float &el : var) el = 1. / (255. * el);
     inp.substract_mean_normalize(mean, var);
 
     // set input, run, get output
     ncnn::Extractor ex = mod.create_extractor();
-    // ex.set_num_threads(1);
+    ex.set_light_mode(true); // not sure what this mean
+    ex.set_num_threads(nthreads);
 #if NCNN_VULKAN
     ex.set_vulkan_compute(true);
 #endif
@@ -76,14 +88,16 @@ void inference() {
     // generate colorful output, and dump
     vector<vector<uint8_t>> color_map = get_color_map();
     Mat pred(cv::Size(oW, oH), CV_8UC3);
-    for (int i{0}; i < oH; ++i) {
+    int offset = oH * oW;
+    omp_set_num_threads(omp_get_max_threads());
+    #pragma omp parallel for
+    for (int i=0; i < oH; ++i) {
         uint8_t *ptr = pred.ptr<uint8_t>(i);
         for (int j{0}; j < oW; ++j) {
             // compute argmax
-            int idx, offset, argmax{0};
+            int idx, argmax{0};
             float max;
             idx = i * oW + j;
-            offset = oH * oW;
             max = out[idx];
             for (int k{1}; k < n_classes; ++k) {
                 idx += offset;
@@ -99,7 +113,10 @@ void inference() {
             ptr += 3;
         }
     }
-    cv::imwrite("out.png", pred);
+    cv::imwrite(savepth, pred);
+
+    ex.clear(); // must have this, or error
+    mod.clear();
 
 }
 
````
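The hoisted `offset = oH * oW` in the rewritten loop relies on the output blob being laid out channel-major: the score of class `k` at pixel `(i, j)` sits at `k * oH * oW + i * oW + j`. A small sketch of that per-pixel argmax against a plain float buffer (the function name `argmax_label` is illustrative, not from the repo):

```cpp
// Hedged sketch of the per-pixel argmax used in segment.cpp, assuming the
// class scores are stored channel-major as out[k * H * W + i * W + j].
int argmax_label(const float* out, int H, int W, int n_classes, int i, int j) {
    const int offset = H * W;   // stride between consecutive class planes
    int idx = i * W + j;        // class 0 score for this pixel
    int argmax = 0;
    float max = out[idx];
    for (int k = 1; k < n_classes; ++k) {
        idx += offset;          // same pixel, next class plane
        if (out[idx] > max) {
            max = out[idx];
            argmax = k;
        }
    }
    return argmax;
}
```

Each row `i` writes to a disjoint row of `pred`, which is what makes the added `#pragma omp parallel for` over rows safe.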

tensorrt/README.md

Lines changed: 8 additions & 8 deletions

````diff
@@ -15,7 +15,7 @@ Then we can use either c++ or python to compile the model and run inference.
 
 ### Using C++
 
-#### My platform
+#### 1. My platform
 
 * ubuntu 18.04
 * nvidia Tesla T4 gpu, driver newer than 450.80
@@ -26,7 +26,7 @@ Then we can use either c++ or python to compile the model and run inference.
 
 
 
-#### Build with source code
+#### 2. Build with source code
 Just use the standard cmake build method:
 ```
 mkdir -p tensorrt/build
@@ -37,7 +37,7 @@ make
 This would generate a `./segment` in the `tensorrt/build` directory.
 
 
-#### Convert onnx to tensorrt model
+#### 3. Convert onnx to tensorrt model
 If you can successfully compile the source code, you can parse the onnx model to tensorrt model like this:
 ```
 $ ./segment compile /path/to/onnx.model /path/to/saved_model.trt
@@ -49,21 +49,21 @@ $ ./segment compile /path/to/onnx.model /path/to/saved_model.trt --fp16
 Note that I use the simplest method to parse the command line args, so please do **Not** change the order of the args in above command.
 
 
-#### Infer with one single image
+#### 4. Infer with one single image
 Run inference like this:
 ```
 $ ./segment run /path/to/saved_model.trt /path/to/input/image.jpg /path/to/saved_img.jpg
 ```
 
 
-#### Test speed
+#### 5. Test speed
 The speed depends on the specific gpu platform you are working on, you can test the fps on your gpu like this:
 ```
 $ ./segment test /path/to/saved_model.trt
 ```
 
 
-#### Tips:
+#### 6. Tips:
 1. ~Since tensorrt 7.0.0 cannot parse well the `bilinear interpolation` op exported from pytorch, I replace them with pytorch `nn.PixelShuffle`, which would bring some performance overhead(more flops and parameters), and make inference a bit slower. Also due to the `nn.PixelShuffle` op, you **must** export the onnx model with input size to be *n* times of 32.~
 If you are using 7.2.3.4 or newer versions, you should not have problem with `interpolate` anymore.
 
@@ -80,7 +80,7 @@ Likewise, you do not need to worry about this anymore with version newer than 7.
 You can also use python script to compile and run inference of your model.
 
 
-#### Compile model to onnx
+#### 1. Compile model to onnx
 
 With this command:
 ```
@@ -91,7 +91,7 @@ $ python segment.py compile --onnx /path/to/model.onnx --savepth ./model.trt --q
 This will compile onnx model into tensorrt serialized engine, save save to `./model.trt`.
 
 
-#### inference with Tensorrt
+#### 2. Inference with Tensorrt
 
 Run Inference like this:
 ```
````
