Skip to content

Commit a53b368

Browse files
Merge pull request #396 from MqCreaple/pull-request
Reopen pull request #395
2 parents 3950db9 + 546680b commit a53b368

File tree

4 files changed

+193
-8
lines changed

4 files changed

+193
-8
lines changed

apriltag.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ either expressed or implied, of the Regents of The University of Michigan.
4141
#include <errno.h>
4242

4343
#include "common/image_u8.h"
44+
#include "common/image_u8_parallel.h"
4445
#include "common/image_u8x3.h"
4546
#include "common/zarray.h"
4647
#include "common/matd.h"
@@ -1076,11 +1077,11 @@ zarray_t *apriltag_detector_detect(apriltag_detector_t *td, image_u8_t *im_orig)
10761077

10771078
if (td->quad_sigma > 0) {
10781079
// Apply a blur
1079-
image_u8_gaussian_blur(quad_im, sigma, ksz);
1080+
image_u8_gaussian_blur_parallel(td->wp, quad_im, sigma, ksz);
10801081
} else {
10811082
// SHARPEN the image by subtracting the low frequency components.
10821083
image_u8_t *orig = image_u8_copy(quad_im);
1083-
image_u8_gaussian_blur(quad_im, sigma, ksz);
1084+
image_u8_gaussian_blur_parallel(td->wp, quad_im, sigma, ksz);
10841085

10851086
for (int y = 0; y < orig->height; y++) {
10861087
for (int x = 0; x < orig->width; x++) {

common/image_u8.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -321,29 +321,29 @@ void image_u8_convolve_2D(image_u8_t *im, const uint8_t *k, int ksz)
321321
{
322322
assert((ksz & 1) == 1); // ksz must be odd.
323323

324+
uint8_t *x = malloc(sizeof(uint8_t)*im->stride);
324325
for (int y = 0; y < im->height; y++) {
325326

326-
uint8_t *x = malloc(sizeof(uint8_t)*im->stride);
327327
memcpy(x, &im->buf[y*im->stride], im->stride);
328328

329329
convolve(x, &im->buf[y*im->stride], im->width, k, ksz);
330-
free(x);
331330
}
331+
free(x);
332332

333+
uint8_t *xb = malloc(sizeof(uint8_t)*im->height);
334+
uint8_t *yb = malloc(sizeof(uint8_t)*im->height);
333335
for (int x = 0; x < im->width; x++) {
334-
uint8_t *xb = malloc(sizeof(uint8_t)*im->height);
335-
uint8_t *yb = malloc(sizeof(uint8_t)*im->height);
336336

337337
for (int y = 0; y < im->height; y++)
338338
xb[y] = im->buf[y*im->stride + x];
339339

340340
convolve(xb, yb, im->height, k, ksz);
341-
free(xb);
342341

343342
for (int y = 0; y < im->height; y++)
344343
im->buf[y*im->stride + x] = yb[y];
345-
free(yb);
346344
}
345+
free(xb);
346+
free(yb);
347347
}
348348

349349
void image_u8_gaussian_blur(image_u8_t *im, double sigma, int ksz)

common/image_u8_parallel.c

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/**
2+
* @file image_u8_parallel.c
3+
* @author MqCreaple (gmq14159@gmail.com)
4+
* @brief Parallelized processing of various image_u8 related functions.
5+
* @version 0.1
6+
* @date 2025-08-07
7+
*
8+
* @copyright Copyright (c) 2025
9+
*
10+
*/
11+
12+
#include "common/image_u8_parallel.h"
13+
#include "common/workerpool.h"
14+
#include "common/math_util.h"
15+
16+
/**
 * 1-D convolution of the `sz` samples in `x` with the `ksz`-tap kernel `k`,
 * written to `y`.
 *
 * Every output for which the whole kernel fits inside the input is the
 * weighted sum of the `ksz` inputs centred on it, shifted right by 8 bits.
 * The leading and trailing `ksz/2` samples, where the kernel would overhang
 * the buffer, are copied through unchanged.
 *
 * @param x   input samples (`sz` bytes); must not alias `y`, because outputs
 *            are stored while later sums still read neighbouring inputs
 * @param y   output samples (`sz` bytes)
 * @param sz  number of samples; may be smaller than `ksz`
 * @param k   kernel weights (`ksz` bytes)
 * @param ksz kernel length; must be odd
 */
static void convolve(const uint8_t *x, uint8_t *y, int sz, const uint8_t *k, int ksz)
{
    assert((ksz&1)==1);

    const int half = ksz/2;

    // Leading border: not enough samples to the left, copy through.
    for (int i = 0; i < half && i < sz; i++)
        y[i] = x[i];

    // Interior: the full kernel fits.
    for (int i = 0; i < sz - ksz + 1; i++) {
        uint32_t acc = 0;

        for (int j = 0; j < ksz; j++)
            acc += k[j]*x[i+j];

        y[half + i] = acc >> 8;
    }

    // Trailing border. When sz < ksz/2 the naive start index (sz - ksz/2) is
    // negative and would touch memory in front of both buffers, so clamp it.
    int tail = sz - half;
    if (tail < 0)
        tail = 0;

    for (int i = tail; i < sz; i++)
        y[i] = x[i];
}
35+
36+
struct image_u8_convolve_2D_task {
37+
image_u8_t *im;
38+
const uint8_t *k;
39+
int ksz;
40+
int idx_st;
41+
int idx_ed;
42+
};
43+
44+
void _image_u8_convolve_2D_thread_1(void *p) {
45+
struct image_u8_convolve_2D_task *params = (struct image_u8_convolve_2D_task*) p;
46+
image_u8_t *im = params->im;
47+
const uint8_t *k = params->k;
48+
int ksz = params->ksz;
49+
int y_st = params->idx_st;
50+
int y_ed = params->idx_ed;
51+
52+
assert((ksz & 1) == 1); // ksz must be odd.
53+
54+
uint8_t *x = malloc(sizeof(uint8_t)*im->stride);
55+
for (int y = y_st; y < y_ed; y++) {
56+
memcpy(x, &im->buf[y*im->stride], im->stride);
57+
convolve(x, &im->buf[y*im->stride], im->width, k, ksz);
58+
}
59+
free(x);
60+
}
61+
62+
void _image_u8_convolve_2D_thread_2(void *p) {
63+
struct image_u8_convolve_2D_task *params = (struct image_u8_convolve_2D_task*) p;
64+
image_u8_t *im = params->im;
65+
const uint8_t *k = params->k;
66+
int ksz = params->ksz;
67+
int x_st = params->idx_st;
68+
int x_ed = params->idx_ed;
69+
70+
uint8_t *xb = malloc(sizeof(uint8_t)*im->height);
71+
uint8_t *yb = malloc(sizeof(uint8_t)*im->height);
72+
for (int x = x_st; x < x_ed; x++) {
73+
74+
for (int y = 0; y < im->height; y++)
75+
xb[y] = im->buf[y*im->stride + x];
76+
77+
convolve(xb, yb, im->height, k, ksz);
78+
79+
for (int y = 0; y < im->height; y++)
80+
im->buf[y*im->stride + x] = yb[y];
81+
}
82+
free(xb);
83+
free(yb);
84+
}
85+
86+
/* Images with fewer pixels than this are filtered on the calling thread. */
enum { IMAGE_U8_PARALLEL_MIN_PIXELS = 65536 };

/**
 * Splits [0, extent) into `nthreads` contiguous ranges, queues one `pass`
 * task per range on `wp`, and runs the pool.
 *
 * The first `extent % nthreads` ranges are one element longer so the
 * remainder is spread across tasks. Only idx_st/idx_ed of each task are
 * written here; the caller fills in the remaining fields beforehand.
 * `nthreads` must be positive.
 */
static void image_u8_convolve_2D_run_pass(workerpool_t *wp,
                                          struct image_u8_convolve_2D_task *tasks,
                                          int nthreads, int extent,
                                          void (*pass)(void *))
{
    int base = extent / nthreads;
    int extra = extent % nthreads;
    int next = 0;

    for (int idx = 0; idx < nthreads; idx++) {
        tasks[idx].idx_st = next;
        next += base;
        if (idx < extra) {
            next += 1; // distribute the remainders across the n threads
        }
        tasks[idx].idx_ed = next;
        workerpool_add_task(wp, pass, &tasks[idx]);
    }
    workerpool_run(wp);
}

/**
 * Convolves `im` in place with the 1-D kernel `k`, first along rows and then
 * along columns, using the threads of `wp`.
 *
 * The work is handed to the single-threaded image_u8_convolve_2D() when the
 * image is small, when the pool reports at most one thread, or when the task
 * array cannot be allocated.
 *
 * @param wp  worker pool used to run the row and column tasks
 * @param im  image to filter; modified in place
 * @param k   kernel weights (`ksz` bytes)
 * @param ksz kernel length; must be odd
 */
void image_u8_convolve_2D_parallel(workerpool_t *wp, image_u8_t *im, const uint8_t *k, int ksz) {
    // Widen before multiplying so very large dimensions cannot overflow int.
    if ((long long) im->width * im->height < IMAGE_U8_PARALLEL_MIN_PIXELS) {
        // for small images, directly use single threaded convolution
        image_u8_convolve_2D(im, k, ksz);
        return;
    }

    int nthreads = workerpool_get_nthreads(wp);
    if (nthreads <= 1) {
        // Nothing to parallelize; a count of zero would also divide by zero
        // when the ranges are computed.
        image_u8_convolve_2D(im, k, ksz);
        return;
    }

    struct image_u8_convolve_2D_task *tasks = malloc(sizeof *tasks * nthreads);
    if (tasks == NULL) {
        // Could not allocate the task descriptions: still produce the result.
        image_u8_convolve_2D(im, k, ksz);
        return;
    }

    // These fields are identical for every task in both passes.
    for (int idx = 0; idx < nthreads; idx++) {
        tasks[idx].im = im;
        tasks[idx].k = k;
        tasks[idx].ksz = ksz;
    }

    // The row pass must finish completely before the column pass starts,
    // because every column reads the output of every row.
    image_u8_convolve_2D_run_pass(wp, tasks, nthreads, im->height,
                                  _image_u8_convolve_2D_thread_1);
    image_u8_convolve_2D_run_pass(wp, tasks, nthreads, im->width,
                                  _image_u8_convolve_2D_thread_2);

    free(tasks);
}
131+
132+
/**
 * Blurs `im` in place with a `ksz`-tap Gaussian of standard deviation
 * `sigma`, running the convolution through image_u8_convolve_2D_parallel().
 *
 * Returns immediately when `sigma` is 0. The kernel is sampled at integer
 * offsets centred on zero, normalized to sum to one, and then scaled by 255
 * into 8-bit weights.
 *
 * @param wp    worker pool passed on to the parallel convolution
 * @param im    image to blur; modified in place
 * @param sigma standard deviation of the Gaussian
 * @param ksz   kernel length; must be odd
 */
void image_u8_gaussian_blur_parallel(workerpool_t *wp, image_u8_t *im, double sigma, int ksz) {
    if (sigma == 0)
        return;

    assert((ksz & 1) == 1); // ksz must be odd.

    // Sample the (unnormalized) Gaussian.
    // e.g. for ksz == 5 the taps correspond to offsets -2, -1, 0, 1, 2.
    double *weights = malloc(sizeof(double)*ksz);
    const int half = ksz/2;

    for (int tap = 0; tap < ksz; tap++) {
        int offset = tap - half;
        weights[tap] = exp(-.5*sq(offset / sigma));
    }

    // Sum of all taps, used to normalize the kernel.
    double total = 0;
    for (int tap = 0; tap < ksz; tap++)
        total += weights[tap];

    // Normalize each tap and quantize it to an 8-bit weight.
    uint8_t *k = malloc(sizeof(uint8_t)*ksz);
    for (int tap = 0; tap < ksz; tap++) {
        weights[tap] /= total;
        k[tap] = weights[tap]*255;
    }

    free(weights);

    image_u8_convolve_2D_parallel(wp, im, k, ksz);
    free(k);
}

common/image_u8_parallel.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* @file image_u8_parallel.h
3+
* @author MqCreaple (gmq14159@gmail.com)
4+
* @brief Parallelized processing of various image_u8 related functions.
5+
* @version 0.1
6+
* @date 2025-08-07
7+
*
8+
* @copyright Copyright (c) 2025
9+
*
10+
*/
11+
#pragma once
12+
13+
#include "image_u8.h"
14+
#include "workerpool.h"
15+
#include "math_util.h"
16+
17+
/**
 * Convolve an image in place with a 1-D kernel applied along every row and
 * then along every column, splitting each pass into one task per thread of
 * the worker pool.
 *
 * Images with fewer than 65536 pixels are passed to the single-threaded
 * image_u8_convolve_2D() instead.
 *
 * @param wp  worker pool that runs the row and column tasks
 * @param im  image to filter; its pixel buffer is overwritten with the result
 * @param k   kernel weights, `ksz` entries
 * @param ksz kernel length; must be odd (asserted)
 */
void image_u8_convolve_2D_parallel(workerpool_t *wp, image_u8_t *im, const uint8_t *k, int ksz);
18+
19+
/**
 * Blur an image in place with a Gaussian kernel, using
 * image_u8_convolve_2D_parallel() for the convolution.
 *
 * Does nothing when `sigma` is 0. The kernel is normalized to unit sum and
 * then scaled by 255 into 8-bit weights.
 *
 * @param wp    worker pool passed on to the parallel convolution
 * @param im    image to blur; its pixel buffer is overwritten with the result
 * @param sigma standard deviation of the Gaussian
 * @param ksz   number of kernel taps; must be odd (asserted)
 */
void image_u8_gaussian_blur_parallel(workerpool_t *wp, image_u8_t *im, double sigma, int ksz);

0 commit comments

Comments
 (0)