Skip to content

Commit 60061f0

Browse files
Update to mupdf 1.25 (#121)
* Update to mupdf 1.25 and implement new callback search fn
* Update doc-comments and make default search fn use search_cb under the hood
* Make search_cb function different from normal search fn
* Fix clippy and fmt
* Maybe fix feature flag compilation issues?
* fmt
* Removed debug print
* Once again, try better cpu feature selection
* Update to get ULL suffix fix past 1.25.4
* Unconditionally use sse4.1 on x86_64 and format XCFlags better
* Fix build-script error on x86
* Once again try better arch_has_sse setting
* remove unnecessary mut
* Added an = woohoo
* Add ARCH_HAS_* define
* Use defines
* Try NEON support
* Comment out warnings

---------

Co-authored-by: ginnyTheCat <ginnythecat@lelux.net>
1 parent 0030ec3 commit 60061f0

File tree

7 files changed

+207
-41
lines changed

7 files changed

+207
-41
lines changed

mupdf-sys/build.rs

+33-1
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,22 @@ fn cp_r(dir: &Path, dest: &Path, excluding_dir_names: &'static [&'static str]) {
4343
}
4444
}
4545

46+
const CPU_FLAGS: &[(&str, &str, &str, Option<&str>)] = &[
47+
("sse4.1", "-msse4.1", "HAVE_SSE4_1", Some("ARCH_HAS_SSE")),
48+
("avx", "-mavx", "HAVE_AVX", None),
49+
("avx2", "-mavx2", "HAVE_AVX2", None),
50+
("fma", "-mfma", "HAVE_FMA", None),
51+
("neon", "-mfpu=neon", "HAVE_NEON", Some("ARCH_HAS_NEON")),
52+
];
53+
4654
#[cfg(not(target_env = "msvc"))]
4755
fn build_libmupdf() {
4856
use std::process::Command;
4957

58+
let features_var =
59+
std::env::var("CARGO_CFG_TARGET_FEATURE").expect("We need cargo to build this");
60+
let target_features = features_var.split(',').collect::<Vec<_>>();
61+
5062
let profile = match &*env::var("PROFILE").unwrap_or("debug".to_owned()) {
5163
"bench" | "release" => "release",
5264
_ => "debug",
@@ -101,6 +113,19 @@ fn build_libmupdf() {
101113
"verbose=yes".to_owned(),
102114
];
103115

116+
for (feature, flag, make_flag, define) in CPU_FLAGS {
117+
let contains = target_features.contains(feature);
118+
if contains {
119+
build.flag(flag);
120+
121+
make_flags.push(format!("{make_flag}=yes"));
122+
}
123+
124+
if let Some(define) = define {
125+
build.define(define, if contains { "1" } else { "0" });
126+
}
127+
}
128+
104129
// this may be unused if none of the features below are enabled
105130
#[allow(unused_variables, unused_mut)]
106131
let mut add_lib = |cflags_name: &'static str, pkgcfg_names: &[&str]| {
@@ -182,7 +207,9 @@ fn build_libmupdf() {
182207
make_flags.push(format!("CC={}", cc));
183208
make_flags.push(format!("CXX={}", cxx));
184209
make_flags.push(format!("XCFLAGS={}", c_flags.to_string_lossy()));
185-
make_flags.push(format!("XCXXFLAGS={}", cxx_flags.to_string_lossy()));
210+
make_flags.push(format!("XCXXFLAGS={}", cxx_flags.to_string_lossy(),));
211+
212+
// println!("cargo::warning=using make_flags {make_flags:?}");
186213

187214
// Enable parallel compilation
188215
if let Ok(n) = std::thread::available_parallelism() {
@@ -266,6 +293,7 @@ fn build_libmupdf() {
266293
if cfg!(not(feature = "js")) {
267294
cl_env.push("/DFZ_ENABLE_JS#0".to_string());
268295
}
296+
269297
// Enable parallel compilation
270298
cl_env.push("/MP".to_string());
271299
let platform_toolset = env::var("MUPDF_MSVC_PLATFORM_TOOLSET").unwrap_or(
@@ -492,6 +520,10 @@ fn main() {
492520
if cfg!(target_os = "android") {
493521
build.flag("-DHAVE_ANDROID").flag_if_supported("-std=c99");
494522
}
523+
#[cfg(target_arch = "x86_64")]
524+
{
525+
build.flag("-DARCH_HAS_SSE=1");
526+
}
495527
build.compile("libmupdf-wrapper.a");
496528

497529
let bindings = bindgen::Builder::default()

mupdf-sys/mupdf

Submodule mupdf updated 373 files

mupdf-sys/wrapper.c

+13
Original file line numberDiff line numberDiff line change
@@ -3281,3 +3281,16 @@ int32_t mupdf_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point
32813281
}
32823282
return count;
32833283
}
3284+
3285+
int32_t mupdf_search_stext_page_cb(fz_context *ctx, fz_stext_page *page, const char *needle, fz_search_callback_fn *cb, void *opaque, mupdf_error_t **errptr) {
3286+
int32_t count = 0;
3287+
fz_try(ctx)
3288+
{
3289+
count = fz_search_stext_page_cb(ctx, page, needle, cb, opaque);
3290+
}
3291+
fz_catch(ctx)
3292+
{
3293+
mupdf_save_error(ctx, errptr);
3294+
}
3295+
return count;
3296+
}

src/colorspace.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -214,9 +214,9 @@ mod test {
214214
)
215215
.unwrap();
216216
assert_eq!(n, 3);
217-
assert!((0.6..0.7).contains(&gray[0]));
218-
assert!((0.6..0.7).contains(&gray[1]));
219-
assert!((0.6..0.7).contains(&gray[2]));
217+
assert!((0.59..0.61).contains(&gray[0]), "gray = {:?}", gray);
218+
assert!((0.59..0.61).contains(&gray[1]), "gray = {:?}", gray);
219+
assert!((0.59..0.61).contains(&gray[2]), "gray = {:?}", gray);
220220
assert_eq!(gray[3], 0.0);
221221
}
222222
}

src/display_list.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -140,19 +140,19 @@ mod test {
140140
[Quad {
141141
ul: Point {
142142
x: 56.8,
143-
y: 69.32512
143+
y: 69.32953
144144
},
145145
ur: Point {
146-
x: 115.85405,
147-
y: 69.32512
146+
x: 115.85159,
147+
y: 69.32953
148148
},
149149
ll: Point {
150150
x: 56.8,
151-
y: 87.311844
151+
y: 87.29713
152152
},
153153
lr: Point {
154-
x: 115.85405,
155-
y: 87.311844
154+
x: 115.85159,
155+
y: 87.29713
156156
}
157157
}]
158158
);

src/page.rs

+7-7
Original file line numberDiff line numberDiff line change
@@ -534,20 +534,20 @@ mod test {
534534
[Quad {
535535
ul: Point {
536536
x: 56.8,
537-
y: 69.32512,
537+
y: 69.32953
538538
},
539539
ur: Point {
540-
x: 115.85405,
541-
y: 69.32512,
540+
x: 115.85159,
541+
y: 69.32953
542542
},
543543
ll: Point {
544544
x: 56.8,
545-
y: 87.311844,
545+
y: 87.29713
546546
},
547547
lr: Point {
548-
x: 115.85405,
549-
y: 87.311844,
550-
},
548+
x: 115.85159,
549+
y: 87.29713
550+
}
551551
}]
552552
);
553553

src/text_page.rs

+144-23
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
1-
use std::convert::TryInto;
2-
use std::ffi::CString;
3-
use std::io::Read;
4-
use std::marker::PhantomData;
5-
use std::ptr;
1+
use std::{
2+
convert::TryInto,
3+
ffi::{c_int, c_void, CString},
4+
io::Read,
5+
marker::PhantomData,
6+
ptr::{self, NonNull},
7+
slice,
8+
};
69

710
use bitflags::bitflags;
811
use mupdf_sys::*;
912
use num_enum::TryFromPrimitive;
1013

14+
use crate::FFIAnalogue;
1115
use crate::{
12-
array::FzArray, context, rust_slice_to_ffi_ptr, rust_vec_from_ffi_ptr, Buffer, Error, Image,
13-
Matrix, Point, Quad, Rect, WriteMode,
16+
context, rust_slice_to_ffi_ptr, Buffer, Error, Image, Matrix, Point, Quad, Rect, WriteMode,
1417
};
1518

1619
bitflags! {
@@ -51,20 +54,111 @@ impl TextPage {
5154
}
5255
}
5356

54-
pub fn search(&self, needle: &str, hit_max: u32) -> Result<FzArray<Quad>, Error> {
57+
pub fn search(&self, needle: &str) -> Result<Vec<Quad>, Error> {
58+
let mut vec = Vec::new();
59+
self.search_cb(needle, &mut vec, |v, quads| {
60+
v.extend(quads.iter().cloned());
61+
SearchHitResponse::ContinueSearch
62+
})?;
63+
Ok(vec)
64+
}
65+
66+
/// Search through the page, finding all instances of `needle` and processing them through
67+
/// `cb`.
68+
/// Note that the `&[Quad]` given to `cb` in its invocation lives only during the time that
69+
/// `cb` is being evaluated. That means the following won't work or compile:
70+
///
71+
/// ```compile_fail
72+
/// # use mupdf::{TextPage, Quad, text_page::SearchHitResponse};
73+
/// # let text_page: TextPage = todo!();
74+
/// let mut quads: Vec<&Quad> = Vec::new();
75+
/// text_page.search_cb("search term", &mut quads, |v, quads: &[Quad]| {
76+
/// v.extend(quads);
77+
/// SearchHitResponse::ContinueSearch
78+
/// }).unwrap();
79+
/// ```
80+
///
81+
/// But the following will:
82+
/// ```no_run
83+
/// # use mupdf::{TextPage, Quad, text_page::SearchHitResponse};
84+
/// # let text_page: TextPage = todo!();
85+
/// let mut quads: Vec<Quad> = Vec::new();
86+
/// text_page.search_cb("search term", &mut quads, |v, quads: &[Quad]| {
87+
/// v.extend(quads.iter().cloned());
88+
/// SearchHitResponse::ContinueSearch
89+
/// }).unwrap();
90+
/// ```
91+
pub fn search_cb<T, F>(&self, needle: &str, data: &mut T, cb: F) -> Result<u32, Error>
92+
where
93+
T: ?Sized,
94+
F: Fn(&mut T, &[Quad]) -> SearchHitResponse,
95+
{
96+
// This struct allows us to wrap both the callback that the user gave us and the data so
97+
// that we can pass it into the ffi callback nicely
98+
struct FnWithData<'parent, T: ?Sized, F>
99+
where
100+
F: Fn(&mut T, &[Quad]) -> SearchHitResponse,
101+
{
102+
data: &'parent mut T,
103+
f: F,
104+
}
105+
106+
let mut opaque = FnWithData { data, f: cb };
107+
108+
// And then here's the `fn` that we'll pass in - it has to be an fn, not capturing context,
109+
// because it needs to be an `unsafe extern "C" fn` to be used with FFI.
110+
unsafe extern "C" fn ffi_cb<T, F>(
111+
_ctx: *mut fz_context,
112+
data: *mut c_void,
113+
num_quads: c_int,
114+
hit_bbox: *mut fz_quad,
115+
) -> c_int
116+
where
117+
T: ?Sized,
118+
F: Fn(&mut T, &[Quad]) -> SearchHitResponse,
119+
Quad: FFIAnalogue<FFIType = fz_quad>,
120+
{
121+
// This is upheld by our `FFIAnalogue` bound above
122+
let quad_ptr = hit_bbox.cast::<Quad>();
123+
let Some(nn) = NonNull::new(quad_ptr) else {
124+
return SearchHitResponse::ContinueSearch as c_int;
125+
};
126+
127+
// This guarantee is upheld by mupdf - they're giving us a pointer to the same type we
128+
// gave them.
129+
let data = data.cast::<FnWithData<'_, T, F>>();
130+
131+
// But if they gave us a negative number of results (e.g. -1), give up on
132+
// decoding.
133+
let Ok(len) = usize::try_from(num_quads) else {
134+
return SearchHitResponse::ContinueSearch as c_int;
135+
};
136+
137+
// SAFETY: We've ensured `nn` is not null, and we're trusting the FFI layer for the other
138+
// invariants (about actually holding the data, etc)
139+
let slice = unsafe { slice::from_raw_parts_mut(nn.as_ptr(), len) };
140+
141+
// Get the function and the data
142+
// SAFETY: Trusting that the FFI layer actually gave us this ptr
143+
let f = unsafe { &(*data).f };
144+
// SAFETY: Trusting that the FFI layer actually gave us this ptr
145+
let data = unsafe { &mut (*data).data };
146+
147+
// And call the function with the data
148+
f(data, slice) as c_int
149+
}
150+
55151
let c_needle = CString::new(needle)?;
56-
let hit_max = if hit_max < 1 { 16 } else { hit_max };
57-
let mut hit_count = 0;
58152
unsafe {
59-
ffi_try!(mupdf_search_stext_page(
153+
ffi_try!(mupdf_search_stext_page_cb(
60154
context(),
61155
self.inner,
62156
c_needle.as_ptr(),
63-
hit_max as _,
64-
&mut hit_count
157+
Some(ffi_cb::<T, F>),
158+
&raw mut opaque as *mut c_void
65159
))
66160
}
67-
.and_then(|quads| unsafe { rust_vec_from_ffi_ptr(quads, hit_count) })
161+
.map(|count| count as u32)
68162
}
69163

70164
pub fn highlight_selection(
@@ -98,6 +192,12 @@ impl Drop for TextPage {
98192
}
99193
}
100194

195+
#[repr(i32)]
196+
pub enum SearchHitResponse {
197+
ContinueSearch = 0,
198+
AbortSearch = 1,
199+
}
200+
101201
#[derive(Debug, Clone, Copy, PartialEq, TryFromPrimitive)]
102202
#[repr(u32)]
103203
pub enum TextBlockType {
@@ -262,7 +362,7 @@ impl<'a> Iterator for TextCharIter<'a> {
262362

263363
#[cfg(test)]
264364
mod test {
265-
use crate::{Document, TextPageOptions};
365+
use crate::{text_page::SearchHitResponse, Document, TextPageOptions};
266366

267367
#[test]
268368
fn test_text_page_search() {
@@ -271,31 +371,52 @@ mod test {
271371
let doc = Document::open("tests/files/dummy.pdf").unwrap();
272372
let page0 = doc.load_page(0).unwrap();
273373
let text_page = page0.to_text_page(TextPageOptions::BLOCK_IMAGE).unwrap();
274-
let hits = text_page.search("Dummy", 1).unwrap();
374+
let hits = text_page.search("Dummy").unwrap();
275375
assert_eq!(hits.len(), 1);
276376
assert_eq!(
277377
&*hits,
278378
[Quad {
279379
ul: Point {
280380
x: 56.8,
281-
y: 69.32512
381+
y: 69.32953
282382
},
283383
ur: Point {
284-
x: 115.85405,
285-
y: 69.32512
384+
x: 115.85159,
385+
y: 69.32953
286386
},
287387
ll: Point {
288388
x: 56.8,
289-
y: 87.311844
389+
y: 87.29713
290390
},
291391
lr: Point {
292-
x: 115.85405,
293-
y: 87.311844
392+
x: 115.85159,
393+
y: 87.29713
294394
}
295395
}]
296396
);
297397

298-
let hits = text_page.search("Not Found", 1).unwrap();
398+
let hits = text_page.search("Not Found").unwrap();
399+
assert_eq!(hits.len(), 0);
400+
}
401+
402+
#[test]
403+
fn test_text_page_cb_search() {
404+
let doc = Document::open("tests/files/dummy.pdf").unwrap();
405+
let page0 = doc.load_page(0).unwrap();
406+
let text_page = page0.to_text_page(TextPageOptions::BLOCK_IMAGE).unwrap();
407+
let mut sum_x = 0.0;
408+
let num_hits = text_page
409+
.search_cb("Dummy", &mut sum_x, |acc, hits| {
410+
for q in hits {
411+
*acc += q.ul.x + q.ur.x + q.ll.x + q.lr.x;
412+
}
413+
SearchHitResponse::ContinueSearch
414+
})
415+
.unwrap();
416+
assert_eq!(num_hits, 1);
417+
assert_eq!(sum_x, 56.8 + 115.85159 + 56.8 + 115.85159);
418+
419+
let hits = text_page.search("Not Found").unwrap();
299420
assert_eq!(hits.len(), 0);
300421
}
301422
}

0 commit comments

Comments
 (0)