1
- use std:: convert:: TryInto ;
2
- use std:: ffi:: CString ;
3
- use std:: io:: Read ;
4
- use std:: marker:: PhantomData ;
5
- use std:: ptr;
1
+ use std:: {
2
+ convert:: TryInto ,
3
+ ffi:: { c_int, c_void, CString } ,
4
+ io:: Read ,
5
+ marker:: PhantomData ,
6
+ ptr:: { self , NonNull } ,
7
+ slice,
8
+ } ;
6
9
7
10
use bitflags:: bitflags;
8
11
use mupdf_sys:: * ;
9
12
use num_enum:: TryFromPrimitive ;
10
13
14
+ use crate :: FFIAnalogue ;
11
15
use crate :: {
12
- array:: FzArray , context, rust_slice_to_ffi_ptr, rust_vec_from_ffi_ptr, Buffer , Error , Image ,
13
- Matrix , Point , Quad , Rect , WriteMode ,
16
+ context, rust_slice_to_ffi_ptr, Buffer , Error , Image , Matrix , Point , Quad , Rect , WriteMode ,
14
17
} ;
15
18
16
19
bitflags ! {
@@ -51,20 +54,111 @@ impl TextPage {
51
54
}
52
55
}
53
56
54
- pub fn search ( & self , needle : & str , hit_max : u32 ) -> Result < FzArray < Quad > , Error > {
57
+ pub fn search ( & self , needle : & str ) -> Result < Vec < Quad > , Error > {
58
+ let mut vec = Vec :: new ( ) ;
59
+ self . search_cb ( needle, & mut vec, |v, quads| {
60
+ v. extend ( quads. iter ( ) . cloned ( ) ) ;
61
+ SearchHitResponse :: ContinueSearch
62
+ } ) ?;
63
+ Ok ( vec)
64
+ }
65
+
66
+ /// Search through the page, finding all instances of `needle` and processing them through
67
+ /// `cb`.
68
+ /// Note that the `&[Quad]` given to `cb` in its invocation lives only during the time that
69
+ /// `cb` is being evaluated. That means the following won't work or compile:
70
+ ///
71
+ /// ```compile_fail
72
+ /// # use mupdf::{TextPage, Quad, text_page::SearchHitResponse};
73
+ /// # let text_page: TextPage = todo!();
74
+ /// let mut quads: Vec<&Quad> = Vec::new();
75
+ /// text_page.search_cb("search term", &mut quads, |v, quads: &[Quad]| {
76
+ /// v.extend(quads);
77
+ /// SearchHitResponse::ContinueSearch
78
+ /// }).unwrap();
79
+ /// ```
80
+ ///
81
+ /// But the following will:
82
+ /// ```no_run
83
+ /// # use mupdf::{TextPage, Quad, text_page::SearchHitResponse};
84
+ /// # let text_page: TextPage = todo!();
85
+ /// let mut quads: Vec<Quad> = Vec::new();
86
+ /// text_page.search_cb("search term", &mut quads, |v, quads: &[Quad]| {
87
+ /// v.extend(quads.iter().cloned());
88
+ /// SearchHitResponse::ContinueSearch
89
+ /// }).unwrap();
90
+ /// ```
91
+ pub fn search_cb < T , F > ( & self , needle : & str , data : & mut T , cb : F ) -> Result < u32 , Error >
92
+ where
93
+ T : ?Sized ,
94
+ F : Fn ( & mut T , & [ Quad ] ) -> SearchHitResponse ,
95
+ {
96
+ // This struct allows us to wrap both the callback that the user gave us and the data so
97
+ // that we can pass it into the ffi callback nicely
98
+ struct FnWithData < ' parent , T : ?Sized , F >
99
+ where
100
+ F : Fn ( & mut T , & [ Quad ] ) -> SearchHitResponse ,
101
+ {
102
+ data : & ' parent mut T ,
103
+ f : F ,
104
+ }
105
+
106
+ let mut opaque = FnWithData { data, f : cb } ;
107
+
108
+ // And then here's the `fn` that we'll pass in - it has to be an fn, not capturing context,
109
+ // because it needs to be unsafe extern "C". to be used with FFI.
110
+ unsafe extern "C" fn ffi_cb < T , F > (
111
+ _ctx : * mut fz_context ,
112
+ data : * mut c_void ,
113
+ num_quads : c_int ,
114
+ hit_bbox : * mut fz_quad ,
115
+ ) -> c_int
116
+ where
117
+ T : ?Sized ,
118
+ F : Fn ( & mut T , & [ Quad ] ) -> SearchHitResponse ,
119
+ Quad : FFIAnalogue < FFIType = fz_quad > ,
120
+ {
121
+ // This is upheld by our `FFIAnalogue` bound above
122
+ let quad_ptr = hit_bbox. cast :: < Quad > ( ) ;
123
+ let Some ( nn) = NonNull :: new ( quad_ptr) else {
124
+ return SearchHitResponse :: ContinueSearch as c_int ;
125
+ } ;
126
+
127
+ // This guarantee is upheld by mupdf - they're giving us a pointer to the same type we
128
+ // gave them.
129
+ let data = data. cast :: < FnWithData < ' _ , T , F > > ( ) ;
130
+
131
+ // But if they like gave us a -1 for number of results or whatever, give up on
132
+ // decoding.
133
+ let Ok ( len) = usize:: try_from ( num_quads) else {
134
+ return SearchHitResponse :: ContinueSearch as c_int ;
135
+ } ;
136
+
137
+ // SAFETY: We've ensure nn is not null, and we're trusting the FFI layer for the other
138
+ // invariants (about actually holding the data, etc)
139
+ let slice = unsafe { slice:: from_raw_parts_mut ( nn. as_ptr ( ) , len) } ;
140
+
141
+ // Get the function and the data
142
+ // SAFETY: Trusting that the FFI layer actually gave us this ptr
143
+ let f = unsafe { & ( * data) . f } ;
144
+ // SAFETY: Trusting that the FFI layer actually gave us this ptr
145
+ let data = unsafe { & mut ( * data) . data } ;
146
+
147
+ // And call the function with the data
148
+ f ( data, slice) as c_int
149
+ }
150
+
55
151
let c_needle = CString :: new ( needle) ?;
56
- let hit_max = if hit_max < 1 { 16 } else { hit_max } ;
57
- let mut hit_count = 0 ;
58
152
unsafe {
59
- ffi_try ! ( mupdf_search_stext_page (
153
+ ffi_try ! ( mupdf_search_stext_page_cb (
60
154
context( ) ,
61
155
self . inner,
62
156
c_needle. as_ptr( ) ,
63
- hit_max as _ ,
64
- & mut hit_count
157
+ Some ( ffi_cb :: < T , F > ) ,
158
+ & raw mut opaque as * mut c_void
65
159
) )
66
160
}
67
- . and_then ( |quads| unsafe { rust_vec_from_ffi_ptr ( quads , hit_count ) } )
161
+ . map ( |count| count as u32 )
68
162
}
69
163
70
164
pub fn highlight_selection (
@@ -98,6 +192,12 @@ impl Drop for TextPage {
98
192
}
99
193
}
100
194
195
/// Answer returned by a `TextPage::search_cb` callback after it has processed a hit.
///
/// The value is cast to a C `int` and handed back to MuPDF, so the discriminants below are
/// part of the FFI contract and must not be changed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(i32)]
pub enum SearchHitResponse {
    /// Keep searching (passed to MuPDF as `0`).
    ContinueSearch = 0,
    /// Stop the search (passed to MuPDF as `1`).
    AbortSearch = 1,
}
200
+
101
201
#[ derive( Debug , Clone , Copy , PartialEq , TryFromPrimitive ) ]
102
202
#[ repr( u32 ) ]
103
203
pub enum TextBlockType {
@@ -262,7 +362,7 @@ impl<'a> Iterator for TextCharIter<'a> {
262
362
263
363
#[ cfg( test) ]
264
364
mod test {
265
- use crate :: { Document , TextPageOptions } ;
365
+ use crate :: { text_page :: SearchHitResponse , Document , TextPageOptions } ;
266
366
267
367
#[ test]
268
368
fn test_text_page_search ( ) {
@@ -271,31 +371,52 @@ mod test {
271
371
let doc = Document :: open ( "tests/files/dummy.pdf" ) . unwrap ( ) ;
272
372
let page0 = doc. load_page ( 0 ) . unwrap ( ) ;
273
373
let text_page = page0. to_text_page ( TextPageOptions :: BLOCK_IMAGE ) . unwrap ( ) ;
274
- let hits = text_page. search ( "Dummy" , 1 ) . unwrap ( ) ;
374
+ let hits = text_page. search ( "Dummy" ) . unwrap ( ) ;
275
375
assert_eq ! ( hits. len( ) , 1 ) ;
276
376
assert_eq ! (
277
377
& * hits,
278
378
[ Quad {
279
379
ul: Point {
280
380
x: 56.8 ,
281
- y: 69.32512
381
+ y: 69.32953
282
382
} ,
283
383
ur: Point {
284
- x: 115.85405 ,
285
- y: 69.32512
384
+ x: 115.85159 ,
385
+ y: 69.32953
286
386
} ,
287
387
ll: Point {
288
388
x: 56.8 ,
289
- y: 87.311844
389
+ y: 87.29713
290
390
} ,
291
391
lr: Point {
292
- x: 115.85405 ,
293
- y: 87.311844
392
+ x: 115.85159 ,
393
+ y: 87.29713
294
394
}
295
395
} ]
296
396
) ;
297
397
298
- let hits = text_page. search ( "Not Found" , 1 ) . unwrap ( ) ;
398
+ let hits = text_page. search ( "Not Found" ) . unwrap ( ) ;
399
+ assert_eq ! ( hits. len( ) , 0 ) ;
400
+ }
401
+
402
#[test]
fn test_text_page_cb_search() {
    let document = Document::open("tests/files/dummy.pdf").unwrap();
    let first_page = document.load_page(0).unwrap();
    let text_page = first_page
        .to_text_page(TextPageOptions::BLOCK_IMAGE)
        .unwrap();

    // Fold the x coordinate of every corner of every reported quad into one number, so the
    // test exercises both the user-data argument and the quads passed to the callback.
    let mut sum_x = 0.0;
    let num_hits = text_page
        .search_cb("Dummy", &mut sum_x, |total, quads| {
            for quad in quads {
                *total += quad.ul.x + quad.ur.x + quad.ll.x + quad.lr.x;
            }
            SearchHitResponse::ContinueSearch
        })
        .unwrap();
    assert_eq!(num_hits, 1);
    assert_eq!(sum_x, 56.8 + 115.85159 + 56.8 + 115.85159);

    // A needle that does not occur on the page yields no hits.
    let misses = text_page.search("Not Found").unwrap();
    assert_eq!(misses.len(), 0);
}
301
422
}
0 commit comments