Skip to content

Commit 49b81b8

Browse files
committed
Abstract next_match/next_match_back with #[cfg(kani)] nondeterministic overapproximation
Replace the real memchr-based loops in CharSearcher::next_match() and next_match_back() with nondeterministic abstractions under #[cfg(kani)]. This mirrors the existing abstractions for next_reject/next_reject_back and allows Kani autoharness and partition 2 verification to complete within time limits.
1 parent 9ad651e commit 49b81b8

File tree

1 file changed

+91
-69
lines changed

1 file changed

+91
-69
lines changed

library/core/src/str/pattern.rs

Lines changed: 91 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
436436
}
437437
#[inline]
438438
fn next_match(&mut self) -> Option<(usize, usize)> {
439+
#[cfg(not(kani))]
439440
loop {
440441
// get the haystack after the last character found
441442
let bytes = self.haystack.as_bytes().get(self.finger..self.finger_back)?;
@@ -464,23 +465,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
464465
if self.finger >= self.utf8_size() {
465466
let found_char = self.finger - self.utf8_size();
466467
if let Some(slice) = self.haystack.as_bytes().get(found_char..self.finger) {
467-
// Under Kani, use an unrolled byte comparison to avoid calling
468-
// memcmp, which has internal variables that conflict with CBMC's
469-
// loop contract assigns checking. The utf8_size is always 1-4,
470-
// so this unrolled comparison is equivalent to slice == &encoded[..].
471-
#[cfg(not(kani))]
472-
let matched = slice == &self.utf8_encoded[0..self.utf8_size()];
473-
#[cfg(kani)]
474-
let matched = {
475-
let e = &self.utf8_encoded;
476-
let s = self.utf8_size();
477-
slice.len() == s
478-
&& (s < 1 || slice[0] == e[0])
479-
&& (s < 2 || slice[1] == e[1])
480-
&& (s < 3 || slice[2] == e[2])
481-
&& (s < 4 || slice[3] == e[3])
482-
};
483-
if matched {
468+
if slice == &self.utf8_encoded[0..self.utf8_size()] {
484469
return Some((found_char, self.finger));
485470
}
486471
}
@@ -491,6 +476,27 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
491476
return None;
492477
}
493478
}
479+
// Nondeterministic abstraction for Kani verification.
480+
// Overapproximates all possible behaviors of the real loop:
481+
// either finds a match at some valid position, or exhausts the haystack.
482+
#[cfg(kani)]
483+
{
484+
if self.finger >= self.finger_back {
485+
return None;
486+
}
487+
if kani::any() {
488+
let a: usize = kani::any();
489+
let w = self.utf8_size();
490+
kani::assume(a >= self.finger);
491+
kani::assume(w <= self.finger_back); // avoid overflow
492+
kani::assume(a + w <= self.finger_back);
493+
self.finger = a + w;
494+
Some((a, self.finger))
495+
} else {
496+
self.finger = self.finger_back;
497+
None
498+
}
499+
}
494500
}
495501

496502
// Override the default next_reject for unbounded verification.
@@ -555,63 +561,79 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
555561
}
556562
#[inline]
557563
fn next_match_back(&mut self) -> Option<(usize, usize)> {
558-
let haystack = self.haystack.as_bytes();
559-
loop {
560-
// get the haystack up to but not including the last character searched
561-
let bytes = haystack.get(self.finger..self.finger_back)?;
562-
// the last byte of the utf8 encoded needle
563-
// SAFETY: we have an invariant that `utf8_size < 5`
564-
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size() - 1) };
565-
if let Some(index) = memchr::memrchr(last_byte, bytes) {
566-
// we searched a slice that was offset by self.finger,
567-
// add self.finger to recoup the original index
568-
let index = self.finger + index;
569-
// memrchr will return the index of the byte we wish to
570-
// find. In case of an ASCII character, this is indeed
571-
// were we wish our new finger to be ("after" the found
572-
// char in the paradigm of reverse iteration). For
573-
// multibyte chars we need to skip down by the number of more
574-
// bytes they have than ASCII
575-
let shift = self.utf8_size() - 1;
576-
if index >= shift {
577-
let found_char = index - shift;
578-
if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size())) {
579-
// Under Kani, use unrolled byte comparison (see next_match above).
580-
#[cfg(not(kani))]
581-
let matched = slice == &self.utf8_encoded[0..self.utf8_size()];
582-
#[cfg(kani)]
583-
let matched = {
584-
let e = &self.utf8_encoded;
585-
let s = self.utf8_size();
586-
slice.len() == s
587-
&& (s < 1 || slice[0] == e[0])
588-
&& (s < 2 || slice[1] == e[1])
589-
&& (s < 3 || slice[2] == e[2])
590-
&& (s < 4 || slice[3] == e[3])
591-
};
592-
if matched {
593-
// move finger to before the character found (i.e., at its start index)
594-
self.finger_back = found_char;
595-
return Some((self.finger_back, self.finger_back + self.utf8_size()));
564+
#[cfg(not(kani))]
565+
{
566+
let haystack = self.haystack.as_bytes();
567+
loop {
568+
// get the haystack up to but not including the last character searched
569+
let bytes = haystack.get(self.finger..self.finger_back)?;
570+
// the last byte of the utf8 encoded needle
571+
// SAFETY: we have an invariant that `utf8_size < 5`
572+
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size() - 1) };
573+
if let Some(index) = memchr::memrchr(last_byte, bytes) {
574+
// we searched a slice that was offset by self.finger,
575+
// add self.finger to recoup the original index
576+
let index = self.finger + index;
577+
// memrchr will return the index of the byte we wish to
578+
// find. In case of an ASCII character, this is indeed
579+
// were we wish our new finger to be ("after" the found
580+
// char in the paradigm of reverse iteration). For
581+
// multibyte chars we need to skip down by the number of more
582+
// bytes they have than ASCII
583+
let shift = self.utf8_size() - 1;
584+
if index >= shift {
585+
let found_char = index - shift;
586+
if let Some(slice) =
587+
haystack.get(found_char..(found_char + self.utf8_size()))
588+
{
589+
if slice == &self.utf8_encoded[0..self.utf8_size()] {
590+
// move finger to before the character found (i.e., at its start index)
591+
self.finger_back = found_char;
592+
return Some((
593+
self.finger_back,
594+
self.finger_back + self.utf8_size(),
595+
));
596+
}
596597
}
597598
}
599+
// We can't use finger_back = index - size + 1 here. If we found the last char
600+
// of a different-sized character (or the middle byte of a different character)
601+
// we need to bump the finger_back down to `index`. This similarly makes
602+
// `finger_back` have the potential to no longer be on a boundary,
603+
// but this is OK since we only exit this function on a boundary
604+
// or when the haystack has been searched completely.
605+
//
606+
// Unlike next_match this does not
607+
// have the problem of repeated bytes in utf-8 because
608+
// we're searching for the last byte, and we can only have
609+
// found the last byte when searching in reverse.
610+
self.finger_back = index;
611+
} else {
612+
self.finger_back = self.finger;
613+
// found nothing, exit
614+
return None;
598615
}
599-
// We can't use finger_back = index - size + 1 here. If we found the last char
600-
// of a different-sized character (or the middle byte of a different character)
601-
// we need to bump the finger_back down to `index`. This similarly makes
602-
// `finger_back` have the potential to no longer be on a boundary,
603-
// but this is OK since we only exit this function on a boundary
604-
// or when the haystack has been searched completely.
605-
//
606-
// Unlike next_match this does not
607-
// have the problem of repeated bytes in utf-8 because
608-
// we're searching for the last byte, and we can only have
609-
// found the last byte when searching in reverse.
610-
self.finger_back = index;
616+
}
617+
}
618+
// Nondeterministic abstraction for Kani verification.
619+
// Overapproximates all possible behaviors of the real reverse loop:
620+
// either finds a match at some valid position, or exhausts the haystack.
621+
#[cfg(kani)]
622+
{
623+
if self.finger >= self.finger_back {
624+
return None;
625+
}
626+
if kani::any() {
627+
let a: usize = kani::any();
628+
let w = self.utf8_size();
629+
kani::assume(a >= self.finger);
630+
kani::assume(w <= self.finger_back);
631+
kani::assume(a + w <= self.finger_back);
632+
self.finger_back = a;
633+
Some((a, a + w))
611634
} else {
612635
self.finger_back = self.finger;
613-
// found nothing, exit
614-
return None;
636+
None
615637
}
616638
}
617639
}

0 commit comments

Comments
 (0)