@@ -436,6 +436,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
436436 }
437437 #[ inline]
438438 fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
439+ #[ cfg( not( kani) ) ]
439440 loop {
440441 // get the haystack after the last character found
441442 let bytes = self . haystack . as_bytes ( ) . get ( self . finger ..self . finger_back ) ?;
@@ -464,23 +465,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
464465 if self . finger >= self . utf8_size ( ) {
465466 let found_char = self . finger - self . utf8_size ( ) ;
466467 if let Some ( slice) = self . haystack . as_bytes ( ) . get ( found_char..self . finger ) {
467- // Under Kani, use an unrolled byte comparison to avoid calling
468- // memcmp, which has internal variables that conflict with CBMC's
469- // loop contract assigns checking. The utf8_size is always 1-4,
470- // so this unrolled comparison is equivalent to slice == &encoded[..].
471- #[ cfg( not( kani) ) ]
472- let matched = slice == & self . utf8_encoded [ 0 ..self . utf8_size ( ) ] ;
473- #[ cfg( kani) ]
474- let matched = {
475- let e = & self . utf8_encoded ;
476- let s = self . utf8_size ( ) ;
477- slice. len ( ) == s
478- && ( s < 1 || slice[ 0 ] == e[ 0 ] )
479- && ( s < 2 || slice[ 1 ] == e[ 1 ] )
480- && ( s < 3 || slice[ 2 ] == e[ 2 ] )
481- && ( s < 4 || slice[ 3 ] == e[ 3 ] )
482- } ;
483- if matched {
468+ if slice == & self . utf8_encoded [ 0 ..self . utf8_size ( ) ] {
484469 return Some ( ( found_char, self . finger ) ) ;
485470 }
486471 }
@@ -491,6 +476,27 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
491476 return None ;
492477 }
493478 }
479+ // Nondeterministic abstraction for Kani verification.
480+ // Overapproximates all possible behaviors of the real loop:
481+ // either finds a match at some valid position, or exhausts the haystack.
482+ #[ cfg( kani) ]
483+ {
484+ if self . finger >= self . finger_back {
485+ return None ;
486+ }
487+ if kani:: any ( ) {
488+ let a: usize = kani:: any ( ) ;
489+ let w = self . utf8_size ( ) ;
490+ kani:: assume ( a >= self . finger ) ;
491+ kani:: assume ( w <= self . finger_back ) ; // avoid overflow
492+ kani:: assume ( a + w <= self . finger_back ) ;
493+ self . finger = a + w;
494+ Some ( ( a, self . finger ) )
495+ } else {
496+ self . finger = self . finger_back ;
497+ None
498+ }
499+ }
494500 }
495501
496502 // Override the default next_reject for unbounded verification.
@@ -555,63 +561,79 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
555561 }
556562 #[ inline]
557563 fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
558- let haystack = self . haystack . as_bytes ( ) ;
559- loop {
560- // get the haystack up to but not including the last character searched
561- let bytes = haystack. get ( self . finger ..self . finger_back ) ?;
562- // the last byte of the utf8 encoded needle
563- // SAFETY: we have an invariant that `utf8_size < 5`
564- let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size ( ) - 1 ) } ;
565- if let Some ( index) = memchr:: memrchr ( last_byte, bytes) {
566- // we searched a slice that was offset by self.finger,
567- // add self.finger to recoup the original index
568- let index = self . finger + index;
569- // memrchr will return the index of the byte we wish to
570- // find. In case of an ASCII character, this is indeed
571- // were we wish our new finger to be ("after" the found
572- // char in the paradigm of reverse iteration). For
573- // multibyte chars we need to skip down by the number of more
574- // bytes they have than ASCII
575- let shift = self . utf8_size ( ) - 1 ;
576- if index >= shift {
577- let found_char = index - shift;
578- if let Some ( slice) = haystack. get ( found_char..( found_char + self . utf8_size ( ) ) ) {
579- // Under Kani, use unrolled byte comparison (see next_match above).
580- #[ cfg( not( kani) ) ]
581- let matched = slice == & self . utf8_encoded [ 0 ..self . utf8_size ( ) ] ;
582- #[ cfg( kani) ]
583- let matched = {
584- let e = & self . utf8_encoded ;
585- let s = self . utf8_size ( ) ;
586- slice. len ( ) == s
587- && ( s < 1 || slice[ 0 ] == e[ 0 ] )
588- && ( s < 2 || slice[ 1 ] == e[ 1 ] )
589- && ( s < 3 || slice[ 2 ] == e[ 2 ] )
590- && ( s < 4 || slice[ 3 ] == e[ 3 ] )
591- } ;
592- if matched {
593- // move finger to before the character found (i.e., at its start index)
594- self . finger_back = found_char;
595- return Some ( ( self . finger_back , self . finger_back + self . utf8_size ( ) ) ) ;
564+ #[ cfg( not( kani) ) ]
565+ {
566+ let haystack = self . haystack . as_bytes ( ) ;
567+ loop {
568+ // get the haystack up to but not including the last character searched
569+ let bytes = haystack. get ( self . finger ..self . finger_back ) ?;
570+ // the last byte of the utf8 encoded needle
571+ // SAFETY: we have an invariant that `utf8_size < 5`
572+ let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size ( ) - 1 ) } ;
573+ if let Some ( index) = memchr:: memrchr ( last_byte, bytes) {
574+ // we searched a slice that was offset by self.finger,
575+ // add self.finger to recoup the original index
576+ let index = self . finger + index;
577+ // memrchr will return the index of the byte we wish to
578+ // find. In case of an ASCII character, this is indeed
579+ // were we wish our new finger to be ("after" the found
580+ // char in the paradigm of reverse iteration). For
581+ // multibyte chars we need to skip down by the number of more
582+ // bytes they have than ASCII
583+ let shift = self . utf8_size ( ) - 1 ;
584+ if index >= shift {
585+ let found_char = index - shift;
586+ if let Some ( slice) =
587+ haystack. get ( found_char..( found_char + self . utf8_size ( ) ) )
588+ {
589+ if slice == & self . utf8_encoded [ 0 ..self . utf8_size ( ) ] {
590+ // move finger to before the character found (i.e., at its start index)
591+ self . finger_back = found_char;
592+ return Some ( (
593+ self . finger_back ,
594+ self . finger_back + self . utf8_size ( ) ,
595+ ) ) ;
596+ }
596597 }
597598 }
599+ // We can't use finger_back = index - size + 1 here. If we found the last char
600+ // of a different-sized character (or the middle byte of a different character)
601+ // we need to bump the finger_back down to `index`. This similarly makes
602+ // `finger_back` have the potential to no longer be on a boundary,
603+ // but this is OK since we only exit this function on a boundary
604+ // or when the haystack has been searched completely.
605+ //
606+ // Unlike next_match this does not
607+ // have the problem of repeated bytes in utf-8 because
608+ // we're searching for the last byte, and we can only have
609+ // found the last byte when searching in reverse.
610+ self . finger_back = index;
611+ } else {
612+ self . finger_back = self . finger ;
613+ // found nothing, exit
614+ return None ;
598615 }
599- // We can't use finger_back = index - size + 1 here. If we found the last char
600- // of a different-sized character (or the middle byte of a different character)
601- // we need to bump the finger_back down to `index`. This similarly makes
602- // `finger_back` have the potential to no longer be on a boundary,
603- // but this is OK since we only exit this function on a boundary
604- // or when the haystack has been searched completely.
605- //
606- // Unlike next_match this does not
607- // have the problem of repeated bytes in utf-8 because
608- // we're searching for the last byte, and we can only have
609- // found the last byte when searching in reverse.
610- self . finger_back = index;
616+ }
617+ }
618+ // Nondeterministic abstraction for Kani verification.
619+ // Overapproximates all possible behaviors of the real reverse loop:
620+ // either finds a match at some valid position, or exhausts the haystack.
621+ #[ cfg( kani) ]
622+ {
623+ if self . finger >= self . finger_back {
624+ return None ;
625+ }
626+ if kani:: any ( ) {
627+ let a: usize = kani:: any ( ) ;
628+ let w = self . utf8_size ( ) ;
629+ kani:: assume ( a >= self . finger ) ;
630+ kani:: assume ( w <= self . finger_back ) ;
631+ kani:: assume ( a + w <= self . finger_back ) ;
632+ self . finger_back = a;
633+ Some ( ( a, a + w) )
611634 } else {
612635 self . finger_back = self . finger ;
613- // found nothing, exit
614- return None ;
636+ None
615637 }
616638 }
617639 }
0 commit comments