From 4f97387575b01c64806430b2a94ff654d994e5b8 Mon Sep 17 00:00:00 2001
From: Julien-cpsn
Date: Sat, 31 Aug 2024 02:05:41 +0200
Subject: [PATCH] Added rayon feature

---
 Cargo.toml |   9 ++++
 README.md  |  13 +++++
 src/lib.rs | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 190 insertions(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 0a8814d..82d4783 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,6 +11,15 @@ keywords = ["string", "slice", "substring", "unicode", "utf-8"]
 categories = ["no-std", "algorithms", "parsing", "rust-patterns", "text-processing"]
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[dependencies]
+rayon_ = { package = "rayon", version = "1.10.0", optional = true }
+
+[features]
+std = []
+rayon = ["std", "rayon_"]
+
+[package.metadata.docs.rs]
+features = ["std", "rayon"]
 
 [dev-dependencies.cargo-husky]
 version = "1"
diff --git a/README.md b/README.md
index 271073f..a4a25d5 100644
--- a/README.md
+++ b/README.md
@@ -60,6 +60,19 @@ use stringslice::StringSlice;
 assert_eq!("string".try_slice(4..2), None);
 ```
 
+## Run in parallel
+
+Enable the `rayon` feature to get parallel versions of the slicing methods. They use the [rayon](https://github.com/rayon-rs/rayon) crate to spread the character scan over several threads.
+
+**Par**allel methods:
+- `par_slice`
+- `par_try_slice`
+- `par_substring`
+- `par_try_substring`
+
+> [!WARNING]
+> The **par**allel methods only pay off on large strings; on short strings the threading overhead makes them slower than the sequential methods.
+
 ## Licence
 
 Licensed under either of
diff --git a/src/lib.rs b/src/lib.rs
index ddf4fb8..29283d5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -42,9 +42,18 @@
 //!
 
 #![no_std]
-
+#[cfg(not(feature = "std"))]
 use core::ops::{Bound, RangeBounds};
 
+#[cfg(feature = "std")]
+use std::ops::{Bound, RangeBounds};
+
+#[cfg(feature = "std")]
+extern crate std;
+
+#[cfg(feature = "rayon")]
+use rayon_::prelude::*;
+
 #[inline]
 fn range_to_begin_end(range: impl RangeBounds<usize>) -> (usize, usize) {
     let begin = match range.start_bound() {
@@ -57,6 +66,7 @@ fn range_to_begin_end(range: impl RangeBounds<usize>) -> (usize, usize) {
         Bound::Included(&b) => b + 1,
         Bound::Excluded(&b) => b,
         // Note: using core::usize::MAX rather than usize::MAX for compatibility with Rust < 1.43
+        #[allow(clippy::legacy_numeric_constants)]
         Bound::Unbounded => core::usize::MAX,
     };
 
@@ -125,6 +135,74 @@ pub trait StringSlice {
     /// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html
     /// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
     fn try_substring(&self, begin: usize, end: usize) -> Option<&str>;
+
+    #[cfg(feature = "rayon")]
+    /// Returns a string slice for the given range of characters
+    ///
+    /// This method will panic if the range is invalid,
+    /// for example if the beginning is greater than the end.
+    ///
+    /// Runs in parallel
+    ///
+    /// # Examples
+    /// ```
+    /// use stringslice::StringSlice;
+    ///
+    /// assert_eq!("Ùníc😎de".par_slice(4..5), "😎");
+    /// ```
+    fn par_slice(&self, range: impl RangeBounds<usize>) -> &str;
+
+    #[cfg(feature = "rayon")]
+    /// Returns an [`Option`] containing string slice for the given range of characters
+    ///
+    /// This method will return [`None`] if the range is invalid,
+    /// for example if the beginning is greater than the end.
+    ///
+    /// Runs in parallel
+    ///
+    /// # Examples
+    /// ```
+    /// use stringslice::StringSlice;
+    ///
+    /// assert_eq!("Ùníc😎de".par_try_slice(4..5), Some("😎"));
+    /// ```
+    /// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html
+    /// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
+    fn par_try_slice(&self, range: impl RangeBounds<usize>) -> Option<&str>;
+
+    #[cfg(feature = "rayon")]
+    /// Returns a string slice between the given beginning and end characters
+    ///
+    /// This method will panic if the parameters are invalid,
+    /// for example if the beginning is greater than the end.
+    ///
+    /// Runs in parallel
+    ///
+    /// # Examples
+    /// ```
+    /// use stringslice::StringSlice;
+    ///
+    /// assert_eq!("Ùníc😎de".par_substring(4, 5), "😎");
+    /// ```
+    fn par_substring(&self, begin: usize, end: usize) -> &str;
+
+    #[cfg(feature = "rayon")]
+    /// Returns an [`Option`] containing string slice between the given beginning and end characters
+    ///
+    /// This method will return [`None`] if the parameters are invalid,
+    /// for example if the beginning is greater than the end.
+    ///
+    /// Runs in parallel
+    ///
+    /// # Examples
+    /// ```
+    /// use stringslice::StringSlice;
+    ///
+    /// assert_eq!("Ùníc😎de".par_try_substring(4, 5), Some("😎"));
+    /// ```
+    /// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html
+    /// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
+    fn par_try_substring(&self, begin: usize, end: usize) -> Option<&str>;
 }
 
 impl StringSlice for str {
@@ -160,6 +238,82 @@ impl StringSlice for str {
                 begin_ch
             };
 
+            // Note (unsafe): Since we iterate character indices we can be sure that `begin_ch` and
+            // `end_ch` are on UTF-8 boundaries. For performance, we use get_unchecked rather than
+            // simply indexing.
+            unsafe { Some(self.get_unchecked(begin_ch..end_ch)) }
+        }
+    }
+
+    #[cfg(feature = "rayon")]
+    #[inline]
+    fn par_slice(&self, range: impl RangeBounds<usize>) -> &str {
+        let (begin, end) = range_to_begin_end(range);
+        self.par_substring(begin, end)
+    }
+
+    #[cfg(feature = "rayon")]
+    #[inline]
+    fn par_try_slice(&self, range: impl RangeBounds<usize>) -> Option<&str> {
+        let (begin, end) = range_to_begin_end(range);
+        self.par_try_substring(begin, end)
+    }
+
+    #[cfg(feature = "rayon")]
+    #[inline]
+    fn par_substring(&self, begin: usize, end: usize) -> &str {
+        self.par_try_substring(begin, end)
+            .expect("begin < end when slicing string")
+    }
+
+    #[cfg(feature = "rayon")]
+    fn par_try_substring(&self, begin: usize, end: usize) -> Option<&str> {
+        // Number of bytes scanned by each rayon task.
+        const CHUNK_LEN: usize = 64 * 1024;
+
+        // A byte starts a character unless it is a UTF-8 continuation byte (`0b10xx_xxxx`).
+        fn is_char_start(byte: u8) -> bool {
+            (byte & 0xC0) != 0x80
+        }
+
+        if begin > end {
+            None
+        } else {
+            let bytes = self.as_bytes();
+            let len = bytes.len();
+
+            // rayon's parallel iterators have no `nth`, so count the characters of every chunk
+            // in parallel (collecting an indexed iterator keeps the chunk order).
+            let chunk_chars: std::vec::Vec<usize> = bytes
+                .par_chunks(CHUNK_LEN)
+                .map(|chunk| chunk.iter().filter(|&&byte| is_char_start(byte)).count())
+                .collect();
+
+            // Byte offset of the `n`-th character, or `len` if the string has fewer characters.
+            let char_offset = |n: usize| {
+                let mut skipped = 0;
+                for (chunk, &chars) in chunk_chars.iter().enumerate() {
+                    if n - skipped < chars {
+                        let start = chunk * CHUNK_LEN;
+                        return bytes[start..]
+                            .iter()
+                            .enumerate()
+                            .filter(|&(_, &byte)| is_char_start(byte))
+                            .nth(n - skipped)
+                            .map_or(len, |(i, _)| start + i);
+                    }
+                    skipped += chars;
+                }
+                len
+            };
+
+            let begin_ch = char_offset(begin);
+            let end_ch = if end > begin {
+                char_offset(end)
+            } else {
+                begin_ch
+            };
+
             // Note (unsafe): Since we iterate character indices we can be sure that `begin_ch` and
             // `end_ch` are on UTF-8 boundaries. For performance we use get_unchecked rather than
             // simply indexing.
@@ -170,8 +324,12 @@ impl StringSlice for str {
 
 #[cfg(test)]
 mod tests {
+    #[cfg(not(feature = "std"))]
     use core::ops::Bound;
 
+    #[cfg(feature = "std")]
+    use std::ops::Bound;
+
     use super::StringSlice;
 
     #[test]
@@ -218,4 +376,13 @@ mod tests {
             "str"
         );
     }
+
+    #[cfg(feature = "rayon")]
+    #[test]
+    fn par_test_utf8() {
+        let str = "🗻∈🌏";
+        assert_eq!("🗻", str.par_slice(0..1));
+        assert_eq!("∈", str.par_slice(1..2));
+        assert_eq!("🌏", str.par_slice(2..3));
+    }
 }