Skip to content

Commit 7c3cd50

Browse files
authored
Merge pull request #9082 from TCeason/ISSUE-9077
fix(query): vector_const like bug fix
2 parents 2a974cd + 63ef493 commit 7c3cd50

File tree

6 files changed

+14
-74
lines changed

6 files changed

+14
-74
lines changed

Cargo.lock

-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/query/functions-v2/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ ctor = "0.1.26"
3232
hex = "0.4.3"
3333
itertools = "0.10.5"
3434
match-template = "0.0.1"
35-
memchr = "2.5.0"
3635
num-traits = "0.2.15"
3736
once_cell = "1.15.0"
3837
ordered-float = { version = "3.1.0", features = [

src/query/functions-v2/src/scalars/comparison.rs

+1-30
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ use common_expression::FunctionDomain;
3333
use common_expression::FunctionProperty;
3434
use common_expression::FunctionRegistry;
3535
use common_expression::ValueRef;
36-
use memchr::memmem;
3736
use regex::bytes::Regex;
3837

3938
use crate::scalars::string_multi_args::regexp;
@@ -337,26 +336,7 @@ fn register_like(registry: &mut FunctionRegistry) {
337336
let ends_with = &pat[1..];
338337
Ok(str.ends_with(ends_with))
339338
}
340-
PatternType::PatternStr => {
341-
let pattern_str = simdutf8::basic::from_utf8(pat)
342-
.expect("Unable to convert the LIKE pattern to string: {}");
343-
let mut sub_strings: Vec<&str> = pattern_str
344-
.split(|c: char| c == '%' || c == '_' || c == '\\')
345-
.collect();
346-
sub_strings.retain(|&substring| !substring.is_empty());
347-
if std::intrinsics::unlikely(sub_strings.is_empty()) {
348-
Ok(like(str, pat))
349-
} else {
350-
let sub_string = sub_strings[0].as_bytes();
351-
if sub_strings.len() == 1 {
352-
Ok(search_sub_str(str, sub_string).is_some())
353-
} else if memmem::find(str, sub_string).is_none() {
354-
Ok(false)
355-
} else {
356-
Ok(like(str, pat))
357-
}
358-
}
359-
}
339+
PatternType::PatternStr => Ok(like(str, pat)),
360340
}
361341
}),
362342
);
@@ -553,15 +533,6 @@ pub fn check_pattern_type(pattern: &[u8], is_pruning: bool) -> PatternType {
553533
}
554534
}
555535

556-
#[inline]
557-
fn search_sub_str(str: &[u8], substr: &[u8]) -> Option<usize> {
558-
if substr.len() <= str.len() {
559-
str.windows(substr.len()).position(|w| w == substr)
560-
} else {
561-
None
562-
}
563-
}
564-
565536
#[inline]
566537
fn decode_one(data: &[u8]) -> Option<(u8, usize)> {
567538
if data.is_empty() {

src/query/functions/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ h3ron = "0.15.1"
3535
hex = "0.4.3"
3636
itertools = "0.10.5"
3737
md-5 = "0.10.5"
38-
memchr = "2.5.0"
3938
naive-cityhash = "0.2.0"
4039
num = "0.4.0"
4140
num-format = "0.4.0"

src/query/functions/src/scalars/comparisons/comparison_like.rs

+1-40
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
// limitations under the License.
1414

1515
use common_datavalues::prelude::*;
16-
use memchr::memmem;
1716

1817
use super::comparison::StringSearchCreator;
1918
use super::utils::StringSearchImpl;
@@ -58,36 +57,7 @@ impl StringSearchImpl for StringSearchLike {
5857
BooleanColumn::from_iterator(lhs.scalar_iter().map(|x| op(x.ends_with(ends_with))))
5958
}
6059
PatternType::PatternStr => {
61-
let pattern = simdutf8::basic::from_utf8(rhs)
62-
.expect("Unable to convert the LIKE pattern to string: {}");
63-
let mut sub_strings: Vec<&str> = pattern
64-
.split(|c: char| c == '%' || c == '_' || c == '\\')
65-
.collect();
66-
sub_strings.retain(|&substring| !substring.is_empty());
67-
if std::intrinsics::unlikely(sub_strings.is_empty()) {
68-
BooleanColumn::from_iterator(lhs.scalar_iter().map(|x| op(like(x, rhs))))
69-
} else {
70-
let sub_string = sub_strings[0].as_bytes();
71-
// This impl like position function
72-
if sub_strings.len() == 1 {
73-
BooleanColumn::from_iterator(lhs.scalar_iter().map(|x| {
74-
let contain = search_sub_str(x, sub_string);
75-
if contain.is_none() {
76-
op(false)
77-
} else {
78-
op(true)
79-
}
80-
}))
81-
} else {
82-
BooleanColumn::from_iterator(lhs.scalar_iter().map(|x| {
83-
if memmem::find(x, sub_string).is_none() {
84-
op(false)
85-
} else {
86-
op(like(x, rhs))
87-
}
88-
}))
89-
}
90-
}
60+
BooleanColumn::from_iterator(lhs.scalar_iter().map(|x| op(like(x, rhs))))
9161
}
9262
}
9363
}
@@ -169,15 +139,6 @@ pub fn check_pattern_type(pattern: &[u8], is_pruning: bool) -> PatternType {
169139
}
170140
}
171141

172-
#[inline]
173-
fn search_sub_str(str: &[u8], substr: &[u8]) -> Option<usize> {
174-
if substr.len() <= str.len() {
175-
str.windows(substr.len()).position(|w| w == substr)
176-
} else {
177-
None
178-
}
179-
}
180-
181142
#[inline]
182143
fn decode_one(data: &[u8]) -> Option<(u8, usize)> {
183144
if data.is_empty() {

tests/logictest/suites/query/02_function/02_0005_function_compare

+12
Original file line numberDiff line numberDiff line change
@@ -1227,5 +1227,17 @@ SELECT parse_json('"cd"') not regexp '.*';
12271227
----
12281228
0
12291229

1230+
statement ok
1231+
drop table if exists t;
1232+
1233+
statement ok
1234+
create table t(id String);
12301235

1236+
statement ok
1237+
insert into t values('IRxxSIPD');
12311238

1239+
statement query T
1240+
select id from t where id not like '%_SIP';
1241+
1242+
----
1243+
IRxxSIPD

0 commit comments

Comments
 (0)