- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 795
Add support for the FTS5 trigram tokenizer #1655
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: development
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -74,6 +74,48 @@ public struct FTS5 { | |
| #endif | ||
| } | ||
|  | ||
| #if GRDBCUSTOMSQLITE || GRDBCIPHER | ||
| /// Options for trigram tokenizer character matching. Matches the raw | ||
| /// "case_sensitive" and "remove_diacritics" tokenizer arguments. | ||
| /// | ||
| /// This declaration is the one compiled when `GRDBCUSTOMSQLITE` or | ||
| /// `GRDBCIPHER` is defined. None of its cases carries an availability | ||
| /// restriction. | ||
| /// | ||
| /// Related SQLite documentation: <https://sqlite.org/fts5.html#the_trigram_tokenizer> | ||
| public enum TrigramTokenizerMatching: Sendable { | ||
| /// Case insensitive matching without removing diacritics. This | ||
| /// option matches the raw "case_sensitive=0 remove_diacritics=0" | ||
| /// tokenizer argument. | ||
| case caseInsensitive | ||
| /// Case insensitive matching that removes diacritics before | ||
| /// matching. This option matches the raw | ||
| /// "case_sensitive=0 remove_diacritics=1" tokenizer argument. | ||
| case caseInsensitiveRemovingDiacritics | ||
| /// Case sensitive matching. Diacritics are not removed when | ||
| /// performing case sensitive matching. This option matches the raw | ||
| /// "case_sensitive=1 remove_diacritics=0" tokenizer argument. | ||
| case caseSensitive | ||
| } | ||
|          | ||
| #else | ||
| /// Options for trigram tokenizer character matching. Matches the raw | ||
| /// "case_sensitive" and "remove_diacritics" tokenizer arguments. | ||
| /// | ||
| /// This declaration is the one compiled when neither `GRDBCUSTOMSQLITE` | ||
| /// nor `GRDBCIPHER` is defined. The whole type is gated on OS versions, | ||
| /// and one of its cases is marked unavailable (see the case below). | ||
| /// | ||
| /// Related SQLite documentation: <https://sqlite.org/fts5.html#the_trigram_tokenizer> | ||
| @available(iOS 15, macOS 12, tvOS 15, watchOS 8, *) // These OS versions provide SQLite 3.35.0+ (the trigram tokenizer itself needs 3.34) | ||
| public enum TrigramTokenizerMatching: Sendable { | ||
| /// Case insensitive matching without removing diacritics. This | ||
| /// option matches the raw "case_sensitive=0 remove_diacritics=0" | ||
| /// tokenizer argument. | ||
| case caseInsensitive | ||
| /// Case insensitive matching that removes diacritics before | ||
| /// matching. This option matches the raw | ||
| /// "case_sensitive=0 remove_diacritics=1" tokenizer argument. | ||
| /// | ||
| /// Marked unavailable in this build configuration: as the attribute | ||
| /// message states, it requires an OS release that includes | ||
| /// SQLite >=3.45. | ||
| @available(*, unavailable, message: "Requires a future OS release that includes SQLite >=3.45") | ||
| case caseInsensitiveRemovingDiacritics | ||
| /// Case sensitive matching. Diacritics are not removed when | ||
| /// performing case sensitive matching. This option matches the raw | ||
| /// "case_sensitive=1 remove_diacritics=0" tokenizer argument. | ||
| case caseSensitive | ||
| } | ||
| #endif | ||
|  | ||
| /// Creates an FTS5 module. | ||
| /// | ||
| /// For example: | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -148,11 +148,11 @@ extension FTS5Tokenizer { | |
| private func tokenize(_ string: String, for tokenization: FTS5Tokenization) | ||
| throws -> [(token: String, flags: FTS5TokenFlags)] | ||
| { | ||
| try ContiguousArray(string.utf8).withUnsafeBufferPointer { buffer -> [(String, FTS5TokenFlags)] in | ||
| try string.utf8CString.withUnsafeBufferPointer { buffer -> [(String, FTS5TokenFlags)] in | ||
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ❤️ | ||
| guard let addr = buffer.baseAddress else { | ||
| return [] | ||
| } | ||
| let pText = UnsafeMutableRawPointer(mutating: addr).assumingMemoryBound(to: CChar.self) | ||
| let pText = addr | ||
| let nText = CInt(buffer.count) | ||
|  | ||
| var context = TokenizeContext() | ||
|  | ||
Uh oh!
There was an error while loading. Please reload this page.