Skip to content

Commit 43de67c

Browse files
committed
work
1 parent 883a783 commit 43de67c

File tree

1 file changed

+2
-19
lines changed

1 file changed

+2
-19
lines changed

clip.hpp

+2 -19
Original file line number | Diff line number | Diff line change
@@ -330,7 +330,6 @@ class CLIPTokenizer {
330330

331331
std::smatch matches;
332332
std::string str = text;
333-
std::vector<std::string> token_strs;
334333
while (std::regex_search(str, matches, pat)) {
335334
bool skip = on_new_token_cb(str, bpe_tokens);
336335
if (skip) {
@@ -349,24 +348,14 @@ class CLIPTokenizer {
349348
while ((pos = bpe_strs.find(' ', start)) != std::u32string::npos) {
350349
auto bpe_str = bpe_strs.substr(start, pos - start);
351350
bpe_tokens.push_back(encoder[bpe_str]);
352-
token_strs.push_back(utf32_to_utf8(bpe_str));
353-
354351
start = pos + 1;
355352
}
356353
auto bpe_str = bpe_strs.substr(start, bpe_strs.size() - start);
357354
bpe_tokens.push_back(encoder[bpe_str]);
358-
token_strs.push_back(utf32_to_utf8(bpe_str));
359355
}
360356
str = matches.suffix();
361357
}
362-
std::stringstream ss;
363-
ss << "[";
364-
for (auto token : token_strs) {
365-
ss << "\"" << token << "\", ";
366-
}
367-
ss << "]";
368-
// LOG_DEBUG("split prompt \"%s\" to tokens %s", original_text.c_str(), ss.str().c_str());
369-
// printf("split prompt \"%s\" to tokens %s \n", original_text.c_str(), ss.str().c_str());
358+
370359
return bpe_tokens;
371360
}
372361
};
@@ -1093,8 +1082,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
10931082
GGMLModule::compute(get_graph, n_threads, true, output, output_ctx);
10941083
}
10951084

1096-
std::pair<std::vector<int>, std::vector<float>> tokenize(std::string text,
1097-
bool padding = false) {
1085+
std::pair<std::vector<int>, std::vector<float>> tokenize(std::string text, bool padding = false) {
10981086
return tokenize(text, text_model.n_token, padding);
10991087
}
11001088

@@ -1348,11 +1336,6 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
13481336

13491337
pad_tokens(tokens, weights, max_length, padding);
13501338

1351-
// for (int i = 0; i < tokens.size(); i++) {
1352-
// std::cout << tokens[i] << ":" << weights[i] << ", ";
1353-
// }
1354-
// std::cout << std::endl;
1355-
13561339
return {tokens, weights};
13571340
}
13581341
};

0 commit comments

Comments
 (0)