@@ -330,7 +330,6 @@ class CLIPTokenizer {
330
330
331
331
std::smatch matches;
332
332
std::string str = text;
333
- std::vector<std::string> token_strs;
334
333
while (std::regex_search (str, matches, pat)) {
335
334
bool skip = on_new_token_cb (str, bpe_tokens);
336
335
if (skip) {
@@ -349,24 +348,14 @@ class CLIPTokenizer {
349
348
while ((pos = bpe_strs.find (' ' , start)) != std::u32string::npos) {
350
349
auto bpe_str = bpe_strs.substr (start, pos - start);
351
350
bpe_tokens.push_back (encoder[bpe_str]);
352
- token_strs.push_back (utf32_to_utf8 (bpe_str));
353
-
354
351
start = pos + 1 ;
355
352
}
356
353
auto bpe_str = bpe_strs.substr (start, bpe_strs.size () - start);
357
354
bpe_tokens.push_back (encoder[bpe_str]);
358
- token_strs.push_back (utf32_to_utf8 (bpe_str));
359
355
}
360
356
str = matches.suffix ();
361
357
}
362
- std::stringstream ss;
363
- ss << " [" ;
364
- for (auto token : token_strs) {
365
- ss << " \" " << token << " \" , " ;
366
- }
367
- ss << " ]" ;
368
- // LOG_DEBUG("split prompt \"%s\" to tokens %s", original_text.c_str(), ss.str().c_str());
369
- // printf("split prompt \"%s\" to tokens %s \n", original_text.c_str(), ss.str().c_str());
358
+
370
359
return bpe_tokens;
371
360
}
372
361
};
@@ -1093,8 +1082,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
1093
1082
GGMLModule::compute (get_graph, n_threads, true , output, output_ctx);
1094
1083
}
1095
1084
1096
- std::pair<std::vector<int >, std::vector<float >> tokenize (std::string text,
1097
- bool padding = false ) {
1085
+ std::pair<std::vector<int >, std::vector<float >> tokenize (std::string text, bool padding = false ) {
1098
1086
return tokenize (text, text_model.n_token , padding);
1099
1087
}
1100
1088
@@ -1348,11 +1336,6 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
1348
1336
1349
1337
pad_tokens (tokens, weights, max_length, padding);
1350
1338
1351
- // for (int i = 0; i < tokens.size(); i++) {
1352
- // std::cout << tokens[i] << ":" << weights[i] << ", ";
1353
- // }
1354
- // std::cout << std::endl;
1355
-
1356
1339
return {tokens, weights};
1357
1340
}
1358
1341
};
0 commit comments