@@ -330,7 +330,6 @@ class CLIPTokenizer {
 
         std::smatch matches;
         std::string str = text;
-        std::vector<std::string> token_strs;
         while (std::regex_search(str, matches, pat)) {
             bool skip = on_new_token_cb(str, bpe_tokens);
             if (skip) {
@@ -349,24 +348,14 @@ class CLIPTokenizer {
                 while ((pos = bpe_strs.find(' ', start)) != std::u32string::npos) {
                     auto bpe_str = bpe_strs.substr(start, pos - start);
                     bpe_tokens.push_back(encoder[bpe_str]);
-                    token_strs.push_back(utf32_to_utf8(bpe_str));
-
                     start = pos + 1;
                 }
                 auto bpe_str = bpe_strs.substr(start, bpe_strs.size() - start);
                 bpe_tokens.push_back(encoder[bpe_str]);
-                token_strs.push_back(utf32_to_utf8(bpe_str));
             }
             str = matches.suffix();
         }
-        std::stringstream ss;
-        ss << "[";
-        for (auto token : token_strs) {
-            ss << "\"" << token << "\", ";
-        }
-        ss << "]";
-        // LOG_DEBUG("split prompt \"%s\" to tokens %s", original_text.c_str(), ss.str().c_str());
-        // printf("split prompt \"%s\" to tokens %s \n", original_text.c_str(), ss.str().c_str());
+
         return bpe_tokens;
     }
 };
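
For reference, the loop the hunks above trim down is the usual "split the bpe() result on spaces, look each piece up in the vocabulary" pattern. The sketch below isolates it; split_and_encode and its parameters are stand-in names for illustration, not identifiers from this commit.

// Minimal sketch, assuming bpe() returns merged sub-tokens joined by
// U+0020, so each space-separated piece maps to one vocabulary id.
#include <cstdint>
#include <map>
#include <string>
#include <vector>

std::vector<int32_t> split_and_encode(const std::u32string& bpe_strs,
                                      std::map<std::u32string, int32_t>& encoder) {
    std::vector<int32_t> bpe_tokens;
    size_t start = 0;
    size_t pos;
    while ((pos = bpe_strs.find(U' ', start)) != std::u32string::npos) {
        bpe_tokens.push_back(encoder[bpe_strs.substr(start, pos - start)]);
        start = pos + 1;
    }
    bpe_tokens.push_back(encoder[bpe_strs.substr(start)]);  // last piece has no trailing space
    return bpe_tokens;
}

With the token_strs bookkeeping removed, the loop's only side effect is appending ids to bpe_tokens, which is all that callers of the encoder consume.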
@@ -1093,8 +1082,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
         GGMLModule::compute(get_graph, n_threads, true, output, output_ctx);
     }
 
-    std::pair<std::vector<int>, std::vector<float>> tokenize(std::string text,
-                                                             bool padding = false) {
+    std::pair<std::vector<int>, std::vector<float>> tokenize(std::string text, bool padding = false) {
         return tokenize(text, text_model.n_token, padding);
     }
 
@@ -1348,11 +1336,6 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
 
         pad_tokens(tokens, weights, max_length, padding);
 
-        // for (int i = 0; i < tokens.size(); i++) {
-        //     std::cout << tokens[i] << ":" << weights[i] << ", ";
-        // }
-        // std::cout << std::endl;
-
         return {tokens, weights};
     }
 };
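
pad_tokens itself is not shown in this diff. As a rough sketch of the usual CLIP-style behavior it plausibly implements (truncate to max_length, then right-pad with the end-of-text id at a neutral weight); the function name, signature, and default are assumptions, not taken from the source:

// Hedged sketch only; the real pad_tokens may differ. 49407 is CLIP's
// <|endoftext|> id, commonly reused as the pad id.
#include <vector>

void pad_tokens_sketch(std::vector<int>& tokens, std::vector<float>& weights,
                       size_t max_length, bool padding, int pad_id = 49407) {
    if (tokens.size() > max_length) {
        tokens.resize(max_length);            // truncate ids and weights together
        weights.resize(max_length);
    } else if (padding) {
        tokens.resize(max_length, pad_id);    // right-pad ids
        weights.resize(max_length, 1.0f);     // padded positions get weight 1.0
    }
}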