
Commit 422724e

Add tokenize detokenize compatibility
1 parent 70dd0b7 commit 422724e

4 files changed: 82 additions, 0 deletions

examples/tokenization.py

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+import ollama
+
+# Get tokens from a model
+response = ollama.tokenize(model='llama3.2', text='Hello world!')
+tokens = response.tokens
+print('tokens from model', tokens)
+
+# Convert tokens back to text
+response = ollama.detokenize(model='llama3.2', tokens=tokens)
+print('text from tokens', response.text)  # Prints: Hello world!
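
The commit also adds async variants of these calls on AsyncClient (see ollama/_client.py below), so the same round trip can be written asynchronously. A minimal sketch, not part of the committed example, assuming a local Ollama server with the llama3.2 model available:

import asyncio

import ollama


async def main():
  # Tokenize and detokenize with the async client added in this commit
  client = ollama.AsyncClient()
  tokenized = await client.tokenize(model='llama3.2', text='Hello world!')
  detokenized = await client.detokenize(model='llama3.2', tokens=tokenized.tokens)
  print(detokenized.text)  # Expected: Hello world!


asyncio.run(main())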

ollama/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -12,6 +12,8 @@
   ListResponse,
   ShowResponse,
   ProcessResponse,
+  TokenizeResponse,
+  DetokenizeResponse,
   RequestError,
   ResponseError,
 )
@@ -31,6 +33,8 @@
   'ListResponse',
   'ShowResponse',
   'ProcessResponse',
+  'TokenizeResponse',
+  'DetokenizeResponse',
   'RequestError',
   'ResponseError',
 ]
@@ -49,3 +53,5 @@
 copy = _client.copy
 show = _client.show
 ps = _client.ps
+tokenize = _client.tokenize
+detokenize = _client.detokenize
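
These exports make the new calls available both as module-level shortcuts (backed by the default _client instance, like copy/show/ps above) and as typed responses importable from ollama. A small usage sketch, not from the commit, assuming a running local server:

from ollama import TokenizeResponse, tokenize

# The module-level shortcut delegates to the package's default Client
resp = tokenize(model='llama3.2', text='Hello world!')
assert isinstance(resp, TokenizeResponse)
print(resp.tokens)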

ollama/_client.py

Lines changed: 48 additions & 0 deletions
@@ -48,6 +48,8 @@
   CreateRequest,
   CopyRequest,
   DeleteRequest,
+  DetokenizeRequest,
+  DetokenizeResponse,
   EmbedRequest,
   EmbedResponse,
   EmbeddingsRequest,
@@ -67,6 +69,8 @@
   ShowRequest,
   ShowResponse,
   StatusResponse,
+  TokenizeRequest,
+  TokenizeResponse,
   Tool,
 )

@@ -611,6 +615,28 @@ def ps(self) -> ProcessResponse:
       '/api/ps',
     )

+  def tokenize(self, model: str, text: str) -> TokenizeResponse:
+    return self._request(
+      TokenizeResponse,
+      'POST',
+      '/api/tokenize',
+      json=TokenizeRequest(
+        model=model,
+        text=text,
+      ).model_dump(exclude_none=True),
+    )
+
+  def detokenize(self, model: str, tokens: Sequence[int]) -> DetokenizeResponse:
+    return self._request(
+      DetokenizeResponse,
+      'POST',
+      '/api/detokenize',
+      json=DetokenizeRequest(
+        model=model,
+        tokens=tokens,
+      ).model_dump(exclude_none=True),
+    )
+

 class AsyncClient(BaseClient):
   def __init__(self, host: Optional[str] = None, **kwargs) -> None:
@@ -1120,6 +1146,28 @@ async def ps(self) -> ProcessResponse:
       '/api/ps',
     )

+  async def tokenize(self, model: str, text: str) -> TokenizeResponse:
+    return await self._request(
+      TokenizeResponse,
+      'POST',
+      '/api/tokenize',
+      json=TokenizeRequest(
+        model=model,
+        text=text,
+      ).model_dump(exclude_none=True),
+    )
+
+  async def detokenize(self, model: str, tokens: Sequence[int]) -> DetokenizeResponse:
+    return await self._request(
+      DetokenizeResponse,
+      'POST',
+      '/api/detokenize',
+      json=DetokenizeRequest(
+        model=model,
+        tokens=tokens,
+      ).model_dump(exclude_none=True),
+    )
+

 def _copy_messages(messages: Optional[Sequence[Union[Mapping[str, Any], Message]]]) -> Iterator[Message]:
   for message in messages or []:
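
Both the sync and async methods are thin wrappers around _request: the pydantic request model is serialized with model_dump(exclude_none=True) and POSTed as JSON, and the reply is validated into the response model. Roughly, Client.tokenize corresponds to the raw request below; this is a hedged sketch using httpx directly, and the host URL is an assumption (Ollama's usual local address):

import httpx

# Approximately what Client().tokenize(model='llama3.2', text='Hello world!') sends.
# The JSON body is TokenizeRequest(...).model_dump(exclude_none=True).
reply = httpx.post(
  'http://localhost:11434/api/tokenize',  # assumed default local Ollama host
  json={'model': 'llama3.2', 'text': 'Hello world!'},
)
reply.raise_for_status()
print(reply.json()['tokens'])  # the client parses this into TokenizeResponse.tokens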

ollama/_types.py

Lines changed: 18 additions & 0 deletions
@@ -494,6 +494,24 @@ class Model(SubscriptableBaseModel):
   models: Sequence[Model]


+class TokenizeRequest(BaseRequest):
+  model: str
+  text: str
+
+
+class TokenizeResponse(BaseGenerateResponse):
+  tokens: Sequence[int]
+
+
+class DetokenizeRequest(BaseRequest):
+  model: str
+  tokens: Sequence[int]
+
+
+class DetokenizeResponse(BaseGenerateResponse):
+  text: str
+
+
 class RequestError(Exception):
   """
   Common class for request errors.
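
The new models follow the same pattern as the existing types in _types.py: request models are serialized for the wire, response models validate what the server returns. A small sketch, not part of the commit, exercising them directly (the token IDs are placeholder values):

from ollama._types import DetokenizeRequest, TokenizeResponse

# Request models drop unset optional fields when dumped, matching the client's usage,
# e.g. {'model': 'llama3.2', 'tokens': [9906, 1917, 0]}
req = DetokenizeRequest(model='llama3.2', tokens=[9906, 1917, 0])  # placeholder token IDs
print(req.model_dump(exclude_none=True))

# Response models validate server JSON; fields inherited from BaseGenerateResponse stay optional
resp = TokenizeResponse.model_validate({'tokens': [9906, 1917, 0]})
print(resp.tokens)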
