Skip to content

Commit 431ad3c

Browse files
authored
TTS examples (#146)
1 parent 12d6324 commit 431ad3c

File tree

11 files changed

+955
-0
lines changed

11 files changed

+955
-0
lines changed

tts-python-example/.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
HUME_API_KEY=<YOUR HUME API KEY>

tts-python-example/README.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
## Overview
2+
3+
This project demonstrates how to use [Hume AI](https://hume.ai)'s [OCTAVE TTS API](https://dev.hume.ai/docs/text-to-speech-tts/overview) with Python.
4+
5+
Unlike conventional TTS that merely "reads" words, Octave is a speech-language model that understands what words mean in context, unlocking a new level of expressiveness. It acts out characters, generates voices from prompts, and takes instructions to modify the emotion and style of a given utterance.
6+
7+
See the [Quickstart guide](https://dev.hume.ai/docs/text-to-speech-tts/quickstart/python) for a detailed explanation of the code in this project.
8+
9+
## Instructions
10+
11+
1. Clone this examples repository
12+
13+
```shell
14+
git clone https://github.com/humeai/hume-api-examples
15+
cd hume-api-examples/tts-python-example
16+
```
17+
18+
2. Install dependencies:
19+
20+
We recommend `uv`, but you can adapt these commands to your preferred package manager.
21+
```shell
22+
uv sync
23+
uv pip install -e $HOME/dev/fern-config/fern/apis/unioned/.preview/fern-python-sdk
24+
```
25+
26+
3. Set up your API keys:
27+
28+
* Visit the [API keys page](https://platform.hume.ai/settings/keys) on the Hume Platform to retrieve your API key.
29+
* Place it in a `.env` file at the project root. You can use the `.env.example` file as a template:
30+
31+
```shell
32+
cp .env.example .env
33+
```
34+
35+
4. Run the project:
36+
37+
```shell
38+
uv run app.py
39+
```

tts-python-example/app.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import os
2+
import time
3+
import asyncio
4+
import base64
5+
import tempfile
6+
from pathlib import Path
7+
from hume import AsyncHumeClient
8+
from hume.tts import (
9+
PostedContextWithGenerationId,
10+
PostedUtterance,
11+
PostedUtteranceVoiceWithName,
12+
ReturnGeneration,
13+
)
14+
15+
import aiofiles
16+
17+
from dotenv import load_dotenv
18+
19+
# Load variables from a .env file into the process environment.
load_dotenv()

# The Hume client is authenticated with the key stored in HUME_API_KEY;
# stop immediately with a clear message when that key is missing or empty.
api_key = os.environ.get("HUME_API_KEY")
if not api_key:
    raise EnvironmentError("HUME_API_KEY not found in environment variables.")

hume = AsyncHumeClient(api_key=api_key)

# Each run writes its audio into its own directory under the system temp
# folder, named with the current time in milliseconds.
timestamp = int(time.time() * 1000)
output_dir = Path(tempfile.gettempdir()) / f"hume-audio-{timestamp}"
31+
32+
33+
async def write_result_to_file(base64_encoded_audio: str, filename: str) -> None:
    """Decode base64 audio and save it as ``<filename>.wav`` in ``output_dir``.

    Args:
        base64_encoded_audio: Audio payload of a generation, base64-encoded.
        filename: Name of the output file, without the ``.wav`` extension.
    """
    destination = output_dir / f"{filename}.wav"
    async with aiofiles.open(destination, "wb") as wav_file:
        await wav_file.write(base64.b64decode(base64_encoded_audio))
    print("Wrote", destination)
43+
44+
45+
async def main() -> None:
    """Run the TTS demo and save every generated clip under ``output_dir``.

    The demo synthesizes a line with a voice created from a description,
    saves that voice to the voice library under a timestamped name, then
    reuses the saved voice for a continuation (two generations) and for a
    final line delivered with acting instructions.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    print("Results will be written to", output_dir)

    # Step 1: synthesize speech with a new voice built from a description.
    opening_line = PostedUtterance(
        description="A refined, British aristocrat",
        text="Take an arrow from the quiver.",
    )
    speech1 = await hume.tts.synthesize_json(utterances=[opening_line])
    first_generation = speech1.generations[0]
    await write_result_to_file(first_generation.audio, "speech1_0")

    # Step 2: name the voice and save it to the voice library for later use.
    name = f"aristocrat-{int(time.time())}"
    generation_id = first_generation.generation_id
    await hume.tts.voices.create(name=name, generation_id=generation_id)

    # Step 3: continue the earlier speech using the saved voice. Passing the
    # first generation as context keeps the delivery consistent; it should
    # cause "bow" to rhyme with "toe" and not "cow".
    continuation = PostedUtterance(
        voice=PostedUtteranceVoiceWithName(name=name),
        text="Now take a bow.",
    )
    speech2 = await hume.tts.synthesize_json(
        utterances=[continuation],
        context=PostedContextWithGenerationId(generation_id=generation_id),
        num_generations=2,
    )
    for index, label in enumerate(("speech2_0", "speech2_1")):
        await write_result_to_file(speech2.generations[index].audio, label)

    # Step 4: acting instructions - the description modulates how the saved
    # voice delivers the line, with the first continuation as context.
    aside = PostedUtterance(
        voice=PostedUtteranceVoiceWithName(name=name),
        description="Murmured softly, with a heavy dose of sarcasm and contempt",
        text="Does he even know how to use that thing?",
    )
    previous_take = PostedContextWithGenerationId(
        generation_id=speech2.generations[0].generation_id
    )
    speech3 = await hume.tts.synthesize_json(
        utterances=[aside],
        context=previous_take,
        num_generations=1,
    )
    await write_result_to_file(speech3.generations[0].audio, "speech3_0")
102+
103+
104+
if __name__ == "__main__":
    # Run the async demo to completion, then report that it finished.
    asyncio.run(main())
    print("Done")

tts-python-example/pyproject.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[project]
2+
name = "tts-python-example"
3+
version = "0.1.0"
4+
description = "Add your description here"
5+
readme = "README.md"
6+
requires-python = ">=3.11"
7+
dependencies = [
8+
"hume>=0.7.8",
9+
"python-dotenv>=1.0.1",
10+
]

tts-python-example/uv.lock

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tts-typescript-example/.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
HUME_API_KEY=<YOUR HUME API KEY>

tts-typescript-example/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
node_modules
2+
.env

tts-typescript-example/README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
## Overview
2+
3+
This project demonstrates how to use [Hume AI](https://hume.ai)'s [OCTAVE TTS API](https://dev.hume.ai/docs/text-to-speech-tts/overview) with TypeScript.
4+
5+
Unlike conventional TTS that merely "reads" words, Octave is a speech-language model that understands what words mean in context, unlocking a new level of expressiveness. It acts out characters, generates voices from prompts, and takes instructions to modify the emotion and style of a given utterance.
6+
7+
See the [Quickstart guide](https://dev.hume.ai/docs/text-to-speech-tts/quickstart/typescript) for a detailed explanation of the code in this project.
8+
9+
## Instructions
10+
11+
1. Clone this examples repository
12+
13+
```shell
14+
git clone https://github.com/humeai/hume-api-examples
15+
cd hume-api-examples/tts-typescript-example
16+
```
17+
18+
2. Install dependencies
19+
20+
```shell
21+
npm install
22+
```
23+
24+
3. Set up your API keys:
25+
26+
* Visit the [API keys page](https://platform.hume.ai/settings/keys) on the Hume Platform to retrieve your API key.
27+
* Place it in a `.env` file at the project root. You can use the `.env.example` file as a template:
28+
29+
```shell
30+
cp .env.example .env
31+
```
32+
4. Run the project:
33+
34+
```shell
35+
npx ts-node index.ts
36+
```

tts-typescript-example/index.ts

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import { HumeClient } from "hume"
2+
import fs from "fs/promises"
3+
import path from "path"
4+
import * as os from "os"
5+
import dotenv from "dotenv"
6+
7+
// Load variables from a .env file into process.env.
dotenv.config()

// Fail fast with a clear message when the key is missing, rather than
// silencing the type checker with `!` and handing `undefined` to the client.
const apiKey = process.env.HUME_API_KEY
if (!apiKey) {
  throw new Error("HUME_API_KEY not found in environment variables.")
}

const hume = new HumeClient({ apiKey })

// Each run writes its audio into its own directory under the OS temp folder,
// named with the current time in milliseconds.
const outputDir = path.join(os.tmpdir(), `hume-audio-${Date.now()}`)
14+
15+
/**
 * Decodes base64-encoded audio and saves it as `<filename>.wav` in `outputDir`.
 *
 * @param base64EncodedAudio Audio payload of a generation, base64-encoded.
 * @param filename Name of the output file, without the `.wav` extension.
 */
const writeResultToFile = async (base64EncodedAudio: string, filename: string): Promise<void> => {
  const destination = path.join(outputDir, `${filename}.wav`)
  const audioBytes = Buffer.from(base64EncodedAudio, "base64")
  await fs.writeFile(destination, audioBytes)
  console.log('Wrote', destination)
}
20+
21+
/**
 * Runs the TTS demo and saves every generated clip under `outputDir`.
 *
 * The demo synthesizes a line with a voice created from a description, saves
 * that voice under a timestamped name, then reuses the saved voice for a
 * continuation (two generations) and for a final line with acting
 * instructions.
 */
const main = async () => {
  await fs.mkdir(outputDir)
  console.log('Writing to', outputDir)

  // Step 1: synthesize speech with a new voice built from a description.
  const speech1 = await hume.tts.synthesizeJson({
    utterances: [
      {
        description: "A refined, British aristocrat",
        text: "Take an arrow from the quiver.",
      },
    ],
  })
  const firstGeneration = speech1.generations[0]
  await writeResultToFile(firstGeneration.audio, "speech1_0")

  // Step 2: save the generated voice under a unique name for later use.
  const name = `aristocrat-${Date.now()}`
  await hume.tts.voices.create({
    name,
    generationId: firstGeneration.generationId,
  })

  // Step 3: continue the earlier speech with the saved voice, passing the
  // first generation as context and requesting two generations.
  const speech2 = await hume.tts.synthesizeJson({
    utterances: [{ voice: { name }, text: "Now take a bow." }],
    context: { generationId: firstGeneration.generationId },
    numGenerations: 2,
  })
  const [firstTake, secondTake] = speech2.generations
  await writeResultToFile(firstTake.audio, "speech2_0")
  await writeResultToFile(secondTake.audio, "speech2_1")

  // Step 4: acting instructions - the description modulates how the saved
  // voice delivers the line, with the first continuation as context.
  const speech3 = await hume.tts.synthesizeJson({
    utterances: [
      {
        voice: { name },
        description: "Murmured softly, with a heavy dose of sarcasm and contempt",
        text: "Does he even know how to use that thing?",
      },
    ],
    context: { generationId: firstTake.generationId },
    numGenerations: 1,
  })
  await writeResultToFile(speech3.generations[0].audio, "speech3_0")
}
65+
66+
// Start the demo: log "Done" on success, or the error if any step rejects.
main()
  .then(() => console.log('Done'))
  .catch(console.error)

0 commit comments

Comments
 (0)