TTS examples (#146)

twitchard · web-flow · commit 431ad3c1e6c8 · 2025-02-26T07:22:37.000-08:00
diff --git a/tts-python-example/.env.example b/tts-python-example/.env.example
@@ -0,0 +1 @@
+HUME_API_KEY=<YOUR HUME API KEY>
diff --git a/tts-python-example/README.md b/tts-python-example/README.md
@@ -0,0 +1,39 @@
+## Overview
+
+This project demonstrates how to use [Hume AI](https://hume.ai)'s [OCTAVE TTS API](https://dev.hume.ai/docs/text-to-speech-tts/overview) with Python.
+
+Unlike conventional TTS that merely "reads" words, Octave is a speech-language model that understands what words mean in context, unlocking a new level of expressiveness. It acts out characters, generates voices from prompts, and takes instructions to modify the emotion and style of a given utterance.
+
+See the [Quickstart guide](https://dev.hume.ai/docs/text-to-speech-tts/quickstart/python) for a detailed explanation of the code in this project.
+
+## Instructions
+
+1. Clone this examples repository
+
+    ```shell
+    git clone https://github.com/humeai/hume-api-examples
+    cd hume-api-examples/tts-python-example
+    ```
+
+2. Install dependencies:
+
+    We recommend `uv` but you can adapt these commands to your preferred package manager.
+    ```shell
+    uv sync
+    ```
+
+3. Set up your API keys:
+
+  * Visit the [API keys page](https://platform.hume.ai/settings/keys) on the Hume Platform to retrieve your API key.
+  * Place it in a `.env` file at the project root. You can use the `.env.example` file as a template:
+
+    ```shell
+    cp .env.example .env
+    ```
+
+4. Run project
+
+    ```shell
+    uv run app.py
+    ```
diff --git a/tts-python-example/app.py b/tts-python-example/app.py
@@ -0,0 +1,106 @@
+import os
+import time
+import asyncio
+import base64
+import tempfile
+from pathlib import Path
+from hume import AsyncHumeClient
+from hume.tts import (
+    PostedContextWithGenerationId,
+    PostedUtterance,
+    PostedUtteranceVoiceWithName,
+    ReturnGeneration,
+)
+
+import aiofiles
+
+from dotenv import load_dotenv
+
+# Load variables from a local .env file (if present) into the process
+# environment before HUME_API_KEY is read below.
+load_dotenv()
+
+# Read the API key from the environment and fail fast when it is missing or empty.
+api_key = os.getenv("HUME_API_KEY")
+if not api_key:
+    raise EnvironmentError("HUME_API_KEY not found in environment variables.")
+
+# Client used for every TTS request in this script; only the API key is configured.
+hume = AsyncHumeClient(api_key=api_key)
+
+# Path of a per-run output directory under the system temp folder.
+# The directory itself is created later, in main().
+timestamp = int(time.time() * 1000)  # milliseconds since the epoch
+output_dir = Path(tempfile.gettempdir()) / f"hume-audio-{timestamp}"
+
+
+async def write_result_to_file(base64_encoded_audio: str, filename: str) -> None:
+    """
+    Writes the base64-decoded audio from a generation to a .wav file.
+    """
+    file_path = output_dir / f"{filename}.wav"
+    # Decode the base64-encoded audio data (similar to Buffer.from(..., "base64"))
+    audio_data = base64.b64decode(base64_encoded_audio)
+    async with aiofiles.open(file_path, "wb") as f:
+        await f.write(audio_data)
+    print("Wrote", file_path)
+
+
+async def main() -> None:
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    print("Results will be written to", output_dir)
+
+    # Synthesizing speech with a new voice
+    speech1 = await hume.tts.synthesize_json(
+        utterances=[
+            PostedUtterance(
+                description="A refined, British aristocrat",
+                text="Take an arrow from the quiver.",
+            )
+        ]
+    )
+    await write_result_to_file(speech1.generations[0].audio, "speech1_0")
+
+    name = f"aristocrat-{int(time.time())}"
+    # Naming the voice and saving it to your voice library
+    # for later use
+    generation_id = speech1.generations[0].generation_id
+    await hume.tts.voices.create(
+        name=name, generation_id=generation_id
+    )
+
+    # Continuing previously-generated speech
+    speech2 = await hume.tts.synthesize_json(
+        utterances=[
+            PostedUtterance(
+                # Using a voice from your voice library
+                voice=PostedUtteranceVoiceWithName(name=name),
+                text="Now take a bow.",
+            )
+        ],
+        # Providing previous context to maintain consistency.
+        # This should cause "bow" to rhyme with "toe" and not "cow".
+        context=PostedContextWithGenerationId(generation_id=generation_id),
+        num_generations=2,
+    )
+
+    await write_result_to_file(speech2.generations[0].audio, "speech2_0")
+    await write_result_to_file(speech2.generations[1].audio, "speech2_1")
+
+    # Acting instructions: modulating the speech from a previously-generated voice
+    speech3 = await hume.tts.synthesize_json(
+        utterances=[
+            PostedUtterance(
+                voice=PostedUtteranceVoiceWithName(name=name),
+                description="Murmured softly, with a heavy dose of sarcasm and contempt",
+                text="Does he even know how to use that thing?",
+            )
+        ],
+        context=PostedContextWithGenerationId(
+            generation_id=speech2.generations[0].generation_id
+        ),
+        num_generations=1,
+    )
+    await write_result_to_file(speech3.generations[0].audio, "speech3_0")
+
+
+# Run the walkthrough only when this file is executed as a script.
+# "Done" is printed only after main() returns; an exception raised inside
+# main() propagates out of asyncio.run() and skips the print.
+if __name__ == "__main__":
+    asyncio.run(main())
+    print("Done")
diff --git a/tts-python-example/pyproject.toml b/tts-python-example/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "tts-python-example"
+version = "0.1.0"
+description = "Example of using Hume AI's OCTAVE TTS API with Python"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+  # app.py imports aiofiles directly, so it is declared here instead of
+  # relying on another package to pull it in.
+  "aiofiles>=24.1.0",
+  "hume>=0.7.8",
+  "python-dotenv>=1.0.1",
+]
diff --git a/tts-python-example/uv.lock b/tts-python-example/uv.lock
diff --git a/tts-typescript-example/.env.example b/tts-typescript-example/.env.example
@@ -0,0 +1 @@
+HUME_API_KEY=<YOUR HUME API KEY>
diff --git a/tts-typescript-example/.gitignore b/tts-typescript-example/.gitignore
@@ -0,0 +1,2 @@
+node_modules
+.env
diff --git a/tts-typescript-example/README.md b/tts-typescript-example/README.md
@@ -0,0 +1,36 @@
+## Overview
+
+This project demonstrates how to use [Hume AI](https://hume.ai)'s [OCTAVE TTS API](https://dev.hume.ai/docs/text-to-speech-tts/overview) with TypeScript.
+
+Unlike conventional TTS that merely "reads" words, Octave is a speech-language model that understands what words mean in context, unlocking a new level of expressiveness. It acts out characters, generates voices from prompts, and takes instructions to modify the emotion and style of a given utterance.
+
+See the [Quickstart guide](https://dev.hume.ai/docs/text-to-speech-tts/quickstart/typescript) for a detailed explanation of the code in this project.
+
+## Instructions
+
+1. Clone this examples repository
+
+    ```shell
+    git clone https://github.com/humeai/hume-api-examples
+    cd hume-api-examples/tts-typescript-example
+    ```
+
+2. Install dependencies
+
+    ```shell
+    npm install
+    ```
+
+3. Set up your API keys:
+
+  * Visit the [API keys page](https://platform.hume.ai/settings/keys) on the Hume Platform to retrieve your API key.
+  * Place it in a `.env` file at the project root. You can use the `.env.example` file as a template:
+
+    ```shell
+    cp .env.example .env
+    ```
+4. Run project
+
+    ```shell
+    npx ts-node index.ts
+    ```
diff --git a/tts-typescript-example/index.ts b/tts-typescript-example/index.ts
@@ -0,0 +1,66 @@
+import { HumeClient } from "hume"
+import fs from "fs/promises"
+import path from "path"
+import * as os from "os"
+import dotenv from "dotenv"
+
+// Load variables from a local .env file (if present) into process.env.
+dotenv.config()
+
+// Fail fast with a clear message when the key is missing or empty, instead of
+// asserting it is present and letting a later request fail.
+const apiKey = process.env.HUME_API_KEY
+if (!apiKey) {
+  throw new Error("HUME_API_KEY not found in environment variables.")
+}
+
+// Client used for every TTS request in this script.
+const hume = new HumeClient({ apiKey })
+
+// Path of a per-run output directory under the system temp folder.
+// The directory itself is created later, in main().
+const outputDir = path.join(os.tmpdir(), `hume-audio-${Date.now()}`)
+
+/**
+ * Decodes base64-encoded audio and saves it as `<filename>.wav` inside `outputDir`.
+ *
+ * @param base64EncodedAudio Audio payload encoded as a base64 string.
+ * @param filename Name of the output file, without the `.wav` suffix.
+ */
+const writeResultToFile = async (base64EncodedAudio: string, filename: string) => {
+  const destination = path.join(outputDir, `${filename}.wav`)
+  const audioBytes = Buffer.from(base64EncodedAudio, "base64")
+  await fs.writeFile(destination, audioBytes)
+  console.log('Wrote', destination)
+}
+
+/**
+ * Runs the TTS walkthrough and saves every generated clip as a .wav file.
+ *
+ * Steps, in order:
+ *  1. Synthesize a line with a new voice that is only described in prose.
+ *  2. Save that voice to the voice library under a unique name.
+ *  3. Synthesize a follow-up line with the saved voice, passing the first
+ *     generation as context and requesting two generations.
+ *  4. Synthesize a third line with the saved voice plus acting instructions,
+ *     using the first generation of step 3 as context.
+ */
+const main = async () => {
+  await fs.mkdir(outputDir)
+  console.log('Writing to', outputDir)
+
+  // Step 1: no voice is given, only a description of the desired speaker.
+  const introUtterance = {
+    description: "A refined, British aristocrat",
+    text: "Take an arrow from the quiver.",
+  }
+  const speech1 = await hume.tts.synthesizeJson({ utterances: [introUtterance] })
+  await writeResultToFile(speech1.generations[0].audio, "speech1_0")
+
+  // Step 2: name the voice and store it in the voice library for later use.
+  const name = `aristocrat-${Date.now()}`
+  const firstGenerationId = speech1.generations[0].generationId
+  await hume.tts.voices.create({ name, generationId: firstGenerationId })
+
+  // Step 3: continue the earlier speech using the voice saved above.
+  // The earlier generation is passed as context to keep delivery consistent.
+  const followupUtterance = { voice: { name }, text: "Now take a bow." }
+  const speech2 = await hume.tts.synthesizeJson({
+    utterances: [followupUtterance],
+    context: { generationId: firstGenerationId },
+    numGenerations: 2,
+  })
+  const takes = speech2.generations
+  await writeResultToFile(takes[0].audio, "speech2_0")
+  await writeResultToFile(takes[1].audio, "speech2_1")
+
+  // Step 4: acting instructions modulate the delivery of the saved voice.
+  const sarcasticUtterance = {
+    voice: { name },
+    description: "Murmured softly, with a heavy dose of sarcasm and contempt",
+    text: "Does he even know how to use that thing?",
+  }
+  const previousTakeId = speech2.generations[0].generationId
+  const speech3 = await hume.tts.synthesizeJson({
+    utterances: [sarcasticUtterance],
+    context: { generationId: previousTakeId },
+    numGenerations: 1,
+  })
+  await writeResultToFile(speech3.generations[0].audio, "speech3_0")
+}
+
+// Run the walkthrough. On failure, log the error and mark the process as
+// failed so shells and CI see a non-zero exit status.
+main()
+  .then(() => console.log('Done'))
+  .catch((err) => {
+    console.error(err)
+    process.exitCode = 1
+  })
diff --git a/tts-typescript-example/package-lock.json b/tts-typescript-example/package-lock.json
diff --git a/tts-typescript-example/package.json b/tts-typescript-example/package.json