Streaming TTS

Streaming text to speech

# --no-buffer makes curl write each streamed record to the terminal as soon as
# it arrives instead of holding output back.
curl --request POST \
  --no-buffer \
  --url https://api.kova.ai/v1/tts/stream \
  --header 'Content-Type: application/json' \
  --header 'x-api-key: <api-key>' \
  --data '
{
  "text": "<string>",
  "voice": "<string>",
  "normalize_text": false,
  "response_format": {
    "bitrate": "<string>",
    "encoding": "mp3",
    "sample_rate": 123
  },
  "temperature": 123,
  "timestamps": false
}
'

import requests

url = "https://api.kova.ai/v1/tts/stream"

payload = {
    "text": "<string>",
    "voice": "<string>",
    "normalize_text": False,
    "response_format": {
        "bitrate": "<string>",
        "encoding": "mp3",
        "sample_rate": 123
    },
    "temperature": 123,
    "timestamps": False
}
headers = {
    "x-api-key": "<api-key>",
    "Content-Type": "application/json"
}

# stream=True stops requests from downloading the whole body up front, so each
# record can be handled as soon as the server sends it.
with requests.post(url, json=payload, headers=headers, stream=True) as response:
    # Records are separated by blank lines; iter_lines yields those as empty
    # byte strings, which are skipped here.
    for line in response.iter_lines():
        if line:
            print(line.decode("utf-8"))

const options = {
  method: 'POST',
  headers: {'x-api-key': '<api-key>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    text: '<string>',
    voice: '<string>',
    normalize_text: false,
    response_format: {bitrate: '<string>', encoding: 'mp3', sample_rate: 123},
    temperature: 123,
    timestamps: false
  })
};

// The response is a text/plain stream of "data: {json}" records separated by
// blank lines, not a single JSON document, so it is read incrementally rather
// than with res.json().
fetch('https://api.kova.ai/v1/tts/stream', options)
  .then(async res => {
    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffered = '';

    while (true) {
      const {done, value} = await reader.read();
      if (done) break;
      buffered += decoder.decode(value, {stream: true});

      // A network chunk can end mid-record: keep the trailing partial record
      // in the buffer until its terminating blank line arrives.
      const records = buffered.split('\n\n');
      buffered = records.pop();

      for (const record of records) {
        if (record.startsWith('data: ')) {
          console.log(JSON.parse(record.slice('data: '.length)));
        } else if (record) {
          console.log(record);
        }
      }
    }

    // Anything left over is not a complete record (for example an error body);
    // print it as-is so it is not silently dropped.
    if (buffered.trim()) console.log(buffered);
  })
  .catch(err => console.error(err));

<?php

// JSON request body for the streaming TTS endpoint.
$payload = json_encode([
  'text' => '<string>',
  'voice' => '<string>',
  'normalize_text' => false,
  'response_format' => [
      'bitrate' => '<string>',
      'encoding' => 'mp3',
      'sample_rate' => 123
  ],
  'temperature' => 123,
  'timestamps' => false
]);

// Transfer options: POST the payload and hand the response back as a string.
$options = [
  CURLOPT_URL => "https://api.kova.ai/v1/tts/stream",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json",
    "x-api-key: <api-key>"
  ],
];

$ch = curl_init();
curl_setopt_array($ch, $options);

// Capture both the body and any transport error before releasing the handle.
$body = curl_exec($ch);
$error = curl_error($ch);
curl_close($ch);

// Print the transport error when there is one, otherwise the response body.
echo $error ? "cURL Error #:" . $error : $body;

package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
)

// main sends the streaming TTS request and exits non-zero if any step fails.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run posts the request and copies the response body to stdout as it arrives.
// It returns the first error encountered instead of discarding it.
func run() error {
	url := "https://api.kova.ai/v1/tts/stream"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"voice\": \"<string>\",\n  \"normalize_text\": false,\n  \"response_format\": {\n    \"bitrate\": \"<string>\",\n    \"encoding\": \"mp3\",\n    \"sample_rate\": 123\n  },\n  \"temperature\": 123,\n  \"timestamps\": false\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return fmt.Errorf("build request: %w", err)
	}

	req.Header.Add("x-api-key", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	// res is nil when Do fails, so the error must be checked before res.Body
	// is touched.
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("send request: %w", err)
	}
	defer res.Body.Close()

	// io.Copy forwards data as it is received rather than buffering the whole
	// stream in memory first.
	if _, err := io.Copy(os.Stdout, res.Body); err != nil {
		return fmt.Errorf("read response: %w", err)
	}
	return nil
}

// JSON request body, sent exactly as written.
String requestBody = "{\n  \"text\": \"<string>\",\n  \"voice\": \"<string>\",\n  \"normalize_text\": false,\n  \"response_format\": {\n    \"bitrate\": \"<string>\",\n    \"encoding\": \"mp3\",\n    \"sample_rate\": 123\n  },\n  \"temperature\": 123,\n  \"timestamps\": false\n}";

// POST the body to the streaming endpoint and read the reply as a string.
HttpResponse<String> response = Unirest.post("https://api.kova.ai/v1/tts/stream")
  .header("x-api-key", "<api-key>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint that receives the streaming TTS request.
endpoint = URI("https://api.kova.ai/v1/tts/stream")

# JSON request body, sent exactly as written.
json_body = "{\n  \"text\": \"<string>\",\n  \"voice\": \"<string>\",\n  \"normalize_text\": false,\n  \"response_format\": {\n    \"bitrate\": \"<string>\",\n    \"encoding\": \"mp3\",\n    \"sample_rate\": 123\n  },\n  \"temperature\": 123,\n  \"timestamps\": false\n}"

# Build the POST with its headers and body.
post = Net::HTTP::Post.new(endpoint)
post["x-api-key"] = '<api-key>'
post["Content-Type"] = 'application/json'
post.body = json_body

# TLS-enabled connection to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Send the request and print the response body.
reply = client.request(post)
puts reply.read_body

{
  "detail": [
    {
      "loc": [
        "<string>"
      ],
      "msg": "<string>",
      "type": "<string>",
      "ctx": {},
      "input": "<unknown>"
    }
  ]
}

POST

tts

stream

Streaming text to speech

# --no-buffer makes curl write each streamed record to the terminal as soon as
# it arrives instead of holding output back.
curl --request POST \
  --no-buffer \
  --url https://api.kova.ai/v1/tts/stream \
  --header 'Content-Type: application/json' \
  --header 'x-api-key: <api-key>' \
  --data '
{
  "text": "<string>",
  "voice": "<string>",
  "normalize_text": false,
  "response_format": {
    "bitrate": "<string>",
    "encoding": "mp3",
    "sample_rate": 123
  },
  "temperature": 123,
  "timestamps": false
}
'

import requests

url = "https://api.kova.ai/v1/tts/stream"

payload = {
    "text": "<string>",
    "voice": "<string>",
    "normalize_text": False,
    "response_format": {
        "bitrate": "<string>",
        "encoding": "mp3",
        "sample_rate": 123
    },
    "temperature": 123,
    "timestamps": False
}
headers = {
    "x-api-key": "<api-key>",
    "Content-Type": "application/json"
}

# stream=True stops requests from downloading the whole body up front, so each
# record can be handled as soon as the server sends it.
with requests.post(url, json=payload, headers=headers, stream=True) as response:
    # Records are separated by blank lines; iter_lines yields those as empty
    # byte strings, which are skipped here.
    for line in response.iter_lines():
        if line:
            print(line.decode("utf-8"))

const options = {
  method: 'POST',
  headers: {'x-api-key': '<api-key>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    text: '<string>',
    voice: '<string>',
    normalize_text: false,
    response_format: {bitrate: '<string>', encoding: 'mp3', sample_rate: 123},
    temperature: 123,
    timestamps: false
  })
};

// The response is a text/plain stream of "data: {json}" records separated by
// blank lines, not a single JSON document, so it is read incrementally rather
// than with res.json().
fetch('https://api.kova.ai/v1/tts/stream', options)
  .then(async res => {
    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffered = '';

    while (true) {
      const {done, value} = await reader.read();
      if (done) break;
      buffered += decoder.decode(value, {stream: true});

      // A network chunk can end mid-record: keep the trailing partial record
      // in the buffer until its terminating blank line arrives.
      const records = buffered.split('\n\n');
      buffered = records.pop();

      for (const record of records) {
        if (record.startsWith('data: ')) {
          console.log(JSON.parse(record.slice('data: '.length)));
        } else if (record) {
          console.log(record);
        }
      }
    }

    // Anything left over is not a complete record (for example an error body);
    // print it as-is so it is not silently dropped.
    if (buffered.trim()) console.log(buffered);
  })
  .catch(err => console.error(err));

<?php

// JSON request body for the streaming TTS endpoint.
$payload = json_encode([
  'text' => '<string>',
  'voice' => '<string>',
  'normalize_text' => false,
  'response_format' => [
      'bitrate' => '<string>',
      'encoding' => 'mp3',
      'sample_rate' => 123
  ],
  'temperature' => 123,
  'timestamps' => false
]);

// Transfer options: POST the payload and hand the response back as a string.
$options = [
  CURLOPT_URL => "https://api.kova.ai/v1/tts/stream",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json",
    "x-api-key: <api-key>"
  ],
];

$ch = curl_init();
curl_setopt_array($ch, $options);

// Capture both the body and any transport error before releasing the handle.
$body = curl_exec($ch);
$error = curl_error($ch);
curl_close($ch);

// Print the transport error when there is one, otherwise the response body.
echo $error ? "cURL Error #:" . $error : $body;

package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
)

// main sends the streaming TTS request and exits non-zero if any step fails.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run posts the request and copies the response body to stdout as it arrives.
// It returns the first error encountered instead of discarding it.
func run() error {
	url := "https://api.kova.ai/v1/tts/stream"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"voice\": \"<string>\",\n  \"normalize_text\": false,\n  \"response_format\": {\n    \"bitrate\": \"<string>\",\n    \"encoding\": \"mp3\",\n    \"sample_rate\": 123\n  },\n  \"temperature\": 123,\n  \"timestamps\": false\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return fmt.Errorf("build request: %w", err)
	}

	req.Header.Add("x-api-key", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	// res is nil when Do fails, so the error must be checked before res.Body
	// is touched.
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("send request: %w", err)
	}
	defer res.Body.Close()

	// io.Copy forwards data as it is received rather than buffering the whole
	// stream in memory first.
	if _, err := io.Copy(os.Stdout, res.Body); err != nil {
		return fmt.Errorf("read response: %w", err)
	}
	return nil
}

// JSON request body, sent exactly as written.
String requestBody = "{\n  \"text\": \"<string>\",\n  \"voice\": \"<string>\",\n  \"normalize_text\": false,\n  \"response_format\": {\n    \"bitrate\": \"<string>\",\n    \"encoding\": \"mp3\",\n    \"sample_rate\": 123\n  },\n  \"temperature\": 123,\n  \"timestamps\": false\n}";

// POST the body to the streaming endpoint and read the reply as a string.
HttpResponse<String> response = Unirest.post("https://api.kova.ai/v1/tts/stream")
  .header("x-api-key", "<api-key>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint that receives the streaming TTS request.
endpoint = URI("https://api.kova.ai/v1/tts/stream")

# JSON request body, sent exactly as written.
json_body = "{\n  \"text\": \"<string>\",\n  \"voice\": \"<string>\",\n  \"normalize_text\": false,\n  \"response_format\": {\n    \"bitrate\": \"<string>\",\n    \"encoding\": \"mp3\",\n    \"sample_rate\": 123\n  },\n  \"temperature\": 123,\n  \"timestamps\": false\n}"

# Build the POST with its headers and body.
post = Net::HTTP::Post.new(endpoint)
post["x-api-key"] = '<api-key>'
post["Content-Type"] = 'application/json'
post.body = json_body

# TLS-enabled connection to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Send the request and print the response body.
reply = client.request(post)
puts reply.read_body

{
  "detail": [
    {
      "loc": [
        "<string>"
      ],
      "msg": "<string>",
      "type": "<string>",
      "ctx": {},
      "input": "<unknown>"
    }
  ]
}

POST /v1/tts/stream returns a text/plain body of SSE-style records — one JSON object per data: line, separated by blank lines. The request body is identical to Text to speech. Use streaming when you want to start playback before generation finishes.

The default response encoding is mp3, not raw PCM. Each audio_chunk value is base64-encoded audio in your chosen response_format; base64-decode each chunk and concatenate the decoded bytes, in order, to reconstruct the file.

Event stream format

The response body is a sequence of records:

data: {"type":"audio","audio_chunk":"<base64>"}

data: {"type":"audio","audio_chunk":"<base64>"}

data: {"type":"timestamps","words":["hello"],"start_seconds":[0.0],"end_seconds":[0.3]}

data: {"type":"audio","audio_chunk":"<base64>"}

Each record:

Starts with `data: ` — the literal text data: followed by a single space.
Contains one JSON object.
Ends with \n\n (two newlines).

There is no terminating data: [DONE] — the stream ends when the HTTP body ends.

Event types

audio

// One chunk of the generated audio, delivered as a single stream record.
type AudioEvent = {
  type: "audio";        // discriminator identifying this record as audio
  audio_chunk: string;  // base64-encoded audio bytes in your response_format
};

timestamps (only when `timestamps: true`)

// Word timing data. The three arrays are parallel: words[i] starts at
// start_seconds[i] and ends at end_seconds[i].
type TimestampsEvent = {
  type: "timestamps";       // discriminator identifying this record as timings
  words: string[];          // the words being timed
  start_seconds: number[];  // start time of each word, in seconds
  end_seconds: number[];    // end time of each word, in seconds
};

The three arrays are parallel — words[i] starts at start_seconds[i] and ends at end_seconds[i].

Examples

import asyncio, os
from kova_tts import KovaTTSClient, AudioResponseFormat

async def main():
    """Stream one utterance, log every event, and save the audio to stream.mp3."""
    tts = KovaTTSClient(api_key=os.environ["KOVA_API_KEY"])
    collected = bytearray()

    events = tts.stream_tts(
        text="This is streaming TTS from Kova.",
        voice="cal",
        response_format=AudioResponseFormat(encoding="mp3"),
        timestamps=True,
    )

    async for item in events:
        if item.type == "timestamps":
            print(f"words: {item.words}")
        elif item.type == "audio":
            # Accumulate the audio bytes so the full file can be written once
            # the stream ends.
            collected.extend(item.audio)
            print(f"audio: {len(item.audio)} bytes")

    with open("stream.mp3", "wb") as out_file:
        out_file.write(collected)


asyncio.run(main())

import { KovaTTSClient } from "@kova-ai/tts";
import { writeFile } from "node:fs/promises";

const tts = new KovaTTSClient({ apiKey: process.env.KOVA_API_KEY! });
const pieces: Uint8Array[] = [];

const events = tts.streamTTS({
  text: "This is streaming TTS from Kova.",
  voice: "cal",
  response_format: { encoding: "mp3" },
  timestamps: true,
});

// Collect audio chunks and log each event as it arrives.
for await (const evt of events) {
  if (evt.type === "audio") {
    pieces.push(evt.audio);
    console.log(`audio: ${evt.audio.byteLength} bytes`);
  } else if (evt.type === "timestamps") {
    console.log("words:", evt.words);
  }
}

// Join the chunks into one contiguous buffer before writing the file.
let size = 0;
for (const piece of pieces) {
  size += piece.byteLength;
}
const merged = new Uint8Array(size);
let cursor = 0;
pieces.forEach((piece) => {
  merged.set(piece, cursor);
  cursor += piece.byteLength;
});
await writeFile("stream.mp3", merged);

# Output arrives as `data: {json}` lines, each followed by a blank line.
# To rebuild the file: parse each JSON object, base64-decode `audio_chunk`,
# and concatenate the decoded bytes.
curl --no-buffer https://api.kova.ai/v1/tts/stream \
  --header "x-api-key: $KOVA_API_KEY" \
  --header "content-type: application/json" \
  --data '{
    "text": "This is streaming TTS from Kova.",
    "voice": "cal",
    "response_format": {"encoding": "mp3"},
    "timestamps": true
  }'

linear16 streaming caveat: linear16 re-emits a WAV header on every chunk, so naively concatenating the decoded chunks leaves extra headers embedded in the audio data. If you need raw streaming PCM, use encoding: "pcm" and assemble your own header at the end.

Streaming TTS

Event stream format

Event types

audio

timestamps (only when `timestamps: true`)

Examples

See also

Authorizations

Body

Response

​Event stream format

​Event types

​audio

​timestamps (only when timestamps: true)

​Examples

​See also

Authorizations

Body

Response

Event stream format

Event types

audio

timestamps (only when `timestamps: true`)

Examples

See also