Skip to content

Commit 0579af3

Browse files
committed
feat: Add Voice API endpoints and AsyncAPI specifications
1 parent 926b656 commit 0579af3

5 files changed

Lines changed: 1355 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ The major and minor version numbers reflect changes to the DeepL API
99
number is used only for corrections to the OpenAPI specification, for example:
1010
typos, schema fixes, or adding examples.
1111

12+
## [3.7.0] - 2025-11-10
13+
### Added
14+
* Add new endpoints `/v1/voice/realtime` (REST) and `/v1/voice/realtime/connect` (WebSocket) which enable real-time voice transcription and translation via WebSocket streaming
15+
* Add AsyncAPI specifications - `voice.asyncapi.yaml` and `voice.asyncapi.json` - documenting the Voice WebSocket streaming protocol
16+
1217
## [3.6.1] - 2025-11-06
1318
### Changed
1419
* Hebrew (`HE`), Thai (`TH`), and Vietnamese (`VI`) are now included in the `/v2/languages` endpoint response, as they support document translation in addition to text translation

openapi.json

Lines changed: 208 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"name": "DeepL - Contact us",
99
"url": "https://www.deepl.com/contact-us"
1010
},
11-
"version": "3.6.1"
11+
"version": "3.7.0"
1212
},
1313
"externalDocs": {
1414
"description": "DeepL Pro - Plans and pricing",
@@ -48,6 +48,10 @@
4848
{
4949
"name": "MetaInformation",
5050
"description": "Information about API usage and value ranges"
51+
},
52+
{
53+
"name": "VoiceAPI",
54+
"description": "The Voice API provides real-time voice transcription and translation services.\nUse a two-step flow: first request a streaming URL via REST, then establish a WebSocket connection for streaming audio and receiving transcriptions."
5155
}
5256
],
5357
"paths": {
@@ -2662,6 +2666,119 @@
26622666
}
26632667
]
26642668
}
2669+
},
2670+
"/v1/voice/realtime": {
2671+
"post": {
2672+
"tags": [
2673+
"VoiceAPI"
2674+
],
2675+
"summary": "Get Streaming URL",
2676+
"operationId": "getVoiceStreamingUrl",
2677+
"requestBody": {
2678+
"required": true,
2679+
"content": {
2680+
"application/json": {
2681+
"schema": {
2682+
"type": "object",
2683+
"required": [
2684+
"media_content_type"
2685+
],
2686+
"properties": {
2687+
"media_content_type": {
2688+
"$ref": "#/components/schemas/VoiceMediaContentType"
2689+
},
2690+
"source_language": {
2691+
"$ref": "#/components/schemas/VoiceSourceLanguage"
2692+
},
2693+
"source_language_mode": {
2694+
"$ref": "#/components/schemas/VoiceSourceLanguageMode"
2695+
},
2696+
"target_languages": {
2697+
"$ref": "#/components/schemas/VoiceTargetLanguages"
2698+
},
2699+
"glossary_id": {
2700+
"$ref": "#/components/schemas/GlossaryId"
2701+
},
2702+
"formality": {
2703+
"$ref": "#/components/schemas/Formality"
2704+
}
2705+
}
2706+
},
2707+
"examples": {
2708+
"basic": {
2709+
"summary": "Basic configuration",
2710+
"value": {
2711+
"media_content_type": "audio/ogg; codecs=opus",
2712+
"source_language": "en",
2713+
"source_language_mode": "auto",
2714+
"target_languages": [
2715+
"de",
2716+
"fr",
2717+
"es"
2718+
]
2719+
}
2720+
},
2721+
"with_glossary": {
2722+
"summary": "With glossary and formality",
2723+
"value": {
2724+
"media_content_type": "audio/pcm; encoding=s16le; rate=16000",
2725+
"source_language": "en",
2726+
"source_language_mode": "fixed",
2727+
"target_languages": [
2728+
"de",
2729+
"fr"
2730+
],
2731+
"glossary_id": "def3a26b-3e84-45b3-84ae-0c0aaf3525f7",
2732+
"formality": "more"
2733+
}
2734+
}
2735+
}
2736+
}
2737+
}
2738+
},
2739+
"responses": {
2740+
"200": {
2741+
"description": "Successfully obtained streaming URL and token",
2742+
"content": {
2743+
"application/json": {
2744+
"schema": {
2745+
"$ref": "#/components/schemas/VoiceStreamingResponse"
2746+
},
2747+
"example": {
2748+
"streaming_url": "wss://api.deepl.com/v1/voice/realtime/connect",
2749+
"token": "VGhpcyBpcyBhIGZha2UgdG9rZW4K"
2750+
}
2751+
}
2752+
}
2753+
},
2754+
"400": {
2755+
"$ref": "#/components/responses/BadRequest"
2756+
},
2757+
"401": {
2758+
"$ref": "#/components/responses/Unauthorized"
2759+
},
2760+
"403": {
2761+
"$ref": "#/components/responses/Forbidden"
2762+
},
2763+
"429": {
2764+
"$ref": "#/components/responses/TooManyRequests"
2765+
},
2766+
"456": {
2767+
"$ref": "#/components/responses/QuotaExceeded"
2768+
},
2769+
"500": {
2770+
"$ref": "#/components/responses/InternalServerError"
2771+
},
2772+
"503": {
2773+
"$ref": "#/components/responses/ServiceUnavailable"
2774+
}
2775+
},
2776+
"security": [
2777+
{
2778+
"auth_header": []
2779+
}
2780+
]
2781+
}
26652782
}
26662783
},
26672784
"components": {
@@ -3591,6 +3708,96 @@
35913708
],
35923709
"example": "de"
35933710
},
3711+
"VoiceMediaContentType": {
3712+
"type": "string",
3713+
"description": "The audio format for streaming. Specifies container, codec, and encoding parameters.\nSupported formats include PCM (recommended), OPUS (recommended for low bandwidth), FLAC, MP3, and AAC.\nPCM formats require an explicit sample rate. Mono audio only.",
3714+
"enum": [
3715+
"audio/auto",
3716+
"audio/flac",
3717+
"audio/mpeg",
3718+
"audio/ogg",
3719+
"audio/webm",
3720+
"audio/x-matroska",
3721+
"audio/ogg; codecs=flac",
3722+
"audio/ogg; codecs=opus",
3723+
"audio/pcm; encoding=s16le; rate=8000",
3724+
"audio/pcm; encoding=s16le; rate=16000",
3725+
"audio/pcm; encoding=s16le; rate=44100",
3726+
"audio/pcm; encoding=s16le; rate=48000",
3727+
"audio/webm; codecs=opus",
3728+
"audio/x-matroska; codecs=aac",
3729+
"audio/x-matroska; codecs=flac",
3730+
"audio/x-matroska; codecs=mp3",
3731+
"audio/x-matroska; codecs=opus"
3732+
],
3733+
"example": "audio/ogg; codecs=opus"
3734+
},
3735+
"VoiceSourceLanguage": {
3736+
"type": "string",
3737+
"description": "Source language of the audio stream. Must be one of the supported Voice API source languages.\nThe language identifier must be a valid IETF BCP 47 language tag.",
3738+
"enum": [
3739+
"zh",
3740+
"nl",
3741+
"en",
3742+
"fr",
3743+
"de",
3744+
"id",
3745+
"it",
3746+
"ja",
3747+
"ko",
3748+
"pl",
3749+
"pt",
3750+
"ro",
3751+
"ru",
3752+
"es",
3753+
"sv",
3754+
"tr",
3755+
"uk"
3756+
],
3757+
"example": "en"
3758+
},
3759+
"VoiceSourceLanguageMode": {
3760+
"type": "string",
3761+
"description": "Controls how the source_language value is used.\n- `auto`: Treats source language as a hint; server can override\n- `fixed`: Treats source language as mandatory; server must use this language",
3762+
"enum": [
3763+
"auto",
3764+
"fixed"
3765+
],
3766+
"default": "auto",
3767+
"example": "auto"
3768+
},
3769+
"VoiceStreamingResponse": {
3770+
"type": "object",
3771+
"required": [
3772+
"streaming_url",
3773+
"token"
3774+
],
3775+
"properties": {
3776+
"streaming_url": {
3777+
"type": "string",
3778+
"description": "The WebSocket URL to use for establishing the streaming connection. This URL is ephemeral and valid for one-time use only.",
3779+
"example": "wss://api.deepl.com/v1/voice/realtime/connect"
3780+
},
3781+
"token": {
3782+
"type": "string",
3783+
"description": "A unique ephemeral token for authentication with the streaming endpoint. Pass this as a query parameter when connecting to the WebSocket URL.",
3784+
"example": "VGhpcyBpcyBhIGZha2UgdG9rZW4K"
3785+
}
3786+
}
3787+
},
3788+
"VoiceTargetLanguages": {
3789+
"type": "array",
3790+
"description": "List of target languages for translation. The stream will emit translations for each language.\nMaximum 5 target languages per stream. Language identifiers must comply with IETF BCP 47.",
3791+
"items": {
3792+
"type": "string"
3793+
},
3794+
"maxItems": 5,
3795+
"example": [
3796+
"de",
3797+
"fr",
3798+
"es"
3799+
]
3800+
},
35943801
"WritingStyle": {
35953802
"type": "string",
35963803
"description": "Specify a style to rephrase your text in a way that fits your audience and goals.\nThe `prefer_` prefix allows falling back to the default style if the language does not yet support styles.",

0 commit comments

Comments
 (0)