Skip to content

Commit 438b303

Browse files
author
Wolfgang
committed
feat: add assistant response audio playback
1 parent dd61b9a commit 438b303

14 files changed

Lines changed: 1266 additions & 45 deletions

File tree

src-tauri/src/bin/codex_monitor_daemon.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,6 +1329,25 @@ impl DaemonState {
13291329
.await
13301330
}
13311331

1332+
async fn generate_message_audio_summary(
1333+
&self,
1334+
workspace_id: String,
1335+
response_text: String,
1336+
model_id: Option<String>,
1337+
) -> Result<String, String> {
1338+
codex_aux_core::generate_message_audio_summary_core(
1339+
&self.sessions,
1340+
&self.workspaces,
1341+
workspace_id,
1342+
&response_text,
1343+
model_id.as_deref(),
1344+
|workspace_id, thread_id| {
1345+
emit_background_thread_hide(&self.event_sink, workspace_id, thread_id);
1346+
},
1347+
)
1348+
.await
1349+
}
1350+
13321351
async fn local_usage_snapshot(
13331352
&self,
13341353
days: Option<u32>,

src-tauri/src/bin/codex_monitor_daemon/rpc/codex.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,23 @@ pub(super) async fn try_handle(
505505
.and_then(|value| serde_json::to_value(value).map_err(|err| err.to_string())),
506506
)
507507
}
508+
"generate_message_audio_summary" => {
509+
let workspace_id = match parse_string(params, "workspaceId") {
510+
Ok(value) => value,
511+
Err(err) => return Some(Err(err)),
512+
};
513+
let response_text = match parse_string(params, "responseText") {
514+
Ok(value) => value,
515+
Err(err) => return Some(Err(err)),
516+
};
517+
let model_id = parse_optional_string(params, "modelId");
518+
Some(
519+
state
520+
.generate_message_audio_summary(workspace_id, response_text, model_id)
521+
.await
522+
.and_then(|value| serde_json::to_value(value).map_err(|err| err.to_string())),
523+
)
524+
}
508525
_ => None,
509526
}
510527
}

src-tauri/src/codex/mod.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,3 +1022,51 @@ pub(crate) async fn generate_agent_description(
10221022
)
10231023
.await
10241024
}
1025+
1026+
#[tauri::command]
1027+
pub(crate) async fn generate_message_audio_summary(
1028+
workspace_id: String,
1029+
response_text: String,
1030+
model_id: Option<String>,
1031+
state: State<'_, AppState>,
1032+
app: AppHandle,
1033+
) -> Result<String, String> {
1034+
if remote_backend::is_remote_mode(&*state).await {
1035+
let value = remote_backend::call_remote(
1036+
&*state,
1037+
app,
1038+
"generate_message_audio_summary",
1039+
json!({
1040+
"workspaceId": workspace_id,
1041+
"responseText": response_text,
1042+
"modelId": model_id,
1043+
}),
1044+
)
1045+
.await?;
1046+
return serde_json::from_value(value).map_err(|err| err.to_string());
1047+
}
1048+
1049+
crate::shared::codex_aux_core::generate_message_audio_summary_core(
1050+
&state.sessions,
1051+
&state.workspaces,
1052+
workspace_id,
1053+
&response_text,
1054+
model_id.as_deref(),
1055+
|workspace_id, thread_id| {
1056+
let _ = app.emit(
1057+
"app-server-event",
1058+
AppServerEvent {
1059+
workspace_id: workspace_id.to_string(),
1060+
message: json!({
1061+
"method": "codex/backgroundThread",
1062+
"params": {
1063+
"threadId": thread_id,
1064+
"action": "hide"
1065+
}
1066+
}),
1067+
},
1068+
);
1069+
},
1070+
)
1071+
.await
1072+
}

src-tauri/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ pub fn run() {
218218
codex::generate_commit_message,
219219
codex::generate_run_metadata,
220220
codex::generate_agent_description,
221+
codex::generate_message_audio_summary,
221222
codex::resume_thread,
222223
codex::read_thread,
223224
codex::thread_live_subscribe,

src-tauri/src/shared/codex_aux_core.rs

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,17 @@ Keep the summary line under 72 characters. \
2020
Only output the commit message, nothing else.\n\n\
2121
Changes:\n{diff}";
2222

23+
/// Prompt template asking the model to condense an assistant response into short spoken prose.
///
/// The literal `{response}` at the end is a placeholder for the assistant response text.
const DEFAULT_MESSAGE_AUDIO_SUMMARY_PROMPT: &str = concat!(
    "You are preparing audio playback for a coding assistant response.\n",
    "Summarize the response below into short spoken prose for a developer.\n",
    "\n",
    "Requirements:\n",
    "- Return plain text only.\n",
    "- Do not include markdown fences, bullets, or headings.\n",
    "- Keep it concise and easy to listen to.\n",
    "- Preserve important commands, file paths, errors, results, and next actions when they matter.\n",
    "- If the response is mostly code or a table, summarize the outcome instead of reading every line.\n",
    "\n",
    "Assistant response:\n",
    "{response}",
);
33+
2334
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
2435
#[serde(rename_all = "camelCase")]
2536
pub(crate) struct GeneratedAgentConfiguration {
@@ -50,6 +61,15 @@ pub(crate) fn build_commit_message_prompt_for_diff(
5061
Ok(build_commit_message_prompt(diff, template))
5162
}
5263

64+
pub(crate) fn build_message_audio_summary_prompt(response_text: &str) -> Result<String, String> {
65+
let cleaned_response = response_text.trim();
66+
if cleaned_response.is_empty() {
67+
return Err("Response text is required.".to_string());
68+
}
69+
70+
Ok(DEFAULT_MESSAGE_AUDIO_SUMMARY_PROMPT.replace("{response}", cleaned_response))
71+
}
72+
5373
pub(crate) fn build_run_metadata_prompt(cleaned_prompt: &str) -> String {
5474
format!(
5575
"You create concise run metadata for a coding task.\n\
@@ -198,6 +218,28 @@ pub(crate) fn parse_agent_description_value(
198218
Err("No valid agent configuration was generated".to_string())
199219
}
200220

221+
/// Flattens a raw model reply into a single line of plain text.
///
/// Each line is trimmed. Blank lines and lines that begin with a triple-backtick code fence
/// are dropped. One leading `- `, `* `, or `• ` bullet marker is removed from each remaining
/// line, and all surviving words are joined with single spaces.
///
/// # Errors
///
/// Returns `"No summary was generated"` when nothing remains after cleanup.
pub(crate) fn normalize_message_audio_summary_value(raw: &str) -> Result<String, String> {
    const BULLET_MARKERS: [&str; 3] = ["- ", "* ", "• "];

    let mut words: Vec<&str> = Vec::new();
    for line in raw.lines().map(str::trim) {
        if line.is_empty() || line.starts_with("```") {
            continue;
        }

        // At most one marker is stripped; nested markers such as "- - x" keep the inner one.
        let content = BULLET_MARKERS
            .iter()
            .find_map(|&marker| line.strip_prefix(marker))
            .unwrap_or(line);

        // Splitting per line and joining once collapses every whitespace run to one space.
        words.extend(content.split_whitespace());
    }

    if words.is_empty() {
        Err("No summary was generated".to_string())
    } else {
        Ok(words.join(" "))
    }
}
242+
201243
pub(crate) fn parse_run_metadata_value(raw: &str) -> Result<Value, String> {
202244
let trimmed = raw.trim();
203245
if trimmed.is_empty() {
@@ -649,10 +691,38 @@ where
649691
parse_agent_description_value(&response)
650692
}
651693

694+
pub(crate) async fn generate_message_audio_summary_core<F>(
695+
sessions: &Mutex<HashMap<String, Arc<WorkspaceSession>>>,
696+
workspaces: &Mutex<HashMap<String, WorkspaceEntry>>,
697+
workspace_id: String,
698+
response_text: &str,
699+
model: Option<&str>,
700+
on_hide_thread: F,
701+
) -> Result<String, String>
702+
where
703+
F: Fn(&str, &str),
704+
{
705+
let prompt = build_message_audio_summary_prompt(response_text)?;
706+
let response = run_background_prompt_core(
707+
sessions,
708+
workspaces,
709+
workspace_id,
710+
prompt,
711+
model,
712+
on_hide_thread,
713+
"Timeout waiting for audio summary generation",
714+
"Unknown error during audio summary generation",
715+
)
716+
.await?;
717+
718+
normalize_message_audio_summary_value(&response)
719+
}
720+
652721
#[cfg(test)]
653722
mod tests {
654723
use super::{
655-
build_commit_message_prompt_for_diff, parse_agent_description_value,
724+
build_commit_message_prompt_for_diff, build_message_audio_summary_prompt,
725+
normalize_message_audio_summary_value, parse_agent_description_value,
656726
parse_run_metadata_value,
657727
};
658728

@@ -665,6 +735,22 @@ mod tests {
665735
);
666736
}
667737

738+
#[test]
fn build_message_audio_summary_prompt_requires_response_text() {
    // Whitespace-only input must be rejected with the "required" error message.
    let error = build_message_audio_summary_prompt(" ").expect_err("should fail");
    assert_eq!(error, "Response text is required.");
}
743+
744+
#[test]
fn normalize_message_audio_summary_value_flattens_bullets_and_fences() {
    // A fenced markdown bullet list should come back as one line of plain words.
    let fenced_bullets = "```md\n- Updated src/App.tsx\n- Ran npm run test\n```";
    let summary =
        normalize_message_audio_summary_value(fenced_bullets).expect("summary should parse");

    assert_eq!(summary, "Updated src/App.tsx Ran npm run test");
}
753+
668754
#[test]
669755
fn parse_run_metadata_value_normalizes_worktree_name_alias() {
670756
let raw =

src/features/app/hooks/useMainAppLayoutSurfaces.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ function buildPrimarySurface({
443443
workspacePath: activeWorkspace?.path ?? null,
444444
openTargets: appSettings.openAppTargets,
445445
selectedOpenAppId: appSettings.selectedOpenAppId,
446+
selectedModelId,
446447
codeBlockCopyUseModifier: appSettings.composerCodeBlockCopyUseModifier,
447448
showMessageFilePath: appSettings.showMessageFilePath,
448449
userInputRequests,

0 commit comments

Comments
 (0)