mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-19 12:24:46 +08:00
perf: parallelize TTS generation in podcast skill
Use ThreadPoolExecutor to generate audio for multiple script lines concurrently, significantly speeding up podcast generation.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -23,7 +23,7 @@ class AioSandbox(Sandbox):
|
|||||||
"""
|
"""
|
||||||
super().__init__(id)
|
super().__init__(id)
|
||||||
self._base_url = base_url
|
self._base_url = base_url
|
||||||
self._client = AioSandboxClient(base_url=base_url)
|
self._client = AioSandboxClient(base_url=base_url, timeout=600)
|
||||||
self._home_dir = home_dir
|
self._home_dir = home_dir
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import uuid
|
import uuid
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from typing import Literal, Optional
|
from typing import Literal, Optional
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@@ -98,25 +99,46 @@ def text_to_speech(text: str, voice_type: str) -> Optional[bytes]:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def tts_node(script: Script) -> list[bytes]:
|
def _process_line(args: tuple[int, ScriptLine, int]) -> tuple[int, Optional[bytes]]:
|
||||||
"""Convert script lines to audio chunks using TTS."""
|
"""Process a single script line for TTS. Returns (index, audio_bytes)."""
|
||||||
logger.info("Converting script to audio...")
|
i, line, total = args
|
||||||
audio_chunks = []
|
|
||||||
|
|
||||||
for i, line in enumerate(script.lines):
|
|
||||||
# Select voice based on speaker gender
|
# Select voice based on speaker gender
|
||||||
if line.speaker == "male":
|
if line.speaker == "male":
|
||||||
voice_type = "zh_male_yangguangqingnian_moon_bigtts" # Male voice
|
voice_type = "zh_male_yangguangqingnian_moon_bigtts" # Male voice
|
||||||
else:
|
else:
|
||||||
voice_type = "zh_female_sajiaonvyou_moon_bigtts" # Female voice
|
voice_type = "zh_female_sajiaonvyou_moon_bigtts" # Female voice
|
||||||
|
|
||||||
logger.info(f"Processing line {i + 1}/{len(script.lines)} ({line.speaker})")
|
logger.info(f"Processing line {i + 1}/{total} ({line.speaker})")
|
||||||
audio = text_to_speech(line.paragraph, voice_type)
|
audio = text_to_speech(line.paragraph, voice_type)
|
||||||
|
|
||||||
|
if not audio:
|
||||||
|
logger.warning(f"Failed to generate audio for line {i + 1}")
|
||||||
|
|
||||||
|
return (i, audio)
|
||||||
|
|
||||||
|
|
||||||
|
def tts_node(script: Script, max_workers: int = 4) -> list[bytes]:
|
||||||
|
"""Convert script lines to audio chunks using TTS with multi-threading."""
|
||||||
|
logger.info(f"Converting script to audio using {max_workers} workers...")
|
||||||
|
|
||||||
|
total = len(script.lines)
|
||||||
|
tasks = [(i, line, total) for i, line in enumerate(script.lines)]
|
||||||
|
|
||||||
|
# Use ThreadPoolExecutor for parallel TTS generation
|
||||||
|
results: dict[int, Optional[bytes]] = {}
|
||||||
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||||
|
futures = {executor.submit(_process_line, task): task[0] for task in tasks}
|
||||||
|
for future in as_completed(futures):
|
||||||
|
idx, audio = future.result()
|
||||||
|
results[idx] = audio
|
||||||
|
|
||||||
|
# Collect results in order, skipping failed ones
|
||||||
|
audio_chunks = []
|
||||||
|
for i in range(total):
|
||||||
|
audio = results.get(i)
|
||||||
if audio:
|
if audio:
|
||||||
audio_chunks.append(audio)
|
audio_chunks.append(audio)
|
||||||
else:
|
|
||||||
logger.warning(f"Failed to generate audio for line {i + 1}")
|
|
||||||
|
|
||||||
logger.info(f"Generated {len(audio_chunks)} audio chunks")
|
logger.info(f"Generated {len(audio_chunks)} audio chunks")
|
||||||
return audio_chunks
|
return audio_chunks
|
||||||
|
|||||||
Reference in New Issue
Block a user