fix(podcast): add fallback for models without json_object support (#747) (#785)

* fix(podcast): add fallback for models without json_object support (#747)

Models like Kimi K2 don't support response_format.type: json_object.
Add try-except to fall back to regular prompting with JSON parsing
when BadRequestError mentions json_object not supported.

- Add fallback to prompting + repair_json_output parsing
- Re-raise other BadRequestError types
- Add unit tests for script_writer_node with 100% coverage

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fixes: the unit test error of test_script_writer_node.py

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Willem Jiang
2025-12-26 23:04:20 +08:00
committed by GitHub
parent 5a79f896c4
commit bab60e6e3d
3 changed files with 254 additions and 10 deletions

View File

@@ -0,0 +1,2 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT

View File

@@ -0,0 +1,214 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
import json
from unittest.mock import MagicMock, patch
import openai
import pytest
from src.podcast.graph.script_writer_node import script_writer_node
from src.podcast.types import Script, ScriptLine
class TestScriptWriterNode:
"""Tests for script_writer_node function."""
@pytest.fixture
def sample_state(self):
"""Create a sample podcast state."""
return {"input": "Test content for podcast generation"}
@pytest.fixture
def sample_script(self):
"""Create a sample Script object."""
return Script(
locale="en",
lines=[
ScriptLine(speaker="male", paragraph="Hello, welcome to our podcast."),
ScriptLine(speaker="female", paragraph="Today we discuss testing."),
],
)
@pytest.fixture
def sample_script_json(self, sample_script):
"""Create JSON representation of sample script."""
return sample_script.model_dump_json()
@patch("src.podcast.graph.script_writer_node.get_prompt_template")
@patch("src.podcast.graph.script_writer_node.get_llm_by_type")
def test_script_writer_with_json_mode_success(
self, mock_get_llm, mock_get_template, sample_state, sample_script
):
"""Test successful script generation using json_mode."""
mock_get_template.return_value = "Generate a podcast script."
mock_model = MagicMock()
mock_structured_model = MagicMock()
mock_model.with_structured_output.return_value = mock_structured_model
mock_structured_model.invoke.return_value = sample_script
mock_get_llm.return_value = mock_model
result = script_writer_node(sample_state)
assert result["script"] == sample_script
assert result["audio_chunks"] == []
mock_model.with_structured_output.assert_called_once_with(
Script, method="json_mode"
)
@patch("src.podcast.graph.script_writer_node.get_prompt_template")
@patch("src.podcast.graph.script_writer_node.get_llm_by_type")
def test_script_writer_fallback_on_json_object_not_supported(
self, mock_get_llm, mock_get_template, sample_state, sample_script_json
):
"""Test fallback to prompting when model doesn't support json_object."""
mock_get_template.return_value = "Generate a podcast script."
mock_model = MagicMock()
mock_structured_model = MagicMock()
mock_model.with_structured_output.return_value = mock_structured_model
# Simulate json_object not supported error
mock_structured_model.invoke.side_effect = openai.BadRequestError(
message="json_object is not supported by this model",
response=MagicMock(status_code=400),
body={"error": {"message": "json_object is not supported"}},
)
# Mock the fallback response
mock_response = MagicMock()
mock_response.content = sample_script_json
mock_model.invoke.return_value = mock_response
mock_get_llm.return_value = mock_model
result = script_writer_node(sample_state)
assert result["script"].locale == "en"
assert len(result["script"].lines) == 2
assert result["audio_chunks"] == []
# Verify fallback was used
mock_model.invoke.assert_called_once()
@patch("src.podcast.graph.script_writer_node.get_prompt_template")
@patch("src.podcast.graph.script_writer_node.get_llm_by_type")
def test_script_writer_reraises_other_bad_request_errors(
self, mock_get_llm, mock_get_template, sample_state
):
"""Test that other BadRequestError types are re-raised."""
mock_get_template.return_value = "Generate a podcast script."
mock_model = MagicMock()
mock_structured_model = MagicMock()
mock_model.with_structured_output.return_value = mock_structured_model
# Simulate a different BadRequestError (not json_object related)
mock_structured_model.invoke.side_effect = openai.BadRequestError(
message="Invalid model parameter",
response=MagicMock(status_code=400),
body={"error": {"message": "Invalid model parameter"}},
)
mock_get_llm.return_value = mock_model
with pytest.raises(openai.BadRequestError) as exc_info:
script_writer_node(sample_state)
assert "Invalid model parameter" in str(exc_info.value)
@patch("src.podcast.graph.script_writer_node.get_prompt_template")
@patch("src.podcast.graph.script_writer_node.get_llm_by_type")
def test_script_writer_fallback_with_markdown_wrapped_json(
self, mock_get_llm, mock_get_template, sample_state
):
"""Test fallback handles JSON wrapped in markdown code blocks."""
mock_get_template.return_value = "Generate a podcast script."
mock_model = MagicMock()
mock_structured_model = MagicMock()
mock_model.with_structured_output.return_value = mock_structured_model
mock_structured_model.invoke.side_effect = openai.BadRequestError(
message="json_object is not supported",
response=MagicMock(status_code=400),
body={},
)
# Mock response with markdown-wrapped JSON (common LLM output)
mock_response = MagicMock()
mock_response.content = """```json
{
"locale": "zh",
"lines": [
{"speaker": "male", "paragraph": "欢迎收听播客。"}
]
}
```"""
mock_model.invoke.return_value = mock_response
mock_get_llm.return_value = mock_model
result = script_writer_node(sample_state)
assert result["script"].locale == "zh"
assert len(result["script"].lines) == 1
assert result["script"].lines[0].speaker == "male"
@patch("src.podcast.graph.script_writer_node.get_prompt_template")
@patch("src.podcast.graph.script_writer_node.get_llm_by_type")
def test_script_writer_fallback_raises_on_invalid_json(
self, mock_get_llm, mock_get_template, sample_state
):
"""Test that fallback raises JSONDecodeError when response is not valid JSON."""
mock_get_template.return_value = "Generate a podcast script."
mock_model = MagicMock()
mock_structured_model = MagicMock()
mock_model.with_structured_output.return_value = mock_structured_model
mock_structured_model.invoke.side_effect = openai.BadRequestError(
message="json_object is not supported",
response=MagicMock(status_code=400),
body={},
)
# Mock response with completely invalid JSON
mock_response = MagicMock()
mock_response.content = "This is not JSON at all, just plain text response."
mock_model.invoke.return_value = mock_response
mock_get_llm.return_value = mock_model
with pytest.raises(json.JSONDecodeError):
script_writer_node(sample_state)
@patch("src.podcast.graph.script_writer_node.get_prompt_template")
@patch("src.podcast.graph.script_writer_node.get_llm_by_type")
def test_script_writer_fallback_raises_on_invalid_schema(
self, mock_get_llm, mock_get_template, sample_state
):
"""Test that fallback raises ValidationError when JSON doesn't match Script schema."""
mock_get_template.return_value = "Generate a podcast script."
mock_model = MagicMock()
mock_structured_model = MagicMock()
mock_model.with_structured_output.return_value = mock_structured_model
mock_structured_model.invoke.side_effect = openai.BadRequestError(
message="json_object is not supported",
response=MagicMock(status_code=400),
body={},
)
# Mock response with valid JSON but invalid schema (missing required fields, wrong types)
mock_response = MagicMock()
mock_response.content = '{"locale": "invalid_locale", "lines": "not_a_list"}'
mock_model.invoke.return_value = mock_response
mock_get_llm.return_value = mock_model
# Pydantic ValidationError is raised when schema validation fails
from pydantic import ValidationError
with pytest.raises(ValidationError):
script_writer_node(sample_state)