diff --git a/backend/debug.py b/backend/debug.py
index 5d5df3d..ae1ddb3 100644
--- a/backend/debug.py
+++ b/backend/debug.py
@@ -32,7 +32,7 @@ async def main():
             "thread_id": "debug-thread-001",
             "thinking_enabled": True,
             # Uncomment to use a specific model
-            "model_name": "doubao-seed-1.8",
+            "model_name": "deepseek-v3.2",
         }
     }
 
diff --git a/backend/docs/TODO.md b/backend/docs/TODO.md
index 483f979..1b70c54 100644
--- a/backend/docs/TODO.md
+++ b/backend/docs/TODO.md
@@ -2,10 +2,11 @@
 
 ## Features
 
-[ ] Launch the sandbox only after the first file system or bash tool is called
+[x] Launch the sandbox only after the first file system or bash tool is called
 [ ] Pooling the sandbox resources to reduce the number of sandbox containers
+[ ] Add a clarification step that covers the whole workflow
 
 ## Issues
 
 [x] Make sure that no duplicated files in `state.artifacts`
-[ ] Long thinking but with empty content (answer inside thinking process)
+[x] Long thinking but with empty content (answer inside thinking process)
diff --git a/backend/src/agents/lead_agent/prompt.py b/backend/src/agents/lead_agent/prompt.py
index 9dc8af5..94fea0c 100644
--- a/backend/src/agents/lead_agent/prompt.py
+++ b/backend/src/agents/lead_agent/prompt.py
@@ -8,8 +8,10 @@ You are DeerFlow 2.0, an open-source super agent.
 </role>
 
 <thinking_style>
-- Think concisely
+- Think concisely and strategically
 - Never write down your full final answer or report in thinking process, but only outline
+- CRITICAL: After thinking, you MUST provide your actual response to the user. Thinking is for planning, the response is for delivery.
+- Your response must contain the actual answer, not just a reference to what you thought about
 </thinking_style>
 
 <skill_system>
@@ -45,12 +47,6 @@ All temporary work happens in `/mnt/user-data/workspace`. Final deliverables mus
 - Action-Oriented: Focus on delivering results, not explaining processes
 </response_style>
 
-<memory_and_context>
-- Software engineer and prompt engineer at Bytedance Shanghai
-- Tech stack: TypeScript, Next.js, Tailwind v4, Shadcn, Python
-- Working on AIGC with Gemini Nano Banana
-</memory_and_context>
-
 <critical_reminders>
 - Skill First: Always load the relevant skill before starting **complex** tasks.
 - Progressive Loading: Load resources incrementally as referenced in skills
@@ -58,6 +54,7 @@ All temporary work happens in `/mnt/user-data/workspace`. Final deliverables mus
 - Clarity: Be direct and helpful, avoid unnecessary meta-commentary
 - Multi-task: Better utilize parallel tool calling to call multiple tools at one time for better performance
 - Language Consistency: Keep using the same language as user's
+- Always Respond: Your thinking is internal. You MUST always provide a visible response to the user after thinking.
 </critical_reminders>
 """