feat: lite deep researcher implementation

This commit is contained in:
He Tao
2025-04-07 16:25:55 +08:00
commit 03798ded08
58 changed files with 4242 additions and 0 deletions

11
src/tools/__init__.py Normal file
View File

@@ -0,0 +1,11 @@
from .crawl import crawl_tool
from .python_repl import python_repl_tool
from .search import tavily_tool
from .bash_tool import bash_tool
# Public API of the tools package: the tool objects imported above that are
# exposed by `from src.tools import *`.
__all__ = [
"bash_tool",
"crawl_tool",
"tavily_tool",
"python_repl_tool",
]

49
src/tools/bash_tool.py Normal file
View File

@@ -0,0 +1,49 @@
import logging
import subprocess
from typing import Annotated
from langchain_core.tools import tool
from .decorators import log_io
# Initialize logger
logger = logging.getLogger(__name__)


@tool
@log_io
def bash_tool(
    cmd: Annotated[str, "The bash command to be executed."],
    timeout: Annotated[
        int, "Maximum time in seconds for the command to complete."
    ] = 120,
) -> str:
    """Use this to execute bash command and do necessary operations."""
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # receives it as part of the function; developer documentation lives in
    # comments instead.
    #
    # Contract: always returns a string and never raises.
    #   * success            -> the command's stdout
    #   * non-zero exit code -> a message with exit code, stdout and stderr
    #   * timeout            -> a message naming the command and the timeout
    #   * anything else      -> a message with the exception text
    logger.info(f"Executing Bash Command: {cmd} with timeout {timeout}s")
    try:
        # SECURITY: shell=True hands `cmd` to the system shell verbatim. That is
        # the purpose of this tool, but it means `cmd` must be treated as fully
        # trusted code. check=True turns a non-zero exit status into
        # CalledProcessError, handled below.
        result = subprocess.run(
            cmd, shell=True, check=True, text=True, capture_output=True, timeout=timeout
        )
        # Return stdout as the result
        return result.stdout
    except subprocess.CalledProcessError as e:
        # If command fails, return error information.
        # Built from adjacent f-strings: replacement fields that span several
        # lines inside a single-quoted f-string are a SyntaxError before
        # Python 3.12.
        error_message = (
            f"Command failed with exit code {e.returncode}.\n"
            f"Stdout: {e.stdout}\n"
            f"Stderr: {e.stderr}"
        )
        logger.error(error_message)
        return error_message
    except subprocess.TimeoutExpired:
        # Handle timeout exception
        error_message = f"Command '{cmd}' timed out after {timeout}s."
        logger.error(error_message)
        return error_message
    except Exception as e:
        # Catch any other exceptions so the caller always gets a string back
        error_message = f"Error executing command: {str(e)}"
        logger.error(error_message)
        return error_message
if __name__ == "__main__":
    # Manual smoke test: run a directory listing through the tool and show
    # whatever string it hands back.
    listing = bash_tool.invoke("ls -all")
    print(listing)

25
src/tools/crawl.py Normal file
View File

@@ -0,0 +1,25 @@
import logging
from typing import Annotated
from langchain_core.tools import tool
from .decorators import log_io
from src.crawler import Crawler
logger = logging.getLogger(__name__)


@tool
@log_io
def crawl_tool(
    url: Annotated[str, "The url to crawl."],
) -> str:
    """Use this to crawl a url and get a readable content in markdown format."""
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # receives it as part of the function; developer documentation lives in
    # comments instead.
    #
    # NOTE(review): the signature says `-> str`, but the success path returns a
    # dict with "url" and "crawled_content" keys; only the failure path returns
    # a plain string. Left as-is so existing consumers keep working -- confirm
    # which shape callers expect before tightening the annotation.
    try:
        crawler = Crawler()
        article = crawler.crawl(url)
        # Only the first 1000 items of the markdown are returned (characters,
        # assuming to_markdown() yields a str).
        return {"url": url, "crawled_content": article.to_markdown()[:1000]}
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt, SystemExit and task
        # cancellation must propagate instead of being reported as a crawl
        # failure.
        error_msg = f"Failed to crawl. Error: {repr(e)}"
        logger.error(error_msg)
        return error_msg

78
src/tools/decorators.py Normal file
View File

@@ -0,0 +1,78 @@
import logging
import functools
from typing import Any, Callable, Type, TypeVar
logger = logging.getLogger(__name__)
T = TypeVar("T")


def log_io(func: Callable) -> Callable:
    """
    A decorator that logs the input parameters and output of a tool function.

    Both messages are emitted at DEBUG level on this module's logger. Arguments
    and the return value are only converted to text when DEBUG logging is
    actually enabled, so decorated tools pay no formatting cost (and cannot be
    broken by an argument whose ``__str__`` raises) when it is off.

    Args:
        func: The tool function to be decorated

    Returns:
        The wrapped function with input/output logging; its metadata (name,
        docstring, ...) is copied from ``func`` via ``functools.wraps``.
    """

    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        func_name = func.__name__
        # Log input parameters. Building the parameter string stringifies every
        # argument, so skip it entirely unless the record will be emitted.
        if logger.isEnabledFor(logging.DEBUG):
            params = ", ".join(
                [*(str(arg) for arg in args), *(f"{k}={v}" for k, v in kwargs.items())]
            )
            logger.debug("Tool %s called with parameters: %s", func_name, params)
        # Execute the function
        result = func(*args, **kwargs)
        # Log the output; %-style arguments defer str(result) to the logging
        # machinery, which only formats when the record is handled.
        logger.debug("Tool %s returned: %s", func_name, result)
        return result

    return wrapper
class LoggedToolMixin:
    """Mixin that wraps a tool's ``_run`` with DEBUG-level call/result logging.

    It is meant to appear before the concrete tool class in the bases so that
    ``super()._run`` resolves to the real implementation.
    """

    def _log_operation(self, method_name: str, *args: Any, **kwargs: Any) -> None:
        """Emit one debug line naming the tool, the method and its arguments."""
        # The "Logged" prefix added by the factory is stripped so the log shows
        # the underlying tool's name.
        display_name = self.__class__.__name__.replace("Logged", "")
        positional = [str(value) for value in args]
        keyword = [f"{key}={value}" for key, value in kwargs.items()]
        joined = ", ".join(positional + keyword)
        logger.debug(f"Tool {display_name}.{method_name} called with parameters: {joined}")

    def _run(self, *args: Any, **kwargs: Any) -> Any:
        """Log the call, delegate to the next ``_run`` in the MRO, log the result."""
        self._log_operation("_run", *args, **kwargs)
        outcome = super()._run(*args, **kwargs)
        display_name = self.__class__.__name__.replace("Logged", "")
        logger.debug(f"Tool {display_name} returned: {outcome}")
        return outcome
def create_logged_tool(base_tool_class: Type[T]) -> Type[T]:
    """
    Build a subclass of ``base_tool_class`` whose ``_run`` is logged.

    Args:
        base_tool_class: The tool class to extend with logging

    Returns:
        A new class deriving from LoggedToolMixin and ``base_tool_class`` (in
        that order), named ``Logged<OriginalName>``
    """
    logged_name = f"Logged{base_tool_class.__name__}"

    # The mixin comes first in the bases so its _run runs before the base one.
    class LoggedTool(LoggedToolMixin, base_tool_class):
        pass

    # Replace the generic class name with the descriptive one
    LoggedTool.__name__ = logged_name
    return LoggedTool

40
src/tools/python_repl.py Normal file
View File

@@ -0,0 +1,40 @@
import logging
from typing import Annotated
from langchain_core.tools import tool
from langchain_experimental.utilities import PythonREPL
from .decorators import log_io
# Initialize REPL and logger
repl = PythonREPL()
logger = logging.getLogger(__name__)


@tool
@log_io
def python_repl_tool(
    code: Annotated[
        str, "The python code to execute to do further analysis or calculation."
    ],
) -> str:
    """Use this to execute python code and do data analysis or calculation. If you want to see the output of a value,
    you should print it out with `print(...)`. This is visible to the user."""
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # receives it as part of the function; developer documentation lives in
    # comments instead.
    #
    # Contract: always returns a string. Every result echoes the submitted code
    # in a fenced block, followed by either "Stdout: ..." or "Error: ...".
    if not isinstance(code, str):
        error_msg = f"Invalid input: code must be a string, got {type(code)}"
        logger.error(error_msg)
        return f"Error executing code:\n```python\n{code}\n```\nError: {error_msg}"

    logger.info("Executing Python code")
    try:
        # Only the REPL call itself is guarded.
        result = repl.run(code)
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt, SystemExit and task
        # cancellation must propagate instead of being reported as a code
        # error.
        error_msg = repr(e)
        logger.error(error_msg)
        return f"Error executing code:\n```python\n{code}\n```\nError: {error_msg}"

    # Check if the result is an error message by looking for typical error
    # patterns. This is a heuristic: legitimate output that merely contains the
    # word "Error" or "Exception" is also reported as a failure.
    # NOTE(review): presumably repl.run reports failures as text rather than
    # raising -- confirm against PythonREPL.
    if isinstance(result, str) and ("Error" in result or "Exception" in result):
        logger.error(result)
        return f"Error executing code:\n```python\n{code}\n```\nError: {result}"

    logger.info("Code execution successful")
    return f"Successfully executed:\n```python\n{code}\n```\nStdout: {result}"

10
src/tools/search.py Normal file
View File

@@ -0,0 +1,10 @@
import logging
from langchain_community.tools.tavily_search import TavilySearchResults
from src.config import TAVILY_MAX_RESULTS
from .decorators import create_logged_tool
# Module-level logger; not referenced again in the visible part of this module.
logger = logging.getLogger(__name__)
# Initialize Tavily search tool with logging
# create_logged_tool returns a subclass that mixes LoggedToolMixin into
# TavilySearchResults, so calls to the tool's _run are logged at DEBUG level.
LoggedTavilySearch = create_logged_tool(TavilySearchResults)
# Shared tool instance exported by the package; the result cap comes from
# project configuration (TAVILY_MAX_RESULTS).
tavily_tool = LoggedTavilySearch(name="tavily_search", max_results=TAVILY_MAX_RESULTS)