Consolidates market and data analysis skills; adds chart viz (#36)

Unifies market analysis, data analysis, and consulting reporting into a comprehensive consulting-analysis skill, enabling a two-phase workflow from analysis framework design to professional report generation. Introduces a DuckDB-based data analysis utility for Excel/CSV files and a chart-visualization skill with a flexible JS interface and extensive chart type documentation. Removes the legacy market analysis skill to streamline report generation and improve extensibility for consulting and data-driven workflows.
JeffJiang authored on 2026-02-12 11:08:09 +08:00, committed by GitHub.
parent 300e5a519a
commit 4d5fdcb8db
33 changed files with 2326 additions and 206 deletions

backend/uv.lock (generated)

@@ -603,6 +603,7 @@ dependencies = [
{ name = "agent-sandbox" },
{ name = "ddgs" },
{ name = "dotenv" },
{ name = "duckdb" },
{ name = "fastapi" },
{ name = "firecrawl-py" },
{ name = "httpx" },
@@ -636,6 +637,7 @@ requires-dist = [
{ name = "agent-sandbox", specifier = ">=0.0.19" },
{ name = "ddgs", specifier = ">=9.10.0" },
{ name = "dotenv", specifier = ">=0.9.9" },
{ name = "duckdb", specifier = ">=1.4.4" },
{ name = "fastapi", specifier = ">=0.115.0" },
{ name = "firecrawl-py", specifier = ">=1.15.0" },
{ name = "httpx", specifier = ">=0.28.0" },
@@ -693,6 +695,35 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b2/b7/545d2c10c1fc15e48653c91efde329a790f2eecfbbf2bd16003b5db2bab0/dotenv-0.9.9-py2.py3-none-any.whl", hash = "sha256:29cf74a087b31dafdb5a446b6d7e11cbce8ed2741540e2339c69fbef92c94ce9", size = 1892, upload-time = "2025-02-19T22:15:01.647Z" },
]
[[package]]
name = "duckdb"
version = "1.4.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/36/9d/ab66a06e416d71b7bdcb9904cdf8d4db3379ef632bb8e9495646702d9718/duckdb-1.4.4.tar.gz", hash = "sha256:8bba52fd2acb67668a4615ee17ee51814124223de836d9e2fdcbc4c9021b3d3c", size = 18419763, upload-time = "2026-01-26T11:50:37.68Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/58/33/beadaa69f8458afe466126f2c5ee48c4759cc9d5d784f8703d44e0b52c3c/duckdb-1.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ddcfd9c6ff234da603a1edd5fd8ae6107f4d042f74951b65f91bc5e2643856b3", size = 28896535, upload-time = "2026-01-26T11:49:21.232Z" },
{ url = "https://files.pythonhosted.org/packages/76/66/82413f386df10467affc87f65bac095b7c88dbd9c767584164d5f4dc4cb8/duckdb-1.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6792ca647216bd5c4ff16396e4591cfa9b4a72e5ad7cdd312cec6d67e8431a7c", size = 15349716, upload-time = "2026-01-26T11:49:23.989Z" },
{ url = "https://files.pythonhosted.org/packages/5d/8c/c13d396fd4e9bf970916dc5b4fea410c1b10fe531069aea65f1dcf849a71/duckdb-1.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1f8d55843cc940e36261689054f7dfb6ce35b1f5b0953b0d355b6adb654b0d52", size = 13672403, upload-time = "2026-01-26T11:49:26.741Z" },
{ url = "https://files.pythonhosted.org/packages/db/77/2446a0b44226bb95217748d911c7ca66a66ca10f6481d5178d9370819631/duckdb-1.4.4-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c65d15c440c31e06baaebfd2c06d71ce877e132779d309f1edf0a85d23c07e92", size = 18419001, upload-time = "2026-01-26T11:49:29.353Z" },
{ url = "https://files.pythonhosted.org/packages/2e/a3/97715bba30040572fb15d02c26f36be988d48bc00501e7ac02b1d65ef9d0/duckdb-1.4.4-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b297eff642503fd435a9de5a9cb7db4eccb6f61d61a55b30d2636023f149855f", size = 20437385, upload-time = "2026-01-26T11:49:32.302Z" },
{ url = "https://files.pythonhosted.org/packages/8b/0a/18b9167adf528cbe3867ef8a84a5f19f37bedccb606a8a9e59cfea1880c8/duckdb-1.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d525de5f282b03aa8be6db86b1abffdceae5f1055113a03d5b50cd2fb8cf2ef8", size = 12267343, upload-time = "2026-01-26T11:49:34.985Z" },
{ url = "https://files.pythonhosted.org/packages/f8/15/37af97f5717818f3d82d57414299c293b321ac83e048c0a90bb8b6a09072/duckdb-1.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:50f2eb173c573811b44aba51176da7a4e5c487113982be6a6a1c37337ec5fa57", size = 13007490, upload-time = "2026-01-26T11:49:37.413Z" },
{ url = "https://files.pythonhosted.org/packages/7f/fe/64810fee20030f2bf96ce28b527060564864ce5b934b50888eda2cbf99dd/duckdb-1.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:337f8b24e89bc2e12dadcfe87b4eb1c00fd920f68ab07bc9b70960d6523b8bc3", size = 28899349, upload-time = "2026-01-26T11:49:40.294Z" },
{ url = "https://files.pythonhosted.org/packages/9c/9b/3c7c5e48456b69365d952ac201666053de2700f5b0144a699a4dc6854507/duckdb-1.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0509b39ea7af8cff0198a99d206dca753c62844adab54e545984c2e2c1381616", size = 15350691, upload-time = "2026-01-26T11:49:43.242Z" },
{ url = "https://files.pythonhosted.org/packages/a6/7b/64e68a7b857ed0340045501535a0da99ea5d9d5ea3708fec0afb8663eb27/duckdb-1.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fb94de6d023de9d79b7edc1ae07ee1d0b4f5fa8a9dcec799650b5befdf7aafec", size = 13672311, upload-time = "2026-01-26T11:49:46.069Z" },
{ url = "https://files.pythonhosted.org/packages/09/5b/3e7aa490841784d223de61beb2ae64e82331501bf5a415dc87a0e27b4663/duckdb-1.4.4-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0d636ceda422e7babd5e2f7275f6a0d1a3405e6a01873f00d38b72118d30c10b", size = 18422740, upload-time = "2026-01-26T11:49:49.034Z" },
{ url = "https://files.pythonhosted.org/packages/53/32/256df3dbaa198c58539ad94f9a41e98c2c8ff23f126b8f5f52c7dcd0a738/duckdb-1.4.4-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7df7351328ffb812a4a289732f500d621e7de9942a3a2c9b6d4afcf4c0e72526", size = 20435578, upload-time = "2026-01-26T11:49:51.946Z" },
{ url = "https://files.pythonhosted.org/packages/a4/f0/620323fd87062ea43e527a2d5ed9e55b525e0847c17d3b307094ddab98a2/duckdb-1.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:6fb1225a9ea5877421481d59a6c556a9532c32c16c7ae6ca8d127e2b878c9389", size = 12268083, upload-time = "2026-01-26T11:49:54.615Z" },
{ url = "https://files.pythonhosted.org/packages/e5/07/a397fdb7c95388ba9c055b9a3d38dfee92093f4427bc6946cf9543b1d216/duckdb-1.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:f28a18cc790217e5b347bb91b2cab27aafc557c58d3d8382e04b4fe55d0c3f66", size = 13006123, upload-time = "2026-01-26T11:49:57.092Z" },
{ url = "https://files.pythonhosted.org/packages/97/a6/f19e2864e651b0bd8e4db2b0c455e7e0d71e0d4cd2cd9cc052f518e43eb3/duckdb-1.4.4-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:25874f8b1355e96178079e37312c3ba6d61a2354f51319dae860cf21335c3a20", size = 28909554, upload-time = "2026-01-26T11:50:00.107Z" },
{ url = "https://files.pythonhosted.org/packages/0e/93/8a24e932c67414fd2c45bed83218e62b73348996bf859eda020c224774b2/duckdb-1.4.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:452c5b5d6c349dc5d1154eb2062ee547296fcbd0c20e9df1ed00b5e1809089da", size = 15353804, upload-time = "2026-01-26T11:50:03.382Z" },
{ url = "https://files.pythonhosted.org/packages/62/13/e5378ff5bb1d4397655d840b34b642b1b23cdd82ae19599e62dc4b9461c9/duckdb-1.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8e5c2d8a0452df55e092959c0bfc8ab8897ac3ea0f754cb3b0ab3e165cd79aff", size = 13676157, upload-time = "2026-01-26T11:50:06.232Z" },
{ url = "https://files.pythonhosted.org/packages/2d/94/24364da564b27aeebe44481f15bd0197a0b535ec93f188a6b1b98c22f082/duckdb-1.4.4-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1af6e76fe8bd24875dc56dd8e38300d64dc708cd2e772f67b9fbc635cc3066a3", size = 18426882, upload-time = "2026-01-26T11:50:08.97Z" },
{ url = "https://files.pythonhosted.org/packages/26/0a/6ae31b2914b4dc34243279b2301554bcbc5f1a09ccc82600486c49ab71d1/duckdb-1.4.4-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0440f59e0cd9936a9ebfcf7a13312eda480c79214ffed3878d75947fc3b7d6d", size = 20435641, upload-time = "2026-01-26T11:50:12.188Z" },
{ url = "https://files.pythonhosted.org/packages/d2/b1/fd5c37c53d45efe979f67e9bd49aaceef640147bb18f0699a19edd1874d6/duckdb-1.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:59c8d76016dde854beab844935b1ec31de358d4053e792988108e995b18c08e7", size = 12762360, upload-time = "2026-01-26T11:50:14.76Z" },
{ url = "https://files.pythonhosted.org/packages/dd/2d/13e6024e613679d8a489dd922f199ef4b1d08a456a58eadd96dc2f05171f/duckdb-1.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:53cd6423136ab44383ec9955aefe7599b3fb3dd1fe006161e6396d8167e0e0d4", size = 13458633, upload-time = "2026-01-26T11:50:17.657Z" },
]
[[package]]
name = "durationpy"
version = "0.10"


@@ -0,0 +1,68 @@
---
name: chart-visualization
description: This skill should be used when the user wants to visualize data. It intelligently selects the most suitable chart type from 26 available options, extracts parameters based on detailed specifications, and generates a chart image using a JavaScript script.
dependency:
nodejs: ">=18.0.0"
---
# Chart Visualization Skill
This skill provides a comprehensive workflow for transforming data into visual charts. It handles chart selection, parameter extraction, and image generation.
## Workflow
To visualize data, follow these steps:
### 1. Intelligent Chart Selection
Analyze the user's data features to determine the most appropriate chart type. Use the following guidelines (and consult `references/` for detailed specs):
- **Time Series**: Use `generate_line_chart` (trends) or `generate_area_chart` (accumulated trends). Use `generate_dual_axes_chart` for two different scales.
- **Comparisons**: Use `generate_bar_chart` (horizontal bars) or `generate_column_chart` (vertical columns) for categorical comparisons. Use `generate_histogram_chart` for frequency distributions.
- **Part-to-Whole**: Use `generate_pie_chart` or `generate_treemap_chart` (hierarchical).
- **Relationships & Flow**: Use `generate_scatter_chart` (correlation), `generate_sankey_chart` (flow), or `generate_venn_chart` (overlap).
- **Maps**: Use `generate_district_map` (regions), `generate_pin_map` (points), or `generate_path_map` (routes).
- **Hierarchies & Trees**: Use `generate_organization_chart` or `generate_mind_map`.
- **Specialized**:
- `generate_radar_chart`: Multi-dimensional comparison.
- `generate_funnel_chart`: Process stages.
- `generate_liquid_chart`: Percentage/Progress.
- `generate_word_cloud_chart`: Text frequency.
- `generate_boxplot_chart` or `generate_violin_chart`: Statistical distribution.
- `generate_network_graph`: Complex node-edge relationships.
- `generate_fishbone_diagram`: Cause-effect analysis.
- `generate_flow_diagram`: Process flow.
- `generate_spreadsheet`: Tabular data or pivot tables for structured data display and cross-tabulation.
### 2. Parameter Extraction
Once a chart type is selected, read the corresponding file in the `references/` directory (e.g., `references/generate_line_chart.md`) to identify the required and optional fields.
Extract the data from the user's input and map it to the expected `args` format.
### 3. Chart Generation
Invoke the `scripts/generate.js` script with a JSON payload.
**Payload Format:**
```json
{
"tool": "generate_chart_type_name",
"args": {
"data": [...],
"title": "...",
"theme": "...",
"style": { ... }
}
}
```
**Execution Command:**
```bash
node ./scripts/generate.js '<payload_json>'
```
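The script also accepts a path to a JSON file in place of the inline payload. As a minimal illustration (chart type and data values here are invented):
```bash
node ./scripts/generate.js '{"tool":"generate_pie_chart","args":{"data":[{"category":"Mobile","value":62},{"category":"Desktop","value":38}],"title":"Traffic Share"}}'
```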
### 4. Result Return
The script will output the URL of the generated chart image.
Return the following to the user:
- The image URL.
- The complete `args` (specification) used for generation.
## Reference Material
Detailed specifications for each chart type are located in the `references/` directory. Consult these files to ensure the `args` passed to the script match the expected schema.


@@ -0,0 +1,27 @@
# generate_area_chart — Area Chart
## Overview
Shows how values trend along a continuous independent variable (usually time). Stacking can be enabled to observe the cumulative contribution of different groups. Suited to time-series scenarios such as KPIs, energy, and output.
## Input Fields
### Required
- `data`: array; each element contains `time` (string) and `value` (number); when stacking, each record must also include `group` (string); at least 1 record.
### Optional
- `stack`: boolean, default `false`; when stacking is enabled, every record must contain a `group` field.
- `style.backgroundColor`: string, sets the chart background color (e.g. `#fff`).
- `style.lineWidth`: number, custom line width for the area boundary.
- `style.palette`: string[], a palette array used to color the series.
- `style.texture`: string, default `default`; options `default`/`rough` control the hand-drawn texture.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`, chart width.
- `height`: number, default `400`, chart height.
- `title`: string, default empty, chart title.
- `axisXTitle`: string, default empty, X-axis title.
- `axisYTitle`: string, default empty, Y-axis title.
## Usage Tips
Keep the `time` format consistent (e.g. `YYYY-MM`); in stacked mode every group must cover the same time points, so impute missing values first.
## Return Value
- Returns the image URL, with the complete area-chart spec attached in `_meta.spec` for re-rendering or tracking.
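## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_area_chart",
  "args": {
    "data": [
      { "time": "2024-01", "value": 120 },
      { "time": "2024-02", "value": 158 },
      { "time": "2024-03", "value": 143 }
    ],
    "title": "Monthly Output"
  }
}
```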


@@ -0,0 +1,27 @@
# generate_bar_chart — Bar Chart
## Overview
Compares metrics across categories or groups with horizontal bars; suited to Top-N rankings and comparisons across regions or channels.
## Input Fields
### Required
- `data`: array<object>; each record contains at least `category` (string) and `value` (number); for grouping or stacking, also provide `group` (string).
### Optional
- `group`: boolean, default `false`; when enabled, different `group`s are shown side by side; requires `stack=false` and a `group` field in the data.
- `stack`: boolean, default `true`; when enabled, different `group`s are stacked on the same bar; requires `group=false` and a `group` field in the data.
- `style.backgroundColor`: string, custom background color (e.g. `#fff`).
- `style.palette`: string[], list of series colors.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`, chart width.
- `height`: number, default `400`, chart height.
- `title`: string, default empty, chart title.
- `axisXTitle`: string, default empty, X-axis title.
- `axisYTitle`: string, default empty, Y-axis title.
## Usage Tips
Keep category names short; if there are many series, switch to stacking or filter down to the key items to avoid clutter.
## Return Value
- Returns the bar-chart image URL, with the complete configuration in `_meta.spec` for reuse.
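## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_bar_chart",
  "args": {
    "data": [
      { "category": "East", "value": 410 },
      { "category": "North", "value": 320 },
      { "category": "South", "value": 280 }
    ],
    "title": "Sales by Region"
  }
}
```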


@@ -0,0 +1,25 @@
# generate_boxplot_chart — Box Plot
## Overview
Shows the distribution of values within each category (extremes, quartiles, outliers); used for quality monitoring, experiment results, and comparing populations.
## Input Fields
### Required
- `data`: array<object>; each record contains `category` (string) and `value` (number), with optional `group` (string) for multi-group comparison.
### Optional
- `style.backgroundColor`: string, background color.
- `style.palette`: string[], color list.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
- `axisXTitle`: string, default empty.
- `axisYTitle`: string, default empty.
## Usage Tips
Provide at least 5 samples per category for statistical meaning; to show multiple batches, use `group` or split into multiple calls.
## Return Value
- Returns the box-plot URL, storing the input spec in `_meta.spec`.
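## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_boxplot_chart",
  "args": {
    "data": [
      { "category": "Batch A", "value": 9.8 },
      { "category": "Batch A", "value": 10.2 },
      { "category": "Batch A", "value": 10.6 },
      { "category": "Batch B", "value": 10.7 },
      { "category": "Batch B", "value": 11.1 },
      { "category": "Batch B", "value": 11.4 }
    ],
    "title": "Measurement Distribution"
  }
}
```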


@@ -0,0 +1,27 @@
# generate_column_chart — Column Chart
## Overview
Compares metrics across categories or time periods with vertical columns, grouped or stacked; commonly used for sales, revenue, and foot-traffic comparisons.
## Input Fields
### Required
- `data`: array<object>; each record contains at least `category` (string) and `value` (number); for grouping or stacking, also provide `group` (string).
### Optional
- `group`: boolean, default `true`; shows different `group`s side by side; when enabled, ensure `stack=false` and that the data contains `group`.
- `stack`: boolean, default `false`; stacks different `group`s onto the same column; when enabled, ensure `group=false` and that the data contains `group`.
- `style.backgroundColor`: string, custom background color.
- `style.palette`: string[], color list.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
- `axisXTitle`: string, default empty.
- `axisYTitle`: string, default empty.
## Usage Tips
With many categories (>12), keep the Top-N or aggregate; in stacked mode make sure every record contains `group`, otherwise validation fails.
## Return Value
- Returns the column-chart URL, with configuration details in `_meta.spec`.
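## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_column_chart",
  "args": {
    "data": [
      { "category": "Q1", "value": 91, "group": "2023" },
      { "category": "Q1", "value": 120, "group": "2024" },
      { "category": "Q2", "value": 104, "group": "2023" },
      { "category": "Q2", "value": 135, "group": "2024" }
    ],
    "group": true,
    "title": "Quarterly Revenue"
  }
}
```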


@@ -0,0 +1,28 @@
# generate_district_map — Administrative District Map (China)
## Overview
Generates coverage or heat maps for provinces/cities/districts/counties within China, showing metric ranges, categories, or regional composition; suited to regional sales, policy coverage, and similar scenarios.
## Input Fields
### Required
- `title`: string, required, at most 16 characters, describing the map's theme.
- `data`: object, required; carries the district configuration and metric information.
- `data.name`: string, required; a district keyword within China, specific to the province/city/district/county level.
### Optional
- `data.style.fillColor`: string, custom fill color for areas without data.
- `data.colors`: string[], enumerated or continuous color ramp; a 10-color list is provided by default.
- `data.dataType`: string, enum `number`/`enum`; determines how colors are mapped.
- `data.dataLabel`: string, metric name (e.g. `GDP`).
- `data.dataValue`: string, metric value or enum label.
- `data.dataValueUnit`: string, metric unit (e.g. `万亿`, i.e. trillions).
- `data.showAllSubdistricts`: boolean, default `false`; whether to render all lower-level districts.
- `data.subdistricts[]`: array<object>, for drilling into sub-regions; each element needs at least `name` and may add `dataValue` and `style.fillColor`.
- `width`: number, default `1600`, map width.
- `height`: number, default `1000`, map height.
## Usage Tips
Names must be precise to the administrative level; avoid vague terms. If `subdistricts` is configured, also enable `showAllSubdistricts`. The map only supports locations within China and relies on AMap (Gaode) data.
## Return Value
- Returns the map image URL with the full input preserved in `_meta.spec`; if `SERVICE_ID` is configured, the generated record is also synced to the "My Maps" (我的地图) mini-program.
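## Example
A minimal illustrative payload for `scripts/generate.js` (district and values are invented):
```json
{
  "tool": "generate_district_map",
  "args": {
    "title": "Zhejiang GDP",
    "data": {
      "name": "浙江省",
      "dataType": "number",
      "dataLabel": "GDP",
      "dataValue": "8.3",
      "dataValueUnit": "万亿"
    }
  }
}
```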


@@ -0,0 +1,25 @@
# generate_dual_axes_chart — Dual-Axes Chart
## Overview
Overlays columns and a line (or two curves with different scales) on one canvas to show trend and comparison together, e.g. revenue vs. profit or temperature vs. rainfall.
## Input Fields
### Required
- `categories`: string[], the X-axis ticks in order (e.g. years, months, categories).
- `series`: array<object>; each item contains at least `type` (`column`/`line`) and `data` (number[], same length as `categories`), with optional `axisYTitle` (string) describing that series' Y-axis meaning.
### Optional
- `style.backgroundColor`: string, custom background color.
- `style.palette`: string[], colors for multiple series.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
- `axisXTitle`: string, default empty.
## Usage Tips
Use only when the series genuinely differ in scale or need side-by-side comparison; keep the series count ≤2 for readability; if the two curves differ greatly in magnitude, let the secondary axis do the scaling.
## Return Value
- Returns the dual-axes chart image URL with detailed parameters in `_meta.spec`.
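## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_dual_axes_chart",
  "args": {
    "categories": ["2022", "2023", "2024"],
    "series": [
      { "type": "column", "data": [420, 480, 560], "axisYTitle": "Revenue (M)" },
      { "type": "line", "data": [12.4, 13.1, 14.8], "axisYTitle": "Margin (%)" }
    ],
    "title": "Revenue vs. Margin"
  }
}
```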


@@ -0,0 +1,20 @@
# generate_fishbone_diagram — Fishbone Diagram
## Overview
Used for root-cause analysis: the central problem sits on the spine, with branches on both sides showing cause categories and their refined nodes; common in quality management and process optimization.
## Input Fields
### Required
- `data`: object, required; provide at least the root node's `name`, recursively expandable via `children` (array<object>); at most 3 levels recommended.
### Optional
- `style.texture`: string, default `default`; options `default`/`rough` switch the line style.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
## Usage Tips
The spine node states the problem; first-level branches name cause categories (man, machine, material, method, etc.); leaf nodes describe concrete symptoms as short phrases.
## Return Value
- Returns the fishbone-diagram URL, saving the tree structure in `_meta.spec` so nodes can be added or removed later.
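## Example
A minimal illustrative payload for `scripts/generate.js` (node names are invented):
```json
{
  "tool": "generate_fishbone_diagram",
  "args": {
    "data": {
      "name": "Late deliveries",
      "children": [
        { "name": "Man", "children": [{ "name": "Understaffed night shift" }] },
        { "name": "Method", "children": [{ "name": "Manual routing" }] }
      ]
    }
  }
}
```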


@@ -0,0 +1,22 @@
# generate_flow_diagram — Flow Diagram
## Overview
Shows business processes, approval chains, or algorithm steps with nodes and edges; supports multiple node types such as start/decision/operation.
## Input Fields
### Required
- `data`: object, required; contains the node and edge definitions.
- `data.nodes`: array<object>, at least 1; each node needs a unique `name`.
- `data.edges`: array<object>, at least 1; each edge contains `source` and `target` (string), with optional `name` as the edge label.
### Optional
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
## Usage Tips
List unique node `name`s first, then build the edges; put conditions in `edges.name` if needed; keep the flow one-directional or with clear branches to avoid crossings.
## Return Value
- Returns the flow-diagram URL, carrying node and edge data in `_meta.spec` for later adjustments.
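## Example
A minimal illustrative payload for `scripts/generate.js` (node names are invented):
```json
{
  "tool": "generate_flow_diagram",
  "args": {
    "data": {
      "nodes": [{ "name": "Submit" }, { "name": "Review" }, { "name": "Approve" }],
      "edges": [
        { "source": "Submit", "target": "Review" },
        { "source": "Review", "target": "Approve", "name": "passed" }
      ]
    }
  }
}
```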


@@ -0,0 +1,23 @@
# generate_funnel_chart — Funnel Chart
## Overview
Shows multi-stage conversion or drop-off; common for sales pipelines, user journeys, and other step-by-step filtering processes.
## Input Fields
### Required
- `data`: array<object>, ordered by process stage; each record contains `category` (string) and `value` (number).
### Optional
- `style.backgroundColor`: string, background color.
- `style.palette`: string[], colors for each stage.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
## Usage Tips
Order stages by the actual process; if values are percentages, use a consistent base and state it in the title or a note; avoid too many stages (≤6 recommended) to keep the chart readable.
## Return Value
- Returns the funnel-chart URL with `_meta.spec` attached for reuse.
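## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_funnel_chart",
  "args": {
    "data": [
      { "category": "Visits", "value": 10000 },
      { "category": "Sign-ups", "value": 3200 },
      { "category": "Purchases", "value": 640 }
    ],
    "title": "Conversion Funnel"
  }
}
```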


@@ -0,0 +1,26 @@
# generate_histogram_chart — Histogram
## Overview
Shows the frequency or probability distribution of continuous values via binning, making skew, outliers, and concentration ranges easy to identify.
## Input Fields
### Required
- `data`: number[], at least 1 value, used to build the frequency distribution.
### Optional
- `binNumber`: number, custom bin count; auto-estimated when unset.
- `style.backgroundColor`: string, background color.
- `style.palette`: string[], bar colors.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
- `axisXTitle`: string, default empty.
- `axisYTitle`: string, default empty.
## Usage Tips
Clean out nulls and outliers before passing data in; a sample size ≥30 is recommended; tune `binNumber` to the business meaning, balancing detail against the overall trend.
## Return Value
- Returns the histogram URL with the parameters stored in `_meta.spec`.
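## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_histogram_chart",
  "args": {
    "data": [4.2, 4.8, 5.1, 5.3, 5.9, 6.0, 6.2, 6.8, 7.1, 7.5],
    "binNumber": 5,
    "title": "Response Time Distribution"
  }
}
```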


@@ -0,0 +1,26 @@
# generate_line_chart — Line Chart
## Overview
Shows trends over time or another continuous independent variable, with multi-series comparison support; suited to KPI monitoring, metric forecasting, and trend analysis.
## Input Fields
### Required
- `data`: array<object>; each record contains `time` (string) and `value` (number); for multiple series, add `group` (string).
### Optional
- `style.lineWidth`: number, custom line width.
- `style.backgroundColor`: string, background color.
- `style.palette`: string[], series colors.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
- `axisXTitle`: string, default empty.
- `axisYTitle`: string, default empty.
## Usage Tips
Align time points across all series; prefer ISO-style formats such as `2025-01-01` or `2025-W01`; aggregate high-frequency data to daily/weekly granularity first to avoid overcrowding.
## Return Value
- Returns the line-chart URL with `_meta.spec` for later editing.
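## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_line_chart",
  "args": {
    "data": [
      { "time": "2024-01", "value": 512 },
      { "time": "2024-02", "value": 634 },
      { "time": "2024-03", "value": 601 }
    ],
    "title": "Monthly Active Users"
  }
}
```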


@@ -0,0 +1,24 @@
# generate_liquid_chart — Liquid Chart
## Overview
Shows a single percentage or progress value as a liquid level; visually striking, suited to attainment rates, resource utilization, and similar metrics.
## Input Fields
### Required
- `percent`: number in the range [0,1], the current percentage or progress.
### Optional
- `shape`: string, default `circle`; options `circle`/`rect`/`pin`/`triangle`.
- `style.backgroundColor`: string, custom background color.
- `style.color`: string, custom wave color.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
## Usage Tips
Make sure the percentage is normalized; a single chart supports only one progress value, so render several liquid charts side by side for multiple metrics; the title can state e.g. "Target completion 85%".
## Return Value
- Returns the liquid-chart URL with the parameters recorded in `_meta.spec`.
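## Example
A minimal illustrative payload for `scripts/generate.js` (values are invented):
```json
{
  "tool": "generate_liquid_chart",
  "args": {
    "percent": 0.85,
    "shape": "circle",
    "title": "Target completion 85%"
  }
}
```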


@@ -0,0 +1,20 @@
# generate_mind_map — Mind Map
## Overview
Expands 2-3 levels of branches around a central topic to organize ideas, plans, or knowledge structures; common in brainstorming and solution planning.
## Input Fields
### Required
- `data`: object, required; each node has at least `name` and expands recursively via `children` (array<object>); depth ≤3 recommended.
### Optional
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
## Usage Tips
The central node states the topic; first-level branches represent the major dimensions (goals, resources, risks, etc.); leaf nodes use short phrases; if there are many branches, split them into several maps.
## Return Value
- Returns the mind-map URL, keeping the node tree in `_meta.spec` for later refinement.
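## Example
A minimal illustrative payload for `scripts/generate.js` (node names are invented):
```json
{
  "tool": "generate_mind_map",
  "args": {
    "data": {
      "name": "Q3 Launch Plan",
      "children": [
        { "name": "Goals", "children": [{ "name": "10k sign-ups" }] },
        { "name": "Risks", "children": [{ "name": "Supply delay" }] }
      ]
    }
  }
}
```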


@@ -0,0 +1,22 @@
# generate_network_graph — Network Graph
## Overview
Presents connections between entities with nodes and edges; suited to social networks, system dependencies, knowledge graphs, and similar scenarios.
## Input Fields
### Required
- `data`: object, required; contains the nodes and edges.
- `data.nodes`: array<object>, at least 1; each node needs a unique `name`.
- `data.edges`: array<object>, at least 1; each edge contains `source` and `target` (string), with optional `name` describing the relationship.
### Optional
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
## Usage Tips
Keep the node count between 10 and 50 to avoid crowding; make sure every `source`/`target` in `edges` refers to an existing node; the edge `name` can spell out what the relationship means.
## Return Value
- Returns the network-graph URL, providing `_meta.spec` for adding or removing nodes later.
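## Example
A minimal illustrative payload for `scripts/generate.js` (node names are invented):
```json
{
  "tool": "generate_network_graph",
  "args": {
    "data": {
      "nodes": [{ "name": "API" }, { "name": "Auth" }, { "name": "DB" }],
      "edges": [
        { "source": "API", "target": "Auth", "name": "verifies via" },
        { "source": "API", "target": "DB", "name": "reads" }
      ]
    }
  }
}
```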


@@ -0,0 +1,21 @@
# generate_organization_chart — Organization Chart
## Overview
Shows the hierarchy of a company, team, or project, with role responsibilities describable on each node.
## Input Fields
### Required
- `data`: object, required; each node has at least `name` (string) and optional `description` (string); children nest via `children` (array<object>); a maximum depth of 3 is recommended.
### Optional
- `orient`: string, default `vertical`; options `horizontal`/`vertical`.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
## Usage Tips
Use positions/roles as node names and a brief `description` for duties or headcount; for large organizations, split into sub-charts or render department by department.
## Return Value
- Returns the org-chart URL, saving the structure in `_meta.spec` for future iteration.
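## Example
A minimal illustrative payload for `scripts/generate.js` (roles and headcounts are invented):
```json
{
  "tool": "generate_organization_chart",
  "args": {
    "data": {
      "name": "CEO",
      "children": [
        { "name": "CTO", "description": "Engineering, 40 people" },
        { "name": "CMO", "description": "Marketing, 15 people" }
      ]
    },
    "orient": "vertical"
  }
}
```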


@@ -0,0 +1,20 @@
# generate_path_map — Path Map (China)
## Overview
Renders routes or itineraries within China on AMap, connecting a sequence of POIs in order; suited to logistics routes, travel planning, delivery tracks, etc.
## Input Fields
### Required
- `title`: string, required, at most 16 characters, describing the route's theme.
- `data`: array<object>, at least 1 route object.
- `data[].data`: string[], required; the POI names (within China) along that route, in order.
### Optional
- `width`: number, default `1600`.
- `height`: number, default `1000`.
## Usage Tips
POI names must be specific and located in China (e.g. "西安市钟楼", "杭州西湖苏堤春晓"); for multiple routes, add more objects to `data`.
## Return Value
- Returns the path-map URL, preserving the title and POI lists in `_meta.spec`; if `SERVICE_ID` is configured, the record is also logged to "My Maps" (我的地图).
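## Example
A minimal illustrative payload for `scripts/generate.js` (the route is invented):
```json
{
  "tool": "generate_path_map",
  "args": {
    "title": "西安一日游",
    "data": [
      { "data": ["西安市钟楼", "回民街", "大雁塔"] }
    ]
  }
}
```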


@@ -0,0 +1,24 @@
# generate_pie_chart — Pie/Donut Chart
## Overview
Shows part-to-whole proportions; setting an inner radius turns it into a donut; suited to market share, budget composition, user segmentation, etc.
## Input Fields
### Required
- `data`: array<object>; each record contains `category` (string) and `value` (number).
### Optional
- `innerRadius`: number in the range [0, 1], default `0`; a value such as `0.6` produces a donut chart.
- `style.backgroundColor`: string, background color.
- `style.palette`: string[], color list.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
## Usage Tips
Keep the category count ≤6 and aggregate the rest into "Other"; keep units consistent (percentages or absolute values); state the base in the title when necessary.
## Return Value
- Returns the pie/donut-chart URL with `_meta.spec` attached.
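## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_pie_chart",
  "args": {
    "data": [
      { "category": "Brand A", "value": 38 },
      { "category": "Brand B", "value": 27 },
      { "category": "Other", "value": 35 }
    ],
    "innerRadius": 0.6,
    "title": "Market Share"
  }
}
```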


@@ -0,0 +1,23 @@
# generate_pin_map — Pin Map (China)
## Overview
Marks multiple POI locations on a map of China, optionally with popups showing images or notes; suited to store distribution, asset placement, etc.
## Input Fields
### Required
- `title`: string, required, at most 16 characters, summarizing the point set.
- `data`: string[], required; a list of POI names within China.
### Optional
- `markerPopup.type`: string, fixed as `image`.
- `markerPopup.width`: number, default `40`, image width.
- `markerPopup.height`: number, default `40`, image height.
- `markerPopup.borderRadius`: number, default `8`, image corner radius.
- `width`: number, default `1600`.
- `height`: number, default `1000`.
## Usage Tips
POI names need enough geographic qualification (city + landmark); business attributes can be appended to the name, e.g. "上海徐汇门店 A"; the map relies on AMap data and supports China only.
## Return Value
- Returns the pin-map URL, saving the points and popup configuration in `_meta.spec`.
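## Example
A minimal illustrative payload for `scripts/generate.js` (the POIs are invented):
```json
{
  "tool": "generate_pin_map",
  "args": {
    "title": "上海门店分布",
    "data": ["上海徐汇门店 A", "上海静安门店 B", "上海浦东门店 C"]
  }
}
```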


@@ -0,0 +1,24 @@
# generate_radar_chart — Radar Chart
## Overview
Compares the capability dimensions of one or more objects on a multi-dimensional coordinate system; common in evaluations, product comparisons, and performance profiling.
## Input Fields
### Required
- `data`: array<object>; each record contains `name` (string) and `value` (number), with optional `group` (string).
### Optional
- `style.backgroundColor`: string, background color.
- `style.lineWidth`: number, radar line width.
- `style.palette`: string[], series colors.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
## Usage Tips
Keep the number of dimensions between 4 and 8; distinguish objects with `group` and give every object a value on each dimension; normalize first if scales differ.
## Return Value
- Returns the radar-chart URL with `_meta.spec` attached.
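## Example
A minimal illustrative payload for `scripts/generate.js` (scores are invented):
```json
{
  "tool": "generate_radar_chart",
  "args": {
    "data": [
      { "name": "Performance", "value": 8 },
      { "name": "Price", "value": 6 },
      { "name": "Design", "value": 9 },
      { "name": "Battery", "value": 7 }
    ],
    "title": "Product Scorecard"
  }
}
```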


@@ -0,0 +1,24 @@
# generate_sankey_chart — Sankey Diagram
## Overview
Shows the direction and volume of resource, energy, or user flows between nodes; suited to budget allocation, traffic paths, energy distribution, etc.
## Input Fields
### Required
- `data`: array<object>; each record contains `source` (string), `target` (string), and `value` (number).
### Optional
- `nodeAlign`: string, default `center`; options `left`/`right`/`justify`/`center`.
- `style.backgroundColor`: string, background color.
- `style.palette`: string[], node colors.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
## Usage Tips
Keep node names unique and avoid excessive crossings; flatten any cycles into staged flows first; filter out small flows by threshold to keep the focus.
## Return Value
- Returns the sankey-diagram URL, storing the node and flow definitions in `_meta.spec`.
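## Example
A minimal illustrative payload for `scripts/generate.js` (flows are invented):
```json
{
  "tool": "generate_sankey_chart",
  "args": {
    "data": [
      { "source": "Ads", "target": "Landing Page", "value": 5000 },
      { "source": "Landing Page", "target": "Sign-up", "value": 1200 },
      { "source": "Landing Page", "target": "Bounce", "value": 3800 }
    ],
    "title": "Traffic Flow"
  }
}
```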


@@ -0,0 +1,25 @@
# generate_scatter_chart — Scatter Chart
## Overview
Shows the relationship between two continuous variables, with color/shape distinguishing groups; suited to correlation analysis and cluster exploration.
## Input Fields
### Required
- `data`: array<object>; each record contains `x` (number) and `y` (number), with optional `group` (string).
### Optional
- `style.backgroundColor`: string, background color.
- `style.palette`: string[], series colors.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
- `axisXTitle`: string, default empty.
- `axisYTitle`: string, default empty.
## Usage Tips
Standardize variables with different scales before uploading; sample first if the dataset is very large; use `group` to mark categories or clustering results for readability.
## Return Value
- Returns the scatter-chart URL with `_meta.spec` attached.
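## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_scatter_chart",
  "args": {
    "data": [
      { "x": 1.2, "y": 3.4 },
      { "x": 2.5, "y": 5.1 },
      { "x": 3.1, "y": 6.8 }
    ],
    "axisXTitle": "Ad Spend",
    "axisYTitle": "Revenue"
  }
}
```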


@@ -0,0 +1,24 @@
# generate_spreadsheet — Spreadsheet / Pivot Table
## Overview
Generates a spreadsheet or pivot table for structured tabular data. When both `rows` and `values` are provided, it renders as a pivot table (cross-tab); otherwise it renders as a regular table. Suited to displaying structured data, comparing values across categories, and building data summaries.
## Input Fields
### Required
- `data`: array<object>; the table rows, one object per row. Keys are column names; values may be strings, numbers, null, or undefined. Example: `[{ name: 'John', age: 30 }, { name: 'Jane', age: 25 }]`.
### Optional
- `rows`: array<string>, row-header fields for the pivot table. When both `rows` and `values` are provided, the spreadsheet renders as a pivot table.
- `columns`: array<string>, column-header fields specifying column order. For regular tables this sets the column order; for pivot tables it is used for column grouping.
- `values`: array<string>, value fields for the pivot table. When both `rows` and `values` are provided, the spreadsheet renders as a pivot table.
- `theme`: string, default `default`; options `default`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
## Usage Tips
- For regular tables, provide only `data` plus an optional `columns` to control column order.
- For pivot tables (cross-tabs), provide `rows` for row grouping, `columns` for column grouping, and `values` for the aggregated value fields.
- Make sure field names in the data match those specified in `rows`, `columns`, and `values`.
## Return Value
- Returns the spreadsheet/pivot-table image URL with `_meta.spec` for later editing.
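## Example
A minimal illustrative payload matching this spec (data values are invented):
```json
{
  "tool": "generate_spreadsheet",
  "args": {
    "data": [
      { "region": "North", "product": "A", "sales": 120 },
      { "region": "North", "product": "B", "sales": 90 },
      { "region": "South", "product": "A", "sales": 150 }
    ],
    "rows": ["region"],
    "columns": ["product"],
    "values": ["sales"]
  }
}
```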


@@ -0,0 +1,23 @@
# generate_treemap_chart — Treemap
## Overview
Shows a hierarchy and each node's weight with nested rectangles; suited to asset shares, market shares, directory sizes, etc.
## Input Fields
### Required
- `data`: array<object>; the node array, each with `name` (string) and `value` (number), recursively nestable via `children`.
### Optional
- `style.backgroundColor`: string, background color.
- `style.palette`: string[], color list.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
## Usage Tips
Ensure every node's `value` is ≥0 and consistent with the sum of its children; avoid overly deep trees, pre-aggregating where needed; for readability, units can be appended to node names.
## Return Value
- Returns the treemap URL with `_meta.spec` synced.
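## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_treemap_chart",
  "args": {
    "data": [
      {
        "name": "Assets",
        "value": 100,
        "children": [
          { "name": "Equities", "value": 60 },
          { "name": "Bonds", "value": 40 }
        ]
      }
    ],
    "title": "Portfolio Breakdown"
  }
}
```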


@@ -0,0 +1,23 @@
# generate_venn_chart — Venn Diagram
## Overview
Shows intersections, unions, and differences among multiple sets; suited to market segmentation, feature coverage, and user-overlap analysis.
## Input Fields
### Required
- `data`: array<object>; each record contains `value` (number) and `sets` (string[]), with optional `label` (string).
### Optional
- `style.backgroundColor`: string, background color.
- `style.palette`: string[], color list.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
## Usage Tips
Keep the number of sets ≤4; if exact weights are unavailable, fill in approximate proportions; keep set names short and clear (e.g. "Mobile users").
## Return Value
- Returns the venn-diagram URL, saved in `_meta.spec`.
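## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_venn_chart",
  "args": {
    "data": [
      { "sets": ["Mobile"], "value": 120, "label": "Mobile users" },
      { "sets": ["Desktop"], "value": 80, "label": "Desktop users" },
      { "sets": ["Mobile", "Desktop"], "value": 30 }
    ],
    "title": "User Overlap"
  }
}
```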


@@ -0,0 +1,25 @@
# generate_violin_chart — Violin Chart
## Overview
Combines kernel-density curves with box-plot statistics to show distribution shapes across categories; suited to comparing experiment batches or population performance.
## Input Fields
### Required
- `data`: array<object>; each record contains `category` (string) and `value` (number), with optional `group` (string).
### Optional
- `style.backgroundColor`: string, background color.
- `style.palette`: string[], color list.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
- `axisXTitle`: string, default empty.
- `axisYTitle`: string, default empty.
## Usage Tips
A sample size ≥30 per category is recommended to keep density estimates stable; to highlight quartile information, pair it with a box plot.
## Return Value
- Returns the violin-chart URL, keeping the configuration in `_meta.spec`.
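## Example
A minimal illustrative payload for `scripts/generate.js` (data values are invented):
```json
{
  "tool": "generate_violin_chart",
  "args": {
    "data": [
      { "category": "Control", "value": 5.1 },
      { "category": "Control", "value": 5.6 },
      { "category": "Treatment", "value": 6.4 },
      { "category": "Treatment", "value": 6.9 }
    ],
    "title": "Experiment Outcomes"
  }
}
```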


@@ -0,0 +1,23 @@
# generate_word_cloud_chart — Word Cloud
## Overview
Scales word size and placement by frequency or weight, quickly surfacing text themes, sentiment, or keyword hotspots.
## Input Fields
### Required
- `data`: array<object>; each record contains `text` (string) and `value` (number).
### Optional
- `style.backgroundColor`: string, background color.
- `style.palette`: string[], word-cloud colors.
- `style.texture`: string, default `default`; options `default`/`rough`.
- `theme`: string, default `default`; options `default`/`academy`/`dark`.
- `width`: number, default `600`.
- `height`: number, default `400`.
- `title`: string, default empty.
## Usage Tips
Remove stop words and merge synonyms before generation; normalize casing to avoid duplicates; to highlight sentiment, map colors by positive/negative value.
## Return Value
- Returns the word-cloud URL with `_meta.spec` attached.
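## Example
A minimal illustrative payload for `scripts/generate.js` (word weights are invented):
```json
{
  "tool": "generate_word_cloud_chart",
  "args": {
    "data": [
      { "text": "battery", "value": 98 },
      { "text": "camera", "value": 74 },
      { "text": "price", "value": 61 }
    ],
    "title": "Review Keywords"
  }
}
```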


@@ -0,0 +1,173 @@
#!/usr/bin/env node
const fs = require("fs");
// Chart type mapping, consistent with src/utils/callTool.ts
const CHART_TYPE_MAP = {
generate_area_chart: "area",
generate_bar_chart: "bar",
generate_boxplot_chart: "boxplot",
generate_column_chart: "column",
generate_district_map: "district-map",
generate_dual_axes_chart: "dual-axes",
generate_fishbone_diagram: "fishbone-diagram",
generate_flow_diagram: "flow-diagram",
generate_funnel_chart: "funnel",
generate_histogram_chart: "histogram",
generate_line_chart: "line",
generate_liquid_chart: "liquid",
generate_mind_map: "mind-map",
generate_network_graph: "network-graph",
generate_organization_chart: "organization-chart",
generate_path_map: "path-map",
generate_pie_chart: "pie",
generate_pin_map: "pin-map",
generate_radar_chart: "radar",
generate_sankey_chart: "sankey",
generate_scatter_chart: "scatter",
generate_treemap_chart: "treemap",
generate_venn_chart: "venn",
generate_violin_chart: "violin",
generate_word_cloud_chart: "word-cloud",
};
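// Resolve the rendering endpoint; override with the VIS_REQUEST_SERVER env var.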
function getVisRequestServer() {
return (
process.env.VIS_REQUEST_SERVER ||
"https://antv-studio.alipay.com/api/gpt-vis"
);
}
function getServiceIdentifier() {
return process.env.SERVICE_ID;
}
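// POST a JSON payload and parse the JSON response (uses Node >= 18 global fetch).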
async function httpPost(url, payload) {
const response = await fetch(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(payload),
});
if (!response.ok) {
const text = await response.text();
throw new Error(`HTTP ${response.status}: ${text}`);
}
return response.json();
}
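// Render a regular chart via the remote service and return the image URL.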
async function generateChartUrl(chartType, options) {
const url = getVisRequestServer();
const payload = {
type: chartType,
source: "chart-visualization-creator",
...options,
};
const data = await httpPost(url, payload);
if (!data.success) {
throw new Error(data.errorMessage || "Unknown error");
}
return data.resultObj;
}
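// Map tools (district/path/pin) use a tool-keyed request shape plus an optional service id.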
async function generateMap(tool, inputData) {
const url = getVisRequestServer();
const payload = {
serviceId: getServiceIdentifier(),
tool,
input: inputData,
source: "chart-visualization-creator",
};
const data = await httpPost(url, payload);
if (!data.success) {
throw new Error(data.errorMessage || "Unknown error");
}
return data.resultObj;
}
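// CLI entry: accepts an inline JSON spec or a path to a JSON file; a single spec or an array.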
async function main() {
if (process.argv.length < 3) {
console.error("Usage: node generate.js <spec_json_or_file>");
process.exit(1);
}
const specArg = process.argv[2];
let spec;
try {
if (fs.existsSync(specArg)) {
const fileContent = fs.readFileSync(specArg, "utf-8");
spec = JSON.parse(fileContent);
} else {
spec = JSON.parse(specArg);
}
} catch (e) {
console.error(`Error parsing spec: ${e.message}`);
process.exit(1);
}
const specs = Array.isArray(spec) ? spec : [spec];
for (const item of specs) {
const tool = item.tool;
const args = item.args || {};
if (!tool) {
console.error(
`Error: 'tool' field missing in spec: ${JSON.stringify(item)}`,
);
continue;
}
const chartType = CHART_TYPE_MAP[tool];
if (!chartType) {
console.error(`Error: Unknown tool '${tool}'`);
continue;
}
const isMapChartTool = [
"generate_district_map",
"generate_path_map",
"generate_pin_map",
].includes(tool);
try {
if (isMapChartTool) {
const result = await generateMap(tool, args);
if (result && result.content) {
for (const contentItem of result.content) {
if (contentItem.type === "text") {
console.log(contentItem.text);
}
}
} else {
console.log(JSON.stringify(result));
}
} else {
const url = await generateChartUrl(chartType, args);
console.log(url);
}
} catch (e) {
console.error(`Error generating chart for ${tool}: ${e.message}`);
}
}
}
if (require.main === module) {
main().catch((err) => {
console.error(err.message);
process.exit(1);
});
}
// Export functions for testing
module.exports = { generateChartUrl, generateMap, httpPost, CHART_TYPE_MAP };


@@ -0,0 +1,620 @@
---
name: consulting-analysis
description: Use this skill when the user requests to generate, create, or write professional research reports including but not limited to market analysis, consumer insights, brand analysis, financial analysis, industry research, competitive intelligence, investment due diligence, or any consulting-grade analytical report. This skill operates in two phases — (1) generating a structured analysis framework with chapter skeleton, data query requirements, and analysis logic, and (2) after data collection by other skills, producing the final consulting-grade report with structured narratives, embedded charts, and strategic insights.
---
# Professional Research Report Skill
## Overview
This skill produces professional, consulting-grade research reports in Markdown format, covering domains such as **market analysis, consumer insights, brand strategy, financial analysis, industry research, competitive intelligence, investment research, and macroeconomic analysis**. It operates across two distinct phases:
1. **Phase 1 — Analysis Framework Generation**: Given a research subject, produce a rigorous analysis framework including chapter skeleton, per-chapter data requirements, analysis logic, and visualization plan.
2. **Phase 2 — Report Generation**: After data has been collected by other skills, synthesize all inputs into a final polished report.
The output adheres to McKinsey/BCG consulting voice standards. The report language follows the `output_locale` setting (default: `zh_CN` for Chinese).
## Core Capabilities
- **Design analysis frameworks** from scratch given only a research subject and scope
- Transform raw data into structured, high-depth research reports
- Follow the **"Visual Anchor → Data Contrast → Integrated Analysis"** flow per sub-chapter
- Produce insights following the **"Data → User Psychology → Strategy Implication"** chain
- Embed pre-generated charts and construct comparison tables
- Generate inline citations formatted per **GB/T 7714-2015** standards
- Output reports in the language specified by `output_locale` with professional consulting tone
- Adapt analytical depth and structure to domain (marketing, finance, industry, etc.)
## When to Use This Skill
**Always load this skill when:**
- User asks for a market analysis, consumer insight report, financial analysis, industry research, or any consulting-grade analytical report
- User provides a research subject and needs a structured analysis framework before data collection
- User provides data summaries, analysis frameworks, or chart files to be synthesized into a report
- User needs a professional consulting-style research report
- The task involves transforming research findings into structured strategic narratives
---
# Phase 1: Analysis Framework Generation
## Purpose
Given a **research subject** (e.g., "Gen-Z Skincare Market Analysis", "NEV Industry Competitive Landscape", "Brand X Consumer Profiling"), produce a complete **analysis framework** that serves as the blueprint for downstream data collection and final report generation.
## Phase 1 Inputs
| Input | Description | Required |
|-------|-------------|----------|
| **Research Subject** | The topic or question to be analyzed | Yes |
| **Scope / Constraints** | Geographic scope, time range, industry segment, target audience, etc. | Optional |
| **Specific Angles** | Any particular angles or hypotheses the user wants explored | Optional |
| **Domain** | The analytical domain: market, finance, industry, brand, consumer, investment, etc. | Inferred |
## Phase 1 Workflow
### Step 1.1: Understand the Research Subject
- Parse the research subject to identify the **core entity** (market, brand, product, industry, consumer segment, financial instrument, etc.)
- Identify the **analytical domain** (marketing, finance, industry, competitive, consumer, investment, macro, etc.)
- Determine the **natural analytical dimensions** based on domain:
| Domain | Typical Dimensions |
|--------|--------------------|
| Market Analysis | Market size, growth trends, market segmentation, growth drivers, competitive landscape, consumer profiling |
| Brand Analysis | Brand positioning, market share, consumer perception, marketing strategy, competitor comparison |
| Consumer Insights | Demographic profiling, purchase behavior, decision journey, pain points, scenario analysis |
| Financial Analysis | Macro environment, industry trends, company fundamentals, financial metrics, valuation, risk assessment |
| Industry Research | Value chain analysis, market size, competitive landscape, policy environment, technology trends, entry barriers |
| Investment Due Diligence | Business model, financial health, management assessment, market opportunity, risk factors, exit pathways |
| Competitive Intelligence | Competitor identification, strategic comparison, SWOT analysis, differentiated positioning, market dynamics |
### Step 1.2: Select Analysis Frameworks & Models
Based on the identified domain and research subject, select **one or more** professional analysis frameworks to structure the reasoning in each chapter. The chosen frameworks guide the **Analysis Logic** in the chapter skeleton (Step 1.3).
#### Strategic & Environmental Analysis
| Framework | Description | Best For |
|-----------|-------------|----------|
| **SWOT Analysis** | Strengths, Weaknesses, Opportunities, Threats | Brand assessment, competitive positioning, strategic planning |
| **PEST / PESTEL Analysis** | Political, Economic, Social, Technological (+ Environmental, Legal) | Macro-environment scanning, market entry assessment, policy impact analysis |
| **Porter's Five Forces** | Supplier bargaining power, buyer bargaining power, threat of new entrants, threat of substitutes, industry rivalry | Industry competitive landscape, entry barrier assessment, profit margin analysis |
| **Porter's Diamond Model** | Factor conditions, demand conditions, related industries, firm strategy & structure | National/regional competitive advantage analysis |
| **VRIO Analysis** | Value, Rarity, Imitability, Organization | Core competency assessment, resource advantage analysis |
#### Market & Growth Analysis
| Framework | Description | Best For |
|-----------|-------------|----------|
| **STP Analysis** | Segmentation, Targeting, Positioning | Market segmentation, target market selection, brand positioning |
| **BCG Matrix (Growth-Share Matrix)** | Stars, Cash Cows, Question Marks, Dogs | Product portfolio management, resource allocation decisions |
| **Ansoff Matrix** | Market penetration, market development, product development, diversification | Growth strategy selection |
| **Product Life Cycle (PLC)** | Introduction, growth, maturity, decline | Product strategy formulation, market timing decisions |
| **TAM-SAM-SOM** | Total / Serviceable / Obtainable Market | Market sizing, opportunity quantification |
| **Technology Adoption Lifecycle** | Innovators → Early Adopters → Early Majority → Late Majority → Laggards | Emerging technology/category penetration analysis |
#### Consumer & Behavioral Analysis
| Framework | Description | Best For |
|-----------|-------------|----------|
| **Consumer Decision Journey** | Awareness → Consideration → Evaluation → Purchase → Loyalty | Consumer behavior path mapping, touchpoint optimization |
| **AARRR Funnel (Pirate Metrics)** | Acquisition, Activation, Retention, Revenue, Referral | User growth analysis, conversion rate optimization |
| **RFM Model** | Recency, Frequency, Monetary | Customer value segmentation, precision marketing |
| **Maslow's Hierarchy of Needs** | Physiological → Safety → Social → Esteem → Self-actualization | Consumer psychology analysis, product value proposition |
| **Jobs-to-be-Done (JTBD)** | The "job" a user needs to accomplish in a specific context | Demand insight, product innovation direction |
#### Financial & Valuation Analysis
| Framework | Description | Best For |
|-----------|-------------|----------|
| **DuPont Analysis** | ROE = Net Profit Margin × Asset Turnover × Equity Multiplier | Profitability decomposition, financial health diagnosis |
| **DCF (Discounted Cash Flow)** | Free cash flow discounting | Enterprise/project valuation |
| **Comparable Company Analysis** | PE, PB, PS, EV/EBITDA multiples comparison | Relative valuation, peer benchmarking |
| **EVA (Economic Value Added)** | NOPAT minus a capital charge (invested capital × cost of capital) | Value creation capability assessment |
#### Competitive & Strategic Positioning
| Framework | Description | Best For |
|-----------|-------------|----------|
| **Benchmarking** | Key performance indicator item-by-item comparison | Competitor gap analysis, best practice identification |
| **Strategic Group Mapping** | Cluster competitors along two key dimensions | Competitive landscape visualization, white-space identification |
| **Value Chain Analysis** | Primary activities + support activities value decomposition | Cost advantage sources, differentiation opportunity identification |
| **Blue Ocean Strategy** | Value curve, four-action framework (Eliminate-Reduce-Raise-Create) | Differentiated innovation, new market space creation |
| **Perceptual Mapping** | Plot brand positions along two consumer-perceived dimensions | Brand positioning analysis, market gap discovery |
#### Industry & Supply Chain Analysis
| Framework | Description | Best For |
|-----------|-------------|----------|
| **Industry Value Chain** | Upstream → Midstream → Downstream decomposition | Industry structure understanding, profit distribution analysis |
| **Gartner Hype Cycle** | Technology Trigger → Peak of Inflated Expectations → Trough of Disillusionment → Slope of Enlightenment → Plateau of Productivity | Emerging technology maturity assessment |
| **GE-McKinsey Matrix** | Industry Attractiveness × Competitive Strength | Business portfolio prioritization, investment decisions |
#### Selection Principles
1. **Domain-First**: Based on the domain identified in Step 1.1, select **2-4** most relevant frameworks from the toolkit above
2. **Complementary**: Choose complementary rather than overlapping frameworks (e.g., macro-level with PESTEL + micro-level with Porter's Five Forces)
3. **Depth over Breadth**: Better to deeply apply 2 frameworks than superficially stack 6
4. **Data-Feasible**: Selected frameworks must be supportable by downstream data collection skills — if the data required by a framework cannot be reasonably obtained, downgrade or substitute
5. **Explicit Mapping**: In the chapter skeleton, explicitly annotate which framework each chapter uses and how it is applied
#### Framework Selection Output Format
```markdown
## Framework Selection
| Chapter | Selected Framework(s) | Application |
|---------|----------------------|-------------|
| Market Size & Growth Trends | TAM-SAM-SOM + Product Life Cycle | TAM-SAM-SOM to quantify market space, PLC to determine market stage |
| Competitive Landscape Assessment | Porter's Five Forces + Strategic Group Mapping | Five Forces to assess industry competition intensity, Group Mapping to visualize competitive positioning |
| Consumer Profiling | RFM + Consumer Decision Journey | RFM to segment customer value, Decision Journey to identify key conversion nodes |
| Brand Strategy Recommendations | SWOT + Blue Ocean Strategy | SWOT to summarize overall landscape, Blue Ocean to guide differentiation direction |
```
### Step 1.3: Design Chapter Skeleton
Produce a hierarchical chapter structure. Each chapter must include:
1. **Chapter Title** — Professional, concise, subject-based (follow titling constraints in Formatting section)
2. **Analysis Objective** — What this chapter aims to reveal
3. **Analysis Logic** — The reasoning chain or framework (must reference the frameworks selected in Step 1.2)
4. **Core Hypothesis** — Preliminary hypotheses to be validated or refuted by data
#### Chapter Skeleton Output Format
```markdown
## Analysis Framework
### Chapter 1: [Title]
- **Analysis Objective**: [This chapter aims to...]
- **Analysis Logic**: [Framework or reasoning chain used]
- **Core Hypothesis**: [Hypotheses to validate]
- **Data Requirements**: (see Step 1.4)
- **Visualization Plan**: (see Step 1.5)
### Chapter 2: [Title]
...
```
### Step 1.4: Define Data Query Requirements Per Chapter
For each chapter, specify **exactly what data needs to be collected**. This is the bridge to downstream data collection skills.
Each data requirement entry must include:
| Field | Description |
|-------|-------------|
| **Data Metric** | The specific metric or data point needed (e.g., "China skincare market size 2020-2025 (in billion CNY)") |
| **Data Type** | Quantitative, Qualitative, or Mixed |
| **Suggested Sources** | Suggested source categories: Industry reports, financial statements, government statistics, social media, e-commerce platforms, survey data, news |
| **Search Keywords** | Suggested search queries for data collection agents |
| **Priority** | P0 (Required) / P1 (Important) / P2 (Supplementary) |
| **Time Range** | The time period the data should cover |
#### Data Requirements Output Format (per chapter)
```markdown
#### Data Requirements
| # | Data Metric | Data Type | Suggested Sources | Search Keywords | Priority | Time Range |
|---|-------------|-----------|-------------------|-----------------|----------|------------|
| 1 | Market size (billion CNY) | Quantitative | Industry reports, government statistics | "China skincare market size 2024" | P0 | 2020-2025 |
| 2 | CAGR | Quantitative | Industry reports | "skincare CAGR growth rate" | P0 | 2020-2025 |
| 3 | Sub-category share | Quantitative | E-commerce platforms, industry reports | "skincare category share cream serum sunscreen" | P1 | Latest |
| 4 | Policy & regulatory updates | Qualitative | Government announcements, news | "cosmetics regulation 2024" | P2 | Past 1 year |
```
### Step 1.5: Define Visualization & Content Structure Per Chapter
For each chapter, specify the **planned visualization** and **content structure** for the final report:
| Field | Description |
|-------|-------------|
| **Visualization Type** | Chart type: Line chart, bar chart, pie chart, scatter plot, radar chart, heatmap, Sankey diagram, comparison table, etc. |
| **Visualization Title** | Descriptive title for the chart |
| **Visualization Data Mapping** | Which data indicators map to X/Y axes or segments |
| **Comparison Table Design** | Column headers and comparison dimensions for the data contrast table |
| **Argument Structure** | The planned "What → Why → So What" narrative outline |
#### Visualization Plan Output Format (per chapter)
```markdown
#### Visualization & Content Plan
**Chart 1**: [Type] — [Title]
- X-axis: [Dimension], Y-axis: [Metric]
- Data source: Corresponds to Data Requirement #1, #2
**Comparison Table**:
| Dimension | Item A | Item B | Item C |
|-----------|--------|--------|--------|
**Argument Structure**:
1. **Observation (What)**: [Surface phenomenon revealed by data]
2. **Attribution (Why)**: [Driving factors or underlying causes]
3. **Implication (So What)**: [Strategic implications or recommended actions]
```
### Step 1.6: Output Complete Analysis Framework
Assemble all outputs into a single, structured **Analysis Framework Document**:
```markdown
# [Research Subject] Analysis Framework
## Research Overview
- **Research Subject**: [...]
- **Scope**: [Geography, time range, industry segment]
- **Analysis Domain**: [Market / Finance / Industry / Brand / Consumer / ...]
- **Core Research Questions**: [1-3 key questions]
## Framework Selection
| Chapter | Selected Framework(s) | Application |
|---------|----------------------|-------------|
| ... | ... | ... |
## Chapter Skeleton
### 1. [Chapter Title]
- **Analysis Objective**: [...]
- **Analysis Logic**: [...]
- **Core Hypothesis**: [...]
#### Data Requirements
| # | Data Metric | Data Type | Suggested Sources | Search Keywords | Priority | Time Range |
|---|-------------|-----------|-------------------|-----------------|----------|------------|
| ... | ... | ... | ... | ... | ... | ... |
#### Visualization & Content Plan
[Chart plan + Comparison table design + Argument structure]
### 2. [Chapter Title]
...
### N. [Chapter Title]
...
## Data Collection Task List
[Consolidate all P0/P1 data requirements across chapters into a structured task list for downstream data collection skills to execute]
```
## Phase 1 Quality Checklist
- [ ] Analysis framework covers all natural dimensions for the identified domain
- [ ] 2-4 professional analysis frameworks are selected and explicitly mapped to chapters
- [ ] Selected frameworks are complementary (not overlapping) and data-feasible
- [ ] Each chapter has clear Analysis Objective, Analysis Logic (referencing chosen framework), and Core Hypothesis
- [ ] Data requirements are specific, measurable, and include search keywords
- [ ] Every chapter has at least one visualization plan
- [ ] Data priorities (P0/P1/P2) are assigned realistically
- [ ] The framework is actionable — a data collection agent can execute on the Search Keywords directly
- [ ] Data Collection Task List is comprehensive and deduplicated
---
# Phase 1→2 Handoff: Data Collection & Chart Generation
After the analysis framework is generated, it is handed off to **other data collection skills** (e.g., deep-research, data-analysis, web search agents) to:
1. Execute the **Search Keywords** from each chapter's data requirements
2. Collect quantitative data, qualitative insights, and source URLs
3. Generate charts based on the **Visualization & Content Plan**
4. Return a **Data Package** containing:
- **Data Summary**: Raw numbers, metrics, and qualitative findings per chapter
- **Chart Files**: Generated chart images with local file paths
- **External Search Findings**: Source URLs and summaries for citations
> **This skill does NOT perform data collection.** It only produces the framework (Phase 1) and the final report (Phase 2).
>
> **Chart Generation**: If a visualization/charting skill is available (e.g., data-analysis, image-generation), chart generation can be deferred to the beginning of Phase 2 — see Step 2.3.
---
# Phase 2: Report Generation
## Purpose
Receive the completed **Analysis Framework** and **Data Package** from upstream, and synthesize them into a final consulting-grade report.
## Phase 2 Inputs
| Input | Description | Required |
|-------|-------------|----------|
| **Analysis Framework** | The framework document produced in Phase 1 | Yes |
| **Data Summary** | Collected data organized per chapter from the data collection phase | Yes |
| **Chart Files** | Local file paths for generated chart images. If not provided, will be generated in Step 2.3 using available visualization skills | Optional |
| **External Search Findings** | URLs and summaries for inline citations | Optional |
## Phase 2 Workflow
### Step 2.1: Receive and Validate Inputs
Verify that all required inputs are present:
1. **Analysis Framework** — Confirm it contains chapter skeleton, data requirements, and visualization plans
2. **Data Summary** — Confirm it contains data organized per chapter, cross-reference against P0 requirements
3. **Chart Files** — Confirm file paths are valid local paths
If any P0 data is missing, note it in the report and flag for the user.
### Step 2.2: Map Report Structure
Map the final report structure from the Analysis Framework:
1. **Abstract** — Executive summary with key takeaways
2. **Introduction** — Background, objectives, methodology
3. **Main Body Chapters (2...N)** — Mapped from the Framework's chapter skeleton
4. **Conclusion** — Pure, objective synthesis
5. **References** — GB/T 7714-2015 formatted references
### Step 2.3: Generate Chapter Charts (Pre-Report Visualization)
Before writing the report, generate all planned charts from the Analysis Framework's **Visualization & Content Plan**. This step ensures every sub-chapter has its "Visual Anchor" ready before narrative writing begins.
#### When to Execute This Step
- **Chart Files already provided**: Skip this step — proceed directly to Step 2.4.
- **Chart Files NOT provided but a visualization skill is available**: Execute this step to generate all charts first.
- **No Chart Files and no visualization skill available**: Skip this step — use comparison tables as the primary visual anchor in Step 2.4, and note the absence of charts.
#### Chart Generation Workflow
1. **Extract Chart Tasks**: Parse all `Visualization & Content Plan` entries from the Analysis Framework to build a chart generation task list:
| # | Chapter | Chart Type | Chart Title | Data Mapping | Data Source |
|---|---------|------------|-------------|--------------|-------------|
| 1 | 2.1 | Line chart | Market Size Trend 2020-2025 | X: Year, Y: Market Size (billion CNY) | Data Requirement #1, #2 |
| 2 | 3.1 | Pie chart | Consumer Age Distribution | Segments: Age groups, Values: Share % | Data Requirement #5 |
| ... | ... | ... | ... | ... | ... |
2. **Prepare Chart Data**: For each chart task, extract the corresponding data points from the **Data Summary** and structure them into the format required by the visualization skill (e.g., CSV, JSON, or tabular format).
3. **Delegate to Visualization Skill**: Invoke the available visualization/charting skill (e.g., `data-analysis`) for each chart task with:
- Chart type and title
- Structured data
- Axis labels and formatting preferences
- Output file path convention: `charts/chapter_{N}_{chart_index}.png`
4. **Collect Chart File Paths**: Record all generated chart file paths for embedding in Step 2.4:
```markdown
## Generated Charts
| # | Chapter | Chart Title | File Path |
|---|---------|-------------|-----------|
| 1 | 2.1 | Market Size Trend 2020-2025 | charts/chapter_2_1.png |
| 2 | 3.1 | Consumer Age Distribution | charts/chapter_3_1.png |
```
5. **Validate**: Confirm all P0-priority charts have been generated. If any chart generation fails, note it and fall back to comparison tables for that sub-chapter.
> **Principle**: Complete ALL chart generation before starting report writing. This ensures a consistent visual narrative and avoids interleaving generation with writing.
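As an illustrative sketch, a single chart task from the list above could be delegated to a charting skill such as `chart-visualization` through its `scripts/generate.js` interface (the tool choice and data values below are invented for the example):
```bash
node ./scripts/generate.js '{
  "tool": "generate_line_chart",
  "args": {
    "data": [
      { "time": "2020", "value": 245.1 },
      { "time": "2021", "value": 268.4 },
      { "time": "2022", "value": 290.2 }
    ],
    "title": "Market Size Trend 2020-2025",
    "axisXTitle": "Year",
    "axisYTitle": "Market Size (billion CNY)"
  }
}'
```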
### Step 2.4: Write the Report
For each sub-chapter, follow the **"Visual Anchor → Data Contrast → Integrated Analysis"** flow:
1. **Visual Evidence Block**: Embed charts using `![Image Description](Actual_File_Path)` — use the file paths collected in Step 2.3
2. **Data Contrast Table**: Create a Markdown comparison table for key metrics
3. **Integrated Narrative Analysis**: Write analytical text following "What → Why → So What"
Each sub-chapter must end with a robust analytical paragraph (min. 200 words) that:
- Synthesizes conflicting or reinforcing data points
- Reveals the underlying user tension or opportunity
- Optionally ends with a punchy "One-Liner Truth" in a blockquote (`>`)
### Step 2.5: Final Structure Self-Check
Before outputting, confirm the report contains **all sections in order**:
```
Abstract → 1. Introduction → 2...N. Body Chapters → N+1. Conclusion → N+2. References
```
Additionally verify:
- All charts generated in Step 2.3 are embedded in the correct sub-chapters
- Chart file paths in `![](path)` references are valid
- Sub-chapters without charts have comparison tables as visual anchors
The report **MUST NOT** stop after the Conclusion — it **MUST** include References as the final section.
## Formatting & Tone Standards
### Consulting Voice
- **Tone**: McKinsey/BCG — Authoritative, Objective, Professional
- **Language**: All headings and content in the language specified by `output_locale`
- **Number Formatting**: Use English commas for thousands separators (`1,000` not `1000`)
- **Data emphasis**: **Bold** important viewpoints and key numbers
### Titling Constraints
- **Numbering**: Use standard numbering (`1.`, `1.1`) directly followed by the title
- **Forbidden Prefixes**: Do NOT use "Chapter", "Part", "Section" as prefixes
- **Allowed Tone Words**: Analysis, Profiling, Overview, Insights, Assessment
- **Forbidden Words**: "Decoding", "DNA", "Secrets", "Mindscape", "Solar System", "Unlocking"
### Sub-Chapter Conclusions
- **Requirement**: End each sub-chapter with a robust analytical paragraph (min. 200 words).
- **Narrative Flow**: This paragraph must look like a natural continuation of the text. It must synthesize the section's findings into a strategic judgment.
- **Content Logic**:
1. Synthesize the conflicting or reinforcing data points above.
2. Reveal the *underlying* user tension or opportunity.
3. **Optional Key Insight**: Only if you have a concise, punchy "One-Liner Truth", place it at the very end as a **blockquote** (`>`) to anchor the section.
### Insight Depth (The "So What" Chain)
Every insight must connect **Data → User Psychology → Strategy Implication**:
```
❌ Bad: "Females are 60%. Strategy: Target females."
✅ Good: "Females constitute 60% with a high TGI of 180. **This suggests**
the purchase decision is driven by aesthetic and social validation
rather than pure utility. **Consequently**, media spend should pivot
towards visual-heavy platforms (e.g., RED/Instagram) to maximize CTR,
treating male audiences only as a secondary gift-giving segment."
```
### References
- **Inline**: Use markdown links for sources (e.g. `[Source Title](URL)`) when using External Search Findings
- **References section**: Formatted strictly per **GB/T 7714-2015**
### Markdown Rules
- **Immediate Start**: Begin directly with `# Report Title` — no introductory text
- **No Separators**: Do NOT use horizontal rules (`---`)
## Report Structure Template
```markdown
# [Report Title]
## Abstract
[Executive summary with key takeaways]
## 1. Introduction
[Background, objectives, methodology]
## 2. [Body Chapter Title]
### 2.1 [Sub-chapter Title]
![Chart Description](chart_file_path)
| Metric | Brand A | Brand B |
|--------|---------|--------|
| ... | ... | ... |
[Integrated narrative analysis: What → Why → So What, min. 200 words]
> [Optional: One-liner strategic truth]
### 2.2 [Sub-chapter Title]
...
## N+1. Conclusion
[Pure objective synthesis, NO bullet points, neutral tone]
[Para 1: The fundamental nature of the group/market]
[Para 2: Core tension or behavior pattern]
[Final: One or two sentences stating the objective truth]
## N+2. References
[1] Author. Title[EB/OL]. URL, Date.
[2] ...
```
## Complete Example
### Phase 1 Example: Framework Generation
User provides: Research subject "Gen-Z Skincare Market Analysis"
**Phase 1 output (Analysis Framework):**
```markdown
# Gen-Z Skincare Market Analysis Framework
## Research Overview
- **Research Subject**: Gen-Z Skincare Market Deep Analysis
- **Scope**: China market, 2020-2025, consumers aged 18-27
- **Analysis Domain**: Market Analysis + Consumer Insights
- **Core Research Questions**:
1. What is the size and growth momentum of the Gen-Z skincare market?
2. What is unique about Gen-Z consumer skincare behavior patterns?
3. How can brands effectively reach and convert Gen-Z consumers?
## Chapter Skeleton
### 1. Market Size & Growth Trends
- **Analysis Objective**: Quantify Gen-Z skincare market size and identify growth drivers
- **Analysis Logic**: Total market → Segmentation → Growth rate → Driver decomposition
- **Core Hypothesis**: Gen-Z is becoming the core engine of skincare consumption growth
#### Data Requirements
| # | Data Metric | Data Type | Suggested Sources | Search Keywords | Priority | Time Range |
|---|-------------|-----------|-------------------|-----------------|----------|------------|
| 1 | China skincare market total size | Quantitative | Industry reports | "China skincare market size 2024 2025" | P0 | 2020-2025 |
| 2 | Gen-Z skincare spending share | Quantitative | Industry reports, e-commerce platforms | "Gen-Z skincare spending share youth" | P0 | Latest |
#### Visualization & Content Plan
**Chart 1**: Line chart — China Skincare Market Size Trend 2020-2025
**Argument Structure**:
1. What: Quantified status of market size and Gen-Z share
2. Why: Consumption upgrade, ingredient-conscious consumers, social media driven
3. So What: Brands should prioritize building youth-oriented product lines
### 2. Consumer Profiling & Behavioral Insights
...
## Data Collection Task List
[Consolidated P0/P1 tasks]
```
### Phase 2 Example: Report Generation
After data collection, user provides: Analysis Framework + Data Summary with brand metrics + chart file paths.
**Phase 2 output (Final Report) follows this flow:**
1. Start with `# Gen-Z Skincare Market Deep Analysis Report`
2. Abstract — 3-5 key takeaways in executive summary form
3. 1. Introduction — Market context, research scope, data sources
4. 2. Market Size & Growth Trend Analysis — Embed trend charts, comparison tables, strategic narrative
5. 3. Consumer Profiling & Behavioral Insights — Demographics, purchase drivers, "So What" analysis
6. 4. Brand Competitive Landscape Assessment — Brand positioning, share analysis, competitive dynamics
7. 5. Marketing Strategy & Channel Insights — Channel effectiveness, content strategy implications
8. 6. Conclusion — Objective synthesis in flowing prose (no bullets)
9. 7. References — GB/T 7714-2015 formatted list
---
## Quality Checklists
### Phase 1 Quality Checklist (Analysis Framework)
- [ ] Framework covers all natural analytical dimensions for the identified domain
- [ ] Each chapter has clear Analysis Objective, Analysis Logic, and Core Hypothesis
- [ ] Data requirements are specific, measurable, and include actionable Search Keywords
- [ ] Every chapter has at least one visualization plan with chart type and data mapping
- [ ] Data priorities (P0/P1/P2) are assigned — P0 items are essential for core arguments
- [ ] Data Collection Task List is comprehensive, deduplicated, and ready for downstream execution
- [ ] Framework adapts to the correct domain (market/finance/industry/consumer/etc.)
### Phase 2 Quality Checklist (Final Report)
- [ ] All planned charts generated before report writing (Step 2.3 completed first)
- [ ] All sections present in correct order (Abstract → Introduction → Body → Conclusion → References)
- [ ] Every sub-chapter follows "Visual Anchor → Data Contrast → Integrated Analysis"
- [ ] Every sub-chapter ends with a min. 200-word analytical paragraph
- [ ] All insights follow the "Data → User Psychology → Strategy Implication" chain
- [ ] All headings use proper numbering (no "Chapter/Part/Section" prefixes)
- [ ] Charts are embedded with `![Description](path)` syntax
- [ ] Numbers use English commas for thousands separators
- [ ] Inline references use markdown links where applicable
- [ ] References section follows GB/T 7714-2015
- [ ] No horizontal rules (`---`) in the document
- [ ] Conclusion uses flowing prose — no bullet points
- [ ] Report starts directly with `#` title — no preamble
- [ ] Missing P0 data is explicitly flagged in the report
## Output Format
- **Phase 1**: Output the complete Analysis Framework in **Markdown** format
- **Phase 2**: Output the complete Report in **Markdown** format
## Settings
```
output_locale = zh_CN # configurable per user request
reasoning_locale = en
```
## Notes
- This skill operates in **two phases** of a multi-step agentic workflow:
- **Phase 1** produces the analysis framework and data collection requirements
- **Data collection** is performed by other skills (deep-research, data-analysis, etc.)
- **Phase 2** receives the collected data and produces the final report
- Dynamic titling: **Rewrite** topics from the Framework into professional, concise subject-based headers
- The Conclusion section must contain **NO** detailed recommendations — those belong in the preceding body chapters
- Each statement in the report must be supported by data points from the input Data Summary
- The framework should adapt its analytical dimensions and depth to the specific domain (financial analysis uses different frameworks than consumer insights)
- When the research subject is ambiguous, default to the broadest reasonable scope and note assumptions


@@ -0,0 +1,248 @@
---
name: data-analysis
description: Use this skill when the user uploads Excel (.xlsx/.xls) or CSV files and wants to perform data analysis, generate statistics, create summaries, pivot tables, SQL queries, or any form of structured data exploration. Supports multi-sheet Excel workbooks, aggregation, filtering, joins, and exporting results to CSV/JSON/Markdown.
---
# Data Analysis Skill
## Overview
This skill analyzes user-uploaded Excel/CSV files using DuckDB — an in-process analytical SQL engine. It supports schema inspection, SQL-based querying, statistical summaries, and result export, all through a single Python script.
## Core Capabilities
- Inspect Excel/CSV file structure (sheets, columns, types, row counts)
- Execute arbitrary SQL queries against uploaded data
- Generate statistical summaries (mean, median, stddev, percentiles, nulls)
- Support multi-sheet Excel workbooks (each sheet becomes a table)
- Export query results to CSV, JSON, or Markdown
- Handle large files efficiently with DuckDB's columnar engine
## Workflow
### Step 1: Understand Requirements
When a user uploads data files and requests analysis, identify:
- **File location**: Path(s) to uploaded Excel/CSV files under `/mnt/user-data/uploads/`
- **Analysis goal**: What insights the user wants (summary, filtering, aggregation, comparison, etc.)
- **Output format**: How results should be presented (table, CSV export, JSON, etc.)
- There is no need to list or scan `/mnt/user-data` yourself — use the file paths provided with the upload
### Step 2: Inspect File Structure
First, inspect the uploaded file to understand its schema:
```bash
python /mnt/skills/public/data-analysis/scripts/analyze.py \
--files /mnt/user-data/uploads/data.xlsx \
--action inspect
```
This returns:
- Sheet names (for Excel) or filename (for CSV)
- Column names, data types, and non-null counts
- Row count per sheet/file
- Sample data (first 5 rows)
### Step 3: Perform Analysis
Based on the schema, construct SQL queries to answer the user's questions.
#### Run SQL Query
```bash
python /mnt/skills/public/data-analysis/scripts/analyze.py \
--files /mnt/user-data/uploads/data.xlsx \
--action query \
--sql "SELECT category, COUNT(*) as count, AVG(amount) as avg_amount FROM Sheet1 GROUP BY category ORDER BY count DESC"
```
#### Generate Statistical Summary
```bash
python /mnt/skills/public/data-analysis/scripts/analyze.py \
--files /mnt/user-data/uploads/data.xlsx \
--action summary \
--table Sheet1
```
This returns for each numeric column: count, mean, std, min, 25%, 50%, 75%, max, null_count.
For string columns: count, unique, top value, frequency, null_count.
#### Export Results
```bash
python /mnt/skills/public/data-analysis/scripts/analyze.py \
--files /mnt/user-data/uploads/data.xlsx \
--action query \
--sql "SELECT * FROM Sheet1 WHERE amount > 1000" \
--output-file /mnt/user-data/outputs/filtered-results.csv
```
Supported output formats (auto-detected from extension):
- `.csv` — Comma-separated values
- `.json` — JSON array of records
- `.md` — Markdown table
### Parameters
| Parameter | Required | Description |
|-----------|----------|-------------|
| `--files` | Yes | Space-separated paths to Excel/CSV files |
| `--action` | Yes | One of: `inspect`, `query`, `summary` |
| `--sql` | For `query` | SQL query to execute |
| `--table` | For `summary` | Table/sheet name to summarize |
| `--output-file` | No | Path to export results (CSV/JSON/MD) |
> [!NOTE]
> Do NOT read the Python script's source; just call it with the parameters described above.
## Table Naming Rules
- **Excel files**: Each sheet becomes a table named after the sheet (e.g., `Sheet1`, `Sales`, `Revenue`)
- **CSV files**: Table name is the filename without extension (e.g., `data.csv` → `data`)
- **Multiple files**: All tables from all files are available in the same query context, enabling cross-file joins
- **Special characters**: Sheet/file names with spaces or special characters are auto-sanitized (non-word characters → underscores), and names that start with a digit get a `t_` prefix (e.g., `2024 Sales` → `t_2024_Sales`). Run the `inspect` action to see the exact SQL table names, and double-quote them in queries (e.g., `"t_2024_Sales"`); a sketch of the rule follows below.
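The sanitization rule, condensed from the bundled script's `sanitize_table_name`, behaves roughly like this sketch:
```python
import re

def sanitize(name: str) -> str:
    # Non-word characters become underscores; digit-leading names get a t_ prefix.
    safe = re.sub(r"[^\w]", "_", name)
    return f"t_{safe}" if safe and safe[0].isdigit() else safe

print(sanitize("Monthly Sales"))  # Monthly_Sales
print(sanitize("2024 Sales"))     # t_2024_Sales
```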
## Analysis Patterns
### Basic Exploration
```sql
-- Row count
SELECT COUNT(*) FROM Sheet1
-- Distinct values in a column
SELECT DISTINCT category FROM Sheet1
-- Value distribution
SELECT category, COUNT(*) as cnt FROM Sheet1 GROUP BY category ORDER BY cnt DESC
-- Date range
SELECT MIN(date_col), MAX(date_col) FROM Sheet1
```
### Aggregation & Grouping
```sql
-- Revenue by category and month
SELECT category, DATE_TRUNC('month', order_date) as month,
SUM(revenue) as total_revenue
FROM Sales
GROUP BY category, month
ORDER BY month, total_revenue DESC
-- Top 10 customers by spend
SELECT customer_name, SUM(amount) as total_spend
FROM Orders GROUP BY customer_name
ORDER BY total_spend DESC LIMIT 10
```
### Cross-file Joins
```sql
-- Join sales with customer info from different files
SELECT s.order_id, s.amount, c.customer_name, c.region
FROM sales s
JOIN customers c ON s.customer_id = c.id
WHERE s.amount > 500
```
### Window Functions
```sql
-- Running total and rank
SELECT order_date, amount,
SUM(amount) OVER (ORDER BY order_date) as running_total,
RANK() OVER (ORDER BY amount DESC) as amount_rank
FROM Sales
```
### Pivot-style Analysis
```sql
-- Pivot: monthly revenue by category
SELECT category,
SUM(CASE WHEN MONTH(date) = 1 THEN revenue END) as Jan,
SUM(CASE WHEN MONTH(date) = 2 THEN revenue END) as Feb,
SUM(CASE WHEN MONTH(date) = 3 THEN revenue END) as Mar
FROM Sales
GROUP BY category
```
## Complete Example
User uploads `sales_2024.xlsx` (with sheets: `Orders`, `Products`, `Customers`) and asks: "Analyze my sales data — show top products by revenue and monthly trends."
### Step 1: Inspect the file
```bash
python /mnt/skills/public/data-analysis/scripts/analyze.py \
--files /mnt/user-data/uploads/sales_2024.xlsx \
--action inspect
```
### Step 2: Top products by revenue
```bash
python /mnt/skills/public/data-analysis/scripts/analyze.py \
--files /mnt/user-data/uploads/sales_2024.xlsx \
--action query \
--sql "SELECT p.product_name, SUM(o.quantity * o.unit_price) as total_revenue, SUM(o.quantity) as total_units FROM Orders o JOIN Products p ON o.product_id = p.id GROUP BY p.product_name ORDER BY total_revenue DESC LIMIT 10"
```
### Step 3: Monthly revenue trends
```bash
python /mnt/skills/public/data-analysis/scripts/analyze.py \
--files /mnt/user-data/uploads/sales_2024.xlsx \
--action query \
--sql "SELECT DATE_TRUNC('month', order_date) as month, SUM(quantity * unit_price) as revenue FROM Orders GROUP BY month ORDER BY month" \
--output-file /mnt/user-data/outputs/monthly-trends.csv
```
### Step 4: Statistical summary
```bash
python /mnt/skills/public/data-analysis/scripts/analyze.py \
--files /mnt/user-data/uploads/sales_2024.xlsx \
--action summary \
--table Orders
```
Present results to the user with clear explanations of findings, trends, and actionable insights.
## Multi-file Example
User uploads `orders.csv` and `customers.xlsx` and asks: "Which region has the highest average order value?"
```bash
python /mnt/skills/public/data-analysis/scripts/analyze.py \
--files /mnt/user-data/uploads/orders.csv /mnt/user-data/uploads/customers.xlsx \
--action query \
--sql "SELECT c.region, AVG(o.amount) as avg_order_value, COUNT(*) as order_count FROM orders o JOIN Customers c ON o.customer_id = c.id GROUP BY c.region ORDER BY avg_order_value DESC"
```
## Output Handling
After analysis:
- Present query results directly in conversation as formatted tables
- For large results, export to file and share via `present_files` tool
- Always explain findings in plain language with key takeaways
- Suggest follow-up analyses when patterns are interesting
- Offer to export results if the user wants to keep them
## Caching
The script automatically caches loaded data to avoid re-parsing files on every call:
- On first load, files are parsed and stored in a persistent DuckDB database in a `.data-analysis-cache/` directory under the system temp location (see `CACHE_DIR` in the script)
- The cache key is a SHA256 hash of all input file contents — if files change, a new cache is created
- Subsequent calls with the same files will use the cached database directly (near-instant startup)
- Cache is transparent — no extra parameters needed
This is especially useful when running multiple queries against the same data files (inspect → query → summary).
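The cache key is derived purely from file contents, so reordering the `--files` arguments does not invalidate it; a condensed sketch of the logic (mirroring `compute_files_hash` in the script below):
```python
import hashlib

def files_hash(paths: list[str]) -> str:
    """Content-based cache key, mirroring the script's compute_files_hash."""
    hasher = hashlib.sha256()
    for path in sorted(paths):  # sorted so argument order doesn't change the key
        with open(path, "rb") as f:
            while chunk := f.read(8192):
                hasher.update(chunk)
    return hasher.hexdigest()
```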
## Notes
- DuckDB supports full SQL including window functions, CTEs, subqueries, and advanced aggregations
- Excel date columns are automatically parsed; use DuckDB date functions (`DATE_TRUNC`, `EXTRACT`, etc.)
- For very large files (100MB+), DuckDB handles them efficiently without loading everything into memory
- Column names with spaces are accessible using double quotes: `"Column Name"`


@@ -0,0 +1,565 @@
"""
Data Analysis Script using DuckDB.
Analyzes Excel (.xlsx/.xls) and CSV files using DuckDB's in-process SQL engine.
Supports schema inspection, SQL queries, statistical summaries, and result export.
"""
import argparse
import hashlib
import json
import logging
import os
import re
import sys
import tempfile
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)
try:
import duckdb
except ImportError:
logger.error("duckdb is not installed. Installing...")
os.system(f"{sys.executable} -m pip install duckdb openpyxl -q")
import duckdb
try:
    import openpyxl  # noqa: F401
except ImportError:
    os.system(f"{sys.executable} -m pip install openpyxl -q")
    import openpyxl  # noqa: F401
# Cache directory for persistent DuckDB databases
CACHE_DIR = os.path.join(tempfile.gettempdir(), ".data-analysis-cache")
TABLE_MAP_SUFFIX = ".table_map.json"
def compute_files_hash(files: list[str]) -> str:
"""Compute a combined SHA256 hash of all input files for cache key."""
hasher = hashlib.sha256()
for file_path in sorted(files):
try:
with open(file_path, "rb") as f:
while chunk := f.read(8192):
hasher.update(chunk)
except OSError:
# Include path as fallback if file can't be read
hasher.update(file_path.encode())
return hasher.hexdigest()
def get_cache_db_path(files_hash: str) -> str:
"""Get the path to the cached DuckDB database file."""
os.makedirs(CACHE_DIR, exist_ok=True)
return os.path.join(CACHE_DIR, f"{files_hash}.duckdb")
def get_table_map_path(files_hash: str) -> str:
"""Get the path to the cached table map JSON file."""
return os.path.join(CACHE_DIR, f"{files_hash}{TABLE_MAP_SUFFIX}")
def save_table_map(files_hash: str, table_map: dict[str, str]) -> None:
"""Save table map to a JSON file alongside the cached DB."""
path = get_table_map_path(files_hash)
with open(path, "w", encoding="utf-8") as f:
json.dump(table_map, f, ensure_ascii=False)
def load_table_map(files_hash: str) -> dict[str, str] | None:
"""Load table map from cache. Returns None if not found."""
path = get_table_map_path(files_hash)
if not os.path.exists(path):
return None
try:
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
except Exception:
return None
def sanitize_table_name(name: str) -> str:
"""Sanitize a sheet/file name into a valid SQL table name."""
sanitized = re.sub(r"[^\w]", "_", name)
if sanitized and sanitized[0].isdigit():
sanitized = f"t_{sanitized}"
return sanitized
def load_files(con: duckdb.DuckDBPyConnection, files: list[str]) -> dict[str, str]:
"""
Load Excel/CSV files into DuckDB tables.
Returns a mapping of original_name -> sanitized_table_name.
"""
con.execute("INSTALL spatial; LOAD spatial;")
table_map: dict[str, str] = {}
for file_path in files:
if not os.path.exists(file_path):
logger.error(f"File not found: {file_path}")
continue
ext = os.path.splitext(file_path)[1].lower()
if ext in (".xlsx", ".xls"):
_load_excel(con, file_path, table_map)
elif ext == ".csv":
_load_csv(con, file_path, table_map)
else:
logger.warning(f"Unsupported file format: {ext} ({file_path})")
return table_map
def _load_excel(
con: duckdb.DuckDBPyConnection, file_path: str, table_map: dict[str, str]
) -> None:
"""Load all sheets from an Excel file into DuckDB tables."""
import openpyxl
wb = openpyxl.load_workbook(file_path, read_only=True, data_only=True)
sheet_names = wb.sheetnames
wb.close()
for sheet_name in sheet_names:
table_name = sanitize_table_name(sheet_name)
# Handle duplicate table names
original_table_name = table_name
counter = 1
while table_name in table_map.values():
table_name = f"{original_table_name}_{counter}"
counter += 1
try:
con.execute(
f"""
CREATE TABLE "{table_name}" AS
SELECT * FROM st_read(
'{file_path}',
layer = '{sheet_name}',
open_options = ['HEADERS=FORCE', 'FIELD_TYPES=AUTO']
)
"""
)
table_map[sheet_name] = table_name
row_count = con.execute(f'SELECT COUNT(*) FROM "{table_name}"').fetchone()[
0
]
logger.info(
f" Loaded sheet '{sheet_name}' -> table '{table_name}' ({row_count} rows)"
)
except Exception as e:
logger.warning(f" Failed to load sheet '{sheet_name}': {e}")
def _load_csv(
con: duckdb.DuckDBPyConnection, file_path: str, table_map: dict[str, str]
) -> None:
"""Load a CSV file into a DuckDB table."""
base_name = os.path.splitext(os.path.basename(file_path))[0]
table_name = sanitize_table_name(base_name)
# Handle duplicate table names
original_table_name = table_name
counter = 1
while table_name in table_map.values():
table_name = f"{original_table_name}_{counter}"
counter += 1
try:
con.execute(
f"""
CREATE TABLE "{table_name}" AS
SELECT * FROM read_csv_auto('{file_path}')
"""
)
table_map[base_name] = table_name
row_count = con.execute(f'SELECT COUNT(*) FROM "{table_name}"').fetchone()[0]
logger.info(
f" Loaded CSV '{base_name}' -> table '{table_name}' ({row_count} rows)"
)
except Exception as e:
logger.warning(f" Failed to load CSV '{base_name}': {e}")
def action_inspect(con: duckdb.DuckDBPyConnection, table_map: dict[str, str]) -> str:
"""Inspect the schema of all loaded tables."""
output_parts = []
for original_name, table_name in table_map.items():
output_parts.append(f"\n{'=' * 60}")
output_parts.append(f'Table: {original_name} (SQL name: "{table_name}")')
output_parts.append(f"{'=' * 60}")
# Get row count
row_count = con.execute(f'SELECT COUNT(*) FROM "{table_name}"').fetchone()[0]
output_parts.append(f"Rows: {row_count}")
# Get column info
columns = con.execute(f'DESCRIBE "{table_name}"').fetchall()
output_parts.append(f"\nColumns ({len(columns)}):")
output_parts.append(f"{'Name':<30} {'Type':<15} {'Nullable'}")
output_parts.append(f"{'-' * 30} {'-' * 15} {'-' * 8}")
for col in columns:
col_name, col_type, nullable = col[0], col[1], col[2]
output_parts.append(f"{col_name:<30} {col_type:<15} {nullable}")
# Get non-null counts per column
col_names = [col[0] for col in columns]
non_null_parts = []
for c in col_names:
non_null_parts.append(f'COUNT("{c}") as "{c}"')
non_null_sql = f'SELECT {", ".join(non_null_parts)} FROM "{table_name}"'
try:
non_null_counts = con.execute(non_null_sql).fetchone()
output_parts.append(f"\nNon-null counts:")
for i, c in enumerate(col_names):
output_parts.append(f" {c}: {non_null_counts[i]} / {row_count}")
except Exception:
pass
# Sample data (first 5 rows)
output_parts.append(f"\nSample data (first 5 rows):")
try:
sample = con.execute(f'SELECT * FROM "{table_name}" LIMIT 5').fetchdf()
output_parts.append(sample.to_string(index=False))
except Exception:
sample = con.execute(f'SELECT * FROM "{table_name}" LIMIT 5').fetchall()
header = [col[0] for col in columns]
output_parts.append(" " + " | ".join(header))
for row in sample:
output_parts.append(" " + " | ".join(str(v) for v in row))
result = "\n".join(output_parts)
print(result)
return result
def action_query(
con: duckdb.DuckDBPyConnection,
sql: str,
table_map: dict[str, str],
output_file: str | None = None,
) -> str:
"""Execute a SQL query and return/export results."""
# Replace original sheet/file names with sanitized table names in SQL
modified_sql = sql
for original_name, table_name in sorted(
table_map.items(), key=lambda x: len(x[0]), reverse=True
):
if original_name != table_name:
            # Best-effort whole-word substitution; SQL is not parsed, so quoted names and string literals may also match
modified_sql = re.sub(
rf"\b{re.escape(original_name)}\b",
f'"{table_name}"',
modified_sql,
)
try:
result = con.execute(modified_sql)
columns = [desc[0] for desc in result.description]
rows = result.fetchall()
except Exception as e:
error_msg = f"SQL Error: {e}\n\nAvailable tables:\n"
for orig, tbl in table_map.items():
cols = con.execute(f'DESCRIBE "{tbl}"').fetchall()
col_names = [c[0] for c in cols]
error_msg += f' "{tbl}" ({orig}): {", ".join(col_names)}\n'
print(error_msg)
return error_msg
# Format output
if output_file:
return _export_results(columns, rows, output_file)
# Print as table
return _format_table(columns, rows)
def _format_table(columns: list[str], rows: list[tuple]) -> str:
"""Format query results as a readable table."""
if not rows:
msg = "Query returned 0 rows."
print(msg)
return msg
# Calculate column widths
col_widths = [len(str(c)) for c in columns]
for row in rows:
for i, val in enumerate(row):
col_widths[i] = max(col_widths[i], len(str(val)))
# Cap column width
max_width = 40
col_widths = [min(w, max_width) for w in col_widths]
# Build table
parts = []
header = " | ".join(str(c).ljust(col_widths[i]) for i, c in enumerate(columns))
separator = "-+-".join("-" * col_widths[i] for i in range(len(columns)))
parts.append(header)
parts.append(separator)
for row in rows:
row_str = " | ".join(
str(v)[:max_width].ljust(col_widths[i]) for i, v in enumerate(row)
)
parts.append(row_str)
parts.append(f"\n({len(rows)} rows)")
result = "\n".join(parts)
print(result)
return result
def _export_results(columns: list[str], rows: list[tuple], output_file: str) -> str:
"""Export query results to a file (CSV, JSON, or Markdown)."""
    # Guard: dirname is empty for bare filenames, and os.makedirs("") would raise.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
ext = os.path.splitext(output_file)[1].lower()
if ext == ".csv":
import csv
with open(output_file, "w", newline="", encoding="utf-8") as f:
writer = csv.writer(f)
writer.writerow(columns)
writer.writerows(rows)
elif ext == ".json":
records = []
for row in rows:
record = {}
for i, col in enumerate(columns):
val = row[i]
# Handle non-JSON-serializable types
if hasattr(val, "isoformat"):
val = val.isoformat()
elif isinstance(val, (bytes, bytearray)):
val = val.hex()
record[col] = val
records.append(record)
with open(output_file, "w", encoding="utf-8") as f:
json.dump(records, f, indent=2, ensure_ascii=False, default=str)
elif ext == ".md":
with open(output_file, "w", encoding="utf-8") as f:
# Header
f.write("| " + " | ".join(columns) + " |\n")
f.write("| " + " | ".join("---" for _ in columns) + " |\n")
# Rows
for row in rows:
f.write(
"| " + " | ".join(str(v).replace("|", "\\|") for v in row) + " |\n"
)
else:
msg = f"Unsupported output format: {ext}. Use .csv, .json, or .md"
print(msg)
return msg
msg = f"Results exported to {output_file} ({len(rows)} rows)"
print(msg)
return msg
def action_summary(
con: duckdb.DuckDBPyConnection,
table_name: str,
table_map: dict[str, str],
) -> str:
"""Generate statistical summary for a table."""
# Resolve table name
resolved = table_map.get(table_name, table_name)
try:
columns = con.execute(f'DESCRIBE "{resolved}"').fetchall()
except Exception:
available = ", ".join(f'"{t}" ({o})' for o, t in table_map.items())
msg = f"Table '{table_name}' not found. Available tables: {available}"
print(msg)
return msg
row_count = con.execute(f'SELECT COUNT(*) FROM "{resolved}"').fetchone()[0]
output_parts = []
output_parts.append(f"\nStatistical Summary: {table_name}")
output_parts.append(f"Total rows: {row_count}")
output_parts.append(f"{'=' * 70}")
numeric_types = {
"BIGINT",
"INTEGER",
"SMALLINT",
"TINYINT",
"DOUBLE",
"FLOAT",
"DECIMAL",
"HUGEINT",
"REAL",
"NUMERIC",
}
for col in columns:
col_name, col_type = col[0], col[1].upper()
output_parts.append(f"\n--- {col_name} ({col[1]}) ---")
# Check base type (strip parameterized parts)
base_type = re.sub(r"\(.*\)", "", col_type).strip()
if base_type in numeric_types:
try:
stats = con.execute(f"""
SELECT
COUNT("{col_name}") as count,
AVG("{col_name}")::DOUBLE as mean,
STDDEV("{col_name}")::DOUBLE as std,
MIN("{col_name}") as min,
QUANTILE_CONT("{col_name}", 0.25) as q25,
MEDIAN("{col_name}") as median,
QUANTILE_CONT("{col_name}", 0.75) as q75,
MAX("{col_name}") as max,
COUNT(*) - COUNT("{col_name}") as null_count
FROM "{resolved}"
""").fetchone()
labels = [
"count",
"mean",
"std",
"min",
"25%",
"50%",
"75%",
"max",
"nulls",
]
for label, val in zip(labels, stats):
if isinstance(val, float):
output_parts.append(f" {label:<8}: {val:,.4f}")
else:
output_parts.append(f" {label:<8}: {val}")
except Exception as e:
output_parts.append(f" Error computing stats: {e}")
else:
try:
stats = con.execute(f"""
SELECT
COUNT("{col_name}") as count,
COUNT(DISTINCT "{col_name}") as unique_count,
MODE("{col_name}") as mode_val,
COUNT(*) - COUNT("{col_name}") as null_count
FROM "{resolved}"
""").fetchone()
output_parts.append(f" count : {stats[0]}")
output_parts.append(f" unique : {stats[1]}")
output_parts.append(f" top : {stats[2]}")
output_parts.append(f" nulls : {stats[3]}")
# Show top 5 values
top_vals = con.execute(f"""
SELECT "{col_name}", COUNT(*) as freq
FROM "{resolved}"
WHERE "{col_name}" IS NOT NULL
GROUP BY "{col_name}"
ORDER BY freq DESC
LIMIT 5
""").fetchall()
if top_vals:
output_parts.append(f" top values:")
for val, freq in top_vals:
pct = (freq / row_count * 100) if row_count > 0 else 0
output_parts.append(f" {val}: {freq} ({pct:.1f}%)")
except Exception as e:
output_parts.append(f" Error computing stats: {e}")
result = "\n".join(output_parts)
print(result)
return result
def main():
parser = argparse.ArgumentParser(description="Analyze Excel/CSV files using DuckDB")
parser.add_argument(
"--files",
nargs="+",
required=True,
help="Paths to Excel (.xlsx/.xls) or CSV files",
)
parser.add_argument(
"--action",
required=True,
choices=["inspect", "query", "summary"],
help="Action to perform: inspect, query, or summary",
)
parser.add_argument(
"--sql",
type=str,
default=None,
help="SQL query to execute (required for 'query' action)",
)
parser.add_argument(
"--table",
type=str,
default=None,
help="Table name for summary (required for 'summary' action)",
)
parser.add_argument(
"--output-file",
type=str,
default=None,
help="Path to export results (CSV/JSON/MD)",
)
args = parser.parse_args()
# Validate arguments
if args.action == "query" and not args.sql:
parser.error("--sql is required for 'query' action")
if args.action == "summary" and not args.table:
parser.error("--table is required for 'summary' action")
# Compute file hash for caching
files_hash = compute_files_hash(args.files)
db_path = get_cache_db_path(files_hash)
cached_table_map = load_table_map(files_hash)
if cached_table_map and os.path.exists(db_path):
# Cache hit: connect to existing DB
logger.info(f"Cache hit! Using cached database: {db_path}")
con = duckdb.connect(db_path, read_only=True)
table_map = cached_table_map
logger.info(
f"Loaded {len(table_map)} table(s) from cache: {', '.join(table_map.keys())}"
)
else:
# Cache miss: load files and persist to DB
logger.info("Loading files (first time, will cache for future use)...")
con = duckdb.connect(db_path)
table_map = load_files(con, args.files)
if not table_map:
logger.error("No tables were loaded. Check file paths and formats.")
# Clean up empty DB file
con.close()
if os.path.exists(db_path):
os.remove(db_path)
sys.exit(1)
# Save table map for future cache lookups
save_table_map(files_hash, table_map)
logger.info(
f"\nLoaded {len(table_map)} table(s): {', '.join(table_map.keys())}"
)
logger.info(f"Cached database saved to: {db_path}")
# Perform action
if args.action == "inspect":
action_inspect(con, table_map)
elif args.action == "query":
action_query(con, args.sql, table_map, args.output_file)
elif args.action == "summary":
action_summary(con, args.table, table_map)
con.close()
if __name__ == "__main__":
main()


@@ -1,206 +0,0 @@
---
name: market-analysis
description: Use this skill when the user requests to generate, create, or write market analysis reports, consumer insight reports, or brand analysis reports. Transforms raw data and analysis frameworks into professional consulting-grade reports with structured narratives, embedded charts, and strategic insights.
---
# Market Analysis Report Generation Skill
## Overview
This skill generates professional, consulting-grade market analysis reports in Markdown format. It follows a structured methodology that transforms raw data summaries, analysis framework outlines, and pre-generated charts into comprehensive reports with deep strategic insights. The output adheres to McKinsey/BCG consulting voice standards and Chinese professional writing conventions.
## Core Capabilities
- Transform raw data into structured, high-depth market analysis reports
- Follow the **"Visual Anchor → Data Contrast → Integrated Analysis"** flow per sub-chapter
- Produce insights following the **"Data → User Psychology → Strategy Implication"** chain
- Embed pre-generated charts and construct comparison tables
- Include references formatted per **GB/T 7714-2015** where applicable
- Output reports entirely in Chinese with professional consulting tone
## When to Use This Skill
**Always load this skill when:**
- User asks for a market analysis or consumer insight report
- User provides data summaries, analysis frameworks, or chart files to be synthesized
- User needs a professional consulting-style report in Chinese
- The task involves transforming research findings into structured strategic narratives
## Inputs
The skill expects the following inputs from the upstream agentic workflow:
| Input | Description | Required |
|-------|-------------|----------|
| **Analysis Framework Outline** | Defines the logic flow and general topics for the report | Yes |
| **Data Summary** | The source of truth containing raw numbers and metrics | Yes |
| **Chart Files** | Local file paths for pre-generated chart images | Yes |
| **External Search Findings** | URLs and summaries for inline references | Optional |
## Workflow
### Step 1: Receive and Validate Inputs
Verify that all required inputs are present:
1. **Analysis Framework Outline** — Confirm it contains the logic flow and topic structure
2. **Data Summary** — Confirm it contains raw numbers and metrics
3. **Chart Files** — Confirm file paths are valid local paths
### Step 2: Plan Report Structure
Map the report structure according to the Analysis Framework Outline:
1. **摘要 (Abstract)** — Executive summary with key takeaways
2. **引言 (Introduction)** — Background, objectives, methodology
3. **Main Body Chapters (2...N)** — Scope-based chapters mapped from the Framework
4. **总结 (Conclusion)** — Pure, objective synthesis
5. **参考文献 (References)** — GB/T 7714-2015 formatted references
### Step 3: Write the Report
For each sub-chapter, follow the **"Visual Anchor → Data Contrast → Integrated Analysis"** flow:
1. **Visual Evidence Block**: Embed charts using `![Image Description](Actual_File_Path)`
2. **Data Contrast Table**: Create a Markdown comparison table for key metrics
3. **Integrated Narrative Analysis**: Write analytical text following "What → Why → So What"
Each sub-chapter must end with a robust analytical paragraph (min. 200 words) that:
- Synthesizes conflicting or reinforcing data points
- Reveals the underlying user tension or opportunity
- Optionally ends with a punchy "One-Liner Truth" in a blockquote (`>`)
### Step 4: Final Structure Self-Check
Before outputting, confirm the report contains **all sections in order**:
```
摘要 → 1. 引言 → 2...N. 主体章节 → N+1. 总结 → N+2. 参考文献
```
The report **MUST NOT** stop after the Conclusion — it **MUST** include References as the final section.
## Formatting & Tone Standards
### Consulting Voice
- **Tone**: McKinsey/BCG — Authoritative, Objective, Professional
- **Language**: All headings and content strictly in **Chinese**
- **Number Formatting**: Use English commas for thousands separators (`1,000` not `1000`)
- **Data emphasis**: **Bold** important viewpoints and key numbers
### Titling Constraints
- **Numbering**: Use standard numbering (`1.`, `1.1`) or Chinese numbering (`一、`) directly followed by the title
- **Forbidden Prefixes**: Do NOT use "Chapter", "Part", "Section"
- **Allowed Tone Words**: 分析 (Analysis), 画像 (Profiling), 概览 (Overview), 洞察 (Insights), 评估 (Assessment)
- **Forbidden Words**: "Decoding", "DNA", "Secrets", "Mindscape", "Solar System", "Unlocking"
### Insight Depth (The "So What" Chain)
Every insight must connect **Data → User Psychology → Strategy Implication**:
```
❌ Bad: "Females are 60%. Strategy: Target females."
✅ Good: "Females constitute 60% with a high TGI of 180. **This suggests**
the purchase decision is driven by aesthetic and social validation
rather than pure utility. **Consequently**, media spend should pivot
towards visual-heavy platforms (e.g., RED/Instagram) to maximize CTR,
treating male audiences only as a secondary gift-giving segment."
```
### References
- **Inline**: Use markdown links for sources (e.g. `[Source Title](URL)`) when using External Search Findings
- **References section**: Formatted strictly per **GB/T 7714-2015**
### Markdown Rules
- **Immediate Start**: Begin directly with `# Report Title` — no introductory text
- **No Separators**: Do NOT use horizontal rules (`---`)
## Report Structure Template
```markdown
# [报告标题]
## 摘要
[Executive summary with key takeaways]
## 1. 引言
[Background, objectives, methodology]
## 2. [主体章节标题]
### 2.1 [子章节标题]
![Chart Description](chart_file_path)
| 指标 | 品牌A | 品牌B |
|------|-------|-------|
| ... | ... | ... |
[Integrated narrative analysis: What → Why → So What, min. 200 words]
> [Optional: One-liner strategic truth]
### 2.2 [子章节标题]
...
## N+1. 总结
[Pure objective synthesis, NO bullet points, neutral tone]
[Para 1: The fundamental nature of the group/market]
[Para 2: Core tension or behavior pattern]
[Final: One or two sentences stating the objective truth]
## N+2. 参考文献
[1] Author. Title[EB/OL]. URL, Date.
[2] ...
```
## Complete Example
User provides: Analysis Framework about "Gen-Z Skincare Market", Data Summary with brand metrics, and chart file paths.
**Report output follows this flow:**
1. Start with `# Z世代护肤市场深度分析报告` (Gen-Z Skincare Market Deep Analysis Report)
2. 摘要 (Abstract) — 3-5 key takeaways in executive summary form
3. 1. 引言 (Introduction) — Market context, research scope, data sources
4. 2. 市场规模与增长趋势分析 (Market Size & Growth Trend Analysis) — Embed trend charts, comparison tables, strategic narrative
5. 3. 消费者画像与行为洞察 (Consumer Profiling & Behavioral Insights) — Demographics, purchase drivers, "So What" analysis
6. 4. 品牌竞争格局评估 (Brand Competitive Landscape Assessment) — Brand positioning, share analysis, competitive dynamics
7. 5. 营销策略与渠道洞察 (Marketing Strategy & Channel Insights) — Channel effectiveness, content strategy implications
8. 6. 总结 (Conclusion) — Objective synthesis in flowing prose (no bullets)
9. 7. 参考文献 (References) — GB/T 7714-2015 formatted list
## Quality Checklist
Before considering the report complete, verify:
- [ ] All sections present in correct order (摘要 → 引言 → 主体 → 总结 → 参考文献)
- [ ] Every sub-chapter follows "Visual Anchor → Data Contrast → Integrated Analysis"
- [ ] Every sub-chapter ends with a min. 200-word analytical paragraph
- [ ] All insights follow the "Data → User Psychology → Strategy Implication" chain
- [ ] All headings are in Chinese with proper numbering (no "Chapter/Part/Section")
- [ ] Charts are embedded with `![Description](path)` syntax
- [ ] Numbers use English commas for thousands separators
- [ ] Inline references use markdown links where applicable
- [ ] References section follows GB/T 7714-2015
- [ ] No horizontal rules (`---`) in the document
- [ ] Conclusion uses flowing prose — no bullet points
- [ ] Report starts directly with `#` title — no preamble
## Output Format
Output the complete report in **Markdown** format only.
## Settings
```
output_locale = zh_CN
reasoning_locale = zh_CN
```
## Notes
- This skill operates in the **final phase** of a multi-step agentic workflow — it receives pre-processed inputs and produces the final deliverable
- Dynamic titling: **Rewrite** topics from the Framework into professional, concise subject-based headers
- The Conclusion section must contain **NO** detailed recommendations — those belong in the preceding body chapters
- Each statement in the report must be supported by data points from the input Data Summary