mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-19 12:24:46 +08:00
feat: add image and video generation skills
This commit is contained in:
96
skills/public/image-generation/scripts/generate.py
Normal file
96
skills/public/image-generation/scripts/generate.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import base64
|
||||
import os
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def generate_image(
    prompt_file: str,
    reference_images: list[str],
    output_file: str,
    aspect_ratio: str = "16:9",
) -> str:
    """Generate an image through the Gemini REST API and write it to disk.

    Args:
        prompt_file: Path to a text file; its full contents are sent as the
            text part of the request.
        reference_images: Paths to image files that are base64-encoded and
            sent inline ahead of the prompt text. May be empty.
        output_file: Path the decoded image bytes are written to.
        aspect_ratio: Value sent as ``generationConfig.imageConfig.aspectRatio``.

    Returns:
        A success message naming ``output_file``, or the string
        ``"GEMINI_API_KEY is not set"`` when that environment variable is
        missing or empty (no request is made in that case).

    Raises:
        OSError: If the prompt file, a reference image, or the output file
            cannot be opened.
        requests.HTTPError: If the API answers with a non-success status.
        RuntimeError: If the response does not contain exactly one inline
            image part.
    """
    # Function-scope import so the block does not depend on a file-level edit.
    import mimetypes

    with open(prompt_file, "r", encoding="utf-8") as f:
        prompt = f.read()

    request_parts = []
    for reference_image in reference_images:
        with open(reference_image, "rb") as f:
            image_b64 = base64.b64encode(f.read()).decode("utf-8")
        # Label the payload with the file's real image type; keep the previous
        # "image/jpeg" label as the fallback when the extension is unknown.
        mime_type, _ = mimetypes.guess_type(reference_image)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"
        request_parts.append(
            {
                "inlineData": {
                    "mimeType": mime_type,
                    "data": image_b64,
                }
            }
        )

    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        return "GEMINI_API_KEY is not set"

    response = requests.post(
        "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-image-preview:generateContent",
        headers={
            "x-goog-api-key": api_key,
            "Content-Type": "application/json",
        },
        json={
            "generationConfig": {"imageConfig": {"aspectRatio": aspect_ratio}},
            "contents": [{"parts": [*request_parts, {"text": prompt}]}],
        },
        # (connect, read) seconds: generous, but never hang forever on a
        # stalled connection.
        timeout=(10, 300),
    )
    # Surface HTTP-level failures (bad key, quota, bad request) directly
    # instead of as a KeyError while digging through an error payload.
    response.raise_for_status()

    payload = response.json()
    try:
        response_parts: list[dict] = payload["candidates"][0]["content"]["parts"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(
            "Failed to generate image: response contained no candidate content"
        ) from err

    image_parts = [part for part in response_parts if part.get("inlineData", False)]
    # Exactly one inline image is treated as success; zero or several is a failure.
    if len(image_parts) != 1:
        raise RuntimeError("Failed to generate image")

    base64_image = image_parts[0]["inlineData"]["data"]
    # Save the image to a file
    with open(output_file, "wb") as f:
        f.write(base64.b64decode(base64_image))
    return f"Successfully generated image to {output_file}"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, run generate_image once,
    # and print its result (or the failure reason) to stdout.
    import argparse

    parser = argparse.ArgumentParser(description="Generate images using Gemini API")
    parser.add_argument(
        "--prompt-file",
        required=True,
        help="Absolute path to JSON prompt file",
    )
    parser.add_argument(
        "--reference-images",
        nargs="*",
        default=[],
        help="Absolute paths to reference images (space-separated)",
    )
    parser.add_argument(
        "--output-file",
        required=True,
        help="Output path for generated image",
    )
    parser.add_argument(
        "--aspect-ratio",
        required=False,
        default="16:9",
        help="Aspect ratio of the generated image",
    )

    args = parser.parse_args()

    try:
        print(
            generate_image(
                args.prompt_file,
                args.reference_images,
                args.output_file,
                args.aspect_ratio,
            )
        )
    except Exception as e:
        # Top-level boundary: keep reporting on stdout without a traceback,
        # but include the cause so the failure can be diagnosed. The exit
        # status is deliberately left unchanged.
        print(f"Error while generating image: {e}")
|
||||
Reference in New Issue
Block a user