import base64
import os

import requests
from PIL import Image

# Gemini model endpoint used for image generation.
_GEMINI_ENDPOINT = (
    "https://generativelanguage.googleapis.com/v1beta/models/"
    "gemini-3-pro-image-preview:generateContent"
)

# MIME type sent when Pillow cannot tell us the format of a reference image.
_FALLBACK_MIME_TYPE = "image/jpeg"

# Default number of seconds to wait for the API before giving up. Without a
# timeout, requests waits indefinitely on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 300.0


def validate_image(image_path: str) -> bool:
    """
    Validate if an image file can be opened and is not corrupted.

    Args:
        image_path: Path to the image file

    Returns:
        True if the image is valid and can be opened, False otherwise
    """
    try:
        with Image.open(image_path) as img:
            img.verify()  # Verify that it's a valid image
        # Re-open to check if it can be fully loaded
        # (verify() may not catch all issues)
        with Image.open(image_path) as img:
            img.load()  # Force load the image data
        return True
    except Exception as e:
        # Best-effort check: any failure simply marks the image as unusable.
        print(f"Warning: Image '{image_path}' is invalid or corrupted: {e}")
        return False


def _detect_mime_type(image_path: str) -> str:
    """Return the MIME type Pillow reports for the image, or JPEG if unknown."""
    try:
        with Image.open(image_path) as img:
            return Image.MIME.get(img.format or "", _FALLBACK_MIME_TYPE)
    except OSError:
        return _FALLBACK_MIME_TYPE


def _extract_single_image(payload: dict) -> str:
    """Return the base64 data of the single image in a generateContent reply.

    Raises:
        RuntimeError: If the reply has no content parts or does not contain
            exactly one inline image.
    """
    try:
        response_parts = payload["candidates"][0]["content"]["parts"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(
            "Failed to generate image: response contained no content parts"
        ) from err

    image_parts = [part for part in response_parts if part.get("inlineData")]
    if len(image_parts) != 1:
        raise RuntimeError(
            "Failed to generate image: expected exactly one image in the "
            f"response, got {len(image_parts)}"
        )
    return image_parts[0]["inlineData"]["data"]


def generate_image(
    prompt_file: str,
    reference_images: list[str],
    output_file: str,
    aspect_ratio: str = "16:9",
    timeout: float = _REQUEST_TIMEOUT_SECONDS,
) -> str:
    """
    Generate an image with the Gemini API and write it to ``output_file``.

    Reference images that fail ``validate_image`` are skipped with a printed
    note; the remaining ones are sent inline (base64) ahead of the prompt text.

    Args:
        prompt_file: Path to a text file whose full contents are the prompt
        reference_images: Paths to reference images to send with the prompt
        output_file: Path the generated image bytes are written to
        aspect_ratio: Aspect ratio passed to the API's image config
        timeout: Seconds to wait for the API before aborting the request

    Returns:
        A success message naming the output file, or the string
        "GEMINI_API_KEY is not set" when the environment variable is missing.

    Raises:
        OSError: If the prompt file or a reference image cannot be read, or
            the output file cannot be written.
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        RuntimeError: If the response does not contain exactly one image.
    """
    with open(prompt_file, "r", encoding="utf-8") as f:
        prompt = f.read()

    # Filter out invalid reference images
    valid_reference_images = []
    for ref_img in reference_images:
        if validate_image(ref_img):
            valid_reference_images.append(ref_img)
        else:
            print(f"Skipping invalid reference image: {ref_img}")

    skipped = len(reference_images) - len(valid_reference_images)
    if skipped:
        print(
            f"Note: {skipped} reference image(s) were skipped due to "
            "validation failure."
        )

    request_parts = []
    for reference_image in valid_reference_images:
        with open(reference_image, "rb") as f:
            image_b64 = base64.b64encode(f.read()).decode("utf-8")
        request_parts.append(
            {
                "inlineData": {
                    # Report the real format instead of always claiming JPEG.
                    "mimeType": _detect_mime_type(reference_image),
                    "data": image_b64,
                }
            }
        )

    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        return "GEMINI_API_KEY is not set"

    response = requests.post(
        _GEMINI_ENDPOINT,
        headers={
            "x-goog-api-key": api_key,
            "Content-Type": "application/json",
        },
        json={
            "generationConfig": {"imageConfig": {"aspectRatio": aspect_ratio}},
            "contents": [{"parts": [*request_parts, {"text": prompt}]}],
        },
        timeout=timeout,
    )
    response.raise_for_status()

    base64_image = _extract_single_image(response.json())

    # Save the image to a file
    with open(output_file, "wb") as f:
        f.write(base64.b64decode(base64_image))
    return f"Successfully generated image to {output_file}"


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Generate images using Gemini API")
    parser.add_argument(
        "--prompt-file",
        required=True,
        help="Absolute path to JSON prompt file",
    )
    parser.add_argument(
        "--reference-images",
        nargs="*",
        default=[],
        help="Absolute paths to reference images (space-separated)",
    )
    parser.add_argument(
        "--output-file",
        required=True,
        help="Output path for generated image",
    )
    parser.add_argument(
        "--aspect-ratio",
        required=False,
        default="16:9",
        help="Aspect ratio of the generated image",
    )
    args = parser.parse_args()

    # Top-level boundary: report any failure as a message rather than a
    # traceback.
    try:
        print(
            generate_image(
                args.prompt_file,
                args.reference_images,
                args.output_file,
                args.aspect_ratio,
            )
        )
    except Exception as e:
        print(f"Error while generating image: {e}")