From 590c62eadd6ee61d47f0107d16f0b77c76ce196b Mon Sep 17 00:00:00 2001 From: henrisel Date: Wed, 12 Nov 2025 17:51:02 +0200 Subject: [PATCH] incorporate InvokeAI into start_pipeline.py --- 3d-generation-pipeline/cloudflare_api.py | 4 +- .../generate_image_local.py | 97 ++++++++++---- 3d-generation-pipeline/invoke_ai_api.py | 72 ----------- .../notebooks/local_image_generation.ipynb | 122 ++++++++++++++++++ 3d-generation-pipeline/start_pipeline.py | 14 +- 5 files changed, 209 insertions(+), 100 deletions(-) delete mode 100644 3d-generation-pipeline/invoke_ai_api.py create mode 100644 3d-generation-pipeline/notebooks/local_image_generation.ipynb diff --git a/3d-generation-pipeline/cloudflare_api.py b/3d-generation-pipeline/cloudflare_api.py index 3412af31..5c4b3cb5 100644 --- a/3d-generation-pipeline/cloudflare_api.py +++ b/3d-generation-pipeline/cloudflare_api.py @@ -8,7 +8,7 @@ load_dotenv() ACCOUNT_ID = os.environ["CLOUDFLARE_ACCOUNT_ID"] API_TOKEN = os.environ["CLOUDFLARE_API_TOKEN"] -def text_to_image(prompt, output_path): +def text_to_image_cloudflare(prompt, output_path): MODEL = "@cf/black-forest-labs/flux-1-schnell" URL = f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}" @@ -34,6 +34,8 @@ def text_to_image(prompt, output_path): with open(output_path, "wb") as f: f.write(img_bytes) + return True + def refine_text_prompt(prompt): MODEL = "@cf/meta/llama-3.2-3b-instruct" diff --git a/3d-generation-pipeline/generate_image_local.py b/3d-generation-pipeline/generate_image_local.py index 9515939d..de8a5193 100644 --- a/3d-generation-pipeline/generate_image_local.py +++ b/3d-generation-pipeline/generate_image_local.py @@ -1,28 +1,81 @@ -import torch -from diffusers import StableDiffusionPipeline, StableDiffusion3Pipeline -import time +import requests -start_timestamp = time.time() -#model = "stabilityai/stable-diffusion-3.5-medium" # generation time: 13 min -model = "stabilityai/stable-diffusion-3-medium-diffusers" # generation time: 10 min -#model = "stabilityai/stable-diffusion-2" # generation time: 4 sec +from invokeai_mcp_server import create_text2img_graph, enqueue_graph, wait_for_completion, get_image_url +from urllib.parse import urljoin -pipe = StableDiffusion3Pipeline.from_pretrained(model, torch_dtype=torch.float16) -#pipe = StableDiffusionPipeline.from_pretrained(model, torch_dtype=torch.float16) -pipe = pipe.to("cuda") -model_loaded_timestamp = time.time() -model_load_time = model_loaded_timestamp - start_timestamp -print(f"model load time: {round(model_load_time)} seconds") +INVOKEAI_BASE_URL = "http://127.0.0.1:9090" -prompt = "A majestic broadsword with a golden pommel, no background" -image = pipe( - prompt, - guidance_scale=3.0, -).images[0] -image_name = "image7.png" -image.save(f"images/{image_name}") +async def generate_image(arguments: dict): -generation_time = time.time() - model_loaded_timestamp -print(f"image generation time: {round(generation_time)} seconds") \ No newline at end of file + # Extract parameters + prompt = arguments["prompt"] + negative_prompt = arguments.get("negative_prompt", "") + width = arguments.get("width", 512) + height = arguments.get("height", 512) + steps = arguments.get("steps", 30) + cfg_scale = arguments.get("cfg_scale", 7.5) + scheduler = arguments.get("scheduler", "euler") + seed = arguments.get("seed") + model_key = arguments.get("model_key") + lora_key = arguments.get("lora_key") + lora_weight = arguments.get("lora_weight", 1.0) + vae_key = arguments.get("vae_key") + + print(f"Generating image with prompt: {prompt[:50]}...") + + # Create graph + graph = await create_text2img_graph( + prompt=prompt, + negative_prompt=negative_prompt, + model_key=model_key, + lora_key=lora_key, + lora_weight=lora_weight, + vae_key=vae_key, + width=width, + height=height, + steps=steps, + cfg_scale=cfg_scale, + scheduler=scheduler, + seed=seed + ) + + # Enqueue and wait for completion + result = await enqueue_graph(graph) + batch_id = result["batch"]["batch_id"] + + print(f"Enqueued batch {batch_id}, waiting for completion...") + + completed = await wait_for_completion(batch_id) + + # Extract image name from result + if "result" in completed and "outputs" in completed["result"]: + outputs = completed["result"]["outputs"] + # Find the image output + for node_id, output in outputs.items(): + if output.get("type") == "image_output": + image_name = output["image"]["image_name"] + image_url = await get_image_url(image_name) + + return urljoin(INVOKEAI_BASE_URL, image_url) + + raise RuntimeError("Failed to generate image!") + +def download_file(url, filepath): + response = requests.get(url) + + if response.status_code == 200: + with open(filepath, "wb") as file: + file.write(response.content) + else: + raise RuntimeError(f"Failed to download image. Status code: {response.status_code}") + + +async def text_to_image_invoke_ai(prompt, output_path): + args = { + "prompt": prompt + } + image_url = await generate_image(args) + print("got image url: ", image_url) + download_file(image_url, output_path) diff --git a/3d-generation-pipeline/invoke_ai_api.py b/3d-generation-pipeline/invoke_ai_api.py deleted file mode 100644 index 630400a8..00000000 --- a/3d-generation-pipeline/invoke_ai_api.py +++ /dev/null @@ -1,72 +0,0 @@ -from invokeai_mcp_server import create_text2img_graph, enqueue_graph, wait_for_completion, get_image_url -from urllib.parse import urljoin - -import asyncio - -INVOKEAI_BASE_URL = "http://127.0.0.1:9090" - - -async def generate_image(arguments: dict): - - # Extract parameters - prompt = arguments["prompt"] - negative_prompt = arguments.get("negative_prompt", "") - width = arguments.get("width", 512) - height = arguments.get("height", 512) - steps = arguments.get("steps", 30) - cfg_scale = arguments.get("cfg_scale", 7.5) - scheduler = arguments.get("scheduler", "euler") - seed = arguments.get("seed") - model_key = arguments.get("model_key") - lora_key = arguments.get("lora_key") - lora_weight = arguments.get("lora_weight", 1.0) - vae_key = arguments.get("vae_key") - - #logger.info(f"Generating image with prompt: {prompt[:50]}...") - - # Create graph - graph = await create_text2img_graph( - prompt=prompt, - negative_prompt=negative_prompt, - model_key=model_key, - lora_key=lora_key, - lora_weight=lora_weight, - vae_key=vae_key, - width=width, - height=height, - steps=steps, - cfg_scale=cfg_scale, - scheduler=scheduler, - seed=seed - ) - - # Enqueue and wait for completion - result = await enqueue_graph(graph) - batch_id = result["batch"]["batch_id"] - - #logger.info(f"Enqueued batch {batch_id}, waiting for completion...") - - completed = await wait_for_completion(batch_id) - - # Extract image name from result - if "result" in completed and "outputs" in completed["result"]: - outputs = completed["result"]["outputs"] - # Find the image output - for node_id, output in outputs.items(): - if output.get("type") == "image_output": - image_name = output["image"]["image_name"] - image_url = await get_image_url(image_name) - - text=f"Image generated successfully!\n\nImage Name: {image_name}\nImage URL: {image_url}\n\nYou can view the image at: {urljoin(INVOKEAI_BASE_URL, f'/api/v1/images/i/{image_name}/full')}" - print(text) - - # Fallback if we couldn't find image output - #text=f"Image generation completed but output format was unexpected. Batch ID: {batch_id}\n\nResult: {json.dumps(completed, indent=2)}" - -async def main(): - args = { - "prompt": "a golden katana with a fancy pommel" - } - await generate_image(args) - -asyncio.run(main()) diff --git a/3d-generation-pipeline/notebooks/local_image_generation.ipynb b/3d-generation-pipeline/notebooks/local_image_generation.ipynb new file mode 100644 index 00000000..cf882de0 --- /dev/null +++ b/3d-generation-pipeline/notebooks/local_image_generation.ipynb @@ -0,0 +1,122 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "50e24baa", + "metadata": {}, + "outputs": [], + "source": [ + "from invokeai_mcp_server import create_text2img_graph, enqueue_graph, wait_for_completion, get_image_url\n", + "from urllib.parse import urljoin\n", + "\n", + "import asyncio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0407cd9a", + "metadata": {}, + "outputs": [], + "source": [ + "INVOKEAI_BASE_URL = \"http://127.0.0.1:9090\"\n", + "\n", + "\n", + "async def generate_image(arguments: dict):\n", + "\n", + " # Extract parameters\n", + " prompt = arguments[\"prompt\"]\n", + " negative_prompt = arguments.get(\"negative_prompt\", \"\")\n", + " width = arguments.get(\"width\", 512)\n", + " height = arguments.get(\"height\", 512)\n", + " steps = arguments.get(\"steps\", 30)\n", + " cfg_scale = arguments.get(\"cfg_scale\", 7.5)\n", + " scheduler = arguments.get(\"scheduler\", \"euler\")\n", + " seed = arguments.get(\"seed\")\n", + " model_key = arguments.get(\"model_key\")\n", + " lora_key = arguments.get(\"lora_key\")\n", + " lora_weight = arguments.get(\"lora_weight\", 1.0)\n", + " vae_key = arguments.get(\"vae_key\")\n", + "\n", + " #logger.info(f\"Generating image with prompt: {prompt[:50]}...\")\n", + "\n", + " # Create graph\n", + " graph = await create_text2img_graph(\n", + " prompt=prompt,\n", + " negative_prompt=negative_prompt,\n", + " model_key=model_key,\n", + " lora_key=lora_key,\n", + " lora_weight=lora_weight,\n", + " vae_key=vae_key,\n", + " width=width,\n", + " height=height,\n", + " steps=steps,\n", + " cfg_scale=cfg_scale,\n", + " scheduler=scheduler,\n", + " seed=seed\n", + " )\n", + "\n", + " # Enqueue and wait for completion\n", + " result = await enqueue_graph(graph)\n", + " batch_id = result[\"batch\"][\"batch_id\"]\n", + "\n", + " #logger.info(f\"Enqueued batch {batch_id}, waiting for completion...\")\n", + "\n", + " completed = await wait_for_completion(batch_id)\n", + "\n", + " # Extract image name from result\n", + " if \"result\" in completed and \"outputs\" in completed[\"result\"]:\n", + " outputs = completed[\"result\"][\"outputs\"]\n", + " # Find the image output\n", + " for node_id, output in outputs.items():\n", + " if output.get(\"type\") == \"image_output\":\n", + " image_name = output[\"image\"][\"image_name\"]\n", + " image_url = await get_image_url(image_name)\n", + "\n", + " text=f\"Image generated successfully!\\n\\nImage Name: {image_name}\\nImage URL: {image_url}\\n\\nYou can view the image at: {urljoin(INVOKEAI_BASE_URL, f'/api/v1/images/i/{image_name}/full')}\"\n", + " print(text)\n", + "\n", + " # Fallback if we couldn't find image output\n", + " #text=f\"Image generation completed but output format was unexpected. Batch ID: {batch_id}\\n\\nResult: {json.dumps(completed, indent=2)}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6cf9d879", + "metadata": {}, + "outputs": [], + "source": [ + "async def main():\n", + " args = {\n", + " \"prompt\": \"a golden katana with a fancy pommel\"\n", + " }\n", + " await generate_image(args)\n", + "\n", + "asyncio.run(main())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/3d-generation-pipeline/start_pipeline.py b/3d-generation-pipeline/start_pipeline.py index e436a0a6..729ed2c0 100644 --- a/3d-generation-pipeline/start_pipeline.py +++ b/3d-generation-pipeline/start_pipeline.py @@ -1,11 +1,13 @@ import os import argparse +import asyncio from pathlib import Path from datetime import datetime from dotenv import load_dotenv -from cloudflare_api import text_to_image, refine_text_prompt +from cloudflare_api import text_to_image_cloudflare, refine_text_prompt +from generate_image_local import text_to_image_invoke_ai from generate_model_local import image_to_3d_api, image_to_3d_subprocess load_dotenv() @@ -17,7 +19,7 @@ def get_timestamp(): return datetime.now().strftime("%Y-%m-%d-%H-%M-%S") -def main(): +async def main(): parser = argparse.ArgumentParser(description="Text to 3D model pipeline") parser.add_argument("--prompt", type=str, required=True, help="User text prompt") args = parser.parse_args() @@ -35,13 +37,15 @@ def main(): timestamp = get_timestamp() pipeline_folder = Path(PIPELINE_FOLDER) image_path = pipeline_folder / "images" / f"{timestamp}.jpg" - text_to_image(image_generation_prompt, image_path) + # TODO: use Invoke AI or Cloudflare, depending on env var + #text_to_image_cloudflare(image_generation_prompt, image_path) + await text_to_image_invoke_ai(image_generation_prompt, image_path) + print(f"Generated image file: {image_path}") model_path = pipeline_folder / "models" / timestamp model_file = image_to_3d_api(image_path, model_path) - #model_file_path = model_path / "0" / "mesh.glb" print(f"Generated 3D model file: {model_file}") if __name__ == "__main__": - main() + asyncio.run(main())