diff --git a/3d-generation-pipeline/.gitignore b/3d-generation-pipeline/.gitignore
index b694934f..c2eabeca 100644
--- a/3d-generation-pipeline/.gitignore
+++ b/3d-generation-pipeline/.gitignore
@@ -1 +1,2 @@
-.venv
\ No newline at end of file
+.venv
+.env
\ No newline at end of file
diff --git a/3d-generation-pipeline/cloudflare_API_test.ipynb b/3d-generation-pipeline/cloudflare_API_test.ipynb
new file mode 100644
index 00000000..0d32480c
--- /dev/null
+++ b/3d-generation-pipeline/cloudflare_API_test.ipynb
@@ -0,0 +1,172 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1dc6faae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import base64\n",
+    "import requests\n",
+    "from dotenv import load_dotenv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b3107275",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "load_dotenv()\n",
+    "\n",
+    "ACCOUNT_ID = os.environ[\"CF_ACCOUNT_ID\"]\n",
+    "API_TOKEN = os.environ[\"CF_API_TOKEN\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "999adf95",
+   "metadata": {},
+   "source": [
+    "## Text to image"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "40b35163",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Saved: output.jpg (263282 bytes)\n",
+      "Saved: image9.jpg (263282 bytes)\n"
+     ]
+    }
+   ],
+   "source": [
+    "MODEL = \"@cf/black-forest-labs/flux-1-schnell\"\n",
+    "URL = f\"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}\"\n",
+    "\n",
+    "payload = {\n",
+    "    \"prompt\": \"a slightly curved broadsword with a fancy golden crossguard\",\n",
+    "}\n",
+    "\n",
+    "headers = {\n",
+    "    \"Authorization\": f\"Bearer {API_TOKEN}\",\n",
+    "    \"Content-Type\": \"application/json\",\n",
+    "}\n",
+    "\n",
+    "resp = requests.post(URL, json=payload, headers=headers, timeout=60)\n",
+    "resp.raise_for_status()\n",
+    "\n",
+    "data = resp.json()\n",
+    "b64 = data.get(\"result\", {}).get(\"image\")\n",
+    "if not b64:\n",
+    "    raise RuntimeError(f\"Unexpected response structure: {data}\")\n",
+    "\n",
+    "img_bytes = base64.b64decode(b64)\n",
+    "\n",
+    "out_path = \"output.jpg\"\n",
+    "with open(out_path, \"wb\") as f:\n",
+    "    f.write(img_bytes)\n",
+    "\n",
+    "print(f\"Saved: {out_path} ({len(img_bytes)} bytes)\")\n",
+    "\n",
+    "b64 = data.get(\"result\", {}).get(\"image\")\n",
+    "if not b64:\n",
+    "    raise RuntimeError(f\"Unexpected response structure: {data}\")\n",
+    "\n",
+    "img_bytes = base64.b64decode(b64)\n",
+    "\n",
+    "out_path = \"image9.jpg\"\n",
+    "with open(out_path, \"wb\") as f:\n",
+    "    f.write(img_bytes)\n",
+    "\n",
+    "print(f\"Saved: {out_path} ({len(img_bytes)} bytes)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "14a874c4",
+   "metadata": {},
+   "source": [
+    "## Text prompt refinement"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "485f6f46",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\"dark wooden battleaxe with bronze blade\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "MODEL = \"@cf/meta/llama-3.2-3b-instruct\"\n",
+    "URL = f\"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}\"\n",
+    "\n",
+    "instructions = \"\"\"\n",
+    "User is talking about some object. Your task is to generate a short and concise description of it. Use only user's own words, keep it as short as possible.\n",
+    "Example:\n",
+    "User: 'Umm, okay, I would like a really cool sword, with for example a bright orange crossguard. And also it should be slightly curved.'\n",
+    "You: 'a slightly curved sword with bright orange crossguard'\n",
+    "\"\"\"\n",
+    "prompt = \"Umm, alright, can you please give me an epic battleaxe? It should have a dark wooden shaft and bronze blade.\"\n",
+    "\n",
+    "response = requests.post(URL,\n",
+    "    headers={\"Authorization\": f\"Bearer {API_TOKEN}\"},\n",
+    "    json={\n",
+    "        \"messages\": [\n",
+    "            {\"role\": \"system\", \"content\": instructions},\n",
+    "            {\"role\": \"user\", \"content\": prompt}\n",
+    "        ]\n",
+    "    }\n",
+    ")\n",
+    "data = response.json()\n",
+    "result_text = data[\"result\"][\"response\"]\n",
+    "print(result_text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "76fa21f0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/3d-generation-pipeline/generate_image.py b/3d-generation-pipeline/generate_image.py
deleted file mode 100644
index 4f37f390..00000000
--- a/3d-generation-pipeline/generate_image.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import torch
-from diffusers import StableDiffusion3Pipeline
-
-model_name = "stabilityai/stable-diffusion-3.5-medium"
-
-pipe = StableDiffusion3Pipeline.from_pretrained(model_name, use_safetensors=True, variant="fp16")
-pipe = pipe.to("cuda")
-
-prompt = "A cute cat eating a slice of pizza, stunning color scheme, masterpiece, illustration"
-image = pipe(
-    prompt,
-    guidance_scale=3.0,
-    generator=torch.Generator("cuda")
-).images[0]
-
-image_name = "image.png"
-image.save(image_name)
diff --git a/3d-generation-pipeline/generate_image_local.py b/3d-generation-pipeline/generate_image_local.py
new file mode 100644
index 00000000..9515939d
--- /dev/null
+++ b/3d-generation-pipeline/generate_image_local.py
@@ -0,0 +1,28 @@
+import torch
+from diffusers import StableDiffusionPipeline, StableDiffusion3Pipeline
+import time
+
+start_timestamp = time.time()
+#model = "stabilityai/stable-diffusion-3.5-medium" # generation time: 13 min
+model = "stabilityai/stable-diffusion-3-medium-diffusers" # generation time: 10 min
+#model = "stabilityai/stable-diffusion-2" # generation time: 4 sec
+
+pipe = StableDiffusion3Pipeline.from_pretrained(model, torch_dtype=torch.float16)
+#pipe = StableDiffusionPipeline.from_pretrained(model, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+
+model_loaded_timestamp = time.time()
+model_load_time = model_loaded_timestamp - start_timestamp
+print(f"model load time: {round(model_load_time)} seconds")
+
+prompt = "A majestic broadsword with a golden pommel, no background"
+image = pipe(
+    prompt,
+    guidance_scale=3.0,
+).images[0]
+
+image_name = "image7.png"
+image.save(f"images/{image_name}")
+
+generation_time = time.time() - model_loaded_timestamp
+print(f"image generation time: {round(generation_time)} seconds")
\ No newline at end of file
diff --git a/3d-generation-pipeline/images/flux-1-schnell.jpg b/3d-generation-pipeline/images/flux-1-schnell.jpg
new file mode 100644
index 00000000..f516890f
--- /dev/null
+++ b/3d-generation-pipeline/images/flux-1-schnell.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e53740c46ef6b695325497113072aa0ebf7f083cf773679b2229dd4c747ef6e2
+size 263282
diff --git a/3d-generation-pipeline/images/stable-diffusion-2.png b/3d-generation-pipeline/images/stable-diffusion-2.png
new file mode 100644
index 00000000..13724225
--- /dev/null
+++ b/3d-generation-pipeline/images/stable-diffusion-2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f607a95989404e18d60d44a974af5856c25dfdadd8c14db69f40c41a95a3be5
+size 1127930
diff --git a/3d-generation-pipeline/images/stable-diffusion-3-5-medium.png b/3d-generation-pipeline/images/stable-diffusion-3-5-medium.png
new file mode 100644
index 00000000..a7ed478e
--- /dev/null
+++ b/3d-generation-pipeline/images/stable-diffusion-3-5-medium.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:720c4cc992166f8f808e60d26874fe582d2a299c5497e1429f5cf7763b347e94
+size 1902777
diff --git a/3d-generation-pipeline/images/stable-diffusion-3-medium.png b/3d-generation-pipeline/images/stable-diffusion-3-medium.png
new file mode 100644
index 00000000..0058e72c
--- /dev/null
+++ b/3d-generation-pipeline/images/stable-diffusion-3-medium.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:796d3211f277d139d2d23979d11d865b956ad62bad81f87cab2bf2e720cc34a3
+size 1568601
diff --git a/3d-generation-pipeline/generate_image.ipynb b/3d-generation-pipeline/local_model_test.ipynb
similarity index 100%
rename from 3d-generation-pipeline/generate_image.ipynb
rename to 3d-generation-pipeline/local_model_test.ipynb
diff --git a/3d-generation-pipeline/requirements.txt b/3d-generation-pipeline/requirements.txt
index 43b9add8..b08f5956 100644
--- a/3d-generation-pipeline/requirements.txt
+++ b/3d-generation-pipeline/requirements.txt
@@ -1,7 +1,6 @@
-torch==2.8.0+cu129
+#torch==2.8.0+cu129 https://pytorch.org/get-started/previous-versions/
 transformers==4.57.0
-#diffusers==0.35.1
-it+https://github.com/huggingface/diffusers.git
+git+https://github.com/huggingface/diffusers.git
 accelerate==1.10.1
 huggingface_hub[hf_xet]==1.1.10
 sentencepiece==0.2.1