Test local text-to-image models and the Cloudflare API

2025-10-18 16:06:54 +03:00
parent 2cf0a9f711
commit ae497eac6e
10 changed files with 216 additions and 21 deletions
@@ -1 +1,2 @@
-.venv
+.venv
+.env
@@ -0,0 +1,172 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1dc6faae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import base64\n",
+    "import requests\n",
+    "from dotenv import load_dotenv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b3107275",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "load_dotenv()\n",
+    "\n",
+    "ACCOUNT_ID = os.environ[\"CF_ACCOUNT_ID\"]\n",
+    "API_TOKEN = os.environ[\"CF_API_TOKEN\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "999adf95",
+   "metadata": {},
+   "source": [
+    "## Text to image"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "40b35163",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Saved: output.jpg  (263282 bytes)\n",
+      "Saved: image9.jpg  (263282 bytes)\n"
+     ]
+    }
+   ],
+   "source": [
+    "MODEL = \"@cf/black-forest-labs/flux-1-schnell\"\n",
+    "URL = f\"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}\"\n",
+    "\n",
+    "payload = {\n",
+    "    \"prompt\": \"a slightly curved broadsword with a fancy golden crossguard\",\n",
+    "}\n",
+    "\n",
+    "headers = {\n",
+    "    \"Authorization\": f\"Bearer {API_TOKEN}\",\n",
+    "    \"Content-Type\": \"application/json\",\n",
+    "}\n",
+    "\n",
+    "# Request one image from the Cloudflare AI endpoint; raise_for_status()\n",
+    "# surfaces HTTP errors before the body is parsed.\n",
+    "resp = requests.post(URL, json=payload, headers=headers, timeout=60)\n",
+    "resp.raise_for_status()\n",
+    "\n",
+    "data = resp.json()\n",
+    "# Walk the payload defensively so that a missing or null key reaches the\n",
+    "# RuntimeError below (which shows the whole payload) instead of failing\n",
+    "# earlier with a bare KeyError/TypeError.\n",
+    "result = data.get(\"result\") if isinstance(data, dict) else None\n",
+    "b64 = result.get(\"image\") if isinstance(result, dict) else None\n",
+    "if not b64:\n",
+    "    raise RuntimeError(f\"Unexpected response structure: {data}\")\n",
+    "\n",
+    "# The image arrives as base64 text; decode it once and reuse the bytes.\n",
+    "img_bytes = base64.b64decode(b64)\n",
+    "\n",
+    "# The same bytes are stored under both file names.\n",
+    "for out_path in (\"output.jpg\", \"image9.jpg\"):\n",
+    "    with open(out_path, \"wb\") as f:\n",
+    "        f.write(img_bytes)\n",
+    "    print(f\"Saved: {out_path}  ({len(img_bytes)} bytes)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "14a874c4",
+   "metadata": {},
+   "source": [
+    "## Text prompt refinement"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "485f6f46",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\"dark wooden battleaxe with bronze blade\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "MODEL = \"@cf/meta/llama-3.2-3b-instruct\"\n",
+    "URL = f\"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}\"\n",
+    "\n",
+    "# System prompt: ask the model to condense the user's request into a short\n",
+    "# description, with one worked example.\n",
+    "instructions = \"\"\"\n",
+    "User is talking about some object. Your task is to generate a short and concise description of it. Use only user's own words, keep it as short as possible.\n",
+    "Example:\n",
+    "User: 'Umm, okay, I would like a really cool sword, with for example a bright orange crossguard. And also it should be slightly curved.'\n",
+    "You: 'a slightly curved sword with bright orange crossguard'\n",
+    "\"\"\"\n",
+    "prompt = \"Umm, alright, can you please give me an epic battleaxe? It should have a dark wooden shaft and bronze blade.\"\n",
+    "\n",
+    "response = requests.post(\n",
+    "    URL,\n",
+    "    headers={\"Authorization\": f\"Bearer {API_TOKEN}\"},\n",
+    "    json={\n",
+    "        \"messages\": [\n",
+    "            {\"role\": \"system\", \"content\": instructions},\n",
+    "            {\"role\": \"user\", \"content\": prompt},\n",
+    "        ]\n",
+    "    },\n",
+    "    # requests applies no timeout by default; without one a stalled call\n",
+    "    # would block the kernel indefinitely.\n",
+    "    timeout=60,\n",
+    ")\n",
+    "# Fail on HTTP errors here rather than with a confusing KeyError below.\n",
+    "response.raise_for_status()\n",
+    "data = response.json()\n",
+    "result_text = data[\"result\"][\"response\"]\n",
+    "print(result_text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "76fa21f0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -1,17 +0,0 @@
-import torch
-from diffusers import StableDiffusion3Pipeline
-
-model_name = "stabilityai/stable-diffusion-3.5-medium"
-
-pipe = StableDiffusion3Pipeline.from_pretrained(model_name, use_safetensors=True, variant="fp16")
-pipe = pipe.to("cuda")
-
-prompt = "A cute cat eating a slice of pizza, stunning color scheme, masterpiece, illustration"
-image = pipe(
-    prompt,
-    guidance_scale=3.0,
-    generator=torch.Generator("cuda")
-).images[0]
-
-image_name = "image.png"
-image.save(image_name)
@@ -0,0 +1,28 @@
+import time
+from pathlib import Path
+
+import torch
+from diffusers import StableDiffusionPipeline, StableDiffusion3Pipeline
+
+start_timestamp = time.time()
+#model = "stabilityai/stable-diffusion-3.5-medium" # generation time: 13 min
+model = "stabilityai/stable-diffusion-3-medium-diffusers" # generation time: 10 min
+#model = "stabilityai/stable-diffusion-2" # generation time: 4 sec
+
+pipe = StableDiffusion3Pipeline.from_pretrained(model, torch_dtype=torch.float16)
+#pipe = StableDiffusionPipeline.from_pretrained(model, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+
+model_loaded_timestamp = time.time()
+model_load_time = model_loaded_timestamp - start_timestamp
+print(f"model load time: {round(model_load_time)} seconds")
+
+prompt = "A majestic broadsword with a golden pommel, no background"
+image = pipe(
+    prompt,
+    guidance_scale=3.0,
+).images[0]
+
+image_name = "image7.png"
+image.save(f"images/{image_name}")
+
+generation_time = time.time() - model_loaded_timestamp
+print(f"image generation time: {round(generation_time)} seconds")
@@ -1,7 +1,6 @@
-torch==2.8.0+cu129
+# torch==2.8.0+cu129 is not installed from this file; see https://pytorch.org/get-started/previous-versions/ for the install command
 transformers==4.57.0
-#diffusers==0.35.1
-it+https://github.com/huggingface/diffusers.git
+git+https://github.com/huggingface/diffusers.git
 accelerate==1.10.1
 huggingface_hub[hf_xet]==1.1.10
 sentencepiece==0.2.1