1
0
forked from cgvr/DeltaVR

test local text-to-image models and cloudflare API

This commit is contained in:
Henri Sellis 2025-10-18 16:06:54 +03:00
parent 2cf0a9f711
commit ae497eac6e
10 changed files with 216 additions and 21 deletions

View File

@ -1 +1,2 @@
.venv
.venv
.env

View File

@ -0,0 +1,172 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1dc6faae",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import base64\n",
"import requests\n",
"from dotenv import load_dotenv"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3107275",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv()\n",
"\n",
"ACCOUNT_ID = os.environ[\"CF_ACCOUNT_ID\"]\n",
"API_TOKEN = os.environ[\"CF_API_TOKEN\"]"
]
},
{
"cell_type": "markdown",
"id": "999adf95",
"metadata": {},
"source": [
"## Text to image"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40b35163",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Saved: output.jpg (263282 bytes)\n",
"Saved: image9.jpg (263282 bytes)\n"
]
}
],
"source": [
"# Generate one image with Cloudflare Workers AI (FLUX.1 schnell) and save it to disk.\n",
"MODEL = \"@cf/black-forest-labs/flux-1-schnell\"\n",
"URL = f\"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}\"\n",
"\n",
"payload = {\n",
"    \"prompt\": \"a slightly curved broadsword with a fancy golden crossguard\",\n",
"}\n",
"\n",
"headers = {\n",
"    \"Authorization\": f\"Bearer {API_TOKEN}\",\n",
"    \"Content-Type\": \"application/json\",\n",
"}\n",
"\n",
"resp = requests.post(URL, json=payload, headers=headers, timeout=60)\n",
"resp.raise_for_status()\n",
"\n",
"data = resp.json()\n",
"# Use .get() so a missing or null \"result\"/\"image\" reaches the explicit error\n",
"# below instead of surfacing as a bare KeyError/TypeError.\n",
"b64 = (data.get(\"result\") or {}).get(\"image\")\n",
"if not b64:\n",
"    raise RuntimeError(f\"Unexpected response structure: {data}\")\n",
"\n",
"img_bytes = base64.b64decode(b64)\n",
"\n",
"# The same decoded image is written to both paths.\n",
"for out_path in (\"output.jpg\", \"image9.jpg\"):\n",
"    with open(out_path, \"wb\") as f:\n",
"        f.write(img_bytes)\n",
"    print(f\"Saved: {out_path} ({len(img_bytes)} bytes)\")"
]
},
{
"cell_type": "markdown",
"id": "14a874c4",
"metadata": {},
"source": [
"## Text prompt refinement"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "485f6f46",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\"dark wooden battleaxe with bronze blade\"\n"
]
}
],
"source": [
"# Condense a spoken-style user request into a short object description via an LLM.\n",
"MODEL = \"@cf/meta/llama-3.2-3b-instruct\"\n",
"URL = f\"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}\"\n",
"\n",
"instructions = \"\"\"\n",
"User is talking about some object. Your task is to generate a short and concise description of it. Use only user's own words, keep it as short as possible.\n",
"Example:\n",
"User: 'Umm, okay, I would like a really cool sword, with for example a bright orange crossguard. And also it should be slightly curved.'\n",
"You: 'a slightly curved sword with bright orange crossguard'\n",
"\"\"\"\n",
"prompt = \"Umm, alright, can you please give me an epic battleaxe? It should have a dark wooden shaft and bronze blade.\"\n",
"\n",
"response = requests.post(\n",
"    URL,\n",
"    headers={\"Authorization\": f\"Bearer {API_TOKEN}\"},\n",
"    json={\n",
"        \"messages\": [\n",
"            {\"role\": \"system\", \"content\": instructions},\n",
"            {\"role\": \"user\", \"content\": prompt},\n",
"        ]\n",
"    },\n",
"    # Without a timeout a stalled connection would block the kernel indefinitely.\n",
"    timeout=60,\n",
")\n",
"# Fail on HTTP errors here rather than with an opaque error while indexing the body.\n",
"response.raise_for_status()\n",
"\n",
"data = response.json()\n",
"result_text = (data.get(\"result\") or {}).get(\"response\")\n",
"if not result_text:\n",
"    raise RuntimeError(f\"Unexpected response structure: {data}\")\n",
"print(result_text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "76fa21f0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -1,17 +0,0 @@
import torch
from diffusers import StableDiffusion3Pipeline
# Smoke test: load a Stable Diffusion 3 pipeline on the GPU, render a single
# prompt and write the first returned image to disk.
checkpoint = "stabilityai/stable-diffusion-3.5-medium"
sd3 = StableDiffusion3Pipeline.from_pretrained(
    checkpoint,
    variant="fp16",
    use_safetensors=True,
).to("cuda")

text = "A cute cat eating a slice of pizza, stunning color scheme, masterpiece, illustration"
# CUDA generator passed to the pipeline; no seed is set on it here.
rng = torch.Generator("cuda")
result = sd3(text, guidance_scale=3.0, generator=rng)

# Only the first image of the result is kept.
result.images[0].save("image.png")

View File

@ -0,0 +1,28 @@
import time
from pathlib import Path

import torch
# StableDiffusionPipeline is only needed by the commented-out SD2 alternative below.
from diffusers import StableDiffusionPipeline, StableDiffusion3Pipeline

# Create the output directory before any expensive work, so a missing folder
# cannot make image.save() fail after minutes of generation.
output_dir = Path("images")
output_dir.mkdir(parents=True, exist_ok=True)

start_timestamp = time.time()

#model = "stabilityai/stable-diffusion-3.5-medium" # generation time: 13 min
model = "stabilityai/stable-diffusion-3-medium-diffusers" # generation time: 10 min
#model = "stabilityai/stable-diffusion-2" # generation time: 4 sec

pipe = StableDiffusion3Pipeline.from_pretrained(model, torch_dtype=torch.float16)
#pipe = StableDiffusionPipeline.from_pretrained(model, torch_dtype=torch.float16)
pipe = pipe.to("cuda")

# Load time covers from_pretrained() plus the move to the GPU.
model_loaded_timestamp = time.time()
model_load_time = model_loaded_timestamp - start_timestamp
print(f"model load time: {round(model_load_time)} seconds")

prompt = "A majestic broadsword with a golden pommel, no background"
image = pipe(
    prompt,
    guidance_scale=3.0,
).images[0]

image_name = "image7.png"
image.save(output_dir / image_name)

# Measured after save(), so this figure includes writing the file to disk.
generation_time = time.time() - model_loaded_timestamp
print(f"image generation time: {round(generation_time)} seconds")

BIN
3d-generation-pipeline/images/flux-1-schnell.jpg (Stored with Git LFS) Normal file

Binary file not shown.

BIN
3d-generation-pipeline/images/stable-diffusion-2.png (Stored with Git LFS) Normal file

Binary file not shown.

Binary file not shown.

BIN
3d-generation-pipeline/images/stable-diffusion-3-medium.png (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -1,7 +1,6 @@
torch==2.8.0+cu129
#torch==2.8.0+cu129 https://pytorch.org/get-started/previous-versions/
transformers==4.57.0
#diffusers==0.35.1
it+https://github.com/huggingface/diffusers.git
git+https://github.com/huggingface/diffusers.git
accelerate==1.10.1
huggingface_hub[hf_xet]==1.1.10
sentencepiece==0.2.1