integrate local trellis api into start_pipeline.py

This commit is contained in:
henrisel 2025-11-07 16:22:13 +02:00
parent 447449e1b3
commit 09f764c0df
6 changed files with 183 additions and 94 deletions

View File

@ -1,4 +1,8 @@
PIPELINE_FOLDER=
REFINE_PROMPT=0
CLOUDFLARE_ACCOUNT_ID= CLOUDFLARE_ACCOUNT_ID=
CLOUDFLARE_API_TOKEN= CLOUDFLARE_API_TOKEN=
PIPELINE_FOLDER=
3D_GENERATION_URL=
MODEL_FOLDER= MODEL_FOLDER=

View File

@ -1,4 +1,5 @@
.venv .venv
.env .env
__pycache__
images/ images/
models/ models/

View File

@ -0,0 +1,59 @@
import base64
import requests
import os
from dotenv import load_dotenv
load_dotenv()
ACCOUNT_ID = os.environ["CLOUDFLARE_ACCOUNT_ID"]
API_TOKEN = os.environ["CLOUDFLARE_API_TOKEN"]
def text_to_image(prompt, output_path):
    """Generate an image from *prompt* with Cloudflare Workers AI and save it.

    Calls the flux-1-schnell text-to-image model and writes the decoded
    image bytes to *output_path*.

    Args:
        prompt: Text description of the desired image.
        output_path: Destination file path for the raw image bytes.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        RuntimeError: If the response JSON lacks the expected image field.
    """
    MODEL = "@cf/black-forest-labs/flux-1-schnell"
    URL = f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}"
    payload = {
        "prompt": prompt,
    }
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "application/json",
    }
    resp = requests.post(URL, json=payload, headers=headers, timeout=60)
    resp.raise_for_status()
    data = resp.json()
    # Use .get() so a missing/renamed key surfaces as the intended
    # RuntimeError (with the full payload for debugging) instead of an
    # opaque KeyError raised before the guard below can run.
    b64 = data.get("result", {}).get("image")
    if not b64:
        raise RuntimeError(f"Unexpected response structure: {data}")
    img_bytes = base64.b64decode(b64)
    with open(output_path, "wb") as f:
        f.write(img_bytes)
def refine_text_prompt(prompt):
    """Condense a free-form user utterance into a short object description.

    Sends *prompt* to a Cloudflare-hosted Llama model with a system
    instruction that restricts the answer to the user's own words.

    Args:
        prompt: Raw user text describing an object.

    Returns:
        The refined, concise description string from the model.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        RuntimeError: If the response JSON lacks the expected field.
    """
    MODEL = "@cf/meta/llama-3.2-3b-instruct"
    URL = f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}"
    instructions = """
User is talking about some object. Your task is to generate a short and concise description of it. Use only user's own words, keep it as short as possible.
Example:
User: 'Umm, okay, I would like a really cool sword, with for example a bright orange crossguard. And also it should be slightly curved.'
You: 'a slightly curved sword with bright orange crossguard'
"""
    # timeout + raise_for_status for parity with text_to_image: a hung
    # connection no longer blocks forever, and HTTP errors fail loudly
    # instead of producing a KeyError on the error payload below.
    response = requests.post(
        URL,
        headers={"Authorization": f"Bearer {API_TOKEN}"},
        json={
            "messages": [
                {"role": "system", "content": instructions},
                {"role": "user", "content": prompt},
            ]
        },
        timeout=60,
    )
    response.raise_for_status()
    data = response.json()
    refined = data.get("result", {}).get("response")
    if refined is None:
        raise RuntimeError(f"Unexpected response structure: {data}")
    return refined

View File

@ -0,0 +1,92 @@
import subprocess
import os
import time
import requests
import base64
from dotenv import load_dotenv
load_dotenv()
MODEL_FOLDER = os.environ["MODEL_FOLDER"]
API_URL = os.environ["3D_GENERATION_URL"]
def image_to_3d_subprocess(image_path, output_path):
    """Run the local 3D-generation script in its own venv as a subprocess.

    Launches MODEL_FOLDER's run.py with the venv's Python interpreter and
    prints the subprocess output; best-effort, errors are printed rather
    than raised.

    Args:
        image_path: Path to the input image.
        output_path: Directory the script writes its output into.
    """
    # os.path.join instead of raw-string concatenation: tolerant of a
    # trailing separator in MODEL_FOLDER. NOTE(review): the ".venv/Scripts"
    # layout is Windows-specific — confirm if this should ever run elsewhere.
    venv_python = os.path.join(MODEL_FOLDER, ".venv", "Scripts", "python.exe")
    script_path = os.path.join(MODEL_FOLDER, "run.py")
    command = [venv_python, script_path, image_path, "--output-dir", output_path]
    try:
        # shell=False (list form) — no shell-quoting issues with paths.
        result = subprocess.run(command, capture_output=True, text=True)
        print("STDOUT:\n", result.stdout)
        print("STDERR:\n", result.stderr)
        print("Return Code:", result.returncode)
        if result.returncode != 0:
            # Make failures visible even when stderr is empty.
            print(f"Warning: generation script exited with code {result.returncode}")
    except Exception as e:
        print(f"Error occurred: {e}")
def generate_no_preview(image_base64: str):
    """Generate 3D model from a single base64-encoded image without previews.

    Starts a generation job on the local TRELLIS API, polls its status
    endpoint until the job completes or fails, then downloads the model.

    Args:
        image_base64: Base64 string of the image (without 'data:image/...' prefix)

    Returns:
        The model file bytes on success, or None on any failure (errors
        are printed, not raised — callers must check for None).
    """
    try:
        # Set generation parameters
        params = {
            'image_base64': image_base64,
            'seed': 42,
            'ss_guidance_strength': 7.5,
            'ss_sampling_steps': 30,
            'slat_guidance_strength': 7.5,
            'slat_sampling_steps': 30,
            'mesh_simplify_ratio': 0.95,
            'texture_size': 1024,
            'output_format': 'glb'
        }
        # Start generation. Per-request timeouts so a dead server turns
        # into a caught exception instead of hanging this loop forever.
        print("Starting generation...")
        response = requests.post(f"{API_URL}/generate_no_preview", data=params, timeout=60)
        response.raise_for_status()
        # Poll status until complete
        while True:
            status = requests.get(f"{API_URL}/status", timeout=30).json()
            print(f"Progress: {status['progress']}%")
            if status['status'] == 'COMPLETE':
                break
            elif status['status'] == 'FAILED':
                raise Exception(f"Generation failed: {status['message']}")
            time.sleep(1)
        # Download the model (no read timeout bound here beyond the
        # connect/read timeout; large models may take a while).
        print("Downloading model...")
        response = requests.get(f"{API_URL}/download/model", timeout=300)
        response.raise_for_status()
        return response.content
    except Exception as e:
        print(f"Error: {str(e)}")
        return None
def image_to_3d_api(image_path, output_path):
    """Convert an image file to a 3D model via the local TRELLIS API.

    Reads *image_path*, base64-encodes it, requests generation through
    generate_no_preview(), and writes the model bytes to *output_path*.

    Args:
        image_path: Path to the source image file.
        output_path: Destination file path for the model bytes.

    Raises:
        RuntimeError: If generation failed (generate_no_preview returned None).
    """
    with open(image_path, 'rb') as image_file:
        image_data = image_file.read()
    base64_encoded = base64.b64encode(image_data).decode('utf-8')
    model_binary = generate_no_preview(base64_encoded)
    # generate_no_preview swallows errors and returns None; fail loudly here
    # BEFORE opening (and truncating) the output file, instead of crashing
    # with a TypeError on f.write(None) and leaving an empty file behind.
    if model_binary is None:
        raise RuntimeError("3D model generation failed; no model data returned")
    with open(output_path, 'wb') as f:
        f.write(model_binary)

File diff suppressed because one or more lines are too long

View File

@ -1,111 +1,46 @@
import os import os
import base64
import requests
import argparse import argparse
import subprocess
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
from dotenv import load_dotenv from dotenv import load_dotenv
from cloudflare_api import text_to_image, refine_text_prompt
from generate_model_local import image_to_3d_api, image_to_3d_subprocess
load_dotenv() load_dotenv()
ACCOUNT_ID = os.environ["CLOUDFLARE_ACCOUNT_ID"]
API_TOKEN = os.environ["CLOUDFLARE_API_TOKEN"]
PIPELINE_FOLDER = os.environ["PIPELINE_FOLDER"] PIPELINE_FOLDER = os.environ["PIPELINE_FOLDER"]
MODEL_FOLDER = os.environ["MODEL_FOLDER"]
def get_timestamp(): def get_timestamp():
return datetime.now().strftime("%Y-%m-%d-%H-%M-%S") return datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
def text_to_image(prompt, output_path):
MODEL = "@cf/black-forest-labs/flux-1-schnell"
URL = f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}"
payload = {
"prompt": prompt,
}
headers = {
"Authorization": f"Bearer {API_TOKEN}",
"Content-Type": "application/json",
}
resp = requests.post(URL, json=payload, headers=headers, timeout=60)
resp.raise_for_status()
data = resp.json()
b64 = data["result"]["image"]
if not b64:
raise RuntimeError(f"Unexpected response structure: {data}")
img_bytes = base64.b64decode(b64)
with open(output_path, "wb") as f:
f.write(img_bytes)
def refine_text_prompt(prompt):
MODEL = "@cf/meta/llama-3.2-3b-instruct"
URL = f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}"
instructions = """
User is talking about some object. Your task is to generate a short and concise description of it. Use only user's own words, keep it as short as possible.
Example:
User: 'Umm, okay, I would like a really cool sword, with for example a bright orange crossguard. And also it should be slightly curved.'
You: 'a slightly curved sword with bright orange crossguard'
"""
response = requests.post(URL,
headers={"Authorization": f"Bearer {API_TOKEN}"},
json={
"messages": [
{"role": "system", "content": instructions},
{"role": "user", "content": prompt}
]
}
)
data = response.json()
return data["result"]["response"]
def image_to_3d(image_path, output_path):
venv_python = MODEL_FOLDER + r"\.venv\Scripts\python.exe"
script_path = MODEL_FOLDER + r"\run.py"
args = [image_path, "--output-dir", output_path]
command = [venv_python, script_path] + args
try:
# Run the subprocess
result = subprocess.run(command, capture_output=True, text=True)
# Print output and errors
print("STDOUT:\n", result.stdout)
print("STDERR:\n", result.stderr)
print("Return Code:", result.returncode)
except Exception as e:
print(f"Error occurred: {e}")
def main(): def main():
parser = argparse.ArgumentParser(description="Text to 3D model pipeline") parser = argparse.ArgumentParser(description="Text to 3D model pipeline")
parser.add_argument("--prompt", type=str, required=True, help="User text prompt") parser.add_argument("--prompt", type=str, required=True, help="User text prompt")
args = parser.parse_args() args = parser.parse_args()
user_prompt = args.prompt input_prompt = args.prompt
print(f"User prompt: {user_prompt}") print(f"Input prompt: {input_prompt}")
refined_prompt = refine_text_prompt(user_prompt)
print(f"Refined prompt: {refined_prompt}") refine_prompt = os.environ["REFINE_PROMPT"] == "1"
if refine_prompt:
image_generation_prompt = refine_text_prompt(input_prompt)
print(f"Refined prompt: {image_generation_prompt}")
else:
image_generation_prompt = input_prompt
timestamp = get_timestamp() timestamp = get_timestamp()
pipeline_folder = Path(PIPELINE_FOLDER) pipeline_folder = Path(PIPELINE_FOLDER)
image_path = pipeline_folder / "images" / f"{timestamp}.jpg" image_path = pipeline_folder / "images" / f"{timestamp}.jpg"
text_to_image(refined_prompt, image_path) text_to_image(image_generation_prompt, image_path)
print(f"Generated image file: {image_path}") print(f"Generated image file: {image_path}")
model_path = pipeline_folder / "models" / timestamp model_path = pipeline_folder / "models" / timestamp
image_to_3d(image_path, model_path) image_to_3d_api(image_path, model_path)
model_file_path = model_path / "0" / "mesh.glb" #model_file_path = model_path / "0" / "mesh.glb"
print(f"Generated 3D model file: {model_file_path}") print(f"Generated 3D model file: {model_path}")
if __name__ == "__main__": if __name__ == "__main__":