1
0
forked from cgvr/DeltaVR

integrate local trellis api into start_pipeline.py

This commit is contained in:
henrisel 2025-11-07 16:22:13 +02:00
parent 447449e1b3
commit 09f764c0df
6 changed files with 183 additions and 94 deletions

View File

@ -1,4 +1,8 @@
PIPELINE_FOLDER=
REFINE_PROMPT=0
CLOUDFLARE_ACCOUNT_ID=
CLOUDFLARE_API_TOKEN=
PIPELINE_FOLDER=
MODEL_FOLDER=
3D_GENERATION_URL=
MODEL_FOLDER=

View File

@ -1,4 +1,5 @@
.venv
.env
__pycache__
images/
models/

View File

@ -0,0 +1,59 @@
import base64
import requests
import os
from dotenv import load_dotenv
load_dotenv()
ACCOUNT_ID = os.environ["CLOUDFLARE_ACCOUNT_ID"]
API_TOKEN = os.environ["CLOUDFLARE_API_TOKEN"]
def text_to_image(prompt, output_path):
    """Generate an image from *prompt* via Cloudflare Workers AI and save it.

    Args:
        prompt: Text description of the desired image.
        output_path: Filesystem path the decoded image bytes are written to.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        RuntimeError: If the response JSON lacks the expected image payload.
    """
    MODEL = "@cf/black-forest-labs/flux-1-schnell"
    URL = f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}"
    payload = {
        "prompt": prompt,
    }
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "application/json",
    }
    resp = requests.post(URL, json=payload, headers=headers, timeout=60)
    resp.raise_for_status()
    data = resp.json()
    # Chain .get() instead of indexing: direct data["result"]["image"] would
    # raise KeyError before the structure check below could produce the more
    # informative RuntimeError with the full payload.
    b64 = data.get("result", {}).get("image")
    if not b64:
        raise RuntimeError(f"Unexpected response structure: {data}")
    img_bytes = base64.b64decode(b64)
    with open(output_path, "wb") as f:
        f.write(img_bytes)
def refine_text_prompt(prompt):
    """Condense a free-form user utterance into a short object description.

    Sends the text to a Cloudflare-hosted Llama model with a system prompt
    that constrains the answer to the user's own words.

    Args:
        prompt: Raw user text describing an object.

    Returns:
        The model's short description string.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        RuntimeError: If the response JSON lacks the expected field.
    """
    MODEL = "@cf/meta/llama-3.2-3b-instruct"
    URL = f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}"
    instructions = """
User is talking about some object. Your task is to generate a short and concise description of it. Use only user's own words, keep it as short as possible.
Example:
User: 'Umm, okay, I would like a really cool sword, with for example a bright orange crossguard. And also it should be slightly curved.'
You: 'a slightly curved sword with bright orange crossguard'
"""
    response = requests.post(
        URL,
        headers={"Authorization": f"Bearer {API_TOKEN}"},
        json={
            "messages": [
                {"role": "system", "content": instructions},
                {"role": "user", "content": prompt},
            ]
        },
        timeout=60,  # match text_to_image; don't hang forever on a stalled request
    )
    # Fail loudly on HTTP errors instead of crashing with KeyError below.
    response.raise_for_status()
    data = response.json()
    try:
        return data["result"]["response"]
    except KeyError as err:
        raise RuntimeError(f"Unexpected response structure: {data}") from err

View File

@ -0,0 +1,92 @@
import subprocess
import os
import time
import requests
import base64
from dotenv import load_dotenv
load_dotenv()
MODEL_FOLDER = os.environ["MODEL_FOLDER"]
API_URL = os.environ["3D_GENERATION_URL"]
def image_to_3d_subprocess(image_path, output_path):
    """Convert an image into a 3D model by invoking the local model's run.py.

    Runs the script with the model folder's own virtualenv interpreter and
    prints its stdout/stderr/return code (best-effort: failures are printed,
    not raised, matching the original contract).

    Args:
        image_path: Path of the source image passed to run.py.
        output_path: Directory run.py writes its output into.
    """
    # os.path.join instead of raw backslash concatenation; on Windows this
    # produces the same path as before, without hard-coding the separator.
    venv_python = os.path.join(MODEL_FOLDER, ".venv", "Scripts", "python.exe")
    script_path = os.path.join(MODEL_FOLDER, "run.py")
    args = [image_path, "--output-dir", output_path]
    command = [venv_python, script_path] + args
    try:
        # Run the subprocess (list argv, shell=False — no shell injection risk)
        result = subprocess.run(command, capture_output=True, text=True)
        # Print output and errors
        print("STDOUT:\n", result.stdout)
        print("STDERR:\n", result.stderr)
        print("Return Code:", result.returncode)
    except Exception as e:
        print(f"Error occurred: {e}")
def generate_no_preview(image_base64: str):
    """Generate 3D model from a single base64-encoded image without previews.

    Starts a generation job on the local Trellis API, polls its status until
    completion, then downloads the resulting model.

    Args:
        image_base64: Base64 string of the image (without 'data:image/...' prefix)

    Returns:
        The model binary as bytes, or None if any step failed (the broad
        except preserves the original best-effort contract for callers).
    """
    try:
        # Set generation parameters
        params = {
            'image_base64': image_base64,
            'seed': 42,
            'ss_guidance_strength': 7.5,
            'ss_sampling_steps': 30,
            'slat_guidance_strength': 7.5,
            'slat_sampling_steps': 30,
            'mesh_simplify_ratio': 0.95,
            'texture_size': 1024,
            'output_format': 'glb'
        }
        # Start generation; timeout bounds each HTTP round-trip, not the job.
        print("Starting generation...")
        response = requests.post(f"{API_URL}/generate_no_preview", data=params, timeout=60)
        response.raise_for_status()
        # Poll status until complete
        while True:
            status_resp = requests.get(f"{API_URL}/status", timeout=30)
            status_resp.raise_for_status()  # an HTTP error here is caught below and yields None
            status = status_resp.json()
            print(f"Progress: {status['progress']}%")
            if status['status'] == 'COMPLETE':
                break
            elif status['status'] == 'FAILED':
                raise Exception(f"Generation failed: {status['message']}")
            time.sleep(1)
        # Download the model
        print("Downloading model...")
        response = requests.get(f"{API_URL}/download/model", timeout=300)
        response.raise_for_status()
        return response.content
    except Exception as e:
        print(f"Error: {str(e)}")
        return None
def image_to_3d_api(image_path, output_path):
    """Generate a 3D model from an image file via the local Trellis API.

    Args:
        image_path: Path to the source image file.
        output_path: Path the resulting model binary is written to.

    Raises:
        RuntimeError: If generation failed and no model bytes were returned.
    """
    with open(image_path, 'rb') as image_file:
        image_data = image_file.read()
    base64_encoded = base64.b64encode(image_data).decode('utf-8')
    model_binary = generate_no_preview(base64_encoded)
    # generate_no_preview returns None on failure; writing None would raise a
    # confusing TypeError, so fail with an explicit error instead.
    if model_binary is None:
        raise RuntimeError("3D model generation failed; no model data returned")
    with open(output_path, 'wb') as f:
        f.write(model_binary)

File diff suppressed because one or more lines are too long

View File

@ -1,111 +1,46 @@
import os
import base64
import requests
import argparse
import subprocess
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv
from cloudflare_api import text_to_image, refine_text_prompt
from generate_model_local import image_to_3d_api, image_to_3d_subprocess
load_dotenv()
ACCOUNT_ID = os.environ["CLOUDFLARE_ACCOUNT_ID"]
API_TOKEN = os.environ["CLOUDFLARE_API_TOKEN"]
PIPELINE_FOLDER = os.environ["PIPELINE_FOLDER"]
MODEL_FOLDER = os.environ["MODEL_FOLDER"]
def get_timestamp():
    """Return the current local time formatted as 'YYYY-MM-DD-HH-MM-SS'."""
    return f"{datetime.now():%Y-%m-%d-%H-%M-%S}"
def text_to_image(prompt, output_path):
    """Render *prompt* with Cloudflare's Flux model and write the image to *output_path*."""
    model_name = "@cf/black-forest-labs/flux-1-schnell"
    endpoint = (
        f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}"
        f"/ai/run/{model_name}"
    )
    resp = requests.post(
        endpoint,
        json={"prompt": prompt},
        headers={
            "Authorization": f"Bearer {API_TOKEN}",
            "Content-Type": "application/json",
        },
        timeout=60,
    )
    resp.raise_for_status()
    data = resp.json()
    b64 = data["result"]["image"]
    if not b64:
        raise RuntimeError(f"Unexpected response structure: {data}")
    with open(output_path, "wb") as out:
        out.write(base64.b64decode(b64))
def refine_text_prompt(prompt):
    """Ask a Cloudflare-hosted Llama model to compress *prompt* into a short object description."""
    model_name = "@cf/meta/llama-3.2-3b-instruct"
    endpoint = f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{model_name}"
    instructions = """
User is talking about some object. Your task is to generate a short and concise description of it. Use only user's own words, keep it as short as possible.
Example:
User: 'Umm, okay, I would like a really cool sword, with for example a bright orange crossguard. And also it should be slightly curved.'
You: 'a slightly curved sword with bright orange crossguard'
"""
    chat = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": prompt},
    ]
    response = requests.post(
        endpoint,
        headers={"Authorization": f"Bearer {API_TOKEN}"},
        json={"messages": chat},
    )
    return response.json()["result"]["response"]
def image_to_3d(image_path, output_path):
    """Run the local model's run.py inside its own virtualenv to turn an image into a 3D model."""
    interpreter = MODEL_FOLDER + r"\.venv\Scripts\python.exe"
    runner = MODEL_FOLDER + r"\run.py"
    command = [interpreter, runner, image_path, "--output-dir", output_path]
    try:
        # Capture both streams so the model script's output is surfaced below.
        completed = subprocess.run(command, capture_output=True, text=True)
        print("STDOUT:\n", completed.stdout)
        print("STDERR:\n", completed.stderr)
        print("Return Code:", completed.returncode)
    except Exception as e:
        print(f"Error occurred: {e}")
# NOTE(review): this span is a rendered unified diff of start_pipeline.py's
# main() with the +/- markers stripped, so pre-change and post-change lines
# appear interleaved (both the unconditional `refined_prompt` path and the
# REFINE_PROMPT-gated `image_generation_prompt` path are present, and both
# image_to_3d and image_to_3d_api are called). It is not valid standalone
# Python as shown; annotating only, not restructuring.
def main():
parser = argparse.ArgumentParser(description="Text to 3D model pipeline")
parser.add_argument("--prompt", type=str, required=True, help="User text prompt")
args = parser.parse_args()
# Removed lines (old behavior): always refine the prompt.
user_prompt = args.prompt
print(f"User prompt: {user_prompt}")
refined_prompt = refine_text_prompt(user_prompt)
print(f"Refined prompt: {refined_prompt}")
# Added lines (new behavior): refine only when REFINE_PROMPT=1 in the env.
input_prompt = args.prompt
print(f"Input prompt: {input_prompt}")
refine_prompt = os.environ["REFINE_PROMPT"] == "1"
if refine_prompt:
image_generation_prompt = refine_text_prompt(input_prompt)
print(f"Refined prompt: {image_generation_prompt}")
else:
image_generation_prompt = input_prompt
timestamp = get_timestamp()
pipeline_folder = Path(PIPELINE_FOLDER)
image_path = pipeline_folder / "images" / f"{timestamp}.jpg"
# Old call (removed) vs new call (added) to the image generator:
text_to_image(refined_prompt, image_path)
text_to_image(image_generation_prompt, image_path)
print(f"Generated image file: {image_path}")
model_path = pipeline_folder / "models" / timestamp
# Old: spawn the model venv as a subprocess; new: call the local Trellis API.
image_to_3d(image_path, model_path)
model_file_path = model_path / "0" / "mesh.glb"
print(f"Generated 3D model file: {model_file_path}")
image_to_3d_api(image_path, model_path)
#model_file_path = model_path / "0" / "mesh.glb"
print(f"Generated 3D model file: {model_path}")
if __name__ == "__main__":