aarodi committed
Commit 2dff18f · 1 Parent(s): 4f3c6f1

Upload app.py

Files changed (1): app.py (+26 -9)
app.py CHANGED
@@ -5,6 +5,8 @@ import torchvision.transforms as transforms
 import json
 import os
 import numpy as np
+import pandas as pd
+import random
 from huggingface_hub import snapshot_download, HfApi
 from transformers import CLIPTokenizer
 
@@ -16,6 +18,7 @@ HUB_JSON = "leaderboard.json"
 MODEL_PATH = "mobilenet_v2_fake_detector.onnx"
 CLIP_IMAGE_ENCODER_PATH = "clip_image_encoder.onnx"
 CLIP_TEXT_ENCODER_PATH = "clip_text_encoder.onnx"
+PROMPT_CSV_PATH = "generate2_1.csv"
 PROMPT_MATCH_THRESHOLD = 10  # percent
 
 # --- Download leaderboard + model checkpoint from HF Hub ---
@@ -26,13 +29,28 @@ def load_assets():
             local_dir=".",
             repo_type="dataset",
             token=HF_TOKEN,
-            allow_patterns=[HUB_JSON, MODEL_PATH, CLIP_IMAGE_ENCODER_PATH, CLIP_TEXT_ENCODER_PATH]
+            allow_patterns=[HUB_JSON, MODEL_PATH, CLIP_IMAGE_ENCODER_PATH, CLIP_TEXT_ENCODER_PATH, PROMPT_CSV_PATH]
         )
     except Exception as e:
         print(f"Failed to load assets from HF Hub: {e}")
 
 load_assets()
 
+# --- Load prompts from CSV ---
+def load_prompts():
+    try:
+        df = pd.read_csv(PROMPT_CSV_PATH)
+        if "prompt" in df.columns:
+            return df["prompt"].dropna().tolist()
+        else:
+            print("CSV missing 'prompt' column.")
+            return []
+    except Exception as e:
+        print(f"Failed to load prompts: {e}")
+        return []
+
+PROMPT_LIST = load_prompts()
+
 # --- Load leaderboard ---
 def load_leaderboard():
     try:
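Note: the new `load_prompts()` fails soft on every path: a missing or unreadable CSV, or one without a `prompt` column, yields an empty list instead of crashing at import time. A minimal sketch of the contract it assumes (the filename and column name come from the diff; the rows below are invented):

    import pandas as pd

    # Stand-in for generate2_1.csv -- only the "prompt" column matters;
    # extra columns are ignored, and NaN prompts are dropped.
    df = pd.DataFrame({
        "prompt": ["A portrait photograph of a mayor at a podium", None],
        "category": ["politics", "misc"],
    })
    prompts = df["prompt"].dropna().tolist()
    print(prompts)  # ['A portrait photograph of a mayor at a podium']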
@@ -82,12 +100,10 @@ transform = transforms.Compose([
 
 def compute_prompt_match(image: Image.Image, prompt: str) -> float:
     try:
-        # Encode image
         img_tensor = transform(image).unsqueeze(0).numpy().astype(np.float32)
         image_features = clip_image_sess.run(None, {clip_image_sess.get_inputs()[0].name: img_tensor})[0][0]
-        image_features /= np.linalg.norm(image_features)  # Normalize
+        image_features /= np.linalg.norm(image_features)
 
-        # Encode text
         inputs = clip_tokenizer(prompt, return_tensors="np", padding="max_length", truncation=True, max_length=77)
         input_ids = inputs["input_ids"]
        attention_mask = inputs["attention_mask"]
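Note: the `max_length=77` above is not arbitrary: CLIP's text encoder has a fixed context window of 77 tokens, so the exported ONNX text encoder expects exactly that shape. A quick check, assuming the stock `openai/clip-vit-base-patch32` tokenizer (the diff doesn't show which checkpoint the app actually loads):

    from transformers import CLIPTokenizer

    tok = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")  # assumed checkpoint
    enc = tok("a photo of a dog", return_tensors="np",
              padding="max_length", truncation=True, max_length=77)
    print(enc["input_ids"].shape)       # (1, 77) -- fixed-length token ids
    print(enc["attention_mask"].shape)  # (1, 77) -- 1 for real tokens, 0 for padding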
@@ -95,16 +111,14 @@ def compute_prompt_match(image: Image.Image, prompt: str) -> float:
             clip_text_sess.get_inputs()[0].name: input_ids,
             clip_text_sess.get_inputs()[1].name: attention_mask
         })[0][0]
-        text_features /= np.linalg.norm(text_features)  # Normalize
+        text_features /= np.linalg.norm(text_features)
 
-        # Cosine similarity
         sim = np.dot(image_features, text_features)
         return round(sim * 100, 2)
     except Exception as e:
         print(f"CLIP ONNX match failed: {e}")
         return 0.0
 
-
 # --- Main prediction logic ---
 def detect_with_model(image: Image.Image, prompt: str, username: str):
     if not username.strip():
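Note: this commit strips the `# Normalize` and `# Cosine similarity` comments but keeps the math: dividing each feature vector by its L2 norm and then taking the dot product is exactly cosine similarity, scaled here to a percentage. A self-contained check with toy vectors:

    import numpy as np

    img = np.array([3.0, 4.0])   # stand-in image features
    txt = np.array([4.0, 3.0])   # stand-in text features
    img /= np.linalg.norm(img)   # now unit length
    txt /= np.linalg.norm(txt)
    sim = np.dot(img, txt)       # cosine similarity in [-1, 1]
    print(round(sim * 100, 2))   # 96.0 -- the "percent match" scale used above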
@@ -118,7 +132,7 @@ def detect_with_model(image: Image.Image, prompt: str, username: str):
     image_tensor = transforms.Resize((224, 224))(image)
     image_tensor = transforms.ToTensor()(image_tensor).unsqueeze(0).numpy().astype(np.float32)
     outputs = session.run(None, {input_name: image_tensor})
-    prob = round(1 / (1 + np.exp(-outputs[0][0][0])), 2)  # sigmoid
+    prob = round(1 / (1 + np.exp(-outputs[0][0][0])), 2)
     prediction = "Real" if prob > 0.5 else "Fake"
 
     score = 1 if prediction == "Real" else 0
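Note: the detector head emits a single raw logit, and the expression above (which lost its `# sigmoid` comment in this commit) is the standard sigmoid mapping it into (0, 1). Because the probability is rounded to two decimals before the strict `prob > 0.5` test, a logit near zero rounds to exactly 0.50 and is labeled "Fake". Illustration:

    import numpy as np

    def sigmoid(x: float) -> float:
        return 1 / (1 + np.exp(-x))

    for logit in (-2.0, 0.001, 2.0):
        prob = round(float(sigmoid(logit)), 2)
        print(logit, prob, "Real" if prob > 0.5 else "Fake")
    # -2.0 -> 0.12 Fake; 0.001 -> 0.5 Fake (the tie goes to Fake); 2.0 -> 0.88 Real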
@@ -146,6 +160,9 @@ def detect_with_model(image: Image.Image, prompt: str, username: str):
     )
 
 # --- UI Layout ---
+def get_random_prompt():
+    return random.choice(PROMPT_LIST) if PROMPT_LIST else "A synthetic scene with dramatic lighting"
+
 with gr.Blocks(css=".gr-button {font-size: 16px !important}") as demo:
     gr.Markdown("## 🎭 OpenFake Arena")
     gr.Markdown("Welcome to the OpenFake Arena!\n\n**Your mission:** Generate a synthetic image for the prompt, upload it, and try to fool the AI detector into thinking it's real.\n\n**Rules:**\n- Only synthetic images allowed!\n- No cheating with real photos.\n- Licensing is your responsibility.\n\nMake it wild. Make it weird. Most of all - make it fun.")
@@ -157,7 +174,7 @@ with gr.Blocks(css=".gr-button {font-size: 16px !important}") as demo:
     prompt_input = gr.Textbox(
         label="Suggested Prompt",
         placeholder="e.g., A portrait photograph of a politician delivering a speech...",
-        value="A portrait photograph of Barack Obama delivering a speech, with the United States flag in the background",
+        value=get_random_prompt(),
         lines=2
     )
 
 