Spaces:

facebook
/

omnisealbench

Running

App Files Files Community

mduppes committed 8 days ago

Commit

cca4f79

1 Parent(s): db54d0f

Add dataset selector for examples

Browse files

Files changed (5) hide show

backend/app.py +23 -20
backend/config.py +50 -43
backend/examples.py +19 -8
frontend/src/API.ts +20 -2
frontend/src/components/Examples.tsx +24 -3

backend/app.py CHANGED Viewed

@@ -1,32 +1,27 @@
-from backend.config import (
-    ABS_DATASET_DOMAIN,
-    get_dataset_config,
-    get_datasets,
-)
 from backend.descriptions import (
     DATASET_DESCRIPTIONS,
     DESCRIPTIONS,
     METRIC_DESCRIPTIONS,
     MODEL_DESCRIPTIONS,
 )
-from backend.examples import (
-    get_examples_tab,
-)
-from flask import Flask, Response, send_from_directory, request
 from flask_cors import CORS
-import os
-import logging
-import pandas as pd
-import json
-from io import StringIO
 from tools import (
     get_leaderboard_filters,
     get_old_format_dataframe,
 )  # Import your function
-import typing as tp
-import requests
-from urllib.parse import unquote
-import mimetypes
 logger = logging.getLogger(__name__)
@@ -110,9 +105,17 @@ def example_files(type):
     """
     Serve example files from S3 or locally based on config
     """
-    result = get_examples_tab(type)
-    return Response(json.dumps(result), mimetype="application/json")
 @app.route("/descriptions")

+import json
+import logging
+import mimetypes
+import os
+import typing as tp
+from io import StringIO
+from urllib.parse import unquote
+import pandas as pd
+import requests
+from backend.config import ABS_DATASET_DOMAIN, get_dataset_config, get_datasets
 from backend.descriptions import (
     DATASET_DESCRIPTIONS,
     DESCRIPTIONS,
     METRIC_DESCRIPTIONS,
     MODEL_DESCRIPTIONS,
 )
+from backend.examples import get_examples_tab
+from flask import Flask, request, Response, send_from_directory
 from flask_cors import CORS
 from tools import (
     get_leaderboard_filters,
     get_old_format_dataframe,
 )  # Import your function
 logger = logging.getLogger(__name__)
     """
     Serve example files from S3 or locally based on config
     """
+    # Get dataset parameter from query string
+    dataset_name = request.args.get("dataset")
+    if not dataset_name:
+        return {"error": "Dataset parameter is required"}, 400
+    try:
+        result = get_examples_tab(type, dataset_name)
+        return Response(json.dumps(result), mimetype="application/json")
+    except ValueError as e:
+        return {"error": str(e)}, 400
 @app.route("/descriptions")

backend/config.py CHANGED Viewed

@@ -2,8 +2,6 @@
 # IMPORTANT: When running from docker more setup is required (e.g. on Huggingface)
 import os
 from collections import defaultdict
-from copy import deepcopy
-from typing import Any, Dict
 ABS_DATASET_DOMAIN = "https://dl.fbaipublicfiles.com"
@@ -127,7 +125,7 @@ MODALITY_CONFIG_CONSTANTS = {
             "H264rgb",
             "H265",
         ],
-    }
 }
 DATASET_CONFIGS = {
@@ -139,30 +137,6 @@ DATASET_CONFIGS = {
 }
-EXAMPLE_CONFIGS = {
-    "audio": {
-        "dataset_name": "voxpopuli_1k",
-        "path": ABS_DATASET_PATH,
-        "db_key": "voxpopuli",
-    },
-    # "image": {
-    #     "dataset_name": "val2014_1k_v2",
-    #     "path": ABS_DATASET_PATH,
-    #     "db_key": "local_val2014",
-    # },
-    "image": {
-        "dataset_name": "sa_1b_val_1k",
-        "path": ABS_DATASET_PATH,
-        "db_key": "local_valid",
-    },
-    "video": {
-        "dataset_name": "sav_val_full_v2",
-        "path": ABS_DATASET_PATH,
-        "db_key": "sa-v_sav_val_videos",
-    },
-}
 def get_user_dataset():
     datasets = defaultdict(list)
     user_data_dir = os.getenv("OMNISEAL_LEADERBOARD_DATA", "./data")
@@ -170,7 +144,9 @@ def get_user_dataset():
         for user_data in os.listdir(user_data_dir):
             if not os.path.isdir(os.path.join(user_data_dir, user_data)):
                 continue
-            user_dtype = os.listdir(os.path.join(user_data_dir, user_data, "examples"))[0]
             datasets[user_dtype].append(user_data + "/" + user_dtype)
     return datasets
@@ -192,28 +168,59 @@ def get_datasets():
     return grouped
-def get_example_config(type):
-    if type not in EXAMPLE_CONFIGS:
-        raise ValueError(f"Unknown example type: {type}")
-    examples_config: Dict[str, Any] = deepcopy(EXAMPLE_CONFIGS[type])
     user_datasets = get_user_dataset()
     user_data_dir = os.getenv("OMNISEAL_LEADERBOARD_DATA", "./data")
-    if len(user_datasets) > 0:
-        assert user_data_dir, f"OMNISEAL_LEADERBOARD_DATA is reset during loading the examples for {type}. Please set it correctly"
-        for dtype, user_names in user_datasets.items():
-            if dtype == type:
-                dataset_name = user_names[0].split("/")[0]
-                path = user_data_dir + "/"
-                examples_config = {
-                    "dataset_name": dataset_name,
-                    "path": path,
-                    "db_key": dataset_name,
-                }
     return examples_config
 def get_dataset_config(dataset_name):
     if dataset_name in DATASET_CONFIGS:
         cfg = DATASET_CONFIGS[dataset_name]

 # IMPORTANT: When running from docker more setup is required (e.g. on Huggingface)
 import os
 from collections import defaultdict
 ABS_DATASET_DOMAIN = "https://dl.fbaipublicfiles.com"
             "H264rgb",
             "H265",
         ],
+    },
 }
 DATASET_CONFIGS = {
 }
 def get_user_dataset():
     datasets = defaultdict(list)
     user_data_dir = os.getenv("OMNISEAL_LEADERBOARD_DATA", "./data")
         for user_data in os.listdir(user_data_dir):
             if not os.path.isdir(os.path.join(user_data_dir, user_data)):
                 continue
+            user_dtype = os.listdir(os.path.join(user_data_dir, user_data, "examples"))[
+                0
+            ]
             datasets[user_dtype].append(user_data + "/" + user_dtype)
     return datasets
     return grouped
def get_example_config(type, dataset_name):
    """Build the examples configuration for one dataset of a given type.

    Args:
        type: Key used to look up the list of dataset names in the mappings
            returned by ``get_datasets()`` and ``get_user_dataset()``. The
            name shadows the builtin but is kept so existing callers that
            pass it by keyword keep working.
        dataset_name: Dataset identifier as listed by ``get_datasets()``.
            Only the part before the first "/" is used as the base name.

    Returns:
        A dict with the keys ``"dataset_name"``, ``"path"`` and ``"db_key"``.

    Raises:
        ValueError: If ``dataset_name`` is empty, is not listed for ``type``,
            or is neither a user dataset nor an entry of ``DATASET_CONFIGS``.
    """
    if not dataset_name:
        raise ValueError("Dataset name is required")

    # Reject names that are not offered for this type before touching configs.
    if dataset_name not in get_datasets().get(type, []):
        raise ValueError(f"Unknown dataset {dataset_name} for type {type}")

    # Keep only the part before the first "/" (drops a trailing suffix).
    dataset_base_name = dataset_name.split("/")[0]

    # User datasets are served from the user data directory and use their
    # own base name as database key.
    if dataset_name in get_user_dataset().get(type, []):
        user_data_dir = os.getenv("OMNISEAL_LEADERBOARD_DATA", "./data")
        return {
            "dataset_name": dataset_base_name,
            "path": user_data_dir + "/",
            "db_key": dataset_base_name,
        }

    # Anything else has to be a predefined dataset.
    if dataset_name not in DATASET_CONFIGS:
        raise ValueError(f"Dataset {dataset_name} not found in configurations")

    return {
        "dataset_name": dataset_base_name,
        "path": DATASET_CONFIGS[dataset_name]["path"],
        "db_key": _get_db_key_for_dataset(dataset_base_name, type),
    }
# Predefined dataset base names whose database key differs from the name.
# Built once at import time instead of on every lookup.
_DB_KEY_MAPPING = {
    "voxpopuli_1k": "voxpopuli",
    "val2014_1k_v2": "local_val2014",
    "sa_1b_val_1k": "local_valid",
    "sav_val_full_v2": "sa-v_sav_val_videos",
    "ravdess_1k": "ravdess",
}


def _get_db_key_for_dataset(dataset_base_name, type):
    """Return the database key for a dataset base name.

    Args:
        dataset_base_name: Dataset name used as the lookup key.
        type: Unused; kept so the existing call signature stays valid.

    Returns:
        The mapped key for names listed in ``_DB_KEY_MAPPING``; any other
        name is returned unchanged.
    """
    return _DB_KEY_MAPPING.get(dataset_base_name, dataset_base_name)
 def get_dataset_config(dataset_name):
     if dataset_name in DATASET_CONFIGS:
         cfg = DATASET_CONFIGS[dataset_name]

backend/examples.py CHANGED Viewed

@@ -92,9 +92,9 @@ def build_description(
         }
-def build_infos(abs_path: Path, datatype: str, dataset_name: str, db_key: str):
-    def generate_file_patterns(prefixes, extensions, indices):
         return [
             f"{prefix}_{index:05d}.{ext}"
             for prefix in prefixes
@@ -102,6 +102,11 @@ def build_infos(abs_path: Path, datatype: str, dataset_name: str, db_key: str):
             for ext in extensions
         ]
     if datatype == "audio":
         quality_metrics = ["snr", "sisnr", "stoi", "pesq"]
         extensions = ["wav"]
@@ -118,7 +123,7 @@ def build_infos(abs_path: Path, datatype: str, dataset_name: str, db_key: str):
         datatype_abbr = "video"
         # indices = [0, 1, 3, 4, 5]
-    eval_results_path = abs_path + f"{dataset_name}/examples_eval_results.json"
     # Determine if eval_results_path is a URL or local file
     if eval_results_path.startswith("http://") or eval_results_path.startswith(
@@ -141,7 +146,9 @@ def build_infos(abs_path: Path, datatype: str, dataset_name: str, db_key: str):
     first_model = next(iter(dataset.keys()))
     first_attack = next(iter(dataset[first_model].keys()))
     first_attack_variant = next(iter(dataset[first_model][first_attack].keys()))
-    indices = [item["idx"] for item in dataset[first_model][first_attack][first_attack_variant]]
     prefixes = [
         f"attacked_{datatype_abbr}",
@@ -168,11 +175,15 @@ def build_infos(abs_path: Path, datatype: str, dataset_name: str, db_key: str):
                     attack = attack_name
                 else:
                     # TODO: Update data on S3 with new Omniseal Bench V2 eval script
-                    if str(abs_path).startswith("http") or str(abs_path).startswith("https") or str(abs_path).startswith("s3://"):
                         attack = f"{attack_name}_{attack_variant}"
                     else:
                         attack = f"{attack_name}__{attack_variant}"
                 if len(attack_rows) == 0:
                     model_infos[attack] = []
                     continue
@@ -227,8 +238,8 @@ def build_infos(abs_path: Path, datatype: str, dataset_name: str, db_key: str):
     return infos
-def get_examples_tab(datatype: str):
-    config = get_example_config(datatype)
     infos = build_infos(
         config["path"],
         datatype=datatype,

         }
+def build_infos(abs_path, datatype: str, dataset_name: str, db_key: str):
+    def generate_file_patterns(prefixes, extensions, indices):
         return [
             f"{prefix}_{index:05d}.{ext}"
             for prefix in prefixes
             for ext in extensions
         ]
+    # Initialize defaults
+    quality_metrics = []
+    extensions = []
+    datatype_abbr = ""
     if datatype == "audio":
         quality_metrics = ["snr", "sisnr", "stoi", "pesq"]
         extensions = ["wav"]
         datatype_abbr = "video"
         # indices = [0, 1, 3, 4, 5]
+    eval_results_path = str(abs_path) + f"{dataset_name}/examples_eval_results.json"
     # Determine if eval_results_path is a URL or local file
     if eval_results_path.startswith("http://") or eval_results_path.startswith(
     first_model = next(iter(dataset.keys()))
     first_attack = next(iter(dataset[first_model].keys()))
     first_attack_variant = next(iter(dataset[first_model][first_attack].keys()))
+    indices = [
+        item["idx"] for item in dataset[first_model][first_attack][first_attack_variant]
+    ]
     prefixes = [
         f"attacked_{datatype_abbr}",
                     attack = attack_name
                 else:
                     # TODO: Update data on S3 with new Omniseal Bench V2 eval script
+                    if (
+                        str(abs_path).startswith("http")
+                        or str(abs_path).startswith("https")
+                        or str(abs_path).startswith("s3://")
+                    ):
                         attack = f"{attack_name}_{attack_variant}"
                     else:
                         attack = f"{attack_name}__{attack_variant}"
                 if len(attack_rows) == 0:
                     model_infos[attack] = []
                     continue
     return infos
+def get_examples_tab(datatype: str, dataset_name: str):
+    config = get_example_config(datatype, dataset_name)
     infos = build_infos(
         config["path"],
         datatype=datatype,

frontend/src/API.ts CHANGED Viewed

@@ -17,8 +17,11 @@ class API {
   }
   // Rename the method to fetchExamplesByType
-  static fetchExamplesByType(type: 'image' | 'audio' | 'video'): Promise<any> {
-    return fetch(`${VITE_API_SERVER_URL}/examples/${type}`).then((response) => {
       if (!response.ok) {
         throw new Error(`Failed to fetch examples of type ${type}`)
       }
@@ -52,6 +55,21 @@ class API {
     if (!response.ok) throw new Error('Failed to fetch descriptions')
     return response.json()
   }
 }
 export default API

   }
   // Rename the method to fetchExamplesByType
+  static fetchExamplesByType(type: 'image' | 'audio' | 'video', dataset?: string): Promise<any> {
+    const url = dataset
+      ? `${VITE_API_SERVER_URL}/examples/${type}?dataset=${encodeURIComponent(dataset)}`
+      : `${VITE_API_SERVER_URL}/examples/${type}`
+    return fetch(url).then((response) => {
       if (!response.ok) {
         throw new Error(`Failed to fetch examples of type ${type}`)
       }
     if (!response.ok) throw new Error('Failed to fetch descriptions')
     return response.json()
   }
+  // Fetch leaderboard data from the backend
+  static async fetchLeaderboard(datasetName: string): Promise<any> {
+    const response = await fetch(`${VITE_API_SERVER_URL}/data/${datasetName}?dataset_type=benchmark`)
+    if (!response.ok) throw new Error(`Failed to fetch leaderboard for ${datasetName}`)
+    return response.json()
+  }
+  // Fetch leaderboard data from the backend
+  static async fetchChart(datasetName: string): Promise<any> {
+    const response = await fetch(`${VITE_API_SERVER_URL}/data/${datasetName}?dataset_type=attacks_variations`)
+    if (!response.ok) throw new Error(`Failed to fetch chart data for ${datasetName}`)
+    return response.json()
+  }
 }
 export default API
+export { VITE_API_SERVER_URL as API_BASE_URL }

frontend/src/components/Examples.tsx CHANGED Viewed

@@ -7,6 +7,7 @@ import AudioGallery from './AudioGallery'
 import VideoGallery from './VideoGallery'
 import ModelInfoIcon from './ModelInfoIcon'
 import Descriptions from '../Descriptions'
 interface ExamplesProps {
   fileType: 'image' | 'audio' | 'video'
@@ -32,11 +33,24 @@ const Examples = ({ fileType }: ExamplesProps) => {
   const [selectedModel, setSelectedModel] = useState<string | null>(null)
   const [selectedAttack, setSelectedAttack] = useState<string | null>(null)
   const [descriptionsLoaded, setDescriptionsLoaded] = useState(false)
   const descriptions = useRef(Descriptions.getInstance())
   useEffect(() => {
     descriptions.current.load().then(() => setDescriptionsLoaded(true))
-  }, [])
   const location = useLocation()
   // Parse query params for model and attack
@@ -72,9 +86,11 @@ const Examples = ({ fileType }: ExamplesProps) => {
   }, [location.search, selectedModel, examples])
   useEffect(() => {
     setLoading(true)
     setError(null)
-    API.fetchExamplesByType(fileType)
       .then((data) => {
         setExamples(data)
         const models = Object.keys(data)
@@ -97,7 +113,7 @@ const Examples = ({ fileType }: ExamplesProps) => {
         setError(err.message)
         setLoading(false)
       })
-  }, [fileType])
   if (loading) {
     return <LoadingSpinner />
@@ -106,6 +122,11 @@ const Examples = ({ fileType }: ExamplesProps) => {
   return (
     <div className="examples-container">
       <div className="selectors-container flex flex-col gap-4">
         <fieldset className="fieldset w-full p-4 rounded border border-gray-700 bg-base-200">
           <legend className="fieldset-legend font-semibold">Model</legend>
           <div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-4 gap-1 max-h-48 overflow-y-auto pr-2">

 import VideoGallery from './VideoGallery'
 import ModelInfoIcon from './ModelInfoIcon'
 import Descriptions from '../Descriptions'
+import DatasetSelector from './DatasetSelector'
 interface ExamplesProps {
   fileType: 'image' | 'audio' | 'video'
   const [selectedModel, setSelectedModel] = useState<string | null>(null)
   const [selectedAttack, setSelectedAttack] = useState<string | null>(null)
   const [descriptionsLoaded, setDescriptionsLoaded] = useState(false)
+  const [datasets, setDatasets] = useState<any>({})
+  const [selectedDataset, setSelectedDataset] = useState<string>('')
   const descriptions = useRef(Descriptions.getInstance())
   useEffect(() => {
     descriptions.current.load().then(() => setDescriptionsLoaded(true))
+    // Fetch datasets when component loads
+    API.fetchDatasets().then((datasetsData) => {
+      setDatasets(datasetsData)
+      // Set default selected dataset based on fileType
+      const datasetsForType = datasetsData[fileType] || []
+      if (datasetsForType.length > 0) {
+        setSelectedDataset(datasetsForType[0])
+      }
+    }).catch((err) => {
+      console.error('Failed to fetch datasets:', err)
+    })
+  }, [fileType])
   const location = useLocation()
   // Parse query params for model and attack
   }, [location.search, selectedModel, examples])
   useEffect(() => {
+    if (!selectedDataset) return
     setLoading(true)
     setError(null)
+    API.fetchExamplesByType(fileType, selectedDataset)
       .then((data) => {
         setExamples(data)
         const models = Object.keys(data)
         setError(err.message)
         setLoading(false)
       })
+  }, [fileType, selectedDataset])
   if (loading) {
     return <LoadingSpinner />
   return (
     <div className="examples-container">
       <div className="selectors-container flex flex-col gap-4">
+        <DatasetSelector
+          datasetNames={datasets[fileType] || []}
+          selectedDatasetName={selectedDataset}
+          onDatasetNameChange={setSelectedDataset}
+        />
         <fieldset className="fieldset w-full p-4 rounded border border-gray-700 bg-base-200">
           <legend className="fieldset-legend font-semibold">Model</legend>
           <div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-4 gap-1 max-h-48 overflow-y-auto pr-2">