|
import os |
|
import datasets |
|
import pandas as pd |
|
from datetime import datetime |
|
|
|
from config import BACKUP_FOLDER, HF_DATASET_REPO_ID, HF_TOKEN, RESULTS_CSV_FILE, CSV_HEADERS |
|
|
|
def main() -> None:
    """Back up the preference dataset from the Hugging Face Hub to a local CSV.

    Loads ``RESULTS_CSV_FILE`` from the ``HF_DATASET_REPO_ID`` repository
    (``train`` split, authenticated with ``HF_TOKEN``), converts it to a
    pandas DataFrame and writes it to a timestamped CSV file inside
    ``BACKUP_FOLDER``, creating that folder first if it does not exist.

    NOTE(review): the previous description of this function also promised to
    create an empty dataset with the same structure and save it to the Hub.
    No such step exists in this function body; only the backup is performed.

    Errors raised while loading the dataset or creating the backup folder
    propagate to the caller. A failure while writing the CSV file is
    reported on stdout and otherwise ignored (best-effort backup).
    """
    print(f"Attempting to load dataset '{HF_DATASET_REPO_ID}' from Hugging Face Hub (file: {RESULTS_CSV_FILE})...")
    dataset = datasets.load_dataset(
        HF_DATASET_REPO_ID,
        data_files=RESULTS_CSV_FILE,
        token=HF_TOKEN,
        split='train',
    )
    print(f"Successfully loaded dataset. It has {len(dataset)} entries.")
    dataset_df = dataset.to_pandas()

    if not os.path.exists(BACKUP_FOLDER):
        # exist_ok=True keeps this safe if something else creates the folder
        # between the existence check and the call to makedirs.
        os.makedirs(BACKUP_FOLDER, exist_ok=True)
        print(f"Created backup folder: {BACKUP_FOLDER}")

    # Local-time timestamp so each run produces its own backup file name.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_filename = f"preferences_backup_{timestamp}.csv"
    backup_filepath = os.path.join(BACKUP_FOLDER, backup_filename)

    try:
        dataset_df.to_csv(backup_filepath, index=False)
    except Exception as e:
        # Best-effort: report the failure instead of aborting the script.
        print(f"Error saving backup to {backup_filepath}: {e}")
    else:
        print(f"Successfully backed up current preferences to: {backup_filepath}")
|
|
|
|
|
# Run the backup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()