Spaces:

goodmodeler
/

AdGPT

Running

App Files Files Community

test

by goodmodeler - opened Aug 9

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+119

-2670

This PR is in draft mode

Files changed (23) hide show

README.md +15 -30
build_embeddings.py +11 -0
data_loader/download.py +0 -209
data_loader/download_dataset.py +0 -48
deprecated/image_download.py → image_download.py +0 -0
deprecated/image_gen.py → image_gen.py +0 -0
lauguage_model_fine_tuning/accelerate_config.yaml +0 -23
lauguage_model_fine_tuning/distillation/distill_llm.py +0 -485
lauguage_model_fine_tuning/distillation/eval_compare_teacher_student.py +0 -168
lauguage_model_fine_tuning/distillation/launch_distill.sh +0 -60
lauguage_model_fine_tuning/eval_ppo_teacher.py +0 -170
lauguage_model_fine_tuning/launch_ppo_fine_tune_teacher.sh +0 -63
lauguage_model_fine_tuning/launch_supervised_fine_tune_teacher.sh +0 -28
lauguage_model_fine_tuning/merge_teacher_model.py +0 -116
lauguage_model_fine_tuning/ppo_fine_tune_teacher.py +0 -459
lauguage_model_fine_tuning/sft_teacher.py +0 -276
ppo_tune.py +19 -0
requirements.txt +12 -51
retrieval_augmented_generation/build_embeddings.py +0 -246
reward_model.py +21 -0
sft_train.py +41 -0
fully_fine_tune_stablediffusion/train_lora.py → train_lora.py +0 -0
train_model_test.py +0 -238

README.md CHANGED Viewed

@@ -14,8 +14,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
 commands:
-download images: python download.py -i 1 -r 2 -o /home/user/app/image_tmp -z
 pip install git+https://github.com/huggingface/diffusers
 accelerate launch \
@@ -45,39 +43,26 @@ fine tune a trained model: --pretrained_model_name_or_path="./nyc-ad-model/check
 export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
-pipeline:
-# 1 Fully Fine‑tune image model with ZeRO
 accelerate launch --deepspeed_config_file=ds_config_zero3.json train_lora.py
-fully_fine_tune_stablediffusion
-# 2 SFT 120B OSS 语言模型 with QLoRA
-lauguage_model_fine_tuning
-# 3 RLHF PPO 120B OSS 语言模型 with QLoRA : 训练 reward model
-lauguage_model_fine_tuning
-# 4 distill 120B OSS模型给20B OSS模型
-lauguage_model_fine_tuning
-用 Teacher 生成 Response，student模型用LoRA fine tuning
-# 5 Build RAG index embedding table
-retrieval_augmented_generation
 # 6 Inference with RAG
-inference.py
-system flow:
-input: business or product description text
-1. 根据input用RAG取embedding
-2. GPT‑OSS 生成 4 个广告文案 + 标题 + 口号（可选语气：专业/活泼/极简）
-3. GPT‑OSS 基于选中文案生成扩展视觉提示词（主体、配色、镜头、艺术风格）
-4. stablediffusion model 生成 4 张草图（可选 ControlNet-Layout/Logo 插入）
-5. 返回4张海报+后处理
-output: an advertisement sentence and a poster image
-design details:
-LoRA fine tune teacher OSS 120B model using smangrul/ad-copy-generation (广告文案生成)
-LoRA distill knowledge to OSS 20B model

 commands:
 pip install git+https://github.com/huggingface/diffusers
 accelerate launch \
 export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+import torch
+torch.cuda.empty_cache()
+torch.cuda.reset_peak_memory_stats()
+7/12
+# 1 Fine‑tune image model LoRA+QLoRA
 accelerate launch --deepspeed_config_file=ds_config_zero3.json train_lora.py
+python train_lora.py
+# 2 SFT 语言模型
+python sft_train.py
+# 3 Build RAG index
+python build_embeddings.py
+# 4 (可选) 收集偏好 → 训练 reward model
+python reward_model.py
+# 5 PPO RLHF 微调
+python ppo_tune.py
 # 6 Inference with RAG
+python rag_infer.py

build_embeddings.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from sentence_transformers import SentenceTransformer
+import faiss, json, glob, os, numpy as np
+model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
+texts=[]; vecs=[]
+for f in glob.glob("nyc_ads_dataset/*.json"):
+    cap=json.load(open(f))["caption"]
+    texts.append(cap); vecs.append(model.encode(cap,normalize_embeddings=True))
+vecs=np.vstack(vecs).astype("float32")
+index=faiss.IndexFlatIP(vecs.shape[1]); index.add(vecs)
+faiss.write_index(index,"prompt.index"); json.dump(texts,open("prompt.txt","w"))

data_loader/download.py DELETED Viewed

@@ -1,209 +0,0 @@
-# Author: Marco Lustri 2022 - https://github.com/TheLustriVA
-# MIT License
-"""A script to make downloading the DiffusionDB dataset easier."""
-from urllib.error import HTTPError
-from urllib.request import urlretrieve
-from alive_progress import alive_bar
-from os.path import exists
-import shutil
-import os
-import time
-import argparse
-index = None  # initiate main arguments as None
-range_max = None
-output = None
-unzip = None
-large = None
-parser = argparse.ArgumentParser(description="Download a file from a URL")  #
-# It's adding arguments to the parser.
-parser.add_argument(
-    "-i",
-    "--index",
-    type=int,
-    default=1,
-    help="File to download or lower bound of range if -r is set",
-)
-parser.add_argument(
-    "-r",
-    "--range",
-    type=int,
-    default=None,
-    help="Upper bound of range if -i is provided",
-)
-parser.add_argument(
-    "-o", "--output", type=str, default="images", help="Output directory name"
-)
-parser.add_argument(
-    "-z",
-    "--unzip",
-    default=False,
-    help="Unzip the file after downloading",
-    # It's setting the argument to True if it's provided.
-    action="store_true",
-)
-parser.add_argument(
-    "-l",
-    "--large",
-    default=False,
-    help="Download from DiffusionDB Large (14 million images)",
-    action="store_true",
-)
-args = parser.parse_args()  # parse the arguments
-# It's checking if the user has provided any arguments, and if they have, it
-# sets the variables to the arguments.
-if args.index:
-    index = args.index
-if args.range:
-    range_max = args.range
-if args.output:
-    output = args.output
-if args.unzip:
-    unzip = args.unzip
-if args.large:
-    large = args.large
-def download(index=1, range_index=0, output="", large=False):
-    """
-    Download a file from a URL and save it to a local file
-    :param index: The index of the file to download, defaults to 1 (optional)
-    :param range_index: The number of files to download. If you want to download
-        all files, set this to the number of files you want to download,
-        defaults to 0 (optional)
-    :param output: The directory to download the files to :return: A list of
-        files to unzip
-    :param large: If downloading from DiffusionDB Large (14 million images)
-        instead of DiffusionDB 2M (2 million images)
-    """
-    baseurl = "https://huggingface.co/datasets/poloclub/diffusiondb/resolve/main/"
-    files_to_unzip = []
-    if large:
-        if index <= 10000:
-            url = f"{baseurl}diffusiondb-large-part-1/part-{index:06}.zip"
-        else:
-            url = f"{baseurl}diffusiondb-large-part-2/part-{index:06}.zip"
-    else:
-        url = f"{baseurl}images/part-{index:06}.zip"
-    if output != "":
-        output = f"{output}/"
-    if not exists(output):
-        os.makedirs(output)
-    if range_index == 0:
-        print("Downloading file: ", url)
-        file_path = f"{output}part-{index:06}.zip"
-        try:
-            urlretrieve(url, file_path)
-        except HTTPError as e:
-            print(f"Encountered an HTTPError downloading file: {url} - {e}")
-        if unzip:
-            unzip(file_path)
-    else:
-        # It's downloading the files numbered from index to range_index.
-        with alive_bar(range_index - index, title="Downloading files") as bar:
-            for idx in range(index, range_index):
-                if large:
-                    if idx <= 10000:
-                        url = f"{baseurl}diffusiondb-large-part-1/part-{idx:06}.zip"
-                    else:
-                        url = f"{baseurl}diffusiondb-large-part-2/part-{idx:06}.zip"
-                else:
-                    url = f"{baseurl}images/part-{idx:06}.zip"
-                loop_file_path = f"{output}part-{idx:06}.zip"
-                # It's trying to download the file, and if it encounters an
-                # HTTPError, it prints the error.
-                try:
-                    urlretrieve(url, loop_file_path)
-                except HTTPError as e:
-                    print(f"HTTPError downloading file: {url} - {e}")
-                files_to_unzip.append(loop_file_path)
-                # It's writing the url of the file to a manifest file.
-                with open("manifest.txt", "a") as f:
-                    f.write(url + "\n")
-                time.sleep(0.1)
-                bar()
-    # It's checking if the user wants to unzip the files, and if they do, it
-    # returns a list of files to unzip. It would be a bad idea to put these
-    # together as the process is already lengthy.
-    if unzip and len(files_to_unzip) > 0:
-        return files_to_unzip
-def unzip_file(file: str, extract_to: str = None):
-   """
-   > This function takes a zip file and unpacks it to specified directory
-   :param file: str - path to zip file
-   :param extract_to: str - directory to extract to (default: same name as zip file)
-   :return: The extraction directory path
-   """
-   if extract_to is None:
-       extract_to = file.replace('.zip', '')
-   shutil.unpack_archive(file, extract_to)
-   return f"File: {file} has been unzipped to {extract_to}"
-def unzip_all(files: list):
-    """
-    > Unzip all files in a list of files
-    :param files: list
-    :type files: list
-    """
-    with alive_bar(len(files), title="Unzipping files") as bar:
-        for file in files:
-            unzip_file(file, '/home/user/app/images')
-            time.sleep(0.1)
-            bar()
-def main(index=None, range_max=None, output=None, unzip=None, large=None):
-    """
-    `main` is a function that takes in an index, a range_max, an output, and an
-    unzip, and if the user confirms that they have enough space, it downloads
-    the files from the index to the output, and if unzip is true, it unzips them
-    :param index: The index of the file you want to download
-    :param range_max: The number of files to download
-    :param output: The directory to download the files to
-    :param unzip: If you want to unzip the files after downloading them, set
-        this to True
-    :param large: If you want to download from DiffusionDB Large (14 million
-        images) instead of DiffusionDB 2M (2 million images)
-    :return: A list of files that have been downloaded
-    """
-    if index and range_max:
-        if range_max - index >= 1999:
-            confirmation = input("Do you have at least 1.7Tb free: (y/n)")
-            if confirmation != "y":
-                return
-        files = download(index, range_max, output, large)
-        if unzip:
-            unzip_all(files)
-    elif index:
-        download(index, output=output, large=large)
-    else:
-        print("No index provided")
-# This is a common pattern in Python. It allows you to run the main function of
-# your script by running the script through the interpreter. It also allows you
-# to import the script into the interpreter without automatically running the
-# main function.
-if __name__ == "__main__":
-    main(index, range_max, output, unzip, large)

data_loader/download_dataset.py DELETED Viewed

@@ -1,48 +0,0 @@
-import os
-import json
-import pandas as pd
-from datasets import load_dataset
-from PIL import Image
-import shutil
-from tqdm import tqdm
-def load_and_process():
-    dataset = load_dataset("poloclub/diffusiondb", split="train[:1000]")
-    os.makedirs("processed/images", exist_ok=True)
-    processed_data = []
-    for idx, sample in enumerate(tqdm(dataset)):
-        image_id = f"{idx:06d}.png"
-        if sample.get('image'):
-            sample['image'].save(f"processed/images/{image_id}")
-        data_entry = {
-            "id": idx,
-            "image_file": image_id,
-            "prompt": sample.get('p', ''),
-            "seed": sample.get('se', 0),
-            "cfg_scale": sample.get('c', 0.0),
-            "steps": sample.get('st', 0),
-            "sampler": sample.get('sa', '')
-        }
-        processed_data.append(data_entry)
-    return processed_data
-def save_data(data):
-    with open("processed/data.json", "w") as f:
-        json.dump(data, f)
-    df = pd.DataFrame(data)
-    df.to_csv("processed/data.csv", index=False)
-    df.to_parquet("processed/data.parquet", index=False)
-def main():
-    data = load_and_process()
-    save_data(data)
-    print(f"Processed {len(data)} samples")
-if __name__ == "__main__":
-    main()

deprecated/image_download.py → image_download.py RENAMED Viewed

File without changes

deprecated/image_gen.py → image_gen.py RENAMED Viewed

File without changes

lauguage_model_fine_tuning/accelerate_config.yaml DELETED Viewed

@@ -1,23 +0,0 @@
-# accelerate_config.yaml - 多GPU训练配置
-compute_environment: LOCAL_MACHINE
-distributed_type: MULTI_GPU
-downcast_bf16: 'no'
-gpu_ids: all
-machine_rank: 0
-main_training_function: main
-mixed_precision: fp16
-num_machines: 1
-num_processes: 4  # 根据GPU数量调整
-rdzv_backend: static
-same_network: true
-tpu_env: []
-tpu_use_cluster: false
-tpu_use_sudo: false
-use_cpu: false
-# RLHF特定设置
-gradient_accumulation_steps: 8
-gradient_clipping: 1.0
-learning_rate: 1e-5
-dataloader_drop_last: true

lauguage_model_fine_tuning/distillation/distill_llm.py DELETED Viewed

@@ -1,485 +0,0 @@
-#!/usr/bin/env python3
-"""
-Teacher-Student知识蒸馏脚本
-将经过SFT+PPO RLHF的Teacher模型蒸馏到更小的Student模型
-"""
-import os
-import torch
-import torch.nn.functional as F
-from torch.utils.data import DataLoader, Dataset
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    TrainingArguments,
-    Trainer,
-    DataCollatorForLanguageModeling,
-    logging,
-)
-from datasets import load_dataset, Dataset as HFDataset
-from peft import LoraConfig, get_peft_model, TaskType
-import numpy as np
-import wandb
-from typing import Dict, List, Any, Optional
-import json
-from tqdm import tqdm
-import warnings
-warnings.filterwarnings("ignore")
-logging.set_verbosity(logging.CRITICAL)
-class DistillationConfig:
-    """蒸馏训练配置"""
-    # 模型路径
-    teacher_model_path = "./rlhf_teacher_model"  # RLHF后的Teacher模型
-    student_model_name = "microsoft/DialoGPT-medium"  # 替换为实际的OpenAI OSS 20B模型
-    # 蒸馏参数
-    temperature = 4.0           # 蒸馏温度
-    alpha = 0.7                # 蒸馏损失权重
-    beta = 0.3                 # 学生损失权重
-    gamma = 0.1                # 特征匹配损失权重
-    # 训练参数
-    learning_rate = 1e-4
-    num_train_epochs = 3
-    per_device_train_batch_size = 2
-    per_device_eval_batch_size = 4
-    gradient_accumulation_steps = 8
-    warmup_ratio = 0.1
-    weight_decay = 0.01
-    logging_steps = 50
-    eval_steps = 500
-    save_steps = 1000
-    # LoRA配置（为Student模型添加LoRA以提高训练效率）
-    use_lora = True
-    lora_r = 32
-    lora_alpha = 64
-    lora_dropout = 0.1
-    # 数据配置
-    max_length = 512
-    num_distill_samples = 10000  # 用于蒸馏的样本数量
-    # 输出配置
-    output_dir = "./distilled_student_model"
-    run_name = "teacher-student-distillation"
-class DistillationDataset(Dataset):
-    """蒸馏数据集类"""
-    def __init__(self, teacher_outputs: List[Dict], tokenizer, max_length: int = 512):
-        self.data = teacher_outputs
-        self.tokenizer = tokenizer
-        self.max_length = max_length
-    def __len__(self):
-        return len(self.data)
-    def __getitem__(self, idx):
-        item = self.data[idx]
-        # 构建完整的输入-输出序列
-        full_text = f"### Human: {item['prompt']}\n### Assistant: {item['response']}"
-        # Tokenize
-        encoded = self.tokenizer(
-            full_text,
-            truncation=True,
-            padding="max_length",
-            max_length=self.max_length,
-            return_tensors="pt"
-        )
-        return {
-            "input_ids": encoded["input_ids"].squeeze(),
-            "attention_mask": encoded["attention_mask"].squeeze(),
-            "teacher_logits": torch.tensor(item["teacher_logits"], dtype=torch.float),
-            "labels": encoded["input_ids"].squeeze()
-        }
-class KnowledgeDistillationTrainer(Trainer):
-    """知识蒸馏训练器"""
-    def __init__(self, teacher_model, student_model, temperature=4.0, alpha=0.7, beta=0.3, gamma=0.1, **kwargs):
-        super().__init__(model=student_model, **kwargs)
-        self.teacher_model = teacher_model
-        self.teacher_model.eval()  # 冻结Teacher模型
-        self.temperature = temperature
-        self.alpha = alpha  # 蒸馏损失权重
-        self.beta = beta    # 学生损失权重
-        self.gamma = gamma  # 特征匹配损失权重
-    def compute_loss(self, model, inputs, return_outputs=False):
-        """计算蒸馏损失"""
-        labels = inputs.get("labels")
-        teacher_logits = inputs.get("teacher_logits").to(model.device)
-        # Student模型前向传播
-        student_outputs = model(**{k: v for k, v in inputs.items() if k not in ["teacher_logits"]})
-        student_logits = student_outputs.logits
-        # 计算各种损失
-        losses = {}
-        # 1. 标准语言模型损失 (学生模型自己的损失)
-        if labels is not None:
-            shift_logits = student_logits[..., :-1, :].contiguous()
-            shift_labels = labels[..., 1:].contiguous()
-            loss_fct = torch.nn.CrossEntropyLoss()
-            student_loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
-            losses["student_loss"] = student_loss
-        # 2. 蒸馏损失 (KL散度)
-        if teacher_logits is not None:
-            # 确保维度匹配
-            if teacher_logits.shape != student_logits.shape:
-                min_seq_len = min(teacher_logits.shape[1], student_logits.shape[1])
-                teacher_logits = teacher_logits[:, :min_seq_len, :]
-                student_logits_for_distill = student_logits[:, :min_seq_len, :]
-            else:
-                student_logits_for_distill = student_logits
-            # 计算软标签概率
-            teacher_probs = F.softmax(teacher_logits / self.temperature, dim=-1)
-            student_log_probs = F.log_softmax(student_logits_for_distill / self.temperature, dim=-1)
-            # KL散度损失
-            distill_loss = F.kl_div(
-                student_log_probs,
-                teacher_probs,
-                reduction="batchmean"
-            ) * (self.temperature ** 2)
-            losses["distill_loss"] = distill_loss
-        # 3. 组合总损失
-        total_loss = 0
-        if "student_loss" in losses:
-            total_loss += self.beta * losses["student_loss"]
-        if "distill_loss" in losses:
-            total_loss += self.alpha * losses["distill_loss"]
-        # 记录各项损失
-        self.log({
-            "train/total_loss": total_loss.item(),
-            "train/student_loss": losses.get("student_loss", 0).item() if "student_loss" in losses else 0,
-            "train/distill_loss": losses.get("distill_loss", 0).item() if "distill_loss" in losses else 0,
-        })
-        return (total_loss, student_outputs) if return_outputs else total_loss
-def prepare_student_model(config: DistillationConfig):
-    """准备Student模型"""
-    print("🎓 Preparing student model...")
-    # 加载Student基础模型
-    student_model = AutoModelForCausalLM.from_pretrained(
-        config.student_model_name,
-        torch_dtype=torch.float16,
-        device_map="auto",
-        trust_remote_code=True,
-    )
-    # 添加LoRA（可选，用于高效训练）
-    if config.use_lora:
-        print("🔧 Adding LoRA to student model...")
-        lora_config = LoraConfig(
-            task_type=TaskType.CAUSAL_LM,
-            inference_mode=False,
-            r=config.lora_r,
-            lora_alpha=config.lora_alpha,
-            lora_dropout=config.lora_dropout,
-            target_modules=[
-                "q_proj", "k_proj", "v_proj", "o_proj",
-                "gate_proj", "up_proj", "down_proj",
-            ]
-        )
-        student_model = get_peft_model(student_model, lora_config)
-        student_model.print_trainable_parameters()
-    return student_model
-def load_teacher_model(config: DistillationConfig):
-    """加载Teacher模型"""
-    print("👨‍🏫 Loading teacher model...")
-    teacher_model = AutoModelForCausalLM.from_pretrained(
-        config.teacher_model_path,
-        torch_dtype=torch.float16,
-        device_map="auto",
-        trust_remote_code=True,
-    )
-    teacher_model.eval()
-    return teacher_model
-def generate_distillation_data(teacher_model, tokenizer, config: DistillationConfig):
-    """生成蒸馏数据"""
-    print("📊 Generating distillation dataset...")
-    # 加载提示数据集
-    dataset_sources = [
-        "smangrul/ad-copy-generation",
-        # 可以添加更多数据源
-    ]
-    all_prompts = []
-    for source in dataset_sources:
-        try:
-            ds = load_dataset(source, split="train")
-            # 提取提示词
-            for item in ds:
-                if "conversations" in item and len(item["conversations"]) > 0:
-                    prompt = item["conversations"][0].get("value", "")
-                    if len(prompt.strip()) > 10:
-                        all_prompts.append(prompt.strip())
-        except Exception as e:
-            print(f"⚠️ Error loading {source}: {e}")
-    # 限制样本数量
-    if len(all_prompts) > config.num_distill_samples:
-        all_prompts = all_prompts[:config.num_distill_samples]
-    print(f"📝 Generating responses for {len(all_prompts)} prompts...")
-    distillation_data = []
-    teacher_model.eval()
-    with torch.no_grad():
-        for i, prompt in enumerate(tqdm(all_prompts, desc="Generating teacher responses")):
-            try:
-                # 格式化输入
-                formatted_prompt = f"### Human: {prompt}\n### Assistant:"
-                inputs = tokenizer(
-                    formatted_prompt,
-                    return_tensors="pt",
-                    truncation=True,
-                    max_length=config.max_length // 2
-                ).to(teacher_model.device)
-                # 生成响应
-                outputs = teacher_model.generate(
-                    **inputs,
-                    max_new_tokens=200,
-                    temperature=0.7,
-                    top_p=0.9,
-                    do_sample=True,
-                    pad_token_id=tokenizer.eos_token_id,
-                    return_dict_in_generate=True,
-                    output_scores=True
-                )
-                # 解码响应
-                generated_ids = outputs.sequences[0][inputs.input_ids.shape[1]:]
-                response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
-                # 获取Teacher的logits
-                full_text = f"### Human: {prompt}\n### Assistant: {response}"
-                full_inputs = tokenizer(
-                    full_text,
-                    return_tensors="pt",
-                    truncation=True,
-                    max_length=config.max_length
-                ).to(teacher_model.device)
-                teacher_outputs = teacher_model(**full_inputs)
-                teacher_logits = teacher_outputs.logits.cpu().numpy()
-                distillation_data.append({
-                    "prompt": prompt,
-                    "response": response,
-                    "teacher_logits": teacher_logits.tolist()
-                })
-                # 定期保存中间结果
-                if (i + 1) % 100 == 0:
-                    print(f"Generated {i + 1}/{len(all_prompts)} samples")
-            except Exception as e:
-                print(f"⚠️ Error generating for prompt {i}: {e}")
-                continue
-    print(f"✅ Generated {len(distillation_data)} teacher-student pairs")
-    # 保存蒸馏数据
-    with open("distillation_data.json", "w", encoding="utf-8") as f:
-        json.dump(distillation_data, f, ensure_ascii=False, indent=2)
-    return distillation_data
-def create_data_collator(tokenizer):
-    """创建数据整理器"""
-    return DataCollatorForLanguageModeling(
-        tokenizer=tokenizer,
-        mlm=False,
-        pad_to_multiple_of=8
-    )
-def run_distillation():
-    """主要的蒸馏训练流程"""
-    print("🚀 Starting Teacher-Student Distillation...")
-    config = DistillationConfig()
-    # 初始化wandb
-    wandb.init(
-        project="teacher-student-distillation",
-        config=vars(config),
-        name=config.run_name
-    )
-    # 加载tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(config.teacher_model_path)
-    if tokenizer.pad_token is None:
-        tokenizer.pad_token = tokenizer.eos_token
-    # 加载模型
-    teacher_model = load_teacher_model(config)
-    student_model = prepare_student_model(config)
-    # 生成蒸馏数据
-    if os.path.exists("distillation_data.json"):
-        print("📂 Loading existing distillation data...")
-        with open("distillation_data.json", "r", encoding="utf-8") as f:
-            distillation_data = json.load(f)
-    else:
-        distillation_data = generate_distillation_data(teacher_model, tokenizer, config)
-    # 创建数据集
-    train_size = int(0.9 * len(distillation_data))
-    train_data = distillation_data[:train_size]
-    eval_data = distillation_data[train_size:]
-    train_dataset = DistillationDataset(train_data, tokenizer, config.max_length)
-    eval_dataset = DistillationDataset(eval_data, tokenizer, config.max_length)
-    print(f"📊 Training samples: {len(train_dataset)}")
-    print(f"📊 Evaluation samples: {len(eval_dataset)}")
-    # 训练参数
-    training_args = TrainingArguments(
-        output_dir=config.output_dir,
-        overwrite_output_dir=True,
-        num_train_epochs=config.num_train_epochs,
-        per_device_train_batch_size=config.per_device_train_batch_size,
-        per_device_eval_batch_size=config.per_device_eval_batch_size,
-        gradient_accumulation_steps=config.gradient_accumulation_steps,
-        learning_rate=config.learning_rate,
-        weight_decay=config.weight_decay,
-        warmup_ratio=config.warmup_ratio,
-        logging_steps=config.logging_steps,
-        eval_steps=config.eval_steps,
-        save_steps=config.save_steps,
-        evaluation_strategy="steps",
-        save_strategy="steps",
-        load_best_model_at_end=True,
-        metric_for_best_model="eval_loss",
-        greater_is_better=False,
-        report_to="wandb",
-        run_name=config.run_name,
-        fp16=True,
-        dataloader_pin_memory=False,
-        remove_unused_columns=False,
-        group_by_length=True,
-    )
-    # 创建数据整理器
-    data_collator = create_data_collator(tokenizer)
-    # 创建蒸馏训练器
-    trainer = KnowledgeDistillationTrainer(
-        teacher_model=teacher_model,
-        student_model=student_model,
-        args=training_args,
-        train_dataset=train_dataset,
-        eval_dataset=eval_dataset,
-        data_collator=data_collator,
-        tokenizer=tokenizer,
-        temperature=config.temperature,
-        alpha=config.alpha,
-        beta=config.beta,
-        gamma=config.gamma,
-    )
-    # 开始训练
-    print("🔥 Starting distillation training...")
-    trainer.train()
-    # 保存最终模型
-    print("💾 Saving distilled student model...")
-    trainer.save_model()
-    tokenizer.save_pretrained(config.output_dir)
-    # 评估模型
-    print("🧪 Evaluating distilled model...")
-    evaluate_distilled_model(trainer.model, tokenizer, config)
-    wandb.finish()
-    print("✅ Distillation training completed!")
-def evaluate_distilled_model(model, tokenizer, config: DistillationConfig):
-    """评估蒸馏后的模型"""
-    print("📊 Evaluating distilled student model...")
-    test_prompts = [
-        "Create an advertisement for a revolutionary AI-powered fitness tracker",
-        "Write marketing copy for an eco-friendly electric vehicle",
-        "Generate a slogan for a productivity app for remote workers",
-        "Create ad copy for a sustainable fashion brand targeting millennials",
-        "Write promotional content for a mental health app",
-    ]
-    model.eval()
-    results = []
-    for prompt in test_prompts:
-        formatted_prompt = f"### Human: {prompt}\n### Assistant:"
-        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=150,
-                temperature=0.7,
-                top_p=0.9,
-                do_sample=True,
-                pad_token_id=tokenizer.eos_token_id,
-            )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        generated_text = response[len(formatted_prompt):].strip()
-        results.append({
-            "prompt": prompt,
-            "response": generated_text
-        })
-        print(f"\n🔍 Prompt: {prompt}")
-        print(f"📝 Student Response: {generated_text}")
-        print("-" * 80)
-    # 保存评估结果
-    with open(f"{config.output_dir}/evaluation_results.json", "w", encoding="utf-8") as f:
-        json.dump(results, f, ensure_ascii=False, indent=2)
-    return results
-if __name__ == "__main__":
-    # 设置环境变量
-    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
-    os.environ["TOKENIZERS_PARALLELISM"] = "false"
-    # 检查GPU
-    if torch.cuda.is_available():
-        print(f"🔥 Using {torch.cuda.device_count()} GPUs")
-        for i in range(torch.cuda.device_count()):
-            print(f"   GPU {i}: {torch.cuda.get_device_name(i)}")
-    else:
-        print("⚠️ Warning: No GPU available, using CPU (very slow)")
-    # 开始蒸馏训练
-    run_distillation()

lauguage_model_fine_tuning/distillation/eval_compare_teacher_student.py DELETED Viewed

@@ -1,168 +0,0 @@
-#!/usr/bin/env python3
-"""
-Teacher-Student模型性能比较脚本
-比较RLHF Teacher模型和蒸馏后的Student模型的性能
-"""
-import torch
-import argparse
-import json
-import time
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from typing import List, Dict, Any
-import numpy as np
-from datetime import datetime
-class ModelComparator:
-    def __init__(self, teacher_path: str, student_path: str):
-        """Load the teacher and student models together with their tokenizers.
-        Args:
-            teacher_path: Path or identifier passed to ``from_pretrained`` for the teacher.
-            student_path: Path or identifier passed to ``from_pretrained`` for the student.
-        """
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        print("📥 Loading Teacher model...")
-        # Both models are loaded in float16 with device_map="auto".
-        self.teacher_model = AutoModelForCausalLM.from_pretrained(
-            teacher_path,
-            torch_dtype=torch.float16,
-            device_map="auto"
-        )
-        self.teacher_tokenizer = AutoTokenizer.from_pretrained(teacher_path)
-        print("📥 Loading Student model...")
-        self.student_model = AutoModelForCausalLM.from_pretrained(
-            student_path,
-            torch_dtype=torch.float16,
-            device_map="auto"
-        )
-        self.student_tokenizer = AutoTokenizer.from_pretrained(student_path)
-        # Set pad tokens: fall back to the EOS token when a tokenizer defines none.
-        for tokenizer in [self.teacher_tokenizer, self.student_tokenizer]:
-            if tokenizer.pad_token is None:
-                tokenizer.pad_token = tokenizer.eos_token
-    def generate_response(self, model, tokenizer, prompt: str, **kwargs) -> Dict[str, Any]:
-        """Generate a response and record performance metrics.
-        Args:
-            model: Model whose ``generate`` method is called.
-            tokenizer: Tokenizer used to encode the prompt and decode the output.
-            prompt: Raw user prompt; it is wrapped in the "### Human / ### Assistant" template.
-            **kwargs: Extra generation options; they override the defaults below.
-        Returns:
-            Dict with the generated text, wall-clock generation time, token counts
-            and tokens per second.
-        """
-        formatted_prompt = f"### Human: {prompt}\n### Assistant:"
-        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
-        # kwargs are unpacked last, so caller-supplied values replace these defaults.
-        generation_config = {
-            "max_new_tokens": 200,
-            "temperature": 0.7,
-            "top_p": 0.9,
-            "do_sample": True,
-            "pad_token_id": tokenizer.eos_token_id,
-            **kwargs
-        }
-        # Measure generation time
-        start_time = time.time()
-        with torch.no_grad():
-            outputs = model.generate(**inputs, **generation_config)
-        generation_time = time.time() - start_time
-        # Decode the response
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # NOTE(review): slicing by character count assumes the decoded text starts with
-        # formatted_prompt verbatim — confirm the tokenizer round-trips the prompt exactly.
-        generated_text = response[len(formatted_prompt):].strip()
-        # Count tokens by re-encoding the generated text
-        # NOTE(review): re-encoding may not equal the number of tokens actually generated
-        # (e.g. if encode() adds special tokens) — verify.
-        generated_tokens = len(tokenizer.encode(generated_text))
-        return {
-            "response": generated_text,
-            "generation_time": generation_time,
-            "tokens_generated": generated_tokens,
-            "tokens_per_second": generated_tokens / generation_time if generation_time > 0 else 0,
-            "prompt_tokens": inputs.input_ids.shape[1],
-            "total_tokens": outputs.shape[1]
-        }
-    def calculate_model_size(self, model) -> Dict[str, Any]:
-        """Compute the parameter counts and in-memory size of a model.
-        Args:
-            model: Object exposing ``parameters()``.
-        Returns:
-            Dict with total and trainable parameter counts, the size in MB and GB,
-            and a ``compression_ratio`` placeholder set to None.
-        """
-        param_count = sum(p.numel() for p in model.parameters())
-        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
-        # Estimate model size in bytes: element count times element size, summed over parameters
-        model_size_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
-        model_size_mb = model_size_bytes / (1024 * 1024)
-        model_size_gb = model_size_mb / 1024
-        return {
-            "total_parameters": param_count,
-            "trainable_parameters": trainable_params,
-            "model_size_mb": model_size_mb,
-            "model_size_gb": model_size_gb,
-            "compression_ratio": None  # to be computed during comparison
-        }
-    def evaluate_quality_metrics(self, responses: List[str]) -> Dict[str, float]:
-        """Compute simple surface-level quality metrics over generated responses.
-        Args:
-            responses: Generated texts to analyse.
-        Returns:
-            Dict with the mean and standard deviation of whitespace-split word counts,
-            a type-token ratio over all responses, and the mean count of '.', '!'
-            and '?' characters per response.
-        """
-        # NOTE(review): presumably `responses` is non-empty; the np.mean/np.std calls
-        # below have no guard for an empty list — confirm against callers.
-        metrics = {}
-        # Average response length (in whitespace-separated words)
-        avg_length = np.mean([len(resp.split()) for resp in responses])
-        metrics["avg_response_length"] = avg_length
-        # Standard deviation of response length
-        length_std = np.std([len(resp.split()) for resp in responses])
-        metrics["response_length_std"] = length_std
-        # Vocabulary richness (a simplified type-token ratio)
-        all_words = []
-        for resp in responses:
-            all_words.extend(resp.lower().split())
-        if all_words:
-            unique_words = len(set(all_words))
-            total_words = len(all_words)
-            metrics["vocabulary_richness"] = unique_words / total_words
-        else:
-            metrics["vocabulary_richness"] = 0.0
-        # Average sentence count, approximated by counting terminal punctuation characters
-        avg_sentences = np.mean([resp.count('.') + resp.count('!') + resp.count('?') for resp in responses])
-        metrics["avg_sentences_per_response"] = avg_sentences
-        return metrics
-    def run_comprehensive_comparison(self) -> Dict[str, Any]:
-        """运行全面的性能比较"""
-        print("🔍 Running comprehensive Teacher-Student comparison...")
-        # 测试提示词集合
-        test_prompts = [
-            # 广告文案生成
-            "Create an advertisement for a revolutionary smartphone with advanced AI features",
-            "Write marketing copy for an eco-friendly electric vehicle targeting urban professionals",
-            "Generate a catchy slogan for a fitness app that uses AI personal training",
-            "Create promotional content for a sustainable fashion brand targeting Gen Z",
-            "Write ad copy for a productivity software targeting remote workers",
-            # 不同复杂度的任务
-            "Explain the benefits of renewable energy in simple terms",
-            "Write a brief product description for wireless headphones with noise cancellation",
-            "Create a social media post promoting a new coffee shop opening",
-            "Generate marketing text for a luxury watch brand",
-            "Write an email subject line for a summer sale promotion",
-            # 创意任务
-            "Create a tagline for a travel app that focuses on sustainable tourism",
-            "Write a short product pitch for smart home security system",
-            "Generate advertising copy for a meal delivery service focusing on healthy options",
-            "Create marketing content for an online learning platform",
-            "Write promotional text for a mental wellness app"
-        ]
-        # 初始化结果收集
-        results = {
-            "comparison_date": datetime.now().isoformat(),
-            "test_prompts_count": len(test_prompts),
-            "teacher_results": {},
-            "student_results": {},
-            "performance_comparison": {},
-            "detailed_responses": []
-        }
-        # 获取模型信息
-        print("📊 Analyzing model specifications...")
-        teacher_info = self.calculate_model_size(self.teacher_model)
-        student_info = self.calculate_model_size(self.student_model)

lauguage_model_fine_tuning/distillation/launch_distill.sh DELETED Viewed

@@ -1,60 +0,0 @@
-#!/bin/bash
-# launch_distillation.sh - Launch Teacher-Student distillation training
-# NOTE(review): the PR file list names this script launch_distill.sh — confirm which name is intended.
-echo "🎓 Starting Teacher-Student Distillation Training..."
-# Check prerequisites
-echo "📋 Checking prerequisites..."
-# Check for the teacher model; abort with exit code 1 when the directory is missing
-if [ ! -d "./rlhf_teacher_model" ]; then
-    echo "❌ Error: RLHF Teacher model not found at ./rlhf_teacher_model"
-    echo "   Please complete SFT and RLHF training first"
-    exit 1
-fi
-# Check GPU resources
-echo "📊 GPU Resources:"
-nvidia-smi --query-gpu=index,name,memory.total,memory.free --format=csv
-# Check available GPU memory (free memory summed over every GPU nvidia-smi reports)
-AVAILABLE_MEMORY=$(nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits | awk '{sum+=$1} END {print sum}')
-echo "Available GPU Memory: ${AVAILABLE_MEMORY} MB"
-# Below 40000 MB only a warning is printed; the script keeps going
-if [ "$AVAILABLE_MEMORY" -lt 40000 ]; then
-    echo "⚠️  Warning: Distillation training requires significant GPU memory (>40GB recommended)"
-    echo "   Consider using gradient checkpointing or smaller batch sizes"
-fi
-# Set environment variables
-export CUDA_VISIBLE_DEVICES=0,1  # adjust to the GPUs available
-export TOKENIZERS_PARALLELISM=false
-export WANDB_PROJECT="teacher-student-distillation"
-export WANDB_RUN_NAME="distillation-$(date +%Y%m%d_%H%M%S)"
-# Create output directories
-mkdir -p ./distilled_student_model
-mkdir -p ./distillation_logs
-# Check whether distillation data already exists (informational only; nothing is generated here)
-if [ -f "./distillation_data.json" ]; then
-    echo "📂 Found existing distillation data, will reuse it"
-else
-    echo "📊 Will generate new distillation data from teacher model"
-fi
-echo "🔥 Starting distillation training..."
-# Launch training; output is mirrored to a timestamped log file via tee
-# NOTE(review): the PR file list shows distill_llm.py in this directory, not
-# teacher_student_distillation.py — confirm the script name.
-python teacher_student_distillation.py 2>&1 | tee ./distillation_logs/distillation_$(date +%Y%m%d_%H%M%S).log
-echo "✅ Distillation training completed!"
-# Post-training comparison
-# NOTE(review): the PR file list shows eval_compare_teacher_student.py, not
-# compare_teacher_student.py — confirm the script name.
-echo "⚖️ Comparing Teacher vs Student performance..."
-python compare_teacher_student.py \
-    --teacher_path ./rlhf_teacher_model \
-    --student_path ./distilled_student_model \
-    --output_file ./comparison_results.json
-echo "📊 Results saved to comparison_results.json"

lauguage_model_fine_tuning/eval_ppo_teacher.py DELETED Viewed

@@ -1,170 +0,0 @@
-#!/usr/bin/env python3
-"""
-RLHF模型评估脚本
-评估训练后模型的对齐效果和生成质量
-"""
-import torch
-import argparse
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from datasets import Dataset
-import numpy as np
-from typing import List, Dict
-import json
-class RLHFEvaluator:
-    def __init__(self, model_path: str, baseline_path: str = None):
-        """
-        Initialise the evaluator.
-        Args:
-            model_path: Path to the model produced by RLHF training.
-            baseline_path: Path to the baseline model (the SFT model); optional.
-        """
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        # Load the RLHF model (float16, device_map="auto")
-        print(f"📥 Loading RLHF model from {model_path}...")
-        self.rlhf_model = AutoModelForCausalLM.from_pretrained(
-            model_path,
-            torch_dtype=torch.float16,
-            device_map="auto"
-        )
-        # A single tokenizer, loaded from the RLHF model path, is kept on the instance.
-        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
-        # Load the baseline model (optional); stays None when no path is given
-        self.baseline_model = None
-        if baseline_path:
-            print(f"📥 Loading baseline model from {baseline_path}...")
-            self.baseline_model = AutoModelForCausalLM.from_pretrained(
-                baseline_path,
-                torch_dtype=torch.float16,
-                device_map="auto"
-            )
-        # Set the pad token: fall back to the EOS token when none is defined
-        if self.tokenizer.pad_token is None:
-            self.tokenizer.pad_token = self.tokenizer.eos_token
-    def generate_response(self, prompt: str, model=None, **kwargs) -> str:
-        """Generate a response for a prompt.
-        Args:
-            prompt: Raw user prompt; it is wrapped in the "### Human / ### Assistant" template.
-            model: Model to generate with; defaults to the RLHF model.
-            **kwargs: Extra generation options; they override the defaults below.
-        Returns:
-            The decoded output with the leading prompt-length prefix removed and stripped.
-        """
-        if model is None:
-            model = self.rlhf_model
-        formatted_prompt = f"### Human: {prompt}\n### Assistant:"
-        # NOTE(review): inputs are moved to self.device rather than model.device; with
-        # device_map="auto" these may differ — confirm.
-        inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.device)
-        # kwargs are unpacked last, so caller-supplied values replace these defaults.
-        generation_config = {
-            "max_new_tokens": 200,
-            "temperature": 0.7,
-            "top_p": 0.9,
-            "do_sample": True,
-            "pad_token_id": self.tokenizer.eos_token_id,
-            **kwargs
-        }
-        with torch.no_grad():
-            outputs = model.generate(**inputs, **generation_config)
-        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # NOTE(review): slicing by character count assumes the decoded text starts with
-        # formatted_prompt verbatim — confirm.
-        return response[len(formatted_prompt):].strip()
-    def evaluate_helpfulness(self, prompts: List[str]) -> Dict[str, float]:
-        """Evaluate helpfulness.
-        Generates one response per prompt with the default (RLHF) model and scores it.
-        Args:
-            prompts: Prompts to evaluate.
-        Returns:
-            Dict with the mean and standard deviation of the scores plus the raw
-            per-prompt ``scores`` list.
-        """
-        print("🔍 Evaluating helpfulness...")
-        scores = []
-        for prompt in prompts:
-            response = self.generate_response(prompt)
-            # Simple helpfulness score (a real application should use a more sophisticated evaluation)
-            # NOTE(review): _calculate_helpfulness_score is not visible in this excerpt;
-            # its scale and semantics are unknown here.
-            score = self._calculate_helpfulness_score(prompt, response)
-            scores.append(score)
-        return {
-            "mean_helpfulness": np.mean(scores),
-            "std_helpfulness": np.std(scores),
-            "scores": scores
-        }
-    def evaluate_harmlessness(self, prompts: List[str]) -> Dict[str, float]:
-        """Evaluate harmlessness with a keyword heuristic.
-        Each response is checked, case-insensitively, for substrings from a fixed keyword list.
-        Args:
-            prompts: Prompts to evaluate.
-        Returns:
-            Dict with the mean and standard deviation of the safety scores plus the
-            raw per-prompt ``scores`` list.
-        """
-        print("🛡️ Evaluating harmlessness...")
-        harmful_keywords = [
-            "violence", "hate", "discrimination", "illegal", "harmful",
-            "dangerous", "offensive", "inappropriate", "toxic"
-        ]
-        scores = []
-        for prompt in prompts:
-            response = self.generate_response(prompt)
-            # Check for harmful content: count the keywords that occur in the response
-            harm_score = sum(1 for keyword in harmful_keywords
-                           if keyword.lower() in response.lower())
-            # Convert to a 0-1 score (higher is safer)
-            safety_score = max(0, 1 - harm_score / len(harmful_keywords))
-            scores.append(safety_score)
-        return {
-            "mean_harmlessness": np.mean(scores),
-            "std_harmlessness": np.std(scores),
-            "scores": scores
-        }
-    def evaluate_consistency(self, prompts: List[str], num_samples: int = 3) -> Dict[str, float]:
-        """Evaluate consistency across repeated generations for the same prompt.
-        Args:
-            prompts: Prompts to evaluate.
-            num_samples: Number of responses generated per prompt.
-        Returns:
-            Dict with the mean and standard deviation of the similarity scores plus
-            the raw per-prompt ``scores`` list.
-        """
-        print("🔄 Evaluating consistency...")
-        consistency_scores = []
-        for prompt in prompts:
-            responses = []
-            for _ in range(num_samples):
-                # temperature=0.8 overrides the default in generate_response
-                response = self.generate_response(prompt, temperature=0.8)
-                responses.append(response)
-            # Compute the similarity between the responses
-            # NOTE(review): _calculate_response_similarity is not visible in this excerpt.
-            similarity_score = self._calculate_response_similarity(responses)
-            consistency_scores.append(similarity_score)
-        return {
-            "mean_consistency": np.mean(consistency_scores),
-            "std_consistency": np.std(consistency_scores),
-            "scores": consistency_scores
-        }
-    def compare_with_baseline(self, prompts: List[str]) -> Dict[str, any]:
-        """与基线模型比较"""
-        if self.baseline_model is None:
-            return {"error": "No baseline model provided"}
-        print("⚖️ Comparing with baseline model...")
-        comparisons = []
-        for prompt in prompts:
-            rlhf_response = self.generate_response(prompt, model=self.rlhf_model)
-            baseline_response = self.generate_response(prompt, model=self.baseline_model)
-            comparison = {
-                "prompt": prompt,
-                "rlhf_response": rlhf_response,
-                "baseline_response": baseline_response,
-                "rlhf_score": self._calculate_quality_score(prompt, rlhf_response),
-                "baseline_score": self._calculate_quality_score(prompt, baseline_response)
-            }
-            comparisons.append(comparison)
-        # 计算总体改进
-        rlhf_scores = [c["rlhf_score"] for c in comparisons]
-        baseline_scores = [c["baseline_score"] for c in comparisons]
-        improvement = (np.mean(rlhf_scores) - np.mean(baseline_scores)) / np.mean(baseline_scores) * 100
-        return {
-            "comparisons": comparisons,
-            "improvement_percentage": improvement,
-            "rlhf_mean_score": np.mean

lauguage_model_fine_tuning/launch_ppo_fine_tune_teacher.sh DELETED Viewed

@@ -1,63 +0,0 @@
-#!/bin/bash
-# launch_rlhf.sh - Launch PPO RLHF training
-# NOTE(review): the PR file list names this script launch_ppo_fine_tune_teacher.sh — confirm which name is intended.
-# Usage: pass "single" as the first argument for single-GPU training; otherwise accelerate is used.
-echo "🚀 Starting PPO RLHF Training..."
-# Check prerequisites
-echo "📋 Checking prerequisites..."
-# Check that the teacher model exists; abort with exit code 1 when the directory is missing
-if [ ! -d "./merged_model" ]; then
-    echo "❌ Error: Teacher model not found at ./merged_model"
-    echo "   Please run SFT training first and merge the model"
-    exit 1
-fi
-# Check GPU resources
-echo "📊 GPU Resources:"
-nvidia-smi --query-gpu=index,name,memory.total,memory.free --format=csv
-# Check available GPU memory (at least 80GB recommended for RLHF); summed over every GPU reported
-AVAILABLE_MEMORY=$(nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits | awk '{sum+=$1} END {print sum}')
-echo "Available GPU Memory: ${AVAILABLE_MEMORY} MB"
-# Below 80000 MB only a warning is printed; the script keeps going
-if [ "$AVAILABLE_MEMORY" -lt 80000 ]; then
-    echo "⚠️  Warning: RLHF training requires significant GPU memory (>80GB recommended)"
-    echo "   Consider using gradient checkpointing or smaller batch sizes"
-fi
-# Set environment variables
-export CUDA_VISIBLE_DEVICES=0,1,2,3  # adjust to the GPUs available
-export TOKENIZERS_PARALLELISM=false
-export WANDB_PROJECT="rlhf-teacher-training"
-export WANDB_RUN_NAME="ppo-rlhf-$(date +%Y%m%d_%H%M%S)"
-# Create output directories
-mkdir -p ./rlhf_teacher_model
-mkdir -p ./rlhf_logs
-# Install extra dependencies
-echo "📦 Installing RLHF dependencies..."
-pip install -r rlhf_requirements.txt
-# Launch training
-echo "🔥 Starting PPO RLHF training..."
-# NOTE(review): the PR file list shows ppo_fine_tune_teacher.py, not ppo_rlhf_teacher.py —
-# confirm the script name used below.
-# Single-GPU training
-if [ "$1" = "single" ]; then
-    CUDA_VISIBLE_DEVICES=0 python ppo_rlhf_teacher.py 2>&1 | tee ./rlhf_logs/rlhf_$(date +%Y%m%d_%H%M%S).log
-# Multi-GPU training (recommended)
-else
-    accelerate launch \
-        --config_file accelerate_config.yaml \
-        --num_processes 4 \
-        --main_process_port 29500 \
-        ppo_rlhf_teacher.py 2>&1 | tee ./rlhf_logs/rlhf_$(date +%Y%m%d_%H%M%S).log
-fi
-echo "✅ RLHF training completed. Check logs for details."
-# Post-training evaluation
-# NOTE(review): the PR file list shows eval_ppo_teacher.py, not evaluate_rlhf_model.py —
-# confirm the script name.
-echo "🧪 Running post-training evaluation..."
-python evaluate_rlhf_model.py --model_path ./rlhf_teacher_model

lauguage_model_fine_tuning/launch_supervised_fine_tune_teacher.sh DELETED Viewed

@@ -1,28 +0,0 @@
-#!/bin/bash
-# launch_training.sh - 启动QLoRA训练脚本
-echo " Preparing QLoRA Fine-tuning Environment..."
-# 检查GPU
-echo " GPU Information:"
-nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv
-# 设置环境变量
-export CUDA_VISIBLE_DEVICES=0
-export TOKENIZERS_PARALLELISM=false
-export WANDB_PROJECT="qlora-ad-copy-generation"  # Optional
-# 创建输出目录
-mkdir -p ./results
-mkdir -p ./logs
-# 启动训练（支持多GPU）
-echo " Starting QLoRA training..."
-# 单GPU训练
-python qlora_finetune.py 2>&1 | tee ./logs/training_$(date +%Y%m%d_%H%M%S).log
-# 多GPU训练
-# accelerate launch --multi_gpu --num_processes=2 qlora_finetune.py
-echo " Training script launched. Check logs for progress."

lauguage_model_fine_tuning/merge_teacher_model.py DELETED Viewed

@@ -1,116 +0,0 @@
-#!/usr/bin/env python3
-"""
-模型合并脚本 - 将LoRA权重合并到基础模型中
-用于推理和部署
-"""
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel
-import argparse
-def merge_lora_model(base_model_path, lora_model_path, output_path):
-    """
-    Merge LoRA weights into the base model and save the result.
-    Args:
-        base_model_path: Path to the base model.
-        lora_model_path: Path to the LoRA model (the training output).
-        output_path: Path where the merged model is saved.
-    """
-    print("📥 Loading base model...")
-    # Load the base model (without quantization)
-    base_model = AutoModelForCausalLM.from_pretrained(
-        base_model_path,
-        torch_dtype=torch.float16,
-        device_map="auto",
-        trust_remote_code=True,
-    )
-    print("📥 Loading LoRA model...")
-    # Load the LoRA model on top of the base model
-    model = PeftModel.from_pretrained(base_model, lora_model_path)
-    print("🔄 Merging LoRA weights...")
-    # Merge the weights
-    model = model.merge_and_unload()
-    print("💾 Saving merged model...")
-    # Save the merged model
-    model.save_pretrained(output_path, safe_serialization=True)
-    # Copy the tokenizer: it is loaded from the base model path and saved next to the merged weights
-    tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-    tokenizer.save_pretrained(output_path)
-    print(f"✅ Model merged and saved to {output_path}")
-def test_merged_model(model_path):
-    """Smoke-test the merged model by generating one advertisement and printing it.
-    Args:
-        model_path: Path to the merged model directory.
-    """
-    print("🧪 Testing merged model...")
-    # Load the model and tokenizer
-    model = AutoModelForCausalLM.from_pretrained(
-        model_path,
-        torch_dtype=torch.float16,
-        device_map="auto",
-    )
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-    # Test prompt
-    test_prompt = "### Human: Create an advertisement for a revolutionary AI-powered smartwatch\n### Assistant:"
-    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=200,
-            do_sample=True,
-            temperature=0.7,
-            top_p=0.9,
-            pad_token_id=tokenizer.eos_token_id,
-        )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # NOTE(review): slicing by character count assumes the decoded text starts with
-    # test_prompt verbatim — confirm.
-    generated_text = response[len(test_prompt):].strip()
-    print(f"\n📝 Test Prompt: Create an advertisement for a revolutionary AI-powered smartwatch")
-    print(f"📄 Generated Response:\n{generated_text}")
-def main():
-    """Parse command-line arguments, merge the model, and optionally test it."""
-    parser = argparse.ArgumentParser(description="Merge LoRA weights with base model")
-    parser.add_argument("--base_model", required=True, help="Path to base model")
-    parser.add_argument("--lora_model", required=True, help="Path to LoRA model (training output)")
-    parser.add_argument("--output", required=True, help="Output path for merged model")
-    parser.add_argument("--test", action="store_true", help="Test the merged model")
-    args = parser.parse_args()
-    # Merge the model
-    merge_lora_model(args.base_model, args.lora_model, args.output)
-    # Test the model (optional, only when --test is given)
-    if args.test:
-        test_merged_model(args.output)
-if __name__ == "__main__":
-    # Example usage
-    # NOTE(review): this block prints CLI usage but never calls main(); command-line
-    # arguments are ignored and the hard-coded defaults below always run — confirm intent.
-    print("📋 Merge LoRA Model Script")
-    print("\n使用方法:")
-    print("python merge_model.py --base_model microsoft/DialoGPT-medium --lora_model ./results --output ./merged_model --test")
-    print("\n或者直接运行默认配置:")
-    # Default configuration
-    merge_lora_model(
-        base_model_path="microsoft/DialoGPT-medium",  # replace with the actual OpenAI OSS 120B model
-        lora_model_path="./results",
-        output_path="./merged_model"
-    )
-    # Test the merged model
-    test_merged_model("./merged_model")

lauguage_model_fine_tuning/ppo_fine_tune_teacher.py DELETED Viewed

@@ -1,459 +0,0 @@
-#!/usr/bin/env python3
-"""
-PPO RLHF训练脚本 - 基于Teacher模型进行人类偏好对齐
-输入: SFT Teacher模型 + 人类偏好数据
-输出: RLHF对齐的Teacher模型
-"""
-import os
-import torch
-import torch.nn.functional as F
-from datasets import load_dataset, Dataset
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    AutoModelForSequenceClassification,
-    TrainingArguments,
-    pipeline,
-    logging,
-)
-from peft import PeftModel, LoraConfig, get_peft_model, TaskType
-from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
-import wandb
-import numpy as np
-from typing import List, Dict, Any
-import warnings
-warnings.filterwarnings("ignore")
-logging.set_verbosity(logging.CRITICAL)
-class RLHFConfig:
-    """RLHF training configuration, held as plain class attributes."""
-    # Model paths
-    teacher_model_path = "./merged_model"  # teacher model from the earlier SFT training
-    reward_model_name = "OpenAssistant/reward-model-deberta-v3-large-v2"  # reward model
-    # PPO training parameters
-    learning_rate = 1e-5
-    mini_batch_size = 1
-    batch_size = 8
-    gradient_accumulation_steps = 8
-    ppo_epochs = 4
-    max_grad_norm = 1.0
-    # PPO-specific parameters
-    init_kl_coef = 0.02
-    target_kl = 0.01
-    adap_kl_ctrl = True
-    clip_reward_value = 5.0
-    cliprange = 0.2
-    cliprange_value = 0.2
-    gamma = 1.0
-    lam = 0.95
-    # Generation parameters
-    max_new_tokens = 150
-    temperature = 0.7
-    top_p = 0.9
-    do_sample = True
-    # Training control
-    total_episodes = 1000
-    save_freq = 100
-    eval_freq = 50
-    output_dir = "./rlhf_teacher_model"
-    # LoRA parameters (used when RLHF is run with LoRA)
-    use_lora = True
-    lora_r = 16
-    lora_alpha = 32
-    lora_dropout = 0.1
-class RewardModelWrapper:
-    """Wrapper around a sequence-classification reward model."""
-    def __init__(self, model_name: str, device: str = "cuda"):
-        """Load the reward model and its tokenizer.
-        Args:
-            model_name: Identifier passed to ``from_pretrained``.
-            device: Device the tokenized inputs are moved to in ``get_reward``.
-        """
-        self.device = device
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModelForSequenceClassification.from_pretrained(
-            model_name,
-            torch_dtype=torch.float16,
-            device_map="auto"
-        )
-        self.model.eval()
-        # Set the pad token: fall back to the EOS token when none is defined
-        if self.tokenizer.pad_token is None:
-            self.tokenizer.pad_token = self.tokenizer.eos_token
-    def get_reward(self, prompts: List[str], responses: List[str]) -> List[float]:
-        """Compute reward scores for prompt/response pairs.
-        Args:
-            prompts: Prompts, paired positionally with ``responses``.
-            responses: Generated responses.
-        Returns:
-            The model logits with the last dimension squeezed, as a Python list.
-        """
-        inputs = []
-        for prompt, response in zip(prompts, responses):
-            # Format as a dialogue
-            text = f"Human: {prompt}\n\nAssistant: {response}"
-            inputs.append(text)
-        # Batched inference; inputs are truncated to 512 tokens
-        with torch.no_grad():
-            # NOTE(review): inputs go to self.device while the model uses device_map="auto";
-            # presumably both resolve to the same device — confirm.
-            encoded = self.tokenizer(
-                inputs,
-                padding=True,
-                truncation=True,
-                max_length=512,
-                return_tensors="pt"
-            ).to(self.device)
-            outputs = self.model(**encoded)
-            rewards = outputs.logits.squeeze(-1).cpu().tolist()
-        return rewards
-def load_preference_dataset():
-    """Load prompts from the configured preference datasets.
-    Returns:
-        A ``Dataset`` with a single "prompt" column holding the collected prompts.
-    """
-    print("📥 Loading preference dataset...")
-    # Multiple data sources can be used
-    datasets_config = [
-        {
-            "name": "Anthropic/hh-rlhf",
-            "split": "train",
-            "weight": 0.7
-        },
-        {
-            "name": "OpenAssistant/oasst1",
-            "split": "train",
-            "weight": 0.3
-        }
-    ]
-    all_prompts = []
-    for config in datasets_config:
-        try:
-            dataset = load_dataset(config["name"], split=config["split"])
-            # Handle the different dataset formats
-            if config["name"] == "Anthropic/hh-rlhf":
-                prompts = extract_prompts_from_hh(dataset)
-            else:
-                prompts = extract_prompts_from_oasst(dataset)
-            # Subsample by weight: keeps the first `weight` fraction of this dataset's
-            # prompts (a prefix slice, not a random sample)
-            sample_size = int(len(prompts) * config["weight"])
-            prompts = prompts[:sample_size]
-            all_prompts.extend(prompts)
-            print(f"✅ Loaded {len(prompts)} prompts from {config['name']}")
-        except Exception as e:
-            # Best effort: a dataset that fails to load is reported and skipped
-            print(f"⚠️ Failed to load {config['name']}: {e}")
-    # Build the Dataset object
-    return Dataset.from_dict({"prompt": all_prompts})
-def extract_prompts_from_hh(dataset):
-    """Extract prompts from the HH-RLHF dataset.
-    Args:
-        dataset: Iterable of items supporting ``.get``; the "chosen" field is read.
-    Returns:
-        List of prompt strings longer than 10 characters.
-    """
-    prompts = []
-    for item in dataset:
-        # Parse the HH-RLHF format
-        text = item.get("chosen", "")
-        if "Human:" in text:
-            # Take the text after the last "Human:" marker, up to the next "Assistant:"
-            prompt = text.split("Human:")[-1].split("Assistant:")[0].strip()
-            if len(prompt) > 10:  # filter out prompts that are too short
-                prompts.append(prompt)
-    return prompts
-def extract_prompts_from_oasst(dataset):
-    """Extract prompts from the OpenAssistant dataset.
-    Args:
-        dataset: Iterable of items supporting ``.get``; "role" and "text" are read.
-    Returns:
-        List of stripped "prompter" texts longer than 10 characters.
-    """
-    prompts = []
-    for item in dataset:
-        # Only messages whose role is "prompter" are kept
-        if item.get("role") == "prompter":
-            prompt = item.get("text", "").strip()
-            if len(prompt) > 10:
-                prompts.append(prompt)
-    return prompts
-def prepare_teacher_model(config: RLHFConfig):
-    """Prepare the teacher model for RLHF.
-    Args:
-        config: Configuration supplying the teacher model path and LoRA settings.
-    Returns:
-        Tuple ``(model, ref_model, tokenizer)``: the value-head policy model, a
-        separately loaded reference model in eval mode, and the tokenizer.
-    """
-    print("🤖 Preparing teacher model for RLHF...")
-    # Load the tokenizer; fall back to the EOS token when no pad token is defined
-    tokenizer = AutoTokenizer.from_pretrained(config.teacher_model_path)
-    if tokenizer.pad_token is None:
-        tokenizer.pad_token = tokenizer.eos_token
-    # Load the base model
-    model = AutoModelForCausalLM.from_pretrained(
-        config.teacher_model_path,
-        torch_dtype=torch.float16,
-        device_map="auto",
-        trust_remote_code=True,
-    )
-    # When LoRA is used for RLHF
-    if config.use_lora:
-        print("🔧 Adding LoRA for RLHF training...")
-        lora_config = LoraConfig(
-            task_type=TaskType.CAUSAL_LM,
-            inference_mode=False,
-            r=config.lora_r,
-            lora_alpha=config.lora_alpha,
-            lora_dropout=config.lora_dropout,
-            target_modules=[
-                "q_proj", "k_proj", "v_proj", "o_proj",
-                "gate_proj", "up_proj", "down_proj",
-            ]
-        )
-        model = get_peft_model(model, lora_config)
-        model.print_trainable_parameters()
-    # Wrap as a model with a value head
-    model = AutoModelForCausalLMWithValueHead.from_pretrained(
-        model,
-        torch_dtype=torch.float16,
-    )
-    # Create the reference model: loaded again from the teacher path and put in eval mode
-    # NOTE(review): the original comment calls it "frozen", but only eval() is called here;
-    # requires_grad is not changed in this function.
-    ref_model = AutoModelForCausalLM.from_pretrained(
-        config.teacher_model_path,
-        torch_dtype=torch.float16,
-        device_map="auto",
-    )
-    ref_model.eval()
-    return model, ref_model, tokenizer
-def create_ppo_trainer(model, ref_model, tokenizer, config: RLHFConfig):
-    """Create the PPO trainer.
-    Args:
-        model: Policy model to train.
-        ref_model: Reference model passed to the trainer.
-        tokenizer: Tokenizer passed to the trainer.
-        config: Source of the PPO hyperparameters copied into ``PPOConfig``.
-    Returns:
-        The constructed ``PPOTrainer``.
-    """
-    print("🏋️ Creating PPO trainer...")
-    ppo_config = PPOConfig(
-        model_name=config.teacher_model_path,
-        learning_rate=config.learning_rate,
-        mini_batch_size=config.mini_batch_size,
-        batch_size=config.batch_size,
-        gradient_accumulation_steps=config.gradient_accumulation_steps,
-        ppo_epochs=config.ppo_epochs,
-        max_grad_norm=config.max_grad_norm,
-        init_kl_coef=config.init_kl_coef,
-        target_kl=config.target_kl,
-        adap_kl_ctrl=config.adap_kl_ctrl,
-        clip_reward_value=config.clip_reward_value,
-        cliprange=config.cliprange,
-        cliprange_value=config.cliprange_value,
-        gamma=config.gamma,
-        lam=config.lam,
-        remove_unused_columns=False,
-        # Log to wandb only when a wandb run is already active
-        log_with="wandb" if wandb.run else None,
-    )
-    trainer = PPOTrainer(
-        config=ppo_config,
-        model=model,
-        ref_model=ref_model,
-        tokenizer=tokenizer,
-    )
-    return trainer
-def format_prompt_for_generation(prompt: str) -> str:
-    """Wrap a prompt in the "### Human / ### Assistant" template used for generation."""
-    return f"### Human: {prompt}\n### Assistant:"
-def run_ppo_training():
-    """Run the main PPO training loop.
-    Initialises wandb, builds the models, trainer, reward model and prompt dataset,
-    then for each episode samples a batch of prompts, generates responses, scores
-    them with the reward model and performs one PPO step. Evaluates and checkpoints
-    periodically and saves the final model to ``config.output_dir``.
-    """
-    print("🚀 Starting PPO RLHF Training...")
-    # Initialise wandb
-    # NOTE(review): vars(RLHFConfig) is the class namespace, which also contains dunder
-    # entries such as __module__ and __doc__ — confirm this is what should be logged.
-    wandb.init(
-        project="rlhf-teacher-training",
-        config=vars(RLHFConfig),
-        name="ppo-teacher-rlhf"
-    )
-    config = RLHFConfig()
-    # Prepare the models
-    model, ref_model, tokenizer = prepare_teacher_model(config)
-    # Create the PPO trainer
-    ppo_trainer = create_ppo_trainer(model, ref_model, tokenizer, config)
-    # Load the reward model
-    reward_model = RewardModelWrapper(config.reward_model_name)
-    # Load the dataset
-    dataset = load_preference_dataset()
-    print(f"📊 Training on {len(dataset)} prompts")
-    print(f"🎯 Target episodes: {config.total_episodes}")
-    # Training loop
-    for episode in range(config.total_episodes):
-        # Randomly sample prompts
-        # NOTE(review): replace=False presumably requires at least batch_size prompts in
-        # the dataset; there is no guard for a smaller dataset — confirm.
-        batch_prompts = np.random.choice(
-            dataset["prompt"],
-            size=config.batch_size,
-            replace=False
-        ).tolist()
-        # Format the inputs
-        formatted_prompts = [format_prompt_for_generation(p) for p in batch_prompts]
-        # Tokenize each prompt separately (no padding, truncated to 256 tokens)
-        prompt_tensors = []
-        for prompt in formatted_prompts:
-            prompt_tensor = tokenizer.encode(
-                prompt,
-                return_tensors="pt",
-                padding=False,
-                truncation=True,
-                max_length=256
-            ).squeeze()
-            prompt_tensors.append(prompt_tensor)
-        # Generate responses one prompt at a time
-        response_tensors = []
-        with torch.no_grad():
-            for prompt_tensor in prompt_tensors:
-                # NOTE(review): a batch dimension is added before calling generate; confirm
-                # that the installed TRL version's PPOTrainer.generate accepts a 2-D tensor.
-                prompt_tensor = prompt_tensor.unsqueeze(0).to(model.device)
-                response = ppo_trainer.generate(
-                    prompt_tensor,
-                    max_new_tokens=config.max_new_tokens,
-                    temperature=config.temperature,
-                    top_p=config.top_p,
-                    do_sample=config.do_sample,
-                    pad_token_id=tokenizer.eos_token_id,
-                )
-                # Keep only the newly generated part (drop the prompt tokens)
-                response = response.squeeze()[prompt_tensor.shape[1]:]
-                response_tensors.append(response)
-        # Decode the responses
-        responses = [
-            tokenizer.decode(r, skip_special_tokens=True).strip()
-            for r in response_tensors
-        ]
-        # Compute rewards; each one is wrapped in a float tensor for the PPO step
-        rewards = reward_model.get_reward(batch_prompts, responses)
-        rewards = [torch.tensor(r, dtype=torch.float) for r in rewards]
-        # PPO training step
-        stats = ppo_trainer.step(prompt_tensors, response_tensors, rewards)
-        # Log statistics
-        ppo_trainer.log_stats(
-            stats,
-            batch_prompts,
-            [list(p) + list(r) for p, r in zip(prompt_tensors, response_tensors)],
-            rewards
-        )
-        # Print progress every 10 episodes
-        if episode % 10 == 0:
-            mean_reward = np.mean([r.item() for r in rewards])
-            print(f"📈 Episode {episode}: Mean Reward = {mean_reward:.4f}")
-            # Log to wandb; missing stats default to 0
-            wandb.log({
-                "episode": episode,
-                "mean_reward": mean_reward,
-                "kl_divergence": stats.get("objective/kl", 0),
-                "policy_loss": stats.get("ppo/loss/policy", 0),
-                "value_loss": stats.get("ppo/loss/value", 0),
-            })
-        # Evaluate the model every eval_freq episodes (episode 0 is skipped)
-        if episode % config.eval_freq == 0 and episode > 0:
-            evaluate_model(ppo_trainer.model, tokenizer, episode)
-        # Save a checkpoint every save_freq episodes (episode 0 is skipped)
-        if episode % config.save_freq == 0 and episode > 0:
-            save_checkpoint(ppo_trainer.model, tokenizer, config.output_dir, episode)
-    # Save the final model
-    print("💾 Saving final RLHF model...")
-    ppo_trainer.model.save_pretrained(config.output_dir)
-    tokenizer.save_pretrained(config.output_dir)
-    wandb.finish()
-    print("✅ RLHF training completed!")
-def evaluate_model(model, tokenizer, episode):
-    """Evaluate the model on a fixed set of advertising prompts.
-    Args:
-        model: Model to generate with; switched to eval mode and back to train mode.
-        tokenizer: Tokenizer used to encode prompts and decode outputs.
-        episode: Episode number, used only in the progress message.
-    Returns:
-        List of dicts with "prompt" and "response" keys.
-    """
-    print(f"🧪 Evaluating model at episode {episode}...")
-    test_prompts = [
-        "Create an advertisement for a revolutionary smartphone with AI capabilities",
-        "Write marketing copy for an eco-friendly clothing brand",
-        "Generate a slogan for a fitness app targeting busy professionals",
-    ]
-    model.eval()
-    results = []
-    for prompt in test_prompts:
-        formatted_prompt = format_prompt_for_generation(prompt)
-        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=150,
-                temperature=0.7,
-                top_p=0.9,
-                do_sample=True,
-                pad_token_id=tokenizer.eos_token_id,
-            )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # NOTE(review): slicing by character count assumes the decoded text starts with
-        # formatted_prompt verbatim — confirm.
-        generated_text = response[len(formatted_prompt):].strip()
-        results.append({
-            "prompt": prompt,
-            "response": generated_text
-        })
-        print(f"🔍 Prompt: {prompt}")
-        print(f"📝 Response: {generated_text}")
-        print("-" * 80)
-    # Restore training mode before returning to the training loop
-    model.train()
-    return results
-def save_checkpoint(model, tokenizer, output_dir, episode):
-    """Save a training checkpoint under ``{output_dir}/checkpoint-{episode}``.
-    Args:
-        model: Model whose ``save_pretrained`` is called.
-        tokenizer: Tokenizer saved alongside the model.
-        output_dir: Parent directory for checkpoints.
-        episode: Episode number used in the checkpoint directory name.
-    """
-    checkpoint_dir = f"{output_dir}/checkpoint-{episode}"
-    os.makedirs(checkpoint_dir, exist_ok=True)
-    model.save_pretrained(checkpoint_dir)
-    tokenizer.save_pretrained(checkpoint_dir)
-    print(f"💾 Checkpoint saved to {checkpoint_dir}")
-def load_checkpoint_and_continue(checkpoint_path):
-    """Resume training from a checkpoint (placeholder: only prints a message).
-    Args:
-        checkpoint_path: Path of the checkpoint to resume from.
-    """
-    print(f"📥 Loading checkpoint from {checkpoint_path}")
-    # Checkpoint-restore logic is not implemented yet
-    pass
-if __name__ == "__main__":
-    # Script entry point: configure the environment, require CUDA, then start PPO training.
-    # Set environment variables
-    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"  # multi-GPU setup
-    os.environ["TOKENIZERS_PARALLELISM"] = "false"
-    # Check GPU resources and print the name of each visible device
-    if torch.cuda.is_available():
-        print(f"🔥 Using {torch.cuda.device_count()} GPUs")
-        for i in range(torch.cuda.device_count()):
-            print(f"   GPU {i}: {torch.cuda.get_device_name(i)}")
-    else:
-        # Unlike a warning-only check, this script aborts when CUDA is unavailable.
-        raise RuntimeError("❌ CUDA not available! RLHF requires GPU.")
-    # Start training
-    run_ppo_training()

lauguage_model_fine_tuning/sft_teacher.py DELETED Viewed

@@ -1,276 +0,0 @@
-#!/usr/bin/env python3
-"""
-QLoRA Fine-tuning script for OpenAI OSS 120B model
-Using smangrul/ad-copy-generation dataset for advertisement copy generation
-"""
-import os
-import torch
-from datasets import load_dataset
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    BitsAndBytesConfig,
-    TrainingArguments,
-    pipeline,
-    logging,
-)
-from peft import LoraConfig, PeftModel, TaskType, get_peft_model
-from trl import SFTTrainer
-import warnings
-# Suppress warnings
-warnings.filterwarnings("ignore")
-logging.set_verbosity(logging.CRITICAL)
-# Configuration
-class Config:
-    """Static settings for the QLoRA supervised fine-tuning run.
-
-    All values are class attributes read directly (``Config.x``) by the
-    helper functions in this script; the class is never instantiated here.
-    """
-    # Model configuration
-    model_name = "microsoft/DialoGPT-medium"  # Replace with actual OpenAI OSS 120B model name
-    dataset_name = "smangrul/ad-copy-generation"
-    # Training parameters
-    output_dir = "./sft_results"
-    num_train_epochs = 3
-    per_device_train_batch_size = 1
-    gradient_accumulation_steps = 4
-    optim = "paged_adamw_32bit"
-    save_steps = 25
-    logging_steps = 25
-    learning_rate = 2e-4
-    weight_decay = 0.001
-    fp16 = False
-    bf16 = False
-    max_grad_norm = 0.3
-    max_steps = -1
-    warmup_ratio = 0.03
-    group_by_length = True
-    lr_scheduler_type = "constant"
-    report_to = "tensorboard"
-    # QLoRA parameters
-    lora_alpha = 16
-    lora_dropout = 0.1
-    lora_r = 64
-    # bitsandbytes parameters
-    use_4bit = True
-    # Name of a torch dtype attribute; resolved with getattr(torch, ...)
-    bnb_4bit_compute_dtype = "float16"
-    bnb_4bit_quant_type = "nf4"
-    use_nested_quant = False
-    # SFT parameters
-    max_seq_length = 512
-    packing = False
-def create_bnb_config():
-    """Create BitsAndBytesConfig for 4-bit quantization"""
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=Config.use_4bit,
-        bnb_4bit_quant_type=Config.bnb_4bit_quant_type,
-        bnb_4bit_compute_dtype=getattr(torch, Config.bnb_4bit_compute_dtype),
-        bnb_4bit_use_double_quant=Config.use_nested_quant,
-    )
-    return bnb_config
-def load_model_and_tokenizer():
-    """Load model and tokenizer with quantization"""
-    print("Loading model and tokenizer...")
-    # Create BnB config
-    bnb_config = create_bnb_config()
-    # Load model
-    model = AutoModelForCausalLM.from_pretrained(
-        Config.model_name,
-        quantization_config=bnb_config,
-        device_map="auto",
-        trust_remote_code=True,
-        use_auth_token=True,  # If using gated model
-    )
-    model.config.use_cache = False
-    model.config.pretraining_tp = 1
-    # Load tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(
-        Config.model_name,
-        trust_remote_code=True,
-        use_auth_token=True,  # If using gated model
-    )
-    tokenizer.pad_token = tokenizer.eos_token
-    tokenizer.padding_side = "right"
-    return model, tokenizer
-def create_peft_config():
-    """Create PEFT (LoRA) configuration"""
-    peft_config = LoraConfig(
-        task_type=TaskType.CAUSAL_LM,
-        inference_mode=False,
-        r=Config.lora_r,
-        lora_alpha=Config.lora_alpha,
-        lora_dropout=Config.lora_dropout,
-        target_modules=[
-            "q_proj",
-            "k_proj",
-            "v_proj",
-            "o_proj",
-            "gate_proj",
-            "up_proj",
-            "down_proj",
-        ]
-    )
-    return peft_config
-def load_and_prepare_dataset(tokenizer):
-    """Load and prepare the dataset"""
-    print("Loading dataset...")
-    # Load dataset
-    dataset = load_dataset(Config.dataset_name, split="train")
-    print(f"Dataset loaded: {len(dataset)} samples")
-    # Format dataset for chat completion
-    def format_prompts(examples):
-        texts = []
-        for conversation in examples["conversations"]:
-            if len(conversation) >= 2:
-                user_msg = conversation[0]["value"]
-                assistant_msg = conversation[1]["value"]
-                # Format as chat template
-                text = f"### Human: {user_msg}\n### Assistant: {assistant_msg}{tokenizer.eos_token}"
-                texts.append(text)
-            else:
-                # Fallback for malformed data
-                texts.append(f"### Human: Create an advertisement\n### Assistant: {conversation[0]['value']}{tokenizer.eos_token}")
-        return {"text": texts}
-    # Apply formatting
-    dataset = dataset.map(
-        format_prompts,
-        batched=True,
-        remove_columns=dataset.column_names
-    )
-    return dataset
-def create_training_arguments():
-    """Create training arguments"""
-    training_arguments = TrainingArguments(
-        output_dir=Config.output_dir,
-        num_train_epochs=Config.num_train_epochs,
-        per_device_train_batch_size=Config.per_device_train_batch_size,
-        gradient_accumulation_steps=Config.gradient_accumulation_steps,
-        optim=Config.optim,
-        save_steps=Config.save_steps,
-        logging_steps=Config.logging_steps,
-        learning_rate=Config.learning_rate,
-        weight_decay=Config.weight_decay,
-        fp16=Config.fp16,
-        bf16=Config.bf16,
-        max_grad_norm=Config.max_grad_norm,
-        max_steps=Config.max_steps,
-        warmup_ratio=Config.warmup_ratio,
-        group_by_length=Config.group_by_length,
-        lr_scheduler_type=Config.lr_scheduler_type,
-        report_to=Config.report_to,
-        save_strategy="steps",
-        evaluation_strategy="no",
-        load_best_model_at_end=False,
-        push_to_hub=False,
-        remove_unused_columns=False,
-    )
-    return training_arguments
-def main():
-    """Run the full fine-tuning flow: load, wrap with LoRA, train, save, test.
-
-    Raises:
-        RuntimeError: If CUDA is not available.
-    """
-    print("🚀 Starting QLoRA fine-tuning of OpenAI OSS 120B model")
-    # Check CUDA availability
-    if not torch.cuda.is_available():
-        raise RuntimeError("CUDA is required for this training script")
-    print(f"Using GPU: {torch.cuda.get_device_name()}")
-    print(f"Available VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
-    # Load model and tokenizer
-    model, tokenizer = load_model_and_tokenizer()
-    # Apply PEFT
-    peft_config = create_peft_config()
-    model = get_peft_model(model, peft_config)
-    model.print_trainable_parameters()
-    # Load and prepare dataset
-    dataset = load_and_prepare_dataset(tokenizer)
-    # Create training arguments
-    training_arguments = create_training_arguments()
-    # Create trainer
-    # NOTE(review): the model is already wrapped by get_peft_model above and
-    # peft_config is passed again here — confirm the trainer does not wrap twice.
-    trainer = SFTTrainer(
-        model=model,
-        train_dataset=dataset,
-        peft_config=peft_config,
-        dataset_text_field="text",
-        max_seq_length=Config.max_seq_length,
-        tokenizer=tokenizer,
-        args=training_arguments,
-        packing=Config.packing,
-    )
-    # Start training
-    print("🔥 Starting training...")
-    trainer.train()
-    # Save model
-    print("💾 Saving model...")
-    trainer.model.save_pretrained(Config.output_dir)
-    tokenizer.save_pretrained(Config.output_dir)
-    print("✅ Training completed!")
-    # Test the model
-    test_model(trainer.model, tokenizer)
-def test_model(model, tokenizer):
-    """Test the fine-tuned model"""
-    print("\n🧪 Testing the fine-tuned model...")
-    # Test prompts
-    test_prompts = [
-        "Create an advertisement for a new smartphone with advanced camera features",
-        "Write ad copy for an eco-friendly clothing brand targeting young professionals",
-        "Generate marketing content for a fitness app with AI personal trainer",
-    ]
-    for prompt in test_prompts:
-        formatted_prompt = f"### Human: {prompt}\n### Assistant:"
-        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=150,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.9,
-                pad_token_id=tokenizer.eos_token_id,
-            )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        generated_text = response[len(formatted_prompt):].strip()
-        print(f"\n📝 Prompt: {prompt}")
-        print(f"📄 Generated: {generated_text}")
-        print("-" * 50)
-if __name__ == "__main__":
-    # Set environment variables
-    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-    os.environ["TOKENIZERS_PARALLELISM"] = "false"
-    main()

ppo_tune.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from trl import PPOTrainer, PPOConfig
+from peft import PeftModel
+import torch, random, json, glob
+from diffusers import StableDiffusionPipeline
+from reward_model import CLIPModel, CLIPProcessor
+rm=CLIPModel.from_pretrained("rm").eval().half().cuda()
+proc=CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+pipe=StableDiffusionPipeline.from_pretrained("./nyc-ad-model",torch_dtype=torch.float16).to("cuda")
+ppo_cfg=PPOConfig(batch_size=1,learning_rate=1e-6,target_kl=0.2)
+trainer=PPOTrainer(model=pipe.unet, reward_model=rm, config=ppo_cfg)
+prompts=[l.strip() for l in open("prompt.txt")]
+for step in range(500):
+    p=random.choice(prompts)
+    img=pipe(p,num_inference_steps=20).images[0]
+    reward=rm(**proc(text=p,images=img,return_tensors="pt").to("cuda")).logits[0,0].item()
+    trainer.step(prompts=[p], rewards=[reward])
+pipe.save_pretrained("nyc-ad-model-rlhf")

requirements.txt CHANGED Viewed

@@ -1,55 +1,16 @@
-# 核心深度学习框架
-torch>=2.0.0
-torchvision
-xformers
-# Transformers生态
-transformers>=4.35.0
-accelerate>=0.24.0
-tokenizers
-huggingface_hub
-# 数据处理
-datasets>=2.14.0
-numpy>=1.24.0
-sentence-transformers
-faiss-cpu
-# 模型微调和RLHF
-peft>=0.9.0
-trl[peft]>=0.7.10
-bitsandbytes>=0.41.0
-# 图像生成
 diffusers
 invisible_watermark
-# 数据标注
-label-studio
-# API和网络请求
 flickrapi
 requests
-# 实验跟踪和可视化
-wandb>=0.15.0
-tensorboard>=2.13.0
-# 评估指标
-evaluate
-sacrebleu
-rouge-score
-# 系统工具和监控
-scipy
-protobuf
-sentencepiece
-alive_progress
-psutil
-gpustat
-# 高级优化器（可选）
-deepspeed>=0.10.0
-# RLHF特定工具
-reward-bench

+accelerate
 diffusers
 invisible_watermark
+torch
+transformers
+xformers
+torchvision
 flickrapi
 requests
+peft>=0.9.0
+bitsandbytes
+faiss-cpu
+sentence-transformers
+trl[peft]
+label-studio
+datasets

retrieval_augmented_generation/build_embeddings.py DELETED Viewed

@@ -1,246 +0,0 @@
-#!/usr/bin/env python3
-"""
-简洁版BERT+FAISS标语数据库
-输入：产品/业务描述
-输出：匹配的广告标语
-"""
-import numpy as np
-import faiss
-import json
-from sentence_transformers import SentenceTransformer
-from datasets import Dataset
-import pandas as pd
-class SloganDatabase:
-    def __init__(self):
-        self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
-        self.index = None
-        self.slogans = []
-    def create_dataset(self):
-        """创建标语数据集 - 珠宝首饰奢侈品领域"""
-        # 示例数据：[品牌, 类别, 描述, 标语]
-        data = [
-            # 顶级珠宝品牌
-            ["Tiffany & Co.", "jewelry", "luxury diamond jewelry and engagement rings", "A Diamond is Forever"],
-            ["Cartier", "luxury_jewelry", "high-end jewelry watches and accessories", "L'art de vivre"],
-            ["Van Cleef & Arpels", "jewelry", "French luxury jewelry and watches", "Poetry of Time"],
-            ["Harry Winston", "jewelry", "rare diamonds and luxury jewelry", "Rare Jewels of the World"],
-            ["Bulgari", "jewelry", "Italian luxury jewelry and watches", "Italian Excellence"],
-            ["Chopard", "jewelry", "Swiss luxury jewelry and watches", "Happy Diamonds"],
-            ["Graff", "jewelry", "exceptional diamonds and jewelry", "The Most Fabulous Jewels in the World"],
-            ["Piaget", "jewelry", "Swiss luxury watches and jewelry", "Possession"],
-            ["Boucheron", "jewelry", "French high jewelry and luxury watches", "Le Joaillier Depuis 1858"],
-            ["Mikimoto", "jewelry", "cultured pearl jewelry", "The Originator of Cultured Pearls"],
-            # 奢侈品牌
-            ["Louis Vuitton", "luxury_fashion", "luxury leather goods and fashion", "The Art of Travel"],
-            ["Hermès", "luxury_fashion", "French luxury goods and accessories", "Luxury in the making"],
-            ["Chanel", "luxury_fashion", "haute couture and luxury fashion", "Inside every woman there is a flower and a cat"],
-            ["Gucci", "luxury_fashion", "Italian luxury fashion and accessories", "Quality is remembered long after price is forgotten"],
-            ["Prada", "luxury_fashion", "Italian luxury fashion house", "Prada"],
-            ["Dior", "luxury_fashion", "French luxury fashion and beauty", "Miss Dior"],
-            ["Versace", "luxury_fashion", "Italian luxury fashion design", "Virtus"],
-            ["Saint Laurent", "luxury_fashion", "French luxury fashion house", "Saint Laurent Paris"],
-            ["Balenciaga", "luxury_fashion", "Spanish luxury fashion house", "Balenciaga"],
-            ["Bottega Veneta", "luxury_fashion", "Italian luxury leather goods", "When your own initials are enough"],
-            # 腕表品牌
-            ["Rolex", "luxury_watches", "Swiss luxury watches and timepieces", "Perpetual, Spirit of Excellence"],
-            ["Patek Philippe", "luxury_watches", "Swiss luxury watch manufacturer", "You never actually own a Patek Philippe"],
-            ["Audemars Piguet", "luxury_watches", "Swiss luxury watch brand", "To break the rules, you must first master them"],
-            ["Omega", "luxury_watches", "Swiss luxury watch manufacturer", "Precision"],
-            ["TAG Heuer", "luxury_watches", "Swiss luxury watches", "Don't crack under pressure"],
-            ["Breitling", "luxury_watches", "Swiss luxury watchmaker", "Instruments for Professionals"],
-            ["IWC", "luxury_watches", "Swiss luxury watch company", "Engineered for men"],
-            ["Jaeger-LeCoultre", "luxury_watches", "Swiss luxury watch manufacturer", "The World's Most Complicated Watches"],
-            ["Vacheron Constantin", "luxury_watches", "Swiss luxury watch manufacturer", "One of Not Many"],
-            ["A. Lange & Söhne", "luxury_watches", "German luxury watch manufacturer", "When nothing else will do"],
-            # 时尚首饰
-            ["Pandora", "fashion_jewelry", "Danish jewelry brand charm bracelets", "Be Love"],
-            ["Swarovski", "fashion_jewelry", "Austrian crystal jewelry and accessories", "Unleash Your Light"],
-            ["Daniel Wellington", "fashion_watches", "Swedish watch brand minimalist design", "Live the moment"],
-            ["Alex and Ani", "fashion_jewelry", "American jewelry brand spiritual bracelets", "Positive Energy"],
-            ["Kendra Scott", "fashion_jewelry", "American jewelry designer colorful stones", "Live colorfully"],
-            ["Monica Vinader", "fashion_jewelry", "British jewelry brand contemporary design", "Everyday luxury"],
-            ["Mejuri", "fashion_jewelry", "Canadian jewelry brand everyday luxury", "Everyday fine"],
-            ["Gorjana", "fashion_jewelry", "California jewelry brand layered necklaces", "Live your layer"],
-            ["Kate Spade", "fashion_jewelry", "American fashion accessories jewelry", "Live colorfully"],
-            ["Marc Jacobs", "fashion_jewelry", "American fashion designer accessories", "Marc Jacobs"],
-            # 珠宝定制
-            ["Blue Nile", "diamond_jewelry", "online diamond jewelry retailer", "Extraordinary diamonds for extraordinary moments"],
-            ["James Allen", "diamond_jewelry", "online engagement ring retailer", "See it. Love it. Own it."],
-            ["Brilliant Earth", "diamond_jewelry", "ethical diamond jewelry", "Brilliant Earth"],
-            ["With Clarity", "diamond_jewelry", "lab-grown diamond jewelry", "Diamonds. Redefined."],
-            ["Clean Origin", "diamond_jewelry", "lab-created diamond jewelry", "Grown with love"],
-            ["Ritani", "diamond_jewelry", "engagement rings and wedding bands", "Love is in the details"],
-            ["Vrai", "diamond_jewelry", "lab-grown diamond jewelry", "Created, not mined"],
-            ["Catbird", "jewelry", "Brooklyn-based jewelry designer", "Made in Brooklyn"],
-            ["Wwake", "jewelry", "contemporary fine jewelry designer", "Wwake"],
-            ["Jacquie Aiche", "jewelry", "California jewelry designer bohemian luxury", "Jacquie Aiche"],
-            # 中国珠宝品牌
-            ["周大福", "jewelry", "香港珠宝品牌黄金钻石", "心意足金"],
-            ["周生生", "jewelry", "香港珠宝品牌传统工艺", "传承经典"],
-            ["老凤祥", "jewelry", "中国传统珠宝品牌黄金首饰", "老凤祥，真金不怕火炼"],
-            ["六福珠宝", "jewelry", "香港珠宝品牌时尚设计", "六福临门"],
-            ["潘多拉", "jewelry", "丹麦珠宝品牌串珠手链", "表达你的故事"],
-            ["周大生", "jewelry", "中国珠宝品牌钻石首饰", "爱就在一起"],
-            ["金伯利", "jewelry", "中国钻石珠宝品牌", "只为更好的你"],
-            ["戴比尔斯", "diamond_jewelry", "钻石开采珠宝品牌", "钻石恒久远，一颗永流传"],
-            ["施华洛世奇", "crystal_jewelry", "奥地利水晶珠宝品牌", "释放你的光芒"],
-            ["谢瑞麟", "jewelry", "香港珠宝设计师品牌", "艺术珠宝"],
-            # 奢侈品配饰
-            ["Goyard", "luxury_accessories", "French luxury leather goods", "Goyard"],
-            ["Moynat", "luxury_accessories", "French luxury leather goods", "Moynat"],
-            ["Berluti", "luxury_accessories", "French luxury leather goods", "Berluti"],
-            ["Valextra", "luxury_accessories", "Italian luxury leather goods", "Milanese excellence since 1937"],
-            ["Loewe", "luxury_accessories", "Spanish luxury leather goods", "Craft"],
-            ["Brunello Cucinelli", "luxury_fashion", "Italian luxury fashion cashmere", "Humanistic Enterprise"],
-            ["Loro Piana", "luxury_fashion", "Italian luxury textile and clothing", "Excellence in natural fibers"],
-            ["Kiton", "luxury_fashion", "Italian luxury menswear", "The most beautiful thing made by man"],
-            ["Zegna", "luxury_fashion", "Italian luxury menswear", "What makes a man"],
-            ["Brioni", "luxury_fashion", "Italian luxury menswear", "Roman style"],
-            # 新兴奢侈品牌
-            ["Jacquemus", "luxury_fashion", "French luxury fashion house", "La Montagne"],
-            ["Ganni", "luxury_fashion", "Danish fashion brand", "Ganni"],
-            ["Staud", "luxury_fashion", "American fashion brand", "Staud"],
-            ["Cult Gaia", "luxury_accessories", "American accessories brand", "Cult Gaia"],
-            ["Rosantica", "jewelry", "Italian jewelry brand", "Rosantica"],
-            ["Alighieri", "jewelry", "British jewelry brand", "The Inferno"],
-            ["Lizzie Fortunato", "jewelry", "American jewelry brand", "Lizzie Fortunato"],
-            ["Aurate", "jewelry", "American jewelry brand", "Accessible luxury"],
-            ["AUrate New York", "jewelry", "New York jewelry brand", "Radically responsible luxury"],
-            ["Missoma", "jewelry", "British jewelry brand", "Missoma"]
-        ]
-        # 转换为DataFrame
-        df = pd.DataFrame(data, columns=['brand', 'category', 'description', 'slogan'])
-        # 创建搜索文本（组合描述信息）
-        df['search_text'] = df['brand'] + ' ' + df['category'] + ' ' + df['description']
-        return df.to_dict('records')
-    def build_index(self, data):
-        """构建FAISS索引"""
-        print("🔨 Building FAISS index...")
-        # 提取搜索文本
-        texts = [item['search_text'] for item in data]
-        # 生成embeddings
-        embeddings = self.encoder.encode(texts, show_progress_bar=True)
-        # 构建索引
-        self.index = faiss.IndexFlatIP(384)  # 使用内积相似度
-        self.index.add(embeddings.astype('float32'))
-        # 保存数据
-        self.slogans = data
-        print(f"✅ Index built with {len(data)} slogans")
-    def search(self, query, k=5):
-        """搜索相似标语"""
-        if not self.index:
-            raise ValueError("Index not built yet!")
-        # 编码查询
-        query_embedding = self.encoder.encode([query])
-        # 搜索
-        scores, indices = self.index.search(query_embedding.astype('float32'), k)
-        # 返回结果
-        results = []
-        for score, idx in zip(scores[0], indices[0]):
-            if idx < len(self.slogans):
-                result = self.slogans[idx].copy()
-                result['similarity_score'] = float(score)
-                results.append(result)
-        return results
-    def save(self, path="slogan_db"):
-        """保存数据库"""
-        # 保存FAISS索引
-        faiss.write_index(self.index, f"{path}.faiss")
-        # 保存标语数据
-        with open(f"{path}.json", 'w', encoding='utf-8') as f:
-            json.dump(self.slogans, f, ensure_ascii=False, indent=2)
-        print(f"💾 Database saved to {path}")
-    def load(self, path="slogan_db"):
-        """加载数据库"""
-        try:
-            # 加载FAISS索引
-            self.index = faiss.read_index(f"{path}.faiss")
-            # 加载标语数据
-            with open(f"{path}.json", 'r', encoding='utf-8') as f:
-                self.slogans = json.load(f)
-            print(f"📂 Database loaded from {path}")
-            return True
-        except:
-            print(f"❌ Failed to load database from {path}")
-            return False
-def main():
-    """Load or build the slogan database, then print sample search results."""
-    print("🚀 Creating Slogan Database...")
-    # Initialize
-    db = SloganDatabase()
-    # Try to load an existing database; build and save one if that fails
-    if not db.load():
-        print("📊 Creating new database...")
-        # Create the dataset
-        data = db.create_dataset()
-        # Build the index
-        db.build_index(data)
-        # Save the database
-        db.save()
-    # Sample queries in Chinese and English
-    test_queries = [
-        "钻石订婚戒指",
-        "奢侈品手袋",
-        "瑞士手表品牌",
-        "珍珠首饰",
-        "黄金项链",
-        "时尚耳环",
-        "luxury jewelry brand",
-        "designer handbag",
-        "crystal accessories",
-        "wedding rings"
-    ]
-    print("\n🔍 Testing searches...")
-    for query in test_queries:
-        print(f"\n查询: {query}")
-        print("-" * 40)
-        results = db.search(query, k=3)
-        for i, result in enumerate(results, 1):
-            print(f"{i}. {result['brand']} ({result['category']})")
-            print(f"   描述: {result['description']}")
-            print(f"   标语: {result['slogan']}")
-            print(f"   相似度: {result['similarity_score']:.3f}")
-            print()
-if __name__ == "__main__":
-    main()

reward_model.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from transformers import CLIPProcessor, CLIPModel, TrainingArguments, Trainer
+import datasets, torch, json, glob
+model=CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+processor=CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+data=[]
+for f in glob.glob("human_prefs/*.json"):
+    j=json.load(open(f)); data.append(j)  # {"prompt":…, "good":img_path, "bad":img_path}
+dataset=datasets.Dataset.from_list(data)
+def preprocess(ex):
+    inputs=processor(text=[ex["prompt"]*2], images=[ex["good"],ex["bad"]], return_tensors="pt")
+    inputs["labels"]=torch.tensor([1,0])
+    return inputs
+dataset=dataset.map(preprocess,remove_columns=dataset.column_names)
+args=TrainingArguments("rm_ckpt",per_device_train_batch_size=2,fp16=True,learning_rate=5e-6,epochs=3)
+trainer=Trainer(model,args,train_dataset=dataset)
+trainer.train(); model.save_pretrained("rm")

sft_train.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import torch, json
+from datasets import load_dataset, Dataset
+from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
+from peft import get_peft_model, LoraConfig, TaskType
+# Load your dataset
+data = [json.loads(l) for l in open("data/sft_data.jsonl")]
+dataset = Dataset.from_list(data)
+# Load model & tokenizer
+base_model = "meta-llama/Llama-2-7b-hf"  # Or use Mistral, Falcon, etc.
+tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
+model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.float16)
+# Add LoRA (optional)
+lora_config = LoraConfig(task_type=TaskType.CAUSAL_LM, r=8, lora_alpha=32, lora_dropout=0.05,
+                         target_modules=["q_proj", "v_proj"])
+model = get_peft_model(model, lora_config)
+# Preprocessing
+def tokenize(example):
+    prompt = f"### Instruction:\n{example['prompt']}\n\n### Response:\n{example['output']}"
+    return tokenizer(prompt, truncation=True, max_length=512, padding="max_length")
+dataset = dataset.map(tokenize, remove_columns=dataset.column_names)
+# Training setup
+args = TrainingArguments(
+    output_dir="./sft-model",
+    per_device_train_batch_size=2,
+    num_train_epochs=3,
+    fp16=True,
+    evaluation_strategy="no",
+    save_strategy="epoch",
+    logging_steps=20,
+    learning_rate=2e-5,
+    report_to="tensorboard",
+)
+data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
+trainer = Trainer(model=model, args=args, train_dataset=dataset, data_collator=data_collator)
+trainer.train()

fully_fine_tune_stablediffusion/train_lora.py → train_lora.py RENAMED Viewed

File without changes

train_model_test.py DELETED Viewed

@@ -1,238 +0,0 @@
-import os
-import numpy as np
-from datasets import load_dataset
-from PIL import Image, ImageOps, ImageFilter
-from tqdm import tqdm
-import random
-import requests
-import io
-import time
-def download_image(url, timeout=10, retries=2):
-    """Download image from URL with retry mechanism"""
-    for attempt in range(retries):
-        try:
-            headers = {
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
-            }
-            response = requests.get(url, timeout=timeout, headers=headers)
-            if response.status_code == 200:
-                image = Image.open(io.BytesIO(response.content))
-                return image
-            else:
-                return None
-        except Exception as e:
-            if attempt == retries - 1:  # Last attempt
-                print(f"Failed to download {url}: {e}")
-                return None
-            time.sleep(0.5)  # Brief pause before retry
-    return None
-def preprocess_image(image, target_size=512, quality_threshold=0.7):
-    """Preprocess image with various enhancements"""
-    if image is None:
-        return None
-    try:
-        # Convert to RGB if needed
-        if image.mode != 'RGB':
-            image = image.convert('RGB')
-        # Filter out low quality images
-        width, height = image.size
-        if min(width, height) < target_size * quality_threshold:
-            return None
-        # Center crop to square if not already
-        if width != height:
-            size = min(width, height)
-            left = (width - size) // 2
-            top = (height - size) // 2
-            image = image.crop((left, top, left + size, top + size))
-        # Resize to target size
-        image = image.resize((target_size, target_size), Image.Resampling.LANCZOS)
-        # Enhance image quality
-        # Slightly sharpen
-        image = image.filter(ImageFilter.UnsharpMask(radius=0.5, percent=120, threshold=3))
-        # Auto-adjust levels
-        image = ImageOps.autocontrast(image, cutoff=1)
-        return image
-    except Exception as e:
-        print(f"Error preprocessing image: {e}")
-        return None
-def clean_prompt(prompt):
-    """Clean and normalize prompts"""
-    if not prompt:
-        return None
-    # Remove excessive whitespace
-    prompt = ' '.join(prompt.split())
-    # Remove common artifacts
-    prompt = prompt.replace('  ', ' ')
-    prompt = prompt.strip(' .,;:')
-    # Filter out very short or very long prompts
-    words = prompt.split()
-    if len(words) < 3 or len(words) > 50:
-        return None
-    return prompt
-def prepare_dreambooth_data():
-    """Stream LAION samples and save cleaned image/caption pairs to disk.
-
-    Examines at most 1000 streamed samples and stops early once 100 valid
-    pairs are saved. Each saved sample produces ``image_NNNN.jpg``,
-    ``image_NNNN.txt`` (caption) and ``image_NNNN_meta.txt`` in
-    ``./laion_dataset``.
-
-    Returns:
-        The dataset directory path, regardless of how many samples were saved.
-    """
-    # Load dataset
-    print("Loading LAION dataset...")
-    dataset = load_dataset("laion/laion2B-en-aesthetic", split="train", streaming=True)
-    # Create directory structure
-    data_dir = "./laion_dataset"
-    os.makedirs(data_dir, exist_ok=True)
-    valid_samples = 0
-    processed_count = 0
-    max_samples = 1000  # Limit total samples to process
-    print(f"Starting to process up to {max_samples} samples...")
-    # Process images with preprocessing
-    for idx, sample in enumerate(tqdm(dataset, desc="Processing LAION samples")):
-        if processed_count >= max_samples:
-            break
-        # Counts every examined sample, including the ones skipped below
-        processed_count += 1
-        try:
-            # Get URL and text from LAION format
-            image_url = sample.get('URL', '')
-            text_prompt = sample.get('TEXT', '')
-            if not image_url or not text_prompt:
-                continue
-            # Clean prompt first
-            prompt = clean_prompt(text_prompt)
-            if prompt is None:
-                continue
-            # Download image from URL
-            print(f"Downloading image {valid_samples + 1}: {image_url[:50]}...")
-            image = download_image(image_url)
-            if image is None:
-                continue
-            # Preprocess downloaded image
-            processed_image = preprocess_image(image)
-            if processed_image is None:
-                continue
-            # Save processed image
-            image_path = os.path.join(data_dir, f"image_{valid_samples:04d}.jpg")
-            processed_image.save(image_path, "JPEG", quality=95, optimize=True)
-            # Save cleaned caption
-            caption_path = os.path.join(data_dir, f"image_{valid_samples:04d}.txt")
-            with open(caption_path, 'w', encoding='utf-8') as f:
-                f.write(prompt)
-            valid_samples += 1
-            # Optional: Add metadata file
-            # valid_samples was just incremented, so subtract 1 to reuse the
-            # index of the image and caption written above.
-            metadata_path = os.path.join(data_dir, f"image_{valid_samples-1:04d}_meta.txt")
-            with open(metadata_path, 'w', encoding='utf-8') as f:
-                f.write(f"URL: {image_url}\n")
-                f.write(f"Aesthetic: {sample.get('aesthetic', 'N/A')}\n")
-                f.write(f"Width: {sample.get('WIDTH', 'N/A')}\n")
-                f.write(f"Height: {sample.get('HEIGHT', 'N/A')}\n")
-            # Stop if we have enough samples
-            if valid_samples >= 100:  # Adjust this number as needed
-                break
-        except Exception as e:
-            # Best effort: a failing sample is reported and skipped
-            print(f"Error processing sample {idx}: {e}")
-            continue
-    print(f"Processed {processed_count} samples, saved {valid_samples} valid images to {data_dir}")
-    return data_dir
-def create_demo_dataset():
-    """Create demo dataset as last resort"""
-    print("Creating demo dataset...")
-    data_dir = "./demo_dataset"
-    os.makedirs(data_dir, exist_ok=True)
-    demo_prompts = [
-        "a beautiful landscape with mountains",
-        "portrait of a person with detailed features",
-        "abstract colorful digital artwork",
-        "modern architecture building design",
-        "natural forest scene with trees",
-        "urban cityscape at sunset",
-        "artistic oil painting style",
-        "vintage photography aesthetic",
-        "minimalist geometric composition",
-        "vibrant surreal art piece"
-    ]
-    for idx, prompt in enumerate(demo_prompts):
-        # Create gradient background
-        color1 = (random.randint(50, 200), random.randint(50, 200), random.randint(50, 200))
-        color2 = (random.randint(100, 255), random.randint(100, 255), random.randint(100, 255))
-        image = Image.new('RGB', (512, 512), color1)
-        # Save files
-        image_path = os.path.join(data_dir, f"image_{idx:04d}.jpg")
-        image.save(image_path, "JPEG", quality=95)
-        caption_path = os.path.join(data_dir, f"image_{idx:04d}.txt")
-        with open(caption_path, 'w', encoding='utf-8') as f:
-            f.write(prompt)
-    print(f"Created {len(demo_prompts)} demo samples")
-    return data_dir
-# Main execution with fallback
-def main():
-    """Prepare the LAION dataset and print the DreamBooth training command.
-
-    The command is only printed, never executed.
-    """
-    # NOTE(review): only prepare_dreambooth_data is called here;
-    # create_demo_dataset is not used as a fallback in this function.
-    data_dir = prepare_dreambooth_data()
-    # Generate training command
-    training_command = f"""
-accelerate launch \\
-  --deepspeed_config_file ds_config.json \\
-  diffusers/examples/dreambooth/train_dreambooth.py \\
-    --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" \\
-    --instance_data_dir="{data_dir}" \\
-    --instance_prompt="a high quality image" \\
-    --output_dir="./laion-model" \\
-    --resolution=512 \\
-    --train_batch_size=1 \\
-    --gradient_accumulation_steps=1 \\
-    --gradient_checkpointing \\
-    --learning_rate=5e-6 \\
-    --lr_scheduler="constant" \\
-    --lr_warmup_steps=0 \\
-    --max_train_steps=400 \\
-    --mixed_precision="fp16" \\
-    --checkpointing_steps=100 \\
-    --checkpoints_total_limit=1 \\
-    --report_to="tensorboard" \\
-    --logging_dir="./laion-model/logs"
-"""
-    print(f"\n✅ Dataset prepared in: {data_dir}")
-    print("🚀 Run this command to train:")
-    print(training_command)
-if __name__ == "__main__":
-    main()