axrzce committed (verified)
Commit 338d95d · 1 Parent(s): 459699b

Deploy from GitHub main

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +2 -33
  2. .github/workflows/deploy-to-hf-spaces.yml +52 -0
  3. .gitignore +103 -0
  4. DEPLOYMENT_CHECKLIST.md +147 -0
  5. README.md +366 -9
  6. docs/PHASE1D_EVALUATION_GUIDE.md +205 -0
  7. docs/PHASE1E_LORA_GUIDE.md +270 -0
  8. docs/PHASE1_USAGE.md +454 -0
  9. docs/PHASE2A_AUDIO_TO_IMAGE_GUIDE.md +307 -0
  10. docs/PHASE2B_DATA_TO_IMAGE_GUIDE.md +271 -0
  11. docs/PHASE2C_EMOTION_TO_IMAGE_GUIDE.md +286 -0
  12. docs/PHASE2D_REALTIME_DATA_TO_IMAGE_GUIDE.md +337 -0
  13. docs/PHASE2E_STYLE_REFERENCE_GUIDE.md +271 -0
  14. docs/PHASE3E_PERFORMANCE_GUIDE.md +301 -0
  15. docs/PHASE3_FINAL_DASHBOARD_GUIDE.md +274 -0
  16. docs/PHASE4_DEPLOYMENT_GUIDE.md +110 -0
  17. docs/PHASE4_RUNTIME_OPTIMIZATION.md +111 -0
  18. docs/PROJECT_STRUCTURE.md +158 -0
  19. docs/README.md +38 -0
  20. packages.txt +3 -0
  21. requirements.txt +73 -0
  22. run_phase3_final_dashboard.py +299 -0
  23. src/__init__.py +7 -0
  24. src/config.py +42 -0
  25. src/generators/__init__.py +26 -0
  26. src/generators/compi_phase1_advanced.py +230 -0
  27. src/generators/compi_phase1_text2image.py +117 -0
  28. src/generators/compi_phase1b_advanced_styling.py +338 -0
  29. src/generators/compi_phase1b_styled_generation.py +172 -0
  30. src/generators/compi_phase1d_cli_evaluation.py +341 -0
  31. src/generators/compi_phase1d_evaluate_quality.py +496 -0
  32. src/generators/compi_phase1e_dataset_prep.py +329 -0
  33. src/generators/compi_phase1e_lora_training.py +458 -0
  34. src/generators/compi_phase1e_style_generation.py +406 -0
  35. src/generators/compi_phase1e_style_manager.py +386 -0
  36. src/generators/compi_phase2a_audio_to_image.py +350 -0
  37. src/generators/compi_phase2b_data_to_image.py +432 -0
  38. src/generators/compi_phase2c_emotion_to_image.py +408 -0
  39. src/generators/compi_phase2d_realtime_to_image.py +483 -0
  40. src/generators/compi_phase2e_refimg_to_image.py +578 -0
  41. src/setup_env.py +118 -0
  42. src/ui/__init__.py +14 -0
  43. src/ui/compi_phase3_final_dashboard.py +1709 -0
  44. src/utils/__init__.py +16 -0
  45. src/utils/audio_utils.py +342 -0
  46. src/utils/data_utils.py +654 -0
  47. src/utils/emotion_utils.py +446 -0
  48. src/utils/file_utils.py +149 -0
  49. src/utils/image_utils.py +309 -0
  50. src/utils/logging_utils.py +45 -0
.gitattributes CHANGED
@@ -1,35 +1,4 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
  *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
  *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text

  *.pt filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
  *.safetensors filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
.github/workflows/deploy-to-hf-spaces.yml ADDED
@@ -0,0 +1,52 @@
+ name: Deploy to HF Spaces (on main)
+
+ on:
+   push:
+     branches: [main]
+   workflow_dispatch: {}
+
+ concurrency:
+   group: deploy-hf-space
+   cancel-in-progress: true
+
+ jobs:
+   deploy:
+     runs-on: ubuntu-latest
+     steps:
+       - name: Checkout repo
+         uses: actions/checkout@v4
+
+       - name: Setup Python
+         uses: actions/setup-python@v5
+         with:
+           python-version: "3.11"
+
+       - name: Install deps
+         run: |
+           python -m pip install -U pip
+           pip install -U "huggingface_hub[cli]"
+
+       - name: Validate app file exists
+         run: |
+           test -f src/ui/compi_phase3_final_dashboard.py || (echo "App file missing" && exit 1)
+
+       - name: Push folder to Space
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+           HF_SPACE_ID: ${{ secrets.HF_SPACE_ID }}
+         run: |
+           python - << 'PY'
+           from huggingface_hub import HfApi
+           import os
+           api = HfApi()
+           repo_id = os.environ["HF_SPACE_ID"]  # e.g. "username/compi-final-dashboard"
+           api.upload_folder(
+               token=os.environ["HF_TOKEN"],
+               repo_id=repo_id,
+               repo_type="space",
+               folder_path=".",
+               path_in_repo="",
+               commit_message="Deploy from GitHub main"
+           )
+           print("Uploaded to Space:", repo_id)
+           PY
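If you need to verify the upload step without waiting for CI, the same `upload_folder` call can be run locally. A minimal sketch, assuming `HF_TOKEN` and `HF_SPACE_ID` are exported in your shell with the same values the workflow reads from repository secrets:

```python
# Local smoke test of the "Push folder to Space" step (illustrative).
import os
from huggingface_hub import HfApi

api = HfApi()
repo_id = os.environ["HF_SPACE_ID"]  # e.g. "username/compi-final-dashboard"
api.upload_folder(
    token=os.environ["HF_TOKEN"],
    repo_id=repo_id,
    repo_type="space",
    folder_path=".",
    path_in_repo="",
    commit_message="Manual deploy (local test)",
)
print("Uploaded to Space:", repo_id)
```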
.gitignore ADDED
@@ -0,0 +1,103 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+ MANIFEST
23
+
24
+ # Virtual Environments
25
+ compi-env/
26
+ venv/
27
+ env/
28
+ ENV/
29
+ env.bak/
30
+ venv.bak/
31
+
32
+ # IDE
33
+ .vscode/
34
+ .idea/
35
+ *.swp
36
+ *.swo
37
+ *~
38
+
39
+ # Jupyter Notebook
40
+ .ipynb_checkpoints
41
+
42
+ # Environment variables
43
+ .env
44
+ .env.local
45
+ .env.*.local
46
+
47
+ # Data files (large datasets)
48
+ data/raw/
49
+ data/processed/
50
+ *.csv
51
+ *.json
52
+ *.parquet
53
+ *.h5
54
+ *.hdf5
55
+
56
+ # Model files (large pretrained models)
57
+ models/pretrained/
58
+ *.pth
59
+ *.pt
60
+ *.ckpt
61
+ *.safetensors
62
+
63
+ # Generated outputs
64
+ outputs/images/
65
+ outputs/audio/
66
+ outputs/videos/
67
+ *.wav
68
+ *.mp3
69
+ *.mp4
70
+ *.avi
71
+ *.png
72
+ *.jpg
73
+ *.jpeg
74
+ *.gif
75
+
76
+ # Logs
77
+ logs/
78
+ *.log
79
+
80
+ # Cache
81
+ .cache/
82
+ *.cache
83
+
84
+ # OS
85
+ .DS_Store
86
+ .DS_Store?
87
+ ._*
88
+ .Spotlight-V100
89
+ .Trashes
90
+ ehthumbs.db
91
+ Thumbs.db
92
+
93
+ # Temporary files
94
+ *.tmp
95
+ *.temp
96
+ temp/
97
+ tmp/
98
+
99
+ # Weights & Biases
100
+ wandb/
101
+
102
+ # Hugging Face cache
103
+ .cache/huggingface/
DEPLOYMENT_CHECKLIST.md ADDED
@@ -0,0 +1,147 @@
1
+ # 🚀 CompI Phase 4 Deployment Checklist
2
+
3
+ ## Prerequisites
4
+ - [ ] GitHub account with your CompI repository
5
+ - [ ] Hugging Face account ([sign up here](https://huggingface.co/join))
6
+ - [ ] OpenWeatherMap API key (optional, for real-time weather data)
7
+
8
+ ## Phase 4.A: Repository Preparation ✅
9
+ - [x] `packages.txt` created with system dependencies
10
+ - [x] `.gitattributes` created for Git LFS support
11
+ - [x] `requirements.txt` verified and ready
12
+ - [x] GitHub Actions workflow created
13
+
14
+ ## Phase 4.B: Hugging Face Space Setup
15
+
16
+ ### Step 1: Create Space
17
+ 1. [ ] Go to [Hugging Face Spaces](https://huggingface.co/spaces)
18
+ 2. [ ] Click "Create new Space"
19
+ 3. [ ] Fill in details:
20
+ - **Owner**: Your username
21
+ - **Space name**: `compi-final-dashboard` (or your choice)
22
+ - **License**: Apache 2.0 (recommended)
23
+ - **SDK**: Streamlit
24
+ - **Hardware**: CPU basic (free)
25
+ - **Visibility**: Public
26
+
27
+ ### Step 2: Configure Space
28
+ 4. [ ] In your new Space, click "Files" → "README.md" → Edit
29
+ 5. [ ] Replace content with this header:
30
+
31
+ ```markdown
32
+ ---
33
+ title: CompI — Final Dashboard
34
+ emoji: 🎨
35
+ sdk: streamlit
36
+ app_file: src/ui/compi_phase3_final_dashboard.py
37
+ pinned: false
38
+ ---
39
+
40
+ # CompI - Multimodal AI Art Generation Platform
41
+
42
+ The ultimate creative platform combining text, audio, data, emotion, and real-time inputs for AI art generation.
43
+
44
+ ## Features
45
+
46
+ 🧩 **Multimodal Inputs** - Text, Audio, Data, Emotion, Real-time feeds
47
+ 🖼️ **Advanced References** - Multi-image upload with role assignment
48
+ ⚙️ **Model Management** - SD 1.5/SDXL switching, LoRA integration
49
+ 🖼️ **Professional Gallery** - Filtering, rating, annotation system
50
+ 💾 **Preset Management** - Save/load complete configurations
51
+ 📦 **Export System** - Complete bundles with metadata
52
+
53
+ ## Usage
54
+
55
+ 1. Configure your inputs in the "Inputs" tab
56
+ 2. Upload reference images in "Advanced References"
57
+ 3. Choose your model and performance settings
58
+ 4. Generate with intelligent fusion of all inputs
59
+ 5. Review results in the gallery and export bundles
60
+
61
+ Built with Streamlit, PyTorch, and Diffusers.
62
+ ```
63
+
64
+ 6. [ ] Commit the README changes
65
+
66
+ ### Step 3: Add Secrets (Optional)
67
+ 7. [ ] Go to Space Settings → Repository secrets
68
+ 8. [ ] Add secret: `OPENWEATHER_KEY` = `your_api_key_here`
69
+
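Hugging Face Spaces exposes repository secrets to the running app as environment variables, so the dashboard can pick the key up at runtime. A minimal sketch (illustrative; where CompI actually reads this variable is defined in its real-time data code):

```python
# Reading the optional OpenWeatherMap key inside the Space (illustrative).
import os

openweather_key = os.environ.get("OPENWEATHER_KEY")
if not openweather_key:
    print("OPENWEATHER_KEY not set - real-time weather features stay disabled")
```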
70
+ ## Phase 4.C: GitHub Actions Setup
71
+
72
+ ### Step 1: Get Hugging Face Token
73
+ 9. [ ] Go to [HF Settings → Access Tokens](https://huggingface.co/settings/tokens)
74
+ 10. [ ] Click "New token"
75
+ 11. [ ] Name: `CompI Deployment`
76
+ 12. [ ] Type: **Write**
77
+ 13. [ ] Click "Generate"
78
+ 14. [ ] **Copy the token** (you won't see it again!)
79
+
80
+ ### Step 2: Add GitHub Secrets
81
+ 15. [ ] Go to your GitHub repo → Settings → Secrets and variables → Actions
82
+ 16. [ ] Click "New repository secret"
83
+ 17. [ ] Add secret 1:
84
+ - **Name**: `HF_TOKEN`
85
+ - **Secret**: Paste your HF token from step 14
86
+ 18. [ ] Add secret 2:
87
+ - **Name**: `HF_SPACE_ID`
88
+ - **Secret**: `your-username/your-space-name` (e.g., `AXRZCE/compi-final-dashboard`)
89
+
90
+ ## Phase 4.D: Test Deployment
91
+
92
+ ### Step 1: Trigger First Deploy
93
+ 19. [ ] In your GitHub repo, make a small change (e.g., edit README.md)
94
+ 20. [ ] Commit to `main` branch:
95
+ ```bash
96
+ git add .
97
+ git commit -m "Initial deployment setup"
98
+ git push origin main
99
+ ```
100
+
101
+ ### Step 2: Monitor Deployment
102
+ 21. [ ] Go to GitHub repo → Actions tab
103
+ 22. [ ] Watch the "Deploy to HF Spaces (on main)" workflow
104
+ 23. [ ] Verify it completes successfully (green checkmark)
105
+
106
+ ### Step 3: Test Your Space
107
+ 24. [ ] Go to your HF Space URL: `https://your-username-your-space.hf.space`
108
+ 25. [ ] Wait for the app to build (first time takes 5-10 minutes)
109
+ 26. [ ] Test basic functionality:
110
+ - [ ] Enter a text prompt
111
+ - [ ] Generate an image
112
+ - [ ] Check that the interface loads properly
113
+
114
+ ## Phase 4.E: Production Workflow
115
+
116
+ ### For Future Updates
117
+ - [ ] Create feature branches for new development
118
+ - [ ] Test changes locally: `streamlit run src/ui/compi_phase3_final_dashboard.py`
119
+ - [ ] Open Pull Request to `main`
120
+ - [ ] Merge PR → Automatic deployment to HF Space
121
+
122
+ ### Rollback Process
123
+ - [ ] If deployment breaks, revert the commit on `main`
124
+ - [ ] GitHub Actions will automatically redeploy the previous version
125
+
126
+ ## Troubleshooting
127
+
128
+ ### Common Issues
129
+ - **Space won't start**: Check the build logs in HF Space → Logs
130
+ - **GitHub Action fails**: Check repo secrets are set correctly
131
+ - **App crashes**: Verify `app_file` path in Space README is correct
132
+ - **Missing dependencies**: Check `requirements.txt` and `packages.txt`
133
+
134
+ ### Support Resources
135
+ - [Hugging Face Spaces Documentation](https://huggingface.co/docs/hub/spaces)
136
+ - [GitHub Actions Documentation](https://docs.github.com/en/actions)
137
+ - [Streamlit Documentation](https://docs.streamlit.io/)
138
+
139
+ ## Success! 🎉
140
+
141
+ Once complete, your CompI platform will be:
142
+ - ✅ Publicly accessible at your HF Space URL
143
+ - ✅ Automatically deployed on every `main` branch update
144
+ - ✅ Running on free Hugging Face infrastructure
145
+ - ✅ Ready for users worldwide to create multimodal AI art
146
+
147
+ **Share your Space URL**: `https://your-username-your-space.hf.space`
README.md CHANGED
@@ -1,11 +1,368 @@
1
  ---
2
- title: Comp I
3
- emoji: 🔥
4
- colorFrom: green
5
- colorTo: yellow
6
- sdk: static
7
- pinned: false
8
- license: mit
9
- ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
+ # CompI - Compositional Intelligence Project
2
+
3
+ A multi-modal AI system that generates creative content by combining text, images, audio, and emotional context.
4
+
5
+ Note: All documentation has been consolidated under docs/. See docs/README.md for an index of guides.
6
+
7
+ ## 🚀 Project Overview
8
+
9
+ CompI (Compositional Intelligence) is designed to create rich, contextually-aware content by:
10
+
11
+ - Processing text prompts with emotional analysis
12
+ - Generating images using Stable Diffusion
13
+ - Creating audio compositions
14
+ - Combining multiple modalities for enhanced creative output
15
+
16
+ ## 📁 Project Structure
17
+
18
+ ```
19
+ Project CompI/
20
+ ├── src/ # Source code
21
+ │ ├── generators/ # Image generation modules
22
+ │ ├── models/ # Model implementations
23
+ │ ├── utils/ # Utility functions
24
+ │ ├── data/ # Data processing
25
+ │ ├── ui/ # User interface components
26
+ │ └── setup_env.py # Environment setup script
27
+ ├── notebooks/ # Jupyter notebooks for experimentation
28
+ ├── data/ # Dataset storage
29
+ ├── outputs/ # Generated content
30
+ ├── tests/ # Unit tests
31
+ ├── run_*.py # Convenience scripts for generators
32
+ ├── requirements.txt # Python dependencies
33
+ └── README.md # This file
34
+ ```
35
+
36
+ ## 🛠️ Setup Instructions
37
+
38
+ ### 1. Create Virtual Environment
39
+
40
+ ```bash
41
+ # Using conda (recommended for ML projects)
42
+ conda create -n compi-env python=3.10 -y
43
+ conda activate compi-env
44
+
45
+ # OR using venv
46
+ python -m venv compi-env
47
+ # Windows
48
+ compi-env\Scripts\activate
49
+ # Linux/Mac
50
+ source compi-env/bin/activate
51
+ ```
52
+
53
+ ### 2. Install Dependencies
54
+
55
+ **For GPU users (recommended for faster generation):**
56
+
57
+ ```bash
58
+ # First, check your CUDA version
59
+ nvidia-smi
60
+
61
+ # Install PyTorch with CUDA support first (replace cu121 with your CUDA version)
62
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
63
+
64
+ # Then install remaining requirements
65
+ pip install -r requirements.txt
66
+ ```
67
+
68
+ **For CPU-only users:**
69
+
70
+ ```bash
71
+ pip install -r requirements.txt
72
+ ```
73
+
74
+ ### 3. Test Installation
75
+
76
+ ```bash
77
+ python src/test_setup.py
78
+ ```
79
+
80
+ ## 🚀 Quick Start
81
+
82
+ ### Phase 1: Text-to-Image Generation
83
+
84
+ ```bash
85
+ # Basic text-to-image generation
86
+ python run_basic_generation.py "A magical forest, digital art"
87
+
88
+ # Advanced generation with style conditioning
89
+ python run_advanced_styling.py "dragon in a crystal cave" --style "oil painting" --mood "dramatic"
90
+
91
+ # Interactive style selection
92
+ python run_styled_generation.py
93
+
94
+ # Quality evaluation and analysis
95
+ python run_evaluation.py
96
+
97
+ # Personal style training with LoRA
98
+ python run_lora_training.py --dataset-dir datasets/my_style
99
+
100
+ # Generate with personal style
101
+ python run_style_generation.py --lora-path lora_models/my_style/checkpoint-1000 "artwork in my_style"
102
+ ```
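For orientation, the generation these Phase 1 scripts wrap corresponds to a standard diffusers text-to-image call. A minimal sketch, assuming the project's default model `runwayml/stable-diffusion-v1-5` (the run scripts add logging, metadata, and styling on top):

```python
# Minimal text-to-image sketch with diffusers (illustrative only).
import torch
from diffusers import StableDiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

image = pipe(
    "A magical forest, digital art",
    num_inference_steps=30,
    guidance_scale=7.5,
).images[0]
image.save("outputs/magical_forest.png")
```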
103
+
104
+ ### Phase 2.A: Audio-to-Image Generation 🎵
105
+
106
+ ```bash
107
+ # Install audio processing dependencies
108
+ pip install openai-whisper
109
+
110
+ # Streamlit UI (Recommended)
111
+ streamlit run src/ui/compi_phase2a_streamlit_ui.py
112
+
113
+ # Command line generation
114
+ python run_phase2a_audio_to_image.py --prompt "mystical forest" --audio "music.mp3"
115
+
116
+ # Interactive mode
117
+ python run_phase2a_audio_to_image.py --interactive
118
+
119
+ # Test installation
120
+ python src/test_phase2a.py
121
+
122
+ # Run examples
123
+ python examples/phase2a_audio_examples.py --example all
124
+ ```
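Phase 2.A combines Whisper transcription with librosa audio features. As a rough illustration of the librosa side (the exact features used by `compi_phase2a_audio_to_image.py` may differ), tempo and energy can be summarized into prompt language like this:

```python
# Illustrative audio-feature summary with librosa (not the generator's exact code).
import librosa
import numpy as np

y, sr = librosa.load("music.mp3")
tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
tempo = float(np.atleast_1d(tempo)[0])
energy = float(np.mean(librosa.feature.rms(y=y)))

descriptor = "fast-paced, energetic" if tempo > 120 else "slow, contemplative"
print(f"mystical forest, {descriptor}, around {tempo:.0f} BPM (energy {energy:.3f})")
```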
125
+
126
+ ### Phase 2.B: Data/Logic-to-Image Generation 📊
127
+
128
+ ```bash
129
+ # Streamlit UI (Recommended)
130
+ streamlit run src/ui/compi_phase2b_streamlit_ui.py
131
+
132
+ # Command line generation with CSV data
133
+ python run_phase2b_data_to_image.py --prompt "data visualization" --csv "data.csv"
134
+
135
+ # Mathematical formula generation
136
+ python run_phase2b_data_to_image.py --prompt "mathematical harmony" --formula "np.sin(np.linspace(0, 4*np.pi, 100))"
137
+
138
+ # Batch processing
139
+ python run_phase2b_data_to_image.py --batch-csv "data_folder/" --prompt "scientific patterns"
140
+
141
+ # Interactive mode
142
+ python run_phase2b_data_to_image.py --interactive
143
+ ```
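Conceptually, the `--formula` path evaluates the expression into an array and turns its statistics into conditioning text. A hedged sketch of that idea, using a restricted `eval` (not necessarily how `compi_phase2b_data_to_image.py` implements it):

```python
# Illustrative formula-to-prompt step (assumes a restricted eval; the real
# generator may parse formulas differently).
import numpy as np

formula = "np.sin(np.linspace(0, 4*np.pi, 100))"
values = eval(formula, {"__builtins__": {}}, {"np": np})

std = float(np.std(values))
mood = "rhythmic, wave-like" if std > 0.5 else "flat, minimal"
print(f"mathematical harmony, {mood} (std {std:.2f}, range {float(np.ptp(values)):.2f})")
```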
144
+
145
+ ### Phase 2.C: Emotional/Contextual Input to Image Generation 🌀
146
+
147
+ ```bash
148
+ # Streamlit UI (Recommended)
149
+ streamlit run src/ui/compi_phase2c_streamlit_ui.py
150
+
151
+ # Command line generation with preset emotion
152
+ python run_phase2c_emotion_to_image.py --prompt "mystical forest" --emotion "mysterious"
153
+
154
+ # Custom emotion generation
155
+ python run_phase2c_emotion_to_image.py --prompt "urban landscape" --emotion "🤩" --type custom
156
+
157
+ # Descriptive emotion generation
158
+ python run_phase2c_emotion_to_image.py --prompt "mountain vista" --emotion "I feel a sense of wonder" --type text
159
+
160
+ # Batch emotion processing
161
+ python run_phase2c_emotion_to_image.py --batch-emotions "joyful,sad,mysterious" --prompt "abstract art"
162
+
163
+ # Interactive mode
164
+ python run_phase2c_emotion_to_image.py --interactive
165
+ ```
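The emotion pipeline is described elsewhere in this commit as TextBlob-based sentiment analysis. A minimal sketch of that mapping (the actual generator also handles preset emotions and emoji):

```python
# Illustrative sentiment-to-mood mapping with TextBlob.
from textblob import TextBlob

text = "I feel a sense of wonder"
polarity = TextBlob(text).sentiment.polarity  # -1.0 (negative) .. 1.0 (positive)

if polarity > 0.3:
    mood = "uplifting, radiant"
elif polarity < -0.3:
    mood = "somber, muted"
else:
    mood = "mysterious, ambiguous"
print(f"mountain vista, {mood} atmosphere")
```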
166
+
167
+ ### Phase 2.D: Real-Time Data Feeds to Image Generation 🌎
168
+
169
+ ```bash
170
+ # Streamlit UI (Recommended)
171
+ streamlit run src/ui/compi_phase2d_streamlit_ui.py
172
+
173
+ # Command line generation with weather data
174
+ python run_phase2d_realtime_to_image.py --prompt "cityscape" --weather --city "Tokyo"
175
+
176
+ # News-driven generation
177
+ python run_phase2d_realtime_to_image.py --prompt "abstract art" --news --category "technology"
178
+
179
+ # Multi-source generation
180
+ python run_phase2d_realtime_to_image.py --prompt "world state" --weather --news --financial
181
+
182
+ # Temporal series generation
183
+ python run_phase2d_realtime_to_image.py --prompt "evolving world" --weather --temporal "0,30,60"
184
+
185
+ # Interactive mode
186
+ python run_phase2d_realtime_to_image.py --interactive
187
+ ```
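For the weather source, the generator needs an OpenWeatherMap key (the `OPENWEATHER_KEY` secret from the deployment checklist). A minimal sketch of the kind of lookup involved; the real module's request parameters and prompt wording may differ:

```python
# Illustrative OpenWeatherMap lookup for real-time conditioning.
import os
import requests

city = "Tokyo"
resp = requests.get(
    "https://api.openweathermap.org/data/2.5/weather",
    params={"q": city, "appid": os.environ["OPENWEATHER_KEY"], "units": "metric"},
    timeout=10,
)
resp.raise_for_status()
weather = resp.json()

condition = weather["weather"][0]["description"]  # e.g. "light rain"
temp = weather["main"]["temp"]
print(f"cityscape of {city}, {condition}, {temp:.0f}°C ambience")
```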
188
+
189
+ ### Phase 2.E: Style Reference/Example Image to AI Art 🖼️
190
+
191
+ ```bash
192
+ # Streamlit UI (Recommended)
193
+ streamlit run src/ui/compi_phase2e_streamlit_ui.py
194
+
195
+ # Command line generation with reference image
196
+ python run_phase2e_refimg_to_image.py --prompt "magical forest" --reference "path/to/image.jpg" --strength 0.6
197
+
198
+ # Web URL reference
199
+ python run_phase2e_refimg_to_image.py --prompt "cyberpunk city" --reference "https://example.com/artwork.jpg"
200
+
201
+ # Batch generation with multiple variations
202
+ python run_phase2e_refimg_to_image.py --prompt "fantasy landscape" --reference "image.png" --num-images 3
203
+
204
+ # Style analysis only
205
+ python run_phase2e_refimg_to_image.py --analyze-only --reference "artwork.jpg"
206
+
207
+ # Interactive mode
208
+ python run_phase2e_refimg_to_image.py --interactive
209
+ ```
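Reference-guided generation of this kind maps naturally onto diffusers' img2img pipeline, where the `--strength 0.6` flag corresponds to the `strength` parameter. A minimal sketch under that assumption (the Phase 2.E generator also runs style analysis on the reference):

```python
# Illustrative img2img call with a style reference image.
import torch
from PIL import Image
from diffusers import StableDiffusionImg2ImgPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

reference = Image.open("path/to/image.jpg").convert("RGB").resize((512, 512))
result = pipe(
    prompt="magical forest",
    image=reference,
    strength=0.6,        # how far the output may drift from the reference
    guidance_scale=7.5,
).images[0]
result.save("outputs/magical_forest_styled.png")
```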
210
+
211
+ ## 🧪 NEW: Ultimate Multimodal Dashboard (True Fusion) 🚀
212
+
213
+ **Revolutionary upgrade with REAL processing of each input type!**
214
+
215
+ ```bash
216
+ # Launch the upgraded dashboard with true multimodal fusion
217
+ python run_ultimate_multimodal_dashboard.py
218
+
219
+ # Or run directly
220
+ streamlit run src/ui/compi_ultimate_multimodal_dashboard.py --server.port 8503
221
+ ```
222
+
223
+ **Key Improvements:**
224
+
225
+ - ✅ **Real Audio Analysis**: Whisper transcription + librosa features
226
+ - ✅ **Actual Data Processing**: CSV analysis + formula evaluation
227
+ - ✅ **True Emotion Analysis**: TextBlob sentiment classification
228
+ - ✅ **Live Real-time Data**: Weather/news API integration
229
+ - ✅ **Advanced References**: img2img + ControlNet processing
230
+ - ✅ **Intelligent Fusion**: Actual content processing (not static keywords)
231
+
232
+ **Access at:** `http://localhost:8503`
233
+
234
+ **See:** `ULTIMATE_MULTIMODAL_DASHBOARD_README.md` for detailed documentation.
235
+
236
+ ## 🖼️ NEW: Phase 3.C Advanced Reference Integration 🚀
237
+
238
+ **Professional multi-reference control with hybrid generation modes!**
239
+
240
+ **Key Features:**
241
+
242
+ - ✅ **Role-Based Reference Assignment**: Select images for style vs structure
243
+ - ✅ **Live ControlNet Previews**: Real-time Canny/Depth preprocessing
244
+ - ✅ **Hybrid Generation Modes**: CN + IMG2IMG simultaneous processing
245
+ - ✅ **Professional Controls**: Independent strength tuning for style/structure
246
+ - ✅ **Seamless Integration**: Works with all CompI multimodal phases
247
+
248
+ **See:** `PHASE3C_ADVANCED_REFERENCE_INTEGRATION.md` for complete documentation.
249
+
250
+ ## 🗂️ NEW: Phase 3.D Professional Workflow Manager 🚀
251
+
252
+ **Complete creative workflow platform with unified logging, presets, and export bundles!**
253
+
254
+ **Key Features:**
255
+
256
+ - ✅ **Unified Run Logging**: Auto-ingests from all CompI phases
257
+ - ✅ **Professional Gallery**: Advanced filtering and search
258
+ - ✅ **Preset System**: Save/load complete generation configs
259
+ - ✅ **Export Bundles**: ZIP packages with metadata and reproducibility
260
+ - ✅ **Annotation System**: Ratings, tags, and notes for workflow management
261
+
262
+ **Launch:** `python run_phase3d_workflow_manager.py` | **Access:** `http://localhost:8504`
263
+
264
+ **See:** `docs/PHASE3D_WORKFLOW_MANAGER_GUIDE.md` for complete documentation.
265
+
266
+ ## ⚙️ NEW: Phase 3.E Performance, Model Management & Reliability 🚀
267
+
268
+ **Production-grade performance optimization, model switching, and intelligent reliability!**
269
+
270
+ **Key Features:**
271
+
272
+ - ✅ **Model Manager**: Dynamic SD 1.5 ↔ SDXL switching with auto-availability checking
273
+ - ✅ **LoRA Integration**: Universal LoRA loading with scale control across all models
274
+ - ✅ **Performance Controls**: xFormers, attention slicing, VAE optimizations, precision control
275
+ - ✅ **VRAM Monitoring**: Real-time GPU memory usage tracking and alerts
276
+ - ✅ **Reliability Engine**: OOM-safe auto-retry with intelligent fallbacks
277
+ - ✅ **Batch Processing**: Seed-controlled batch generation with memory management
278
+ - ✅ **Upscaler Integration**: Optional 2x latent upscaling for enhanced quality
279
+
280
+ **Launch:** `python run_phase3e_performance_manager.py` | **Access:** `http://localhost:8505`
281
+
282
+ **See:** `docs/PHASE3E_PERFORMANCE_GUIDE.md` for complete documentation.
283
+
284
+ ## 🧪 ULTIMATE: Phase 3 Final Dashboard - Complete Integration! 🎉
285
+
286
+ **The ultimate CompI interface that integrates ALL Phase 3 components into one unified creative environment!**
287
+
288
+ **Complete Feature Integration:**
289
+
290
+ - ✅ **🧩 Multimodal Fusion (3.A/3.B)**: Real audio, data, emotion, real-time processing
291
+ - ✅ **🖼️ Advanced References (3.C)**: Role assignment, ControlNet, live previews
292
+ - ✅ **⚙️ Performance Management (3.E)**: Model switching, LoRA, VRAM monitoring
293
+ - ✅ **🎛️ Intelligent Generation**: Hybrid modes with automatic fallback strategies
294
+ - ✅ **🖼️ Professional Gallery (3.D)**: Filtering, rating, annotation system
295
+ - ✅ **💾 Preset Management (3.D)**: Save/load complete configurations
296
+ - ✅ **📦 Export System (3.D)**: Complete bundles with metadata and reproducibility
297
+
298
+ **Professional Workflow:**
299
+
300
+ 1. **Configure multimodal inputs** (text, audio, data, emotion, real-time)
301
+ 2. **Upload and assign references** (style vs structure roles)
302
+ 3. **Choose model and optimize performance** (SD 1.5/SDXL, LoRA, optimizations)
303
+ 4. **Generate with intelligent fusion** (automatic mode selection)
304
+ 5. **Review and annotate results** (gallery with rating/tagging)
305
+ 6. **Save presets and export bundles** (complete reproducibility)
306
+
307
+ **Launch:** `python run_phase3_final_dashboard.py` | **Access:** `http://localhost:8506`
308
+
309
+ **See:** `docs/PHASE3_FINAL_DASHBOARD_GUIDE.md` for complete documentation.
310
+
311
  ---
 
 
 
 
 
 
 
 
312
 
313
+ ## 🎯 **CompI Project Status: COMPLETE**
314
+
315
+ **CompI has achieved its ultimate vision: the world's most comprehensive and production-ready multimodal AI art generation platform!**
316
+
317
+ ### **✅ All Phases Complete:**
318
+
319
+ - **✅ Phase 1**: Foundation (text-to-image, styling, evaluation, LoRA training)
320
+ - **✅ Phase 2**: Multimodal integration (audio, data, emotion, real-time, references)
321
+ - **✅ Phase 3**: Advanced features (fusion dashboard, advanced references, workflow management, performance optimization)
322
+
323
+ ### **🚀 What CompI Offers:**
324
+
325
+ - **Complete Creative Platform**: From generation to professional workflow management
326
+ - **Production-Grade Reliability**: Robust error handling and performance optimization
327
+ - **Professional Tools**: Industry-standard features for serious creative and commercial work
328
+ - **Universal Compatibility**: Works across different hardware configurations
329
+ - **Extensible Foundation**: Ready for future enhancements and integrations
330
+
331
+ **CompI is now the ultimate multimodal AI art generation platform - ready for professional creative work!** 🎨✨
332
+
333
+ ## 🎯 Core Features
334
+
335
+ - **Text Analysis**: Emotion detection and sentiment analysis
336
+ - **Image Generation**: Stable Diffusion integration with advanced conditioning
337
+ - **Audio Processing**: Music and sound analysis with Whisper integration
338
+ - **Data Processing**: CSV analysis and mathematical formula evaluation
339
+ - **Emotion Processing**: Preset emotions, custom emotions, emoji, and contextual analysis
340
+ - **Real-Time Integration**: Live weather, news, and financial data feeds
341
+ - **Style Reference**: Upload/URL image guidance with AI-powered style analysis
342
+ - **Multi-modal Fusion**: Combining text, audio, data, emotions, real-time feeds, and visual references
343
+ - **Pattern Recognition**: Automatic detection of trends, correlations, and seasonality
344
+ - **Poetic Interpretation**: Converting data patterns and emotions into artistic language
345
+ - **Color Psychology**: Emotion-based color palette generation and conditioning
346
+ - **Temporal Awareness**: Time-sensitive data processing and evolution tracking
347
+
348
+ ## 🔧 Tech Stack
349
+
350
+ - **Deep Learning**: PyTorch, Transformers, Diffusers
351
+ - **Audio**: librosa, soundfile
352
+ - **UI**: Streamlit/Gradio
353
+ - **Data**: pandas, numpy
354
+ - **Visualization**: matplotlib, seaborn
355
+
356
+ ## 📝 Usage
357
+
358
+ Coming soon - basic usage examples and API documentation.
359
+
360
+ ## 🤝 Contributing
361
+
362
+ This is a development project. Feel free to experiment and extend functionality.
363
+
364
+ ## 📄 License
365
+
366
+ MIT License - see LICENSE file for details.
367
+
368
+ # Project_CompI
docs/PHASE1D_EVALUATION_GUIDE.md ADDED
@@ -0,0 +1,205 @@
1
+ # CompI Phase 1.D: Baseline Output Quality Evaluation Guide
2
+
3
+ ## 🎯 Overview
4
+
5
+ Phase 1.D provides comprehensive tools for systematically evaluating the quality, coherence, and consistency of images generated by your CompI pipeline. This phase is crucial for understanding your model's performance and tracking improvements over time.
6
+
7
+ ## 🛠️ Tools Provided
8
+
9
+ ### 1. **Streamlit Web Interface** (`compi_phase1d_evaluate_quality.py`)
10
+ - **Interactive visual evaluation** with side-by-side image and metadata display
11
+ - **Multi-criteria scoring system** (1-5 stars) for comprehensive assessment
12
+ - **Objective metrics calculation** (perceptual hashes, file size, dimensions)
13
+ - **Persistent evaluation logging** with CSV export
14
+ - **Batch evaluation capabilities** for efficient processing
15
+
16
+ ### 2. **Command-Line Interface** (`compi_phase1d_cli_evaluation.py`)
17
+ - **Batch processing** for automated evaluation workflows
18
+ - **Statistical analysis** and performance summaries
19
+ - **Detailed report generation** with recommendations
20
+ - **Filtering and listing** capabilities for organized review
21
+
22
+ ### 3. **Convenient Launcher** (`run_evaluation.py`)
23
+ - **One-click startup** for the web interface
24
+ - **Automatic environment checking** and error handling
25
+
26
+ ## 📊 Evaluation Criteria
27
+
28
+ The evaluation system uses **5 comprehensive criteria**, each scored on a **1-5 scale**:
29
+
30
+ ### 1. **Prompt Adherence**
31
+ - How well does the image match the text prompt?
32
+ - Scale: 1=Poor match → 5=Perfect match
33
+
34
+ ### 2. **Style Consistency**
35
+ - How well does the image reflect the intended artistic style?
36
+ - Scale: 1=Style not evident → 5=Style perfectly executed
37
+
38
+ ### 3. **Mood & Atmosphere**
39
+ - How well does the image convey the intended mood/atmosphere?
40
+ - Scale: 1=Wrong mood → 5=Perfect mood
41
+
42
+ ### 4. **Technical Quality**
43
+ - Overall image quality (resolution, composition, artifacts)
44
+ - Scale: 1=Poor quality → 5=Excellent quality
45
+
46
+ ### 5. **Creative Appeal**
47
+ - Subjective aesthetic and creative value
48
+ - Scale: 1=Unappealing → 5=Highly appealing
49
+
50
+ ## 🚀 Quick Start
51
+
52
+ ### Web Interface (Recommended for Manual Review)
53
+
54
+ ```bash
55
+ # Install required dependency
56
+ pip install imagehash
57
+
58
+ # Launch the evaluation interface
59
+ python run_evaluation.py
60
+
61
+ # Or run directly
62
+ streamlit run src/generators/compi_phase1d_evaluate_quality.py
63
+ ```
64
+
65
+ The web interface will open at `http://localhost:8501` with:
66
+ - **Single Image Review**: Detailed evaluation of individual images
67
+ - **Batch Evaluation**: Quick scoring for multiple images
68
+ - **Summary Analysis**: Statistics and performance insights
69
+
70
+ ### Command-Line Interface (For Automation)
71
+
72
+ ```bash
73
+ # Analyze existing evaluations
74
+ python src/generators/compi_phase1d_cli_evaluation.py --analyze
75
+
76
+ # List unevaluated images
77
+ python src/generators/compi_phase1d_cli_evaluation.py --list-unevaluated
78
+
79
+ # Batch score all unevaluated images (prompt, style, mood, quality, appeal)
80
+ python src/generators/compi_phase1d_cli_evaluation.py --batch-score 4 3 4 4 3 --notes "Initial baseline evaluation"
81
+
82
+ # Generate detailed report
83
+ python src/generators/compi_phase1d_cli_evaluation.py --report --output evaluation_report.txt
84
+ ```
85
+
86
+ ## 📁 File Structure
87
+
88
+ ```
89
+ outputs/
90
+ ├── [generated images].png # Your CompI-generated images
91
+ ├── evaluation_log.csv # Detailed evaluation data
92
+ └── evaluation_summary.json # Summary statistics
93
+
94
+ src/generators/
95
+ ├── compi_phase1d_evaluate_quality.py # Main Streamlit interface
96
+ └── compi_phase1d_cli_evaluation.py # Command-line tools
97
+
98
+ run_evaluation.py # Convenient launcher
99
+ ```
100
+
101
+ ## 📈 Understanding Your Data
102
+
103
+ ### Evaluation Log (`outputs/evaluation_log.csv`)
104
+
105
+ Contains detailed records with columns:
106
+ - **Image metadata**: filename, prompt, style, mood, seed, variation
107
+ - **Evaluation scores**: All 5 criteria scores (1-5)
108
+ - **Objective metrics**: dimensions, file size, perceptual hashes
109
+ - **Evaluation metadata**: timestamp, notes, evaluator comments
110
+
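The objective metrics recorded in the log come from the `imagehash` dependency plus basic file inspection. A minimal sketch of how such values can be computed for one generated image (field names here are illustrative; the evaluation script defines the real CSV schema):

```python
# Illustrative objective-metric computation for an evaluated image.
import os
from PIL import Image
import imagehash

path = "outputs/magical_forest_digital_art_20241225_143022_seed42.png"
img = Image.open(path)

metrics = {
    "filename": os.path.basename(path),
    "width": img.width,
    "height": img.height,
    "file_size_kb": round(os.path.getsize(path) / 1024, 1),
    "phash": str(imagehash.phash(img)),  # perceptual hash for consistency checks
}
print(metrics)
```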
111
+ ### Key Metrics to Track
112
+
113
+ 1. **Overall Score Trends**: Are your images improving over time?
114
+ 2. **Criteria Performance**: Which aspects (prompt match, style, etc.) need work?
115
+ 3. **Style/Mood Effectiveness**: Which combinations work best?
116
+ 4. **Consistency**: Are similar prompts producing consistent results?
117
+
118
+ ## 🎯 Best Practices
119
+
120
+ ### Systematic Evaluation Workflow
121
+
122
+ 1. **Generate a batch** of images using your CompI tools
123
+ 2. **Evaluate systematically** using consistent criteria
124
+ 3. **Analyze patterns** in the data to identify strengths/weaknesses
125
+ 4. **Adjust generation parameters** based on insights
126
+ 5. **Re-evaluate** to measure improvements
127
+
128
+ ### Evaluation Tips
129
+
130
+ - **Be consistent** in your scoring criteria across sessions
131
+ - **Use notes** to capture specific observations and issues
132
+ - **Evaluate in batches** of similar style/mood for better comparison
133
+ - **Track changes** over time as you refine your generation process
134
+
135
+ ### Interpreting Scores
136
+
137
+ - **4.0+ average**: Excellent performance, ready for production use
138
+ - **3.0-3.9 average**: Good performance, minor improvements needed
139
+ - **2.0-2.9 average**: Moderate performance, significant improvements needed
140
+ - **Below 2.0**: Poor performance, major adjustments required
141
+
142
+ ## 🔧 Advanced Usage
143
+
144
+ ### Filtering and Analysis
145
+
146
+ ```bash
147
+ # Analyze only specific styles
148
+ python src/generators/compi_phase1d_cli_evaluation.py --analyze --style "anime"
149
+
150
+ # List images by mood
151
+ python src/generators/compi_phase1d_cli_evaluation.py --list-all --mood "dramatic"
152
+
153
+ # Generate style-specific report
154
+ python src/generators/compi_phase1d_cli_evaluation.py --report --style "oil painting" --output oil_painting_analysis.txt
155
+ ```
156
+
157
+ ### Custom Evaluation Workflows
158
+
159
+ The evaluation tools are designed to be flexible:
160
+ - **Modify criteria** by editing `EVALUATION_CRITERIA` in the source
161
+ - **Add custom metrics** by extending the `get_image_metrics()` function
162
+ - **Integrate with other tools** using the CSV export functionality
163
+
164
+ ## 📊 Sample Analysis Output
165
+
166
+ ```
167
+ 📊 CompI Phase 1.D - Evaluation Analysis
168
+ ==================================================
169
+ Total Evaluated Images: 25
170
+
171
+ 📈 Score Statistics:
172
+ Prompt Adherence : 3.84 ± 0.75 (range: 2-5)
173
+ Style Consistency : 3.52 ± 0.87 (range: 2-5)
174
+ Mood & Atmosphere : 3.68 ± 0.69 (range: 2-5)
175
+ Technical Quality : 4.12 ± 0.60 (range: 3-5)
176
+ Creative Appeal : 3.76 ± 0.83 (range: 2-5)
177
+
178
+ 🎨 Top Performing Styles (by Prompt Match):
179
+ anime : 4.20
180
+ oil painting : 3.90
181
+ digital art : 3.75
182
+ ```
183
+
184
+ ## 🚀 Next Steps
185
+
186
+ After completing Phase 1.D evaluation:
187
+
188
+ 1. **Identify improvement areas** from your evaluation data
189
+ 2. **Experiment with parameter adjustments** for low-scoring criteria
190
+ 3. **Document successful combinations** for future use
191
+ 4. **Consider Phase 2** development based on baseline performance
192
+ 5. **Set up regular evaluation cycles** for continuous improvement
193
+
194
+ ## 🤝 Integration with Other Phases
195
+
196
+ Phase 1.D evaluation data can inform:
197
+ - **Phase 1.A/1.B parameter tuning**: Adjust generation settings based on quality scores
198
+ - **Phase 1.C UI improvements**: Highlight best-performing style/mood combinations
199
+ - **Future phases**: Use baseline metrics to measure advanced feature improvements
200
+
201
+ ---
202
+
203
+ **Happy Evaluating! 🎨📊**
204
+
205
+ The systematic evaluation provided by Phase 1.D is essential for understanding and improving your CompI system's performance. Use these tools regularly to maintain high-quality output and track your progress over time.
docs/PHASE1E_LORA_GUIDE.md ADDED
@@ -0,0 +1,270 @@
1
+ # CompI Phase 1.E: Personal Style Fine-tuning with LoRA
2
+
3
+ ## 🎯 Overview
4
+
5
+ Phase 1.E enables you to train **personalized artistic styles** using LoRA (Low-Rank Adaptation) fine-tuning on Stable Diffusion. This allows you to create AI art that reflects your unique artistic vision or mimics specific artistic styles.
6
+
7
+ **LoRA Benefits:**
8
+ - ✅ **Lightweight**: Only trains a small adapter (~10-100MB vs full model ~4GB)
9
+ - ✅ **Fast**: Training takes minutes to hours instead of days
10
+ - ✅ **Flexible**: Can be combined with different base models
11
+ - ✅ **Efficient**: Runs on consumer GPUs (8GB+ VRAM recommended)
12
+
13
+ ## 🛠️ Tools Provided
14
+
15
+ ### 1. **Dataset Preparation** (`compi_phase1e_dataset_prep.py`)
16
+ - Organize and validate your style images
17
+ - Generate appropriate training captions
18
+ - Resize and format images for optimal training
19
+ - Create train/validation splits
20
+
21
+ ### 2. **LoRA Training** (`compi_phase1e_lora_training.py`)
22
+ - Full LoRA fine-tuning pipeline with PEFT integration
23
+ - Configurable training parameters and monitoring
24
+ - Automatic checkpoint saving and validation
25
+ - Memory-efficient training with gradient checkpointing
26
+
27
+ ### 3. **Style Generation** (`compi_phase1e_style_generation.py`)
28
+ - Generate images using your trained LoRA styles
29
+ - Interactive and batch generation modes
30
+ - Adjustable style strength and parameters
31
+ - Integration with existing CompI pipeline
32
+
33
+ ### 4. **Style Management** (`compi_phase1e_style_manager.py`)
34
+ - Manage multiple trained LoRA styles
35
+ - Cleanup old checkpoints and organize models
36
+ - Export style information and analytics
37
+ - Switch between different personal styles
38
+
39
+ ## 🚀 Quick Start Guide
40
+
41
+ ### Step 1: Install Dependencies
42
+
43
+ ```bash
44
+ # Install LoRA training dependencies
45
+ pip install peft datasets bitsandbytes
46
+
47
+ # Verify installation
48
+ python -c "import peft, datasets; print('✅ Dependencies installed')"
49
+ ```
50
+
51
+ ### Step 2: Prepare Your Style Dataset
52
+
53
+ ```bash
54
+ # Organize your style images in a folder
55
+ mkdir my_artwork
56
+ # Copy 10-50 images of your artistic style to my_artwork/
57
+
58
+ # Prepare dataset for training
59
+ python src/generators/compi_phase1e_dataset_prep.py \
60
+ --input-dir my_artwork \
61
+ --style-name "my_art_style" \
62
+ --trigger-word "myart"
63
+ ```
64
+
65
+ **Dataset Requirements:**
66
+ - **10-50 images** (more is better, but 20+ is usually sufficient)
67
+ - **Consistent style** across all images
68
+ - **512x512 pixels** recommended (will be auto-resized)
69
+ - **High quality** images without watermarks or text
70
+
71
+ ### Step 3: Train Your LoRA Style
72
+
73
+ ```bash
74
+ # Start LoRA training
75
+ python run_lora_training.py \
76
+ --dataset-dir datasets/my_art_style \
77
+ --epochs 100 \
78
+ --learning-rate 1e-4
79
+
80
+ # Or with custom settings
81
+ python run_lora_training.py \
82
+ --dataset-dir datasets/my_art_style \
83
+ --epochs 200 \
84
+ --batch-size 2 \
85
+ --lora-rank 8 \
86
+ --lora-alpha 32
87
+ ```
88
+
89
+ **Training Tips:**
90
+ - **Start with 100 epochs** for initial testing
91
+ - **Increase to 200-500 epochs** for stronger style learning
92
+ - **Monitor validation loss** to avoid overfitting
93
+ - **Use gradient checkpointing** if you run out of memory
94
+
95
+ ### Step 4: Generate with Your Style
96
+
97
+ ```bash
98
+ # Generate images with your trained style
99
+ python run_style_generation.py \
100
+ --lora-path lora_models/my_art_style/checkpoint-1000 \
101
+ "a cat in myart style" \
102
+ --variations 4
103
+
104
+ # Interactive mode
105
+ python run_style_generation.py \
106
+ --lora-path lora_models/my_art_style/checkpoint-1000 \
107
+ --interactive
108
+ ```
109
+
110
+ ## 📊 Advanced Usage
111
+
112
+ ### Training Configuration
113
+
114
+ ```bash
115
+ # High-quality training (slower but better results)
116
+ python run_lora_training.py \
117
+ --dataset-dir datasets/my_style \
118
+ --epochs 300 \
119
+ --learning-rate 5e-5 \
120
+ --lora-rank 16 \
121
+ --lora-alpha 32 \
122
+ --batch-size 1 \
123
+ --gradient-checkpointing
124
+
125
+ # Fast training (quicker results for testing)
126
+ python run_lora_training.py \
127
+ --dataset-dir datasets/my_style \
128
+ --epochs 50 \
129
+ --learning-rate 2e-4 \
130
+ --lora-rank 4 \
131
+ --lora-alpha 16
132
+ ```
133
+
134
+ ### Style Management
135
+
136
+ ```bash
137
+ # List all trained styles
138
+ python src/generators/compi_phase1e_style_manager.py --list
139
+
140
+ # Get detailed info about a style
141
+ python src/generators/compi_phase1e_style_manager.py --info my_art_style
142
+
143
+ # Clean up old checkpoints (keep only 3 most recent)
144
+ python src/generators/compi_phase1e_style_manager.py --cleanup my_art_style --keep 3
145
+
146
+ # Export styles information to CSV
147
+ python src/generators/compi_phase1e_style_manager.py --export my_styles_report.csv
148
+ ```
149
+
150
+ ### Generation Parameters
151
+
152
+ ```bash
153
+ # Adjust style strength
154
+ python run_style_generation.py \
155
+ --lora-path lora_models/my_style/checkpoint-1000 \
156
+ --lora-scale 0.8 \
157
+ "portrait in myart style"
158
+
159
+ # High-quality generation
160
+ python run_style_generation.py \
161
+ --lora-path lora_models/my_style/checkpoint-1000 \
162
+ --steps 50 \
163
+ --guidance 8.0 \
164
+ --width 768 \
165
+ --height 768 \
166
+ "landscape in myart style"
167
+ ```
168
+
169
+ ## 🎨 Best Practices
170
+
171
+ ### Dataset Preparation
172
+ 1. **Consistent Style**: All images should represent the same artistic style
173
+ 2. **Quality over Quantity**: 20 high-quality images > 100 low-quality ones
174
+ 3. **Diverse Subjects**: Include various subjects (people, objects, landscapes)
175
+ 4. **Clean Images**: Remove watermarks, text, and irrelevant elements
176
+ 5. **Proper Captions**: Use consistent trigger words in captions
177
+
178
+ ### Training Tips
179
+ 1. **Start Small**: Begin with 50-100 epochs to test
180
+ 2. **Monitor Progress**: Check validation loss and sample generations
181
+ 3. **Adjust Learning Rate**: Lower if loss oscillates, higher if learning is slow
182
+ 4. **Use Checkpoints**: Save frequently to avoid losing progress
183
+ 5. **Experiment with LoRA Rank**: Higher rank = more capacity but slower training
184
+
185
+ ### Generation Guidelines
186
+ 1. **Include Trigger Words**: Always use your trigger word in prompts
187
+ 2. **Adjust Style Strength**: Use `--lora-scale` to control style intensity
188
+ 3. **Combine with Techniques**: Mix with existing CompI style/mood systems
189
+ 4. **Iterate and Refine**: Generate multiple variations and select best results
190
+
191
+ ## 🔧 Troubleshooting
192
+
193
+ ### Common Issues
194
+
195
+ **Out of Memory Error:**
196
+ ```bash
197
+ # Reduce batch size and enable gradient checkpointing
198
+ python run_lora_training.py \
199
+ --dataset-dir datasets/my_style \
200
+ --batch-size 1 \
201
+ --gradient-checkpointing \
202
+ --mixed-precision
203
+ ```
204
+
205
+ **Style Not Learning:**
206
+ - Increase epochs (try 200-500)
207
+ - Check dataset consistency
208
+ - Increase LoRA rank (try 8 or 16)
209
+ - Lower learning rate (try 5e-5)
210
+
211
+ **Generated Images Don't Match Style:**
212
+ - Include trigger word in prompts
213
+ - Increase LoRA scale (try 1.2-1.5)
214
+ - Train for more epochs
215
+ - Check dataset quality
216
+
217
+ **Training Too Slow:**
218
+ - Reduce image resolution to 512x512
219
+ - Use mixed precision training
220
+ - Enable gradient checkpointing
221
+ - Reduce LoRA rank to 4
222
+
223
+ ## 📁 File Structure
224
+
225
+ ```
226
+ Project CompI/
227
+ ├── datasets/ # Prepared training datasets
228
+ │ └── my_art_style/
229
+ │ ├── train/ # Training images
230
+ │ ├── validation/ # Validation images
231
+ │ ├── train_captions.txt # Training captions
232
+ │ └── dataset_info.json # Dataset metadata
233
+ ├── lora_models/ # Trained LoRA models
234
+ │ └── my_art_style/
235
+ │ ├── checkpoint-100/ # Training checkpoints
236
+ │ ├── checkpoint-200/
237
+ │ └── training_info.json # Training metadata
238
+ ├── src/generators/
239
+ │ ├── compi_phase1e_dataset_prep.py # Dataset preparation
240
+ │ ├── compi_phase1e_lora_training.py # LoRA training
241
+ │ ├── compi_phase1e_style_generation.py # Style generation
242
+ │ └── compi_phase1e_style_manager.py # Style management
243
+ ├── run_lora_training.py # Training launcher
244
+ └── run_style_generation.py # Generation launcher
245
+ ```
246
+
247
+ ## 🎯 Integration with CompI
248
+
249
+ Phase 1.E integrates seamlessly with existing CompI tools:
250
+
251
+ 1. **Combine with Phase 1.B**: Use LoRA styles alongside predefined styles
252
+ 2. **Evaluate with Phase 1.D**: Assess your LoRA-generated images systematically
253
+ 3. **UI Integration**: Add LoRA styles to Streamlit/Gradio interfaces
254
+ 4. **Batch Processing**: Generate multiple variations for evaluation
255
+
256
+ ## 🚀 Next Steps
257
+
258
+ After mastering Phase 1.E:
259
+
260
+ 1. **Experiment with Multiple Styles**: Train different LoRA adapters for various artistic approaches
261
+ 2. **Style Mixing**: Combine multiple LoRA styles for unique effects
262
+ 3. **Advanced Techniques**: Explore Textual Inversion, DreamBooth, or ControlNet integration
263
+ 4. **Community Sharing**: Share your trained styles with the CompI community
264
+ 5. **Phase 2 Preparation**: Use personal styles as foundation for multimodal integration
265
+
266
+ ---
267
+
268
+ **Happy Style Training! 🎨✨**
269
+
270
+ Phase 1.E opens up endless possibilities for personalized AI art generation. With LoRA fine-tuning, you can teach the AI to understand and replicate your unique artistic vision, creating truly personalized creative content.
docs/PHASE1_USAGE.md ADDED
@@ -0,0 +1,454 @@
1
+ # CompI Phase 1: Text-to-Image Generation Usage Guide
2
+
3
+ This guide covers the Phase 1 implementation of CompI's text-to-image generation capabilities using Stable Diffusion.
4
+
5
+ ## 🚀 Quick Start
6
+
7
+ ### Basic Usage
8
+
9
+ ```bash
10
+ # Simple generation with interactive prompt
11
+ python run_basic_generation.py
12
+
13
+ # Generate from command line
14
+ python run_basic_generation.py "A magical forest, digital art, highly detailed"
15
+
16
+ # Or run directly from src/generators/
17
+ python src/generators/compi_phase1_text2image.py "A magical forest"
18
+ ```
19
+
20
+ ### Advanced Usage
21
+
22
+ ```bash
23
+ # Advanced script with more options
24
+ python run_advanced_generation.py "cyberpunk city at sunset" --negative "blurry, low quality" --steps 50 --batch 3
25
+
26
+ # Interactive mode for experimentation
27
+ python run_advanced_generation.py --interactive
28
+
29
+ # Or run directly from src/generators/
30
+ python src/generators/compi_phase1_advanced.py --interactive
31
+ ```
32
+
33
+ ## 📋 Available Scripts
34
+
35
+ ### 1. `compi_phase1_text2image.py` - Basic Implementation
36
+
37
+ **Features:**
38
+
39
+ - Simple, standalone text-to-image generation
40
+ - Automatic GPU/CPU detection
41
+ - Command line or interactive prompts
42
+ - Automatic output saving with descriptive filenames
43
+ - Comprehensive logging
44
+
45
+ **Usage:**
46
+
47
+ ```bash
48
+ python compi_phase1_text2image.py [prompt]
49
+ ```
50
+
51
+ ### 2. `compi_phase1_advanced.py` - Enhanced Implementation
52
+
53
+ **Features:**
54
+
55
+ - Batch generation (multiple images)
56
+ - Negative prompts (what to avoid)
57
+ - Customizable parameters (steps, guidance, dimensions)
58
+ - Interactive mode for experimentation
59
+ - Metadata saving (JSON files with generation parameters)
60
+ - Multiple model support
61
+
62
+ **Command Line Options:**
63
+
64
+ ```bash
65
+ python compi_phase1_advanced.py [OPTIONS] [PROMPT]
66
+
67
+ Options:
68
+ --negative, -n TEXT Negative prompt (what to avoid)
69
+ --steps, -s INTEGER Number of inference steps (default: 30)
70
+ --guidance, -g FLOAT Guidance scale (default: 7.5)
71
+ --seed INTEGER Random seed for reproducibility
72
+ --batch, -b INTEGER Number of images to generate
73
+ --width, -w INTEGER Image width (default: 512)
74
+ --height INTEGER Image height (default: 512)
75
+ --model, -m TEXT Model to use (default: runwayml/stable-diffusion-v1-5)
76
+ --output, -o TEXT Output directory (default: outputs)
77
+ --interactive, -i Interactive mode
78
+ ```
79
+
80
+ ## 🎨 Example Commands
81
+
82
+ ### Basic Examples
83
+
84
+ ```bash
85
+ # Simple landscape
86
+ python run_basic_generation.py "serene mountain lake, golden hour, photorealistic"
87
+
88
+ # Digital art style
89
+ python run_basic_generation.py "futuristic robot, neon lights, cyberpunk style, digital art"
90
+ ```
91
+
92
+ ### Advanced Examples
93
+
94
+ ```bash
95
+ # High-quality generation with negative prompts
96
+ python run_advanced_generation.py "beautiful portrait of a woman, oil painting style" \
97
+ --negative "blurry, distorted, low quality, bad anatomy" \
98
+ --steps 50 --guidance 8.0
99
+
100
+ # Batch generation with fixed seed
101
+ python run_advanced_generation.py "abstract geometric patterns, colorful" \
102
+ --batch 5 --seed 12345 --steps 40
103
+
104
+ # Custom dimensions for landscape
105
+ python run_advanced_generation.py "panoramic view of alien landscape" \
106
+ --width 768 --height 512 --steps 35
107
+
108
+ # Interactive experimentation
109
+ python run_advanced_generation.py --interactive
110
+ ```
111
+
112
+ ## 📁 Output Structure
113
+
114
+ Generated images are saved in the `outputs/` directory with descriptive filenames:
115
+
116
+ ```
117
+ outputs/
118
+ ├── magical_forest_digital_art_20241225_143022_seed42.png
119
+ ├── magical_forest_digital_art_20241225_143022_seed42_metadata.json
120
+ ├── cyberpunk_city_sunset_20241225_143156_seed1337.png
121
+ └── cyberpunk_city_sunset_20241225_143156_seed1337_metadata.json
122
+ ```
123
+
124
+ ### Metadata Files
125
+
126
+ Each generated image (in advanced mode) includes a JSON metadata file with:
127
+
128
+ - Original prompt and negative prompt
129
+ - Generation parameters (steps, guidance, seed)
130
+ - Image dimensions and model used
131
+ - Timestamp and batch information
132
+
133
+ ## ⚙️ Configuration Tips
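Because the metadata files are plain JSON, they are easy to mine later. A small sketch of reading them back (key names follow the list above but are assumptions; the advanced script defines the exact schema):

```python
# Illustrative pass over saved generation metadata.
import json
from pathlib import Path

for meta_path in Path("outputs").glob("*_metadata.json"):
    with open(meta_path) as f:
        meta = json.load(f)
    # e.g. collect only high-guidance runs for side-by-side comparison
    if meta.get("guidance_scale", 0) >= 8.0:
        print(meta_path.name, meta.get("seed"), meta.get("prompt"))
```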
134
+
135
+ ### For Best Quality
136
+
137
+ - Use 30-50 inference steps
138
+ - Guidance scale 7.5-12.0
139
+ - Include style descriptors ("digital art", "oil painting", "photorealistic")
140
+ - Use negative prompts to avoid unwanted elements
141
+
142
+ ### For Speed
143
+
144
+ - Use 20-25 inference steps
145
+ - Lower guidance scale (6.0-7.5)
146
+ - Stick to 512x512 resolution
147
+
148
+ ### For Experimentation
149
+
150
+ - Use interactive mode
151
+ - Try different seeds with the same prompt
152
+ - Experiment with guidance scale values
153
+ - Use batch generation to explore variations
154
+
155
+ ## 🔧 Troubleshooting
156
+
157
+ ### Common Issues
158
+
159
+ 1. **CUDA out of memory**: Reduce batch size or image dimensions
160
+ 2. **Slow generation**: Ensure CUDA is available and working
161
+ 3. **Poor quality**: Increase steps, adjust guidance scale, improve prompts
162
+ 4. **Model download fails**: Check internet connection, try again
163
+
164
+ ### Performance Optimization
165
+
166
+ - The scripts automatically enable attention slicing for memory efficiency
167
+ - GPU detection is automatic
168
+ - Models are cached after first download
169
+
170
+ ## 🎨 Phase 1.B: Style Conditioning & Prompt Engineering
171
+
172
+ ### 3. `compi_phase1b_styled_generation.py` - Style Conditioning
173
+
174
+ **Features:**
175
+
176
+ - Interactive style and mood selection from curated lists
177
+ - Intelligent prompt engineering and combination
178
+ - Multiple variations with unique seeds
179
+ - Comprehensive logging and filename organization
180
+
181
+ **Usage:**
182
+
183
+ ```bash
184
+ python run_styled_generation.py [prompt]
185
+ # Or directly: python src/generators/compi_phase1b_styled_generation.py [prompt]
186
+ ```
187
+
188
+ ### 4. `compi_phase1b_advanced_styling.py` - Advanced Style Control
189
+
190
+ **Features:**
191
+
192
+ - 13 predefined art styles with optimized prompts and negative prompts
193
+ - 9 mood categories with atmospheric conditioning
194
+ - Quality presets (draft/standard/high)
195
+ - Command line and interactive modes
196
+ - Comprehensive metadata saving
197
+
198
+ **Command Line Options:**
199
+
200
+ ```bash
201
+ python run_advanced_styling.py [OPTIONS] [PROMPT]
202
+ # Or directly: python src/generators/compi_phase1b_advanced_styling.py [OPTIONS] [PROMPT]
203
+
204
+ Options:
205
+ --style, -s TEXT Art style (or number from list)
206
+ --mood, -m TEXT Mood/atmosphere (or number from list)
207
+ --variations, -v INT Number of variations (default: 1)
208
+ --quality, -q CHOICE Quality preset [draft/standard/high]
209
+ --negative, -n TEXT Negative prompt
210
+ --interactive, -i Interactive mode
211
+ --list-styles List available styles and exit
212
+ --list-moods List available moods and exit
213
+ ```
214
+
215
+ ### Style Conditioning Examples
216
+
217
+ **Basic Style Selection:**
218
+
219
+ ```bash
220
+ # Interactive mode with guided selection
221
+ python run_styled_generation.py
222
+
223
+ # Command line with style selection
224
+ python run_advanced_styling.py "mountain landscape" --style cyberpunk --mood dramatic
225
+ ```
226
+
227
+ **Advanced Style Control:**
228
+
229
+ ```bash
230
+ # High quality with multiple variations
231
+ python run_advanced_styling.py "portrait of a wizard" \
232
+ --style "oil painting" --mood "mysterious" \
233
+ --quality high --variations 3 \
234
+ --negative "blurry, distorted, amateur"
235
+
236
+ # List available options
237
+ python run_advanced_styling.py --list-styles
238
+ python run_advanced_styling.py --list-moods
239
+ ```
240
+
241
+ **Available Styles:**
242
+
243
+ - digital art, oil painting, watercolor, cyberpunk
244
+ - impressionist, concept art, anime, photorealistic
245
+ - minimalist, surrealism, pixel art, steampunk, 3d render
246
+
247
+ **Available Moods:**
248
+
249
+ - dreamy, dark, peaceful, vibrant, melancholic
250
+ - mysterious, whimsical, dramatic, retro
251
+
252
+ ## 🖥️ Phase 1.C: Interactive Web UI
253
+
254
+ ### 5. `compi_phase1c_streamlit_ui.py` - Streamlit Web Interface
255
+
256
+ **Features:**
257
+
258
+ - Complete web-based interface for text-to-image generation
259
+ - Interactive style and mood selection with custom options
260
+ - Advanced settings (steps, guidance, dimensions, negative prompts)
261
+ - Real-time image generation and display
262
+ - Progress tracking and generation logs
263
+ - Automatic saving with comprehensive metadata
264
+
265
+ **Usage:**
266
+
267
+ ```bash
268
+ python run_ui.py
269
+ # Or directly: streamlit run src/ui/compi_phase1c_streamlit_ui.py
270
+ ```
271
+
272
+ ### 6. `compi_phase1c_gradio_ui.py` - Gradio Web Interface
273
+
274
+ **Features:**
275
+
276
+ - Alternative web interface with Gradio framework
277
+ - Gallery view for multiple image variations
278
+ - Collapsible advanced settings
279
+ - Real-time generation logs
280
+ - Mobile-friendly responsive design
281
+
282
+ **Usage:**
283
+
284
+ ```bash
285
+ python run_gradio_ui.py
286
+ # Or directly: python src/ui/compi_phase1c_gradio_ui.py
287
+ ```
288
+
289
+ ## 📊 Phase 1.D: Quality Evaluation Tools
290
+
291
+ ### 7. `compi_phase1d_evaluate_quality.py` - Comprehensive Evaluation Interface
292
+
293
+ **Features:**
294
+
295
+ - Systematic image quality assessment with 5-criteria scoring system
296
+ - Interactive Streamlit web interface for detailed evaluation
297
+ - Objective metrics calculation (perceptual hashes, dimensions, file size)
298
+ - Batch evaluation capabilities for efficient processing
299
+ - Comprehensive logging and CSV export for trend analysis
300
+ - Summary analytics with performance insights and recommendations
301
+
302
+ **Usage:**
303
+
304
+ ```bash
305
+ python run_evaluation.py
306
+ # Or directly: streamlit run src/generators/compi_phase1d_evaluate_quality.py
307
+ ```
308
+
309
+ ### 8. `compi_phase1d_cli_evaluation.py` - Command-Line Evaluation Tools
310
+
311
+ **Features:**
312
+
313
+ - Batch evaluation and analysis from command line
314
+ - Statistical summaries and performance reports
315
+ - Filtering by style, mood, and evaluation status
316
+ - Automated scoring for large image sets
317
+ - Detailed report generation with recommendations
318
+
319
+ **Command Line Options:**
320
+
321
+ ```bash
322
+ python src/generators/compi_phase1d_cli_evaluation.py [OPTIONS]
323
+
324
+ Options:
325
+ --analyze Display evaluation summary and statistics
326
+ --report Generate detailed evaluation report
327
+ --batch-score P S M Q A Batch score images (1-5 for each criterion)
328
+ --list-all List all images with evaluation status
329
+ --list-evaluated List only evaluated images
330
+ --list-unevaluated List only unevaluated images
331
+ --style TEXT Filter by style
332
+ --mood TEXT Filter by mood
333
+ --notes TEXT Notes for batch evaluation
334
+ --output FILE Output file for reports
335
+ ```
336
+
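+ **Examples** (using the flags listed above; scores follow the `P S M Q A` order):
+
+ ```bash
+ # Show summary statistics for everything evaluated so far
+ python src/generators/compi_phase1d_cli_evaluation.py --analyze
+
+ # Batch-score images matching a style filter (scores 4 4 3 5 4 in P S M Q A order)
+ python src/generators/compi_phase1d_cli_evaluation.py \
+   --batch-score 4 4 3 5 4 --style cyberpunk --notes "first pass"
+
+ # Write a detailed report to a file
+ python src/generators/compi_phase1d_cli_evaluation.py --report --output evaluation_report.md
+ ```
+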
337
+ ## 🎨 Phase 1.E: Personal Style Fine-tuning (LoRA)
338
+
339
+ ### 9. `compi_phase1e_dataset_prep.py` - Dataset Preparation for LoRA Training
340
+
341
+ **Features:**
342
+
343
+ - Organize and validate personal style images for training
344
+ - Generate appropriate training captions with trigger words
345
+ - Resize and format images for optimal LoRA training
346
+ - Create train/validation splits with metadata tracking
347
+ - Support for multiple image formats and quality validation
348
+
349
+ **Usage:**
350
+
351
+ ```bash
352
+ python src/generators/compi_phase1e_dataset_prep.py --input-dir my_artwork --style-name "my_art_style"
353
+ # Or via wrapper: python run_dataset_prep.py --input-dir my_artwork --style-name "my_art_style"
354
+ ```
355
+
356
+ ### 10. `compi_phase1e_lora_training.py` - LoRA Fine-tuning Engine
357
+
358
+ **Features:**
359
+
360
+ - Full LoRA (Low-Rank Adaptation) fine-tuning pipeline
361
+ - Memory-efficient training with gradient checkpointing
362
+ - Configurable LoRA parameters (rank, alpha, learning rate)
363
+ - Automatic checkpoint saving and validation monitoring
364
+ - Integration with PEFT library for optimal performance
365
+
366
+ **Command Line Options:**
367
+
368
+ ```bash
369
+ python run_lora_training.py [OPTIONS] --dataset-dir DATASET_DIR
370
+
371
+ Options:
372
+ --dataset-dir DIR Required: Prepared dataset directory
373
+ --epochs INT Number of training epochs (default: 100)
374
+ --learning-rate FLOAT Learning rate (default: 1e-4)
375
+ --lora-rank INT LoRA rank (default: 4)
376
+ --lora-alpha INT LoRA alpha (default: 32)
377
+ --batch-size INT Training batch size (default: 1)
378
+ --save-steps INT Save checkpoint every N steps
379
+ --gradient-checkpointing Enable gradient checkpointing for memory efficiency
380
+ --mixed-precision Use mixed precision training
381
+ ```
382
+
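+ **Example** (a typical run using only the flags listed above; the dataset path is a placeholder for whatever directory the dataset-preparation step produced):
+
+ ```bash
+ python run_lora_training.py \
+   --dataset-dir path/to/prepared_dataset \
+   --epochs 100 \
+   --learning-rate 1e-4 \
+   --lora-rank 4 --lora-alpha 32 \
+   --batch-size 1 \
+   --gradient-checkpointing --mixed-precision
+ ```
+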
383
+ ### 11. `compi_phase1e_style_generation.py` - Personal Style Generation
384
+
385
+ **Features:**
386
+
387
+ - Generate images using trained LoRA personal styles
388
+ - Adjustable style strength and generation parameters
389
+ - Interactive and batch generation modes
390
+ - Integration with existing CompI pipeline and metadata
391
+ - Support for multiple LoRA styles and model switching
392
+
393
+ **Usage:**
394
+
395
+ ```bash
396
+ python run_style_generation.py --lora-path lora_models/my_style/checkpoint-1000 "a cat in my_style"
397
+ # Or directly: python src/generators/compi_phase1e_style_generation.py --lora-path PATH PROMPT
398
+ ```
399
+
400
+ ### 12. `compi_phase1e_style_manager.py` - LoRA Style Management
401
+
402
+ **Features:**
403
+
404
+ - Manage multiple trained LoRA styles and checkpoints
405
+ - Cleanup old checkpoints and organize model storage
406
+ - Export style information and training analytics
407
+ - Style database with automatic scanning and metadata
408
+ - Batch operations for style maintenance and organization
409
+
410
+ **Command Line Options:**
411
+
412
+ ```bash
413
+ python src/generators/compi_phase1e_style_manager.py [OPTIONS]
414
+
415
+ Options:
416
+ --list List all available LoRA styles
417
+ --info STYLE_NAME Show detailed information about a style
418
+ --refresh Refresh the styles database
419
+ --cleanup STYLE_NAME Clean up old checkpoints for a style
420
+ --export OUTPUT_FILE Export styles information to CSV
421
+ --delete STYLE_NAME Delete a LoRA style (requires --confirm)
422
+ ```
423
+
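+ **Examples:**
+
+ ```bash
+ # List every trained style, then inspect one
+ python src/generators/compi_phase1e_style_manager.py --list
+ python src/generators/compi_phase1e_style_manager.py --info my_art_style
+
+ # Clean up stale checkpoints and export an overview
+ python src/generators/compi_phase1e_style_manager.py --cleanup my_art_style
+ python src/generators/compi_phase1e_style_manager.py --export styles_overview.csv
+ ```
+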
424
+ ### Web UI Examples
425
+
426
+ **Streamlit Interface:**
427
+
428
+ - Navigate to http://localhost:8501 after running
429
+ - Full-featured interface with sidebar settings
430
+ - Progress bars and status updates
431
+ - Expandable sections for details
432
+
433
+ **Gradio Interface:**
434
+
435
+ - Navigate to http://localhost:7860 after running
436
+ - Gallery-style image display
437
+ - Compact, mobile-friendly design
438
+ - Real-time generation feedback
439
+
440
+ ## 🎯 Next Steps
441
+
442
+ Phase 1 establishes the foundation for CompI's text-to-image capabilities. Future phases will add:
443
+
444
+ - Audio input processing
445
+ - Emotion and style conditioning
446
+ - Real-time data integration
447
+ - Multimodal fusion
448
+ - Advanced UI interfaces
449
+
450
+ ## 📚 Resources
451
+
452
+ - [Stable Diffusion Documentation](https://huggingface.co/docs/diffusers)
453
+ - [Prompt Engineering Guide](https://prompthero.com/stable-diffusion-prompt-guide)
454
+ - [CompI Development Plan](development.md)
docs/PHASE2A_AUDIO_TO_IMAGE_GUIDE.md ADDED
@@ -0,0 +1,307 @@
1
+ # CompI Phase 2.A: Audio-to-Image Generation Guide
2
+
3
+ Welcome to **CompI Phase 2.A**, the next evolution in multimodal AI art generation! This phase introduces the ability to generate images influenced by audio input, combining the power of text prompts with the emotional and rhythmic qualities of sound.
4
+
5
+ ## 🎵 What's New in Phase 2.A
6
+
7
+ ### Core Features
8
+
9
+ - **Audio Analysis**: Extract tempo, energy, spectral features, and harmonic content from audio files
10
+ - **Audio Captioning**: Convert speech, music, and ambient sounds to descriptive text using OpenAI Whisper
11
+ - **Multimodal Fusion**: Intelligently combine text prompts with audio-derived features
12
+ - **Rich Metadata**: Comprehensive logging of audio features and generation context
13
+ - **Multiple Interfaces**: Streamlit UI, CLI, and programmatic API
14
+
15
+ ### Supported Audio Formats
16
+
17
+ - MP3, WAV, FLAC, M4A, OGG
18
+ - Recommended: Under 60 seconds for optimal processing speed
19
+ - Automatic resampling to 16kHz for analysis
20
+
21
+ ## 🚀 Quick Start
22
+
23
+ ### 1. Install Dependencies
24
+
25
+ First, ensure you have the Phase 2.A dependencies:
26
+
27
+ ```bash
28
+ pip install openai-whisper
29
+ ```
30
+
31
+ All other dependencies should already be installed from Phase 1.
32
+
33
+ ### 2. Streamlit UI (Recommended for Beginners)
34
+
35
+ Launch the interactive web interface:
36
+
37
+ ```bash
38
+ streamlit run src/ui/compi_phase2a_streamlit_ui.py
39
+ ```
40
+
41
+ Features:
42
+
43
+ - 🎵 Audio upload and playback
44
+ - 📊 Real-time audio analysis visualization
45
+ - 🎨 Interactive generation controls
46
+ - 📝 Enhanced prompt preview
47
+ - 🖼️ Instant results display
48
+
49
+ ### 3. Command Line Interface
50
+
51
+ For power users and automation:
52
+
53
+ ```bash
54
+ # Basic usage
55
+ python run_phase2a_audio_to_image.py --prompt "mystical forest" --audio "music.mp3"
56
+
57
+ # With style and mood
58
+ python run_phase2a_audio_to_image.py \
59
+ --prompt "cyberpunk city" \
60
+ --style "digital art" \
61
+ --mood "neon, futuristic" \
62
+ --audio "electronic.wav"
63
+
64
+ # Multiple variations
65
+ python run_phase2a_audio_to_image.py \
66
+ --prompt "abstract art" \
67
+ --audio "ambient.flac" \
68
+ --num-images 3
69
+
70
+ # Interactive mode
71
+ python run_phase2a_audio_to_image.py --interactive
72
+ ```
73
+
74
+ ### 4. Programmatic Usage
75
+
76
+ ```python
77
+ from src.generators.compi_phase2a_audio_to_image import CompIPhase2AAudioToImage
78
+
79
+ # Initialize generator
80
+ generator = CompIPhase2AAudioToImage()
81
+
82
+ # Generate image with audio conditioning
83
+ results = generator.generate_image(
84
+ text_prompt="A serene mountain landscape",
85
+ style="impressionist",
86
+ mood="peaceful, contemplative",
87
+ audio_path="nature_sounds.wav",
88
+ num_images=2
89
+ )
90
+
91
+ # Access results
92
+ for result in results:
93
+ print(f"Generated: {result['filename']}")
94
+ result['image'].show() # Display image
95
+ ```
96
+
97
+ ## 🎨 How Audio Influences Art
98
+
99
+ ### Audio Feature Extraction
100
+
101
+ CompI Phase 2.A analyzes multiple aspects of your audio (a code sketch follows the list):
102
+
103
+ 1. **Tempo**: Beats per minute → influences rhythm and energy descriptors
104
+ 2. **Energy (RMS)**: Overall loudness → affects intensity and power descriptors
105
+ 3. **Zero Crossing Rate**: Rhythmic content → adds percussive/smooth qualities
106
+ 4. **Spectral Centroid**: Brightness → influences warm/bright color palettes
107
+ 5. **MFCC**: Timbre characteristics → affects texture and style
108
+ 6. **Chroma**: Harmonic content → influences mood and atmosphere
109
+
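+ The snippet below is a minimal sketch of how features like these can be computed with `librosa`; the actual `AudioProcessor` in `src/utils/audio_utils.py` may use different parameters and post-processing:
+
+ ```python
+ import librosa
+ import numpy as np
+
+ # Load and resample to 16 kHz, matching the analysis rate mentioned above
+ y, sr = librosa.load("music.mp3", sr=16000)
+
+ tempo = float(librosa.beat.beat_track(y=y, sr=sr)[0])                       # beats per minute
+ energy = float(np.mean(librosa.feature.rms(y=y)))                           # overall loudness (RMS)
+ zcr = float(np.mean(librosa.feature.zero_crossing_rate(y)))                 # percussive vs. smooth
+ brightness = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))  # spectral centroid
+ mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13), axis=1)         # timbre profile
+ chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr), axis=1)           # harmonic content
+
+ print(f"tempo={tempo:.0f} BPM, energy={energy:.3f}, brightness={brightness:.0f} Hz")
+ ```
+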
110
+ ### Intelligent Prompt Fusion
111
+
112
+ The system automatically enhances your text prompt based on audio analysis:
113
+
114
+ **Original Prompt**: "A mystical forest"
115
+ **Audio**: Slow, ambient music with low energy
116
+ **Enhanced Prompt**: "A mystical forest, slow and contemplative, gentle and subtle, warm and deep"
117
+
118
+ ### Audio Captioning
119
+
120
+ Using OpenAI Whisper, the system can describe what it "hears" (see the sketch after this list):
121
+
122
+ - **Speech**: Transcribes spoken words and incorporates meaning
123
+ - **Music**: Identifies instruments, genres, and emotional qualities
124
+ - **Ambient**: Describes environmental sounds and atmospheres
125
+
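+ A rough sketch of the captioning step, assuming the `openai-whisper` package installed earlier (model size and file name are placeholders):
+
+ ```python
+ import whisper
+
+ # "tiny"/"base" download quickly; larger models caption more accurately
+ model = whisper.load_model("base")
+ result = model.transcribe("poetry_reading.wav")
+ caption = result["text"].strip()
+ print(caption)  # this caption is fused into the prompt alongside the audio features
+ ```
+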
126
+ ## 📊 Understanding Audio Analysis
127
+
128
+ ### Tempo Classifications
129
+
130
+ - **Very Slow** (< 60 BPM): Meditative, ethereal qualities
131
+ - **Slow** (60-90 BPM): Contemplative, peaceful atmospheres
132
+ - **Moderate** (90-120 BPM): Balanced, natural rhythms
133
+ - **Fast** (120-140 BPM): Energetic, dynamic compositions
134
+ - **Very Fast** (> 140 BPM): Intense, high-energy visuals
135
+
136
+ ### Energy Levels
137
+
138
+ - **Low Energy** (< 0.02): Subtle, gentle, minimalist styles
139
+ - **Medium Energy** (0.02-0.05): Balanced, harmonious compositions
140
+ - **High Energy** (> 0.05): Vibrant, powerful, dramatic visuals
141
+
142
+ ### Spectral Characteristics
143
+
144
+ - **Bright** (High Spectral Centroid): Light colors, sharp details
145
+ - **Dark** (Low Spectral Centroid): Deep colors, soft textures
146
+ - **Percussive** (High ZCR): Rhythmic patterns, geometric shapes
147
+ - **Smooth** (Low ZCR): Flowing forms, organic shapes
148
+
149
+ ## 🎯 Best Practices
150
+
151
+ ### Audio Selection
152
+
153
+ 1. **Quality Matters**: Use clear, well-recorded audio for best results
154
+ 2. **Length**: 10-60 seconds is optimal for processing speed
155
+ 3. **Variety**: Experiment with different genres and sound types
156
+ 4. **Context**: Choose audio that complements your text prompt
157
+
158
+ ### Prompt Writing
159
+
160
+ 1. **Be Descriptive**: Rich text prompts work better with audio conditioning
161
+ 2. **Leave Room**: Let audio features add nuance to your base concept
162
+ 3. **Experiment**: Try the same prompt with different audio files
163
+ 4. **Balance**: Don't over-specify if you want audio to have strong influence
164
+
165
+ ### Generation Settings
166
+
167
+ 1. **Steps**: 30-50 steps for high quality (20 for quick tests)
168
+ 2. **Guidance**: 7.5 is balanced (lower for more audio influence)
169
+ 3. **Variations**: Generate multiple images to see different interpretations
170
+ 4. **Seeds**: Save seeds of favorite results for consistency
171
+
172
+ ## 🔧 Advanced Features
173
+
174
+ ### Batch Processing
175
+
176
+ Process multiple audio files with the same prompt:
177
+
178
+ ```bash
179
+ python run_phase2a_audio_to_image.py \
180
+ --prompt "abstract expressionism" \
181
+ --audio-dir "./music_collection/" \
182
+ --batch
183
+ ```
184
+
185
+ ### Custom Audio Analysis
186
+
187
+ ```python
188
+ from src.utils.audio_utils import AudioProcessor, MultimodalPromptFusion
189
+
190
+ # Analyze audio separately
191
+ processor = AudioProcessor()
192
+ features = processor.analyze_audio_file("my_audio.wav")
193
+
194
+ # Create custom prompt fusion
195
+ fusion = MultimodalPromptFusion()
196
+ enhanced_prompt = fusion.fuse_prompt_with_audio(
197
+ "base prompt", "style", "mood", features, "audio caption"
198
+ )
199
+ ```
200
+
201
+ ### Metadata and Tracking
202
+
203
+ Every generated image includes comprehensive metadata:
204
+
205
+ - Original and enhanced prompts
206
+ - Complete audio analysis results
207
+ - Generation parameters
208
+ - Timestamps and seeds
209
+ - Audio tags and classifications
210
+
211
+ ## 🎪 Example Use Cases
212
+
213
+ ### 1. Music Visualization
214
+
215
+ Transform your favorite songs into visual art:
216
+
217
+ - **Classical**: Orchestral pieces → elegant, flowing compositions
218
+ - **Electronic**: Synthesized music → geometric, neon aesthetics
219
+ - **Jazz**: Improvisational music → abstract, dynamic forms
220
+ - **Ambient**: Atmospheric sounds → ethereal, dreamlike scenes
221
+
222
+ ### 2. Voice-to-Art
223
+
224
+ Convert spoken content into visuals:
225
+
226
+ - **Poetry Reading**: Emotional recitation → expressive, literary art
227
+ - **Storytelling**: Narrative audio → scene illustrations
228
+ - **Meditation**: Guided meditation → peaceful, spiritual imagery
229
+ - **Lectures**: Educational content → informative, structured visuals
230
+
231
+ ### 3. Environmental Soundscapes
232
+
233
+ Capture the essence of places and moments:
234
+
235
+ - **Nature Sounds**: Forest, ocean, rain → organic, natural scenes
236
+ - **Urban Audio**: City sounds, traffic → industrial, modern aesthetics
237
+ - **Historical**: Period-appropriate audio → era-specific artwork
238
+ - **Sci-Fi**: Futuristic sounds → otherworldly, technological visuals
239
+
240
+ ### 4. Therapeutic Applications
241
+
242
+ Use audio-visual generation for wellness:
243
+
244
+ - **Relaxation**: Calming audio → soothing, peaceful imagery
245
+ - **Motivation**: Energetic music → inspiring, powerful visuals
246
+ - **Focus**: Concentration aids → clean, organized compositions
247
+ - **Creativity**: Experimental sounds → abstract, innovative art
248
+
249
+ ## 🐛 Troubleshooting
250
+
251
+ ### Common Issues
252
+
253
+ **Audio Not Loading**
254
+
255
+ - Check file format (MP3, WAV, FLAC, M4A, OGG supported)
256
+ - Ensure file isn't corrupted
257
+ - Try converting to WAV format
258
+
259
+ **Whisper Model Loading Fails**
260
+
261
+ - Install with: `pip install openai-whisper`
262
+ - Check available disk space (models are 100MB-1GB)
263
+ - Try smaller model size: `--whisper-model tiny`
264
+
265
+ **Generation Too Slow**
266
+
267
+ - Use `--no-caption` to skip audio captioning
268
+ - Reduce `--steps` for faster generation
269
+ - Use smaller Whisper model
270
+ - Process shorter audio clips
271
+
272
+ **Out of Memory**
273
+
274
+ - Use CPU mode: `--device cpu`
275
+ - Reduce image size: `--size 256x256`
276
+ - Close other applications
277
+ - Process one image at a time
278
+
279
+ ### Performance Tips
280
+
281
+ 1. **GPU Acceleration**: CUDA significantly speeds up generation
282
+ 2. **Model Caching**: First run downloads models (1-2GB total)
283
+ 3. **Audio Preprocessing**: Shorter clips process faster
284
+ 4. **Batch Processing**: More efficient for multiple files
285
+ 5. **Memory Management**: Close UI between large batches
286
+
287
+ ## 🔮 What's Next?
288
+
289
+ Phase 2.A is just the beginning of CompI's multimodal journey. The phases that build on it:
+
+ - **Phase 2.B**: Data/logic inputs (CSV data and mathematical formulas)
+ - **Phase 2.C**: Emotional and contextual conditioning
+ - **Phase 2.D**: Real-time data feeds (weather, news, financial markets)
+ - **Phase 2.E**: Style reference images from your device or the web
295
+
296
+ ## 📚 Additional Resources
297
+
298
+ - [CompI Project Structure](PROJECT_STRUCTURE.md)
299
+ - [Phase 1 Usage Guide](PHASE1_USAGE.md)
300
+ - [Audio Processing Documentation](../src/utils/audio_utils.py)
301
+ - Example Audio Files (removed in cleanup)
302
+
303
+ ---
304
+
305
+ **Happy Creating! 🎨🎵**
306
+
307
+ _CompI Phase 2.A brings together the worlds of sound and vision, creating art that truly resonates with your audio experiences._
docs/PHASE2B_DATA_TO_IMAGE_GUIDE.md ADDED
@@ -0,0 +1,271 @@
1
+ # CompI Phase 2.B: Data/Logic Input to Image Generation
2
+
3
+ ## 🚀 Overview
4
+
5
+ Phase 2.B transforms structured data and mathematical formulas into stunning AI-generated art. This phase combines data analysis, pattern recognition, and poetic interpretation to create unique visual experiences that reflect the essence of your data.
6
+
7
+ ## ✨ Key Features
8
+
9
+ ### 📊 Data Processing
10
+ - **CSV Data Analysis**: Upload spreadsheets, time series, measurements, or any numeric data
11
+ - **Mathematical Formula Evaluation**: Enter Python/NumPy expressions for mathematical art
12
+ - **Pattern Recognition**: Automatic detection of trends, correlations, and seasonality
13
+ - **Statistical Analysis**: Comprehensive data profiling and feature extraction
14
+
15
+ ### 🎨 Artistic Integration
16
+ - **Poetic Text Generation**: Convert data patterns into descriptive, artistic language
17
+ - **Data Visualization**: Create beautiful charts and plots from your data
18
+ - **Prompt Enhancement**: Intelligently merge data insights with your creative prompts
19
+ - **Visual Conditioning**: Use data visualizations to inspire AI art generation
20
+
21
+ ### 🔧 Technical Capabilities
22
+ - **Safe Formula Execution**: Secure evaluation of mathematical expressions
23
+ - **Batch Processing**: Handle multiple datasets or formulas simultaneously
24
+ - **Comprehensive Metadata**: Detailed logging of all generation parameters
25
+ - **Flexible Output**: Save both generated art and data visualizations
26
+
27
+ ## 🛠️ Installation & Setup
28
+
29
+ ### Prerequisites
30
+ Ensure you have the base CompI environment set up with all dependencies from `requirements.txt`.
31
+
32
+ ### Additional Dependencies
33
+ Phase 2.B uses the existing CompI dependencies, specifically:
34
+ - `pandas>=2.0.0` - Data manipulation and analysis
35
+ - `numpy>=1.24.0` - Mathematical operations
36
+ - `matplotlib>=3.7.0` - Data visualization
37
+ - `seaborn>=0.12.0` - Statistical plotting
38
+
39
+ ## 🎯 Quick Start
40
+
41
+ ### 1. Launch the Streamlit Interface
42
+
43
+ ```bash
44
+ # Navigate to your CompI project directory
45
+ cd "C:\Users\Aksharajsinh\Documents\augment-projects\Project CompI"
46
+
47
+ # Run the Phase 2.B interface
48
+ streamlit run src/ui/compi_phase2b_streamlit_ui.py
49
+ ```
50
+
51
+ ### 2. Using CSV Data
52
+
53
+ 1. **Upload your CSV file** containing numeric data
54
+ 2. **Enter your creative prompt** (e.g., "A flowing river of data")
55
+ 3. **Set style and mood** (e.g., "abstract digital art", "serene and flowing")
56
+ 4. **Click Generate** and watch your data transform into art!
57
+
58
+ ### 3. Using Mathematical Formulas
59
+
60
+ 1. **Enter a mathematical formula** using Python/NumPy syntax
61
+ 2. **Combine with your prompt** for artistic interpretation
62
+ 3. **Generate unique mathematical art** based on your equations
63
+
64
+ ## 📚 Examples
65
+
66
+ ### CSV Data Examples
67
+
68
+ #### Time Series Data
69
+ ```csv
70
+ date,temperature,humidity,pressure
71
+ 2024-01-01,22.5,65,1013.2
72
+ 2024-01-02,23.1,62,1015.8
73
+ 2024-01-03,21.8,68,1012.4
74
+ ...
75
+ ```
76
+
77
+ **Prompt**: "Weather patterns dancing across the sky"
78
+ **Style**: "impressionist painting"
79
+ **Result**: Art inspired by temperature fluctuations and atmospheric pressure
80
+
81
+ #### Financial Data
82
+ ```csv
83
+ date,price,volume,volatility
84
+ 2024-01-01,100.5,1000000,0.15
85
+ 2024-01-02,102.3,1200000,0.18
86
+ 2024-01-03,99.8,900000,0.22
87
+ ...
88
+ ```
89
+
90
+ **Prompt**: "The rhythm of market forces"
91
+ **Style**: "geometric abstract"
92
+ **Result**: Visual representation of market dynamics
93
+
94
+ ### Mathematical Formula Examples
95
+
96
+ #### Sine Wave with Decay
97
+ ```python
98
+ np.sin(np.linspace(0, 4*np.pi, 100)) * np.exp(-np.linspace(0, 1, 100))
99
+ ```
100
+ **Prompt**: "Fading echoes in a digital realm"
101
+ **Result**: Art representing diminishing oscillations
102
+
103
+ #### Spiral Pattern
104
+ ```python
105
+ t = np.linspace(0, 4*np.pi, 200)
106
+ np.sin(t) * t
107
+ ```
108
+ **Prompt**: "The golden ratio in nature"
109
+ **Result**: Spiral-inspired organic art
110
+
111
+ #### Complex Harmonic
112
+ ```python
113
+ x = np.linspace(0, 6*np.pi, 300)
114
+ np.sin(x) + 0.5*np.cos(3*x) + 0.25*np.sin(5*x)
115
+ ```
116
+ **Prompt**: "Musical harmonies visualized"
117
+ **Result**: Multi-layered wave patterns
118
+
119
+ ## 🎨 Creative Workflow
120
+
121
+ ### 1. Data Preparation
122
+ - **Clean your data**: Remove or handle missing values
123
+ - **Choose meaningful columns**: Focus on numeric data that tells a story
124
+ - **Consider time series**: Temporal data often creates compelling patterns
125
+
126
+ ### 2. Prompt Engineering
127
+ - **Start with your data story**: What does your data represent?
128
+ - **Add artistic style**: Choose styles that complement your data's nature
129
+ - **Set the mood**: Match the emotional tone to your data's characteristics
130
+
131
+ ### 3. Style Recommendations
132
+
133
+ | Data Type | Recommended Styles | Mood Suggestions |
134
+ |-----------|-------------------|------------------|
135
+ | Time Series | flowing, organic, wave-like | rhythmic, temporal, evolving |
136
+ | Statistical | geometric, structured, minimal | analytical, precise, clean |
137
+ | Financial | dynamic, angular, sharp | energetic, volatile, intense |
138
+ | Scientific | technical, detailed, precise | methodical, systematic, clear |
139
+ | Random/Chaotic | abstract, expressionist, wild | unpredictable, chaotic, free |
140
+
141
+ ## 🔧 Advanced Usage
142
+
143
+ ### Programmatic Access
144
+
145
+ ```python
146
+ from src.generators.compi_phase2b_data_to_image import CompIPhase2BDataToImage
147
+
148
+ # Initialize generator
149
+ generator = CompIPhase2BDataToImage()
150
+
151
+ # Generate from CSV
152
+ results = generator.generate_image(
153
+ text_prompt="Data flowing like water",
154
+ style="fluid abstract",
155
+ mood="serene, continuous",
156
+ csv_path="path/to/your/data.csv",
157
+ num_images=2
158
+ )
159
+
160
+ # Generate from formula
161
+ results = generator.generate_image(
162
+ text_prompt="Mathematical harmony",
163
+ style="geometric precision",
164
+ mood="balanced, rhythmic",
165
+ formula="np.sin(np.linspace(0, 4*np.pi, 100))",
166
+ num_images=1
167
+ )
168
+ ```
169
+
170
+ ### Batch Processing
171
+
172
+ ```python
173
+ # Process multiple CSV files
174
+ results = generator.batch_process_csv_files(
175
+ csv_directory="data/experiments/",
176
+ text_prompt="Scientific visualization",
177
+ style="technical illustration",
178
+ mood="precise, analytical"
179
+ )
180
+
181
+ # Process multiple formulas
182
+ formulas = [
183
+ "np.sin(x)",
184
+ "np.cos(x)",
185
+ "np.tan(x/2)"
186
+ ]
187
+ results = generator.batch_process_formulas(
188
+ formulas=formulas,
189
+ text_prompt="Trigonometric art",
190
+ style="mathematical beauty"
191
+ )
192
+ ```
193
+
194
+ ## 📊 Understanding Data Features
195
+
196
+ Phase 2.B analyzes your data and extracts several key features:
197
+
198
+ ### Statistical Features
199
+ - **Means, Medians, Standard Deviations**: Basic statistical measures
200
+ - **Ranges and Distributions**: Data spread and shape
201
+ - **Trends**: Increasing, decreasing, stable, or volatile patterns
202
+
203
+ ### Pattern Features
204
+ - **Correlations**: Relationships between different data columns
205
+ - **Seasonality**: Repeating patterns in time series data
206
+ - **Complexity Score**: Measure of data intricacy (0-1)
207
+ - **Variability Score**: Measure of data diversity (0-1)
208
+ - **Pattern Strength**: Measure of detectable patterns (0-1)
209
+
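+ As a rough illustration (not the actual feature-extraction code), values of this kind can be derived from a DataFrame like so:
+
+ ```python
+ import numpy as np
+ import pandas as pd
+
+ numeric = pd.read_csv("data.csv").select_dtypes(include=[np.number]).dropna()
+
+ stats = numeric.describe()        # means, std devs, quartiles, ranges per column
+ correlations = numeric.corr()     # relationships between columns
+
+ # Crude per-column trend: sign of a fitted straight line's slope
+ trends = {
+     col: "increasing" if np.polyfit(np.arange(len(numeric)), numeric[col], 1)[0] > 0 else "decreasing"
+     for col in numeric.columns
+ }
+
+ # Simple variability proxy in [0, 1]: average coefficient of variation, clipped
+ variability = float(np.clip((stats.loc["std"] / stats.loc["mean"].abs()).mean(), 0, 1))
+
+ print(trends, round(variability, 2))
+ ```
+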
210
+ ### Poetic Interpretation
211
+ The system converts these features into artistic language:
212
+ - **Trend descriptions**: "ascending", "flowing", "turbulent"
213
+ - **Pattern adjectives**: "intricate", "harmonious", "dynamic"
214
+ - **Artistic metaphors**: "like brushstrokes on canvas", "dancing with precision"
215
+
216
+ ## 🎯 Tips for Best Results
217
+
218
+ ### Data Tips
219
+ 1. **Quality over quantity**: Clean, meaningful data works better than large messy datasets
220
+ 2. **Numeric focus**: Ensure your CSV has numeric columns for analysis
221
+ 3. **Reasonable size**: Keep datasets under 10,000 rows for faster processing
222
+ 4. **Meaningful names**: Use descriptive column names for better interpretation
223
+
224
+ ### Formula Tips
225
+ 1. **Use NumPy functions**: Leverage `np.sin`, `np.cos`, `np.exp`, etc.
226
+ 2. **Define ranges**: Use `np.linspace()` to create smooth curves
227
+ 3. **Experiment with complexity**: Combine multiple functions for richer patterns
228
+ 4. **Consider scale**: Ensure your formula produces reasonable numeric ranges
229
+
230
+ ### Prompt Tips
231
+ 1. **Be descriptive**: Rich prompts lead to more interesting results
232
+ 2. **Match your data**: Align artistic style with data characteristics
233
+ 3. **Experiment**: Try different style/mood combinations
234
+ 4. **Use the preview**: Check the enhanced prompt before generating
235
+
236
+ ## 🔍 Troubleshooting
237
+
238
+ ### Common Issues
239
+
240
+ **"Error analyzing data"**
241
+ - Check that your CSV has numeric columns
242
+ - Ensure the file is properly formatted
243
+ - Try with a smaller dataset first
244
+
245
+ **"Invalid formula"**
246
+ - Use only safe mathematical functions
247
+ - Check your NumPy syntax
248
+ - Ensure parentheses are balanced
249
+
250
+ **"Generation failed"**
251
+ - Check your GPU memory if using CUDA
252
+ - Try reducing the number of inference steps
253
+ - Ensure your prompt isn't too long
254
+
255
+ ### Performance Optimization
256
+ - Use GPU acceleration when available
257
+ - Reduce image dimensions for faster generation
258
+ - Process smaller datasets for quicker analysis
259
+ - Use fewer inference steps for rapid prototyping
260
+
261
+ ## 🚀 Next Steps
262
+
263
+ After mastering Phase 2.B, consider:
264
+ 1. **Combining with Phase 2.A**: Use audio + data for multimodal art
265
+ 2. **Creating data stories**: Build narratives around your visualizations
266
+ 3. **Exploring advanced formulas**: Try complex mathematical expressions
267
+ 4. **Building datasets**: Create custom data for specific artistic goals
268
+
269
+ ---
270
+
271
+ **Ready to transform your data into art?** Launch the Streamlit interface and start creating! 🎨📊✨
docs/PHASE2C_EMOTION_TO_IMAGE_GUIDE.md ADDED
@@ -0,0 +1,286 @@
1
+ # CompI Phase 2.C: Emotional/Contextual Input to Image Generation
2
+
3
+ ## 🌀 Overview
4
+
5
+ Phase 2.C transforms emotions, moods, and feelings into stunning AI-generated art. This phase combines emotion detection, sentiment analysis, and contextual understanding to create artwork that resonates with your emotional state and inner feelings.
6
+
7
+ ## ✨ Key Features
8
+
9
+ ### 🎭 Emotion Processing
10
+ - **Preset Emotions**: Choose from 25+ carefully curated emotions
11
+ - **Custom Emotions**: Enter any emotion word or feeling
12
+ - **Emoji Support**: Use emojis to express emotions naturally
13
+ - **Descriptive Text**: Describe complex emotional states in your own words
14
+ - **Sentiment Analysis**: Automatic emotion detection from text using TextBlob
15
+
16
+ ### 🎨 Artistic Integration
17
+ - **Emotion-to-Color Mapping**: Automatic color palette generation based on emotions
18
+ - **Artistic Descriptors**: Emotion-specific visual styles and atmospheres
19
+ - **Prompt Enhancement**: Intelligent fusion of emotions with creative prompts
20
+ - **Intensity Levels**: Low, medium, and high emotional intensity processing
21
+ - **Mood Modifiers**: Contextual atmosphere enhancement
22
+
23
+ ### 🔧 Technical Capabilities
24
+ - **Multi-Input Support**: Preset, custom, emoji, and text-based emotion input
25
+ - **Confidence Scoring**: Emotion detection confidence levels
26
+ - **Batch Processing**: Generate art for multiple emotions simultaneously
27
+ - **Color Conditioning**: Optional color palette integration into prompts
28
+ - **Comprehensive Metadata**: Detailed emotion analysis and generation tracking
29
+
30
+ ## 🛠️ Installation & Setup
31
+
32
+ ### Prerequisites
33
+ Ensure you have the base CompI environment set up with all dependencies from `requirements.txt`.
34
+
35
+ ### Additional Dependencies
36
+ Phase 2.C uses existing CompI dependencies, specifically:
37
+ - `textblob>=0.17.0` - Sentiment analysis and emotion detection
38
+ - `emoji` (optional) - Enhanced emoji processing
39
+
40
+ ### Optional Setup
41
+ For enhanced sentiment analysis, download TextBlob corpora:
42
+ ```bash
43
+ python -m textblob.download_corpora
44
+ ```
45
+
46
+ ## 🎯 Quick Start
47
+
48
+ ### 1. Launch the Streamlit Interface
49
+
50
+ ```bash
51
+ # Navigate to your CompI project directory
52
+ cd "C:\Users\Aksharajsinh\Documents\augment-projects\Project CompI"
53
+
54
+ # Run the Phase 2.C interface
55
+ streamlit run src/ui/compi_phase2c_streamlit_ui.py
56
+
57
+ # Or use the main CompI interface
58
+ streamlit run compi_complete_app.py
59
+ # Then select "🌀 Phase 2.C: Emotion-to-Image"
60
+ ```
61
+
62
+ ### 2. Using Preset Emotions
63
+
64
+ 1. **Select "Preset Emotions"** as your input method
65
+ 2. **Choose an emotion category** (Joy & Happiness, Love & Romance, etc.)
66
+ 3. **Pick a specific emotion** from the category
67
+ 4. **Enter your creative prompt** and style
68
+ 5. **Generate** and watch your emotion transform into art!
69
+
70
+ ### 3. Using Custom Emotions or Emojis
71
+
72
+ 1. **Select "Custom Emotion/Emoji"** as your input method
73
+ 2. **Type any emotion** (e.g., "contemplative", "bittersweet")
74
+ 3. **Or use emojis** (🤩, 💫, 🌙) to express feelings
75
+ 4. **Use quick emoji buttons** for common emotions
76
+ 5. **Generate** emotion-infused artwork
77
+
78
+ ### 4. Using Descriptive Text
79
+
80
+ 1. **Select "Descriptive Text"** as your input method
81
+ 2. **Describe your feeling** in natural language
82
+ 3. **Example**: "I feel hopeful after the rain" or "There's anticipation in the air"
83
+ 4. **AI analyzes sentiment** and extracts emotional context
84
+ 5. **Generate** art based on your emotional description
85
+
86
+ ## 📚 Emotion Categories & Examples
87
+
88
+ ### 🌟 Joy & Happiness
89
+ - **joyful**: Bright, radiant, effervescent artwork
90
+ - **ecstatic**: High-energy, explosive, vibrant creations
91
+ - **cheerful**: Light, uplifting, warm compositions
92
+ - **uplifting**: Inspiring, elevating, positive imagery
93
+
94
+ ### 💙 Sadness & Melancholy
95
+ - **melancholic**: Wistful, contemplative, blue-toned art
96
+ - **nostalgic**: Memory-tinged, sepia-like, reflective pieces
97
+ - **somber**: Muted, serious, thoughtful compositions
98
+ - **wistful**: Longing, gentle sadness, soft imagery
99
+
100
+ ### ❤️ Love & Romance
101
+ - **romantic**: Warm, tender, passionate artwork
102
+ - **loving**: Affectionate, caring, heart-centered pieces
103
+ - **passionate**: Intense, fiery, deep emotional art
104
+ - **tender**: Gentle, soft, intimate compositions
105
+
106
+ ### 🕊️ Peace & Serenity
107
+ - **peaceful**: Calm, balanced, harmonious imagery
108
+ - **serene**: Tranquil, still, meditative artwork
109
+ - **tranquil**: Quiet, restful, soothing compositions
110
+ - **harmonious**: Balanced, unified, flowing pieces
111
+
112
+ ### 🔮 Mystery & Drama
113
+ - **mysterious**: Enigmatic, shadowy, intriguing art
114
+ - **dramatic**: Bold, intense, theatrical compositions
115
+ - **enigmatic**: Puzzling, cryptic, thought-provoking pieces
116
+ - **suspenseful**: Tension-filled, anticipatory artwork
117
+
118
+ ### ⚡ Energy & Power
119
+ - **energetic**: Dynamic, vibrant, high-movement art
120
+ - **powerful**: Strong, bold, commanding compositions
121
+ - **intense**: Deep, concentrated, focused imagery
122
+ - **fierce**: Wild, untamed, strong emotional pieces
123
+
124
+ ## 🎨 Creative Workflow
125
+
126
+ ### 1. Emotion Selection Strategy
127
+ - **Start with your current mood**: What are you feeling right now?
128
+ - **Consider the artwork's purpose**: What emotion should it evoke?
129
+ - **Match emotion to subject**: Align feelings with your prompt content
130
+ - **Experiment with intensity**: Try different emotional strengths
131
+
132
+ ### 2. Prompt Engineering with Emotions
133
+ - **Base prompt**: Start with your core visual concept
134
+ - **Emotion integration**: Let the system enhance with emotional context
135
+ - **Style coordination**: Choose styles that complement your emotion
136
+ - **Atmosphere setting**: Use mood modifiers for deeper impact
137
+
138
+ ### 3. Emotion-Style Combinations
139
+
140
+ | Emotion | Recommended Styles | Color Palettes | Atmosphere |
141
+ |---------|-------------------|----------------|------------|
142
+ | Joyful | impressionist, vibrant digital art | golds, oranges, bright blues | radiant, luminous |
143
+ | Melancholic | oil painting, watercolor | blues, grays, muted tones | contemplative, wistful |
144
+ | Romantic | soft digital art, renaissance | pinks, reds, warm tones | tender, passionate |
145
+ | Mysterious | dark fantasy, gothic | purples, blacks, deep blues | enigmatic, shadowy |
146
+ | Energetic | abstract, dynamic digital | bright colors, neons | electric, vibrant |
147
+ | Peaceful | minimalist, zen art | soft greens, blues, whites | serene, harmonious |
148
+
149
+ ## 🔧 Advanced Usage
150
+
151
+ ### Programmatic Access
152
+
153
+ ```python
154
+ from src.generators.compi_phase2c_emotion_to_image import CompIPhase2CEmotionToImage
155
+
156
+ # Initialize generator
157
+ generator = CompIPhase2CEmotionToImage()
158
+
159
+ # Generate with preset emotion
160
+ results = generator.generate_image(
161
+ text_prompt="A mystical forest",
162
+ style="digital painting",
163
+ emotion_input="mysterious",
164
+ emotion_type="preset",
165
+ enhancement_strength=0.8,
166
+ num_images=2
167
+ )
168
+
169
+ # Generate with custom emotion
170
+ results = generator.generate_image(
171
+ text_prompt="Urban landscape",
172
+ style="cyberpunk",
173
+ emotion_input="🤖",
174
+ emotion_type="custom",
175
+ enhancement_strength=0.6
176
+ )
177
+
178
+ # Generate with descriptive text
179
+ results = generator.generate_image(
180
+ text_prompt="Mountain vista",
181
+ style="landscape painting",
182
+ emotion_input="I feel a sense of wonder and awe",
183
+ emotion_type="text",
184
+ contextual_text="Standing at the peak, overwhelmed by nature's beauty"
185
+ )
186
+ ```
187
+
188
+ ### Batch Processing
189
+
190
+ ```python
191
+ # Process multiple emotions
192
+ emotions = ["joyful", "melancholic", "mysterious", "energetic"]
193
+ results = generator.batch_process_emotions(
194
+ text_prompt="Abstract composition",
195
+ style="modern art",
196
+ emotions=emotions,
197
+ enhancement_strength=0.7
198
+ )
199
+
200
+ # Color palette conditioning
201
+ results = generator.generate_emotion_palette_art(
202
+ text_prompt="Flowing water",
203
+ style="fluid art",
204
+ emotion_input="peaceful",
205
+ use_color_conditioning=True
206
+ )
207
+ ```
208
+
209
+ ## 📊 Understanding Emotion Analysis
210
+
211
+ Phase 2.C analyzes emotions across multiple dimensions:
212
+
213
+ ### Emotion Detection
214
+ - **Primary Emotion**: Main detected emotion category
215
+ - **Confidence Score**: How certain the system is (0-1)
216
+ - **Secondary Emotions**: Related emotional states
217
+ - **Intensity Level**: Low, medium, or high emotional strength
218
+
219
+ ### Sentiment Analysis
220
+ - **Polarity**: Negative (-1) to Positive (+1) sentiment
221
+ - **Subjectivity**: Objective (0) to Subjective (1) content
222
+ - **Keywords**: Emotion-related words detected in text
223
+ - **Emojis**: Emotional emojis found in input
224
+
225
+ ### Artistic Mapping
226
+ - **Color Palette**: 3-5 colors representing the emotion
227
+ - **Artistic Descriptors**: Visual style words (vibrant, muted, etc.)
228
+ - **Mood Modifiers**: Atmospheric enhancements
229
+ - **Enhancement Tags**: Descriptive tags for the emotion
230
+
231
+ ## 🎯 Tips for Best Results
232
+
233
+ ### Emotion Selection Tips
234
+ 1. **Be specific**: "melancholic" is more precise than "sad"
235
+ 2. **Consider intensity**: Strong emotions create more dramatic art
236
+ 3. **Match context**: Align emotions with your prompt's subject matter
237
+ 4. **Experiment freely**: Try unexpected emotion-prompt combinations
238
+
239
+ ### Prompt Enhancement Tips
240
+ 1. **Start simple**: Let emotions enhance rather than complicate
241
+ 2. **Trust the system**: Emotion analysis often captures nuances you might miss
242
+ 3. **Adjust strength**: Use the enhancement slider to control emotional impact
243
+ 4. **Combine thoughtfully**: Ensure emotions complement your artistic vision
244
+
245
+ ### Style Coordination Tips
246
+ 1. **Emotional styles**: Some styles naturally align with certain emotions
247
+ 2. **Color harmony**: Consider how emotion colors work with your chosen style
248
+ 3. **Atmospheric consistency**: Ensure mood modifiers enhance rather than conflict
249
+ 4. **Intensity matching**: High-intensity emotions work well with bold styles
250
+
251
+ ## 🔍 Troubleshooting
252
+
253
+ ### Common Issues
254
+
255
+ **"Emotion not detected"**
256
+ - Try more specific emotion words
257
+ - Use descriptive phrases instead of single words
258
+ - Check for typos in emotion input
259
+
260
+ **"Weak emotional enhancement"**
261
+ - Increase the enhancement strength slider
262
+ - Use more emotionally charged language
263
+ - Try preset emotions for stronger effects
264
+
265
+ **"Conflicting emotional signals"**
266
+ - Simplify your emotional input
267
+ - Focus on one primary emotion
268
+ - Avoid mixing opposing emotions
269
+
270
+ ### Performance Optimization
271
+ - Use preset emotions for fastest processing
272
+ - Shorter descriptive texts analyze faster
273
+ - Batch processing is more efficient for multiple emotions
274
+ - GPU acceleration improves generation speed
275
+
276
+ ## 🚀 Next Steps
277
+
278
+ After mastering Phase 2.C, consider:
279
+ 1. **Multimodal combinations**: Combine emotions with audio (Phase 2.A) or data (Phase 2.B)
280
+ 2. **Emotional storytelling**: Create series of images with evolving emotions
281
+ 3. **Personal emotion mapping**: Develop your own emotion-to-art style
282
+ 4. **Therapeutic applications**: Use emotional art for self-expression and healing
283
+
284
+ ---
285
+
286
+ **Ready to transform your emotions into art?** Launch the interface and start creating emotionally-rich artwork! 🌀🎨✨
docs/PHASE2D_REALTIME_DATA_TO_IMAGE_GUIDE.md ADDED
@@ -0,0 +1,337 @@
1
+ # CompI Phase 2.D: Real-Time Data Feeds to Image Generation
2
+
3
+ ## 🌎 Overview
4
+
5
+ Phase 2.D connects your art to the pulse of the world through real-time data feeds. This phase integrates live weather data, breaking news, financial markets, and other real-time information to create artwork that captures the current moment in time and reflects the world's dynamic state.
6
+
7
+ ## ✨ Key Features
8
+
9
+ ### 🌐 Real-Time Data Integration
10
+ - **Weather Data**: Live weather conditions from OpenWeatherMap API
11
+ - **News Headlines**: Breaking news from RSS feeds and NewsAPI
12
+ - **Financial Data**: Cryptocurrency prices and exchange rates
13
+ - **Social Trends**: Real-time social media and trending topics (extensible)
14
+ - **Custom RSS Feeds**: Support for any RSS/XML data source
15
+
16
+ ### 🧠 Intelligent Context Processing
17
+ - **Data Summarization**: Automatic summarization of multiple data sources
18
+ - **Mood Detection**: Extract emotional context from real-time data
19
+ - **Theme Analysis**: Identify key themes and topics
20
+ - **Temporal Context**: Time-aware data processing and analysis
21
+ - **Artistic Inspiration**: Convert data patterns into creative prompts
22
+
23
+ ### 🔧 Technical Capabilities
24
+ - **Data Caching**: Intelligent caching to respect API rate limits
25
+ - **Batch Processing**: Multiple data source configurations
26
+ - **Temporal Series**: Generate art evolution over time
27
+ - **Error Handling**: Robust fallback mechanisms for API failures
28
+ - **Comprehensive Metadata**: Detailed real-time context tracking
29
+
30
+ ## 🛠️ Installation & Setup
31
+
32
+ ### Prerequisites
33
+ Ensure you have the base CompI environment set up with all dependencies from `requirements.txt`.
34
+
35
+ ### Additional Dependencies
36
+ Phase 2.D uses additional packages for real-time data processing:
37
+ ```bash
38
+ pip install requests feedparser
39
+ ```
40
+
41
+ ### API Keys (Optional)
42
+ While Phase 2.D works with free data sources, you can enhance functionality with API keys:
43
+
44
+ #### OpenWeatherMap (Weather Data)
45
+ 1. Sign up at [OpenWeatherMap](https://openweathermap.org/api)
46
+ 2. Get your free API key (1000 calls/day)
47
+ 3. Enter in the interface or set as environment variable
48
+
49
+ #### NewsAPI (News Data)
50
+ 1. Sign up at [NewsAPI](https://newsapi.org/)
51
+ 2. Get your free API key (100 requests/day)
52
+ 3. Enter in the interface or set as environment variable
53
+
54
+ **Note**: Phase 2.D works without API keys using free RSS feeds and demo keys.
55
+
56
+ ## 🎯 Quick Start
57
+
58
+ ### 1. Launch the Interface
59
+
60
+ ```bash
61
+ # Navigate to your CompI project directory
62
+ cd "C:\Users\Aksharajsinh\Documents\augment-projects\Project CompI"
63
+
64
+ # Run the Phase 2.D interface
65
+ streamlit run src/ui/compi_phase2d_streamlit_ui.py
66
+
67
+ # Or use the main CompI interface
68
+ streamlit run compi_complete_app.py
69
+ # Then select "🌎 Phase 2.D: Real-Time Data-to-Image"
70
+ ```
71
+
72
+ ### 2. Basic Real-Time Generation
73
+
74
+ 1. **Enter your creative prompt** (e.g., "A cityscape reflecting today's energy")
75
+ 2. **Choose your art style** (e.g., "cyberpunk digital art")
76
+ 3. **Enable data sources** (Weather, News, or Financial)
77
+ 4. **Configure data settings** (city for weather, news category, etc.)
78
+ 5. **Generate** and watch real-time data transform into art!
79
+
80
+ ### 3. Advanced Features
81
+
82
+ - **Batch Processing**: Generate multiple images with different data combinations
83
+ - **Temporal Series**: Create art evolution over time intervals
84
+ - **Context Strength**: Control how strongly real-time data influences the art
85
+ - **Data Preview**: See real-time context before generation
86
+
87
+ ## 📚 Data Sources & Examples
88
+
89
+ ### 🌤️ Weather Data Integration
90
+
91
+ #### Current Weather Conditions
92
+ ```python
93
+ # Example: Sunny weather in Paris
94
+ Weather Context: "Clear skies, 22°C, low humidity"
95
+ Artistic Influence: "bright and optimistic atmosphere"
96
+ Enhanced Prompt: "Parisian street scene, impressionist style, bright and optimistic atmosphere"
97
+ ```
98
+
99
+ #### Weather Mood Mapping
100
+ - **Clear/Sunny**: Bright, optimistic, radiant
101
+ - **Cloudy**: Contemplative, soft, muted
102
+ - **Rainy**: Melancholic, reflective, dramatic
103
+ - **Stormy**: Intense, powerful, dynamic
104
+ - **Snowy**: Serene, peaceful, ethereal
105
+ - **Foggy**: Mysterious, ethereal, dreamlike
106
+
107
+ ### 📰 News Data Integration
108
+
109
+ #### Breaking News Headlines
110
+ ```python
111
+ # Example: Technology news
112
+ Headlines: "AI breakthrough in medical research; New space mission launched"
113
+ Artistic Influence: "capturing the pulse of current events, inspired by innovation"
114
+ Enhanced Prompt: "Futuristic laboratory, sci-fi art, capturing innovation and discovery"
115
+ ```
116
+
117
+ #### News Category Mapping
118
+ - **Technology**: Futuristic, innovative, digital
119
+ - **Science**: Discovery, exploration, analytical
120
+ - **World**: Global, diverse, interconnected
121
+ - **Business**: Dynamic, structured, professional
122
+ - **General**: Contemporary, relevant, timely
123
+
124
+ ### 💹 Financial Data Integration
125
+
126
+ #### Market Conditions
127
+ ```python
128
+ # Example: Rising Bitcoin price
129
+ Financial Context: "Bitcoin: $45,000 USD, USD/EUR: 0.85"
130
+ Artistic Influence: "reflecting market dynamics and economic energy"
131
+ Enhanced Prompt: "Abstract composition, geometric art, reflecting economic energy and growth"
132
+ ```
133
+
134
+ #### Market Mood Indicators
135
+ - **Rising Markets**: Energetic, upward, optimistic
136
+ - **Falling Markets**: Dramatic, intense, volatile
137
+ - **Stable Markets**: Balanced, steady, calm
138
+ - **High Volatility**: Dynamic, chaotic, electric
139
+
140
+ ## 🎨 Creative Workflows
141
+
142
+ ### 1. Moment Capture Workflow
143
+ **Goal**: Capture the current moment in artistic form
144
+
145
+ 1. **Enable all data sources** (Weather + News + Financial)
146
+ 2. **Use high context strength** (0.8-1.0)
147
+ 3. **Choose responsive styles** (abstract, impressionist, contemporary)
148
+ 4. **Generate immediately** to capture the current moment
149
+
150
+ ### 2. Temporal Evolution Workflow
151
+ **Goal**: Show how the world changes over time
152
+
153
+ 1. **Configure temporal series** (e.g., every 30 minutes)
154
+ 2. **Use consistent prompt and style**
155
+ 3. **Enable news feeds** for evolving content
156
+ 4. **Create time-lapse art series**
157
+
158
+ ### 3. Location-Based Workflow
159
+ **Goal**: Create art reflecting specific locations
160
+
161
+ 1. **Enable weather data** for target city
162
+ 2. **Use location-specific news** if available
163
+ 3. **Choose appropriate styles** (landscape, urban, cultural)
164
+ 4. **Incorporate local context** in prompts
165
+
166
+ ### 4. Thematic Workflow
167
+ **Goal**: Focus on specific themes or topics
168
+
169
+ 1. **Select relevant news categories** (technology, science, etc.)
170
+ 2. **Use thematic prompts** aligned with data
171
+ 3. **Adjust context strength** based on desired influence
172
+ 4. **Create thematic art series**
173
+
174
+ ## 🔧 Advanced Usage
175
+
176
+ ### Programmatic Access
177
+
178
+ ```python
179
+ from src.generators.compi_phase2d_realtime_to_image import CompIPhase2DRealTimeToImage
180
+
181
+ # Initialize generator
182
+ generator = CompIPhase2DRealTimeToImage()
183
+
184
+ # Generate with weather data
185
+ results = generator.generate_image(
186
+ text_prompt="A landscape reflecting today's weather",
187
+ style="impressionist painting",
188
+ include_weather=True,
189
+ weather_city="Tokyo",
190
+ weather_api_key="your_api_key", # Optional
191
+ context_strength=0.8,
192
+ num_images=2
193
+ )
194
+
195
+ # Generate with news data
196
+ results = generator.generate_image(
197
+ text_prompt="Abstract representation of current events",
198
+ style="modern digital art",
199
+ include_news=True,
200
+ news_category="technology",
201
+ max_news=5,
202
+ context_strength=0.7
203
+ )
204
+
205
+ # Generate with all data sources
206
+ results = generator.generate_image(
207
+ text_prompt="The world's current state",
208
+ style="surreal digital art",
209
+ include_weather=True,
210
+ weather_city="New York",
211
+ include_news=True,
212
+ news_category="world",
213
+ include_financial=True,
214
+ context_strength=0.9
215
+ )
216
+ ```
217
+
218
+ ### Batch Processing
219
+
220
+ ```python
221
+ # Multiple data source configurations
222
+ data_configs = [
223
+ {"include_weather": True, "weather_city": "London"},
224
+ {"include_news": True, "news_category": "technology"},
225
+ {"include_financial": True},
226
+ {"include_weather": True, "include_news": True, "include_financial": True}
227
+ ]
228
+
229
+ results = generator.batch_process_data_sources(
230
+ text_prompt="Global perspectives",
231
+ style="contemporary art",
232
+ data_source_configs=data_configs,
233
+ context_strength=0.7
234
+ )
235
+ ```
236
+
237
+ ### Temporal Series Generation
238
+
239
+ ```python
240
+ # Generate art evolution over time
241
+ results = generator.generate_temporal_series(
242
+ text_prompt="The changing world",
243
+ style="abstract expressionism",
244
+ data_config={
245
+ "include_weather": True,
246
+ "weather_city": "Paris",
247
+ "include_news": True,
248
+ "news_category": "general"
249
+ },
250
+ time_intervals=[0, 30, 60, 120], # 0, 30min, 1hr, 2hr
251
+ context_strength=0.8
252
+ )
253
+ ```
254
+
255
+ ## 📊 Understanding Real-Time Context
256
+
257
+ Phase 2.D processes real-time data across multiple dimensions:
258
+
259
+ ### Data Processing Pipeline
260
+ 1. **Data Fetching**: Retrieve data from multiple APIs and feeds
261
+ 2. **Caching**: Store data to respect rate limits and improve performance
262
+ 3. **Analysis**: Extract mood indicators, themes, and patterns
263
+ 4. **Summarization**: Create concise summaries of current context
264
+ 5. **Artistic Translation**: Convert data insights into creative prompts
265
+
266
+ ### Context Components
267
+ - **Summary**: Concise description of all data sources
268
+ - **Mood Indicators**: Emotional context derived from data
269
+ - **Key Themes**: Main topics and subjects identified
270
+ - **Temporal Context**: Time-aware contextual information
271
+ - **Artistic Inspiration**: Creative interpretation for prompt enhancement
272
+
273
+ ### Context Strength Levels
274
+ - **High (0.7-1.0)**: Strong data influence, detailed context integration
275
+ - **Medium (0.4-0.6)**: Moderate data influence, balanced integration
276
+ - **Low (0.1-0.3)**: Subtle data influence, minimal context addition
277
+
278
+ ## 🎯 Tips for Best Results
279
+
280
+ ### Data Source Selection
281
+ 1. **Weather**: Best for location-specific, atmospheric art
282
+ 2. **News**: Ideal for contemporary, socially-relevant themes
283
+ 3. **Financial**: Great for abstract, dynamic, economic themes
284
+ 4. **Combined**: Use multiple sources for rich, complex context
285
+
286
+ ### Prompt Engineering
287
+ 1. **Responsive prompts**: Use prompts that can adapt to data context
288
+ 2. **Flexible styles**: Choose styles that work with various moods
289
+ 3. **Context awareness**: Consider how data might influence your vision
290
+ 4. **Temporal relevance**: Use time-aware language when appropriate
291
+
292
+ ### Context Strength Guidelines
293
+ 1. **High strength**: When data should drive the artistic direction
294
+ 2. **Medium strength**: For balanced data-art integration
295
+ 3. **Low strength**: When data should provide subtle inspiration
296
+ 4. **Variable strength**: Experiment to find optimal balance
297
+
298
+ ## 🔍 Troubleshooting
299
+
300
+ ### Common Issues
301
+
302
+ **"No real-time data available"**
303
+ - Check internet connection
304
+ - Verify API keys if using premium features
305
+ - Try different data sources
306
+ - Check API rate limits
307
+
308
+ **"API connection failed"**
309
+ - Verify API keys are correct
310
+ - Check if APIs are operational
311
+ - Try using free RSS feeds instead
312
+ - Reduce request frequency
313
+
314
+ **"Weak data influence"**
315
+ - Increase context strength
316
+ - Use more responsive prompts
317
+ - Enable multiple data sources
318
+ - Check data quality and relevance
319
+
320
+ ### Performance Optimization
321
+ - Use data caching to reduce API calls
322
+ - Enable only needed data sources
323
+ - Use appropriate context strength levels
324
+ - Monitor API rate limits and usage
325
+
326
+ ## 🚀 Next Steps
327
+
328
+ After mastering Phase 2.D, consider:
329
+ 1. **Multimodal Fusion**: Combine real-time data with emotions (2.C) or audio (2.A)
330
+ 2. **Custom Data Sources**: Add your own RSS feeds or APIs
331
+ 3. **Temporal Art Projects**: Create long-term data evolution series
332
+ 4. **Location-Based Art**: Develop city or region-specific art projects
333
+ 5. **News Art Automation**: Set up automated news-driven art generation
334
+
335
+ ---
336
+
337
+ **Ready to connect your art to the world's pulse?** Launch the interface and start creating real-time responsive artwork! 🌎📡🎨
docs/PHASE2E_STYLE_REFERENCE_GUIDE.md ADDED
@@ -0,0 +1,271 @@
1
+ # CompI Phase 2.E: Style Reference/Example Image to AI Art - User Guide
2
+
3
+ ## 🎨 Transform Any Image into AI Art Guidance
4
+
5
+ Phase 2.E allows you to use **any reference image** (from your device or the web) to guide the style, mood, and composition of your AI-generated art. Upload local files or paste URLs from Google Images, Pinterest, Instagram, or any other source!
6
+
7
+ ## 🚀 Quick Start
8
+
9
+ ### 1. **Launch the Application**
10
+ ```bash
11
+ streamlit run src/ui/compi_phase2e_streamlit_ui.py
12
+ ```
13
+
14
+ ### 2. **Basic Workflow**
15
+ 1. **Enter your text prompt** - Describe what you want to create
16
+ 2. **Add reference image** - Upload file or paste web URL
17
+ 3. **Review AI suggestions** - See automatic style analysis
18
+ 4. **Adjust settings** - Control reference strength and parameters
19
+ 5. **Generate art** - Create AI art guided by your reference
20
+ 6. **Download results** - Save images with full metadata
21
+
22
+ ## 📸 Reference Image Sources
23
+
24
+ ### **Supported Input Methods**
25
+
26
+ #### 🖼️ **Local File Upload**
27
+ - **Formats**: PNG, JPG, JPEG, BMP, TIFF, WebP
28
+ - **Size Limit**: Up to 10MB per file
29
+ - **Quality**: Higher resolution = better style analysis
30
+
31
+ #### 🌐 **Web URL Input**
32
+ - **Google Images**: Right-click → "Copy image address"
33
+ - **Pinterest**: Click image → Copy URL from address bar
34
+ - **Instagram**: Use direct image links
35
+ - **Art Websites**: DeviantArt, ArtStation, Behance
36
+ - **Any Website**: Direct image URLs (.jpg, .png, etc.)
37
+
38
+ ### **URL Examples**
39
+ ```
40
+ ✅ Good URLs:
41
+ https://example.com/artwork.jpg
42
+ https://pinterest.com/pin/123456789/
43
+ https://images.unsplash.com/photo-123/image.jpg
44
+
45
+ ❌ Avoid:
46
+ https://website.com/gallery-page (not direct image)
47
+ https://social-media.com/post/123 (post page, not image)
48
+ ```
49
+
50
+ ## 🎛️ Interface Guide
51
+
52
+ ### **Main Controls**
53
+
54
+ #### **Text Input Section**
55
+ - **Main Prompt**: Primary description of what you want to generate
56
+ - **Style Keywords**: Additional artistic style descriptors
57
+ - **Mood/Atmosphere**: Emotional tone and feeling
58
+
59
+ #### **Reference Image Section**
60
+ - **Upload Tab**: Drag & drop or browse for local files
61
+ - **URL Tab**: Paste any web image URL
62
+ - **Analysis Display**: Real-time style analysis and suggestions
63
+
64
+ #### **Generation Settings**
65
+ - **Reference Strength**: How closely to follow the reference (0.1-0.9)
66
+ - **Number of Images**: Generate 1-4 variations
67
+ - **Quality Settings**: Inference steps and guidance scale
68
+ - **Seed Control**: Random or fixed for reproducibility
69
+
70
+ ### **AI Style Analysis**
71
+
72
+ When you load a reference image, the AI automatically analyzes:
73
+
74
+ - **Visual Properties**: Brightness, contrast, color distribution
75
+ - **Style Characteristics**: Artistic technique, complexity, mood
76
+ - **Suggested Keywords**: Automatically generated style descriptors
77
+ - **Enhancement Options**: One-click addition to your prompt
78
+
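+ As a rough sketch of what this analysis can look like, the snippet below computes brightness, contrast, and colour spread with Pillow and NumPy and maps them to keywords. The thresholds and keyword choices are illustrative assumptions, not CompI's exact analyzer.
+
+ ```python
+ import numpy as np
+ from PIL import Image
+
+ def analyze_reference(path: str) -> dict:
+     """Estimate simple visual properties and suggest style keywords."""
+     img = np.asarray(Image.open(path).convert("RGB"), dtype=np.float32) / 255.0
+     gray = img.mean(axis=2)
+     brightness = float(gray.mean())
+     contrast = float(gray.std())
+     saturation = float((img.max(axis=2) - img.min(axis=2)).mean())
+     keywords = [
+         "bright, airy" if brightness > 0.6 else "dark, moody",
+         "high contrast" if contrast > 0.25 else "soft, low contrast",
+     ]
+     if saturation > 0.3:
+         keywords.append("vivid colors")
+     return {"brightness": brightness, "contrast": contrast,
+             "saturation": saturation, "suggested_keywords": keywords}
+
+ print(analyze_reference("reference.jpg"))
+ ```
+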
79
+ ## ⚙️ Settings Guide
80
+
81
+ ### **Reference Strength Control**
82
+
83
+ The reference strength determines how closely your generated art follows the reference image:
84
+
85
+ | Strength | Effect | Best For |
86
+ |----------|--------|----------|
87
+ | **0.1-0.3** | Loose inspiration, high creativity | Abstract concepts, creative freedom |
88
+ | **0.4-0.6** | Balanced style transfer | Most use cases, artistic guidance |
89
+ | **0.7-0.9** | Close adherence to reference | Style mimicking, specific looks |
90
+
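+ For intuition, here is a hedged sketch of wiring a reference-strength slider into a standard diffusers img2img call. Note that the diffusers `strength` argument controls how strongly the reference is re-noised, so a higher reference strength maps to a *lower* img2img strength; the mapping and model choice below are assumptions, not the exact CompI code.
+
+ ```python
+ import torch
+ from PIL import Image
+ from diffusers import StableDiffusionImg2ImgPipeline
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
+     "runwayml/stable-diffusion-v1-5",
+     torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+ ).to(device)
+
+ reference_strength = 0.6                        # UI slider value (0.1-0.9)
+ reference = Image.open("reference.jpg").convert("RGB").resize((512, 512))
+
+ result = pipe(
+     prompt="A serene mountain landscape, oil painting, golden hour",
+     image=reference,
+     strength=1.0 - reference_strength,          # closer reference -> less re-noising
+     guidance_scale=7.5,
+     num_inference_steps=30,
+ ).images[0]
+ result.save("styled_output.png")
+ ```
+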
91
+ ### **Quality vs Speed Settings**
92
+
93
+ | Setting | Fast (10-15 steps) | Balanced (20-30 steps) | High Quality (40-50 steps) |
94
+ |---------|-------------------|------------------------|---------------------------|
95
+ | **Time** | 30-60 seconds | 1-2 minutes | 3-5 minutes |
96
+ | **Quality** | Good for testing | Recommended | Best results |
97
+ | **Use Case** | Quick iterations | Final generation | Professional work |
98
+
99
+ ### **Guidance Scale**
100
+
101
+ | Scale | Effect | Best For |
102
+ |-------|--------|----------|
103
+ | **5-10** | More creative, loose interpretation | Artistic freedom, abstract art |
104
+ | **10-15** | Balanced prompt following | Most use cases |
105
+ | **15-20** | Strict prompt adherence | Precise requirements |
106
+
107
+ ## 🎨 Creative Techniques
108
+
109
+ ### **Effective Reference Selection**
110
+
111
+ #### ✅ **Good References**
112
+ - **Clear artistic style** (paintings, digital art, photography styles)
113
+ - **Strong visual identity** (distinctive colors, techniques, moods)
114
+ - **Good composition** (well-balanced, not cluttered)
115
+ - **High contrast** (clear light/dark areas)
116
+
117
+ #### ❌ **Challenging References**
118
+ - **Cluttered images** with too many elements
119
+ - **Low contrast** or very dark/bright images
120
+ - **Screenshots** or UI elements
121
+ - **Text-heavy** images
122
+
123
+ ### **Prompt Enhancement Tips**
124
+
125
+ #### **Combine Multiple Styles**
126
+ ```
127
+ Base: "A serene mountain landscape"
128
+ + Style: "oil painting, impressionist"
129
+ + Mood: "golden hour, peaceful"
130
+ + AI Suggestions: "soft brushstrokes, warm colors"
131
+ = Enhanced: "A serene mountain landscape, oil painting, impressionist, golden hour, peaceful, soft brushstrokes, warm colors"
132
+ ```
133
+
134
+ #### **Layer Your Descriptions**
135
+ 1. **Subject**: What you want to see
136
+ 2. **Style**: Artistic technique or medium
137
+ 3. **Mood**: Emotional atmosphere
138
+ 4. **Details**: Specific elements or effects
139
+
140
+ ### **Reference Strength Strategies**
141
+
142
+ #### **Creative Exploration** (Low Strength: 0.2-0.4)
143
+ - Use reference for general mood/color inspiration
144
+ - Allow AI maximum creative freedom
145
+ - Good for abstract or conceptual art
146
+
147
+ #### **Style Transfer** (Medium Strength: 0.5-0.7)
148
+ - Balance between reference and creativity
149
+ - Maintain reference style while changing content
150
+ - Most versatile approach
151
+
152
+ #### **Style Mimicking** (High Strength: 0.7-0.9)
153
+ - Close adherence to reference technique
154
+ - Minimal creative deviation
155
+ - Good for specific artistic styles
156
+
157
+ ## 📁 Output Management
158
+
159
+ ### **File Naming Convention**
160
+
161
+ Generated files follow a comprehensive naming pattern:
162
+ ```
163
+ {prompt}_{style}_{mood}_{timestamp}_seed{number}_{REFIMG|NOREFIMG}_v{variation}.png
164
+ ```
165
+
166
+ **Example:**
167
+ ```
168
+ magical_forest_fantasy_mystical_20250701_143022_seed12345_REFIMG_v1.png
169
+ ```
170
+
171
+ ### **Metadata Files**
172
+
173
+ Each image includes a JSON metadata file with:
174
+ - Complete generation parameters
175
+ - Reference image information
176
+ - AI style analysis results
177
+ - Reproducibility data
178
+
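+ A small sketch of how the naming pattern and sidecar metadata fit together (field names are illustrative):
+
+ ```python
+ import json
+ import time
+
+ def slugify(text: str, max_words: int = 3) -> str:
+     return "_".join(text.lower().split()[:max_words])
+
+ prompt, style, mood = "magical forest", "fantasy", "mystical"
+ seed, used_reference, variation = 12345, True, 1
+ timestamp = time.strftime("%Y%m%d_%H%M%S")
+
+ stem = (f"{slugify(prompt)}_{slugify(style)}_{slugify(mood)}_{timestamp}"
+         f"_seed{seed}_{'REFIMG' if used_reference else 'NOREFIMG'}_v{variation}")
+
+ metadata = {
+     "prompt": prompt, "style": style, "mood": mood, "seed": seed,
+     "reference_image": "reference.jpg" if used_reference else None,
+     "reference_strength": 0.6,
+     "num_inference_steps": 30,
+     "guidance_scale": 7.5,
+ }
+
+ # image.save(f"{stem}.png")  # the PNG and JSON share the same stem
+ with open(f"{stem}.json", "w", encoding="utf-8") as f:
+     json.dump(metadata, f, indent=2)
+ ```
+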
179
+ ### **Organization Tips**
180
+
181
+ - **Create project folders** for different art series
182
+ - **Use consistent naming** for easy searching
183
+ - **Save metadata** for reproducing successful results
184
+ - **Export favorites** to separate collections
185
+
186
+ ## 🔧 Troubleshooting
187
+
188
+ ### **Common Issues**
189
+
190
+ #### **"Failed to load image from URL"**
191
+ - ✅ Check URL is a direct image link
192
+ - ✅ Try right-clicking image → "Copy image address"
193
+ - ✅ Ensure URL ends with .jpg, .png, etc.
194
+ - ✅ Test URL in browser first
195
+
196
+ #### **"Generation taking too long"**
197
+ - ✅ Reduce inference steps (try 15-20)
198
+ - ✅ Lower image count (try 1-2 images)
199
+ - ✅ Check GPU memory availability
200
+ - ✅ Restart application if needed
201
+
202
+ #### **"Poor quality results"**
203
+ - ✅ Increase inference steps (try 30-40)
204
+ - ✅ Adjust reference strength
205
+ - ✅ Improve prompt specificity
206
+ - ✅ Try different reference images
207
+
208
+ #### **"Out of memory errors"**
209
+ - ✅ Enable memory optimizations in settings
210
+ - ✅ Reduce batch size to 1 image
211
+ - ✅ Close other applications
212
+ - ✅ Use CPU mode if necessary
213
+
214
+ ### **Performance Optimization**
215
+
216
+ #### **For Better Speed**
217
+ - Use GPU if available
218
+ - Enable memory optimizations
219
+ - Start with lower inference steps
220
+ - Generate fewer images per batch
221
+
222
+ #### **For Better Quality**
223
+ - Use high-resolution reference images
224
+ - Increase inference steps (30-50)
225
+ - Fine-tune reference strength
226
+ - Use descriptive, specific prompts
227
+
228
+ ## 🎯 Best Practices
229
+
230
+ ### **Reference Image Selection**
231
+ 1. **Choose clear, high-quality images**
232
+ 2. **Match the style you want to achieve**
233
+ 3. **Consider color palette and mood**
234
+ 4. **Test different reference strengths**
235
+
236
+ ### **Prompt Writing**
237
+ 1. **Be specific about desired elements**
238
+ 2. **Use artistic terminology**
239
+ 3. **Describe lighting and atmosphere**
240
+ 4. **Combine multiple style keywords**
241
+
242
+ ### **Iterative Improvement**
243
+ 1. **Start with medium reference strength**
244
+ 2. **Generate multiple variations**
245
+ 3. **Adjust settings based on results**
246
+ 4. **Save successful parameter combinations**
247
+
248
+ ### **Workflow Efficiency**
249
+ 1. **Prepare reference images in advance**
250
+ 2. **Use consistent naming conventions**
251
+ 3. **Save metadata for reproducibility**
252
+ 4. **Organize outputs by project/style**
253
+
254
+ ## 🚀 Advanced Tips
255
+
256
+ ### **Multi-Style Blending**
257
+ - Use reference for base style
258
+ - Add contrasting style keywords
259
+ - Experiment with different strengths
260
+
261
+ ### **Series Creation**
262
+ - Use same reference with different prompts
263
+ - Maintain consistent style across images
264
+ - Vary only specific elements
265
+
266
+ ### **Style Evolution**
267
+ - Start with high reference strength
268
+ - Gradually reduce for more creativity
269
+ - Create progression series
270
+
271
+ This guide provides everything you need to master CompI Phase 2.E and create stunning AI art guided by any reference image!
docs/PHASE3E_PERFORMANCE_GUIDE.md ADDED
@@ -0,0 +1,301 @@
1
+ # ⚙️ CompI Phase 3.E: Performance, Model Management & Reliability - Complete Guide
2
+
3
+ ## 🎯 **What Phase 3.E Delivers**
4
+
5
+ **Phase 3.E transforms CompI into a production-grade platform with professional performance management, intelligent reliability, and advanced model capabilities.**
6
+
7
+ ### **🤖 Model Manager**
8
+ - **Dynamic Model Switching**: Switch between SD 1.5 and SDXL based on requirements
9
+ - **Auto-Availability Checking**: Intelligent detection of model compatibility and VRAM requirements
10
+ - **Universal LoRA Support**: Load and scale LoRA weights across all models and generation modes
11
+ - **Smart Recommendations**: Hardware-based model suggestions and optimization advice
12
+
13
+ ### **⚡ Performance Controls**
14
+ - **xFormers Integration**: Memory-efficient attention with automatic fallback
15
+ - **Advanced Memory Optimization**: Attention slicing, VAE slicing/tiling, CPU offloading
16
+ - **Precision Control**: Automatic dtype selection (fp16/bf16/fp32) based on hardware
17
+ - **Batch Optimization**: Memory-aware batch processing with intelligent sizing
18
+
19
+ ### **📊 VRAM Monitoring**
20
+ - **Real-time Tracking**: Live GPU memory usage monitoring and alerts
21
+ - **Usage Analytics**: Memory usage patterns and optimization suggestions
22
+ - **Threshold Warnings**: Automatic alerts when approaching memory limits
23
+ - **Cache Management**: Intelligent GPU cache clearing and memory cleanup
24
+
25
+ ### **🛡️ Reliability Engine**
26
+ - **OOM-Safe Generation**: Automatic retry with progressive fallback strategies
27
+ - **Intelligent Fallbacks**: Reduce size → reduce steps → CPU fallback progression
28
+ - **Error Classification**: Smart error detection and appropriate response strategies
29
+ - **Graceful Degradation**: Maintain functionality even under resource constraints
30
+
31
+ ### **📦 Batch Processing**
32
+ - **Seed-Controlled Batches**: Deterministic seed sequences for reproducible results
33
+ - **Memory-Aware Batching**: Automatic batch size optimization based on available VRAM
34
+ - **Progress Tracking**: Detailed progress monitoring with per-image status
35
+ - **Failure Recovery**: Continue batch processing even if individual images fail
36
+
37
+ ### **🔍 Upscaler Integration**
38
+ - **Latent Upscaler**: Optional 2x upscaling using Stable Diffusion Latent Upscaler
39
+ - **Graceful Degradation**: Clean fallback when upscaler unavailable
40
+ - **Memory Management**: Intelligent memory allocation for upscaling operations
41
+ - **Quality Enhancement**: Professional-grade image enhancement capabilities
42
+
43
+ ---
44
+
45
+ ## 🚀 **Quick Start Guide**
46
+
47
+ ### **1. Launch Phase 3.E**
48
+ ```bash
49
+ # Method 1: Using launcher script (recommended)
50
+ python run_phase3e_performance_manager.py
51
+
52
+ # Method 2: Direct Streamlit launch
53
+ streamlit run src/ui/compi_phase3e_performance_manager.py --server.port 8505
54
+ ```
55
+
56
+ ### **2. System Requirements Check**
57
+ The launcher automatically checks:
58
+ - **GPU Setup**: CUDA availability and VRAM capacity
59
+ - **Dependencies**: Required and optional packages
60
+ - **Model Support**: SD 1.5 and SDXL availability
61
+ - **Performance Features**: xFormers and upscaler support
62
+
63
+ ### **3. Access the Interface**
64
+ - **URL:** `http://localhost:8505`
65
+ - **Interface:** Professional Streamlit dashboard with real-time monitoring
66
+ - **Sidebar:** Live VRAM monitoring and system status
67
+
68
+ ---
69
+
70
+ ## 🎨 **Professional Workflow**
71
+
72
+ ### **Step 1: Model Selection**
73
+ 1. **Choose Base Model**: SD 1.5 (fast, compatible) or SDXL (high quality, more VRAM)
74
+ 2. **Select Generation Mode**: txt2img or img2img
75
+ 3. **Check Compatibility**: System automatically validates model/mode combinations
76
+ 4. **Review VRAM Requirements**: See memory requirements and availability status
77
+
78
+ ### **Step 2: LoRA Integration (Optional)**
79
+ 1. **Enable LoRA**: Toggle LoRA support
80
+ 2. **Specify Path**: Enter path to LoRA weights (diffusers format)
81
+ 3. **Set Scale**: Adjust LoRA influence (0.1-2.0)
82
+ 4. **Verify Status**: Check LoRA loading status and compatibility
83
+
84
+ ### **Step 3: Performance Optimization**
85
+ 1. **Choose Optimization Level**: Conservative, Balanced, Aggressive, or Extreme
86
+ 2. **Monitor VRAM**: Watch real-time memory usage in sidebar
87
+ 3. **Adjust Settings**: Fine-tune individual optimization features
88
+ 4. **Enable Reliability**: Configure OOM retry and CPU fallback options
89
+
90
+ ### **Step 4: Generation**
91
+ 1. **Single Images**: Generate individual images with full control
92
+ 2. **Batch Processing**: Create multiple images with seed sequences
93
+ 3. **Monitor Progress**: Track generation progress and memory usage
94
+ 4. **Review Results**: Analyze generation statistics and performance metrics
95
+
96
+ ---
97
+
98
+ ## 🔧 **Advanced Features**
99
+
100
+ ### **🤖 Model Manager Deep Dive**
101
+
102
+ #### **Model Compatibility Matrix**
103
+ ```python
104
+ SD 1.5:
105
+ ✅ txt2img (512x512 optimal)
106
+ ✅ img2img (all strengths)
107
+ ✅ ControlNet (full support)
108
+ ✅ LoRA (universal compatibility)
109
+ 💾 VRAM: 4+ GB recommended
110
+
111
+ SDXL:
112
+ ✅ txt2img (1024x1024 optimal)
113
+ ✅ img2img (limited support)
114
+ ⚠️ ControlNet (requires special handling)
115
+ ✅ LoRA (SDXL-compatible weights only)
116
+ 💾 VRAM: 8+ GB recommended
117
+ ```
118
+
119
+ #### **Automatic Model Selection Logic**
120
+ - **VRAM < 6GB**: Recommends SD 1.5 only
121
+ - **VRAM 6-8GB**: SD 1.5 preferred, SDXL with warnings
122
+ - **VRAM 8GB+**: Full SDXL support with optimizations
123
+ - **CPU Mode**: SD 1.5 only with aggressive optimizations
124
+
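+ A compact sketch of this selection logic using plain PyTorch queries (thresholds follow the list above; the function itself is illustrative):
+
+ ```python
+ import torch
+
+ def recommend_model() -> str:
+     """Suggest a base model from available VRAM, mirroring the guide's thresholds."""
+     if not torch.cuda.is_available():
+         return "SD 1.5 (CPU mode, aggressive optimizations)"
+     total_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
+     if total_gb < 6:
+         return "SD 1.5 only"
+     if total_gb < 8:
+         return "SD 1.5 preferred; SDXL possible with warnings"
+     return "Full SDXL support with optimizations"
+
+ print(recommend_model())
+ ```
+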
125
+ ### **⚡ Performance Optimization Levels**
126
+
127
+ #### **Conservative Mode**
128
+ - Basic attention slicing
129
+ - Standard precision (fp16/fp32)
130
+ - Minimal memory optimizations
131
+ - **Best for**: Stable systems, first-time users
132
+
133
+ #### **Balanced Mode (Default)**
134
+ - xFormers attention (if available)
135
+ - Attention + VAE slicing
136
+ - Automatic precision selection
137
+ - **Best for**: Most users, good performance/stability balance
138
+
139
+ #### **Aggressive Mode**
140
+ - All memory optimizations enabled
141
+ - VAE tiling for large images
142
+ - Maximum memory efficiency
143
+ - **Best for**: Limited VRAM, large batch processing
144
+
145
+ #### **Extreme Mode**
146
+ - CPU offloading enabled
147
+ - Maximum memory savings
148
+ - Slower but uses minimal VRAM
149
+ - **Best for**: Very limited VRAM (<4GB)
150
+
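+ These levels map naturally onto standard diffusers memory toggles. The sketch below shows one plausible wiring; the level names follow this guide, but the exact CompI implementation may differ.
+
+ ```python
+ import torch
+ from diffusers import StableDiffusionPipeline
+
+ def apply_optimizations(pipe, level: str = "balanced"):
+     """Apply progressively stronger memory optimizations to a diffusers pipeline."""
+     pipe.enable_attention_slicing()                        # all levels
+     if level in ("balanced", "aggressive", "extreme"):
+         try:
+             pipe.enable_xformers_memory_efficient_attention()
+         except Exception:
+             pass                                           # xFormers is optional
+         pipe.enable_vae_slicing()
+     if level in ("aggressive", "extreme"):
+         pipe.enable_vae_tiling()
+     if level == "extreme":
+         pipe.enable_model_cpu_offload()                    # manages device placement itself
+     return pipe
+
+ level = "balanced"
+ pipe = StableDiffusionPipeline.from_pretrained(
+     "runwayml/stable-diffusion-v1-5",
+     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+ )
+ pipe = apply_optimizations(pipe, level)
+ if torch.cuda.is_available() and level != "extreme":
+     pipe = pipe.to("cuda")   # offloaded pipelines should not be moved manually
+ ```
+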
151
+ ### **🛡️ Reliability Engine Strategies**
152
+
153
+ #### **Fallback Progression**
154
+ ```python
155
+ Strategy 1: Original settings (100% size, 100% steps)
156
+ Strategy 2: Reduced size (75% size, 90% steps)
157
+ Strategy 3: Half size (50% size, 80% steps)
158
+ Strategy 4: Minimal (50% size, 60% steps)
159
+ Final: CPU fallback if all GPU attempts fail
160
+ ```
161
+
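+ A runnable sketch of this progression: each retry shrinks the canvas and step count before giving up. `generate_fn` stands in for any pipeline call and is an assumption, not the real CompI function.
+
+ ```python
+ import torch
+
+ FALLBACKS = [
+     (1.00, 1.00),   # original size / steps
+     (0.75, 0.90),
+     (0.50, 0.80),
+     (0.50, 0.60),
+ ]
+
+ def generate_with_fallback(generate_fn, width=512, height=512, steps=30):
+     for size_scale, step_scale in FALLBACKS:
+         w = int(width * size_scale) // 8 * 8      # keep dimensions divisible by 8
+         h = int(height * size_scale) // 8 * 8
+         s = max(1, int(steps * step_scale))
+         try:
+             return generate_fn(width=w, height=h, num_inference_steps=s)
+         except torch.cuda.OutOfMemoryError:
+             torch.cuda.empty_cache()              # free VRAM before the next attempt
+     raise RuntimeError("All GPU fallback strategies failed; consider CPU fallback")
+ ```
+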
162
+ #### **Error Classification**
163
+ - **CUDA OOM**: Triggers progressive fallback
164
+ - **Model Loading**: Suggests alternative models
165
+ - **LoRA Errors**: Disables LoRA and retries
166
+ - **General Errors**: Logs and reports with context
167
+
168
+ ### **📊 VRAM Monitoring System**
169
+
170
+ #### **Real-time Metrics**
171
+ - **Total VRAM**: Hardware capacity
172
+ - **Used VRAM**: Currently allocated memory
173
+ - **Free VRAM**: Available for new operations
174
+ - **Usage Percentage**: Current utilization level
175
+
176
+ #### **Smart Alerts**
177
+ - **Green (0-60%)**: Optimal usage
178
+ - **Yellow (60-80%)**: Moderate usage, monitor closely
179
+ - **Red (80%+)**: High usage, optimization recommended
180
+
181
+ #### **Memory Management**
182
+ - **Automatic Cache Clearing**: Between batch generations
183
+ - **Memory Leak Detection**: Identifies and resolves memory issues
184
+ - **Optimization Suggestions**: Hardware-specific recommendations
185
+
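+ The metrics and alert thresholds above can be reproduced with plain PyTorch CUDA queries, as in this sketch:
+
+ ```python
+ import torch
+
+ def vram_status(device: int = 0) -> dict:
+     """Report total/used/free VRAM and an alert level matching this guide."""
+     if not torch.cuda.is_available():
+         return {"device": "cpu", "note": "no CUDA device; VRAM monitoring disabled"}
+     free_b, total_b = torch.cuda.mem_get_info(device)
+     used_b = total_b - free_b
+     used_pct = 100 * used_b / total_b
+     alert = "green" if used_pct < 60 else "yellow" if used_pct < 80 else "red"
+     return {"total_gb": total_b / 1024**3, "used_gb": used_b / 1024**3,
+             "free_gb": free_b / 1024**3, "used_pct": used_pct, "alert": alert}
+
+ def clear_gpu_cache():
+     """Rough equivalent of the sidebar 'Clear GPU Cache' action."""
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+
+ print(vram_status())
+ ```
+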
186
+ ---
187
+
188
+ ## 📈 **Performance Benchmarks**
189
+
190
+ ### **Generation Speed Comparison**
191
+ ```
192
+ SD 1.5 (512x512, 20 steps):
193
+ RTX 4090: ~15-25 seconds
194
+ RTX 3080: ~25-35 seconds
195
+ RTX 2080: ~45-60 seconds
196
+ CPU: ~5-10 minutes
197
+
198
+ SDXL (1024x1024, 20 steps):
199
+ RTX 4090: ~30-45 seconds
200
+ RTX 3080: ~60-90 seconds
201
+ RTX 2080: ~2-3 minutes (with optimizations)
202
+ CPU: ~15-30 minutes
203
+ ```
204
+
205
+ ### **Memory Usage Patterns**
206
+ ```
207
+ SD 1.5:
208
+ Base: ~3.5GB VRAM
209
+ + LoRA: ~3.7GB VRAM
210
+ + Upscaler: ~5.5GB VRAM
211
+
212
+ SDXL:
213
+ Base: ~6.5GB VRAM
214
+ + LoRA: ~7.0GB VRAM
215
+ + Upscaler: ~9.0GB VRAM
216
+ ```
217
+
218
+ ---
219
+
220
+ ## 🔍 **Troubleshooting Guide**
221
+
222
+ ### **Common Issues & Solutions**
223
+
224
+ #### **"CUDA Out of Memory" Errors**
225
+ 1. **Enable OOM Auto-Retry**: Automatic fallback handling
226
+ 2. **Reduce Image Size**: Use 512x512 instead of 1024x1024
227
+ 3. **Lower Batch Size**: Generate fewer images simultaneously
228
+ 4. **Enable Aggressive Optimizations**: Use VAE slicing/tiling
229
+ 5. **Clear GPU Cache**: Use sidebar "Clear GPU Cache" button
230
+
231
+ #### **Slow Generation Speed**
232
+ 1. **Enable xFormers**: Significant speed improvement if available
233
+ 2. **Use Balanced Optimization**: Good speed/quality trade-off
234
+ 3. **Reduce Inference Steps**: 15-20 steps often sufficient
235
+ 4. **Check VRAM Usage**: Ensure not hitting memory limits
236
+
237
+ #### **Model Loading Failures**
238
+ 1. **Check Internet Connection**: Models download on first use
239
+ 2. **Verify Disk Space**: Models require 2-7GB storage each
240
+ 3. **Try Alternative Model**: Switch between SD 1.5 and SDXL
241
+ 4. **Clear Model Cache**: Remove cached models and re-download
242
+
243
+ #### **LoRA Loading Issues**
244
+ 1. **Verify Path**: Ensure LoRA files exist at specified path
245
+ 2. **Check Format**: Use diffusers-compatible LoRA weights
246
+ 3. **Model Compatibility**: Ensure LoRA matches base model type
247
+ 4. **Scale Adjustment**: Try different LoRA scale values
248
+
249
+ ---
250
+
251
+ ## 🎯 **Best Practices**
252
+
253
+ ### **📝 Performance Optimization**
254
+ 1. **Start Conservative**: Begin with balanced settings, adjust as needed
255
+ 2. **Monitor VRAM**: Keep usage below 80% for stability
256
+ 3. **Batch Wisely**: Use smaller batches on limited hardware
257
+ 4. **Clear Cache Regularly**: Prevent memory accumulation
258
+
259
+ ### **🤖 Model Selection**
260
+ 1. **SD 1.5 for Speed**: Faster generation, lower VRAM requirements
261
+ 2. **SDXL for Quality**: Higher resolution, better detail
262
+ 3. **Match Hardware**: Choose model based on available VRAM
263
+ 4. **Test Compatibility**: Verify model works with your use case
264
+
265
+ ### **🛡️ Reliability**
266
+ 1. **Enable Auto-Retry**: Let system handle OOM errors automatically
267
+ 2. **Use Fallbacks**: Allow progressive degradation for reliability
268
+ 3. **Monitor Logs**: Check run logs for patterns and issues
269
+ 4. **Plan for Failures**: Design workflows that handle generation failures
270
+
271
+ ---
272
+
273
+ ## 🚀 **Integration with CompI Ecosystem**
274
+
275
+ ### **Universal Enhancement**
276
+ Phase 3.E enhances ALL existing CompI components:
277
+ - **Ultimate Dashboard**: Model switching and performance controls
278
+ - **Phase 2.A-2.E**: Reliability and optimization for all multimodal phases
279
+ - **Phase 1.A-1.E**: Enhanced foundation with professional features
280
+ - **Phase 3.D**: Performance metrics in workflow management
281
+
282
+ ### **Backward Compatibility**
283
+ - **Graceful Degradation**: Works on all hardware configurations
284
+ - **Default Settings**: Optimal defaults for most users
285
+ - **Progressive Enhancement**: Advanced features when available
286
+ - **Legacy Support**: Maintains compatibility with existing workflows
287
+
288
+ ---
289
+
290
+ ## 🎉 **Phase 3.E: Production-Grade CompI Complete**
291
+
292
+ **Phase 3.E transforms CompI into a production-grade platform with professional performance management, intelligent reliability, and advanced model capabilities.**
293
+
294
+ **Key Benefits:**
295
+ - ✅ **Professional Performance**: Industry-standard optimization and monitoring
296
+ - ✅ **Intelligent Reliability**: Automatic error handling and recovery
297
+ - ✅ **Advanced Model Management**: Dynamic switching and LoRA integration
298
+ - ✅ **Production Ready**: Suitable for commercial and professional use
299
+ - ✅ **Universal Enhancement**: Improves all existing CompI features
300
+
301
+ **CompI is now a complete, production-grade multimodal AI art generation platform!** 🎨✨
docs/PHASE3_FINAL_DASHBOARD_GUIDE.md ADDED
@@ -0,0 +1,274 @@
1
+ # 🧪 CompI Phase 3 Final Dashboard - Complete Integration Guide
2
+
3
+ ## 🎯 **What This Delivers**
4
+
5
+ **The Phase 3 Final Dashboard is the ultimate CompI interface that integrates ALL Phase 3 components into a single, unified creative environment.**
6
+
7
+ ### **🚀 Complete Feature Integration:**
8
+
9
+ #### **🧩 Phase 3.A/3.B: True Multimodal Fusion**
10
+ - **Real Audio Processing**: Whisper transcription + librosa feature analysis
11
+ - **Actual Data Analysis**: CSV processing + mathematical formula evaluation
12
+ - **Sentiment Analysis**: TextBlob emotion detection with polarity scoring
13
+ - **Live Real-time Data**: Weather API + RSS news feeds integration
14
+ - **Intelligent Fusion**: All inputs combined into enhanced prompts
15
+
16
+ #### **🖼️ Phase 3.C: Advanced References**
17
+ - **Multi-Reference Support**: Upload files + paste URLs simultaneously
18
+ - **Role-Based Assignment**: Separate style vs structure reference selection
19
+ - **Live ControlNet Previews**: Real-time Canny/Depth map generation
20
+ - **Hybrid Generation**: CN+I2I with intelligent fallback to two-pass approach
21
+ - **Professional Controls**: Fine-grained parameter control for all aspects
22
+
23
+ #### **⚙️ Phase 3.E: Performance Management**
24
+ - **Model Switching**: SD 1.5 ↔ SDXL with automatic availability checking
25
+ - **LoRA Integration**: Load and scale LoRA weights with visual feedback
26
+ - **Performance Optimizations**: xFormers, attention slicing, VAE optimizations
27
+ - **VRAM Monitoring**: Real-time GPU memory usage tracking
28
+ - **OOM Recovery**: Progressive fallback with intelligent retry strategies
29
+ - **Optional Upscaling**: Latent upscaler integration for quality enhancement
30
+
31
+ #### **🎛️ Phase 3.D: Professional Workflow**
32
+ - **Advanced Gallery**: Image filtering by mode, prompt, steps with visual grid
33
+ - **Annotation System**: Rating (1-5), tags, notes for comprehensive organization
34
+ - **Preset Management**: Save/load complete generation configurations
35
+ - **Export Bundles**: Complete ZIP packages with images, metadata, annotations, presets
36
+
37
+ ---
38
+
39
+ ## 🏗️ **Architecture Overview**
40
+
41
+ ### **7-Tab Unified Interface:**
42
+ ```python
43
+ 1. 🧩 Inputs (Text/Audio/Data/Emotion/Real‑time) # Phase 3.A/3.B
44
+ 2. 🖼️ Advanced References # Phase 3.C
45
+ 3. ⚙️ Model & Performance # Phase 3.E
46
+ 4. 🎛️ Generate # Unified generation
47
+ 5. 🖼️ Gallery & Annotate # Phase 3.D
48
+ 6. 💾 Presets # Phase 3.D
49
+ 7. 📦 Export # Phase 3.D
50
+ ```
51
+
52
+ ### **Intelligent Generation Modes:**
53
+ ```python
54
+ # Smart mode selection based on available inputs:
55
+ mode = "T2I" # Text-to-Image (baseline)
56
+ if have_cn and have_style: mode = "CN+I2I" # Hybrid ControlNet + Img2Img
57
+ elif have_cn: mode = "CN" # ControlNet only
58
+ elif have_style: mode = "I2I" # Img2Img only
59
+ ```
60
+
61
+ ### **Real-time Performance Monitoring:**
62
+ ```python
63
+ # Live VRAM tracking in header
64
+ colA: Device (CUDA/CPU)
65
+ colB: Total VRAM (GB)
66
+ colC: Used VRAM (GB)
67
+ colD: PyTorch version + status
68
+ ```
69
+
70
+ ---
71
+
72
+ ## 🎨 **Professional Workflow**
73
+
74
+ ### **Complete Creative Process:**
75
+
76
+ #### **1. Configure Multimodal Inputs (Tab 1)**
77
+ - **Text & Style**: Main prompt, artistic style, mood, negative prompt
78
+ - **Audio Analysis**: Upload audio → Whisper transcription → librosa features
79
+ - **Data Processing**: CSV upload or mathematical formulas → visualization
80
+ - **Emotion Analysis**: Sentiment analysis with TextBlob polarity scoring
81
+ - **Real-time Feeds**: Weather data + news headlines integration
82
+
83
+ #### **2. Advanced References (Tab 2)**
84
+ - **Multi-Reference Upload**: Files + URLs simultaneously supported
85
+ - **Role Assignment**: Select images for style influence vs structure control
86
+ - **ControlNet Integration**: Choose Canny or Depth with live preview
87
+ - **Parameter Control**: Conditioning scale, img2img strength adjustment
88
+
89
+ #### **3. Model & Performance (Tab 3)**
90
+ - **Model Selection**: SD 1.5 (fast) or SDXL (quality) based on VRAM
91
+ - **LoRA Integration**: Load custom LoRA weights with scale control
92
+ - **Performance Tuning**: xFormers, attention slicing, VAE optimizations
93
+ - **Reliability Settings**: OOM auto-retry, batch processing, upscaling
94
+
95
+ #### **4. Intelligent Generation (Tab 4)**
96
+ - **Fusion Preview**: See combined prompt from all inputs
97
+ - **Smart Mode Selection**: Automatic best approach based on available inputs
98
+ - **Batch Processing**: Multiple images with seed control
99
+ - **Real-time Feedback**: Progress tracking and error handling
100
+
101
+ #### **5. Gallery Management (Tab 5)**
102
+ - **Advanced Filtering**: By mode, prompt content, generation parameters
103
+ - **Visual Gallery**: 4-column grid with image previews and metadata
104
+ - **Annotation System**: Rate (1-5), tag, and add notes to images
105
+ - **Batch Operations**: Select multiple images for annotation
106
+
107
+ #### **6. Preset System (Tab 6)**
108
+ - **Configuration Capture**: Save complete generation settings
109
+ - **JSON Preview**: See exact preset structure before saving
110
+ - **Load Management**: Browse and load existing presets
111
+ - **Reusability**: Apply saved settings to new generations
112
+
113
+ #### **7. Export Bundles (Tab 7)**
114
+ - **Complete Packages**: Images + metadata + annotations + presets
115
+ - **Reproducibility**: Full environment snapshots for exact reproduction
116
+ - **Professional Format**: ZIP bundles with manifest and README
117
+ - **Selective Export**: Choose specific images and include optional presets
118
+
119
+ ---
120
+
121
+ ## 🚀 **Quick Start Guide**
122
+
123
+ ### **1. Launch the Dashboard**
124
+ ```bash
125
+ # Method 1: Using launcher (recommended)
126
+ python run_phase3_final_dashboard.py
127
+
128
+ # Method 2: Direct Streamlit launch
129
+ streamlit run src/ui/compi_phase3_final_dashboard.py --server.port 8506
130
+ ```
131
+
132
+ ### **2. Access the Interface**
133
+ - **URL:** `http://localhost:8506`
134
+ - **Interface:** Professional 7-tab dashboard with real-time monitoring
135
+ - **Header:** Live VRAM usage and system status
136
+
137
+ ### **3. Basic Workflow**
138
+ 1. **Configure Inputs**: Set up text, audio, data, emotion, real-time feeds
139
+ 2. **Add References**: Upload images and assign style/structure roles
140
+ 3. **Choose Model**: Select SD 1.5 or SDXL based on your hardware
141
+ 4. **Generate**: Create art with intelligent fusion of all inputs
142
+ 5. **Review & Annotate**: Rate and organize results in gallery
143
+ 6. **Save & Export**: Create presets and export complete bundles
144
+
145
+ ---
146
+
147
+ ## 🔧 **Advanced Features**
148
+
149
+ ### **🎵 Audio Processing Pipeline**
150
+ ```python
151
+ # Complete audio analysis chain:
152
+ 1. Upload audio file (.wav/.mp3)
153
+ 2. Librosa feature extraction (tempo, energy, ZCR)
154
+ 3. Whisper transcription (base model)
155
+ 4. Intelligent tag generation
156
+ 5. Prompt enhancement with audio context
157
+ ```
158
+
159
+ ### **📊 Data Integration System**
160
+ ```python
161
+ # Dual data processing modes:
162
+ 1. CSV Upload: Pandas analysis → statistical summary → visualization
163
+ 2. Formula Mode: NumPy evaluation → pattern generation → plotting
164
+ 3. Poetic summarization for prompt enhancement
165
+ ```
166
+
167
+ ### **🖼️ Advanced Reference System**
168
+ ```python
169
+ # Role-based reference processing:
170
+ Style References: Used for img2img artistic influence
171
+ Structure References: Used for ControlNet composition control
172
+ Live Previews: Real-time Canny/Depth map generation
173
+ Hybrid Modes: CN+I2I with intelligent fallback strategies
174
+ ```
175
+
176
+ ### **⚡ Performance Optimization**
177
+ ```python
178
+ # Multi-level optimization system:
179
+ 1. xFormers: Memory-efficient attention (if available)
180
+ 2. Attention Slicing: Reduce memory usage
181
+ 3. VAE Slicing/Tiling: Handle large images efficiently
182
+ 4. OOM Recovery: Progressive fallback (size → steps → CPU)
183
+ 5. VRAM Monitoring: Real-time usage tracking
184
+ ```
185
+
186
+ ### **🛡️ Reliability Features**
187
+ ```python
188
+ # Production-grade error handling:
189
+ 1. Graceful Degradation: Features work even when components unavailable
190
+ 2. Intelligent Fallbacks: CN+I2I → two-pass approach when needed
191
+ 3. OOM Recovery: Automatic retry with reduced parameters
192
+ 4. Error Classification: Specific handling for different error types
193
+ ```
194
+
195
+ ---
196
+
197
+ ## 📊 **Performance Benchmarks**
198
+
199
+ ### **Generation Speed (Approximate)**
200
+ ```
201
+ SD 1.5 (512x512, 20 steps):
202
+ RTX 4090: ~15-25 seconds
203
+ RTX 3080: ~25-35 seconds
204
+ RTX 2080: ~45-60 seconds
205
+ CPU: ~5-10 minutes
206
+
207
+ SDXL (1024x1024, 20 steps):
208
+ RTX 4090: ~30-45 seconds
209
+ RTX 3080: ~60-90 seconds
210
+ RTX 2080: ~2-3 minutes (with optimizations)
211
+ CPU: ~15-30 minutes
212
+ ```
213
+
214
+ ### **Memory Requirements**
215
+ ```
216
+ SD 1.5 Base: ~3.5GB VRAM
217
+ SD 1.5 + LoRA: ~3.7GB VRAM
218
+ SD 1.5 + Upscaler: ~5.5GB VRAM
219
+
220
+ SDXL Base: ~6.5GB VRAM
221
+ SDXL + LoRA: ~7.0GB VRAM
222
+ SDXL + Upscaler: ~9.0GB VRAM
223
+ ```
224
+
225
+ ---
226
+
227
+ ## 🎯 **Best Practices**
228
+
229
+ ### **📝 Optimal Workflow**
230
+ 1. **Start Simple**: Begin with text-only generation to test setup
231
+ 2. **Add Gradually**: Introduce multimodal inputs one at a time
232
+ 3. **Monitor VRAM**: Keep usage below 80% for stability
233
+ 4. **Use Presets**: Save successful configurations for reuse
234
+ 5. **Export Regularly**: Create bundles of your best work
235
+
236
+ ### **🤖 Model Selection**
237
+ 1. **SD 1.5 for Speed**: Faster generation, lower VRAM, wide compatibility
238
+ 2. **SDXL for Quality**: Higher resolution, better detail, requires more VRAM
239
+ 3. **Match Hardware**: Choose model based on available VRAM
240
+ 4. **Test First**: Verify model works with your specific use case
241
+
242
+ ### **🖼️ Reference Usage**
243
+ 1. **Style References**: Use 2-4 images for artistic influence
244
+ 2. **Structure Reference**: Use 1 clear image for composition control
245
+ 3. **Quality Matters**: Higher quality references produce better results
246
+ 4. **Role Clarity**: Clearly separate style vs structure purposes
247
+
248
+ ### **⚡ Performance Tuning**
249
+ 1. **Enable xFormers**: Significant speed improvement if available
250
+ 2. **Use Attention Slicing**: Always enable for memory efficiency
251
+ 3. **Monitor Usage**: Watch VRAM meter and adjust accordingly
252
+ 4. **Batch Wisely**: Use smaller batches on limited hardware
253
+
254
+ ---
255
+
256
+ ## 🎉 **Phase 3 Complete Achievement**
257
+
258
+ **The Phase 3 Final Dashboard represents the complete realization of the CompI vision: a unified, production-grade, multimodal AI art generation platform.**
259
+
260
+ ### **✅ All Phase 3 Components Integrated:**
261
+ - **✅ Phase 3.A**: Multimodal input processing
262
+ - **✅ Phase 3.B**: True fusion engine with real processing
263
+ - **✅ Phase 3.C**: Advanced references with role assignment
264
+ - **✅ Phase 3.D**: Professional workflow management
265
+ - **✅ Phase 3.E**: Performance optimization and model management
266
+
267
+ ### **🚀 Key Benefits:**
268
+ - **Single Interface**: All CompI features in one unified dashboard
269
+ - **Professional Workflow**: From input to export in one seamless process
270
+ - **Production Ready**: Robust error handling and performance optimization
271
+ - **Universal Compatibility**: Works across different hardware configurations
272
+ - **Complete Integration**: All phases work together harmoniously
273
+
274
+ **CompI Phase 3 is now complete - the ultimate multimodal AI art generation platform!** 🎨✨
docs/PHASE4_DEPLOYMENT_GUIDE.md ADDED
@@ -0,0 +1,110 @@
1
+ # Phase 4: Web Deployment Guide
2
+
3
+ ## Overview
4
+
5
+ Phase 4 deploys CompI to Hugging Face Spaces with automatic CI/CD from GitHub. This enables public access to your multimodal AI art generation platform.
6
+
7
+ ## 4.A: Repository Preparation ✅
8
+
9
+ The following files have been added to your repo:
10
+
11
+ - `packages.txt` - System dependencies for audio processing and OpenGL
12
+ - `.gitattributes` - Git LFS configuration for model files
13
+ - `requirements.txt` - Already present with Python dependencies
14
+
15
+ ## 4.B: Create Hugging Face Space
16
+
17
+ ### Step 1: Create New Space
18
+
19
+ 1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
20
+ 2. Click "Create new Space"
21
+ 3. Choose:
22
+ - **SDK**: Streamlit
23
+ - **Space name**: `compi-final-dashboard` (or your preference)
24
+ - **Visibility**: Public
25
+ - **Hardware**: CPU basic (free tier)
26
+
27
+ ### Step 2: Configure Space README
28
+
29
+ Replace the default README.md in your Space with:
30
+
31
+ ```markdown
32
+ ---
33
+ title: CompI — Final Dashboard
34
+ emoji: 🎨
35
+ sdk: streamlit
36
+ app_file: src/ui/compi_phase3_final_dashboard.py
37
+ pinned: false
38
+ ---
39
+
40
+ # CompI - Multimodal AI Art Generation Platform
41
+
42
+ The ultimate creative platform combining text, audio, data, emotion, and real-time inputs for AI art generation.
43
+
44
+ ## Features
45
+
46
+ 🧩 **Multimodal Inputs** - Text, Audio, Data, Emotion, Real-time feeds
47
+ 🖼️ **Advanced References** - Multi-image upload with role assignment
48
+ ⚙️ **Model Management** - SD 1.5/SDXL switching, LoRA integration
49
+ 🖼️ **Professional Gallery** - Filtering, rating, annotation system
50
+ 💾 **Preset Management** - Save/load complete configurations
51
+ 📦 **Export System** - Complete bundles with metadata
52
+
53
+ ## Usage
54
+
55
+ 1. Configure your inputs in the "Inputs" tab
56
+ 2. Upload reference images in "Advanced References"
57
+ 3. Choose your model and performance settings
58
+ 4. Generate with intelligent fusion of all inputs
59
+ 5. Review results in the gallery and export bundles
60
+
61
+ Built with Streamlit, PyTorch, and Diffusers.
62
+ ```
63
+
64
+ ### Step 3: Add Secrets (Optional)
65
+
66
+ In your Space Settings → Repository secrets, add:
67
+ - `OPENWEATHER_KEY` - Your OpenWeatherMap API key for real-time weather data
68
+
69
+ **Important**: Do NOT link the Space to GitHub yet. We'll deploy via CI/CD.
70
+
71
+ ## 4.C: GitHub Actions Setup
72
+
73
+ ### Step 1: Add GitHub Secrets
74
+
75
+ In your GitHub repo, go to Settings → Secrets and variables → Actions:
76
+
77
+ 1. **New repository secret**: `HF_TOKEN`
78
+ - Value: Your Hugging Face **Write** token from [HF Settings → Access Tokens](https://huggingface.co/settings/tokens)
79
+
80
+ 2. **New repository secret**: `HF_SPACE_ID`
81
+ - Value: `your-username/your-space-name` (e.g., `AXRZCE/compi-final-dashboard`)
82
+
83
+ ### Step 2: GitHub Actions Workflow
84
+
85
+ The workflow file `.github/workflows/deploy-to-hf-spaces.yml` will be created next.
86
+
87
+ ## 4.D: Runtime Optimization
88
+
89
+ Default settings optimized for free CPU tier:
90
+ - **Model**: SD 1.5 (faster than SDXL)
91
+ - **Resolution**: 512×512 (good quality/speed balance)
92
+ - **Steps**: 20-24 (sufficient for good results)
93
+ - **Batch size**: 1 (memory efficient)
94
+ - **ControlNet**: Off by default (users can enable)
95
+
96
+ ## 4.E: Deployment Workflow
97
+
98
+ 1. **Development**: Work on feature branches
99
+ 2. **Testing**: Test locally with `streamlit run src/ui/compi_phase3_final_dashboard.py`
100
+ 3. **Deploy**: Merge to `main` → GitHub Actions automatically deploys to HF Space
101
+ 4. **Rollback**: Revert commit on `main` if issues occur
102
+
103
+ ## Next Steps
104
+
105
+ 1. Complete the HF Space setup above
106
+ 2. Add GitHub secrets
107
+ 3. The GitHub Actions workflow will be created automatically
108
+ 4. Test deployment by pushing to `main`
109
+
110
+ Your deployed app will be available at: `https://your-username-your-space.hf.space`
docs/PHASE4_RUNTIME_OPTIMIZATION.md ADDED
@@ -0,0 +1,111 @@
1
+ # Phase 4.D: Runtime Optimization for Free CPU Deployment
2
+
3
+ ## Current Default Settings Analysis
4
+
5
+ The CompI Phase 3 Final Dashboard is already well-optimized for free CPU deployment with the following defaults:
6
+
7
+ ### Model Selection Defaults
8
+ - **Base Model**: SD 1.5 (v1-5) - Fast and CPU-friendly
9
+ - **Generation Mode**: txt2img - Most efficient mode
10
+ - **SDXL**: Available but not default (requires more resources)
11
+
12
+ ### Image Generation Defaults
13
+ - **Width**: 512px (optimal balance of quality/speed)
14
+ - **Height**: 512px (standard square format)
15
+ - **Steps**: 30 (good quality with reasonable speed)
16
+ - **Guidance**: 7.5 (balanced prompt adherence)
17
+ - **Batch Size**: 1 (memory efficient)
18
+ - **Seed**: 0 (random, no additional computation)
19
+
20
+ ### Performance Optimizations (Already Enabled)
21
+ - **xFormers**: Enabled by default for memory efficiency
22
+ - **Attention Slicing**: Enabled to reduce VRAM usage
23
+ - **VAE Slicing**: Enabled for memory optimization
24
+ - **VAE Tiling**: Available for large images
25
+ - **OOM Auto-retry**: Enabled with progressive size reduction
26
+
27
+ ### Advanced Features (Disabled by Default)
28
+ - **ControlNet**: Off by default (users can enable in References tab)
29
+ - **LoRA**: Off by default (optional enhancement)
30
+ - **Upsampling**: Off by default (2x processing time)
31
+
32
+ ## CPU-Specific Optimizations
33
+
34
+ ### Automatic Fallback Sizes
35
+ The app includes intelligent OOM recovery with progressive fallbacks:
36
+ 1. Original size (e.g., 512x512, 30 steps)
37
+ 2. Half size (384x384, 22 steps)
38
+ 3. Safe size (384x384, 18 steps)
39
+ 4. Minimal size (256x256, 14 steps)
40
+
41
+ ### Fast Preset Available
42
+ Users can click "🧼 Reset to defaults" or use the "Fast" preset:
43
+ - SD 1.5 model
44
+ - 512x512 resolution
45
+ - 30 steps
46
+ - All optimizations enabled
47
+
48
+ ## Recommended Settings for Free Tier
49
+
50
+ ### For Best Performance
51
+ ```
52
+ Model: SD 1.5 (v1-5)
53
+ Resolution: 512x512 or 448x448
54
+ Steps: 20-25
55
+ Guidance: 7.5
56
+ Batch: 1
57
+ ControlNet: Off
58
+ LoRA: Off
59
+ Upsampling: Off
60
+ ```
61
+
62
+ ### For Higher Quality (Slower)
63
+ ```
64
+ Model: SD 1.5 (v1-5)
65
+ Resolution: 768x512 or 512x768
66
+ Steps: 30-35
67
+ Guidance: 7.0-8.0
68
+ Batch: 1
69
+ ControlNet: Optional
70
+ LoRA: Optional (if available)
71
+ ```
72
+
73
+ ## User Guidance in UI
74
+
75
+ The app provides helpful guidance:
76
+ - VRAM safety indicators with color-coded warnings
77
+ - Quick tips in expandable sections
78
+ - Preset buttons for common use cases
79
+ - Performance optimization toggles with explanations
80
+
81
+ ## Deployment Considerations
82
+
83
+ ### Hugging Face Spaces Free Tier
84
+ - **CPU**: 2 vCPUs
85
+ - **RAM**: 16GB
86
+ - **Storage**: 50GB
87
+ - **Timeout**: 48 hours idle
88
+
89
+ ### Expected Performance
90
+ - **512x512, 20 steps**: ~30-60 seconds per image
91
+ - **768x512, 30 steps**: ~60-120 seconds per image
92
+ - **With ControlNet**: +50-100% generation time
93
+ - **With LoRA**: +10-20% generation time
94
+
95
+ ## Monitoring and Optimization
96
+
97
+ The dashboard includes:
98
+ - Real-time VRAM monitoring (when available)
99
+ - Generation time tracking
100
+ - Automatic error recovery
101
+ - Progressive quality degradation on resource constraints
102
+
103
+ ## Conclusion
104
+
105
+ The CompI Phase 3 Final Dashboard is already optimized for free CPU deployment with:
106
+ - Sensible defaults for speed/quality balance
107
+ - Automatic fallback mechanisms
108
+ - User-friendly performance controls
109
+ - Clear guidance for optimization
110
+
111
+ No code changes are required for Phase 4.D - the existing defaults are optimal for free tier deployment.
docs/PROJECT_STRUCTURE.md ADDED
@@ -0,0 +1,158 @@
1
+ # CompI Project Structure
2
+
3
+ This document outlines the organized structure of the CompI (Compositional Intelligence) project.
4
+
5
+ ## 📁 Directory Structure
6
+
7
+ ```
8
+ Project CompI/
9
+ ├── 📁 src/ # Source code (organized modules)
10
+ │ ├── 📁 generators/ # Image generation modules
11
+ │ │ ├── __init__.py # Module initialization
12
+ │ │ ├── compi_phase1_text2image.py # Basic text-to-image
13
+ │ │ ├── compi_phase1_advanced.py # Advanced generation
14
+ │ │ ├── compi_phase1b_styled_generation.py # Style conditioning
15
+ │ │ ├── compi_phase1b_advanced_styling.py # Advanced styling
16
+ │ │ ├── compi_phase1d_evaluate_quality.py # Quality evaluation (Streamlit)
17
+ │ │ ├── compi_phase1d_cli_evaluation.py # Quality evaluation (CLI)
18
+ │ │ ├── compi_phase1e_dataset_prep.py # LoRA dataset preparation
19
+ │ │ ├── compi_phase1e_lora_training.py # LoRA fine-tuning
20
+ │ │ ├── compi_phase1e_style_generation.py # Personal style generation
21
+ │ │ └── compi_phase1e_style_manager.py # LoRA style management
22
+ │ ├── 📁 models/ # Model implementations (future)
23
+ │ ├── 📁 utils/ # Utility functions
24
+ │ │ ├── __init__.py
25
+ │ │ ├── logging_utils.py
26
+ │ │ └── file_utils.py
27
+ │ ├── 📁 data/ # Data processing modules (future)
28
+ │ ├── 📁 ui/ # User interface components (future)
29
+ │ ├── config.py # Project configuration
30
+ │ ├── setup_env.py # Environment setup script
31
+ │ └── test_setup.py # Environment testing
32
+ ├── 📁 notebooks/ # Jupyter notebooks
33
+ │ └── 01_getting_started.ipynb # Tutorial notebook
34
+ ├── 📁 data/ # Dataset storage
35
+ ├── 📁 outputs/ # Generated content
36
+ │ ├── images/ # Generated images
37
+ │ └── metadata/ # Generation metadata
38
+ ├── 📁 tests/ # Unit tests (future)
39
+ ├── 🐍 run_basic_generation.py # Convenience: Basic generation
40
+ ├── 🐍 run_advanced_generation.py # Convenience: Advanced generation
41
+ ├── 🐍 run_styled_generation.py # Convenience: Style conditioning
42
+ ├── 🐍 run_advanced_styling.py # Convenience: Advanced styling
43
+ ├── 🐍 run_evaluation.py # Convenience: Quality evaluation
44
+ ├── 🐍 run_lora_training.py # Convenience: LoRA training
45
+ ├── 🐍 run_style_generation.py # Convenience: Personal style generation
46
+ ├── 📄 requirements.txt # Python dependencies
47
+ ├── 📄 development.md # Development roadmap
48
+ ├── 📄 PHASE1_USAGE.md # Phase 1 usage guide
49
+ ├── 📄 PROJECT_STRUCTURE.md # This file
50
+ ├── 📄 .gitignore # Git ignore rules
51
+ └── 📄 README.md # Project overview
52
+ ```
53
+
54
+ ## 🚀 Usage Patterns
55
+
56
+ ### Convenience Scripts (Recommended)
57
+
58
+ Run from project root for easy access:
59
+
60
+ ```bash
61
+ # Basic text-to-image generation
62
+ python run_basic_generation.py "prompt here"
63
+
64
+ # Advanced generation with options
65
+ python run_advanced_generation.py "prompt" --negative "unwanted" --steps 50
66
+
67
+ # Interactive style selection
68
+ python run_styled_generation.py
69
+
70
+ # Advanced style conditioning
71
+ python run_advanced_styling.py "prompt" --style "oil painting" --mood "dramatic"
72
+
73
+ # Quality evaluation interface
74
+ python run_evaluation.py
75
+
76
+ # LoRA personal style training
77
+ python run_lora_training.py --dataset-dir datasets/my_style
78
+
79
+ # Generate with personal style
80
+ python run_style_generation.py --lora-path lora_models/my_style/checkpoint-1000 "prompt"
81
+
82
+ # LoRA personal style training
83
+ python run_lora_training.py --dataset-dir datasets/my_style
84
+
85
+ # Generate with personal style
86
+ python run_style_generation.py --lora-path lora_models/my_style/checkpoint-1000 "prompt"
87
+ ```
88
+
89
+ ### Direct Module Access
90
+
91
+ Run generators directly from their organized location:
92
+
93
+ ```bash
94
+ # Direct access to generators
95
+ python src/generators/compi_phase1_text2image.py "prompt"
96
+ python src/generators/compi_phase1b_advanced_styling.py --list-styles
97
+
98
+ # Environment setup and testing
99
+ python src/setup_env.py
100
+ python src/test_setup.py
101
+ ```
102
+
103
+ ## 🎯 Benefits of This Organization
104
+
105
+ ### 1. **Clean Separation of Concerns**
106
+
107
+ - **`src/generators/`** - All image generation logic
108
+ - **`src/utils/`** - Reusable utility functions
109
+ - **`src/`** - Core project modules and configuration
110
+ - **Root level** - Convenience scripts and documentation
111
+
112
+ ### 2. **Professional Python Structure**
113
+
114
+ - Proper module organization with `__init__.py` files
115
+ - Clear import paths and dependencies
116
+ - Scalable architecture for future expansion
117
+
118
+ ### 3. **Easy Access**
119
+
120
+ - Convenience scripts provide simple access from project root
121
+ - Direct module access for advanced users
122
+ - Maintains backward compatibility
123
+
124
+ ### 4. **Future-Ready**
125
+
126
+ - Organized structure ready for Phase 2+ implementations
127
+ - Clear places for audio processing, UI components, etc.
128
+ - Modular design supports easy testing and maintenance
129
+
130
+ ## 🔧 Development Guidelines
131
+
132
+ ### Adding New Generators
133
+
134
+ 1. Create new generator in `src/generators/`
135
+ 2. Add imports to `src/generators/__init__.py`
136
+ 3. Create convenience script in project root if needed
137
+ 4. Update documentation
138
+
139
+ ### Adding New Utilities
140
+
141
+ 1. Add utility functions to appropriate module in `src/utils/`
142
+ 2. Update `src/utils/__init__.py` imports
143
+ 3. Import in generators as needed
144
+
145
+ ### Testing
146
+
147
+ 1. Add tests to `tests/` directory
148
+ 2. Use `src/test_setup.py` for environment verification
149
+ 3. Test both convenience scripts and direct module access
150
+
151
+ ## 📚 Documentation
152
+
153
+ - **README.md** - Project overview and quick start
154
+ - **development.md** - Comprehensive development roadmap
155
+ - **PHASE1_USAGE.md** - Detailed Phase 1 usage guide
156
+ - **PROJECT_STRUCTURE.md** - This organizational guide
157
+
158
+ This structure provides a solid foundation for the CompI project's continued development through all planned phases.
docs/README.md ADDED
@@ -0,0 +1,38 @@
1
+ # CompI Documentation Index
2
+
3
+ Welcome to the CompI docs. All phase guides and project structure are collected here.
4
+
5
+ ## Quick links
6
+
7
+ - Project Structure — PROJECT_STRUCTURE.md
8
+ - Phase 1
9
+ - Usage — PHASE1_USAGE.md
10
+ - Evaluation (1.D) — PHASE1D_EVALUATION_GUIDE.md
11
+ - LoRA (1.E) — PHASE1E_LORA_GUIDE.md
12
+ - Phase 2
13
+ - Audio to Image (2.A) — PHASE2A_AUDIO_TO_IMAGE_GUIDE.md
14
+ - Data to Image (2.B) — PHASE2B_DATA_TO_IMAGE_GUIDE.md
15
+ - Emotion to Image (2.C) — PHASE2C_EMOTION_TO_IMAGE_GUIDE.md
16
+ - Real-time Data (2.D) — PHASE2D_REALTIME_DATA_TO_IMAGE_GUIDE.md
17
+ - Style Reference (2.E) — PHASE2E_STYLE_REFERENCE_GUIDE.md
18
+ - Phase 3
19
+ - Final Dashboard — PHASE3_FINAL_DASHBOARD_GUIDE.md
20
+ - Performance (3.E) — PHASE3E_PERFORMANCE_GUIDE.md
21
+ - Phase 4
22
+ - Deployment Guide — PHASE4_DEPLOYMENT_GUIDE.md
23
+ - Runtime Optimization — PHASE4_RUNTIME_OPTIMIZATION.md
24
+
25
+ ## Getting started
26
+
27
+ - Launch the integrated Phase 3 dashboard:
28
+
29
+ - python run_phase3_final_dashboard.py
30
+ - Or: streamlit run src/ui/compi_phase3_final_dashboard.py --server.port 8506
31
+
32
+ - Generated images and logs are stored in outputs/
33
+ - Presets live in presets/, export bundles are saved to exports/
34
+
35
+ ## Notes
36
+
37
+ - Some guides describe optional workflows (e.g., dataset preparation and LoRA training). Those features remain available via src/generators/\* scripts but are not required for using the Phase 3 Final Dashboard.
38
+ - If you reorganize folders, ensure src/config.py and the Streamlit app constants continue to point to valid directories.
packages.txt ADDED
@@ -0,0 +1,3 @@
1
+ ffmpeg
2
+ libsndfile1
3
+ libgl1
requirements.txt ADDED
@@ -0,0 +1,73 @@
1
+ # Core Deep Learning
2
+ torch>=2.1.0
3
+ torchvision>=0.16.0
4
+ torchaudio>=2.1.0
5
+ transformers>=4.35.0
6
+ diffusers>=0.24.0
7
+ accelerate>=0.24.0
8
+
9
+ # Audio Processing
10
+ librosa>=0.10.0
11
+ soundfile>=0.12.0
12
+ audioread>=3.0.0
13
+
14
+ # Data Processing & Analysis
15
+ numpy>=1.24.0
16
+ pandas>=2.0.0
17
+ scipy>=1.10.0
18
+ scikit-learn>=1.3.0
19
+
20
+ # Computer Vision
21
+ opencv-python>=4.8.0
22
+ opencv-python-headless>=4.8.0
23
+ Pillow>=10.0.0
24
+
25
+ # Natural Language Processing
26
+ textblob>=0.17.0
27
+ nltk>=3.8.0
28
+ spacy>=3.6.0
29
+
30
+ # Visualization
31
+ matplotlib>=3.7.0
32
+ seaborn>=0.12.0
33
+ plotly>=5.15.0
34
+
35
+ # UI Frameworks
36
+ streamlit>=1.28.0
37
+ gradio>=4.0.0
38
+
39
+ # Utilities
40
+ requests>=2.31.0
41
+ feedparser>=6.0.0
42
+ tqdm>=4.65.0
43
+ python-dotenv>=1.0.0
44
+
45
+ # Development & Testing
46
+ pytest>=7.4.0
47
+ jupyter>=1.0.0
48
+ jupyterlab>=4.0.0
49
+ ipywidgets>=8.0.0
50
+
51
+ # Image Analysis & Evaluation
52
+ imagehash>=4.3.0
53
+
54
+ # LoRA Fine-tuning & Personal Style Training
55
+ peft>=0.7.0
56
+ datasets>=2.14.0
57
+ bitsandbytes>=0.41.0
58
+
59
+ # Audio-to-Text Processing (Phase 2.A)
60
+ openai-whisper>=20231117
61
+
62
+ # Optional: Advanced ML Libraries
63
+ # Some are enabled below; uncomment others as needed for advanced features
64
+ open-clip-torch>=2.24.0
65
+ controlnet-aux>=0.4.0
66
+ xformers>=0.0.22 # For memory-efficient attention (GPU recommended)
67
+ # wandb>=0.16.0 # Experiment tracking
68
+ safetensors>=0.4.0 # Safe model loading
69
+
70
+ # Optional: Cloud/API Integration
71
+ # boto3>=1.34.0 # AWS SDK
72
+ # google-cloud-storage>=2.10.0 # Google Cloud Platform
73
+ # openai>=1.0.0 # OpenAI API (updated to v1+)
run_phase3_final_dashboard.py ADDED
@@ -0,0 +1,299 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CompI Phase 3 Final Dashboard Launcher
4
+
5
+ Launch the complete Phase 3 integrated dashboard that combines ALL CompI features:
6
+
7
+ Phase 3.A/3.B: True multimodal fusion with real processing
8
+ - Real audio transcription and analysis
9
+ - Actual data processing and visualization
10
+ - Sentiment analysis and emotion detection
11
+ - Live real-time data feeds (weather, news)
12
+
13
+ Phase 3.C: Advanced references with role assignment
14
+ - Multi-image upload and URL support
15
+ - Style vs structure role assignment
16
+ - Live ControlNet previews (Canny/Depth)
17
+ - Hybrid CN+I2I generation modes
18
+
19
+ Phase 3.D: Professional workflow management
20
+ - Gallery with advanced filtering
21
+ - Rating, tagging, and annotation system
22
+ - Preset save/load functionality
23
+ - Complete export bundles with metadata
24
+
25
+ Phase 3.E: Performance management and model switching
26
+ - SD 1.5 ↔ SDXL model switching
27
+ - LoRA integration with scale control
28
+ - Performance optimizations (xFormers, attention slicing, VAE)
29
+ - VRAM monitoring and OOM auto-retry
30
+ - Optional latent upscaling
31
+
32
+ Usage:
33
+ python run_phase3_final_dashboard.py
34
+
35
+ or
36
+
37
+ streamlit run src/ui/compi_phase3_final_dashboard.py --server.port 8506
38
+ """
39
+
40
+ import os
41
+ import sys
42
+ import subprocess
43
+ from pathlib import Path
44
+
45
+ def check_dependencies():
46
+ """Check for required dependencies"""
47
+ print("📦 Checking dependencies...")
48
+
49
+ required_packages = {
50
+ "torch": "PyTorch",
51
+ "diffusers": "Diffusers",
52
+ "transformers": "Transformers",
53
+ "accelerate": "Accelerate",
54
+ "streamlit": "Streamlit",
55
+ "pillow": "Pillow (PIL)",
56
+ "numpy": "NumPy",
57
+ "pandas": "Pandas",
58
+ "librosa": "Librosa (audio processing)",
59
+ "matplotlib": "Matplotlib (plotting)",
60
+ "requests": "Requests (HTTP)",
61
+ "feedparser": "FeedParser (RSS feeds)",
62
+ "textblob": "TextBlob (sentiment analysis)"
63
+ }
64
+
65
+ # Special check for OpenCV (accept either opencv-python or opencv-python-headless)
66
+ opencv_available = False
67
+ try:
68
+ import cv2
69
+ opencv_available = True
70
+ required_packages["cv2"] = "OpenCV (image processing)"
71
+ except ImportError:
72
+ pass
73
+
74
+ missing_packages = []
75
+ available_packages = []
76
+
77
+ for package, name in required_packages.items():
78
+ try:
79
+ __import__(package.replace("-", "_"))
80
+ available_packages.append(name)
81
+ except ImportError:
82
+ if package != "cv2": # cv2 already checked above
83
+ missing_packages.append(package)
84
+
85
+ # Add opencv to missing if not available
86
+ if not opencv_available:
87
+ missing_packages.append("opencv-python")
88
+
89
+ print(f"✅ Available: {', '.join(available_packages)}")
90
+
91
+ if missing_packages:
92
+ print(f"❌ Missing: {', '.join(missing_packages)}")
93
+ return False
94
+
95
+ return True
96
+
97
+ def check_optional_features():
98
+ """Check for optional features"""
99
+ print("\n🔍 Checking optional features...")
100
+
101
+ # Check Whisper
102
+ try:
103
+ import whisper
104
+ print("✅ Whisper available for audio transcription")
105
+ except ImportError:
106
+ print("⚠️ Whisper not available (will be installed on first use)")
107
+
108
+ # Check SDXL availability
109
+ try:
110
+ from diffusers import StableDiffusionXLPipeline
111
+ print("✅ SDXL support available")
112
+ except ImportError:
113
+ print("⚠️ SDXL not available (requires newer diffusers)")
114
+
115
+ # Check ControlNet availability
116
+ try:
117
+ from diffusers import StableDiffusionControlNetPipeline
118
+ print("✅ ControlNet available")
119
+ except ImportError:
120
+ print("⚠️ ControlNet not available")
121
+
122
+ # Check upscaler availability
123
+ try:
124
+ from diffusers import StableDiffusionLatentUpscalePipeline
125
+ print("✅ Latent Upscaler available")
126
+ except ImportError:
127
+ print("⚠️ Latent Upscaler not available")
128
+
129
+ # Check xFormers
130
+ try:
131
+ import xformers
132
+ print("✅ xFormers available for memory optimization")
133
+ except ImportError:
134
+ print("⚠️ xFormers not available (optional performance boost)")
135
+
136
+ def check_gpu_setup():
137
+ """Check GPU setup and provide recommendations"""
138
+ print("\n🔍 Checking GPU setup...")
139
+
140
+ try:
141
+ import torch
142
+
143
+ if torch.cuda.is_available():
144
+ gpu_count = torch.cuda.device_count()
145
+ gpu_name = torch.cuda.get_device_name(0)
146
+ total_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
147
+
148
+ print(f"✅ CUDA available: {gpu_count} GPU(s)")
149
+ print(f" Primary GPU: {gpu_name}")
150
+ print(f" VRAM: {total_memory:.1f} GB")
151
+
152
+ if total_memory >= 12.0:
153
+ print("✅ Excellent VRAM for all features including SDXL")
154
+ elif total_memory >= 8.0:
155
+ print("✅ Good VRAM for SDXL and most features")
156
+ elif total_memory >= 6.0:
157
+ print("✅ Sufficient VRAM for SD 1.5 and most features")
158
+ print("⚠️ SDXL may require optimizations")
159
+ elif total_memory >= 4.0:
160
+ print("✅ Minimum VRAM for SD 1.5")
161
+ print("⚠️ Use aggressive optimizations for best performance")
162
+ else:
163
+ print("⚠️ Limited VRAM - consider CPU mode or cloud GPU")
164
+
165
+ return True
166
+ else:
167
+ print("⚠️ CUDA not available - will use CPU mode")
168
+ print("💡 CPU mode is slower but still functional")
169
+ return False
170
+
171
+ except ImportError:
172
+ print("❌ PyTorch not found")
173
+ return False
174
+
175
+ def install_missing_dependencies():
176
+ """Install missing dependencies"""
177
+ print("\n📦 Installing missing dependencies...")
178
+
179
+ try:
180
+ # Core dependencies
181
+ core_packages = [
182
+ "torch", "torchvision", "torchaudio",
183
+ "diffusers>=0.21.0", "transformers", "accelerate",
184
+ "streamlit", "pillow", "numpy", "pandas",
185
+ "librosa", "opencv-python", "matplotlib",
186
+ "requests", "feedparser", "textblob"
187
+ ]
188
+
189
+ print("Installing core packages...")
190
+ subprocess.check_call([
191
+ sys.executable, "-m", "pip", "install"
192
+ ] + core_packages)
193
+
194
+ print("✅ Core dependencies installed")
195
+
196
+ # Optional performance packages (skip xformers due to compatibility issues)
197
+ print("⚠️ Skipping xFormers installation (compatibility issues with current PyTorch version)")
198
+
199
+ return True
200
+
201
+ except subprocess.CalledProcessError as e:
202
+ print(f"❌ Installation failed: {e}")
203
+ return False
204
+
205
+ def main():
206
+ """Launch Phase 3 Final Dashboard"""
207
+
208
+ print("🧪 CompI Phase 3 Final Dashboard")
209
+ print("=" * 80)
210
+ print()
211
+ print("🎯 Complete Phase 3 Integration (3.A → 3.E):")
212
+ print(" • 🧩 Multimodal Inputs: Text, Audio, Data, Emotion, Real-time")
213
+ print(" • 🖼️ Advanced References: Role assignment, ControlNet, live previews")
214
+ print(" • ⚙️ Model & Performance: SD 1.5/SDXL, LoRA, VRAM monitoring")
215
+ print(" • 🎛️ Intelligent Generation: Hybrid modes, OOM recovery")
216
+ print(" • 🖼️ Professional Gallery: Filtering, rating, annotation")
217
+ print(" • 💾 Preset Management: Save/load configurations")
218
+ print(" • 📦 Export System: Complete bundles with metadata")
219
+ print()
220
+
221
+ # Check if the UI file exists
222
+ ui_file = Path("src/ui/compi_phase3_final_dashboard.py")
223
+ if not ui_file.exists():
224
+ print(f"❌ Error: {ui_file} not found!")
225
+ print("Make sure you're running this from the project root directory.")
226
+ return 1
227
+
228
+ # Check dependencies
229
+ if not check_dependencies():
230
+ print("\n❌ Missing dependencies detected.")
231
+ install = input("Install missing dependencies? (y/n): ").lower().strip()
232
+
233
+ if install == 'y':
234
+ if not install_missing_dependencies():
235
+ print("❌ Failed to install dependencies")
236
+ return 1
237
+ else:
238
+ print("❌ Cannot proceed without required dependencies")
239
+ return 1
240
+
241
+ # Check GPU setup
242
+ has_gpu = check_gpu_setup()
243
+
244
+ # Check optional features
245
+ check_optional_features()
246
+
247
+ print()
248
+ print("🚀 Launching Phase 3 Final Dashboard...")
249
+ print("📍 Access at: http://localhost:8506")
250
+ print()
251
+
252
+ if has_gpu:
253
+ print("💡 GPU Tips:")
254
+ print(" • Monitor VRAM usage in the top metrics bar")
255
+ print(" • Use performance optimizations in Model & Performance tab")
256
+ print(" • Enable OOM auto-retry for reliability")
257
+ print(" • Try SDXL for higher quality (requires 8+ GB VRAM)")
258
+ else:
259
+ print("💡 CPU Tips:")
260
+ print(" • Generation will be slower but still functional")
261
+ print(" • Use smaller image sizes (512x512 or less)")
262
+ print(" • Reduce inference steps for faster generation")
263
+ print(" • Stick to SD 1.5 model for best performance")
264
+
265
+ print()
266
+ print("🎨 Getting Started:")
267
+ print(" 1. 🧩 Configure multimodal inputs (audio, data, emotion, real-time)")
268
+ print(" 2. 🖼️ Upload reference images and assign roles (style vs structure)")
269
+ print(" 3. ⚙️ Choose model and optimize performance settings")
270
+ print(" 4. 🎛️ Generate with intelligent fusion of all inputs")
271
+ print(" 5. 🖼️ Review results in gallery and add annotations")
272
+ print(" 6. 💾 Save presets for reuse")
273
+ print(" 7. 📦 Export complete bundles with metadata")
274
+ print()
275
+
276
+ # Launch Streamlit
277
+ try:
278
+ cmd = [
279
+ sys.executable, "-m", "streamlit", "run",
280
+ str(ui_file),
281
+ "--server.port", "8506",
282
+ "--server.headless", "true",
283
+ "--browser.gatherUsageStats", "false"
284
+ ]
285
+
286
+ subprocess.run(cmd)
287
+
288
+ except KeyboardInterrupt:
289
+ print("\n👋 Phase 3 Final Dashboard stopped by user")
290
+ return 0
291
+ except Exception as e:
292
+ print(f"❌ Error launching Streamlit: {e}")
293
+ return 1
294
+
295
+ return 0
296
+
297
+ if __name__ == "__main__":
298
+ exit_code = main()
299
+ sys.exit(exit_code)
src/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """
2
+ CompI - Compositional Intelligence Project
3
+ A multi-modal AI system for creative content generation.
4
+ """
5
+
6
+ __version__ = "0.1.0"
7
+ __author__ = "CompI Development Team"
src/config.py ADDED
@@ -0,0 +1,42 @@
1
+ """
2
+ Configuration settings for CompI project.
3
+ """
4
+
5
+ import os
6
+ from pathlib import Path
7
+
8
+ # Project paths
9
+ PROJECT_ROOT = Path(__file__).parent.parent
10
+ DATA_DIR = PROJECT_ROOT / "data"
11
+ OUTPUTS_DIR = PROJECT_ROOT / "outputs"
12
+ MODELS_DIR = PROJECT_ROOT / "models"
13
+
14
+ # Create directories if they don't exist
15
+ for dir_path in [DATA_DIR, OUTPUTS_DIR, MODELS_DIR]:
16
+ dir_path.mkdir(exist_ok=True)
17
+
18
+ # Model configurations
19
+ STABLE_DIFFUSION_MODEL = "runwayml/stable-diffusion-v1-5"
20
+ STABLE_DIFFUSION_IMG2IMG_MODEL = "runwayml/stable-diffusion-v1-5"
21
+ EMOTION_MODEL = "j-hartmann/emotion-english-distilroberta-base"
22
+ SENTIMENT_MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
23
+
24
+ # Generation settings
25
+ DEFAULT_IMAGE_SIZE = (512, 512)
26
+ DEFAULT_INFERENCE_STEPS = 20
27
+ DEFAULT_GUIDANCE_SCALE = 7.5
28
+
29
+ # Audio settings
30
+ SAMPLE_RATE = 22050
31
+ AUDIO_DURATION = 10 # seconds
32
+
33
+ # Device settings
34
+ DEVICE = "cuda" if os.getenv("CUDA_AVAILABLE", "false").lower() == "true" else "cpu"
35
+
36
+ # API keys (load from environment)
37
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
38
+ HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
39
+
40
+ # Logging
41
+ LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
42
+ LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
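The generator scripts in this commit hard-code equivalent values rather than importing them, so the following is only a sketch of how the module can be consumed; note that DEVICE above is driven by the CUDA_AVAILABLE environment variable rather than torch.cuda.is_available(), and the scripts detect the GPU themselves.

```python
# Illustrative usage of src/config.py (the bundled generators do not import it yet)
from src.config import (
    STABLE_DIFFUSION_MODEL,
    DEFAULT_IMAGE_SIZE,
    DEFAULT_INFERENCE_STEPS,
    DEFAULT_GUIDANCE_SCALE,
    OUTPUTS_DIR,
)

width, height = DEFAULT_IMAGE_SIZE
print(f"model={STABLE_DIFFUSION_MODEL} size={width}x{height} "
      f"steps={DEFAULT_INFERENCE_STEPS} guidance={DEFAULT_GUIDANCE_SCALE}")
print(f"outputs directory: {OUTPUTS_DIR}")
```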
src/generators/__init__.py ADDED
@@ -0,0 +1,26 @@
1
+ """
2
+ CompI Image Generators
3
+ Text-to-image generation modules for the CompI platform.
4
+ """
5
+
6
+ from .compi_phase1_text2image import *
7
+ from .compi_phase1_advanced import *
8
+ from .compi_phase1b_styled_generation import *
9
+ from .compi_phase1b_advanced_styling import *
10
+ from .compi_phase2a_audio_to_image import *
11
+ from .compi_phase2b_data_to_image import *
12
+ from .compi_phase2c_emotion_to_image import *
13
+ from .compi_phase2d_realtime_to_image import *
14
+ from .compi_phase2e_refimg_to_image import *
15
+
16
+ __all__ = [
17
+ "compi_phase1_text2image",
18
+ "compi_phase1_advanced",
19
+ "compi_phase1b_styled_generation",
20
+ "compi_phase1b_advanced_styling",
21
+ "compi_phase2a_audio_to_image",
22
+ "compi_phase2b_data_to_image",
23
+ "compi_phase2c_emotion_to_image",
24
+ "compi_phase2d_realtime_to_image",
25
+ "compi_phase2e_refimg_to_image"
26
+ ]
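Note that the star imports above execute each module's top-level code, and at least compi_phase1_text2image.py and compi_phase1b_styled_generation.py load a Stable Diffusion pipeline at import time, so `import src.generators` is heavyweight. A hypothetical lazy alternative, not part of this commit, would defer the import until a generator is actually needed:

```python
# Hypothetical sketch: import a generator module only when it is needed
import importlib

def load_generator(name: str):
    """Return a module from src.generators without importing the rest of the package."""
    return importlib.import_module(f"src.generators.{name}")

# e.g. load_generator("compi_phase1_advanced").main()
```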
src/generators/compi_phase1_advanced.py ADDED
@@ -0,0 +1,230 @@
1
+ # compi_phase1_advanced.py
2
+ # Enhanced text-to-image generation with batch processing, negative prompts, and style controls
3
+
4
+ import os
5
+ import sys
6
+ import torch
7
+ import argparse
8
+ from datetime import datetime
9
+ from diffusers import StableDiffusionPipeline
10
+ from PIL import Image
11
+ import json
12
+
13
+ # Add project root to path for imports
14
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
15
+
16
+ # ------------------ 1. SETUP AND ARGUMENT PARSING ------------------
17
+
18
+ def setup_args():
19
+ parser = argparse.ArgumentParser(description="CompI Phase 1: Advanced Text-to-Image Generation")
20
+ parser.add_argument("prompt", nargs="*", help="Text prompt for image generation")
21
+ parser.add_argument("--negative", "-n", default="", help="Negative prompt (what to avoid)")
22
+ parser.add_argument("--steps", "-s", type=int, default=30, help="Number of inference steps (default: 30)")
23
+ parser.add_argument("--guidance", "-g", type=float, default=7.5, help="Guidance scale (default: 7.5)")
24
+ parser.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility")
25
+ parser.add_argument("--batch", "-b", type=int, default=1, help="Number of images to generate")
26
+ parser.add_argument("--width", "-w", type=int, default=512, help="Image width (default: 512)")
27
+ parser.add_argument("--height", type=int, default=512, help="Image height (default: 512)")
28
+ parser.add_argument("--model", "-m", default="runwayml/stable-diffusion-v1-5", help="Model to use")
29
+ parser.add_argument("--output", "-o", default="outputs", help="Output directory")
30
+ parser.add_argument("--interactive", "-i", action="store_true", help="Interactive mode")
31
+ return parser.parse_args()
32
+
33
+ # Check for GPU
34
+ device = "cuda" if torch.cuda.is_available() else "cpu"
35
+ print(f"Using device: {device}")
36
+
37
+ # Logging function
38
+ def log(msg):
39
+ now = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
40
+ print(f"{now} {msg}")
41
+
42
+ # ------------------ 2. MODEL LOADING ------------------
43
+
44
+ def load_model(model_name):
45
+ log(f"Loading model: {model_name}")
46
+
47
+ def dummy_safety_checker(images, **kwargs):
48
+ return images, [False] * len(images)
49
+
50
+ try:
51
+ pipe = StableDiffusionPipeline.from_pretrained(
52
+ model_name,
53
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32,
54
+ safety_checker=dummy_safety_checker,
55
+ )
56
+ pipe = pipe.to(device)
57
+
58
+ # Memory optimizations
59
+ pipe.enable_attention_slicing()
60
+ # Note: enable_memory_efficient_attention() is deprecated in newer versions
61
+
62
+ log("Model loaded successfully")
63
+ return pipe
64
+ except Exception as e:
65
+ log(f"Error loading model: {e}")
66
+ sys.exit(1)
67
+
68
+ # ------------------ 3. GENERATION FUNCTION ------------------
69
+
70
+ def generate_image(pipe, prompt, negative_prompt="", **kwargs):
71
+ """Generate a single image with given parameters"""
72
+
73
+ # Set up generator
74
+ seed = kwargs.get('seed', torch.seed())
75
+ if device == "cuda":
76
+ generator = torch.Generator(device).manual_seed(seed)
77
+ else:
78
+ generator = torch.manual_seed(seed)
79
+
80
+ # Generation parameters
81
+ params = {
82
+ 'prompt': prompt,
83
+ 'negative_prompt': negative_prompt if negative_prompt else None,
84
+ 'height': kwargs.get('height', 512),
85
+ 'width': kwargs.get('width', 512),
86
+ 'num_inference_steps': kwargs.get('steps', 30),
87
+ 'guidance_scale': kwargs.get('guidance', 7.5),
88
+ 'generator': generator,
89
+ }
90
+
91
+ log(f"Generating: '{prompt[:50]}...' (seed: {seed})")
92
+
93
+ with torch.autocast(device) if device == "cuda" else torch.no_grad():
94
+ result = pipe(**params)
95
+ return result.images[0], seed
96
+
97
+ # ------------------ 4. SAVE FUNCTION ------------------
98
+
99
+ def save_image(image, prompt, seed, output_dir, metadata=None):
100
+ """Save image with descriptive filename and metadata"""
101
+ os.makedirs(output_dir, exist_ok=True)
102
+
103
+ # Create filename
104
+ prompt_slug = "_".join(prompt.lower().split()[:6])
105
+ prompt_slug = "".join(c for c in prompt_slug if c.isalnum() or c in "_-")[:40]
106
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
107
+ filename = f"{prompt_slug}_{timestamp}_seed{seed}.png"
108
+ filepath = os.path.join(output_dir, filename)
109
+
110
+ # Save image
111
+ image.save(filepath)
112
+
113
+ # Save metadata
114
+ if metadata:
115
+ metadata_file = filepath.replace('.png', '_metadata.json')
116
+ with open(metadata_file, 'w') as f:
117
+ json.dump(metadata, f, indent=2)
118
+
119
+ log(f"Saved: {filepath}")
120
+ return filepath
121
+
122
+ # ------------------ 5. INTERACTIVE MODE ------------------
123
+
124
+ def interactive_mode(pipe, output_dir):
125
+ """Interactive prompt mode for experimentation"""
126
+ log("Entering interactive mode. Type 'quit' to exit.")
127
+
128
+ while True:
129
+ try:
130
+ prompt = input("\n🎨 Enter prompt: ").strip()
131
+ if prompt.lower() in ['quit', 'exit', 'q']:
132
+ break
133
+
134
+ if not prompt:
135
+ continue
136
+
137
+ negative = input("❌ Negative prompt (optional): ").strip()
138
+
139
+ # Quick settings
140
+ print("⚙️ Quick settings (press Enter for defaults):")
141
+ steps = input(f" Steps (30): ").strip()
142
+ steps = int(steps) if steps else 30
143
+
144
+ guidance = input(f" Guidance (7.5): ").strip()
145
+ guidance = float(guidance) if guidance else 7.5
146
+
147
+ # Generate
148
+ image, seed = generate_image(
149
+ pipe, prompt, negative,
150
+ steps=steps, guidance=guidance
151
+ )
152
+
153
+ # Save with metadata
154
+ metadata = {
155
+ 'prompt': prompt,
156
+ 'negative_prompt': negative,
157
+ 'steps': steps,
158
+ 'guidance_scale': guidance,
159
+ 'seed': seed,
160
+ 'timestamp': datetime.now().isoformat()
161
+ }
162
+
163
+ save_image(image, prompt, seed, output_dir, metadata)
164
+
165
+ except KeyboardInterrupt:
166
+ print("\n👋 Goodbye!")
167
+ break
168
+ except Exception as e:
169
+ log(f"Error: {e}")
170
+
171
+ # ------------------ 6. MAIN FUNCTION ------------------
172
+
173
+ def main():
174
+ args = setup_args()
175
+
176
+ # Load model
177
+ pipe = load_model(args.model)
178
+
179
+ # Interactive mode
180
+ if args.interactive:
181
+ interactive_mode(pipe, args.output)
182
+ return
183
+
184
+ # Get prompt
185
+ if args.prompt:
186
+ prompt = " ".join(args.prompt)
187
+ else:
188
+ prompt = input("Enter your prompt: ").strip()
189
+
190
+ if not prompt:
191
+ log("No prompt provided. Use --interactive for interactive mode.")
192
+ return
193
+
194
+ # Generate batch
195
+ log(f"Generating {args.batch} image(s)")
196
+
197
+ for i in range(args.batch):
198
+ try:
199
+ # Use provided seed or generate random one
200
+ seed = args.seed if args.seed is not None else torch.seed()
201
+
202
+ image, actual_seed = generate_image(
203
+ pipe, prompt, args.negative,
204
+ seed=seed, steps=args.steps, guidance=args.guidance,
205
+ width=args.width, height=args.height
206
+ )
207
+
208
+ # Prepare metadata
209
+ metadata = {
210
+ 'prompt': prompt,
211
+ 'negative_prompt': args.negative,
212
+ 'steps': args.steps,
213
+ 'guidance_scale': args.guidance,
214
+ 'seed': actual_seed,
215
+ 'width': args.width,
216
+ 'height': args.height,
217
+ 'model': args.model,
218
+ 'batch_index': i + 1,
219
+ 'timestamp': datetime.now().isoformat()
220
+ }
221
+
222
+ save_image(image, prompt, actual_seed, args.output, metadata)
223
+
224
+ except Exception as e:
225
+ log(f"Error generating image {i+1}: {e}")
226
+
227
+ log("Generation complete!")
228
+
229
+ if __name__ == "__main__":
230
+ main()
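For example (flags as defined in setup_args above): `python src/generators/compi_phase1_advanced.py "a misty harbor at dawn" --negative "blurry, low quality" --steps 40 --batch 3 --seed 1234`, or `python src/generators/compi_phase1_advanced.py --interactive` for the prompt loop; images and their `_metadata.json` sidecars are written to `outputs/` by default.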
src/generators/compi_phase1_text2image.py ADDED
@@ -0,0 +1,117 @@
1
+ # compi_phase1_text2image.py
2
+
3
+ import os
4
+ import sys
5
+ import torch
6
+ from datetime import datetime
7
+ from diffusers import StableDiffusionPipeline
8
+ from PIL import Image
9
+
10
+ # Add project root to path for imports
11
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
12
+
13
+ # ------------------ 1. SETUP AND CHECKS ------------------
14
+
15
+ # Check for GPU
16
+ if torch.cuda.is_available():
17
+ device = "cuda"
18
+ print("CUDA GPU detected. Running on GPU for best performance.")
19
+ else:
20
+ device = "cpu"
21
+ print("No CUDA GPU detected. Running on CPU. Generation will be slow.")
22
+
23
+ # Set up output directory
24
+ OUTPUT_DIR = os.path.join(os.path.dirname(__file__), '..', '..', "outputs")
25
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
26
+
27
+ # Logging function
28
+ def log(msg):
29
+ now = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
30
+ print(f"{now} {msg}")
31
+
32
+ # ------------------ 2. LOAD MODEL ------------------
33
+
34
+ MODEL_NAME = "runwayml/stable-diffusion-v1-5"
35
+ log(f"Loading model: {MODEL_NAME} (this may take a minute on first run)")
36
+
37
+ # Optionally, disable the safety checker for pure creative exploration
38
+ def dummy_safety_checker(images, **kwargs):
39
+ return images, [False] * len(images)
40
+
41
+ try:
42
+ pipe = StableDiffusionPipeline.from_pretrained(
43
+ MODEL_NAME,
44
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32,
45
+ safety_checker=dummy_safety_checker, # Remove for production!
46
+ )
47
+ except Exception as e:
48
+ log(f"Error loading model: {e}")
49
+ sys.exit(1)
50
+
51
+ pipe = pipe.to(device)
52
+ pipe.enable_attention_slicing() # Reduce VRAM use
53
+
54
+ log("Model loaded successfully.")
55
+
56
+ # ------------------ 3. PROMPT HANDLING ------------------
57
+
58
+ def main():
59
+ """Main function for command-line execution"""
60
+ if len(sys.argv) > 1:
61
+ prompt = " ".join(sys.argv[1:])
62
+ log(f"Prompt taken from command line: {prompt}")
63
+ else:
64
+ prompt = input("Enter your prompt (e.g. 'A magical forest, digital art'): ").strip()
65
+ log(f"Prompt entered: {prompt}")
66
+
67
+ if not prompt:
68
+ log("No prompt provided. Exiting.")
69
+ sys.exit(0)
70
+
71
+ # ------------------ 4. GENERATION PARAMETERS ------------------
72
+
73
+ SEED = torch.seed() # You can use a fixed seed for reproducibility, e.g. 1234
74
+ generator = torch.manual_seed(SEED) if device == "cpu" else torch.Generator(device).manual_seed(SEED)
75
+
76
+ num_inference_steps = 30 # More steps = better quality, slower (default 50)
77
+ guidance_scale = 7.5 # Higher = follow prompt more strictly
78
+
79
+ # Output image size (SDv1.5 default 512x512)
80
+ height = 512
81
+ width = 512
82
+
83
+ # ------------------ 5. IMAGE GENERATION ------------------
84
+
85
+ log(f"Generating image for prompt: {prompt}")
86
+ log(f"Params: steps={num_inference_steps}, guidance_scale={guidance_scale}, seed={SEED}")
87
+
88
+ with torch.autocast(device) if device == "cuda" else torch.no_grad():
89
+ result = pipe(
90
+ prompt,
91
+ height=height,
92
+ width=width,
93
+ num_inference_steps=num_inference_steps,
94
+ guidance_scale=guidance_scale,
95
+ generator=generator,
96
+ )
97
+
98
+ image: Image.Image = result.images[0]
99
+
100
+ # ------------------ 6. SAVE OUTPUT ------------------
101
+
102
+ # Filename: prompt short, datetime, seed
103
+ prompt_slug = "_".join(prompt.lower().split()[:6])
104
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
105
+ filename = f"{prompt_slug[:40]}_{timestamp}_seed{SEED}.png"
106
+ filepath = os.path.join(OUTPUT_DIR, filename)
107
+ image.save(filepath)
108
+ log(f"Image saved to {filepath}")
109
+
110
+ # Optionally, show image (uncomment next line if running locally)
111
+ # image.show()
112
+
113
+ # Log end
114
+ log("Generation complete.")
115
+
116
+ if __name__ == "__main__":
117
+ main()
src/generators/compi_phase1b_advanced_styling.py ADDED
@@ -0,0 +1,338 @@
1
+ # compi_phase1b_advanced_styling.py
2
+ # Advanced style conditioning with negative prompts, quality settings, and enhanced prompt engineering
3
+
4
+ import os
5
+ import sys
6
+ import torch
7
+ import json
8
+ import argparse
9
+ from datetime import datetime
10
+ from diffusers import StableDiffusionPipeline
11
+ from PIL import Image
12
+
13
+ # Add project root to path for imports
14
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
15
+
16
+ # -------- 1. SETUP AND ARGUMENT PARSING --------
17
+
18
+ def setup_args():
19
+ parser = argparse.ArgumentParser(description="CompI Phase 1.B: Advanced Style Conditioning")
20
+ parser.add_argument("prompt", nargs="*", help="Main scene/subject description")
21
+ parser.add_argument("--style", "-s", help="Art style (or number from list)")
22
+ parser.add_argument("--mood", "-m", help="Mood/atmosphere (or number from list)")
23
+ parser.add_argument("--variations", "-v", type=int, default=1, help="Number of variations")
24
+ parser.add_argument("--quality", "-q", choices=["draft", "standard", "high"], default="standard", help="Quality preset")
25
+ parser.add_argument("--negative", "-n", help="Negative prompt")
26
+ parser.add_argument("--interactive", "-i", action="store_true", help="Interactive mode")
27
+ parser.add_argument("--list-styles", action="store_true", help="List available styles and exit")
28
+ parser.add_argument("--list-moods", action="store_true", help="List available moods and exit")
29
+ return parser.parse_args()
30
+
31
+ # Device setup
32
+ device = "cuda" if torch.cuda.is_available() else "cpu"
33
+ print(f"Using device: {device}")
34
+
35
+ OUTPUT_DIR = os.path.join(os.path.dirname(__file__), '..', '..', "outputs")
36
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
37
+
38
+ def log(msg):
39
+ now = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
40
+ print(f"{now} {msg}")
41
+
42
+ # -------- 2. STYLE AND MOOD DEFINITIONS --------
43
+
44
+ STYLES = {
45
+ "digital art": {
46
+ "prompt": "digital art, highly detailed",
47
+ "negative": "blurry, pixelated, low resolution"
48
+ },
49
+ "oil painting": {
50
+ "prompt": "oil painting, classical art, brushstrokes, canvas texture",
51
+ "negative": "digital, pixelated, modern"
52
+ },
53
+ "watercolor": {
54
+ "prompt": "watercolor painting, soft colors, flowing paint",
55
+ "negative": "harsh lines, digital, photographic"
56
+ },
57
+ "cyberpunk": {
58
+ "prompt": "cyberpunk style, neon lights, futuristic, sci-fi",
59
+ "negative": "natural, organic, pastoral"
60
+ },
61
+ "impressionist": {
62
+ "prompt": "impressionist painting, soft brushstrokes, light and color",
63
+ "negative": "sharp details, photorealistic, digital"
64
+ },
65
+ "concept art": {
66
+ "prompt": "concept art, professional illustration, detailed",
67
+ "negative": "amateur, sketch, unfinished"
68
+ },
69
+ "anime": {
70
+ "prompt": "anime style, manga, Japanese animation",
71
+ "negative": "realistic, western cartoon, photographic"
72
+ },
73
+ "photorealistic": {
74
+ "prompt": "photorealistic, high detail, professional photography",
75
+ "negative": "cartoon, painting, stylized"
76
+ },
77
+ "minimalist": {
78
+ "prompt": "minimalist art, clean lines, simple composition",
79
+ "negative": "cluttered, complex, detailed"
80
+ },
81
+ "surrealism": {
82
+ "prompt": "surrealist art, dreamlike, impossible, Salvador Dali style",
83
+ "negative": "realistic, logical, mundane"
84
+ },
85
+ "pixel art": {
86
+ "prompt": "pixel art, 8-bit style, retro gaming",
87
+ "negative": "smooth, high resolution, photorealistic"
88
+ },
89
+ "steampunk": {
90
+ "prompt": "steampunk style, Victorian era, brass and copper, gears",
91
+ "negative": "modern, digital, futuristic"
92
+ },
93
+ "3d render": {
94
+ "prompt": "3D render, CGI, computer graphics, ray tracing",
95
+ "negative": "2D, flat, hand-drawn"
96
+ }
97
+ }
98
+
99
+ MOODS = {
100
+ "dreamy": {
101
+ "prompt": "dreamy atmosphere, soft lighting, ethereal",
102
+ "negative": "harsh, stark, realistic"
103
+ },
104
+ "dark": {
105
+ "prompt": "dark and moody, dramatic shadows, mysterious",
106
+ "negative": "bright, cheerful, light"
107
+ },
108
+ "peaceful": {
109
+ "prompt": "peaceful, serene, calm, tranquil",
110
+ "negative": "chaotic, violent, disturbing"
111
+ },
112
+ "vibrant": {
113
+ "prompt": "vibrant and energetic, bright colors, dynamic",
114
+ "negative": "dull, muted, lifeless"
115
+ },
116
+ "melancholic": {
117
+ "prompt": "melancholic, sad, nostalgic, wistful",
118
+ "negative": "happy, joyful, upbeat"
119
+ },
120
+ "mysterious": {
121
+ "prompt": "mysterious, enigmatic, hidden secrets",
122
+ "negative": "obvious, clear, straightforward"
123
+ },
124
+ "whimsical": {
125
+ "prompt": "whimsical, playful, fantastical, magical",
126
+ "negative": "serious, realistic, mundane"
127
+ },
128
+ "dramatic": {
129
+ "prompt": "dramatic lighting, high contrast, cinematic",
130
+ "negative": "flat lighting, low contrast, amateur"
131
+ },
132
+ "retro": {
133
+ "prompt": "retro style, vintage, nostalgic, classic",
134
+ "negative": "modern, contemporary, futuristic"
135
+ }
136
+ }
137
+
138
+ QUALITY_PRESETS = {
139
+ "draft": {"steps": 20, "guidance": 6.0, "size": (512, 512)},
140
+ "standard": {"steps": 30, "guidance": 7.5, "size": (512, 512)},
141
+ "high": {"steps": 50, "guidance": 8.5, "size": (768, 768)}
142
+ }
143
+
144
+ # -------- 3. MODEL LOADING --------
145
+
146
+ def load_model():
147
+ MODEL_NAME = "runwayml/stable-diffusion-v1-5"
148
+ log(f"Loading model: {MODEL_NAME}")
149
+
150
+ def dummy_safety_checker(images, **kwargs):
151
+ return images, [False] * len(images)
152
+
153
+ try:
154
+ pipe = StableDiffusionPipeline.from_pretrained(
155
+ MODEL_NAME,
156
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32,
157
+ safety_checker=dummy_safety_checker,
158
+ )
159
+ pipe = pipe.to(device)
160
+ pipe.enable_attention_slicing()
161
+ log("Model loaded successfully")
162
+ return pipe
163
+ except Exception as e:
164
+ log(f"Error loading model: {e}")
165
+ sys.exit(1)
166
+
167
+ # -------- 4. INTERACTIVE FUNCTIONS --------
168
+
169
+ def list_options(options_dict, title):
170
+ print(f"\n{title}:")
171
+ for idx, (key, value) in enumerate(options_dict.items(), 1):
172
+ prompt_preview = value["prompt"][:50] + "..." if len(value["prompt"]) > 50 else value["prompt"]
173
+ print(f" {idx:2d}. {key:15s} - {prompt_preview}")
174
+
175
+ def get_user_choice(options_dict, prompt_text, allow_custom=True):
176
+ choice = input(f"{prompt_text}: ").strip()
177
+
178
+ if choice.isdigit():
179
+ idx = int(choice) - 1
180
+ keys = list(options_dict.keys())
181
+ if 0 <= idx < len(keys):
182
+ return keys[idx]
183
+
184
+ if choice in options_dict:
185
+ return choice
186
+
187
+ if allow_custom and choice:
188
+ return choice
189
+
190
+ return None
191
+
192
+ def interactive_mode(pipe):
193
+ log("Starting interactive style conditioning mode")
194
+
195
+ # Get main prompt
196
+ main_prompt = input("\nEnter your main scene/subject: ").strip()
197
+ if not main_prompt:
198
+ log("No prompt provided")
199
+ return
200
+
201
+ # Show and select style
202
+ list_options(STYLES, "Available Styles")
203
+ style_key = get_user_choice(STYLES, "Choose style (number/name/custom)")
204
+
205
+ # Show and select mood
206
+ list_options(MOODS, "Available Moods")
207
+ mood_key = get_user_choice(MOODS, "Choose mood (number/name/custom/blank)", allow_custom=True)
208
+
209
+ # Get additional parameters
210
+ variations = input("Number of variations (default 1): ").strip()
211
+ variations = int(variations) if variations.isdigit() else 1
212
+
213
+ quality = input("Quality [draft/standard/high] (default standard): ").strip()
214
+ quality = quality if quality in QUALITY_PRESETS else "standard"
215
+
216
+ negative = input("Negative prompt (optional): ").strip()
217
+
218
+ # Generate images
219
+ generate_styled_images(pipe, main_prompt, style_key, mood_key, variations, quality, negative)
220
+
221
+ # -------- 5. GENERATION FUNCTION --------
222
+
223
+ def generate_styled_images(pipe, main_prompt, style_key, mood_key, variations, quality, custom_negative=""):
224
+ # Build the full prompt
225
+ full_prompt = main_prompt
226
+ style_negative = ""
227
+ mood_negative = ""
228
+
229
+ if style_key and style_key in STYLES:
230
+ full_prompt += f", {STYLES[style_key]['prompt']}"
231
+ style_negative = STYLES[style_key]['negative']
232
+ elif style_key:
233
+ full_prompt += f", {style_key}"
234
+
235
+ if mood_key and mood_key in MOODS:
236
+ full_prompt += f", {MOODS[mood_key]['prompt']}"
237
+ mood_negative = MOODS[mood_key]['negative']
238
+ elif mood_key:
239
+ full_prompt += f", {mood_key}"
240
+
241
+ # Build negative prompt
242
+ negative_parts = [part for part in [style_negative, mood_negative, custom_negative] if part]
243
+ full_negative = ", ".join(negative_parts) if negative_parts else None
244
+
245
+ # Get quality settings
246
+ quality_settings = QUALITY_PRESETS[quality]
247
+
248
+ log(f"Full prompt: {full_prompt}")
249
+ log(f"Negative prompt: {full_negative or '[none]'}")
250
+ log(f"Quality: {quality} ({quality_settings['steps']} steps)")
251
+ log(f"Generating {variations} variation(s)")
252
+
253
+ # Generate images
254
+ for i in range(variations):
255
+ seed = torch.seed()
256
+ generator = torch.manual_seed(seed) if device == "cpu" else torch.Generator(device).manual_seed(seed)
257
+
258
+ with torch.autocast(device) if device == "cuda" else torch.no_grad():
259
+ result = pipe(
260
+ full_prompt,
261
+ negative_prompt=full_negative,
262
+ height=quality_settings["size"][1],
263
+ width=quality_settings["size"][0],
264
+ num_inference_steps=quality_settings["steps"],
265
+ guidance_scale=quality_settings["guidance"],
266
+ generator=generator,
267
+ )
268
+
269
+ img = result.images[0]
270
+
271
+ # Create filename
272
+ prompt_slug = "_".join(main_prompt.lower().split()[:4])
273
+ style_slug = (style_key or "nostyle").replace(" ", "")[:10]
274
+ mood_slug = (mood_key or "nomood").replace(" ", "")[:10]
275
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
276
+
277
+ filename = f"{prompt_slug[:20]}_{style_slug}_{mood_slug}_{quality}_{timestamp}_seed{seed}_v{i+1}.png"
278
+ filepath = os.path.join(OUTPUT_DIR, filename)
279
+
280
+ img.save(filepath)
281
+
282
+ # Save metadata
283
+ metadata = {
284
+ "main_prompt": main_prompt,
285
+ "style": style_key,
286
+ "mood": mood_key,
287
+ "full_prompt": full_prompt,
288
+ "negative_prompt": full_negative,
289
+ "quality": quality,
290
+ "seed": seed,
291
+ "variation": i + 1,
292
+ "timestamp": datetime.now().isoformat(),
293
+ "settings": quality_settings
294
+ }
295
+
296
+ metadata_file = filepath.replace('.png', '_metadata.json')
297
+ with open(metadata_file, 'w') as f:
298
+ json.dump(metadata, f, indent=2)
299
+
300
+ log(f"Generated variation {i+1}: {filepath}")
301
+
302
+ log(f"Phase 1.B complete - {variations} styled images generated")
303
+
304
+ # -------- 6. MAIN FUNCTION --------
305
+
306
+ def main():
307
+ args = setup_args()
308
+
309
+ # Handle list commands
310
+ if args.list_styles:
311
+ list_options(STYLES, "Available Styles")
312
+ return
313
+
314
+ if args.list_moods:
315
+ list_options(MOODS, "Available Moods")
316
+ return
317
+
318
+ # Load model
319
+ pipe = load_model()
320
+
321
+ # Interactive mode
322
+ if args.interactive:
323
+ interactive_mode(pipe)
324
+ return
325
+
326
+ # Command line mode
327
+ main_prompt = " ".join(args.prompt) if args.prompt else input("Enter main prompt: ").strip()
328
+ if not main_prompt:
329
+ log("No prompt provided")
330
+ return
331
+
332
+ generate_styled_images(
333
+ pipe, main_prompt, args.style, args.mood,
334
+ args.variations, args.quality, args.negative or ""
335
+ )
336
+
337
+ if __name__ == "__main__":
338
+ main()
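To make the prompt assembly concrete: with main prompt "a fox in the snow", `--style watercolor` and `--mood dreamy`, generate_styled_images builds the full prompt "a fox in the snow, watercolor painting, soft colors, flowing paint, dreamy atmosphere, soft lighting, ethereal" and the combined negative prompt "harsh lines, digital, photographic, harsh, stark, realistic". Typical invocations: `python src/generators/compi_phase1b_advanced_styling.py --list-styles` to browse the presets, then e.g. `python src/generators/compi_phase1b_advanced_styling.py "a fox in the snow" --style watercolor --mood dreamy --quality high --variations 2`.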
src/generators/compi_phase1b_styled_generation.py ADDED
@@ -0,0 +1,172 @@
1
+ # compi_phase1b_styled_generation.py
2
+
3
+ import os
4
+ import sys
5
+ import torch
6
+ from datetime import datetime
7
+ from diffusers import StableDiffusionPipeline
8
+ from PIL import Image
9
+
10
+ # Add project root to path for imports
11
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
12
+
13
+ # -------- 1. SETUP --------
14
+ if torch.cuda.is_available():
15
+ device = "cuda"
16
+ print("Running on CUDA GPU.")
17
+ else:
18
+ device = "cpu"
19
+ print("Running on CPU.")
20
+
21
+ OUTPUT_DIR = os.path.join(os.path.dirname(__file__), '..', '..', "outputs")
22
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
23
+
24
+ def log(msg):
25
+ now = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
26
+ print(f"{now} {msg}")
27
+
28
+ # -------- 2. LOAD MODEL --------
29
+ MODEL_NAME = "runwayml/stable-diffusion-v1-5"
30
+ log(f"Loading model: {MODEL_NAME}")
31
+
32
+ def dummy_safety_checker(images, **kwargs):
33
+ return images, [False] * len(images)
34
+
35
+ try:
36
+ pipe = StableDiffusionPipeline.from_pretrained(
37
+ MODEL_NAME,
38
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32,
39
+ safety_checker=dummy_safety_checker,
40
+ )
41
+ except Exception as e:
42
+ log(f"Error loading model: {e}")
43
+ sys.exit(1)
44
+
45
+ pipe = pipe.to(device)
46
+ pipe.enable_attention_slicing()
47
+
48
+ log("Model loaded.")
49
+
50
+ # -------- 3. STYLE & MOOD PROMPT ENGINEERING --------
51
+
52
+ # Predefined styles and moods (add more as desired)
53
+ STYLES = [
54
+ "digital art",
55
+ "oil painting",
56
+ "watercolor",
57
+ "cyberpunk",
58
+ "impressionist",
59
+ "concept art",
60
+ "anime",
61
+ "photorealistic",
62
+ "minimalist",
63
+ "surrealism",
64
+ "pixel art",
65
+ "steampunk",
66
+ "3d render"
67
+ ]
68
+
69
+ MOODS = [
70
+ "dreamy atmosphere",
71
+ "dark and moody",
72
+ "peaceful",
73
+ "vibrant and energetic",
74
+ "melancholic",
75
+ "mysterious",
76
+ "whimsical",
77
+ "serene",
78
+ "uplifting",
79
+ "dramatic lighting",
80
+ "retro"
81
+ ]
82
+
83
+ def main():
84
+ """Main function for command-line execution"""
85
+ # Input: main prompt
86
+ if len(sys.argv) > 1:
87
+ main_prompt = " ".join(sys.argv[1:])
88
+ log(f"Prompt from command line: {main_prompt}")
89
+ else:
90
+ main_prompt = input("Enter your main scene/subject (e.g., 'A forest of bioluminescent trees'): ").strip()
91
+
92
+ if not main_prompt:
93
+ log("No main prompt entered. Exiting.")
94
+ sys.exit(0)
95
+
96
+ # Style selector
97
+ print("\nChoose an art style from the list or enter your own:")
98
+ for idx, style in enumerate(STYLES, 1):
99
+ print(f" {idx}. {style}")
100
+ style_choice = input(f"Enter style number [1-{len(STYLES)}] or type your own: ").strip()
101
+ if style_choice.isdigit() and 1 <= int(style_choice) <= len(STYLES):
102
+ style = STYLES[int(style_choice)-1]
103
+ else:
104
+ style = style_choice if style_choice else STYLES[0]
105
+ log(f"Style selected: {style}")
106
+
107
+ # Mood selector
108
+ print("\nChoose a mood from the list or enter your own:")
109
+ for idx, mood in enumerate(MOODS, 1):
110
+ print(f" {idx}. {mood}")
111
+ mood_choice = input(f"Enter mood number [1-{len(MOODS)}] or type your own (or leave blank): ").strip()
112
+ if mood_choice.isdigit() and 1 <= int(mood_choice) <= len(MOODS):
113
+ mood = MOODS[int(mood_choice)-1]
114
+ elif mood_choice:
115
+ mood = mood_choice
116
+ else:
117
+ mood = ""
118
+ log(f"Mood selected: {mood if mood else '[none]'}")
119
+
120
+ # Combine all for final prompt
121
+ full_prompt = main_prompt
122
+ if style: full_prompt += f", {style}"
123
+ if mood: full_prompt += f", {mood}"
124
+ log(f"Full prompt: {full_prompt}")
125
+
126
+ # -------- 4. GENERATION PARAMETERS --------
127
+
128
+ NUM_VARIATIONS = input("How many variations to generate? (default 1): ").strip()
129
+ try:
130
+ NUM_VARIATIONS = max(1, int(NUM_VARIATIONS))
131
+ except Exception:
132
+ NUM_VARIATIONS = 1
133
+
134
+ INFERENCE_STEPS = 30
135
+ GUIDANCE_SCALE = 7.5
136
+ HEIGHT = 512
137
+ WIDTH = 512
138
+
139
+ # -------- 5. IMAGE GENERATION --------
140
+
141
+ log(f"Generating {NUM_VARIATIONS} image(s) for prompt: '{full_prompt}'")
142
+ images = []
143
+
144
+ for i in range(NUM_VARIATIONS):
145
+ seed = torch.seed() # random seed for each variation
146
+ generator = torch.manual_seed(seed) if device == "cpu" else torch.Generator(device).manual_seed(seed)
147
+ with torch.autocast(device) if device == "cuda" else torch.no_grad():
148
+ result = pipe(
149
+ full_prompt,
150
+ height=HEIGHT,
151
+ width=WIDTH,
152
+ num_inference_steps=INFERENCE_STEPS,
153
+ guidance_scale=GUIDANCE_SCALE,
154
+ generator=generator,
155
+ )
156
+ img: Image.Image = result.images[0]
157
+ # Compose filename
158
+ prompt_slug = "_".join(main_prompt.lower().split()[:5])
159
+ style_slug = style.replace(" ", "")[:10]
160
+ mood_slug = mood.replace(" ", "")[:10] if mood else "none"
161
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
162
+ fname = f"{prompt_slug[:25]}_{style_slug}_{mood_slug}_{timestamp}_seed{seed}_v{i+1}.png"
163
+ fpath = os.path.join(OUTPUT_DIR, fname)
164
+ img.save(fpath)
165
+ log(f"Image saved: {fpath}")
166
+ images.append(fpath)
167
+
168
+ log(f"All {NUM_VARIATIONS} images generated and saved.")
169
+ log("Phase 1.B complete.")
170
+
171
+ if __name__ == "__main__":
172
+ main()
src/generators/compi_phase1d_cli_evaluation.py ADDED
@@ -0,0 +1,341 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CompI Phase 1.D: Command-Line Quality Evaluation Tool
4
+
5
+ Command-line interface for batch evaluation and analysis of generated images.
6
+
7
+ Usage:
8
+ python src/generators/compi_phase1d_cli_evaluation.py --help
9
+ python src/generators/compi_phase1d_cli_evaluation.py --analyze
10
+ python src/generators/compi_phase1d_cli_evaluation.py --batch-score 4 3 4 4 3
11
+ """
12
+
13
+ import os
14
+ import argparse
15
+ import json
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+ from typing import Dict, List
19
+
20
+ import pandas as pd
21
+ from PIL import Image
22
+
23
+ # Import functions from the main evaluation module
24
+ from compi_phase1d_evaluate_quality import (
25
+ parse_filename, get_image_metrics, load_existing_evaluations,
26
+ save_evaluation, EVALUATION_CRITERIA, OUTPUT_DIR, EVAL_CSV
27
+ )
28
+
29
+ def setup_args():
30
+ """Setup command line arguments."""
31
+ parser = argparse.ArgumentParser(
32
+ description="CompI Phase 1.D: Command-Line Quality Evaluation",
33
+ formatter_class=argparse.RawDescriptionHelpFormatter,
34
+ epilog="""
35
+ Examples:
36
+ # Analyze existing evaluations
37
+ python %(prog)s --analyze
38
+
39
+ # Batch score all unevaluated images (prompt_match, style, mood, quality, appeal)
40
+ python %(prog)s --batch-score 4 3 4 4 3 --notes "Batch evaluation - good quality"
41
+
42
+ # Generate detailed report
43
+ python %(prog)s --report --output evaluation_report.txt
44
+
45
+ # List unevaluated images
46
+ python %(prog)s --list-unevaluated
47
+ """
48
+ )
49
+
50
+ parser.add_argument("--output-dir", default=OUTPUT_DIR,
51
+ help="Directory containing generated images")
52
+
53
+ # Analysis commands
54
+ parser.add_argument("--analyze", action="store_true",
55
+ help="Display evaluation summary and statistics")
56
+
57
+ parser.add_argument("--report", action="store_true",
58
+ help="Generate detailed evaluation report")
59
+
60
+ parser.add_argument("--output", "-o",
61
+ help="Output file for report (default: stdout)")
62
+
63
+ # Batch evaluation
64
+ parser.add_argument("--batch-score", nargs=5, type=int, metavar=("PROMPT", "STYLE", "MOOD", "QUALITY", "APPEAL"),
65
+ help="Batch score all unevaluated images (1-5 for each criteria)")
66
+
67
+ parser.add_argument("--notes", default="CLI batch evaluation",
68
+ help="Notes for batch evaluation")
69
+
70
+ # Listing commands
71
+ parser.add_argument("--list-all", action="store_true",
72
+ help="List all images with evaluation status")
73
+
74
+ parser.add_argument("--list-evaluated", action="store_true",
75
+ help="List only evaluated images")
76
+
77
+ parser.add_argument("--list-unevaluated", action="store_true",
78
+ help="List only unevaluated images")
79
+
80
+ # Filtering
81
+ parser.add_argument("--style", help="Filter by style")
82
+ parser.add_argument("--mood", help="Filter by mood")
83
+
84
+ return parser.parse_args()
85
+
86
+ def load_images(output_dir: str) -> List[Dict]:
87
+ """Load and parse all images from output directory."""
88
+ if not os.path.exists(output_dir):
89
+ print(f"❌ Output directory '{output_dir}' not found!")
90
+ return []
91
+
92
+ image_files = [f for f in os.listdir(output_dir) if f.lower().endswith('.png')]
93
+ parsed_images = []
94
+
95
+ for fname in image_files:
96
+ metadata = parse_filename(fname)
97
+ if metadata:
98
+ parsed_images.append(metadata)
99
+
100
+ return parsed_images
101
+
102
+ def filter_images(images: List[Dict], style: str = None, mood: str = None) -> List[Dict]:
103
+ """Filter images by style and/or mood."""
104
+ filtered = images
105
+
106
+ if style:
107
+ filtered = [img for img in filtered if img.get('style', '').lower() == style.lower()]
108
+
109
+ if mood:
110
+ filtered = [img for img in filtered if img.get('mood', '').lower() == mood.lower()]
111
+
112
+ return filtered
113
+
114
+ def analyze_evaluations(existing_evals: Dict):
115
+ """Display evaluation analysis."""
116
+ if not existing_evals:
117
+ print("❌ No evaluations found.")
118
+ return
119
+
120
+ df = pd.DataFrame.from_dict(existing_evals, orient='index')
121
+
122
+ print("📊 CompI Phase 1.D - Evaluation Analysis")
123
+ print("=" * 50)
124
+ print(f"Total Evaluated Images: {len(df)}")
125
+ print()
126
+
127
+ # Score statistics
128
+ print("📈 Score Statistics:")
129
+ for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
130
+ if criterion_key in df.columns:
131
+ mean_score = df[criterion_key].mean()
132
+ std_score = df[criterion_key].std()
133
+ min_score = df[criterion_key].min()
134
+ max_score = df[criterion_key].max()
135
+
136
+ print(f" {criterion_info['name']:20}: {mean_score:.2f} ± {std_score:.2f} (range: {min_score}-{max_score})")
137
+
138
+ print()
139
+
140
+ # Style analysis
141
+ if 'style' in df.columns and 'prompt_match' in df.columns:
142
+ print("🎨 Top Performing Styles (by Prompt Match):")
143
+ style_scores = df.groupby('style')['prompt_match'].mean().sort_values(ascending=False)
144
+ for style, score in style_scores.head(5).items():
145
+ print(f" {style:15}: {score:.2f}")
146
+ print()
147
+
148
+ # Mood analysis
149
+ if 'mood' in df.columns and 'creative_appeal' in df.columns:
150
+ print("🌟 Top Performing Moods (by Creative Appeal):")
151
+ mood_scores = df.groupby('mood')['creative_appeal'].mean().sort_values(ascending=False)
152
+ for mood, score in mood_scores.head(5).items():
153
+ print(f" {mood:15}: {score:.2f}")
154
+ print()
155
+
156
+ def generate_detailed_report(existing_evals: Dict) -> str:
157
+ """Generate detailed evaluation report."""
158
+ if not existing_evals:
159
+ return "No evaluations found."
160
+
161
+ df = pd.DataFrame.from_dict(existing_evals, orient='index')
162
+
163
+ report_lines = [
164
+ "# CompI Phase 1.D - Detailed Evaluation Report",
165
+ f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
166
+ f"Total Images Evaluated: {len(df)}",
167
+ "",
168
+ "## Overall Performance Summary"
169
+ ]
170
+
171
+ # Overall statistics
172
+ for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
173
+ if criterion_key in df.columns:
174
+ mean_score = df[criterion_key].mean()
175
+ std_score = df[criterion_key].std()
176
+ report_lines.append(f"- **{criterion_info['name']}**: {mean_score:.2f} ± {std_score:.2f}")
177
+
178
+ # Distribution analysis
179
+ report_lines.extend([
180
+ "",
181
+ "## Score Distribution Analysis"
182
+ ])
183
+
184
+ for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
185
+ if criterion_key in df.columns:
186
+ scores = df[criterion_key]
187
+ report_lines.extend([
188
+ f"",
189
+ f"### {criterion_info['name']}",
190
+ f"- Mean: {scores.mean():.2f}",
191
+ f"- Median: {scores.median():.2f}",
192
+ f"- Mode: {scores.mode().iloc[0] if not scores.mode().empty else 'N/A'}",
193
+ f"- Range: {scores.min()}-{scores.max()}",
194
+ f"- Distribution: " + " | ".join([f"{i}★: {(scores == i).sum()}" for i in range(1, 6)])
195
+ ])
196
+
197
+ # Style/Mood performance
198
+ if 'style' in df.columns:
199
+ report_lines.extend([
200
+ "",
201
+ "## Style Performance Analysis"
202
+ ])
203
+
204
+ for criterion_key in EVALUATION_CRITERIA.keys():
205
+ if criterion_key in df.columns:
206
+ style_performance = df.groupby('style')[criterion_key].agg(['mean', 'count']).sort_values('mean', ascending=False)
207
+ report_lines.extend([
208
+ f"",
209
+ f"### {EVALUATION_CRITERIA[criterion_key]['name']} by Style",
210
+ ])
211
+
212
+ for style, (mean_score, count) in style_performance.iterrows():
213
+ report_lines.append(f"- {style}: {mean_score:.2f} (n={count})")
214
+
215
+ # Recommendations
216
+ report_lines.extend([
217
+ "",
218
+ "## Recommendations",
219
+ "",
220
+ "### Areas for Improvement"
221
+ ])
222
+
223
+ # Find lowest scoring criteria
224
+ criterion_means = {}
225
+ for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
226
+ if criterion_key in df.columns:
227
+ criterion_means[criterion_info['name']] = df[criterion_key].mean()
228
+
229
+ if criterion_means:
230
+ lowest_criteria = sorted(criterion_means.items(), key=lambda x: x[1])[:2]
231
+ for criterion_name, score in lowest_criteria:
232
+ report_lines.append(f"- Focus on improving **{criterion_name}** (current: {score:.2f}/5)")
233
+
234
+ report_lines.extend([
235
+ "",
236
+ "### Best Practices",
237
+ "- Continue systematic evaluation for trend analysis",
238
+ "- Experiment with parameter adjustments for low-scoring areas",
239
+ "- Consider A/B testing different generation approaches",
240
+ "- Document successful style/mood combinations for reuse"
241
+ ])
242
+
243
+ return "\n".join(report_lines)
244
+
245
+ def batch_evaluate_images(images: List[Dict], scores: List[int], notes: str, output_dir: str):
246
+ """Batch evaluate unevaluated images."""
247
+ existing_evals = load_existing_evaluations()
248
+ unevaluated = [img for img in images if img['filename'] not in existing_evals]
249
+
250
+ if not unevaluated:
251
+ print("✅ All images are already evaluated!")
252
+ return
253
+
254
+ print(f"📦 Batch evaluating {len(unevaluated)} images...")
255
+
256
+ # Map scores to criteria
257
+ criteria_keys = list(EVALUATION_CRITERIA.keys())
258
+ score_dict = dict(zip(criteria_keys, scores))
259
+
260
+ for i, img_data in enumerate(unevaluated):
261
+ fname = img_data["filename"]
262
+ img_path = os.path.join(output_dir, fname)
263
+
264
+ try:
265
+ metrics = get_image_metrics(img_path)
266
+ save_evaluation(fname, img_data, score_dict, notes, metrics)
267
+ print(f" ✅ Evaluated: {fname}")
268
+ except Exception as e:
269
+ print(f" ❌ Error evaluating {fname}: {e}")
270
+
271
+ print(f"🎉 Batch evaluation completed!")
272
+
273
+ def list_images(images: List[Dict], existing_evals: Dict, show_evaluated: bool = True, show_unevaluated: bool = True):
274
+ """List images with evaluation status."""
275
+ print(f"📋 Image List ({len(images)} total)")
276
+ print("-" * 80)
277
+
278
+ for img_data in images:
279
+ fname = img_data["filename"]
280
+ is_evaluated = fname in existing_evals
281
+
282
+ if (show_evaluated and is_evaluated) or (show_unevaluated and not is_evaluated):
283
+ status = "✅" if is_evaluated else "❌"
284
+ prompt = img_data.get('prompt', 'unknown')[:30]
285
+ style = img_data.get('style', 'unknown')[:15]
286
+ mood = img_data.get('mood', 'unknown')[:15]
287
+
288
+ print(f"{status} {fname}")
289
+ print(f" Prompt: {prompt}... | Style: {style} | Mood: {mood}")
290
+
291
+ if is_evaluated:
292
+ eval_data = existing_evals[fname]
293
+ scores = [f"{k}:{eval_data.get(k, 'N/A')}" for k in EVALUATION_CRITERIA.keys() if k in eval_data]
294
+ print(f" Scores: {' | '.join(scores[:3])}...")
295
+ print()
296
+
297
+ def main():
298
+ """Main CLI function."""
299
+ args = setup_args()
300
+
301
+ # Load images
302
+ images = load_images(args.output_dir)
303
+ if not images:
304
+ return
305
+
306
+ # Apply filters
307
+ images = filter_images(images, args.style, args.mood)
308
+
309
+ # Load existing evaluations
310
+ existing_evals = load_existing_evaluations()
311
+
312
+ # Execute commands
313
+ if args.analyze:
314
+ analyze_evaluations(existing_evals)
315
+
316
+ elif args.report:
317
+ report = generate_detailed_report(existing_evals)
318
+ if args.output:
319
+ with open(args.output, 'w', encoding='utf-8') as f:
320
+ f.write(report)
321
+ print(f"📄 Report saved to: {args.output}")
322
+ else:
323
+ print(report)
324
+
325
+ elif args.batch_score:
326
+ batch_evaluate_images(images, args.batch_score, args.notes, args.output_dir)
327
+
328
+ elif args.list_all:
329
+ list_images(images, existing_evals, True, True)
330
+
331
+ elif args.list_evaluated:
332
+ list_images(images, existing_evals, True, False)
333
+
334
+ elif args.list_unevaluated:
335
+ list_images(images, existing_evals, False, True)
336
+
337
+ else:
338
+ print("❓ No command specified. Use --help for usage information.")
339
+
340
+ if __name__ == "__main__":
341
+ main()
src/generators/compi_phase1d_evaluate_quality.py ADDED
@@ -0,0 +1,496 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CompI Phase 1.D: Baseline Output Quality Evaluation Tool
4
+
5
+ This tool provides systematic evaluation of generated images with:
6
+ - Visual quality assessment
7
+ - Prompt adherence scoring
8
+ - Style/mood consistency evaluation
9
+ - Objective metrics calculation
10
+ - Comprehensive logging and tracking
11
+
12
+ Usage:
13
+ python src/generators/compi_phase1d_evaluate_quality.py
14
+ # Or via wrapper: python run_evaluation.py
15
+ """
16
+
17
+ import os
18
+ import re
19
+ import csv
20
+ import json
21
+ from datetime import datetime
22
+ from pathlib import Path
23
+ from typing import Dict, List, Optional, Tuple
24
+ import argparse
25
+
26
+ import streamlit as st
27
+ from PIL import Image
28
+ import imagehash
29
+ import pandas as pd
30
+
31
+ # -------- 1. CONFIGURATION --------
32
+
33
+ OUTPUT_DIR = "outputs"
34
+ EVAL_CSV = "outputs/evaluation_log.csv"
35
+ EVAL_SUMMARY = "outputs/evaluation_summary.json"
36
+
37
+ # Filename patterns for different CompI phases
38
+ FILENAME_PATTERNS = [
39
+ # Phase 1.B Advanced styling: prompt_style_mood_timestamp_seed_variation
40
+ re.compile(r"^(?P<prompt>[a-z0-9_,]+)_(?P<style>[a-zA-Z0-9]+)_(?P<mood>[a-zA-Z0-9]+)_(?P<timestamp>\d{8}_\d{6})_seed(?P<seed>\d+)_v(?P<variation>\d+)\.png$"),
41
+ # Phase 1.A Basic generation: prompt_timestamp_seed
42
+ re.compile(r"^(?P<prompt>[a-z0-9_,]+)_(?P<timestamp>\d{8}_\d{6})_seed(?P<seed>\d+)\.png$"),
43
+ # Alternative pattern: prompt_style_timestamp_seed
44
+ re.compile(r"^(?P<prompt>[a-z0-9_,]+)_(?P<style>[a-zA-Z0-9]+)_(?P<timestamp>\d{8}_\d{6})_seed(?P<seed>\d+)\.png$"),
45
+ ]
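+ # parse_filename() tries these patterns in order and returns the named capture groups of the first match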
46
+
47
+ # Evaluation criteria
48
+ EVALUATION_CRITERIA = {
49
+ "prompt_match": {
50
+ "name": "Prompt Adherence",
51
+ "description": "How well does the image match the text prompt?",
52
+ "scale": "1=Poor match, 3=Good match, 5=Perfect match"
53
+ },
54
+ "style_consistency": {
55
+ "name": "Style Consistency",
56
+ "description": "How well does the image reflect the intended artistic style?",
57
+ "scale": "1=Style not evident, 3=Style present, 5=Style perfectly executed"
58
+ },
59
+ "mood_atmosphere": {
60
+ "name": "Mood & Atmosphere",
61
+ "description": "How well does the image convey the intended mood/atmosphere?",
62
+ "scale": "1=Wrong mood, 3=Neutral/adequate, 5=Perfect mood"
63
+ },
64
+ "technical_quality": {
65
+ "name": "Technical Quality",
66
+ "description": "Overall image quality (resolution, composition, artifacts)",
67
+ "scale": "1=Poor quality, 3=Acceptable, 5=Excellent quality"
68
+ },
69
+ "creative_appeal": {
70
+ "name": "Creative Appeal",
71
+ "description": "Subjective aesthetic and creative value",
72
+ "scale": "1=Unappealing, 3=Decent, 5=Highly appealing"
73
+ }
74
+ }
75
+
76
+ # -------- 2. UTILITY FUNCTIONS --------
77
+
78
+ def parse_filename(filename: str) -> Optional[Dict]:
79
+ """Parse filename to extract metadata using multiple patterns."""
80
+ for pattern in FILENAME_PATTERNS:
81
+ match = pattern.match(filename)
82
+ if match:
83
+ data = match.groupdict()
84
+ data["filename"] = filename
85
+ # Set defaults for missing fields
86
+ data.setdefault("style", "unknown")
87
+ data.setdefault("mood", "unknown")
88
+ data.setdefault("variation", "1")
89
+ return data
90
+ return None
91
+
92
+ def get_image_metrics(image_path: str) -> Dict:
93
+ """Calculate objective image metrics."""
94
+ try:
95
+ img = Image.open(image_path)
96
+ file_size = os.path.getsize(image_path)
97
+
98
+ # Perceptual hashes for similarity detection
99
+ phash = str(imagehash.phash(img))
100
+ dhash = str(imagehash.dhash(img))
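+ # phash/dhash are perceptual fingerprints; comparing their Hamming distance can flag near-duplicate generations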
101
+
102
+ # Basic image stats
103
+ width, height = img.size
104
+ aspect_ratio = width / height
105
+
106
+ # Color analysis
107
+ if img.mode == 'RGB':
108
+ colors = img.getcolors(maxcolors=256*256*256)
109
+ unique_colors = len(colors) if colors else 0
110
+ else:
111
+ unique_colors = 0
112
+
113
+ return {
114
+ "width": width,
115
+ "height": height,
116
+ "aspect_ratio": round(aspect_ratio, 3),
117
+ "file_size_kb": round(file_size / 1024, 2),
118
+ "unique_colors": unique_colors,
119
+ "phash": phash,
120
+ "dhash": dhash,
121
+ "format": img.format,
122
+ "mode": img.mode
123
+ }
124
+ except Exception as e:
125
+ return {"error": str(e)}
126
+
127
+ def load_existing_evaluations() -> Dict:
128
+ """Load existing evaluations from CSV."""
129
+ if not os.path.exists(EVAL_CSV):
130
+ return {}
131
+
132
+ try:
133
+ df = pd.read_csv(EVAL_CSV)
134
+ return df.set_index('filename').to_dict('index')
135
+ except Exception:
136
+ return {}
137
+
138
+ def save_evaluation(filename: str, metadata: Dict, scores: Dict, notes: str, metrics: Dict):
139
+ """Save evaluation to CSV file."""
140
+ # Prepare row data
141
+ row_data = {
142
+ "filename": filename,
143
+ "timestamp": datetime.now().isoformat(),
144
+ "prompt": metadata.get("prompt", ""),
145
+ "style": metadata.get("style", ""),
146
+ "mood": metadata.get("mood", ""),
147
+ "seed": metadata.get("seed", ""),
148
+ "variation": metadata.get("variation", ""),
149
+ "generation_timestamp": metadata.get("timestamp", ""),
150
+ "notes": notes,
151
+ **scores, # Add all evaluation scores
152
+ **{f"metric_{k}": v for k, v in metrics.items() if k != "error"} # Add metrics with prefix
153
+ }
154
+
155
+ # Create CSV if it doesn't exist
156
+ file_exists = os.path.exists(EVAL_CSV)
157
+
158
+ with open(EVAL_CSV, "a", newline='', encoding='utf-8') as f:
159
+ fieldnames = list(row_data.keys())
160
+ writer = csv.DictWriter(f, fieldnames=fieldnames)
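+ # The header is written only when the CSV is first created, so later rows should keep the same key set to stay aligned with it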
161
+
162
+ if not file_exists:
163
+ writer.writeheader()
164
+ writer.writerow(row_data)
165
+
166
+ # -------- 3. STREAMLIT UI --------
167
+
168
+ def main():
169
+ st.set_page_config(
170
+ page_title="CompI - Quality Evaluation",
171
+ layout="wide",
172
+ initial_sidebar_state="expanded"
173
+ )
174
+
175
+ st.title("🕵️ CompI Phase 1.D: Baseline Output Quality Evaluation")
176
+
177
+ st.markdown("""
178
+ **Systematic evaluation tool for CompI-generated images**
179
+
180
+ This tool helps you:
181
+ - 📊 Assess image quality across multiple criteria
182
+ - 📈 Track improvements over time
183
+ - 🔍 Calculate objective metrics
184
+ - 📝 Maintain detailed evaluation logs
185
+ """)
186
+
187
+ # Sidebar configuration
188
+ with st.sidebar:
189
+ st.header("⚙️ Configuration")
190
+
191
+ # Output directory selection
192
+ output_dir = st.text_input("Output Directory", OUTPUT_DIR)
193
+
194
+ # Evaluation mode
195
+ eval_mode = st.selectbox(
196
+ "Evaluation Mode",
197
+ ["Single Image Review", "Batch Evaluation", "Summary Analysis"]
198
+ )
199
+
200
+ # Filter options
201
+ st.subheader("🔍 Filters")
202
+ show_evaluated = st.checkbox("Show already evaluated", True)
203
+ show_unevaluated = st.checkbox("Show unevaluated", True)
204
+
205
+ # Load images
206
+ if not os.path.exists(output_dir):
207
+ st.error(f"Output directory '{output_dir}' not found!")
208
+ return
209
+
210
+ image_files = [f for f in os.listdir(output_dir) if f.lower().endswith('.png')]
211
+ parsed_images = []
212
+
213
+ for fname in image_files:
214
+ metadata = parse_filename(fname)
215
+ if metadata:
216
+ parsed_images.append(metadata)
217
+
218
+ if not parsed_images:
219
+ st.warning("No CompI-generated images found with recognizable filename patterns.")
220
+ st.info("Expected patterns: prompt_style_mood_timestamp_seed_variation.png")
221
+ return
222
+
223
+ # Load existing evaluations
224
+ existing_evals = load_existing_evaluations()
225
+
226
+ # Filter images based on evaluation status
227
+ filtered_images = []
228
+ for img_data in parsed_images:
229
+ fname = img_data["filename"]
230
+ is_evaluated = fname in existing_evals
231
+
232
+ if (show_evaluated and is_evaluated) or (show_unevaluated and not is_evaluated):
233
+ img_data["is_evaluated"] = is_evaluated
234
+ filtered_images.append(img_data)
235
+
236
+ st.info(f"Found {len(filtered_images)} images matching your filters")
237
+
238
+ # Main evaluation interface
239
+ if eval_mode == "Single Image Review":
240
+ single_image_evaluation(filtered_images, existing_evals, output_dir)
241
+ elif eval_mode == "Batch Evaluation":
242
+ batch_evaluation(filtered_images, existing_evals, output_dir)
243
+ else:
244
+ summary_analysis(existing_evals)
245
+
246
+ def single_image_evaluation(images: List[Dict], existing_evals: Dict, output_dir: str):
247
+ """Single image evaluation interface."""
248
+ if not images:
249
+ st.warning("No images available for evaluation.")
250
+ return
251
+
252
+ # Image selection
253
+ image_options = [f"{img['filename']} {'✅' if img['is_evaluated'] else '❌'}" for img in images]
254
+ selected_idx = st.selectbox("Select Image to Evaluate", range(len(image_options)), format_func=lambda x: image_options[x])
255
+
256
+ if selected_idx is None:
257
+ return
258
+
259
+ img_data = images[selected_idx]
260
+ fname = img_data["filename"]
261
+ img_path = os.path.join(output_dir, fname)
262
+
263
+ # Display image and metadata
264
+ col1, col2 = st.columns([1, 1])
265
+
266
+ with col1:
267
+ st.subheader("🖼️ Image")
268
+ try:
269
+ image = Image.open(img_path)
270
+ st.image(image, use_container_width=True)
271
+
272
+ # Calculate metrics
273
+ metrics = get_image_metrics(img_path)
274
+ if "error" not in metrics:
275
+ st.subheader("📊 Objective Metrics")
276
+ st.json(metrics)
277
+ except Exception as e:
278
+ st.error(f"Error loading image: {e}")
279
+ return
280
+
281
+ with col2:
282
+ st.subheader("📋 Metadata")
283
+ st.json({k: v for k, v in img_data.items() if k != "filename"})
284
+
285
+ # Evaluation form
286
+ st.subheader("⭐ Evaluation")
287
+
288
+ # Load existing scores if available
289
+ existing = existing_evals.get(fname, {})
290
+
291
+ with st.form(f"eval_form_{fname}"):
292
+ scores = {}
293
+ for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
294
+ scores[criterion_key] = st.slider(
295
+ f"{criterion_info['name']}",
296
+ min_value=1, max_value=5,
297
+ value=int(existing.get(criterion_key, 3)),
298
+ help=f"{criterion_info['description']}\n{criterion_info['scale']}"
299
+ )
300
+
301
+ notes = st.text_area(
302
+ "Notes & Comments",
303
+ value=existing.get("notes", ""),
304
+ help="Additional observations, issues, or suggestions"
305
+ )
306
+
307
+ submitted = st.form_submit_button("💾 Save Evaluation")
308
+
309
+ if submitted:
310
+ save_evaluation(fname, img_data, scores, notes, metrics)
311
+ st.success(f"✅ Evaluation saved for {fname}")
312
+ st.experimental_rerun()
313
+
314
+ def batch_evaluation(images: List[Dict], existing_evals: Dict, output_dir: str):
315
+ """Batch evaluation interface for multiple images."""
316
+ st.subheader("📦 Batch Evaluation")
317
+
318
+ unevaluated = [img for img in images if not img['is_evaluated']]
319
+
320
+ if not unevaluated:
321
+ st.info("All images have been evaluated!")
322
+ return
323
+
324
+ st.info(f"{len(unevaluated)} images pending evaluation")
325
+
326
+ # Quick batch scoring
327
+ with st.form("batch_eval_form"):
328
+ st.write("**Quick Batch Scoring** (applies to all unevaluated images)")
329
+
330
+ batch_scores = {}
331
+ for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
332
+ batch_scores[criterion_key] = st.slider(
333
+ f"Default {criterion_info['name']}",
334
+ min_value=1, max_value=5, value=3,
335
+ help=f"Default score for {criterion_info['description']}"
336
+ )
337
+
338
+ batch_notes = st.text_area("Default Notes", "Batch evaluation")
339
+
340
+ if st.form_submit_button("Apply to All Unevaluated"):
341
+ progress_bar = st.progress(0)
342
+
343
+ for i, img_data in enumerate(unevaluated):
344
+ fname = img_data["filename"]
345
+ img_path = os.path.join(output_dir, fname)
346
+ metrics = get_image_metrics(img_path)
347
+
348
+ save_evaluation(fname, img_data, batch_scores, batch_notes, metrics)
349
+ progress_bar.progress((i + 1) / len(unevaluated))
350
+
351
+ st.success(f"✅ Batch evaluation completed for {len(unevaluated)} images!")
352
+ st.experimental_rerun()
353
+
354
+ def summary_analysis(existing_evals: Dict):
355
+ """Display evaluation summary and analytics."""
356
+ st.subheader("📈 Evaluation Summary & Analytics")
357
+
358
+ if not existing_evals:
359
+ st.warning("No evaluations found. Please evaluate some images first.")
360
+ return
361
+
362
+ # Convert to DataFrame for analysis
363
+ df = pd.DataFrame.from_dict(existing_evals, orient='index')
364
+
365
+ # Basic statistics
366
+ col1, col2, col3 = st.columns(3)
367
+
368
+ with col1:
369
+ st.metric("Total Evaluated", len(df))
370
+
371
+ with col2:
372
+ if 'prompt_match' in df.columns:
373
+ avg_prompt_match = df['prompt_match'].mean()
374
+ st.metric("Avg Prompt Match", f"{avg_prompt_match:.2f}/5")
375
+
376
+ with col3:
377
+ if 'technical_quality' in df.columns:
378
+ avg_quality = df['technical_quality'].mean()
379
+ st.metric("Avg Technical Quality", f"{avg_quality:.2f}/5")
380
+
381
+ # Detailed analytics
382
+ st.subheader("📊 Detailed Analytics")
383
+
384
+ # Score distribution
385
+ if any(col in df.columns for col in EVALUATION_CRITERIA.keys()):
386
+ st.write("**Score Distribution by Criteria**")
387
+
388
+ score_cols = [col for col in EVALUATION_CRITERIA.keys() if col in df.columns]
389
+ if score_cols:
390
+ score_data = df[score_cols].mean().sort_values(ascending=False)
391
+ st.bar_chart(score_data)
392
+
393
+ # Style/Mood analysis
394
+ if 'style' in df.columns and 'mood' in df.columns:
395
+ st.write("**Performance by Style & Mood**")
396
+
397
+ col1, col2 = st.columns(2)
398
+
399
+ with col1:
400
+ if 'prompt_match' in df.columns:
401
+ style_performance = df.groupby('style')['prompt_match'].mean().sort_values(ascending=False)
402
+ st.write("**Best Performing Styles (Prompt Match)**")
403
+ st.bar_chart(style_performance)
404
+
405
+ with col2:
406
+ if 'creative_appeal' in df.columns:
407
+ mood_performance = df.groupby('mood')['creative_appeal'].mean().sort_values(ascending=False)
408
+ st.write("**Best Performing Moods (Creative Appeal)**")
409
+ st.bar_chart(mood_performance)
410
+
411
+ # Recent evaluations
412
+ st.subheader("🕒 Recent Evaluations")
413
+
414
+ if 'timestamp' in df.columns:
415
+ recent_df = df.sort_values('timestamp', ascending=False).head(10)
416
+ display_cols = ['prompt', 'style', 'mood'] + [col for col in EVALUATION_CRITERIA.keys() if col in df.columns]
417
+ display_cols = [col for col in display_cols if col in recent_df.columns]
418
+
419
+ if display_cols:
420
+ st.dataframe(recent_df[display_cols])
421
+
422
+ # Export options
423
+ st.subheader("💾 Export Data")
424
+
425
+ col1, col2 = st.columns(2)
426
+
427
+ with col1:
428
+ if st.button("📊 Download CSV"):
429
+ csv_data = df.to_csv()
430
+ st.download_button(
431
+ label="Download Evaluation Data",
432
+ data=csv_data,
433
+ file_name=f"compi_evaluation_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
434
+ mime="text/csv"
435
+ )
436
+
437
+ with col2:
438
+ if st.button("📋 Generate Report"):
439
+ # Generate summary report
440
+ report = generate_evaluation_report(df)
441
+ st.text_area("Evaluation Report", report, height=300)
442
+
443
+ def generate_evaluation_report(df: pd.DataFrame) -> str:
444
+ """Generate a text summary report of evaluations."""
445
+ report_lines = [
446
+ "# CompI Phase 1.D - Evaluation Report",
447
+ f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
448
+ "",
449
+ "## Summary Statistics",
450
+ f"- Total Images Evaluated: {len(df)}",
451
+ ]
452
+
453
+ # Add score summaries
454
+ for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
455
+ if criterion_key in df.columns:
456
+ mean_score = df[criterion_key].mean()
457
+ std_score = df[criterion_key].std()
458
+ report_lines.append(f"- {criterion_info['name']}: {mean_score:.2f} ± {std_score:.2f}")
459
+
460
+ # Add style/mood analysis
461
+ if 'style' in df.columns:
462
+ report_lines.extend([
463
+ "",
464
+ "## Style Performance",
465
+ ])
466
+
467
+ if 'prompt_match' in df.columns:
468
+ style_scores = df.groupby('style')['prompt_match'].mean().sort_values(ascending=False)
469
+ for style, score in style_scores.head(5).items():
470
+ report_lines.append(f"- {style}: {score:.2f}")
471
+
472
+ if 'mood' in df.columns:
473
+ report_lines.extend([
474
+ "",
475
+ "## Mood Performance",
476
+ ])
477
+
478
+ if 'creative_appeal' in df.columns:
479
+ mood_scores = df.groupby('mood')['creative_appeal'].mean().sort_values(ascending=False)
480
+ for mood, score in mood_scores.head(5).items():
481
+ report_lines.append(f"- {mood}: {score:.2f}")
482
+
483
+ # Add recommendations
484
+ report_lines.extend([
485
+ "",
486
+ "## Recommendations",
487
+ "- Focus on improving lowest-scoring criteria",
488
+ "- Experiment with best-performing style/mood combinations",
489
+ "- Consider adjusting generation parameters for consistency",
490
+ "- Continue systematic evaluation for trend analysis"
491
+ ])
492
+
493
+ return "\n".join(report_lines)
494
+
495
+ if __name__ == "__main__":
496
+ main()
src/generators/compi_phase1e_dataset_prep.py ADDED
@@ -0,0 +1,329 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CompI Phase 1.E: Dataset Preparation for LoRA Fine-tuning
4
+
5
+ This tool helps prepare your personal style dataset for LoRA training:
6
+ - Organize and validate style images
7
+ - Generate appropriate captions
8
+ - Resize and format images for training
9
+ - Create training/validation splits
10
+
11
+ Usage:
12
+ python src/generators/compi_phase1e_dataset_prep.py --help
13
+ python src/generators/compi_phase1e_dataset_prep.py --input-dir my_style_images --style-name "my_art_style"
14
+ """
15
+
16
+ import os
17
+ import argparse
18
+ import json
19
+ import shutil
20
+ from pathlib import Path
21
+ from typing import List, Dict, Tuple
22
+ import random
23
+
24
+ from PIL import Image, ImageOps
25
+ import pandas as pd
26
+
27
+ # -------- 1. CONFIGURATION --------
28
+
29
+ DEFAULT_IMAGE_SIZE = 512
30
+ SUPPORTED_FORMATS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
31
+ MIN_IMAGES_RECOMMENDED = 10
32
+ TRAIN_SPLIT_RATIO = 0.8
33
+
34
+ # -------- 2. UTILITY FUNCTIONS --------
35
+
36
+ def setup_args():
37
+ """Setup command line arguments."""
38
+ parser = argparse.ArgumentParser(
39
+ description="CompI Phase 1.E: Dataset Preparation for LoRA Fine-tuning",
40
+ formatter_class=argparse.RawDescriptionHelpFormatter,
41
+ epilog="""
42
+ Examples:
43
+ # Prepare dataset from a folder of images
44
+ python %(prog)s --input-dir my_artwork --style-name "impressionist_style"
45
+
46
+ # Custom output directory and image size
47
+ python %(prog)s --input-dir paintings --style-name "oil_painting" --output-dir datasets/oil_style --size 768
48
+
49
+ # Generate captions with custom trigger word
50
+ python %(prog)s --input-dir sketches --style-name "pencil_sketch" --trigger-word "sketch_style"
51
+ """
52
+ )
53
+
54
+ parser.add_argument("--input-dir", required=True,
55
+ help="Directory containing your style images")
56
+
57
+ parser.add_argument("--style-name", required=True,
58
+ help="Name for your style (used in file naming and captions)")
59
+
60
+ parser.add_argument("--output-dir",
61
+ help="Output directory for prepared dataset (default: datasets/{style_name})")
62
+
63
+ parser.add_argument("--trigger-word",
64
+ help="Trigger word for style (default: style_name)")
65
+
66
+ parser.add_argument("--size", type=int, default=DEFAULT_IMAGE_SIZE,
67
+ help=f"Target image size in pixels (default: {DEFAULT_IMAGE_SIZE})")
68
+
69
+ parser.add_argument("--caption-template",
70
+ default="a painting in {trigger_word} style",
71
+ help="Template for generating captions")
72
+
73
+ parser.add_argument("--train-split", type=float, default=TRAIN_SPLIT_RATIO,
74
+ help=f"Ratio for train/validation split (default: {TRAIN_SPLIT_RATIO})")
75
+
76
+ parser.add_argument("--copy-images", action="store_true",
77
+ help="Copy images instead of creating symlinks")
78
+
79
+ parser.add_argument("--validate-only", action="store_true",
80
+ help="Only validate input directory without processing")
81
+
82
+ return parser.parse_args()
83
+
84
+ def validate_image_directory(input_dir: str) -> Tuple[List[str], List[str]]:
85
+ """Validate input directory and return valid/invalid image files."""
86
+ if not os.path.exists(input_dir):
87
+ raise FileNotFoundError(f"Input directory not found: {input_dir}")
88
+
89
+ all_files = os.listdir(input_dir)
90
+ valid_images = []
91
+ invalid_files = []
92
+
93
+ for filename in all_files:
94
+ filepath = os.path.join(input_dir, filename)
95
+
96
+ # Check if it's a file
97
+ if not os.path.isfile(filepath):
98
+ continue
99
+
100
+ # Check extension
101
+ ext = Path(filename).suffix.lower()
102
+ if ext not in SUPPORTED_FORMATS:
103
+ invalid_files.append(f"{filename} (unsupported format: {ext})")
104
+ continue
105
+
106
+ # Try to open image
107
+ try:
108
+ with Image.open(filepath) as img:
109
+ # Basic validation
110
+ if img.size[0] < 64 or img.size[1] < 64:
111
+ invalid_files.append(f"{filename} (too small: {img.size})")
112
+ continue
113
+
114
+ valid_images.append(filename)
115
+ except Exception as e:
116
+ invalid_files.append(f"{filename} (corrupt: {str(e)})")
117
+
118
+ return valid_images, invalid_files
119
+
120
+ def process_image(input_path: str, output_path: str, target_size: int) -> Dict:
121
+ """Process a single image for training."""
122
+ with Image.open(input_path) as img:
123
+ # Convert to RGB if needed
124
+ if img.mode != 'RGB':
125
+ img = img.convert('RGB')
126
+
127
+ # Get original dimensions
128
+ original_size = img.size
129
+
130
+ # Resize maintaining aspect ratio, then center crop
131
+ img = ImageOps.fit(img, (target_size, target_size), Image.Resampling.LANCZOS)
132
+
133
+ # Save processed image
134
+ img.save(output_path, 'PNG')  # PNG is lossless; Pillow's PNG writer ignores a JPEG-style 'quality' argument
135
+
136
+ return {
137
+ 'original_size': original_size,
138
+ 'processed_size': img.size,
139
+ 'format': 'PNG'
140
+ }
141
+
142
+ def generate_captions(image_files: List[str], caption_template: str, trigger_word: str) -> Dict[str, str]:
143
+ """Generate captions for training images."""
144
+ captions = {}
145
+
146
+ for filename in image_files:
147
+ # Basic caption using template
148
+ caption = caption_template.format(trigger_word=trigger_word)
149
+
150
+ # You could add more sophisticated caption generation here
151
+ # For example, using BLIP or other image captioning models
152
+
153
+ captions[filename] = caption
154
+
155
+ return captions
156
+
157
+ def create_dataset_structure(output_dir: str, style_name: str):
158
+ """Create the dataset directory structure."""
159
+ dataset_dir = Path(output_dir)
160
+
161
+ # Create main directories
162
+ dirs_to_create = [
163
+ dataset_dir,
164
+ dataset_dir / "images",
165
+ dataset_dir / "train",
166
+ dataset_dir / "validation"
167
+ ]
168
+
169
+ for dir_path in dirs_to_create:
170
+ dir_path.mkdir(parents=True, exist_ok=True)
171
+
172
+ return dataset_dir
173
+
174
+ def split_dataset(image_files: List[str], train_ratio: float) -> Tuple[List[str], List[str]]:
175
+ """Split images into train and validation sets."""
176
+ random.shuffle(image_files)
177
+
178
+ train_count = int(len(image_files) * train_ratio)
179
+ train_files = image_files[:train_count]
180
+ val_files = image_files[train_count:]
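+ # Very small datasets can leave this validation list empty; the training script then falls back to validating on the train split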
181
+
182
+ return train_files, val_files
183
+
184
+ def save_metadata(dataset_dir: Path, metadata: Dict):
185
+ """Save dataset metadata."""
186
+ metadata_file = dataset_dir / "dataset_info.json"
187
+
188
+ with open(metadata_file, 'w') as f:
189
+ json.dump(metadata, f, indent=2)
190
+
191
+ print(f"📄 Dataset metadata saved to: {metadata_file}")
192
+
193
+ def create_captions_file(dataset_dir: Path, captions: Dict[str, str], split_name: str):
194
+ """Create captions file for training."""
195
+ captions_file = dataset_dir / f"{split_name}_captions.txt"
196
+
197
+ with open(captions_file, 'w') as f:
198
+ for filename, caption in captions.items():
199
+ f.write(f"{filename}: {caption}\n")
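+ # One "filename: caption" pair per line; the LoRA training script's StyleDataset splits each line on the first colon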
200
+
201
+ return captions_file
202
+
203
+ # -------- 3. MAIN PROCESSING FUNCTION --------
204
+
205
+ def prepare_dataset(args):
206
+ """Main dataset preparation function."""
207
+ print(f"🎨 CompI Phase 1.E: Preparing LoRA Dataset for '{args.style_name}'")
208
+ print("=" * 60)
209
+
210
+ # Setup paths
211
+ input_dir = Path(args.input_dir)
212
+ if args.output_dir:
213
+ output_dir = Path(args.output_dir)
214
+ else:
215
+ output_dir = Path("datasets") / args.style_name
216
+
217
+ trigger_word = args.trigger_word or args.style_name
218
+
219
+ print(f"📁 Input directory: {input_dir}")
220
+ print(f"📁 Output directory: {output_dir}")
221
+ print(f"🎯 Style name: {args.style_name}")
222
+ print(f"🔤 Trigger word: {trigger_word}")
223
+ print(f"📐 Target size: {args.size}x{args.size}")
224
+
225
+ # Validate input directory
226
+ print(f"\n🔍 Validating input directory...")
227
+ valid_images, invalid_files = validate_image_directory(str(input_dir))
228
+
229
+ print(f"✅ Found {len(valid_images)} valid images")
230
+ if invalid_files:
231
+ print(f"⚠️ Found {len(invalid_files)} invalid files:")
232
+ for invalid in invalid_files[:5]: # Show first 5
233
+ print(f" - {invalid}")
234
+ if len(invalid_files) > 5:
235
+ print(f" ... and {len(invalid_files) - 5} more")
236
+
237
+ if len(valid_images) < MIN_IMAGES_RECOMMENDED:
238
+ print(f"⚠️ Warning: Only {len(valid_images)} images found. Recommended minimum: {MIN_IMAGES_RECOMMENDED}")
239
+ print(" Consider adding more images for better style learning.")
240
+
241
+ if args.validate_only:
242
+ print("✅ Validation complete (--validate-only specified)")
243
+ return
244
+
245
+ # Create dataset structure
246
+ print(f"\n📁 Creating dataset structure...")
247
+ dataset_dir = create_dataset_structure(str(output_dir), args.style_name)
248
+
249
+ # Split dataset
250
+ train_files, val_files = split_dataset(valid_images, args.train_split)
251
+ print(f"📊 Dataset split: {len(train_files)} train, {len(val_files)} validation")
252
+
253
+ # Generate captions
254
+ print(f"\n📝 Generating captions...")
255
+ all_captions = generate_captions(valid_images, args.caption_template, trigger_word)
256
+
257
+ # Process images
258
+ print(f"\n🖼️ Processing images...")
259
+ processed_count = 0
260
+ processing_stats = []
261
+
262
+ for split_name, file_list in [("train", train_files), ("validation", val_files)]:
263
+ if not file_list:
264
+ continue
265
+
266
+ split_dir = dataset_dir / split_name
267
+ split_captions = {}
268
+
269
+ for filename in file_list:
270
+ input_path = input_dir / filename
271
+ output_filename = f"{Path(filename).stem}.png"
272
+ output_path = split_dir / output_filename
273
+
274
+ try:
275
+ stats = process_image(str(input_path), str(output_path), args.size)
276
+ processing_stats.append(stats)
277
+ split_captions[output_filename] = all_captions[filename]
278
+ processed_count += 1
279
+
280
+ if processed_count % 10 == 0:
281
+ print(f" Processed {processed_count}/{len(valid_images)} images...")
282
+
283
+ except Exception as e:
284
+ print(f"❌ Error processing {filename}: {e}")
285
+
286
+ # Create captions file for this split
287
+ if split_captions:
288
+ captions_file = create_captions_file(dataset_dir, split_captions, split_name)
289
+ print(f"📝 Created {split_name} captions: {captions_file}")
290
+
291
+ # Save metadata
292
+ metadata = {
293
+ 'style_name': args.style_name,
294
+ 'trigger_word': trigger_word,
295
+ 'total_images': len(valid_images),
296
+ 'train_images': len(train_files),
297
+ 'validation_images': len(val_files),
298
+ 'image_size': args.size,
299
+ 'caption_template': args.caption_template,
300
+ 'created_at': pd.Timestamp.now().isoformat(),
301
+ 'processing_stats': {
302
+ 'processed_count': processed_count,
303
+ 'failed_count': len(valid_images) - processed_count
304
+ }
305
+ }
306
+
307
+ save_metadata(dataset_dir, metadata)
308
+
309
+ print(f"\n🎉 Dataset preparation complete!")
310
+ print(f"📁 Dataset location: {dataset_dir}")
311
+ print(f"📊 Ready for LoRA training with {processed_count} processed images")
312
+ print(f"\n💡 Next steps:")
313
+ print(f" 1. Review the generated dataset in: {dataset_dir}")
314
+ print(f" 2. Run LoRA training: python src/generators/compi_phase1e_lora_training.py --dataset-dir {dataset_dir}")
315
+
316
+ def main():
317
+ """Main function."""
318
+ args = setup_args()
319
+
320
+ try:
321
+ prepare_dataset(args)
322
+ except Exception as e:
323
+ print(f"❌ Error: {e}")
324
+ return 1
325
+
326
+ return 0
327
+
328
+ if __name__ == "__main__":
329
+ exit(main())
src/generators/compi_phase1e_lora_training.py ADDED
@@ -0,0 +1,458 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CompI Phase 1.E: LoRA Fine-tuning for Personal Style
4
+
5
+ This script implements LoRA (Low-Rank Adaptation) fine-tuning for Stable Diffusion
6
+ to learn your personal artistic style.
7
+
8
+ Usage:
9
+ python src/generators/compi_phase1e_lora_training.py --dataset-dir datasets/my_style
10
+ python src/generators/compi_phase1e_lora_training.py --help
11
+ """
12
+
13
+ import os
14
+ import argparse
15
+ import json
16
+ import math
17
+ from pathlib import Path
18
+ from typing import Dict, List, Optional
19
+ import logging
20
+
21
+ import torch
22
+ import torch.nn.functional as F
23
+ from torch.utils.data import Dataset, DataLoader
24
+ from PIL import Image
25
+ import numpy as np
26
+ from tqdm import tqdm
27
+
28
+ # Diffusers and transformers
29
+ from diffusers import (
30
+ StableDiffusionPipeline,
31
+ UNet2DConditionModel,
32
+ DDPMScheduler,
33
+ AutoencoderKL
34
+ )
35
+ from transformers import CLIPTextModel, CLIPTokenizer
36
+ from peft import LoraConfig, get_peft_model, TaskType
37
+
38
+ # -------- 1. CONFIGURATION --------
39
+
40
+ DEFAULT_MODEL = "runwayml/stable-diffusion-v1-5"
41
+ DEFAULT_RESOLUTION = 512
42
+ DEFAULT_BATCH_SIZE = 1
43
+ DEFAULT_LEARNING_RATE = 1e-4
44
+ DEFAULT_EPOCHS = 100
45
+ DEFAULT_LORA_RANK = 4
46
+ DEFAULT_LORA_ALPHA = 32
47
+
48
+ # -------- 2. DATASET CLASS --------
49
+
50
+ class StyleDataset(Dataset):
51
+ """Dataset class for LoRA fine-tuning."""
52
+
53
+ def __init__(self, dataset_dir: str, split: str = "train", resolution: int = 512):
54
+ self.dataset_dir = Path(dataset_dir)
55
+ self.split = split
56
+ self.resolution = resolution
57
+
58
+ # Load images and captions
59
+ self.images_dir = self.dataset_dir / split
60
+ self.captions_file = self.dataset_dir / f"{split}_captions.txt"
61
+
62
+ if not self.images_dir.exists():
63
+ raise FileNotFoundError(f"Images directory not found: {self.images_dir}")
64
+
65
+ if not self.captions_file.exists():
66
+ raise FileNotFoundError(f"Captions file not found: {self.captions_file}")
67
+
68
+ # Load captions
69
+ self.image_captions = {}
70
+ with open(self.captions_file, 'r') as f:
71
+ for line in f:
72
+ if ':' in line:
73
+ filename, caption = line.strip().split(':', 1)
74
+ self.image_captions[filename.strip()] = caption.strip()
75
+
76
+ # Get list of images
77
+ self.image_files = [f for f in os.listdir(self.images_dir)
78
+ if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
79
+
80
+ # Filter to only images with captions
81
+ self.image_files = [f for f in self.image_files if f in self.image_captions]
82
+
83
+ print(f"Loaded {len(self.image_files)} images for {split} split")
84
+
85
+ def __len__(self):
86
+ return len(self.image_files)
87
+
88
+ def __getitem__(self, idx):
89
+ filename = self.image_files[idx]
90
+ image_path = self.images_dir / filename
91
+ caption = self.image_captions[filename]
92
+
93
+ # Load and preprocess image
94
+ image = Image.open(image_path).convert('RGB')
95
+ image = image.resize((self.resolution, self.resolution), Image.Resampling.LANCZOS)
96
+
97
+ # Convert to tensor and normalize to [-1, 1]
98
+ image = np.array(image).astype(np.float32) / 255.0
99
+ image = (image - 0.5) / 0.5
100
+ image = torch.from_numpy(image).permute(2, 0, 1)
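+ # HWC -> CHW float tensor in [-1, 1], the input range the Stable Diffusion VAE encoder expects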
101
+
102
+ return {
103
+ 'pixel_values': image,
104
+ 'caption': caption,
105
+ 'filename': filename
106
+ }
107
+
108
+ # -------- 3. TRAINING FUNCTIONS --------
109
+
110
+ def setup_args():
111
+ """Setup command line arguments."""
112
+ parser = argparse.ArgumentParser(
113
+ description="CompI Phase 1.E: LoRA Fine-tuning for Personal Style",
114
+ formatter_class=argparse.RawDescriptionHelpFormatter
115
+ )
116
+
117
+ parser.add_argument("--dataset-dir", required=True,
118
+ help="Directory containing prepared dataset")
119
+
120
+ parser.add_argument("--output-dir",
121
+ help="Output directory for LoRA weights (default: lora_models/{style_name})")
122
+
123
+ parser.add_argument("--model-name", default=DEFAULT_MODEL,
124
+ help=f"Base Stable Diffusion model (default: {DEFAULT_MODEL})")
125
+
126
+ parser.add_argument("--resolution", type=int, default=DEFAULT_RESOLUTION,
127
+ help=f"Training resolution (default: {DEFAULT_RESOLUTION})")
128
+
129
+ parser.add_argument("--batch-size", type=int, default=DEFAULT_BATCH_SIZE,
130
+ help=f"Training batch size (default: {DEFAULT_BATCH_SIZE})")
131
+
132
+ parser.add_argument("--learning-rate", type=float, default=DEFAULT_LEARNING_RATE,
133
+ help=f"Learning rate (default: {DEFAULT_LEARNING_RATE})")
134
+
135
+ parser.add_argument("--epochs", type=int, default=DEFAULT_EPOCHS,
136
+ help=f"Number of training epochs (default: {DEFAULT_EPOCHS})")
137
+
138
+ parser.add_argument("--lora-rank", type=int, default=DEFAULT_LORA_RANK,
139
+ help=f"LoRA rank (default: {DEFAULT_LORA_RANK})")
140
+
141
+ parser.add_argument("--lora-alpha", type=int, default=DEFAULT_LORA_ALPHA,
142
+ help=f"LoRA alpha (default: {DEFAULT_LORA_ALPHA})")
143
+
144
+ parser.add_argument("--save-steps", type=int, default=100,
145
+ help="Save checkpoint every N steps")
146
+
147
+ parser.add_argument("--validation-steps", type=int, default=50,
148
+ help="Run validation every N steps")
149
+
150
+ parser.add_argument("--mixed-precision", action="store_true",
151
+ help="Use mixed precision training")
152
+
153
+ parser.add_argument("--gradient-checkpointing", action="store_true",
154
+ help="Use gradient checkpointing to save memory")
155
+
156
+ return parser.parse_args()
157
+
158
+ def load_models(model_name: str, device: str):
159
+ """Load Stable Diffusion components."""
160
+ print(f"Loading models from {model_name}...")
161
+
162
+ # Load tokenizer and text encoder
163
+ tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder="tokenizer")
164
+ text_encoder = CLIPTextModel.from_pretrained(model_name, subfolder="text_encoder")
165
+
166
+ # Load VAE
167
+ vae = AutoencoderKL.from_pretrained(model_name, subfolder="vae")
168
+
169
+ # Load UNet
170
+ unet = UNet2DConditionModel.from_pretrained(model_name, subfolder="unet")
171
+
172
+ # Load noise scheduler
173
+ noise_scheduler = DDPMScheduler.from_pretrained(model_name, subfolder="scheduler")
174
+
175
+ # Move to device
176
+ text_encoder.to(device)
177
+ vae.to(device)
178
+ unet.to(device)
179
+
180
+ # Set to eval mode (we only train LoRA adapters)
181
+ text_encoder.eval()
182
+ vae.eval()
183
+ unet.train() # UNet needs to be in train mode for LoRA
184
+
185
+ return tokenizer, text_encoder, vae, unet, noise_scheduler
186
+
187
+ def setup_lora(unet: UNet2DConditionModel, lora_rank: int, lora_alpha: int):
188
+ """Setup LoRA adapters for UNet."""
189
+ print(f"Setting up LoRA with rank={lora_rank}, alpha={lora_alpha}")
190
+
191
+ # Define LoRA config
192
+ lora_config = LoraConfig(
193
+ r=lora_rank,
194
+ lora_alpha=lora_alpha,
195
+ target_modules=[
196
+ "to_k", "to_q", "to_v", "to_out.0",
197
+ "proj_in", "proj_out",
198
+ "ff.net.0.proj", "ff.net.2"
199
+ ],
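+ # Attention (to_q/k/v/out) and feed-forward/projection modules of the SD UNet; LoRA adapters are injected only into these layers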
200
+ lora_dropout=0.1,
201
+ )
202
+
203
+ # Apply LoRA to UNet
204
+ unet = get_peft_model(unet, lora_config)
205
+
206
+ # Print trainable parameters
207
+ trainable_params = sum(p.numel() for p in unet.parameters() if p.requires_grad)
208
+ total_params = sum(p.numel() for p in unet.parameters())
209
+
210
+ print(f"Trainable parameters: {trainable_params:,} ({100 * trainable_params / total_params:.2f}%)")
211
+
212
+ return unet
213
+
214
+ def encode_text(tokenizer, text_encoder, captions: List[str], device: str):
215
+ """Encode text captions."""
216
+ inputs = tokenizer(
217
+ captions,
218
+ padding="max_length",
219
+ max_length=tokenizer.model_max_length,
220
+ truncation=True,
221
+ return_tensors="pt"
222
+ )
223
+
224
+ with torch.no_grad():
225
+ text_embeddings = text_encoder(inputs.input_ids.to(device))[0]
226
+
227
+ return text_embeddings
228
+
229
+ def training_step(batch, unet, vae, text_encoder, tokenizer, noise_scheduler, device):
230
+ """Single training step."""
231
+ pixel_values = batch['pixel_values'].to(device)
232
+ captions = batch['caption']
233
+
234
+ # Encode images to latent space
235
+ with torch.no_grad():
236
+ latents = vae.encode(pixel_values).latent_dist.sample()
237
+ latents = latents * vae.config.scaling_factor
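+ # scaling_factor (0.18215 for SD 1.5) normalizes latent magnitudes to the range the UNet was trained on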
238
+
239
+ # Sample noise
240
+ noise = torch.randn_like(latents)
241
+ batch_size = latents.shape[0]
242
+
243
+ # Sample random timesteps
244
+ timesteps = torch.randint(
245
+ 0, noise_scheduler.config.num_train_timesteps,
246
+ (batch_size,), device=device
247
+ ).long()
248
+
249
+ # Add noise to latents
250
+ noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
251
+
252
+ # Encode text
253
+ text_embeddings = encode_text(tokenizer, text_encoder, captions, device)
254
+
255
+ # Predict noise
256
+ noise_pred = unet(noisy_latents, timesteps, text_embeddings).sample
257
+
258
+ # Calculate loss
259
+ loss = F.mse_loss(noise_pred.float(), noise.float(), reduction="mean")
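+ # Standard epsilon-prediction diffusion objective: the UNet is trained to recover the noise that was added to the latents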
260
+
261
+ return loss
262
+
263
+ def validate_model(val_dataloader, unet, vae, text_encoder, tokenizer, noise_scheduler, device):
264
+ """Validation step."""
265
+ unet.eval()
266
+ total_loss = 0
267
+ num_batches = 0
268
+
269
+ with torch.no_grad():
270
+ for batch in val_dataloader:
271
+ loss = training_step(batch, unet, vae, text_encoder, tokenizer, noise_scheduler, device)
272
+ total_loss += loss.item()
273
+ num_batches += 1
274
+
275
+ unet.train()
276
+ return total_loss / num_batches if num_batches > 0 else 0
277
+
278
+ def save_lora_weights(unet, output_dir: Path, step: int):
279
+ """Save LoRA weights."""
280
+ checkpoint_dir = output_dir / f"checkpoint-{step}"
281
+ checkpoint_dir.mkdir(parents=True, exist_ok=True)
282
+
283
+ # Save LoRA weights
284
+ unet.save_pretrained(checkpoint_dir)
285
+
286
+ print(f"💾 Saved checkpoint to: {checkpoint_dir}")
287
+ return checkpoint_dir
288
+
289
+ # -------- 4. MAIN TRAINING FUNCTION --------
290
+
291
+ def train_lora(args):
292
+ """Main training function."""
293
+ print(f"🎨 CompI Phase 1.E: Starting LoRA Training")
294
+ print("=" * 50)
295
+
296
+ # Setup device
297
+ device = "cuda" if torch.cuda.is_available() else "cpu"
298
+ print(f"🖥️ Using device: {device}")
299
+
300
+ # Load dataset info
301
+ dataset_dir = Path(args.dataset_dir)
302
+ info_file = dataset_dir / "dataset_info.json"
303
+
304
+ if info_file.exists():
305
+ with open(info_file) as f:
306
+ dataset_info = json.load(f)
307
+ style_name = dataset_info.get('style_name', 'custom_style')
308
+ print(f"🎯 Training style: {style_name}")
309
+ else:
310
+ style_name = dataset_dir.name
311
+ print(f"⚠️ No dataset info found, using directory name: {style_name}")
312
+
313
+ # Setup output directory
314
+ if args.output_dir:
315
+ output_dir = Path(args.output_dir)
316
+ else:
317
+ output_dir = Path("lora_models") / style_name
318
+
319
+ output_dir.mkdir(parents=True, exist_ok=True)
320
+ print(f"📁 Output directory: {output_dir}")
321
+
322
+ # Load datasets
323
+ print(f"📊 Loading datasets...")
324
+ train_dataset = StyleDataset(args.dataset_dir, "train", args.resolution)
325
+
326
+ try:
327
+ val_dataset = StyleDataset(args.dataset_dir, "validation", args.resolution)
328
+ has_validation = True
329
+ except FileNotFoundError:
330
+ print("⚠️ No validation set found, using train set for validation")
331
+ val_dataset = train_dataset
332
+ has_validation = False
333
+
334
+ # Create data loaders
335
+ train_dataloader = DataLoader(
336
+ train_dataset,
337
+ batch_size=args.batch_size,
338
+ shuffle=True,
339
+ num_workers=2,
340
+ pin_memory=True
341
+ )
342
+
343
+ val_dataloader = DataLoader(
344
+ val_dataset,
345
+ batch_size=args.batch_size,
346
+ shuffle=False,
347
+ num_workers=2,
348
+ pin_memory=True
349
+ )
350
+
351
+ # Load models
352
+ tokenizer, text_encoder, vae, unet, noise_scheduler = load_models(args.model_name, device)
353
+
354
+ # Setup LoRA
355
+ unet = setup_lora(unet, args.lora_rank, args.lora_alpha)
356
+
357
+ # Setup optimizer
358
+ optimizer = torch.optim.AdamW(
359
+ unet.parameters(),
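+ # After get_peft_model only the LoRA adapter weights have requires_grad=True, so the optimizer effectively updates just those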
360
+ lr=args.learning_rate,
361
+ betas=(0.9, 0.999),
362
+ weight_decay=0.01,
363
+ eps=1e-08
364
+ )
365
+
366
+ # Calculate total steps
367
+ total_steps = len(train_dataloader) * args.epochs
368
+ print(f"📈 Total training steps: {total_steps}")
369
+
370
+ # Training loop
371
+ print(f"\n🚀 Starting training...")
372
+ global_step = 0
373
+ best_val_loss = float('inf')
374
+
375
+ for epoch in range(args.epochs):
376
+ print(f"\n📅 Epoch {epoch + 1}/{args.epochs}")
377
+
378
+ epoch_loss = 0
379
+ progress_bar = tqdm(train_dataloader, desc=f"Training")
380
+
381
+ for batch in progress_bar:
382
+ # Training step
383
+ loss = training_step(batch, unet, vae, text_encoder, tokenizer, noise_scheduler, device)
384
+
385
+ # Backward pass
386
+ loss.backward()
387
+ optimizer.step()
388
+ optimizer.zero_grad()
389
+
390
+ # Update metrics
391
+ epoch_loss += loss.item()
392
+ global_step += 1
393
+
394
+ # Update progress bar
395
+ progress_bar.set_postfix({
396
+ 'loss': f"{loss.item():.4f}",
397
+ 'avg_loss': f"{epoch_loss / (progress_bar.n + 1):.4f}"
398
+ })
399
+
400
+ # Validation
401
+ if global_step % args.validation_steps == 0:
402
+ val_loss = validate_model(val_dataloader, unet, vae, text_encoder, tokenizer, noise_scheduler, device)
403
+ print(f"\n📊 Step {global_step}: Train Loss = {loss.item():.4f}, Val Loss = {val_loss:.4f}")
404
+
405
+ # Save best model
406
+ if val_loss < best_val_loss:
407
+ best_val_loss = val_loss
408
+ save_lora_weights(unet, output_dir, global_step)
409
+
410
+ # Save checkpoint
411
+ if global_step % args.save_steps == 0:
412
+ save_lora_weights(unet, output_dir, global_step)
413
+
414
+ # End of epoch
415
+ avg_epoch_loss = epoch_loss / len(train_dataloader)
416
+ print(f"📊 Epoch {epoch + 1} complete. Average loss: {avg_epoch_loss:.4f}")
417
+
418
+ # Save final model
419
+ final_checkpoint = save_lora_weights(unet, output_dir, global_step)
420
+
421
+ # Save training info
422
+ training_info = {
423
+ 'style_name': style_name,
424
+ 'model_name': args.model_name,
425
+ 'total_steps': global_step,
426
+ 'epochs': args.epochs,
427
+ 'learning_rate': args.learning_rate,
428
+ 'lora_rank': args.lora_rank,
429
+ 'lora_alpha': args.lora_alpha,
430
+ 'final_checkpoint': str(final_checkpoint),
431
+ 'best_val_loss': best_val_loss
432
+ }
433
+
434
+ with open(output_dir / "training_info.json", 'w') as f:
435
+ json.dump(training_info, f, indent=2)
436
+
437
+ print(f"\n🎉 Training complete!")
438
+ print(f"📁 LoRA weights saved to: {output_dir}")
439
+ print(f"💡 Next steps:")
440
+ print(f" 1. Test your style: python src/generators/compi_phase1e_style_generation.py --lora-path {final_checkpoint}")
441
+ print(f" 2. Integrate with UI: Use the style in your Streamlit interface")
442
+
443
+ def main():
444
+ """Main function."""
445
+ args = setup_args()
446
+
447
+ try:
448
+ train_lora(args)
449
+ except Exception as e:
450
+ print(f"❌ Training failed: {e}")
451
+ import traceback
452
+ traceback.print_exc()
453
+ return 1
454
+
455
+ return 0
456
+
457
+ if __name__ == "__main__":
458
+ exit(main())
src/generators/compi_phase1e_style_generation.py ADDED
@@ -0,0 +1,406 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CompI Phase 1.E: Personal Style Generation with LoRA
4
+
5
+ Generate images using your trained LoRA personal style weights.
6
+
7
+ Usage:
8
+ python src/generators/compi_phase1e_style_generation.py --lora-path lora_models/my_style/checkpoint-1000
9
+ python src/generators/compi_phase1e_style_generation.py --help
10
+ """
11
+
12
+ import os
13
+ import argparse
14
+ import json
15
+ from datetime import datetime
16
+ from pathlib import Path
17
+ from typing import Optional, List
18
+
19
+ import torch
20
+ from PIL import Image
21
+ from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
22
+ from peft import PeftModel
23
+
24
+ # -------- 1. CONFIGURATION --------
25
+
26
+ DEFAULT_MODEL = "runwayml/stable-diffusion-v1-5"
27
+ DEFAULT_STEPS = 30
28
+ DEFAULT_GUIDANCE = 7.5
29
+ DEFAULT_WIDTH = 512
30
+ DEFAULT_HEIGHT = 512
31
+ OUTPUT_DIR = "outputs"
32
+
33
+ # -------- 2. UTILITY FUNCTIONS --------
34
+
35
+ def setup_args():
36
+ """Setup command line arguments."""
37
+ parser = argparse.ArgumentParser(
38
+ description="CompI Phase 1.E: Personal Style Generation with LoRA",
39
+ formatter_class=argparse.RawDescriptionHelpFormatter,
40
+ epilog="""
41
+ Examples:
42
+ # Generate with trained LoRA style
43
+ python %(prog)s --lora-path lora_models/my_style/checkpoint-1000 "a cat in my_style"
44
+
45
+ # Interactive mode
46
+ python %(prog)s --lora-path lora_models/my_style/checkpoint-1000 --interactive
47
+
48
+ # Multiple variations
49
+ python %(prog)s --lora-path lora_models/my_style/checkpoint-1000 "landscape" --variations 4
50
+ """
51
+ )
52
+
53
+ parser.add_argument("prompt", nargs="*", help="Text prompt for generation")
54
+
55
+ parser.add_argument("--lora-path", required=True,
56
+ help="Path to trained LoRA checkpoint directory")
57
+
58
+ parser.add_argument("--model-name", default=DEFAULT_MODEL,
59
+ help=f"Base Stable Diffusion model (default: {DEFAULT_MODEL})")
60
+
61
+ parser.add_argument("--variations", "-v", type=int, default=1,
62
+ help="Number of variations to generate")
63
+
64
+ parser.add_argument("--steps", type=int, default=DEFAULT_STEPS,
65
+ help=f"Number of inference steps (default: {DEFAULT_STEPS})")
66
+
67
+ parser.add_argument("--guidance", type=float, default=DEFAULT_GUIDANCE,
68
+ help=f"Guidance scale (default: {DEFAULT_GUIDANCE})")
69
+
70
+ parser.add_argument("--width", type=int, default=DEFAULT_WIDTH,
71
+ help=f"Image width (default: {DEFAULT_WIDTH})")
72
+
73
+ parser.add_argument("--height", type=int, default=DEFAULT_HEIGHT,
74
+ help=f"Image height (default: {DEFAULT_HEIGHT})")
75
+
76
+ parser.add_argument("--seed", type=int,
77
+ help="Random seed for reproducible generation")
78
+
79
+ parser.add_argument("--negative", "-n", default="",
80
+ help="Negative prompt")
81
+
82
+ parser.add_argument("--lora-scale", type=float, default=1.0,
83
+ help="LoRA scale factor (0.0-2.0, default: 1.0)")
84
+
85
+ parser.add_argument("--interactive", "-i", action="store_true",
86
+ help="Interactive mode")
87
+
88
+ parser.add_argument("--output-dir", default=OUTPUT_DIR,
89
+ help=f"Output directory (default: {OUTPUT_DIR})")
90
+
91
+ parser.add_argument("--list-styles", action="store_true",
92
+ help="List available LoRA styles")
93
+
94
+ return parser.parse_args()
95
+
96
+ def load_lora_info(lora_path: str) -> dict:
97
+ """Load LoRA training information."""
98
+ lora_dir = Path(lora_path)
99
+
100
+ # Try to find training info
101
+ info_files = [
102
+ lora_dir / "training_info.json",
103
+ lora_dir.parent / "training_info.json"
104
+ ]
105
+
106
+ for info_file in info_files:
107
+ if info_file.exists():
108
+ with open(info_file) as f:
109
+ return json.load(f)
110
+
111
+ # Fallback info
112
+ return {
113
+ 'style_name': lora_dir.parent.name,
114
+ 'model_name': DEFAULT_MODEL,
115
+ 'lora_rank': 4,
116
+ 'lora_alpha': 32
117
+ }
118
+
119
+ def load_pipeline_with_lora(model_name: str, lora_path: str, device: str):
120
+ """Load Stable Diffusion pipeline with LoRA weights."""
121
+ print(f"🔄 Loading base model: {model_name}")
122
+
123
+ # Load base pipeline
124
+ pipe = StableDiffusionPipeline.from_pretrained(
125
+ model_name,
126
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32,
127
+ safety_checker=None,
128
+ requires_safety_checker=False
129
+ )
130
+
131
+ # Use DPM solver for faster inference
132
+ pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
133
+
134
+ print(f"🎨 Loading LoRA weights from: {lora_path}")
135
+
136
+ # Load LoRA weights
137
+ lora_dir = Path(lora_path)
138
+ if not lora_dir.exists():
139
+ raise FileNotFoundError(f"LoRA path not found: {lora_path}")
140
+
141
+ # Apply LoRA to UNet
142
+ pipe.unet = PeftModel.from_pretrained(pipe.unet, lora_path)
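+ # Wraps the base UNet with the trained adapter weights; the base model weights themselves are left unchanged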
143
+
144
+ # Move to device
145
+ pipe = pipe.to(device)
146
+
147
+ # Enable memory efficient attention if available
148
+ if hasattr(pipe, "enable_xformers_memory_efficient_attention"):
149
+ try:
150
+ pipe.enable_xformers_memory_efficient_attention()
151
+ except Exception:
152
+ pass
153
+
154
+ return pipe
155
+
156
+ def generate_with_style(
157
+ pipe,
158
+ prompt: str,
159
+ negative_prompt: str = "",
160
+ num_inference_steps: int = DEFAULT_STEPS,
161
+ guidance_scale: float = DEFAULT_GUIDANCE,
162
+ width: int = DEFAULT_WIDTH,
163
+ height: int = DEFAULT_HEIGHT,
164
+ seed: Optional[int] = None,
165
+ lora_scale: float = 1.0
166
+ ):
167
+ """Generate image with LoRA style."""
168
+
169
+ # Set LoRA scale
170
+ if hasattr(pipe.unet, 'set_adapter_scale'):
171
+ pipe.unet.set_adapter_scale(lora_scale)
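+ # Guarded with hasattr since this method may not be available; when it is missing, the adapter runs at its trained strength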
172
+
173
+ # Setup generator
174
+ if seed is None:
+ # No seed given: draw one explicitly so the value recorded in filenames/metadata matches the actual generation
+ seed = int(torch.randint(0, 2**31 - 1, (1,)).item())
+ generator = torch.Generator(device=pipe.device).manual_seed(seed)
179
+
180
+ # Generate image
181
+ with torch.autocast(pipe.device.type):
182
+ result = pipe(
183
+ prompt=prompt,
184
+ negative_prompt=negative_prompt,
185
+ num_inference_steps=num_inference_steps,
186
+ guidance_scale=guidance_scale,
187
+ width=width,
188
+ height=height,
189
+ generator=generator
190
+ )
191
+
192
+ return result.images[0], seed
193
+
194
+ def save_generated_image(
195
+ image: Image.Image,
196
+ prompt: str,
197
+ style_name: str,
198
+ seed: int,
199
+ variation: int,
200
+ output_dir: str,
201
+ metadata: dict = None
202
+ ):
203
+ """Save generated image with metadata."""
204
+
205
+ # Create output directory
206
+ os.makedirs(output_dir, exist_ok=True)
207
+
208
+ # Generate filename
209
+ prompt_slug = "_".join(prompt.lower().split()[:5])
210
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
211
+ filename = f"{prompt_slug[:25]}_lora_{style_name}_{timestamp}_seed{seed}_v{variation}.png"
212
+ filepath = os.path.join(output_dir, filename)
213
+
214
+ # Save image
215
+ image.save(filepath)
216
+
217
+ # Save metadata if provided
218
+ if metadata:
219
+ metadata_file = filepath.replace('.png', '_metadata.json')
220
+ with open(metadata_file, 'w') as f:
221
+ json.dump(metadata, f, indent=2)
222
+
223
+ return filepath
224
+
225
+ def list_available_styles():
226
+ """List available LoRA styles."""
227
+ lora_dir = Path("lora_models")
228
+
229
+ if not lora_dir.exists():
230
+ print("❌ No LoRA models directory found")
231
+ return
232
+
233
+ print("🎨 Available LoRA Styles:")
234
+ print("=" * 40)
235
+
236
+ styles_found = False
237
+ for style_dir in lora_dir.iterdir():
238
+ if style_dir.is_dir():
239
+ # Look for checkpoints
240
+ checkpoints = list(style_dir.glob("checkpoint-*"))
241
+ if checkpoints:
242
+ styles_found = True
243
+ latest_checkpoint = max(checkpoints, key=lambda x: int(x.name.split('-')[1]))
244
+
245
+ # Load info if available
246
+ info_file = style_dir / "training_info.json"
247
+ if info_file.exists():
248
+ with open(info_file) as f:
249
+ info = json.load(f)
250
+ print(f"📁 {style_dir.name}")
251
+ print(f" Latest: {latest_checkpoint.name}")
252
+ print(f" Steps: {info.get('total_steps', 'unknown')}")
253
+ print(f" Model: {info.get('model_name', 'unknown')}")
254
+ else:
255
+ print(f"📁 {style_dir.name}")
256
+ print(f" Latest: {latest_checkpoint.name}")
257
+ print()
258
+
259
+ if not styles_found:
260
+ print("❌ No trained LoRA styles found")
261
+ print("💡 Train a style first using: python src/generators/compi_phase1e_lora_training.py")
262
+
263
+ def interactive_generation(pipe, lora_info: dict, args):
264
+ """Interactive generation mode."""
265
+ style_name = lora_info.get('style_name', 'custom')
266
+
267
+ print(f"🎨 Interactive LoRA Style Generation - {style_name}")
268
+ print("=" * 50)
269
+ print("💡 Tips:")
270
+ print(f" - Include '{style_name}' or trigger words in your prompts")
271
+ print(f" - Adjust LoRA scale (0.0-2.0) to control style strength")
272
+ print(" - Type 'quit' to exit")
273
+ print()
274
+
275
+ while True:
276
+ try:
277
+ # Get prompt
278
+ prompt = input("Enter prompt: ").strip()
279
+ if not prompt or prompt.lower() == 'quit':
280
+ break
281
+
282
+ # Get optional parameters
283
+ variations = input(f"Variations (default: 1): ").strip()
284
+ variations = int(variations) if variations.isdigit() else 1
285
+
286
+ lora_scale = input(f"LoRA scale (default: {args.lora_scale}): ").strip()
287
+ lora_scale = float(lora_scale) if lora_scale else args.lora_scale
288
+
289
+ # Generate images
290
+ print(f"🎨 Generating {variations} variation(s)...")
291
+
292
+ for i in range(variations):
293
+ image, seed = generate_with_style(
294
+ pipe, prompt, args.negative,
295
+ args.steps, args.guidance,
296
+ args.width, args.height,
297
+ args.seed + i if args.seed is not None else None, lora_scale  # offset the seed per variation so repeated variations are not identical
298
+ )
299
+
300
+ # Save image
301
+ filepath = save_generated_image(
302
+ image, prompt, style_name, seed, i + 1, args.output_dir,
303
+ {
304
+ 'prompt': prompt,
305
+ 'negative_prompt': args.negative,
306
+ 'style_name': style_name,
307
+ 'lora_scale': lora_scale,
308
+ 'seed': seed,
309
+ 'steps': args.steps,
310
+ 'guidance_scale': args.guidance,
311
+ 'timestamp': datetime.now().isoformat()
312
+ }
313
+ )
314
+
315
+ print(f"✅ Saved: {filepath}")
316
+
317
+ print()
318
+
319
+ except KeyboardInterrupt:
320
+ break
321
+ except Exception as e:
322
+ print(f"❌ Error: {e}")
323
+ print()
324
+
325
+ def main():
326
+ """Main function."""
327
+ args = setup_args()
328
+
329
+ # List styles if requested
330
+ if args.list_styles:
331
+ list_available_styles()
332
+ return 0
333
+
334
+ # Check LoRA path
335
+ if not os.path.exists(args.lora_path):
336
+ print(f"❌ LoRA path not found: {args.lora_path}")
337
+ return 1
338
+
339
+ # Load LoRA info
340
+ lora_info = load_lora_info(args.lora_path)
341
+ style_name = lora_info.get('style_name', 'custom')
342
+
343
+ print(f"🎨 CompI Phase 1.E: Personal Style Generation")
344
+ print(f"Style: {style_name}")
345
+ print("=" * 50)
346
+
347
+ # Setup device
348
+ device = "cuda" if torch.cuda.is_available() else "cpu"
349
+ print(f"🖥️ Using device: {device}")
350
+
351
+ # Load pipeline
352
+ try:
353
+ pipe = load_pipeline_with_lora(args.model_name, args.lora_path, device)
354
+ print("✅ Pipeline loaded successfully")
355
+ except Exception as e:
356
+ print(f"❌ Failed to load pipeline: {e}")
357
+ return 1
358
+
359
+ # Interactive mode
360
+ if args.interactive:
361
+ interactive_generation(pipe, lora_info, args)
362
+ return 0
363
+
364
+ # Command line mode
365
+ prompt = " ".join(args.prompt) if args.prompt else input("Enter prompt: ").strip()
366
+ if not prompt:
367
+ print("❌ No prompt provided")
368
+ return 1
369
+
370
+ print(f"🎨 Generating {args.variations} variation(s) for: {prompt}")
371
+
372
+ # Generate images
373
+ for i in range(args.variations):
374
+ try:
375
+ image, seed = generate_with_style(
376
+ pipe, prompt, args.negative,
377
+ args.steps, args.guidance,
378
+ args.width, args.height,
379
+ args.seed, args.lora_scale
380
+ )
381
+
382
+ # Save image
383
+ filepath = save_generated_image(
384
+ image, prompt, style_name, seed, i + 1, args.output_dir,
385
+ {
386
+ 'prompt': prompt,
387
+ 'negative_prompt': args.negative,
388
+ 'style_name': style_name,
389
+ 'lora_scale': args.lora_scale,
390
+ 'seed': seed,
391
+ 'steps': args.steps,
392
+ 'guidance_scale': args.guidance,
393
+ 'timestamp': datetime.now().isoformat()
394
+ }
395
+ )
396
+
397
+ print(f"✅ Generated variation {i + 1}: {filepath}")
398
+
399
+ except Exception as e:
400
+ print(f"❌ Error generating variation {i + 1}: {e}")
401
+
402
+ print("🎉 Generation complete!")
403
+ return 0
404
+
405
+ if __name__ == "__main__":
406
+ exit(main())
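A minimal usage sketch for the style-generation module above, driving it from Python instead of the CLI. The function signatures are inferred from the call sites in main(); the checkpoint path, prompt, and output directory are placeholders.

import torch
from src.generators.compi_phase1e_style_generation import (
    load_lora_info, load_pipeline_with_lora, generate_with_style, save_generated_image
)

lora_path = "lora_models/my_style/checkpoint-1000"          # placeholder checkpoint
device = "cuda" if torch.cuda.is_available() else "cpu"

info = load_lora_info(lora_path)
pipe = load_pipeline_with_lora("runwayml/stable-diffusion-v1-5", lora_path, device)

# prompt, negative prompt, steps, guidance, width, height, seed (None = random), LoRA scale
image, seed = generate_with_style(
    pipe, "a misty castle at dawn in my_style style", "",
    30, 7.5, 512, 512, None, 1.0
)

filepath = save_generated_image(
    image, "a misty castle at dawn", info.get("style_name", "custom"),
    seed, 1, "outputs",
    {"prompt": "a misty castle at dawn", "lora_scale": 1.0}
)
print(filepath)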
src/generators/compi_phase1e_style_manager.py ADDED
@@ -0,0 +1,386 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CompI Phase 1.E: LoRA Style Management System
4
+
5
+ Manage multiple LoRA styles, switch between them, and organize trained models.
6
+
7
+ Usage:
8
+ python src/generators/compi_phase1e_style_manager.py --list
9
+ python src/generators/compi_phase1e_style_manager.py --info my_style
10
+ python src/generators/compi_phase1e_style_manager.py --cleanup
11
+ """
12
+
13
+ import os
14
+ import argparse
15
+ import json
16
+ import shutil
17
+ from datetime import datetime
18
+ from pathlib import Path
19
+ from typing import Dict, List, Optional, Tuple
20
+
21
+ import pandas as pd
22
+
23
+ # -------- 1. CONFIGURATION --------
24
+
25
+ LORA_MODELS_DIR = "lora_models"
26
+ STYLES_CONFIG_FILE = "lora_styles_config.json"
27
+
28
+ # -------- 2. STYLE MANAGEMENT CLASS --------
29
+
30
+ class LoRAStyleManager:
31
+ """Manager for LoRA styles and models."""
32
+
33
+ def __init__(self, models_dir: str = LORA_MODELS_DIR):
34
+ self.models_dir = Path(models_dir)
35
+ self.models_dir.mkdir(exist_ok=True)
36
+ self.config_file = self.models_dir / STYLES_CONFIG_FILE
37
+ self.config = self.load_config()
38
+
39
+ def load_config(self) -> Dict:
40
+ """Load styles configuration."""
41
+ if self.config_file.exists():
42
+ with open(self.config_file) as f:
43
+ return json.load(f)
44
+ return {"styles": {}, "last_updated": datetime.now().isoformat()}
45
+
46
+ def save_config(self):
47
+ """Save styles configuration."""
48
+ self.config["last_updated"] = datetime.now().isoformat()
49
+ with open(self.config_file, 'w') as f:
50
+ json.dump(self.config, f, indent=2)
51
+
52
+ def scan_styles(self) -> Dict[str, Dict]:
53
+ """Scan for available LoRA styles."""
54
+ styles = {}
55
+
56
+ for style_dir in self.models_dir.iterdir():
57
+ if not style_dir.is_dir() or style_dir.name.startswith('.'):
58
+ continue
59
+
60
+ # Look for checkpoints
61
+ checkpoints = list(style_dir.glob("checkpoint-*"))
62
+ if not checkpoints:
63
+ continue
64
+
65
+ # Get latest checkpoint
66
+ latest_checkpoint = max(checkpoints, key=lambda x: int(x.name.split('-')[1]))
67
+
68
+ # Load training info
69
+ info_file = style_dir / "training_info.json"
70
+ if info_file.exists():
71
+ with open(info_file) as f:
72
+ training_info = json.load(f)
73
+ else:
74
+ training_info = {}
75
+
76
+ # Load dataset info if available
77
+ dataset_info = {}
78
+ for dataset_dir in [style_dir / "dataset", Path("datasets") / style_dir.name]:
79
+ dataset_info_file = dataset_dir / "dataset_info.json"
80
+ if dataset_info_file.exists():
81
+ with open(dataset_info_file) as f:
82
+ dataset_info = json.load(f)
83
+ break
84
+
85
+ # Compile style information
86
+ style_info = {
87
+ "name": style_dir.name,
88
+ "path": str(style_dir),
89
+ "latest_checkpoint": str(latest_checkpoint),
90
+ "checkpoints": [str(cp) for cp in checkpoints],
91
+ "training_info": training_info,
92
+ "dataset_info": dataset_info,
93
+ "last_scanned": datetime.now().isoformat()
94
+ }
95
+
96
+ styles[style_dir.name] = style_info
97
+
98
+ return styles
99
+
100
+ def refresh_styles(self):
101
+ """Refresh the styles database."""
102
+ print("🔄 Scanning for LoRA styles...")
103
+ scanned_styles = self.scan_styles()
104
+
105
+ # Update config
106
+ self.config["styles"] = scanned_styles
107
+ self.save_config()
108
+
109
+ print(f"✅ Found {len(scanned_styles)} LoRA style(s)")
110
+ return scanned_styles
111
+
112
+ def list_styles(self, detailed: bool = False) -> List[Dict]:
113
+ """List available styles."""
114
+ styles = self.config.get("styles", {})
115
+
116
+ if not styles:
117
+ styles = self.refresh_styles()
118
+
119
+ if detailed:
120
+ return list(styles.values())
121
+ else:
122
+ return [{"name": name, "checkpoints": len(info["checkpoints"])}
123
+ for name, info in styles.items()]
124
+
125
+ def get_style_info(self, style_name: str) -> Optional[Dict]:
126
+ """Get detailed information about a specific style."""
127
+ styles = self.config.get("styles", {})
128
+ return styles.get(style_name)
129
+
130
+ def get_best_checkpoint(self, style_name: str) -> Optional[str]:
131
+ """Get the best checkpoint for a style."""
132
+ style_info = self.get_style_info(style_name)
133
+ if not style_info:
134
+ return None
135
+
136
+ # For now, return the latest checkpoint
137
+ # Could be enhanced to track validation loss and return best performing
138
+ return style_info.get("latest_checkpoint")
139
+
140
+ def delete_style(self, style_name: str, confirm: bool = False) -> bool:
141
+ """Delete a LoRA style."""
142
+ if not confirm:
143
+ print("⚠️ Use --confirm to actually delete the style")
144
+ return False
145
+
146
+ style_dir = self.models_dir / style_name
147
+ if not style_dir.exists():
148
+ print(f"❌ Style not found: {style_name}")
149
+ return False
150
+
151
+ try:
152
+ shutil.rmtree(style_dir)
153
+
154
+ # Remove from config
155
+ if style_name in self.config.get("styles", {}):
156
+ del self.config["styles"][style_name]
157
+ self.save_config()
158
+
159
+ print(f"✅ Deleted style: {style_name}")
160
+ return True
161
+
162
+ except Exception as e:
163
+ print(f"❌ Error deleting style: {e}")
164
+ return False
165
+
166
+ def cleanup_checkpoints(self, style_name: str, keep_last: int = 3) -> int:
167
+ """Clean up old checkpoints, keeping only the most recent ones."""
168
+ style_dir = self.models_dir / style_name
169
+ if not style_dir.exists():
170
+ print(f"❌ Style not found: {style_name}")
171
+ return 0
172
+
173
+ checkpoints = list(style_dir.glob("checkpoint-*"))
174
+ if len(checkpoints) <= keep_last:
175
+ print(f"✅ No cleanup needed for {style_name} ({len(checkpoints)} checkpoints)")
176
+ return 0
177
+
178
+ # Sort by step number
179
+ checkpoints.sort(key=lambda x: int(x.name.split('-')[1]))
180
+
181
+ # Remove old checkpoints
182
+ to_remove = checkpoints[:-keep_last]
183
+ removed_count = 0
184
+
185
+ for checkpoint in to_remove:
186
+ try:
187
+ shutil.rmtree(checkpoint)
188
+ removed_count += 1
189
+ except Exception as e:
190
+ print(f"⚠️ Failed to remove {checkpoint}: {e}")
191
+
192
+ print(f"✅ Cleaned up {removed_count} old checkpoints for {style_name}")
193
+ return removed_count
194
+
195
+ def export_style_info(self, output_file: str = None) -> str:
196
+ """Export styles information to CSV."""
197
+ styles = self.list_styles(detailed=True)
198
+
199
+ if not styles:
200
+ print("❌ No styles found")
201
+ return ""
202
+
203
+ # Prepare data for CSV
204
+ rows = []
205
+ for style in styles:
206
+ training_info = style.get("training_info", {})
207
+ dataset_info = style.get("dataset_info", {})
208
+
209
+ row = {
210
+ "style_name": style["name"],
211
+ "checkpoints": len(style["checkpoints"]),
212
+ "latest_checkpoint": Path(style["latest_checkpoint"]).name,
213
+ "total_steps": training_info.get("total_steps", "unknown"),
214
+ "epochs": training_info.get("epochs", "unknown"),
215
+ "learning_rate": training_info.get("learning_rate", "unknown"),
216
+ "lora_rank": training_info.get("lora_rank", "unknown"),
217
+ "dataset_images": dataset_info.get("total_images", "unknown"),
218
+ "trigger_word": dataset_info.get("trigger_word", "unknown"),
219
+ "last_scanned": style.get("last_scanned", "unknown")
220
+ }
221
+ rows.append(row)
222
+
223
+ # Create DataFrame and save
224
+ df = pd.DataFrame(rows)
225
+
226
+ if output_file is None:
227
+ output_file = f"lora_styles_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
228
+
229
+ df.to_csv(output_file, index=False)
230
+ print(f"📊 Exported styles info to: {output_file}")
231
+ return output_file
232
+
233
+ # -------- 3. COMMAND LINE INTERFACE --------
234
+
235
+ def setup_args():
236
+ """Setup command line arguments."""
237
+ parser = argparse.ArgumentParser(
238
+ description="CompI Phase 1.E: LoRA Style Management",
239
+ formatter_class=argparse.RawDescriptionHelpFormatter,
240
+ epilog="""
241
+ Examples:
242
+ # List all available styles
243
+ python %(prog)s --list
244
+
245
+ # Get detailed info about a specific style
246
+ python %(prog)s --info my_style
247
+
248
+ # Refresh styles database
249
+ python %(prog)s --refresh
250
+
251
+ # Clean up old checkpoints
252
+ python %(prog)s --cleanup my_style --keep 2
253
+
254
+ # Export styles information
255
+ python %(prog)s --export styles_report.csv
256
+ """
257
+ )
258
+
259
+ parser.add_argument("--list", action="store_true",
260
+ help="List all available LoRA styles")
261
+
262
+ parser.add_argument("--list-detailed", action="store_true",
263
+ help="List styles with detailed information")
264
+
265
+ parser.add_argument("--info", metavar="STYLE_NAME",
266
+ help="Show detailed information about a specific style")
267
+
268
+ parser.add_argument("--refresh", action="store_true",
269
+ help="Refresh the styles database")
270
+
271
+ parser.add_argument("--cleanup", metavar="STYLE_NAME",
272
+ help="Clean up old checkpoints for a style")
273
+
274
+ parser.add_argument("--keep", type=int, default=3,
275
+ help="Number of recent checkpoints to keep during cleanup")
276
+
277
+ parser.add_argument("--delete", metavar="STYLE_NAME",
278
+ help="Delete a LoRA style")
279
+
280
+ parser.add_argument("--confirm", action="store_true",
281
+ help="Confirm destructive operations")
282
+
283
+ parser.add_argument("--export", metavar="OUTPUT_FILE",
284
+ help="Export styles information to CSV")
285
+
286
+ parser.add_argument("--models-dir", default=LORA_MODELS_DIR,
287
+ help=f"LoRA models directory (default: {LORA_MODELS_DIR})")
288
+
289
+ return parser.parse_args()
290
+
291
+ def print_style_info(style_info: Dict):
292
+ """Print detailed style information."""
293
+ print(f"🎨 Style: {style_info['name']}")
294
+ print("=" * 40)
295
+
296
+ # Basic info
297
+ print(f"📁 Path: {style_info['path']}")
298
+ print(f"📊 Checkpoints: {len(style_info['checkpoints'])}")
299
+ print(f"🏆 Latest: {Path(style_info['latest_checkpoint']).name}")
300
+
301
+ # Training info
302
+ training_info = style_info.get("training_info", {})
303
+ if training_info:
304
+ print(f"\n🚀 Training Information:")
305
+ print(f" Steps: {training_info.get('total_steps', 'unknown')}")
306
+ print(f" Epochs: {training_info.get('epochs', 'unknown')}")
307
+ print(f" Learning Rate: {training_info.get('learning_rate', 'unknown')}")
308
+ print(f" LoRA Rank: {training_info.get('lora_rank', 'unknown')}")
309
+ print(f" LoRA Alpha: {training_info.get('lora_alpha', 'unknown')}")
310
+
311
+ # Dataset info
312
+ dataset_info = style_info.get("dataset_info", {})
313
+ if dataset_info:
314
+ print(f"\n📊 Dataset Information:")
315
+ print(f" Total Images: {dataset_info.get('total_images', 'unknown')}")
316
+ print(f" Train Images: {dataset_info.get('train_images', 'unknown')}")
317
+ print(f" Validation Images: {dataset_info.get('validation_images', 'unknown')}")
318
+ print(f" Trigger Word: {dataset_info.get('trigger_word', 'unknown')}")
319
+ print(f" Image Size: {dataset_info.get('image_size', 'unknown')}")
320
+
321
+ print(f"\n🕒 Last Scanned: {style_info.get('last_scanned', 'unknown')}")
322
+
323
+ def main():
324
+ """Main function."""
325
+ args = setup_args()
326
+
327
+ # Initialize style manager
328
+ manager = LoRAStyleManager(args.models_dir)
329
+
330
+ print("🎨 CompI Phase 1.E: LoRA Style Manager")
331
+ print("=" * 40)
332
+
333
+ # Execute commands
334
+ if args.refresh:
335
+ manager.refresh_styles()
336
+
337
+ elif args.list or args.list_detailed:
338
+ styles = manager.list_styles(detailed=args.list_detailed)
339
+
340
+ if not styles:
341
+ print("❌ No LoRA styles found")
342
+ print("💡 Train a style first using: python src/generators/compi_phase1e_lora_training.py")
343
+ else:
344
+ print(f"📋 Available LoRA Styles ({len(styles)}):")
345
+ print("-" * 40)
346
+
347
+ if args.list_detailed:
348
+ for style in styles:
349
+ print_style_info(style)
350
+ print()
351
+ else:
352
+ for style in styles:
353
+ print(f"🎨 {style['name']} ({style['checkpoints']} checkpoints)")
354
+
355
+ elif args.info:
356
+ style_info = manager.get_style_info(args.info)
357
+ if style_info:
358
+ print_style_info(style_info)
359
+ else:
360
+ print(f"❌ Style not found: {args.info}")
361
+ print("💡 Use --list to see available styles")
362
+
363
+ elif args.cleanup:
364
+ removed = manager.cleanup_checkpoints(args.cleanup, args.keep)
365
+ if removed > 0:
366
+ manager.refresh_styles()
367
+
368
+ elif args.delete:
369
+ manager.delete_style(args.delete, args.confirm)
370
+ if args.confirm:
371
+ manager.refresh_styles()
372
+
373
+ elif args.export:
374
+ manager.export_style_info(args.export)
375
+
376
+ else:
377
+ print("❓ No command specified. Use --help for usage information.")
378
+ print("💡 Common commands:")
379
+ print(" --list List available styles")
380
+ print(" --info STYLE_NAME Show style details")
381
+ print(" --refresh Refresh styles database")
382
+
383
+ return 0
384
+
385
+ if __name__ == "__main__":
386
+ exit(main())
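A short sketch of using LoRAStyleManager programmatically, mirroring the CLI commands above; "my_style" is a placeholder for a style directory under lora_models/.

from src.generators.compi_phase1e_style_manager import LoRAStyleManager

manager = LoRAStyleManager("lora_models")
manager.refresh_styles()                              # rescan disk and update the JSON config

for style in manager.list_styles():                   # summary view: name + checkpoint count
    print(style["name"], style["checkpoints"])

checkpoint = manager.get_best_checkpoint("my_style")  # currently resolves to the latest checkpoint
manager.cleanup_checkpoints("my_style", keep_last=2)  # prune all but the two newest checkpoints
manager.export_style_info("styles_report.csv")        # tabular export via pandas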
src/generators/compi_phase2a_audio_to_image.py ADDED
@@ -0,0 +1,350 @@
1
+ """
2
+ CompI Phase 2.A: Audio-to-Image Generation
3
+
4
+ This module implements multimodal AI art generation that combines:
5
+ - Text prompts with style and mood conditioning
6
+ - Audio analysis and feature extraction
7
+ - Audio-to-text captioning
8
+ - Intelligent prompt fusion for enhanced creativity
9
+
10
+ Features:
11
+ - Support for various audio formats (mp3, wav, flac, etc.)
12
+ - Real-time audio analysis with tempo, energy, and spectral features
13
+ - OpenAI Whisper integration for audio captioning
14
+ - Comprehensive metadata logging and filename conventions
15
+ - Batch processing capabilities
16
+ """
17
+
18
+ import os
19
+ import sys
20
+ import torch
21
+ import json
22
+ from datetime import datetime
23
+ from typing import Dict, List, Optional, Tuple, Union
24
+ from pathlib import Path
25
+ import logging
26
+
27
+ # Add project root to path
28
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
29
+
30
+ from diffusers import StableDiffusionPipeline
31
+ from PIL import Image
32
+ import numpy as np
33
+
34
+ from src.utils.audio_utils import AudioProcessor, AudioCaptioner, MultimodalPromptFusion, AudioFeatures
35
+ from src.utils.logging_utils import setup_logger
36
+ from src.utils.file_utils import ensure_directory_exists, generate_filename
37
+
38
+ # Setup logging
39
+ logger = setup_logger(__name__)
40
+
41
+ class CompIPhase2AAudioToImage:
42
+ """
43
+ CompI Phase 2.A: Audio-to-Image Generation System
44
+
45
+ Combines text prompts with audio analysis to generate contextually rich AI art
46
+ """
47
+
48
+ def __init__(
49
+ self,
50
+ model_name: str = "runwayml/stable-diffusion-v1-5",
51
+ device: str = "auto",
52
+ output_dir: str = "outputs",
53
+ whisper_model: str = "base"
54
+ ):
55
+ """
56
+ Initialize the audio-to-image generation system
57
+
58
+ Args:
59
+ model_name: Stable Diffusion model to use
60
+ device: Device for inference (auto, cpu, cuda)
61
+ output_dir: Directory for saving generated images
62
+ whisper_model: Whisper model size for audio captioning
63
+ """
64
+ self.model_name = model_name
65
+ self.device = self._setup_device(device)
66
+ self.output_dir = Path(output_dir)
67
+ ensure_directory_exists(self.output_dir)
68
+
69
+ # Initialize components
70
+ self.pipe = None
71
+ self.audio_processor = AudioProcessor()
72
+ self.audio_captioner = AudioCaptioner(model_size=whisper_model, device=self.device)
73
+ self.prompt_fusion = MultimodalPromptFusion()
74
+
75
+ logger.info(f"Initialized CompI Phase 2.A on {self.device}")
76
+
77
+ def _setup_device(self, device: str) -> str:
78
+ """Setup and validate device"""
79
+ if device == "auto":
80
+ device = "cuda" if torch.cuda.is_available() else "cpu"
81
+
82
+ if device == "cuda" and not torch.cuda.is_available():
83
+ logger.warning("CUDA requested but not available, falling back to CPU")
84
+ device = "cpu"
85
+
86
+ return device
87
+
88
+ def _load_pipeline(self):
89
+ """Lazy load the Stable Diffusion pipeline"""
90
+ if self.pipe is None:
91
+ logger.info(f"Loading Stable Diffusion model: {self.model_name}")
92
+
93
+ # Custom safety checker that allows creative content
94
+ def dummy_safety_checker(images, **kwargs):
95
+ return images, [False] * len(images)
96
+
97
+ self.pipe = StableDiffusionPipeline.from_pretrained(
98
+ self.model_name,
99
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
100
+ safety_checker=dummy_safety_checker,
101
+ requires_safety_checker=False
102
+ )
103
+
104
+ self.pipe = self.pipe.to(self.device)
105
+ self.pipe.enable_attention_slicing()
106
+
107
+ if self.device == "cuda":
108
+ self.pipe.enable_model_cpu_offload()
109
+
110
+ logger.info("Stable Diffusion pipeline loaded successfully")
111
+
112
+ def analyze_audio(self, audio_path: str, include_caption: bool = True) -> Tuple[AudioFeatures, str]:
113
+ """
114
+ Comprehensive audio analysis
115
+
116
+ Args:
117
+ audio_path: Path to audio file
118
+ include_caption: Whether to generate audio caption
119
+
120
+ Returns:
121
+ Tuple of (AudioFeatures, audio_caption)
122
+ """
123
+ logger.info(f"Analyzing audio: {audio_path}")
124
+
125
+ # Extract audio features
126
+ audio_features = self.audio_processor.analyze_audio_file(audio_path)
127
+
128
+ # Generate audio caption if requested
129
+ audio_caption = ""
130
+ if include_caption:
131
+ try:
132
+ audio_caption = self.audio_captioner.caption_audio(audio_path)
133
+ except Exception as e:
134
+ logger.warning(f"Audio captioning failed: {e}")
135
+ audio_caption = ""
136
+
137
+ return audio_features, audio_caption
138
+
139
+ def generate_image(
140
+ self,
141
+ text_prompt: str,
142
+ style: str = "",
143
+ mood: str = "",
144
+ audio_path: Optional[str] = None,
145
+ num_images: int = 1,
146
+ height: int = 512,
147
+ width: int = 512,
148
+ num_inference_steps: int = 30,
149
+ guidance_scale: float = 7.5,
150
+ seed: Optional[int] = None
151
+ ) -> List[Dict]:
152
+ """
153
+ Generate images with optional audio conditioning
154
+
155
+ Args:
156
+ text_prompt: Base text prompt
157
+ style: Art style
158
+ mood: Mood/atmosphere
159
+ audio_path: Optional path to audio file for conditioning
160
+ num_images: Number of images to generate
161
+ height: Image height
162
+ width: Image width
163
+ num_inference_steps: Number of diffusion steps
164
+ guidance_scale: Guidance scale for generation
165
+ seed: Random seed for reproducibility
166
+
167
+ Returns:
168
+ List of generation results with metadata
169
+ """
170
+ self._load_pipeline()
171
+
172
+ # Analyze audio if provided
173
+ audio_features = None
174
+ audio_caption = ""
175
+ if audio_path and os.path.exists(audio_path):
176
+ audio_features, audio_caption = self.analyze_audio(audio_path)
177
+
178
+ # Create enhanced prompt
179
+ if audio_features:
180
+ enhanced_prompt = self.prompt_fusion.fuse_prompt_with_audio(
181
+ text_prompt, style, mood, audio_features, audio_caption
182
+ )
183
+ else:
184
+ enhanced_prompt = text_prompt
185
+ if style:
186
+ enhanced_prompt += f", {style}"
187
+ if mood:
188
+ enhanced_prompt += f", {mood}"
189
+
190
+ logger.info(f"Generating {num_images} image(s) with prompt: {enhanced_prompt}")
191
+
192
+ results = []
193
+
194
+ for i in range(num_images):
195
+ # Set up generation parameters
196
+ current_seed = seed if seed is not None else torch.seed()
197
+ generator = torch.Generator(device=self.device).manual_seed(current_seed)
198
+
199
+ # Generate image
200
+ with torch.autocast(self.device) if self.device == "cuda" else torch.no_grad():
201
+ result = self.pipe(
202
+ enhanced_prompt,
203
+ height=height,
204
+ width=width,
205
+ num_inference_steps=num_inference_steps,
206
+ guidance_scale=guidance_scale,
207
+ generator=generator
208
+ )
209
+
210
+ image = result.images[0]
211
+
212
+ # Create metadata
213
+ metadata = {
214
+ "timestamp": datetime.now().isoformat(),
215
+ "text_prompt": text_prompt,
216
+ "style": style,
217
+ "mood": mood,
218
+ "enhanced_prompt": enhanced_prompt,
219
+ "audio_path": audio_path,
220
+ "audio_caption": audio_caption,
221
+ "generation_params": {
222
+ "height": height,
223
+ "width": width,
224
+ "num_inference_steps": num_inference_steps,
225
+ "guidance_scale": guidance_scale,
226
+ "seed": current_seed,
227
+ "model": self.model_name
228
+ },
229
+ "device": self.device,
230
+ "phase": "2A_audio_to_image"
231
+ }
232
+
233
+ # Add audio features to metadata
234
+ if audio_features:
235
+ metadata["audio_features"] = audio_features.to_dict()
236
+ metadata["audio_tags"] = self.prompt_fusion.generate_audio_tags(audio_features)
237
+
238
+ # Generate filename
239
+ filename = self._generate_filename(
240
+ text_prompt, style, mood, current_seed, i + 1,
241
+ has_audio=audio_path is not None
242
+ )
243
+
244
+ # Save image and metadata
245
+ image_path = self.output_dir / f"{filename}.png"
246
+ metadata_path = self.output_dir / f"{filename}_metadata.json"
247
+
248
+ image.save(image_path)
249
+ with open(metadata_path, 'w') as f:
250
+ json.dump(metadata, f, indent=2)
251
+
252
+ results.append({
253
+ "image": image,
254
+ "image_path": str(image_path),
255
+ "metadata_path": str(metadata_path),
256
+ "metadata": metadata,
257
+ "filename": filename
258
+ })
259
+
260
+ logger.info(f"Generated image {i+1}/{num_images}: {filename}")
261
+
262
+ return results
263
+
264
+ def _generate_filename(
265
+ self,
266
+ prompt: str,
267
+ style: str,
268
+ mood: str,
269
+ seed: int,
270
+ variation: int,
271
+ has_audio: bool = False
272
+ ) -> str:
273
+ """Generate descriptive filename following CompI conventions"""
274
+
275
+ # Create prompt slug (first 5 words)
276
+ prompt_words = prompt.lower().replace(',', '').split()[:5]
277
+ prompt_slug = "_".join(prompt_words)
278
+
279
+ # Create style and mood slugs
280
+ style_slug = style.replace(" ", "").replace(",", "")[:10] if style else "standard"
281
+ mood_slug = mood.replace(" ", "").replace(",", "")[:10] if mood else "neutral"
282
+
283
+ # Timestamp
284
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
285
+
286
+ # Audio indicator
287
+ audio_tag = "_AUDIO" if has_audio else ""
288
+
289
+ # Combine all elements
290
+ filename = f"{prompt_slug}_{style_slug}_{mood_slug}_{timestamp}_seed{seed}{audio_tag}_v{variation}"
291
+
292
+ return filename
293
+
294
+ def batch_process(
295
+ self,
296
+ audio_directory: str,
297
+ text_prompt: str,
298
+ style: str = "",
299
+ mood: str = "",
300
+ **generation_kwargs
301
+ ) -> List[Dict]:
302
+ """
303
+ Process multiple audio files in batch
304
+
305
+ Args:
306
+ audio_directory: Directory containing audio files
307
+ text_prompt: Base text prompt for all generations
308
+ style: Art style
309
+ mood: Mood/atmosphere
310
+ **generation_kwargs: Additional generation parameters
311
+
312
+ Returns:
313
+ List of all generation results
314
+ """
315
+ audio_dir = Path(audio_directory)
316
+ if not audio_dir.exists():
317
+ raise ValueError(f"Audio directory not found: {audio_directory}")
318
+
319
+ # Find audio files
320
+ audio_extensions = {'.mp3', '.wav', '.flac', '.m4a', '.ogg'}
321
+ audio_files = [
322
+ f for f in audio_dir.iterdir()
323
+ if f.suffix.lower() in audio_extensions
324
+ ]
325
+
326
+ if not audio_files:
327
+ raise ValueError(f"No audio files found in {audio_directory}")
328
+
329
+ logger.info(f"Processing {len(audio_files)} audio files")
330
+
331
+ all_results = []
332
+ for audio_file in audio_files:
333
+ logger.info(f"Processing: {audio_file.name}")
334
+
335
+ try:
336
+ results = self.generate_image(
337
+ text_prompt=text_prompt,
338
+ style=style,
339
+ mood=mood,
340
+ audio_path=str(audio_file),
341
+ **generation_kwargs
342
+ )
343
+ all_results.extend(results)
344
+
345
+ except Exception as e:
346
+ logger.error(f"Error processing {audio_file.name}: {e}")
347
+ continue
348
+
349
+ logger.info(f"Batch processing complete: {len(all_results)} images generated")
350
+ return all_results
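A minimal sketch of calling the Phase 2.A class above directly; the audio path is a placeholder, and the defaults (Stable Diffusion v1.5, Whisper "base") match the constructor arguments shown.

from src.generators.compi_phase2a_audio_to_image import CompIPhase2AAudioToImage

generator = CompIPhase2AAudioToImage(device="auto", output_dir="outputs")

results = generator.generate_image(
    text_prompt="a forest waking up at dawn",
    style="watercolor",
    mood="serene",
    audio_path="inputs/birdsong.mp3",   # omit or pass None for text-only generation
    num_images=1,
    seed=42,
)
print(results[0]["image_path"])
print(results[0]["metadata"]["enhanced_prompt"])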
src/generators/compi_phase2b_data_to_image.py ADDED
@@ -0,0 +1,432 @@
1
+ """
2
+ CompI Phase 2.B: Data/Logic Input to Image Generation
3
+
4
+ This module implements data-driven AI art generation that combines:
5
+ - CSV data analysis and processing
6
+ - Mathematical formula evaluation
7
+ - Data-to-text conversion for prompt enhancement
8
+ - Data visualization for artistic conditioning
9
+ - Intelligent fusion of data insights with creative prompts
10
+
11
+ Features:
12
+ - Support for CSV files with comprehensive data analysis
13
+ - Safe mathematical formula evaluation with NumPy
14
+ - Poetic text generation from data patterns
15
+ - Data visualization creation for artistic inspiration
16
+ - Comprehensive metadata logging and filename conventions
17
+ - Batch processing capabilities for multiple datasets
18
+ """
19
+
20
+ import os
21
+ import sys
22
+ import torch
23
+ import json
24
+ import pandas as pd
25
+ import numpy as np
26
+ from datetime import datetime
27
+ from typing import Dict, List, Optional, Tuple, Union
28
+ from pathlib import Path
29
+ import logging
30
+
31
+ # Add project root to path
32
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
33
+
34
+ from diffusers import StableDiffusionPipeline
35
+ from PIL import Image
36
+
37
+ from src.utils.data_utils import DataProcessor, DataToTextConverter, DataVisualizer, DataFeatures
38
+ from src.utils.logging_utils import setup_logger
39
+ from src.utils.file_utils import ensure_directory_exists, generate_filename
40
+
41
+ # Setup logging
42
+ logger = setup_logger(__name__)
43
+
44
+ class CompIPhase2BDataToImage:
45
+ """
46
+ CompI Phase 2.B: Data/Logic Input to Image Generation System
47
+
48
+ Transforms structured data and mathematical formulas into AI-generated art
49
+ """
50
+
51
+ def __init__(
52
+ self,
53
+ model_name: str = "runwayml/stable-diffusion-v1-5",
54
+ device: str = "auto",
55
+ output_dir: str = "outputs",
56
+ visualization_style: str = "artistic"
57
+ ):
58
+ """
59
+ Initialize the data-to-image generation system
60
+
61
+ Args:
62
+ model_name: Stable Diffusion model to use
63
+ device: Device for inference (auto, cpu, cuda)
64
+ output_dir: Directory for saving generated images
65
+ visualization_style: Style for data visualizations
66
+ """
67
+ self.model_name = model_name
68
+ self.device = self._setup_device(device)
69
+ self.output_dir = Path(output_dir)
70
+ ensure_directory_exists(self.output_dir)
71
+
72
+ # Initialize components
73
+ self.pipe = None
74
+ self.data_processor = DataProcessor()
75
+ self.text_converter = DataToTextConverter()
76
+ self.visualizer = DataVisualizer(style=visualization_style)
77
+
78
+ logger.info(f"Initialized CompI Phase 2.B on {self.device}")
79
+
80
+ def _setup_device(self, device: str) -> str:
81
+ """Setup and validate device"""
82
+ if device == "auto":
83
+ device = "cuda" if torch.cuda.is_available() else "cpu"
84
+
85
+ if device == "cuda" and not torch.cuda.is_available():
86
+ logger.warning("CUDA requested but not available, falling back to CPU")
87
+ device = "cpu"
88
+
89
+ return device
90
+
91
+ def _load_pipeline(self):
92
+ """Lazy load the Stable Diffusion pipeline"""
93
+ if self.pipe is None:
94
+ logger.info(f"Loading Stable Diffusion model: {self.model_name}")
95
+
96
+ # Custom safety checker that allows creative content
97
+ def dummy_safety_checker(images, **kwargs):
98
+ return images, [False] * len(images)
99
+
100
+ self.pipe = StableDiffusionPipeline.from_pretrained(
101
+ self.model_name,
102
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
103
+ safety_checker=dummy_safety_checker,
104
+ requires_safety_checker=False
105
+ )
106
+
107
+ self.pipe = self.pipe.to(self.device)
108
+ self.pipe.enable_attention_slicing()
109
+
110
+ if self.device == "cuda":
111
+ self.pipe.enable_model_cpu_offload()
112
+
113
+ logger.info("Stable Diffusion pipeline loaded successfully")
114
+
115
+ def analyze_csv_data(self, csv_path: str) -> Tuple[pd.DataFrame, DataFeatures, str, Image.Image]:
116
+ """
117
+ Comprehensive CSV data analysis
118
+
119
+ Args:
120
+ csv_path: Path to CSV file
121
+
122
+ Returns:
123
+ Tuple of (DataFrame, DataFeatures, poetic_description, visualization_image)
124
+ """
125
+ logger.info(f"Analyzing CSV data: {csv_path}")
126
+
127
+ # Load and analyze data
128
+ df = pd.read_csv(csv_path)
129
+ features = self.data_processor.analyze_csv_data(df)
130
+
131
+ # Generate poetic description
132
+ poetic_description = self.text_converter.generate_poetic_description(features)
133
+
134
+ # Create visualization
135
+ visualization_image = self.visualizer.create_data_visualization(df, features)
136
+
137
+ return df, features, poetic_description, visualization_image
138
+
139
+ def evaluate_mathematical_formula(self, formula: str, num_points: int = 100) -> Tuple[np.ndarray, Dict, str, Image.Image]:
140
+ """
141
+ Evaluate mathematical formula and create artistic interpretation
142
+
143
+ Args:
144
+ formula: Mathematical expression
145
+ num_points: Number of points to generate
146
+
147
+ Returns:
148
+ Tuple of (result_array, metadata, poetic_description, visualization_image)
149
+ """
150
+ logger.info(f"Evaluating mathematical formula: {formula}")
151
+
152
+ # Evaluate formula
153
+ result_array, metadata = self.data_processor.evaluate_formula(formula, num_points)
154
+
155
+ # Generate poetic description
156
+ poetic_description = self.text_converter.generate_formula_description(formula, metadata)
157
+
158
+ # Create visualization
159
+ visualization_image = self.visualizer.create_formula_visualization(result_array, formula, metadata)
160
+
161
+ return result_array, metadata, poetic_description, visualization_image
162
+
163
+ def generate_image(
164
+ self,
165
+ text_prompt: str,
166
+ style: str = "",
167
+ mood: str = "",
168
+ csv_path: Optional[str] = None,
169
+ formula: Optional[str] = None,
170
+ num_images: int = 1,
171
+ height: int = 512,
172
+ width: int = 512,
173
+ num_inference_steps: int = 30,
174
+ guidance_scale: float = 7.5,
175
+ seed: Optional[int] = None,
176
+ save_data_visualization: bool = True
177
+ ) -> List[Dict]:
178
+ """
179
+ Generate images with data/formula conditioning
180
+
181
+ Args:
182
+ text_prompt: Base text prompt
183
+ style: Art style
184
+ mood: Mood/atmosphere
185
+ csv_path: Optional path to CSV file
186
+ formula: Optional mathematical formula
187
+ num_images: Number of images to generate
188
+ height: Image height
189
+ width: Image width
190
+ num_inference_steps: Number of diffusion steps
191
+ guidance_scale: Guidance scale for generation
192
+ seed: Random seed for reproducibility
193
+ save_data_visualization: Whether to save data visualization
194
+
195
+ Returns:
196
+ List of generation results with metadata
197
+ """
198
+ self._load_pipeline()
199
+
200
+ # Process data input
201
+ data_features = None
202
+ poetic_description = ""
203
+ data_visualization = None
204
+ data_type = "none"
205
+
206
+ if csv_path and os.path.exists(csv_path):
207
+ df, data_features, poetic_description, data_visualization = self.analyze_csv_data(csv_path)
208
+ data_type = "csv"
209
+ elif formula and formula.strip():
210
+ result_array, formula_metadata, poetic_description, data_visualization = self.evaluate_mathematical_formula(formula)
211
+ data_type = "formula"
212
+
213
+ # Create enhanced prompt
214
+ enhanced_prompt = text_prompt
215
+ if style:
216
+ enhanced_prompt += f", {style}"
217
+ if mood:
218
+ enhanced_prompt += f", {mood}"
219
+ if poetic_description:
220
+ enhanced_prompt += f", {poetic_description}"
221
+
222
+ logger.info(f"Generating {num_images} image(s) with enhanced prompt")
223
+
224
+ results = []
225
+
226
+ for i in range(num_images):
227
+ # Set up generation parameters
228
+ current_seed = seed if seed is not None else torch.seed()
229
+ generator = torch.Generator(device=self.device).manual_seed(current_seed)
230
+
231
+ # Generate image
232
+ with torch.autocast(self.device) if self.device == "cuda" else torch.no_grad():
233
+ result = self.pipe(
234
+ enhanced_prompt,
235
+ height=height,
236
+ width=width,
237
+ num_inference_steps=num_inference_steps,
238
+ guidance_scale=guidance_scale,
239
+ generator=generator
240
+ )
241
+
242
+ image = result.images[0]
243
+
244
+ # Create metadata
245
+ metadata = {
246
+ "timestamp": datetime.now().isoformat(),
247
+ "text_prompt": text_prompt,
248
+ "style": style,
249
+ "mood": mood,
250
+ "enhanced_prompt": enhanced_prompt,
251
+ "poetic_description": poetic_description,
252
+ "data_type": data_type,
253
+ "csv_path": csv_path,
254
+ "formula": formula,
255
+ "generation_params": {
256
+ "height": height,
257
+ "width": width,
258
+ "num_inference_steps": num_inference_steps,
259
+ "guidance_scale": guidance_scale,
260
+ "seed": current_seed,
261
+ "model": self.model_name
262
+ },
263
+ "device": self.device,
264
+ "phase": "2B_data_to_image"
265
+ }
266
+
267
+ # Add data features to metadata
268
+ if data_features:
269
+ metadata["data_features"] = data_features.to_dict()
270
+
271
+ # Generate filename
272
+ filename = self._generate_filename(
273
+ text_prompt, style, mood, current_seed, i + 1,
274
+ data_type=data_type
275
+ )
276
+
277
+ # Save image and metadata
278
+ image_path = self.output_dir / f"{filename}.png"
279
+ metadata_path = self.output_dir / f"{filename}_metadata.json"
280
+
281
+ image.save(image_path)
282
+ with open(metadata_path, 'w') as f:
283
+ json.dump(metadata, f, indent=2)
284
+
285
+ # Save data visualization if requested
286
+ data_viz_path = None
287
+ if save_data_visualization and data_visualization:
288
+ data_viz_path = self.output_dir / f"{filename}_data_viz.png"
289
+ data_visualization.save(data_viz_path)
290
+
291
+ results.append({
292
+ "image": image,
293
+ "image_path": str(image_path),
294
+ "metadata_path": str(metadata_path),
295
+ "data_visualization_path": str(data_viz_path) if data_viz_path else None,
296
+ "data_visualization": data_visualization,
297
+ "metadata": metadata,
298
+ "filename": filename,
299
+ "poetic_description": poetic_description
300
+ })
301
+
302
+ logger.info(f"Generated image {i+1}/{num_images}: {filename}")
303
+
304
+ return results
305
+
306
+ def _generate_filename(
307
+ self,
308
+ prompt: str,
309
+ style: str,
310
+ mood: str,
311
+ seed: int,
312
+ variation: int,
313
+ data_type: str = "none"
314
+ ) -> str:
315
+ """Generate descriptive filename following CompI conventions"""
316
+
317
+ # Create prompt slug (first 5 words)
318
+ prompt_words = prompt.lower().replace(',', '').split()[:5]
319
+ prompt_slug = "_".join(prompt_words)
320
+
321
+ # Create style and mood slugs
322
+ style_slug = style.replace(" ", "").replace(",", "")[:10] if style else "standard"
323
+ mood_slug = mood.replace(" ", "").replace(",", "")[:10] if mood else "neutral"
324
+
325
+ # Timestamp
326
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
327
+
328
+ # Data type indicator
329
+ data_tag = f"_{data_type.upper()}" if data_type != "none" else ""
330
+
331
+ # Combine all elements
332
+ filename = f"{prompt_slug}_{style_slug}_{mood_slug}_{timestamp}_seed{seed}{data_tag}_v{variation}"
333
+
334
+ return filename
335
+
336
+ def batch_process_csv_files(
337
+ self,
338
+ csv_directory: str,
339
+ text_prompt: str,
340
+ style: str = "",
341
+ mood: str = "",
342
+ **generation_kwargs
343
+ ) -> List[Dict]:
344
+ """
345
+ Process multiple CSV files in batch
346
+
347
+ Args:
348
+ csv_directory: Directory containing CSV files
349
+ text_prompt: Base text prompt for all generations
350
+ style: Art style
351
+ mood: Mood/atmosphere
352
+ **generation_kwargs: Additional generation parameters
353
+
354
+ Returns:
355
+ List of all generation results
356
+ """
357
+ csv_dir = Path(csv_directory)
358
+ if not csv_dir.exists():
359
+ raise ValueError(f"CSV directory not found: {csv_directory}")
360
+
361
+ # Find CSV files
362
+ csv_files = list(csv_dir.glob("*.csv"))
363
+
364
+ if not csv_files:
365
+ raise ValueError(f"No CSV files found in {csv_directory}")
366
+
367
+ logger.info(f"Processing {len(csv_files)} CSV files")
368
+
369
+ all_results = []
370
+ for csv_file in csv_files:
371
+ logger.info(f"Processing: {csv_file.name}")
372
+
373
+ try:
374
+ results = self.generate_image(
375
+ text_prompt=text_prompt,
376
+ style=style,
377
+ mood=mood,
378
+ csv_path=str(csv_file),
379
+ **generation_kwargs
380
+ )
381
+ all_results.extend(results)
382
+
383
+ except Exception as e:
384
+ logger.error(f"Error processing {csv_file.name}: {e}")
385
+ continue
386
+
387
+ logger.info(f"Batch processing complete: {len(all_results)} images generated")
388
+ return all_results
389
+
390
+ def batch_process_formulas(
391
+ self,
392
+ formulas: List[str],
393
+ text_prompt: str,
394
+ style: str = "",
395
+ mood: str = "",
396
+ **generation_kwargs
397
+ ) -> List[Dict]:
398
+ """
399
+ Process multiple mathematical formulas in batch
400
+
401
+ Args:
402
+ formulas: List of mathematical formulas
403
+ text_prompt: Base text prompt for all generations
404
+ style: Art style
405
+ mood: Mood/atmosphere
406
+ **generation_kwargs: Additional generation parameters
407
+
408
+ Returns:
409
+ List of all generation results
410
+ """
411
+ logger.info(f"Processing {len(formulas)} mathematical formulas")
412
+
413
+ all_results = []
414
+ for i, formula in enumerate(formulas):
415
+ logger.info(f"Processing formula {i+1}/{len(formulas)}: {formula}")
416
+
417
+ try:
418
+ results = self.generate_image(
419
+ text_prompt=text_prompt,
420
+ style=style,
421
+ mood=mood,
422
+ formula=formula,
423
+ **generation_kwargs
424
+ )
425
+ all_results.extend(results)
426
+
427
+ except Exception as e:
428
+ logger.error(f"Error processing formula '{formula}': {e}")
429
+ continue
430
+
431
+ logger.info(f"Batch processing complete: {len(all_results)} images generated")
432
+ return all_results
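A minimal sketch of the Phase 2.B class above with both input modes; data/sales.csv is a placeholder, and the formula string assumes evaluate_formula() accepts NumPy-style expressions over x.

from src.generators.compi_phase2b_data_to_image import CompIPhase2BDataToImage

generator = CompIPhase2BDataToImage(device="auto", output_dir="outputs")

# CSV-driven generation (also writes the companion *_data_viz.png)
csv_results = generator.generate_image(
    text_prompt="a city skyline shaped by market waves",
    style="futuristic digital art",
    mood="dynamic",
    csv_path="data/sales.csv",
)

# Formula-driven generation
formula_results = generator.generate_image(
    text_prompt="interference patterns over a calm sea",
    style="abstract",
    formula="np.sin(x) * np.exp(-0.1 * x)",
)
print(csv_results[0]["poetic_description"])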
src/generators/compi_phase2c_emotion_to_image.py ADDED
@@ -0,0 +1,408 @@
1
+ """
2
+ CompI Phase 2.C: Emotional/Contextual Input to Image Generation
3
+
4
+ This module implements emotion-driven AI art generation that combines:
5
+ - Emotion detection and sentiment analysis
6
+ - Contextual mood processing
7
+ - Emoji and text-based emotion recognition
8
+ - Color palette generation based on emotions
9
+ - Intelligent fusion of emotional context with creative prompts
10
+
11
+ Features:
12
+ - Support for preset emotions, custom emotions, and emoji input
13
+ - Automatic sentiment analysis with TextBlob
14
+ - Emotion-to-color palette mapping
15
+ - Contextual prompt enhancement
16
+ - Comprehensive metadata logging and filename conventions
17
+ - Batch processing capabilities for multiple emotional contexts
18
+ """
19
+
20
+ import os
21
+ import sys
22
+ import torch
23
+ import json
24
+ from datetime import datetime
25
+ from typing import Dict, List, Optional, Tuple, Union
26
+ from pathlib import Path
27
+ import logging
28
+
29
+ # Add project root to path
30
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
31
+
32
+ from diffusers import StableDiffusionPipeline
33
+ from PIL import Image
34
+
35
+ from src.utils.emotion_utils import EmotionProcessor, EmotionalPromptEnhancer, EmotionAnalysis, EmotionCategory
36
+ from src.utils.logging_utils import setup_logger
37
+ from src.utils.file_utils import ensure_directory_exists, generate_filename
38
+
39
+ # Setup logging
40
+ logger = setup_logger(__name__)
41
+
42
+ class CompIPhase2CEmotionToImage:
43
+ """
44
+ CompI Phase 2.C: Emotional/Contextual Input to Image Generation System
45
+
46
+ Transforms emotions, moods, and contextual feelings into AI-generated art
47
+ """
48
+
49
+ def __init__(
50
+ self,
51
+ model_name: str = "runwayml/stable-diffusion-v1-5",
52
+ device: str = "auto",
53
+ output_dir: str = "outputs"
54
+ ):
55
+ """
56
+ Initialize the emotion-to-image generation system
57
+
58
+ Args:
59
+ model_name: Stable Diffusion model to use
60
+ device: Device for inference (auto, cpu, cuda)
61
+ output_dir: Directory for saving generated images
62
+ """
63
+ self.model_name = model_name
64
+ self.device = self._setup_device(device)
65
+ self.output_dir = Path(output_dir)
66
+ ensure_directory_exists(self.output_dir)
67
+
68
+ # Initialize components
69
+ self.pipe = None
70
+ self.emotion_processor = EmotionProcessor()
71
+ self.prompt_enhancer = EmotionalPromptEnhancer()
72
+
73
+ logger.info(f"Initialized CompI Phase 2.C on {self.device}")
74
+
75
+ def _setup_device(self, device: str) -> str:
76
+ """Setup and validate device"""
77
+ if device == "auto":
78
+ device = "cuda" if torch.cuda.is_available() else "cpu"
79
+
80
+ if device == "cuda" and not torch.cuda.is_available():
81
+ logger.warning("CUDA requested but not available, falling back to CPU")
82
+ device = "cpu"
83
+
84
+ return device
85
+
86
+ def _load_pipeline(self):
87
+ """Lazy load the Stable Diffusion pipeline"""
88
+ if self.pipe is None:
89
+ logger.info(f"Loading Stable Diffusion model: {self.model_name}")
90
+
91
+ # Custom safety checker that allows creative content
92
+ def dummy_safety_checker(images, **kwargs):
93
+ return images, [False] * len(images)
94
+
95
+ self.pipe = StableDiffusionPipeline.from_pretrained(
96
+ self.model_name,
97
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
98
+ safety_checker=dummy_safety_checker,
99
+ requires_safety_checker=False
100
+ )
101
+
102
+ self.pipe = self.pipe.to(self.device)
103
+ self.pipe.enable_attention_slicing()
104
+
105
+ if self.device == "cuda":
106
+ self.pipe.enable_model_cpu_offload()
107
+
108
+ logger.info("Stable Diffusion pipeline loaded successfully")
109
+
110
+ def analyze_emotion(
111
+ self,
112
+ emotion_input: str,
113
+ emotion_type: str = "auto",
114
+ contextual_text: Optional[str] = None
115
+ ) -> EmotionAnalysis:
116
+ """
117
+ Comprehensive emotion analysis
118
+
119
+ Args:
120
+ emotion_input: Emotion input (preset, custom, emoji, or text)
121
+ emotion_type: Type of input ('preset', 'custom', 'emoji', 'text', 'auto')
122
+ contextual_text: Additional contextual text for analysis
123
+
124
+ Returns:
125
+ EmotionAnalysis object with complete analysis
126
+ """
127
+ logger.info(f"Analyzing emotion input: {emotion_input}")
128
+
129
+ # Combine inputs for analysis
130
+ analysis_text = emotion_input
131
+ if contextual_text:
132
+ analysis_text += f" {contextual_text}"
133
+
134
+ # Determine selected emotion for preset types
135
+ selected_emotion = None
136
+ if emotion_type == "preset" or (emotion_type == "auto" and emotion_input.lower() in self.emotion_processor.preset_emotions):
137
+ selected_emotion = emotion_input.lower()
138
+
139
+ # Perform emotion analysis
140
+ emotion_analysis = self.emotion_processor.analyze_emotion(analysis_text, selected_emotion)
141
+
142
+ return emotion_analysis
143
+
144
+ def generate_image(
145
+ self,
146
+ text_prompt: str,
147
+ style: str = "",
148
+ emotion_input: str = "",
149
+ emotion_type: str = "auto",
150
+ contextual_text: str = "",
151
+ enhancement_strength: float = 0.7,
152
+ num_images: int = 1,
153
+ height: int = 512,
154
+ width: int = 512,
155
+ num_inference_steps: int = 30,
156
+ guidance_scale: float = 7.5,
157
+ seed: Optional[int] = None
158
+ ) -> List[Dict]:
159
+ """
160
+ Generate images with emotional conditioning
161
+
162
+ Args:
163
+ text_prompt: Base text prompt
164
+ style: Art style
165
+ emotion_input: Emotion input (preset, custom, emoji, or descriptive text)
166
+ emotion_type: Type of emotion input
167
+ contextual_text: Additional contextual description
168
+ enhancement_strength: How strongly to apply emotion (0-1)
169
+ num_images: Number of images to generate
170
+ height: Image height
171
+ width: Image width
172
+ num_inference_steps: Number of diffusion steps
173
+ guidance_scale: Guidance scale for generation
174
+ seed: Random seed for reproducibility
175
+
176
+ Returns:
177
+ List of generation results with metadata
178
+ """
179
+ self._load_pipeline()
180
+
181
+ # Analyze emotion if provided
182
+ emotion_analysis = None
183
+ if emotion_input.strip():
184
+ emotion_analysis = self.analyze_emotion(emotion_input, emotion_type, contextual_text)
185
+
186
+ # Create enhanced prompt
187
+ if emotion_analysis:
188
+ enhanced_prompt = self.prompt_enhancer.enhance_prompt_with_emotion(
189
+ text_prompt, style, emotion_analysis, enhancement_strength
190
+ )
191
+ else:
192
+ enhanced_prompt = text_prompt
193
+ if style:
194
+ enhanced_prompt += f", {style}"
195
+
196
+ logger.info(f"Generating {num_images} image(s) with enhanced prompt")
197
+
198
+ results = []
199
+
200
+ for i in range(num_images):
201
+ # Set up generation parameters
202
+ current_seed = seed if seed is not None else torch.seed()
203
+ generator = torch.Generator(device=self.device).manual_seed(current_seed)
204
+
205
+ # Generate image
206
+ with torch.autocast(self.device) if self.device == "cuda" else torch.no_grad():
207
+ result = self.pipe(
208
+ enhanced_prompt,
209
+ height=height,
210
+ width=width,
211
+ num_inference_steps=num_inference_steps,
212
+ guidance_scale=guidance_scale,
213
+ generator=generator
214
+ )
215
+
216
+ image = result.images[0]
217
+
218
+ # Create metadata
219
+ metadata = {
220
+ "timestamp": datetime.now().isoformat(),
221
+ "text_prompt": text_prompt,
222
+ "style": style,
223
+ "emotion_input": emotion_input,
224
+ "emotion_type": emotion_type,
225
+ "contextual_text": contextual_text,
226
+ "enhancement_strength": enhancement_strength,
227
+ "enhanced_prompt": enhanced_prompt,
228
+ "generation_params": {
229
+ "height": height,
230
+ "width": width,
231
+ "num_inference_steps": num_inference_steps,
232
+ "guidance_scale": guidance_scale,
233
+ "seed": current_seed,
234
+ "model": self.model_name
235
+ },
236
+ "device": self.device,
237
+ "phase": "2C_emotion_to_image"
238
+ }
239
+
240
+ # Add emotion analysis to metadata
241
+ if emotion_analysis:
242
+ metadata["emotion_analysis"] = emotion_analysis.to_dict()
243
+ metadata["emotion_tags"] = self.prompt_enhancer.generate_emotion_tags(emotion_analysis)
244
+
245
+ # Generate filename
246
+ filename = self._generate_filename(
247
+ text_prompt, style, emotion_analysis, current_seed, i + 1
248
+ )
249
+
250
+ # Save image and metadata
251
+ image_path = self.output_dir / f"{filename}.png"
252
+ metadata_path = self.output_dir / f"{filename}_metadata.json"
253
+
254
+ image.save(image_path)
255
+ with open(metadata_path, 'w') as f:
256
+ json.dump(metadata, f, indent=2)
257
+
258
+ results.append({
259
+ "image": image,
260
+ "image_path": str(image_path),
261
+ "metadata_path": str(metadata_path),
262
+ "metadata": metadata,
263
+ "filename": filename,
264
+ "emotion_analysis": emotion_analysis
265
+ })
266
+
267
+ logger.info(f"Generated image {i+1}/{num_images}: {filename}")
268
+
269
+ return results
270
+
271
+ def _generate_filename(
272
+ self,
273
+ prompt: str,
274
+ style: str,
275
+ emotion_analysis: Optional[EmotionAnalysis],
276
+ seed: int,
277
+ variation: int
278
+ ) -> str:
279
+ """Generate descriptive filename following CompI conventions"""
280
+
281
+ # Create prompt slug (first 5 words)
282
+ prompt_words = prompt.lower().replace(',', '').split()[:5]
283
+ prompt_slug = "_".join(prompt_words)
284
+
285
+ # Create style slug
286
+ style_slug = style.replace(" ", "").replace(",", "")[:10] if style else "standard"
287
+
288
+ # Create emotion slug
289
+ if emotion_analysis:
290
+ emotion_slug = f"{emotion_analysis.primary_emotion.value}_{emotion_analysis.intensity_level}"[:15]
291
+ else:
292
+ emotion_slug = "neutral"
293
+
294
+ # Timestamp
295
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
296
+
297
+ # Combine all elements
298
+ filename = f"{prompt_slug}_{style_slug}_{emotion_slug}_{timestamp}_seed{seed}_EMO_v{variation}"
299
+
300
+ return filename
301
+
302
+ def batch_process_emotions(
303
+ self,
304
+ text_prompt: str,
305
+ style: str,
306
+ emotions: List[str],
307
+ emotion_type: str = "auto",
308
+ **generation_kwargs
309
+ ) -> List[Dict]:
310
+ """
311
+ Process multiple emotions in batch
312
+
313
+ Args:
314
+ text_prompt: Base text prompt for all generations
315
+ style: Art style
316
+ emotions: List of emotions to process
317
+ emotion_type: Type of emotion input
318
+ **generation_kwargs: Additional generation parameters
319
+
320
+ Returns:
321
+ List of all generation results
322
+ """
323
+ logger.info(f"Processing {len(emotions)} emotions in batch")
324
+
325
+ all_results = []
326
+ for i, emotion in enumerate(emotions):
327
+ logger.info(f"Processing emotion {i+1}/{len(emotions)}: {emotion}")
328
+
329
+ try:
330
+ results = self.generate_image(
331
+ text_prompt=text_prompt,
332
+ style=style,
333
+ emotion_input=emotion,
334
+ emotion_type=emotion_type,
335
+ **generation_kwargs
336
+ )
337
+ all_results.extend(results)
338
+
339
+ except Exception as e:
340
+ logger.error(f"Error processing emotion '{emotion}': {e}")
341
+ continue
342
+
343
+ logger.info(f"Batch processing complete: {len(all_results)} images generated")
344
+ return all_results
345
+
346
+ def generate_emotion_palette_art(
347
+ self,
348
+ text_prompt: str,
349
+ style: str,
350
+ emotion_input: str,
351
+ use_color_conditioning: bool = True,
352
+ **generation_kwargs
353
+ ) -> List[Dict]:
354
+ """
355
+ Generate art using emotion-derived color palettes
356
+
357
+ Args:
358
+ text_prompt: Base text prompt
359
+ style: Art style
360
+ emotion_input: Emotion input
361
+ use_color_conditioning: Whether to add color palette to prompt
362
+ **generation_kwargs: Additional generation parameters
363
+
364
+ Returns:
365
+ List of generation results with color palette conditioning
366
+ """
367
+ # Analyze emotion to get color palette
368
+ emotion_analysis = self.analyze_emotion(emotion_input)
369
+
370
+ # Enhance prompt with color information if requested
371
+ if use_color_conditioning and emotion_analysis:
372
+ color_names = self._hex_to_color_names(emotion_analysis.color_palette)
373
+ color_prompt = f"with a color palette of {', '.join(color_names)}"
374
+ enhanced_text_prompt = f"{text_prompt}, {color_prompt}"
375
+ else:
376
+ enhanced_text_prompt = text_prompt
377
+
378
+ return self.generate_image(
379
+ text_prompt=enhanced_text_prompt,
380
+ style=style,
381
+ emotion_input=emotion_input,
382
+ **generation_kwargs
383
+ )
384
+
385
+ def _hex_to_color_names(self, hex_colors: List[str]) -> List[str]:
386
+ """Convert hex colors to approximate color names"""
387
+ color_mapping = {
388
+ "#FFD700": "golden", "#FFA500": "orange", "#FF69B4": "pink",
389
+ "#00CED1": "turquoise", "#32CD32": "lime", "#4169E1": "blue",
390
+ "#6495ED": "cornflower", "#708090": "slate", "#2F4F4F": "dark slate",
391
+ "#191970": "midnight blue", "#DC143C": "crimson", "#B22222": "firebrick",
392
+ "#8B0000": "dark red", "#FF4500": "orange red", "#FF6347": "tomato",
393
+ "#800080": "purple", "#4B0082": "indigo", "#2E2E2E": "dark gray",
394
+ "#696969": "dim gray", "#A9A9A9": "dark gray", "#FF1493": "deep pink",
395
+ "#FFB6C1": "light pink", "#FFC0CB": "pink", "#FFFF00": "yellow",
396
+ "#C71585": "medium violet", "#DB7093": "pale violet", "#20B2AA": "light sea green",
397
+ "#48D1CC": "medium turquoise", "#40E0D0": "turquoise", "#AFEEEE": "pale turquoise",
398
+ "#9370DB": "medium purple", "#8A2BE2": "blue violet", "#7B68EE": "medium slate blue",
399
+ "#6A5ACD": "slate blue", "#483D8B": "dark slate blue", "#808080": "gray",
400
+ "#C0C0C0": "silver", "#D3D3D3": "light gray", "#DCDCDC": "gainsboro"
401
+ }
402
+
403
+ color_names = []
404
+ for hex_color in hex_colors:
405
+ color_name = color_mapping.get(hex_color.upper(), "colorful")
406
+ color_names.append(color_name)
407
+
408
+ return color_names
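A minimal usage sketch for the emotion-driven methods above. The class and module names are assumed to follow the naming pattern of the other Phase 2 generators in this commit, and the prompt, style, and emotion values are placeholders.

from src.generators.compi_phase2c_emotion_to_image import CompIPhase2CEmotionToImage  # assumed module/class name

generator = CompIPhase2CEmotionToImage()

# Batch one prompt across several emotions (each entry goes through generate_image)
results = generator.batch_process_emotions(
    text_prompt="a quiet harbor at dawn",
    style="impressionist oil painting",
    emotions=["joy", "melancholy", "serenity"],
    emotion_type="auto",
)

# Or let the detected emotion's color palette condition the prompt wording
palette_results = generator.generate_emotion_palette_art(
    text_prompt="a quiet harbor at dawn",
    style="impressionist oil painting",
    emotion_input="serenity",
    use_color_conditioning=True,
)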
src/generators/compi_phase2d_realtime_to_image.py ADDED
@@ -0,0 +1,483 @@
1
+ """
2
+ CompI Phase 2.D: Real-Time Data Feeds to Image Generation
3
+
4
+ This module implements real-time data-driven AI art generation that combines:
5
+ - Weather data integration from multiple APIs
6
+ - News headlines and RSS feed processing
7
+ - Financial market data incorporation
8
+ - Real-time context analysis and summarization
9
+ - Intelligent fusion of real-time data with creative prompts
10
+
11
+ Features:
12
+ - Support for weather, news, and financial data feeds
13
+ - Automatic data caching and rate limiting
14
+ - Context-aware prompt enhancement
15
+ - Temporal and thematic analysis
16
+ - Comprehensive metadata logging and filename conventions
17
+ - Batch processing capabilities for multiple data sources
18
+ """
19
+
20
+ import os
21
+ import sys
22
+ import torch
23
+ import json
24
+ from datetime import datetime
25
+ from typing import Dict, List, Optional, Tuple, Union
26
+ from pathlib import Path
27
+ import logging
28
+
29
+ # Add project root to path
30
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
31
+
32
+ from diffusers import StableDiffusionPipeline
33
+ from PIL import Image
34
+
35
+ from src.utils.realtime_data_utils import (
36
+ RealTimeDataProcessor, RealTimeContext, DataFeedType, RealTimeDataPoint
37
+ )
38
+ from src.utils.logging_utils import setup_logger
39
+ from src.utils.file_utils import ensure_directory_exists, generate_filename
40
+
41
+ # Setup logging
42
+ logger = setup_logger(__name__)
43
+
44
+ class CompIPhase2DRealTimeToImage:
45
+ """
46
+ CompI Phase 2.D: Real-Time Data Feeds to Image Generation System
47
+
48
+ Transforms real-time data feeds into AI-generated art
49
+ """
50
+
51
+ def __init__(
52
+ self,
53
+ model_name: str = "runwayml/stable-diffusion-v1-5",
54
+ device: str = "auto",
55
+ output_dir: str = "outputs"
56
+ ):
57
+ """
58
+ Initialize the real-time data-to-image generation system
59
+
60
+ Args:
61
+ model_name: Stable Diffusion model to use
62
+ device: Device for inference (auto, cpu, cuda)
63
+ output_dir: Directory for saving generated images
64
+ """
65
+ self.model_name = model_name
66
+ self.device = self._setup_device(device)
67
+ self.output_dir = Path(output_dir)
68
+ ensure_directory_exists(self.output_dir)
69
+
70
+ # Initialize components
71
+ self.pipe = None
72
+ self.data_processor = RealTimeDataProcessor()
73
+
74
+ logger.info(f"Initialized CompI Phase 2.D on {self.device}")
75
+
76
+ def _setup_device(self, device: str) -> str:
77
+ """Setup and validate device"""
78
+ if device == "auto":
79
+ device = "cuda" if torch.cuda.is_available() else "cpu"
80
+
81
+ if device == "cuda" and not torch.cuda.is_available():
82
+ logger.warning("CUDA requested but not available, falling back to CPU")
83
+ device = "cpu"
84
+
85
+ return device
86
+
87
+ def _load_pipeline(self):
88
+ """Lazy load the Stable Diffusion pipeline"""
89
+ if self.pipe is None:
90
+ logger.info(f"Loading Stable Diffusion model: {self.model_name}")
91
+
92
+ # Custom safety checker that allows creative content
93
+ def dummy_safety_checker(images, **kwargs):
94
+ return images, [False] * len(images)
95
+
96
+ self.pipe = StableDiffusionPipeline.from_pretrained(
97
+ self.model_name,
98
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
99
+ safety_checker=dummy_safety_checker,
100
+ requires_safety_checker=False
101
+ )
102
+
103
+ if self.device == "cuda":
+ # enable_model_cpu_offload manages device placement itself, so the pipeline
+ # is not moved to the GPU first (doing both can negate the memory savings)
+ self.pipe.enable_model_cpu_offload()
+ else:
+ self.pipe = self.pipe.to(self.device)
+
+ self.pipe.enable_attention_slicing()
108
+
109
+ logger.info("Stable Diffusion pipeline loaded successfully")
110
+
111
+ def fetch_realtime_context(
112
+ self,
113
+ include_weather: bool = False,
114
+ weather_city: str = "New York",
115
+ weather_api_key: Optional[str] = None,
116
+ include_news: bool = False,
117
+ news_category: str = "general",
118
+ max_news: int = 3,
119
+ news_api_key: Optional[str] = None,
120
+ include_financial: bool = False
121
+ ) -> RealTimeContext:
122
+ """
123
+ Fetch real-time context from various data sources
124
+
125
+ Args:
126
+ include_weather: Whether to include weather data
127
+ weather_city: City for weather data
128
+ weather_api_key: Optional weather API key
129
+ include_news: Whether to include news data
130
+ news_category: Category of news to fetch
131
+ max_news: Maximum number of news items
132
+ news_api_key: Optional news API key
133
+ include_financial: Whether to include financial data
134
+
135
+ Returns:
136
+ RealTimeContext with processed data
137
+ """
138
+ logger.info("Fetching real-time context for art generation")
139
+
140
+ return self.data_processor.fetch_realtime_context(
141
+ include_weather=include_weather,
142
+ weather_city=weather_city,
143
+ include_news=include_news,
144
+ news_category=news_category,
145
+ max_news=max_news,
146
+ include_financial=include_financial,
147
+ weather_api_key=weather_api_key,
148
+ news_api_key=news_api_key
149
+ )
150
+
151
+ def generate_image(
152
+ self,
153
+ text_prompt: str,
154
+ style: str = "",
155
+ mood: str = "",
156
+ include_weather: bool = False,
157
+ weather_city: str = "New York",
158
+ weather_api_key: Optional[str] = None,
159
+ include_news: bool = False,
160
+ news_category: str = "general",
161
+ max_news: int = 3,
162
+ news_api_key: Optional[str] = None,
163
+ include_financial: bool = False,
164
+ context_strength: float = 0.7,
165
+ num_images: int = 1,
166
+ height: int = 512,
167
+ width: int = 512,
168
+ num_inference_steps: int = 30,
169
+ guidance_scale: float = 7.5,
170
+ seed: Optional[int] = None
171
+ ) -> List[Dict]:
172
+ """
173
+ Generate images with real-time data conditioning
174
+
175
+ Args:
176
+ text_prompt: Base text prompt
177
+ style: Art style
178
+ mood: Mood/atmosphere
179
+ include_weather: Whether to include weather data
180
+ weather_city: City for weather data
181
+ weather_api_key: Optional weather API key
182
+ include_news: Whether to include news data
183
+ news_category: Category of news to fetch
184
+ max_news: Maximum number of news items
185
+ news_api_key: Optional news API key
186
+ include_financial: Whether to include financial data
187
+ context_strength: How strongly to apply real-time context (0-1)
188
+ num_images: Number of images to generate
189
+ height: Image height
190
+ width: Image width
191
+ num_inference_steps: Number of diffusion steps
192
+ guidance_scale: Guidance scale for generation
193
+ seed: Random seed for reproducibility
194
+
195
+ Returns:
196
+ List of generation results with metadata
197
+ """
198
+ self._load_pipeline()
199
+
200
+ # Fetch real-time context if any data sources are enabled
201
+ realtime_context = None
202
+ if include_weather or include_news or include_financial:
203
+ realtime_context = self.fetch_realtime_context(
204
+ include_weather=include_weather,
205
+ weather_city=weather_city,
206
+ weather_api_key=weather_api_key,
207
+ include_news=include_news,
208
+ news_category=news_category,
209
+ max_news=max_news,
210
+ news_api_key=news_api_key,
211
+ include_financial=include_financial
212
+ )
213
+
214
+ # Create enhanced prompt
215
+ enhanced_prompt = self._create_enhanced_prompt(
216
+ text_prompt, style, mood, realtime_context, context_strength
217
+ )
218
+
219
+ logger.info(f"Generating {num_images} image(s) with real-time context")
220
+
221
+ results = []
222
+
223
+ for i in range(num_images):
224
+ # Set up generation parameters
225
+ current_seed = (seed + i) if seed is not None else torch.seed()  # offset per variation so multi-image batches differ
226
+ generator = torch.Generator(device=self.device).manual_seed(current_seed)
227
+
228
+ # Generate image
229
+ with torch.autocast(self.device) if self.device == "cuda" else torch.no_grad():
230
+ result = self.pipe(
231
+ enhanced_prompt,
232
+ height=height,
233
+ width=width,
234
+ num_inference_steps=num_inference_steps,
235
+ guidance_scale=guidance_scale,
236
+ generator=generator
237
+ )
238
+
239
+ image = result.images[0]
240
+
241
+ # Create metadata
242
+ metadata = {
243
+ "timestamp": datetime.now().isoformat(),
244
+ "text_prompt": text_prompt,
245
+ "style": style,
246
+ "mood": mood,
247
+ "enhanced_prompt": enhanced_prompt,
248
+ "context_strength": context_strength,
249
+ "data_sources": {
250
+ "weather": include_weather,
251
+ "news": include_news,
252
+ "financial": include_financial
253
+ },
254
+ "generation_params": {
255
+ "height": height,
256
+ "width": width,
257
+ "num_inference_steps": num_inference_steps,
258
+ "guidance_scale": guidance_scale,
259
+ "seed": current_seed,
260
+ "model": self.model_name
261
+ },
262
+ "device": self.device,
263
+ "phase": "2D_realtime_to_image"
264
+ }
265
+
266
+ # Add real-time context to metadata
267
+ if realtime_context:
268
+ metadata["realtime_context"] = realtime_context.to_dict()
269
+
270
+ # Generate filename
271
+ filename = self._generate_filename(
272
+ text_prompt, style, realtime_context, current_seed, i + 1
273
+ )
274
+
275
+ # Save image and metadata
276
+ image_path = self.output_dir / f"{filename}.png"
277
+ metadata_path = self.output_dir / f"{filename}_metadata.json"
278
+
279
+ image.save(image_path)
280
+ with open(metadata_path, 'w') as f:
281
+ json.dump(metadata, f, indent=2)
282
+
283
+ results.append({
284
+ "image": image,
285
+ "image_path": str(image_path),
286
+ "metadata_path": str(metadata_path),
287
+ "metadata": metadata,
288
+ "filename": filename,
289
+ "realtime_context": realtime_context
290
+ })
291
+
292
+ logger.info(f"Generated image {i+1}/{num_images}: {filename}")
293
+
294
+ return results
295
+
296
+ def _create_enhanced_prompt(
297
+ self,
298
+ text_prompt: str,
299
+ style: str,
300
+ mood: str,
301
+ realtime_context: Optional[RealTimeContext],
302
+ context_strength: float
303
+ ) -> str:
304
+ """
305
+ Create enhanced prompt with real-time context
306
+
307
+ Args:
308
+ text_prompt: Base text prompt
309
+ style: Art style
310
+ mood: Mood/atmosphere
311
+ realtime_context: Real-time context data
312
+ context_strength: How strongly to apply context (0-1)
313
+
314
+ Returns:
315
+ Enhanced prompt with real-time context
316
+ """
317
+ enhanced_prompt = text_prompt.strip()
318
+
319
+ # Add style
320
+ if style:
321
+ enhanced_prompt += f", {style}"
322
+
323
+ # Add mood
324
+ if mood:
325
+ enhanced_prompt += f", {mood}"
326
+
327
+ # Add real-time context based on strength
328
+ if realtime_context and context_strength > 0:
329
+ if context_strength > 0.7:
330
+ # Strong context integration
331
+ enhanced_prompt += f", {realtime_context.artistic_inspiration}"
332
+ if realtime_context.mood_indicators:
333
+ mood_text = ", ".join(realtime_context.mood_indicators[:2])
334
+ enhanced_prompt += f", with {mood_text} influences"
335
+
336
+ elif context_strength > 0.4:
337
+ # Moderate context integration
338
+ enhanced_prompt += f", {realtime_context.artistic_inspiration}"
339
+
340
+ else:
341
+ # Subtle context integration
342
+ if realtime_context.key_themes:
343
+ theme = realtime_context.key_themes[0]
344
+ enhanced_prompt += f", inspired by {theme}"
345
+
346
+ return enhanced_prompt
347
+
348
+ def _generate_filename(
349
+ self,
350
+ prompt: str,
351
+ style: str,
352
+ realtime_context: Optional[RealTimeContext],
353
+ seed: int,
354
+ variation: int
355
+ ) -> str:
356
+ """Generate descriptive filename following CompI conventions"""
357
+
358
+ # Create prompt slug (first 5 words)
359
+ prompt_words = prompt.lower().replace(',', '').split()[:5]
360
+ prompt_slug = "_".join(prompt_words)
361
+
362
+ # Create style slug
363
+ style_slug = style.replace(" ", "").replace(",", "")[:10] if style else "standard"
364
+
365
+ # Create context slug
366
+ if realtime_context and realtime_context.data_points:
367
+ context_types = []
368
+ for dp in realtime_context.data_points:
369
+ context_types.append(dp.feed_type.value[:3]) # First 3 chars
370
+ context_slug = "_".join(set(context_types))[:15]
371
+ else:
372
+ context_slug = "static"
373
+
374
+ # Timestamp
375
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
376
+
377
+ # Combine all elements
378
+ filename = f"{prompt_slug}_{style_slug}_{context_slug}_{timestamp}_seed{seed}_RTDATA_v{variation}"
379
+
380
+ return filename
381
+
382
+ def batch_process_data_sources(
383
+ self,
384
+ text_prompt: str,
385
+ style: str,
386
+ data_source_configs: List[Dict],
387
+ **generation_kwargs
388
+ ) -> List[Dict]:
389
+ """
390
+ Process multiple data source configurations in batch
391
+
392
+ Args:
393
+ text_prompt: Base text prompt for all generations
394
+ style: Art style
395
+ data_source_configs: List of data source configuration dictionaries
396
+ **generation_kwargs: Additional generation parameters
397
+
398
+ Returns:
399
+ List of all generation results
400
+ """
401
+ logger.info(f"Processing {len(data_source_configs)} data source configurations")
402
+
403
+ all_results = []
404
+ for i, config in enumerate(data_source_configs):
405
+ logger.info(f"Processing configuration {i+1}/{len(data_source_configs)}")
406
+
407
+ try:
408
+ results = self.generate_image(
409
+ text_prompt=text_prompt,
410
+ style=style,
411
+ **config,
412
+ **generation_kwargs
413
+ )
414
+ all_results.extend(results)
415
+
416
+ except Exception as e:
417
+ logger.error(f"Error processing configuration {i+1}: {e}")
418
+ continue
419
+
420
+ logger.info(f"Batch processing complete: {len(all_results)} images generated")
421
+ return all_results
422
+
423
+ def generate_temporal_series(
424
+ self,
425
+ text_prompt: str,
426
+ style: str,
427
+ data_config: Dict,
428
+ time_intervals: List[int],
429
+ **generation_kwargs
430
+ ) -> List[Dict]:
431
+ """
432
+ Generate a series of images with real-time data at different time intervals
433
+
434
+ Args:
435
+ text_prompt: Base text prompt
436
+ style: Art style
437
+ data_config: Data source configuration
438
+ time_intervals: List of time intervals in minutes between generations
439
+ **generation_kwargs: Additional generation parameters
440
+
441
+ Returns:
442
+ List of generation results across time
443
+ """
444
+ import time
445
+
446
+ logger.info(f"Generating temporal series with {len(time_intervals)} intervals")
447
+
448
+ all_results = []
449
+
450
+ for i, interval in enumerate(time_intervals):
451
+ if i > 0: # Don't wait before first generation
452
+ logger.info(f"Waiting {interval} minutes before next generation...")
453
+ time.sleep(interval * 60) # Convert minutes to seconds
454
+
455
+ logger.info(f"Generating image {i+1}/{len(time_intervals)}")
456
+
457
+ try:
458
+ # Clear cache to ensure fresh data
459
+ self.data_processor.cache.cache.clear()
460
+
461
+ results = self.generate_image(
462
+ text_prompt=text_prompt,
463
+ style=style,
464
+ **data_config,
465
+ **generation_kwargs
466
+ )
467
+
468
+ # Add temporal metadata
469
+ for result in results:
470
+ result["metadata"]["temporal_series"] = {
471
+ "series_index": i,
472
+ "total_in_series": len(time_intervals),
473
+ "interval_minutes": interval if i > 0 else 0
474
+ }
475
+
476
+ all_results.extend(results)
477
+
478
+ except Exception as e:
479
+ logger.error(f"Error in temporal generation {i+1}: {e}")
480
+ continue
481
+
482
+ logger.info(f"Temporal series complete: {len(all_results)} images generated")
483
+ return all_results
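A short usage sketch for CompIPhase2DRealTimeToImage as defined above; the city, prompt, and seed are placeholder values, and API keys are left to the processor's defaults as the signature allows.

from src.generators.compi_phase2d_realtime_to_image import CompIPhase2DRealTimeToImage

rt = CompIPhase2DRealTimeToImage(output_dir="outputs")

# One image conditioned on live weather plus a couple of headlines
results = rt.generate_image(
    text_prompt="a city skyline reflected in rain",
    style="cinematic digital painting",
    include_weather=True,
    weather_city="London",   # placeholder city
    include_news=True,
    max_news=2,
    context_strength=0.6,
    num_images=1,
    seed=42,
)
for r in results:
    print(r["image_path"], "->", r["metadata"]["enhanced_prompt"])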
src/generators/compi_phase2e_refimg_to_image.py ADDED
@@ -0,0 +1,578 @@
1
+ """
2
+ CompI Phase 2.E: Style Reference/Example Image to AI Art Generation
3
+
4
+ This module implements multimodal AI art generation that combines:
5
+ - Text prompts with style and mood conditioning
6
+ - Reference image style transfer and guidance
7
+ - Image-to-image generation with controllable strength
8
+ - Support for both local files and web URLs
9
+ - Advanced style analysis and prompt enhancement
10
+
11
+ Features:
12
+ - Support for various image formats and web sources
13
+ - Real-time image analysis and style suggestion
14
+ - Controllable reference strength for creative flexibility
15
+ - Comprehensive metadata logging and filename conventions
16
+ - Batch processing capabilities with multiple variations
17
+ """
18
+
19
+ import os
20
+ import sys
21
+ import torch
22
+ import json
23
+ from datetime import datetime
24
+ from typing import Dict, List, Optional, Tuple, Union
25
+ from pathlib import Path
26
+ import logging
27
+
28
+ # Add project root to path
29
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
30
+
31
+ from diffusers import StableDiffusionImg2ImgPipeline, StableDiffusionPipeline
32
+ from PIL import Image
33
+ import numpy as np
34
+
35
+ from src.utils.image_utils import ImageProcessor, StyleAnalyzer
36
+ from src.utils.logging_utils import setup_logger
37
+ from src.utils.file_utils import ensure_directory_exists, generate_filename
38
+ from src.config import (
39
+ STABLE_DIFFUSION_IMG2IMG_MODEL,
40
+ OUTPUTS_DIR,
41
+ DEFAULT_IMAGE_SIZE,
42
+ DEFAULT_INFERENCE_STEPS,
43
+ DEFAULT_GUIDANCE_SCALE
44
+ )
45
+
46
+ # Setup logging
47
+ logger = setup_logger(__name__)
48
+
49
+ class CompIPhase2ERefImageToImage:
50
+ """
51
+ CompI Phase 2.E: Style Reference/Example Image to AI Art Generation System
52
+
53
+ Combines text prompts with reference image style guidance for enhanced creativity
54
+ """
55
+
56
+ def __init__(
57
+ self,
58
+ model_name: str = STABLE_DIFFUSION_IMG2IMG_MODEL,
59
+ device: Optional[str] = None,
60
+ enable_attention_slicing: bool = True,
61
+ enable_memory_efficient_attention: bool = True
62
+ ):
63
+ """
64
+ Initialize the CompI Phase 2.E system
65
+
66
+ Args:
67
+ model_name: Hugging Face model identifier
68
+ device: Device to run on ('cuda', 'cpu', or None for auto)
69
+ enable_attention_slicing: Enable attention slicing for memory efficiency
70
+ enable_memory_efficient_attention: Enable memory efficient attention
71
+ """
72
+ self.model_name = model_name
73
+ self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
74
+
75
+ # Initialize components
76
+ self.image_processor = ImageProcessor()
77
+ self.style_analyzer = StyleAnalyzer()
78
+
79
+ # Initialize pipelines (lazy loading)
80
+ self._img2img_pipeline = None
81
+ self._txt2img_pipeline = None
82
+
83
+ # Configuration
84
+ self.enable_attention_slicing = enable_attention_slicing
85
+ self.enable_memory_efficient_attention = enable_memory_efficient_attention
86
+
87
+ logger.info(f"Initialized CompI Phase 2.E on device: {self.device}")
88
+
89
+ @property
90
+ def img2img_pipeline(self) -> StableDiffusionImg2ImgPipeline:
91
+ """Lazy load img2img pipeline"""
92
+ if self._img2img_pipeline is None:
93
+ logger.info(f"Loading img2img pipeline: {self.model_name}")
94
+ self._img2img_pipeline = StableDiffusionImg2ImgPipeline.from_pretrained(
95
+ self.model_name,
96
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
97
+ safety_checker=None, # Disabled for creative use
98
+ requires_safety_checker=False
99
+ )
100
+ self._img2img_pipeline = self._img2img_pipeline.to(self.device)
101
+
102
+ if self.enable_attention_slicing:
103
+ self._img2img_pipeline.enable_attention_slicing()
104
+ if self.enable_memory_efficient_attention and hasattr(self._img2img_pipeline, 'enable_memory_efficient_attention'):
105
+ self._img2img_pipeline.enable_memory_efficient_attention()
106
+
107
+ return self._img2img_pipeline
108
+
109
+ @property
110
+ def txt2img_pipeline(self) -> StableDiffusionPipeline:
111
+ """Lazy load txt2img pipeline for fallback"""
112
+ if self._txt2img_pipeline is None:
113
+ logger.info(f"Loading txt2img pipeline: {self.model_name}")
114
+ self._txt2img_pipeline = StableDiffusionPipeline.from_pretrained(
115
+ self.model_name,
116
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
117
+ safety_checker=None, # Disabled for creative use
118
+ requires_safety_checker=False
119
+ )
120
+ self._txt2img_pipeline = self._txt2img_pipeline.to(self.device)
121
+
122
+ if self.enable_attention_slicing:
123
+ self._txt2img_pipeline.enable_attention_slicing()
124
+ if self.enable_memory_efficient_attention and hasattr(self._txt2img_pipeline, 'enable_memory_efficient_attention'):
125
+ self._txt2img_pipeline.enable_memory_efficient_attention()
126
+
127
+ return self._txt2img_pipeline
128
+
129
+ def load_reference_image(
130
+ self,
131
+ source: Union[str, Path, Image.Image],
132
+ preprocess: bool = True
133
+ ) -> Optional[Tuple[Image.Image, Dict]]:
134
+ """
135
+ Load and analyze reference image from various sources
136
+
137
+ Args:
138
+ source: Image source (file path, URL, or PIL Image)
139
+ preprocess: Whether to preprocess the image
140
+
141
+ Returns:
142
+ Tuple of (processed_image, analysis_results) or None if failed
143
+ """
144
+ try:
145
+ # Load image based on source type
146
+ if isinstance(source, Image.Image):
147
+ image = source.convert('RGB')
148
+ source_info = "PIL Image object"
149
+ elif isinstance(source, (str, Path)):
150
+ source_str = str(source)
151
+ if source_str.startswith(('http://', 'https://')):
152
+ image = self.image_processor.load_image_from_url(source_str)
153
+ source_info = f"URL: {source_str}"
154
+ else:
155
+ image = self.image_processor.load_image_from_file(source_str)
156
+ source_info = f"File: {source_str}"
157
+
158
+ if image is None:
159
+ return None
160
+ else:
161
+ logger.error(f"Unsupported source type: {type(source)}")
162
+ return None
163
+
164
+ # Preprocess if requested
165
+ if preprocess:
166
+ image = self.image_processor.preprocess_image(image, DEFAULT_IMAGE_SIZE)
167
+
168
+ # Analyze image properties
169
+ properties = self.image_processor.analyze_image_properties(image)
170
+ style_suggestions = self.style_analyzer.suggest_style_keywords(properties)
171
+ image_hash = self.image_processor.generate_image_hash(image)
172
+
173
+ analysis = {
174
+ 'source': source_info,
175
+ 'properties': properties,
176
+ 'style_suggestions': style_suggestions,
177
+ 'hash': image_hash,
178
+ 'processed_size': image.size
179
+ }
180
+
181
+ logger.info(f"Successfully loaded and analyzed reference image: {analysis}")
182
+ return image, analysis
183
+
184
+ except Exception as e:
185
+ logger.error(f"Error loading reference image: {e}")
186
+ return None
187
+
188
+ def enhance_prompt_with_style(
189
+ self,
190
+ base_prompt: str,
191
+ style: str = "",
192
+ mood: str = "",
193
+ style_suggestions: List[str] = None
194
+ ) -> str:
195
+ """
196
+ Enhance prompt with style information from reference image
197
+
198
+ Args:
199
+ base_prompt: Base text prompt
200
+ style: Additional style keywords
201
+ mood: Mood/atmosphere keywords
202
+ style_suggestions: Suggested keywords from image analysis
203
+
204
+ Returns:
205
+ Enhanced prompt string
206
+ """
207
+ try:
208
+ prompt_parts = [base_prompt.strip()]
209
+
210
+ # Add explicit style
211
+ if style.strip():
212
+ prompt_parts.append(style.strip())
213
+
214
+ # Add mood
215
+ if mood.strip():
216
+ prompt_parts.append(mood.strip())
217
+
218
+ # Add style suggestions from image analysis
219
+ if style_suggestions:
220
+ # Limit to top 3 suggestions to avoid prompt bloat
221
+ top_suggestions = style_suggestions[:3]
222
+ prompt_parts.extend(top_suggestions)
223
+
224
+ enhanced_prompt = ", ".join(prompt_parts)
225
+ logger.info(f"Enhanced prompt: {enhanced_prompt}")
226
+ return enhanced_prompt
227
+
228
+ except Exception as e:
229
+ logger.error(f"Error enhancing prompt: {e}")
230
+ return base_prompt
231
+
232
+ def generate_with_reference(
233
+ self,
234
+ prompt: str,
235
+ reference_image: Image.Image,
236
+ style: str = "",
237
+ mood: str = "",
238
+ strength: float = 0.5,
239
+ num_images: int = 1,
240
+ num_inference_steps: int = DEFAULT_INFERENCE_STEPS,
241
+ guidance_scale: float = DEFAULT_GUIDANCE_SCALE,
242
+ seed: Optional[int] = None,
243
+ style_suggestions: List[str] = None
244
+ ) -> List[Dict]:
245
+ """
246
+ Generate images using reference image guidance
247
+
248
+ Args:
249
+ prompt: Text prompt
250
+ reference_image: Reference PIL Image
251
+ style: Style keywords
252
+ mood: Mood keywords
253
+ strength: Reference strength (0.0-1.0, higher = closer to reference)
254
+ num_images: Number of images to generate
255
+ num_inference_steps: Number of denoising steps
256
+ guidance_scale: Classifier-free guidance scale
257
+ seed: Random seed for reproducibility
258
+ style_suggestions: Style suggestions from image analysis
259
+
260
+ Returns:
261
+ List of generation results with metadata
262
+ """
263
+ try:
264
+ # Enhance prompt with style information
265
+ enhanced_prompt = self.enhance_prompt_with_style(
266
+ prompt, style, mood, style_suggestions
267
+ )
268
+
269
+ results = []
270
+
271
+ for i in range(num_images):
272
+ # Set up random seed
273
+ if seed is not None:
274
+ current_seed = seed + i
275
+ else:
276
+ current_seed = torch.seed()
277
+
278
+ generator = torch.Generator(device=self.device).manual_seed(current_seed)
279
+
280
+ # Generate image
281
+ logger.info(f"Generating image {i+1}/{num_images} with reference guidance")
282
+
283
+ with torch.autocast(self.device) if self.device == "cuda" else torch.no_grad():
284
+ result = self.img2img_pipeline(
285
+ prompt=enhanced_prompt,
286
+ image=reference_image,
287
+ strength=strength,
288
+ num_inference_steps=num_inference_steps,
289
+ guidance_scale=guidance_scale,
290
+ generator=generator
291
+ )
292
+
293
+ generated_image = result.images[0]
294
+
295
+ # Create metadata
296
+ metadata = {
297
+ 'prompt': prompt,
298
+ 'enhanced_prompt': enhanced_prompt,
299
+ 'style': style,
300
+ 'mood': mood,
301
+ 'strength': strength,
302
+ 'num_inference_steps': num_inference_steps,
303
+ 'guidance_scale': guidance_scale,
304
+ 'seed': current_seed,
305
+ 'model': self.model_name,
306
+ 'generation_type': 'img2img_reference',
307
+ 'timestamp': datetime.now().isoformat(),
308
+ 'device': self.device,
309
+ 'reference_size': reference_image.size,
310
+ 'output_size': generated_image.size,
311
+ 'style_suggestions': style_suggestions or []
312
+ }
313
+
314
+ results.append({
315
+ 'image': generated_image,
316
+ 'metadata': metadata,
317
+ 'index': i
318
+ })
319
+
320
+ logger.info(f"Successfully generated {len(results)} images with reference guidance")
321
+ return results
322
+
323
+ except Exception as e:
324
+ logger.error(f"Error generating images with reference: {e}")
325
+ return []
326
+
327
+ def generate_without_reference(
328
+ self,
329
+ prompt: str,
330
+ style: str = "",
331
+ mood: str = "",
332
+ num_images: int = 1,
333
+ num_inference_steps: int = DEFAULT_INFERENCE_STEPS,
334
+ guidance_scale: float = DEFAULT_GUIDANCE_SCALE,
335
+ seed: Optional[int] = None
336
+ ) -> List[Dict]:
337
+ """
338
+ Generate images without reference (fallback to text-to-image)
339
+
340
+ Args:
341
+ prompt: Text prompt
342
+ style: Style keywords
343
+ mood: Mood keywords
344
+ num_images: Number of images to generate
345
+ num_inference_steps: Number of denoising steps
346
+ guidance_scale: Classifier-free guidance scale
347
+ seed: Random seed for reproducibility
348
+
349
+ Returns:
350
+ List of generation results with metadata
351
+ """
352
+ try:
353
+ # Enhance prompt
354
+ enhanced_prompt = self.enhance_prompt_with_style(prompt, style, mood)
355
+
356
+ results = []
357
+
358
+ for i in range(num_images):
359
+ # Set up random seed
360
+ if seed is not None:
361
+ current_seed = seed + i
362
+ else:
363
+ current_seed = torch.seed()
364
+
365
+ generator = torch.Generator(device=self.device).manual_seed(current_seed)
366
+
367
+ # Generate image
368
+ logger.info(f"Generating image {i+1}/{num_images} without reference")
369
+
370
+ with torch.autocast(self.device) if self.device == "cuda" else torch.no_grad():
371
+ result = self.txt2img_pipeline(
372
+ prompt=enhanced_prompt,
373
+ height=DEFAULT_IMAGE_SIZE[1],
374
+ width=DEFAULT_IMAGE_SIZE[0],
375
+ num_inference_steps=num_inference_steps,
376
+ guidance_scale=guidance_scale,
377
+ generator=generator
378
+ )
379
+
380
+ generated_image = result.images[0]
381
+
382
+ # Create metadata
383
+ metadata = {
384
+ 'prompt': prompt,
385
+ 'enhanced_prompt': enhanced_prompt,
386
+ 'style': style,
387
+ 'mood': mood,
388
+ 'num_inference_steps': num_inference_steps,
389
+ 'guidance_scale': guidance_scale,
390
+ 'seed': current_seed,
391
+ 'model': self.model_name,
392
+ 'generation_type': 'txt2img_fallback',
393
+ 'timestamp': datetime.now().isoformat(),
394
+ 'device': self.device,
395
+ 'output_size': generated_image.size
396
+ }
397
+
398
+ results.append({
399
+ 'image': generated_image,
400
+ 'metadata': metadata,
401
+ 'index': i
402
+ })
403
+
404
+ logger.info(f"Successfully generated {len(results)} images without reference")
405
+ return results
406
+
407
+ except Exception as e:
408
+ logger.error(f"Error generating images without reference: {e}")
409
+ return []
410
+
411
+ def save_results(
412
+ self,
413
+ results: List[Dict],
414
+ output_dir: Path = OUTPUTS_DIR,
415
+ reference_info: Optional[Dict] = None
416
+ ) -> List[str]:
417
+ """
418
+ Save generation results with comprehensive metadata
419
+
420
+ Args:
421
+ results: List of generation results
422
+ output_dir: Output directory
423
+ reference_info: Reference image information
424
+
425
+ Returns:
426
+ List of saved file paths
427
+ """
428
+ try:
429
+ ensure_directory_exists(output_dir)
430
+ saved_files = []
431
+
432
+ for result in results:
433
+ image = result['image']
434
+ metadata = result['metadata']
435
+ index = result['index']
436
+
437
+ # Generate filename
438
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
439
+ prompt_slug = "_".join(metadata['prompt'].lower().split()[:5])
440
+ style_slug = metadata.get('style', '').replace(' ', '')[:10]
441
+ mood_slug = metadata.get('mood', '').replace(' ', '')[:10]
442
+
443
+ # Add reference indicator
444
+ ref_indicator = "REFIMG" if metadata['generation_type'] == 'img2img_reference' else "NOREFIMG"
445
+
446
+ filename = f"{prompt_slug}_{style_slug}_{mood_slug}_{timestamp}_seed{metadata['seed']}_{ref_indicator}_v{index+1}.png"
447
+ filepath = output_dir / filename
448
+
449
+ # Save image
450
+ image.save(filepath)
451
+
452
+ # Add reference info to metadata if available
453
+ if reference_info:
454
+ metadata['reference_info'] = reference_info
455
+
456
+ # Save metadata
457
+ metadata_filename = filepath.stem + "_metadata.json"
458
+ metadata_filepath = output_dir / metadata_filename
459
+
460
+ with open(metadata_filepath, 'w') as f:
461
+ json.dump(metadata, f, indent=2, default=str)
462
+
463
+ saved_files.extend([str(filepath), str(metadata_filepath)])
464
+ logger.info(f"Saved: {filepath}")
465
+
466
+ return saved_files
467
+
468
+ except Exception as e:
469
+ logger.error(f"Error saving results: {e}")
470
+ return []
471
+
472
+ def generate_batch(
473
+ self,
474
+ prompt: str,
475
+ reference_source: Optional[Union[str, Path, Image.Image]] = None,
476
+ style: str = "",
477
+ mood: str = "",
478
+ strength: float = 0.5,
479
+ num_images: int = 1,
480
+ num_inference_steps: int = DEFAULT_INFERENCE_STEPS,
481
+ guidance_scale: float = DEFAULT_GUIDANCE_SCALE,
482
+ seed: Optional[int] = None,
483
+ save_results: bool = True,
484
+ output_dir: Path = OUTPUTS_DIR
485
+ ) -> Dict:
486
+ """
487
+ Complete batch generation pipeline
488
+
489
+ Args:
490
+ prompt: Text prompt
491
+ reference_source: Reference image source (file, URL, or PIL Image)
492
+ style: Style keywords
493
+ mood: Mood keywords
494
+ strength: Reference strength (only used if reference provided)
495
+ num_images: Number of images to generate
496
+ num_inference_steps: Number of denoising steps
497
+ guidance_scale: Classifier-free guidance scale
498
+ seed: Random seed for reproducibility
499
+ save_results: Whether to save results to disk
500
+ output_dir: Output directory for saved files
501
+
502
+ Returns:
503
+ Dictionary with results and metadata
504
+ """
505
+ try:
506
+ logger.info(f"Starting batch generation: {num_images} images")
507
+
508
+ reference_image = None
509
+ reference_info = None
510
+ style_suggestions = []
511
+
512
+ # Load and analyze reference image if provided
513
+ if reference_source is not None:
514
+ ref_result = self.load_reference_image(reference_source)
515
+ if ref_result:
516
+ reference_image, reference_info = ref_result
517
+ style_suggestions = reference_info.get('style_suggestions', [])
518
+ logger.info(f"Using reference image with suggestions: {style_suggestions}")
519
+ else:
520
+ logger.warning("Failed to load reference image, falling back to text-only generation")
521
+
522
+ # Generate images
523
+ if reference_image is not None:
524
+ results = self.generate_with_reference(
525
+ prompt=prompt,
526
+ reference_image=reference_image,
527
+ style=style,
528
+ mood=mood,
529
+ strength=strength,
530
+ num_images=num_images,
531
+ num_inference_steps=num_inference_steps,
532
+ guidance_scale=guidance_scale,
533
+ seed=seed,
534
+ style_suggestions=style_suggestions
535
+ )
536
+ else:
537
+ results = self.generate_without_reference(
538
+ prompt=prompt,
539
+ style=style,
540
+ mood=mood,
541
+ num_images=num_images,
542
+ num_inference_steps=num_inference_steps,
543
+ guidance_scale=guidance_scale,
544
+ seed=seed
545
+ )
546
+
547
+ # Save results if requested
548
+ saved_files = []
549
+ if save_results and results:
550
+ saved_files = self.save_results(results, output_dir, reference_info)
551
+
552
+ # Compile final results
553
+ batch_result = {
554
+ 'results': results,
555
+ 'reference_info': reference_info,
556
+ 'saved_files': saved_files,
557
+ 'generation_summary': {
558
+ 'total_images': len(results),
559
+ 'prompt': prompt,
560
+ 'style': style,
561
+ 'mood': mood,
562
+ 'has_reference': reference_image is not None,
563
+ 'style_suggestions': style_suggestions,
564
+ 'timestamp': datetime.now().isoformat()
565
+ }
566
+ }
567
+
568
+ logger.info(f"Batch generation complete: {len(results)} images generated")
569
+ return batch_result
570
+
571
+ except Exception as e:
572
+ logger.error(f"Error in batch generation: {e}")
573
+ return {
574
+ 'results': [],
575
+ 'reference_info': None,
576
+ 'saved_files': [],
577
+ 'error': str(e)
578
+ }
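A brief sketch of the batch pipeline above; the reference path is a placeholder, and omitting reference_source falls back to plain text-to-image as the code shows.

from src.generators.compi_phase2e_refimg_to_image import CompIPhase2ERefImageToImage

gen = CompIPhase2ERefImageToImage()

batch = gen.generate_batch(
    prompt="a lighthouse on a stormy coast",
    reference_source="examples/reference_style.jpg",  # placeholder path or URL
    style="watercolor",
    mood="moody",
    strength=0.45,
    num_images=2,
    seed=123,
)
print(batch["generation_summary"])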
src/setup_env.py ADDED
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Environment setup script for CompI project.
4
+ Run this script to check and install dependencies.
5
+ """
6
+
7
+ import subprocess
8
+ import sys
9
+ import os
10
+ from pathlib import Path
11
+
12
+ def run_command(command, description):
13
+ """Run a shell command and handle errors."""
14
+ print(f"\n🔄 {description}...")
15
+ try:
16
+ result = subprocess.run(command, shell=True, check=True,
17
+ capture_output=True, text=True)
18
+ print(f"✅ {description} completed successfully")
19
+ return True
20
+ except subprocess.CalledProcessError as e:
21
+ print(f"❌ {description} failed:")
22
+ print(f"Error: {e.stderr}")
23
+ return False
24
+
25
+ def check_python_version():
26
+ """Check if Python version is compatible."""
27
+ version = sys.version_info
28
+ if version.major == 3 and version.minor >= 8:
29
+ print(f"✅ Python {version.major}.{version.minor}.{version.micro} is compatible")
30
+ return True
31
+ else:
32
+ print(f"❌ Python {version.major}.{version.minor}.{version.micro} is not compatible")
33
+ print("Please use Python 3.8 or higher")
34
+ return False
35
+
36
+ def check_gpu():
37
+ """Check for CUDA availability."""
38
+ try:
39
+ import torch
40
+ if torch.cuda.is_available():
41
+ gpu_count = torch.cuda.device_count()
42
+ gpu_name = torch.cuda.get_device_name(0)
43
+ print(f"✅ CUDA available with {gpu_count} GPU(s): {gpu_name}")
44
+ return True
45
+ else:
46
+ print("⚠️ CUDA not available, will use CPU")
47
+ return False
48
+ except ImportError:
49
+ print("⚠️ PyTorch not installed yet, GPU check will be done after installation")
50
+ return False
51
+
52
+ def install_requirements():
53
+ """Install requirements from requirements.txt."""
54
+ if not Path("requirements.txt").exists():
55
+ print("❌ requirements.txt not found")
56
+ return False
57
+
58
+ return run_command(
59
+ f"{sys.executable} -m pip install -r requirements.txt",
60
+ "Installing requirements"
61
+ )
62
+
63
+ def download_nltk_data():
64
+ """Download required NLTK data."""
65
+ try:
66
+ import nltk
67
+ print("\n🔄 Downloading NLTK data...")
68
+ nltk.download('punkt', quiet=True)
69
+ nltk.download('vader_lexicon', quiet=True)
70
+ nltk.download('stopwords', quiet=True)
71
+ print("✅ NLTK data downloaded")
72
+ return True
73
+ except ImportError:
74
+ print("⚠️ NLTK not installed, skipping data download")
75
+ return False
76
+
77
+ def setup_textblob():
78
+ """Setup TextBlob corpora."""
79
+ try:
80
+ import textblob
81
+ print("\n🔄 Setting up TextBlob...")
82
+ run_command(f"{sys.executable} -m textblob.download_corpora",
83
+ "Downloading TextBlob corpora")
84
+ return True
85
+ except ImportError:
86
+ print("⚠️ TextBlob not installed, skipping setup")
87
+ return False
88
+
89
+ def main():
90
+ """Main setup function."""
91
+ print("🚀 Setting up CompI Development Environment")
92
+ print("=" * 50)
93
+
94
+ # Check Python version
95
+ if not check_python_version():
96
+ sys.exit(1)
97
+
98
+ # Install requirements
99
+ if not install_requirements():
100
+ print("❌ Failed to install requirements")
101
+ sys.exit(1)
102
+
103
+ # Check GPU after PyTorch installation
104
+ check_gpu()
105
+
106
+ # Setup additional components
107
+ download_nltk_data()
108
+ setup_textblob()
109
+
110
+ print("\n" + "=" * 50)
111
+ print("🎉 Environment setup completed!")
112
+ print("\nNext steps:")
113
+ print("1. Run: python src/test_setup.py")
114
+ print("2. Start experimenting with notebooks/")
115
+ print("3. Check out the README.md for usage examples")
116
+
117
+ if __name__ == "__main__":
118
+ main()
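The setup script above is intended to be run directly from the repository root; a one-line sketch of invoking it programmatically:

import subprocess, sys

# Equivalent to running `python src/setup_env.py` from the repository root
subprocess.run([sys.executable, "src/setup_env.py"], check=True)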
src/ui/__init__.py ADDED
@@ -0,0 +1,14 @@
+ """
+ CompI User Interface Components
+ Interactive web interfaces for the CompI platform.
+ """
+
+ __all__ = [
+     "compi_phase1c_streamlit_ui",
+     "compi_phase1c_gradio_ui",
+     "compi_phase2a_streamlit_ui",
+     "compi_phase2b_streamlit_ui",
+     "compi_phase2c_streamlit_ui",
+     "compi_phase2d_streamlit_ui",
+     "compi_phase2e_streamlit_ui"
+ ]
src/ui/compi_phase3_final_dashboard.py ADDED
@@ -0,0 +1,1709 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CompI Phase 3 Final Dashboard - Complete Integration (3.A → 3.E)
4
+
5
+ This is the ultimate CompI interface that integrates ALL Phase 3 components:
6
+ - Phase 3.A/3.B: True multimodal fusion with real processing
7
+ - Phase 3.C: Advanced references with role assignment and live ControlNet previews
8
+ - Phase 3.D: Professional workflow management (gallery, presets, export)
9
+ - Phase 3.E: Performance management and model switching
10
+
11
+ Features:
12
+ - All multimodal inputs (Text, Audio, Data, Emotion, Real-time, Multi-Reference)
13
+ - Advanced References: multi-image upload/URLs, style vs structure roles, ControlNet with live previews
14
+ - Model & Performance: SD 1.5/SDXL switching, LoRA integration, VRAM monitoring, OOM auto-retry
15
+ - Workflow & Export: gallery, filters, rating/tags/notes, presets save/load, portable export ZIP
16
+ - True fusion engine: real processing for all inputs, intelligent generation mode selection
17
+ """
18
+
19
+ import os
20
+ import io
21
+ import csv
22
+ import json
23
+ import zipfile
24
+ import shutil
25
+ import platform
26
+ import requests
27
+ from datetime import datetime
28
+ from pathlib import Path
29
+ from typing import Optional, Dict, List
30
+
31
+ import numpy as np
32
+ import pandas as pd
33
+ import streamlit as st
34
+ from PIL import Image
35
+ import torch
36
+
37
+ # --- Diffusers base (txt2img, img2img) ---
38
+ from diffusers import (
39
+ StableDiffusionPipeline,
40
+ StableDiffusionImg2ImgPipeline,
41
+ )
42
+
43
+ # --- ControlNet (optional, with graceful fallback) ---
44
+ HAS_CONTROLNET = True
45
+ CN_IMG2IMG_AVAILABLE = True
46
+ try:
47
+ from diffusers import (
48
+ StableDiffusionControlNetPipeline,
49
+ StableDiffusionControlNetImg2ImgPipeline,
50
+ ControlNetModel,
51
+ )
52
+ except Exception:
53
+ HAS_CONTROLNET = False
54
+ CN_IMG2IMG_AVAILABLE = False
55
+
56
+ # --- SDXL & Upscaler (optional) ---
57
+ HAS_SDXL = True
58
+ HAS_UPSCALER = True
59
+ try:
60
+ from diffusers import StableDiffusionXLPipeline
61
+ except Exception:
62
+ HAS_SDXL = False
63
+
64
+ try:
65
+ from diffusers import StableDiffusionLatentUpscalePipeline
66
+ except Exception:
67
+ HAS_UPSCALER = False
68
+
69
+ # --- Audio, Emotion, Real-time, Plots, Previews ---
70
+ def _lazy_install(pkgs: str):
71
+ """Install packages on demand"""
72
+ os.system(f"pip install -q {pkgs}")
73
+
74
+ try:
75
+ import librosa
76
+ import soundfile as sf
77
+ except Exception:
78
+ _lazy_install("librosa soundfile")
79
+ import librosa
80
+ import soundfile as sf
81
+
82
+ try:
83
+ import whisper
84
+ except Exception:
85
+ _lazy_install("git+https://github.com/openai/whisper.git")
86
+ import whisper
87
+
88
+ try:
89
+ from textblob import TextBlob
90
+ except Exception:
91
+ _lazy_install("textblob")
92
+ from textblob import TextBlob
93
+
94
+ try:
95
+ import feedparser
96
+ except Exception:
97
+ _lazy_install("feedparser")
98
+ import feedparser
99
+
100
+ try:
101
+ import matplotlib.pyplot as plt
102
+ except Exception:
103
+ _lazy_install("matplotlib")
104
+ import matplotlib.pyplot as plt
105
+
106
+ try:
107
+ import cv2
108
+ except Exception:
109
+ _lazy_install("opencv-python-headless")
110
+ import cv2
111
+
112
+ # ==================== CONSTANTS & PATHS ====================
113
+
114
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
115
+
116
+ # Directory structure
117
+ OUTPUT_DIR = Path("outputs")
118
+ OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
119
+
120
+ EXPORTS_DIR = Path("exports")
121
+ EXPORTS_DIR.mkdir(parents=True, exist_ok=True)
122
+
123
+ PRESETS_DIR = Path("presets")
124
+ PRESETS_DIR.mkdir(parents=True, exist_ok=True)
125
+
126
+ # Log files for different phases
127
+ RUNLOG = OUTPUT_DIR / "phase3_run_log.csv" # fusion logs (3.B)
128
+ RUNLOG_3C = OUTPUT_DIR / "phase3c_runs.csv" # advanced ref logs (3.C)
129
+ RUNLOG_3E = OUTPUT_DIR / "phase3e_runlog.csv" # perf/model logs (3.E)
130
+ ANNOT_CSV = OUTPUT_DIR / "phase3d_annotations.csv" # annotations (3.D)
131
+
132
+ # ==================== UTILITY FUNCTIONS ====================
133
+
134
+ def slugify(s: str, n=30):
135
+ """Create safe filename from string"""
136
+ if not s:
137
+ return "none"
138
+ return "_".join(s.lower().split())[:n]
139
+
140
+ def save_image(img: Image.Image, name: str) -> str:
141
+ """Save image to outputs directory"""
142
+ p = OUTPUT_DIR / name
143
+ img.save(p)
144
+ return str(p)
145
+
146
+ def vram_gb() -> Optional[float]:
147
+ """Get total VRAM in GB"""
148
+ if DEVICE == "cuda":
149
+ try:
150
+ return torch.cuda.get_device_properties(0).total_memory / (1024**3)
151
+ except Exception:
152
+ return None
153
+ return None
154
+
155
+ def vram_used_gb() -> Optional[float]:
156
+ """Get used VRAM in GB"""
157
+ if DEVICE == "cuda":
158
+ try:
159
+ torch.cuda.synchronize()
160
+ return torch.cuda.memory_allocated() / (1024**3)
161
+ except Exception:
162
+ return None
163
+ return None
164
+
165
+ def attempt_enable_xformers(pipe):
166
+ """Try to enable xFormers memory efficient attention"""
167
+ try:
168
+ pipe.enable_xformers_memory_efficient_attention()
169
+ return True
170
+ except Exception:
171
+ return False
172
+
173
+ def apply_perf(pipe, attn_slice=True, vae_slice=True, vae_tile=False):
174
+ """Apply performance optimizations to pipeline"""
175
+ if attn_slice:
176
+ pipe.enable_attention_slicing()
177
+ if vae_slice:
178
+ try:
179
+ pipe.enable_vae_slicing()
180
+ except Exception:
181
+ pass
182
+ if vae_tile:
183
+ try:
184
+ pipe.enable_vae_tiling()
185
+ except Exception:
186
+ pass
187
+
188
+ def safe_retry_sizes(h, w, steps):
189
+ """Generate progressive fallback sizes for OOM recovery"""
190
+ sizes = [
191
+ (h, w, steps),
192
+ (max(384, h//2), max(384, w//2), max(steps-8, 12)),
193
+ (384, 384, max(steps-12, 12)),
194
+ (256, 256, max(steps-16, 10)),
195
+ ]
196
+ seen = set()
197
+ for it in sizes:
198
+ if it not in seen:
199
+ seen.add(it)
200
+ yield it
201
+
202
+ def canny_map(img: Image.Image) -> Image.Image:
203
+ """Create Canny edge map from image"""
204
+ arr = np.array(img.convert("RGB"))
205
+ edges = cv2.Canny(arr, 100, 200)
206
+ edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
207
+ return Image.fromarray(edges_rgb)
208
+
209
+ def depth_proxy(img: Image.Image) -> Image.Image:
210
+ """Create depth-like proxy using grayscale"""
211
+ gray = img.convert("L")
212
+ return Image.merge("RGB", (gray, gray, gray))
213
+
214
+ def save_plot(fig) -> Image.Image:
215
+ """Save matplotlib figure as PIL Image"""
216
+ buf = io.BytesIO()
217
+ fig.savefig(buf, format="png", bbox_inches="tight")
218
+ plt.close(fig)
219
+ buf.seek(0)
220
+ return Image.open(buf).convert("RGB")
221
+
222
+ def env_snapshot() -> Dict:
223
+ """Create environment snapshot for reproducibility"""
224
+ import sys
225
+ try:
226
+ import importlib.metadata as im
227
+ except Exception:
228
+ import importlib_metadata as im
229
+
230
+ pkgs = {}
231
+ for pkg in ["torch", "diffusers", "transformers", "accelerate", "opencv-python-headless",
232
+ "librosa", "whisper", "textblob", "pandas", "numpy", "matplotlib",
233
+ "feedparser", "streamlit", "Pillow"]:
234
+ try:
235
+ pkgs[pkg] = im.version(pkg)
236
+ except Exception:
237
+ pass
238
+
239
+ return {
240
+ "timestamp": datetime.now().isoformat(),
241
+ "python_version": sys.version,
242
+ "platform": platform.platform(),
243
+ "packages": pkgs
244
+ }
245
+
246
+ def mk_readme(bundle_meta: Dict, df_meta: pd.DataFrame) -> str:
247
+ """Generate README for export bundle"""
248
+ L = []
249
+ L.append(f"# CompI Export — {bundle_meta['bundle_name']}\n")
250
+ L.append(f"_Created: {bundle_meta['created_at']}_\n")
251
+ L += [
252
+ "## What's inside",
253
+ "- Selected images",
254
+ "- `manifest.json` (environment + settings)",
255
+ "- `metadata.csv` (merged logs)",
256
+ "- `annotations.csv` (ratings/tags/notes)",
257
+ ]
258
+ if bundle_meta.get("preset"):
259
+ L.append("- `preset.json` (saved generation settings)")
260
+
261
+ L.append("\n## Summary of selected runs")
262
+ if not df_meta.empty and "mode" in df_meta.columns:
263
+ counts = df_meta["mode"].value_counts().to_dict()
264
+ L.append("Modes:")
265
+ for k, v in counts.items():
266
+ L.append(f"- {k}: {v}")
267
+
268
+ L.append("\n## Reproducing")
269
+ L.append("1. Install versions in `manifest.json`.")
270
+ L.append("2. Use `preset.json` or copy prompt/params from `metadata.csv`.")
271
+ L.append("3. Run the dashboard with these settings.")
272
+
273
+ return "\n".join(L)
274
+
275
+ # ==================== CACHED MODEL LOADERS ====================
276
+
277
+ @st.cache_resource(show_spinner=True)
278
+ def load_sd15(txt2img=True):
279
+ """Load Stable Diffusion 1.5 pipeline"""
280
+ if txt2img:
281
+ pipe = StableDiffusionPipeline.from_pretrained(
282
+ "runwayml/stable-diffusion-v1-5",
283
+ safety_checker=None,
284
+ torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
285
+ )
286
+ else:
287
+ pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
288
+ "runwayml/stable-diffusion-v1-5",
289
+ safety_checker=None,
290
+ torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
291
+ )
292
+ return pipe.to(DEVICE)
293
+
294
+ @st.cache_resource(show_spinner=True)
295
+ def load_sdxl():
296
+ """Load SDXL pipeline"""
297
+ if not HAS_SDXL:
298
+ return None
299
+ pipe = StableDiffusionXLPipeline.from_pretrained(
300
+ "stabilityai/stable-diffusion-xl-base-1.0",
301
+ safety_checker=None,
302
+ torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
303
+ )
304
+ return pipe.to(DEVICE)
305
+
306
+ @st.cache_resource(show_spinner=True)
307
+ def load_upscaler():
308
+ """Load latent upscaler pipeline"""
309
+ if not HAS_UPSCALER:
310
+ return None
311
+ up = StableDiffusionLatentUpscalePipeline.from_pretrained(
312
+ "stabilityai/sd-x2-latent-upscaler",
313
+ safety_checker=None,
314
+ torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
315
+ )
316
+ return up.to(DEVICE)
317
+
318
+ @st.cache_resource(show_spinner=True)
319
+ def load_controlnet(cn_type: str):
320
+ """Load ControlNet pipeline"""
321
+ if not HAS_CONTROLNET:
322
+ return None
323
+ cn_id = "lllyasviel/sd-controlnet-canny" if cn_type == "Canny" else "lllyasviel/sd-controlnet-depth"
324
+ controlnet = ControlNetModel.from_pretrained(
325
+ cn_id, torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
326
+ )
327
+ pipe = StableDiffusionControlNetPipeline.from_pretrained(
328
+ "runwayml/stable-diffusion-v1-5",
329
+ controlnet=controlnet,
330
+ safety_checker=None,
331
+ torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
332
+ ).to(DEVICE)
333
+ try:
334
+ pipe.enable_xformers_memory_efficient_attention()
335
+ except Exception:
336
+ pass
337
+ pipe.enable_attention_slicing()
338
+ return pipe
339
+
340
+ @st.cache_resource(show_spinner=True)
341
+ def load_controlnet_img2img(cn_type: str):
342
+ """Load ControlNet + Img2Img hybrid pipeline"""
343
+ global CN_IMG2IMG_AVAILABLE
344
+ if not HAS_CONTROLNET:
345
+ return None
346
+ try:
347
+ cn_id = "lllyasviel/sd-controlnet-canny" if cn_type == "Canny" else "lllyasviel/sd-controlnet-depth"
348
+ controlnet = ControlNetModel.from_pretrained(
349
+ cn_id, torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
350
+ )
351
+ pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
352
+ "runwayml/stable-diffusion-v1-5",
353
+ controlnet=controlnet,
354
+ safety_checker=None,
355
+ torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
356
+ ).to(DEVICE)
357
+ try:
358
+ pipe.enable_xformers_memory_efficient_attention()
359
+ except Exception:
360
+ pass
361
+ pipe.enable_attention_slicing()
362
+ return pipe
363
+ except Exception:
364
+ CN_IMG2IMG_AVAILABLE = False
365
+ return None
366
+
367
+ # ==================== STREAMLIT LAYOUT ====================
368
+
369
+ st.set_page_config(page_title="CompI — Phase 3 Final Dashboard", layout="wide")
370
+ st.title("🧪 CompI — Final Integrated Dashboard (3.A → 3.E)")
371
+
372
+ # ---- Minimal, clean UI styling ----
373
+
374
+ def inject_minimal_css():
375
+ st.markdown(
376
+ """
377
+ <style>
378
+ .block-container {padding-top: 1.2rem; padding-bottom: 2rem; max-width: 1200px;}
379
+ .stTabs [role="tablist"] {gap: 6px;}
380
+ .stTabs [role="tab"] {padding: 6px 10px; border-radius: 8px; background: rgba(255,255,255,0.02); border: 1px solid rgba(255,255,255,0.08);}
381
+ .stTabs [aria-selected="true"] {background: rgba(255,255,255,0.04); border-color: rgba(255,255,255,0.16);}
382
+ h1, h2, h3 {margin-bottom: .3rem;}
383
+ .section {padding: 14px 16px; border: 1px solid rgba(255,255,255,0.08); border-radius: 12px; background: rgba(255,255,255,0.02); margin-bottom: 14px;}
384
+ .muted {color: rgba(255,255,255,0.6); text-transform: uppercase; letter-spacing: .08em; font-size: .75rem; margin-bottom: .25rem;}
385
+ .stButton>button {border-radius: 10px; height: 44px;}
386
+ .stButton>button[kind="primary"] {background: #2563eb; border-color: #2563eb;}
387
+ .stTextInput input, .stTextArea textarea {border-radius: 10px;}
388
+ .stMultiSelect [data-baseweb="tag"] {border-radius: 8px;}
389
+ pre, code {border-radius: 10px;}
390
+ #MainMenu, footer {visibility: hidden;}
391
+ </style>
392
+ """,
393
+ unsafe_allow_html=True,
394
+ )
395
+
396
+ # Apply minimal styling early
397
+ inject_minimal_css()
398
+
399
+ # Top metrics (Phase 3.E VRAM monitoring)
400
+ colA, colB, colC, colD = st.columns(4)
401
+ with colA:
402
+ st.metric("Device", DEVICE)
403
+ with colB:
404
+ st.metric("VRAM (GB)", f"{vram_gb():.2f}" if vram_gb() else "N/A")
405
+ with colC:
406
+ st.metric("Used VRAM (GB)", f"{vram_used_gb():.2f}" if vram_used_gb() else "N/A")
407
+ with colD:
408
+ st.caption(f"PyTorch {torch.__version__} • diffusers ready")
409
+
410
+ # Handle deferred clear request BEFORE creating any widgets
411
+ if st.session_state.get("clear_inputs", False):
412
+ # Pop ALL relevant input/widget keys so widgets re-initialize to defaults
413
+ keys_to_clear = [
414
+ # Text inputs
415
+ "main_prompt_input", "style_input", "mood_input", "neg_prompt_input", "style_ms", "mood_ms",
416
+ # Optional text areas
417
+ "emo_free_textarea", "ref_urls_textarea",
418
+ # Uploaders & inputs
419
+ "audio_file_uploader", "data_file_uploader", "formula_input", "ref_images_uploader",
420
+ # Toggles / checkboxes / selects / sliders (with explicit keys)
421
+ "enable_emo_checkbox", "enable_rt_checkbox", "enable_ref_checkbox",
422
+ "model_choice_selectbox", "gen_mode_selectbox",
423
+ "use_lora_checkbox", "lora_path_input", "lora_scale_slider",
424
+ "width_input", "height_input", "steps_input", "guidance_input",
425
+ "batch_input", "seed_input", "upsample_checkbox",
426
+ "use_xformers_checkbox", "attn_slice_checkbox", "vae_slice_checkbox", "vae_tile_checkbox",
427
+ "oom_retry_checkbox",
428
+ # Real-time extras
429
+ "city_input", "headlines_slider",
430
+ ]
431
+ for k in keys_to_clear:
432
+ st.session_state.pop(k, None)
433
+
434
+ # Clear outputs/state
435
+ st.session_state["generated_images"] = []
436
+ st.session_state["generation_results"] = []
437
+
438
+ # Unset the flag and rerun
439
+ st.session_state["clear_inputs"] = False
440
+
441
+
442
+ # Main tabs - Complete Phase 3 integration
443
+ # Moved generation below Inputs per UX request; removed separate Generate tab
444
+ tab_inputs, tab_refs, tab_model, tab_gallery, tab_presets, tab_export = st.tabs([
445
+ "🧩 Inputs (Text/Audio/Data/Emotion/Real‑time)",
446
+ "🖼️ Advanced References",
447
+ "⚙️ Model & Performance",
448
+ "🖼️ Gallery & Annotate",
449
+ "💾 Presets",
450
+ "📦 Export"
451
+ ])
452
+
453
+ # ==================== INPUTS TAB (Phase 3.A/3.B) ====================
454
+
455
+ with tab_inputs:
456
+ st.markdown("<div class='section'>", unsafe_allow_html=True)
457
+ st.subheader("🧩 Multimodal Inputs")
458
+
459
+ # Text & Style (always enabled)
460
+ st.markdown("<div class='muted'>Text & Style</div>", unsafe_allow_html=True)
461
+ main_prompt = st.text_input(
462
+ "Main prompt",
463
+ value=st.session_state.get("main_prompt_input", ""),
464
+ placeholder="A serene cyberpunk alley at dawn",
465
+ key="main_prompt_input",
466
+ )
467
+
468
+ # Style and Mood as multi-select dropdowns
469
+ STYLE_OPTIONS = [
470
+ "digital painting", "watercolor", "oil painting", "pixel art", "anime",
471
+ "3D render", "photorealistic", "line art", "low poly", "cyberpunk",
472
+ "isometric", "concept art", "cel shading", "comic book", "impressionist"
473
+ ]
474
+ MOOD_OPTIONS = [
475
+ "dreamy", "luminous", "dark and moody", "whimsical", "serene",
476
+ "epic", "melancholic", "vibrant", "mysterious", "dystopian",
477
+ "hopeful", "playful", "contemplative", "energetic", "ethereal"
478
+ ]
479
+
480
+ style_selected = st.multiselect(
481
+ "Style (choose one or more)",
482
+ options=STYLE_OPTIONS,
483
+ default=st.session_state.get("style_ms", []),
484
+ key="style_ms",
485
+ help="Pick one or more styles to condition the artwork"
486
+ )
487
+ mood_selected = st.multiselect(
488
+ "Mood (choose one or more)",
489
+ options=MOOD_OPTIONS,
490
+ default=st.session_state.get("mood_ms", []),
491
+ key="mood_ms",
492
+ help="Pick one or more moods to influence the atmosphere"
493
+ )
494
+
495
+ # Join lists into strings for downstream prompt fusion
496
+ style = ", ".join(style_selected)
497
+ mood = ", ".join(mood_selected)
498
+
499
+ neg_prompt = st.text_input(
500
+ "Negative prompt (optional)",
501
+ value=st.session_state.get("neg_prompt_input", ""),
502
+ placeholder="e.g., low quality, bad anatomy",
503
+ key="neg_prompt_input",
504
+ )
505
+
506
+ st.markdown("</div>", unsafe_allow_html=True)
507
+
508
+ # Four columns for aligned sections
509
+ col1, col2, col3, col4 = st.columns(4)
510
+
511
+ # AUDIO PROCESSING (Phase 2.A)
512
+ with col1:
513
+ st.markdown("### 🎵 Audio Analysis")
514
+ enable_audio = st.checkbox("Enable Audio Processing", value=False)
515
+ audio_caption = ""
516
+ audio_tags = []
517
+ tempo = None
518
+
519
+ if enable_audio:
520
+ audio_file = st.file_uploader("Upload audio (.wav/.mp3)", type=["wav", "mp3"], key="audio_file_uploader")
521
+ if audio_file:
522
+ # Save temporary audio file
523
+ audio_path = OUTPUT_DIR / "tmp_audio.wav"
524
+ with open(audio_path, "wb") as f:
525
+ f.write(audio_file.read())
526
+
527
+ # Load and analyze audio
528
+ y, sr = librosa.load(audio_path.as_posix(), sr=16000)
529
+ dur = librosa.get_duration(y=y, sr=sr)
530
+ st.caption(f"Duration: {dur:.1f}s")
531
+
532
+ # Extract tempo
533
+ try:
534
+ tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
535
+ except Exception:
536
+ tempo = None
537
+
538
+ # Extract audio features
539
+ rms = float(np.sqrt(np.mean(y**2)))
540
+ zcr = float(np.mean(librosa.feature.zero_crossing_rate(y)))
541
+
542
+ # Generate audio tags based on features
543
+ if tempo:
544
+ if tempo < 90:
545
+ audio_tags.append("slow tempo")
546
+ elif tempo > 140:
547
+ audio_tags.append("fast tempo")
548
+
549
+ if rms > 0.04:
550
+ audio_tags.append("energetic")
551
+ if zcr > 0.12:
552
+ audio_tags.append("percussive")
553
+
554
+ # Whisper transcription
555
+ st.info("Transcribing audio (Whisper base)…")
556
+ w = whisper.load_model("base", device=DEVICE)
557
+ wav = whisper.load_audio(audio_path.as_posix())
558
+ wav = whisper.pad_or_trim(wav)
559
+ mel = whisper.log_mel_spectrogram(wav).to(DEVICE)
560
+ dec = whisper.DecodingOptions(language="en", fp16=(DEVICE=="cuda"))
561
+ res = whisper.decode(w, mel, dec)
562
+ audio_caption = res.text.strip()
563
+
564
+ st.success(f"Caption: '{audio_caption}'")
565
+ if audio_tags:
566
+ st.write("Audio tags:", ", ".join(audio_tags))
567
+
568
+ # DATA PROCESSING (Phase 2.B)
569
+ with col2:
570
+ st.markdown("### 📊 Data Analysis")
571
+ enable_data = st.checkbox("Enable Data Processing", value=False)
572
+ data_summary = ""
573
+ data_plot = None
574
+
575
+ if enable_data:
576
+ data_file = st.file_uploader("Upload CSV", type=["csv"], key="data_file_uploader")
577
+ formula = st.text_input("Or numpy formula", placeholder="np.sin(np.linspace(0, 20, 200))", key="formula_input")
578
+
579
+ if data_file is not None:
580
+ df = pd.read_csv(data_file)
581
+ st.dataframe(df.head(), use_container_width=True)
582
+
583
+ # Analyze numeric columns
584
+ num = df.select_dtypes(include=np.number)
585
+ if not num.empty:
586
+ means, mins, maxs, stds = num.mean(), num.min(), num.max(), num.std()
587
+ data_summary = f"{len(num)} rows x {num.shape[1]} cols; " + " ".join([
588
+ f"{c}: avg {means[c]:.2f}, min {mins[c]:.2f}, max {maxs[c]:.2f}."
589
+ for c in num.columns[:3]
590
+ ])
591
+ data_summary += " Variability " + ("high." if stds.mean() > 1 else "gentle.")
592
+
593
+ # Create visualization
594
+ fig = plt.figure(figsize=(6, 3))
595
+ if num.shape[1] == 1:
596
+ plt.plot(num.iloc[:, 0])
597
+ plt.title(f"Pattern: {num.columns[0]}")
598
+ else:
599
+ plt.plot(num.iloc[:, 0], label=num.columns[0])
600
+ plt.plot(num.iloc[:, 1], label=num.columns[1])
601
+ plt.legend()
602
+ plt.title("Data Patterns")
603
+ plt.tight_layout()
604
+ data_plot = save_plot(fig)
605
+ st.image(data_plot, caption="Data pattern")
606
+
607
+ elif formula.strip():
608
+ try:
609
+ arr = eval(formula, {"np": np, "__builtins__": {}})
610
+ arr = np.array(arr)
611
+ data_summary = f"Mathematical pattern with {arr.size} points."
612
+
613
+ fig = plt.figure(figsize=(6, 3))
614
+ plt.plot(arr)
615
+ plt.title("Formula Pattern")
616
+ plt.tight_layout()
617
+ data_plot = save_plot(fig)
618
+ st.image(data_plot, caption="Formula pattern")
619
+ except Exception as e:
620
+ st.error(f"Formula error: {e}")
621
+
622
+ # EMOTION (Phase 2.C)
623
+ with col3:
624
+ st.markdown("### 💭 Emotion Analysis")
625
+ enable_emo = st.checkbox("Enable Emotion Processing", value=False, key="enable_emo_checkbox")
626
+ emo_free = st.text_area(
627
+ "Describe a feeling/context",
628
+ value=st.session_state.get("emo_free_textarea", ""),
629
+ key="emo_free_textarea",
630
+ ) if enable_emo else ""
631
+ emo_label = ""
632
+
633
+ if enable_emo and emo_free.strip():
634
+ tb = TextBlob(emo_free)
635
+ pol = tb.sentiment.polarity
636
+ emo_label = "positive, uplifting" if pol > 0.3 else (
637
+ "sad, melancholic" if pol < -0.3 else "neutral, contemplative"
638
+ )
639
+ st.info(f"Sentiment: {emo_label} (polarity {pol:.2f})")
640
+
641
+ # REAL-TIME (Phase 2.D)
642
+ with col4:
643
+ st.markdown("### 🌎 Real-time Data")
644
+ enable_rt = st.checkbox("Enable Real-time Feeds", value=False, key="enable_rt_checkbox")
645
+ rt_context = ""
646
+
647
+ if enable_rt:
648
+ city = st.text_input("City (weather)", "Toronto", key="city_input")
649
+ headlines_num = st.slider("Headlines", 1, 5, 3, key="headlines_slider")
650
+
651
+ def get_weather(city):
652
+ try:
653
+ key = st.secrets.get("OPENWEATHER_KEY", None) if hasattr(st, "secrets") else None
654
+ url = "https://api.openweathermap.org/data/2.5/weather"
655
+ params = {
656
+ "q": city,
657
+ "units": "metric",
658
+ "appid": key or "9a524f695a4940f392150142250107"
659
+ }
660
+ r = requests.get(url, params=params, timeout=6).json()
661
+ return f"{r['weather'][0]['description']}, {r['main']['temp']:.1f}°C"
662
+ except Exception as e:
663
+ return f"unavailable ({e})"
664
+
665
+ def get_news(n):
666
+ try:
667
+ feed = feedparser.parse("https://feeds.bbci.co.uk/news/rss.xml")
668
+ return "; ".join([e["title"] for e in feed.entries[:n]])
669
+ except Exception as e:
670
+ return f"unavailable ({e})"
671
+
672
+ w = get_weather(city)
673
+ n = get_news(headlines_num)
674
+ st.caption(f"Weather: {w}")
675
+ st.caption(f"News: {n}")
676
+ rt_context = f"Current weather in {city}: {w}. Today's news: {n}."
677
+
678
+ # ==================== ADVANCED REFERENCES TAB (Phase 3.C) ====================
679
+
680
+ with tab_refs:
681
+ st.subheader("🖼️ Advanced Multi‑Reference + ControlNet")
682
+ enable_ref = st.checkbox("Enable Multi-Reference Processing", value=False, key="enable_ref_checkbox")
683
+ ref_images: List[Image.Image] = []
684
+ style_idxs = []
685
+ cn_images = []
686
+ img2img_strength = 0.55
687
+ cn_type = "Canny"
688
+ cn_scale = 1.0
689
+
690
+ if enable_ref:
691
+ # Multi-reference upload (files + URLs)
692
+ colU, colURL = st.columns(2)
693
+
694
+ with colU:
695
+ st.markdown("**���� Upload Images**")
696
+ uploads = st.file_uploader(
697
+ "Upload reference images",
698
+ type=["png", "jpg", "jpeg"],
699
+ accept_multiple_files=True,
700
+ key="ref_images_uploader"
701
+ )
702
+ if uploads:
703
+ for u in uploads:
704
+ try:
705
+ im = Image.open(u).convert("RGB")
706
+ ref_images.append(im)
707
+ except Exception as e:
708
+ st.warning(f"Upload failed: {e}")
709
+
710
+ with colURL:
711
+ st.markdown("**🔗 Image URLs**")
712
+ block = st.text_area(
713
+ "Paste image URLs (one per line)",
714
+ value=st.session_state.get("ref_urls_textarea", ""),
715
+ key="ref_urls_textarea",
716
+ )
717
+ if block.strip():
718
+ for line in block.splitlines():
719
+ url = line.strip()
720
+ if not url:
721
+ continue
722
+ try:
723
+ r = requests.get(url, timeout=8)
724
+ if r.status_code == 200:
725
+ im = Image.open(io.BytesIO(r.content)).convert("RGB")
726
+ ref_images.append(im)
727
+ except Exception as e:
728
+ st.warning(f"URL failed: {e}")
729
+
730
+ if ref_images:
731
+ # Display reference images
732
+ st.image(
733
+ ref_images,
734
+ width=180,
735
+ caption=[f"Ref {i+1}" for i in range(len(ref_images))]
736
+ )
737
+
738
+ # Role-based assignment (Phase 3.C key feature)
739
+ st.markdown("### 🎨 Reference Role Assignment")
740
+ style_idxs = st.multiselect(
741
+ "Use as **Style References (img2img)**",
742
+ list(range(1, len(ref_images)+1)),
743
+ default=list(range(1, len(ref_images)+1)),
744
+ help="These images will influence the artistic style and mood"
745
+ )
746
+
747
+ # ControlNet structure conditioning
748
+ use_cn = st.checkbox("Use **ControlNet** for structure", value=HAS_CONTROLNET)
749
+ if use_cn and not HAS_CONTROLNET:
750
+ st.warning("ControlNet not available in this environment.")
751
+ use_cn = False
752
+
753
+ if use_cn:
754
+ cn_type = st.selectbox("ControlNet type", ["Canny", "Depth"], index=0)
755
+ pick = st.selectbox(
756
+ "Pick **one** structural reference",
757
+ list(range(1, len(ref_images)+1)),
758
+ index=0,
759
+ help="This image will control the composition and structure"
760
+ )
761
+
762
+ # Live ControlNet preview (Phase 3.C key feature)
763
+ base = ref_images[int(pick)-1].resize((512, 512))
764
+ cn_map = canny_map(base) if cn_type == "Canny" else depth_proxy(base)
765
+
766
+ st.markdown("**🔍 Live ControlNet Preview**")
767
+ st.image(
768
+ [base, cn_map],
769
+ width=240,
770
+ caption=["Selected Reference", f"{cn_type} Map"]
771
+ )
772
+ cn_images = [cn_map]
773
+ cn_scale = st.slider("ControlNet conditioning scale", 0.1, 2.0, 1.0, 0.05)
774
+
775
+ # Style strength control
776
+ img2img_strength = st.slider(
777
+ "img2img strength (style adherence)",
778
+ 0.2, 0.85, 0.55, 0.05,
779
+ help="Higher values follow style references more closely"
780
+ )
781
+
782
+ # ==================== MODEL & PERFORMANCE TAB (Phase 3.E) ====================
783
+
784
+ with tab_model:
785
+ st.subheader("⚙️ Model & Performance Management")
786
+ st.caption("Choose a base model, optional style add‑ons (LoRA), and tune speed/quality settings.")
787
+
788
+ # Presets and Glossary helpers
789
+ @st.dialog("Glossary: Common terms")
790
+ def show_glossary():
791
+ st.markdown(
792
+ """
793
+ - Base model: The foundation that generates images (SD 1.5 = fast, SDXL = higher detail).
794
+ - Generation mode:
795
+ - txt2img: Create from your text prompt only.
796
+ - img2img: Start from an input image and transform it using your text.
797
+ - LoRA: A small add‑on that injects a trained style or subject. Use a .safetensors/.pt file.
798
+ - Width/Height: Image size in pixels. Bigger = more detail but slower and more VRAM.
799
+ - Steps: How long the model refines the image. More steps usually means cleaner details.
800
+ - Guidance: How strongly to follow your text. 6–9 is a good range; too high can look unnatural.
801
+ - Batch size: How many images at once. Higher uses more VRAM.
802
+ - Seed: Randomness control. Reuse the same non‑zero seed to reproduce a result.
803
+ - Upscale ×2: Quickly doubles resolution after generation.
804
+ - xFormers attention: GPU speed‑up if supported.
805
+ - Attention/VAE slicing: Reduce VRAM usage (slightly slower). Keep on for stability.
806
+ - VAE tiling: For very large images; decodes in tiles.
807
+ - Auto‑retry on CUDA OOM: If VRAM runs out, try again with safer settings.
808
+ """
809
+ )
810
+ st.button("Close", use_container_width=True)
811
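+ # Illustrative sketch of the seed behaviour described above: the generation code later
+ # re-seeds a torch.Generator, so the same non-zero seed with identical settings
+ # reproduces the same image, e.g.
+ #   gen = torch.Generator(DEVICE).manual_seed(1234)  # hypothetical fixed seed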
+
812
+ def apply_preset(name: str):
813
+ ss = st.session_state
814
+ def s(k, v):
815
+ ss[k] = v
816
+ if name == "fast":
817
+ s("model_choice_selectbox", "SD 1.5 (v1-5)")
818
+ s("gen_mode_selectbox", "txt2img")
819
+ s("width_input", 512); s("height_input", 512)
820
+ s("steps_input", 30); s("guidance_input", 7.5)
821
+ s("batch_input", 1); s("seed_input", 0)
822
+ s("upsample_checkbox", False)
823
+ s("use_xformers_checkbox", True); s("attn_slice_checkbox", True)
824
+ s("vae_slice_checkbox", True); s("vae_tile_checkbox", False)
825
+ s("oom_retry_checkbox", True)
826
+ elif name == "high":
827
+ model = "SDXL Base 1.0" if HAS_SDXL else "SD 1.5 (v1-5)"
828
+ s("model_choice_selectbox", model)
829
+ s("gen_mode_selectbox", "txt2img")
830
+ s("width_input", 768); s("height_input", 768)
831
+ s("steps_input", 40); s("guidance_input", 7.0)
832
+ s("batch_input", 1); s("seed_input", 0)
833
+ s("upsample_checkbox", True)
834
+
835
+
836
+ s("use_xformers_checkbox", True); s("attn_slice_checkbox", True)
837
+ s("vae_slice_checkbox", True); s("vae_tile_checkbox", False)
838
+ s("oom_retry_checkbox", True)
839
+ elif name == "low_vram":
840
+ s("model_choice_selectbox", "SD 1.5 (v1-5)")
841
+ s("gen_mode_selectbox", "txt2img")
842
+ s("width_input", 448); s("height_input", 448)
843
+ s("steps_input", 25); s("guidance_input", 7.5)
844
+ s("batch_input", 1); s("seed_input", 0)
845
+ s("upsample_checkbox", False)
846
+ s("use_xformers_checkbox", True); s("attn_slice_checkbox", True)
847
+ s("vae_slice_checkbox", True); s("vae_tile_checkbox", False)
848
+ s("oom_retry_checkbox", True)
849
+ elif name == "portrait":
850
+ s("gen_mode_selectbox", "txt2img")
851
+ s("width_input", 512); s("height_input", 768)
852
+ s("steps_input", 30); s("guidance_input", 7.5)
853
+ s("batch_input", 1)
854
+ elif name == "landscape":
855
+ s("gen_mode_selectbox", "txt2img")
856
+ s("width_input", 768); s("height_input", 512)
857
+ s("steps_input", 30); s("guidance_input", 7.5)
858
+ s("batch_input", 1)
859
+ elif name == "instagram":
860
+ s("gen_mode_selectbox", "txt2img")
861
+ s("width_input", 1024); s("height_input", 1024)
862
+ s("steps_input", 35); s("guidance_input", 7.0)
863
+ s("batch_input", 1); s("upsample_checkbox", False)
864
+ elif name == "defaults":
865
+ s("model_choice_selectbox", "SD 1.5 (v1-5)")
866
+ s("gen_mode_selectbox", "txt2img")
867
+ s("width_input", 512); s("height_input", 512)
868
+ s("steps_input", 30); s("guidance_input", 7.5)
869
+ s("batch_input", 1); s("seed_input", 0)
870
+ s("upsample_checkbox", False)
871
+ s("use_xformers_checkbox", True); s("attn_slice_checkbox", True)
872
+ s("vae_slice_checkbox", True); s("vae_tile_checkbox", False)
873
+ s("oom_retry_checkbox", True)
874
+ st.rerun()
875
+
876
+ colA, colB, colC, colD = st.columns(4)
877
+ with colA:
878
+ if st.button("⚡ Fast Start"):
879
+ apply_preset("fast")
880
+ with colB:
881
+ if st.button("🔍 High Detail"):
882
+ apply_preset("high")
883
+ with colC:
884
+ if st.button("💻 Low VRAM"):
885
+ apply_preset("low_vram")
886
+ with colD:
887
+ if st.button("❓ Glossary"):
888
+ show_glossary()
889
+
890
+ # Simple VRAM safety indicator (placed after preset buttons for visibility)
891
+ def estimate_pixels(w, h):
892
+ return int(w) * int(h)
893
+ def vram_risk_level(w, h, steps, batch, model_name):
894
+ px = estimate_pixels(w, h)
895
+ multiplier = 1.0 if "1.5" in model_name else 2.0 # SDXL ~2x heavier
896
+ load = (px / (512*512)) * (steps / 30.0) * max(1, batch) * multiplier
897
+ if load < 1.2:
898
+ return "✅ Likely safe"
899
+ elif load < 2.2:
900
+ return "⚠️ May be heavy — consider smaller size or steps"
901
+ else:
902
+ return "🟥 High risk of OOM — reduce size/batch/steps"
903
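+ # Worked example of the heuristic above: 512x512, 30 steps, batch 1 on SD 1.5 gives
+ # load = 1.0 * 1.0 * 1 * 1.0 = 1.0 -> "Likely safe", while 768x768, 40 steps, batch 1
+ # on SDXL gives load = 2.25 * (40/30) * 1 * 2.0 = 6.0 -> "High risk of OOM".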
+
904
+ risk_msg = vram_risk_level(
905
+ st.session_state.get("width_input", 512),
906
+ st.session_state.get("height_input", 512),
907
+ st.session_state.get("steps_input", 30),
908
+ st.session_state.get("batch_input", 1),
909
+ st.session_state.get("model_choice_selectbox", "SD 1.5 (v1-5)")
910
+ )
911
+ st.info(f"VRAM safety: {risk_msg}")
912
+
913
+
914
+
915
+ # Additional simple layout for more presets and reset
916
+ colP0, colP1a, colP2a, colP3a, colP4a = st.columns(5)
917
+ with colP0:
918
+ if st.button("🧼 Reset to defaults"):
919
+ apply_preset("defaults")
920
+ with colP1a:
921
+ if st.button("🧍 Portrait"):
922
+ apply_preset("portrait")
923
+ with colP2a:
924
+ if st.button("🏞️ Landscape"):
925
+ apply_preset("landscape")
926
+ with colP3a:
927
+ if st.button("📸 Instagram Post"):
928
+ apply_preset("instagram")
929
+ with colP4a:
930
+ st.write("")
931
+
932
+ # Model selection
933
+ st.markdown("### 🤖 Model Selection")
934
+ model_choice = st.selectbox(
935
+ "Base model",
936
+ ["SD 1.5 (v1-5)"] + (["SDXL Base 1.0"] if HAS_SDXL else []),
937
+ index=0,
938
+ help="Choose SD 1.5 for speed/low VRAM. Choose SDXL for higher detail (needs more VRAM/CPU).",
939
+ key="model_choice_selectbox"
940
+ )
941
+ gen_mode = st.selectbox(
942
+ "Generation mode",
943
+ ["txt2img", "img2img"],
944
+ index=0,
945
+ help="txt2img: make an image from your text. img2img: start from a reference image and transform it.",
946
+ key="gen_mode_selectbox"
947
+ )
948
+
949
+ # LoRA integration
950
+ st.markdown("### 🎭 LoRA Integration")
951
+ use_lora = st.checkbox("Attach LoRA", value=False, help="LoRA = small add-on that injects a learned style or subject into the base model.", key="use_lora_checkbox")
952
+ lora_path = st.text_input("LoRA path", "", help="Path to the .safetensors/.pt LoRA file.", key="lora_path_input") if use_lora else ""
953
+ lora_scale = st.slider("LoRA scale", 0.1, 1.5, 0.8, 0.05, help="How strongly to apply the LoRA. Start at 0.7–0.9.", key="lora_scale_slider") if use_lora else 0.0
954
+
955
+ # Generation parameters
956
+ st.markdown("### 🎛️ Generation Parameters")
957
+ colP1, colP2, colP3, colP4 = st.columns(4)
958
+ with colP1:
959
+ width = st.number_input("Width", 256, 1536, 512, 64, help="Image width in pixels. Larger = more detail but slower and more VRAM.", key="width_input")
960
+ with colP2:
961
+ height = st.number_input("Height", 256, 1536, 512, 64, help="Image height in pixels. Common pairs: 512x512 (square), 768x512 (wide).", key="height_input")
962
+ with colP3:
963
+ steps = st.number_input("Steps", 10, 100, 30, 1, help="How long to refine the image. More steps = better quality but slower.", key="steps_input")
964
+ with colP4:
965
+ guidance = st.number_input("Guidance", 1.0, 20.0, 7.5, 0.5, help="How strongly to follow your text prompt. 6–9 is a good range.", key="guidance_input")
966
+
967
+ colP5, colP6, colP7 = st.columns(3)
968
+ with colP5:
969
+ batch = st.number_input("Batch size", 1, 6, 1, 1, help="How many images to generate at once. Higher uses more VRAM.", key="batch_input")
970
+ with colP6:
971
+ seed = st.number_input("Seed (0=random)", 0, 2**31-1, 0, 1, help="Use the same seed to reproduce a result. 0 picks a random seed.", key="seed_input")
972
+ with colP7:
973
+ upsample_x2 = st.checkbox("Upscale ×2 (latent upscaler)", value=False, help="Quickly doubles the resolution after generation.", key="upsample_checkbox")
974
+
975
+ # Performance optimizations
976
+ st.markdown("### ⚡ Performance & Reliability")
977
+ st.caption("These options help run on limited VRAM and reduce crashes. If you are new, keep the defaults on.")
978
+ colT1, colT2, colT3, colT4 = st.columns(4)
979
+ with colT1:
980
+ use_xformers = st.checkbox("xFormers attention", value=True, help="Speeds up attention on GPUs that support it.", key="use_xformers_checkbox")
981
+ with colT2:
982
+ attn_slice = st.checkbox("Attention slicing", value=True, help="Reduces VRAM usage, slightly slower.", key="attn_slice_checkbox")
983
+ with colT3:
984
+ vae_slice = st.checkbox("VAE slicing", value=True, help="Lower VRAM for the decoder, usually safe to keep on.", key="vae_slice_checkbox")
985
+ with colT4:
986
+ vae_tile = st.checkbox("VAE tiling", value=False, help="For very large images. Uses tiles to decode.", key="vae_tile_checkbox")
987
+
988
+ oom_retry = st.checkbox("Auto‑retry on CUDA OOM", value=True, help="If out‑of‑memory happens, try again with safer settings.", key="oom_retry_checkbox")
989
+
990
+ with st.expander("New to this? Quick tips"):
991
+ st.markdown(
992
+ "- For fast, reliable results: SD 1.5, 512×512, Steps 25–35, Guidance 7.5, Batch 1.\n"
993
+ "- Higher detail: try SDXL (needs more VRAM), Steps 30–50, bigger size like 768×768.\n"
994
+ "- Seed: 0 = random. Reuse a non‑zero seed to recreate a result.\n"
995
+ "- Out‑of‑memory? Lower width/height, set Batch = 1, keep slicing options on.\n"
996
+ "- LoRA: paste path to a .safetensors/.pt file. Start scale at 0.7–0.9.\n"
997
+ "- Modes: txt2img = from text; img2img = transform an existing image.\n"
998
+ "- Upscale ×2: quickly increases resolution after generation."
999
+ )
1000
+
1001
+
1002
+ # ==================== GENERATION SECTION BELOW INPUTS (Phase 3.B + 3.C + 3.E) ====================
1003
+
1004
+ with tab_inputs:
1005
+ st.markdown("<div class='section'>", unsafe_allow_html=True)
1006
+ st.subheader("🎛️ Fusion & Generation")
1007
+
1008
+ # Build final prompt from real processed inputs (Phase 3.B True Fusion)
1009
+ parts = [p for p in [main_prompt, style, mood] if p and p.strip()]
1010
+
1011
+ # Audio fusion - REAL processing
1012
+ if 'audio_caption' in locals() and enable_audio and audio_caption:
1013
+ parts.append(f"(sound of: {audio_caption})")
1014
+ if 'tempo' in locals() and enable_audio and tempo:
1015
+ tempo_desc = "slow tempo" if tempo < 90 else ("fast tempo" if tempo > 140 else "")
1016
+ if tempo_desc:
1017
+ parts.append(tempo_desc)
1018
+ if 'audio_tags' in locals() and enable_audio and audio_tags:
1019
+ parts.extend(audio_tags)
1020
+
1021
+ # Data fusion - REAL processing
1022
+ if 'data_summary' in locals() and enable_data and data_summary:
1023
+ parts.append(f"reflecting data patterns: {data_summary}")
1024
+
1025
+ # Emotion fusion - REAL processing
1026
+ if 'emo_label' in locals() and enable_emo and emo_label:
1027
+ parts.append(f"with a {emo_label} atmosphere")
1028
+ elif enable_emo and emo_free.strip():
1029
+ parts.append(f"evoking the feeling: {emo_free.strip()}")
1030
+
1031
+ # Real-time fusion - REAL processing
1032
+ if 'rt_context' in locals() and enable_rt and rt_context:
1033
+ parts.append(rt_context)
1034
+
1035
+ # Build final fused prompt
1036
+ final_prompt = ", ".join([p for p in parts if p])
1037
+ st.markdown("</div>", unsafe_allow_html=True)
1038
+
1039
+ st.markdown("### 🔮 Fused Prompt Preview")
1040
+ st.code(final_prompt, language="text")
1041
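+ # Illustrative fused prompt (audio, data, and emotion enabled; values hypothetical):
+ # "A serene cyberpunk alley at dawn, digital painting, dreamy, (sound of: rain on a tin roof),
+ #  slow tempo, reflecting data patterns: 200 rows x 1 cols; ..., with a neutral, contemplative atmosphere"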
+
1042
+ # Initialize image for img2img
1043
+ init_image = None
1044
+ if gen_mode == "img2img" and enable_ref and style_idxs:
1045
+ # Use first chosen style reference as init image
1046
+ init_image = ref_images[style_idxs[0]-1].resize((int(width), int(height)))
1047
+
1048
+ # Generation + Clear buttons side-by-side
1049
+ col_gen, col_clear = st.columns([3, 1])
1050
+ with col_gen:
1051
+ go = st.button("🚀 Generate Multimodal Art", type="primary", use_container_width=True)
1052
+ with col_clear:
1053
+ clear = st.button("🧹 Clear", use_container_width=True)
1054
+
1055
+ # Clear logic: reset prompt fields and any generated output state
1056
+ if 'generated_images' not in st.session_state:
1057
+ st.session_state.generated_images = []
1058
+ if 'generation_results' not in st.session_state:
1059
+ st.session_state.generation_results = []
1060
+
1061
+ if clear:
1062
+ # Defer clearing input widgets by setting a flag, then rerun
1063
+ st.session_state["clear_inputs"] = True
1064
+ st.success("Cleared current prompt and output. Ready for a new prompt.")
1065
+ st.rerun()
1066
+
1067
+ # Cached pipeline getters
1068
+ @st.cache_resource(show_spinner=True)
1069
+ def get_txt2img():
1070
+ return load_sd15(txt2img=True)
1071
+
1072
+ @st.cache_resource(show_spinner=True)
1073
+ def get_img2img():
1074
+ return load_sd15(txt2img=False)
1075
+
1076
+ @st.cache_resource(show_spinner=True)
1077
+ def get_sdxl():
1078
+ return load_sdxl()
1079
+
1080
+ @st.cache_resource(show_spinner=True)
1081
+ def get_upscaler():
1082
+ return load_upscaler()
1083
+
1084
+ @st.cache_resource(show_spinner=True)
1085
+ def get_cn(cn_type: str):
1086
+ return load_controlnet(cn_type)
1087
+
1088
+ @st.cache_resource(show_spinner=True)
1089
+ def get_cn_i2i(cn_type: str):
1090
+ return load_controlnet_img2img(cn_type)
1091
+
1092
+ def apply_lora(pipe, lora_path, lora_scale):
1093
+ """Apply LoRA to pipeline"""
1094
+ if not lora_path:
1095
+ return "No LoRA"
1096
+ try:
1097
+ pipe.load_lora_weights(lora_path)
1098
+ try:
1099
+ pipe.fuse_lora(lora_scale=lora_scale)
1100
+ except Exception:
1101
+ try:
1102
+ pipe.set_adapters(["default"], adapter_weights=[lora_scale])
1103
+ except Exception:
1104
+ pass
1105
+ return f"LoRA loaded: {os.path.basename(lora_path)} (scale {lora_scale})"
1106
+ except Exception as e:
1107
+ return f"LoRA failed: {e}"
1108
+
1109
+ def upsample_if_any(img: Image.Image):
1110
+ """Apply upscaling if enabled"""
1111
+ if not upsample_x2 or not HAS_UPSCALER:
1112
+ return img, False, "none"
1113
+ try:
1114
+ up = get_upscaler()
1115
+ with (torch.autocast(DEVICE) if DEVICE == "cuda" else torch.no_grad()):
1116
+ out = up(prompt="sharp, detailed, high quality", image=img)
1117
+ return out.images[0], True, "latent_x2"
1118
+ except Exception as e:
1119
+ return img, False, f"fail:{e}"
1120
+
1121
+ def log_rows(rows, log_path):
1122
+ """Log generation results"""
1123
+ exists = Path(log_path).exists()
1124
+ # Union header across Phase 3 logs
1125
+ header = [
1126
+ "filepath", "prompt", "neg_prompt", "steps", "guidance", "mode", "seed",
1127
+ "width", "height", "model", "img2img_strength", "cn_type", "cn_scale",
1128
+ "upscaled", "timestamp"
1129
+ ]
1130
+ with open(log_path, "a", newline="", encoding="utf-8") as f:
1131
+ w = csv.writer(f)
1132
+ if not exists:
1133
+ w.writerow(header)
1134
+ for r in rows:
1135
+ w.writerow([r.get(k, "") for k in header])
1136
+
1137
+ # GENERATION EXECUTION
1138
+ if go:
1139
+ images, paths = [], []
1140
+
1141
+ # Choose pipeline based on model selection
1142
+ if model_choice.startswith("SDXL") and HAS_SDXL and gen_mode == "txt2img":
1143
+ pipe = get_sdxl()
1144
+ model_id = "SDXL-Base-1.0"
1145
+ else:
1146
+ if gen_mode == "txt2img":
1147
+ pipe = get_txt2img()
1148
+ model_id = "SD-1.5"
1149
+ else:
1150
+ pipe = get_img2img()
1151
+ model_id = "SD-1.5 (img2img)"
1152
+
1153
+ # Apply performance optimizations
1154
+ xformed = attempt_enable_xformers(pipe) if use_xformers else False
1155
+ apply_perf(pipe, attn_slice, vae_slice, vae_tile)
1156
+
1157
+ # Apply LoRA if specified
1158
+ lora_msg = ""
1159
+ if use_lora:
1160
+ lora_msg = apply_lora(pipe, lora_path, lora_scale)
1161
+ if lora_msg:
1162
+ st.caption(lora_msg)
1163
+
1164
+ # Determine generation mode based on available inputs (Phase 3.C intelligence)
1165
+ have_style = bool(style_idxs)
1166
+ have_cn = enable_ref and bool(cn_images)
1167
+
1168
+ # MODE PRIORITY: CN+I2I > CN only > I2I only > T2I
1169
+ mode = "T2I"
1170
+ if have_cn and have_style and HAS_CONTROLNET:
1171
+ mode = "CN+I2I"
1172
+ elif have_cn and HAS_CONTROLNET:
1173
+ mode = "CN"
1174
+ elif have_style:
1175
+ mode = "I2I"
1176
+
1177
+ st.info(f"Mode: **{mode}** • Model: **{model_id}** • xFormers: `{xformed}`")
1178
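+ # Examples of the priority above: style refs only -> I2I; a ControlNet map only -> CN;
+ # both (with ControlNet installed) -> CN+I2I; neither -> plain T2I.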
+
1179
+ rows = []
1180
+ attempt_list = list(safe_retry_sizes(height, width, steps)) if oom_retry else [(height, width, steps)]
1181
+
1182
+ # Generate batch
1183
+ for b in range(int(batch)):
1184
+ ok = False
1185
+ last_err = None
1186
+
1187
+ for (h_try, w_try, s_try) in attempt_list:
1188
+ try:
1189
+ # Seed management
1190
+ seed_eff = torch.seed() if seed == 0 else seed + b
1191
+ gen = torch.manual_seed(seed_eff) if DEVICE == "cpu" else torch.Generator(DEVICE).manual_seed(seed_eff)
1192
+
1193
+ with (torch.autocast(DEVICE) if DEVICE == "cuda" else torch.no_grad()):
1194
+ if mode == "CN+I2I":
1195
+ # Hybrid ControlNet + Img2Img (Phase 3.C advanced mode)
1196
+ if CN_IMG2IMG_AVAILABLE:
1197
+ cn_pipe = get_cn_i2i(cn_type)
1198
+ init_ref = ref_images[style_idxs[min(b, len(style_idxs)-1)]-1].resize((w_try, h_try))
1199
+ out = cn_pipe(
1200
+ prompt=final_prompt,
1201
+ image=init_ref,
1202
+ control_image=[im for im in cn_images],
1203
+ controlnet_conditioning_scale=cn_scale,
1204
+ strength=img2img_strength,
1205
+ num_inference_steps=s_try,
1206
+ guidance_scale=guidance,
1207
+ negative_prompt=neg_prompt if neg_prompt.strip() else None,
1208
+ generator=gen,
1209
+ )
1210
+ img = out.images[0]
1211
+ else:
1212
+ # Fallback two-pass approach
1213
+ cn_pipe = get_cn(cn_type)
1214
+ cn_out = cn_pipe(
1215
+ prompt=final_prompt,
1216
+ image=[im for im in cn_images],
1217
+ controlnet_conditioning_scale=cn_scale,
1218
+ num_inference_steps=max(s_try//2, 12),
1219
+ guidance_scale=guidance,
1220
+ negative_prompt=neg_prompt if neg_prompt.strip() else None,
1221
+ generator=gen,
1222
+ )
1223
+ struct_img = cn_out.images[0].resize((w_try, h_try))
1224
+ i2i = get_img2img()
1225
+ init_ref = ref_images[style_idxs[min(b, len(style_idxs)-1)]-1].resize((w_try, h_try))
1226
+ blend = Image.blend(init_ref, struct_img, 0.5)
1227
+ out = i2i(
1228
+ prompt=final_prompt,
1229
+ image=blend,
1230
+ strength=img2img_strength,
1231
+ num_inference_steps=s_try,
1232
+ guidance_scale=guidance,
1233
+ negative_prompt=neg_prompt if neg_prompt.strip() else None,
1234
+ generator=gen,
1235
+ )
1236
+ img = out.images[0]
1237
+
1238
+ elif mode == "CN":
1239
+ # ControlNet only
1240
+ cn_pipe = get_cn(cn_type)
1241
+ out = cn_pipe(
1242
+ prompt=final_prompt,
1243
+ image=[im for im in cn_images],
1244
+ controlnet_conditioning_scale=cn_scale,
1245
+ num_inference_steps=s_try,
1246
+ guidance_scale=guidance,
1247
+ negative_prompt=neg_prompt if neg_prompt.strip() else None,
1248
+ generator=gen,
1249
+ )
1250
+ img = out.images[0]
1251
+
1252
+ elif mode == "I2I":
1253
+ # Img2Img only
1254
+ i2i = get_img2img()
1255
+ init_ref = ref_images[style_idxs[min(b, len(style_idxs)-1)]-1].resize((w_try, h_try))
1256
+ out = i2i(
1257
+ prompt=final_prompt,
1258
+ image=init_ref,
1259
+ strength=img2img_strength,
1260
+ num_inference_steps=s_try,
1261
+ guidance_scale=guidance,
1262
+ negative_prompt=neg_prompt if neg_prompt.strip() else None,
1263
+ generator=gen,
1264
+ )
1265
+ img = out.images[0]
1266
+
1267
+ else:
1268
+ # Text-to-Image
1269
+ kwargs = dict(
1270
+ prompt=final_prompt,
1271
+ num_inference_steps=s_try,
1272
+ guidance_scale=guidance,
1273
+ negative_prompt=neg_prompt if neg_prompt.strip() else None,
1274
+ generator=gen,
1275
+ )
1276
+ if not (model_choice.startswith("SDXL") and HAS_SDXL):
1277
+ kwargs.update({"height": h_try, "width": w_try})
1278
+ out = pipe(**kwargs)
1279
+ img = out.images[0]
1280
+
1281
+ # Optional upscaling
1282
+ upscaled = "none"
1283
+ if upsample_x2 and HAS_UPSCALER:
1284
+ img, did_upscale, upscaled = upsample_if_any(img)
1285
+
1286
+ # Save image
1287
+ fname = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{mode}_{w_try}x{h_try}_s{s_try}_g{guidance}_seed{seed_eff}.png"
1288
+ path = save_image(img, fname)
1289
+ st.image(img, caption=fname, use_container_width=True)
1290
+ paths.append(path)
1291
+ images.append(img)
1292
+
1293
+ # Log generation
1294
+ rows.append({
1295
+ "filepath": path,
1296
+ "prompt": final_prompt,
1297
+ "neg_prompt": neg_prompt,
1298
+ "steps": s_try,
1299
+ "guidance": guidance,
1300
+ "mode": mode,
1301
+ "seed": seed_eff,
1302
+ "width": w_try,
1303
+ "height": h_try,
1304
+ "model": model_id,
1305
+ "img2img_strength": img2img_strength if mode in ["I2I", "CN+I2I"] else "",
1306
+ "cn_type": cn_type if mode in ["CN", "CN+I2I"] else "",
1307
+ "cn_scale": cn_scale if mode in ["CN", "CN+I2I"] else "",
1308
+ "upscaled": upscaled,
1309
+ "timestamp": datetime.now().isoformat()
1310
+ })
1311
+ ok = True
1312
+ break
1313
+
1314
+ except RuntimeError as e:
1315
+ if "out of memory" in str(e).lower() and oom_retry and DEVICE == "cuda":
1316
+ torch.cuda.empty_cache()
1317
+ st.warning(f"CUDA OOM — retrying at smaller size/steps…")
1318
+ continue
1319
+ else:
1320
+ st.error(f"Runtime error: {e}")
1321
+ last_err = str(e)
1322
+ break
1323
+ except Exception as e:
1324
+ st.error(f"Error: {e}")
1325
+ last_err = str(e)
1326
+ break
1327
+
1328
+ if not ok and last_err:
1329
+ st.error(f"Failed item {b+1}: {last_err}")
1330
+
1331
+ # Save results
1332
+ if rows:
1333
+ # Write unified run log (3.B/3.C/3.E compatible)
1334
+ log_rows(rows, RUNLOG)
1335
+ st.success(f"Saved {len(rows)} image(s). Run log updated: {RUNLOG}")
1336
+
1337
+ # ==================== GALLERY & ANNOTATE TAB (Phase 3.D) ====================
1338
+
1339
+ with tab_gallery:
1340
+ st.subheader("🖼️ Gallery & Filters")
1341
+
1342
+ # Helper functions for Phase 3.D workflow management
1343
+ def read_logs():
1344
+ """Read and merge all log files"""
1345
+ frames = []
1346
+ for p in [RUNLOG, RUNLOG_3C, RUNLOG_3E]:
1347
+ if Path(p).exists():
1348
+ try:
1349
+ df = pd.read_csv(p)
1350
+ df["source_log"] = Path(p).name
1351
+ frames.append(df)
1352
+ except Exception as e:
1353
+ st.warning(f"Failed reading {p}: {e}")
1354
+ if not frames:
1355
+ return pd.DataFrame(columns=["filepath"])
1356
+ return pd.concat(frames, ignore_index=True).drop_duplicates(subset=["filepath"])
1357
+
1358
+ def scan_images():
1359
+ """Scan output directory for images"""
1360
+ rows = [{"filepath": str(p), "filename": p.name} for p in OUTPUT_DIR.glob("*.png")]
1361
+ return pd.DataFrame(rows)
1362
+
1363
+ def load_annotations():
1364
+ """Load existing annotations"""
1365
+ if ANNOT_CSV.exists():
1366
+ try:
1367
+ return pd.read_csv(ANNOT_CSV)
1368
+ except Exception:
1369
+ pass
1370
+ return pd.DataFrame(columns=["filepath", "rating", "tags", "notes"])
1371
+
1372
+ def save_annotations(df):
1373
+ """Save annotations to CSV"""
1374
+ df.to_csv(ANNOT_CSV, index=False)
1375
+
1376
+ # Load data
1377
+ imgs_df = scan_images()
1378
+ logs_df = read_logs()
1379
+ ann_df = load_annotations()
1380
+ meta_df = imgs_df.merge(logs_df, on="filepath", how="left")
1381
+
1382
+ if meta_df.empty:
1383
+ st.info("No images found in outputs/. Generate some images first.")
1384
+ else:
1385
+ # Filtering controls
1386
+ st.markdown("### 🔍 Filter Images")
1387
+ colf1, colf2, colf3 = st.columns(3)
1388
+
1389
+ with colf1:
1390
+ mode_opt = ["(all)"] + sorted([m for m in meta_df.get("mode", pd.Series([])).dropna().unique()])
1391
+ sel_mode = st.selectbox("Filter by mode", mode_opt, index=0)
1392
+
1393
+ with colf2:
1394
+ prompt_filter = st.text_input("Filter prompt contains", "")
1395
+
1396
+ with colf3:
1397
+ min_steps = st.number_input("Min steps", 0, 200, 0, 1)
1398
+
1399
+ # Apply filters
1400
+ filtered = meta_df.copy()
1401
+ if sel_mode != "(all)" and "mode" in filtered.columns:
1402
+ filtered = filtered[filtered["mode"] == sel_mode]
1403
+ if prompt_filter.strip() and "prompt" in filtered.columns:
1404
+ filtered = filtered[filtered["prompt"].fillna("").str.contains(prompt_filter, case=False)]
1405
+ if "steps" in filtered.columns:
1406
+ try:
1407
+ filtered = filtered[pd.to_numeric(filtered["steps"], errors="coerce").fillna(0) >= min_steps]
1408
+ except Exception:
1409
+ pass
1410
+
1411
+ st.caption(f"{len(filtered)} image(s) match filters.")
1412
+
1413
+ # Display gallery
1414
+ if not filtered.empty:
1415
+ st.markdown("### 🖼️ Image Gallery")
1416
+ cols = st.columns(4)
1417
+ for i, row in filtered.reset_index(drop=True).iterrows():
1418
+ with cols[i % 4]:
1419
+ p = row["filepath"]
1420
+ try:
1421
+ st.image(p, use_container_width=True, caption=os.path.basename(p))
1422
+ except Exception:
1423
+ st.write(os.path.basename(p))
1424
+ if "prompt" in row and pd.notna(row["prompt"]):
1425
+ st.caption(row["prompt"][:120])
1426
+
1427
+ # Annotation system
1428
+ st.markdown("---")
1429
+ st.subheader("✍️ Annotate / Rate / Tag")
1430
+ choose = st.multiselect("Pick images to annotate", meta_df["filepath"].tolist())
1431
+
1432
+ if choose:
1433
+ for path in choose:
1434
+ st.markdown("---")
1435
+ st.write(f"**{os.path.basename(path)}**")
1436
+ try:
1437
+ st.image(path, width=320)
1438
+ except Exception:
1439
+ pass
1440
+
1441
+ # Get existing annotation values
1442
+ prev = ann_df[ann_df["filepath"] == path]
1443
+ rating_val = int(prev.iloc[0]["rating"]) if not prev.empty and not pd.isna(prev.iloc[0]["rating"]) else 3
1444
+ tags_val = prev.iloc[0]["tags"] if not prev.empty else ""
1445
+ notes_val = prev.iloc[0]["notes"] if not prev.empty else ""
1446
+
1447
+ # Annotation controls
1448
+ colE1, colE2, colE3 = st.columns([1, 1, 2])
1449
+ with colE1:
1450
+ rating = st.slider(
1451
+ f"Rating {os.path.basename(path)}",
1452
+ 1, 5, rating_val, 1,
1453
+ key=f"rate_{path}"
1454
+ )
1455
+ with colE2:
1456
+ tags = st.text_input("Tags", tags_val, key=f"tags_{path}")
1457
+ with colE3:
1458
+ notes = st.text_area("Notes", notes_val, key=f"notes_{path}")
1459
+
1460
+ # Update annotations dataframe
1461
+ if (ann_df["filepath"] == path).any():
1462
+ ann_df.loc[ann_df["filepath"] == path, ["rating", "tags", "notes"]] = [rating, tags, notes]
1463
+ else:
1464
+ ann_df.loc[len(ann_df)] = [path, rating, tags, notes]
1465
+
1466
+ if st.button("💾 Save annotations", use_container_width=True):
1467
+ save_annotations(ann_df)
1468
+ st.success("Annotations saved!")
1469
+ else:
1470
+ st.info("Select images above to annotate them.")
1471
+
1472
+ # ==================== PRESETS TAB (Phase 3.D) ====================
1473
+
1474
+ with tab_presets:
1475
+ st.subheader("💾 Create / Save / Load Presets")
1476
+
1477
+ # Preset creation
1478
+ st.markdown("### 🎛️ Create New Preset")
1479
+ colP1, colP2 = st.columns(2)
1480
+
1481
+ with colP1:
1482
+ preset_name = st.text_input("Preset name", "my_style", key="preset_name_input")
1483
+ p_prompt = st.text_input("Prompt", main_prompt or "A serene cyberpunk alley at dawn", key="preset_prompt_input")
1484
+ p_style = st.text_input("Style", style or "digital painting", key="preset_style_input")
1485
+ p_mood = st.text_input("Mood", mood or ", ".join(MOOD_OPTIONS[:2]), key="preset_mood_input")
1486
+ p_neg = st.text_input("Negative", neg_prompt or "", key="preset_neg_input")
1487
+
1488
+ with colP2:
1489
+ p_steps = st.number_input("Steps", 10, 100, steps or 30, 1, key="preset_steps_input")
1490
+ p_guid = st.number_input("Guidance", 1.0, 20.0, guidance or 7.5, 0.5, key="preset_guidance_input")
1491
+ p_i2i = st.slider("img2img strength", 0.2, 0.9, 0.55, 0.05, key="preset_i2i_slider")
1492
+ p_cn_type = st.selectbox("ControlNet type", ["Canny", "Depth"], key="preset_cn_type_selectbox")
1493
+ p_cn_scale = st.slider("ControlNet scale", 0.1, 2.0, 1.0, 0.05, key="preset_cn_scale_slider")
1494
+
1495
+ # Build preset object
1496
+ preset = {
1497
+ "name": preset_name,
1498
+ "prompt": p_prompt,
1499
+ "style": p_style,
1500
+ "mood": p_mood,
1501
+ "negative": p_neg,
1502
+ "steps": p_steps,
1503
+ "guidance": p_guid,
1504
+ "img2img_strength": p_i2i,
1505
+ "controlnet": {"type": p_cn_type, "scale": p_cn_scale},
1506
+ "created_at": datetime.now().isoformat()
1507
+ }
1508
+
1509
+ st.markdown("### 📋 Preset Preview")
1510
+ st.code(json.dumps(preset, indent=2), language="json")
1511
+
1512
+ # Save/Load controls
1513
+ colPS1, colPS2 = st.columns(2)
1514
+
1515
+ with colPS1:
1516
+ st.markdown("### 💾 Save Preset")
1517
+ if st.button("💾 Save preset", use_container_width=True, key="save_preset_button"):
1518
+ if preset_name.strip():
1519
+ fp = PRESETS_DIR / f"{preset_name}.json"
1520
+ with open(fp, "w", encoding="utf-8") as f:
1521
+ json.dump(preset, f, indent=2)
1522
+ st.success(f"Saved {fp}")
1523
+ else:
1524
+ st.error("Please enter a preset name")
1525
+
1526
+ with colPS2:
1527
+ st.markdown("### 📂 Load Preset")
1528
+ existing = sorted([p.name for p in PRESETS_DIR.glob("*.json")])
1529
+ if existing:
1530
+ sel = st.selectbox("Load preset", ["(choose)"] + existing, key="load_preset_selectbox")
1531
+ if sel != "(choose)":
1532
+ with open(PRESETS_DIR / sel, "r", encoding="utf-8") as f:
1533
+ loaded = json.load(f)
1534
+ st.success(f"Loaded {sel}")
1535
+ st.code(json.dumps(loaded, indent=2), language="json")
1536
+ else:
1537
+ st.info("No presets found. Create your first preset above!")
1538
+
1539
+ # ==================== EXPORT TAB (Phase 3.D) ====================
1540
+
1541
+ with tab_export:
1542
+ st.subheader("📦 Export Bundle (ZIP)")
1543
+
1544
+ # Helper functions for export
1545
+ def read_logs_all():
1546
+ """Read all logs for export"""
1547
+ frames = []
1548
+ for p in [RUNLOG, RUNLOG_3C, RUNLOG_3E]:
1549
+ if Path(p).exists():
1550
+ try:
1551
+ df = pd.read_csv(p)
1552
+ df["source_log"] = Path(p).name
1553
+ frames.append(df)
1554
+ except Exception as e:
1555
+ st.warning(f"Read fail {p}: {e}")
1556
+ if not frames:
1557
+ return pd.DataFrame(columns=["filepath"])
1558
+ return pd.concat(frames, ignore_index=True).drop_duplicates(subset=["filepath"])
1559
+
1560
+ def scan_imgs():
1561
+ """Scan images for export"""
1562
+ return pd.DataFrame([
1563
+ {"filepath": str(p), "filename": p.name}
1564
+ for p in OUTPUT_DIR.glob("*.png")
1565
+ ])
1566
+
1567
+ # Load export data
1568
+ imgs_df = scan_imgs()
1569
+ logs_df = read_logs_all()
1570
+
1571
+ if imgs_df.empty:
1572
+ st.info("No images to export yet. Generate some images first.")
1573
+ else:
1574
+ meta_df = imgs_df.merge(logs_df, on="filepath", how="left")
1575
+
1576
+ # Display available images
1577
+ st.markdown("### 📋 Available Images")
1578
+ display_cols = ["filepath", "prompt", "mode", "steps", "guidance"]
1579
+ available_cols = [col for col in display_cols if col in meta_df.columns]
1580
+ st.dataframe(
1581
+ meta_df[available_cols].fillna("").astype(str),
1582
+ use_container_width=True,
1583
+ height=240
1584
+ )
1585
+
1586
+ # Export selection
1587
+ st.markdown("### 🎯 Export Selection")
1588
+ sel = st.multiselect(
1589
+ "Select images to export",
1590
+ meta_df["filepath"].tolist(),
1591
+ default=meta_df["filepath"].tolist()[:8],
1592
+ key="export_images_multiselect"
1593
+ )
1594
+
1595
+ # Preset inclusion
1596
+ include_preset = st.checkbox("Include preset.json", value=False, key="include_preset_checkbox")
1597
+ preset_blob = None
1598
+ if include_preset:
1599
+ ex = sorted([p.name for p in PRESETS_DIR.glob("*.json")])
1600
+ if ex:
1601
+ choose = st.selectbox("Choose preset", ex, key="export_preset_selectbox")
1602
+ with open(PRESETS_DIR / choose, "r", encoding="utf-8") as f:
1603
+ preset_blob = json.load(f)
1604
+ else:
1605
+ st.warning("No presets found in /presets")
1606
+ include_preset = False
1607
+
1608
+ # Bundle configuration
1609
+ bundle_name = st.text_input(
1610
+ "Bundle name (no spaces)",
1611
+ f"compi_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
1612
+ key="bundle_name_input"
1613
+ )
1614
+
1615
+ # Create export bundle
1616
+ if st.button("📦 Create Export Bundle", type="primary", use_container_width=True, key="create_bundle_button"):
1617
+ if not sel:
1618
+ st.error("Pick at least one image.")
1619
+ elif not bundle_name.strip():
1620
+ st.error("Please enter a bundle name.")
1621
+ else:
1622
+ with st.spinner("Creating export bundle..."):
1623
+ # Create temporary directory
1624
+ tmp_dir = EXPORTS_DIR / bundle_name
1625
+ if tmp_dir.exists():
1626
+ shutil.rmtree(tmp_dir)
1627
+ (tmp_dir / "images").mkdir(parents=True, exist_ok=True)
1628
+
1629
+ # Copy images
1630
+ for p in sel:
1631
+ try:
1632
+ shutil.copy2(p, tmp_dir / "images" / os.path.basename(p))
1633
+ except Exception as e:
1634
+ st.warning(f"Copy failed: {p} ({e})")
1635
+
1636
+ # Export metadata
1637
+ msel = meta_df[meta_df["filepath"].isin(sel)].copy()
1638
+ msel.to_csv(tmp_dir / "metadata.csv", index=False)
1639
+
1640
+ # Export annotations
1641
+ if ANNOT_CSV.exists():
1642
+ shutil.copy2(ANNOT_CSV, tmp_dir / "annotations.csv")
1643
+ else:
1644
+ pd.DataFrame(columns=["filepath", "rating", "tags", "notes"]).to_csv(
1645
+ tmp_dir / "annotations.csv", index=False
1646
+ )
1647
+
1648
+ # Create manifest
1649
+ manifest = {
1650
+ "bundle_name": bundle_name,
1651
+ "created_at": datetime.now().isoformat(),
1652
+ "environment": env_snapshot(),
1653
+ "includes": {
1654
+ "images": True,
1655
+ "metadata_csv": True,
1656
+ "annotations_csv": True,
1657
+ "preset_json": bool(preset_blob),
1658
+ "readme_md": True
1659
+ }
1660
+ }
1661
+ with open(tmp_dir / "manifest.json", "w", encoding="utf-8") as f:
1662
+ json.dump(manifest, f, indent=2)
1663
+
1664
+ # Include preset if specified
1665
+ if preset_blob:
1666
+ with open(tmp_dir / "preset.json", "w", encoding="utf-8") as f:
1667
+ json.dump(preset_blob, f, indent=2)
1668
+
1669
+ # Create README
1670
+ with open(tmp_dir / "README.md", "w", encoding="utf-8") as f:
1671
+ f.write(mk_readme(manifest, msel))
1672
+
1673
+ # Create ZIP file
1674
+ zpath = EXPORTS_DIR / f"{bundle_name}.zip"
1675
+ if zpath.exists():
1676
+ zpath.unlink()
1677
+
1678
+ with zipfile.ZipFile(zpath, 'w', zipfile.ZIP_DEFLATED) as zf:
1679
+ for root, _, files in os.walk(tmp_dir):
1680
+ for file in files:
1681
+ full = Path(root) / file
1682
+ zf.write(full, full.relative_to(tmp_dir))
1683
+
1684
+ # Cleanup temporary directory
1685
+ shutil.rmtree(tmp_dir, ignore_errors=True)
1686
+
1687
+ st.success(f"✅ Export created: {zpath}")
1688
+ st.info(f"📁 Bundle size: {zpath.stat().st_size / (1024*1024):.1f} MB")
1689
+
1690
+ # Provide download link
1691
+ with open(zpath, "rb") as f:
1692
+ st.download_button(
1693
+ label="📥 Download Export Bundle",
1694
+ data=f.read(),
1695
+ file_name=f"{bundle_name}.zip",
1696
+ mime="application/zip",
1697
+ use_container_width=True
1698
+ )
1699
+
1700
+ # ==================== FOOTER ====================
1701
+
1702
+ st.markdown("---")
1703
+ st.markdown("""
1704
+ <div style='text-align: center; color: #666; padding: 20px;'>
1705
+ <strong>🧪 CompI Phase 3 Final Dashboard</strong><br>
1706
+ Complete integration of all Phase 3 components (3.A → 3.E)<br>
1707
+ <em>Multimodal AI Art Generation • Advanced References • Performance Management • Professional Workflow</em>
1708
+ </div>
1709
+ """, unsafe_allow_html=True)
src/utils/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ """
2
+ Utility functions for CompI project.
3
+ """
4
+
5
+ from .logging_utils import setup_logger
6
+ from .file_utils import save_image, save_audio, load_config
7
+ from .image_utils import ImageProcessor, StyleAnalyzer
8
+
9
+ __all__ = [
10
+ "setup_logger",
11
+ "save_image",
12
+ "save_audio",
13
+ "load_config",
14
+ "ImageProcessor",
15
+ "StyleAnalyzer"
16
+ ]
src/utils/audio_utils.py ADDED
@@ -0,0 +1,342 @@
1
+ """
2
+ Audio processing utilities for CompI Phase 2.A: Audio Input Integration
3
+
4
+ This module provides comprehensive audio analysis capabilities including:
5
+ - Audio feature extraction (tempo, energy, spectral features)
6
+ - Audio preprocessing and normalization
7
+ - Audio-to-text captioning using OpenAI Whisper
8
+ - Multimodal prompt fusion combining audio features with text prompts
9
+ """
10
+
11
+ import os
12
+ import numpy as np
13
+ import librosa
14
+ import soundfile as sf
15
+ from typing import Dict, List, Optional, Tuple, Union
16
+ import logging
17
+ from dataclasses import dataclass
18
+
19
+ # Setup logging
20
+ logging.basicConfig(level=logging.INFO)
21
+ logger = logging.getLogger(__name__)
22
+
23
+ @dataclass
24
+ class AudioFeatures:
25
+ """Container for extracted audio features"""
26
+ tempo: float
27
+ energy: float # RMS energy
28
+ zero_crossing_rate: float
29
+ spectral_centroid: float
30
+ spectral_rolloff: float
31
+ mfcc_mean: np.ndarray
32
+ chroma_mean: np.ndarray
33
+ duration: float
34
+ sample_rate: int
35
+
36
+ def to_dict(self) -> Dict:
37
+ """Convert to dictionary for JSON serialization"""
38
+ return {
39
+ 'tempo': float(self.tempo),
40
+ 'energy': float(self.energy),
41
+ 'zero_crossing_rate': float(self.zero_crossing_rate),
42
+ 'spectral_centroid': float(self.spectral_centroid),
43
+ 'spectral_rolloff': float(self.spectral_rolloff),
44
+ 'mfcc_mean': self.mfcc_mean.tolist() if hasattr(self.mfcc_mean, 'tolist') else list(self.mfcc_mean),
45
+ 'chroma_mean': self.chroma_mean.tolist() if hasattr(self.chroma_mean, 'tolist') else list(self.chroma_mean),
46
+ 'duration': float(self.duration),
47
+ 'sample_rate': int(self.sample_rate)
48
+ }
49
+
50
+ class AudioProcessor:
51
+ """Comprehensive audio processing and analysis"""
52
+
53
+ def __init__(self, target_sr: int = 16000, max_duration: float = 60.0):
54
+ """
55
+ Initialize audio processor
56
+
57
+ Args:
58
+ target_sr: Target sample rate for processing
59
+ max_duration: Maximum audio duration to process (seconds)
60
+ """
61
+ self.target_sr = target_sr
62
+ self.max_duration = max_duration
63
+
64
+ def load_audio(self, audio_path: str) -> Tuple[np.ndarray, int]:
65
+ """
66
+ Load and preprocess audio file
67
+
68
+ Args:
69
+ audio_path: Path to audio file
70
+
71
+ Returns:
72
+ Tuple of (audio_data, sample_rate)
73
+ """
74
+ try:
75
+ # Load audio with librosa
76
+ audio, sr = librosa.load(
77
+ audio_path,
78
+ sr=self.target_sr,
79
+ duration=self.max_duration
80
+ )
81
+
82
+ # Normalize audio
83
+ audio = librosa.util.normalize(audio)
84
+
85
+ logger.info(f"Loaded audio: {audio_path}, duration: {len(audio)/sr:.2f}s")
86
+ return audio, sr
87
+
88
+ except Exception as e:
89
+ logger.error(f"Error loading audio {audio_path}: {e}")
90
+ raise
91
+
92
+ def extract_features(self, audio: np.ndarray, sr: int) -> AudioFeatures:
93
+ """
94
+ Extract comprehensive audio features
95
+
96
+ Args:
97
+ audio: Audio signal
98
+ sr: Sample rate
99
+
100
+ Returns:
101
+ AudioFeatures object containing all extracted features
102
+ """
103
+ try:
104
+ # Basic features
105
+ duration = len(audio) / sr
106
+
107
+ # Tempo and beat tracking
108
+ tempo, _ = librosa.beat.beat_track(y=audio, sr=sr)
109
+
110
+ # Energy (RMS)
111
+ rms = librosa.feature.rms(y=audio)[0]
112
+ energy = np.sqrt(np.mean(rms**2))
113
+
114
+ # Zero crossing rate
115
+ zcr = librosa.feature.zero_crossing_rate(audio)[0]
116
+ zcr_mean = np.mean(zcr)
117
+
118
+ # Spectral features
119
+ spectral_centroids = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
120
+ spectral_centroid = np.mean(spectral_centroids)
121
+
122
+ spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)[0]
123
+ spectral_rolloff_mean = np.mean(spectral_rolloff)
124
+
125
+ # MFCC features
126
+ mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
127
+ mfcc_mean = np.mean(mfccs, axis=1)
128
+
129
+ # Chroma features
130
+ chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
131
+ chroma_mean = np.mean(chroma, axis=1)
132
+
133
+ features = AudioFeatures(
134
+ tempo=float(tempo),
135
+ energy=float(energy),
136
+ zero_crossing_rate=float(zcr_mean),
137
+ spectral_centroid=float(spectral_centroid),
138
+ spectral_rolloff=float(spectral_rolloff_mean),
139
+ mfcc_mean=mfcc_mean,
140
+ chroma_mean=chroma_mean,
141
+ duration=float(duration),
142
+ sample_rate=int(sr)
143
+ )
144
+
145
+ logger.info(f"Extracted features: tempo={float(tempo):.1f}, energy={float(energy):.4f}")
146
+ return features
147
+
148
+ except Exception as e:
149
+ logger.error(f"Error extracting audio features: {e}")
150
+ raise
151
+
152
+ def analyze_audio_file(self, audio_path: str) -> AudioFeatures:
153
+ """
154
+ Complete audio analysis pipeline
155
+
156
+ Args:
157
+ audio_path: Path to audio file
158
+
159
+ Returns:
160
+ AudioFeatures object
161
+ """
162
+ audio, sr = self.load_audio(audio_path)
163
+ return self.extract_features(audio, sr)
164
+
165
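A minimal usage sketch for the AudioProcessor class above, assuming librosa is installed, the repository root is on PYTHONPATH, and an audio file exists at the hypothetical path examples/clip.wav:

    from src.utils.audio_utils import AudioProcessor

    processor = AudioProcessor(target_sr=16000, max_duration=60.0)
    features = processor.analyze_audio_file("examples/clip.wav")  # hypothetical input file

    # AudioFeatures holds plain numbers/arrays; to_dict() makes them JSON-friendly
    print(f"tempo={features.tempo:.1f} BPM, energy={features.energy:.4f}")
    print(features.to_dict())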
+ class AudioCaptioner:
166
+ """Audio-to-text captioning using OpenAI Whisper"""
167
+
168
+ def __init__(self, model_size: str = "base", device: str = "auto"):
169
+ """
170
+ Initialize audio captioner
171
+
172
+ Args:
173
+ model_size: Whisper model size (tiny, base, small, medium, large)
174
+ device: Device to run on (auto, cpu, cuda)
175
+ """
176
+ self.model_size = model_size
177
+ self.device = device
178
+ self._model = None
179
+
180
+ def _load_model(self):
181
+ """Lazy load Whisper model"""
182
+ if self._model is None:
183
+ try:
184
+ import whisper
185
+ import torch
+ # whisper.load_model needs a concrete device, so resolve "auto" to cuda/cpu first
+ device = self.device
+ if device == "auto":
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ self._model = whisper.load_model(self.model_size, device=device)
186
+ logger.info(f"Loaded Whisper model: {self.model_size}")
187
+ except ImportError:
188
+ logger.error("OpenAI Whisper not installed. Install with: pip install openai-whisper")
189
+ raise
190
+ except Exception as e:
191
+ logger.error(f"Error loading Whisper model: {e}")
192
+ raise
193
+
194
+ def caption_audio(self, audio_path: str, language: str = "en") -> str:
195
+ """
196
+ Generate text caption from audio
197
+
198
+ Args:
199
+ audio_path: Path to audio file
200
+ language: Language code for transcription
201
+
202
+ Returns:
203
+ Text caption of the audio content
204
+ """
205
+ self._load_model()
206
+
207
+ try:
208
+ import whisper
209
+
210
+ # Load and preprocess audio for Whisper
211
+ audio = whisper.load_audio(audio_path)
212
+ audio = whisper.pad_or_trim(audio)
213
+
214
+ # Generate mel spectrogram
215
+ mel = whisper.log_mel_spectrogram(audio).to(self._model.device)
216
+
217
+ # Decode audio
218
+ options = whisper.DecodingOptions(language=language, fp16=False)
219
+ result = whisper.decode(self._model, mel, options)
220
+
221
+ caption = result.text.strip()
222
+ logger.info(f"Generated audio caption: '{caption[:50]}...'")
223
+
224
+ return caption
225
+
226
+ except Exception as e:
227
+ logger.error(f"Error captioning audio: {e}")
228
+ return ""
229
+
230
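A short captioning sketch for AudioCaptioner, assuming the openai-whisper package is installed and the same hypothetical examples/clip.wav file; the Whisper model is loaded lazily on the first call:

    from src.utils.audio_utils import AudioCaptioner

    captioner = AudioCaptioner(model_size="base", device="cpu")  # "tiny" is faster for experiments
    caption = captioner.caption_audio("examples/clip.wav", language="en")
    print(caption or "(no transcribable content)")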
+ class MultimodalPromptFusion:
231
+ """Intelligent fusion of text prompts with audio features and captions"""
232
+
233
+ def __init__(self):
234
+ """Initialize prompt fusion system"""
235
+ pass
236
+
237
+ def fuse_prompt_with_audio(
238
+ self,
239
+ text_prompt: str,
240
+ style: str,
241
+ mood: str,
242
+ audio_features: AudioFeatures,
243
+ audio_caption: str = ""
244
+ ) -> str:
245
+ """
246
+ Create enhanced prompt by fusing text with audio analysis
247
+
248
+ Args:
249
+ text_prompt: Original text prompt
250
+ style: Art style
251
+ mood: Mood/atmosphere
252
+ audio_features: Extracted audio features
253
+ audio_caption: Audio caption from Whisper
254
+
255
+ Returns:
256
+ Enhanced multimodal prompt
257
+ """
258
+ # Start with base prompt
259
+ enhanced_prompt = text_prompt.strip()
260
+
261
+ # Add style and mood
262
+ if style:
263
+ enhanced_prompt += f", {style}"
264
+ if mood:
265
+ enhanced_prompt += f", {mood}"
266
+
267
+ # Add audio caption if available
268
+ if audio_caption:
269
+ enhanced_prompt += f", inspired by the sound of: {audio_caption}"
270
+
271
+ # Add tempo-based descriptors
272
+ if audio_features.tempo < 80:
273
+ enhanced_prompt += ", slow and contemplative"
274
+ elif audio_features.tempo > 140:
275
+ enhanced_prompt += ", fast-paced and energetic"
276
+ elif audio_features.tempo > 120:
277
+ enhanced_prompt += ", upbeat and dynamic"
278
+
279
+ # Add energy-based descriptors
280
+ if audio_features.energy > 0.05:
281
+ enhanced_prompt += ", vibrant and powerful"
282
+ elif audio_features.energy < 0.02:
283
+ enhanced_prompt += ", gentle and subtle"
284
+
285
+ # Add rhythm-based descriptors
286
+ if audio_features.zero_crossing_rate > 0.15:
287
+ enhanced_prompt += ", rhythmic and percussive"
288
+
289
+ # Add tonal descriptors based on spectral features
290
+ if audio_features.spectral_centroid > 3000:
291
+ enhanced_prompt += ", bright and crisp"
292
+ elif audio_features.spectral_centroid < 1500:
293
+ enhanced_prompt += ", warm and deep"
294
+
295
+ logger.info(f"Enhanced prompt: {enhanced_prompt}")
296
+ return enhanced_prompt
297
+
298
+ def generate_audio_tags(self, audio_features: AudioFeatures) -> List[str]:
299
+ """
300
+ Generate descriptive tags based on audio features
301
+
302
+ Args:
303
+ audio_features: Extracted audio features
304
+
305
+ Returns:
306
+ List of descriptive tags
307
+ """
308
+ tags = []
309
+
310
+ # Tempo tags
311
+ if audio_features.tempo < 60:
312
+ tags.append("very_slow")
313
+ elif audio_features.tempo < 90:
314
+ tags.append("slow")
315
+ elif audio_features.tempo < 120:
316
+ tags.append("moderate")
317
+ elif audio_features.tempo < 140:
318
+ tags.append("fast")
319
+ else:
320
+ tags.append("very_fast")
321
+
322
+ # Energy tags
323
+ if audio_features.energy > 0.06:
324
+ tags.append("high_energy")
325
+ elif audio_features.energy > 0.03:
326
+ tags.append("medium_energy")
327
+ else:
328
+ tags.append("low_energy")
329
+
330
+ # Rhythm tags
331
+ if audio_features.zero_crossing_rate > 0.15:
332
+ tags.append("percussive")
333
+ elif audio_features.zero_crossing_rate < 0.05:
334
+ tags.append("smooth")
335
+
336
+ # Spectral tags
337
+ if audio_features.spectral_centroid > 3000:
338
+ tags.append("bright")
339
+ elif audio_features.spectral_centroid < 1500:
340
+ tags.append("dark")
341
+
342
+ return tags
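MultimodalPromptFusion ties the module together: it appends descriptors to the text prompt based on the caption and on feature thresholds (tempo around 80/120/140 BPM, energy around 0.02/0.05, zero-crossing rate, spectral centroid). A hedged end-to-end sketch with hypothetical file paths, assuming librosa and openai-whisper are installed:

    from src.utils.audio_utils import AudioProcessor, AudioCaptioner, MultimodalPromptFusion

    audio_path = "examples/clip.wav"  # hypothetical input file
    features = AudioProcessor().analyze_audio_file(audio_path)
    caption = AudioCaptioner(model_size="base", device="cpu").caption_audio(audio_path)  # "" on failure

    fusion = MultimodalPromptFusion()
    prompt = fusion.fuse_prompt_with_audio(
        text_prompt="a forest at dawn",
        style="oil painting",
        mood="dreamy",
        audio_features=features,
        audio_caption=caption,
    )
    print(prompt)
    print(fusion.generate_audio_tags(features))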
src/utils/data_utils.py ADDED
@@ -0,0 +1,654 @@
1
+ """
2
+ CompI Data Processing Utilities
3
+
4
+ This module provides utilities for Phase 2.B: Data/Logic Input Integration
5
+ - CSV data analysis and processing
6
+ - Mathematical formula evaluation
7
+ - Data-to-text conversion (poetic descriptions)
8
+ - Data visualization generation
9
+ - Statistical analysis and pattern detection
10
+ """
11
+
12
+ import os
13
+ import io
14
+ import ast
15
+ import math
16
+ import numpy as np
17
+ import pandas as pd
18
+ import matplotlib
19
+ matplotlib.use('Agg') # Use non-interactive backend for Streamlit
20
+ import matplotlib.pyplot as plt
21
+ import seaborn as sns
22
+ from typing import Dict, List, Optional, Tuple, Union, Any
23
+ from dataclasses import dataclass
24
+ from PIL import Image
25
+ import logging
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+ @dataclass
30
+ class DataFeatures:
31
+ """Container for extracted data features and statistics"""
32
+
33
+ # Basic properties
34
+ shape: Tuple[int, int]
35
+ columns: List[str]
36
+ numeric_columns: List[str]
37
+ data_types: Dict[str, str]
38
+
39
+ # Statistical features
40
+ means: Dict[str, float]
41
+ medians: Dict[str, float]
42
+ stds: Dict[str, float]
43
+ mins: Dict[str, float]
44
+ maxs: Dict[str, float]
45
+ ranges: Dict[str, float]
46
+
47
+ # Pattern features
48
+ trends: Dict[str, str] # 'increasing', 'decreasing', 'stable', 'volatile'
49
+ correlations: Dict[str, float] # strongest correlations
50
+ seasonality: Dict[str, bool] # detected patterns
51
+
52
+ # Derived insights
53
+ complexity_score: float # 0-1 measure of data complexity
54
+ variability_score: float # 0-1 measure of data variability
55
+ pattern_strength: float # 0-1 measure of detectable patterns
56
+
57
+ def to_dict(self) -> Dict[str, Any]:
58
+ """Convert to dictionary for JSON serialization"""
59
+ return {
60
+ 'shape': self.shape,
61
+ 'columns': self.columns,
62
+ 'numeric_columns': self.numeric_columns,
63
+ 'data_types': self.data_types,
64
+ 'means': self.means,
65
+ 'medians': self.medians,
66
+ 'stds': self.stds,
67
+ 'mins': self.mins,
68
+ 'maxs': self.maxs,
69
+ 'ranges': self.ranges,
70
+ 'trends': self.trends,
71
+ 'correlations': self.correlations,
72
+ 'seasonality': self.seasonality,
73
+ 'complexity_score': self.complexity_score,
74
+ 'variability_score': self.variability_score,
75
+ 'pattern_strength': self.pattern_strength
76
+ }
77
+
78
+ class DataProcessor:
79
+ """Core data processing and analysis functionality"""
80
+
81
+ def __init__(self):
82
+ """Initialize the data processor"""
83
+ self.safe_functions = {
84
+ # Math functions
85
+ 'abs': abs, 'round': round, 'min': min, 'max': max,
86
+ 'sum': sum, 'len': len, 'pow': pow,
87
+
88
+ # NumPy functions
89
+ 'np': np, 'numpy': np,
90
+ 'sin': np.sin, 'cos': np.cos, 'tan': np.tan,
91
+ 'exp': np.exp, 'log': np.log, 'sqrt': np.sqrt,
92
+ 'pi': np.pi, 'e': np.e,
93
+
94
+ # Math module functions
95
+ 'math': math,
96
+
97
+ # Restricted builtins
98
+ '__builtins__': {}
99
+ }
100
+
101
+ def analyze_csv_data(self, df: pd.DataFrame) -> DataFeatures:
102
+ """
103
+ Comprehensive analysis of CSV data
104
+
105
+ Args:
106
+ df: Input DataFrame
107
+
108
+ Returns:
109
+ DataFeatures object with extracted insights
110
+ """
111
+ logger.info(f"Analyzing CSV data with shape {df.shape}")
112
+
113
+ # Basic properties
114
+ shape = df.shape
115
+ columns = df.columns.tolist()
116
+ numeric_df = df.select_dtypes(include=[np.number])
117
+ numeric_columns = numeric_df.columns.tolist()
118
+ data_types = {col: str(df[col].dtype) for col in columns}
119
+
120
+ # Statistical features
121
+ means = {col: float(numeric_df[col].mean()) for col in numeric_columns}
122
+ medians = {col: float(numeric_df[col].median()) for col in numeric_columns}
123
+ stds = {col: float(numeric_df[col].std()) for col in numeric_columns}
124
+ mins = {col: float(numeric_df[col].min()) for col in numeric_columns}
125
+ maxs = {col: float(numeric_df[col].max()) for col in numeric_columns}
126
+ ranges = {col: maxs[col] - mins[col] for col in numeric_columns}
127
+
128
+ # Pattern analysis
129
+ trends = self._analyze_trends(numeric_df)
130
+ correlations = self._find_strongest_correlations(numeric_df)
131
+ seasonality = self._detect_seasonality(numeric_df)
132
+
133
+ # Derived scores
134
+ complexity_score = self._calculate_complexity_score(numeric_df)
135
+ variability_score = self._calculate_variability_score(stds, ranges)
136
+ pattern_strength = self._calculate_pattern_strength(trends, correlations)
137
+
138
+ return DataFeatures(
139
+ shape=shape,
140
+ columns=columns,
141
+ numeric_columns=numeric_columns,
142
+ data_types=data_types,
143
+ means=means,
144
+ medians=medians,
145
+ stds=stds,
146
+ mins=mins,
147
+ maxs=maxs,
148
+ ranges=ranges,
149
+ trends=trends,
150
+ correlations=correlations,
151
+ seasonality=seasonality,
152
+ complexity_score=complexity_score,
153
+ variability_score=variability_score,
154
+ pattern_strength=pattern_strength
155
+ )
156
+
157
+ def evaluate_formula(self, formula: str, num_points: int = 100) -> Tuple[np.ndarray, Dict[str, Any]]:
158
+ """
159
+ Safely evaluate mathematical formula
160
+
161
+ Args:
162
+ formula: Mathematical expression (Python/NumPy syntax)
163
+ num_points: Number of points to generate
164
+
165
+ Returns:
166
+ Tuple of (result_array, metadata)
167
+ """
168
+ logger.info(f"Evaluating formula: {formula}")
169
+
170
+ try:
171
+ # Create default x values if not specified in formula
172
+ if 'x' in formula and 'linspace' not in formula and 'arange' not in formula:
173
+ # Add default x range if x is used but not defined
174
+ x = np.linspace(0, 10, num_points)
175
+ self.safe_functions['x'] = x
176
+
177
+ # Evaluate the formula
178
+ result = eval(formula, self.safe_functions)
179
+
180
+ # Ensure result is a numpy array
181
+ if not isinstance(result, np.ndarray):
182
+ if isinstance(result, (list, tuple)):
183
+ result = np.array(result)
184
+ else:
185
+ # Single value - create array
186
+ result = np.full(num_points, result)
187
+
188
+ # Analyze the result
189
+ metadata = {
190
+ 'length': len(result),
191
+ 'min': float(np.min(result)),
192
+ 'max': float(np.max(result)),
193
+ 'mean': float(np.mean(result)),
194
+ 'std': float(np.std(result)),
195
+ 'range': float(np.max(result) - np.min(result)),
196
+ 'formula': formula,
197
+ 'has_pattern': self._detect_mathematical_pattern(result)
198
+ }
199
+
200
+ return result, metadata
201
+
202
+ except Exception as e:
203
+ logger.error(f"Formula evaluation failed: {e}")
204
+ raise ValueError(f"Invalid formula: {e}")
205
+
206
+ def _analyze_trends(self, df: pd.DataFrame) -> Dict[str, str]:
207
+ """Analyze trends in numeric columns"""
208
+ trends = {}
209
+ for col in df.columns:
210
+ values = df[col].dropna()
211
+ if len(values) < 3:
212
+ trends[col] = 'insufficient_data'
213
+ continue
214
+
215
+ # Calculate trend using linear regression slope
216
+ x = np.arange(len(values))
217
+ slope = np.polyfit(x, values, 1)[0]
218
+ std_val = values.std()
219
+
220
+ if abs(slope) < std_val * 0.1:
221
+ trends[col] = 'stable'
222
+ elif std_val > values.mean() * 0.5:
223
+ trends[col] = 'volatile'
224
+ elif slope > 0:
225
+ trends[col] = 'increasing'
226
+ else:
227
+ trends[col] = 'decreasing'
228
+
229
+ return trends
230
+
231
+ def _find_strongest_correlations(self, df: pd.DataFrame) -> Dict[str, float]:
232
+ """Find strongest correlations between columns"""
233
+ if len(df.columns) < 2:
234
+ return {}
235
+
236
+ corr_matrix = df.corr()
237
+ correlations = {}
238
+
239
+ for i, col1 in enumerate(df.columns):
240
+ for j, col2 in enumerate(df.columns):
241
+ if i < j: # Avoid duplicates and self-correlation
242
+ corr_val = corr_matrix.loc[col1, col2]
243
+ if not np.isnan(corr_val):
244
+ correlations[f"{col1}_vs_{col2}"] = float(corr_val)
245
+
246
+ # Return top 3 strongest correlations
247
+ sorted_corr = sorted(correlations.items(), key=lambda x: abs(x[1]), reverse=True)
248
+ return dict(sorted_corr[:3])
249
+
250
+ def _detect_seasonality(self, df: pd.DataFrame) -> Dict[str, bool]:
251
+ """Simple seasonality detection"""
252
+ seasonality = {}
253
+ for col in df.columns:
254
+ values = df[col].dropna()
255
+ if len(values) < 12: # Need at least 12 points for seasonality
256
+ seasonality[col] = False
257
+ continue
258
+
259
+ # Simple autocorrelation check
260
+ try:
261
+ autocorr = np.corrcoef(values[:-1], values[1:])[0, 1]
262
+ seasonality[col] = not np.isnan(autocorr) and abs(autocorr) > 0.3
263
+ except:
264
+ seasonality[col] = False
265
+
266
+ return seasonality
267
+
268
+ def _calculate_complexity_score(self, df: pd.DataFrame) -> float:
269
+ """Calculate data complexity score (0-1)"""
270
+ if df.empty:
271
+ return 0.0
272
+
273
+ # Factors: number of columns, data types variety, missing values
274
+ num_cols = len(df.columns)
275
+ col_score = min(num_cols / 10, 1.0) # Normalize to 0-1
276
+
277
+ # Missing data complexity
278
+ missing_ratio = df.isnull().sum().sum() / (df.shape[0] * df.shape[1])
279
+ missing_score = min(missing_ratio * 2, 1.0)
280
+
281
+ return (col_score + missing_score) / 2
282
+
283
+ def _calculate_variability_score(self, stds: Dict[str, float], ranges: Dict[str, float]) -> float:
284
+ """Calculate data variability score (0-1)"""
285
+ if not stds:
286
+ return 0.0
287
+
288
+ # Normalize standard deviations by their ranges
289
+ normalized_vars = []
290
+ for col in stds:
291
+ if ranges[col] > 0:
292
+ normalized_vars.append(stds[col] / ranges[col])
293
+
294
+ if not normalized_vars:
295
+ return 0.0
296
+
297
+ return min(np.mean(normalized_vars) * 2, 1.0)
298
+
299
+ def _calculate_pattern_strength(self, trends: Dict[str, str], correlations: Dict[str, float]) -> float:
300
+ """Calculate pattern strength score (0-1)"""
301
+ pattern_score = 0.0
302
+
303
+ # Trend strength
304
+ trend_patterns = sum(1 for trend in trends.values() if trend in ['increasing', 'decreasing'])
305
+ trend_score = min(trend_patterns / max(len(trends), 1), 1.0)
306
+
307
+ # Correlation strength
308
+ if correlations:
309
+ max_corr = max(abs(corr) for corr in correlations.values())
310
+ corr_score = max_corr
311
+ else:
312
+ corr_score = 0.0
313
+
314
+ return (trend_score + corr_score) / 2
315
+
316
+ def _detect_mathematical_pattern(self, data: np.ndarray) -> bool:
317
+ """Detect if mathematical data has recognizable patterns"""
318
+ if len(data) < 10:
319
+ return False
320
+
321
+ # Check for periodicity using autocorrelation
322
+ try:
323
+ # Simple pattern detection
324
+ autocorr = np.corrcoef(data[:-1], data[1:])[0, 1]
325
+ return not np.isnan(autocorr) and abs(autocorr) > 0.5
326
+ except:
327
+ return False
328
+
329
+
330
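A minimal sketch of the DataProcessor above, assuming pandas/NumPy are installed and the module is importable as src.utils.data_utils; the DataFrame and formula are illustrative:

    import pandas as pd
    from src.utils.data_utils import DataProcessor

    processor = DataProcessor()

    # CSV-style analysis on an in-memory DataFrame
    df = pd.DataFrame({"temperature": [20.1, 21.3, 22.8, 24.0], "humidity": [60, 58, 55, 50]})
    features = processor.analyze_csv_data(df)
    print(features.trends, features.pattern_strength)

    # Formula evaluation: x, sin, cos, exp, np, math, etc. are available in the sandboxed namespace
    values, meta = processor.evaluate_formula("np.sin(x) * np.exp(-0.1 * x)", num_points=200)
    print(meta["min"], meta["max"], meta["has_pattern"])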
+ class DataToTextConverter:
331
+ """Convert data patterns into poetic/narrative text descriptions"""
332
+
333
+ def __init__(self):
334
+ """Initialize the converter with descriptive vocabularies"""
335
+ self.trend_descriptions = {
336
+ 'increasing': ['ascending', 'rising', 'climbing', 'growing', 'soaring'],
337
+ 'decreasing': ['descending', 'falling', 'declining', 'diminishing', 'fading'],
338
+ 'stable': ['steady', 'constant', 'balanced', 'harmonious', 'peaceful'],
339
+ 'volatile': ['chaotic', 'turbulent', 'dynamic', 'energetic', 'wild']
340
+ }
341
+
342
+ self.pattern_adjectives = {
343
+ 'high_complexity': ['intricate', 'complex', 'sophisticated', 'elaborate'],
344
+ 'low_complexity': ['simple', 'pure', 'minimal', 'clean'],
345
+ 'high_variability': ['diverse', 'varied', 'rich', 'multifaceted'],
346
+ 'low_variability': ['consistent', 'uniform', 'regular', 'predictable'],
347
+ 'strong_patterns': ['rhythmic', 'structured', 'organized', 'patterned'],
348
+ 'weak_patterns': ['random', 'scattered', 'free-flowing', 'organic']
349
+ }
350
+
351
+ self.artistic_metaphors = [
352
+ 'like brushstrokes on a canvas',
353
+ 'resembling musical notes in harmony',
354
+ 'flowing like water through landscapes',
355
+ 'dancing with mathematical precision',
356
+ 'weaving patterns of light and shadow',
357
+ 'creating symphonies of numbers',
358
+ 'painting stories with data points',
359
+ 'sculpting meaning from statistics'
360
+ ]
361
+
362
+ def generate_poetic_description(self, features: DataFeatures) -> str:
363
+ """
364
+ Generate poetic description from data features
365
+
366
+ Args:
367
+ features: DataFeatures object
368
+
369
+ Returns:
370
+ Poetic text description
371
+ """
372
+ descriptions = []
373
+
374
+ # Basic data description
375
+ descriptions.append(f"A tapestry woven from {features.shape[0]} data points across {features.shape[1]} dimensions")
376
+
377
+ # Trend descriptions
378
+ trend_desc = self._describe_trends(features.trends)
379
+ if trend_desc:
380
+ descriptions.append(trend_desc)
381
+
382
+ # Variability description
383
+ var_desc = self._describe_variability(features.variability_score)
384
+ if var_desc:
385
+ descriptions.append(var_desc)
386
+
387
+ # Pattern description
388
+ pattern_desc = self._describe_patterns(features.pattern_strength, features.correlations)
389
+ if pattern_desc:
390
+ descriptions.append(pattern_desc)
391
+
392
+ # Add artistic metaphor
393
+ import random
394
+ metaphor = random.choice(self.artistic_metaphors)
395
+ descriptions.append(f"The data flows {metaphor}")
396
+
397
+ return '. '.join(descriptions) + '.'
398
+
399
+ def generate_formula_description(self, formula: str, metadata: Dict[str, Any]) -> str:
400
+ """
401
+ Generate poetic description for mathematical formula
402
+
403
+ Args:
404
+ formula: Original formula
405
+ metadata: Formula evaluation metadata
406
+
407
+ Returns:
408
+ Poetic text description
409
+ """
410
+ descriptions = []
411
+
412
+ # Formula introduction
413
+ descriptions.append(f"Mathematical harmony emerges from the expression: {formula}")
414
+
415
+ # Range description
416
+ range_val = metadata['range']
417
+ if range_val > 10:
418
+ descriptions.append("The function soars across vast numerical landscapes")
419
+ elif range_val > 1:
420
+ descriptions.append("Values dance within moderate bounds")
421
+ else:
422
+ descriptions.append("Numbers whisper in gentle, subtle variations")
423
+
424
+ # Pattern description
425
+ if metadata['has_pattern']:
426
+ descriptions.append("Revealing intricate patterns that speak to the soul")
427
+ else:
428
+ descriptions.append("Creating unique, unrepeatable mathematical poetry")
429
+
430
+ # Add artistic metaphor
431
+ import random
432
+ metaphor = random.choice(self.artistic_metaphors)
433
+ descriptions.append(f"Each calculation {metaphor}")
434
+
435
+ return '. '.join(descriptions) + '.'
436
+
437
+ def _describe_trends(self, trends: Dict[str, str]) -> str:
438
+ """Describe overall trends in the data"""
439
+ if not trends:
440
+ return ""
441
+
442
+ trend_counts = {}
443
+ for trend in trends.values():
444
+ trend_counts[trend] = trend_counts.get(trend, 0) + 1
445
+
446
+ dominant_trend = max(trend_counts, key=trend_counts.get)
447
+
448
+ if dominant_trend in self.trend_descriptions:
449
+ import random
450
+ adj = random.choice(self.trend_descriptions[dominant_trend])
451
+ return f"The data reveals {adj} patterns throughout its structure"
452
+
453
+ return ""
454
+
455
+ def _describe_variability(self, variability_score: float) -> str:
456
+ """Describe data variability"""
457
+ import random
458
+
459
+ if variability_score > 0.7:
460
+ adj = random.choice(self.pattern_adjectives['high_variability'])
461
+ return f"With {adj} expressions of numerical diversity"
462
+ elif variability_score < 0.3:
463
+ adj = random.choice(self.pattern_adjectives['low_variability'])
464
+ return f"Maintaining {adj} elegance in its values"
465
+ else:
466
+ return "Balancing consistency with creative variation"
467
+
468
+ def _describe_patterns(self, pattern_strength: float, correlations: Dict[str, float]) -> str:
469
+ """Describe pattern strength and correlations"""
470
+ import random
471
+
472
+ if pattern_strength > 0.6:
473
+ adj = random.choice(self.pattern_adjectives['strong_patterns'])
474
+ return f"Displaying {adj} relationships between its elements"
475
+ elif pattern_strength < 0.3:
476
+ adj = random.choice(self.pattern_adjectives['weak_patterns'])
477
+ return f"Embracing {adj} freedom in its numerical expression"
478
+ else:
479
+ return "Weaving subtle connections throughout its numerical fabric"
480
+
481
+
482
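A short sketch showing how DataToTextConverter turns that analysis into prompt-ready text, under the same assumptions as the previous example:

    import pandas as pd
    from src.utils.data_utils import DataProcessor, DataToTextConverter

    processor = DataProcessor()
    converter = DataToTextConverter()

    df = pd.DataFrame({"value": [1, 2, 4, 8, 16, 32]})
    features = processor.analyze_csv_data(df)
    print(converter.generate_poetic_description(features))

    values, meta = processor.evaluate_formula("np.cos(x) + 0.1 * x")
    print(converter.generate_formula_description("np.cos(x) + 0.1 * x", meta))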
+ class DataVisualizer:
483
+ """Create visualizations from data for artistic conditioning"""
484
+
485
+ def __init__(self, style: str = 'artistic'):
486
+ """
487
+ Initialize visualizer
488
+
489
+ Args:
490
+ style: Visualization style ('artistic', 'scientific', 'minimal')
491
+ """
492
+ self.style = style
493
+ self.color_palettes = {
494
+ 'artistic': ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7'],
495
+ 'scientific': ['#2E86AB', '#A23B72', '#F18F01', '#C73E1D', '#592E83'],
496
+ 'minimal': ['#2C3E50', '#34495E', '#7F8C8D', '#95A5A6', '#BDC3C7']
497
+ }
498
+
499
+ def create_data_visualization(self, df: pd.DataFrame, features: DataFeatures) -> Image.Image:
500
+ """
501
+ Create artistic visualization from DataFrame
502
+
503
+ Args:
504
+ df: Input DataFrame
505
+ features: DataFeatures object
506
+
507
+ Returns:
508
+ PIL Image of the visualization
509
+ """
510
+ plt.style.use('default')
511
+ fig, axes = plt.subplots(2, 2, figsize=(12, 10))
512
+ fig.suptitle('Data Pattern Visualization', fontsize=16, fontweight='bold')
513
+
514
+ numeric_df = df.select_dtypes(include=[np.number])
515
+ colors = self.color_palettes[self.style]
516
+
517
+ # Plot 1: Line plot of first few columns
518
+ ax1 = axes[0, 0]
519
+ for i, col in enumerate(numeric_df.columns[:3]):
520
+ ax1.plot(numeric_df[col], color=colors[i % len(colors)],
521
+ linewidth=2, alpha=0.8, label=col)
522
+ ax1.set_title('Data Trends', fontweight='bold')
523
+ ax1.legend()
524
+ ax1.grid(True, alpha=0.3)
525
+
526
+ # Plot 2: Distribution/histogram
527
+ ax2 = axes[0, 1]
528
+ if len(numeric_df.columns) > 0:
529
+ col = numeric_df.columns[0]
530
+ ax2.hist(numeric_df[col].dropna(), bins=20, color=colors[0],
531
+ alpha=0.7, edgecolor='black')
532
+ ax2.set_title(f'Distribution: {col}', fontweight='bold')
533
+ ax2.grid(True, alpha=0.3)
534
+
535
+ # Plot 3: Correlation heatmap (if multiple columns)
536
+ ax3 = axes[1, 0]
537
+ if len(numeric_df.columns) > 1:
538
+ corr_matrix = numeric_df.corr()
539
+ im = ax3.imshow(corr_matrix, cmap='RdBu_r', aspect='auto', vmin=-1, vmax=1)
540
+ ax3.set_xticks(range(len(corr_matrix.columns)))
541
+ ax3.set_yticks(range(len(corr_matrix.columns)))
542
+ ax3.set_xticklabels(corr_matrix.columns, rotation=45)
543
+ ax3.set_yticklabels(corr_matrix.columns)
544
+ ax3.set_title('Correlations', fontweight='bold')
545
+ plt.colorbar(im, ax=ax3, shrink=0.8)
546
+ else:
547
+ ax3.text(0.5, 0.5, 'Single Column\nNo Correlations',
548
+ ha='center', va='center', transform=ax3.transAxes)
549
+ ax3.set_title('Correlations', fontweight='bold')
550
+
551
+ # Plot 4: Summary statistics
552
+ ax4 = axes[1, 1]
553
+ if len(numeric_df.columns) > 0:
554
+ stats_data = [features.means[col] for col in numeric_df.columns[:5]]
555
+ ax4.bar(range(len(stats_data)), stats_data, color=colors[:len(stats_data)])
556
+ ax4.set_title('Mean Values', fontweight='bold')
557
+ ax4.set_xticks(range(len(stats_data)))
558
+ ax4.set_xticklabels([col[:8] for col in numeric_df.columns[:5]], rotation=45)
559
+ ax4.grid(True, alpha=0.3)
560
+
561
+ plt.tight_layout()
562
+
563
+ # Convert to PIL Image
564
+ buf = io.BytesIO()
565
+ plt.savefig(buf, format='png', dpi=150, bbox_inches='tight')
566
+ plt.close()
567
+ buf.seek(0)
568
+
569
+ return Image.open(buf)
570
+
571
+ def create_formula_visualization(self, data: np.ndarray, formula: str, metadata: Dict[str, Any]) -> Image.Image:
572
+ """
573
+ Create artistic visualization from formula result
574
+
575
+ Args:
576
+ data: Formula result array
577
+ formula: Original formula
578
+ metadata: Formula metadata
579
+
580
+ Returns:
581
+ PIL Image of the visualization
582
+ """
583
+ try:
584
+ logger.info(f"Creating visualization for formula: {formula}")
585
+ logger.info(f"Data shape: {data.shape}, Data range: [{np.min(data):.3f}, {np.max(data):.3f}]")
586
+
587
+ plt.style.use('default')
588
+ fig, axes = plt.subplots(2, 2, figsize=(12, 10))
589
+ fig.suptitle(f'Mathematical Pattern: {formula}', fontsize=14, fontweight='bold')
590
+
591
+ colors = self.color_palettes[self.style]
592
+ x = np.arange(len(data))
593
+
594
+ # Plot 1: Main function plot
595
+ ax1 = axes[0, 0]
596
+ ax1.plot(x, data, color=colors[0], linewidth=3, alpha=0.8)
597
+ ax1.fill_between(x, data, alpha=0.3, color=colors[0])
598
+ ax1.set_title('Function Values', fontweight='bold')
599
+ ax1.grid(True, alpha=0.3)
600
+
601
+ # Plot 2: Derivative approximation
602
+ ax2 = axes[0, 1]
603
+ if len(data) > 1:
604
+ derivative = np.gradient(data)
605
+ ax2.plot(x, derivative, color=colors[1], linewidth=2)
606
+ ax2.set_title('Rate of Change', fontweight='bold')
607
+ ax2.grid(True, alpha=0.3)
608
+
609
+ # Plot 3: Distribution
610
+ ax3 = axes[1, 0]
611
+ ax3.hist(data, bins=30, color=colors[2], alpha=0.7, edgecolor='black')
612
+ ax3.set_title('Value Distribution', fontweight='bold')
613
+ ax3.grid(True, alpha=0.3)
614
+
615
+ # Plot 4: Phase space (if applicable)
616
+ ax4 = axes[1, 1]
617
+ if len(data) > 1:
618
+ ax4.scatter(data[:-1], data[1:], c=x[:-1], cmap='viridis', alpha=0.6)
619
+ ax4.set_xlabel('f(t)')
620
+ ax4.set_ylabel('f(t+1)')
621
+ ax4.set_title('Phase Space', fontweight='bold')
622
+ ax4.grid(True, alpha=0.3)
623
+
624
+ plt.tight_layout()
625
+
626
+ # Convert to PIL Image
627
+ buf = io.BytesIO()
628
+ plt.savefig(buf, format='png', dpi=150, bbox_inches='tight')
629
+ plt.close()
630
+ buf.seek(0)
631
+
632
+ image = Image.open(buf)
633
+ logger.info(f"Successfully created visualization image: {image.size}")
634
+ return image
635
+
636
+ except Exception as e:
637
+ logger.error(f"Error creating formula visualization: {e}")
638
+ plt.close('all') # Clean up any open figures
639
+
640
+ # Return a simple error image
641
+ fig, ax = plt.subplots(figsize=(8, 6))
642
+ ax.text(0.5, 0.5, f'Visualization Error:\n{str(e)}',
643
+ ha='center', va='center', fontsize=12,
644
+ bbox=dict(boxstyle="round,pad=0.3", facecolor="lightcoral"))
645
+ ax.set_xlim(0, 1)
646
+ ax.set_ylim(0, 1)
647
+ ax.axis('off')
648
+
649
+ buf = io.BytesIO()
650
+ plt.savefig(buf, format='png', dpi=150, bbox_inches='tight')
651
+ plt.close()
652
+ buf.seek(0)
653
+
654
+ return Image.open(buf)
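DataVisualizer closes the Phase 2.B loop by rendering the analysis as a PIL image that can be inspected or used for conditioning. A hedged, self-contained sketch; the data values and output filename are illustrative:

    import pandas as pd
    from src.utils.data_utils import DataProcessor, DataVisualizer

    processor = DataProcessor()
    visualizer = DataVisualizer(style="artistic")

    df = pd.DataFrame({"signal": [0.1, 0.4, 0.9, 0.7, 0.3, 0.8], "noise": [0.2, 0.1, 0.3, 0.2, 0.4, 0.1]})
    features = processor.analyze_csv_data(df)
    chart = visualizer.create_data_visualization(df, features)
    chart.save("data_visualization.png")  # illustrative output filename

    values, meta = processor.evaluate_formula("np.sin(x) * np.exp(-0.1 * x)")
    formula_chart = visualizer.create_formula_visualization(values, "np.sin(x) * np.exp(-0.1 * x)", meta)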
src/utils/emotion_utils.py ADDED
@@ -0,0 +1,446 @@
1
+ """
2
+ CompI Emotion Processing Utilities
3
+
4
+ This module provides utilities for Phase 2.C: Emotional/Contextual Input Integration
5
+ - Emotion detection and sentiment analysis
6
+ - Mood mapping and emotional context processing
7
+ - Color palette generation based on emotions
8
+ - Contextual prompt enhancement
9
+ - Emoji and text-based emotion recognition
10
+ """
11
+
12
+ import re
13
+ import json
14
+ from typing import Dict, List, Optional, Tuple, Union, Any
15
+ from dataclasses import dataclass
16
+ from enum import Enum
17
+ import logging
18
+
19
+ # Optional imports with fallbacks
20
+ try:
21
+ from textblob import TextBlob
22
+ TEXTBLOB_AVAILABLE = True
23
+ except ImportError:
24
+ TEXTBLOB_AVAILABLE = False
25
+ TextBlob = None
26
+
27
+ try:
28
+ import emoji
29
+ EMOJI_AVAILABLE = True
30
+ except ImportError:
31
+ EMOJI_AVAILABLE = False
32
+ emoji = None
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ class EmotionCategory(Enum):
37
+ """Primary emotion categories"""
38
+ JOY = "joy"
39
+ SADNESS = "sadness"
40
+ ANGER = "anger"
41
+ FEAR = "fear"
42
+ SURPRISE = "surprise"
43
+ DISGUST = "disgust"
44
+ LOVE = "love"
45
+ ANTICIPATION = "anticipation"
46
+ TRUST = "trust"
47
+ NEUTRAL = "neutral"
48
+
49
+ @dataclass
50
+ class EmotionAnalysis:
51
+ """Container for emotion analysis results"""
52
+
53
+ # Primary emotion detection
54
+ primary_emotion: EmotionCategory
55
+ emotion_confidence: float # 0-1 confidence score
56
+
57
+ # Sentiment analysis
58
+ sentiment_polarity: float # -1 to 1 (negative to positive)
59
+ sentiment_subjectivity: float # 0 to 1 (objective to subjective)
60
+
61
+ # Detected emotions with scores
62
+ emotion_scores: Dict[str, float]
63
+
64
+ # Contextual information
65
+ detected_emojis: List[str]
66
+ emotion_keywords: List[str]
67
+ intensity_level: str # 'low', 'medium', 'high'
68
+
69
+ # Generated artistic attributes
70
+ color_palette: List[str]
71
+ artistic_descriptors: List[str]
72
+ mood_modifiers: List[str]
73
+
74
+ def to_dict(self) -> Dict[str, Any]:
75
+ """Convert to dictionary for JSON serialization"""
76
+ return {
77
+ 'primary_emotion': self.primary_emotion.value,
78
+ 'emotion_confidence': self.emotion_confidence,
79
+ 'sentiment_polarity': self.sentiment_polarity,
80
+ 'sentiment_subjectivity': self.sentiment_subjectivity,
81
+ 'emotion_scores': self.emotion_scores,
82
+ 'detected_emojis': self.detected_emojis,
83
+ 'emotion_keywords': self.emotion_keywords,
84
+ 'intensity_level': self.intensity_level,
85
+ 'color_palette': self.color_palette,
86
+ 'artistic_descriptors': self.artistic_descriptors,
87
+ 'mood_modifiers': self.mood_modifiers
88
+ }
89
+
90
+ class EmotionProcessor:
91
+ """Core emotion processing and analysis functionality"""
92
+
93
+ def __init__(self):
94
+ """Initialize the emotion processor with predefined mappings"""
95
+
96
+ # Predefined emotion sets
97
+ self.preset_emotions = {
98
+ "joyful": {"category": EmotionCategory.JOY, "intensity": "high", "emoji": "😊"},
99
+ "happy": {"category": EmotionCategory.JOY, "intensity": "medium", "emoji": "😄"},
100
+ "ecstatic": {"category": EmotionCategory.JOY, "intensity": "high", "emoji": "🤩"},
101
+ "sad": {"category": EmotionCategory.SADNESS, "intensity": "medium", "emoji": "😢"},
102
+ "melancholic": {"category": EmotionCategory.SADNESS, "intensity": "high", "emoji": "😔"},
103
+ "depressed": {"category": EmotionCategory.SADNESS, "intensity": "high", "emoji": "😞"},
104
+ "angry": {"category": EmotionCategory.ANGER, "intensity": "high", "emoji": "😡"},
105
+ "frustrated": {"category": EmotionCategory.ANGER, "intensity": "medium", "emoji": "😤"},
106
+ "furious": {"category": EmotionCategory.ANGER, "intensity": "high", "emoji": "🤬"},
107
+ "fearful": {"category": EmotionCategory.FEAR, "intensity": "high", "emoji": "😱"},
108
+ "anxious": {"category": EmotionCategory.FEAR, "intensity": "medium", "emoji": "😰"},
109
+ "nervous": {"category": EmotionCategory.FEAR, "intensity": "low", "emoji": "😬"},
110
+ "surprised": {"category": EmotionCategory.SURPRISE, "intensity": "medium", "emoji": "😲"},
111
+ "amazed": {"category": EmotionCategory.SURPRISE, "intensity": "high", "emoji": "🤯"},
112
+ "romantic": {"category": EmotionCategory.LOVE, "intensity": "high", "emoji": "💖"},
113
+ "loving": {"category": EmotionCategory.LOVE, "intensity": "medium", "emoji": "❤️"},
114
+ "peaceful": {"category": EmotionCategory.TRUST, "intensity": "medium", "emoji": "🕊️"},
115
+ "serene": {"category": EmotionCategory.TRUST, "intensity": "high", "emoji": "🌱"},
116
+ "mysterious": {"category": EmotionCategory.ANTICIPATION, "intensity": "medium", "emoji": "🕵️‍♂️"},
117
+ "nostalgic": {"category": EmotionCategory.SADNESS, "intensity": "medium", "emoji": "🕰️"},
118
+ "energetic": {"category": EmotionCategory.JOY, "intensity": "high", "emoji": "⚡"},
119
+ "whimsical": {"category": EmotionCategory.JOY, "intensity": "medium", "emoji": "🎠"},
120
+ "uplifting": {"category": EmotionCategory.JOY, "intensity": "high", "emoji": "🌞"},
121
+ "dark": {"category": EmotionCategory.SADNESS, "intensity": "high", "emoji": "🌑"},
122
+ "moody": {"category": EmotionCategory.SADNESS, "intensity": "medium", "emoji": "🌫️"}
123
+ }
124
+
125
+ # Emotion-to-color mappings
126
+ self.emotion_colors = {
127
+ EmotionCategory.JOY: ["#FFD700", "#FFA500", "#FF69B4", "#00CED1", "#32CD32"],
128
+ EmotionCategory.SADNESS: ["#4169E1", "#6495ED", "#708090", "#2F4F4F", "#191970"],
129
+ EmotionCategory.ANGER: ["#DC143C", "#B22222", "#8B0000", "#FF4500", "#FF6347"],
130
+ EmotionCategory.FEAR: ["#800080", "#4B0082", "#2E2E2E", "#696969", "#A9A9A9"],
131
+ EmotionCategory.SURPRISE: ["#FF1493", "#FF69B4", "#FFB6C1", "#FFC0CB", "#FFFF00"],
132
+ EmotionCategory.LOVE: ["#FF69B4", "#DC143C", "#FF1493", "#C71585", "#DB7093"],
133
+ EmotionCategory.TRUST: ["#00CED1", "#20B2AA", "#48D1CC", "#40E0D0", "#AFEEEE"],
134
+ EmotionCategory.ANTICIPATION: ["#9370DB", "#8A2BE2", "#7B68EE", "#6A5ACD", "#483D8B"],
135
+ EmotionCategory.NEUTRAL: ["#808080", "#A9A9A9", "#C0C0C0", "#D3D3D3", "#DCDCDC"]
136
+ }
137
+
138
+ # Artistic descriptors for each emotion
139
+ self.artistic_descriptors = {
140
+ EmotionCategory.JOY: ["vibrant", "luminous", "radiant", "effervescent", "sparkling"],
141
+ EmotionCategory.SADNESS: ["muted", "somber", "melancholic", "wistful", "contemplative"],
142
+ EmotionCategory.ANGER: ["intense", "fiery", "bold", "dramatic", "powerful"],
143
+ EmotionCategory.FEAR: ["shadowy", "mysterious", "ethereal", "haunting", "enigmatic"],
144
+ EmotionCategory.SURPRISE: ["dynamic", "explosive", "unexpected", "striking", "vivid"],
145
+ EmotionCategory.LOVE: ["warm", "tender", "passionate", "romantic", "intimate"],
146
+ EmotionCategory.TRUST: ["serene", "peaceful", "harmonious", "balanced", "tranquil"],
147
+ EmotionCategory.ANTICIPATION: ["electric", "suspenseful", "charged", "expectant", "tense"],
148
+ EmotionCategory.NEUTRAL: ["balanced", "calm", "steady", "composed", "neutral"]
149
+ }
150
+
151
+ # Emoji to emotion mapping
152
+ self.emoji_emotions = {
153
+ "😊": EmotionCategory.JOY, "😄": EmotionCategory.JOY, "😃": EmotionCategory.JOY,
154
+ "🤩": EmotionCategory.JOY, "😍": EmotionCategory.LOVE, "🥰": EmotionCategory.LOVE,
155
+ "😢": EmotionCategory.SADNESS, "😭": EmotionCategory.SADNESS, "😔": EmotionCategory.SADNESS,
156
+ "😡": EmotionCategory.ANGER, "🤬": EmotionCategory.ANGER, "😤": EmotionCategory.ANGER,
157
+ "😱": EmotionCategory.FEAR, "😰": EmotionCategory.FEAR, "😨": EmotionCategory.FEAR,
158
+ "😲": EmotionCategory.SURPRISE, "😮": EmotionCategory.SURPRISE, "🤯": EmotionCategory.SURPRISE,
159
+ "❤️": EmotionCategory.LOVE, "💖": EmotionCategory.LOVE, "💕": EmotionCategory.LOVE,
160
+ "🕊️": EmotionCategory.TRUST, "🌱": EmotionCategory.TRUST, "☮️": EmotionCategory.TRUST
161
+ }
162
+
163
+ # Keyword patterns for emotion detection
164
+ self.emotion_keywords = {
165
+ EmotionCategory.JOY: ["happy", "joyful", "cheerful", "delighted", "elated", "euphoric", "blissful"],
166
+ EmotionCategory.SADNESS: ["sad", "depressed", "melancholy", "sorrowful", "gloomy", "dejected"],
167
+ EmotionCategory.ANGER: ["angry", "furious", "rage", "irritated", "annoyed", "livid", "irate"],
168
+ EmotionCategory.FEAR: ["afraid", "scared", "terrified", "anxious", "worried", "nervous", "fearful"],
169
+ EmotionCategory.SURPRISE: ["surprised", "amazed", "astonished", "shocked", "stunned", "bewildered"],
170
+ EmotionCategory.LOVE: ["love", "romantic", "affectionate", "tender", "passionate", "adoring"],
171
+ EmotionCategory.TRUST: ["peaceful", "serene", "calm", "tranquil", "secure", "confident"],
172
+ EmotionCategory.ANTICIPATION: ["excited", "eager", "hopeful", "expectant", "anticipating"]
173
+ }
174
+
175
+ def analyze_emotion(self, text: str, selected_emotion: Optional[str] = None) -> EmotionAnalysis:
176
+ """
177
+ Comprehensive emotion analysis of input text
178
+
179
+ Args:
180
+ text: Input text to analyze
181
+ selected_emotion: Optional pre-selected emotion
182
+
183
+ Returns:
184
+ EmotionAnalysis object with complete analysis
185
+ """
186
+ logger.info(f"Analyzing emotion for text: {text[:100]}...")
187
+
188
+ # Initialize analysis components
189
+ detected_emojis = self._extract_emojis(text)
190
+ emotion_keywords = self._extract_emotion_keywords(text)
191
+
192
+ # Determine primary emotion
193
+ if selected_emotion and selected_emotion.lower() in self.preset_emotions:
194
+ # Use selected emotion
195
+ emotion_info = self.preset_emotions[selected_emotion.lower()]
196
+ primary_emotion = emotion_info["category"]
197
+ emotion_confidence = 0.9
198
+ intensity_level = emotion_info["intensity"]
199
+ else:
200
+ # Analyze text for emotion
201
+ primary_emotion, emotion_confidence, intensity_level = self._analyze_text_emotion(text, detected_emojis, emotion_keywords)
202
+
203
+ # Sentiment analysis
204
+ sentiment_polarity, sentiment_subjectivity = self._analyze_sentiment(text)
205
+
206
+ # Generate emotion scores
207
+ emotion_scores = self._generate_emotion_scores(primary_emotion, emotion_confidence)
208
+
209
+ # Generate artistic attributes
210
+ color_palette = self.emotion_colors.get(primary_emotion, self.emotion_colors[EmotionCategory.NEUTRAL])
211
+ artistic_descriptors = self.artistic_descriptors.get(primary_emotion, ["neutral"])
212
+ mood_modifiers = self._generate_mood_modifiers(primary_emotion, intensity_level)
213
+
214
+ return EmotionAnalysis(
215
+ primary_emotion=primary_emotion,
216
+ emotion_confidence=emotion_confidence,
217
+ sentiment_polarity=sentiment_polarity,
218
+ sentiment_subjectivity=sentiment_subjectivity,
219
+ emotion_scores=emotion_scores,
220
+ detected_emojis=detected_emojis,
221
+ emotion_keywords=emotion_keywords,
222
+ intensity_level=intensity_level,
223
+ color_palette=color_palette[:3], # Top 3 colors
224
+ artistic_descriptors=artistic_descriptors[:3], # Top 3 descriptors
225
+ mood_modifiers=mood_modifiers
226
+ )
227
+
228
+ def _extract_emojis(self, text: str) -> List[str]:
229
+ """Extract emojis from text"""
230
+ if not EMOJI_AVAILABLE:
231
+ # Simple emoji detection using Unicode ranges
232
+ emoji_pattern = re.compile(
233
+ "["
234
+ "\U0001F600-\U0001F64F" # emoticons
235
+ "\U0001F300-\U0001F5FF" # symbols & pictographs
236
+ "\U0001F680-\U0001F6FF" # transport & map symbols
237
+ "\U0001F1E0-\U0001F1FF" # flags (iOS)
238
+ "\U00002702-\U000027B0"
239
+ "\U000024C2-\U0001F251"
240
+ "]+",
241
+ flags=re.UNICODE
242
+ )
243
+ return emoji_pattern.findall(text)
244
+ else:
245
+ try:
+ # Newer versions of the emoji package expose is_emoji()
+ return [char for char in text if emoji.is_emoji(char)]
+ except AttributeError:
+ # Older versions expose the UNICODE_EMOJI dictionary instead
+ return [char for char in text if char in emoji.UNICODE_EMOJI['en']]
246
+
247
+ def _extract_emotion_keywords(self, text: str) -> List[str]:
248
+ """Extract emotion-related keywords from text"""
249
+ text_lower = text.lower()
250
+ found_keywords = []
251
+
252
+ for emotion, keywords in self.emotion_keywords.items():
253
+ for keyword in keywords:
254
+ if keyword in text_lower:
255
+ found_keywords.append(keyword)
256
+
257
+ return found_keywords
258
+
259
+ def _analyze_text_emotion(self, text: str, emojis: List[str], keywords: List[str]) -> Tuple[EmotionCategory, float, str]:
260
+ """Analyze emotion from text, emojis, and keywords"""
261
+
262
+ # Check emojis first
263
+ for emoji_char in emojis:
264
+ if emoji_char in self.emoji_emotions:
265
+ return self.emoji_emotions[emoji_char], 0.8, "medium"
266
+
267
+ # Check keywords
268
+ emotion_votes = {}
269
+ for keyword in keywords:
270
+ for emotion, emotion_keywords in self.emotion_keywords.items():
271
+ if keyword in emotion_keywords:
272
+ emotion_votes[emotion] = emotion_votes.get(emotion, 0) + 1
273
+
274
+ if emotion_votes:
275
+ primary_emotion = max(emotion_votes, key=emotion_votes.get)
276
+ confidence = min(emotion_votes[primary_emotion] * 0.3, 0.9)
277
+ intensity = "high" if emotion_votes[primary_emotion] > 2 else "medium"
278
+ return primary_emotion, confidence, intensity
279
+
280
+ # Fallback to sentiment analysis
281
+ sentiment_polarity, _ = self._analyze_sentiment(text)
282
+
283
+ if sentiment_polarity > 0.3:
284
+ return EmotionCategory.JOY, 0.6, "medium"
285
+ elif sentiment_polarity < -0.3:
286
+ return EmotionCategory.SADNESS, 0.6, "medium"
287
+ else:
288
+ return EmotionCategory.NEUTRAL, 0.5, "low"
289
+
290
+ def _analyze_sentiment(self, text: str) -> Tuple[float, float]:
291
+ """Analyze sentiment using TextBlob or fallback method"""
292
+ if not text.strip():
293
+ return 0.0, 0.0
294
+
295
+ if TEXTBLOB_AVAILABLE:
296
+ try:
297
+ blob = TextBlob(text)
298
+ return blob.sentiment.polarity, blob.sentiment.subjectivity
299
+ except Exception as e:
300
+ logger.warning(f"TextBlob sentiment analysis failed: {e}")
301
+
302
+ # Simple fallback sentiment analysis
303
+ positive_words = ["good", "great", "excellent", "amazing", "wonderful", "fantastic", "love", "like", "happy", "joy"]
304
+ negative_words = ["bad", "terrible", "awful", "hate", "dislike", "sad", "angry", "fear", "worried", "depressed"]
305
+
306
+ text_lower = text.lower()
307
+ positive_count = sum(1 for word in positive_words if word in text_lower)
308
+ negative_count = sum(1 for word in negative_words if word in text_lower)
309
+
310
+ total_words = len(text.split())
311
+ if total_words == 0:
312
+ return 0.0, 0.0
313
+
314
+ polarity = (positive_count - negative_count) / max(total_words, 1)
315
+ subjectivity = (positive_count + negative_count) / max(total_words, 1)
316
+
317
+ return max(-1.0, min(1.0, polarity)), max(0.0, min(1.0, subjectivity))
318
+
319
+ def _generate_emotion_scores(self, primary_emotion: EmotionCategory, confidence: float) -> Dict[str, float]:
320
+ """Generate scores for all emotions"""
321
+ scores = {emotion.value: 0.1 for emotion in EmotionCategory}
322
+ scores[primary_emotion.value] = confidence
323
+
324
+ # Add some secondary emotions based on primary
325
+ secondary_emotions = {
326
+ EmotionCategory.JOY: [EmotionCategory.LOVE, EmotionCategory.TRUST],
327
+ EmotionCategory.SADNESS: [EmotionCategory.FEAR, EmotionCategory.NEUTRAL],
328
+ EmotionCategory.ANGER: [EmotionCategory.DISGUST, EmotionCategory.FEAR],
329
+ EmotionCategory.FEAR: [EmotionCategory.SADNESS, EmotionCategory.SURPRISE],
330
+ EmotionCategory.LOVE: [EmotionCategory.JOY, EmotionCategory.TRUST],
331
+ EmotionCategory.TRUST: [EmotionCategory.JOY, EmotionCategory.LOVE]
332
+ }
333
+
334
+ if primary_emotion in secondary_emotions:
335
+ for secondary in secondary_emotions[primary_emotion]:
336
+ scores[secondary.value] = min(0.4, confidence * 0.5)
337
+
338
+ return scores
339
+
340
+ def _generate_mood_modifiers(self, emotion: EmotionCategory, intensity: str) -> List[str]:
341
+ """Generate mood modifiers for prompt enhancement"""
342
+ base_modifiers = {
343
+ EmotionCategory.JOY: ["bright", "cheerful", "uplifting", "radiant"],
344
+ EmotionCategory.SADNESS: ["melancholic", "somber", "wistful", "contemplative"],
345
+ EmotionCategory.ANGER: ["intense", "dramatic", "powerful", "bold"],
346
+ EmotionCategory.FEAR: ["mysterious", "dark", "ethereal", "haunting"],
347
+ EmotionCategory.SURPRISE: ["dynamic", "striking", "unexpected", "vivid"],
348
+ EmotionCategory.LOVE: ["romantic", "warm", "tender", "passionate"],
349
+ EmotionCategory.TRUST: ["peaceful", "serene", "harmonious", "tranquil"],
350
+ EmotionCategory.ANTICIPATION: ["electric", "suspenseful", "charged", "expectant"],
351
+ EmotionCategory.NEUTRAL: ["balanced", "calm", "neutral", "composed"]
352
+ }
353
+
354
+ modifiers = base_modifiers.get(emotion, ["neutral"])
355
+
356
+ # Adjust based on intensity
357
+ if intensity == "high":
358
+ intensity_modifiers = ["very", "extremely", "deeply", "intensely"]
359
+ return [f"{intensity_modifiers[0]} {mod}" for mod in modifiers[:2]]
360
+ elif intensity == "low":
361
+ return [f"subtly {mod}" for mod in modifiers[:2]]
362
+ else:
363
+ return modifiers[:3]
364
+
365
+
366
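A minimal sketch of EmotionProcessor, assuming the module is importable; textblob and emoji are optional, and the fallbacks above are used when they are missing:

    from src.utils.emotion_utils import EmotionProcessor

    processor = EmotionProcessor()

    # Free-text analysis: emojis are checked first, then keywords, then sentiment
    analysis = processor.analyze_emotion("I feel peaceful and hopeful today 🌱")
    print(analysis.primary_emotion.value, analysis.intensity_level)
    print(analysis.color_palette, analysis.mood_modifiers)

    # A preset label overrides free-text detection
    analysis = processor.analyze_emotion("city street at night", selected_emotion="mysterious")
    print(analysis.primary_emotion.value)  # "anticipation" per the preset mapping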
+ class EmotionalPromptEnhancer:
367
+ """Enhance prompts with emotional context"""
368
+
369
+ def __init__(self):
370
+ """Initialize the prompt enhancer"""
371
+ self.emotion_processor = EmotionProcessor()
372
+
373
+ def enhance_prompt_with_emotion(
374
+ self,
375
+ base_prompt: str,
376
+ style: str,
377
+ emotion_analysis: EmotionAnalysis,
378
+ enhancement_strength: float = 0.7
379
+ ) -> str:
380
+ """
381
+ Enhance prompt with emotional context
382
+
383
+ Args:
384
+ base_prompt: Original text prompt
385
+ style: Art style
386
+ emotion_analysis: Emotion analysis results
387
+ enhancement_strength: How strongly to apply emotion (0-1)
388
+
389
+ Returns:
390
+ Enhanced prompt with emotional context
391
+ """
392
+ enhanced_prompt = base_prompt.strip()
393
+
394
+ # Add style
395
+ if style:
396
+ enhanced_prompt += f", {style}"
397
+
398
+ # Add emotional descriptors based on strength
399
+ if enhancement_strength > 0.5:
400
+ # Strong emotional enhancement
401
+ descriptors = emotion_analysis.artistic_descriptors[:2]
402
+ mood_modifiers = emotion_analysis.mood_modifiers[:2]
403
+
404
+ enhanced_prompt += f", {', '.join(descriptors)}"
405
+ enhanced_prompt += f", with a {', '.join(mood_modifiers)} atmosphere"
406
+
407
+ # Add intensity if high
408
+ if emotion_analysis.intensity_level == "high":
409
+ enhanced_prompt += f", deeply {emotion_analysis.primary_emotion.value}"
410
+
411
+ elif enhancement_strength > 0.2:
412
+ # Moderate emotional enhancement
413
+ descriptor = emotion_analysis.artistic_descriptors[0]
414
+ mood = emotion_analysis.mood_modifiers[0]
415
+
416
+ enhanced_prompt += f", {descriptor}, {mood}"
417
+
418
+ else:
419
+ # Subtle emotional enhancement
420
+ if emotion_analysis.artistic_descriptors:
421
+ enhanced_prompt += f", {emotion_analysis.artistic_descriptors[0]}"
422
+
423
+ return enhanced_prompt
424
+
425
+ def generate_emotion_tags(self, emotion_analysis: EmotionAnalysis) -> List[str]:
426
+ """Generate descriptive tags for the emotion"""
427
+ tags = []
428
+
429
+ # Primary emotion
430
+ tags.append(emotion_analysis.primary_emotion.value)
431
+
432
+ # Intensity
433
+ tags.append(f"{emotion_analysis.intensity_level}_intensity")
434
+
435
+ # Sentiment
436
+ if emotion_analysis.sentiment_polarity > 0.3:
437
+ tags.append("positive_sentiment")
438
+ elif emotion_analysis.sentiment_polarity < -0.3:
439
+ tags.append("negative_sentiment")
440
+ else:
441
+ tags.append("neutral_sentiment")
442
+
443
+ # Artistic descriptors
444
+ tags.extend(emotion_analysis.artistic_descriptors[:2])
445
+
446
+ return tags
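EmotionalPromptEnhancer wraps an EmotionProcessor so the same analysis drives both the enhanced prompt and the metadata tags. A short sketch, assuming the module is importable:

    from src.utils.emotion_utils import EmotionalPromptEnhancer

    enhancer = EmotionalPromptEnhancer()
    analysis = enhancer.emotion_processor.analyze_emotion("a stormy coastline", selected_emotion="melancholic")

    prompt = enhancer.enhance_prompt_with_emotion(
        base_prompt="a stormy coastline",
        style="watercolor",
        emotion_analysis=analysis,
        enhancement_strength=0.8,
    )
    print(prompt)
    print(enhancer.generate_emotion_tags(analysis))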
src/utils/file_utils.py ADDED
@@ -0,0 +1,149 @@
1
+ """
2
+ File handling utilities for CompI project.
3
+ """
4
+
5
+ import json
6
+ import yaml
7
+ from pathlib import Path
8
+ from typing import Dict, Any, Union
9
+ from PIL import Image
10
+ import soundfile as sf
11
+ import numpy as np
12
+
13
+ from src.config import OUTPUTS_DIR
14
+
15
+ def save_image(image: Image.Image, filename: str, subfolder: str = "images") -> Path:
16
+ """
17
+ Save a PIL Image to the outputs directory.
18
+
19
+ Args:
20
+ image: PIL Image to save
21
+ filename: Name of the file (with extension)
22
+ subfolder: Subfolder within outputs directory
23
+
24
+ Returns:
25
+ Path to saved file
26
+ """
27
+ output_dir = OUTPUTS_DIR / subfolder
28
+ output_dir.mkdir(parents=True, exist_ok=True)
29
+
30
+ file_path = output_dir / filename
31
+ image.save(file_path)
32
+
33
+ return file_path
34
+
35
+ def save_audio(audio_data: np.ndarray, filename: str,
36
+ sample_rate: int = 22050, subfolder: str = "audio") -> Path:
37
+ """
38
+ Save audio data to the outputs directory.
39
+
40
+ Args:
41
+ audio_data: Audio data as numpy array
42
+ filename: Name of the file (with extension)
43
+ sample_rate: Audio sample rate
44
+ subfolder: Subfolder within outputs directory
45
+
46
+ Returns:
47
+ Path to saved file
48
+ """
49
+ output_dir = OUTPUTS_DIR / subfolder
50
+ output_dir.mkdir(parents=True, exist_ok=True)
51
+
52
+ file_path = output_dir / filename
53
+ sf.write(file_path, audio_data, sample_rate)
54
+
55
+ return file_path
56
+
57
+ def load_config(config_path: Union[str, Path]) -> Dict[str, Any]:
58
+ """
59
+ Load configuration from JSON or YAML file.
60
+
61
+ Args:
62
+ config_path: Path to configuration file
63
+
64
+ Returns:
65
+ Configuration dictionary
66
+ """
67
+ config_path = Path(config_path)
68
+
69
+ if not config_path.exists():
70
+ raise FileNotFoundError(f"Configuration file not found: {config_path}")
71
+
72
+ with open(config_path, 'r') as f:
73
+ if config_path.suffix.lower() in ['.yml', '.yaml']:
74
+ return yaml.safe_load(f)
75
+ elif config_path.suffix.lower() == '.json':
76
+ return json.load(f)
77
+ else:
78
+ raise ValueError(f"Unsupported config file format: {config_path.suffix}")
79
+
80
+ def ensure_dir(path: Union[str, Path]) -> Path:
81
+ """
82
+ Ensure directory exists, create if it doesn't.
83
+
84
+ Args:
85
+ path: Directory path
86
+
87
+ Returns:
88
+ Path object
89
+ """
90
+ path = Path(path)
91
+ path.mkdir(parents=True, exist_ok=True)
92
+ return path
93
+
94
+ def ensure_directory_exists(path: Union[str, Path]) -> Path:
95
+ """
96
+ Alias for ensure_dir for backward compatibility.
97
+
98
+ Args:
99
+ path: Directory path
100
+
101
+ Returns:
102
+ Path object
103
+ """
104
+ return ensure_dir(path)
105
+
106
+ def generate_filename(prompt: str, style: str = "", mood: str = "",
107
+ seed: int = 0, variation: int = 1,
108
+ has_audio: bool = False, max_length: int = 100) -> str:
109
+ """
110
+ Generate a descriptive filename for generated images.
111
+
112
+ Args:
113
+ prompt: Text prompt used for generation
114
+ style: Art style
115
+ mood: Mood/atmosphere
116
+ seed: Random seed used
117
+ variation: Variation number
118
+ has_audio: Whether audio was used in generation
119
+ max_length: Maximum filename length
120
+
121
+ Returns:
122
+ Generated filename (without extension)
123
+ """
124
+ import re
125
+ from datetime import datetime
126
+
127
+ # Clean and truncate prompt
128
+ prompt_clean = re.sub(r'[^\w\s-]', '', prompt.lower())
129
+ prompt_slug = "_".join(prompt_clean.split()[:6])[:30]
130
+
131
+ # Clean style and mood
132
+ style_slug = re.sub(r'[^\w]', '', style.lower())[:10] if style else ""
133
+ mood_slug = re.sub(r'[^\w]', '', mood.lower())[:10] if mood else ""
134
+
135
+ # Timestamp
136
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
137
+
138
+ # Audio indicator
139
+ audio_tag = "_audio" if has_audio else ""
140
+
141
+ # Combine all parts
142
+ parts = [prompt_slug, style_slug, mood_slug, timestamp, f"seed{seed}", f"v{variation}"]
143
+ filename = "_".join(filter(None, parts)) + audio_tag
144
+
145
+ # Truncate if too long
146
+ if len(filename) > max_length:
147
+ filename = filename[:max_length]
148
+
149
+ return filename
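A hedged sketch of the file helpers, assuming src.config defines OUTPUTS_DIR (as imported above) and using a blank placeholder image in place of a real generation result:

    from PIL import Image
    from src.utils.file_utils import generate_filename, save_image

    name = generate_filename(
        prompt="a forest at dawn",
        style="oil painting",
        mood="dreamy",
        seed=42,
        variation=1,
        has_audio=True,
    )
    image = Image.new("RGB", (512, 512), "white")  # placeholder for a generated image
    print(save_image(image, f"{name}.png"))  # saved under OUTPUTS_DIR/images/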
src/utils/image_utils.py ADDED
@@ -0,0 +1,309 @@
+"""
+Image processing utilities for CompI Phase 2.E: Style Reference/Example Image Integration
+
+This module provides utilities for:
+- Image loading from files and URLs
+- Image validation and preprocessing
+- Style analysis and feature extraction
+- Image format conversion and optimization
+"""
+
+import os
+import io
+import requests
+import hashlib
+from typing import Optional, Tuple, Dict, Any, Union, List
+from pathlib import Path
+import logging
+
+import torch
+import numpy as np
+from PIL import Image, ImageStat, ImageFilter
+import cv2
+
+from src.utils.logging_utils import setup_logger
+
+logger = setup_logger(__name__)
+
+class ImageProcessor:
+    """
+    Handles image loading, validation, and preprocessing for style reference
+    """
+
+    def __init__(self, max_size: Tuple[int, int] = (1024, 1024)):
+        self.max_size = max_size
+        self.supported_formats = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
+
+    def load_image_from_url(
+        self,
+        url: str,
+        timeout: int = 10,
+        max_file_size: int = 10 * 1024 * 1024  # 10MB
+    ) -> Optional[Image.Image]:
+        """
+        Load image from URL with validation and error handling
+
+        Args:
+            url: Image URL
+            timeout: Request timeout in seconds
+            max_file_size: Maximum file size in bytes
+
+        Returns:
+            PIL Image or None if failed
+        """
+        try:
+            logger.info(f"Loading image from URL: {url}")
+
+            # Validate URL format
+            if not url.startswith(('http://', 'https://')):
+                logger.error(f"Invalid URL format: {url}")
+                return None
+
+            # Make request with headers to avoid blocking
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+            }
+
+            response = requests.get(url, timeout=timeout, headers=headers, stream=True)
+            response.raise_for_status()
+
+            # Check content type
+            content_type = response.headers.get('content-type', '').lower()
+            if not any(img_type in content_type for img_type in ['image/', 'jpeg', 'png', 'webp']):
+                logger.error(f"Invalid content type: {content_type}")
+                return None
+
+            # Check file size
+            content_length = response.headers.get('content-length')
+            if content_length and int(content_length) > max_file_size:
+                logger.error(f"File too large: {content_length} bytes")
+                return None
+
+            # Load image data
+            image_data = io.BytesIO()
+            downloaded_size = 0
+
+            for chunk in response.iter_content(chunk_size=8192):
+                downloaded_size += len(chunk)
+                if downloaded_size > max_file_size:
+                    logger.error(f"File too large during download: {downloaded_size} bytes")
+                    return None
+                image_data.write(chunk)
+
+            image_data.seek(0)
+
+            # Open and validate image
+            image = Image.open(image_data)
+            image = image.convert('RGB')
+
+            logger.info(f"Successfully loaded image: {image.size}")
+            return image
+
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Request error loading image from {url}: {e}")
+            return None
+        except Exception as e:
+            logger.error(f"Error loading image from {url}: {e}")
+            return None
+
+    def load_image_from_file(self, file_path: Union[str, Path]) -> Optional[Image.Image]:
+        """
+        Load image from local file with validation
+
+        Args:
+            file_path: Path to image file
+
+        Returns:
+            PIL Image or None if failed
+        """
+        try:
+            file_path = Path(file_path)
+
+            if not file_path.exists():
+                logger.error(f"File does not exist: {file_path}")
+                return None
+
+            if file_path.suffix.lower() not in self.supported_formats:
+                logger.error(f"Unsupported format: {file_path.suffix}")
+                return None
+
+            image = Image.open(file_path)
+            image = image.convert('RGB')
+
+            logger.info(f"Successfully loaded image from file: {image.size}")
+            return image
+
+        except Exception as e:
+            logger.error(f"Error loading image from {file_path}: {e}")
+            return None
+
+    def preprocess_image(
+        self,
+        image: Image.Image,
+        target_size: Optional[Tuple[int, int]] = None,
+        maintain_aspect_ratio: bool = True
+    ) -> Image.Image:
+        """
+        Preprocess image for stable diffusion
+
+        Args:
+            image: Input PIL Image
+            target_size: Target size (width, height)
+            maintain_aspect_ratio: Whether to maintain aspect ratio
+
+        Returns:
+            Preprocessed PIL Image
+        """
+        if target_size is None:
+            target_size = (512, 512)  # Default SD size
+
+        try:
+            # Resize image
+            if maintain_aspect_ratio:
+                image.thumbnail(target_size, Image.Resampling.LANCZOS)
+
+                # Create new image with target size and paste resized image
+                new_image = Image.new('RGB', target_size, (255, 255, 255))
+                paste_x = (target_size[0] - image.width) // 2
+                paste_y = (target_size[1] - image.height) // 2
+                new_image.paste(image, (paste_x, paste_y))
+                image = new_image
+            else:
+                image = image.resize(target_size, Image.Resampling.LANCZOS)
+
+            logger.info(f"Preprocessed image to size: {image.size}")
+            return image
+
+        except Exception as e:
+            logger.error(f"Error preprocessing image: {e}")
+            return image
+
+    def analyze_image_properties(self, image: Image.Image) -> Dict[str, Any]:
+        """
+        Analyze image properties for style reference
+
+        Args:
+            image: PIL Image to analyze
+
+        Returns:
+            Dictionary of image properties
+        """
+        try:
+            # Basic properties
+            width, height = image.size
+            aspect_ratio = width / height
+
+            # Color analysis
+            stat = ImageStat.Stat(image)
+            avg_brightness = sum(stat.mean) / len(stat.mean)
+            avg_contrast = sum(stat.stddev) / len(stat.stddev)
+
+            # Convert to numpy for additional analysis
+            img_array = np.array(image)
+
+            # Color distribution
+            r_mean, g_mean, b_mean = np.mean(img_array, axis=(0, 1))
+            color_variance = np.var(img_array, axis=(0, 1))
+
+            # Edge detection for complexity
+            gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
+            edges = cv2.Canny(gray, 50, 150)
+            edge_density = np.sum(edges > 0) / (width * height)
+
+            properties = {
+                'dimensions': (width, height),
+                'aspect_ratio': aspect_ratio,
+                'brightness': avg_brightness,
+                'contrast': avg_contrast,
+                'color_means': (float(r_mean), float(g_mean), float(b_mean)),
+                'color_variance': color_variance.tolist(),
+                'edge_density': float(edge_density),
+                'file_size_pixels': width * height
+            }
+
+            logger.info(f"Analyzed image properties: {properties}")
+            return properties
+
+        except Exception as e:
+            logger.error(f"Error analyzing image properties: {e}")
+            return {}
+
+    def generate_image_hash(self, image: Image.Image) -> str:
+        """
+        Generate hash for image deduplication
+
+        Args:
+            image: PIL Image
+
+        Returns:
+            MD5 hash string
+        """
+        try:
+            # Convert image to bytes
+            img_bytes = io.BytesIO()
+            image.save(img_bytes, format='PNG')
+            img_bytes = img_bytes.getvalue()
+
+            # Generate hash
+            hash_md5 = hashlib.md5(img_bytes)
+            return hash_md5.hexdigest()
+
+        except Exception as e:
+            logger.error(f"Error generating image hash: {e}")
+            return ""
+
+class StyleAnalyzer:
+    """
+    Analyzes style characteristics of reference images
+    """
+
+    def __init__(self):
+        self.style_keywords = {
+            'realistic': ['photo', 'realistic', 'detailed', 'sharp'],
+            'artistic': ['painting', 'artistic', 'brushstrokes', 'canvas'],
+            'anime': ['anime', 'manga', 'cartoon', 'stylized'],
+            'abstract': ['abstract', 'geometric', 'surreal', 'conceptual'],
+            'vintage': ['vintage', 'retro', 'aged', 'classic'],
+            'modern': ['modern', 'contemporary', 'clean', 'minimal']
+        }
+
+    def suggest_style_keywords(self, image_properties: Dict[str, Any]) -> List[str]:
+        """
+        Suggest style keywords based on image analysis
+
+        Args:
+            image_properties: Properties from analyze_image_properties
+
+        Returns:
+            List of suggested style keywords
+        """
+        suggestions = []
+
+        try:
+            brightness = image_properties.get('brightness', 128)
+            contrast = image_properties.get('contrast', 50)
+            edge_density = image_properties.get('edge_density', 0.1)
+
+            # Brightness-based suggestions
+            if brightness < 100:
+                suggestions.extend(['dark', 'moody', 'dramatic'])
+            elif brightness > 180:
+                suggestions.extend(['bright', 'light', 'airy'])
+
+            # Contrast-based suggestions
+            if contrast > 80:
+                suggestions.extend(['high contrast', 'bold', 'striking'])
+            elif contrast < 30:
+                suggestions.extend(['soft', 'gentle', 'muted'])
+
+            # Edge density-based suggestions
+            if edge_density > 0.2:
+                suggestions.extend(['detailed', 'complex', 'intricate'])
+            elif edge_density < 0.05:
+                suggestions.extend(['smooth', 'simple', 'minimalist'])
+
+            return list(set(suggestions))  # Remove duplicates
+
+        except Exception as e:
+            logger.error(f"Error suggesting style keywords: {e}")
+            return []
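
The two classes added above are designed to be chained: load a reference image, normalize it for Stable Diffusion, measure its properties, then turn those measurements into prompt keywords. A minimal sketch follows; it is an editor's illustration rather than part of the diff, and the local file path is a placeholder.

# Usage sketch (editor's illustration, not part of the committed diff).
from src.utils.image_utils import ImageProcessor, StyleAnalyzer

processor = ImageProcessor()
analyzer = StyleAnalyzer()

# Load a local reference image (use load_image_from_url for remote references)
image = processor.load_image_from_file("path/to/reference.jpg")  # placeholder path
if image is not None:
    image = processor.preprocess_image(image, target_size=(512, 512))
    props = processor.analyze_image_properties(image)
    keywords = analyzer.suggest_style_keywords(props)
    print(processor.generate_image_hash(image), keywords)
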
src/utils/logging_utils.py ADDED
@@ -0,0 +1,45 @@
+"""
+Logging utilities for CompI project.
+"""
+
+import logging
+import sys
+from pathlib import Path
+from src.config import LOG_LEVEL, LOG_FORMAT, PROJECT_ROOT
+
+def setup_logger(name: str, log_file: str = None) -> logging.Logger:
+    """
+    Set up a logger with console and optional file output.
+
+    Args:
+        name: Logger name
+        log_file: Optional log file path
+
+    Returns:
+        Configured logger instance
+    """
+    logger = logging.getLogger(name)
+    logger.setLevel(getattr(logging, LOG_LEVEL.upper()))
+
+    # Clear existing handlers
+    logger.handlers.clear()
+
+    # Console handler
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setLevel(logging.INFO)
+    console_formatter = logging.Formatter(LOG_FORMAT)
+    console_handler.setFormatter(console_formatter)
+    logger.addHandler(console_handler)
+
+    # File handler (optional)
+    if log_file:
+        log_path = PROJECT_ROOT / "logs"
+        log_path.mkdir(exist_ok=True)
+
+        file_handler = logging.FileHandler(log_path / log_file)
+        file_handler.setLevel(logging.DEBUG)
+        file_formatter = logging.Formatter(LOG_FORMAT)
+        file_handler.setFormatter(file_formatter)
+        logger.addHandler(file_handler)
+
+    return logger
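
Finally, a short sketch of how other modules in this commit consume the logger helper. The logger name and log filename below are illustrative, not taken from the diff.

# Usage sketch (editor's illustration, not part of the committed diff).
from src.utils.logging_utils import setup_logger

# Console-only logger; the level comes from LOG_LEVEL in src.config
logger = setup_logger(__name__)
logger.info("Pipeline initialised")

# Console + file logging; the file is created under <PROJECT_ROOT>/logs/
file_logger = setup_logger("compi.phase2e", log_file="phase2e.log")  # illustrative names
file_logger.info("Run details are also appended to logs/phase2e.log")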