update new version of app
Browse files- .claude/settings.local.json +9 -0
- .env +1 -0
- .gradio/certificate.pem +31 -0
- .gradio/flagged/dataset1.csv +2 -0
- .gradio/flagged/dataset2.csv +2 -0
- Dockerfile +1 -1
- README.md +41 -13
- __pycache__/ai_assistant.cpython-311.pyc +0 -0
- __pycache__/ai_assistant.cpython-38.pyc +0 -0
- __pycache__/app.cpython-38.pyc +0 -0
- ai_assistant.py +881 -0
- ai_enhanced_app.py +607 -0
- app1.py +842 -0
- gradio_demo.py +277 -0
- requirements.txt +5 -2
- simple_app.py +370 -0
- simple_gradio.py +81 -0
- test.ipynb +0 -0
.claude/settings.local.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"permissions": {
|
3 |
+
"allow": [
|
4 |
+
"Bash(python:*)"
|
5 |
+
],
|
6 |
+
"deny": [],
|
7 |
+
"ask": []
|
8 |
+
}
|
9 |
+
}
|
.env
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
OPENROUTER_API_KEY=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
|
.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
-----BEGIN CERTIFICATE-----
|
2 |
+
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
|
3 |
+
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
|
4 |
+
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
|
5 |
+
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
|
6 |
+
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
|
7 |
+
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
|
8 |
+
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
|
9 |
+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
|
10 |
+
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
|
11 |
+
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
|
12 |
+
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
|
13 |
+
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
|
14 |
+
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
|
15 |
+
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
|
16 |
+
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
|
17 |
+
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
|
18 |
+
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
|
19 |
+
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
|
20 |
+
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
|
21 |
+
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
|
22 |
+
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
|
23 |
+
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
|
24 |
+
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
|
25 |
+
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
|
26 |
+
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
|
27 |
+
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
|
28 |
+
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
|
29 |
+
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
|
30 |
+
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
|
31 |
+
-----END CERTIFICATE-----
|
.gradio/flagged/dataset1.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
name,output,timestamp
|
2 |
+
ddd,Hello ddd!,2025-09-07 16:28:01.266019
|
.gradio/flagged/dataset2.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
output,timestamp
|
2 |
+
"{""type"": ""matplotlib"", ""plot"": """"}",2025-09-07 17:34:58.244504
|
Dockerfile
CHANGED
@@ -35,4 +35,4 @@ RUN mkdir -p /code/cache
|
|
35 |
EXPOSE 7860
|
36 |
|
37 |
# Run the application
|
38 |
-
CMD ["python", "
|
|
|
35 |
EXPOSE 7860
|
36 |
|
37 |
# Run the application
|
38 |
+
CMD ["python", "app1.py"]
|
README.md
CHANGED
@@ -1,13 +1,41 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Dash MCP - AI-Powered Data Analytics Dashboard
|
2 |
+
|
3 |
+
🤖 An interactive data analytics dashboard built with Dash and powered by AI assistance.
|
4 |
+
|
5 |
+
## Features
|
6 |
+
|
7 |
+
- **Interactive Data Visualization**: Create various chart types (scatter, line, bar, histogram, box, heatmap, pie)
|
8 |
+
- **Built-in Sample Datasets**: Gapminder, Iris, Tips, Stock Data, and Wind Data
|
9 |
+
- **CSV/Excel Upload**: Upload and analyze your own datasets
|
10 |
+
- **AI-Powered Analysis**: Ask questions about your data and get intelligent responses
|
11 |
+
- **Multiple Tabs**: Organized interface for dataset management, AI assistant, visualizations, and data exploration
|
12 |
+
|
13 |
+
## Quick Start
|
14 |
+
|
15 |
+
1. Install dependencies:
|
16 |
+
```bash
|
17 |
+
pip install -r requirements.txt
|
18 |
+
```
|
19 |
+
|
20 |
+
2. Set up environment variables (optional for enhanced AI features):
|
21 |
+
```bash
|
22 |
+
echo "OPENROUTER_API_KEY=your_key_here" > .env
|
23 |
+
```
|
24 |
+
|
25 |
+
3. Run the application:
|
26 |
+
```bash
|
27 |
+
python app.py
|
28 |
+
```
|
29 |
+
|
30 |
+
4. Open your browser to `http://localhost:7860`
|
31 |
+
|
32 |
+
## Tech Stack
|
33 |
+
|
34 |
+
- **Frontend**: Dash, Plotly, Bootstrap Components
|
35 |
+
- **Backend**: Python, Pandas, NumPy
|
36 |
+
- **AI**: LangChain, OpenRouter API
|
37 |
+
- **Data Processing**: Pandas, Plotly Express
|
38 |
+
|
39 |
+
## License
|
40 |
+
|
41 |
+
MIT
|
__pycache__/ai_assistant.cpython-311.pyc
ADDED
Binary file (44.8 kB). View file
|
|
__pycache__/ai_assistant.cpython-38.pyc
ADDED
Binary file (26.7 kB). View file
|
|
__pycache__/app.cpython-38.pyc
ADDED
Binary file (19.2 kB). View file
|
|
ai_assistant.py
ADDED
@@ -0,0 +1,881 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
AI Assistant Module for Data Analytics Dashboard
|
3 |
+
|
4 |
+
This module contains all AI-related functionality including:
|
5 |
+
- LLM integrations (OpenRouter, OpenAI, etc.)
|
6 |
+
- Data analysis functions
|
7 |
+
- Natural language processing
|
8 |
+
- Chart generation from prompts
|
9 |
+
- Advanced analytics
|
10 |
+
"""
|
11 |
+
|
12 |
+
import os
|
13 |
+
import pandas as pd
|
14 |
+
import plotly.express as px
|
15 |
+
import plotly.graph_objects as go
|
16 |
+
from typing import Optional, Dict, Any, List, Tuple
|
17 |
+
from dotenv import load_dotenv
|
18 |
+
import sys
|
19 |
+
import io
|
20 |
+
import contextlib
|
21 |
+
|
22 |
+
# Configure matplotlib for non-interactive backend to avoid GUI issues
|
23 |
+
import matplotlib
|
24 |
+
matplotlib.use('Agg') # Use non-interactive backend
|
25 |
+
import matplotlib.pyplot as plt
|
26 |
+
import traceback
|
27 |
+
import re
|
28 |
+
import numpy as np
|
29 |
+
import seaborn as sns
|
30 |
+
from io import StringIO, BytesIO
|
31 |
+
import base64
|
32 |
+
|
33 |
+
# Load environment variables
|
34 |
+
load_dotenv()
|
35 |
+
|
36 |
+
# LangChain imports (only import what we need)
|
37 |
+
try:
|
38 |
+
from langchain_core.prompts import PromptTemplate
|
39 |
+
# Try the newer langchain-openai first, fallback to community
|
40 |
+
try:
|
41 |
+
from langchain_openai import ChatOpenAI
|
42 |
+
except ImportError:
|
43 |
+
from langchain_community.chat_models import ChatOpenAI
|
44 |
+
LANGCHAIN_AVAILABLE = True
|
45 |
+
except ImportError:
|
46 |
+
print("LangChain not fully available - using demo mode")
|
47 |
+
LANGCHAIN_AVAILABLE = False
|
48 |
+
|
49 |
+
class PythonREPL:
    """Restricted Python execution environment for the AI assistant.

    Snippets are screened against a regex denylist and then run with ``exec``
    in a namespace pre-populated with the analysis libraries and, when one was
    supplied, the current dataframe under the name ``df``.

    NOTE(security): the denylist is a best-effort filter, not a sandbox.
    ``exec`` adds the full ``__builtins__`` to any globals dict that lacks it,
    so the "safe subset" listed in ``globals_dict`` does not actually restrict
    what executed code can reach. Do not rely on this class to contain
    hostile code.
    """

    # Patterns are matched against the lower-cased snippet. Call/keyword
    # patterns carry a leading word boundary so that identifiers which merely
    # *contain* a blocked word (``model = ...``, ``profile(...)``) are not
    # rejected.
    _DANGEROUS_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r'import\s+os',
            r'import\s+sys',
            r'import\s+subprocess',
            r'import\s+shutil',
            r'from\s+os',
            r'from\s+sys',
            r'from\s+subprocess',
            r'__import__',
            r'\beval\s*\(',
            r'\bexec\s*\(',
            r'\bopen\s*\(',
            r'\bfile\s*\(',
            r'\binput\s*\(',
            r'\braw_input\s*\(',
            r'\bexit\s*\(',
            r'\bquit\s*\(',
            r'\bdel\s+',
            r'\bglobals\s*\(',
            r'\blocals\s*\(',
            r'\bvars\s*\(',
            r'\breload\s*\(',
            r'pd\.read_csv\s*\(',
            r'pd\.read_excel\s*\(',
            r'pd\.read_json\s*\(',
            r'pandas\.read_csv\s*\(',
            r'pandas\.read_excel\s*\(',
            r'pandas\.read_json\s*\(',
            r'["\'][^"\']*\.csv["\']',   # quoted file paths like "file.csv"
            r'["\'][^"\']*\.xlsx["\']',  # quoted file paths like "file.xlsx"
            r'["\'][^"\']*\.json["\']',  # quoted file paths like "file.json"
            r'your_data_file',
        )
    )

    def __init__(self, dataframe=None):
        """Build the execution namespace.

        Args:
            dataframe: Optional dataframe exposed to executed code as ``df``.
        """
        self.df = dataframe
        self.globals_dict = {
            # Analysis libraries
            'pd': pd,
            'np': np,
            'plt': plt,
            'sns': sns,
            'px': px,
            'go': go,
            # Commonly used built-ins
            'len': len,
            'sum': sum,
            'min': min,
            'max': max,
            'abs': abs,
            'round': round,
            'range': range,
            'list': list,
            'dict': dict,
            'tuple': tuple,
            'set': set,
            'str': str,
            'int': int,
            'float': float,
            'bool': bool,
            'print': print,
        }

        if dataframe is not None:
            self.globals_dict['df'] = dataframe

    def execute_code(self, code: str) -> Dict[str, Any]:
        """Run ``code`` and report what happened.

        Args:
            code: Python source to execute.

        Returns:
            A dict with keys ``success`` (bool), ``output`` (captured stdout),
            ``error`` (message plus traceback, or the denylist rejection
            message), ``plots`` (base64-encoded PNG strings for every open
            matplotlib figure) and ``returned_value`` (always ``None`` here).
        """
        stdout_capture = StringIO()
        result = {
            'success': False,
            'output': '',
            'error': '',
            'plots': [],
            'returned_value': None
        }

        try:
            if self._is_code_safe(code):
                with contextlib.redirect_stdout(stdout_capture):
                    # Run against a copy so names defined by one snippet do
                    # not persist into the next execution.
                    local_globals = self.globals_dict.copy()
                    exec(code, local_globals)

                result['success'] = True
                result['output'] = stdout_capture.getvalue()

                # Collect any matplotlib figures the snippet created.
                if plt.get_fignums():
                    result['plots'] = self._capture_plots()
            else:
                result['error'] = "Code contains potentially unsafe operations and cannot be executed."

        except Exception as e:
            result['error'] = f"Error: {str(e)}\n{traceback.format_exc()}"
            # A failing snippet may have left figures open; close them so they
            # are not reported as plots of the next successful execution.
            plt.close('all')

        return result

    def _is_code_safe(self, code: str) -> bool:
        """Return ``False`` when ``code`` matches any denylist pattern.

        Matching is done on the lower-cased source, so it is case-insensitive.
        """
        code_lower = code.lower()
        return not any(pattern.search(code_lower) for pattern in self._DANGEROUS_PATTERNS)

    def _capture_plots(self) -> List[str]:
        """Render every open matplotlib figure to a base64 PNG string and close it."""
        plots = []

        for fig_num in plt.get_fignums():
            fig = plt.figure(fig_num)

            # Render into an in-memory buffer rather than a file on disk.
            img_buffer = BytesIO()
            fig.savefig(img_buffer, format='png', bbox_inches='tight', dpi=150)
            img_buffer.seek(0)

            plots.append(base64.b64encode(img_buffer.getvalue()).decode())

            # Close the figure to free memory.
            plt.close(fig)

        return plots
|
186 |
+
|
187 |
+
class ChatOpenRouter:
    """Wrapper that reaches OpenRouter through a ``ChatOpenAI`` client.

    The API key is read from the ``OPENROUTER_API_KEY`` environment variable.
    When LangChain could not be imported the wrapper still constructs, but
    ``invoke`` only returns a fixed placeholder response.
    """

    def __init__(self, model="google/gemma-3-27b-it:free", temperature=0.3, max_tokens=1500, **kwargs):
        """Store the settings and build the underlying client.

        Raises:
            ValueError: If ``OPENROUTER_API_KEY`` is unset or empty.
        """
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.api_key = os.environ.get("OPENROUTER_API_KEY")

        if not self.api_key:
            raise ValueError("OPENROUTER_API_KEY not found in environment variables")

        if not LANGCHAIN_AVAILABLE:
            # No LangChain: keep the object usable in placeholder mode.
            self.client = None
            return

        client_options = dict(
            base_url="https://openrouter.ai/api/v1",
            api_key=self.api_key,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        self.client = ChatOpenAI(**client_options, **kwargs)

    def invoke(self, messages):
        """Forward ``messages`` to the client, or return a placeholder without one."""
        if not self.client:
            # Ad-hoc object exposing only a ``content`` attribute.
            placeholder_type = type('Response', (), {'content': 'LangChain not available - using demo mode'})
            return placeholder_type()
        return self.client.invoke(messages)

    def is_available(self):
        """Return ``True`` only when both the client and the API key are set."""
        return not (self.client is None or self.api_key is None)
|
222 |
+
|
223 |
+
class AIAssistant:
|
224 |
+
"""Main AI Assistant class that handles various AI-powered data analysis tasks"""
|
225 |
+
|
226 |
+
def __init__(self):
    """Create an assistant with no dataset attached and try to connect to OpenRouter."""
    # Dataset state starts empty.
    self.current_dataset = None
    self.dataset_context = {}
    self.python_repl = None

    # Start without a client; _init_openrouter() assigns self.llm_client and
    # reports whether the LLM backend is usable.
    self.llm_client = None
    self.openrouter_available = self._init_openrouter()
|
232 |
+
|
233 |
+
def _init_openrouter(self) -> bool:
    """Create the OpenRouter client and report whether it is usable.

    Returns:
        ``True`` when the client was built and reports itself available;
        ``False`` when it is unavailable or construction raised.
    """
    try:
        self.llm_client = ChatOpenRouter()
        ready = self.llm_client.is_available()
        if not ready:
            print("⚠️ OpenRouter client not fully available - using demo mode")
            return False

        print("✅ OpenRouter initialized successfully")
        return True

    except Exception as exc:
        # Construction failures fall back to demo mode instead of propagating.
        print(f"❌ Failed to initialize OpenRouter: {exc}")
        print("Using demo mode instead")
        return False
|
248 |
+
|
249 |
+
def set_dataset(self, df: pd.DataFrame, dataset_name: str = "current"):
    """Attach ``df`` as the active dataset and cache a description of it.

    Args:
        df: Dataframe to analyse.
        dataset_name: Key under which the description is stored in
            ``self.dataset_context``.
    """
    self.current_dataset = df

    numeric_names = df.select_dtypes(include=['number']).columns.tolist()
    object_names = df.select_dtypes(include=['object']).columns.tolist()

    # Summary statistics are only computed when there is a numeric column.
    if len(numeric_names) > 0:
        stats = df.describe().to_dict()
    else:
        stats = {}

    description = {
        'dataframe': df,
        'shape': df.shape,
        'columns': df.columns.tolist(),
        'dtypes': df.dtypes.to_dict(),
        'missing_values': df.isnull().sum().to_dict(),
        'numeric_columns': numeric_names,
        'categorical_columns': object_names,
        'summary_stats': stats,
    }
    self.dataset_context[dataset_name] = description

    # A fresh REPL is bound to the new dataframe.
    self.python_repl = PythonREPL(dataframe=df)
|
264 |
+
|
265 |
+
def get_llm_response(self, question: str, df: pd.DataFrame) -> str:
    """Answer ``question`` about ``df`` using the LLM when it is available.

    Falls back to ``get_basic_response`` when no LLM client is usable or when
    anything in the LLM path raises. Questions that look like requests for
    computation are routed to the code-execution path instead.
    """
    if not (self.openrouter_available and self.llm_client):
        return self.get_basic_response(question, df)

    try:
        # Computation/plot style requests are answered by generating and running code.
        if self._should_execute_code(question):
            return self._get_code_execution_response(question, df)

        dataset_summary = self._create_data_context(df)

        prompt = f"""You are a professional data analyst AI assistant with Python code execution capabilities. Based on the provided dataset information, answer the user's question with clear, actionable insights.

Dataset Context:
{dataset_summary}

User Question: {question}

Available capabilities:
- You can write and execute Python code to analyze the data
- The dataset is available as 'df' variable
- Available libraries: pandas (pd), numpy (np), matplotlib (plt), seaborn (sns), plotly (px, go)
- You can create visualizations and perform complex analyses

Response format:
1. Direct answer to the question based on the actual data
2. Key insights or patterns you notice in this specific dataset
3. If analysis requires computation, suggest or provide Python code
4. Practical recommendations or next steps if applicable
5. Use emojis and markdown formatting to make your response engaging and easy to read

Keep your response concise but informative, focusing on actionable insights about this specific dataset.
"""

        reply = self.llm_client.invoke(prompt)

        # Prefer the reply's ``content`` attribute; otherwise stringify it.
        return reply.content if hasattr(reply, 'content') else str(reply)

    except Exception as exc:
        print(f"Error getting LLM response: {exc}")
        return self.get_basic_response(question, df)
|
314 |
+
|
315 |
+
def _create_data_context(self, df: pd.DataFrame) -> str:
|
316 |
+
"""Create comprehensive data context for LLM"""
|
317 |
+
numeric_cols = df.select_dtypes(include=['number']).columns
|
318 |
+
categorical_cols = df.select_dtypes(include=['object']).columns
|
319 |
+
|
320 |
+
context = f"""Dataset Information:
|
321 |
+
- Shape: {df.shape[0]:,} rows × {df.shape[1]} columns
|
322 |
+
- Columns: {', '.join(df.columns.tolist())}
|
323 |
+
- Numeric columns ({len(numeric_cols)}): {', '.join(numeric_cols.tolist())}
|
324 |
+
- Categorical columns ({len(categorical_cols)}): {', '.join(categorical_cols.tolist())}
|
325 |
+
- Missing values: {df.isnull().sum().sum()} total
|
326 |
+
|
327 |
+
Sample Data (first 3 rows):
|
328 |
+
{df.head(3).to_string()}
|
329 |
+
|
330 |
+
Summary Statistics (numeric columns):
|
331 |
+
{df.describe().to_string() if len(numeric_cols) > 0 else 'No numeric columns for statistics'}
|
332 |
+
|
333 |
+
Data Types:
|
334 |
+
{df.dtypes.to_string()}"""
|
335 |
+
|
336 |
+
return context
|
337 |
+
|
338 |
+
def _should_execute_code(self, question: str) -> bool:
|
339 |
+
"""Determine if the question requires code execution"""
|
340 |
+
code_keywords = [
|
341 |
+
'run code', 'execute', 'calculate', 'compute', 'plot', 'visualize', 'graph',
|
342 |
+
'correlation matrix', 'regression', 'analysis', 'statistics', 'distribution',
|
343 |
+
'histogram', 'scatter plot', 'bar chart', 'create chart', 'show me',
|
344 |
+
'python code', 'pandas', 'numpy'
|
345 |
+
]
|
346 |
+
|
347 |
+
question_lower = question.lower()
|
348 |
+
return any(keyword in question_lower for keyword in code_keywords)
|
349 |
+
|
350 |
+
def _get_code_execution_response(self, question: str, df: pd.DataFrame) -> str:
|
351 |
+
"""Generate response with code execution"""
|
352 |
+
if not self.python_repl:
|
353 |
+
return "Code execution environment not available. Please load a dataset first."
|
354 |
+
|
355 |
+
# Create a prompt to generate code for the user's request
|
356 |
+
code_prompt = f"""You are a Python data analyst. Generate Python code to answer this question about the dataset:
|
357 |
+
|
358 |
+
Question: {question}
|
359 |
+
|
360 |
+
IMPORTANT - Dataset is already loaded:
|
361 |
+
- The dataset is already loaded and available as the variable 'df'
|
362 |
+
- DO NOT use pd.read_csv() or any file loading commands
|
363 |
+
- DO NOT try to load data from files - it's already available as 'df'
|
364 |
+
- The dataframe 'df' contains {df.shape[0]} rows and {df.shape[1]} columns
|
365 |
+
- Columns available in df: {df.columns.tolist()}
|
366 |
+
|
367 |
+
Sample data from df:
|
368 |
+
{df.head(3).to_string()}
|
369 |
+
|
370 |
+
Requirements:
|
371 |
+
1. Use the pre-loaded dataframe 'df' directly
|
372 |
+
2. Write clean, well-commented Python code
|
373 |
+
3. Use pandas, numpy, matplotlib, seaborn as needed
|
374 |
+
4. Include print statements to show results
|
375 |
+
5. Create visualizations if requested
|
376 |
+
6. DO NOT use plt.show() - plots are automatically captured
|
377 |
+
7. Only return the Python code, no explanations
|
378 |
+
|
379 |
+
Code:"""
|
380 |
+
|
381 |
+
try:
|
382 |
+
# Get code from LLM
|
383 |
+
response = self.llm_client.invoke(code_prompt)
|
384 |
+
generated_code = response.content if hasattr(response, 'content') else str(response)
|
385 |
+
|
386 |
+
# Extract Python code from the response
|
387 |
+
code = self._extract_code_from_response(generated_code)
|
388 |
+
|
389 |
+
if code:
|
390 |
+
# Execute the code
|
391 |
+
result = self.python_repl.execute_code(code)
|
392 |
+
|
393 |
+
# Format the response
|
394 |
+
return self._format_code_execution_result(question, code, result)
|
395 |
+
else:
|
396 |
+
return f"I couldn't generate appropriate code for your request: {question}"
|
397 |
+
|
398 |
+
except Exception as e:
|
399 |
+
return f"Error generating code execution response: {str(e)}"
|
400 |
+
|
401 |
+
def _extract_code_from_response(self, response: str) -> str:
|
402 |
+
"""Extract Python code from LLM response"""
|
403 |
+
# Look for code blocks
|
404 |
+
code_patterns = [
|
405 |
+
r'```python\s*\n(.*?)\n```',
|
406 |
+
r'```\s*\n(.*?)\n```',
|
407 |
+
r'`([^`]+)`'
|
408 |
+
]
|
409 |
+
|
410 |
+
for pattern in code_patterns:
|
411 |
+
matches = re.findall(pattern, response, re.DOTALL)
|
412 |
+
if matches:
|
413 |
+
code_result = matches[0].strip()
|
414 |
+
# Remove plt.show() calls as they don't work with non-GUI backend
|
415 |
+
code_result = re.sub(r'plt\.show\(\)\s*', '', code_result)
|
416 |
+
return code_result
|
417 |
+
|
418 |
+
# If no code blocks found, assume the entire response is code
|
419 |
+
lines = response.strip().split('\n')
|
420 |
+
code_lines = []
|
421 |
+
|
422 |
+
for line in lines:
|
423 |
+
# Skip common non-code patterns
|
424 |
+
if any(skip in line.lower() for skip in ['here', 'this code', 'explanation', 'result']):
|
425 |
+
continue
|
426 |
+
if line.strip().startswith(('#', '//', '/*')):
|
427 |
+
continue
|
428 |
+
code_lines.append(line)
|
429 |
+
|
430 |
+
code_result = '\n'.join(code_lines).strip()
|
431 |
+
|
432 |
+
# Remove plt.show() calls as they don't work with non-GUI backend
|
433 |
+
code_result = re.sub(r'plt\.show\(\)\s*', '', code_result)
|
434 |
+
|
435 |
+
return code_result
|
436 |
+
|
437 |
+
def _format_code_execution_result(self, question: str, code: str, result: Dict[str, Any]) -> str:
|
438 |
+
"""Format the code execution result for display"""
|
439 |
+
response_parts = [
|
440 |
+
f"## 🐍 **Code Execution Result**",
|
441 |
+
f"**Question:** {question}",
|
442 |
+
"",
|
443 |
+
"### **Code:**",
|
444 |
+
f"```python",
|
445 |
+
code,
|
446 |
+
"```",
|
447 |
+
""
|
448 |
+
]
|
449 |
+
|
450 |
+
if result['success']:
|
451 |
+
if result['output']:
|
452 |
+
response_parts.extend([
|
453 |
+
"### **Output:**",
|
454 |
+
"```",
|
455 |
+
result['output'],
|
456 |
+
"```",
|
457 |
+
""
|
458 |
+
])
|
459 |
+
|
460 |
+
if result['plots']:
|
461 |
+
response_parts.extend([
|
462 |
+
"### **Generated Plots:**",
|
463 |
+
f"📊 {len(result['plots'])} plot(s) created.",
|
464 |
+
""
|
465 |
+
])
|
466 |
+
|
467 |
+
# Add each plot as a base64 image
|
468 |
+
for i, plot_base64 in enumerate(result['plots'], 1):
|
469 |
+
response_parts.extend([
|
470 |
+
f"**Plot {i}:**",
|
471 |
+
f"",
|
472 |
+
""
|
473 |
+
])
|
474 |
+
else:
|
475 |
+
response_parts.extend([
|
476 |
+
"### **❌ Error:**",
|
477 |
+
"```",
|
478 |
+
result['error'],
|
479 |
+
"```",
|
480 |
+
""
|
481 |
+
])
|
482 |
+
|
483 |
+
return "\n".join(response_parts)
|
484 |
+
|
485 |
+
def get_basic_response(self, question: str, df: pd.DataFrame) -> str:
    """Answer ``question`` without an LLM by routing on keywords.

    The lower-cased question is checked against each keyword group in order;
    the first group with a substring hit selects the generator method. When
    no group matches, the default response generator is used.
    """
    lowered = question.lower()

    # (keywords, name of the generator method) in priority order. Handlers
    # are looked up by name only once their group has matched.
    routes = (
        (('overview', 'summary', 'describe', 'about'), '_generate_data_overview'),
        (('missing', 'null', 'empty', 'incomplete'), '_generate_missing_data_analysis'),
        (('correlation', 'relationship', 'related', 'associated'), '_generate_correlation_analysis'),
        (('statistics', 'stats', 'mean', 'average', 'median'), '_generate_statistics_analysis'),
        (('chart', 'plot', 'visualize', 'graph'), '_generate_visualization_suggestions'),
        (('quality', 'clean', 'issues', 'problems'), '_generate_data_quality_analysis'),
    )

    for keywords, handler_name in routes:
        if any(word in lowered for word in keywords):
            handler = getattr(self, handler_name)
            return handler(df)

    return self._generate_default_response(question, df)
|
518 |
+
|
519 |
+
def _generate_data_overview(self, df: pd.DataFrame) -> str:
|
520 |
+
"""Generate data overview response"""
|
521 |
+
numeric_cols = len(df.select_dtypes(include=['number']).columns)
|
522 |
+
categorical_cols = len(df.select_dtypes(include=['object']).columns)
|
523 |
+
|
524 |
+
return f"""📊 **Data Overview**
|
525 |
+
|
526 |
+
**Dataset Summary:**
|
527 |
+
• Shape: {df.shape[0]:,} rows × {df.shape[1]} columns
|
528 |
+
• Numeric columns: {numeric_cols}
|
529 |
+
• Categorical columns: {categorical_cols}
|
530 |
+
• Total data points: {df.shape[0] * df.shape[1]:,}
|
531 |
+
|
532 |
+
**Key Insights:**
|
533 |
+
• The dataset contains {df.shape[0]:,} observations
|
534 |
+
• Memory usage: ~{df.memory_usage().sum() / 1024:.1f} KB
|
535 |
+
• Column diversity: {df.shape[1]} different variables to analyze
|
536 |
+
|
537 |
+
💡 **Suggested next steps:** Explore correlations, check data quality, or create visualizations!
|
538 |
+
"""
|
539 |
+
|
540 |
+
def _generate_missing_data_analysis(self, df: pd.DataFrame) -> str:
|
541 |
+
"""Generate missing data analysis response"""
|
542 |
+
missing = df.isnull().sum()
|
543 |
+
missing_cols = missing[missing > 0]
|
544 |
+
|
545 |
+
if missing_cols.empty:
|
546 |
+
return """✅ **Missing Data Analysis**
|
547 |
+
|
548 |
+
**Great news!** Your dataset has no missing values. This indicates:
|
549 |
+
• High data quality
|
550 |
+
• Complete observations for all variables
|
551 |
+
• Ready for analysis without imputation
|
552 |
+
|
553 |
+
💡 **This makes your analysis more reliable and straightforward!**
|
554 |
+
"""
|
555 |
+
else:
|
556 |
+
total_missing = missing_cols.sum()
|
557 |
+
missing_percentage = (total_missing / (df.shape[0] * df.shape[1])) * 100
|
558 |
+
|
559 |
+
missing_info = "\n".join([f"• {col}: {count} missing ({count/len(df)*100:.1f}%)"
|
560 |
+
for col, count in missing_cols.head(5).items()])
|
561 |
+
|
562 |
+
return f"""⚠️ **Missing Data Analysis**
|
563 |
+
|
564 |
+
**Missing Data Found:**
|
565 |
+
{missing_info}
|
566 |
+
|
567 |
+
**Impact Assessment:**
|
568 |
+
• Total missing values: {total_missing:,}
|
569 |
+
• Percentage of dataset: {missing_percentage:.2f}%
|
570 |
+
• Affected columns: {len(missing_cols)}
|
571 |
+
|
572 |
+
💡 **Recommendations:**
|
573 |
+
• Consider data imputation strategies
|
574 |
+
• Analyze patterns in missing data
|
575 |
+
• Evaluate if missing data is random or systematic
|
576 |
+
"""
|
577 |
+
|
578 |
+
def _generate_correlation_analysis(self, df: pd.DataFrame) -> str:
|
579 |
+
"""Generate correlation analysis response"""
|
580 |
+
numeric_cols = df.select_dtypes(include=['number']).columns
|
581 |
+
|
582 |
+
if len(numeric_cols) < 2:
|
583 |
+
return """📊 **Correlation Analysis**
|
584 |
+
|
585 |
+
**Limited Analysis:** Your dataset has fewer than 2 numeric columns, so correlation analysis isn't applicable.
|
586 |
+
|
587 |
+
💡 **Suggestions:**
|
588 |
+
• Look at categorical relationships instead
|
589 |
+
• Consider frequency distributions
|
590 |
+
• Explore data patterns within individual variables
|
591 |
+
"""
|
592 |
+
|
593 |
+
# Calculate correlations
|
594 |
+
corr_matrix = df[numeric_cols].corr()
|
595 |
+
|
596 |
+
# Find strong correlations
|
597 |
+
strong_corr = []
|
598 |
+
for i in range(len(corr_matrix.columns)):
|
599 |
+
for j in range(i+1, len(corr_matrix.columns)):
|
600 |
+
corr_val = corr_matrix.iloc[i, j]
|
601 |
+
if abs(corr_val) > 0.5:
|
602 |
+
strength = "Strong" if abs(corr_val) > 0.7 else "Moderate"
|
603 |
+
direction = "positive" if corr_val > 0 else "negative"
|
604 |
+
strong_corr.append((corr_matrix.columns[i], corr_matrix.columns[j],
|
605 |
+
corr_val, strength, direction))
|
606 |
+
|
607 |
+
if strong_corr:
|
608 |
+
corr_info = "\n".join([f"• {pair[0]} ↔ {pair[1]}: {pair[2]:.3f} ({pair[3]} {pair[4]})"
|
609 |
+
for pair in strong_corr[:5]])
|
610 |
+
return f"""🔗 **Correlation Analysis**
|
611 |
+
|
612 |
+
**Strong Relationships Found:**
|
613 |
+
{corr_info}
|
614 |
+
|
615 |
+
**Analysis Summary:**
|
616 |
+
• {len(strong_corr)} significant correlations detected
|
617 |
+
• Analyzed {len(numeric_cols)} numeric variables
|
618 |
+
• Correlation threshold: >0.5
|
619 |
+
|
620 |
+
💡 **Insights:** These relationships could be key for predictive modeling or understanding data patterns!
|
621 |
+
"""
|
622 |
+
else:
|
623 |
+
return f"""🔗 **Correlation Analysis**
|
624 |
+
|
625 |
+
**Analysis Results:**
|
626 |
+
• Analyzed {len(numeric_cols)} numeric variables
|
627 |
+
• No strong correlations (>0.5) detected
|
628 |
+
• Variables appear relatively independent
|
629 |
+
|
630 |
+
💡 **This suggests:**
|
631 |
+
• Variables measure different aspects
|
632 |
+
• Good for diverse analysis approaches
|
633 |
+
• Less multicollinearity concerns
|
634 |
+
"""
|
635 |
+
|
636 |
+
def _generate_statistics_analysis(self, df: pd.DataFrame) -> str:
|
637 |
+
"""Generate statistical analysis response"""
|
638 |
+
numeric_cols = df.select_dtypes(include=['number']).columns
|
639 |
+
|
640 |
+
if len(numeric_cols) == 0:
|
641 |
+
return """📊 **Statistical Analysis**
|
642 |
+
|
643 |
+
**No numeric columns found** for statistical analysis.
|
644 |
+
|
645 |
+
💡 **Alternative approaches:**
|
646 |
+
• Frequency distributions for categorical data
|
647 |
+
• Mode analysis for text columns
|
648 |
+
• Data type conversions if needed
|
649 |
+
"""
|
650 |
+
|
651 |
+
stats_summary = []
|
652 |
+
for col in numeric_cols[:5]: # Limit to first 5 columns
|
653 |
+
data = df[col]
|
654 |
+
stats_summary.append(f"**{col}:**")
|
655 |
+
stats_summary.append(f" • Mean: {data.mean():.2f}")
|
656 |
+
stats_summary.append(f" • Median: {data.median():.2f}")
|
657 |
+
stats_summary.append(f" • Std Dev: {data.std():.2f}")
|
658 |
+
stats_summary.append(f" • Range: {data.min():.2f} to {data.max():.2f}")
|
659 |
+
stats_summary.append("")
|
660 |
+
|
661 |
+
return f"""📊 **Statistical Analysis**
|
662 |
+
|
663 |
+
{chr(10).join(stats_summary)}
|
664 |
+
|
665 |
+
**Key Insights:**
|
666 |
+
• {len(numeric_cols)} numeric variables analyzed
|
667 |
+
• Statistical distributions vary across columns
|
668 |
+
• Ready for advanced analytics
|
669 |
+
|
670 |
+
💡 **Next steps:** Consider outlier detection, normalization, or predictive modeling!
|
671 |
+
"""
|
672 |
+
|
673 |
+
def _generate_visualization_suggestions(self, df: pd.DataFrame) -> str:
|
674 |
+
"""Generate visualization suggestions"""
|
675 |
+
numeric_cols = df.select_dtypes(include=['number']).columns
|
676 |
+
categorical_cols = df.select_dtypes(include=['object']).columns
|
677 |
+
|
678 |
+
suggestions = []
|
679 |
+
|
680 |
+
if len(numeric_cols) >= 2:
|
681 |
+
suggestions.append("• **Scatter Plot**: Explore relationships between numeric variables")
|
682 |
+
suggestions.append("• **Correlation Heatmap**: Visualize all correlations at once")
|
683 |
+
|
684 |
+
if len(numeric_cols) >= 1:
|
685 |
+
suggestions.append("• **Histogram**: Show distribution of numeric variables")
|
686 |
+
suggestions.append("• **Box Plot**: Identify outliers and quartiles")
|
687 |
+
|
688 |
+
if len(categorical_cols) >= 1:
|
689 |
+
suggestions.append("• **Bar Chart**: Compare categories and frequencies")
|
690 |
+
suggestions.append("• **Pie Chart**: Show proportions of categories")
|
691 |
+
|
692 |
+
if len(numeric_cols) >= 1 and len(categorical_cols) >= 1:
|
693 |
+
suggestions.append("• **Grouped Charts**: Compare numeric values across categories")
|
694 |
+
|
695 |
+
if not suggestions:
|
696 |
+
suggestions.append("• **Data Table**: Explore your data structure first")
|
697 |
+
|
698 |
+
return f"""📈 **Visualization Suggestions**
|
699 |
+
|
700 |
+
**Recommended Charts for Your Data:**
|
701 |
+
{chr(10).join(suggestions)}
|
702 |
+
|
703 |
+
**Data Composition:**
|
704 |
+
• Numeric columns: {len(numeric_cols)}
|
705 |
+
• Categorical columns: {len(categorical_cols)}
|
706 |
+
• Total observations: {len(df):,}
|
707 |
+
|
708 |
+
💡 **Tip:** Start with simple charts and build complexity as you discover patterns!
|
709 |
+
"""
|
710 |
+
|
711 |
+
def _generate_data_quality_analysis(self, df: pd.DataFrame) -> str:
|
712 |
+
"""Generate data quality analysis"""
|
713 |
+
quality_issues = []
|
714 |
+
quality_score = 100
|
715 |
+
|
716 |
+
# Check for missing values
|
717 |
+
missing_count = df.isnull().sum().sum()
|
718 |
+
if missing_count > 0:
|
719 |
+
missing_pct = (missing_count / (df.shape[0] * df.shape[1])) * 100
|
720 |
+
quality_issues.append(f"• Missing values: {missing_count:,} ({missing_pct:.1f}% of data)")
|
721 |
+
quality_score -= min(missing_pct * 2, 30)
|
722 |
+
|
723 |
+
# Check for duplicate rows
|
724 |
+
duplicate_count = df.duplicated().sum()
|
725 |
+
if duplicate_count > 0:
|
726 |
+
duplicate_pct = (duplicate_count / len(df)) * 100
|
727 |
+
quality_issues.append(f"• Duplicate rows: {duplicate_count} ({duplicate_pct:.1f}%)")
|
728 |
+
quality_score -= min(duplicate_pct * 1.5, 25)
|
729 |
+
|
730 |
+
# Check for potential outliers in numeric columns
|
731 |
+
numeric_cols = df.select_dtypes(include=['number']).columns
|
732 |
+
outlier_cols = []
|
733 |
+
for col in numeric_cols:
|
734 |
+
Q1 = df[col].quantile(0.25)
|
735 |
+
Q3 = df[col].quantile(0.75)
|
736 |
+
IQR = Q3 - Q1
|
737 |
+
outliers = df[(df[col] < (Q1 - 1.5 * IQR)) | (df[col] > (Q3 + 1.5 * IQR))][col].count()
|
738 |
+
if outliers > len(df) * 0.05: # More than 5% outliers
|
739 |
+
outlier_cols.append((col, outliers))
|
740 |
+
|
741 |
+
if outlier_cols:
|
742 |
+
quality_issues.append(f"• Potential outliers detected in {len(outlier_cols)} columns")
|
743 |
+
quality_score -= len(outlier_cols) * 5
|
744 |
+
|
745 |
+
quality_score = max(quality_score, 0)
|
746 |
+
|
747 |
+
if not quality_issues:
|
748 |
+
return f"""✅ **Data Quality Assessment**
|
749 |
+
|
750 |
+
**Excellent Data Quality! Score: {quality_score:.0f}/100**
|
751 |
+
|
752 |
+
**Quality Indicators:**
|
753 |
+
• No missing values detected
|
754 |
+
• No duplicate rows found
|
755 |
+
• Outliers within acceptable ranges
|
756 |
+
• Data ready for analysis
|
757 |
+
|
758 |
+
💡 **Your data is clean and analysis-ready!**
|
759 |
+
"""
|
760 |
+
else:
|
761 |
+
status_color = "🟢" if quality_score >= 80 else "🟡" if quality_score >= 60 else "🔴"
|
762 |
+
|
763 |
+
return f"""{status_color} **Data Quality Assessment**
|
764 |
+
|
765 |
+
**Quality Score: {quality_score:.0f}/100**
|
766 |
+
|
767 |
+
**Issues Detected:**
|
768 |
+
{chr(10).join(quality_issues)}
|
769 |
+
|
770 |
+
**Recommendations:**
|
771 |
+
• Address missing values through imputation or removal
|
772 |
+
• Consider duplicate row handling strategy
|
773 |
+
• Investigate outliers for business significance
|
774 |
+
|
775 |
+
💡 **Data cleaning will improve analysis reliability!**
|
776 |
+
"""
|
777 |
+
|
778 |
+
def _generate_default_response(self, question: str, df: pd.DataFrame) -> str:
|
779 |
+
"""Generate default response with data context"""
|
780 |
+
return f"""🤖 **AI Assistant** (Demo Mode)
|
781 |
+
|
782 |
+
**Your Question:** "{question}"
|
783 |
+
|
784 |
+
📊 **Dataset Context:**
|
785 |
+
• Shape: {df.shape[0]:,} rows × {df.shape[1]} columns
|
786 |
+
• Numeric columns: {len(df.select_dtypes(include=['number']).columns)}
|
787 |
+
• Categorical columns: {len(df.select_dtypes(include=['object']).columns)}
|
788 |
+
|
789 |
+
**I can help you with:**
|
790 |
+
• Data overviews and summaries
|
791 |
+
• Missing data analysis
|
792 |
+
• Correlation insights
|
793 |
+
• Statistical descriptions
|
794 |
+
• Visualization suggestions
|
795 |
+
• Data quality assessment
|
796 |
+
|
797 |
+
💡 **Try asking:** "What's the data overview?" or "Are there any correlations?"
|
798 |
+
|
799 |
+
⚙️ **Note:** Add OPENROUTER_API_KEY for advanced AI capabilities!
|
800 |
+
"""
|
801 |
+
|
802 |
+
# Create singleton instance
# One module-level AIAssistant is shared by every get_ai_response() call.
ai_assistant = AIAssistant()

def get_ai_response(question: str, df: pd.DataFrame) -> str:
    """Main function to get AI response - can be called from main app

    Registers ``df`` on the shared ``ai_assistant`` via ``set_dataset`` and
    returns whatever ``get_llm_response`` produces for ``question``.

    Args:
        question: The user's question.
        df: Dataset the question refers to.

    Returns:
        The assistant's reply.
    """
    ai_assistant.set_dataset(df)
    # Try LLM response first, fallback to basic response
    # NOTE(review): the fallback is presumably implemented inside
    # get_llm_response; nothing in this function performs it - confirm.
    return ai_assistant.get_llm_response(question, df)
|
810 |
+
|
811 |
+
# Additional utility functions that can be expanded
|
812 |
+
|
813 |
+
def suggest_chart_type(df: pd.DataFrame, x_col: str = None, y_col: str = None) -> Dict[str, Any]:
    """Suggest the best chart type based on data types

    A column counts as numeric when pandas reports a numeric dtype for it;
    every other dtype (object, category, datetime, ...) is treated as
    categorical.

    Args:
        df: Dataset containing the columns.
        x_col: Optional column intended for the X axis.
        y_col: Optional column intended for the Y axis.

    Returns:
        A dict with keys ``'recommended'`` (chart type name),
        ``'alternatives'`` (list of chart type names) and ``'reasoning'``
        (human-readable explanation). When no column is given the default
        ``'scatter'`` suggestion with empty reasoning is returned.

    Raises:
        KeyError: If a supplied column name is not present in ``df``.
    """
    suggestions = {
        'recommended': 'scatter',
        'alternatives': [],
        'reasoning': ''
    }

    def _is_numeric(column):
        return pd.api.types.is_numeric_dtype(df[column].dtype)

    if x_col and y_col:
        x_numeric = _is_numeric(x_col)
        y_numeric = _is_numeric(y_col)

        # Both numeric
        if x_numeric and y_numeric:
            suggestions['recommended'] = 'scatter'
            suggestions['alternatives'] = ['line', 'heatmap']
            suggestions['reasoning'] = 'Both variables are numeric - scatter plot shows relationships best'

        # Exactly one numeric. Testing "not numeric" rather than "object"
        # keeps category/datetime columns out of the both-categorical branch.
        elif x_numeric != y_numeric:
            suggestions['recommended'] = 'bar'
            suggestions['alternatives'] = ['box', 'violin']
            suggestions['reasoning'] = 'Categorical vs numeric - bar chart shows comparisons clearly'

        # Both categorical
        else:
            suggestions['recommended'] = 'bar'
            suggestions['alternatives'] = ['heatmap']
            suggestions['reasoning'] = 'Both categorical - bar chart shows frequency distributions'

    elif x_col or y_col:
        # A single column may arrive through either parameter.
        single_col = x_col or y_col
        if _is_numeric(single_col):
            suggestions['recommended'] = 'histogram'
            suggestions['alternatives'] = ['box']
            suggestions['reasoning'] = 'Single numeric variable - histogram shows distribution'
        else:
            suggestions['recommended'] = 'pie'
            suggestions['alternatives'] = ['bar']
            suggestions['reasoning'] = 'Single categorical variable - pie chart shows proportions'

    return suggestions
|
855 |
+
|
856 |
+
def analyze_data_patterns(df: pd.DataFrame) -> Dict[str, Any]:
    """Collect simple patterns found in ``df``.

    Only the ``'correlations'`` entry is populated at present: one dict per
    pair of numeric columns whose absolute correlation exceeds 0.7. The
    ``'trends'``, ``'outliers'`` and ``'insights'`` lists are returned empty.
    """
    # This can be expanded with more sophisticated analysis
    patterns = dict(trends=[], outliers=[], correlations=[], insights=[])

    numeric_cols = df.select_dtypes(include=['number']).columns
    if len(numeric_cols) < 2:
        return patterns

    corr_matrix = df[numeric_cols].corr()
    labels = corr_matrix.columns
    size = len(labels)

    # Visit each unordered pair once (upper triangle of the matrix).
    for row in range(size):
        for col in range(row + 1, size):
            value = corr_matrix.iloc[row, col]
            # Written as "not >" so that NaN correlations are skipped too.
            if not abs(value) > 0.7:
                continue
            patterns['correlations'].append({
                'variables': (labels[row], labels[col]),
                'correlation': value,
                'strength': 'strong'
            })

    return patterns
|
ai_enhanced_app.py
ADDED
@@ -0,0 +1,607 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import base64
|
3 |
+
import io
|
4 |
+
import pandas as pd
|
5 |
+
import plotly.express as px
|
6 |
+
import plotly.graph_objects as go
|
7 |
+
from dash import Dash, html, dcc, Input, Output, State, callback_context
|
8 |
+
import dash_bootstrap_components as dbc
|
9 |
+
import numpy as np
|
10 |
+
from scipy import stats
|
11 |
+
import re
|
12 |
+
|
13 |
+
# Initialize Dash app
# The Bootstrap stylesheet is loaded so the dash_bootstrap_components
# widgets used in the layout are styled.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Expose the app's `server` attribute at module level.
# NOTE(review): presumably so a WSGI host can import it - confirm against the deployment setup.
server = app.server
|
16 |
+
|
17 |
+
class AIVisualizationEngine:
    """Rule-based helper that recommends charts and derives simple insights.

    The engine classifies the columns of a DataFrame once, at construction
    time, into numeric, categorical (``object`` dtype) and datetime lists,
    and answers questions about them using fixed heuristics.
    """

    def __init__(self, df):
        """Store ``df`` and pre-compute its column lists by dtype."""
        self.df = df
        self.numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
        self.categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
        self.datetime_cols = df.select_dtypes(include=['datetime64']).columns.tolist()

    def recommend_chart_type(self, x_col=None, y_col=None):
        """AI-powered chart type recommendation

        Any column that is not in ``numeric_cols`` is treated as categorical.

        Args:
            x_col: Column chosen for the X axis, if any.
            y_col: Column chosen for the Y axis, if any.

        Returns:
            A list of ``{'type', 'confidence', 'reason'}`` dicts ordered by
            descending confidence, so element 0 is the preferred chart.
            The list is empty when no column is given.
        """
        recommendations = []

        if x_col and y_col:
            x_numeric = x_col in self.numeric_cols
            y_numeric = y_col in self.numeric_cols

            if x_numeric and y_numeric:
                recommendations = [
                    {'type': 'scatter', 'confidence': 0.9, 'reason': 'Both variables are numeric - scatter plot shows correlation'},
                    {'type': 'line', 'confidence': 0.7, 'reason': 'Line chart good for trends if X is ordered'},
                ]
            elif not x_numeric and y_numeric:
                recommendations = [
                    {'type': 'bar', 'confidence': 0.9, 'reason': 'Categorical vs numeric - bar chart shows comparisons'},
                    {'type': 'box', 'confidence': 0.8, 'reason': 'Box plot shows distribution across categories'},
                ]
            elif x_numeric and not y_numeric:
                # Mirror of the case above; previously this combination
                # produced no recommendation at all.
                recommendations = [
                    {'type': 'bar', 'confidence': 0.8, 'reason': 'Numeric vs categorical - bar chart shows comparisons'},
                    {'type': 'box', 'confidence': 0.7, 'reason': 'Box plot shows distribution across categories'},
                ]
            else:
                recommendations = [
                    {'type': 'bar', 'confidence': 0.8, 'reason': 'Count relationships between categories'},
                ]
        elif x_col or y_col:
            # A single column may arrive through either parameter.
            single_col = x_col or y_col
            if single_col in self.numeric_cols:
                recommendations = [
                    {'type': 'histogram', 'confidence': 0.9, 'reason': 'Single numeric variable - histogram shows distribution'},
                    {'type': 'box', 'confidence': 0.7, 'reason': 'Box plot shows statistical summary'},
                ]
            else:
                recommendations = [
                    {'type': 'bar', 'confidence': 0.9, 'reason': 'Bar chart shows category frequencies'},
                    {'type': 'pie', 'confidence': 0.8, 'reason': 'Categorical variable - pie chart shows proportions'},
                ]

        # Callers take element 0 as "the" recommendation, so guarantee the
        # highest-confidence entry comes first (sorted() is stable).
        return sorted(recommendations, key=lambda rec: rec['confidence'], reverse=True)

    def detect_outliers(self, column):
        """Detect outliers using IQR method

        Returns:
            Index labels of the rows whose value in ``column`` lies outside
            ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]``; an empty list when
            ``column`` is not numeric.
        """
        if column not in self.numeric_cols:
            return []

        Q1 = self.df[column].quantile(0.25)
        Q3 = self.df[column].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR

        outliers = self.df[(self.df[column] < lower_bound) | (self.df[column] > upper_bound)]
        return outliers.index.tolist()

    def generate_insights(self, x_col, y_col=None):
        """Generate AI insights about the data

        For a numeric ``x_col`` this reports mean/median, flags a possibly
        skewed distribution (mean and median more than half a standard
        deviation apart) and counts IQR outliers. When ``y_col`` is numeric
        as well, a correlation note is added for |r| > 0.7 or |r| < 0.3;
        values in between produce no note.

        Returns:
            A list of human-readable insight strings (possibly empty).
        """
        insights = []

        if x_col in self.numeric_cols:
            mean_val = self.df[x_col].mean()
            median_val = self.df[x_col].median()
            std_val = self.df[x_col].std()

            insights.append(f"📊 {x_col}: Mean = {mean_val:.2f}, Median = {median_val:.2f}")

            if abs(mean_val - median_val) > std_val * 0.5:
                insights.append(f"⚠️ {x_col} distribution appears skewed")

            outliers = self.detect_outliers(x_col)
            if outliers:
                insights.append(f"🎯 Found {len(outliers)} potential outliers in {x_col}")

        if y_col and x_col in self.numeric_cols and y_col in self.numeric_cols:
            correlation = self.df[x_col].corr(self.df[y_col])
            if abs(correlation) > 0.7:
                strength = "strong" if abs(correlation) > 0.8 else "moderate"
                direction = "positive" if correlation > 0 else "negative"
                insights.append(f"🔗 {strength.title()} {direction} correlation ({correlation:.3f}) between {x_col} and {y_col}")
            elif abs(correlation) < 0.3:
                insights.append(f"📈 Weak correlation ({correlation:.3f}) between {x_col} and {y_col}")

        return insights

    def parse_natural_language_query(self, query):
        """Simple NLP to parse visualization requests

        Matching is plain substring search on the lower-cased query.

        Returns:
            A dict with ``'chart_type'`` (first chart whose keyword occurs
            in the query, or ``None``), ``'columns'`` (DataFrame columns
            whose name, or name with underscores replaced by spaces, occurs
            in the query) and ``'query'`` (the normalised query text).
        """
        query = query.lower().strip()

        # Extract chart types. Order matters: the first match wins, so
        # 'heatmap' must precede 'scatter' or its 'correlation matrix'
        # keyword would always be shadowed by scatter's 'correlation'.
        chart_keywords = {
            'heatmap': ['heatmap', 'correlation matrix'],
            'scatter': ['scatter', 'correlation', 'relationship'],
            'bar': ['bar', 'compare', 'comparison', 'by'],
            'histogram': ['histogram', 'distribution', 'freq'],
            'line': ['line', 'trend', 'over time', 'timeline'],
            'box': ['box', 'quartile', 'median'],
            'pie': ['pie', 'proportion', 'percentage'],
        }

        suggested_chart = None
        for chart_type, keywords in chart_keywords.items():
            if any(keyword in query for keyword in keywords):
                suggested_chart = chart_type
                break

        # Extract column names. Labels are passed through str() because
        # column labels are not guaranteed to be strings (e.g. integers).
        mentioned_cols = []
        for col in self.df.columns:
            col_name = str(col).lower()
            if col_name in query or col_name.replace('_', ' ') in query:
                mentioned_cols.append(col)

        return {
            'chart_type': suggested_chart,
            'columns': mentioned_cols,
            'query': query
        }

    def get_smart_color_scheme(self, chart_type, column=None):
        """AI-powered color scheme selection

        Looks ``chart_type`` up in a fixed table and falls back to
        ``'Viridis'`` for unknown types. ``column`` is accepted but unused.
        """
        color_schemes = {
            'scatter': 'Viridis',
            'line': 'Blues',
            'bar': 'Set3',
            'histogram': 'Plasma',
            'box': 'Set2',
            'pie': 'Pastel',
            'heatmap': 'RdBu_r'
        }
        return color_schemes.get(chart_type, 'Viridis')
|
148 |
+
|
149 |
+
# App layout with AI features
# Two-column page: a narrow control panel (upload, query box, quick
# analytics buttons) on the left and the chart plus a data preview on the
# right. Component ids defined here are the ones the callbacks refer to.
app.layout = dbc.Container([
    # Page header
    dbc.Row([
        dbc.Col([
            html.H1("🤖 AI-Enhanced Data Dashboard", className="text-center mb-4"),
            html.P("Upload data and let AI help you create intelligent visualizations!",
                   className="text-center text-muted"),
            html.Hr(),
        ], width=12)
    ]),

    dbc.Row([
        # Left column: controls
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4("📁 Data Upload", className="card-title"),
                    dcc.Upload(
                        id='upload-data',
                        children=html.Div([
                            'Drag and Drop or ',
                            html.A('Select Files')
                        ]),
                        style={
                            'width': '100%',
                            'height': '60px',
                            'lineHeight': '60px',
                            'borderWidth': '1px',
                            'borderStyle': 'dashed',
                            'borderRadius': '5px',
                            'textAlign': 'center',
                            'margin': '10px'
                        },
                        multiple=False,
                        # NOTE(review): every extension offered here needs a
                        # matching branch in the upload parser - confirm.
                        accept='.csv,.xlsx,.txt'
                    ),

                    # Filled with a success/error alert after an upload
                    html.Div(id='upload-status', className="mt-2"),
                    html.Hr(),

                    html.H4("🎯 AI Query Interface", className="card-title"),
                    dbc.InputGroup([
                        dbc.Input(
                            id="ai-query",
                            placeholder="Try: 'Show scatter plot of age vs salary' or 'Bar chart of departments'",
                            type="text",
                        ),
                        dbc.Button(
                            "🤖 AI Create",
                            id="ai-create-btn",
                            color="primary",
                            n_clicks=0
                        )
                    ]),

                    # Output area for the parsed-query feedback
                    html.Div(id="ai-recommendations", className="mt-3"),
                    html.Hr(),

                    html.H4("📊 Quick Analytics", className="card-title"),
                    dbc.ButtonGroup([
                        dbc.Button("Summary Stats", id="stats-btn", size="sm"),
                        dbc.Button("AI Insights", id="insights-btn", size="sm"),
                        dbc.Button("Outliers", id="outliers-btn", size="sm"),
                    ], className="w-100"),

                    # Output area for the three buttons above
                    html.Div(id="quick-analytics", className="mt-3")
                ])
            ])
        ], width=4),

        # Right column: chart and data preview
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4("📈 AI-Enhanced Visualizations", className="card-title"),

                    # Chart controls
                    dbc.Row([
                        dbc.Col([
                            html.Label("Chart Type:", className="form-label"),
                            dcc.Dropdown(
                                id='chart-type',
                                options=[
                                    {'label': 'AI Recommend', 'value': 'ai_recommend'},
                                    {'label': 'Scatter Plot', 'value': 'scatter'},
                                    {'label': 'Line Chart', 'value': 'line'},
                                    {'label': 'Bar Chart', 'value': 'bar'},
                                    {'label': 'Histogram', 'value': 'histogram'},
                                    {'label': 'Box Plot', 'value': 'box'},
                                    {'label': 'Heatmap', 'value': 'heatmap'},
                                    {'label': 'Pie Chart', 'value': 'pie'}
                                ],
                                value='ai_recommend',
                                className="mb-2"
                            )
                        ], width=6),
                        dbc.Col([
                            html.Label("Color By:", className="form-label"),
                            dcc.Dropdown(
                                id='color-column',
                                placeholder="AI will suggest colors",
                                className="mb-2"
                            )
                        ], width=6)
                    ]),

                    # Axis selectors; their options are supplied after upload
                    dbc.Row([
                        dbc.Col([
                            html.Label("X-Axis:", className="form-label"),
                            dcc.Dropdown(
                                id='x-column',
                                placeholder="Select X column"
                            )
                        ], width=6),
                        dbc.Col([
                            html.Label("Y-Axis:", className="form-label"),
                            dcc.Dropdown(
                                id='y-column',
                                placeholder="Select Y column"
                            )
                        ], width=6)
                    ], className="mb-3"),

                    dcc.Graph(id='main-graph', style={'height': '500px'}),

                    html.Div(id='ai-insights-display', className="mt-3")
                ])
            ]),

            dbc.Card([
                dbc.CardBody([
                    html.H4("🔍 Data Explorer", className="card-title"),
                    # Filled with a preview table after an upload
                    html.Div(id='data-table')
                ])
            ], className="mt-3")
        ], width=8)
    ], className="mt-4"),

    # Store components
    # 'stored-data' holds the uploaded rows for the other callbacks.
    dcc.Store(id='stored-data'),
    # NOTE(review): no callback visible in this part of the file reads or
    # writes 'ai-engine' - confirm whether it is still needed.
    dcc.Store(id='ai-engine'),
], fluid=True)
|
289 |
+
|
290 |
+
def parse_contents(contents, filename):
    """Parse uploaded file contents

    Args:
        contents: Upload payload as a data URL of the form
            ``"<content type>,<base64 data>"``.
        filename: Original file name; its extension (case-insensitive)
            selects the parser.

    Returns:
        ``(DataFrame, None)`` on success, or ``(None, message)`` when the
        file type is unsupported or the payload cannot be decoded/parsed.
        This function does not raise for bad input.
    """
    try:
        # Split once only: everything after the first comma is payload.
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)

        # Decide by the real extension; a substring test would mis-route
        # names such as "csv_export.xlsx" and miss upper-case extensions.
        extension = os.path.splitext(filename or '')[1].lower()

        if extension == '.csv':
            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM.
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))
        elif extension in ('.xls', '.xlsx', '.xlsm'):
            df = pd.read_excel(io.BytesIO(decoded))
        elif extension == '.txt':
            # The upload widget offers .txt files; treat them as delimited
            # text and let pandas sniff the separator.
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')),
                             sep=None, engine='python')
        else:
            return None, "Unsupported file type"

        return df, None
    except Exception as e:
        # Deliberately broad: any decoding/parsing failure is reported to
        # the user as a message instead of crashing the callback.
        return None, f"Error processing file: {str(e)}"
|
306 |
+
|
307 |
+
@app.callback(
    [Output('stored-data', 'data'),
     Output('upload-status', 'children'),
     Output('data-table', 'children'),
     Output('x-column', 'options'),
     Output('y-column', 'options'),
     Output('color-column', 'options'),
     Output('x-column', 'value'),
     Output('y-column', 'value')],
    [Input('upload-data', 'contents')],
    [State('upload-data', 'filename')]
)
def update_data(contents, filename):
    """Refresh the stored data and dependent widgets after an upload.

    Returns, in order: the rows to store, the upload status message, the
    preview table, the options for the X / Y / colour dropdowns, and the
    default X and Y selections. With no upload, or when parsing fails,
    every widget is reset (a failure also shows an error alert).
    """
    if contents is None:
        return None, "", "", [], [], [], None, None

    df, error = parse_contents(contents, filename)
    if error:
        failure = dbc.Alert(error, color="danger")
        return None, failure, "", [], [], [], None, None

    # AI analysis of dataset
    ai_engine = AIVisualizationEngine(df)
    numeric_names = ai_engine.numeric_cols

    # Preview of the first ten rows
    preview = dbc.Table.from_dataframe(
        df.head(10), striped=True, bordered=True, hover=True, size='sm'
    )

    summary = dbc.Alert([
        html.H6("✅ File uploaded successfully! 🤖 AI Ready"),
        html.P(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns"),
        html.P(f"📊 Numeric: {len(numeric_names)}, 📝 Categorical: {len(ai_engine.categorical_cols)}")
    ], color="success")

    # Same option list feeds all three dropdowns
    column_options = [{'label': name, 'value': name} for name in df.columns]

    # Default axes: prefer numeric columns, otherwise fall back to the
    # first columns of the frame.
    candidates = numeric_names if numeric_names else [opt['value'] for opt in column_options]
    default_x = candidates[0] if candidates else None
    default_y = candidates[1] if len(candidates) > 1 else None

    return (df.to_dict('records'), summary, preview,
            column_options, column_options, column_options,
            default_x, default_y)
|
359 |
+
|
360 |
+
@app.callback(
    [Output('chart-type', 'value'),
     Output('ai-recommendations', 'children')],
    [Input('ai-create-btn', 'n_clicks')],
    [State('ai-query', 'value'),
     State('stored-data', 'data')]
)
def handle_ai_query(n_clicks, query, data):
    """Handle AI natural language queries

    Args:
        n_clicks: Click count of the "AI Create" button.
        query: Text typed into the query box.
        data: Stored dataset rows.

    Returns:
        ``(chart_type, feedback)`` where ``chart_type`` is the chart parsed
        from the query (or ``'ai_recommend'`` when none was recognised) and
        ``feedback`` is an info alert describing what was detected. When
        the button was not clicked, or the query or data is missing,
        ``('ai_recommend', "")`` is returned.
    """
    if not n_clicks or not query or not data:
        return 'ai_recommend', ""

    df = pd.DataFrame(data)
    ai_engine = AIVisualizationEngine(df)

    # Parse the natural language query
    parsed = ai_engine.parse_natural_language_query(query)

    recommendations = []
    if parsed['chart_type']:
        recommendations.append(f"🎯 Suggested chart type: **{parsed['chart_type'].title()}**")

    if parsed['columns']:
        # str() keeps join() safe if a column label is not a string.
        recommendations.append(f"📊 Detected columns: {', '.join(str(col) for col in parsed['columns'])}")

    if not recommendations:
        recommendations.append("🤖 Try queries like: 'scatter age salary', 'bar chart departments', 'histogram of scores'")

    # The messages use Markdown (**bold**). Passing the bare strings to the
    # alert would show the asterisks literally and run the lines together,
    # so each message is rendered through its own Markdown component.
    feedback = dbc.Alert([dcc.Markdown(line) for line in recommendations], color="info")
    return parsed['chart_type'] or 'ai_recommend', feedback
|
389 |
+
|
390 |
+
@app.callback(
    Output('quick-analytics', 'children'),
    [Input('stats-btn', 'n_clicks'),
     Input('insights-btn', 'n_clicks'),
     Input('outliers-btn', 'n_clicks')],
    [State('stored-data', 'data'),
     State('x-column', 'value'),
     State('y-column', 'value')]
)
def quick_analytics(stats_clicks, insights_clicks, outliers_clicks, data, x_col, y_col):
    """Render the result of whichever quick-analytics button was pressed.

    The pressed button is identified from the callback context; the click
    counters themselves are not inspected. Returns an alert component, or
    an empty string when there is no data or nothing triggered the call.
    """
    if not data:
        return ""

    df = pd.DataFrame(data)
    ai_engine = AIVisualizationEngine(df)

    triggered = callback_context.triggered
    if not triggered:
        return ""

    # prop_id looks like "<component id>.<property>"
    source_id = triggered[0]['prop_id'].split('.')[0]

    if source_id == 'stats-btn':
        summary = df.describe().reset_index()
        return dbc.Alert([
            html.H6("📊 Summary Statistics"),
            dbc.Table.from_dataframe(summary, size='sm')
        ], color="light")

    if source_id == 'insights-btn':
        if not x_col:
            return dbc.Alert("Select columns to get AI insights", color="warning")
        findings = ai_engine.generate_insights(x_col, y_col)
        return dbc.Alert([
            html.H6("🤖 AI Insights"),
            html.Ul([html.Li(text) for text in findings])
        ], color="info")

    if source_id == 'outliers-btn':
        if not (x_col and x_col in ai_engine.numeric_cols):
            return dbc.Alert("Select a numeric column to detect outliers", color="warning")
        outlier_labels = ai_engine.detect_outliers(x_col)
        if not outlier_labels:
            return dbc.Alert(f"✅ No outliers detected in {x_col}", color="success")
        outlier_rows = df.loc[outlier_labels, [x_col]]
        return dbc.Alert([
            html.H6(f"🎯 Outliers in {x_col}"),
            dbc.Table.from_dataframe(outlier_rows.reset_index(), size='sm')
        ], color="warning")

    return ""
|
442 |
+
|
443 |
+
def _placeholder_figure(message, **annotation_style):
    """Return an empty figure showing *message* as an annotation at (0.5, 0.5)."""
    fig = go.Figure()
    fig.add_annotation(text=message, x=0.5, y=0.5, showarrow=False, **annotation_style)
    return fig


@app.callback(
    [Output('main-graph', 'figure'),
     Output('ai-insights-display', 'children')],
    [Input('stored-data', 'data'),
     Input('chart-type', 'value'),
     Input('x-column', 'value'),
     Input('y-column', 'value'),
     Input('color-column', 'value')]
)
def update_main_graph(data, chart_type, x_col, y_col, color_col):
    """Rebuild the main figure (and optional insights panel) from the controls.

    Args:
        data: stored dataset as a list of records; falsy when nothing is loaded.
        chart_type: one of 'scatter', 'line', 'bar', 'histogram', 'box',
            'heatmap', 'pie', or 'ai_recommend' (replaced by the engine's
            first recommendation when an X column is selected).
        x_col: selected X column, or None.
        y_col: selected Y column, or None.
        color_col: optional column used for colouring / grouping.

    Returns:
        ``(figure, insights)`` where ``insights`` is a ``dbc.Alert`` for
        scatter plots with both axes selected and ``""`` otherwise. Any
        exception raised while building the chart is shown as a red
        annotation in the figure instead of propagating.
    """
    if not data:
        fig = _placeholder_figure("Upload data to see AI-powered visualizations",
                                  font=dict(size=16, color="gray"))
        fig.update_layout(template="plotly_white")
        return fig, ""

    df = pd.DataFrame(data)
    ai_engine = AIVisualizationEngine(df)

    # AI recommendation system
    if chart_type == 'ai_recommend' and x_col:
        recommendations = ai_engine.recommend_chart_type(x_col, y_col)
        if recommendations:
            chart_type = recommendations[0]['type']

    # Handle cases where columns aren't selected yet
    if not x_col and not y_col:
        fig = _placeholder_figure("Select columns or use AI Query to create visualization",
                                  font=dict(size=16, color="gray"))
        fig.update_layout(template="plotly_white")
        return fig, ""

    insights_display = ""

    try:
        # Get AI-powered color scheme
        color_scheme = ai_engine.get_smart_color_scheme(chart_type, color_col)

        # Create visualization based on chart type
        if chart_type == 'scatter':
            if x_col and y_col:
                fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
                                 title=f"🤖 AI Scatter Plot: {y_col} vs {x_col}",
                                 color_continuous_scale=color_scheme)
                # Add AI insights
                insights = ai_engine.generate_insights(x_col, y_col)
                insights_display = dbc.Alert([
                    html.H6("🤖 AI Insights"),
                    html.Ul([html.Li(insight) for insight in insights])
                ], color="info")
            else:
                fig = _placeholder_figure("Select both X and Y columns for scatter plot")

        elif chart_type == 'line':
            if x_col and y_col:
                fig = px.line(df, x=x_col, y=y_col, color=color_col,
                              title=f"🤖 AI Line Chart: {y_col} vs {x_col}",
                              color_discrete_sequence=px.colors.qualitative.Set3)
            else:
                fig = _placeholder_figure("Select both X and Y columns for line chart")

        elif chart_type == 'bar':
            if x_col and y_col:
                fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                             title=f"🤖 AI Bar Chart: {y_col} by {x_col}",
                             color_discrete_sequence=px.colors.qualitative.Set3)
            elif x_col:
                # Pass index/values explicitly: the column names produced by
                # value_counts().reset_index() differ between pandas 1.x and
                # 2.x, so relying on 'index' / x_col breaks on newer pandas.
                value_counts = df[x_col].value_counts()
                fig = px.bar(x=value_counts.index, y=value_counts.values,
                             labels={'x': x_col, 'y': 'count'},
                             title=f"🤖 AI Value Counts: {x_col}",
                             color_discrete_sequence=px.colors.qualitative.Set3)
            else:
                fig = _placeholder_figure("Select at least X column for bar chart")

        elif chart_type == 'histogram':
            if x_col:
                fig = px.histogram(df, x=x_col, color=color_col,
                                   title=f"🤖 AI Histogram: {x_col}",
                                   color_discrete_sequence=px.colors.qualitative.Pastel)
                # Add statistical annotations
                if x_col in ai_engine.numeric_cols:
                    mean_val = df[x_col].mean()
                    # Explicit NaN test: a mean of exactly 0 is valid and must
                    # still be drawn, while NaN (all-missing column) must not.
                    if pd.notna(mean_val):
                        fig.add_vline(x=mean_val, line_dash="dash", line_color="red",
                                      annotation_text=f"Mean: {mean_val:.2f}")
            else:
                fig = _placeholder_figure("Select X column for histogram")

        elif chart_type == 'box':
            if y_col:
                fig = px.box(df, x=color_col, y=y_col,
                             title=f"🤖 AI Box Plot: {y_col}" + (f" by {color_col}" if color_col else ""),
                             color_discrete_sequence=px.colors.qualitative.Set2)
            elif x_col:
                fig = px.box(df, y=x_col,
                             title=f"🤖 AI Box Plot: {x_col}",
                             color_discrete_sequence=px.colors.qualitative.Set2)
            else:
                fig = _placeholder_figure("Select a column for box plot")

        elif chart_type == 'heatmap':
            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 1:
                corr_matrix = df[numeric_cols].corr()
                fig = px.imshow(corr_matrix,
                                text_auto=True,
                                aspect="auto",
                                title="🤖 AI Correlation Heatmap",
                                color_continuous_scale='RdBu_r')
            else:
                fig = _placeholder_figure("Need at least 2 numeric columns for heatmap")

        elif chart_type == 'pie':
            if x_col:
                value_counts = df[x_col].value_counts()
                fig = px.pie(values=value_counts.values,
                             names=value_counts.index,
                             title=f"🤖 AI Pie Chart: {x_col}",
                             color_discrete_sequence=px.colors.qualitative.Pastel)
            else:
                fig = _placeholder_figure("Select X column for pie chart")

        else:
            fig = _placeholder_figure("🤖 AI is analyzing... Select chart type or use AI Query")

        # Apply AI styling enhancements
        fig.update_layout(
            template="plotly_white",
            height=500,
            font=dict(size=12),
            title_font_size=16,
        )

        return fig, insights_display

    except Exception as e:
        # Callback boundary: surface the failure in the figure rather than
        # letting the callback error out.
        fig = _placeholder_figure(f"AI Error: {str(e)}", font=dict(color="red"))
        fig.update_layout(template="plotly_white")
        return fig, ""
|
605 |
+
|
606 |
+
if __name__ == '__main__':
    import os

    # The server binds to every interface, so the interactive debugger (which
    # allows arbitrary code execution from the browser) must not be on by
    # default. Opt in explicitly for local development: DASH_DEBUG=1.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host='0.0.0.0', port=8051, debug=debug_enabled)
|
app1.py
ADDED
@@ -0,0 +1,842 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import base64
|
3 |
+
import io
|
4 |
+
import pandas as pd
|
5 |
+
import plotly.express as px
|
6 |
+
import plotly.graph_objects as go
|
7 |
+
from dash import Dash, html, dcc, Input, Output, State, callback_context
|
8 |
+
import dash_bootstrap_components as dbc
|
9 |
+
from typing import Optional
|
10 |
+
from dotenv import load_dotenv
|
11 |
+
from pydantic import Field, SecretStr
|
12 |
+
import numpy as np
|
13 |
+
|
14 |
+
# Langchain imports - simplified without embeddings
|
15 |
+
from langchain_community.vectorstores import FAISS
|
16 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
17 |
+
from langchain.schema import Document
|
18 |
+
from langchain_core.prompts import PromptTemplate
|
19 |
+
|
20 |
+
# Read variables from a local .env file (if any) into the process environment.
load_dotenv()

# Simplified build: no OpenRouter model is created, so AI is flagged as off.
openrouter_model = None
AI_AVAILABLE = False

# Dash application styled with the Bootstrap theme; `server` exposes the
# app's underlying server object as a module-level name.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
server = app.server

# Module-level slot for a vector store; starts empty.
vector_store = None
|
33 |
+
|
34 |
+
# Built-in datasets
|
35 |
+
def create_builtin_datasets():
    """Create the built-in sample datasets.

    All data is synthetic and generated with NumPy's global RNG, which is
    seeded with 42 at the start of the call, so the output is reproducible
    for a given environment. The one exception is Iris: the real dataset is
    loaded from scikit-learn when it is installed, otherwise a random
    stand-in with the same shape (150 rows, 4 features + ``species``) is used.

    Returns:
        dict mapping dataset name ('Gapminder', 'Iris', 'Tips', 'Stock Data',
        'Wind Data') to a ``pandas.DataFrame``.
    """
    datasets = {}

    # Gapminder dataset
    np.random.seed(42)
    # Countries not listed here fall through to 'Oceania'.
    continent_of = {
        'China': 'Asia', 'India': 'Asia', 'Japan': 'Asia',
        'Germany': 'Europe', 'UK': 'Europe', 'France': 'Europe',
        'USA': 'Americas', 'Brazil': 'Americas', 'Canada': 'Americas',
    }
    countries = ['USA', 'China', 'India', 'Germany', 'UK', 'France', 'Japan', 'Brazil', 'Canada', 'Australia']
    years = list(range(2000, 2021))
    gapminder_data = []
    for country in countries:
        base_gdp = np.random.uniform(20000, 80000)
        base_life_exp = np.random.uniform(70, 85)
        base_pop = np.random.uniform(10000000, 100000000)
        continent = continent_of.get(country, 'Oceania')
        for year in years:
            gapminder_data.append({
                'country': country,
                'year': year,
                'gdpPercap': base_gdp * (1 + np.random.uniform(-0.1, 0.15)) * ((year-2000)*0.02 + 1),
                'lifeExp': base_life_exp + np.random.uniform(-2, 3) + (year-2000)*0.1,
                'pop': base_pop * (1.01 + np.random.uniform(-0.005, 0.015))**(year-2000),
                'continent': continent,
            })
    datasets['Gapminder'] = pd.DataFrame(gapminder_data)

    # Iris dataset
    # The import must live inside the try: a missing scikit-learn raises
    # ImportError at the import statement itself, not at load_iris().
    try:
        from sklearn.datasets import load_iris
    except ImportError:
        # Fallback if sklearn not available
        iris_data = {
            'sepal_length': np.random.normal(5.8, 0.8, 150),
            'sepal_width': np.random.normal(3.0, 0.4, 150),
            'petal_length': np.random.normal(3.8, 1.8, 150),
            'petal_width': np.random.normal(1.2, 0.8, 150),
            'species': ['setosa']*50 + ['versicolor']*50 + ['virginica']*50
        }
        datasets['Iris'] = pd.DataFrame(iris_data)
    else:
        iris = load_iris()
        datasets['Iris'] = pd.DataFrame(iris.data, columns=iris.feature_names)
        datasets['Iris']['species'] = [iris.target_names[i] for i in iris.target]

    # Tips dataset
    tips_data = {
        'total_bill': np.random.uniform(10, 50, 200),
        'tip': np.random.uniform(1, 10, 200),
        'sex': np.random.choice(['Male', 'Female'], 200),
        'smoker': np.random.choice(['Yes', 'No'], 200),
        'day': np.random.choice(['Thur', 'Fri', 'Sat', 'Sun'], 200),
        'time': np.random.choice(['Lunch', 'Dinner'], 200),
        'size': np.random.choice([1, 2, 3, 4, 5, 6], 200)
    }
    datasets['Tips'] = pd.DataFrame(tips_data)

    # Stock Data: a multiplicative random walk starting at 100, one row per day.
    dates = pd.date_range('2020-01-01', '2023-12-31', freq='D')
    stock_price = 100
    stock_data = []
    for date in dates:
        daily_return = np.random.normal(0.001, 0.02)
        stock_price *= (1 + daily_return)
        stock_data.append({
            'date': date,
            'price': stock_price,
            'volume': np.random.randint(1000000, 5000000),
            'high': stock_price * (1 + abs(np.random.normal(0, 0.01))),
            'low': stock_price * (1 - abs(np.random.normal(0, 0.01))),
            'open': stock_price * (1 + np.random.normal(0, 0.005))
        })
    datasets['Stock Data'] = pd.DataFrame(stock_data)

    # Wind Data: hourly rows for days 1-28 of each of the 12 months.
    hours = list(range(24))
    wind_data = []
    for month in range(1, 13):
        for day in range(1, 29):
            for hour in hours:
                wind_data.append({
                    'month': month,
                    'day': day,
                    'hour': hour,
                    'wind_speed': abs(np.random.normal(15, 8)) + 5*np.sin(hour/24*2*np.pi),
                    'temperature': np.random.normal(20, 15) + 10*np.cos(month/12*2*np.pi),
                    'humidity': np.random.uniform(30, 90),
                    'pressure': np.random.normal(1013, 20)
                })
    datasets['Wind Data'] = pd.DataFrame(wind_data)

    return datasets
|
123 |
+
|
124 |
+
# Initialize built-in datasets
builtin_datasets = create_builtin_datasets()


def _same_label_options(names):
    """Build dropdown options whose label and value are the same string."""
    return [{"label": name, "value": name} for name in names]


def _card_column(body_children, width):
    """Wrap *body_children* as CardBody -> Card inside a column of *width*."""
    return dbc.Col([dbc.Card([dbc.CardBody(body_children)])], width=width)


def _tab(label, tab_id, columns):
    """Build a tab whose content is a single top-margined row of *columns*."""
    return dbc.Tab(label=label, tab_id=tab_id,
                   children=[dbc.Row(columns, className="mt-4")])


# Page header
_header_row = dbc.Row([
    dbc.Col([
        html.H1("🤖 AI-Powered Data Analytics", className="text-center mb-4"),
        html.P("Upload data, ask questions, and get AI-powered insights!",
               className="text-center text-muted"),
        html.Hr(),
    ], width=12)
])

# Tab 1: Dataset Management
_upload_box_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px'
}

_dataset_controls = [
    html.H4("Load Built-in Dataset", className="card-title"),
    dcc.Dropdown(
        id="builtin-choice",
        options=_same_label_options(
            ["Gapminder", "Iris", "Tips", "Stock Data", "Wind Data"]),
        value="Gapminder",
        className="mb-2"
    ),
    dbc.Button("Load Dataset", id="load-builtin-btn", color="primary", className="mb-3"),

    html.Hr(),
    html.H4("Upload Custom Dataset", className="card-title"),
    dcc.Upload(
        id='file-upload',
        children=html.Div([
            'Drag and Drop or ',
            html.A('Select CSV/Excel Files')
        ]),
        style=_upload_box_style,
        multiple=False,
        accept='.csv,.xlsx,.xls'
    ),

    dbc.Input(
        id="custom-name",
        placeholder="Dataset Name (optional)",
        type="text",
        className="mb-2"
    ),
    dbc.Button("Upload", id="upload-btn", color="primary", className="mb-3"),

    html.Hr(),
    html.H4("Active Datasets", className="card-title"),
    dcc.Dropdown(
        id="dataset-selector",
        options=_same_label_options(["Gapminder"]),
        value="Gapminder",
        className="mb-2"
    ),

    html.Hr(),
    html.Div(id="status-msg", children=[
        dbc.Alert("Ready to load data", color="info")
    ]),
    html.Div(id="data-info")
]

_dataset_preview = [
    html.H4("Data Preview (First 10 rows)", className="card-title"),
    html.Div(id="data-preview", className="mb-4"),
    html.H4("Quick Analytics", className="card-title"),
    html.Div(id="auto-analytics")
]

_dataset_tab = _tab(
    "📁 Dataset Management", "dataset-management",
    [_card_column(_dataset_controls, 4), _card_column(_dataset_preview, 8)],
)

# Tab 2: AI Assistant
_assistant_body = [
    html.H4("🤖 AI Assistant", className="card-title"),
    html.Div(id="ai-dataset-info", className="mb-3", children=[
        dbc.Alert("No dataset loaded. Please load a dataset in the Dataset Management tab first.",
                  color="warning", className="mb-3")
    ]),
    dbc.InputGroup([
        dbc.Input(
            id="ai-question",
            placeholder="Ask questions about your data...",
            type="text",
            style={"fontSize": "14px"}
        ),
        dbc.Button(
            "Ask AI",
            id="ask-button",
            color="primary",
            n_clicks=0
        )
    ]),

    html.Div(id="ai-response", className="mt-3")
]

_assistant_tab = _tab("🤖 AI Assistant", "ai-assistant",
                      [_card_column(_assistant_body, 12)])

# Tab 3: Visualizations
_chart_type_options = [
    {'label': label, 'value': value}
    for label, value in (
        ('Scatter Plot', 'scatter'),
        ('Line Chart', 'line'),
        ('Bar Chart', 'bar'),
        ('Histogram', 'histogram'),
        ('Box Plot', 'box'),
        ('Heatmap', 'heatmap'),
        ('Pie Chart', 'pie'),
    )
]

_visualization_body = [
    html.H4("📈 Visualizations", className="card-title"),

    # Chart controls
    dbc.Row([
        dbc.Col([
            html.Label("Chart Type:", className="form-label"),
            dcc.Dropdown(
                id='chart-type',
                options=_chart_type_options,
                value='scatter',
                className="mb-2"
            )
        ], width=6),
        dbc.Col([
            html.Label("Color By:", className="form-label"),
            dcc.Dropdown(
                id='color-column',
                placeholder="Select column (optional)",
                className="mb-2"
            )
        ], width=6)
    ]),

    dbc.Row([
        dbc.Col([
            html.Label("X-Axis:", className="form-label"),
            dcc.Dropdown(
                id='x-column',
                placeholder="Select X column"
            )
        ], width=6),
        dbc.Col([
            html.Label("Y-Axis:", className="form-label"),
            dcc.Dropdown(
                id='y-column',
                placeholder="Select Y column"
            )
        ], width=6)
    ], className="mb-3"),

    dcc.Graph(id='main-graph', style={'height': '500px'}),
]

_visualization_tab = _tab("📈 Visualizations", "visualizations",
                          [_card_column(_visualization_body, 12)])

# Tab 4: Data Explorer
_explorer_body = [
    html.H4("🔍 Data Explorer", className="card-title"),
    html.Div(id='data-table')
]

_explorer_tab = _tab("🔍 Data Explorer", "data-explorer",
                     [_card_column(_explorer_body, 12)])

# App layout: header, tabbed interface, then the client-side stores.
app.layout = dbc.Container([
    _header_row,

    dbc.Tabs(
        [_dataset_tab, _assistant_tab, _visualization_tab, _explorer_tab],
        id="main-tabs",
        active_tab="dataset-management",
    ),

    # Store components
    dcc.Store(id='stored-data'),
    dcc.Store(id='data-context'),
    dcc.Store(id='dataset-registry', data={"Gapminder": "builtin"}),
    dcc.Store(id='current-dataset-name', data="Gapminder")
], fluid=True)
|
337 |
+
|
338 |
+
def create_vector_store(df):
    """Stub: report success without building anything.

    Args:
        df: the dataset that would be indexed; currently ignored.

    Returns:
        Always ``True``.
    """
    succeeded = True
    return succeeded
|
341 |
+
|
342 |
+
# Import AI assistant module
|
343 |
+
from ai_assistant import get_ai_response
|
344 |
+
|
345 |
+
def create_auto_analytics(df):
    """Create automatic analytics display.

    Builds, in order: summary statistics for numeric columns (when any
    exist), a missing-data report or a "no missing values" notice, a count
    of columns per broad dtype family, and up to five of the strongest
    pairwise correlations with |r| > 0.5 (when at least two numeric columns
    exist).

    Args:
        df: the ``pandas.DataFrame`` to summarise.

    Returns:
        list of Dash components ready to be used as ``children``.
    """
    analytics_components = []

    # Summary Statistics
    numeric_cols = df.select_dtypes(include=['number']).columns
    if len(numeric_cols) > 0:
        stats = df[numeric_cols].describe()
        analytics_components.extend([
            html.H6("📊 Summary Statistics", className="mt-2"),
            dbc.Table.from_dataframe(
                stats.reset_index().round(2),
                size='sm',
                striped=True,
                hover=True
            )
        ])

    # Missing Data Analysis
    missing_data = df.isnull().sum()
    missing_data = missing_data[missing_data > 0]
    if not missing_data.empty:
        analytics_components.extend([
            html.H6("⚠️ Missing Data", className="mt-3"),
            dbc.Alert([
                html.Pre(missing_data.to_string())
            ], color="warning")
        ])
    else:
        analytics_components.extend([
            html.H6("✅ Data Quality", className="mt-3"),
            dbc.Alert("No missing values found!", color="success")
        ])

    # Data Types Analysis
    analytics_components.extend([
        html.H6("🔍 Data Types", className="mt-3"),
        dbc.Alert([
            html.P(f"📈 Numeric columns: {len(numeric_cols)}"),
            html.P(f"📝 Text columns: {len(df.select_dtypes(include=['object']).columns)}"),
            html.P(f"📅 DateTime columns: {len(df.select_dtypes(include=['datetime64']).columns)}"),
            html.P(f"🔢 Boolean columns: {len(df.select_dtypes(include=['bool']).columns)}")
        ], color="light")
    ])

    # Correlation Analysis for numeric columns
    if len(numeric_cols) > 1:
        corr_matrix = df[numeric_cols].corr()
        columns = corr_matrix.columns
        # Collect each unordered pair once (upper triangle, no diagonal).
        corr_pairs = []
        for i in range(len(columns)):
            for j in range(i + 1, len(columns)):
                corr_val = corr_matrix.iloc[i, j]
                if abs(corr_val) > 0.5:  # Only show strong correlations
                    corr_pairs.append((columns[i], columns[j], corr_val))

        # Strongest first, so the five shown really are the top five rather
        # than whichever five pairs happen to come first in column order.
        corr_pairs.sort(key=lambda pair: abs(pair[2]), reverse=True)

        if corr_pairs:
            analytics_components.extend([
                html.H6("🔗 Strong Correlations (>0.5)", className="mt-3"),
                dbc.Alert([
                    html.P(f"{pair[0]} ↔ {pair[1]}: {pair[2]:.3f}") for pair in corr_pairs[:5]
                ], color="info")
            ])

    return analytics_components
|
411 |
+
|
412 |
+
def parse_contents(contents, filename):
    """Parse uploaded file contents into a DataFrame.

    ``contents`` is expected to look like ``"<header>,<base64 payload>"``;
    everything after the first comma is base64-decoded. The file type is
    taken from the filename extension, case-insensitively: ``.csv`` is read
    as UTF-8 text (a leading BOM is tolerated), ``.xls`` / ``.xlsx`` are read
    with ``pandas.read_excel``.

    Args:
        contents: the upload payload string.
        filename: original name of the uploaded file.

    Returns:
        ``(df, None)`` on success, or ``(None, message)`` when the payload is
        malformed, the extension is unsupported, or parsing fails. This
        function does not raise.
    """
    try:
        # Split on the first comma only; everything after it is payload.
        _header, separator, content_string = contents.partition(',')
        if not separator:
            return None, "Error processing file: upload is not a base64 data URL"
        decoded = base64.b64decode(content_string)

        # Match on the extension, not a substring, so names such as
        # "csv_export.xlsx" or "DATA.CSV" are routed to the right reader.
        name = (filename or "").lower()
        if name.endswith('.csv'):
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))
        elif name.endswith(('.xls', '.xlsx')):
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            return None, "Unsupported file type"

        return df, None
    except Exception as e:
        # Callback boundary: report the problem to the UI instead of crashing.
        return None, f"Error processing file: {str(e)}"
|
428 |
+
|
429 |
+
# Dataset management callbacks
|
430 |
+
@app.callback(
|
431 |
+
[Output('stored-data', 'data'),
|
432 |
+
Output('status-msg', 'children'),
|
433 |
+
Output('data-preview', 'children'),
|
434 |
+
Output('data-info', 'children'),
|
435 |
+
Output('auto-analytics', 'children'),
|
436 |
+
Output('x-column', 'options'),
|
437 |
+
Output('y-column', 'options'),
|
438 |
+
Output('color-column', 'options'),
|
439 |
+
Output('x-column', 'value'),
|
440 |
+
Output('y-column', 'value'),
|
441 |
+
Output('dataset-registry', 'data'),
|
442 |
+
Output('dataset-selector', 'options'),
|
443 |
+
Output('current-dataset-name', 'data')],
|
444 |
+
[Input('load-builtin-btn', 'n_clicks'),
|
445 |
+
Input('file-upload', 'contents'),
|
446 |
+
Input('dataset-selector', 'value')],
|
447 |
+
[State('builtin-choice', 'value'),
|
448 |
+
State('file-upload', 'filename'),
|
449 |
+
State('custom-name', 'value'),
|
450 |
+
State('dataset-registry', 'data')]
|
451 |
+
)
|
452 |
+
def manage_datasets(builtin_clicks, file_contents, selected_dataset, builtin_choice, filename, custom_name, registry):
|
453 |
+
"""Handle dataset loading and switching"""
|
454 |
+
ctx = callback_context
|
455 |
+
|
456 |
+
# Initialize defaults
|
457 |
+
registry = registry or {"Gapminder": "builtin"}
|
458 |
+
|
459 |
+
if not ctx.triggered:
|
460 |
+
# Initial load - load Gapminder dataset
|
461 |
+
df = builtin_datasets["Gapminder"]
|
462 |
+
dataset_name = "Gapminder"
|
463 |
+
|
464 |
+
# Create vector store for AI
|
465 |
+
vector_success = create_vector_store(df)
|
466 |
+
|
467 |
+
# Create data table preview
|
468 |
+
table = dbc.Table.from_dataframe(
|
469 |
+
df.head(10),
|
470 |
+
striped=True,
|
471 |
+
bordered=True,
|
472 |
+
hover=True,
|
473 |
+
size='sm'
|
474 |
+
)
|
475 |
+
|
476 |
+
ai_status = "🤖 AI Ready" if vector_success else "⚠️ AI Limited"
|
477 |
+
status_msg = dbc.Alert(f"✅ Gapminder dataset loaded! {ai_status}", color="success")
|
478 |
+
|
479 |
+
data_info = dbc.Alert([
|
480 |
+
html.H6("Dataset Information:"),
|
481 |
+
html.P(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns"),
|
482 |
+
html.P(f"Columns: {', '.join(df.columns.tolist())}"),
|
483 |
+
html.P(f"Data types: {len(df.select_dtypes(include=['number']).columns)} numeric, {len(df.select_dtypes(include=['object']).columns)} categorical")
|
484 |
+
], color="light")
|
485 |
+
|
486 |
+
# Create automatic analytics
|
487 |
+
auto_analytics = create_auto_analytics(df)
|
488 |
+
|
489 |
+
# Create column options for dropdowns
|
490 |
+
all_columns = [{'label': col, 'value': col} for col in df.columns]
|
491 |
+
numeric_columns = [{'label': col, 'value': col} for col in df.select_dtypes(include=['number']).columns]
|
492 |
+
|
493 |
+
# Set default values - prefer numeric columns for x and y
|
494 |
+
default_x = numeric_columns[0]['value'] if numeric_columns else all_columns[0]['value'] if all_columns else None
|
495 |
+
default_y = numeric_columns[1]['value'] if len(numeric_columns) > 1 else (numeric_columns[0]['value'] if numeric_columns else (all_columns[1]['value'] if len(all_columns) > 1 else None))
|
496 |
+
|
497 |
+
selector_options = [{"label": name, "value": name} for name in registry.keys()]
|
498 |
+
|
499 |
+
return df.to_dict('records'), status_msg, table, data_info, auto_analytics, all_columns, all_columns, all_columns, default_x, default_y, registry, selector_options, dataset_name
|
500 |
+
|
501 |
+
trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]
|
502 |
+
|
503 |
+
if trigger_id == 'load-builtin-btn' and builtin_clicks:
|
504 |
+
# Load built-in dataset
|
505 |
+
if builtin_choice in builtin_datasets:
|
506 |
+
df = builtin_datasets[builtin_choice]
|
507 |
+
registry[builtin_choice] = "builtin"
|
508 |
+
|
509 |
+
# Create vector store for AI
|
510 |
+
vector_success = create_vector_store(df)
|
511 |
+
|
512 |
+
# Create data table preview
|
513 |
+
table = dbc.Table.from_dataframe(
|
514 |
+
df.head(10),
|
515 |
+
striped=True,
|
516 |
+
bordered=True,
|
517 |
+
hover=True,
|
518 |
+
size='sm'
|
519 |
+
)
|
520 |
+
|
521 |
+
ai_status = "🤖 AI Ready" if vector_success else "⚠️ AI Limited"
|
522 |
+
status_msg = dbc.Alert(f"✅ {builtin_choice} dataset loaded! {ai_status}", color="success")
|
523 |
+
|
524 |
+
data_info = dbc.Alert([
|
525 |
+
html.H6(f"{builtin_choice} Dataset Information:"),
|
526 |
+
html.P(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns"),
|
527 |
+
html.P(f"Columns: {', '.join(df.columns.tolist())}"),
|
528 |
+
html.P(f"Data types: {len(df.select_dtypes(include=['number']).columns)} numeric, {len(df.select_dtypes(include=['object']).columns)} categorical")
|
529 |
+
], color="light")
|
530 |
+
|
531 |
+
# Create automatic analytics
|
532 |
+
auto_analytics = create_auto_analytics(df)
|
533 |
+
|
534 |
+
# Create column options for dropdowns
|
535 |
+
all_columns = [{'label': col, 'value': col} for col in df.columns]
|
536 |
+
numeric_columns = [{'label': col, 'value': col} for col in df.select_dtypes(include=['number']).columns]
|
537 |
+
|
538 |
+
# Set default values - prefer numeric columns for x and y
|
539 |
+
default_x = numeric_columns[0]['value'] if numeric_columns else all_columns[0]['value'] if all_columns else None
|
540 |
+
default_y = numeric_columns[1]['value'] if len(numeric_columns) > 1 else (numeric_columns[0]['value'] if numeric_columns else (all_columns[1]['value'] if len(all_columns) > 1 else None))
|
541 |
+
|
542 |
+
selector_options = [{"label": name, "value": name} for name in registry.keys()]
|
543 |
+
|
544 |
+
return df.to_dict('records'), status_msg, table, data_info, auto_analytics, all_columns, all_columns, all_columns, default_x, default_y, registry, selector_options, builtin_choice
|
545 |
+
|
546 |
+
elif trigger_id == 'file-upload' and file_contents:
|
547 |
+
# Upload custom dataset
|
548 |
+
df, error = parse_contents(file_contents, filename)
|
549 |
+
|
550 |
+
if error:
|
551 |
+
status_msg = dbc.Alert(error, color="danger")
|
552 |
+
selector_options = [{"label": name, "value": name} for name in registry.keys()]
|
553 |
+
return None, status_msg, "", "", "", [], [], [], None, None, registry, selector_options, None
|
554 |
+
|
555 |
+
# Determine dataset name
|
556 |
+
dataset_name = custom_name if custom_name else filename.split('.')[0]
|
557 |
+
registry[dataset_name] = "custom"
|
558 |
+
|
559 |
+
# Create vector store for AI
|
560 |
+
vector_success = create_vector_store(df)
|
561 |
+
|
562 |
+
# Create data table preview
|
563 |
+
table = dbc.Table.from_dataframe(
|
564 |
+
df.head(10),
|
565 |
+
striped=True,
|
566 |
+
bordered=True,
|
567 |
+
hover=True,
|
568 |
+
size='sm'
|
569 |
+
)
|
570 |
+
|
571 |
+
ai_status = "🤖 AI Ready" if vector_success else "⚠️ AI Limited"
|
572 |
+
status_msg = dbc.Alert(f"✅ {dataset_name} uploaded successfully! {ai_status}", color="success")
|
573 |
+
|
574 |
+
data_info = dbc.Alert([
|
575 |
+
html.H6(f"{dataset_name} Dataset Information:"),
|
576 |
+
html.P(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns"),
|
577 |
+
html.P(f"Columns: {', '.join(df.columns.tolist())}"),
|
578 |
+
html.P(f"Data types: {len(df.select_dtypes(include=['number']).columns)} numeric, {len(df.select_dtypes(include=['object']).columns)} categorical")
|
579 |
+
], color="light")
|
580 |
+
|
581 |
+
# Create automatic analytics
|
582 |
+
auto_analytics = create_auto_analytics(df)
|
583 |
+
|
584 |
+
# Create column options for dropdowns
|
585 |
+
all_columns = [{'label': col, 'value': col} for col in df.columns]
|
586 |
+
numeric_columns = [{'label': col, 'value': col} for col in df.select_dtypes(include=['number']).columns]
|
587 |
+
|
588 |
+
# Set default values - prefer numeric columns for x and y
|
589 |
+
default_x = numeric_columns[0]['value'] if numeric_columns else all_columns[0]['value'] if all_columns else None
|
590 |
+
default_y = numeric_columns[1]['value'] if len(numeric_columns) > 1 else (numeric_columns[0]['value'] if numeric_columns else (all_columns[1]['value'] if len(all_columns) > 1 else None))
|
591 |
+
|
592 |
+
selector_options = [{"label": name, "value": name} for name in registry.keys()]
|
593 |
+
|
594 |
+
return df.to_dict('records'), status_msg, table, data_info, auto_analytics, all_columns, all_columns, all_columns, default_x, default_y, registry, selector_options, dataset_name
|
595 |
+
|
596 |
+
elif trigger_id == 'dataset-selector' and selected_dataset:
|
597 |
+
# Switch between datasets
|
598 |
+
if selected_dataset in registry:
|
599 |
+
if registry[selected_dataset] == "builtin" and selected_dataset in builtin_datasets:
|
600 |
+
df = builtin_datasets[selected_dataset]
|
601 |
+
else:
|
602 |
+
# For custom datasets, we would need to store them persistently
|
603 |
+
# For now, just reload builtin if available
|
604 |
+
if selected_dataset in builtin_datasets:
|
605 |
+
df = builtin_datasets[selected_dataset]
|
606 |
+
else:
|
607 |
+
# Fallback to Gapminder if dataset not found
|
608 |
+
df = builtin_datasets["Gapminder"]
|
609 |
+
selected_dataset = "Gapminder"
|
610 |
+
|
611 |
+
# Create vector store for AI
|
612 |
+
vector_success = create_vector_store(df)
|
613 |
+
|
614 |
+
# Create data table preview
|
615 |
+
table = dbc.Table.from_dataframe(
|
616 |
+
df.head(10),
|
617 |
+
striped=True,
|
618 |
+
bordered=True,
|
619 |
+
hover=True,
|
620 |
+
size='sm'
|
621 |
+
)
|
622 |
+
|
623 |
+
ai_status = "🤖 AI Ready" if vector_success else "⚠️ AI Limited"
|
624 |
+
status_msg = dbc.Alert(f"✅ Switched to {selected_dataset} dataset! {ai_status}", color="success")
|
625 |
+
|
626 |
+
data_info = dbc.Alert([
|
627 |
+
html.H6(f"{selected_dataset} Dataset Information:"),
|
628 |
+
html.P(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns"),
|
629 |
+
html.P(f"Columns: {', '.join(df.columns.tolist())}"),
|
630 |
+
html.P(f"Data types: {len(df.select_dtypes(include=['number']).columns)} numeric, {len(df.select_dtypes(include=['object']).columns)} categorical")
|
631 |
+
], color="light")
|
632 |
+
|
633 |
+
# Create automatic analytics
|
634 |
+
auto_analytics = create_auto_analytics(df)
|
635 |
+
|
636 |
+
# Create column options for dropdowns
|
637 |
+
all_columns = [{'label': col, 'value': col} for col in df.columns]
|
638 |
+
numeric_columns = [{'label': col, 'value': col} for col in df.select_dtypes(include=['number']).columns]
|
639 |
+
|
640 |
+
# Set default values - prefer numeric columns for x and y
|
641 |
+
default_x = numeric_columns[0]['value'] if numeric_columns else all_columns[0]['value'] if all_columns else None
|
642 |
+
default_y = numeric_columns[1]['value'] if len(numeric_columns) > 1 else (numeric_columns[0]['value'] if numeric_columns else (all_columns[1]['value'] if len(all_columns) > 1 else None))
|
643 |
+
|
644 |
+
selector_options = [{"label": name, "value": name} for name in registry.keys()]
|
645 |
+
|
646 |
+
return df.to_dict('records'), status_msg, table, data_info, auto_analytics, all_columns, all_columns, all_columns, default_x, default_y, registry, selector_options, selected_dataset
|
647 |
+
|
648 |
+
# Default fallback
|
649 |
+
selector_options = [{"label": name, "value": name} for name in registry.keys()]
|
650 |
+
return None, "", "", "", "", [], [], [], None, None, registry, selector_options, None
|
651 |
+
|
652 |
+
# Updated callback for data table (now shared across tabs)
|
653 |
+
@app.callback(
    Output('data-table', 'children'),
    [Input('stored-data', 'data')]
)
def update_data_table(data):
    """Render the Data Explorer preview from the stored records.

    Args:
        data: Contents of the 'stored-data' store; falsy when nothing is loaded.

    Returns:
        A muted "No data loaded" paragraph when the store is empty, otherwise
        a small striped/bordered/responsive table of the first 20 rows.
    """
    if data:
        preview = pd.DataFrame(data).head(20)
        table_options = dict(
            striped=True,
            bordered=True,
            hover=True,
            size='sm',
            responsive=True,
        )
        return dbc.Table.from_dataframe(preview, **table_options)

    return html.P("No data loaded", className="text-muted")
|
671 |
+
|
672 |
+
# Callback to update AI assistant tab with current dataset info
|
673 |
+
@app.callback(
    Output('ai-dataset-info', 'children'),
    [Input('stored-data', 'data'),
     Input('current-dataset-name', 'data')]
)
def update_ai_dataset_info(data, dataset_name):
    """Summarise the currently loaded dataset for the AI assistant tab.

    Args:
        data: Contents of the 'stored-data' store.
        dataset_name: Contents of the 'current-dataset-name' store.

    Returns:
        A warning alert when either input is missing, otherwise a success
        alert listing the dataset name, shape, up to five column names and
        the numeric/object column counts.
    """
    if not (data and dataset_name):
        return dbc.Alert("No dataset loaded. Please load a dataset in the Dataset Management tab first.",
                         color="warning", className="mb-3")

    frame = pd.DataFrame(data)
    n_rows, n_cols = frame.shape

    # Only the first five column names are listed; an ellipsis marks truncation.
    listed_columns = ', '.join(frame.columns.tolist()[:5])
    truncation_mark = '...' if len(frame.columns) > 5 else ''

    numeric_count = len(frame.select_dtypes(include=['number']).columns)
    categorical_count = len(frame.select_dtypes(include=['object']).columns)

    summary = [
        html.H6(f"📊 Current Dataset: {dataset_name}"),
        html.P(f"Shape: {n_rows:,} rows × {n_cols} columns"),
        html.P(f"Columns: {listed_columns}{truncation_mark}"),
        html.P(f"Data types: {numeric_count} numeric, {categorical_count} categorical"),
        html.Small("✨ AI is ready to answer questions about this data!", className="text-muted"),
    ]
    return dbc.Alert(summary, color="success", className="mb-3")
|
692 |
+
|
693 |
+
@app.callback(
    Output('ai-response', 'children'),
    [Input('ask-button', 'n_clicks')],
    [State('ai-question', 'value'),
     State('stored-data', 'data'),
     State('current-dataset-name', 'data')]
)
def handle_ai_question(n_clicks, question, data, dataset_name):
    """Answer a user question about the loaded dataset.

    Args:
        n_clicks: Click count of the ask button; falsy before the first click.
        question: Text typed by the user.
        data: Contents of the 'stored-data' store.
        dataset_name: Contents of the 'current-dataset-name' store.

    Returns:
        An empty string when there is nothing to answer (no click, or an
        empty/blank question), a warning alert when no dataset is loaded,
        otherwise an info alert with the AI response rendered as Markdown.
    """
    # Nothing was asked yet: keep the response area empty.
    if not n_clicks or not question or not question.strip():
        return ""

    # Previously a missing dataset returned "" before this warning could be
    # shown, so a user asking without data got no feedback at all.
    if not data or not dataset_name:
        return dbc.Alert("Please load a dataset first in the Dataset Management tab.", color="warning")

    df = pd.DataFrame(data)
    response = get_ai_response(question, df)

    return dbc.Alert(
        dcc.Markdown(response),
        color="info"
    )
|
715 |
+
|
716 |
+
|
717 |
+
def _message_figure(text, **font):
    """Return an otherwise empty figure showing *text* as an annotation at (0.5, 0.5).

    Keyword arguments, when given, are passed as the annotation's ``font``.
    """
    fig = go.Figure()
    annotation = dict(text=text, x=0.5, y=0.5, showarrow=False)
    if font:
        annotation["font"] = font
    fig.add_annotation(**annotation)
    return fig


@app.callback(
    Output('main-graph', 'figure'),
    [Input('stored-data', 'data'),
     Input('chart-type', 'value'),
     Input('x-column', 'value'),
     Input('y-column', 'value'),
     Input('color-column', 'value')]
)
def update_main_graph(data, chart_type, x_col, y_col, color_col):
    """Build the main visualization from the user's chart selections.

    Args:
        data: Contents of the 'stored-data' store.
        chart_type: One of 'scatter', 'line', 'bar', 'histogram', 'box',
            'heatmap' or 'pie'; anything else yields a "Select a chart type"
            placeholder.
        x_col: Selected X column, or a falsy value.
        y_col: Selected Y column, or a falsy value.
        color_col: Optional column used for colouring/grouping.

    Returns:
        A Plotly figure. Missing data or selections produce a placeholder
        figure with an explanatory message; any exception raised while
        building the chart is reported as a red message instead of
        propagating.
    """
    if not data:
        fig = _message_figure("Upload data to see visualizations", size=16, color="gray")
        fig.update_layout(template="plotly_white")
        return fig

    df = pd.DataFrame(data)

    # The heatmap uses every numeric column and needs no X/Y selection, so it
    # must not be blocked by the "select columns" placeholder.
    if chart_type != 'heatmap' and not x_col and not y_col:
        fig = _message_figure("Select columns to create visualization", size=16, color="gray")
        fig.update_layout(template="plotly_white")
        return fig

    try:
        if chart_type == 'scatter':
            if x_col and y_col:
                fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
                                 title=f"Scatter Plot: {y_col} vs {x_col}")
            else:
                fig = _message_figure("Select both X and Y columns for scatter plot")

        elif chart_type == 'line':
            if x_col and y_col:
                fig = px.line(df, x=x_col, y=y_col, color=color_col,
                              title=f"Line Chart: {y_col} vs {x_col}")
            else:
                fig = _message_figure("Select both X and Y columns for line chart")

        elif chart_type == 'bar':
            if x_col and y_col:
                fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                             title=f"Bar Chart: {y_col} by {x_col}")
            elif x_col:
                # Pass the counts as arrays instead of relying on the column
                # names produced by value_counts().reset_index(), which
                # changed in pandas 2.0 (no 'index' column any more).
                counts = df[x_col].value_counts()
                fig = px.bar(x=counts.index, y=counts.values,
                             labels={'x': x_col, 'y': 'count'},
                             title=f"Value Counts: {x_col}")
            else:
                fig = _message_figure("Select at least X column for bar chart")

        elif chart_type == 'histogram':
            if x_col:
                fig = px.histogram(df, x=x_col, color=color_col,
                                   title=f"Histogram: {x_col}")
            else:
                fig = _message_figure("Select X column for histogram")

        elif chart_type == 'box':
            if y_col:
                # The colour column doubles as the grouping axis for box plots.
                fig = px.box(df, x=color_col, y=y_col,
                             title=f"Box Plot: {y_col}" + (f" by {color_col}" if color_col else ""))
            elif x_col:
                fig = px.box(df, y=x_col,
                             title=f"Box Plot: {x_col}")
            else:
                fig = _message_figure("Select a column for box plot")

        elif chart_type == 'heatmap':
            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 1:
                corr_matrix = df[numeric_cols].corr()
                fig = px.imshow(corr_matrix,
                                text_auto=True,
                                aspect="auto",
                                title="Correlation Heatmap",
                                color_continuous_scale='RdBu_r')
            else:
                fig = _message_figure("Need at least 2 numeric columns for heatmap")

        elif chart_type == 'pie':
            if x_col:
                value_counts = df[x_col].value_counts()
                fig = px.pie(values=value_counts.values,
                             names=value_counts.index,
                             title=f"Pie Chart: {x_col}")
            else:
                fig = _message_figure("Select X column for pie chart")

        else:
            fig = _message_figure("Select a chart type")

        fig.update_layout(template="plotly_white", height=500)
        return fig

    except Exception as e:
        # Top-level UI boundary: show the failure in the graph area rather
        # than letting the callback error out.
        fig = _message_figure(f"Error creating chart: {str(e)}", color="red")
        fig.update_layout(template="plotly_white")
        return fig
|
840 |
+
|
841 |
+
if __name__ == '__main__':
    # Serve on all interfaces, port 7860, with debug mode off.
    run_options = dict(host='0.0.0.0', port=7860, debug=False)
    app.run(**run_options)
|
gradio_demo.py
ADDED
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
# Simple Gradio examples
|
6 |
+
def simple_greet(name):
    """Return the greeting ``Hello <name>!`` for *name*."""
    greeting = "Hello {}!".format(name)
    return greeting
|
8 |
+
|
9 |
+
def simple_calculator(x, y, operation):
    """Apply a basic arithmetic *operation* to *x* and *y*.

    Args:
        x: First operand.
        y: Second operand.
        operation: One of "add", "subtract", "multiply" or "divide".

    Returns:
        The numeric result, or an explanatory string when the request cannot
        be computed (missing operand, division by zero, unknown operation).
        Messages are returned rather than raised because the result is shown
        in a text output.
    """
    # An emptied number field arrives as None; arithmetic on it would raise.
    if x is None or y is None:
        return "Please enter both numbers"

    operations = {
        "add": lambda a, b: a + b,
        "subtract": lambda a, b: a - b,
        "multiply": lambda a, b: a * b,
        "divide": lambda a, b: a / b if b != 0 else "Cannot divide by zero",
    }

    handler = operations.get(operation)
    if handler is None:
        # Previously fell through and returned None without explanation.
        return f"Unknown operation: {operation}"
    return handler(x, y)
|
18 |
+
|
19 |
+
# Simple interface examples (uncomment to use)
|
20 |
+
# demo1 = gr.Interface(
|
21 |
+
# fn=simple_greet,
|
22 |
+
# inputs="text",
|
23 |
+
# outputs="text",
|
24 |
+
# title="Simple Greeter"
|
25 |
+
# )
|
26 |
+
|
27 |
+
# demo2 = gr.Interface(
|
28 |
+
# fn=simple_calculator,
|
29 |
+
# inputs=[
|
30 |
+
# gr.Number(label="First Number"),
|
31 |
+
# gr.Number(label="Second Number"),
|
32 |
+
# gr.Radio(["add", "subtract", "multiply", "divide"], label="Operation")
|
33 |
+
# ],
|
34 |
+
# outputs="text",
|
35 |
+
# title="Calculator"
|
36 |
+
# )
|
37 |
+
|
38 |
+
def analyze_data(csv_file, chart_type):
    """Summarise an uploaded CSV file as a text report.

    Args:
        csv_file: The uploaded file, either a path string or an object
            exposing the path as ``.name``; ``None`` when nothing was uploaded.
        chart_type: The chart type chosen in the UI; only echoed in the report.

    Returns:
        A multi-line report (shape, columns, missing values, dtype counts,
        first five rows, summary statistics), a prompt to upload a file, or
        an ``Error reading file: ...`` message if reading/summarising fails.
    """
    if csv_file is None:
        return "Please upload a CSV file"

    # Accept both upload representations: an object carrying the temp-file
    # path in `.name`, or the path itself as a plain string.
    csv_path = getattr(csv_file, "name", csv_file)

    try:
        df = pd.read_csv(csv_path)

        n_rows, n_cols = df.shape
        numeric_count = len(df.select_dtypes(include=['number']).columns)
        text_count = len(df.select_dtypes(include=['object']).columns)

        # describe() is only meaningful here when numeric columns exist.
        if numeric_count > 0:
            summary = df.describe().to_string()
        else:
            summary = 'No numeric data for statistics'

        report_lines = [
            "📊 Dataset Analysis:",
            "",
            f"🔢 Shape: {n_rows} rows × {n_cols} columns",
            f"📝 Columns: {', '.join(df.columns.tolist())}",
            f"❌ Missing values: {df.isnull().sum().sum()}",
            "",
            f"📈 Numeric columns: {numeric_count}",
            f"📋 Text columns: {text_count}",
            "",
            f"💡 Chart type selected: {chart_type}",
            "",
            "📋 First 5 rows preview:",
            df.head().to_string(),
            "",
            "📊 Summary statistics:",
            summary,
        ]
        return "\n".join(report_lines) + "\n"

    except Exception as e:
        # UI boundary: report the problem in the output box instead of failing.
        return f"Error reading file: {str(e)}"
|
69 |
+
|
70 |
+
def greet(name, enthusiasm):
    """Greet *name*, followed by ``int(enthusiasm)`` exclamation marks."""
    bang_count = int(enthusiasm)
    bangs = "".join("!" for _ in range(bang_count))
    return "Hello {}{}".format(name, bangs)
|
74 |
+
|
75 |
+
def calculator(num1, operation, num2):
    """Compute ``num1 <operation> num2`` for the Calculator tab.

    Args:
        num1: First operand.
        operation: One of "Add", "Subtract", "Multiply" or "Divide".
        num2: Second operand.

    Returns:
        The numeric result.

    Raises:
        gr.Error: For a missing operand, division by zero, or an unknown
            operation. The result is wired to a numeric output component,
            which cannot display a message string, so problems are raised
            as UI errors instead of being returned.
    """
    if num1 is None or num2 is None:
        raise gr.Error("Please enter both numbers")

    if operation == "Add":
        return num1 + num2
    if operation == "Subtract":
        return num1 - num2
    if operation == "Multiply":
        return num1 * num2
    if operation == "Divide":
        if num2 == 0:
            raise gr.Error("Cannot divide by zero!")
        return num1 / num2

    # Previously fell through and returned None without explanation.
    raise gr.Error(f"Unknown operation: {operation}")
|
85 |
+
|
86 |
+
# Create Gradio interface with tabs
|
87 |
+
# Create Gradio interface with tabs
with gr.Blocks(title="Gradio Demo App") as demo:
    gr.Markdown("# 🚀 Gradio Demo Application")
    gr.Markdown("This demo showcases various Gradio components and functionalities.")

    # --- Tab 1: CSV upload and text summary -------------------------------
    with gr.Tab("📊 Data Analysis"):
        gr.Markdown("## Upload CSV and Create Visualizations")

        with gr.Row():
            with gr.Column():
                csv_input = gr.File(label="Upload CSV File", file_types=[".csv"])
                chart_dropdown = gr.Dropdown(
                    choices=["Histogram", "Scatter Plot"],
                    label="Chart Type",
                    value="Histogram"
                )
                analyze_btn = gr.Button("Analyze Data", variant="primary")

            with gr.Column():
                info_output = gr.Textbox(label="Dataset Info", lines=15, max_lines=20)

        analyze_btn.click(
            fn=analyze_data,
            inputs=[csv_input, chart_dropdown],
            outputs=info_output
        )

    # --- Tab 2: greeting with adjustable enthusiasm -----------------------
    with gr.Tab("👋 Greeting"):
        gr.Markdown("## Personal Greeting Generator")

        with gr.Row():
            name_input = gr.Textbox(label="Your Name", placeholder="Enter your name")
            enthusiasm_slider = gr.Slider(1, 10, value=3, label="Enthusiasm Level")

        greet_output = gr.Textbox(label="Greeting")
        greet_btn = gr.Button("Generate Greeting")

        greet_btn.click(
            fn=greet,
            inputs=[name_input, enthusiasm_slider],
            outputs=greet_output
        )

    # --- Tab 3: calculator with a numeric result --------------------------
    with gr.Tab("🧮 Calculator"):
        gr.Markdown("## Simple Calculator")

        with gr.Row():
            num1_input = gr.Number(label="First Number", value=0)
            operation_radio = gr.Radio(
                choices=["Add", "Subtract", "Multiply", "Divide"],
                label="Operation",
                value="Add"
            )
            num2_input = gr.Number(label="Second Number", value=0)

        calc_output = gr.Number(label="Result")
        calc_btn = gr.Button("Calculate", variant="secondary")

        calc_btn.click(
            fn=calculator,
            inputs=[num1_input, operation_radio, num2_input],
            outputs=calc_output
        )

    # --- Tab 4: outputs refreshed on every input change -------------------
    with gr.Tab("🎨 Interactive Demo"):
        gr.Markdown("## Real-time Updates")

        with gr.Row():
            slider_input = gr.Slider(0, 100, value=50, label="Value")
            checkbox_input = gr.Checkbox(label="Enable Processing", value=True)

        with gr.Row():
            text_output = gr.Textbox(label="Live Output")
            number_output = gr.Number(label="Processed Value")

        def process_inputs(value, enabled):
            """Return (message, number): value scaled by 1.5 when enabled, else unchanged."""
            if enabled:
                processed = value * 1.5
                message = f"Processing enabled: {value} → {processed}"
                return message, processed
            else:
                return "Processing disabled", value

        # Real-time updates: both controls drive the same handler.
        slider_input.change(
            fn=process_inputs,
            inputs=[slider_input, checkbox_input],
            outputs=[text_output, number_output]
        )
        checkbox_input.change(
            fn=process_inputs,
            inputs=[slider_input, checkbox_input],
            outputs=[text_output, number_output]
        )

    # --- Tab 5: code samples next to live versions of them ----------------
    with gr.Tab("📝 Basic Examples"):
        gr.Markdown("## Simple Gradio Code Examples")

        # NOTE(review): the sample text is indented with the code and relies
        # on the Markdown component stripping the common leading indentation
        # - confirm against the installed Gradio version.
        gr.Markdown("""
        ### Example 1: Simple Greeter
        ```python
        def greet(name):
            return f"Hello {name}!"

        demo = gr.Interface(
            fn=greet,
            inputs="text",
            outputs="text"
        )
        ```
        """)

        with gr.Row():
            simple_name = gr.Textbox(label="Your Name", placeholder="Enter name")
            simple_greet_output = gr.Textbox(label="Greeting")

        simple_greet_btn = gr.Button("Greet Me!")
        simple_greet_btn.click(
            fn=simple_greet,
            inputs=simple_name,
            outputs=simple_greet_output
        )

        gr.Markdown("""
        ### Example 2: Calculator with Interface
        ```python
        def calculator(x, y, operation):
            if operation == "add":
                return x + y
            # ... other operations

        demo = gr.Interface(
            fn=calculator,
            inputs=[
                gr.Number(label="First Number"),
                gr.Number(label="Second Number"),
                gr.Radio(["add", "subtract", "multiply", "divide"])
            ],
            outputs="text"
        )
        ```
        """)

        with gr.Row():
            calc_x = gr.Number(label="X", value=0)
            calc_y = gr.Number(label="Y", value=0)
            calc_op = gr.Radio(["add", "subtract", "multiply", "divide"],
                               label="Operation", value="add")

        calc_result = gr.Textbox(label="Result")
        # Distinct name: this button used to rebind `calc_btn`, shadowing the
        # Calculator tab's button.
        simple_calc_btn = gr.Button("Calculate")

        simple_calc_btn.click(
            fn=simple_calculator,
            inputs=[calc_x, calc_y, calc_op],
            outputs=calc_result
        )

        gr.Markdown("""
        ### Example 3: Custom Layout with Blocks
        ```python
        with gr.Blocks() as demo:
            gr.Markdown("# My App")

            with gr.Row():
                input1 = gr.Textbox()
                input2 = gr.Slider()

            output = gr.Textbox()
            btn = gr.Button("Process")

            btn.click(fn=my_function, inputs=[input1, input2], outputs=output)

        demo.launch()
        ```
        """)

        gr.Markdown("**Key Components:**")
        gr.Markdown("- `gr.Interface()` - Simple wrapper")
        gr.Markdown("- `gr.Blocks()` - Custom layouts")
        gr.Markdown("- `gr.Row()`, `gr.Column()` - Layout containers")
        gr.Markdown("- `gr.Textbox()`, `gr.Number()`, `gr.Slider()` - Input components")
        gr.Markdown("- `demo.launch()` - Start the server")
|
269 |
+
|
270 |
+
# Launch the app
|
271 |
+
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # Allow external access
        "server_port": 7861,       # Different port from Dash app
        "share": False,            # Set to True to create public link
        "debug": True,             # Enable debug mode
    }
    demo.launch(**launch_options)
|
requirements.txt
CHANGED
@@ -4,6 +4,7 @@ dash==2.17.1
|
|
4 |
dash-bootstrap-components==1.5.0
|
5 |
numpy==1.24.3
|
6 |
openpyxl==3.1.2
|
|
|
7 |
|
8 |
# Fixed Langchain components
|
9 |
langchain==0.2.6
|
@@ -18,5 +19,7 @@ torch==2.1.0
|
|
18 |
tokenizers==0.19.1
|
19 |
pydantic==2.5.0
|
20 |
|
21 |
-
# Additional utilities
|
22 |
-
python-dotenv==1.0.0
|
|
|
|
|
|
4 |
dash-bootstrap-components==1.5.0
|
5 |
numpy==1.24.3
|
6 |
openpyxl==3.1.2
|
7 |
+
scikit-learn==1.3.2
|
8 |
|
9 |
# Fixed Langchain components
|
10 |
langchain==0.2.6
|
|
|
19 |
tokenizers==0.19.1
|
20 |
pydantic==2.5.0
|
21 |
|
22 |
+
# Additional utilities for AI assistant
|
23 |
+
python-dotenv==1.0.0
|
24 |
+
matplotlib==3.7.5
|
25 |
+
seaborn==0.12.2
|
simple_app.py
ADDED
@@ -0,0 +1,370 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import base64
|
3 |
+
import io
|
4 |
+
import pandas as pd
|
5 |
+
import plotly.express as px
|
6 |
+
import plotly.graph_objects as go
|
7 |
+
from dash import Dash, html, dcc, Input, Output, State, callback_context
|
8 |
+
import dash_bootstrap_components as dbc
|
9 |
+
|
10 |
+
# Initialize Dash app
|
11 |
+
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
server = app.server

# App layout, assembled from named sections and composed at the bottom.

# Page title and tagline.
_header_row = dbc.Row([
    dbc.Col([
        html.H1("📊 Interactive Data Dashboard", className="text-center mb-4"),
        html.P("Upload data and create interactive visualizations with different chart types!",
               className="text-center text-muted"),
        html.Hr(),
    ], width=12)
])

# Dashed drop zone for a single data file.
_upload_box = dcc.Upload(
    id='upload-data',
    children=html.Div([
        'Drag and Drop or ',
        html.A('Select Files')
    ]),
    style={
        'width': '100%',
        'height': '60px',
        'lineHeight': '60px',
        'borderWidth': '1px',
        'borderStyle': 'dashed',
        'borderRadius': '5px',
        'textAlign': 'center',
        'margin': '10px'
    },
    multiple=False,
    accept='.csv,.xlsx,.txt'
)

# Left column: upload plus one-click analytics.
_sidebar_card = dbc.Card([
    dbc.CardBody([
        html.H4("📁 Data Upload", className="card-title"),
        _upload_box,

        html.Div(id='upload-status', className="mt-2"),
        html.Hr(),

        html.H4("📊 Quick Analytics", className="card-title"),
        dbc.ButtonGroup([
            dbc.Button("Summary Stats", id="stats-btn", size="sm"),
            dbc.Button("Correlations", id="corr-btn", size="sm"),
            dbc.Button("Missing Data", id="missing-btn", size="sm"),
        ], className="w-100"),

        html.Div(id="quick-analytics", className="mt-3")
    ])
])

# Chart controls, first row: chart type and optional colour column.
_chart_type_row = dbc.Row([
    dbc.Col([
        html.Label("Chart Type:", className="form-label"),
        dcc.Dropdown(
            id='chart-type',
            options=[
                {'label': 'Scatter Plot', 'value': 'scatter'},
                {'label': 'Line Chart', 'value': 'line'},
                {'label': 'Bar Chart', 'value': 'bar'},
                {'label': 'Histogram', 'value': 'histogram'},
                {'label': 'Box Plot', 'value': 'box'},
                {'label': 'Heatmap', 'value': 'heatmap'},
                {'label': 'Pie Chart', 'value': 'pie'}
            ],
            value='scatter',
            className="mb-2"
        )
    ], width=6),
    dbc.Col([
        html.Label("Color By:", className="form-label"),
        dcc.Dropdown(
            id='color-column',
            placeholder="Select column (optional)",
            className="mb-2"
        )
    ], width=6)
])

# Chart controls, second row: axis column pickers.
_axis_row = dbc.Row([
    dbc.Col([
        html.Label("X-Axis:", className="form-label"),
        dcc.Dropdown(
            id='x-column',
            placeholder="Select X column"
        )
    ], width=6),
    dbc.Col([
        html.Label("Y-Axis:", className="form-label"),
        dcc.Dropdown(
            id='y-column',
            placeholder="Select Y column"
        )
    ], width=6)
], className="mb-3")

_visualization_card = dbc.Card([
    dbc.CardBody([
        html.H4("📈 Visualizations", className="card-title"),
        _chart_type_row,
        _axis_row,
        dcc.Graph(id='main-graph', style={'height': '500px'}),
    ])
])

_explorer_card = dbc.Card([
    dbc.CardBody([
        html.H4("🔍 Data Explorer", className="card-title"),
        html.Div(id='data-table')
    ])
], className="mt-3")

app.layout = dbc.Container([
    _header_row,

    dbc.Row([
        dbc.Col([_sidebar_card], width=4),
        dbc.Col([_visualization_card, _explorer_card], width=8)
    ], className="mt-4"),

    # Store components
    dcc.Store(id='stored-data'),
], fluid=True)
|
132 |
+
|
133 |
+
def parse_contents(contents, filename):
    """Parse an uploaded file payload into a DataFrame.

    Args:
        contents: Upload payload of the form ``"<header>,<base64 data>"``.
        filename: Original file name; its extension selects the parser
            (``.csv`` -> CSV, ``.xls*`` -> Excel), case-insensitively.

    Returns:
        ``(df, None)`` on success, or ``(None, message)`` when the type is
        unsupported or the payload cannot be decoded/parsed.
    """
    # Decide by extension, not by substring: "csv_export.xlsx" is an Excel
    # file even though its name contains "csv".
    extension = os.path.splitext(filename or "")[1].lower()

    try:
        # Split only on the first comma and keep decoding inside the try so a
        # malformed payload is reported instead of raising.
        content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)

        if extension == '.csv':
            # utf-8-sig drops a leading BOM that would otherwise end up in
            # the first column name; plain UTF-8 decodes identically.
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))
        elif extension.startswith('.xls'):
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            return None, "Unsupported file type"

        return df, None
    except Exception as e:
        # UI boundary: surface the failure as a message for the status area.
        return None, f"Error processing file: {str(e)}"
|
149 |
+
|
150 |
+
@app.callback(
    [Output('stored-data', 'data'),
     Output('upload-status', 'children'),
     Output('data-table', 'children'),
     Output('x-column', 'options'),
     Output('y-column', 'options'),
     Output('color-column', 'options'),
     Output('x-column', 'value'),
     Output('y-column', 'value')],
    [Input('upload-data', 'contents')],
    [State('upload-data', 'filename')]
)
def update_data(contents, filename):
    """Load an uploaded file and refresh every component that depends on it.

    Args:
        contents: Upload payload, or ``None`` when nothing has been uploaded.
        filename: Name of the uploaded file.

    Returns:
        An 8-tuple matching the callback outputs: stored records, status
        alert, 10-row preview table, the X/Y/colour dropdown options, and
        the default X and Y selections. Without an upload, or on a parse
        error, data and selections are cleared (the error case also returns
        a danger alert).
    """
    if contents is None:
        return None, "", "", [], [], [], None, None

    df, error = parse_contents(contents, filename)
    if error:
        return None, dbc.Alert(error, color="danger"), "", [], [], [], None, None

    # Create data table preview
    preview_options = dict(striped=True, bordered=True, hover=True, size='sm')
    table = dbc.Table.from_dataframe(df.head(10), **preview_options)

    n_rows, n_cols = df.shape
    success_msg = dbc.Alert([
        html.H6("✅ File uploaded successfully!"),
        html.P(f"Shape: {n_rows} rows × {n_cols} columns"),
        html.P(f"Columns: {', '.join(df.columns.tolist())}")
    ], color="success")

    # Create column options for dropdowns
    column_options = [{'label': name, 'value': name} for name in df.columns]
    all_names = [option['value'] for option in column_options]
    numeric_names = list(df.select_dtypes(include=['number']).columns)

    # Default X: first numeric column, else first column of any type.
    if numeric_names:
        default_x = numeric_names[0]
    elif all_names:
        default_x = all_names[0]
    else:
        default_x = None

    # Default Y: second numeric column, else the only numeric column, else
    # the second column of any type.
    if len(numeric_names) > 1:
        default_y = numeric_names[1]
    elif numeric_names:
        default_y = numeric_names[0]
    elif len(all_names) > 1:
        default_y = all_names[1]
    else:
        default_y = None

    return (df.to_dict('records'), success_msg, table,
            column_options, column_options, column_options,
            default_x, default_y)
|
196 |
+
|
197 |
+
@app.callback(
    Output('quick-analytics', 'children'),
    [Input('stats-btn', 'n_clicks'),
     Input('corr-btn', 'n_clicks'),
     Input('missing-btn', 'n_clicks')],
    [State('stored-data', 'data')]
)
def quick_analytics(stats_clicks, corr_clicks, missing_clicks, data):
    """Render the result of whichever quick-analytics button was clicked.

    Args:
        stats_clicks: Click count of the summary-stats button (unused; the
            triggering button is read from the callback context).
        corr_clicks: Click count of the correlations button (unused).
        missing_clicks: Click count of the missing-data button (unused).
        data: Contents of the 'stored-data' store.

    Returns:
        A summary-statistics table, a correlation-matrix graph, a
        missing-values report, or an empty string when there is no data or
        no recognised trigger.
    """
    if not data:
        return ""

    df = pd.DataFrame(data)
    triggered = callback_context.triggered
    if not triggered:
        return ""

    def summary_stats():
        return dbc.Alert([
            html.H6("📊 Summary Statistics"),
            dbc.Table.from_dataframe(df.describe().reset_index(), size='sm')
        ], color="light")

    def correlations():
        numeric_df = df.select_dtypes(include=['number'])
        if len(numeric_df.columns) <= 1:
            return dbc.Alert("No numeric columns for correlation analysis", color="warning")
        fig = px.imshow(numeric_df.corr(), text_auto=True, aspect="auto",
                        title="Correlation Matrix")
        return dcc.Graph(figure=fig, style={'height': '300px'})

    def missing_values():
        null_counts = df.isnull().sum()
        null_counts = null_counts[null_counts > 0]
        if null_counts.empty:
            return dbc.Alert("✅ No missing values!", color="success")
        return dbc.Alert([
            html.H6("⚠️ Missing Values"),
            html.Pre(null_counts.to_string())
        ], color="warning")

    # The component id is the part of 'prop_id' before the first dot.
    button_id = triggered[0]['prop_id'].split('.')[0]
    handlers = {
        'stats-btn': summary_stats,
        'corr-btn': correlations,
        'missing-btn': missing_values,
    }
    handler = handlers.get(button_id)
    return handler() if handler is not None else ""
|
244 |
+
|
245 |
+
def _message_figure(text, font=None):
    """Return an empty figure that shows *text* centred in the plot area."""
    fig = go.Figure()
    # Paper coordinates keep the message centred regardless of axis ranges.
    annotation = dict(text=text, x=0.5, y=0.5, xref="paper", yref="paper",
                      showarrow=False)
    if font is not None:
        annotation["font"] = font
    fig.add_annotation(**annotation)
    return fig


@app.callback(
    Output('main-graph', 'figure'),
    [
        Input('stored-data', 'data'),
        Input('chart-type', 'value'),
        Input('x-column', 'value'),
        Input('y-column', 'value'),
        Input('color-column', 'value'),
    ],
)
def update_main_graph(data, chart_type, x_col, y_col, color_col):
    """Build the main figure from the stored data and the user's selections.

    Args:
        data: Stored rows (list of record dicts); falsy when nothing is loaded.
        chart_type: One of 'scatter', 'line', 'bar', 'histogram', 'box',
            'heatmap' or 'pie'; anything else yields a "Select a chart type"
            placeholder.
        x_col: Selected X column name, or a falsy value.
        y_col: Selected Y column name, or a falsy value.
        color_col: Optional column used for colouring/grouping.

    Returns:
        A Plotly figure: the requested chart, a placeholder message when the
        selection is incomplete, or a red error message if chart construction
        raised.
    """
    if not data:
        fig = _message_figure("Upload data to see visualizations",
                              font=dict(size=16, color="gray"))
        fig.update_layout(template="plotly_white")
        return fig

    df = pd.DataFrame(data)

    # The heatmap uses every numeric column and ignores the X/Y selection, so
    # it must not be blocked by the "no columns selected" guard.
    if chart_type != 'heatmap' and not x_col and not y_col:
        fig = _message_figure("Select columns to create visualization",
                              font=dict(size=16, color="gray"))
        fig.update_layout(template="plotly_white")
        return fig

    try:
        if chart_type == 'scatter':
            if x_col and y_col:
                fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
                                 title=f"Scatter Plot: {y_col} vs {x_col}")
            else:
                fig = _message_figure("Select both X and Y columns for scatter plot")

        elif chart_type == 'line':
            if x_col and y_col:
                fig = px.line(df, x=x_col, y=y_col, color=color_col,
                              title=f"Line Chart: {y_col} vs {x_col}")
            else:
                fig = _message_figure("Select both X and Y columns for line chart")

        elif chart_type == 'bar':
            if x_col and y_col:
                fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                             title=f"Bar Chart: {y_col} by {x_col}")
            elif x_col:
                # Name the columns explicitly: the labels produced by
                # value_counts().reset_index() differ between pandas 1.x
                # ('index', <col>) and pandas 2.x (<col>, 'count').
                counts = df[x_col].value_counts()
                count_label = 'count' if x_col != 'count' else 'n'
                counts_df = pd.DataFrame({
                    x_col: counts.index,
                    count_label: counts.to_numpy(),
                })
                fig = px.bar(counts_df, x=x_col, y=count_label,
                             title=f"Value Counts: {x_col}")
            else:
                fig = _message_figure("Select at least X column for bar chart")

        elif chart_type == 'histogram':
            if x_col:
                fig = px.histogram(df, x=x_col, color=color_col,
                                   title=f"Histogram: {x_col}")
            else:
                fig = _message_figure("Select X column for histogram")

        elif chart_type == 'box':
            if y_col:
                # The colour column doubles as the grouping axis.
                fig = px.box(df, x=color_col, y=y_col,
                             title=f"Box Plot: {y_col}" + (f" by {color_col}" if color_col else ""))
            elif x_col:
                fig = px.box(df, y=x_col,
                             title=f"Box Plot: {x_col}")
            else:
                fig = _message_figure("Select a column for box plot")

        elif chart_type == 'heatmap':
            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 1:
                fig = px.imshow(df[numeric_cols].corr(),
                                text_auto=True,
                                aspect="auto",
                                title="Correlation Heatmap",
                                color_continuous_scale='RdBu_r')
            else:
                fig = _message_figure("Need at least 2 numeric columns for heatmap")

        elif chart_type == 'pie':
            if x_col:
                value_counts = df[x_col].value_counts()
                fig = px.pie(values=value_counts.values,
                             names=value_counts.index,
                             title=f"Pie Chart: {x_col}")
            else:
                fig = _message_figure("Select X column for pie chart")

        else:
            fig = _message_figure("Select a chart type")

        fig.update_layout(template="plotly_white", height=500)
        return fig

    except Exception as e:
        # Callback boundary: show the failure in the graph area instead of
        # letting the callback error out.
        fig = _message_figure(f"Error creating chart: {str(e)}",
                              font=dict(color="red"))
        fig.update_layout(template="plotly_white")
        return fig
|
368 |
+
|
369 |
+
if __name__ == '__main__':
    import os

    # The server binds to every interface, so debug mode (dev tools, hot
    # reload, in-browser tracebacks) must be an explicit opt-in rather than
    # the default. Set DASH_DEBUG=1 (or "true"/"yes") to enable it locally.
    debug_enabled = os.environ.get('DASH_DEBUG', '').strip().lower() in {'1', 'true', 'yes'}
    app.run_server(host='0.0.0.0', port=8050, debug=debug_enabled)
|
simple_gradio.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import plotly.graph_objects as go
|
3 |
+
from datasets import load_dataset
|
4 |
+
|
5 |
+
# Fetch the dataset a single time at import and keep a pandas copy that
# filter_map() reads on every request.
dataset = load_dataset(
    "gradio/NYC-Airbnb-Open-Data",
    split="train",
)
df = dataset.to_pandas()
|
8 |
+
|
9 |
+
def filter_map(min_price, max_price, boroughs):
    """Build a map of the listings that match the price and borough filters.

    Args:
        min_price: Exclusive lower price bound. ``None`` (presumably what an
            emptied number box yields -- confirm) disables the bound.
        max_price: Exclusive upper price bound. ``None`` disables the bound.
        boroughs: Borough names to keep. An empty selection is treated as
            all five boroughs.

    Returns:
        A Plotly figure centred on New York with one marker per matching
        listing. When nothing matches, the same map is returned without
        markers and with a "No properties found" title.
    """
    if not boroughs:
        boroughs = ["Queens", "Brooklyn", "Manhattan", "Bronx", "Staten Island"]

    # Build the row mask step by step so a missing bound is skipped instead
    # of raising on a comparison with None.
    mask = df['neighbourhood_group'].isin(boroughs)
    if min_price is not None:
        mask = mask & (df['price'] > min_price)
    if max_price is not None:
        mask = mask & (df['price'] < max_price)
    filtered_df = df[mask]

    if filtered_df.empty:
        title = "No properties found with current filters"
    else:
        title = f"Found {len(filtered_df)} properties"

    # (name, price) pairs consumed by the hover template below.
    text_list = list(zip(filtered_df["name"].tolist(),
                         filtered_df["price"].tolist()))

    # The trace is added even when there are no rows: without a mapbox trace
    # the mapbox layout is not drawn and the figure shows bare axes.
    fig = go.Figure(go.Scattermapbox(
        customdata=text_list,
        lat=filtered_df['latitude'].tolist(),
        lon=filtered_df['longitude'].tolist(),
        mode='markers',
        marker=go.scattermapbox.Marker(
            size=6,
            color='red',
            opacity=0.7,
        ),
        hoverinfo="text",
        hovertemplate='<b>Name</b>: %{customdata[0]}<br><b>Price</b>: $%{customdata[1]}<extra></extra>',
    ))

    fig.update_layout(
        title=title,
        mapbox_style="open-street-map",
        hovermode='closest',
        mapbox=dict(
            bearing=0,
            center=go.layout.mapbox.Center(
                lat=40.67,
                lon=-73.90,
            ),
            pitch=0,
            zoom=9,
        ),
        height=600,
    )

    return fig
|
69 |
+
|
70 |
+
# UI: two price boxes, a borough selector, an update button and the map plot.
with gr.Blocks() as demo:
    with gr.Column():
        with gr.Row():
            min_price = gr.Number(value=250, label="Minimum Price")
            max_price = gr.Number(value=1000, label="Maximum Price")
        boroughs = gr.CheckboxGroup(
            choices=["Queens", "Brooklyn", "Manhattan", "Bronx", "Staten Island"],
            value=["Queens", "Brooklyn"],
            label="Select Boroughs:",
        )
        btn = gr.Button(value="Update Filter")
        # Named map_plot rather than `map` so the builtin is not shadowed.
        map_plot = gr.Plot()

    # The same inputs drive the initial render and every button click.
    filter_inputs = [min_price, max_price, boroughs]
    demo.load(filter_map, filter_inputs, map_plot)
    btn.click(filter_map, filter_inputs, map_plot)

demo.launch()
|
test.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|