Myoussef11 commited on
Commit
2e94685
·
1 Parent(s): 582a7e3

Add project files for HF Space

Browse files
.env.example ADDED
@@ -0,0 +1 @@
 
 
1
+ OPENAI_API_KEY="YOUR_API_KEY_HERE"
.gitignore ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
208
+
209
+ # Logs
210
+ logs
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use the official Python 3.10 slim-buster image as a lightweight base
2
+ FROM python:3.10-slim
3
+
4
+ # Set the application's working directory inside the container
5
+ WORKDIR /app
6
+
7
+ # Install essential system-level dependencies
8
+ # - ffmpeg: Required by the pydub library for audio processing
9
+ # - git: Required by some pip packages for installation from version control
10
+ RUN apt-get update && apt-get install -y \
11
+ ffmpeg \
12
+ git \
13
+ && rm -rf /var/lib/apt/lists/*
14
+
15
+ # Copy the dependency file first to leverage Docker's layer caching
16
+ COPY requirements.txt .
17
+
18
+ # Install Python packages, disabling the cache to reduce image size
19
+ RUN pip install --no-cache-dir -r requirements.txt
20
+
21
+ # Copy the application source code into the container
22
+ # force a re-installation of all packages
23
+ COPY src/ src/
24
+
25
+ # Expose the port Gradio will run on, making it accessible to the host
26
+ EXPOSE 7860
27
+
28
+ # Set the environment variable for the Ollama host when running inside Docker.
29
+ ENV OLLAMA_HOST=host.docker.internal
30
+
31
+ # Define the default command to run when the container starts
32
+ # Uses Python's module flag '-m' for correct package path resolution
33
+ CMD ["python", "-m", "src.app"]
app.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from src.controllers.processing_controller import ProcessingController
4
+ from src.utils.exceptions import AppError
5
+ from src.logging_config import logger
6
+ from src import config
7
+
8
+
9
+ def create_controller():
10
+ """Factory function to create a new controller instance."""
11
+ return ProcessingController()
12
+
13
def process_audio_file(audio_file_path, controller):
    """
    Handles the primary audio processing workflow when a user uploads a file.

    Args:
        audio_file_path: Path to the temporary file Gradio saved the upload to
            (or ``None`` when the upload was cleared).
        controller: The session's ProcessingController (from ``gr.State``).

    Returns:
        An 8-tuple of component updates. The order MUST match the ``outputs``
        list of the ``audio_input.upload`` wiring below:
        (audio_input, transcript_btn, summarize_btn, sentiment_btn,
         question_input, submit_btn, status_output, chatbot_ui) —
        index 6 is the status Markdown, index 7 resets the chat history.
    """
    # Define the updates for a failed state (all components disabled)
    fail_updates = (
        gr.update(), gr.update(interactive=False), gr.update(interactive=False),
        gr.update(interactive=False), gr.update(interactive=False),
        gr.update(interactive=False), "<p>Please upload a file to begin.</p>", []
    )
    # Define the updates for a successful state (all components enabled)
    success_updates = (
        gr.update(), gr.update(interactive=True), gr.update(interactive=True),
        gr.update(interactive=True), gr.update(interactive=True),
        gr.update(interactive=True), "<p style='color:green;'>File processed successfully. Ready for analysis.</p>", []
    )

    # Upload cleared / nothing provided: disable everything again.
    if audio_file_path is None:
        return fail_updates

    try:
        logger.info(f"UI received file: {audio_file_path}")
        # Validates + transcribes; raises AppError on any known failure.
        controller.process_audio_file(audio_file_path)
        return success_updates
    except AppError as e:
        logger.error(f"UI caught an application error: {e}")
        # Return a failure state but with a specific error message for the user.
        # Index 6 is the status_output Markdown slot (see docstring).
        error_return = list(fail_updates)
        error_return[6] = f"<p style='color:red;'>Error: {e}</p>"
        return tuple(error_return)
    finally:
        # Ensure temporary files created by Gradio are always cleaned up,
        # on both the success and the error path.
        if audio_file_path and os.path.exists(audio_file_path):
            try:
                os.remove(audio_file_path)
                logger.info(f"Cleaned up temporary file: {audio_file_path}")
            except OSError as e:
                # Best-effort cleanup: log and move on rather than crash the UI.
                logger.error(f"Error removing temporary file {audio_file_path}: {e}")
51
+
52
def handle_question(question, controller):
    """
    Drives one turn of the Q&A conversation.

    Sends the user's question to the controller and returns the refreshed
    chat history together with the new value for the input textbox
    ("" to clear it on success, the original question on error so the
    user can retry).
    """
    # Ignore whitespace-only input: keep the current history untouched.
    if not question.strip():
        return controller.chat_history, ""
    try:
        # The controller records the new Q&A turn in its own history.
        controller.answer_question(question)
    except AppError as err:
        logger.error(f"UI caught an application error during Q&A: {err}")
        # Show the error in a throwaway copy of the history so it is never
        # persisted, and keep the question in the box for a retry.
        return controller.chat_history + [[question, f"Error: {err}"]], question
    # Success: return the updated history and clear the input box.
    return controller.chat_history, ""
70
+
71
def handle_transcript(controller, chat_history):
    """Append the full transcript to the chat display as a bot-only message."""
    transcript_text = controller.get_transcript()
    # [None, text] renders as an assistant message with no user side.
    chat_history.append([None, transcript_text])
    return chat_history
75
+
76
def handle_summary(controller, chat_history):
    """Append a freshly generated summary to the chat display as a bot-only message."""
    summary_text = controller.get_summary()
    # [None, text] renders as an assistant message with no user side.
    chat_history.append([None, summary_text])
    return chat_history
80
+
81
def handle_sentiment(controller, chat_history):
    """Append a sentiment-analysis result to the chat display as a bot-only message."""
    sentiment_text = controller.get_sentiment()
    # [None, text] renders as an assistant message with no user side.
    chat_history.append([None, sentiment_text])
    return chat_history
85
+
86
# Gradio UI Definition.
# Built at import time so `python -m src.app` (see Dockerfile CMD) exposes `demo`.
with gr.Blocks(theme=gr.themes.Default(), css="footer {visibility: hidden}", title=config.APP_TITLE) as demo:
    # A session-specific state object: passing the factory (not an instance)
    # gives each connected user their own ProcessingController.
    controller_state = gr.State(value=create_controller)

    # Page header.
    gr.Markdown(f"# 🗣️ {config.APP_TITLE}")
    gr.Markdown(config.APP_DESCRIPTION)

    with gr.Row(equal_height=True):
        # Left column: file input, status line, and analysis action buttons.
        with gr.Column(scale=1):
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            status_output = gr.Markdown(value="<p>Please upload a file to begin.</p>")

            with gr.Accordion("Analysis Actions", open=True):
                # All actions start disabled; process_audio_file enables them
                # once a file has been validated and transcribed.
                transcript_btn = gr.Button("Show Full Transcript", interactive=False)
                summarize_btn = gr.Button("Generate Summary", interactive=False)
                sentiment_btn = gr.Button("Analyze Sentiment", interactive=False)

        # Right column: chatbot display and question input.
        with gr.Column(scale=2):
            chatbot_ui = gr.Chatbot(label="Chatbot", height=500, show_copy_button=True)
            with gr.Row():
                question_input = gr.Textbox(
                    show_label=False,
                    placeholder="Type your question here...",
                    interactive=False,
                    scale=4
                )
                submit_btn = gr.Button(
                    value="Submit",
                    interactive=False,
                    variant="primary",
                    scale=1
                )

    # Event wiring: how UI components react to user actions.
    # NOTE: the outputs order below defines the 8-tuple contract that
    # process_audio_file must return — keep the two in sync.
    audio_input.upload(
        fn=process_audio_file,
        inputs=[audio_input, controller_state],
        outputs=[
            audio_input, transcript_btn, summarize_btn, sentiment_btn,
            question_input, submit_btn, status_output, chatbot_ui
        ]
    )

    # Analysis actions: each appends one bot message to the chat display.
    transcript_btn.click(fn=handle_transcript, inputs=[controller_state, chatbot_ui], outputs=[chatbot_ui])
    summarize_btn.click(fn=handle_summary, inputs=[controller_state, chatbot_ui], outputs=[chatbot_ui])
    sentiment_btn.click(fn=handle_sentiment, inputs=[controller_state, chatbot_ui], outputs=[chatbot_ui])

    # Question submission: both pressing Enter and clicking Submit run the
    # same handler, which also clears the textbox on success.
    question_input.submit(fn=handle_question, inputs=[question_input, controller_state], outputs=[chatbot_ui, question_input])
    submit_btn.click(fn=handle_question, inputs=[question_input, controller_state], outputs=[chatbot_ui, question_input])

if __name__ == "__main__":
    logger.info("Starting Gradio application...")
    # 0.0.0.0 so the server is reachable from outside the Docker container.
    demo.launch(
        #share=True,
        server_name="0.0.0.0",
    )
docker-compose.yml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ # Service 1: Our Voice Analysis Application
5
+ app:
6
+ # Build the image using the Dockerfile in the current directory
7
+ build: .
8
+ # Expose the Gradio port
9
+ ports:
10
+ - "7860:7860"
11
+ # Set the environment variables for the application
12
+ environment:
13
+ # Define the Ollama host address
14
+ # This allows the application to connect to the Ollama server
15
+ - OLLAMA_HOST=ollama
16
+ # Make this service depend on the 'ollama' service
17
+ # This ensures that Ollama starts up before our application tries to connect to it
18
+ depends_on:
19
+ - ollama
20
+
21
+ # Service 2: The Ollama Server
22
+ ollama:
23
+ # Use the official Ollama Docker image
24
+ image: ollama/ollama
25
+ # Expose the Ollama API port so our 'app' service can reach it.
26
+ ports:
27
+ - "11434:11434"
28
+ # Set the environment variable to enable GPU support
29
+ deploy:
30
+ resources:
31
+ reservations:
32
+ devices:
33
+ - driver: nvidia
34
+ count: all
35
+ capabilities: [gpu]
36
+ # Mount a volume to persist Ollama data
37
+ # This allows Ollama to retain its state and models across container restarts
38
+ volumes:
39
+ - ollama_data:/root/.ollama
40
+
41
+ volumes:
42
+ ollama_data:
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ transformers==4.41.2
3
+ accelerate==0.29.3
4
+ torch==2.3.0
5
+ openai-whisper==20231117
6
+ openai
7
+ pydub==0.25.1
8
+ python-dotenv==1.0.1
9
+ requests==2.32.3
10
+ pytest==8.2.2
11
+ pytest-mock==3.14.0
12
+ ollama==0.2.1
src/__init__.py ADDED
File without changes
src/config.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from dotenv import load_dotenv

# Load environment variables from a local .env file (no-op if absent).
load_dotenv()

# Application Configuration
APP_TITLE = "Voice Analysis Toolkit"
APP_DESCRIPTION = (
    "Upload an audio file to transcribe, summarize, analyze sentiment, "
    "and ask questions about its content. All processing is done locally "
    "and your data remains private."
)

# Model Configuration
# Selects the backend for both transcription and analysis services.
MODEL_PROVIDER = "local"  # Set to 'local' or 'openai'
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "localhost")  # Default to localhost if not set
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3")  # Default model for Ollama

# Local model settings (used when MODEL_PROVIDER is 'local')
LOCAL_TRANSCRIPTION_MODEL = "openai/whisper-base.en"
LOCAL_ANALYSIS_MODEL = "microsoft/Phi-3-mini-4k-instruct"

# OpenAI API settings (used when MODEL_PROVIDER is 'openai')
# May be None when unset; the services raise a clear error in that case.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_TRANSCRIPTION_MODEL = "whisper-1"
OPENAI_ANALYSIS_MODEL = "gpt-3.5-turbo"

# File Validation Configuration
# Maximum file size in megabytes (MB)
MAX_FILE_SIZE_MB = 25
# Maximum audio duration in minutes
MAX_FILE_LENGTH_MINS = 15
# List of allowed audio file extensions (add more as needed)
ALLOWED_FILE_EXTENSIONS = [".mp3", ".wav", ".m4a", ".flac", ".ogg"]

# Logging Configuration
# Relative path: the logs/ directory is created by logging_config at import.
LOG_FILE_PATH = "logs/app.log"
LOG_LEVEL = "INFO"  # Can be "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
src/controllers/__init__.py ADDED
File without changes
src/controllers/processing_controller.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.services.transcription_service import TranscriptionService
2
+ from src.services.analysis_service import AnalysisService
3
+ from src.utils.validator import Validator
4
+ from src.utils.exceptions import AppError
5
+ from src.logging_config import logger
6
+
7
+
8
class ProcessingController:
    """
    The central controller that orchestrates the entire analysis process.

    Holds the per-session state (current transcript and chat history) and
    coordinates the validation, transcription, and analysis services.
    """

    def __init__(self):
        """Initializes the controller and its required services."""
        self.transcription_service = TranscriptionService()
        self.analysis_service = AnalysisService()
        self.validator = Validator()
        # No transcript until process_audio_file() has succeeded.
        self.transcript: str | None = None
        # List of [question, answer] turns shown in the chatbot.
        self.chat_history: list = []
        logger.info("ProcessingController initialized.")

    def process_audio_file(self, file_path: str):
        """
        The main workflow method. It validates and transcribes the audio file,
        preparing the controller for on-demand analysis.

        Args:
            file_path: The path to the temporary audio file uploaded by the user.

        Raises:
            AppError: If any step in the validation or transcription fails.
        """
        try:
            # Reset state so a failed run never leaves a stale transcript behind.
            self.transcript = None
            self.chat_history = []
            logger.info(f"Starting processing for audio file: {file_path}")

            # 1. Validate the file (type, size, duration).
            self.validator.validate_audio_file(file_path)

            # 2. Transcribe the file.
            self.transcript = self.transcription_service.transcribe(file_path)

            logger.info(f"Successfully processed and transcribed file: {file_path}")

        except AppError:
            # Known application errors: log and re-raise unchanged.
            # Bare `raise` preserves the original traceback (unlike `raise e`).
            logger.error("An application error occurred during processing", exc_info=True)
            raise
        except Exception as e:
            # Anything unexpected becomes a generic AppError; chain the cause
            # so the log/debugger still shows the real failure.
            logger.critical(f"An unexpected critical error occurred: {e}", exc_info=True)
            raise AppError("An unexpected error occurred. Please check the logs.") from e

    def _ensure_transcript_exists(self):
        """
        Private helper guarding all analysis entry points.

        Raises:
            AppError: If no transcript has been produced yet.
        """
        if not self.transcript:
            logger.warning("Attempted to perform analysis before processing a file.")
            raise AppError("Please process an audio file before requesting analysis.")

    def get_transcript(self) -> str:
        """
        Returns the stored transcript.

        Raises:
            AppError: If no file has been processed yet.
        """
        self._ensure_transcript_exists()
        logger.info("Transcript requested by user.")
        return self.transcript

    def get_summary(self) -> str:
        """
        Generates a summary for the currently loaded transcript.

        Raises:
            AppError: If no file has been processed yet.
        """
        self._ensure_transcript_exists()
        logger.info("Summary requested by user.")
        return self.analysis_service.summarize(self.transcript)

    def get_sentiment(self) -> str:
        """
        Performs sentiment analysis on the currently loaded transcript.

        Raises:
            AppError: If no file has been processed yet.
        """
        self._ensure_transcript_exists()
        logger.info("Sentiment analysis requested by user.")
        return self.analysis_service.get_sentiment(self.transcript)

    def answer_question(self, question: str) -> str:
        """
        Answers a question about the currently loaded transcript and records
        the new [question, answer] turn in the chat history.

        Raises:
            AppError: If no file has been processed yet or the question is empty.
        """
        self._ensure_transcript_exists()
        if not question or not question.strip():
            raise AppError("Question cannot be empty.")

        logger.info(f"Question received from user: '{question}'")
        # Pass the entire history so the model can resolve follow-up questions.
        response = self.analysis_service.answer_question(self.transcript, question, self.chat_history)
        # Update history with the new turn.
        self.chat_history.append([question, response])
        return response
+
src/logging_config.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
from logging.handlers import RotatingFileHandler
from src import config

def setup_logging():
    """
    Configures the application's root logger.

    Attaches a console handler and a size-rotated file handler, using the
    level and file path from ``src.config``.

    Returns:
        logging.Logger: The configured root logger.
    """
    # Create logs directory if it doesn't exist
    os.makedirs(os.path.dirname(config.LOG_FILE_PATH), exist_ok=True)

    # Configure the root logger
    logger = logging.getLogger()
    logger.setLevel(config.LOG_LEVEL)

    # Drop any previously attached handlers so re-running setup (module
    # re-import, reloads) doesn't produce duplicate log lines.
    if logger.hasHandlers():
        logger.handlers.clear()

    # Shared formatter for both handlers.
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Console Handler (for printing logs to the terminal)
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    # File Handler (for writing logs to a file)
    # RotatingFileHandler ensures log files don't grow indefinitely
    fh = RotatingFileHandler(
        config.LOG_FILE_PATH,
        maxBytes=10*1024*1024, # 10 MB
        backupCount=5
    )
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    return logger

# Initialize the shared logger once at import time; other modules do
# `from src.logging_config import logger`.
logger = setup_logging()
src/services/__init__.py ADDED
File without changes
src/services/analysis_service.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ollama
2
+ from openai import OpenAI, OpenAIError
3
+ from src import config
4
+ from src.utils.exceptions import AnalysisError, IrrelevantQuestionError
5
+ from src.logging_config import logger
6
+
7
+
8
class AnalysisService:
    """
    A service class for performing text analysis tasks (summarization,
    sentiment analysis, and document Q&A).

    Requests are dispatched to a local Ollama server or the OpenAI API,
    depending on ``config.MODEL_PROVIDER``.
    """

    def _analyze_local(self, prompt: str) -> str:
        """
        Generates a response by calling the local Ollama server using the host
        address defined in the application's configuration.

        Raises:
            AnalysisError: If the Ollama API errors or is unreachable.
        """
        # Read the configured host from config.py
        ollama_host = config.OLLAMA_HOST

        try:
            # The client expects a full base URL; 11434 is Ollama's default port.
            client = ollama.Client(host=f"http://{ollama_host}:11434")
            logger.info(f"Sending analysis request to Ollama server at {ollama_host}.")

            response = client.generate(
                model=config.OLLAMA_MODEL,
                prompt=prompt
            )

            logger.info("Ollama analysis successful.")
            return response['response'].strip()

        except ollama.ResponseError as e:
            logger.error(f"Ollama API error: {e.error}", exc_info=True)
            raise AnalysisError(f"An error occurred with the Ollama API: {e.error}") from e
        except Exception as e:
            # Catch other potential issues like connection problems.
            logger.error(f"Error during Ollama request: {e}", exc_info=True)
            raise AnalysisError(
                "An unexpected error occurred while communicating with the Ollama server."
            ) from e

    def _analyze_openai(self, prompt: str) -> str:
        """
        Generates a response using the OpenAI chat completions API.

        Raises:
            AnalysisError: If the API key is missing or the API call fails.
        """
        if not config.OPENAI_API_KEY:
            logger.error("OpenAI API key not found for analysis.")
            raise AnalysisError("OpenAI API key is not configured.")

        try:
            logger.info("Sending analysis request to OpenAI.")
            client = OpenAI(api_key=config.OPENAI_API_KEY)

            response = client.chat.completions.create(
                model=config.OPENAI_ANALYSIS_MODEL,
                messages=[{"role": "user", "content": prompt}]
            )

            content = response.choices[0].message.content
            logger.info("OpenAI analysis successful.")
            return content.strip()
        except OpenAIError as e:
            # BUG FIX: the base OpenAIError has no `.response` attribute, so the
            # previous `e.response.text` / `e.response.status_code` access raised
            # AttributeError and masked the real API error. Log the exception itself.
            logger.error(f"OpenAI API error during analysis: {e}", exc_info=True)
            raise AnalysisError(f"An OpenAI API error occurred: {e}") from e
        except Exception as e:
            logger.error(f"An unexpected error occurred during OpenAI analysis: {e}", exc_info=True)
            raise AnalysisError("An unexpected error occurred while using the OpenAI API.") from e

    def _analyze(self, prompt: str) -> str:
        """
        Private dispatcher method to route analysis to the correct provider.

        Raises:
            ValueError: If MODEL_PROVIDER is neither 'local' nor 'openai'.
        """
        provider = config.MODEL_PROVIDER.lower()
        if provider == 'local':
            return self._analyze_local(prompt)
        elif provider == 'openai':
            return self._analyze_openai(prompt)
        else:
            logger.error(f"Invalid MODEL_PROVIDER configured: {config.MODEL_PROVIDER}")
            raise ValueError(f"Invalid model provider '{config.MODEL_PROVIDER}' specified in config.")

    def summarize(self, text: str) -> str:
        """
        Generates a concise summary of the provided text.
        """
        logger.info("Summarization task requested.")
        prompt = f"""
        Provide a concise summary of the following text.
        Focus on the key points and main conclusions.

        Text:
        ---
        {text}
        ---
        Summary:
        """
        return self._analyze(prompt)

    def get_sentiment(self, text: str) -> str:
        """
        Performs sentiment analysis on the provided text, returning a
        Markdown-formatted classification plus justification.
        """
        logger.info("Sentiment analysis task requested.")
        prompt = f"""
        Analyze the sentiment of the following text.
        Your response must have two parts:
        1. **Sentiment:** Classify the sentiment as Positive, Negative, or Neutral.
        2. **Justification:** Briefly explain why you chose that sentiment, referencing key words or phrases from the text.

        Format your response clearly using Markdown.

        Text:
        ---
        {text}
        ---
        Sentiment:
        """
        return self._analyze(prompt)

    def answer_question(self, text: str, question: str, chat_history: list) -> str:
        """
        Answers a question based strictly on the provided transcript.

        Args:
            text: The document transcript to answer from.
            question: The user's new question.
            chat_history: Prior [question, answer] turns, for follow-up context.

        Raises:
            IrrelevantQuestionError: If the model signals the answer is not in
                the transcript.
        """
        logger.info(f"Q&A task requested for question: '{question}'")
        # Format the chat history for the prompt
        formatted_history = "\n".join([f"User: {q}\nAssistant: {a}" for q, a in chat_history])

        prompt = f"""
        You are a machine. You are a Q&A engine that answers questions about a document.
        You MUST follow these rules strictly:
        1. Use the "Conversation History" to understand the user's question, especially for follow-ups.
        2. Find the answer to the user's "New User Question" using ONLY the "Document Transcript".
        3. If the answer is not in the transcript, you MUST ONLY respond with the exact phrase: 'That information is not available in the provided document.'
        4. Do not apologize. Do not explain your reasoning. Do not add any other words.

        ---
        **DOCUMENT TRANSCRIPT:**
        {text}
        ---
        **CONVERSATION HISTORY:**
        {formatted_history}
        ---
        **NEW USER QUESTION:**
        {question}
        """

        response = self._analyze(prompt)

        # BUG FIX: rule 3 of the prompt instructs the model to reply with the
        # exact sentinel phrase below, but the previous check looked for an
        # unrelated "ERROR: The answer to this question cannot be found" string
        # and therefore could never trigger. Match the phrase actually requested.
        if "That information is not available in the provided document" in response:
            logger.warning(f"Model indicated question '{question}' is unanswerable from text.")
            raise IrrelevantQuestionError(
                "The question could not be answered based on the provided audio content."
            )

        return response
src/services/transcription_service.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import pipeline, Pipeline
3
+ from openai import OpenAI, OpenAIError
4
+ from src import config
5
+ from src.utils.exceptions import TranscriptionError
6
+ from src.logging_config import logger
7
+
8
+
9
class TranscriptionService:
    """
    A service class for handling audio transcription.
    It can use either a local model or the OpenAI API based on the configuration.
    """

    # Class-level cache: the (expensive) ASR model is loaded once per process
    # and shared across all service instances.
    _local_pipeline: Pipeline = None

    @classmethod
    def _get_local_pipeline(cls) -> Pipeline:
        """
        Initializes and returns the local transcription pipeline.
        Uses the class-level cache so the model is loaded only once.

        Raises:
            TranscriptionError: If the model cannot be loaded.
        """
        if cls._local_pipeline is None:
            try:
                logger.info(
                    f"Initializing local transcription model: {config.LOCAL_TRANSCRIPTION_MODEL}"
                )
                # Check for GPU availability
                device = "cuda:0" if torch.cuda.is_available() else "cpu"
                logger.info(f"Using device: {device} for transcription.")

                cls._local_pipeline = pipeline(
                    "automatic-speech-recognition",
                    model=config.LOCAL_TRANSCRIPTION_MODEL,
                    device=device
                )
                logger.info("Local transcription model initialized successfully.")
            except Exception as e:
                logger.critical(f"Failed to load local transcription model: {e}", exc_info=True)
                raise TranscriptionError(
                    "Could not initialize the local transcription model. "
                    "Please check model name and dependencies."
                ) from e
        return cls._local_pipeline

    def _transcribe_local(self, file_path: str) -> str:
        """
        Transcribes audio using a local Hugging Face model.

        Raises:
            TranscriptionError: If model loading or transcription fails.
        """
        try:
            logger.info(f"Starting local transcription for {file_path}")
            # Renamed from `pipeline` to avoid shadowing the imported
            # transformers.pipeline factory function.
            asr_pipeline = self._get_local_pipeline()
            # The pipeline handles chunking for long audio files automatically
            result = asr_pipeline(file_path)
            transcript_text = result["text"].strip()
            logger.info(f"Local transcription successful for {file_path}")
            return transcript_text
        except TranscriptionError:
            # Already a classified error (e.g. model load failure) — don't
            # re-wrap it with a generic message.
            raise
        except Exception as e:
            logger.error(f"Error during local transcription for {file_path}: {e}", exc_info=True)
            raise TranscriptionError("An unexpected error occurred during local transcription.") from e

    def _transcribe_openai(self, file_path: str) -> str:
        """
        Transcribes audio using the OpenAI API.

        Raises:
            TranscriptionError: If the key is missing or the API call fails.
        """
        if not config.OPENAI_API_KEY:
            logger.error("OpenAI API key not found for transcription.")
            raise TranscriptionError("OpenAI API key is not configured.")

        try:
            logger.info(f"Sending transcription request to OpenAI for {file_path}")
            client = OpenAI(api_key=config.OPENAI_API_KEY)

            with open(file_path, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    model=config.OPENAI_TRANSCRIPTION_MODEL,
                    file=audio_file
                )

            transcript_text = transcript.text.strip()
            logger.info(f"OpenAI transcription successful for {file_path}")
            return transcript_text
        except OpenAIError as e:
            # BUG FIX: the base OpenAIError has no `.response` attribute, so the
            # previous `e.response.text` access raised AttributeError and masked
            # the real API error. Log the exception itself instead.
            logger.error(f"OpenAI API error during transcription for {file_path}: {e}", exc_info=True)
            raise TranscriptionError(f"An OpenAI API error occurred: {e}") from e
        except Exception as e:
            logger.error(f"An unexpected error occurred during OpenAI transcription for {file_path}: {e}", exc_info=True)
            raise TranscriptionError("An unexpected error occurred while using the OpenAI API.") from e

    def transcribe(self, file_path: str) -> str:
        """
        Public method to transcribe an audio file.
        Delegates to the appropriate method based on the MODEL_PROVIDER config.

        Args:
            file_path: The path to the audio file to be transcribed.

        Returns:
            The transcribed text as a string.

        Raises:
            TranscriptionError: If the transcription process fails.
            ValueError: If the configured MODEL_PROVIDER is invalid.
        """
        provider = config.MODEL_PROVIDER.lower()
        logger.info(f"Transcription requested with provider: {provider}")

        if provider == 'local':
            return self._transcribe_local(file_path)
        elif provider == 'openai':
            return self._transcribe_openai(file_path)
        else:
            logger.error(f"Invalid MODEL_PROVIDER configured: {config.MODEL_PROVIDER}")
            raise ValueError(f"Invalid model provider '{config.MODEL_PROVIDER}' specified in config.")
src/utils/__init__.py ADDED
File without changes
src/utils/exceptions.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class AppError(Exception):
    """Common base class for every application-specific error."""
4
+
5
class ValidationError(AppError):
    """Signals that user-supplied data failed validation."""
8
+
9
class InvalidFileType(ValidationError):
    """Signals an upload whose file extension is not permitted."""
12
+
13
class FileSizeExceeded(ValidationError):
    """Signals an uploaded file larger than the configured size limit."""
16
+
17
class FileLengthExceeded(ValidationError):
    """Signals an uploaded audio file whose duration is over the allowed limit."""
20
+
21
class TranscriptionError(AppError):
    """Signals a failure while transcribing audio."""
24
+
25
class AnalysisError(AppError):
    """Signals a failure while analysing transcribed text."""
28
+
29
class IrrelevantQuestionError(AnalysisError):
    """Signals a user question that is unrelated to the supplied text."""
src/utils/validator.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pydub import AudioSegment
3
+ from pydub.exceptions import CouldntDecodeError
4
+ from src import config
5
+ from src.utils.exceptions import (
6
+ InvalidFileType,
7
+ FileSizeExceeded,
8
+ FileLengthExceeded,
9
+ ValidationError
10
+ )
11
+ from src.logging_config import logger
12
+
13
+
14
class Validator:
    """
    A class to handle all input validation for the application.
    """

    @staticmethod
    def validate_audio_file(file_path: str) -> None:
        """
        Validates an uploaded audio file against the rules in config.py.

        Checks are applied in order: existence, file type, file size,
        and audio duration.

        Args:
            file_path: The path to the uploaded audio file.

        Raises:
            ValidationError: If the file does not exist, or if it cannot
                be decoded (corrupted or unsupported format).
            InvalidFileType: If the file extension is not in the allowed list.
            FileSizeExceeded: If the file size is over the configured limit.
            FileLengthExceeded: If the audio duration is over the configured limit.
        """
        logger.info(f"Initiating validation for file: {file_path}")

        # 1. Check for file existence
        if not os.path.exists(file_path):
            logger.error(f"Validation failed: File not found at {file_path}")
            raise ValidationError(f"File not found at path: {file_path}")

        # 2. Validate file type (extension). Comparison is case-insensitive;
        # ALLOWED_FILE_EXTENSIONS is assumed to hold lowercase dotted
        # extensions (e.g. ".mp3") — confirm against config.py.
        _, ext = os.path.splitext(file_path)
        if ext.lower() not in config.ALLOWED_FILE_EXTENSIONS:
            logger.warning(
                f"Validation failed: Invalid file type '{ext}' for {file_path}"
            )
            raise InvalidFileType(
                f"Invalid file type. Allowed types are: "
                f"{', '.join(config.ALLOWED_FILE_EXTENSIONS)}"
            )

        # 3. Validate file size
        file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
        if file_size_mb > config.MAX_FILE_SIZE_MB:
            logger.warning(
                f"Validation failed: File size {file_size_mb:.2f}MB exceeds "
                f"limit of {config.MAX_FILE_SIZE_MB}MB for {file_path}"
            )
            raise FileSizeExceeded(
                f"File size of {file_size_mb:.2f}MB exceeds the "
                f"{config.MAX_FILE_SIZE_MB}MB limit."
            )

        # 4. Validate file duration. The try body is kept minimal: only the
        # decode step can raise CouldntDecodeError, so the duration-limit
        # check lives outside the try and its FileLengthExceeded can never
        # be mistaken for (or shadowed by) a decode failure.
        try:
            audio = AudioSegment.from_file(file_path)
        except CouldntDecodeError as e:
            logger.error(f"Validation failed: Could not decode audio file {file_path}. "
                         "It may be corrupted or an unsupported format.")
            # Chain the original decode error so the root cause stays in
            # the traceback instead of being silently dropped.
            raise ValidationError(
                "Failed to read audio file. It may be corrupted or in an "
                "unsupported format despite the file extension."
            ) from e

        duration_mins = audio.duration_seconds / 60
        if duration_mins > config.MAX_FILE_LENGTH_MINS:
            logger.warning(
                f"Validation failed: Duration {duration_mins:.2f} mins exceeds "
                f"limit of {config.MAX_FILE_LENGTH_MINS} mins for {file_path}"
            )
            raise FileLengthExceeded(
                f"Audio duration of {duration_mins:.2f} minutes exceeds the "
                f"{config.MAX_FILE_LENGTH_MINS} minute limit."
            )

        logger.info(f"Validation successful for file: {file_path}")