Spaces:
Sleeping
Sleeping
Commit
·
11bb7a3
1
Parent(s):
ae9bdd2
fix: added haystack-ai-integrations in requirements
Browse files
- .gitignore +0 -0
- Dockerfile +6 -10
- pipelines.py +4 -5
- requirements.txt +7 -4
.gitignore
CHANGED
Binary files a/.gitignore and b/.gitignore differ
|
|
Dockerfile
CHANGED
@@ -1,25 +1,21 @@
|
|
1 |
-
FROM python:3.11
|
2 |
|
3 |
-
# Install system dependencies
|
4 |
RUN apt-get update && \
|
5 |
apt-get install -y \
|
6 |
tesseract-ocr libtesseract-dev poppler-utils \
|
|
|
7 |
build-essential pkg-config libgl1 && \
|
|
|
8 |
rm -rf /var/lib/apt/lists/*
|
9 |
|
10 |
WORKDIR /app
|
11 |
|
12 |
-
# Install PyTorch CPU first
|
13 |
-
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
|
14 |
-
|
15 |
-
# Install project dependencies
|
16 |
COPY requirements.txt .
|
17 |
RUN pip install --no-cache-dir -r requirements.txt
|
18 |
|
19 |
-
# Copy app code
|
20 |
COPY . .
|
21 |
|
22 |
-
ENV PYTHONUNBUFFERED=1
|
|
|
23 |
|
24 |
-
|
25 |
-
CMD ["sh", "-c", "uvicorn main:app --host 0.0.0.0 --port ${PORT}"]
|
|
|
1 |
+
# Container image for the uvicorn-served app (main:app), built on slim Python 3.11.
FROM python:3.11-slim

# OS-level packages: Tesseract OCR (with English and French language data),
# the Poppler command-line tools, a compiler toolchain with pkg-config, and libgl1.
# --no-install-recommends stops apt from pulling in optional extras; poppler-data
# is listed explicitly so that PDF encoding data is still installed.
# The apt caches and package lists are removed in the same layer that created
# them, so they never end up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        libgl1 \
        libtesseract-dev \
        pkg-config \
        poppler-data \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-eng \
        tesseract-ocr-fra && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the dependency manifest on its own first so the pip layer below stays
# cached until requirements.txt itself changes.
# NOTE(review): the previous revision installed torch from the PyTorch CPU wheel
# index (https://download.pytorch.org/whl/cpu) before this step; without it pip
# resolves torch from the default index. Confirm that dropping it was intended.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application source goes in last because it changes most often.
COPY . .

# PYTHONUNBUFFERED=1 sends Python output straight to the container log.
# PORT is the default listen port and can be overridden at run time.
ENV PYTHONUNBUFFERED=1 \
    PORT=8000

# A shell is needed to expand ${PORT}; `exec` then replaces that shell with
# uvicorn so the server receives stop signals (SIGTERM) directly.
CMD ["sh", "-c", "exec uvicorn main:app --host 0.0.0.0 --port ${PORT}"]
|
|
pipelines.py
CHANGED
@@ -5,9 +5,9 @@ from haystack.dataclasses import Document
|
|
5 |
from haystack.document_stores.in_memory import InMemoryDocumentStore
|
6 |
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
|
7 |
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
|
8 |
-
|
9 |
-
from haystack_integrations.components.
|
10 |
-
from
|
11 |
from haystack.components.preprocessors import DocumentSplitter
|
12 |
|
13 |
# Set up logging
|
@@ -19,8 +19,7 @@ document_store = InMemoryDocumentStore()
|
|
19 |
doc_embedder = SentenceTransformersDocumentEmbedder(model="BAAI/bge-large-en-v1.5")
|
20 |
text_embedder = SentenceTransformersTextEmbedder(model="BAAI/bge-large-en-v1.5")
|
21 |
retriever = InMemoryEmbeddingRetriever(document_store=document_store, top_k=5)
|
22 |
-
|
23 |
-
ranker = SentenceTransformersRanker(model="sentence-transformers/all-MiniLM-L6-v2")
|
24 |
|
25 |
# Initialize generator
|
26 |
generator = GoogleAIGeminiGenerator(
|
|
|
5 |
from haystack.document_stores.in_memory import InMemoryDocumentStore
|
6 |
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
|
7 |
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
|
8 |
+
from haystack.components.rankers import SentenceTransformersSimilarityRanker
|
9 |
+
# from haystack_integrations.components.generators.google_ai import GoogleAIGeminiGenerator
|
10 |
+
from haystack_google_generative_ai.generators import GoogleAIGeminiGenerator
|
11 |
from haystack.components.preprocessors import DocumentSplitter
|
12 |
|
13 |
# Set up logging
|
|
|
19 |
doc_embedder = SentenceTransformersDocumentEmbedder(model="BAAI/bge-large-en-v1.5")
|
20 |
text_embedder = SentenceTransformersTextEmbedder(model="BAAI/bge-large-en-v1.5")
|
21 |
retriever = InMemoryEmbeddingRetriever(document_store=document_store, top_k=5)
|
22 |
+
reranker = SentenceTransformersSimilarityRanker(model="cross-encoder/ms-marco-MiniLM-L-6-v2")
|
|
|
23 |
|
24 |
# Initialize generator
|
25 |
generator = GoogleAIGeminiGenerator(
|
requirements.txt
CHANGED
@@ -5,12 +5,15 @@ python-multipart==0.0.9
|
|
5 |
pillow==10.3.0
|
6 |
pdfplumber==0.11.0
|
7 |
pytesseract==0.3.10
|
8 |
-
|
9 |
-
# Sentence Transformers (explicit, though Haystack pulls it too)
|
10 |
sentence-transformers==3.0.1
|
11 |
|
12 |
-
#
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
# gRPC (needed by Google / Haystack)
|
16 |
grpcio
|
|
|
5 |
pillow==10.3.0
pdfplumber==0.11.0
pytesseract==0.3.10
sentence-transformers==3.0.1

# Google Generative AI (listed once; a second identical pin was redundant)
google-generativeai==0.7.2

# Haystack v2 core + integrations
haystack-ai==2.1.0
# NOTE(review): confirm this distribution name on PyPI and pin a version.
# Haystack's Google AI (Gemini) integration is, to my knowledge, published as
# `google-ai-haystack` and imported from
# `haystack_integrations.components.generators.google_ai` - verify before relying on it.
haystack-google-generative-ai  # Required for Gemini integration

# gRPC (needed by Google / Haystack)
# NOTE(review): unpinned; consider pinning for reproducible builds.
grpcio
|