Spaces:

nikhildsst
/

RAG_Chatbot

Running

nikhildsst committed on Jan 30

Commit

1638245

verified ·

1 Parent(s): c5322fc

Upload 6 files

Files changed (6) hide show

embeddings/__pycache__/embedding_manager.cpython-310.pyc ADDED Viewed

Binary file (907 Bytes). View file

embeddings/embedding_manager.py ADDED Viewed

+# embeddings/embedding_manager.py
+from sentence_transformers import SentenceTransformer
+from config import Config
class EmbeddingManager:
    """Thin wrapper around a ``SentenceTransformer`` embedding model.

    The model named by ``Config.EMBEDDING_MODEL`` is loaded once when the
    manager is constructed and reused for every encoding call.
    """

    def __init__(self):
        # Resolve the configured model name first, then load the model.
        model_name = Config.EMBEDDING_MODEL
        self.model = SentenceTransformer(model_name)

    def get_embedding(self, text):
        """Encode a single text.

        Returns whatever ``self.model.encode`` produces for ``text``.
        """
        vector = self.model.encode(text)
        return vector

    def get_embeddings(self, texts):
        """Encode a collection of texts with one ``encode`` call.

        Returns whatever ``self.model.encode`` produces for ``texts``.
        """
        vectors = self.model.encode(texts)
        return vectors

file_processor/__pycache__/processor.cpython-310.pyc ADDED Viewed

Binary file (1.84 kB). View file

file_processor/processor.py ADDED Viewed

+# file_processor/processor.py
+import PyPDF2
+import docx
+import os
+from typing import List, Dict
class FileProcessor:
    """Extract plain text from uploaded PDF, DOCX and TXT files."""

    @staticmethod
    def process_pdf(file_path: str) -> str:
        """Extract text from a PDF file.

        Args:
            file_path: Path to the PDF file.

        Returns:
            The text of every page, each page followed by a newline.
        """
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # A page without an extractable text layer may yield a falsy
            # result (e.g. None); fall back to '' so one such page cannot
            # raise TypeError and abort the whole document.
            return "".join(
                f"{page.extract_text() or ''}\n" for page in pdf_reader.pages
            )

    @staticmethod
    def process_docx(file_path: str) -> str:
        """Extract text from a DOCX file.

        Args:
            file_path: Path to the DOCX file.

        Returns:
            The text of every paragraph, each followed by a newline.
        """
        doc = docx.Document(file_path)
        return "".join(f"{paragraph.text}\n" for paragraph in doc.paragraphs)

    @staticmethod
    def process_txt(file_path: str) -> str:
        """Return the full contents of a UTF-8 encoded text file.

        Args:
            file_path: Path to the text file.
        """
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()

    def process_file(self, file_path: str) -> str:
        """Extract text from a file, choosing the reader by extension.

        The extension is matched case-insensitively.

        Args:
            file_path: Path to a ``.pdf``, ``.docx`` or ``.txt`` file.

        Returns:
            The extracted text.

        Raises:
            ValueError: If the extension is not one of the supported formats.
        """
        ext = os.path.splitext(file_path)[1].lower()
        # Looked up through ``self`` so subclass overrides are honoured.
        handlers = {
            '.pdf': self.process_pdf,
            '.docx': self.process_docx,
            '.txt': self.process_txt,
        }
        handler = handlers.get(ext)
        if handler is None:
            raise ValueError(f"Unsupported file format: {ext}")
        return handler(file_path)

retrieval/__pycache__/vector_store.cpython-310.pyc ADDED Viewed

Binary file (1.27 kB). View file

retrieval/vector_store.py ADDED Viewed

+# retrieval/vector_store.py
+import faiss
+import numpy as np
+from typing import List, Tuple
class VectorStore:
    """In-memory text store backed by a flat FAISS L2 index.

    ``self.texts[i]`` is the text whose embedding was added to the index at
    position ``i``; the two are kept in lock-step by ``add_texts``.
    """

    def __init__(self, dimension: int):
        """Create an empty store for embeddings of size ``dimension``."""
        self.dimension = dimension
        self.index = faiss.IndexFlatL2(dimension)
        self.texts = []

    def add_texts(self, texts: List[str], embeddings: np.ndarray):
        """Add texts together with their embeddings.

        Args:
            texts: The texts to store.
            embeddings: One embedding row per text. A single 1-D vector is
                accepted when exactly one text is given.

        Raises:
            ValueError: If the number of embeddings differs from the number
                of texts (this would silently misalign search results).
        """
        texts = list(texts)
        vectors = np.asarray(embeddings, dtype=np.float32)
        if not texts and vectors.size == 0:
            return  # nothing to add
        if vectors.ndim == 1:
            vectors = vectors.reshape(1, -1)
        if vectors.shape[0] != len(texts):
            raise ValueError(
                f"Got {len(texts)} texts but {vectors.shape[0]} embeddings"
            )
        # Update the index first: if FAISS rejects the vectors, the text
        # list is left untouched and stays aligned with the index.
        self.index.add(np.ascontiguousarray(vectors))
        self.texts.extend(texts)

    def search(self, query_embedding: np.ndarray, k: int) -> List[Tuple[str, float]]:
        """Return up to ``k`` stored texts nearest to ``query_embedding``.

        Args:
            query_embedding: A single query vector.
            k: Maximum number of neighbours to return.

        Returns:
            ``(text, distance)`` pairs in the order reported by the index,
            where ``distance`` is the value the index returned for that hit.
        """
        query = np.ascontiguousarray(query_embedding, dtype=np.float32).reshape(1, -1)
        distances, indices = self.index.search(query, k)
        results = []
        for idx, distance in zip(indices[0], distances[0]):
            # FAISS pads missing neighbours with index -1; without the lower
            # bound that would wrap around to the last stored text.
            if 0 <= idx < len(self.texts):
                results.append((self.texts[int(idx)], float(distance)))
        return results