MouadHsb committed
Commit cee458a · 1 Parent(s): c1911d8

Switching to API for embedding

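The commit replaces local, in-process model inference with calls to the Hugging Face Inference API. For context, the new service is built around the huggingface_hub call sketched below; this is a minimal illustration, not part of the commit, and the token value is a placeholder.

# Minimal sketch of the Inference API call the new service relies on.
# Requires huggingface_hub; the api_key value is a placeholder.
from huggingface_hub import InferenceClient

client = InferenceClient(api_key="hf_...")  # placeholder token
vec = client.feature_extraction(
    text="The Eiffel Tower is in Paris.",
    model="sentence-transformers/all-MiniLM-L6-v2",
)
print(vec.shape)  # expect (384,) for this model; shape can vary by model
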
app/services/embedding_service copy.py CHANGED
@@ -1,47 +1,61 @@
 import logging
 import numpy as np
-from typing import List, Dict, Any, Tuple
-from sentence_transformers import SentenceTransformer
 import torch
-import os
-
-os.environ["PYTORCH_ENABLE_META_TENSORS"] = "0"
+from typing import List, Dict, Any, Tuple
+from transformers import AutoModel, AutoTokenizer
 
 logger = logging.getLogger(__name__)
 
 class EmbeddingService:
-    """Service for handling document embeddings using Sentence Transformers."""
+    """Service for handling document embeddings using Hugging Face Transformers."""
 
-    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
+    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
         """
         Initialize the embedding system.
 
         Args:
-            model_name: Name of the Sentence Transformers model to use. Default is "all-MiniLM-L6-v2" (80MB).
+            model_name: Name of the model to use. Default is "sentence-transformers/all-MiniLM-L6-v2".
         """
         logger.info(f"Loading embedding model: {model_name}")
 
-        # Explicitly set device to CPU to avoid meta tensor issue
+        # Disable gradients for inference
        torch.set_grad_enabled(False)
-        # With 16GB of RAM, we can afford to use standard loading without memory optimization
-        # Force the model to load fully into memory without any meta tensors
-        torch.cuda.empty_cache() if torch.cuda.is_available() else None
-
-        #########################################################
-        torch.set_default_device("cpu")
-        #########################################################
 
-        self.model = SentenceTransformer(model_name, device="cpu")
-
-        # Ensure model is fully materialized, not using meta tensors
-        for param in self.model.parameters():
-            if hasattr(param, 'is_meta') and param.is_meta:
-                # Should not happen with environment variable set, but just in case
-                raise RuntimeError("Meta tensors still detected despite disabling them")
+        # Load tokenizer and model
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModel.from_pretrained(model_name)
+
+        self.model.eval()
 
-        self.embedding_dim = self.model.get_sentence_embedding_dimension()
+        # Get embedding dimension from model config
+        self.embedding_dim = self.model.config.hidden_size
         logger.info(f"Embedding dimension: {self.embedding_dim}")
+
+    def _mean_pooling(self, model_output, attention_mask):
+        """
+        Perform mean pooling on token embeddings.
+
+        Args:
+            model_output: Output from the transformer model
+            attention_mask: Attention mask to avoid padding tokens
+
+        Returns:
+            Sentence embeddings
+        """
+        # First element of model_output contains token embeddings
+        token_embeddings = model_output[0]
+
+        # Expand attention mask from [batch_size, seq_length] to [batch_size, seq_length, hidden_size]
+        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+
+        # Sum token embeddings and divide by the expanded mask
+        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
+        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+
+        # Return mean-pooled embeddings
+        return sum_embeddings / sum_mask
 
     def embed_documents(self, documents: List[Dict[str, Any]]) -> Tuple[np.ndarray, List[Dict[str, Any]]]:
         """
         Embed a list of documents.
@@ -54,7 +68,38 @@ class EmbeddingService:
         """
         texts = [doc["text"] for doc in documents]
         logger.info(f"Embedding {len(texts)} documents...")
-        embeddings = self.model.encode(texts, show_progress_bar=True, convert_to_numpy=True)
+
+        # Process in batches to avoid OOM
+        batch_size = 8
+        all_embeddings = []
+
+        for i in range(0, len(texts), batch_size):
+            batch_texts = texts[i:i+batch_size]
+
+            # Tokenize batch
+            encoded_input = self.tokenizer(
+                batch_texts,
+                padding=True,
+                truncation=True,
+                max_length=512,
+                return_tensors='pt'
+            ).to("cpu")
+
+            # Compute token embeddings
+            with torch.no_grad():
+                model_output = self.model(**encoded_input)
+
+            # Apply mean pooling
+            batch_embeddings = self._mean_pooling(model_output, encoded_input['attention_mask'])
+
+            # Normalize embeddings
+            batch_embeddings = torch.nn.functional.normalize(batch_embeddings, p=2, dim=1)
+
+            # Convert to numpy and add to result
+            all_embeddings.append(batch_embeddings.cpu().numpy())
+
+        # Combine all batches
+        embeddings = np.vstack(all_embeddings)
 
         return embeddings, documents
 
@@ -68,7 +113,26 @@ class EmbeddingService:
         Returns:
             Query embedding array.
         """
-        return self.model.encode([query], convert_to_numpy=True)
+        # Tokenize query
+        encoded_input = self.tokenizer(
+            [query],
+            padding=True,
+            truncation=True,
+            max_length=512,
+            return_tensors='pt'
+        ).to("cpu")
+
+        # Compute token embeddings
+        with torch.no_grad():
+            model_output = self.model(**encoded_input)
+
+        # Apply mean pooling
+        embeddings = self._mean_pooling(model_output, encoded_input['attention_mask'])
+
+        # Normalize embeddings
+        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
+
+        return embeddings.cpu().numpy()
 
     def get_model_info(self) -> Dict[str, Any]:
         """
@@ -77,21 +141,9 @@ class EmbeddingService:
         Returns:
             Dictionary with model information.
         """
-        # Access the model attributes in a safer way
-        try:
-            model_name = self.model._model_config.get('name',
-                self.model._model_config.get('model_name_or_path', 'unknown'))
-        except:
-            model_name = str(self.model)  # Fallback to string representation
-
-        try:
-            max_seq_length = self.model.get_max_seq_length()
-        except:
-            max_seq_length = 512  # Default value if method not available
-
         return {
-            "model_name": model_name,
+            "model_name": self.model.config.name_or_path,
             "dimension": self.embedding_dim,
-            "max_seq_length": max_seq_length,
-            "normalize_embeddings": getattr(self.model, "normalize_embeddings", True)
-        }
+            "max_seq_length": self.model.config.max_position_embeddings,
+            "normalize_embeddings": True  # We're always normalizing
+        }
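
Note on this file: the _mean_pooling + L2-normalization recipe introduced above is the standard pooling for all-MiniLM-L6-v2, so it should reproduce what SentenceTransformer.encode returned before the change. A minimal sketch to check the equivalence (assumes both libraries are installed locally; not part of the commit):

# Sketch: verify that manual mean pooling + L2 normalization matches
# sentence-transformers output for the same checkpoint.
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer
from sentence_transformers import SentenceTransformer

name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModel.from_pretrained(name).eval()

texts = ["The Eiffel Tower is in Paris."]
enc = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
with torch.no_grad():
    out = model(**enc)

# Mean pooling over non-padding tokens, as in _mean_pooling above
mask = enc["attention_mask"].unsqueeze(-1).float()
pooled = (out[0] * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
manual = F.normalize(pooled, p=2, dim=1)

# This checkpoint's sentence-transformers pipeline applies the same pooling + normalization
ref = SentenceTransformer(name).encode(texts, convert_to_numpy=True)
print(torch.allclose(manual, torch.from_numpy(ref), atol=1e-5))  # expect True
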
app/services/embedding_service.py CHANGED
@@ -1,64 +1,48 @@
+import os
 import logging
 import numpy as np
-import torch
 from typing import List, Dict, Any, Tuple
-from transformers import AutoModel, AutoTokenizer
+from huggingface_hub import InferenceClient
 
 logger = logging.getLogger(__name__)
 
 class EmbeddingService:
-    """Service for handling document embeddings using Hugging Face Transformers."""
+    """Service for handling document embeddings using Hugging Face Inference API."""
 
-    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
+    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2", api_key=None):
         """
         Initialize the embedding system.
 
         Args:
             model_name: Name of the model to use. Default is "sentence-transformers/all-MiniLM-L6-v2".
+            api_key: Hugging Face API key (will use env var if None)
         """
         logger.info(f"Loading embedding model: {model_name}")
 
-        # Disable gradients for inference
-        torch.set_grad_enabled(False)
-
-        # Load tokenizer and model
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModel.from_pretrained(model_name)
-
-
-        self.model.eval()
-
-        # Get embedding dimension from model config
-        self.embedding_dim = self.model.config.hidden_size
+        # Set up API credentials
+        self.api_key = api_key or os.environ.get("HF_API_KEY")
+        self.client = InferenceClient(api_key=self.api_key)
+
+        # Store model name for future references
+        self.model_name = model_name
+
+        # Known embedding dimensions for common models
+        # Update this if you use a different model
+        embedding_dims = {
+            "sentence-transformers/all-MiniLM-L6-v2": 384,
+            "sentence-transformers/all-mpnet-base-v2": 768,
+            "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": 384,
+            "sentence-transformers/paraphrase-MiniLM-L6-v2": 384,
+            "BAAI/bge-small-en-v1.5": 384,
+            "BAAI/bge-base-en-v1.5": 768
+        }
+
+        self.embedding_dim = embedding_dims.get(model_name, 384)  # Default to 384 if unknown
         logger.info(f"Embedding dimension: {self.embedding_dim}")
 
-    def _mean_pooling(self, model_output, attention_mask):
-        """
-        Perform mean pooling on token embeddings.
-
-        Args:
-            model_output: Output from the transformer model
-            attention_mask: Attention mask to avoid padding tokens
-
-        Returns:
-            Sentence embeddings
-        """
-        # First element of model_output contains token embeddings
-        token_embeddings = model_output[0]
-
-        # Expand attention mask from [batch_size, seq_length] to [batch_size, seq_length, hidden_size]
-        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-
-        # Sum token embeddings and divide by the expanded mask
-        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
-        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
-
-        # Return mean-pooled embeddings
-        return sum_embeddings / sum_mask
-
     def embed_documents(self, documents: List[Dict[str, Any]]) -> Tuple[np.ndarray, List[Dict[str, Any]]]:
         """
-        Embed a list of documents.
+        Embed a list of documents using the Hugging Face Inference API.
 
         Args:
             documents: List of document dictionaries.
@@ -69,43 +53,42 @@ class EmbeddingService:
         texts = [doc["text"] for doc in documents]
         logger.info(f"Embedding {len(texts)} documents...")
 
-        # Process in batches to avoid OOM
-        batch_size = 8
+        # Process in reasonably sized batches to optimize API calls
+        batch_size = 32  # Adjust based on your needs and API limits
         all_embeddings = []
 
         for i in range(0, len(texts), batch_size):
             batch_texts = texts[i:i+batch_size]
 
-            # Tokenize batch
-            encoded_input = self.tokenizer(
-                batch_texts,
-                padding=True,
-                truncation=True,
-                max_length=512,
-                return_tensors='pt'
-            ).to("cpu")
-
-            # Compute token embeddings
-            with torch.no_grad():
-                model_output = self.model(**encoded_input)
-
-            # Apply mean pooling
-            batch_embeddings = self._mean_pooling(model_output, encoded_input['attention_mask'])
-
-            # Normalize embeddings
-            batch_embeddings = torch.nn.functional.normalize(batch_embeddings, p=2, dim=1)
-
-            # Convert to numpy and add to result
-            all_embeddings.append(batch_embeddings.cpu().numpy())
+            try:
+                # Call Inference API for feature-extraction (embeddings)
+                response = self.client.feature_extraction(
+                    text=batch_texts,
+                    model=self.model_name
+                )
+
+                # Convert response to numpy array and add to results
+                batch_embeddings = np.array(response)
+                all_embeddings.append(batch_embeddings)
+
+                logger.info(f"Successfully embedded batch {i//batch_size + 1}/{(len(texts)-1)//batch_size + 1}")
+            except Exception as e:
+                logger.error(f"Error embedding batch {i//batch_size + 1}: {str(e)}")
+                # Skip problematic batch or raise exception
+                raise  # Re-raise for now to see errors in logs
 
         # Combine all batches
+        if not all_embeddings:
+            logger.warning("No embeddings were generated. Returning empty array.")
+            return np.array([]), documents
+
         embeddings = np.vstack(all_embeddings)
 
         return embeddings, documents
 
     def embed_query(self, query: str) -> np.ndarray:
         """
-        Embed a search query.
+        Embed a search query using the Hugging Face Inference API.
 
         Args:
             query: The search query.
@@ -113,26 +96,19 @@ class EmbeddingService:
         Returns:
             Query embedding array.
         """
-        # Tokenize query
-        encoded_input = self.tokenizer(
-            [query],
-            padding=True,
-            truncation=True,
-            max_length=512,
-            return_tensors='pt'
-        ).to("cpu")
-
-        # Compute token embeddings
-        with torch.no_grad():
-            model_output = self.model(**encoded_input)
-
-        # Apply mean pooling
-        embeddings = self._mean_pooling(model_output, encoded_input['attention_mask'])
-
-        # Normalize embeddings
-        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
-
-        return embeddings.cpu().numpy()
+        try:
+            # Call Inference API for feature-extraction
+            response = self.client.feature_extraction(
+                text=[query],
+                model=self.model_name
+            )
+
+            # Convert to numpy array
+            embedding = np.array(response)
+            return embedding
+        except Exception as e:
+            logger.error(f"Error embedding query: {str(e)}")
+            raise  # Re-raise for now to see errors in logs
 
     def get_model_info(self) -> Dict[str, Any]:
         """
@@ -142,8 +118,8 @@ class EmbeddingService:
             Dictionary with model information.
         """
         return {
-            "model_name": self.model.config.name_or_path,
+            "model_name": self.model_name,
             "dimension": self.embedding_dim,
-            "max_seq_length": self.model.config.max_position_embeddings,
-            "normalize_embeddings": True # We're always normalizing
+            "max_seq_length": 512,  # Common default, may vary by model
+            "normalize_embeddings": True  # Typically normalized in sentence-transformers models
         }
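
Putting it together, the new API-backed service would be used roughly as follows. A hypothetical sketch: the import path mirrors the repo layout, the token is a placeholder, and document dicts carry a "text" key as embed_documents expects.

# Sketch: exercising the new API-backed EmbeddingService.
import os
os.environ["HF_API_KEY"] = "hf_..."  # placeholder token read by the service

from app.services.embedding_service import EmbeddingService

service = EmbeddingService()  # defaults to sentence-transformers/all-MiniLM-L6-v2
docs = [
    {"text": "Paris is the capital of France."},
    {"text": "The Eiffel Tower is in Paris."},
]

embeddings, docs = service.embed_documents(docs)  # one API call per batch of 32
query_vec = service.embed_query("Where is the Eiffel Tower?")
print(embeddings.shape, query_vec.shape)  # e.g. (2, 384) and (1, 384), per the API's response shape
print(service.get_model_info())
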