Commit dc078e3 · sravan committed
Parent(s): ae692a1

first working application
Files changed:
- .gitignore +1 -1
- callbacks.py +6 -3
- chains.py +88 -8
- code_data/langchain_repo +1 -0
- data_indexing.py +132 -29
- main.py +57 -31
- prompts.py +14 -12
- sources.txt +0 -0
- test.db +0 -0
.gitignore
CHANGED
@@ -1,4 +1,4 @@
 myenv
 *pycache*

-
+dang.py
callbacks.py
CHANGED
@@ -2,6 +2,7 @@ from typing import Dict, Any, List
 from langchain_core.callbacks import BaseCallbackHandler
 import schemas
 import crud
+from datetime import datetime


 class LogResponseCallback(BaseCallbackHandler):
@@ -16,13 +17,15 @@ class LogResponseCallback(BaseCallbackHandler):
         # TODO: The function on_llm_end is going to be called when the LLM stops sending
         # the response. Use the crud.add_message function to capture that response.
         type = 'AI'
-        user_data = crud.
-        user_id = user_data.
+        user_data = crud.get_or_create_user(self.db, self.user_request.username)
+        user_id = user_data.id
         timestamp = datetime.now()
-        message = outputs
+        message = str(outputs)  # answer from the prompt message
+        print("history messages", message)
         message_to_add = schemas.MessageBase(
             user_id = user_id,
             message = message,
+            user=user_data.username,
             type = type,
             timestamp = timestamp
         )
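Note: the hunk above fills in on_llm_end so the assistant's reply is persisted through crud.add_message. Below is a minimal, self-contained sketch of that pattern; the stubbed request object and the plain dictionary standing in for schemas.MessageBase are illustrative assumptions, since crud.py and schemas.py are not part of this commit.

```python
# Sketch of the response-logging callback pattern from callbacks.py, runnable
# without the project's crud/schemas modules (stand-ins are used instead).
from datetime import datetime
from types import SimpleNamespace

from langchain_core.callbacks import BaseCallbackHandler


class LogResponseCallbackSketch(BaseCallbackHandler):
    def __init__(self, user_request, db):
        self.user_request = user_request
        self.db = db

    def on_llm_end(self, outputs, **kwargs):
        # Capture the final LLM output and persist it for the requesting user.
        record = {
            "user": self.user_request.username,
            "message": str(outputs),
            "type": "AI",
            "timestamp": datetime.now(),
        }
        # In the real code this becomes crud.add_message(self.db, schemas.MessageBase(**record), ...)
        print("would persist:", record)


if __name__ == "__main__":
    cb = LogResponseCallbackSketch(SimpleNamespace(username="alice"), db=None)
    cb.on_llm_end(outputs="Hello from the model")
```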
chains.py
CHANGED
@@ -1,6 +1,7 @@
 import os
 from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
 from langchain_core.runnables import RunnablePassthrough
+from langchain.schema.runnable import RunnableLambda

 import schemas
 import prompts
@@ -12,7 +13,7 @@ from prompts import (
     standalone_prompt_formatted,
     rag_prompt_formatted
 )
-from data_indexing import DataIndexer
+from data_indexing import DataIndexer
 from transformers import AutoTokenizer

 data_indexer = DataIndexer()
@@ -52,37 +53,116 @@ llm_endpoint = HuggingFaceEndpoint(

 llm = ChatHuggingFace(llm=llm_endpoint)

+def print_and_pass(prompt_output):
+    print("=" * 60)
+    print("🔍 RAW PROMPT FORMATTED:")
+    print("=" * 60)
+    print(prompt_output)
+    print("=" * 60)
+    return prompt_output  # IMPORTANT: Must return the prompt unchanged
+
 simple_chain = (raw_prompt | llm).with_types(input_type=schemas.UserQuestion)

+
+
 # TODO: create formatted_chain by piping raw_prompt_formatted and the LLM endpoint.
-formatted_chain = (raw_prompt_formatted | llm).with_types(input_type=schemas.UserQuestion)
+formatted_chain = (raw_prompt_formatted | RunnableLambda(print_and_pass) | llm).with_types(input_type=schemas.UserQuestion)

 # TODO: use history_prompt_formatted and HistoryInput to create the history_chain
-history_chain = (history_prompt_formatted | llm).with_types(input_type=schemas.HistoryInput)
+history_chain = (history_prompt_formatted | RunnableLambda(print_and_pass) | llm).with_types(input_type=schemas.HistoryInput)

 # TODO: Let's construct the standalone_chain by piping standalone_prompt_formatted with the LLM
 standalone_chain = (standalone_prompt_formatted | llm).with_types(input_type=schemas.HistoryInput)

+# summarize_chain = (summarize_propt_formatted | llm)
+
+import ast
+
+def extract_definitions(source_code):
+    """
+    Extract top-level function and class definitions from Python code.
+    """
+    result = []
+    try:
+        tree = ast.parse(source_code)
+        for node in ast.iter_child_nodes(tree):
+            if isinstance(node, (ast.FunctionDef, ast.ClassDef)):
+                snippet = ast.get_source_segment(source_code, node)
+                if snippet:
+                    result.append(snippet)
+    except Exception as e:
+        print(f"Failed to parse code: {e}")
+    return result
+
+import re
+
+def clean_code_text(code_text):
+    """
+    Remove comments and excessive blank lines for brevity.
+    """
+    # Remove multiline docstrings and comments
+    code_text = re.sub(r'"""(.*?)"""', '', code_text, flags=re.DOTALL)
+    code_text = re.sub(r"'''(.*?)'''", '', code_text, flags=re.DOTALL)
+
+    # Remove inline comments
+    code_text = re.sub(r'#.*', '', code_text)
+
+    # Remove excessive whitespace
+    code_text = re.sub(r'\n\s*\n+', '\n\n', code_text)
+
+    return code_text.strip()
+
+
+def safe_format_context(search_results):
+    try:
+        cleaned_results = []
+        for result in search_results:
+            if isinstance(result, str):
+                # Optionally: extract relevant functions/classes
+                code_parts = extract_definitions(result)
+                for part in code_parts:
+                    cleaned = clean_code_text(part)
+                    cleaned_results.append(cleaned)
+        return format_context(cleaned_results)
+    except Exception as e:
+        print(f"Error formatting context: {str(e)}")
+        return "No relevant context found."
+
+
 input_1 = RunnablePassthrough.assign(new_question=standalone_chain)
+
+# input_1_beta = RunnablePassThrough.assign(new_context=summarize_chain)
+
+def extract_question_text(new_question):
+    if hasattr(new_question, "content"):
+        return new_question.content
+    return str(new_question)
+
+# summarize_context = {
+#     'context': lambda x: safe_format_context(data_indexer.search(extract_question_text(x['new_question']))),
+#     'standalone_question': lambda x: extract_question_text(x['new_question']),
+# }
+
 input_2 = {
-    'context': lambda x:
-    'standalone_question': lambda x: x['new_question']
+    'context': lambda x: safe_format_context(data_indexer.search(extract_question_text(x['new_question']))),
+    'standalone_question': lambda x: extract_question_text(x['new_question']),
 }
+
 input_to_rag_chain = input_1 | input_2

 # TODO: use input_to_rag_chain, rag_prompt_formatted,
 # HistoryInput and the LLM to build the rag_chain.
-rag_chain = (input_to_rag_chain | rag_prompt_formatted | llm).with_types(input_type=schemas.RagInput)
+rag_chain = (input_to_rag_chain | RunnableLambda(print_and_pass) | rag_prompt_formatted | RunnableLambda(print_and_pass) | llm).with_types(input_type=schemas.RagInput)

 # TODO: Implement the filtered_rag_chain. It should be the
 # same as the rag_chain but with hybrid_search = True.

 input_2_hybrid_search = {
-    'context': lambda x:
+    'context': lambda x: safe_format_context(data_indexer.search(extract_question_text(x['new_question']), hybrid_search=True)),
     'standalone_question': lambda x: x['new_question']
 }

-filtered_rag_chain = (input_1 | input_2_hybrid_search | rag_prompt_formatted | llm ).with_types(input_type=schemas.RagInput)
+filtered_rag_chain = (input_1 | input_2_hybrid_search | rag_prompt_formatted | RunnableLambda(print_and_pass) | llm).with_types(input_type=schemas.RagInput)
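Note: the RunnableLambda(print_and_pass) stages added above act as pass-through debug taps that dump the fully rendered prompt between pipeline steps. Below is a small sketch of that pattern; it swaps the HuggingFace endpoint for a fake echo stage so it runs offline, and all names here are illustrative rather than part of this repo.

```python
# Minimal sketch of the "print and pass through" debug tap used in chains.py,
# wired into a toy LCEL pipeline so it runs without a HuggingFace endpoint.
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda

prompt = PromptTemplate.from_template("Question: {question}\nAnswer:")


def print_and_pass(value):
    # Dump whatever flows through this stage, then hand it on unchanged.
    print("=" * 40)
    print(value)
    print("=" * 40)
    return value


# Stand-in for `llm`: echoes the rendered prompt back as a string.
fake_llm = RunnableLambda(lambda prompt_value: f"(model answer for) {prompt_value.to_string()}")

debug_chain = prompt | RunnableLambda(print_and_pass) | fake_llm

if __name__ == "__main__":
    print(debug_chain.invoke({"question": "What is LCEL?"}))
```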
code_data/langchain_repo
ADDED
@@ -0,0 +1 @@
+Subproject commit 2d0713c2fc5a457578635b03b7a00e970ce534ee
data_indexing.py
CHANGED
@@ -6,9 +6,41 @@ from pinecone import ServerlessSpec
 from langchain_community.vectorstores import Chroma
 from langchain_openai import OpenAIEmbeddings
 from huggingface_hub import InferenceClient
+from typing import List
+from datetime import datetime
+from sentence_transformers import SentenceTransformer
+from langchain.embeddings.base import Embeddings
+from langchain_community.embeddings import HuggingFaceEmbeddings
+import json
+

 current_dir = Path(__file__).resolve().parent

+class SentenceTransfmEmbeddings(Embeddings):
+    """Sentence Transformers embedding class"""
+
+    def __init__(self, model_name: str = "sentence-transformers/all-mpnet-base-v2"):
+        self.model = SentenceTransformer(model_name)
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Embed a list of documents"""
+        try:
+            embeddings = self.model.encode(texts)
+            return embeddings.tolist()
+        except Exception as e:
+            print(f"Error embedding documents: {e}")
+            # Return dummy embeddings to prevent crash
+            return [[0.0] * 768 for _ in texts]
+
+    def embed_query(self, text: str) -> List[float]:
+        """Embed a single query"""
+        try:
+            embedding = self.model.encode([text])
+            return embedding[0].tolist()
+        except Exception as e:
+            print(f"Error embedding query: {e}")
+            return [0.0] * 768
+

 class DataIndexer:

@@ -18,9 +50,16 @@ class DataIndexer:

         # TODO: choose your embedding model
         self.embedding_client = InferenceClient(
-            "dunzhang/stella_en_1.5B_v5",
+            # "dunzhang/stella_en_1.5B_v5",
+            "sentence-transformers/all-mpnet-base-v2",
             token=os.environ['HF_TOKEN'],
         )
+        self.embeddings = SentenceTransfmEmbeddings(
+            "sentence-transformers/all-mpnet-base-v2"
+        )
+        # self.embeddings = HuggingFaceEmbeddings(
+        #     model_name="sentence-transformers/all-mpnet-base-v2"
+        # )
         self.spec = ServerlessSpec(
             cloud = 'aws',
             region='us-east-1'
@@ -34,14 +73,22 @@ class DataIndexer:
         # Make sure to choose the dimension that corresponds to your embedding model
         self.pinecone_client.create_index(
             name=index_name,
-            dimension=
+            dimension=768,
             metric='cosine',
             spec=self.spec
         )

         self.index = self.pinecone_client.Index(self.index_name)
         # TODO: make sure to build the index.
-
+        # with open(self.source_file, 'r') as file:
+        #     sources = file.readlines()
+
+        # sources = [s.strip() for s in sources if s.strip()]
+        # if not sources:
+        #     self.source_index = None
+        # else:
+        #     self.source_index = self.get_source_index()
+        self.source_index=None

     def get_source_index(self):
         if not os.path.isfile(self.source_file):
@@ -53,9 +100,17 @@ class DataIndexer:
         with open(self.source_file, 'r') as file:
             sources = file.readlines()

-        sources = [s.
+        sources = [s.strip() for s in sources if s.strip()]
+        if not sources:
+            print("No valid sources to index")
+            return None
+        print("sources are:", sources)
+        ## testing
+        embeddings = self.embeddings.embed_documents(sources)
+        print(f"Generated {len(embeddings)} embeddings for {len(sources)} sources")
+        ## testing
         vectorstore = Chroma.from_texts(
-            sources, embedding=self.
+            sources, embedding=self.embeddings
         )
         return vectorstore

@@ -64,19 +119,21 @@ class DataIndexer:
         with open(self.source_file, 'a') as file:
             for doc in docs:
                 file.writelines(doc.metadata['source'] + '\n')
+
+        self.source_index = self.get_source_index()

         for i in range(0, len(docs), batch_size):
             batch = docs[i: i + batch_size]

             # TODO: create a list of the vector representations of each text data in the batch
             # TODO: choose your embedding model
-            values = self.embedding_client.embed_documents([
-                doc.page_content for doc in batch
-            ])
-
-            # values = self.embedding_client.feature_extraction([
+            # values = self.embedding_client.embed_documents([
             #     doc.page_content for doc in batch
             # ])
+
+            values = self.embedding_client.feature_extraction([
+                doc.page_content for doc in batch
+            ])
             # values = None

             # TODO: create a list of unique identifiers for each element in the batch with the uuid package.
@@ -85,7 +142,7 @@ class DataIndexer:
             # TODO: create a list of dictionaries representing the metadata. Capture the text data
             # with the "text" key, and make sure to capture the rest of the doc.metadata.
             metadatas = [{"text": doc.page_content,
-                          **doc.metadata
+                          **(doc.metadata if doc.metadata else {})
                          } for doc in batch]

             # create a list of dictionaries with keys "id" (the unique identifiers), "values"
@@ -96,6 +153,8 @@ class DataIndexer:
                 'metadata': metadata
             } for vector_id, value, metadata in zip(vector_ids, values, metadatas)]

+            for v in vectors[:5]:
+                print("Metadata:", v['metadata'])
             try:
                 # TODO: Use the function upsert to upload the data to the database.
                 upsert_response = self.index.upsert(vectors)
@@ -111,28 +170,48 @@ class DataIndexer:
         # to the question. Make sure to adjust this number as you see fit.
         source_docs = self.source_index.similarity_search(text_query, 50)
         filter = {"source": {"$in":[doc.page_content for doc in source_docs]}}
-
+        result=""
         # TODO: embed the text_query by using the embedding model
         # TODO: choose your embedding model
         # vector = self.embedding_client.feature_extraction(text_query)
-
-
+        try:
+            print("text")
+            print(text_query)
+            vector = self.embedding_client.feature_extraction(
+                text = text_query,
+            )
+            if vector is None:
+                print("failed to embed the text query in vector search query for pinecone")
+                return []
+            else:
+                print("debug1_result")
+                result = self.index.query(vector,
+                    filter=filter,
+                    top_k=top_k,
+                    include_values=True,
+                    include_metadata=True
+                )
+                print(f"debugged_result query successful without error for the question:{text_query}")
+
+                docs = []
+                # print(f" none type in result? {result}")
+                for res in result["matches"]:
+                    # TODO: From the result's metadata, extract the "text" element.
+                    print("results filename:", res['metadata']['file_name'])
+                    print("result score:", res['score'])
+                    if res['score'] > 0.540:
+                        docs.append(res['metadata']['text'])
+                    # pass
+                # print("docs: ", docs[0])
+
+                return docs
+        except Exception as e:
+            print(f"error in search:{e}")
+            return []

         # TODO: use the vector representation of the text_query to
         # search the database by using the query function.
-
-            filter=filter,
-            top_k=top_k,
-            include_values=True
-        )
-
-        docs = []
-        for res in result["matches"]:
-            # TODO: From the result's metadata, extract the "text" element.
-            docs.append(res['metadata']['text'])
-            # pass
-
-        return docs
+


 if __name__ == '__main__':
@@ -142,6 +221,7 @@ if __name__ == '__main__':
         Language,
         RecursiveCharacterTextSplitter,
     )
+    print("start:", datetime.now())

     loader = GitLoader(
         clone_url="https://github.com/langchain-ai/langchain",
@@ -159,9 +239,32 @@ if __name__ == '__main__':
     docs = python_splitter.split_documents(docs)
     for doc in docs:
         doc.page_content = '# {}\n\n'.format(doc.metadata['source']) + doc.page_content
-
+    print("before instantiating the indexer:", datetime.now())
     indexer = DataIndexer()
-
+    print("after instantiating the indexer:", datetime.now())
+    with open('./app/sources.txt', 'a') as file:
         for doc in docs:
             file.writelines(doc.metadata['source'] + '\n')
+    print("after writing the sources:", datetime.now())
     indexer.index_data(docs)
+    print("end:", datetime.now())
+
+    # ###### test ###########
+    # test_docs = docs[:2]  # Just try first two documents
+    # print("\nTest Document Details:")
+    # print(f"Number of test documents: {len(test_docs)}")
+    # for idx, doc in enumerate(test_docs):
+    #     print(f"\nDocument {idx + 1}:")
+    #     print(f"Content length: {len(doc.page_content)}")
+    #     # print(f"First 100 chars: {doc.page_content[:100]}")
+    #     print(f"Metadata: {doc.metadata}")
+
+    # # try:
+    #     print("\nInitializing DataIndexer...")
+    #     indexer = DataIndexer()
+    #     print("\nStarting indexing...")
+    #     indexer.index_data(test_docs)
+    #     print("Test indexing successful")
+    # # except Exception as e:
+    # #     print(f"Test indexing failed: {str(e)}")
+
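Note: the search method above now keeps only Pinecone matches whose similarity score clears 0.540 before returning their stored text. Below is a tiny sketch of that post-query filtering step; the match dictionaries are fabricated stand-ins for index.query output, so it runs without Pinecone.

```python
# Sketch of the post-query filtering step in data_indexing.py: keep only matches
# above a similarity threshold and return the "text" stored in their metadata.
SCORE_THRESHOLD = 0.540  # same cut-off the diff uses


def extract_passing_texts(matches, threshold=SCORE_THRESHOLD):
    docs = []
    for match in matches:
        print("result file:", match["metadata"].get("file_name"), "score:", match["score"])
        if match["score"] > threshold:
            docs.append(match["metadata"]["text"])
    return docs


if __name__ == "__main__":
    fake_matches = [
        {"score": 0.71, "metadata": {"file_name": "chains.py", "text": "def build_chain(): ..."}},
        {"score": 0.42, "metadata": {"file_name": "README.md", "text": "project overview"}},
    ]
    print(extract_passing_texts(fake_matches))  # only the 0.71 match survives
```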
main.py
CHANGED
@@ -9,11 +9,12 @@ from datetime import datetime

 import schemas
 from models import Message
-from chains import simple_chain, formatted_chain, history_chain, rag_chain
+from chains import simple_chain, formatted_chain, history_chain, rag_chain, filtered_rag_chain
 from prompts import format_chat_history
 import crud, models, schemas
 from database import SessionLocal, engine
 from callbacks import LogResponseCallback
+import json

 # temporary
 from database import engine
@@ -42,17 +43,32 @@ def get_db():
     # yield {'data': data, "event": "data"}
     # yield {"event": "end"}

-async def generate_stream(input_data: schemas.BaseModel, runnable: Runnable, callbacks: List[BaseCallbackHandler]=[]):
-
-
-
-
-
-
-
-
-
+async def generate_stream(input_data: schemas.BaseModel, runnable: Runnable, callbacks: List[BaseCallbackHandler]=[], response_callback=None):
+    complete_response = ""
+    if callbacks is None:
+        callbacks = []
+    try:
+        stream_iterator = runnable.stream(input_data.dict(), config={"callbacks": callbacks})
+        for chunk in stream_iterator:
+            # ChatHuggingFace returns message chunks with a content attribute
+            if hasattr(chunk, 'content'):
+                content = chunk.content
+            else:
+                content = str(chunk)
+
+            complete_response += content
+            if content != "" or len(content) != 0:  # Only yield non-empty content
+                yield {'data': json.dumps({"content": content}), "event": "data"}
+            # yield {'data': content, "event": "data"}
+    except StopIteration:
+        print("stream ended with StopIteration")
+        yield {"event": "end"}
+    # except Exception as e:
+    #     print(f"error generating response: {e}")
+    if response_callback:
+        response_callback(complete_response)
     yield {"event": "end"}
+



@@ -73,7 +89,7 @@ async def formatted_stream(request: Request):
     output = EventSourceResponse(
         generate_stream(
             input_data = user_question,
-            runnable = formatted_chain
+            runnable = formatted_chain)
     )
     # print(output.generations[0][0].text)
     return output
@@ -99,6 +115,7 @@ async def history_stream(request: Request, db: Session = Depends(get_db)):
     # since history stream means
     # we have existing user's no need to check for a user
     chat_history = crud.get_user_chat_history(db, user_request.username)
+    print("chat_history from the database", chat_history)
     history_input = schemas.HistoryInput(
         chat_history = format_chat_history(chat_history),
         question=user_request.question
@@ -106,7 +123,7 @@ async def history_stream(request: Request, db: Session = Depends(get_db)):

     ## adding message to message database
     type = 'Human'
-    user_data = crud.get_or_create_user(db, user_request.username)
+    user_data = crud.get_or_create_user(db, user_request.username)
     user_id = user_data.id
     timestamp = str(datetime.now())
     add_message = schemas.MessageBase(
@@ -120,9 +137,10 @@ async def history_stream(request: Request, db: Session = Depends(get_db)):
     _ = crud.add_message(db,add_message, username = user_request.username)
     # chat history contains: [{ message, type, timestamp}]

-
-
-
+    init = LogResponseCallback(user_request = user_request, db = db)
+    def save_full_response(complete_response):
+        init.on_llm_end(outputs=complete_response)
+    output = EventSourceResponse(generate_stream(history_input, history_chain, response_callback=save_full_response))
     return output
     # raise NotImplemented

@@ -151,11 +169,19 @@ async def rag_stream(request: Request, db: Session = Depends(get_db)):
         user_id = user_id,
         message = user_request.question,
         type = type,
-        timestamp = timestamp
+        timestamp = timestamp,
+        user=user_request.username,
     )

     _ = crud.add_message(db,add_message, username = user_request.username)
-
+    print("/rag/stream: \n: successfully added message to database")
+    init = LogResponseCallback(user_request = user_request, db = db)
+    print("successfully initiated LogResponseCallback")
+    def save_full_response(complete_response):
+        init.on_llm_end(outputs=complete_response)
+
+    print("calling EventSourceResponse to generate stream............")
+    return EventSourceResponse(generate_stream(history_input, rag_chain, response_callback=save_full_response))
     # raise NotImplemented


@@ -169,19 +195,11 @@ async def filtered_rag_stream(request: Request, db: Session = Depends(get_db)):
     # - We create an instance of HistoryInput by using format_chat_history.
     # - We use the history input within the filtered rag chain.
     data = await request.json()
-    user_request =
-
-    messages = db.Query(
-        Message.message,
-        Message.type,
-        Message.timestamp
-    ).filter(Message.user_id == user_request.username)
+    user_request = schemas.UserRequest(**data['input'])
+    messages = crud.get_user_chat_history(db, user_request.username)
     chat_history = messages

-    history_input = schemas.HistoryInput(
-        chat_history = format_chat_history(chat_history),
-        question=user_request.question
-    )
+    history_input = schemas.HistoryInput( chat_history = format_chat_history(chat_history), question=user_request.question)
     ## adding message to message database
     type = 'Human'
     user_data = crud.get_or_create_user(db, user_request.username)
@@ -191,12 +209,20 @@ async def filtered_rag_stream(request: Request, db: Session = Depends(get_db)):
         user_id = user_id,
         message = user_request.question,
         type = type,
-        timestamp = timestamp
+        timestamp = timestamp,
+        user=user_request.username,
     )

     _ = crud.add_message(db,add_message, username = user_request.username)
+    print("/rag/stream: \n: successfully added message to database")
+    init = LogResponseCallback(user_request = user_request, db = db)
+    print("successfully initiated LogResponseCallback")
+    def save_full_response(complete_response):
+        init.on_llm_end(outputs=complete_response)
+
+    print("calling EventSourceResponse to generate stream............")

-    return EventSourceResponse(generate_stream(history_input, filtered_rag_chain))
+    return EventSourceResponse(generate_stream(history_input, filtered_rag_chain, response_callback=save_full_response))
     # raise NotImplemented


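Note: generate_stream above yields one SSE event per chunk, accumulates the full response, and hands it to response_callback once the stream ends. The sketch below reproduces that accumulate-then-callback pattern with a plain list of chunks in place of runnable.stream, so it runs without FastAPI or an LLM; all names here are illustrative.

```python
# Stand-alone sketch of the streaming pattern in main.py: yield SSE-style events
# per chunk, accumulate the full response, then hand it to a callback for logging.
import asyncio
import json


async def generate_stream(chunks, response_callback=None):
    complete_response = ""
    for content in chunks:
        complete_response += content
        if content:  # only yield non-empty content
            yield {"data": json.dumps({"content": content}), "event": "data"}
    if response_callback:
        # Called exactly once, with the full concatenated response.
        response_callback(complete_response)
    yield {"event": "end"}


async def main():
    fake_chunks = ["Hello", ", ", "world", "!"]
    async for event in generate_stream(fake_chunks, response_callback=lambda full: print("saved:", full)):
        print(event)


if __name__ == "__main__":
    asyncio.run(main())
```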
prompts.py
CHANGED
@@ -8,24 +8,24 @@ def format_prompt(prompt) -> PromptTemplate:
     template = f"""
 <|begin_of_text|><|start_header_id|>system<|end_header_id|>
 You are a helpful assistant.<|eot_id|>
-<|start_header_id|>user<|end_header_id|>
+<|start_header_id|>user<|end_header_id|> Before answering tell me if you are given an empty context or not then answer
 {prompt}<|eot_id|>
 <|start_header_id|>assistant<|end_header_id|>
 """
-    raw_template = [
-
-
-    ]
-    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
-    formatted_template = tokenizer.apply_chat_template(
-
-
-
-    )
+    # raw_template = [
+    #     {"role": "system", "content": "You are a helpful assistant."},
+    #     {"role": "user", "content": "{{prompt}}"},
+    # ]
+    # tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+    # formatted_template = tokenizer.apply_chat_template(
+    #     raw_template,
+    #     tokenize=False,
+    #     add_generation_prompt=True
+    # )

     prompt_template = PromptTemplate.from_template(
         # input_variables=["question"], the variables will be auto detected by langchain package
-
+        template
     )
     # TODO: return a langchain PromptTemplate
     return prompt_template
@@ -64,6 +64,8 @@ raw_prompt = "{question}"
 history_prompt: str = """
 Given the following conversation provide a helpful answer to the follow up question.

+explain the previous questions if I ask,
+
 Chat History:

 {chat_history}
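Note: format_prompt above relies on nested brace handling: the outer f-string substitutes {prompt} immediately, while inner placeholders such as {question} survive for PromptTemplate to fill at invoke time. A minimal sketch of that mechanic follows; the langchain_core import path is an assumption consistent with the rest of this repo.

```python
# Sketch of the prompt wrapping done in prompts.py: the outer f-string injects the
# task prompt into Llama-3 chat markup, and PromptTemplate later fills the inner
# placeholders (e.g. {question}) when the chain is invoked.
from langchain_core.prompts import PromptTemplate


def format_prompt(prompt: str) -> PromptTemplate:
    template = f"""
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful assistant.<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{prompt}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""
    # Input variables such as "question" are auto-detected from the braces left over.
    return PromptTemplate.from_template(template)


if __name__ == "__main__":
    raw_prompt_formatted = format_prompt("{question}")
    print(raw_prompt_formatted.format(question="What does LCEL stand for?"))
```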
sources.txt
ADDED
The diff for this file is too large to render. See raw diff.

test.db
CHANGED
Binary files a/test.db and b/test.db differ