File size: 4,948 Bytes
11650ac
 
 
 
 
 
288fd1b
11650ac
288fd1b
 
11650ac
 
 
 
 
 
 
 
 
 
 
288fd1b
11650ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288fd1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11650ac
 
288fd1b
11650ac
288fd1b
11650ac
 
 
 
 
 
288fd1b
11650ac
 
 
 
 
 
 
 
 
 
 
288fd1b
11650ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288fd1b
11650ac
 
 
 
 
 
 
 
 
 
 
 
288fd1b
11650ac
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# 1. IMPORT NECESSARY LIBRARIES
import streamlit as st
from transformers import pipeline

# 2. SET UP THE PAGE CONFIGURATION AND TITLE
st.set_page_config(
    page_title="GUVI AI Assistant",
    page_icon="πŸ€–",
    layout="wide",
    initial_sidebar_state="expanded"
)

# 3. LOAD ALL THREE AI MODELS
@st.cache_resource
def load_models():
    """Construct the three Hugging Face pipelines the app relies on.

    Decorated with ``st.cache_resource`` so the heavyweight models are
    instantiated once per server process and reused across Streamlit reruns.

    Returns:
        tuple: ``(translator, generator, detector)`` pipelines — mBART-50
        many-to-many translation, the fine-tuned GUVI GPT-2 generator, and
        an XLM-RoBERTa language-detection classifier, in that order.
    """
    specs = (
        ("translation", "facebook/mbart-large-50-many-to-many-mmt"),
        ("text-generation", "aswinprasath31/guvi-gpt2-finetuned"),
        ("text-classification", "papluca/xlm-roberta-base-language-detection"),
    )
    return tuple(pipeline(task, model=checkpoint) for task, checkpoint in specs)

# Load the models eagerly at script start; st.spinner shows a progress
# message in the main page area while the (cached) pipelines initialize.
with st.spinner("Initializing AI models, please wait..."):
    translator, generator, detector = load_models()

# Maps the 2-letter ISO codes emitted by the language-detection model to the
# region-qualified codes the mBART-50 translator expects (e.g. "ta" -> "ta_IN").
# Languages missing from this map fall back to English ("en_XX") downstream.
MODEL_LANG_MAP = {
    "ar": "ar_AR", "cs": "cs_CZ", "de": "de_DE", "en": "en_XX", "es": "es_XX", "et": "et_EE", "fi": "fi_FI",
    "fr": "fr_XX", "gu": "gu_IN", "hi": "hi_IN", "it": "it_IT", "ja": "ja_XX", "kk": "kk_KZ", "ko": "ko_KR",
    "lt": "lt_LT", "lv": "lv_LV", "my": "my_MM", "ne": "ne_NP", "nl": "nl_XX", "ro": "ro_RO", "ru": "ru_RU",
    "si": "si_LK", "tr": "tr_TR", "vi": "vi_VN", "zh": "zh_CN", "af": "af_ZA", "az": "az_AZ", "bn": "bn_IN",
    "fa": "fa_IR", "he": "he_IL", "hr": "hr_HR", "id": "id_ID", "ka": "ka_GE", "km": "km_KH", "mk": "mk_MK",
    "ml": "ml_IN", "mn": "mn_MN", "mr": "mr_IN", "pl": "pl_PL", "ps": "ps_AF", "pt": "pt_XX", "sv": "sv_SE",
    "sw": "sw_KE", "ta": "ta_IN", "te": "te_IN", "th": "th_TH", "tl": "tl_XX", "uk": "uk_UA", "ur": "ur_PK",
    "xh": "xh_ZA", "gl": "gl_ES", "sl": "sl_SI"
}

# 4. SIDEBAR CONTENT
# Static informational panel: project description, feature highlights, and
# the technology stack. Rendered on every rerun; no interactive state here.
with st.sidebar:
    st.title("About the Chatbot")
    st.info(
        "This is an advanced multilingual AI assistant for GUVI. "
        "It leverages a fine-tuned GPT-2 model to provide accurate, "
        "domain-specific answers about GUVI's courses."
    )
    st.success(
        "**Key Features:**\n"
        "- **Auto-Detects Language:** Chat in over 20 languages.\n"
        "- **Domain-Specific:** Fine-tuned on GUVI's data.\n"
        "- **Real-time:** Get instant answers to your questions."
    )
    st.markdown("---")
    st.header("Tech Stack")
    st.markdown(
        """
        - Streamlit
        - Hugging Face Transformers
        - PyTorch
        - Google Colab
        """
    )

# 5. SET UP THE MAIN CHAT INTERFACE
st.title("GUVI AI Learning Assistant πŸ€–")

# Initialize chat history in session state
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "Hello! How can I help you with GUVI's courses today?"}]

# Display prior chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# 6. DEFINE THE CHATBOT'S LOGIC
def get_bot_response(user_input):
    """Process user input through the AI pipeline and return the reply.

    Flow: detect the input language -> translate the question to English if
    needed -> generate an answer with the fine-tuned GPT-2 model -> translate
    the answer back into the user's language.

    Args:
        user_input (str): Raw text typed by the user.

    Returns:
        str: The assistant's reply in the user's detected language (English
        when the language is undetected or unsupported by the translator).
    """
    # Very short inputs rarely contain an answerable question; ask the user
    # to be more specific instead of generating a low-quality answer.
    if len(user_input.split()) < 3:
        return "That's a broad topic! Could you please ask a more specific question about it?"

    # Detect the language; map its short code (e.g. "ta") to the mBART-50
    # code (e.g. "ta_IN"). Unknown codes fall back to English.
    detection_result = detector(user_input)
    detected_lang_code = detection_result[0]['label']
    src_lang_code = MODEL_LANG_MAP.get(detected_lang_code, "en_XX")

    # Translate the question into English unless it already is English.
    if src_lang_code == "en_XX":
        english_query = user_input
    else:
        translated_to_en = translator(user_input, src_lang=src_lang_code, tgt_lang="en_XX")
        english_query = translated_to_en[0]['translation_text']

    prompt = f"Answer the following question about GUVI's online courses in a helpful and clear manner: {english_query}"
    gpt_response = generator(prompt, max_length=200, num_return_sequences=1)
    # BUG FIX: by default the text-generation pipeline's 'generated_text'
    # includes the input prompt, so the internal instruction was being echoed
    # back to the user (and fed into the back-translation). Keep only the
    # continuation that follows the prompt.
    full_text = gpt_response[0]['generated_text']
    if full_text.startswith(prompt):
        english_response = full_text[len(prompt):].strip()
    else:
        english_response = full_text.strip()
    if not english_response:
        # Degenerate generation (model produced nothing past the prompt).
        english_response = "I'm sorry, I couldn't come up with an answer. Could you rephrase your question?"

    # Translate the answer back into the user's language if needed.
    if src_lang_code == "en_XX":
        final_response = english_response
    else:
        translated_to_orig = translator(english_response, src_lang="en_XX", tgt_lang=src_lang_code)
        final_response = translated_to_orig[0]['translation_text']

    return final_response

# 7. ACCEPT AND PROCESS USER INPUT
# st.chat_input returns None until the user submits text; the walrus binds
# the submitted message for the body of the if.
if prompt := st.chat_input("Ask me about Data Science, Python, or other courses..."):
    # Persist and echo the user's message immediately.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Run the full detect/translate/generate pipeline behind a spinner.
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = get_bot_response(prompt)
        st.markdown(response)

    # Store the reply so it survives the next rerun.
    st.session_state.messages.append({"role": "assistant", "content": response})

# 8. ADD THE FOOTER
st.markdown("---")
# unsafe_allow_html is required for the raw <div> styling below; the content
# is a static literal, so no untrusted HTML is injected.
st.markdown(
    """
    <div style="text-align: center; color: grey;">
        Developed by <b>Aswinprasath V</b> | Supported by <b>GUVI</b>
    </div>
    """,
    unsafe_allow_html=True
)