# app.py -- commit 15477ab (verified) by giovannigiofre: "Update app.py"
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
from difflib import get_close_matches
import gradio as gr
from datasets import load_dataset
# Load the dataset from Hugging Face
def load_data(dataset_name):
    """
    Fetch a Hugging Face dataset and return all of it as one DataFrame.

    When ``load_dataset`` yields a dict of splits, every split is converted
    to a DataFrame and the frames are stacked under a fresh 0..n-1 index.
    Any other return value is handed to ``pd.DataFrame`` as-is.
    """
    loaded = load_dataset(dataset_name)

    # No predefined splits: convert the object directly.
    if not isinstance(loaded, dict):
        return pd.DataFrame(loaded)

    # One frame per split, then a single concatenated table.
    split_frames = [pd.DataFrame(loaded[split_name]) for split_name in loaded]
    return pd.concat(split_frames, ignore_index=True)
# Feature selection based on user input or default settings
def select_features(user_selected_features=None):
    """
    Return the feature columns to use for similarity comparison.

    A non-empty ``user_selected_features`` is returned unchanged; when it is
    ``None`` or empty, the default list of audio features is returned.
    """
    if user_selected_features:
        return user_selected_features

    return [
        'acousticness',
        'danceability',
        'energy',
        'instrumentalness',
        'liveness',
        'loudness',
        'speechiness',
        'tempo',
        'valence',
    ]
# Search for songs based on partial input
def search_songs(data, partial_song_name, limit=5):
    """
    Search for song names that closely match the partial input.

    Matching is case-insensitive: titles and the query are case-folded
    before being scored by ``difflib.get_close_matches`` (cutoff 0.3), and
    each hit is reported with the spelling found in ``data``.

    Args:
        data: DataFrame with a 'track_name' column.
        partial_song_name: Text typed by the user; ``None`` or blank input
            yields no matches.
        limit: Maximum number of titles to return.

    Returns:
        A list of at most ``limit`` track names, best match first.
    """
    query = "" if partial_song_name is None else str(partial_song_name).strip().casefold()
    if not query:
        return []

    # Ensure all song names are strings and handle NaN values.
    all_song_names = data['track_name'].dropna().astype(str).unique()

    # get_close_matches compares characters exactly, so "hello" would score
    # zero against "HELLO". Compare case-folded titles and remember the first
    # original spelling seen for each one.
    original_by_folded = {}
    for name in all_song_names:
        original_by_folded.setdefault(name.casefold(), name)

    close_matches = get_close_matches(query, list(original_by_folded), n=limit, cutoff=0.3)
    return [original_by_folded[folded] for folded in close_matches]
# Recommend songs based on sound similarity without needing the artist name
def recommend_song_by_sound_similarity(data, song, selected_features=None):
    """
    Recommend songs based on sound similarity, not requiring the artist name.

    The query track is the first row whose 'track_name' equals ``song``
    ignoring case; when no title is an exact match, the first row whose
    title contains ``song`` as a literal, case-insensitive substring is used.
    Every other row is ranked by cosine similarity of its feature vector to
    the query row.

    Args:
        data: DataFrame with 'track_name', 'artists', 'popularity' and the
            numeric feature columns.
        song: Full or partial track title.
        selected_features: Optional list of feature columns; falls back to
            the defaults from ``select_features``.

    Returns:
        A DataFrame with columns 'track_name', 'artists' and 'popularity'
        holding up to ten tracks, most similar first, indexed 1..k by rank.
        ``None`` when no title matches or when any error occurs (a message
        is printed in both cases).
    """
    try:
        titles = data['track_name'].str.lower()
        needle = song.lower()

        # regex=False: titles are plain text. Treated as a pattern, a name
        # such as "Hello (Remix)" would not match itself and "Why?" or "C++"
        # would match the wrong rows or raise.
        matches = titles.str.contains(needle, na=False, regex=False)
        if not matches.any():
            print(f"No matches found for '{song}'. Please check the spelling or try a different song.")
            return None

        # Prefer an exact title over an earlier row that merely contains it.
        exact_matches = titles == needle
        if exact_matches.any():
            matches = exact_matches

        # Use the row POSITION of the first match: the feature matrix below is
        # a plain array, so an index label is only valid for a 0..n-1 index.
        query_pos = int(matches.to_numpy().argmax())

        features = select_features(selected_features)
        sound_properties = normalize(data[features])
        scores = cosine_similarity(
            sound_properties, sound_properties[query_pos:query_pos + 1]
        ).flatten()

        # Work on the three output columns only instead of copying all of data.
        ranked = data[['track_name', 'artists', 'popularity']].reset_index(drop=True)
        ranked['Similarity with song'] = scores

        # Remove the query row explicitly; with tied scores it is not
        # guaranteed to sort first, so slicing off row 0 could drop another
        # track and return the query itself.
        ranked = ranked.drop(index=query_pos)
        ranked = ranked.sort_values(by='Similarity with song', ascending=False, kind='stable')

        top = ranked[['track_name', 'artists', 'popularity']].head(10)
        # Keep the 1-based rank labels that callers received previously.
        top.index = range(1, len(top) + 1)
        return top
    except Exception as e:
        # Best-effort contract: callers treat None as "no recommendations".
        print(f"Error: {str(e)}")
        return None
_DATASET_NAME = 'maharshipandya/spotify-tracks-dataset'
_dataset_cache = {}


def _get_dataset(dataset_name=_DATASET_NAME):
    """Return the dataset as a DataFrame, loading it only on first use."""
    if dataset_name not in _dataset_cache:
        _dataset_cache[dataset_name] = load_data(dataset_name)
    return _dataset_cache[dataset_name]


def recommend_ui(partial_song_name):
    """
    Gradio callback: turn a partial song name into a text list of
    recommendations.

    The best title match from ``search_songs`` is used as the query for
    ``recommend_song_by_sound_similarity``.

    Args:
        partial_song_name: Text entered in the UI.

    Returns:
        One line per recommended track, formatted as
        "<track> by <artists> (Popularity: <n>)", or a short message when
        nothing matches or no recommendations can be produced.
    """
    # A blank query cannot match anything; answer without touching the dataset.
    if not partial_song_name or not partial_song_name.strip():
        return "No song matches found. Please try a different search."

    # The dataset is loaded once and reused; reloading it on every request
    # repeats the whole load and DataFrame conversion for each query.
    data = _get_dataset()

    # Search for songs based on partial input
    song_matches = search_songs(data, partial_song_name)
    if not song_matches:
        return "No song matches found. Please try a different search."

    # For simplicity, we'll use the first match to recommend songs
    selected_song = song_matches[0]
    recommendations = recommend_song_by_sound_similarity(data, selected_song)
    if recommendations is None or recommendations.empty:
        return "No recommendations could be generated. Please try a different song."

    # Format the recommendations for display
    return "\n".join(
        f"{rec['track_name']} by {rec['artists']} (Popularity: {rec['popularity']})"
        for rec in recommendations.to_dict('records')
    )
# Gradio front end: one text box in, one text box out, served by recommend_ui.
iface = gr.Interface(
    title="Songs Recommendation System",
    description="Enter a partial song name to get song recommendations based on sound similarity.",
    fn=recommend_ui,
    inputs=gr.Textbox(label="Partial Song Name"),
    outputs=gr.Textbox(label="Recommended Songs"),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()