from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Use a compatible CLIP model
model = SentenceTransformer("clip-ViT-B-32")

def search_images_by_text(text, df, embeddings, top_k=30):
    # Encode the query text into the shared CLIP text/image embedding space
    text_emb = model.encode([text])
    # Keep only the embeddings of the rows still present in df
    filtered_embeddings = embeddings[df.index]
    sims = cosine_similarity(text_emb, filtered_embeddings)[0]
    # Highest-similarity images first
    top_indices = np.argsort(sims)[::-1][:top_k]
    return df.iloc[top_indices]
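
A quick usage sketch (the query string, df, and embeddings are placeholders for your own data):

# Hypothetical example: df and embeddings must already exist
results = search_images_by_text("a red vintage car", df, embeddings, top_k=10)
print(results.head())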

def get_similar_images(df, image_id, embeddings, top_k=5):
    index = int(image_id)  # adjust based on your ID setup
    query_emb = embeddings[index]
    sims = cosine_similarity([query_emb], embeddings)[0]
    # Skip position 0, which is the query image itself
    top_indices = np.argsort(sims)[::-1][1:top_k + 1]
    return df.iloc[top_indices]
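
Both functions above assume a NumPy array of precomputed image embeddings whose row order matches df. A minimal sketch of how that array might be built, assuming df has an image_path column (adapt the column name and batch size to your setup):

from PIL import Image

def build_image_embeddings(df, batch_size=32):
    # The sentence-transformers CLIP wrapper accepts PIL images directly
    images = [Image.open(p).convert("RGB") for p in df["image_path"]]
    return model.encode(images, batch_size=batch_size, show_progress_bar=True)

# embeddings = build_image_embeddings(df)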

from PIL import Image
import torchvision.transforms as T

# Make sure to use the same embedding method for your dataset images.
# Note: this manual preprocessing is only needed if you run the CLIP vision
# model yourself; model.encode() accepts PIL images directly.
def preprocess_image(img):
    # You may want to adapt this if you used a specific transform for your dataset
    transform = T.Compose([
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize(mean=(0.48145466, 0.4578275, 0.40821073),
                    std=(0.26862954, 0.26130258, 0.27577711)),
    ])
    # Add a batch dimension: (1, 3, 224, 224)
    return transform(img).unsqueeze(0)
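
A quick shape check, using a hypothetical file name:

img = Image.open("example.jpg").convert("RGB")  # hypothetical path
tensor = preprocess_image(img)
print(tensor.shape)  # torch.Size([1, 3, 224, 224])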

def search_images_by_image(uploaded_image, df, embeddings, top_k=30):
    # Convert to a PIL Image if necessary
    if isinstance(uploaded_image, str):  # a file path
        uploaded_image = Image.open(uploaded_image).convert("RGB")
    elif isinstance(uploaded_image, np.ndarray):
        uploaded_image = Image.fromarray(uploaded_image).convert("RGB")

    # Encode the image with the same CLIP model used for the dataset
    image_emb = model.encode(uploaded_image)

    # Compute cosine similarity to the dataset embeddings
    filtered_embeddings = embeddings[df.index]
    sims = cosine_similarity([image_emb], filtered_embeddings)[0]
    top_indices = np.argsort(sims)[::-1][:top_k]
    return df.iloc[top_indices]
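
And a hypothetical usage sketch for the image-based functions (the file name and image_id are placeholders):

# Image-to-image search with an uploaded file or path
image_hits = search_images_by_image("query.jpg", df, embeddings, top_k=10)

# Nearest neighbours of an image already indexed in the dataset
neighbours = get_similar_images(df, image_id=42, embeddings=embeddings, top_k=5)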