The Product Recommendation System is a content-based recommender engine that suggests similar products based on their textual features such as category, subcategory, and type. Designed to enhance user engagement and improve discovery in e-commerce or inventory platforms, it uses natural language processing (NLP) and vector-based similarity techniques to generate recommendations.
# Load the product catalogue and drop every row that has a missing value.
df = pd.read_csv('productlist.csv')
df = df.dropna()

# Split each multi-valued label cell into a list of trimmed labels.
# Separators are '& ', ', ', a literal '*' and a newline.  The '*' must be
# escaped: a bare '*' at the start of an alternative is a quantifier with
# nothing to repeat and makes the pattern fail to compile.  A raw string keeps
# the backslashes intact for the regex engine.
_LABEL_SEPARATORS = re.compile(r'& |, |\*|\n')
for col in ['category', 'sub_category', 'type']:
    df[col] = df[col].apply(
        lambda a: [part.strip() for part in _LABEL_SEPARATORS.split(a)]
    )
def cleaner(x):
    """Normalise a label, or a list of labels, for use as a single token.

    A string is returned with all spaces removed and lower-cased.  A list is
    returned as a new list with every element normalised the same way.  Any
    other value yields the empty string.
    """
    def _squash(text):
        # Remove the spaces first, then lower-case what is left.
        return str.lower(text.replace(' ', ''))

    if isinstance(x, str):
        return _squash(x)
    if isinstance(x, list):
        return list(map(_squash, x))
    return ''
# Normalise the three label columns: cleaner() lower-cases every value and
# removes its spaces.
for col in ('category', 'sub_category', 'type'):
    df[col] = df[col].apply(cleaner)
def couple(x):
    """Build the space-separated text "soup" for one product row.

    The labels in ``x['category']``, ``x['sub_category']`` and ``x['type']``
    are each joined with single spaces, and the three resulting strings are
    joined with single spaces in that order.
    """
    fields = ('category', 'sub_category', 'type')
    return ' '.join(' '.join(x[field]) for field in fields)
# Add a 'soup' column: couple() is called once per row (axis=1) and returns
# that row's category, sub_category and type labels as one string.
df['soup'] = df.apply(couple, axis=1)
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
# Vectorise the product names with TF-IDF, ignoring English stop words.
# NOTE(review): the 'soup' column built from category/sub_category/type is not
# used here; the model is fitted on df['product'] only.  Confirm which text
# field the recommendations are meant to be based on.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['product'])

# Pairwise similarity of every product against every other one.  TF-IDF rows
# are L2-normalised by default, so the dot product computed by linear_kernel
# equals cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Map each product name to its row *position* in cosine_sim.  The labels in
# df.index are not suitable: after dropna() they can have gaps, so a label may
# address the wrong matrix row or fall past its end.  When a name occurs more
# than once only its first occurrence is kept, so a lookup yields one integer.
indices = pd.Series(range(len(df)), index=df['product'])
indices = indices[~indices.index.duplicated(keep='first')]
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the products most similar to the product named *title*.

    Args:
        title: Product name to look up in the module-level ``indices`` series.
        cosine_sim: Square similarity matrix; row ``i`` holds the scores of
            product ``i`` against every product.  Defaults to the module-level
            matrix.
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        The ``df['product']`` entries of the best-scoring products, highest
        score first, never including the queried product itself.

    Raises:
        KeyError: If *title* is not present in ``indices``.
        ValueError: If *top_n* is negative.
    """
    if top_n < 0:
        raise ValueError('top_n must be non-negative')

    idx = indices[title]
    if isinstance(idx, pd.Series):
        # A repeated product name yields several rows; use the first one.
        idx = idx.iloc[0]
    idx = int(idx)

    # Exclude the queried product by position instead of assuming it sorts
    # first: with tied scores (identical texts, or an all-zero vector) another
    # row can take the top spot and the query would then recommend itself.
    scored = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]
    # list.sort is stable, so tied scores keep their original row order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    product_indices = [pos for pos, _ in scored[:top_n]]
    return df['product'].iloc[product_indices]