@@ -288,6 +288,9 @@ def query_gpt_for_perturbations(
288288# Word2Vec model
289289# --------------------------------------------------------------------------- #
290290
291+ # TODO: anything you want to do
292+ # Improve the Word2Vec support: use better models and give the user an option to choose their preferred model
293+
291294def load_google_news_vectors ():
292295 # First try gensim downloader (fastest & simplest)
293296 try :
@@ -336,6 +339,10 @@ def clean_text(text: str) -> str:
336339 cleaned = re .sub (r"[^\w\s]" , "" , text .lower ())
337340 return cleaned .strip ()
338341
342+ # TODO: anything you want to do
343+ # Remove the word "safe" from the function name
344+ # Add more statistical distances here
345+ # As statistical distance measures are the core of SMILE, we can consider a separate class for them.
339346
340347def safe_wmdistance (model , text1 : str , text2 : str ) -> float :
341348 """Compute Word Mover's Distance using only in-vocabulary cleaned words.
@@ -411,6 +418,9 @@ def normalize_similarities(wmd_scores: list) -> list:
411418# Regression
412419# --------------------------------------------------------------------------- #
413420
421+ # TODO: anything you want to do
422+ # This part can be extended to cover other types of LIME, like BayLIME
423+
414424def fit_weighted_regression (
415425 perturbations : list ,
416426 similarities : list ,
@@ -419,7 +429,7 @@ def fit_weighted_regression(
419429 """Fit weighted linear regression using WMD distances.
420430
421431 Args:
422- perturbations (list): List of binary perturbation vector
432+ perturbations (list): List of binary perturbation vectors
423433 (Perturbations)
424434 similarities (list): List of (text, similarity).
425435 wmd_scores (list): List of (text, distance).
0 commit comments