import itertools
from math import log2
from typing import List, Sequence, TypeVar
import numpy as np
from scipy import stats
from sklearn.metrics import dcg_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity
X = TypeVar("X")
[docs]def reciprocal_rank(relevant_item: X, recommendation: Sequence[X]) -> float:
"""
Calculate the reciprocal rank (RR) of an item in a ranked list of items.
Args:
relevant_item: a target item in the predicted list of items.
recommendation: An N x 1 sequence of predicted items.
Returns:
RR (float): The reciprocal rank of the item.
"""
for i, item in enumerate(recommendation):
if item == relevant_item:
return 1.0 / (i + 1.0)
raise ValueError("relevant item did not appear in recommendation")
[docs]def mean_reciprocal_rank(relevant_items: List, recommendation: List):
"""
Calculate the mean reciprocal rank (MRR) of items in a ranked list.
Args:
relevant_items (array-like): An N x 1 array of relevant items.
recommendation (array-like): An N x 1 array of ordered items.
Returns:
MRR (float): The mean reciprocal rank of the relevant items in a predicted.
"""
reciprocal_ranks = []
for item in relevant_items:
rr = reciprocal_rank(item, recommendation)
reciprocal_ranks.append(rr)
return np.mean(reciprocal_ranks)
[docs]def rank(relevant_item: X, recommendation: Sequence[X]) -> float:
"""
Calculate the rank of an item in a ranked list of items.
Args:
relevant_item: a target item in the predicted list of items.
recommendation: An N x 1 sequence of predicted items.
Returns:
: The rank of the item.
"""
for i, item in enumerate(recommendation):
if item == relevant_item:
return i + 1.0
raise ValueError("relevant item did not appear in recommendation")
[docs]def mean_rank(relevant_items: Sequence[X], recommendation: Sequence[X]) -> float:
"""
Calculate the arithmetic mean rank (MR) of items in a ranked list.
Args:
relevant_items: An N x 1 sequence of relevant items.
recommendation: An N x 1 sequence of ordered items.
Returns:
: The mean rank of the relevant items in a predicted.
"""
return np.mean([rank(item, recommendation) for item in relevant_items])
[docs]def gmean_rank(relevant_items: Sequence[X], recommendation: Sequence[X]) -> float:
"""
Calculate the geometric mean rank (GMR) of items in a ranked list.
Args:
relevant_items: An N x 1 sequence of relevant items.
recommendation: An N x 1 sequence of ordered items.
Returns:
: The mean reciprocal rank of the relevant items in a predicted.
"""
return stats.gmean([rank(item, recommendation) for item in relevant_items])
[docs]def average_precision_at_k(relevant_items: np.array, recommendation: np.array, k=10):
"""
Calculate the average precision at k (AP@K) of items in a ranked list.
Args:
relevant_items (array-like): An N x 1 array of relevant items.
recommendation (array-like): An N x 1 array of ordered items.
k (int): the number of items considered in the predicted list.
Returns:
AP@K (float): The average precision @ k of a predicted list.
`Original <https://github.com/benhamner/Metrics/blob/master/Python/ml_metrics/average_precision.py>`_
"""
if len(recommendation) > k:
recommendation = recommendation[:k]
score = 0.0
hits = 0.0
for i, item in enumerate(recommendation):
if item in relevant_items and item not in recommendation[:i]:
hits += 1.0
score += hits / (i + 1.0)
return score / min(len(relevant_items), k)
[docs]def mean_average_precision_at_k(relevant_items: List[list], recommendations: List[list], k: int = 10):
"""
Calculate the mean average precision at k (MAP@K) across predicted lists.
Each prediction should be paired with a list of relevant items. First predicted list is
evaluated against the first list of relevant items, and so on.
Example usage:
.. code-block:: python
import numpy as np
from rexmex.metrics.predicted import mean_average_precision_at_k
mean_average_precision_at_k(
relevant_items=np.array(
[
[1,2],
[2,3]
]
),
predicted=np.array([
[3,2,1],
[2,1,3]
])
)
>>> 0.708333...
Args:
relevant_items (array-like): An M x N array of relevant items.
recommendations (array-like): An M x N array of recommendation lists.
k (int): the number of items considered in the predicted list.
Returns:
MAP@K (float): The mean average precision @ k across recommendations.
"""
aps = []
for items, recommendation in zip(relevant_items, recommendations):
ap = average_precision_at_k(items, recommendation, k)
aps.append(ap)
return np.mean(aps)
[docs]def average_recall_at_k(relevant_items: List, recommendation: List, k: int = 10):
"""
Calculate the average recall at k (AR@K) of items in a ranked list.
Args:
relevant_items (array-like): An N x 1 array of relevant items.
recommendation (array-like): An N x 1 array of items.
k (int): the number of items considered in the predicted list.
Returns:
AR@K (float): The average precision @ k of a predicted list.
"""
if len(recommendation) > k:
recommendation = recommendation[:k]
num_hits = 0.0
score = 0.0
for i, item in enumerate(recommendation):
if item in relevant_items and item not in recommendation[:i]:
num_hits += 1.0
score += num_hits / (i + 1.0)
return score / len(relevant_items)
[docs]def mean_average_recall_at_k(relevant_items: List[list], recommendations: List[list], k: int = 10):
"""
Calculate the mean average recall at k (MAR@K) for a list of recommendations.
Each recommendation should be paired with a list of relevant items. First recommendation list is
evaluated against the first list of relevant items, and so on.
Args:
relevant_items (array-like): An M x R list where M is the number of recommendation lists,
and R is the number of relevant items.
recommendations (array-like): An M x N list where M is the number of recommendation lists and
N is the number of recommended items.
k (int): the number of items considered in the recommendation.
Returns:
MAR@K (float): The mean average recall @ k across the recommendations.
"""
ars = []
for items, recommendation in zip(relevant_items, recommendations):
ar = average_recall_at_k(items, recommendation, k)
ars.append(ar)
return np.mean(ars)
[docs]def hits_at_k(relevant_items: np.array, recommendation: np.array, k=10):
"""
Calculate the number of hits of relevant items in a ranked list HITS@K.
Args:
relevant_items (array-like): An 1 x N array of relevant items.
recommendation (array-like): An 1 x N array of predicted arrays
k (int): the number of items considered in the predicted list
Returns:
HITS@K (float): The number of relevant items in the first k items of a prediction.
"""
if len(recommendation) > k:
recommendation = recommendation[:k]
hits = 0.0
for i, item in enumerate(recommendation):
if item in relevant_items and item not in recommendation[:i]:
hits += 1.0
return hits / len(relevant_items)
[docs]def spearmans_rho(relevant_items: np.array, recommendation: np.array):
"""
Calculate the Spearman's rank correlation coefficient (Spearman's rho) between two lists.
Args:
relevant_items (array-like): An 1 x N array of items.
recommendation (array-like): An 1 x N array of items.
Returns:
(float): Spearman's rho.
p-value (float): two-sided p-value for null hypothesis that both predicted are uncorrelated.
"""
return stats.spearmanr(relevant_items, recommendation)
[docs]def kendall_tau(relevant_items: np.array, recommendation: np.array):
"""
Calculate the Kendall's tau, measuring the correspondence between two lists.
Args:
relevant_items (array-like): An 1 x N array of items.
recommendation (array-like): An 1 x N array of items.
Returns:
Kendall tau (float): The tau statistic.
p-value (float): two-sided p-value for null hypothesis that there's no association between the predicted.
"""
return stats.kendalltau(relevant_items, recommendation)
[docs]def intra_list_similarity(recommendations: List[list], items_feature_matrix: np.array):
"""
Calculate the intra list similarity of recommended items. The items
are represented by feature vectors, which compared with cosine similarity.
The predicted consists of item indices, which are used to fetch the item
features.
Args:
recommendations (List[list]): A M x N array of predicted, where M is the number
of predicted and N the number of recommended items
items_feature_matrix (matrix-link): A N x D matrix, where N is the number of items and D the
number of features representing one item
Returns:
(float): Average intra list similarity across predicted
`Original <https://github.com/statisticianinstilettos/recmetrics/blob/master/recmetrics/metrics.py#L232>`_
"""
intra_list_similarities = []
for predicted in recommendations:
predicted_features = items_feature_matrix[predicted]
similarity = cosine_similarity(predicted_features)
upper_right = np.triu_indices(similarity.shape[0], k=1)
avg_similarity = np.mean(similarity[upper_right])
intra_list_similarities.append(avg_similarity)
return np.mean(intra_list_similarities)
[docs]def personalization(recommendations: List[list]):
"""
Calculates personalization, a measure of similarity between recommendations.
A high value indicates that the recommendations are disimillar, or "personalized".
Args:
recommendations (List[list]): A M x N array of predicted items, where M is the number
of predicted lists and N the number of items
Returns:
(float): personalization
`Original <https://github.com/statisticianinstilettos/recmetrics/blob/master/recmetrics/metrics.py#L160>`_
"""
n_predictions = len(recommendations)
# map each ranked item to index
item2ix = {}
counter = 0
for prediction in recommendations:
for item in prediction:
if item not in item2ix:
item2ix[item] = counter
counter += 1
n_items = len(item2ix.keys())
# create matrix of predicted x items
items_matrix = np.zeros((n_predictions, n_items))
for i, prediction in enumerate(recommendations):
for item in prediction:
item_ix = item2ix[item]
items_matrix[i][item_ix] = 1
similarity = cosine_similarity(X=items_matrix)
dim = similarity.shape[0]
personalization = (similarity.sum() - dim) / (dim * (dim - 1))
return 1 - personalization
[docs]def novelty(recommendations: List[list], item_popularities: dict, num_users: int, k: int = 10):
"""
Calculates the capacity of the recommender system to to generate novel
and unexpected results.
Args:
recommendations (List[list]): A M x N array of items, where M is the number
of predicted lists and N the number of recommended items
item_popularities (dict): A dict mapping each item in the recommendations to a popularity value.
Popular items have higher values.
num_users (int): The number of users
k (int): The number of items considered in each recommendation.
Returns:
(float): novelty
Metric Definition:
Zhou, T., Kuscsik, Z., Liu, J. G., Medo, M., Wakeling, J. R., & Zhang, Y. C. (2010).
Solving the apparent diversity-accuracy dilemma of recommender systems.
Proceedings of the National Academy of Sciences, 107(10), 4511-4515.
`Original <https://github.com/statisticianinstilettos/recmetrics/blob/master/recmetrics/metrics.py#L14>`_
"""
epsilon = 1e-10
all_self_information = []
for recommendations in recommendations:
self_information_sum = 0.0
for i in range(k):
item = recommendations[i]
item_pop = item_popularities[item]
self_information_sum += -log2((item_pop + epsilon) / num_users)
avg_self_information = self_information_sum / k
all_self_information.append(avg_self_information)
return np.mean(all_self_information)
[docs]def discounted_cumulative_gain(y_true: np.array, y_score: np.array):
"""
Computes the Discounted Cumulative Gain (DCG), a sum of the true scores ordered
by the predicted scores, and then penalized by a logarithmic discount based on ordering.
Args:
y_true (array-like): An N x M array of ground truth values, where M > 1 for multilabel classification problems.
y_score (array-like): An N x M array of predicted values, where M > 1 for multilabel classification problems..
Returns:
DCG (float): Discounted Cumulative Gain
"""
return dcg_score(y_true, y_score)
[docs]def normalized_discounted_cumulative_gain(y_true: np.array, y_score: np.array):
"""
Computes the Normalized Discounted Cumulative Gain (NDCG), a sum of the true scores ordered
by the predicted scores, and then penalized by a logarithmic discount based on ordering.
The score is normalized between [0.0, 1.0]
Args:
y_true (array-like): An N x M array of ground truth values, where M > 1 for multilabel classification problems.
y_score (array-like): An N x M array of predicted values, where M > 1 for multilabel classification problems..
Returns:
NDCG (float) : Normalized Discounted Cumulative Gain
"""
return ndcg_score(y_true, y_score)