
Commit 9d848b7
Merge pull request #93 from paoloRais/master
recall@k
2 parents: 64e355d + b428b56

File tree: 2 files changed, 138 insertions(+), 0 deletions(-)

lightfm/evaluation.py (61 additions, 0 deletions)
@@ -10,6 +10,7 @@
 __all__ = ['precision_at_k',
+           'recall_at_k',
            'auc_score',
            'reciprocal_rank']

@@ -72,6 +73,66 @@ def precision_at_k(model, test_interactions, train_interactions=None,
     return precision


+def recall_at_k(model, test_interactions, train_interactions=None,
+                k=10, user_features=None, item_features=None,
+                preserve_rows=False, num_threads=1):
+    """
+    Measure the recall at k metric for a model: the number of positive items in the first k
+    positions of the ranked list of results divided by the number of positive items
+    in the test period. A perfect score is 1.0.
+
+    Parameters
+    ----------
+
+    model: LightFM instance
+        the model to be evaluated
+    test_interactions: np.float32 csr_matrix of shape [n_users, n_items]
+        Non-zero entries representing known positives in the evaluation set.
+    train_interactions: np.float32 csr_matrix of shape [n_users, n_items], optional
+        Non-zero entries representing known positives in the train set. These
+        will be omitted from the score calculations to avoid re-recommending
+        known positives.
+    k: integer, optional
+        The k parameter.
+    user_features: np.float32 csr_matrix of shape [n_users, n_user_features], optional
+        Each row contains that user's weights over features.
+    item_features: np.float32 csr_matrix of shape [n_items, n_item_features], optional
+        Each row contains that item's weights over features.
+    preserve_rows: boolean, optional
+        When False (default), the number of rows in the output will be equal to
+        the number of users with interactions in the evaluation set. When True,
+        the number of rows in the output will be equal to the number of users.
+    num_threads: int, optional
+        Number of parallel computation threads to use. Should
+        not be higher than the number of physical cores.
+
+    Returns
+    -------
+
+    np.array of shape [n_users with interactions or n_users,]
+        Numpy array containing recall@k scores for each user. If there are no interactions
+        for a given user having items in the test period, the returned recall will be 0.
+    """
+
+    ranks = model.predict_rank(test_interactions,
+                               train_interactions=train_interactions,
+                               user_features=user_features,
+                               item_features=item_features,
+                               num_threads=num_threads)
+
+    ranks.data[ranks.data < k] = 1.0
+    ranks.data[ranks.data >= k] = 0.0
+
+    retrieved = np.squeeze(test_interactions.getnnz(axis=1))
+    hit = np.squeeze(np.array(ranks.sum(axis=1)))
+
+    if not preserve_rows:
+        hit = hit[test_interactions.getnnz(axis=1) > 0]
+        retrieved = retrieved[test_interactions.getnnz(axis=1) > 0]
+
+    return hit / retrieved
+
+
 def auc_score(model, test_interactions, train_interactions=None,
               user_features=None, item_features=None,
               preserve_rows=False, num_threads=1):
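As a side note (not part of the commit), a minimal usage sketch of the new metric: it assumes a LightFM model fitted on a binary CSR interaction matrix and follows the same call pattern as the existing precision_at_k. The random train/test split below is purely illustrative.

import numpy as np
import scipy.sparse as sp

from lightfm import LightFM
from lightfm.evaluation import recall_at_k

# Illustrative implicit-feedback data: 50 users, 200 items, ~10% positives,
# with roughly 20% of the positives held out as a disjoint test set.
rng = np.random.RandomState(42)
dense = (rng.random_sample((50, 200)) < 0.1).astype(np.float32)
holdout = rng.random_sample(dense.shape) < 0.2
train = sp.csr_matrix(dense * ~holdout)
test = sp.csr_matrix(dense * holdout)

model = LightFM(loss='bpr')
model.fit(train, epochs=5)

# Per-user recall@10, excluding items already seen during training.
recall = recall_at_k(model, test, train_interactions=train, k=10)
print(recall.mean())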

tests/test_evaluation.py (77 additions, 0 deletions)
@@ -66,6 +66,45 @@ def _precision_at_k(model, ground_truth, k, train=None, user_features=None, item
     return sum(precisions) / len(precisions)


+def _recall_at_k(model, ground_truth, k, train=None, user_features=None,
+                 item_features=None):
+    # Alternative test implementation
+
+    ground_truth = ground_truth.tocsr()
+
+    no_users, no_items = ground_truth.shape
+
+    pid_array = np.arange(no_items, dtype=np.int32)
+
+    recalls = []
+
+    uid_array = np.empty(no_items, dtype=np.int32)
+
+    if train is not None:
+        train = train.tocsr()
+
+    for user_id, row in enumerate(ground_truth):
+        uid_array.fill(user_id)
+
+        predictions = model.predict(uid_array, pid_array,
+                                    user_features=user_features,
+                                    item_features=item_features,
+                                    num_threads=4)
+        if train is not None:
+            train_items = train[user_id].indices
+            top_k = set([x for x in np.argsort(-predictions)
+                         if x not in train_items][:k])
+        else:
+            top_k = set(np.argsort(-predictions)[:k])
+
+        true_pids = set(row.indices[row.data == 1])
+
+        if true_pids:
+            recalls.append(len(top_k & true_pids) / float(len(true_pids)))
+
+    return sum(recalls) / len(recalls)
+
+
 def _auc(model, ground_truth, train=None, user_features=None, item_features=None):

     ground_truth = ground_truth.tocsr()
@@ -143,6 +182,44 @@ def test_precision_at_k():
     assert np.allclose(precision.mean(), expected_mean_precision)


+def test_recall_at_k():
+
+    no_users, no_items = (10, 100)
+
+    train, test = _generate_data(no_users, no_items)
+
+    model = LightFM(loss='bpr')
+    model.fit_partial(train)
+
+    k = 10
+
+    # Without omitting train interactions
+    recall = evaluation.recall_at_k(model,
+                                    test,
+                                    k=k)
+    expected_mean_recall = _recall_at_k(model,
+                                        test,
+                                        k)
+
+    assert np.allclose(recall.mean(), expected_mean_recall)
+    assert len(recall) == (test.getnnz(axis=1) > 0).sum()
+    assert len(evaluation.recall_at_k(model,
+                                      train,
+                                      preserve_rows=True)) == test.shape[0]
+
+    # With omitting train interactions
+    recall = evaluation.recall_at_k(model,
+                                    test,
+                                    k=k,
+                                    train_interactions=train)
+    expected_mean_recall = _recall_at_k(model,
+                                        test,
+                                        k,
+                                        train=train)
+
+    assert np.allclose(recall.mean(), expected_mean_recall)
+
+
 def test_auc_score():

     no_users, no_items = (10, 100)
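Also not part of the commit: the thresholding idea in recall_at_k can be checked by hand on a toy sparse rank matrix. The matrix below is an assumed stand-in for what model.predict_rank would return (0-based ranks of each user's test positives); ranks below k count as hits, and dividing per-user hit counts by per-user positive counts gives recall@k.

import numpy as np
import scipy.sparse as sp

k = 2

# Assumed toy ranks of each user's known test positives (0-based, lower is better).
# User 0 has positives ranked 1 and 5; user 1 has positives ranked 1, 3 and 7.
ranks = sp.csr_matrix(np.array([[1., 5., 0., 0.],
                                [0., 1., 3., 7.]], dtype=np.float32))

# Positives ranked inside the top k count as hits.
hits = ranks.copy()
hits.data = np.where(hits.data < k, 1.0, 0.0)

retrieved = ranks.getnnz(axis=1)                  # test positives per user: [2, 3]
hit = np.squeeze(np.array(hits.sum(axis=1)))      # hits per user: [1, 1]

print(hit / retrieved)                            # [0.5, 0.333...]: recall@2 per user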
