|
@@ -10,6 +10,7 @@
 
 
 __all__ = ['precision_at_k',
+           'recall_at_k',
            'auc_score',
            'reciprocal_rank']
 
@@ -72,6 +73,66 @@ def precision_at_k(model, test_interactions, train_interactions=None,
|
     return precision
 
 
|
+def recall_at_k(model, test_interactions, train_interactions=None,
+                k=10, user_features=None, item_features=None,
+                preserve_rows=False, num_threads=1):
+    """
+    Measure the recall at k metric for a model: the number of positive items
+    in the first k positions of the ranked list of results divided by the
+    number of positive items in the test period. A perfect score is 1.0.
+
+    Parameters
+    ----------
+
+    model: LightFM instance
+        the model to be evaluated
+    test_interactions: np.float32 csr_matrix of shape [n_users, n_items]
+        Non-zero entries representing known positives in the evaluation set.
+    train_interactions: np.float32 csr_matrix of shape [n_users, n_items], optional
+        Non-zero entries representing known positives in the train set. These
+        will be omitted from the score calculations to avoid re-recommending
+        known positives.
+    k: integer, optional
+        The k parameter.
+    user_features: np.float32 csr_matrix of shape [n_users, n_user_features], optional
+        Each row contains that user's weights over features.
+    item_features: np.float32 csr_matrix of shape [n_items, n_item_features], optional
+        Each row contains that item's weights over features.
+    preserve_rows: boolean, optional
+        When False (default), the number of rows in the output will be equal
+        to the number of users with interactions in the evaluation set. When
+        True, the number of rows in the output will be equal to the number
+        of users.
+    num_threads: int, optional
+        Number of parallel computation threads to use. Should
+        not be higher than the number of physical cores.
+
+    Returns
+    -------
+
+    np.array of shape [n_users with interactions or n_users,]
+        Numpy array containing recall@k scores for each user. If there are
+        no interactions for a given user in the test period, the returned
+        recall will be 0.
+    """
+
+    ranks = model.predict_rank(test_interactions,
+                               train_interactions=train_interactions,
+                               user_features=user_features,
+                               item_features=item_features,
+                               num_threads=num_threads)
+
+    # Ranks are zero-based, so a rank below k means the item made the
+    # top-k list: binarise the rank data into hits and misses.
+    ranks.data[ranks.data < k] = 1.0
+    ranks.data[ranks.data >= k] = 0.0
+
+    # Recall@k is (test positives ranked in the top k) / (all test positives).
+    retrieved = np.squeeze(test_interactions.getnnz(axis=1))
+    hit = np.squeeze(np.array(ranks.sum(axis=1)))
+
+    if not preserve_rows:
+        hit = hit[test_interactions.getnnz(axis=1) > 0]
+        retrieved = retrieved[test_interactions.getnnz(axis=1) > 0]
+
+    return hit / retrieved
+
+
 def auc_score(model, test_interactions, train_interactions=None,
               user_features=None, item_features=None,
               preserve_rows=False, num_threads=1):
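
As a usage sketch (not part of this commit): once fitted, a model can be
evaluated with the new recall_at_k in the same way as the existing
precision_at_k. The MovieLens dataset and the hyperparameters below are
illustrative assumptions, not taken from this diff.

    from lightfm import LightFM
    from lightfm.datasets import fetch_movielens
    from lightfm.evaluation import recall_at_k

    data = fetch_movielens(min_rating=4.0)

    model = LightFM(loss='warp')
    model.fit(data['train'], epochs=10, num_threads=2)

    # Per-user recall@10; training positives are masked out so they are
    # not re-recommended during evaluation.
    recall = recall_at_k(model,
                         data['test'],
                         train_interactions=data['train'],
                         k=10,
                         num_threads=2)

    print('Mean recall@10: %.3f' % recall.mean())

For intuition: a user with four positives in the test set, two of which are
ranked in the top 10, scores hit / retrieved = 2 / 4 = 0.5 for recall@10.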
|
|