Skip to content

Commit 953624b

Browse files
committed
Merge pull request #61 from maciejkula/explicit_sample_weights
Change sample_weight API to accept COO matrices.
2 parents c1d179b + 1c952e9 commit 953624b

File tree

3 files changed

+83
-25
lines changed

3 files changed

+83
-25
lines changed

lightfm/lightfm.py

Lines changed: 58 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,45 @@ def _to_cython_dtype(self, mat):
199199
else:
200200
return mat
201201

202+
def _process_sample_weight(self, interactions, sample_weight):
203+
204+
if sample_weight is not None:
205+
206+
if self.loss == 'warp-kos':
207+
raise NotImplementedError('k-OS loss with sample weights '
208+
'not implemented.')
209+
210+
if not isinstance(sample_weight, sp.coo_matrix):
211+
raise ValueError('Sample_weight must be a COO matrix.')
212+
213+
if sample_weight.shape != interactions.shape:
214+
raise ValueError('Sample weight and interactions '
215+
'matrices must be the same shape')
216+
217+
if not (np.array_equal(interactions.row,
218+
sample_weight.row)
219+
and
220+
np.array_equal(interactions.col,
221+
sample_weight.col)):
222+
raise ValueError('Sample weight and interaction matrix '
223+
'entries must be in the same order')
224+
225+
if sample_weight.data.dtype != CYTHON_DTYPE:
226+
sample_weight_data = sample_weight.data.astype(CYTHON_DTYPE)
227+
else:
228+
sample_weight_data = sample_weight.data
229+
else:
230+
if np.array_equiv(interactions.data, 1.0):
231+
# Re-use interactions data if they are all
232+
# ones
233+
sample_weight_data = interactions.data
234+
else:
235+
# Otherwise allocate a new array of ones
236+
sample_weight_data = np.ones_like(interactions.data,
237+
dtype=CYTHON_DTYPE)
238+
239+
return sample_weight_data
240+
202241
def fit(self, interactions,
203242
user_features=None, item_features=None,
204243
sample_weight=None,
@@ -207,21 +246,28 @@ def fit(self, interactions,
207246
Fit the model.
208247
209248
Arguments:
210-
- coo_matrix interactions: matrix of shape [n_users, n_items] containing
249+
- coo_matrix interactions: np.float32 matrix of shape [n_users, n_items] containing
211250
user-item interactions. Will be converted to
212251
numpy.float32 dtype if it is not of that type
213252
(this conversion may be heavy depending upon
214253
matrix size)
254+
255+
Optional arguments:
215256
- csr_matrix user_features: array of shape [n_users, n_user_features].
216257
Each row contains that user's weights
217258
over features.
218259
- csr_matrix item_features: array of shape [n_items, n_item_features].
219260
Each row contains that item's weights
220261
over features.
221-
- np.float32 array user_weights: array of shape [n_interactions,] with
222-
weights applied to individual interactions.
223-
Defaults to weight 1.0 for all interactions.
224-
Not implemented for the k-OS loss.
262+
- coo_matrix sample_weight: np.float32 matrix of shape [n_users, n_items] with
263+
entries expressing weights of individual
264+
interactions from the interactions matrix.
265+
Its row and col arrays must be the same as
266+
those of the interactions matrix. For memory
267+
efficiency it's possible to use the same arrays
268+
for both weights and interaction matrices.
269+
Defaults to weight 1.0 for all interactions.
270+
Not implemented for the k-OS loss.
225271
226272
- int epochs: number of epochs to run. Default: 1
227273
- int num_threads: number of parallel computation threads to use. Should
@@ -250,18 +296,19 @@ def fit_partial(self, interactions,
250296
# If that's already true, this is a no-op.
251297
interactions = interactions.tocoo()
252298

299+
if interactions.dtype != CYTHON_DTYPE:
300+
interactions.data = interactions.data.astype(CYTHON_DTYPE)
301+
302+
sample_weight_data = self._process_sample_weight(interactions,
303+
sample_weight)
304+
253305
n_users, n_items = interactions.shape
254306
(user_features,
255307
item_features) = self._construct_feature_matrices(n_users,
256308
n_items,
257309
user_features,
258310
item_features)
259311

260-
if self.loss == 'warp-kos' and sample_weight is not None:
261-
raise NotImplementedError('k-OS loss with sample weights '
262-
'not implemented.')
263-
264-
interactions = self._to_cython_dtype(interactions)
265312
user_features = self._to_cython_dtype(user_features)
266313
item_features = self._to_cython_dtype(item_features)
267314
sample_weight = (self._to_cython_dtype(sample_weight)
@@ -284,13 +331,6 @@ def fit_partial(self, interactions,
284331
if not user_features.shape[1] == self.user_embeddings.shape[0]:
285332
raise ValueError('Incorrect number of features in user_features')
286333

287-
if sample_weight.ndim != 1:
288-
raise ValueError('Sample weights must be 1-dimensional')
289-
290-
if sample_weight.shape[0] != interactions.getnnz():
291-
raise ValueError('Number of sample weights incompatible '
292-
'with number of interactions')
293-
294334
for epoch in range(epochs):
295335

296336
if verbose:
@@ -299,7 +339,7 @@ def fit_partial(self, interactions,
299339
self._run_epoch(item_features,
300340
user_features,
301341
interactions,
302-
sample_weight,
342+
sample_weight_data,
303343
num_threads,
304344
self.loss)
305345

tests/test_api.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -175,17 +175,33 @@ def test_sample_weight():
175175
model = LightFM()
176176

177177
train = sp.coo_matrix(np.array([[0, 1],
178-
[0, 0]]))
178+
[0, 1]]))
179179

180180
with pytest.raises(ValueError):
181+
# Wrong number of weights
182+
sample_weight = sp.coo_matrix(np.zeros((2, 2)))
183+
181184
model.fit(train,
182-
sample_weight=np.zeros((2, 2)))
185+
sample_weight=sample_weight)
186+
187+
with pytest.raises(ValueError):
188+
# Wrong shape
189+
sample_weight = sp.coo_matrix(np.zeros(2))
190+
model.fit(train,
191+
sample_weight=np.zeros(3))
183192

184193
with pytest.raises(ValueError):
194+
# Wrong order of entries
195+
sample_weight = sp.coo_matrix((train.data,
196+
(train.row[::-1],
197+
train.col[::-1])))
185198
model.fit(train,
186199
sample_weight=np.zeros(3))
187200

188-
model.fit(train, sample_weight=np.ones(1))
201+
sample_weight = sp.coo_matrix((train.data,
202+
(train.row,
203+
train.col)))
204+
model.fit(train, sample_weight=sample_weight)
189205

190206
model = LightFM(loss='warp-kos')
191207

tests/test_movielens.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,9 @@ def test_movielens_accuracy_sample_weights():
407407
# roughly the same accuracy
408408

409409
scale = 1e-01
410-
weights = np.ones(train.getnnz(),
411-
dtype=np.float32) * scale
410+
weights = train.copy()
411+
weights.data = np.ones(train.getnnz(),
412+
dtype=np.float32) * scale
412413

413414
for (loss, exp_score) in (('logistic', 0.74),
414415
('bpr', 0.84),
@@ -483,8 +484,9 @@ def test_zero_weights_accuracy():
483484
# When very small weights are used
484485
# accuracy should be no better than
485486
# random.
486-
weights = np.zeros(train.getnnz(),
487-
dtype=np.float32)
487+
weights = train.copy()
488+
weights.data = np.zeros(train.getnnz(),
489+
dtype=np.float32)
488490

489491
for loss in ('logistic', 'bpr', 'warp'):
490492
model = LightFM(loss=loss)

0 commit comments

Comments
 (0)