@@ -199,6 +199,45 @@ def _to_cython_dtype(self, mat):
199
199
else :
200
200
return mat
201
201
202
def _process_sample_weight(self, interactions, sample_weight):
    """
    Validate sample weights and return them as a flat CYTHON_DTYPE array.

    Arguments:
    - coo_matrix interactions: interaction matrix; its row, col and data
                               arrays are read but never modified.
    - coo_matrix sample_weight: per-interaction weights, or None to weight
                                every interaction 1.0. Must have the same
                                shape as interactions, with row and col
                                arrays equal to those of interactions.

    Returns:
    - array of CYTHON_DTYPE with one weight per stored interaction. When no
      conversion is needed this is the input data array itself, not a copy.

    Raises:
    - NotImplementedError: sample weights were given and self.loss is
                           'warp-kos'.
    - ValueError: sample_weight is not a COO matrix, differs in shape from
                  interactions, or stores its entries in a different order.
    """

    if sample_weight is None:
        data = interactions.data
        # Share the interactions buffer only when it is already an array
        # of ones in the target dtype; an all-ones matrix of another dtype
        # must not leak through, so it gets a freshly allocated array.
        if data.dtype == CYTHON_DTYPE and np.array_equiv(data, 1.0):
            return data
        return np.ones_like(data, dtype=CYTHON_DTYPE)

    if self.loss == 'warp-kos':
        raise NotImplementedError('k-OS loss with sample weights '
                                  'not implemented.')

    if not isinstance(sample_weight, sp.coo_matrix):
        raise ValueError('Sample_weight must be a COO matrix.')

    if sample_weight.shape != interactions.shape:
        raise ValueError('Sample weight and interactions '
                         'matrices must be the same shape')

    # Weights are matched to interactions by position, so both matrices
    # must list their entries in exactly the same order.
    same_order = (np.array_equal(interactions.row, sample_weight.row)
                  and np.array_equal(interactions.col, sample_weight.col))
    if not same_order:
        raise ValueError('Sample weight and interaction matrix '
                         'entries must be in the same order')

    # copy=False hands back the original array when the dtype already
    # matches, avoiding a needless allocation.
    return sample_weight.data.astype(CYTHON_DTYPE, copy=False)
240
+
202
241
def fit (self , interactions ,
203
242
user_features = None , item_features = None ,
204
243
sample_weight = None ,
@@ -207,21 +246,28 @@ def fit(self, interactions,
207
246
Fit the model.
208
247
209
248
Arguments:
210
- - coo_matrix interactions: matrix of shape [n_users, n_items] containing
249
+ - coo_matrix interactions: np.float32 matrix of shape [n_users, n_items] containing
211
250
user-item interactions. Will be converted to
212
251
numpy.float32 dtype if it is not of that type
213
252
(this conversion may be heavy depending upon
214
253
matrix size)
254
+
255
+ Optional arguments:
215
256
- csr_matrix user_features: array of shape [n_users, n_user_features].
216
257
Each row contains that user's weights
217
258
over features.
218
259
- csr_matrix item_features: array of shape [n_items, n_item_features].
219
260
Each row contains that item's weights
220
261
over features.
221
- - np.float32 array user_weights: array of shape [n_interactions,] with
222
- weights applied to individual interactions.
223
- Defaults to weight 1.0 for all interactions.
224
- Not implemented for the k-OS loss.
262
+ - coo_matrix sample_weight: np.float32 matrix of shape [n_users, n_items] with
263
+ entries expressing weights of individual
264
+ interactions from the interactions matrix.
265
+ Its row and col arrays must be the same as
266
+ those of the interactions matrix. For memory
267
+ efficiency it is possible to use the same arrays
268
+ for both weights and interaction matrices.
269
+ Defaults to weight 1.0 for all interactions.
270
+ Not implemented for the k-OS loss.
225
271
226
272
- int epochs: number of epochs to run. Default: 1
227
273
- int num_threads: number of parallel computation threads to use. Should
@@ -250,18 +296,19 @@ def fit_partial(self, interactions,
250
296
# If that's already true, this is a no-op.
251
297
interactions = interactions .tocoo ()
252
298
299
+ if interactions .dtype != CYTHON_DTYPE :
300
+ interactions .data = interactions .data .astype (CYTHON_DTYPE )
301
+
302
+ sample_weight_data = self ._process_sample_weight (interactions ,
303
+ sample_weight )
304
+
253
305
n_users , n_items = interactions .shape
254
306
(user_features ,
255
307
item_features ) = self ._construct_feature_matrices (n_users ,
256
308
n_items ,
257
309
user_features ,
258
310
item_features )
259
311
260
- if self .loss == 'warp-kos' and sample_weight is not None :
261
- raise NotImplementedError ('k-OS loss with sample weights '
262
- 'not implemented.' )
263
-
264
- interactions = self ._to_cython_dtype (interactions )
265
312
user_features = self ._to_cython_dtype (user_features )
266
313
item_features = self ._to_cython_dtype (item_features )
267
314
sample_weight = (self ._to_cython_dtype (sample_weight )
@@ -284,13 +331,6 @@ def fit_partial(self, interactions,
284
331
if not user_features .shape [1 ] == self .user_embeddings .shape [0 ]:
285
332
raise ValueError ('Incorrect number of features in user_features' )
286
333
287
- if sample_weight .ndim != 1 :
288
- raise ValueError ('Sample weights must be 1-dimensional' )
289
-
290
- if sample_weight .shape [0 ] != interactions .getnnz ():
291
- raise ValueError ('Number of sample weights incompatible '
292
- 'with number of interactions' )
293
-
294
334
for epoch in range (epochs ):
295
335
296
336
if verbose :
@@ -299,7 +339,7 @@ def fit_partial(self, interactions,
299
339
self ._run_epoch (item_features ,
300
340
user_features ,
301
341
interactions ,
302
- sample_weight ,
342
+ sample_weight_data ,
303
343
num_threads ,
304
344
self .loss )
305
345
0 commit comments