@@ -267,26 +267,31 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
267
267
assert len (layer_stack_cuda ) == len (layer_stack_cpu )
268
268
269
269
for layer , cuda_output in layer_stack_cuda :
270
+ tensor_cuda_out = None
271
+ tensor_cpu_out = None
272
+ abs_diff = None
270
273
for cpu_layer , cpu_output in layer_stack_cpu :
271
274
if cpu_layer == layer :
272
275
print ("CPU Layer {} GPU Layer {}" .format (cpu_layer , layer ))
273
276
274
277
if not type (cuda_output ) is tuple :
275
- tensor_cuda_out = cuda_output . to ( torch . device ( 'cpu' ))
278
+ tensor_cuda_out = cuda_output
276
279
else :
277
280
tensor_cuda_out = convert_tensor (cuda_output )
278
281
if type (cpu_output ) is tuple :
279
282
tensor_cpu_out = convert_tensor (cpu_output )
280
283
else :
281
- tensor_cpu_out = cpu_output
284
+ tensor_cpu_out = cpu_output . to ( 'cuda' )
282
285
print ("tensor converted... get torch abs diff" )
283
286
abs_diff = torch .abs (tensor_cpu_out - tensor_cuda_out ).flatten ().tolist ()
284
- cos = nn .CosineSimilarity ()
285
- cos_sim = cos (tensor_cpu_out - tensor_cuda_out )
286
-
287
- print ("abs_diff and cos_sim calculated" )
287
+ print ("abs_diff calculated" )
288
+ cos = nn .CosineSimilarity (dim = 1 )
289
+ cos_sim = cos (tensor_cpu_out , tensor_cuda_out )
290
+ print (cos_sim )
291
+
288
292
absolute_differences .append (abs_diff )
289
- print ("abs_diff list extended" )
293
+ print ("abs_diff list appended" )
294
+ print (len (absolute_differences ))
290
295
291
296
prefix = get_default_validation_prefix (model_id , max_new_token , batch_size , 0 , 'float16' )
292
297
layer_name = str (layer ).replace ('[' ,'' ).replace (']' , '' )
0 commit comments