Skip to content

Commit f5e3728

Browse files
committed
Cuda migration (by amp and amohan)
Memory allocations must be handles
1 parent 045b410 commit f5e3728

File tree

2 files changed

+44
-8
lines changed

2 files changed

+44
-8
lines changed

cava/nightwatch/generator/c/callee.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ def buffer_case():
390390
return ""
391391

392392
def default_case():
393-
return (Expr(type.transfer).equals("NW_HANDLE")).if_then_else(
393+
return (Expr(type.transfer).one_of(["NW_HANDLE", "NW_OPAQUE"])).if_then_else(
394394
Expr(not type.deallocates).if_then_else(
395395
assign_record_replay_functions(param_value, type).then(record_call_metadata(param_value, type)),
396396
expunge_calls(param_value, type),
@@ -399,9 +399,10 @@ def default_case():
399399

400400
if type.fields:
401401
return for_all_elements(values, type, depth=depth, original_type=original_type, **other)
402-
return type.is_simple_buffer().if_then_else(
403-
simple_buffer_case, Expr(type.transfer).equals("NW_BUFFER").if_then_else(buffer_case, default_case)
404-
)
402+
return Expr(type.transfer).equals("NW_BUFFER").if_then_else(
403+
buffer_case,
404+
default_case
405+
)
405406

406407
with location(f"at {term.yellow(str(arg.name))}", arg.location):
407408
conv = convert_result_value(

cava/samples/cudart/cudart.cpp

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ typedef struct {
5858
int num_funcs;
5959
struct fatbin_function *func; /* for functions */
6060

61+
size_t buffer_size;
62+
6163
/* global states */
6264
CUmodule cur_module;
6365

@@ -442,21 +444,51 @@ cudaFreeHost(void *ptr)
442444
}
443445
ava_end_replacement;
444446

447+
448+
/// Migration: @mem_extract_tag
449+
ava_utility void* object_extract(void* obj, size_t* length) {
450+
//called from host
451+
void* buffer;
452+
printf("object_replay: object=%lx\n", (uintptr_t)obj);
453+
cudaDeviceSynchronize();
454+
455+
*length = ava_metadata(obj)->buffer_size;
456+
buffer = malloc(*length);
457+
cudaMemcpy(buffer, obj, *length, cudaMemcpyDeviceToHost);
458+
return buffer;
459+
}
460+
461+
ava_utility void object_replace(void* obj, void* data, size_t length) {
462+
printf("object_replace: object=%lx, len=%lu\n", (uintptr_t)obj, length);
463+
assert(length != 0);
464+
cudaMemcpy(obj, data, length, cudaMemcpyHostToDevice);
465+
}
466+
445467
__host__ __cudart_builtin__ cudaError_t CUDARTAPI
446468
cudaMalloc(void **devPtr, size_t size)
447469
{
448470
ava_argument(devPtr) {
471+
449472
ava_out; ava_buffer(1);
450-
ava_element ava_opaque;
473+
ava_element{
474+
ava_allocates;
475+
ava_handle;
476+
ava_object_explicit_state_functions(object_extract, object_replace);
477+
ava_object_record;
478+
}
451479
}
480+
481+
ava_execute();
482+
ava_metadata(*devPtr)->buffer_size = size;
452483
}
453484

454485
__host__ cudaError_t CUDARTAPI
455486
cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind)
456487
{
457488
ava_argument(dst) {
458489
if (kind == cudaMemcpyHostToDevice) {
459-
ava_opaque;
490+
ava_handle;
491+
ava_object_record;
460492
}
461493
else if (kind == cudaMemcpyDeviceToHost) {
462494
ava_out; ava_buffer(count);
@@ -468,15 +500,18 @@ cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind)
468500
ava_in; ava_buffer(count);
469501
}
470502
else if (kind == cudaMemcpyDeviceToHost) {
471-
ava_opaque;
503+
ava_handle;
472504
}
473505
}
474506
}
475507

476508
__host__ __cudart_builtin__ cudaError_t CUDARTAPI
477509
cudaFree(void *devPtr)
478510
{
479-
ava_argument(devPtr) ava_opaque;
511+
ava_argument(devPtr){
512+
ava_handle;
513+
ava_object_record;
514+
}
480515
}
481516

482517
/* Rich set of APIs */

0 commit comments

Comments
 (0)