Commit 9d14df0

make corresponding changes to nan handling

1 parent aaf50e0 commit 9d14df0

File tree

2 files changed, +215 -161 lines changed

reproschema/reproschema2redcap.py

Lines changed: 170 additions & 129 deletions
```diff
@@ -84,43 +84,21 @@ def find_Ftype_and_colH(item, row_data, response_options):
 
     return row_data
 
-
-def process_item(
-    item,
-    item_properties,
-    activity_name,
-    activity_preamble,
-    contextfile,
-    http_kwargs,
-    compute_item=False,
-    compute_expr=None,
-):
+def process_item(item, item_properties, activity_name, activity_preamble, contextfile, http_kwargs, compute_item=False, compute_expr=None):
     """
     Process an item in JSON format and extract relevant information into a dictionary.
-
-    Args:
-        item_json (dict): The JSON object representing the item.
-        activity_name (str): The name of the activity.
-
-    Returns:
-        dict: A dictionary containing the extracted information.
+    Only includes non-empty/non-None values to match clean_dict_nans behavior.
     """
     if activity_name.endswith("_schema"):
         activity_name = activity_name[:-7]
+
+    # Initialize with only required fields
     row_data = {
-        "val_min": "",
-        "val_max": "",
-        "choices": "",
-        "required": "",
-        "field_notes": "",
-        "var_name": "",
+        "var_name": item.id,
         "activity": activity_name,
-        "field_label": "",
-        "isVis_logic": "",
     }
 
-    # Extract min and max values from response options, if available
-    # loading additional files if responseOptions is an url
+    # Extract and add non-empty response option values
     if isinstance(item.responseOptions, str):
         resp = load_file(
             item.responseOptions,
```
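
The new docstring says the dictionary should "only include non-empty/non-None values to match clean_dict_nans behavior". The helper itself is not shown in this diff; a minimal sketch of what such behavior amounts to (an assumption about `clean_dict_nans`, not its actual implementation in the repository) would be:

```python
import math


def clean_dict_nans(d):
    """Sketch: drop keys whose values are None, empty strings, or NaN.

    Hypothetical stand-in for the clean_dict_nans behavior the docstring
    refers to; the real helper may differ.
    """
    cleaned = {}
    for key, value in d.items():
        if value is None or value == "":
            continue
        if isinstance(value, float) and math.isnan(value):
            continue
        cleaned[key] = value
    return cleaned
```

Building `row_data` sparsely from the start means there is nothing left for such a cleanup pass to strip.
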
```diff
@@ -134,47 +112,73 @@ def process_item(
         if "ResponseOption" in resp["category"]:
             response_options = ResponseOption(**resp)
         else:
-            raise Exception(
-                f"Expected to have ResponseOption but got {resp['category']}"
-            )
+            raise Exception(f"Expected to have ResponseOption but got {resp['category']}")
     else:
         response_options = item.responseOptions
-    row_data["val_min"] = response_options.minValue if response_options else ""
-    row_data["val_max"] = response_options.maxValue if response_options else ""
-
-    # 'choices' processing is now handled in 'find_Ftype_and_colH' if it's a URL
-    choices = response_options.choices if response_options else ""
-    if choices and not isinstance(choices, str):
-        if isinstance(choices, list):
-            item_choices = [
-                f"{ch.value}, {ch.name.get('en', '')}" for ch in choices
-            ]
-            row_data["choices"] = " | ".join(item_choices)
 
-    if item_properties.get("valueRequired", "") is True:
+    # Only add values if they exist
+    if response_options:
+        if response_options.minValue is not None:
+            row_data["val_min"] = response_options.minValue
+        if response_options.maxValue is not None:
+            row_data["val_max"] = response_options.maxValue
+
+        # Handle choices
+        choices = response_options.choices
+        if choices and not isinstance(choices, str):
+            if isinstance(choices, list):
+                item_choices = [f"{ch.value}, {ch.name.get('en', '')}" for ch in choices if ch.value is not None]
+                if item_choices:
+                    row_data["choices"] = " | ".join(item_choices)
+
+    # Add valueRequired if explicitly True
+    if item_properties and "valueRequired" in item_properties and item_properties["valueRequired"] is True:
         row_data["required"] = "y"
-    if "isVis" in item_properties and item_properties["isVis"] is not True:
+
+    var_name = str(item.id).split("/")[-1]  # Get the last part of the id path
+    if var_name.endswith("_total_score"):
+        row_data["isVis_logic"] = False  # This will make the field hidden
+    # Regular isVis handling for other fields
+    elif "isVis" in item_properties and item_properties["isVis"] is not True:
         row_data["isVis_logic"] = item_properties["isVis"]
-    row_data["field_notes"] = item.description.get("en", "")
-    row_data["preamble"] = item.preamble.get("en", activity_preamble)
-    row_data["var_name"] = item.id
 
+    # Handle description
+    if item.description and "en" in item.description and item.description["en"]:
+        row_data["field_notes"] = item.description["en"]
+
+    # Handle preamble
+    if item.preamble and "en" in item.preamble and item.preamble["en"]:
+        row_data["preamble"] = item.preamble["en"]
+    elif activity_preamble:
+        row_data["preamble"] = activity_preamble
+
+    # Handle question/field label
     if compute_item:
-        # for compute items there are no questions
         question = item.description
     else:
         question = item.question
-    if isinstance(question, dict):
-        row_data["field_label"] = question.get("en", "")
-    elif isinstance(question, str):
+
+    if isinstance(question, dict) and "en" in question and question["en"]:
+        row_data["field_label"] = question["en"]
+    elif isinstance(question, str) and question:
         row_data["field_label"] = question
 
+    # Handle compute items
     if compute_item and compute_expr:
+        print(f"\nDebug - Compute Item: {var_name}")
+        print(f"Compute Expression: {compute_expr}")
         row_data["choices"] = compute_expr
         row_data["field_type"] = "calc"
+        # For computed fields, we may need to set visibility to false by default
+        if any(score_type in var_name for score_type in ["_score", "_total"]):
+            row_data["isVis_logic"] = False
     else:
-        # Call helper function to find field type and validation type (if any) and update row_data
-        row_data = find_Ftype_and_colH(item, row_data, response_options)
+        # Use find_Ftype_and_colH but only add non-empty values
+        field_info = find_Ftype_and_colH(item, {}, response_options)
+        if field_info.get("field_type"):
+            row_data["field_type"] = field_info["field_type"]
+        if field_info.get("val_type_OR_slider"):
+            row_data["val_type_OR_slider"] = field_info["val_type_OR_slider"]
 
     return row_data
 
```
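
With this rewrite, `process_item` returns a sparse dictionary: keys such as `val_min`, `choices`, `field_notes`, or `preamble` appear only when the item actually provides them. A purely illustrative return value for a computed total-score item (variable name and expression are made up) might look like:

```python
# Hypothetical example of the sparse output; real values depend on the schema.
row_data = {
    "var_name": "items/example_total_score",  # item.id
    "activity": "example",                    # "_schema" suffix already stripped
    "choices": "[item_1] + [item_2]",         # the jsExpression passed as compute_expr
    "field_type": "calc",
    "isVis_logic": False,                     # "_total_score" items are hidden
}
# Keys like "val_min", "val_max", "field_notes", or "required" are simply absent.
```
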
```diff
@@ -220,6 +224,14 @@ def get_csv_data(dir_path, contextfile, http_kwargs):
             el["variableName"]: el
             for el in parsed_activity_json["ui"]["addProperties"]
         }
+
+        # Get activity name without adding extra _schema
+        activity_name = act.id.split("/")[-1]
+        if activity_name.endswith('_schema.jsonld'):
+            activity_name = activity_name[:-12]  # Remove _schema.jsonld
+        elif activity_name.endswith('.jsonld'):
+            activity_name = activity_name[:-7]  # Remove .jsonld
+
         items_properties.update(
             {
                 el["isAbout"]: el
```
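
This block derives the form name by stripping a `_schema.jsonld` or `.jsonld` suffix instead of appending another `_schema`. A standalone sketch of the same idea (a hypothetical helper, not part of the module); note that `"_schema.jsonld"` is 14 characters long, so slicing by `len(suffix)` avoids hard-coded offsets such as `[:-12]`:

```python
def strip_activity_suffix(activity_id: str) -> str:
    """Sketch: reduce an activity id or file name to a bare form name."""
    name = activity_id.split("/")[-1]
    for suffix in ("_schema.jsonld", ".jsonld", "_schema"):
        if name.endswith(suffix):
            return name[: -len(suffix)]
    return name


# e.g. strip_activity_suffix("activities/example_schema.jsonld") == "example"  (hypothetical path)
```
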
```diff
@@ -233,61 +245,69 @@ def get_csv_data(dir_path, contextfile, http_kwargs):
         item_order = [("ord", el) for el in act.ui.order]
         item_calc = [("calc", el) for el in act.compute]
 
+        computed_fields = {calc_item.variableName for _, calc_item in item_calc}
+
+
         for tp, item in item_order + item_calc:
-            if tp == "calc":
-                js_expr = item.jsExpression
-                if item.variableName in items_properties:
-                    item = items_properties[item.variableName][
-                        "isAbout"
-                    ]
+            try:
+                if tp == "calc":
+                    js_expr = item.jsExpression
+                    var_name = item.variableName
+
+                    # Find the corresponding item properties
+                    if var_name in items_properties:
+                        item = items_properties[var_name]["isAbout"]
+                        # Ensure computed fields are marked as hidden
+                        items_properties[var_name]["isVis"] = False
+                    else:
+                        print(f"WARNING: no item properties found for computed field {var_name} in {activity_name}")
+                        continue
+                    item_calc = True
                 else:
-                    print(
-                        "WARNING: no item properties found for",
-                        item.variableName,
-                        activity_name,
+                    item_calc = False
+                    js_expr = None
+                it_prop = items_properties.get(item)
+                if not _is_url(item):
+                    item = Path(activity_path).parent / item
+
+                try:
+                    item_json = load_file(
+                        item,
+                        started=True,
+                        http_kwargs=http_kwargs,
+                        fixoldschema=True,
+                        compact=True,
+                        compact_context=contextfile,
                     )
+                    item_json.pop("@context", "")
+                    itm = Item(**item_json)
+                except Exception as e:
+                    print(f"Error loading item: {item}")
+                    print(f"Error details: {str(e)}")
                     continue
-                item_calc = True
-            else:
-                item_calc = False
-                js_expr = None
-            it_prop = items_properties.get(item)
-            if not _is_url(item):
-                item = Path(activity_path).parent / item
-            try:
-                item_json = load_file(
-                    item,
-                    started=True,
-                    http_kwargs=http_kwargs,
-                    fixoldschema=True,
-                    compact=True,
-                    compact_context=contextfile,
+
+                activity_name = act.id.split("/")[-1].split(".")[0]
+                activity_preamble = act.preamble.get("en", "").strip() if hasattr(act, 'preamble') else ""
+
+                row_data = process_item(
+                    itm,
+                    it_prop,
+                    activity_name,
+                    activity_preamble,
+                    contextfile,
+                    http_kwargs,
+                    item_calc,
+                    js_expr,
                 )
-            except Exception:
-                print(f"Error loading item: {item}")
+                csv_data.append(row_data)
+
+            except Exception as e:
+                print(f"Error processing item {item}: {str(e)}")
                 continue
-            item_json.pop("@context", "")
-            itm = Item(**item_json)
-            activity_name = act.id.split("/")[-1].split(".")[0]
-            activity_preamble = act.preamble.get(
-                "en", ""
-            ).strip()
-            row_data = process_item(
-                itm,
-                it_prop,
-                activity_name,
-                activity_preamble,
-                contextfile,
-                http_kwargs,
-                item_calc,
-                js_expr,
-            )
-            csv_data.append(row_data)
         # Break after finding the first _schema file
         break
     return csv_data
 
-
 def write_to_csv(csv_data, output_csv_filename):
     # REDCap-specific headers
     headers = [
```
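
The restructured loop wraps each item in its own `try`/`except`, so one malformed item or unresolvable reference is reported and skipped rather than aborting the whole conversion. The isolation pattern, reduced to its essentials (a generic sketch, not the module's code):

```python
def convert_items(items, convert_one):
    """Per-item error isolation: keep what converts, report what fails."""
    rows, failures = [], []
    for item in items:
        try:
            rows.append(convert_one(item))
        except Exception as exc:  # broad catch, mirroring the diff above
            print(f"Error processing item {item}: {exc}")
            failures.append((item, exc))
    return rows, failures
```

The trade-off of the broad `except Exception` is that genuine bugs surface only as printed warnings, which is why the added error messages include the exception details.
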
```diff
@@ -297,7 +317,7 @@ def write_to_csv(csv_data, output_csv_filename):
         "Field Type",
         "Field Label",
         "Choices, Calculations, OR Slider Labels",
-        "Field Note",  # TODO: is this description?
+        "Field Note",
         "Text Validation Type OR Show Slider Number",
         "Text Validation Min",
         "Text Validation Max",
```
```diff
@@ -308,49 +328,70 @@ def write_to_csv(csv_data, output_csv_filename):
         "Question Number (surveys only)",
         "Matrix Group Name",
         "Matrix Ranking?",
-        "Field Annotation",
+        "Field Annotation"
     ]
 
     # Writing to the CSV file
-    with open(
-        output_csv_filename, "w", newline="", encoding="utf-8"
-    ) as csvfile:
+    with open(output_csv_filename, "w", newline="", encoding="utf-8") as csvfile:
         writer = csv.DictWriter(csvfile, fieldnames=headers)
-
-        # Map the data from your format to REDCap format
-        redcap_data = []
+        writer.writeheader()
+
         for row in csv_data:
+            redcap_row = {}
+
+            # Handle var_name URL conversion
            var_name = row["var_name"]
            if _is_url(var_name):
                var_name = var_name.split("/")[-1].split(".")[0]
-            redcap_row = {
-                "Variable / Field Name": var_name,
-                "Form Name": row["activity"],
-                "Section Header": row[
-                    "preamble"
-                ],  # Update this if your data includes section headers
-                "Field Type": row["field_type"],
-                "Field Label": row["field_label"],
-                "Choices, Calculations, OR Slider Labels": row["choices"],
-                "Field Note": row["field_notes"],
-                "Text Validation Type OR Show Slider Number": row.get(
-                    "val_type_OR_slider", ""
-                ),
-                "Required Field?": row["required"],
-                "Text Validation Min": row["val_min"],
-                "Text Validation Max": row["val_max"],
-                "Branching Logic (Show field only if...)": row["isVis_logic"],
-                # Add other fields as necessary based on your data
+            redcap_row["Variable / Field Name"] = var_name
+
+            # Handle form name
+            activity_name = row["activity"]
+            if activity_name.endswith("_schema"):
+                activity_name = activity_name[:-7]
+            redcap_row["Form Name"] = activity_name
+
+            # Map remaining fields
+            field_mappings = {
+                "preamble": "Section Header",
+                "field_type": "Field Type",
+                "field_label": "Field Label",
+                "choices": "Choices, Calculations, OR Slider Labels",
+                "field_notes": "Field Note",
+                "val_type_OR_slider": "Text Validation Type OR Show Slider Number",
+                "val_min": "Text Validation Min",
+                "val_max": "Text Validation Max",
+                "required": "Required Field?",
+                "isVis_logic": "Branching Logic (Show field only if...)",
+                "field_annotation": "Field Annotation",
+                "matrix_group": "Matrix Group Name",
+                "matrix_ranking": "Matrix Ranking?"
             }
-            redcap_data.append(redcap_row)
 
-        writer.writeheader()
-        for row in redcap_data:
-            writer.writerow(row)
+            # Add mapped fields only if they exist and aren't empty
+            for src_key, dest_key in field_mappings.items():
+                if src_key in row and row[src_key] is not None and row[src_key] != "":
+                    # Special handling for visibility logic
+                    if src_key == "isVis_logic":
+                        if row[src_key] is not True:  # Only add if not default True
+                            redcap_row[dest_key] = row[src_key]
+                    # Special handling for required field
+                    elif src_key == "required":
+                        redcap_row[dest_key] = "y" if row[src_key] else "n"
+                    # Special handling for field annotation
+                    elif src_key == "field_annotation":
+                        current_annotation = redcap_row.get(dest_key, "")
+                        if current_annotation:
+                            redcap_row[dest_key] = f"{current_annotation} {row[src_key]}"
+                        else:
+                            redcap_row[dest_key] = row[src_key]
+                    else:
+                        redcap_row[dest_key] = row[src_key]
+
+            writer.writerow(redcap_row)
 
     print("The CSV file was written successfully")
 
-
 def reproschema2redcap(input_dir_path, output_csv_filename):
     contextfile = CONTEXTFILE_URL  # todo, give an option
     http_kwargs = {}
```
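
Because each `redcap_row` now carries only its populated columns, the writer relies on `csv.DictWriter` filling every missing header with `restval`, which defaults to an empty string. A self-contained demonstration with made-up headers and rows:

```python
import csv
import io

headers = ["Variable / Field Name", "Form Name", "Field Type", "Field Note"]
rows = [
    {"Variable / Field Name": "age", "Form Name": "demo", "Field Type": "text"},
    {"Variable / Field Name": "consent", "Form Name": "demo"},  # sparse row
]

buf = io.StringIO()
writer = csv.DictWriter(buf, fieldnames=headers)  # restval="" by default
writer.writeheader()
writer.writerows(rows)
print(buf.getvalue())
# Columns absent from a row are written as empty cells, which REDCap reads as blank.
```
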

0 commit comments
