@@ -84,43 +84,21 @@ def find_Ftype_and_colH(item, row_data, response_options):
 
     return row_data
 
-
-def process_item(
-    item,
-    item_properties,
-    activity_name,
-    activity_preamble,
-    contextfile,
-    http_kwargs,
-    compute_item=False,
-    compute_expr=None,
-):
+def process_item(item, item_properties, activity_name, activity_preamble, contextfile, http_kwargs, compute_item=False, compute_expr=None):
     """
     Process an item in JSON format and extract relevant information into a dictionary.
-
-    Args:
-        item_json (dict): The JSON object representing the item.
-        activity_name (str): The name of the activity.
-
-    Returns:
-        dict: A dictionary containing the extracted information.
+    Only includes non-empty/non-None values to match clean_dict_nans behavior.
     """
     if activity_name.endswith("_schema"):
         activity_name = activity_name[:-7]
+
+    # Initialize with only required fields
     row_data = {
-        "val_min": "",
-        "val_max": "",
-        "choices": "",
-        "required": "",
-        "field_notes": "",
-        "var_name": "",
+        "var_name": item.id,
         "activity": activity_name,
-        "field_label": "",
-        "isVis_logic": "",
     }
 
-    # Extract min and max values from response options, if available
-    # loading additional files if responseOptions is an url
+    # Extract and add non-empty response option values
     if isinstance(item.responseOptions, str):
         resp = load_file(
             item.responseOptions,
@@ -134,47 +112,73 @@ def process_item(
         if "ResponseOption" in resp["category"]:
             response_options = ResponseOption(**resp)
         else:
-            raise Exception(
-                f"Expected to have ResponseOption but got {resp['category']}"
-            )
+            raise Exception(f"Expected to have ResponseOption but got {resp['category']}")
     else:
         response_options = item.responseOptions
-    row_data["val_min"] = response_options.minValue if response_options else ""
-    row_data["val_max"] = response_options.maxValue if response_options else ""
-
-    # 'choices' processing is now handled in 'find_Ftype_and_colH' if it's a URL
-    choices = response_options.choices if response_options else ""
-    if choices and not isinstance(choices, str):
-        if isinstance(choices, list):
-            item_choices = [
-                f"{ch.value}, {ch.name.get('en', '')}" for ch in choices
-            ]
-            row_data["choices"] = " | ".join(item_choices)
 
-    if item_properties.get("valueRequired", "") is True:
+    # Only add values if they exist
+    if response_options:
+        if response_options.minValue is not None:
+            row_data["val_min"] = response_options.minValue
+        if response_options.maxValue is not None:
+            row_data["val_max"] = response_options.maxValue
+
+        # Handle choices
+        choices = response_options.choices
+        if choices and not isinstance(choices, str):
+            if isinstance(choices, list):
+                item_choices = [f"{ch.value}, {ch.name.get('en', '')}" for ch in choices if ch.value is not None]
+                if item_choices:
+                    row_data["choices"] = " | ".join(item_choices)
+
+    # Add valueRequired if explicitly True
+    if item_properties and "valueRequired" in item_properties and item_properties["valueRequired"] is True:
         row_data["required"] = "y"
-    if "isVis" in item_properties and item_properties["isVis"] is not True:
+
+    var_name = str(item.id).split("/")[-1]  # Get the last part of the id path
+    if var_name.endswith("_total_score"):
+        row_data["isVis_logic"] = False  # This will make the field hidden
+    # Regular isVis handling for other fields
+    elif "isVis" in item_properties and item_properties["isVis"] is not True:
         row_data["isVis_logic"] = item_properties["isVis"]
-    row_data["field_notes"] = item.description.get("en", "")
-    row_data["preamble"] = item.preamble.get("en", activity_preamble)
-    row_data["var_name"] = item.id
 
+    # Handle description
+    if item.description and "en" in item.description and item.description["en"]:
+        row_data["field_notes"] = item.description["en"]
+
+    # Handle preamble
+    if item.preamble and "en" in item.preamble and item.preamble["en"]:
+        row_data["preamble"] = item.preamble["en"]
+    elif activity_preamble:
+        row_data["preamble"] = activity_preamble
+
+    # Handle question/field label
     if compute_item:
-        # for compute items there are no questions
        question = item.description
     else:
         question = item.question
-    if isinstance(question, dict):
-        row_data["field_label"] = question.get("en", "")
-    elif isinstance(question, str):
+
+    if isinstance(question, dict) and "en" in question and question["en"]:
+        row_data["field_label"] = question["en"]
+    elif isinstance(question, str) and question:
         row_data["field_label"] = question
 
+    # Handle compute items
     if compute_item and compute_expr:
+        print(f"\nDebug - Compute Item: {var_name}")
+        print(f"Compute Expression: {compute_expr}")
         row_data["choices"] = compute_expr
         row_data["field_type"] = "calc"
+        # For computed fields, we may need to set visibility to false by default
+        if any(score_type in var_name for score_type in ["_score", "_total"]):
+            row_data["isVis_logic"] = False
     else:
-        # Call helper function to find field type and validation type (if any) and update row_data
-        row_data = find_Ftype_and_colH(item, row_data, response_options)
+        # Use find_Ftype_and_colH but only add non-empty values
+        field_info = find_Ftype_and_colH(item, {}, response_options)
+        if field_info.get("field_type"):
+            row_data["field_type"] = field_info["field_type"]
+        if field_info.get("val_type_OR_slider"):
+            row_data["val_type_OR_slider"] = field_info["val_type_OR_slider"]
 
     return row_data
 
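For orientation, this is roughly the sparse row_data the revised process_item returns: keys with no value are simply absent rather than set to "". The item id, labels, and REDCap type values below are hypothetical:

example_row = {
    "var_name": "items/q1_age",   # hypothetical item id
    "activity": "demographics",
    "field_label": "What is your age?",
    "field_type": "text",
    "val_type_OR_slider": "integer",
    "val_min": 0,
    "val_max": 120,
}
print(example_row.get("choices"))  # None: empty values are omitted instead of being written as ""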
@@ -220,6 +224,14 @@ def get_csv_data(dir_path, contextfile, http_kwargs):
                             el["variableName"]: el
                             for el in parsed_activity_json["ui"]["addProperties"]
                         }
+
+                        # Get activity name without adding extra _schema
+                        activity_name = act.id.split("/")[-1]
+                        if activity_name.endswith('_schema.jsonld'):
+                            activity_name = activity_name[:-14]  # Remove _schema.jsonld
+                        elif activity_name.endswith('.jsonld'):
+                            activity_name = activity_name[:-7]  # Remove .jsonld
+
                         items_properties.update(
                             {
                                 el["isAbout"]: el
@@ -233,61 +245,69 @@ def get_csv_data(dir_path, contextfile, http_kwargs):
                         item_order = [("ord", el) for el in act.ui.order]
                         item_calc = [("calc", el) for el in act.compute]
 
+                        computed_fields = {calc_item.variableName for _, calc_item in item_calc}
+
+
                         for tp, item in item_order + item_calc:
-                            if tp == "calc":
-                                js_expr = item.jsExpression
-                                if item.variableName in items_properties:
-                                    item = items_properties[item.variableName][
-                                        "isAbout"
-                                    ]
+                            try:
+                                if tp == "calc":
+                                    js_expr = item.jsExpression
+                                    var_name = item.variableName
+
+                                    # Find the corresponding item properties
+                                    if var_name in items_properties:
+                                        item = items_properties[var_name]["isAbout"]
+                                        # Ensure computed fields are marked as hidden
+                                        items_properties[var_name]["isVis"] = False
+                                    else:
+                                        print(f"WARNING: no item properties found for computed field {var_name} in {activity_name}")
+                                        continue
+                                    item_calc = True
                                 else:
-                                    print(
-                                        "WARNING: no item properties found for",
-                                        item.variableName,
-                                        activity_name,
+                                    item_calc = False
+                                    js_expr = None
+                                it_prop = items_properties.get(item)
+                                if not _is_url(item):
+                                    item = Path(activity_path).parent / item
+
+                                try:
+                                    item_json = load_file(
+                                        item,
+                                        started=True,
+                                        http_kwargs=http_kwargs,
+                                        fixoldschema=True,
+                                        compact=True,
+                                        compact_context=contextfile,
                                     )
+                                    item_json.pop("@context", "")
+                                    itm = Item(**item_json)
+                                except Exception as e:
+                                    print(f"Error loading item: {item}")
+                                    print(f"Error details: {str(e)}")
                                     continue
-                                item_calc = True
-                            else:
-                                item_calc = False
-                                js_expr = None
-                            it_prop = items_properties.get(item)
-                            if not _is_url(item):
-                                item = Path(activity_path).parent / item
-                            try:
-                                item_json = load_file(
-                                    item,
-                                    started=True,
-                                    http_kwargs=http_kwargs,
-                                    fixoldschema=True,
-                                    compact=True,
-                                    compact_context=contextfile,
+
+                                activity_name = act.id.split("/")[-1].split(".")[0]
+                                activity_preamble = act.preamble.get("en", "").strip() if hasattr(act, 'preamble') else ""
+
+                                row_data = process_item(
+                                    itm,
+                                    it_prop,
+                                    activity_name,
+                                    activity_preamble,
+                                    contextfile,
+                                    http_kwargs,
+                                    item_calc,
+                                    js_expr,
                                 )
-                            except Exception:
-                                print(f"Error loading item: {item}")
+                                csv_data.append(row_data)
+
+                            except Exception as e:
+                                print(f"Error processing item {item}: {str(e)}")
                                 continue
-                            item_json.pop("@context", "")
-                            itm = Item(**item_json)
-                            activity_name = act.id.split("/")[-1].split(".")[0]
-                            activity_preamble = act.preamble.get(
-                                "en", ""
-                            ).strip()
-                            row_data = process_item(
-                                itm,
-                                it_prop,
-                                activity_name,
-                                activity_preamble,
-                                contextfile,
-                                http_kwargs,
-                                item_calc,
-                                js_expr,
-                            )
-                            csv_data.append(row_data)
                 # Break after finding the first _schema file
                 break
     return csv_data
 
-
 def write_to_csv(csv_data, output_csv_filename):
     # REDCap-specific headers
     headers = [
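A minimal sketch (with made-up entries) of the double-keyed items_properties lookup the new calc branch depends on: addProperties are indexed both by variableName and by isAbout, so a computed field's variableName resolves to the isAbout path that is then loaded like any other item:

add_properties = [
    {"variableName": "phq9_total_score", "isAbout": "items/phq9_total_score", "isVis": True},
]
items_properties = {el["variableName"]: el for el in add_properties}
items_properties.update({el["isAbout"]: el for el in add_properties})

var_name = "phq9_total_score"                 # would come from act.compute
item = items_properties[var_name]["isAbout"]  # -> "items/phq9_total_score"
it_prop = items_properties.get(item)          # same properties dict, reached via the isAbout key
assert it_prop is items_properties[var_name]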
@@ -297,7 +317,7 @@ def write_to_csv(csv_data, output_csv_filename):
297317 "Field Type" ,
298318 "Field Label" ,
299319 "Choices, Calculations, OR Slider Labels" ,
300- "Field Note" , # TODO: is this description?
320+ "Field Note" ,
301321 "Text Validation Type OR Show Slider Number" ,
302322 "Text Validation Min" ,
303323 "Text Validation Max" ,
@@ -308,49 +328,70 @@ def write_to_csv(csv_data, output_csv_filename):
308328 "Question Number (surveys only)" ,
309329 "Matrix Group Name" ,
310330 "Matrix Ranking?" ,
311- "Field Annotation" ,
331+ "Field Annotation"
312332 ]
313333
314334 # Writing to the CSV file
315- with open (
316- output_csv_filename , "w" , newline = "" , encoding = "utf-8"
317- ) as csvfile :
335+ with open (output_csv_filename , "w" , newline = "" , encoding = "utf-8" ) as csvfile :
318336 writer = csv .DictWriter (csvfile , fieldnames = headers )
319-
320- # Map the data from your format to REDCap format
321- redcap_data = []
337+ writer .writeheader ()
338+
322339 for row in csv_data :
340+ redcap_row = {}
341+
342+ # Handle var_name URL conversion
323343 var_name = row ["var_name" ]
324344 if _is_url (var_name ):
325345 var_name = var_name .split ("/" )[- 1 ].split ("." )[0 ]
326- redcap_row = {
327- "Variable / Field Name" : var_name ,
328- "Form Name" : row ["activity" ],
329- "Section Header" : row [
330- "preamble"
331- ], # Update this if your data includes section headers
332- "Field Type" : row ["field_type" ],
333- "Field Label" : row ["field_label" ],
334- "Choices, Calculations, OR Slider Labels" : row ["choices" ],
335- "Field Note" : row ["field_notes" ],
336- "Text Validation Type OR Show Slider Number" : row .get (
337- "val_type_OR_slider" , ""
338- ),
339- "Required Field?" : row ["required" ],
340- "Text Validation Min" : row ["val_min" ],
341- "Text Validation Max" : row ["val_max" ],
342- "Branching Logic (Show field only if...)" : row ["isVis_logic" ],
343- # Add other fields as necessary based on your data
346+ redcap_row ["Variable / Field Name" ] = var_name
347+
348+ # Handle form name
349+ activity_name = row ["activity" ]
350+ if activity_name .endswith ("_schema" ):
351+ activity_name = activity_name [:- 7 ]
352+ redcap_row ["Form Name" ] = activity_name
353+
354+ # Map remaining fields
355+ field_mappings = {
356+ "preamble" : "Section Header" ,
357+ "field_type" : "Field Type" ,
358+ "field_label" : "Field Label" ,
359+ "choices" : "Choices, Calculations, OR Slider Labels" ,
360+ "field_notes" : "Field Note" ,
361+ "val_type_OR_slider" : "Text Validation Type OR Show Slider Number" ,
362+ "val_min" : "Text Validation Min" ,
363+ "val_max" : "Text Validation Max" ,
364+ "required" : "Required Field?" ,
365+ "isVis_logic" : "Branching Logic (Show field only if...)" ,
366+ "field_annotation" : "Field Annotation" ,
367+ "matrix_group" : "Matrix Group Name" ,
368+ "matrix_ranking" : "Matrix Ranking?"
344369 }
345- redcap_data .append (redcap_row )
346370
347- writer .writeheader ()
348- for row in redcap_data :
349- writer .writerow (row )
371+ # Add mapped fields only if they exist and aren't empty
372+ for src_key , dest_key in field_mappings .items ():
373+ if src_key in row and row [src_key ] is not None and row [src_key ] != "" :
374+ # Special handling for visibility logic
375+ if src_key == "isVis_logic" :
376+ if row [src_key ] is not True : # Only add if not default True
377+ redcap_row [dest_key ] = row [src_key ]
378+ # Special handling for required field
379+ elif src_key == "required" :
380+ redcap_row [dest_key ] = "y" if row [src_key ] else "n"
381+ # Special handling for field annotation
382+ elif src_key == "field_annotation" :
383+ current_annotation = redcap_row .get (dest_key , "" )
384+ if current_annotation :
385+ redcap_row [dest_key ] = f"{ current_annotation } { row [src_key ]} "
386+ else :
387+ redcap_row [dest_key ] = row [src_key ]
388+ else :
389+ redcap_row [dest_key ] = row [src_key ]
390+
391+ writer .writerow (redcap_row )
350392
351393 print ("The CSV file was written successfully" )
352394
353-
354395def reproschema2redcap (input_dir_path , output_csv_filename ):
355396 contextfile = CONTEXTFILE_URL # todo, give an option
356397 http_kwargs = {}
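Writing these sparse redcap_row dictionaries is safe because csv.DictWriter fills any missing fieldname with restval, which defaults to the empty string; a self-contained sketch with made-up values:

import csv
import io

headers = ["Variable / Field Name", "Form Name", "Field Type", "Field Label"]
sparse_row = {"Variable / Field Name": "q1_age", "Form Name": "demographics"}  # hypothetical

buf = io.StringIO()
writer = csv.DictWriter(buf, fieldnames=headers)
writer.writeheader()
writer.writerow(sparse_row)  # missing columns come out as empty cells
print(buf.getvalue())
# Variable / Field Name,Form Name,Field Type,Field Label
# q1_age,demographics,,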