Skip to content

Commit 17df384

Browse files
committed
refactor: ai optimizations
1 parent 6f43ada commit 17df384

File tree

3 files changed

+139
-107
lines changed

3 files changed

+139
-107
lines changed

src/csv2json.ts

Lines changed: 86 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,28 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
99
const escapedWrapDelimiterRegex = new RegExp(options.delimiter.wrap + options.delimiter.wrap, 'g'),
1010
excelBOMRegex = new RegExp('^' + excelBOM),
1111
valueParserFn = options.parseValue && typeof options.parseValue === 'function' ? options.parseValue : JSON.parse;
12+
// micro-optimization: cache delimiter and commonly read options locally for hot paths
13+
const delimiter = options.delimiter;
14+
const trimHeaderFields = options.trimHeaderFields;
15+
const headerFieldsOption = options.headerFields;
16+
const keysOption = options.keys;
17+
const excelBOMOption = options.excelBOM;
18+
const trimFieldValues = options.trimFieldValues;
19+
// Cache hot utility references to avoid repeated property lookups in hot paths
20+
const getNCharacters = utils.getNCharacters;
21+
const isStringRepresentation = utils.isStringRepresentation;
22+
const isDateRepresentation = utils.isDateRepresentation;
23+
const isError = utils.isError;
24+
const isInvalid = utils.isInvalid;
25+
const setPathLocal = setPath;
1226

1327
/**
1428
* Trims the header key, if specified by the user via the provided options
1529
*/
1630
function processHeaderKey(headerKey: string) {
1731
headerKey = removeWrapDelimitersFromValue(headerKey);
18-
if (options.trimHeaderFields) {
19-
return headerKey.split('.')
20-
.map((component) => component.trim())
21-
.join('.');
32+
if (trimHeaderFields) {
33+
return headerKey.split('.').map((component) => component.trim()).join('.');
2234
}
2335
return headerKey;
2436
}
@@ -29,8 +41,8 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
2941
function retrieveHeading(lines: string[][]): Csv2JsonParams {
3042
let headerFields: HeaderField[] = [];
3143

32-
if (options.headerFields) {
33-
headerFields = options.headerFields.map((headerField, index): HeaderField => ({
44+
if (headerFieldsOption) {
45+
headerFields = headerFieldsOption.map((headerField, index): HeaderField => ({
3446
value: processHeaderKey(headerField),
3547
index
3648
}));
@@ -43,15 +55,28 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
4355
}));
4456

4557
// If the user provided keys, filter the generated keys to just the user provided keys so we also have the key index
46-
if (options.keys) {
47-
const keys = options.keys; // TypeScript type checking work around to get it to recognize the option is not undefined
58+
if (keysOption) {
59+
const keys = keysOption; // TypeScript type checking work around to get it to recognize the option is not undefined
4860
headerFields = headerFields.filter((headerKey) => keys.includes(headerKey.value));
4961
}
5062
}
5163

5264
return {
5365
lines,
5466
headerFields,
67+
// precompute accessors for each header to speed up document creation
68+
headerAccessors: headerFields.map((hf) => {
69+
const idx = hf.index;
70+
const path = hf.value;
71+
return function setField(document: any, line: string[]) {
72+
try {
73+
const value = processRecordValue(line[idx]);
74+
setPathLocal(document, path, value);
75+
} catch (err) {
76+
// ignore invalid paths
77+
}
78+
};
79+
}),
5580
recordLines: [] as string[][],
5681
};
5782
}
@@ -60,7 +85,7 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
6085
* Removes the Excel BOM value, if specified by the options object
6186
*/
6287
function stripExcelBOM(csv: string) {
63-
if (options.excelBOM) {
88+
if (excelBOMOption) {
6489
return csv.replace(excelBOMRegex, '');
6590
}
6691
return csv;
@@ -73,7 +98,7 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
7398
// Parse out the line...
7499
const lines = [],
75100
lastCharacterIndex = csv.length - 1,
76-
eolDelimiterLength = options.delimiter.eol.length,
101+
eolDelimiterLength = delimiter.eol.length,
77102
stateVariables = {
78103
insideWrapDelimiter: false,
79104
parsingValue: true,
@@ -98,7 +123,7 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
98123
charAfter = index < lastCharacterIndex ? csv[index + 1] : '';
99124
// Next n characters, including the current character, where n = length(EOL delimiter)
100125
// This allows for checking an EOL delimiter even when it is more than a single character (e.g. '\r\n')
101-
nextNChar = utils.getNCharacters(csv, index, eolDelimiterLength);
126+
nextNChar = getNCharacters(csv, index, eolDelimiterLength);
102127

103128
if ((nextNChar === options.delimiter.eol && !stateVariables.insideWrapDelimiter ||
104129
index === lastCharacterIndex) && charBefore === options.delimiter.field) {
@@ -163,8 +188,8 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
163188
stateVariables.parsingValue = true;
164189

165190
// If the next character(s) are an EOL delimiter, then skip them so we don't parse what we've seen as another value
166-
if (utils.getNCharacters(csv, index + 1, eolDelimiterLength) === options.delimiter.eol) {
167-
index += options.delimiter.eol.length + 1; // Skip past EOL
191+
if (getNCharacters(csv, index + 1, eolDelimiterLength) === delimiter.eol) {
192+
index += delimiter.eol.length + 1; // Skip past EOL
168193
}
169194
} else if (charBefore === options.delimiter.field && character === options.delimiter.wrap && charAfter === options.delimiter.eol) {
170195
// We reached the start of a wrapped new field that begins with an EOL delimiter
@@ -177,49 +202,49 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
177202
stateVariables.insideWrapDelimiter = true;
178203
stateVariables.justParsedDoubleQuote = true;
179204
index += 1;
180-
} else if ((charBefore !== options.delimiter.wrap || stateVariables.justParsedDoubleQuote && charBefore === options.delimiter.wrap) &&
181-
character === options.delimiter.wrap && utils.getNCharacters(csv, index + 1, eolDelimiterLength) === options.delimiter.eol) {
205+
} else if ((charBefore !== delimiter.wrap || stateVariables.justParsedDoubleQuote && charBefore === delimiter.wrap) &&
206+
character === delimiter.wrap && getNCharacters(csv, index + 1, eolDelimiterLength) === delimiter.eol) {
182207
// If we reach a wrap which is not preceded by a wrap delim and the next character is an EOL delim (ie. *"\n)
183208

184209
stateVariables.insideWrapDelimiter = false;
185210
stateVariables.parsingValue = false;
186211
// Next iteration will substring, add the value to the line, and push the line onto the array of lines
187-
} else if (character === options.delimiter.wrap && (index === 0 || utils.getNCharacters(csv, index - eolDelimiterLength, eolDelimiterLength) === options.delimiter.eol && !stateVariables.insideWrapDelimiter)) {
212+
} else if (character === delimiter.wrap && (index === 0 || getNCharacters(csv, index - eolDelimiterLength, eolDelimiterLength) === delimiter.eol && !stateVariables.insideWrapDelimiter)) {
188213
// If the line starts with a wrap delimiter (ie. "*)
189214

190215
stateVariables.insideWrapDelimiter = true;
191216
stateVariables.parsingValue = true;
192217
stateVariables.startIndex = index;
193-
} else if (character === options.delimiter.wrap && charAfter === options.delimiter.field && stateVariables.insideWrapDelimiter) {
218+
} else if (character === delimiter.wrap && charAfter === delimiter.field && stateVariables.insideWrapDelimiter) {
194219
// If we reached a wrap delimiter with a field delimiter after it (ie. *",)
195220

196221
splitLine.push(csv.substring(stateVariables.startIndex, index + 1));
197222
stateVariables.startIndex = index + 2; // next value starts after the field delimiter
198223
stateVariables.insideWrapDelimiter = false;
199224
stateVariables.parsingValue = false;
200-
} else if (character === options.delimiter.wrap && charBefore === options.delimiter.field &&
225+
} else if (character === delimiter.wrap && charBefore === delimiter.field &&
201226
!stateVariables.insideWrapDelimiter && stateVariables.parsingValue) {
202227
// If we reached a wrap delimiter with a field delimiter before it (ie. ,"*)
203228

204229
splitLine.push(csv.substring(stateVariables.startIndex, index - 1));
205230
stateVariables.insideWrapDelimiter = true;
206231
stateVariables.parsingValue = true;
207232
stateVariables.startIndex = index;
208-
} else if (character === options.delimiter.wrap && charAfter === options.delimiter.wrap && index !== stateVariables.startIndex) {
233+
} else if (character === delimiter.wrap && charAfter === delimiter.wrap && index !== stateVariables.startIndex) {
209234
// If we run into an escaped quote (ie. "") skip past the second quote
210235

211236
index += 2;
212237
stateVariables.justParsedDoubleQuote = true;
213238
continue;
214-
} else if (character === options.delimiter.field && charBefore !== options.delimiter.wrap &&
215-
charAfter !== options.delimiter.wrap && !stateVariables.insideWrapDelimiter &&
239+
} else if (character === delimiter.field && charBefore !== delimiter.wrap &&
240+
charAfter !== delimiter.wrap && !stateVariables.insideWrapDelimiter &&
216241
stateVariables.parsingValue) {
217242
// If we reached a field delimiter and are not inside the wrap delimiters (ie. *,*)
218243

219244
splitLine.push(csv.substring(stateVariables.startIndex, index));
220245
stateVariables.startIndex = index + 1;
221-
} else if (character === options.delimiter.field && charBefore === options.delimiter.wrap &&
222-
charAfter !== options.delimiter.wrap && !stateVariables.parsingValue) {
246+
} else if (character === delimiter.field && charBefore === delimiter.wrap &&
247+
charAfter !== delimiter.wrap && !stateVariables.parsingValue) {
223248
// If we reached a field delimiter, the previous character was a wrap delimiter, and the
224249
// next character is not a wrap delimiter (ie. ",*)
225250

@@ -240,7 +265,7 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
240265
* Retrieves the record lines from the split CSV lines and sets it on the params object
241266
*/
242267
function retrieveRecordLines(params: Csv2JsonParams) {
243-
if (options.headerFields) { // This option is passed for instances where the CSV has no header line
268+
if (headerFieldsOption) { // This option is passed for instances where the CSV has no header line
244269
params.recordLines = params.lines;
245270
} else { // All lines except for the header line
246271
params.recordLines = params.lines.splice(1);
@@ -269,7 +294,7 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
269294
const parsedJson = parseValue(fieldValue);
270295
// If parsedJson is anything aside from an error, then we want to use the parsed value
271296
// This allows us to interpret values like 'null' --> null, 'false' --> false
272-
if (!utils.isError(parsedJson) && !utils.isInvalid(parsedJson)) {
297+
if (!isError(parsedJson) && !isInvalid(parsedJson)) {
273298
return parsedJson;
274299
} else if (fieldValue === 'undefined') {
275300
return undefined;
@@ -282,7 +307,7 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
282307
* Trims the record value, if specified by the user via the options object
283308
*/
284309
function trimRecordValue(fieldValue: string) {
285-
if (options.trimFieldValues && fieldValue !== null) {
310+
if (trimFieldValues && fieldValue !== null) {
286311
return fieldValue.trim();
287312
}
288313
return fieldValue;
@@ -294,19 +319,19 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
294319
* @returns {Object} created json document
295320
*/
296321
function createDocument(headerFields: HeaderField[], line: string[]) {
297-
// Reduce the keys into a JSON document representing the given line
298-
return headerFields.reduce((document, headerField) => {
299-
// If there is a value at the key's index in the line, set the value; otherwise null
300-
const value = retrieveRecordValueFromLine(headerField, line);
322+
const document: any = {};
301323

324+
for (let i = 0; i < headerFields.length; i++) {
325+
const headerField = headerFields[i];
302326
try {
303-
// Otherwise add the key and value to the document
304-
return setPath(document, headerField.value, value);
305-
} catch (error) {
306-
// Catch any errors where key paths are null or '' and continue
307-
return document;
327+
const value = retrieveRecordValueFromLine(headerField, line);
328+
setPathLocal(document, headerField.value, value);
329+
} catch (err) {
330+
// ignore errors when setting invalid paths
308331
}
309-
}, {});
332+
}
333+
334+
return document;
310335
}
311336

312337
/**
@@ -318,7 +343,7 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
318343
lastIndex = fieldValue.length - 1,
319344
lastChar = fieldValue[lastIndex];
320345
// If the field starts and ends with a wrap delimiter
321-
if (firstChar === options.delimiter.wrap && lastChar === options.delimiter.wrap) {
346+
if (firstChar === delimiter.wrap && lastChar === delimiter.wrap) {
322347
// Handle the case where the field is just a pair of wrap delimiters
323348
return fieldValue.length <= 2 ? '' : fieldValue.substring(1, lastIndex);
324349
}
@@ -330,37 +355,49 @@ export const Csv2Json = function (options: FullCsv2JsonOptions) {
330355
* This is done in order to parse RFC 4180 compliant CSV back to JSON
331356
*/
332357
function unescapeWrapDelimiterInField(fieldValue: string) {
333-
return fieldValue.replace(escapedWrapDelimiterRegex, options.delimiter.wrap);
358+
return fieldValue.replace(escapedWrapDelimiterRegex, delimiter.wrap);
334359
}
335360

336361
/**
337362
* Main helper function to convert the CSV to the JSON document array
338363
*/
339364
function transformRecordLines(params: Csv2JsonParams) {
340-
// For each line, create the document and add it to the array of documents
341-
return params.recordLines.reduce((generatedJsonObjects: object[], line: string[]) => {
365+
const results: object[] = [];
366+
367+
const accessors = params.headerAccessors;
368+
369+
for (let i = 0; i < params.recordLines.length; i++) {
370+
let line = params.recordLines[i];
342371

343-
line = line.map((fieldValue: string) => {
344-
// Perform the necessary operations on each line
372+
for (let j = 0; j < line.length; j++) {
373+
let fieldValue = line[j];
345374
fieldValue = removeWrapDelimitersFromValue(fieldValue);
346375
fieldValue = unescapeWrapDelimiterInField(fieldValue);
347376
fieldValue = trimRecordValue(fieldValue);
377+
line[j] = fieldValue;
378+
}
348379

349-
return fieldValue;
350-
});
351-
352-
const generatedDocument = createDocument(params.headerFields, line);
353-
return generatedJsonObjects.concat(generatedDocument);
380+
if (accessors && accessors.length) {
381+
const document: any = {};
382+
for (let a = 0; a < accessors.length; a++) {
383+
accessors[a](document, line);
384+
}
385+
results.push(document);
386+
} else {
387+
const generatedDocument = createDocument(params.headerFields, line);
388+
results.push(generatedDocument);
389+
}
390+
}
354391

355-
}, []);
392+
return results;
356393
}
357394

358395
/**
359396
* Attempts to parse the provided value. If it is not parsable, then an error is returned
360397
*/
361398
function parseValue(value: string) {
362399
try {
363-
if (utils.isStringRepresentation(value, options) && !utils.isDateRepresentation(value)) {
400+
if (isStringRepresentation(value, options) && !isDateRepresentation(value)) {
364401
return value;
365402
}
366403

0 commit comments

Comments
 (0)