Different from the actual file cursor when processing duplicate headers with header auto
jsakaicribl opened this issue · comments
When processing duplicate headers with header auto, the position of data.meta.cursor will be returned as the position of cursor
including headers such as _1. But it's different from the cursor
of the actual file. For example, if I execute the test written here below, cursor
will return 21 and an error will occur. However, I think the end of the file is 15, but it seems that the cursor
is returning a cursor at a position that doesn't actually exist.
{
description: "Simple duplicated header names",
input: 'A,A,A,A\n1,2,3,4',
config: { header: true },
expected: {
data: [['A', 'A_1', 'A_2', 'A_3'], ['1', '2', '3', '4']],
errors: [],
meta: {
renamedHeaders: {A_1: 'A', A_2: 'A', A_3: 'A'},
cursor: 15
}
}
}
I would appreciate it if you could take a look.
I don't know if it makes sense, but could I fix it with code something like this:
diff --git a/papaparse.js b/papaparse.js
index 60997b0..d0d8ff6 100755
--- a/papaparse.js
+++ b/papaparse.js
@@ -1469,9 +1469,10 @@ License: MIT
return returnable();
// Rename headers if there are duplicates
+ var firstLine;
if (config.header && !baseIndex)
{
- var firstLine = input.split(newline)[0];
+ firstLine = input.split(newline)[0];
var headers = firstLine.split(delim);
var separator = '_';
var headerMap = new Set();
@@ -1515,7 +1516,11 @@ License: MIT
for (var i = 0; i < rows.length; i++)
{
row = rows[i];
- cursor += row.length;
+ if (config.header && i === 0 && firstLine !== undefined) {
+ cursor += firstLine.length;
+ }else{
+ cursor += row.length;
+ }
if (i !== rows.length - 1)
cursor += newline.length;
else if (ignoreLastRow)
diff --git a/tests/test-cases.js b/tests/test-cases.js
index 1bccea6..9f65ff0 100644
--- a/tests/test-cases.js
+++ b/tests/test-cases.js
@@ -594,7 +594,10 @@ var CORE_PARSER_TESTS = [
expected: {
data: [['A', 'A_1', 'A_2', 'A_3'], ['1', '2', '3', '4']],
errors: [],
- meta: {renamedHeaders: {A_1: 'A', A_2: 'A', A_3: 'A'}}
+ meta: {
+ renamedHeaders: {A_1: 'A', A_2: 'A', A_3: 'A'},
+ cursor: 15
+ }
}
},
{
@@ -604,17 +607,23 @@ var CORE_PARSER_TESTS = [
expected: {
data: [['a', 'a_1', 'a_2', 'a_3'], ['1', '2', '3', '4']],
errors: [],
- meta: {renamedHeaders: {a_1: 'a', a_2: 'a', a_3: 'a'}}
+ meta: {
+ renamedHeaders: {a_1: 'a', a_2: 'a', a_3: 'a'},
+ cursor: 15
+ }
}
},
{
description: "Duplicated header names existing column",
- input: 'c,c,c,c_1\n1,2,3,4',
+ input: 'c,c,c,c_1\n1,2,3,14',
config: { header: true },
expected: {
- data: [['c', 'c_1', 'c_2', 'c_1_0'], ['1', '2', '3', '4']],
+ data: [['c', 'c_1', 'c_2', 'c_1_0'], ['1', '2', '3', '14']],
errors: [],
- meta: {renamedHeaders: {c_1: 'c', c_2: 'c'}}
+ meta: {
+ renamedHeaders: {c_1: 'c', c_2: 'c'},
+ cursor: 18 ,
+ }
}
},
];
Hi @jsakaicribl,
What you comment complletly makes sense.
Could you please create a MR with your proposed changes?
Thanks!
@pokoli Thank you for taking a look the PR and merge. Will this be released with version 5.4.2? When do you think it will be npm package?
@jsakaicribl Yes, it will be included on the next version, not sure when I will be able to upload it. It depends also if we wait to include more fixes.
Will 5.4.2 be released soon?