Skip to content
2 changes: 2 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -8073,6 +8073,8 @@ test(1578.6, fread(f, skip=47L, verbose=TRUE), data.table(V1=1:2, V2=3:4), outpu
test(1578.7, fread(f, skip=49L), data.table(V1=1:2, V2=3:4))
test(1578.8, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4))
test(1578.9, fread(f, skip=48L), data.table(V1=1:2, V2=3:4)) # start on blank line 49 and skip="auto" to first data row on line 50
# Every row of this input is followed by a blank line. With the default blank.lines.skip=FALSE only the
# last row is expected back, together with a warning pointing the user at 'blank.lines.skip'.
# The id is 1578.91 rather than 1578.10 because R parses 1578.10 as the number 1578.1, which is less
# than 1578.9 and so would not continue the ascending id sequence.
input = "x y\n\n1 a\n\n2 b\n\n3 c"
test(1578.91, fread(input), data.table(V1=3L, V2="c"), warning="The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n")

# test 1579 moved to optimize.Rraw

Expand Down
5 changes: 5 additions & 0 deletions src/fread.c
Original file line number Diff line number Diff line change
Expand Up @@ -1843,6 +1843,7 @@ int freadMain(freadMainArgs _args)
int topNumFields = 1; // how many fields that was, to resolve ties
enum quote_rule_t topQuoteRule = -1; // which quote rule that was
int topSkip = 0; // how many rows to auto-skip
// #7707: 'topSkip' accumulates as blank lines are encountered, so it can distinguish a file whose header and data are separated by a blank line from a file in which blocks of lines, or every line, are separated by blank lines.
const char *topStart = NULL;

for (quoteRule = quote ? QUOTE_RULE_EMBEDDED_QUOTES_DOUBLED : QUOTE_RULE_IGNORE_QUOTES; quoteRule < QUOTE_RULE_COUNT; quoteRule++) { // #loop_counter_not_local_scope_ok
Expand Down Expand Up @@ -1946,6 +1947,10 @@ int freadMain(freadMainArgs _args)
}
}
}
if (!prevStart && topSkip > 1 && !skipEmptyLines)
{
DTWARN(_("The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n"));
}
if (!firstJumpEnd) {
if (verbose) DTPRINT(_(" No sep and quote rule found a block of 2x2 or greater. Single column input.\n"));
topNumFields = 1;
Expand Down
Loading