I'd say that this behaviour is surprising and sub-optimal, although admittedly nothing in the docs indicates that it should not behave this way. I'm currently working around it like this:
#!/usr/bin/env perl
use strict;
use warnings;
use Test::More;
use Encode qw< decode encode >;
use Text::CSV_PP 'csv';
use Data::Dumper;
# Unconditional first test marking the start of the run.
pass 'tests start';

# UTF-8 encoded CSV fixtures used by the subtests below; both start with a BOM.
my ($input, $input2) = (input(), input2());
# Baseline: without any header handling all three lines of the fixture are
# expected back as array rows, and the first field is expected to be four
# characters long (the BOM character followed by "foo").
subtest 'plain array' => sub {
    open(my $in_fh, '<', \$input) or die "open(): $!";
    my $rows = csv(in => $in_fh, sep_char => ',');
    isa_ok($rows, 'ARRAY');
    is(scalar @{$rows}, 3, 'lines read');
    my $first_field = $rows->[0][0];
    is(length($first_field), 4, 'length of first parsed thing');
};
# With headers => 'auto' the first line becomes the hash keys, leaving two
# records.  The key of the first column is expected to be four characters
# long here (BOM character plus "foo").
subtest 'with headers => auto' => sub {
    open(my $in_fh, '<', \$input) or die "open(): $!";
    my $records = csv(in => $in_fh, sep_char => ',', headers => 'auto');
    isa_ok($records, 'ARRAY');
    is(scalar @{$records}, 2, 'records read');

    my $first = $records->[0];
    isa_ok($first, 'HASH');

    # The first data line is ",12", so the key belonging to the first
    # column is the one whose value is false in this record.
    my ($empty_key) = grep { !$first->{$_} } keys %{$first};
    is(length($empty_key), 4, 'length of "first" key');
};
# With detect_bom => 1 the result is again expected to be two hash records,
# and the key of the first column is expected to be three characters long
# (i.e. "foo" with the BOM removed).
subtest 'with detect_bom => 1' => sub {
    open(my $in_fh, '<', \$input) or die "open(): $!";
    my $records = csv(in => $in_fh, sep_char => ',', detect_bom => 1);
    isa_ok($records, 'ARRAY');
    is(scalar @{$records}, 2, 'records read');

    my $first = $records->[0];
    isa_ok($first, 'HASH');

    # The first data line is ",12", so the key belonging to the first
    # column is the one whose value is false in this record.
    my ($empty_key) = grep { !$first->{$_} } keys %{$first};
    is(length($empty_key), 3, 'length of "first" key (stripped!)');
};
# Combines detect_bom => 1 with headers => 'auto' on the first fixture.  The
# call is wrapped in eval so that an exception from csv() is reported as a
# single failing test instead of aborting the script.
subtest 'input with detect_bom => 1 and headers => auto' => sub {
    open(my $in_fh, '<', \$input) or die "open(): $!";
    my $records = eval {
        csv(in => $in_fh, sep_char => ',', detect_bom => 1, headers => 'auto');
    };

    # Nothing usable came back: record the failure and stop this subtest.
    if (!$records) {
        fail "could not parse: $@";
        return;
    }

    isa_ok($records, 'ARRAY');
    is(scalar @{$records}, 2, 'records read');

    my $first = $records->[0];
    isa_ok($first, 'HASH');

    # The first data line is ",12", so the key belonging to the first
    # column is the one whose value is false in this record.
    my ($empty_key) = grep { !$first->{$_} } keys %{$first};
    is(length($empty_key), 3, 'length of "first" key (stripped!)');
};
# Combines detect_bom => 1 with headers => 'auto' on the second fixture,
# whose first data line contains no empty or zero field.  The call is
# wrapped in eval so that an exception from csv() is reported as a single
# failing test instead of aborting the script.
subtest 'input2 with detect_bom => 1 and headers => auto' => sub {
    open my $fh, '<', \$input2 or die "open(): $!";
    my $parsed = eval {
        csv(in => $fh, sep_char => ',', detect_bom => 1, headers => 'auto');
    };
    if ($parsed) {
        isa_ok $parsed, 'ARRAY';
        my $n_read = @$parsed;
        is $n_read, 2, 'records read' or diag "read $n_read";
        my $record = $parsed->[0];
        isa_ok $record, 'HASH';

        # The first data line of this fixture is "NOMNOM,da-bah": neither
        # value is false, so searching only the first record would leave
        # $key undefined even when the header is parsed correctly.  Scan
        # every record and take the first key that maps to a false value
        # (the "0" on the last line).
        my ($key) = map {
            my $row = $_;
            grep { !$row->{$_} } keys %{$row};
        } grep { ref($_) eq 'HASH' } @$parsed;

        # Keep the diagnostic free of "uninitialized" warnings when no key
        # was found at all.
        is length($key), 3, 'length of "first" key (stripped!)'
            or diag 'key<' . (defined $key ? $key : 'undef') . '>';
    }
    else {
        fail "could not parse: $@";
    }
};
# No fixed plan was declared; finish the run with however many tests ran.
done_testing();
# Returns the first CSV fixture as UTF-8 encoded bytes: a BOM-prefixed
# "foo,bar" header line followed by two data lines, the first of which has
# an empty leading field.
sub input {
    return encode('UTF-8', <<"END");
\x{FEFF}foo,bar
,12
0,11
END
}
# Returns the second CSV fixture as UTF-8 encoded bytes: a BOM-prefixed
# "foo,bar" header line followed by two data lines, of which only the last
# one contains a false value ("0").
sub input2 {
    return encode('UTF-8', <<"END");
\x{FEFF}foo,bar
NOMNOM,da-bah
0,11
END
}
$ perl -MText::CSV_PP -E 'say $Text::CSV_PP::VERSION'
2.02
$ prove -v ./csv_pp-test01.t
./csv_pp-test01.t ..
ok 1 - tests start
# Subtest: plain array
ok 1 - A reference of type 'ARRAY' isa 'ARRAY'
ok 2 - lines read
ok 3 - length of first parsed thing
1..3
ok 2 - plain array
# Subtest: with headers => auto
ok 1 - A reference of type 'ARRAY' isa 'ARRAY'
ok 2 - records read
ok 3 - A reference of type 'HASH' isa 'HASH'
ok 4 - length of "first" key
1..4
ok 3 - with headers => auto
# Subtest: with detect_bom => 1
ok 1 - A reference of type 'ARRAY' isa 'ARRAY'
ok 2 - records read
ok 3 - A reference of type 'HASH' isa 'HASH'
ok 4 - length of "first" key (stripped!)
1..4
ok 4 - with detect_bom => 1
# Subtest: input with detect_bom => 1 and headers => auto
# CSV_PP ERROR: 1012 - INI - the header contains an empty field @ rec 2 pos 0
not ok 1 - could not parse: INI - the header contains an empty field
1..1
not ok 5 - input with detect_bom => 1 and headers => auto
# Subtest: input2 with detect_bom => 1 and headers => auto
# Failed test 'could not parse: INI - the header contains an empty field'
# at ./csv_pp-test01.t line 86.
# Looks like you failed 1 test of 1.
# Failed test 'input with detect_bom => 1 and headers => auto'
# at ./csv_pp-test01.t line 88.
ok 1 - A reference of type 'ARRAY' isa 'ARRAY'
not ok 2 - records read
# Failed test 'records read'
# at ./csv_pp-test01.t line 99.
# got: '1'
# expected: '2'
ok 3 - A reference of type 'HASH' isa 'HASH'
not ok 4 - length of "first" key (stripped!)
1..4
not ok 6 - input2 with detect_bom => 1 and headers => auto
1..6
# read 1
# Failed test 'length of "first" key (stripped!)'
# at ./csv_pp-test01.t line 109.
# got: '6'
# expected: '3'
# key<NOMNOM>
# Looks like you failed 2 tests of 4.
# Failed test 'input2 with detect_bom => 1 and headers => auto'
# at ./csv_pp-test01.t line 115.
# Looks like you failed 2 tests of 6.
Dubious, test returned 2 (wstat 512, 0x200)
Failed 2/6 subtests
Test Summary Report
-------------------
./csv_pp-test01.t (Wstat: 512 Tests: 6 Failed: 2)
Failed tests: 5-6
Non-zero exit status: 2
Files=1, Tests=6, 0 wallclock secs ( 0.00 usr 0.01 sys + 0.05 cusr 0.00 csys = 0.06 CPU)
Result: FAIL