Skip to content

Commit

Permalink
feat: align dialect definition with data package v2 (#59)
Browse files Browse the repository at this point in the history
* feat: 100% coverage of dialect definition for delimited files from data package v2
* feat: support commentRows dialect's description
* feat: headerRows and headerJoin to define multi-lines headers
  • Loading branch information
Seddryck authored Dec 22, 2024
1 parent cb4f8bc commit e1641b5
Show file tree
Hide file tree
Showing 25 changed files with 420 additions and 175 deletions.
73 changes: 58 additions & 15 deletions PocketCsvReader.Testing/CharParserTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public void Parse_FieldLineTerminator_StartEnd(string value, string sep, int sta
public void Parse_FieldLineTerminatorSingleChar_StartEnd(string value, string sep, int start, int length)
{
var parser = new CharParser(new CsvProfile(
new CsvDialectDescriptor() { Delimiter = ',', QuoteChar = '\'', LineTerminator = sep }));
new DialectDescriptor() { Delimiter = ',', QuoteChar = '\'', LineTerminator = sep }));
var result = value.Aggregate((ParserState?)null, (current, c) => parser.Parse(c));

Assert.That(result, Is.EqualTo(ParserState.Record));
Expand All @@ -71,7 +71,7 @@ public void Parse_FieldLineTerminatorSingleChar_StartEnd(string value, string se
[TestCase("#foobar")]
public void Parse_Comment_StartEnd(string value)
{
var parser = new CharParser(new CsvProfile(new CsvDialectDescriptor() {CommentChar='#', Delimiter=';', LineTerminator="\r\n" }));
var parser = new CharParser(new CsvProfile(new DialectDescriptor() { Header = false, CommentChar = '#', Delimiter = ';', LineTerminator = "\r\n" }));
var result = value.Aggregate((ParserState?)null, (current, c) => parser.Parse(c));

Assert.That(result, Is.EqualTo(ParserState.Continue));
Expand All @@ -83,7 +83,7 @@ public void Parse_Comment_StartEnd(string value)
[TestCase("bar")]
public void Parse_AfterComment_StartEnd(string value)
{
var parser = new CharParser(new CsvProfile(new CsvDialectDescriptor() { CommentChar = '#', Delimiter = ';', LineTerminator = "\r\n" }));
var parser = new CharParser(new CsvProfile(new DialectDescriptor() { CommentChar = '#', Delimiter = ';', LineTerminator = "\r\n" }));
var result = value.Aggregate((ParserState?)null, (current, c) => parser.Parse(c));
result = parser.ParseEof();

Expand All @@ -97,9 +97,9 @@ public void Parse_AfterComment_StartEnd(string value)
[TestCase("bar\r\n", 1)]
// Feeds each character of the input to a CharParser configured with ';' as delimiter and
// "\r\n" as line terminator, and counts the calls whose result is not ParserState.Continue.
// The total must equal the expected count supplied by the test case.
public void Parse_Record_CountOfField(string value, int count)
{
    // Single parser declaration: the earlier CsvDialectDescriptor-based declaration of the
    // same variable duplicated this one and is dropped in favour of DialectDescriptor.
    var parser = new CharParser(new CsvProfile(new DialectDescriptor() { Delimiter = ';', LineTerminator = "\r\n" }));

    var result = value.Aggregate(0, (current, c)
        => parser.Parse(c) != ParserState.Continue ? current + 1 : current);

    Assert.That(result, Is.EqualTo(count));
}
Expand All @@ -111,7 +111,18 @@ public void Parse_Record_CountOfField(string value, int count)
[TestCase("bar;\r\nfoo\r\n", 2)]
// Feeds each character of the input to a CharParser configured without a header row,
// with ';' as delimiter and "\r\n" as line terminator, and counts the calls whose result
// is ParserState.Record. The total must equal the expected count supplied by the test case.
public void Parse_Record_CountOfRecord(string value, int count)
{
    // Single parser declaration: the earlier CsvDialectDescriptor-based declaration of the
    // same variable duplicated this one and is dropped in favour of DialectDescriptor.
    var parser = new CharParser(new CsvProfile(new DialectDescriptor() { Header = false, Delimiter = ';', LineTerminator = "\r\n" }));

    var result = value.Aggregate(0, (current, c)
        => parser.Parse(c) == ParserState.Record ? current + 1 : current);

    Assert.That(result, Is.EqualTo(count));
}

[TestCase("field_1;field_2\r\nfoo;bar\r\nfoo;bar\r\nfoo;bar\r\n", 3)]
[TestCase("field_1\r\nbar\r\n", 1)]
public void Parse_RecordAndHeader_CountOfRecord(string value, int count)
{
var parser = new CharParser(new CsvProfile(new DialectDescriptor() { Header = true, Delimiter = ';', LineTerminator = "\r\n" }));
var result = value.Aggregate(0, (current, c)
=> parser.Parse(c) == ParserState.Record ? current + 1 : current);

Expand All @@ -127,7 +138,7 @@ public void Parse_Record_CountOfRecord(string value, int count)
[TestCase("'f\ro\no';", "f\ro\no")]
public void Parse_QuotedField_CorrectField(string value, string expected)
{
var parser = new CharParser(new CsvProfile(new CsvDialectDescriptor() { QuoteChar='\'', Delimiter = ';', LineTerminator = "\r\n" }));
var parser = new CharParser(new CsvProfile(new DialectDescriptor() { QuoteChar = '\'', Delimiter = ';', LineTerminator = "\r\n" }));
var result = string.Empty;
foreach (var c in value)
if (parser.Parse(c) == ParserState.Field)
Expand All @@ -141,7 +152,7 @@ public void Parse_QuotedField_CorrectField(string value, string expected)
public void Parse_DoubleQuotedFieldWhenDenied_Error(string value)
{
var parser = new CharParser(new CsvProfile(
new CsvDialectDescriptor() { QuoteChar = '\'', EscapeChar = '\\', DoubleQuote = false, Delimiter = ';', LineTerminator = "\r\n" }));
new DialectDescriptor() { QuoteChar = '\'', EscapeChar = '\\', DoubleQuote = false, Delimiter = ';', LineTerminator = "\r\n" }));
var result = string.Empty;
foreach (var c in value)
if (parser.Parse(c) == ParserState.Error)
Expand All @@ -155,7 +166,7 @@ public void Parse_DoubleQuotedFieldWhenDenied_Error(string value)
public void Parse_DoubleQuotedFieldWhenAllowed_EscapedSet(string value)
{
var parser = new CharParser(new CsvProfile(
new CsvDialectDescriptor() { QuoteChar = '`', EscapeChar = '%', DoubleQuote = true, Delimiter = ';', LineTerminator = "\r\n" }));
new DialectDescriptor() { QuoteChar = '`', EscapeChar = '%', DoubleQuote = true, Delimiter = ';', LineTerminator = "\r\n" }));
foreach (var c in value)
if (parser.Parse(c) == ParserState.Field)
{
Expand All @@ -171,7 +182,7 @@ public void Parse_DoubleQuotedFieldWhenAllowed_EscapedSet(string value)
public void Parse_EscapeQuoteInQuotedField_EscapedSet(string value)
{
var parser = new CharParser(new CsvProfile(
new CsvDialectDescriptor() { QuoteChar = '`', EscapeChar = '%', DoubleQuote = false, Delimiter = ';', LineTerminator = "\r\n" }));
new DialectDescriptor() { QuoteChar = '`', EscapeChar = '%', DoubleQuote = false, Delimiter = ';', LineTerminator = "\r\n" }));
foreach (var c in value)
if (parser.Parse(c) == ParserState.Field)
{
Expand All @@ -187,7 +198,7 @@ public void Parse_EscapeQuoteInQuotedField_EscapedSet(string value)
public void Parse_EscapeDelimiterInUnquotedField_EscapedSet(string value)
{
var parser = new CharParser(new CsvProfile(
new CsvDialectDescriptor() { QuoteChar = '`', EscapeChar = '%', DoubleQuote = false, Delimiter = ';', LineTerminator = "\r\n" }));
new DialectDescriptor() { QuoteChar = '`', EscapeChar = '%', DoubleQuote = false, Delimiter = ';', LineTerminator = "\r\n" }));
foreach (var c in value)
if (parser.Parse(c) == ParserState.Field)
{
Expand All @@ -203,7 +214,7 @@ public void Parse_EscapeDelimiterInUnquotedField_EscapedSet(string value)
public void Parse_SkipInitialSpace_SpaceSkip(string value, int start)
{
var parser = new CharParser(new CsvProfile(
new CsvDialectDescriptor() { SkipInitialSpace = true, QuoteChar = '`', EscapeChar = '%', DoubleQuote = false, Delimiter = ';', LineTerminator = "\r\n" }));
new DialectDescriptor() { SkipInitialSpace = true, QuoteChar = '`', EscapeChar = '%', DoubleQuote = false, Delimiter = ';', LineTerminator = "\r\n" }));
foreach (var c in value)
parser.Parse(c);
Assert.That(parser.FieldStart, Is.EqualTo(start));
Expand All @@ -216,7 +227,7 @@ public void Parse_SkipInitialSpace_SpaceSkip(string value, int start)
public void Parse_SkipInitialSpaceBeforeQuotedField_SpaceSkip(string value, int start)
{
var parser = new CharParser(new CsvProfile(
new CsvDialectDescriptor() { SkipInitialSpace = true, QuoteChar = '`', EscapeChar = '%', DoubleQuote = false, Delimiter = ';', LineTerminator = "\r\n" }));
new DialectDescriptor() { SkipInitialSpace = true, QuoteChar = '`', EscapeChar = '%', DoubleQuote = false, Delimiter = ';', LineTerminator = "\r\n" }));
foreach (var c in value)
parser.Parse(c);
Assert.That(parser.FieldStart, Is.EqualTo(start));
Expand All @@ -229,10 +240,42 @@ public void Parse_SkipInitialSpaceBeforeQuotedField_SpaceSkip(string value, int
// Parses every character of the input with SkipInitialSpace enabled and '`' as quote
// character, then checks the parser's FieldStart and FieldLength for the last field seen.
public void Parse_SkipInitialSpaceWithinQuotedField_SpaceNotSkip(string value, int start)
{
    // Single descriptor initializer: the CsvDialectDescriptor variant of this line
    // duplicated it and is dropped in favour of DialectDescriptor.
    var parser = new CharParser(new CsvProfile(
        new DialectDescriptor() { SkipInitialSpace = true, QuoteChar = '`', EscapeChar = '%', DoubleQuote = false, Delimiter = ';', LineTerminator = "\r\n" }));

    foreach (var c in value)
    {
        parser.Parse(c);
    }

    Assert.That(parser.FieldStart, Is.EqualTo(start));
    // NOTE(review): the constant 7 presumably accounts for the input characters that lie
    // outside the field content; the test cases are not visible here — confirm.
    Assert.That(parser.FieldLength, Is.EqualTo(value.Length - 7));
}

/// <summary>
/// Checks that exactly two records are reported when the rows listed in
/// <paramref name="commentRows"/> are declared as comment rows in the dialect.
/// </summary>
/// <param name="value">Raw text fed to the parser one character at a time.</param>
/// <param name="commentRows">Row numbers assigned to the dialect's CommentRows.</param>
[TestCase("foo\r\nbar\r\n")]
[TestCase("Comment\r\nfoo\r\nbar\r\n", 1)]
[TestCase("Comment 1\r\nComment 2\r\nfoo\r\nbar\r\n", 1, 2)]
[TestCase("Comment 1\r\nComment 2\r\nfoo\r\nbar\r\nComment 3", 1, 2, 5)]
[TestCase("Comment 1\r\n\r\nfooComment 2\r\nbar\r\nComment 3", 1, 3, 5)]
public void Parse_CommentRows_CommentsSkipped(string value, params int[] commentRows)
{
    var dialect = new DialectDescriptor { Header = false, CommentRows = commentRows, LineTerminator = "\r\n" };
    var charParser = new CharParser(new CsvProfile(dialect));

    // Every character is parsed in order; only calls that complete a record are tallied.
    var records = value.Count(ch => charParser.Parse(ch) == ParserState.Record);

    Assert.That(records, Is.EqualTo(2));
}

/// <summary>
/// Checks that exactly two records are reported when the dialect declares both a comment
/// character ('#') and the comment rows listed in <paramref name="commentRows"/>.
/// </summary>
/// <param name="value">Raw text fed to the parser one character at a time.</param>
/// <param name="commentRows">Row numbers assigned to the dialect's CommentRows.</param>
[TestCase("foo\r\nbar\r\n")]
[TestCase("Comment\r\nfoo\r\nbar\r\n#Comment", 1)]
[TestCase("Comment 1\r\nComment 2\r\nfoo\r\n#Comment\r\nbar\r\n#Comment", 1, 2)]
[TestCase("Comment 1\r\nComment 2\r\nfoo\r\n\r\n#Commentbar\r\nComment 3", 1, 2, 6)]
[TestCase("Comment 1\r\n\r\nfooComment 2\r\nbar\r\n#Comment\r\nComment 3", 1, 3, 6)]
public void Parse_CommentRowsAndComments_CommentsSkipped(string value, params int[] commentRows)
{
    var dialect = new DialectDescriptor { Header = false, CommentChar = '#', CommentRows = commentRows, LineTerminator = "\r\n" };
    var charParser = new CharParser(new CsvProfile(dialect));

    // Every character is parsed in order; only calls that complete a record are tallied.
    var records = value.Count(ch => charParser.Parse(ch) == ParserState.Record);

    Assert.That(records, Is.EqualTo(2));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,19 @@ public void WithHeader_ShouldSetHeaderToTrue()
.Build();

Assert.That(descriptor.Header, Is.True);
Assert.That(descriptor.HeaderRows, Is.Not.Null.And.Not.Empty);
}

/// <summary>
/// Calls WithoutHeader() and then WithHeader() on the same builder chain and checks that
/// the built descriptor has Header set to true and a non-null, non-empty HeaderRows.
/// </summary>
[Test]
public void SwitchHeaderValue_ShouldSetHeaderToTrue()
{
    var builder = new DialectDescriptorBuilder().WithoutHeader().WithHeader();

    var dialect = builder.Build();

    Assert.That(dialect.Header, Is.True);
    Assert.That(dialect.HeaderRows, Is.Not.Null.And.Not.Empty);
}

[Test]
Expand All @@ -208,64 +221,92 @@ public void WithoutHeader_ShouldSetHeaderToFalse()
.Build();

Assert.That(descriptor.Header, Is.False);
Assert.That(descriptor.HeaderRows, Is.Empty);
}

// NOTE(review): this span looks like diff residue in which two tests are interleaved line by
// line: a WithCommentChar(char) test and a WithHeaderJoin(string) test share a single body.
// Presumably the WithCommentChar lines are the pre-commit text and the WithHeaderJoin lines
// the post-commit text — confirm against the repository before treating this as compilable.
// Each variant configures a DialectDescriptorBuilder with one setting, builds the descriptor
// and asserts that the matching property equals the value passed in.
[Test]
[TestCase("#")]
[TestCase("/")]
public void WithCommentChar_ShouldSetCommentChar(char commentChar)
[TestCase(" ")]
[TestCase("-")]
[TestCase(" - ")]
public void WithHeaderJoin_ShouldSetHeaderJoin(string join)
{
var descriptor = new DialectDescriptorBuilder()
.WithCommentChar(commentChar)
.WithHeaderJoin(join)
.Build();

Assert.That(descriptor.CommentChar, Is.EqualTo(commentChar));
Assert.That(descriptor.HeaderJoin, Is.EqualTo(join));
}

// NOTE(review): this span looks like diff residue in which two tests are interleaved line by
// line: a WithCommentChar(CommentChar, char) test and a WithHeaderRows(params int[]) test.
// Presumably the WithCommentChar lines are the pre-commit text and the WithHeaderRows lines
// the post-commit text — confirm against the repository before treating this as compilable.
// The WithHeaderRows variant passes the row numbers to the builder and asserts that the built
// descriptor's HeaderRows equals them; the WithCommentChar variant asserts that CommentChar
// equals the character paired with the enum member in the test case.
[Test]
[TestCase(CommentChar.Hash, '#')]
[TestCase(CommentChar.ForwardSlash, '/')]
[TestCase(CommentChar.Dash, '-')]
[TestCase(CommentChar.Semicolon, ';')]
public void WithCommentChar_ShouldSetCommentChar(CommentChar commentChar, char value)
[TestCase(1)]
[TestCase(1, 2, 3)]
public void WithHeaderRows_ShouldSetHeaderRows(params int[] rows)
{
var descriptor = new DialectDescriptorBuilder()
.WithCommentChar(commentChar)
.WithHeaderRows(rows)
.Build();
Assert.That(descriptor.CommentChar, Is.EqualTo(value));

Assert.That(descriptor.HeaderRows, Is.EqualTo(rows));
}

/// <summary>
/// Passes an empty collection to WithHeaderRows and checks that the built descriptor
/// has an empty HeaderRows and Header set to false.
/// </summary>
[Test]
public void WithHeaderRowsEmpty_ShouldSetHeaderRowsAndHeader()
{
    var builder = new DialectDescriptorBuilder().WithHeaderRows([]);

    var dialect = builder.Build();

    Assert.That(dialect.HeaderRows, Is.Empty);
    Assert.That(dialect.Header, Is.False);
}

// NOTE(review): this span looks like diff residue in which two tests are interleaved line by
// line: a WithCaseSensitiveHeader(bool) test and a WithoutHeaderRows() test.
// Presumably the WithCaseSensitiveHeader lines (including the two bool test cases) are the
// pre-commit text and the WithoutHeaderRows lines the post-commit text — confirm against the
// repository before treating this as compilable.
// The WithoutHeaderRows variant asserts that the built descriptor has an empty HeaderRows and
// Header set to false.
[Test]
[TestCase(true)]
[TestCase(false)]
public void WithCaseSensitiveHeader_ShouldSetCaseSensitiveHeaderToValue(bool value)
public void WithoutHeaderRows_ShouldSetHeaderRowsAndHeader()
{
var descriptor = new DialectDescriptorBuilder()
.WithCaseSensitiveHeader(value)
.WithoutHeaderRows()
.Build();

Assert.That(descriptor.CaseSensitiveHeader, Is.EqualTo(value));
Assert.That(descriptor.HeaderRows, Is.Empty);
Assert.That(descriptor.Header, Is.False);
}

// NOTE(review): this span looks like diff residue in which two tests are interleaved line by
// line: a parameterless WithCaseSensitiveHeader test and a WithCommentChar(char) test.
// Presumably the WithCaseSensitiveHeader lines are the pre-commit text and the WithCommentChar
// lines the post-commit text — confirm against the repository before treating this as
// compilable.
// The WithCommentChar variant passes the character to the builder and asserts that the built
// descriptor's CommentChar equals it.
[Test]
public void WithCaseSensitiveHeader_ShouldSetCaseSensitiveHeaderToTrue()
[TestCase("#")]
[TestCase("/")]
public void WithCommentChar_ShouldSetCommentChar(char commentChar)
{
var descriptor = new DialectDescriptorBuilder()
.WithCaseSensitiveHeader()
.WithCommentChar(commentChar)
.Build();

Assert.That(descriptor.CaseSensitiveHeader, Is.True);
Assert.That(descriptor.CommentChar, Is.EqualTo(commentChar));
}

/// <summary>
/// Passes a CommentChar enum member to the builder and checks that the built descriptor's
/// CommentChar equals the character paired with that member in the test case.
/// </summary>
/// <param name="commentChar">Enum member handed to WithCommentChar.</param>
/// <param name="value">Character expected on the built descriptor.</param>
[Test]
[TestCase(CommentChar.Hash, '#')]
[TestCase(CommentChar.ForwardSlash, '/')]
[TestCase(CommentChar.Dash, '-')]
[TestCase(CommentChar.Semicolon, ';')]
public void WithCommentChar_ShouldSetCommentChar(CommentChar commentChar, char value)
{
    var builder = new DialectDescriptorBuilder().WithCommentChar(commentChar);

    var dialect = builder.Build();

    Assert.That(dialect.CommentChar, Is.EqualTo(value));
}

// NOTE(review): this span looks like diff residue in which two tests are interleaved line by
// line: a WithoutCaseSensitiveHeader test and a WithCommentRows(params int[]) test.
// Presumably the CaseSensitiveHeader lines are the pre-commit text and the WithCommentRows
// lines the post-commit text — confirm against the repository before treating this as
// compilable.
// The WithCommentRows variant passes the row numbers to the builder and asserts that the built
// descriptor's CommentRows equals them.
// NOTE(review): the method name ends in "ShouldSetCommentChar" while the assertion checks
// CommentRows — the name looks like a copy/paste leftover; confirm before renaming.
[Test]
public void WithCaseSensitiveHeader_WithoutParameter_ShouldSetCaseSensitiveHeaderToFalse()
[TestCase()]
[TestCase(1)]
[TestCase(1, 2, 3)]
public void WithCommentRows_ShouldSetCommentChar(params int[] rows)
{
var descriptor = new DialectDescriptorBuilder()
.WithoutCaseSensitiveHeader()
.WithCommentRows(rows)
.Build();

Assert.That(descriptor.CaseSensitiveHeader, Is.False);
Assert.That(descriptor.CommentRows, Is.EqualTo(rows));
}

[Test]
Expand Down
Loading

0 comments on commit e1641b5

Please sign in to comment.