Skip to content

Fix: Allow \r in unquoted fields when row separator doesn't contain \r #346

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/csv/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ def prepare_quoted
def prepare_unquoted
return if @quote_character.nil?

no_unquoted_values = "\r\n".encode(@encoding)
no_unquoted_values = Regexp.escape(@row_separator).encode(@encoding)
no_unquoted_values << @escaped_first_column_separator
unless @liberal_parsing
no_unquoted_values << @escaped_quote_character
Expand Down
57 changes: 43 additions & 14 deletions test/csv/parse/test_general.rb
Original file line number Diff line number Diff line change
Expand Up @@ -138,28 +138,57 @@ def test_non_regex_edge_cases
end
end

def test_malformed_csv_cr_first_line
def test_unquoted_cr_with_lf_row_separator
data = "field1,field\rwith\rcr,field3\nrow2,data,here\n"
expected = [
["field1", "field\rwith\rcr", "field3"],
["row2", "data", "here"]
]
assert_equal(expected, CSV.parse(data, row_sep: "\n"))
end

def test_unquoted_cr_with_custom_row_separator
data = "field1,field\rwith\rcr,field3|row2,data,here|"
expected = [
["field1", "field\rwith\rcr", "field3"],
["row2", "data", "here"]
]
assert_equal(expected, CSV.parse(data, row_sep: "|"))
end

def test_unquoted_cr_with_crlf_row_separator
data = "field1,field\r2,field3\r\nrow2,data,here\r\n"
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse_line("1,2\r,3", row_sep: "\n")
CSV.parse(data, row_sep: "\r\n")
end
assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 1.",
error.message)
end

def test_malformed_csv_cr_middle_line
csv = <<-CSV
line,1,abc
line,2,"def\nghi"
def test_quoted_cr_with_custom_row_separator
data = "field1,\"field\rwith\rcr\",field3|row2,data,here|"
expected = [
["field1", "field\rwith\rcr", "field3"],
["row2", "data", "here"]
]
assert_equal(expected, CSV.parse(data, row_sep: "|"))
end

line,4,some\rjunk
line,5,jkl
CSV
def test_unquoted_cr_in_middle_line
csv = "line,1,abc\nline,2,\"def\nghi\"\nline,4,some\rjunk\nline,5,jkl\n"
result = CSV.parse(csv)
expected = [
["line", "1", "abc"],
["line", "2", "def\nghi"],
["line", "4", "some\rjunk"],
["line", "5", "jkl"]
]
assert_equal(expected, result)
end

error = assert_raise(CSV::MalformedCSVError) do
CSV.parse(csv)
end
assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 4.",
error.message)
def test_empty_rows_with_cr
result = CSV.parse("\n" + "\r")
assert_equal([[], ["\r"]], result)
end

def test_malformed_csv_unclosed_quote
Expand Down
10 changes: 0 additions & 10 deletions test/csv/parse/test_invalid.rb
Original file line number Diff line number Diff line change
@@ -1,18 +1,8 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: false

require_relative "../helper"

class TestCSVParseInvalid < Test::Unit::TestCase
def test_no_column_mixed_new_lines
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse("\n" +
"\r")
end
assert_equal("New line must be <\"\\n\"> not <\"\\r\"> in line 2.",
error.message)
end

def test_ignore_invalid_line
csv = CSV.new(<<-CSV, headers: true, return_headers: true)
head1,head2,head3
Expand Down
9 changes: 9 additions & 0 deletions test/csv/parse/test_liberal_parsing.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,15 @@ def test_space_quote
CSV.parse(input, liberal_parsing: true))
end

def test_unquoted_cr_with_custom_row_separator
data = "field1,field\rwith\rcr,field3|row2,data,here|"
expected = [
["field1", "field\rwith\rcr", "field3"],
["row2", "data", "here"]
]
assert_equal(expected, CSV.parse(data, row_sep: "|", liberal_parsing: true))
end

def test_double_quote_outside_quote
data = %Q{a,""b""}
error = assert_raise(CSV::MalformedCSVError) do
Expand Down
Loading