diff --git a/internal/matchers/text.go b/internal/matchers/text.go index e9ca91c4..9200b73b 100644 --- a/internal/matchers/text.go +++ b/internal/matchers/text.go @@ -208,6 +208,49 @@ func GeoJson(in []byte) bool { return false } +// NdJson matches a Newline delimited JSON file +func NdJson(in []byte) bool { + // Seperator with carriage return and new line `\r\n` + srn := []byte{0x0D, 0x0A} + + // Seperator with only new line `\n` + sn := []byte{0x0A} + + // total bytes scanned + parsed := 0 + + // Split by `srn` + for rni, insrn := range bytes.Split(in, srn) { + // seperator byte count should be added only after the first split + if rni != 0 { + // Add two as `\r\n` is used for split + parsed += 2 + } + // Return false if there is a carriage return `\r` + if bytes.Contains(insrn, []byte{0x0D}) { + return false + } + // Split again by `sn` + for ni, insn := range bytes.Split(insrn, sn) { + // seperator byte count should be added only after the first split + if ni != 0 { + // Add one as `\n` is used for split + parsed += 1 + } + // Empty line is valid + if len(insn) == 0 { + continue + } + p, err := json.Scan(insn) + parsed += p + if parsed < ReadLimit && err != nil { + return false + } + } + } + return parsed == len(in) +} + // Js matches a Javascript file. func Js(in []byte) bool { return detect(in, jsSigs) diff --git a/mime_test.go b/mime_test.go index bcd50f23..ea76f981 100644 --- a/mime_test.go +++ b/mime_test.go @@ -99,6 +99,7 @@ var files = map[string]*node{ "json.json": json, "geojson.geojson": geoJson, "geojson.1.geojson": geoJson, + "ndjson.ndjson": ndJson, "csv.csv": csv, "tsv.tsv": tsv, "rtf.rtf": rtf, diff --git a/supported_mimes.md b/supported_mimes.md index abe6cb36..12e880ee 100644 --- a/supported_mimes.md +++ b/supported_mimes.md @@ -1,4 +1,4 @@ -## 114 Supported MIME types +## 115 Supported MIME types This file is automatically generated when running tests. Do not edit manually. Extension | MIME type @@ -92,6 +92,7 @@ Extension | MIME type **py** | application/x-python **json** | application/json **geojson** | application/geo+json +**ndjson** | application/x-ndjson **rtf** | text/rtf **tcl** | text/x-tcl **csv** | text/csv diff --git a/testdata/ndjson.ndjson b/testdata/ndjson.ndjson new file mode 100644 index 00000000..6951371c --- /dev/null +++ b/testdata/ndjson.ndjson @@ -0,0 +1,4 @@ +{ "firstName": "John", "lastName": "Smith", "age": -25, "limit": 1e2, "width": 12, "height": 1.73, "good": true, "bad": false, "address": { "streetAddress": "21\t\u0009 \u1234 2nd Street", "city": "New York", "state": "NY", "postalCode": "10021" }, "phoneNumber": [ { "type": "home", "number": "212 555-1234" }, { "type": "fax", "number": "646 555-4567" } ], "gender": { "type": "male" }} +{ "firstName": "John", "lastName": "Smith", "age": -25, "limit": 1e2, "width": 12, "height": 1.73, "good": true, "bad": false, "address": { "streetAddress": "21\t\u0009 \u1234 2nd Street", "city": "New York", "state": "NY", "postalCode": "10021" }, "phoneNumber": [ { "type": "home", "number": "212 555-1234" }, { "type": "fax", "number": "646 555-4567" } ], "gender": { "type": "male" }} + +{ "firstName": "John", "lastName": "Smith", "age": -25, "limit": 1e2, "width": 12, "height": 1.73, "good": true, "bad": false, "address": { "streetAddress": "21\t\u0009 \u1234 2nd Street", "city": "New York", "state": "NY", "postalCode": "10021" }, "phoneNumber": [ { "type": "home", "number": "212 555-1234" }, { "type": "fax", "number": "646 555-4567" } ], "gender": { "type": "male" }} \ No newline at end of file diff --git a/tree.go b/tree.go index 4543c3e3..89d648a3 100644 --- a/tree.go +++ b/tree.go @@ -34,12 +34,13 @@ var ( psd = newNode("application/x-photoshop", "psd", matchers.Psd) fits = newNode("application/fits", "fits", matchers.Fits) ogg = newNode("application/ogg", "ogg", matchers.Ogg) - txt = newNode("text/plain", "txt", matchers.Txt, html, svg, xml, php, js, lua, perl, python, json, rtf, tcl, csv, tsv, vCard) + txt = newNode("text/plain", "txt", matchers.Txt, html, svg, xml, php, js, lua, perl, python, json, ndJson, rtf, tcl, csv, tsv, vCard) xml = newNode("text/xml; charset=utf-8", "xml", matchers.Xml, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf) json = newNode("application/json", "json", matchers.Json, geoJson) csv = newNode("text/csv", "csv", matchers.Csv) tsv = newNode("text/tab-separated-values", "tsv", matchers.Tsv) geoJson = newNode("application/geo+json", "geojson", matchers.GeoJson) + ndJson = newNode("application/x-ndjson", "ndjson", matchers.NdJson) html = newNode("text/html; charset=utf-8", "html", matchers.Html) php = newNode("text/x-php; charset=utf-8", "php", matchers.Php) rtf = newNode("text/rtf", "rtf", matchers.Rtf)