diff --git a/internal/matchers/text.go b/internal/matchers/text.go index e385bba6..06c2c071 100644 --- a/internal/matchers/text.go +++ b/internal/matchers/text.go @@ -119,8 +119,20 @@ var ( } ) -// Txt matches a text file. -func Txt(in []byte) bool { +// Utf16be matches a text file encoded with UTF-16 and with the characters +// represented in big endian. +func Utf16be(in []byte) bool { + return bytes.HasPrefix(in, []byte{0xFE, 0xFF}) +} + +// Utf16le matches a text file encoded with UTF-16 and with the characters +// represented in little endian. +func Utf16le(in []byte) bool { + return bytes.HasPrefix(in, []byte{0xFF, 0xFE}) +} + +// Utf8 matches a UTF-8 text file. +func Utf8(in []byte) bool { in = trimLWS(in) for _, b := range in { if b <= 0x08 || diff --git a/mimetype_test.go b/mimetype_test.go index 152c6466..4d297a09 100644 --- a/mimetype_test.go +++ b/mimetype_test.go @@ -108,7 +108,9 @@ var files = map[string]*MIME{ "html.withbr.html": html, "svg.svg": svg, "svg.1.svg": svg, - "txt.txt": txt, + "utf8.txt": utf8, + "utf16lebom.txt": utf16le, + "utf16bebom.txt": utf16be, "php.php": php, "ps.ps": ps, "json.json": json, diff --git a/supported_mimes.md b/supported_mimes.md index 4d704b2c..dab5ae87 100644 --- a/supported_mimes.md +++ b/supported_mimes.md @@ -1,4 +1,4 @@ -## 137 Supported MIME types +## 139 Supported MIME types This file is automatically generated when running tests. Do not edit manually. Extension | MIME type | Aliases @@ -77,36 +77,8 @@ Extension | MIME type | Aliases **.voc** | audio/x-unknown | - **.mp4** | audio/mp4 | audio/x-m4a, audio/x-mp4a **.m4a** | audio/x-m4a | - -**.txt** | text/plain; charset=utf-8 | - -**.html** | text/html; charset=utf-8 | - -**.svg** | image/svg+xml | - -**.xml** | text/xml; charset=utf-8 | - -**.rss** | application/rss+xml | text/rss -**.atom** | application/atom+xml | - -**.x3d** | model/x3d+xml | - -**.kml** | application/vnd.google-earth.kml+xml | - -**.xlf** | application/x-xliff+xml | - -**.dae** | model/vnd.collada+xml | - -**.gml** | application/gml+xml | - -**.gpx** | application/gpx+xml | - -**.tcx** | application/vnd.garmin.tcx+xml | - -**.amf** | application/x-amf | - -**.3mf** | application/vnd.ms-package.3dmanufacturing-3dmodel+xml | - -**.php** | text/x-php; charset=utf-8 | - -**.js** | application/javascript | application/x-javascript, text/javascript -**.lua** | text/x-lua | - -**.pl** | text/x-perl | - -**.py** | application/x-python | - -**.json** | application/json | - -**.geojson** | application/geo+json | - -**.ndjson** | application/x-ndjson | - -**.rtf** | text/rtf | - -**.tcl** | text/x-tcl | application/x-tcl -**.csv** | text/csv | - -**.tsv** | text/tab-separated-values | - -**.vcf** | text/vcard | - -**.ics** | text/calendar | - -**.warc** | application/warc | - +**.txt** | text/plain; charset=utf-16le | - +**.txt** | text/plain; charset=utf-16be | - **.gz** | application/gzip | application/x-gzip, application/x-gunzip, application/gzipped, application/gzip-compressed, application/x-gzip-compressed, gzip/document **.class** | application/x-java-applet; charset=binary | - **.swf** | application/x-shockwave-flash | - @@ -140,3 +112,33 @@ Extension | MIME type | Aliases **.accdb** | application/x-msaccess | - **.zst** | application/zstd | - **.cab** | application/vnd.ms-cab-compressed | - +**.txt** | text/plain; charset=utf-8 | - +**.html** | text/html; charset=utf-8 | - +**.svg** | image/svg+xml | - +**.xml** | text/xml; charset=utf-8 | - +**.rss** | application/rss+xml | text/rss +**.atom** | application/atom+xml | - +**.x3d** | model/x3d+xml | - +**.kml** | application/vnd.google-earth.kml+xml | - +**.xlf** | application/x-xliff+xml | - +**.dae** | model/vnd.collada+xml | - +**.gml** | application/gml+xml | - +**.gpx** | application/gpx+xml | - +**.tcx** | application/vnd.garmin.tcx+xml | - +**.amf** | application/x-amf | - +**.3mf** | application/vnd.ms-package.3dmanufacturing-3dmodel+xml | - +**.php** | text/x-php; charset=utf-8 | - +**.js** | application/javascript | application/x-javascript, text/javascript +**.lua** | text/x-lua | - +**.pl** | text/x-perl | - +**.py** | application/x-python | - +**.json** | application/json | - +**.geojson** | application/geo+json | - +**.ndjson** | application/x-ndjson | - +**.rtf** | text/rtf | - +**.tcl** | text/x-tcl | application/x-tcl +**.csv** | text/csv | - +**.tsv** | text/tab-separated-values | - +**.vcf** | text/vcard | - +**.ics** | text/calendar | - +**.warc** | application/warc | - diff --git a/testdata/utf16bebom.txt b/testdata/utf16bebom.txt new file mode 100644 index 00000000..da0c4516 Binary files /dev/null and b/testdata/utf16bebom.txt differ diff --git a/testdata/utf16lebom.txt b/testdata/utf16lebom.txt new file mode 100644 index 00000000..393cc5ea Binary files /dev/null and b/testdata/utf16lebom.txt differ diff --git a/testdata/txt.txt b/testdata/utf8.txt similarity index 100% rename from testdata/txt.txt rename to testdata/utf8.txt diff --git a/tree.go b/tree.go index fbd9035c..a6de598f 100644 --- a/tree.go +++ b/tree.go @@ -6,12 +6,13 @@ import "github.com/gabriel-vasile/mimetype/internal/matchers" // When a matcher passes the check, the children matchers // are tried in order to find a more accurate mime type. var root = newMIME("application/octet-stream", "", matchers.True, - sevenZ, zip, pdf, ole, ps, psd, ogg, png, jpg, jp2, jpx, jpm, gif, webp, exe, elf, - ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3, flac, midi, ape, musePack, amr, - wav, aiff, au, mpeg, quickTime, mqv, mp4, webM, threeGP, threeG2, avi, flv, - mkv, asf, aac, voc, aMp4, m4a, txt, gzip, class, swf, crx, woff, woff2, otf, - eot, wasm, shx, dbf, dcm, rar, djvu, mobi, lit, bpg, sqlite3, dwg, nes, macho, - qcp, icns, heic, heicSeq, heif, heifSeq, mrc, mdb, accdb, zstd, cab, + sevenZ, zip, pdf, ole, ps, psd, ogg, png, jpg, jp2, jpx, jpm, gif, webp, + exe, elf, ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3, flac, midi, ape, + musePack, amr, wav, aiff, au, mpeg, quickTime, mqv, mp4, webM, threeGP, + threeG2, avi, flv, mkv, asf, aac, voc, aMp4, m4a, utf16le, utf16be, gzip, + class, swf, crx, woff, woff2, otf, eot, wasm, shx, dbf, dcm, rar, djvu, + mobi, lit, bpg, sqlite3, dwg, nes, macho, qcp, icns, heic, heicSeq, heif, + heifSeq, mrc, mdb, accdb, zstd, cab, utf8, ) // The list of nodes appended to the root node @@ -45,7 +46,9 @@ var ( alias("application/x-ogg") oggAudio = newMIME("audio/ogg", ".oga", matchers.OggAudio) oggVideo = newMIME("video/ogg", ".ogv", matchers.OggVideo) - txt = newMIME("text/plain; charset=utf-8", ".txt", matchers.Txt, html, svg, xml, php, js, lua, perl, python, json, ndJson, rtf, tcl, csv, tsv, vCard, iCalendar, warc) + utf16le = newMIME("text/plain; charset=utf-16le", ".txt", matchers.Utf16le) + utf16be = newMIME("text/plain; charset=utf-16be", ".txt", matchers.Utf16be) + utf8 = newMIME("text/plain; charset=utf-8", ".txt", matchers.Utf8, html, svg, xml, php, js, lua, perl, python, json, ndJson, rtf, tcl, csv, tsv, vCard, iCalendar, warc) xml = newMIME("text/xml; charset=utf-8", ".xml", matchers.Xml, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf) json = newMIME("application/json", ".json", matchers.Json, geoJson) csv = newMIME("text/csv", ".csv", matchers.Csv)