diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1c9cd10..e2eaa55 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,7 +20,4 @@ jobs:
       - run: npm install
       - run: npm run build
       - run: npm test
-      - name: Spec compliance tests
-        run: |
-          git clone --depth 1 https://github.com/toml-lang/toml-test.git .binarymuse/toml-test
-          npm run test:spec
+      - run: npm run test:spec:fresh
diff --git a/lib/parser.js b/lib/parser.js
index a819894..019d181 100644
--- a/lib/parser.js
+++ b/lib/parser.js
@@ -196,9 +196,10 @@ function peg$parse(input, options) {
   const peg$c28 = " ";
   const peg$c29 = "\n";
   const peg$c30 = "\r";
-  const peg$c31 = "\\U";
-  const peg$c32 = "\\u";
-  const peg$c33 = "\\x";
+  const peg$c31 = "\uFEFF";
+  const peg$c32 = "\\U";
+  const peg$c33 = "\\u";
+  const peg$c34 = "\\x";
 
   const peg$r0 = /^[\t -~\x80-\uFFFF]/;
   const peg$r1 = /^[+\-]/;
@@ -265,11 +266,12 @@ function peg$parse(input, options) {
   const peg$e44 = peg$classExpectation([" ", "\t"], false, false, false);
   const peg$e45 = peg$literalExpectation("\n", false);
   const peg$e46 = peg$literalExpectation("\r", false);
-  const peg$e47 = peg$classExpectation([["A", "Z"], ["a", "z"], ["0", "9"], "_", "-"], false, false, false);
-  const peg$e48 = peg$classExpectation(["\"", "\\", "b", "t", "n", "f", "r", "e"], false, false, false);
-  const peg$e49 = peg$literalExpectation("\\U", false);
-  const peg$e50 = peg$literalExpectation("\\u", false);
-  const peg$e51 = peg$literalExpectation("\\x", false);
+  const peg$e47 = peg$literalExpectation("\uFEFF", false);
+  const peg$e48 = peg$classExpectation([["A", "Z"], ["a", "z"], ["0", "9"], "_", "-"], false, false, false);
+  const peg$e49 = peg$classExpectation(["\"", "\\", "b", "t", "n", "f", "r", "e"], false, false, false);
+  const peg$e50 = peg$literalExpectation("\\U", false);
+  const peg$e51 = peg$literalExpectation("\\u", false);
+  const peg$e52 = peg$literalExpectation("\\x", false);
 
   function peg$f0() { return nodes }
   function peg$f1(name) { addNode(node('ArrayPath', name, offset())) }
@@ -521,18 +523,21 @@ function peg$parse(input, options) {
   }
 
   function peg$parsestart() {
-    let s0, s1, s2;
+    let s0, s1, s2, s3;
 
     s0 = peg$currPos;
-    s1 = [];
-    s2 = peg$parseline();
-    while (s2 !== peg$FAILED) {
-      s1.push(s2);
-      s2 = peg$parseline();
+    s1 = peg$parseBOM();
+    if (s1 === peg$FAILED) {
+      s1 = null;
+    }
+    s2 = [];
+    s3 = peg$parseline();
+    while (s3 !== peg$FAILED) {
+      s2.push(s3);
+      s3 = peg$parseline();
     }
     peg$savedPos = s0;
-    s1 = peg$f0();
-    s0 = s1;
+    s0 = peg$f0();
 
     return s0;
   }
@@ -3996,6 +4001,20 @@ function peg$parse(input, options) {
     return s0;
   }
 
+  function peg$parseBOM() {
+    let s0;
+
+    if (input.charCodeAt(peg$currPos) === 65279) {
+      s0 = peg$c31;
+      peg$currPos++;
+    } else {
+      s0 = peg$FAILED;
+      if (peg$silentFails === 0) { peg$fail(peg$e47); }
+    }
+
+    return s0;
+  }
+
   function peg$parseDIGIT() {
     let s0;
 
@@ -4032,7 +4051,7 @@ function peg$parse(input, options) {
       peg$currPos++;
     } else {
       s0 = peg$FAILED;
-      if (peg$silentFails === 0) { peg$fail(peg$e47); }
+      if (peg$silentFails === 0) { peg$fail(peg$e48); }
     }
 
     return s0;
@@ -4055,7 +4074,7 @@ function peg$parse(input, options) {
         peg$currPos++;
       } else {
         s2 = peg$FAILED;
-        if (peg$silentFails === 0) { peg$fail(peg$e48); }
+        if (peg$silentFails === 0) { peg$fail(peg$e49); }
       }
       if (s2 !== peg$FAILED) {
         peg$savedPos = s0;
@@ -4079,12 +4098,12 @@ function peg$parse(input, options) {
     let s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11;
 
     s0 = peg$currPos;
-    if (input.substr(peg$currPos, 2) === peg$c31) {
-      s1 = peg$c31;
+    if (input.substr(peg$currPos, 2) === peg$c32) {
+      s1 = peg$c32;
       peg$currPos += 2;
     } else {
       s1 = peg$FAILED;
-      if (peg$silentFails === 0) { peg$fail(peg$e49); }
+      if (peg$silentFails === 0) { peg$fail(peg$e50); }
     }
     if (s1 !== peg$FAILED) {
       s2 = peg$currPos;
@@ -4157,12 +4176,12 @@ function peg$parse(input, options) {
     }
     if (s0 === peg$FAILED) {
       s0 = peg$currPos;
-      if (input.substr(peg$currPos, 2) === peg$c32) {
-        s1 = peg$c32;
+      if (input.substr(peg$currPos, 2) === peg$c33) {
+        s1 = peg$c33;
         peg$currPos += 2;
       } else {
         s1 = peg$FAILED;
-        if (peg$silentFails === 0) { peg$fail(peg$e50); }
+        if (peg$silentFails === 0) { peg$fail(peg$e51); }
       }
       if (s1 !== peg$FAILED) {
         s2 = peg$currPos;
@@ -4211,12 +4230,12 @@ function peg$parse(input, options) {
       }
       if (s0 === peg$FAILED) {
         s0 = peg$currPos;
-        if (input.substr(peg$currPos, 2) === peg$c33) {
-          s1 = peg$c33;
+        if (input.substr(peg$currPos, 2) === peg$c34) {
+          s1 = peg$c34;
           peg$currPos += 2;
         } else {
           s1 = peg$FAILED;
-          if (peg$silentFails === 0) { peg$fail(peg$e51); }
+          if (peg$silentFails === 0) { peg$fail(peg$e52); }
         }
         if (s1 !== peg$FAILED) {
           s2 = peg$currPos;
diff --git a/package.json b/package.json
index bd71919..8a5092a 100644
--- a/package.json
+++ b/package.json
@@ -16,6 +16,7 @@
     "build": "peggy -o lib/parser.js src/toml.pegjs",
     "test": "node --test test/test_toml.js",
     "test:spec": "node test/spec-test.js",
+    "test:spec:fresh": "node scripts/test-spec-fresh.js",
     "test:spec:failures": "node test/spec-test.js --failures",
     "test:all": "node --test test/test_toml.js && node test/spec-test.js",
     "prepublishOnly": "npm run build"
diff --git a/scripts/test-spec-fresh.js b/scripts/test-spec-fresh.js
new file mode 100644
index 0000000..c0bf829
--- /dev/null
+++ b/scripts/test-spec-fresh.js
@@ -0,0 +1,55 @@
+#!/usr/bin/env node
+"use strict";
+
+// Runs the toml-test spec suite against a freshly cloned checkout so results
+// never depend on a stale copy left behind in .binarymuse/toml-test.
+
+var childProcess = require("child_process");
+var fs = require("fs");
+var path = require("path");
+
+var repoRoot = path.join(__dirname, "..");
+var testCheckout = path.join(repoRoot, ".binarymuse", "toml-test");
+var isWindows = process.platform === "win32";
+
+/**
+ * Run a command from the repository root with inherited stdio, and stop the
+ * script when the command fails.
+ *
+ * @param {string} command - Executable (or full command line when a shell is used).
+ * @param {string[]} args - Arguments passed to the executable.
+ * @param {object} [options] - Extra spawnSync options merged over the defaults.
+ * @throws {Error} The spawn error when the process could not be started.
+ */
+function run(command, args, options) {
+  var spawnOptions = Object.assign({ cwd: repoRoot, stdio: "inherit" }, options);
+  var result = childProcess.spawnSync(command, args, spawnOptions);
+
+  if (result.error) {
+    throw result.error;
+  }
+  if (result.status !== 0) {
+    // status is null when the child was killed by a signal; exit non-zero anyway.
+    process.exit(result.status || 1);
+  }
+}
+
+fs.rmSync(testCheckout, { recursive: true, force: true });
+fs.mkdirSync(path.dirname(testCheckout), { recursive: true });
+
+run("git", [
+  "clone",
+  "--depth",
+  "1",
+  "https://github.com/toml-lang/toml-test.git",
+  testCheckout,
+]);
+
+if (isWindows) {
+  // npm is a batch file (npm.cmd) on Windows, and Node refuses to spawn
+  // .cmd/.bat files without a shell (EINVAL). Pass one command string so no
+  // argument array is combined with the shell option.
+  run("npm run test:spec", [], { shell: true });
+} else {
+  run("npm", ["run", "test:spec"]);
+}
diff --git a/src/toml.pegjs b/src/toml.pegjs
index 3f9bfd7..18d325b 100644
--- a/src/toml.pegjs
+++ b/src/toml.pegjs
@@ -93,7 +93,7 @@
 }
 
 start
-  = line* { return nodes }
+  = BOM? line* { return nodes }
 
 line
   = S* expr:expression S* comment? (NL+ / EOF)
@@ -324,6 +324,7 @@ S = [ \t]
 NL = "\n" / "\r" "\n"
 NLS = NL / S
 EOF = !.
+BOM = "\uFEFF"
 DIGIT = [0-9]
 HEX = [0-9a-fA-F]
 ASCII_BASIC = [A-Za-z0-9_\-]
diff --git a/test/test_toml.js b/test/test_toml.js
index 0676131..43a1796 100644
--- a/test/test_toml.js
+++ b/test/test_toml.js
@@ -3,6 +3,7 @@
 var { describe, it } = require("node:test");
 var assert = require("node:assert");
 var toml = require("../");
+var parser = require("../lib/parser");
 var fs = require("fs");
 var path = require("path");
 
@@ -293,6 +294,24 @@ describe("whitespace", function () {
   it("leading newlines", function () {
     parsesToml("\ntest = \"ing\"", { test: "ing" });
   });
+
+  it("allows a UTF-8 BOM at the start of the document", function () {
+    parsesToml("\uFEFFa = 1", { a: 1 });
+
+    assert.doesNotThrow(function () {
+      parser.parse("\uFEFF# starts with a BOM\n");
+    });
+  });
+
+  it("rejects a UTF-8 BOM anywhere but the start of the document", function () {
+    assert.throws(function () {
+      toml.parse("a = \uFEFF1");
+    });
+
+    assert.throws(function () {
+      toml.parse("\uFEFF\uFEFFa = 1");
+    });
+  });
 });
 
 describe("datetimes", function () {