From fe1fbfba3c6a9211dda166a327c9651b6c974471 Mon Sep 17 00:00:00 2001 From: Pierre-Yves Strub Date: Fri, 29 May 2026 12:10:22 +0200 Subject: [PATCH] Add black-box quotation preprocessor Introduce {% name body %} quotations, expanded during lexing by an external tool over stdin/stdout. A quotation expands to a sentence fragment that is spliced into the surrounding sentence, so it may replace only part of a sentence (and several may appear in one). The terminating '.' is always written by the user. Locations in the expansion are remapped back to the original source, so errors point at the original quoted text. Usage Write a quotation as {% name body %}, where name selects a handler and body is arbitrary text (the {% %} delimiters nest). The expansion is a fragment, so the surrounding sentence and its '.' are written outside the quotation: op forty_two = {% calc 6 * 7 %}. Quotations compose with ordinary source and with each other in one sentence: op mixed = {% calc 6 * 7 %} + ({% calc 2 + 3 %} * 10). Enabling quotations Quotations run external programs, so the feature is OFF by default and a quotation encountered while it is off is a hard error (never a silent skip or a silent execution). Enable it for a run in one of two ways: easycrypt -enable-quotations compile foo.ec EC_ENABLE_QUOTATIONS=1 easycrypt compile foo.ec It cannot be enabled from easycrypt.project, which ships inside a checked-out tree -- letting it turn the feature on would defeat the safeguard. Only enable quotations for sources you trust. Binding handlers Once enabled, a name is resolved to a shell command in order: a 'quote = name:command' entry in the [general] section of easycrypt.project (relative paths resolved against the project directory); then EC_QUOTE_; then EC_QUOTE_CMD; then an executable handlers/ (also .py/.sh) next to the source file. Handler protocol The handler reads the body (after a header line) on stdin and writes a single JSON object on stdout: { "expanded": , "segments": [...] }, where segments is the source map used for error-location remapping. A non-zero exit reports an error at the quotation. See doc/quotations.rst for the full reference. --- Makefile | 2 +- config/tests.config | 9 +- doc/index.rst | 1 + doc/quotations.rst | 249 +++++++++++++++ src/ec.ml | 16 + src/ecIo.ml | 79 ++++- src/ecLexer.mll | 38 +++ src/ecOptions.ml | 70 +++- src/ecOptions.mli | 15 +- src/ecParser.mly | 1 + src/ecQuotation.ml | 301 ++++++++++++++++++ tests/quotations-disabled/handlers/calc.py | 49 +++ tests/quotations-disabled/test_disabled.ec | 7 + tests/quotations-project/calc.py | 49 +++ tests/quotations-project/easycrypt.project | 5 + tests/quotations-project/test_project_calc.ec | 10 + tests/quotations-project/verbatim.py | 36 +++ tests/quotations/handlers/calc.py | 49 +++ tests/quotations/handlers/verbatim.py | 36 +++ tests/quotations/test_calc.ec | 9 + tests/quotations/test_calc_inline.ec | 8 + tests/quotations/test_verbatim_err.ec | 13 + tests/quotations/test_verbatim_ok.ec | 8 + 23 files changed, 1038 insertions(+), 22 deletions(-) create mode 100644 doc/quotations.rst create mode 100644 src/ecQuotation.ml create mode 100755 tests/quotations-disabled/handlers/calc.py create mode 100644 tests/quotations-disabled/test_disabled.ec create mode 100755 tests/quotations-project/calc.py create mode 100644 tests/quotations-project/easycrypt.project create mode 100644 tests/quotations-project/test_project_calc.ec create mode 100755 tests/quotations-project/verbatim.py create mode 100755 tests/quotations/handlers/calc.py create mode 100755 tests/quotations/handlers/verbatim.py create mode 100644 tests/quotations/test_calc.ec create mode 100644 tests/quotations/test_calc_inline.ec create mode 100644 tests/quotations/test_verbatim_err.ec create mode 100644 tests/quotations/test_verbatim_ok.ec diff --git a/Makefile b/Makefile index 62ace7ba2c..9325f72db4 100644 --- a/Makefile +++ b/Makefile @@ -43,7 +43,7 @@ uninstall: $(DUNE) uninstall unit: build - $(CHECK) unit + $(CHECK) unit quotations-disabled stdlib: build $(CHECK) prelude stdlib diff --git a/config/tests.config b/config/tests.config index a5762df7d9..7c9de076fb 100644 --- a/config/tests.config +++ b/config/tests.config @@ -14,4 +14,11 @@ exclude = examples/MEE-CBC examples/old examples/old/list-ddh !examples/incomple okdirs = examples/MEE-CBC [test-unit] -okdirs = tests tests/exception +args = -enable-quotations +okdirs = tests tests/exception tests/quotations tests/quotations-project + +# Quotations are disabled by default (they run external programs); a file +# using one must fail when -enable-quotations is NOT given. This scenario +# deliberately omits the flag. +[test-quotations-disabled] +kodirs = tests/quotations-disabled diff --git a/doc/index.rst b/doc/index.rst index 8b6c7d9b2f..c28f7018e0 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -5,3 +5,4 @@ EasyCrypt reference manual :maxdepth: 2 tactics + quotations diff --git a/doc/quotations.rst b/doc/quotations.rst new file mode 100644 index 0000000000..03493c1fc8 --- /dev/null +++ b/doc/quotations.rst @@ -0,0 +1,249 @@ +======================================================================== +Quotations (external preprocessor) +======================================================================== + +A *quotation* lets you embed, directly in an EasyCrypt source file, a +fragment written in some other surface syntax, and have EasyCrypt expand it +into ordinary EasyCrypt code by delegating to an **external tool**. The tool +is a black box: EasyCrypt communicates with it over standard input and +standard output, so it can be written in any language. + +.. warning:: + + Quotations run external programs, so the feature is **disabled by + default**. Enable it explicitly with the command-line flag + ``-enable-quotations`` or the environment variable + ``EC_ENABLE_QUOTATIONS=1``. It cannot be enabled from ``easycrypt.project`` + (that file ships inside a checked-out tree, so allowing it to turn the + feature on would defeat the safeguard). While disabled, encountering a + quotation is a hard error, never a silent skip or a silent execution. Only + enable quotations for sources you trust. + +Quotations are processed during lexing, before parsing. A quotation expands +to a **sentence fragment**: its tokens are spliced into the surrounding +sentence, so a quotation may stand for only *part* of a sentence and several +quotations may appear in one sentence. The sentence terminator (``.``) is +always written by the user and never produced by a quotation. When the +external tool — or EasyCrypt's handling of its output — produces an error, the +location reported by EasyCrypt is mapped back to the **original** quoted text, +not to the generated code. + +------------------------------------------------------------------------ +Syntax +------------------------------------------------------------------------ + +A quotation is delimited by ``{%`` and ``%}``: + +.. admonition:: Syntax + + ``{% {name} {body} %}`` + +Here: + +- ``{name}`` is a lowercase identifier selecting which external *handler* + expands the quotation (see `Configuring handlers`_). + +- ``{body}`` is arbitrary text. It runs from the character following + ``{name}`` up to the matching ``%}``. The delimiters nest: a ``{% ... %}`` + pair occurring inside the body is kept verbatim and does not close the + outer quotation, so a body may itself contain quotation delimiters. + +A quotation expands to a sentence *fragment*, so the ``.`` that ends the +sentence is written outside the quotation. For example, a ``calc`` handler +that returns the value of an arithmetic expression:: + + op forty_two = {% calc 6 * 7 %}. + +expands to ``op forty_two = 42.``. Because the expansion is only a fragment, +quotations compose with ordinary source and with each other within one +sentence:: + + op mixed = {% calc 6 * 7 %} + ({% calc 2 + 3 %} * 10). + +It is an error for a quotation's expansion to contain a sentence terminator +(``.``): the fragment must not close the sentence itself. + +------------------------------------------------------------------------ +Configuring handlers +------------------------------------------------------------------------ + +A quotation ``name`` is resolved to a shell command in this order: + +- a binding in ``easycrypt.project`` (see below); + +- ``EC_QUOTE_`` — where ```` is ``name`` uppercased — gives the + command for that specific quotation name; + +- ``EC_QUOTE_CMD`` — a fallback command used for any quotation whose specific + variable is unset; + +- otherwise, an executable ``handlers/`` (also tried with the ``.py`` + and ``.sh`` extensions) sitting next to the source file. This lets a + directory of files be self-contained, needing no environment to set up — it + is how the test suite binds its handlers. + +The recommended way is the project file. In the ``[general]`` section of +``easycrypt.project``, add one repeatable ``quote`` entry per handler, of the +form ``name:command``:: + + [general] + quote = calc:handlers/calc.py + quote = verbatim:python3 tools/verbatim.py + +The ``command`` is a shell command (so it may include an interpreter and +arguments). When it is, verbatim, a relative path to an existing file, it is +resolved against the directory containing ``easycrypt.project``; otherwise it +is passed to the shell unchanged. Project-file bindings take precedence over +the environment, so the committed configuration is authoritative. + +To bind a quotation ad hoc through the environment instead:: + + export EC_QUOTE_CALC=/path/to/calc-handler + +A quotation whose name resolves to no command raises an error located at the +quotation. + +------------------------------------------------------------------------ +The handler protocol +------------------------------------------------------------------------ + +For each quotation, EasyCrypt launches the bound command, writes a request to +its standard input, and reads the expansion from its standard output. + +Request (sent by EasyCrypt) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A single header line, followed by the raw body:: + + #ec-quote v1 name= file= line= col= off= + + +where ``line``/``col`` are the 1-based line and 0-based column of the body's +first character in the original file, and ``off`` is its absolute character +offset. + +Response (returned by the handler) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A single JSON object on standard output:: + + { "expanded": "", + "segments": [ { "out": [ob, oe], "in": [ib, ie], "kind": "verbatim" }, + ... ] } + +``"expanded"`` (a string, required) is the replacement source. ``"segments"`` +(optional) is the *source map*: each segment maps a half-open character range +``[ob, oe)`` of the **expanded** source back to a range ``[ib, ie)`` of the +**body** (offsets relative to the start of each, the body being the handler's +own stdin payload). + +The two ``"kind"`` values answer one question: given an error at some offset +*inside* a segment's output range, can EasyCrypt compute the *exact* +corresponding input offset, or only point at the region as a whole? It depends +on whether the handler copied the text or invented it. + +- ``"kind": "verbatim"`` — the output range is a **character-for-character + copy** of the input range, so the two ranges have the same length + (``oe - ob == ie - ib``). Output character *k* is input character *k*, so an + error at output offset ``o`` maps to input offset ``ib + (o - ob)`` — + **column-precise**. Mark a segment verbatim exactly when you pass input + through unchanged. + +- ``"kind": "synthesized"`` — the output range was **generated** by the + handler (a computed value, boilerplate, glue), so it has no + character-to-character relationship with the input and the ranges typically + differ in length. There is no meaningful per-character offset to compute, so + the whole output range is attributed to the whole input range: an error + there points at the responsible region of the body rather than at a + misleading column. Mark a segment synthesized whenever the output is not a + literal copy. + +Why distinguish them: collapsing *everything* to the region (as synthesized +does) is always safe but throws away precision; the ``+ (o - ob)`` arithmetic +is only valid when the copy is exact, which is what ``verbatim`` asserts. A +real handler mixes both — for example, expanding ``{% sugar foo %}`` into +``lemma foo_lemma : `` would mark the boilerplate +``lemma foo_lemma :`` synthesized and the copied ```` verbatim, so +errors in the user's own text get exact columns while errors in the generated +glue fall back to the region. A handler that does not care about precision may +mark everything synthesized (coarse, but never wrong). + +As a safety check, EasyCrypt treats a segment as verbatim only if it is +labelled ``"verbatim"`` **and** the two ranges actually have equal length; a +mislabelled (length-mismatched) verbatim segment is downgraded to the safe +collapse rather than producing bogus offsets. + +If the response carries no (or an unparsable) ``"segments"`` field, the entire +expansion is attributed to the entire quotation (coarse mapping). Output that +is not a JSON object, or that lacks a string ``"expanded"`` field, is reported +as an error at the quotation. + +Errors +~~~~~~ + +A handler that exits with a non-zero status makes EasyCrypt raise an error +located at the quotation, using the handler's standard-error output as the +message. + +------------------------------------------------------------------------ +Location mapping +------------------------------------------------------------------------ + +Because the expanded code is lexed and parsed in a separate buffer, the +positions EasyCrypt computes for it would, naively, refer to the generated +text. Using the source map and the body's original offset, EasyCrypt rewrites +those positions so that **every** location it reports — parse errors, type +errors, and printed locations alike — refers to the original source file. + +For a ``verbatim`` segment this is exact down to the column; for a +``synthesized`` segment the location collapses to the originating region of +the body. Two examples make the difference concrete. + +*Verbatim.* A handler that copies its body through unchanged reports:: + + { "expanded": "op broken : int = no_such_op + 1", + "segments": [ { "out": [0, 32], "in": [0, 32], "kind": "verbatim" } ] } + +The error on ``no_such_op`` (output offset 18) maps to input offset 18, then +to the original file, so EasyCrypt points at the exact columns of +``no_such_op`` in the source — not at the quotation as a whole. + +*Synthesized.* The ``calc`` handler turns the body ``6 * 7`` into ``42``:: + + { "expanded": "42", + "segments": [ { "out": [0, 2], "in": [0, 5], "kind": "synthesized" } ] } + +Output and input have different lengths and ``42`` came from no particular +character of ``6 * 7``, so an error on ``42`` cannot be attributed to a column; +it points at the whole ``6 * 7`` region instead. + +This is also why offset-range segments are used rather than line markers such +as ``#line``: a line marker can only say "this generated line came from input +line *N*", whereas reporting an error at the exact columns of ``no_such_op`` +needs the character-level mapping a verbatim segment provides. + +------------------------------------------------------------------------ +Examples +------------------------------------------------------------------------ + +A ``calc`` handler that evaluates an integer expression returns the resulting +literal as a fragment, so:: + + op forty_two = {% calc 6 * 7 %}. + +expands to ``op forty_two = 42.``. + +A ``verbatim`` handler that copies its body through with a single ``verbatim`` +segment lets EasyCrypt point at the exact original character on error. Given:: + + {% verbatim op broken : int = no_such_op + 1 %}. + +EasyCrypt reports the unknown-identifier error at the column of ``no_such_op`` +inside the quotation, even though that identifier sits at a different offset in +the generated buffer. + +.. note:: + + The result of expanding a quotation is stored in the compiled ``.eco`` + file. When iterating on a handler, remove the stale ``.eco`` so the + quotation is expanded afresh. diff --git a/src/ec.ml b/src/ec.ml index 1a1efc5dd4..4d6a06a169 100644 --- a/src/ec.ml +++ b/src/ec.ml @@ -398,6 +398,14 @@ let main () = ldropts.ldro_idirs; end; + (* Quotations: enabled only via CLI/env (never easycrypt.project), and the + handlers declared in easycrypt.project are registered only then. *) + EcQuotation.set_enabled options.o_options.o_enable_quotes; + if options.o_options.o_enable_quotes then + List.iter + (fun (name, command) -> EcQuotation.register ~name ~command) + ldropts.ldro_quotes; + (* Initialize printer *) EcCorePrinting.Registry.register (module EcPrinting); @@ -780,6 +788,14 @@ let main () = List.iter (fun p -> let loc = p.EP.gl_action.EcLocation.pl_loc in + (* Mechanism B: a location should never escape quotation + position-remapping (EcIo) carrying the synthetic buffer + filename. If one does, collapse it rather than print + meaningless coordinates. *) + let loc = + if EcQuotation.is_sentinel loc.EcLocation.loc_fname + then EcLocation._dummy else loc + in (* -upto: if this command starts past the target, print goals and exit *) if past_upto loc then begin diff --git a/src/ecIo.ml b/src/ecIo.ml index 016545d85c..fedb45c0fd 100644 --- a/src/ecIo.ml +++ b/src/ecIo.ml @@ -38,6 +38,11 @@ type ecreader_r = { mutable ecr_atstart : bool; mutable ecr_trim : int; mutable ecr_tokens : EcParser.token list; + (* Pre-positioned triples produced by expanding a quotation. These are + emitted (front-first) before any token is read from [ecr_lexbuf], so a + single quotation can expand to several EC sentences across successive + [parse] calls. Positions are already remapped into the original file. *) + mutable ecr_expand : (EcParser.token * L.position * L.position) list; } type ecreader = ecreader_r Disposable.t @@ -48,7 +53,8 @@ let ecreader_of_lexbuf (buffer : Buffer.t) (lexbuf : L.lexbuf) : ecreader_r = ecr_source = buffer; ecr_atstart = true; ecr_trim = 0; - ecr_tokens = []; } + ecr_tokens = []; + ecr_expand = []; } (* -------------------------------------------------------------------- *) let lexbuf (reader : ecreader) = @@ -97,13 +103,71 @@ let finalize (ecreader : ecreader) = Disposable.dispose ecreader (* -------------------------------------------------------------------- *) -let lexer ?(checkpoint : _ I.checkpoint option) (ecreader : ecreader_r) = +(* Expand a quotation into a list of pre-positioned token triples. *) +(* The handler output is lexed in its own buffer; each token's positions *) +(* are remapped into the original file via the source map. *) +(* *) +(* A quotation expands to a FRAGMENT that is spliced into the surrounding *) +(* sentence: it may stand for only part of a sentence, and the sentence *) +(* terminator ('.') is always written by the user, never produced by the *) +(* expansion. Hence the fragment must contain no FINAL. *) +let expand_quotation (q : EcQuotation.quotation) + : (EcParser.token * L.position * L.position) list += + let (expanded, sm) = EcQuotation.run q in + let sub = Lexing.from_string expanded in + Lexing.set_filename sub (EcQuotation.sentinel_fname q); + + let remap o = EcQuotation.remap_position sm q o in + + let rec collect acc = + let toks = + try EcLexer.main sub + with EcLexer.LexicalError (_, msg) -> + EcQuotation.error q + (Printf.sprintf "lexical error in expansion: %s" msg) + in + (* positions of the lexeme just consumed by EcLexer.main *) + let sp = remap (Lexing.lexeme_start sub) in + let ep = remap (Lexing.lexeme_end sub) in + let acc = + List.fold_left (fun acc tk -> (tk, sp, ep) :: acc) acc toks in + match toks with + | [EcParser.EOF] -> List.rev acc + | _ -> collect acc + in + let triples = collect [] in + + (* drop the lexed EOF; the surrounding stream supplies sentence flow *) + let body = List.filter (fun (t, _, _) -> t <> EcParser.EOF) triples in + (* a fragment must not terminate the sentence: the '.' is the user's *) + let isfinal = function EcParser.FINAL _ -> true | _ -> false in + if List.exists (fun (t, _, _) -> isfinal t) body then + EcQuotation.error q + "quotation expansion must be a sentence fragment (it must not contain '.')"; + body + +(* -------------------------------------------------------------------- *) +let rec lexer ?(checkpoint : _ I.checkpoint option) (ecreader : ecreader_r) = let lexbuf = ecreader.ecr_lexbuf in let isfinal = function | EcParser.FINAL _ -> true | _ -> false in + (* Emit the next pre-positioned expansion triple, if any. *) + let emit_expand () = + match ecreader.ecr_expand with + | [] -> None + | triple :: rest -> + ecreader.ecr_expand <- rest; + Some triple + in + + match emit_expand () with + | Some triple -> triple + | None -> + if ecreader.ecr_atstart then ecreader.ecr_trim <- ecreader.ecr_lexbuf.Lexing.lex_curr_p.pos_cnum; @@ -119,6 +183,17 @@ let lexer ?(checkpoint : _ I.checkpoint option) (ecreader : ecreader_r) = ecreader.ecr_tokens <- tokens done; + (* Intercept a quotation token: expand it into a fragment and splice it. + A quotation always sits at the head of [ecr_tokens] (its lexer rule + returns a singleton list). An empty fragment is allowed -- recurse to + produce the next real token. *) + match ecreader.ecr_tokens with + | EcParser.QUOTATION q :: queue -> + ecreader.ecr_tokens <- queue; + ecreader.ecr_expand <- expand_quotation q; + lexer ?checkpoint ecreader + | _ -> + let token, queue = List.destruct ecreader.ecr_tokens in let token, prequeue = diff --git a/src/ecLexer.mll b/src/ecLexer.mll index 704b0e9764..b93792cd61 100644 --- a/src/ecLexer.mll +++ b/src/ecLexer.mll @@ -395,6 +395,20 @@ rule main = parse | "\"" { [STRING (Buffer.contents (string (Buffer.create 0) lexbuf))] } + (* black-box quotation: {% name %} (see ecQuotation.ml) *) + | "{%" blank* (lident as name) { + (* body starts at the current position (after name and the blanks/ + newline that follow it on this rule's match) *) + let bpos = Lexing.lexeme_end_p lexbuf in + let buf = Buffer.create 256 in + let epos = quotation buf 0 lexbuf in + [QUOTATION EcQuotation.{ + q_name = name; + q_body = Buffer.contents buf; + q_bpos = bpos; + q_epos = epos; }] + } + (* string symbols *) | ".." { [DOTDOT ] } | ".[" { [DLBRACKET] } @@ -483,6 +497,30 @@ and doccomment kind buf = parse doccomment kind buf lexbuf } +and quotation buf depth = parse + | "%}" { + if depth = 0 then + Lexing.lexeme_end_p lexbuf + else begin + Buffer.add_string buf "%}"; + quotation buf (depth - 1) lexbuf + end + } + | "{%" { + Buffer.add_string buf "{%"; + quotation buf (depth + 1) lexbuf + } + | newline { + Lexing.new_line lexbuf; + Buffer.add_char buf '\n'; + quotation buf depth lexbuf + } + | eof { lex_error lexbuf "unterminated quotation" } + | _ as c { + Buffer.add_char buf c; + quotation buf depth lexbuf + } + and string buf = parse | "\"" { buf } | "\\n" { Buffer.add_char buf '\n'; string buf lexbuf } diff --git a/src/ecOptions.ml b/src/ecOptions.ml index bd21a69aa0..35a5fff7b6 100644 --- a/src/ecOptions.ml +++ b/src/ecOptions.ml @@ -69,15 +69,17 @@ and prv_options = { } and ldr_options = { - ldro_idirs : (string option * string * bool) list; - ldro_boot : bool; + ldro_idirs : (string option * string * bool) list; + ldro_boot : bool; + ldro_quotes : (string * string) list; } and glb_options = { - o_why3 : string option; - o_reloc : bool; - o_ovrevict : string list; - o_loader : ldr_options; + o_why3 : string option; + o_reloc : bool; + o_ovrevict : string list; + o_loader : ldr_options; + o_enable_quotes : bool; } (* -------------------------------------------------------------------- *) @@ -90,6 +92,7 @@ type ini_options = { ini_timeout : int option; ini_idirs : (string option * string) list; ini_rdirs : (string option * string) list; + ini_quotes : (string * string) list; } type ini_context = { @@ -116,6 +119,8 @@ module Ini : sig val get_rdirs : ini_context -> (string option * string) list + val get_quotes : ini_context -> (string * string) list + (* ------------------------------------------------------------------ *) val get_all_ppwidth : ini_context list -> int option @@ -132,6 +137,8 @@ module Ini : sig val get_all_idirs : ini_context list -> (string option * string) list val get_all_rdirs : ini_context list -> (string option * string) list + + val get_all_quotes : ini_context list -> (string * string) list end = struct (* ------------------------------------------------------------------ *) let absolute ?(root : string option) (filename : string) = @@ -174,6 +181,20 @@ end = struct (snd_map (absolute ?root:ini.inic_root)) ini.inic_ini.ini_rdirs + (* A quotation command is resolved against the project directory only when + it is, verbatim, a relative path to an existing file (the common + [handlers/foo.py] case). Anything else -- a command with arguments, an + absolute path, or a name on the PATH -- is passed through unchanged. *) + let get_quotes (ini : ini_context) = + let resolve (cmd : string) = + match ini.inic_root with + | Some root when Filename.is_relative cmd + && Sys.file_exists (Filename.concat root cmd) -> + Filename.concat root cmd + | _ -> cmd + in + List.map (snd_map resolve) ini.inic_ini.ini_quotes + (* ------------------------------------------------------------------ *) let get_all_ppwidth (ini : ini_context list) = List.find_map_opt get_ppwidth ini @@ -198,6 +219,9 @@ end = struct let get_all_rdirs (ini : ini_context list) = List.flatten (List.map get_rdirs ini) + + let get_all_quotes (ini : ini_context list) = + List.flatten (List.map get_quotes ini) end (* -------------------------------------------------------------------- *) @@ -355,6 +379,8 @@ let specs = { `Spec ("why3" , `String, "why3 configuration file"); `Spec ("reloc" , `Flag , ""); `Spec ("no-evict", `String, "Don't evict given prover"); + `Spec ("enable-quotations", `Flag, + "Allow {% %} quotations to run external handler programs"); `Group "loader"; ]; @@ -463,6 +489,13 @@ let parse_idir s = | None -> (None, expand s) | Some (nm, s) -> (Some (expand nm), expand s) +(* A quotation binding is [name:command]; split on the first ':'. *) +let parse_quote s = + match String.Exceptionless.split ~by:":" s with + | Some (nm, cmd) when String.trim nm <> "" -> + Some (String.trim nm, String.trim cmd) + | _ -> None + (* -------------------------------------------------------------------- *) let dirs_of_env = let parse_ecpath s = @@ -476,7 +509,7 @@ let dirs_of_env = (* -------------------------------------------------------------------- *) let ldr_options_of_values ~env ?(ini = []) values = if get_flag "boot" values then - { ldro_idirs = []; ldro_boot = true; } + { ldro_idirs = []; ldro_boot = true; ldro_quotes = Ini.get_all_quotes ini; } else let add_rec (fl : bool) ((nm, x) : string option * string) = (nm, x, fl) in @@ -490,8 +523,9 @@ let ldr_options_of_values ~env ?(ini = []) values = let rdirs = List.map (add_rec true) rdirs in let idirs_R = List.map (add_rec true) (List.map parse_idir (get_strings "R" values)) in - { ldro_idirs = idirs @ idirs_I @ rdirs @ idirs_R; - ldro_boot = false; } + { ldro_idirs = idirs @ idirs_I @ rdirs @ idirs_R; + ldro_boot = false; + ldro_quotes = Ini.get_all_quotes ini; } let glb_options_of_values ~env ini values = let why3 = @@ -501,10 +535,20 @@ let glb_options_of_values ~env ini values = let ovrevict = Ini.get_all_ovrevict ini in + (* Enabling quotations runs external programs, so it is taken ONLY from the + command line or the environment -- never from [ini]/easycrypt.project, + which ships inside the checked-out tree. *) + let enable_quotes = + get_flag "enable-quotations" values + || (env && (match Sys.getenv_opt "EC_ENABLE_QUOTATIONS" with + | Some ("" | "0" | "false") | None -> false + | Some _ -> true)) in + { o_why3 = why3; o_reloc = get_flag "reloc" values; o_ovrevict = ovrevict @ (get_strings "no-evict" values); - o_loader = ldr_options_of_values ~env ~ini values; } + o_loader = ldr_options_of_values ~env ~ini values; + o_enable_quotes = enable_quotes; } let prv_options_of_values ini values = let provers = @@ -747,7 +791,8 @@ let read_ini_file (filename : string) = ini_quorum = tryint "quorum" ; ini_timeout = tryint "timeout" ; ini_idirs = List.map parse_idir (trylist "idirs"); - ini_rdirs = List.map parse_idir (trylist "rdirs"); } in + ini_rdirs = List.map parse_idir (trylist "rdirs"); + ini_quotes = List.filter_map parse_quote (trylist "quote"); } in { ini_ppwidth = ini.ini_ppwidth; ini_why3 = omap expand ini.ini_why3; @@ -756,4 +801,5 @@ let read_ini_file (filename : string) = ini_quorum = ini.ini_quorum; ini_timeout = ini.ini_timeout; ini_idirs = ini.ini_idirs; - ini_rdirs = ini.ini_rdirs; } + ini_rdirs = ini.ini_rdirs; + ini_quotes = ini.ini_quotes; } diff --git a/src/ecOptions.mli b/src/ecOptions.mli index a5c09b11d9..72bd21e3c0 100644 --- a/src/ecOptions.mli +++ b/src/ecOptions.mli @@ -65,15 +65,17 @@ and prv_options = { } and ldr_options = { - ldro_idirs : (string option * string * bool) list; - ldro_boot : bool; + ldro_idirs : (string option * string * bool) list; + ldro_boot : bool; + ldro_quotes : (string * string) list; } and glb_options = { - o_why3 : string option; - o_reloc : bool; - o_ovrevict : string list; - o_loader : ldr_options; + o_why3 : string option; + o_reloc : bool; + o_ovrevict : string list; + o_loader : ldr_options; + o_enable_quotes : bool; } (* -------------------------------------------------------------------- *) @@ -86,6 +88,7 @@ type ini_options = { ini_timeout : int option; ini_idirs : (string option * string) list; ini_rdirs : (string option * string) list; + ini_quotes : (string * string) list; } type ini_context = { diff --git a/src/ecParser.mly b/src/ecParser.mly index 117ccf9a15..292aebc1bd 100644 --- a/src/ecParser.mly +++ b/src/ecParser.mly @@ -602,6 +602,7 @@ %token LTCOLON DASHLT GT LT GE LE LTSTARGT LTLTSTARGT LTSTARGTGT %token FINAL %token DOCCOMMENT +%token QUOTATION %nonassoc prec_below_comma %nonassoc COMMA ELSE diff --git a/src/ecQuotation.ml b/src/ecQuotation.ml new file mode 100644 index 0000000000..c3aa9e3807 --- /dev/null +++ b/src/ecQuotation.ml @@ -0,0 +1,301 @@ +(* -------------------------------------------------------------------- *) +(* Black-box quotation preprocessor support. *) +(* *) +(* A quotation {% name ... %} is lexed as a single opaque token *) +(* (see ecLexer.mll). EcIo expands it by shelling out to an external *) +(* handler over stdin/stdout, re-lexes the produced EC source, and *) +(* remaps every position back into the original source file using the *) +(* segment map returned by the handler. See preprocessor/DESIGN.md. *) +(* -------------------------------------------------------------------- *) +open EcUtils + +module L = Lexing + +(* -------------------------------------------------------------------- *) +(* Raw quotation as produced by the lexer. *) +(* *) +(* [q_bpos] is the [Lexing.position] of the first byte of the body in *) +(* the original file; its [pos_cnum] is the absolute body-start offset *) +(* referred to as [q0] in the design. *) +type quotation = { + q_name : string; + q_body : string; + q_bpos : L.position; (* position of body start in original file *) + q_epos : L.position; (* position just after the closing "%}" *) +} + +(* -------------------------------------------------------------------- *) +exception QuotationError of EcLocation.t * string + +let () = + EcPException.register (fun fmt exn -> + match exn with + | QuotationError (_, msg) -> + Format.fprintf fmt "quotation error: %s" msg + | _ -> raise exn) + +let qloc (q : quotation) : EcLocation.t = + EcLocation.make q.q_bpos q.q_epos + +let error (q : quotation) msg = + raise (QuotationError (qloc q, msg)) + +(* -------------------------------------------------------------------- *) +(* A source-map segment. Offsets are relative to the start of their *) +(* respective buffers: [out] into the expanded EC source, [in] into the *) +(* quotation body. *) +type segment = { + s_out : int * int; (* [ob, oe) in expanded output *) + s_in : int * int; (* [ib, ie) in original body *) + s_verbatim : bool; (* true => char-for-char (oe-ob = ie-ib) *) +} + +type segmap = { + sm_segments : segment array; (* sorted by s_out start *) + sm_body_len : int; (* length of the original body *) +} + +(* Coarse fallback: the whole expansion maps to the whole body. *) +let coarse_segmap ~(body_len : int) : segmap = + { sm_segments = + [| { s_out = (0, max_int); + s_in = (0, body_len); + s_verbatim = false } |]; + sm_body_len = body_len; } + +(* -------------------------------------------------------------------- *) +(* Parse the JSON source-map section emitted by the handler. *) +let segmap_of_json ~(body_len : int) (json : Yojson.Safe.t) : segmap = + let open Yojson.Safe.Util in + let pair name j = + match to_list (member name j) with + | [a; b] -> (to_int a, to_int b) + | _ -> failwith (Printf.sprintf "bad %s range in source map" name) + in + let seg j = + let (ob, oe) = pair "out" j in + let (ib, ie) = pair "in" j in + let kind = + match member "kind" j with + | `String s -> s + | `Null -> "verbatim" + | _ -> failwith "bad kind in source map" + in + { s_out = (ob, oe); + s_in = (ib, ie); + s_verbatim = (kind = "verbatim" && oe - ob = ie - ib); } + in + let segs = List.map seg (to_list (member "segments" json)) in + let segs = List.sort (fun a b -> compare (fst a.s_out) (fst b.s_out)) segs in + match segs with + | [] -> coarse_segmap ~body_len + | _ -> { sm_segments = Array.of_list segs; sm_body_len = body_len; } + +(* -------------------------------------------------------------------- *) +(* Reverse mapping: an offset [o] in the expanded output -> an absolute *) +(* offset in the original file. [q0] is the body-start offset. *) +let remap_offset (sm : segmap) ~(q0 : int) (o : int) : int = + let n = Array.length sm.sm_segments in + (* find the last segment whose out-start <= o *) + let rec find lo hi best = + if lo > hi then best else + let mid = (lo + hi) / 2 in + let (ob, _) = sm.sm_segments.(mid).s_out in + if ob <= o then find (mid + 1) hi mid else find lo (mid - 1) best + in + if n = 0 then q0 else + let idx = find 0 (n - 1) 0 in + let s = sm.sm_segments.(idx) in + let (ob, oe) = s.s_out and (ib, ie) = s.s_in in + let in_off = + if s.s_verbatim && o >= ob && o < oe then + ib + (o - ob) + else + (* synthesized / out-of-range: collapse to the originating in-span, + biased to the start so error markers land at the responsible region *) + if o <= ob then ib + else if o >= oe then ie + else ib + in + q0 + (max 0 (min sm.sm_body_len in_off)) + +(* -------------------------------------------------------------------- *) +(* Line-start table of the ORIGINAL file, to turn an absolute offset *) +(* into a (line, bol) pair for a Lexing.position. Built lazily per *) +(* original filename and cached. *) +type linetab = int array (* bol offset of each line, line 1 at idx 0 *) + +let linetab_cache : (string, linetab) Hashtbl.t = Hashtbl.create 0 + +let build_linetab (fname : string) : linetab option = + try + let ic = open_in_bin fname in + let len = in_channel_length ic in + let data = really_input_string ic len in + close_in ic; + let bols = ref [0] in + String.iteri (fun i c -> if c = '\n' then bols := (i + 1) :: !bols) data; + Some (Array.of_list (List.rev !bols)) + with _ -> None + +let linetab (fname : string) : linetab option = + match Hashtbl.find_opt linetab_cache fname with + | Some t -> Some t + | None -> + match build_linetab fname with + | None -> None + | Some t -> Hashtbl.replace linetab_cache fname t; Some t + +(* (line, bol) for an absolute offset using the original file's table. *) +let line_of_offset (fname : string) (off : int) : int * int = + match linetab fname with + | None -> (1, 0) + | Some tab -> + let n = Array.length tab in + let rec find lo hi best = + if lo > hi then best else + let mid = (lo + hi) / 2 in + if tab.(mid) <= off then find (mid + 1) hi mid else find lo (mid - 1) best + in + let idx = find 0 (n - 1) 0 in + (idx + 1, tab.(idx)) + +(* -------------------------------------------------------------------- *) +(* Turn an expanded-output offset into a remapped Lexing.position that *) +(* refers into the original file. *) +let remap_position (sm : segmap) (q : quotation) (o : int) : L.position = + let q0 = q.q_bpos.L.pos_cnum in + let abs = remap_offset sm ~q0 o in + let (lnum, bol) = line_of_offset q.q_bpos.L.pos_fname abs in + { L.pos_fname = q.q_bpos.L.pos_fname; + L.pos_lnum = lnum; + L.pos_bol = bol; + L.pos_cnum = abs; } + +(* -------------------------------------------------------------------- *) +(* Sentinel filename for the expanded buffer (Mechanism B). *) +let sentinel_fname (q : quotation) : string = + Printf.sprintf "" q.q_bpos.L.pos_fname q.q_bpos.L.pos_cnum + +let is_sentinel (fname : string) : bool = + let prefix = "= pn && String.sub fname 0 pn = prefix + +(* -------------------------------------------------------------------- *) +(* Quotations run arbitrary external programs, so the feature is OFF by *) +(* default and must be enabled explicitly by the user (a command-line *) +(* flag or an environment variable). It is deliberately NOT enableable *) +(* from easycrypt.project: that file ships inside the checked-out tree, *) +(* so letting it turn the feature on would defeat the safeguard. While *) +(* disabled, encountering a quotation is a hard error -- never a silent *) +(* skip or a silent execution. *) +let enabled : bool ref = ref false + +let set_enabled (b : bool) : unit = + enabled := b + +(* -------------------------------------------------------------------- *) +(* Registry of handler commands declared in easycrypt.project (or via *) +(* the API). Populated at startup, consulted by [resolve_command]. *) +let registry : (string, string) Hashtbl.t = Hashtbl.create 0 + +let register ~(name : string) ~(command : string) : unit = + Hashtbl.replace registry name command + +(* -------------------------------------------------------------------- *) +(* Handler resolution. In order: *) +(* 1. a binding from easycrypt.project (the registry); *) +(* 2. environment variable EC_QUOTE_ (uppercased name); *) +(* 3. environment variable EC_QUOTE_CMD (catch-all); *) +(* 4. an executable [handlers/] (optionally with a known script *) +(* extension) sitting next to the source file -- this makes a test *) +(* directory self-contained, with no environment to set up. *) +let resolve_command (q : quotation) : string option = + let name = q.q_name in + match Hashtbl.find_opt registry name with + | Some _ as c -> c + | None -> + match Sys.getenv_opt ("EC_QUOTE_" ^ String.uppercase_ascii name) with + | Some _ as c -> c + | None -> + match Sys.getenv_opt "EC_QUOTE_CMD" with + | Some _ as c -> c + | None -> + let dir = Filename.dirname q.q_bpos.L.pos_fname in + let candidates = + List.map (Filename.concat (Filename.concat dir "handlers")) + [name; name ^ ".py"; name ^ ".sh"] + in + List.find_opt + (fun p -> try Unix.access p [Unix.X_OK]; true with _ -> false) + candidates + +(* -------------------------------------------------------------------- *) +(* Run the external handler. Returns (expanded_source, segmap). *) +let run (q : quotation) : string * segmap = + if not !enabled then + error q + "quotations are disabled; they run external programs and must be \ + enabled explicitly with --enable-quotations (or EC_ENABLE_QUOTATIONS=1)"; + let cmd = + match resolve_command q with + | Some c -> c + | None -> + error q (Printf.sprintf + "no handler bound for quotation '%s' (set EC_QUOTE_%s, EC_QUOTE_CMD, \ + or provide handlers/%s next to the source file)" + q.q_name (String.uppercase_ascii q.q_name) q.q_name) + in + let header = + Printf.sprintf "#ec-quote v1 name=%s file=%s line=%d col=%d off=%d\n" + q.q_name q.q_bpos.L.pos_fname + q.q_bpos.L.pos_lnum + (q.q_bpos.L.pos_cnum - q.q_bpos.L.pos_bol) + q.q_bpos.L.pos_cnum + in + let (ic, oc) = Unix.open_process cmd in + let raw = + try + output_string oc header; + output_string oc q.q_body; + close_out oc; + let buf = Buffer.create 1024 in + let chunk = Bytes.create 4096 in + let rec loop () = + let n = input ic chunk 0 (Bytes.length chunk) in + if n > 0 then (Buffer.add_subbytes buf chunk 0 n; loop ()) + in + loop (); + Buffer.contents buf + with e -> + ignore (Unix.close_process (ic, oc)); + error q (Printf.sprintf "handler '%s' failed: %s" cmd (Printexc.to_string e)) + in + let status = Unix.close_process (ic, oc) in + (match status with + | Unix.WEXITED 0 -> () + | Unix.WEXITED n -> error q (Printf.sprintf "handler exited with code %d:\n%s" n raw) + | Unix.WSIGNALED n -> error q (Printf.sprintf "handler killed by signal %d" n) + | Unix.WSTOPPED n -> error q (Printf.sprintf "handler stopped by signal %d" n)); + (* The handler writes a single JSON object on stdout: + { "expanded": , "segments": [ ... ] } + [expanded] is the replacement source; [segments] (optional) is the + source map -- absent or unparsable segments fall back to a coarse map. *) + let body_len = String.length q.q_body in + let json = + try Yojson.Safe.from_string raw + with _ -> + error q "handler output is not valid JSON \ + (expected { \"expanded\": ..., \"segments\": ... })" + in + let expanded = + match Yojson.Safe.Util.member "expanded" json with + | `String s -> s + | _ -> error q "handler output has no string \"expanded\" field" + in + let sm = + try segmap_of_json ~body_len json + with _ -> coarse_segmap ~body_len + in + (expanded, sm) diff --git a/tests/quotations-disabled/handlers/calc.py b/tests/quotations-disabled/handlers/calc.py new file mode 100755 index 0000000000..404ffc1449 --- /dev/null +++ b/tests/quotations-disabled/handlers/calc.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +"""'calc' quotation handler: evaluate an integer arithmetic expression and +splice the resulting literal into EasyCrypt source. + +A quotation expands to a sentence *fragment*; the surrounding sentence (and +its terminating '.') is written by the user. So the body is a bare +expression and the expansion is just its value: + + op forty_two = {% calc 6 * 7 %}. + expands to + op forty_two = 42. + +The literal has no 1:1 origin, so the whole output is reported as a single +SYNTHESIZED segment mapping to the whole body: any downstream error points at +the quotation region as a whole. +""" +import sys +import json + + +def main() -> int: + raw = sys.stdin.buffer.read().decode("utf-8", "replace") + nl = raw.find("\n") + body = raw[nl + 1:] if nl >= 0 else raw + expr = body.strip() + + try: + # integer arithmetic only; no names/builtins + value = eval(expr, {"__builtins__": {}}, {}) # noqa: S307 (sandboxed) + if not isinstance(value, int): + raise ValueError("result is not an integer") + except Exception as e: # noqa: BLE001 + sys.stderr.write(f"#ec-error off=0 len={len(body)} message=calc: {e}\n") + return 1 + + expanded = str(value) + segments = [{ + "out": [0, len(expanded)], + "in": [0, len(body)], + "kind": "synthesized", + }] + + json.dump({"expanded": expanded, "segments": segments}, sys.stdout) + sys.stdout.flush() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/quotations-disabled/test_disabled.ec b/tests/quotations-disabled/test_disabled.ec new file mode 100644 index 0000000000..0038016673 --- /dev/null +++ b/tests/quotations-disabled/test_disabled.ec @@ -0,0 +1,7 @@ +require import Int. + +(* Security check: quotations are OFF by default. A handler IS available + (handlers/calc.py beside this file), so the ONLY reason this must fail is + that -enable-quotations was not passed. This file lives in a 'kodirs' + scenario that deliberately omits the flag; it is expected to fail. *) +op forty_two = {% calc 6 * 7 %}. diff --git a/tests/quotations-project/calc.py b/tests/quotations-project/calc.py new file mode 100755 index 0000000000..404ffc1449 --- /dev/null +++ b/tests/quotations-project/calc.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +"""'calc' quotation handler: evaluate an integer arithmetic expression and +splice the resulting literal into EasyCrypt source. + +A quotation expands to a sentence *fragment*; the surrounding sentence (and +its terminating '.') is written by the user. So the body is a bare +expression and the expansion is just its value: + + op forty_two = {% calc 6 * 7 %}. + expands to + op forty_two = 42. + +The literal has no 1:1 origin, so the whole output is reported as a single +SYNTHESIZED segment mapping to the whole body: any downstream error points at +the quotation region as a whole. +""" +import sys +import json + + +def main() -> int: + raw = sys.stdin.buffer.read().decode("utf-8", "replace") + nl = raw.find("\n") + body = raw[nl + 1:] if nl >= 0 else raw + expr = body.strip() + + try: + # integer arithmetic only; no names/builtins + value = eval(expr, {"__builtins__": {}}, {}) # noqa: S307 (sandboxed) + if not isinstance(value, int): + raise ValueError("result is not an integer") + except Exception as e: # noqa: BLE001 + sys.stderr.write(f"#ec-error off=0 len={len(body)} message=calc: {e}\n") + return 1 + + expanded = str(value) + segments = [{ + "out": [0, len(expanded)], + "in": [0, len(body)], + "kind": "synthesized", + }] + + json.dump({"expanded": expanded, "segments": segments}, sys.stdout) + sys.stdout.flush() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/quotations-project/easycrypt.project b/tests/quotations-project/easycrypt.project new file mode 100644 index 0000000000..1ced1bb43c --- /dev/null +++ b/tests/quotations-project/easycrypt.project @@ -0,0 +1,5 @@ +[general] +provers = CVC5@1.0 +provers = Z3@4.12 +quote = calc:calc.py +quote = verbatim:verbatim.py diff --git a/tests/quotations-project/test_project_calc.ec b/tests/quotations-project/test_project_calc.ec new file mode 100644 index 0000000000..915ad5049f --- /dev/null +++ b/tests/quotations-project/test_project_calc.ec @@ -0,0 +1,10 @@ +require import Int. + +(* The 'calc' and 'verbatim' handlers are bound in this directory's + easycrypt.project (quote = name:path), not via the environment or a + handlers/ subdirectory. This checks that project-file bindings work. *) +op forty_two = {% calc 6 * 7 %}. + +{% verbatim op answer : int = 42 %}. + +lemma check : forty_two = answer by []. diff --git a/tests/quotations-project/verbatim.py b/tests/quotations-project/verbatim.py new file mode 100755 index 0000000000..2218c10e63 --- /dev/null +++ b/tests/quotations-project/verbatim.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +"""Verbatim quotation handler for EasyCrypt. + +Protocol (see doc/quotations.rst): + stdin : "#ec-quote v1 name=... file=... line=L col=C off=O\n" + body + stdout: a single JSON object { "expanded": , "segments": [...] } + +This handler copies the body through unchanged, emitting a single VERBATIM +segment so that any error EasyCrypt reports inside the expansion maps back to +the exact original character. It is the simplest possible black-box tool and +exercises the column-precise reverse location mapping. +""" +import sys +import json + + +def main() -> int: + raw = sys.stdin.buffer.read().decode("utf-8", "replace") + # split header line from body + nl = raw.find("\n") + body = raw[nl + 1:] if nl >= 0 else raw + + expanded = body # identity + segments = [{ + "out": [0, len(expanded)], + "in": [0, len(body)], + "kind": "verbatim", + }] + + json.dump({"expanded": expanded, "segments": segments}, sys.stdout) + sys.stdout.flush() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/quotations/handlers/calc.py b/tests/quotations/handlers/calc.py new file mode 100755 index 0000000000..404ffc1449 --- /dev/null +++ b/tests/quotations/handlers/calc.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +"""'calc' quotation handler: evaluate an integer arithmetic expression and +splice the resulting literal into EasyCrypt source. + +A quotation expands to a sentence *fragment*; the surrounding sentence (and +its terminating '.') is written by the user. So the body is a bare +expression and the expansion is just its value: + + op forty_two = {% calc 6 * 7 %}. + expands to + op forty_two = 42. + +The literal has no 1:1 origin, so the whole output is reported as a single +SYNTHESIZED segment mapping to the whole body: any downstream error points at +the quotation region as a whole. +""" +import sys +import json + + +def main() -> int: + raw = sys.stdin.buffer.read().decode("utf-8", "replace") + nl = raw.find("\n") + body = raw[nl + 1:] if nl >= 0 else raw + expr = body.strip() + + try: + # integer arithmetic only; no names/builtins + value = eval(expr, {"__builtins__": {}}, {}) # noqa: S307 (sandboxed) + if not isinstance(value, int): + raise ValueError("result is not an integer") + except Exception as e: # noqa: BLE001 + sys.stderr.write(f"#ec-error off=0 len={len(body)} message=calc: {e}\n") + return 1 + + expanded = str(value) + segments = [{ + "out": [0, len(expanded)], + "in": [0, len(body)], + "kind": "synthesized", + }] + + json.dump({"expanded": expanded, "segments": segments}, sys.stdout) + sys.stdout.flush() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/quotations/handlers/verbatim.py b/tests/quotations/handlers/verbatim.py new file mode 100755 index 0000000000..2218c10e63 --- /dev/null +++ b/tests/quotations/handlers/verbatim.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +"""Verbatim quotation handler for EasyCrypt. + +Protocol (see doc/quotations.rst): + stdin : "#ec-quote v1 name=... file=... line=L col=C off=O\n" + body + stdout: a single JSON object { "expanded": , "segments": [...] } + +This handler copies the body through unchanged, emitting a single VERBATIM +segment so that any error EasyCrypt reports inside the expansion maps back to +the exact original character. It is the simplest possible black-box tool and +exercises the column-precise reverse location mapping. +""" +import sys +import json + + +def main() -> int: + raw = sys.stdin.buffer.read().decode("utf-8", "replace") + # split header line from body + nl = raw.find("\n") + body = raw[nl + 1:] if nl >= 0 else raw + + expanded = body # identity + segments = [{ + "out": [0, len(expanded)], + "in": [0, len(body)], + "kind": "verbatim", + }] + + json.dump({"expanded": expanded, "segments": segments}, sys.stdout) + sys.stdout.flush() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/quotations/test_calc.ec b/tests/quotations/test_calc.ec new file mode 100644 index 0000000000..e6a2e702dc --- /dev/null +++ b/tests/quotations/test_calc.ec @@ -0,0 +1,9 @@ +require import Int. + +(* The 'calc' quotation expands to a sentence FRAGMENT (the literal 42). + The surrounding 'op forty_two = ... .' -- including the terminating '.' -- + is written here, not by the handler. This shows a quotation used in the + middle of a sentence. *) +op forty_two = {% calc 6 * 7 %}. + +lemma check : forty_two = 42 by []. diff --git a/tests/quotations/test_calc_inline.ec b/tests/quotations/test_calc_inline.ec new file mode 100644 index 0000000000..d11a6a3595 --- /dev/null +++ b/tests/quotations/test_calc_inline.ec @@ -0,0 +1,8 @@ +require import Int. + +(* Two quotations appearing inline within a single sentence, each replacing + only part of it. The handler returns bare literals; everything else -- + operators, parentheses, the terminating '.' -- is ordinary EC source. *) +op mixed = {% calc 6 * 7 %} + ({% calc 2 + 3 %} * 10). + +lemma check : mixed = 92 by []. diff --git a/tests/quotations/test_verbatim_err.ec b/tests/quotations/test_verbatim_err.ec new file mode 100644 index 0000000000..c1fa067c0f --- /dev/null +++ b/tests/quotations/test_verbatim_err.ec @@ -0,0 +1,13 @@ +require import Int. + +(* Error-location test. The body references an undefined identifier + 'no_such_op'. The 'fail' prefix asserts that expanding and checking this + sentence raises an error -- so the file as a whole still succeeds while + exercising the failure path. + + The quotation is a FRAGMENT; the terminating '.' is written after %}. + + When run without 'fail', EasyCrypt reports the error at the ORIGINAL + location of 'no_such_op' inside the quotation (column-precise, via the + verbatim segment map). *) +fail {% verbatim op broken : int = no_such_op + 1 %}. diff --git a/tests/quotations/test_verbatim_ok.ec b/tests/quotations/test_verbatim_ok.ec new file mode 100644 index 0000000000..4a39123aee --- /dev/null +++ b/tests/quotations/test_verbatim_ok.ec @@ -0,0 +1,8 @@ +require import Int. + +(* 'verbatim' copies the body through unchanged (one verbatim segment). + The body is a sentence FRAGMENT -- the terminating '.' is written by the + user, after %}, not inside the quotation. *) +{% verbatim op answer : int = 42 %}. + +lemma check : answer = 42 by [].