Review notes. Text ahead of the first "diff --git" header is skipped by
patch(1) and git-apply, so these notes do not affect application.

NOTE(review): the submitted patch arrived with its line breaks and indentation
collapsed. Line breaks, Python indentation and the position of blank context
lines were rebuilt so that every hunk matches its original line counts; the
exact placement of blank context lines and continuation indents is a best
guess and should be confirmed against the tree before applying.

Amendments relative to the submitted change (hunk headers recomputed):
CMark.__init__ now raises FileNotFoundError when no candidate library exists
instead of reaching CDLL() with 'libpath' unbound; the ctypes wildcard import
is spelled out; the fuzz-corpus loop uses enumerate(); the new helpers carry
docstrings. The "index" lines still name the blobs of the submitted change.

diff --git a/test/cmark.py b/test/cmark.py
index 38a310cf..31976bc8 100644
--- a/test/cmark.py
+++ b/test/cmark.py
@@ -1,40 +1,103 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-from ctypes import CDLL, c_char_p, c_long, c_int
-from subprocess import *
+from ctypes import (CDLL, CFUNCTYPE, POINTER, Structure, c_char, c_char_p,
+                    c_int, c_size_t, c_void_p, string_at)
+from subprocess import Popen, PIPE
 import platform
 import os
 
+class cmark_mem(Structure):
+    """ctypes layout used as the return type of cmark_get_default_mem_allocator.
+
+    Only ``free`` is called from Python, so ``calloc`` and ``realloc``
+    are left as untyped pointers.
+    """
+    _fields_ = [("calloc", c_void_p),
+                ("realloc", c_void_p),
+                ("free", CFUNCTYPE(None, c_void_p))]
+
 def pipe_through_prog(prog, text):
+    """Feed text to prog on stdin; return [returncode, stdout as str, stderr bytes]."""
     p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
     [result, err] = p1.communicate(input=text.encode('utf-8'))
-    return [p1.returncode, result, err]
+    return [p1.returncode, result.decode('utf-8'), err]
+
+def to_html(lib, text):
+    """Render text to HTML through the loaded library; return [0, html, '']."""
+    get_alloc = lib.cmark_get_default_mem_allocator
+    get_alloc.restype = POINTER(cmark_mem)
+    free_func = get_alloc().contents.free
+
+    markdown = lib.cmark_markdown_to_html
+    # POINTER(c_char) rather than c_char_p: ctypes would turn a c_char_p result
+    # into bytes and drop the pointer, which is still needed for free_func.
+    markdown.restype = POINTER(c_char)
+    markdown.argtypes = [c_char_p, c_size_t, c_int]
 
-def use_library(lib, text):
     textbytes = text.encode('utf-8')
     textlen = len(textbytes)
-    return [0, lib(textbytes, textlen, 0), '']
+    # 1 << 17 == CMARK_OPT_UNSAFE
+    cstring = markdown(textbytes, textlen, 1 << 17)
+    result = string_at(cstring).decode('utf-8')
+    free_func(cstring)
+
+    return [0, result, '']
+
+def to_commonmark(lib, text):
+    """Parse text and render it back as CommonMark; return [0, commonmark, '']."""
+    get_alloc = lib.cmark_get_default_mem_allocator
+    get_alloc.restype = POINTER(cmark_mem)
+    free_func = get_alloc().contents.free
+
+    parse_document = lib.cmark_parse_document
+    parse_document.restype = c_void_p
+    parse_document.argtypes = [c_char_p, c_size_t, c_int]
+
+    render_commonmark = lib.cmark_render_commonmark
+    render_commonmark.restype = POINTER(c_char)
+    render_commonmark.argtypes = [c_void_p, c_int, c_int]
+
+    free_node = lib.cmark_node_free
+    free_node.argtypes = [c_void_p]
+
+    textbytes = text.encode('utf-8')
+    textlen = len(textbytes)
+    node = parse_document(textbytes, textlen, 0)
+    cstring = render_commonmark(node, 0, 0)
+    result = string_at(cstring).decode('utf-8')
+    free_func(cstring)
+    free_node(node)
+
+    return [0, result, '']
 
 class CMark:
     def __init__(self, prog=None, library_dir=None):
         self.prog = prog
         if prog:
+            prog += ' --unsafe'
             self.to_html = lambda x: pipe_through_prog(prog, x)
+            self.to_commonmark = lambda x: pipe_through_prog(prog + ' -t commonmark', x)
         else:
             sysname = platform.system()
             if sysname == 'Darwin':
-                libname = "libcmark.dylib"
+                libnames = [ "libcmark.dylib" ]
             elif sysname == 'Windows':
-                libname = "cmark.dll"
-            else:
-                libname = "libcmark.so"
-            if library_dir:
-                libpath = os.path.join(library_dir, libname)
+                libnames = [ "cmark.dll", "libcmark.dll" ]
             else:
-                libpath = os.path.join("build", "src", libname)
+                libnames = [ "libcmark.so" ]
+            if not library_dir:
+                library_dir = os.path.join("build", "src")
+            for libname in libnames:
+                candidate = os.path.join(library_dir, libname)
+                if os.path.isfile(candidate):
+                    libpath = candidate
+                    break
+            else:
+                # Nothing matched: report it here rather than as an unbound 'libpath'.
+                raise FileNotFoundError(
+                    "no cmark library (%s) found in %s"
+                    % (", ".join(libnames), library_dir))
             cmark = CDLL(libpath)
-            markdown = cmark.cmark_markdown_to_html
-            markdown.restype = c_char_p
-            markdown.argtypes = [c_char_p, c_long, c_int]
-            self.to_html = lambda x: use_library(markdown, x)
+            self.to_html = lambda x: to_html(cmark, x)
+            self.to_commonmark = lambda x: to_commonmark(cmark, x)
diff --git a/test/spec_tests.py b/test/spec_tests.py
index f8cb7092..615e77b9 100755
--- a/test/spec_tests.py
+++ b/test/spec_tests.py
@@ -6,42 +6,45 @@
 import argparse
 import re
 import json
+import os
 from cmark import CMark
 from normalize import normalize_html
 
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Run cmark tests.')
-    parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
-            help='program to test')
-    parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt',
-            help='path to spec')
-    parser.add_argument('-P', '--pattern', dest='pattern', nargs='?',
-            default=None, help='limit to sections matching regex pattern')
-    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
-            default=None, help='directory containing dynamic library')
-    parser.add_argument('--no-normalize', dest='normalize',
-            action='store_const', const=False, default=True,
-            help='do not normalize HTML')
-    parser.add_argument('-d', '--dump-tests', dest='dump_tests',
-            action='store_const', const=True, default=False,
-            help='dump tests in JSON format')
-    parser.add_argument('--debug-normalization', dest='debug_normalization',
-            action='store_const', const=True,
-            default=False, help='filter stdin through normalizer for testing')
-    parser.add_argument('-n', '--number', type=int, default=None,
-            help='only consider the test with the given number')
-    parser.add_argument('--track', metavar='path',
-            help='track which test cases pass/fail in the given JSON file and only report changes')
-    args = parser.parse_args(sys.argv[1:])
+parser = argparse.ArgumentParser(description='Run cmark tests.')
+parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
+        help='program to test')
+parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt',
+        help='path to spec')
+parser.add_argument('-P', '--pattern', dest='pattern', nargs='?',
+        default=None, help='limit to sections matching regex pattern')
+parser.add_argument('--library-dir', dest='library_dir', nargs='?',
+        default=None, help='directory containing dynamic library')
+parser.add_argument('--no-normalize', dest='normalize',
+        action='store_const', const=False, default=True,
+        help='do not normalize HTML')
+parser.add_argument('-d', '--dump-tests', dest='dump_tests',
+        action='store_const', const=True, default=False,
+        help='dump tests in JSON format')
+parser.add_argument('--debug-normalization', dest='debug_normalization',
+        action='store_const', const=True,
+        default=False, help='filter stdin through normalizer for testing')
+parser.add_argument('-n', '--number', type=int, default=None,
+        help='only consider the test with the given number')
+parser.add_argument('--track', metavar='path',
+        help='track which test cases pass/fail in the given JSON file and only report changes')
+parser.add_argument('--fuzz-corpus',
+        help='convert test cases to fuzz corpus')
+args = parser.parse_args(sys.argv[1:])
 
 def out(str):
     sys.stdout.buffer.write(str.encode('utf-8'))
 
 def print_test_header(test):
-    out("Example %d (lines %d-%d) %s\n" % (test['example'], test['start_line'], test['end_line'], test['section']))
+    out("Example %d (lines %d-%d) %s\n"
+        % (test['example'], test['start_line'], test['end_line'], test['section']))
 
-def do_test(test, normalize, prev_result):
-    [retcode, actual_html_bytes, err] = cmark.to_html(test['markdown'])
+def do_test(converter, test, normalize, prev_result):
+    [retcode, actual_html_bytes, err] = converter(test['markdown'])
     if retcode != 0:
         if prev_result != 'error':
             print_test_header(test)
@@ -52,7 +55,7 @@ def do_test(test, normalize, prev_result):
     expected_html = test['html']
 
     try:
-        actual_html = actual_html_bytes.decode('utf-8')
+        actual_html = actual_html_bytes
     except UnicodeDecodeError as e:
         if prev_result != 'fail':
             print_test_header(test)
@@ -136,17 +139,30 @@ def get_tests(specfile):
         exit(0)
 
     all_tests = get_tests(args.spec)
+
+    if args.fuzz_corpus:
+        # One file per example, named <spec basename>.<1-based index>.
+        base = os.path.basename(args.spec)
+        (name, ext) = os.path.splitext(base)
+        for i, test in enumerate(all_tests, 1):
+            filename = os.path.join(args.fuzz_corpus, '%s.%d' % (name, i))
+            with open(filename, 'wb') as f:
+                f.write(b'\0' * 8) # options header
+                f.write(test['markdown'].encode('utf-8'))
+        exit(0)
+
     if args.pattern:
         pattern_re = re.compile(args.pattern, re.IGNORECASE)
     else:
         pattern_re = re.compile('.')
-    tests = [ test for test in all_tests if re.search(pattern_re, test['section']) and (not args.number or test['example'] == args.number) ]
+    tests = [ test for test in all_tests if re.search(pattern_re, test['section'])
+            and (not args.number or test['example'] == args.number) ]
     if args.dump_tests:
         out(json.dumps(tests, ensure_ascii=False, indent=2))
         exit(0)
     else:
         skipped = len(all_tests) - len(tests)
-        cmark = CMark(prog=args.program, library_dir=args.library_dir)
+        converter = CMark(prog=args.program, library_dir=args.library_dir).to_html
         result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped}
 
         previous = {}
@@ -161,7 +177,7 @@ def get_tests(specfile):
 
         results = {}
         for test in tests:
-            result = do_test(test, args.normalize, previous.get(str(test['example'])))
+            result = do_test(converter, test, args.normalize, previous.get(str(test['example'])))
             result_counts[result] += 1
             results[test['example']] = result
 