Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 32 additions & 24 deletions bibtexparser/bibtexexpression.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def add_logger_parse_action(expr, log_func):
"""Register a callback on expression parsing with the adequate message."""
def action(s, l, t):
log_func("Found {}: {}".format(expr.resultsName, t))
expr.addParseAction(action)
expr.add_parse_action(action)


# Parse action helpers
Expand Down Expand Up @@ -111,7 +111,7 @@ def __init__(self):
# String names
string_name = pp.Word(pp.alphanums + '_-:')('StringName')
self.set_string_name_parse_action(lambda s, l, t: None)
string_name.addParseAction(self._string_name_parse_action)
string_name.add_parse_action(self._string_name_parse_action)

# Values inside bibtex fields
# Values can be integer or string expressions. The latter may use
Expand All @@ -123,35 +123,35 @@ def __init__(self):
# Braced values: braced values can contain nested (but balanced) braces
braced_value_content = pp.CharsNotIn('{}')
braced_value = pp.Forward() # Recursive definition for nested braces
braced_value <<= pp.originalTextFor(
braced_value <<= pp.original_text_for(
'{' + pp.ZeroOrMore(braced_value | braced_value_content) + '}'
)('BracedValue')
braced_value.setParseAction(remove_braces)
braced_value.set_parse_action(remove_braces)
# TODO add ignore for "\}" and "\{" ?
# TODO @ are not parsed by bibtex in braces

# Quoted values: may contain braced content with balanced braces
brace_in_quoted = pp.nestedExpr('{', '}', ignoreExpr=None)
brace_in_quoted = pp.nested_expr('{', '}', ignore_expr=None)
text_in_quoted = pp.CharsNotIn('"{}')
# (quotes should be escaped by braces in quoted value)
quoted_value = pp.originalTextFor(
quoted_value = pp.original_text_for(
'"' + pp.ZeroOrMore(text_in_quoted | brace_in_quoted) + '"'
)('QuotedValue')
quoted_value.addParseAction(pp.removeQuotes)
quoted_value.add_parse_action(pp.remove_quotes)

# String expressions
string_expr = pp.delimitedList(
string_expr = pp.DelimitedList(
(quoted_value | braced_value | string_name), delim='#'
)('StringExpression')
string_expr.addParseAction(self._string_expr_parse_action)
string_expr.add_parse_action(self._string_expr_parse_action)

value = (integer | string_expr)('Value')

# Entries

# @EntryType { ...
entry_type = (pp.Suppress('@') + pp.Word(pp.alphas))('EntryType')
entry_type.setParseAction(first_token)
entry_type.set_parse_action(first_token)

# Entry key: any character up to a ',' without leading and trailing
# spaces. Also exclude spaces and prevent it from being empty.
Expand All @@ -175,20 +175,20 @@ def citekeyParseAction(string_, location, token):
msg="Whitespace not allowed in citekeys.")
return key

key.setParseAction(citekeyParseAction)
key.set_parse_action(citekeyParseAction)

# Field name: word of letters, digits, dashes, underscores, parentheses, dots and plus signs
field_name = pp.Word(pp.alphanums + '_-().+')('FieldName')
field_name.setParseAction(first_token)
field_name.set_parse_action(first_token)

# Field: field_name = value
field = pp.Group(field_name + pp.Suppress('=') + value)('Field')
field.setParseAction(field_to_pair)
field.set_parse_action(field_to_pair)

# List of fields: comma-separated fields
field_list = (pp.delimitedList(field) + pp.Suppress(pp.Optional(','))
field_list = (pp.DelimitedList(field) + pp.Suppress(pp.Optional(','))
)('Fields')
field_list.setParseAction(
field_list.set_parse_action(
lambda s, l, t: {k: v for (k, v) in reversed(t.get('Fields'))})

# Entry: type, key, and fields
Expand All @@ -204,10 +204,10 @@ def citekeyParseAction(string_, location, token):
) | pp.StringEnd()
self.explicit_comment = (
pp.Suppress(comment_line_start) +
pp.originalTextFor(pp.SkipTo(not_an_implicit_comment),
asString=True))('ExplicitComment')
self.explicit_comment.addParseAction(remove_trailing_newlines)
self.explicit_comment.addParseAction(remove_braces)
pp.original_text_for(pp.SkipTo(not_an_implicit_comment),
as_string=True))('ExplicitComment')
self.explicit_comment.add_parse_action(remove_trailing_newlines)
self.explicit_comment.add_parse_action(remove_braces)
# Previous implementation included comment until next '}'.
# This is however not in line with bibtex behavior, which is to only
# ignore until EOL. Brace stripping is arbitrary here but avoids
Expand All @@ -219,10 +219,10 @@ def mustNotBeEmpty(t):
raise pp.ParseException("Match must not be empty.")

# Implicit comments: not anything else
self.implicit_comment = pp.originalTextFor(
pp.SkipTo(not_an_implicit_comment).setParseAction(mustNotBeEmpty),
asString=True)('ImplicitComment')
self.implicit_comment.addParseAction(remove_trailing_newlines)
self.implicit_comment = pp.original_text_for(
pp.SkipTo(not_an_implicit_comment).set_parse_action(mustNotBeEmpty),
as_string=True)('ImplicitComment')
self.implicit_comment.add_parse_action(remove_trailing_newlines)

# String definition
self.string_def = (pp.Suppress(string_def_start) + in_braces_or_pars(
Expand Down Expand Up @@ -274,5 +274,13 @@ def _string_name_parse_action(self, s, l, t):
def _string_expr_parse_action(self, s, l, t):
return BibDataStringExpression.expression_if_needed(t)

def parse_file(self, file_obj):
"""Execute parse expression on a file object"""
return self.main_expression.parse_file(file_obj, parse_all=True)

def parseFile(self, file_obj):
return self.main_expression.parseFile(file_obj, parseAll=True)
"""Execute parse expression on a file object

Alias for parse_file()
"""
return self.parse_file(file_obj)
12 changes: 6 additions & 6 deletions bibtexparser/bparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def parse(self, bibtex_str, partial=False):

bibtex_file_obj = self._bibtex_file_obj(bibtex_str)
try:
self._expr.parseFile(bibtex_file_obj)
self._expr.parse_file(bibtex_file_obj)
except self._expr.ParseException as exc:
logger.error("Could not parse properly, starting at %s", exc.line)
if not partial:
Expand Down Expand Up @@ -198,20 +198,20 @@ def _init_expressions(self):
self._expr.add_log_function(logger.debug)

# Set actions
self._expr.entry.addParseAction(
self._expr.entry.add_parse_action(
lambda s, l, t: self._add_entry(
t.get('EntryType'), t.get('Key'), t.get('Fields'))
)
self._expr.implicit_comment.addParseAction(
self._expr.implicit_comment.add_parse_action(
lambda s, l, t: self._add_comment(t[0])
)
self._expr.explicit_comment.addParseAction(
self._expr.explicit_comment.add_parse_action(
lambda s, l, t: self._add_comment(t[0])
)
self._expr.preamble_decl.addParseAction(
self._expr.preamble_decl.add_parse_action(
lambda s, l, t: self._add_preamble(t[0])
)
self._expr.string_def.addParseAction(
self._expr.string_def.add_parse_action(
lambda s, l, t: self._add_string(t['StringName'].name,
t['StringValue'])
)
Expand Down
42 changes: 21 additions & 21 deletions bibtexparser/tests/test_bibtexexpression.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,90 +14,90 @@ def setUp(self):
self.expr = BibtexExpression()

def test_minimal(self):
result = self.expr.entry.parseString('@journal{key, name = 123 }')
result = self.expr.entry.parse_string('@journal{key, name = 123 }')
self.assertEqual(result.get('EntryType'), 'journal')
self.assertEqual(result.get('Key'), 'key')
self.assertEqual(result.get('Fields'), {'name': '123'})

def test_capital_type(self):
result = self.expr.entry.parseString('@JOURNAL{key, name = 123 }')
result = self.expr.entry.parse_string('@JOURNAL{key, name = 123 }')
self.assertEqual(result.get('EntryType'), 'JOURNAL')

def test_capital_key(self):
result = self.expr.entry.parseString('@journal{KEY, name = 123 }')
result = self.expr.entry.parse_string('@journal{KEY, name = 123 }')
self.assertEqual(result.get('Key'), 'KEY')

def test_braced(self):
result = self.expr.entry.parseString('@journal{key, name = {abc} }')
result = self.expr.entry.parse_string('@journal{key, name = {abc} }')
self.assertEqual(result.get('Fields'), {'name': 'abc'})

def test_braced_with_new_line(self):
result = self.expr.entry.parseString(
result = self.expr.entry.parse_string(
'@journal{key, name = {abc\ndef} }')
self.assertEqual(result.get('Fields'), {'name': 'abc\ndef'})

def test_braced_unicode(self):
result = self.expr.entry.parseString(
result = self.expr.entry.parse_string(
'@journal{key, name = {àbcđéf} }')
self.assertEqual(result.get('Fields'), {'name': 'àbcđéf'})

def test_quoted(self):
result = self.expr.entry.parseString('@journal{key, name = "abc" }')
result = self.expr.entry.parse_string('@journal{key, name = "abc" }')
self.assertEqual(result.get('Fields'), {'name': 'abc'})

def test_quoted_with_new_line(self):
result = self.expr.entry.parseString(
result = self.expr.entry.parse_string(
'@journal{key, name = "abc\ndef" }')
self.assertEqual(result.get('Fields'), {'name': 'abc\ndef'})

def test_quoted_with_unicode(self):
result = self.expr.entry.parseString(
result = self.expr.entry.parse_string(
'@journal{key, name = "àbcđéf" }')
self.assertEqual(result.get('Fields'), {'name': 'àbcđéf'})

def test_entry_declaration_after_space(self):
self.expr.entry.parseString(' @journal{key, name = {abcd}}')
self.expr.entry.parse_string(' @journal{key, name = {abcd}}')

def test_entry_declaration_no_key(self):
with self.assertRaises(self.expr.ParseException):
self.expr.entry.parseString('@misc{name = {abcd}}')
self.expr.entry.parse_string('@misc{name = {abcd}}')

def test_entry_declaration_no_key_new_line(self):
with self.assertRaises(self.expr.ParseException):
self.expr.entry.parseString('@misc{\n name = {abcd}}')
self.expr.entry.parse_string('@misc{\n name = {abcd}}')

def test_entry_declaration_no_key_comma(self):
with self.assertRaises(self.expr.ParseException):
self.expr.entry.parseString('@misc{, \nname = {abcd}}')
self.expr.entry.parse_string('@misc{, \nname = {abcd}}')

def test_entry_declaration_no_key_keyvalue_without_space(self):
with self.assertRaises(self.expr.ParseException):
self.expr.entry.parseString('@misc{\nname=aaa}')
self.expr.entry.parse_string('@misc{\nname=aaa}')

def test_entry_declaration_key_with_whitespace(self):
with self.assertRaises(self.expr.ParseException):
self.expr.entry.parseString('@misc{ xx yy, \n name = aaa}')
self.expr.entry.parse_string('@misc{ xx yy, \n name = aaa}')

def test_string_declaration_after_space(self):
self.expr.string_def.parseString(' @string{ name = {abcd}}')
self.expr.string_def.parse_string(' @string{ name = {abcd}}')

def test_preamble_declaration_after_space(self):
self.expr.preamble_decl.parseString(' @preamble{ "blah blah " }')
self.expr.preamble_decl.parse_string(' @preamble{ "blah blah " }')

def test_declaration_after_space(self):
keys = []
self.expr.entry.addParseAction(
self.expr.entry.add_parse_action(
lambda s, l, t: keys.append(t.get('Key'))
)
self.expr.main_expression.parseString(' @journal{key, name = {abcd}}')
self.expr.main_expression.parse_string(' @journal{key, name = {abcd}}')
self.assertEqual(keys, ['key'])

def test_declaration_after_space_and_comment(self):
keys = []
self.expr.entry.addParseAction(
self.expr.entry.add_parse_action(
lambda s, l, t: keys.append(t.get('Key'))
)
self.expr.main_expression.parseString(
self.expr.main_expression.parse_string(
'% Implicit comment\n @article{key, name={abcd}}'
)
self.assertEqual(keys, ['key'])
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pyparsing>=2.0.3
pyparsing>=3.0.0