Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions bibtexparser/middlewares/interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,72 @@ def _value_is_nonstring_or_enclosed(value: Any) -> bool:
return False


def _split_concatenation(value: str) -> "list[str] | None":
"""Split a value on top-level ``#`` concatenation operators.

Returns the list of stripped tokens if the value contains at least one
``#`` outside of any ``"..."`` or ``{...}`` group, otherwise ``None``
(i.e., the value is not a concatenation expression).
"""
tokens = []
current = []
depth = 0
in_quotes = False
found_separator = False
for char in value:
if char == '"' and depth == 0:
in_quotes = not in_quotes
current.append(char)
elif char == "{" and not in_quotes:
depth += 1
current.append(char)
elif char == "}" and not in_quotes:
depth = max(0, depth - 1)
current.append(char)
elif char == "#" and depth == 0 and not in_quotes:
found_separator = True
tokens.append("".join(current).strip())
current = []
else:
current.append(char)

if not found_separator:
return None

tokens.append("".join(current).strip())
return tokens


def _resolve_concatenation(tokens: "list[str]", string_values: "dict[str, str]") -> "str | None":
"""Resolve concatenation tokens to their joined string content.

Each token is a number (kept verbatim), a quoted or braced string (its
inner content is used), or a string reference (resolved via
``string_values``). Returns ``None`` if any reference is unknown, so the
caller can leave the original expression untouched.
"""
resolved = []
for token in tokens:
if not token:
return None
if token.startswith('"') and token.endswith('"'):
resolved.append(token[1:-1])
elif token.startswith("{") and token.endswith("}"):
resolved.append(token[1:-1])
elif token.isdigit():
resolved.append(token)
else:
try:
referenced = string_values[token.lower()]
except KeyError:
return None
if referenced.startswith(('"', "{")) and referenced.endswith(('"', "}")):
referenced = referenced[1:-1]
resolved.append(referenced)

return "".join(resolved)


class ResolveStringReferencesMiddleware(LibraryMiddleware):
"""Replace strings references with their values."""

Expand Down Expand Up @@ -59,6 +125,18 @@ def transform(self, library: Library) -> Library:

field: Field
for field in entry.fields:
if isinstance(field.value, str):
tokens = _split_concatenation(field.value)
if tokens is not None:
joined = _resolve_concatenation(tokens, string_values)
if joined is not None:
# Keep the result enclosed (in braces) so the
# downstream enclosing middlewares treat it as a
# plain string rather than an unenclosed reference.
field.value = "{" + joined + "}"
resolved_fields.append(field.key)
continue

if _value_is_nonstring_or_enclosed(field.value):
continue
try:
Expand Down
63 changes: 63 additions & 0 deletions tests/middleware_tests/test_interpolate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest

import bibtexparser
from bibtexparser.middlewares.enclosing import RemoveEnclosingMiddleware
from bibtexparser.middlewares.interpolate import ResolveStringReferencesMiddleware
from bibtexparser.splitter import Splitter
Expand Down Expand Up @@ -66,3 +67,65 @@ def test_warning_is_raised_if_enclosings_are_removed():

assert len(record) == 1
assert "RemoveEnclosing" in record[0].message.args[0]


def test_string_interpolation_resolves_concatenation():
    """Concatenations of numbers, literals and references are joined."""
    source = """
@string{jan = "Jan."}

@inbook{test_inbook,
month = 10 # "~" # jan,
title = "hello" # " " # "world",
mixed = jan # { } # "X",
numbers = 1 # 2 # 3
}
"""
    parsed = Splitter(source).split()
    resolved = ResolveStringReferencesMiddleware().transform(parsed)
    by_key = resolved.entries_dict["test_inbook"].fields_dict

    # Every resolved concatenation stays enclosed in braces.
    expected_values = {
        "month": "{10~Jan.}",
        "title": "{hello world}",
        "mixed": "{Jan. X}",
        "numbers": "{123}",
    }
    for key, expected in expected_values.items():
        assert by_key[key].value == expected


def test_string_interpolation_leaves_unresolvable_concatenation_untouched():
    """A concatenation with an unknown reference keeps its raw value."""
    source = """
@inbook{test_inbook,
note = unknown_macro # " suffix"
}
"""
    parsed = Splitter(source).split()
    resolved = ResolveStringReferencesMiddleware().transform(parsed)

    # No @string defines ``unknown_macro``, so nothing may be rewritten.
    note = resolved.entries_dict["test_inbook"].fields_dict["note"]
    assert note.value == 'unknown_macro # " suffix"'


def test_parse_string_resolves_concatenation_end_to_end():
    """Reproduce issue #396 through the public parse/write entry points."""
    source = """
@STRING{ jan = "Jan." }

@INBOOK{inbook-full,
month = 10 # "~" # jan,
}
"""

    def month_of(lib):
        # The single entry's ``month`` field value.
        return lib.entries[0].fields_dict["month"].value

    parsed = bibtexparser.parse_string(source)
    assert month_of(parsed) == "10~Jan."

    # Writing yields a valid brace-enclosed string, and parsing that output
    # again gives back the same resolved value.
    serialized = bibtexparser.write_string(parsed)
    assert "month = {10~Jan.}" in serialized
    assert month_of(bibtexparser.parse_string(serialized)) == "10~Jan."