diff --git a/src/spdx_tools/spdx/parser/tagvalue/parser.py b/src/spdx_tools/spdx/parser/tagvalue/parser.py
index 50096bda..1d440d14 100644
--- a/src/spdx_tools/spdx/parser/tagvalue/parser.py
+++ b/src/spdx_tools/spdx/parser/tagvalue/parser.py
@@ -92,6 +92,7 @@ class Parser:
self.lex = SPDXLexer()
self.lex.build(reflags=re.UNICODE)
self.yacc = yacc.yacc(module=self, **kwargs)
+ self._relationship_hashes = set()
@grammar_rule("start : start attrib ")
def p_start_start_attrib(self, p):
@@ -603,6 +604,8 @@ class Parser:
)
return
package_spdx_id = self.elements_built["packages"][-1].spdx_id
- relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id)
- if relationship not in self.elements_built.setdefault("relationships", []):
+ h = package_spdx_id + "CONTAINS" + file_spdx_id
+ if h not in self._relationship_hashes:
+ self._relationship_hashes.add(h)
+ relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id)
self.elements_built["relationships"].append(relationship)
Specific to the tag-value parser, at the point where the `CONTAINS` relationship is constructed, there is an optimization that took our use case (a whole platform tree with nested sublevels of packages) from impracticable to at least usable: