@@ -11,18 +11,8 @@ def __init__(self, print_sent_id=True, print_text=True, print_empty_trees=True,
1111 self .print_text = print_text
1212 self .print_empty_trees = print_empty_trees
1313
14- def process_tree (self , tree ): # pylint: disable=too-many-branches
15- empty_nodes = tree .empty_nodes
16- if empty_nodes :
17- nodes = sorted (tree ._descendants + empty_nodes )
18- else :
19- nodes = tree ._descendants
20-
21- # Empty sentences are not allowed in CoNLL-U, so with print_empty_trees==0
22- # we need to skip the whole tree (including possible comments).
23- if not nodes and not self .print_empty_trees :
24- return
25-
14+ def iter_comment_lines (self , tree ):
15+ """Yield comment lines (without leading #) for the current tree."""
2616 # If tree.comment contains placeholders $NEWDOC,...$TEXT, replace them with the actual
2717 # value of the attribute and make note on which line (i_*) they were present.
2818 comment_lines = tree .comment .splitlines ()
@@ -65,51 +55,66 @@ def process_tree(self, tree): # pylint: disable=too-many-branches
6555 printed_i = - 1
6656 if comment_lines and comment_lines [0 ].startswith (' global.columns' ):
6757 printed_i += 1
68- print ( '#' + comment_lines [printed_i ])
58+ yield comment_lines [printed_i ]
6959 if self .print_sent_id :
7060 if tree .newdoc :
7161 if i_newdoc == - 1 :
72- print ( '# newdoc' + (' id = ' + tree .newdoc if tree .newdoc is not True else '' ) )
62+ yield ' newdoc' + (' id = ' + tree .newdoc if tree .newdoc is not True else '' )
7363 else :
7464 while printed_i < i_newdoc :
7565 printed_i += 1
7666 if comment_lines [printed_i ]:
77- print ( '#' + comment_lines [printed_i ])
67+ yield comment_lines [printed_i ]
7868 ge = tree .document .meta .get ('global.Entity' )
7969 if ge :
8070 if i_global_entity == - 1 :
81- print ( '# global.Entity = ' + ge )
71+ yield ' global.Entity = ' + ge
8272 else :
8373 while printed_i < i_global_entity :
8474 printed_i += 1
8575 if comment_lines [printed_i ]:
86- print ( '#' + comment_lines [printed_i ])
76+ yield comment_lines [printed_i ]
8777 if tree .newpar :
8878 if i_newpar == - 1 :
89- print ( '# newpar' + (' id = ' + tree .newpar if tree .newpar is not True else '' ) )
79+ yield ' newpar' + (' id = ' + tree .newpar if tree .newpar is not True else '' )
9080 else :
9181 while printed_i < i_newpar :
9282 printed_i += 1
9383 if comment_lines [printed_i ]:
94- print ( '#' + comment_lines [printed_i ])
84+ yield comment_lines [printed_i ]
9585 if i_sent_id == - 1 :
96- print ( '# sent_id = ' + tree .sent_id )
86+ yield ' sent_id = ' + tree .sent_id
9787 else :
9888 while printed_i < i_sent_id :
9989 printed_i += 1
10090 if comment_lines [printed_i ]:
101- print ( '#' + comment_lines [printed_i ])
91+ yield comment_lines [printed_i ]
10292 if self .print_text and i_text == - 1 :
103- print ( '# text = ' + (tree .compute_text () if tree .text is None else tree .text .replace ('\n ' , '' ).replace ('\r ' , '' ).rstrip () ))
93+ yield ' text = ' + (tree .compute_text () if tree .text is None else tree .text .replace ('\n ' , '' ).replace ('\r ' , '' ).rstrip ())
10494
10595 for c_line in comment_lines [printed_i + 1 :]:
10696 if c_line :
107- print ( '#' + c_line )
97+ yield c_line
10898
10999 # Special-purpose json_* comments should always be at the end of the comment block.
110100 if tree .json :
111101 for key , value in sorted (tree .json .items ()):
112- print (f"# json_{ key } = { json .dumps (value , ensure_ascii = False , sort_keys = True )} " )
102+ yield f" json_{ key } = { json .dumps (value , ensure_ascii = False , sort_keys = True )} "
103+
104+ def process_tree (self , tree ): # pylint: disable=too-many-branches
105+ empty_nodes = tree .empty_nodes
106+ if empty_nodes :
107+ nodes = sorted (tree ._descendants + empty_nodes )
108+ else :
109+ nodes = tree ._descendants
110+
111+ # Empty sentences are not allowed in CoNLL-U, so with print_empty_trees==0
112+ # we need to skip the whole tree (including possible comments).
113+ if not nodes and not self .print_empty_trees :
114+ return
115+
116+ for line in self .iter_comment_lines (tree ):
117+ print ('#' + line )
113118
114119 last_mwt_id = 0
115120 for node in nodes :
0 commit comments