Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions machine/corpora/corpora_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@
T = TypeVar("T")


def alignment_exception(refs: Iterable[object]) -> TypeError:
    """Build the error reported when row references cannot be compared.

    The exception is returned, not raised, so that the caller can write
    ``raise alignment_exception(...) from original_error`` and keep the
    original traceback chained.

    Args:
        refs: The references involved in the failed comparison. Each item is
            converted with ``str`` before being joined, so callers may pass
            reference objects directly as well as pre-formatted strings.

    Returns:
        A ``TypeError`` whose message lists the references and the likely
        causes of the mismatch.
    """
    # Coerce every item: str.join raises its own TypeError on non-string
    # items, which would hide the very diagnostic this helper produces.
    joined_refs = ", ".join(str(ref) for ref in refs)
    return TypeError(
        f"Invalid format in {joined_refs}. "
        "Mismatched key formats. There may be an extraneous tab, "
        "missing ref, or inconsistent use of user-defined refs."
    )


def batch(iterable: Iterable[T], batch_size: int) -> Iterable[Sequence[T]]:
if isinstance(iterable, Sequence) and len(iterable) <= batch_size:
yield iterable
Expand Down
17 changes: 13 additions & 4 deletions machine/corpora/n_parallel_text_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Any, Callable, Iterable, List, Optional, Sequence, Set, cast

from ..scripture.verse_ref import Versification
from .corpora_utils import alignment_exception
from .n_parallel_text_corpus_base import NParallelTextCorpusBase
from .n_parallel_text_row import NParallelTextRow
from .scripture_ref import ScriptureRef
Expand Down Expand Up @@ -181,7 +182,10 @@ def _get_rows(self, generators: List[TextCorpusEnumerator]) -> Iterable[NParalle
refs.append(row.ref)
else:
refs.append(None)
min_ref_indexes = self._min_ref_indexes(refs)
try:
min_ref_indexes = self._min_ref_indexes(refs)
except TypeError as e:
raise alignment_exception([str(r.ref) for r in current_rows if r is not None]) from e
non_min_ref_indexes = list(set(range(0, self.n)).difference(min_ref_indexes))
if (
len(min_ref_indexes) < num_remaining_rows
Expand Down Expand Up @@ -274,6 +278,8 @@ def _get_rows(self, generators: List[TextCorpusEnumerator]) -> Iterable[NParalle
if is_completed:
num_completed += 1
num_remaining_rows -= 1
else:
raise alignment_exception([str(current_rows[i].ref) for i in min_ref_indexes])

if range_info.is_in_range:
yield range_info.create_row()
Expand Down Expand Up @@ -362,9 +368,12 @@ def _create_min_ref_rows(
yield row

def _check_same_ref_rows(self, same_ref_rows: List[TextRow], other_row: TextRow) -> bool:
    """Check whether the buffered rows still share a ref with ``other_row``.

    The ref of the first buffered row is compared with ``other_row.ref`` via
    ``self.row_ref_comparer``. If the comparison result is non-zero, the
    buffer is cleared in place.

    Args:
        same_ref_rows: Buffered rows; emptied in place when their ref no
            longer compares equal to ``other_row.ref``.
        other_row: The row whose ref is compared against the buffer.

    Returns:
        ``True`` if ``same_ref_rows`` is non-empty after the check,
        ``False`` otherwise.

    Raises:
        TypeError: Built by ``alignment_exception`` when the comparer raises
            ``TypeError``; the original error is chained as the cause.
    """
    # Nothing buffered: there is nothing to compare or clear.
    if not same_ref_rows:
        return False

    first_ref = same_ref_rows[0].ref
    # Keep the try body to the single call that can fail, so unrelated
    # TypeErrors are not relabelled as alignment problems.
    try:
        comparison = self.row_ref_comparer(first_ref, other_row.ref)
    except TypeError as e:
        raise alignment_exception([str(first_ref), str(other_row.ref)]) from e

    if comparison != 0:
        same_ref_rows.clear()
    return len(same_ref_rows) > 0

def _create_same_ref_rows(
self,
Expand Down
6 changes: 5 additions & 1 deletion machine/corpora/standard_parallel_text_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Callable, Generator, Iterable, Optional

from .alignment_corpus import AlignmentCorpus
from .corpora_utils import alignment_exception
from .dictionary_alignment_corpus import DictionaryAlignmentCorpus
from .n_parallel_text_corpus import NParallelTextCorpus, default_row_ref_comparer
from .parallel_text_corpus import ParallelTextCorpus
Expand Down Expand Up @@ -68,7 +69,10 @@ def _get_rows(self, text_ids: Optional[Iterable[str]]) -> Generator[ParallelText
if self._alignment_corpus is not None and all([len(n) > 0 for n in n_row.n_segments]):
while True:
if alignment_row is not None:
compare_alignment_corpus = self._row_ref_comparer(n_row.ref, alignment_row.ref)
try:
compare_alignment_corpus = self._row_ref_comparer(n_row.ref, alignment_row.ref)
except TypeError as e:
raise alignment_exception([str(r) for r in n_row.n_refs]) from e
else:
compare_alignment_corpus = 1
if compare_alignment_corpus >= 0:
Expand Down
Loading