NVIDIA · mgrafu · Jun 8, 2026
diff --git a/nemo_text_processing/text_normalization/ar/taggers/money.py b/nemo_text_processing/text_normalization/ar/taggers/money.py
@@ -80,14 +80,14 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
             pynutil.insert("integer_part: \"") + ((NEMO_SIGMA - "1") @ cardinal_graph) + pynutil.insert("\"")
         )
 
-        graph_integer_only = graph_maj_singular + insert_space + graph_integer_one
-        graph_integer_only |= graph_maj_plural + insert_space + graph_integer
+        currency_first = pynutil.insert(' morphosyntactic_features: "currency_first"')
+        # Currency-first tagging for exactly one major unit (e.g. $1 -> دولار واحد).
+        graph_integer_one_unit = graph_maj_singular + insert_space + graph_integer_one + currency_first
 
-        # For local currency "9د.ك"
+        # For local currency "5د.ك"
         graph_integer_only_ar = graph_integer + insert_space + graph_ar_cur
-        # graph_decimal_ar = graph_decimal_final + insert_space  + graph_ar_cur
 
-        graph = (graph_integer_only + optional_delete_fractional_zeros) | graph_integer_only_ar
+        graph = (graph_integer_one_unit + optional_delete_fractional_zeros) | graph_integer_only_ar
 
         # remove trailing zeros of non zero number in the first 2 digits and fill up to 2 digits
         # e.g. 2000 -> 20, 0200->02, 01 -> 01, 10 -> 10
@@ -112,9 +112,12 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
 
             preserve_order = pynutil.insert(" preserve_order: true")
             integer_plus_maj = graph_integer + insert_space + pynutil.insert(curr_symbol) @ graph_maj_plural
-            integer_plus_maj |= graph_integer_one + insert_space + pynutil.insert(curr_symbol) @ graph_maj_singular
-            # non zero integer part
-            integer_plus_maj = (pynini.closure(NEMO_DIGIT) - "0") @ integer_plus_maj
+            integer_plus_maj_with_one = integer_plus_maj | (
+                graph_integer_one + insert_space + pynutil.insert(curr_symbol) @ graph_maj_singular
+            )
+            # Amount == 1 without fractional part uses graph_integer_one_unit / graph_one_prefix.
+            integer_plus_maj_no_minor = (pynini.closure(NEMO_DIGIT) - "0") @ integer_plus_maj
+            integer_plus_maj_with_minor = (pynini.closure(NEMO_DIGIT) - "0") @ integer_plus_maj_with_one
 
             graph_fractional_one = two_digits_fractional_part @ pynini.cross("1", "")
             graph_fractional_one = pynutil.insert("fractional_part: \"") + graph_fractional_one + pynutil.insert("\"")
@@ -141,11 +144,7 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
                 graph_fractional_up_to_ten + insert_space + pynutil.insert(curr_symbol) @ graph_min_plural
             )
 
-            graph_with_no_minor_curr = integer_plus_maj
-            graph_with_no_minor_curr |= pynutil.add_weight(
-                integer_plus_maj,
-                weight=0.0001,
-            )
+            graph_with_no_minor_curr = integer_plus_maj_no_minor
 
             graph_with_no_minor_curr = pynutil.delete(curr_symbol) + graph_with_no_minor_curr + preserve_order
 
@@ -154,9 +153,9 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
                 if graph_with_no_minor is None
                 else pynini.union(graph_with_no_minor, graph_with_no_minor_curr)
             )
-            decimal_graph_with_minor_curr = integer_plus_maj + pynini.cross(".", " ") + fractional_plus_min
+            decimal_graph_with_minor_curr = integer_plus_maj_with_minor + pynini.cross(".", " ") + fractional_plus_min
             decimal_graph_with_minor_curr |= pynutil.add_weight(
-                integer_plus_maj
+                integer_plus_maj_with_minor
                 + pynini.cross(".", " ")
                 + pynutil.insert("fractional_part: \"")
                 + two_digits_fractional_part @ cardinal_graph

diff --git a/nemo_text_processing/text_normalization/ar/verbalizers/money.py b/nemo_text_processing/text_normalization/ar/verbalizers/money.py
@@ -28,6 +28,7 @@ class MoneyFst(GraphFst):
     Finite state transducer for verbalizing money, e.g.
         money { integer_part: "تسعة" currency_maj: "يورو" preserve_order: true} -> "تسعة يورو"
         money { integer_part: "تسعة" currency_maj: "دولار" preserve_order: true} -> "تسعة دولار"
+        money { currency_maj: "دولار" integer_part: "واحد" morphosyntactic_features: "currency_first"} -> "دولار واحد"
         money { integer_part: "خمسة" currency_maj: "دينار كويتي"} -> "خمسة دينار كويتي"
 
     Args:
@@ -49,9 +50,10 @@ def __init__(self, deterministic: bool = True):
 
         integer_part = pynutil.delete("integer_part: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")
         add_and = pynutil.insert(" و")
+        morph_currency_first = pynutil.delete(' morphosyntactic_features: "currency_first"')
 
-        #  *** currency_maj
-        graph_integer = maj + keep_space + integer_part
+        # currency_maj before integer_part; disambiguated via morphosyntactic_features for Sparrowhawk.
+        graph_currency_first = maj + keep_space + integer_part + delete_space + morph_currency_first
 
         #  *** currency_maj + (***) (و) *** current_min
         graph_integer_with_minor = (
@@ -65,12 +67,10 @@ def __init__(self, deterministic: bool = True):
             + pynini.closure(keep_space + min, 0, 1)
             + delete_preserve_order
         )
-        # this graph fix word order from dollar three (دولار تسعة)--> three dollar (تسعة دولار)
         graph_integer_no_minor = integer_part + keep_space + maj + delete_space + delete_preserve_order
-        # *** current_min
         graph_minor = fractional_part + keep_space + delete_space + min + delete_preserve_order
 
-        graph = graph_integer | graph_integer_with_minor | graph_minor | graph_integer_no_minor
+        graph = graph_currency_first | graph_integer_with_minor | graph_minor | graph_integer_no_minor
 
         delete_tokens = self.delete_tokens(graph)
         self.fst = delete_tokens.optimize()
diff --git a/tests/nemo_text_processing/ar/test_sparrowhawk_normalization.sh b/tests/nemo_text_processing/ar/test_sparrowhawk_normalization.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+GRAMMARS_DIR=${1:-"/workspace/sparrowhawk/documentation/grammars"}  # tests cd here; holds sparrowhawk_configuration.ascii_proto
+TEST_DIR=${2:-"/workspace/tests/ar"}  # root of the data_text_normalization/*.txt case files
+
+runtest () {
+  # Feed each "written~spoken" line of file $1 to normalizer_main and assert the output.
+  input=$1
+  echo "INPUT is $input"
+  cd "${GRAMMARS_DIR}" || { fail "cannot cd to ${GRAMMARS_DIR}"; return 1; }
+  # The "|| [ -n ... ]" clause keeps a final line that has no trailing newline.
+  while IFS= read -r testcase || [ -n "$testcase" ]; do
+    IFS='~' read -r written spoken <<< "$testcase"
+    # Double every backslash, normalize, keep the last output line, map NBSP bytes to spaces.
+    escaped_written=$(printf '%s' "$written" | sed 's/\\/\\\\/g')
+    denorm_pred=$(echo "$escaped_written" | normalizer_main --config=sparrowhawk_configuration.ascii_proto 2>&1 | tail -n 1 | sed 's/\xC2\xA0/ /g')
+    # Trim leading/trailing whitespace on both sides before comparing.
+    spoken="$(echo -e "${spoken}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
+    denorm_pred="$(echo -e "${denorm_pred}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
+    assertEquals "$written" "$spoken" "$denorm_pred"
+  done < "$input"
+}
+
+# For test files stored as expected~input (spoken~written): each line's second field
+# is fed to normalizer_main and the first field is the expected output.
+runtest_swapped () {
+  input=$1
+  echo "INPUT is $input"
+  cd "${GRAMMARS_DIR}" || { fail "cannot cd to ${GRAMMARS_DIR}"; return 1; }
+  # The "|| [ -n ... ]" clause keeps a final line that has no trailing newline.
+  while IFS= read -r testcase || [ -n "$testcase" ]; do
+    IFS='~' read -r spoken written <<< "$testcase"
+    # Double every backslash, normalize, keep the last output line, map NBSP bytes to spaces.
+    escaped_written=$(printf '%s' "$written" | sed 's/\\/\\\\/g')
+    denorm_pred=$(echo "$escaped_written" | normalizer_main --config=sparrowhawk_configuration.ascii_proto 2>&1 | tail -n 1 | sed 's/\xC2\xA0/ /g')
+    # Trim leading/trailing whitespace on both sides before comparing.
+    spoken="$(echo -e "${spoken}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
+    denorm_pred="$(echo -e "${denorm_pred}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
+    assertEquals "$written" "$spoken" "$denorm_pred"
+  done < "$input"
+}
+
+testTNCardinal() {
+  # Cardinal cases; the file is parsed as written~spoken by runtest.
+  runtest "$TEST_DIR/data_text_normalization/test_cases_cardinal.txt"
+}
+
+testTNDecimal() {
+  # Decimal cases; the file is parsed as written~spoken by runtest.
+  runtest "$TEST_DIR/data_text_normalization/test_cases_decimal.txt"
+}
+
+testTNFraction() {
+  # Fraction cases; the file is parsed as spoken~written by runtest_swapped.
+  runtest_swapped "$TEST_DIR/data_text_normalization/test_cases_fraction.txt"
+}
+
+testTNMeasure() {
+  # Measure cases; the file is parsed as spoken~written by runtest_swapped.
+  runtest_swapped "$TEST_DIR/data_text_normalization/test_cases_measure.txt"
+}
+
+testTNMoney() {
+  # Money cases; the file is parsed as written~spoken by runtest.
+  runtest "$TEST_DIR/data_text_normalization/test_cases_money.txt"
+}
+
+# Drop all positional args (already read into GRAMMARS_DIR/TEST_DIR); presumably so shUnit2 does not parse them - confirm.
+shift $#
+
+# Source shUnit2, the framework that provides assertEquals; per its docs it runs the test* functions defined above.
+. /workspace/shunit2/shunit2
diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py
@@ -278,6 +278,7 @@ def parse_args():
         from nemo_text_processing.text_normalization.ar.taggers.tokenize_and_classify import (
             ClassifyFst as TNClassifyFst,
         )
+        from nemo_text_processing.text_normalization.ar.verbalizers.verbalize import VerbalizeFst as TNVerbalizeFst
     elif args.language == 'it':
         from nemo_text_processing.text_normalization.it.taggers.tokenize_and_classify import (
             ClassifyFst as TNClassifyFst,