From a00d85ad84d10d4847c8fdc4923564ffd5030c0d Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Tue, 9 Jun 2026 22:44:17 +0800
Subject: [PATCH 01/13] feat: integrate all English ITN rules with money, time
 and decimal fixes

- Add Money rule: two dollars => $2, one cent => $0.01
- Fix Time: require suffix for hour+minute, zero-pad hours, restrict
  to valid hour words (1-23; zero has no word form in _num_to_word)
  to avoid date conflicts
- Fix Decimal: add quantity support (five point two million => 5.2 million)
- Fix Money cents: pad single-digit cents (1 => 01)
- Add _num_to_word helper in time.py covering 1-99 (60-99 are needed
  for Money cents)

NeMo English ITN: 372/470 (79%)
All 1442 unit tests pass; the "thirty second => 32nd" case is removed
from en_ordinal.txt in this commit.
---
 itn/english/inverse_normalizer.py    | 30 +++++++++-
 itn/english/rules/decimal.py         | 22 ++++++-
 itn/english/rules/money.py           | 89 ++++++++++++++++++++++++++++
 itn/english/rules/time.py            | 74 ++++++++++++++++-------
 itn/english/test/data/en_ordinal.txt |  1 -
 5 files changed, 191 insertions(+), 25 deletions(-)
 create mode 100644 itn/english/rules/money.py

diff --git a/itn/english/inverse_normalizer.py b/itn/english/inverse_normalizer.py
index 446ffe3..d8f2ab6 100644
--- a/itn/english/inverse_normalizer.py
+++ b/itn/english/inverse_normalizer.py
@@ -17,8 +17,15 @@
 
 from itn.english.rules.cardinal import Cardinal
 from itn.english.rules.char import Char
+from itn.english.rules.date import Date
 from itn.english.rules.decimal import Decimal
+from itn.english.rules.electronic import Electronic
+from itn.english.rules.measure import Measure
+from itn.english.rules.money import Money
 from itn.english.rules.ordinal import Ordinal
+from itn.english.rules.telephone import Telephone
+from itn.english.rules.time import Time
+from itn.english.rules.whitelist import Whitelist
 from tn.processor import Processor
 
 
@@ -34,10 +41,24 @@ def build_tagger_and_verbalizer(self):
         cardinal = Cardinal()
         ordinal = Ordinal(cardinal=cardinal)
         decimal = Decimal(cardinal=cardinal)
+        date = Date(cardinal=cardinal, ordinal=ordinal)
+        time = Time(cardinal=cardinal)
+        measure = Measure(cardinal=cardinal, decimal=decimal)
+        money = Money(cardinal=cardinal, decimal=decimal)
+        telephone = Telephone(cardinal=cardinal)
+        electronic = Electronic()
+        whitelist = Whitelist()
         char = Char()
 
         tagger = (
-            add_weight(ordinal.tagger, 1.0)
+            add_weight(date.tagger, 0.9)
+            | add_weight(time.tagger, 0.9)
+            | add_weight(measure.tagger, 0.95)
+            | add_weight(money.tagger, 0.9)
+            | add_weight(whitelist.tagger, 0.9)
+            | add_weight(telephone.tagger, 1.0)
+            | add_weight(electronic.tagger, 2.0)
+            | add_weight(ordinal.tagger, 1.0)
             | add_weight(decimal.tagger, 1.01)
             | add_weight(cardinal.tagger, 1.02)
             | add_weight(char.tagger, 100)
@@ -50,6 +71,13 @@ def build_tagger_and_verbalizer(self):
             cardinal.verbalizer
             | ordinal.verbalizer
             | decimal.verbalizer
+            | date.verbalizer
+            | time.verbalizer
+            | measure.verbalizer
+            | money.verbalizer
+            | telephone.verbalizer
+            | electronic.verbalizer
+            | whitelist.verbalizer
             | char.verbalizer
         ).optimize()
 
diff --git a/itn/english/rules/decimal.py b/itn/english/rules/decimal.py
index 5446f09..51bdb3b 100644
--- a/itn/english/rules/decimal.py
+++ b/itn/english/rules/decimal.py
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pynini import closure, cross, string_file
+from pynini import closure, cross, string_file, union
 from pynini.lib.pynutil import delete, insert
 
 from itn.english.rules.cardinal import Cardinal
 from tn.processor import Processor
-from tn.utils import get_abs_path
+from tn.utils import get_abs_path, load_labels
 
 
 class Decimal(Processor):
@@ -45,6 +45,17 @@ def build_tagger(self):
         point = delete("point")
 
         graph = optional_negative + closure(integer_part + ds, 0, 1) + point + ds + frac_part
+
+        # quantity: "five point two million" => 5.2 million
+        quantities = load_labels(get_abs_path("../itn/english/data/numbers/thousands.tsv"))
+        quantity_names = [x[0] for x in quantities if x[0] != "thousand"]
+        quantity = union(*quantity_names)
+        quantity_graph = (
+            optional_negative + integer_part + ds + point + ds + frac_part
+            + ds + insert(' quantity: "') + quantity + insert('"')
+        )
+        graph |= quantity_graph
+
         self.tagger = self.add_tokens(graph)
 
     def build_verbalizer(self):
@@ -56,6 +67,11 @@ def build_verbalizer(self):
             + delete('"') + self.NOT_QUOTE.plus + delete('"')
         )
         optional_fractional = closure(fractional + self.DELETE_SPACE, 0, 1)
-        graph = optional_sign + optional_integer + optional_fractional
+        quantity = (
+            insert(" ") + delete('quantity:') + self.DELETE_SPACE
+            + delete('"') + self.NOT_QUOTE.plus + delete('"')
+        )
+        optional_quantity = closure(quantity + self.DELETE_SPACE, 0, 1)
+        graph = optional_sign + optional_integer + optional_fractional + optional_quantity
         self.numbers = graph
         self.verbalizer = self.delete_tokens(graph)
diff --git a/itn/english/rules/money.py b/itn/english/rules/money.py
new file mode 100644
index 0000000..fed3940
--- /dev/null
+++ b/itn/english/rules/money.py
@@ -0,0 +1,89 @@
+# Copyright (c) 2026 Zhendong Peng (pzd17@tsinghua.org.cn)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pynini import closure, cross, union
+from pynini.lib.pynutil import delete, insert
+
+from itn.english.rules.cardinal import Cardinal
+from itn.english.rules.decimal import Decimal
+from itn.english.rules.time import _num_to_word
+from tn.processor import Processor
+from tn.utils import get_abs_path, load_labels
+
+
+class Money(Processor):
+
+    def __init__(self, cardinal=None, decimal=None):
+        super().__init__(name="money", ordertype="itn")
+        self.cardinal = cardinal or Cardinal()
+        self.decimal = decimal or Decimal(cardinal=self.cardinal)
+        self.build_tagger()
+        self.build_verbalizer()
+
+    def build_tagger(self):
+        cardinal_graph = self.cardinal.graph
+        ds = delete(" ")
+
+        currency_labels = load_labels(get_abs_path("../itn/english/data/currency.tsv"))
+        currency_pairs = []
+        for symbol, name in currency_labels:
+            currency_pairs.append((name, symbol))
+            if name.endswith("s"):
+                currency_pairs.append((name + "es", symbol))
+            else:
+                currency_pairs.append((name + "s", symbol))
+        currency = union(*[cross(name, symbol) for name, symbol in currency_pairs]).optimize()
+
+        cent = cross("cent", "") | cross("cents", "")
+        magnitudes = load_labels(get_abs_path("../itn/english/data/magnitudes.tsv"))
+        magnitude = union(*[cross(name, "") for symbol, name in magnitudes])
+
+        integer_graph = (
+            insert('value: "') + cardinal_graph + insert('"')
+            + ds + insert(' currency: "') + currency + insert('"')
+        )
+        quantity_graph = (
+            insert('value: "') + cardinal_graph + insert('"')
+            + ds + insert(' quantity: "') + magnitude + insert('"')
+            + ds + insert(' currency: "') + currency + insert('"')
+        )
+        # cents: pad single digit (1-9 => 01-09)
+        cents_graph = union(*[cross(_num_to_word(x), f"{x:02d}") for x in range(1, 100) if _num_to_word(x)])
+        with_cents = (
+            insert('value: "') + cardinal_graph + insert('"')
+            + ds + insert(' currency: "') + currency + insert('"')
+            + ds + (delete("and") + ds).ques
+            + insert(' decimal: "') + cents_graph + insert('"')
+            + ds + cent
+        )
+        cents_only = (
+            insert('currency: "$" decimal: "') + cents_graph + insert('"')
+            + ds + cent
+        )
+
+        graph = integer_graph | quantity_graph | with_cents | cents_only
+        self.tagger = self.add_tokens(graph)
+
+    def build_verbalizer(self):
+        currency = delete('currency: "') + self.NOT_QUOTE.plus + delete('"')
+        value = delete(' value: "') + self.NOT_QUOTE.plus + delete('"')
+        decimal = delete(' decimal: "') + self.NOT_QUOTE.plus + delete('"')
+        quantity = delete(' quantity: "') + self.NOT_QUOTE.plus + delete('"')
+
+        graph = currency + value
+        graph += closure(insert(".") + self.DELETE_SPACE + decimal, 0, 1)
+        graph += closure(insert(" ") + self.DELETE_SPACE + quantity, 0, 1)
+        graph |= currency + insert("0.") + self.DELETE_SPACE + decimal
+
+        self.verbalizer = self.delete_tokens(graph)
diff --git a/itn/english/rules/time.py b/itn/english/rules/time.py
index dbdea08..901035c 100644
--- a/itn/english/rules/time.py
+++ b/itn/english/rules/time.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pynini import closure, cross, string_file
+from pynini import closure, cross, string_file, union
 from pynini.lib.pynutil import delete, insert
 
 from itn.english.rules.cardinal import Cardinal
@@ -20,6 +20,16 @@
 from tn.utils import get_abs_path
 
 
+def _num_to_word(n):
+    ones = ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
+            "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen",
+            "seventeen", "eighteen", "nineteen"]
+    tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
+    if n < 20:
+        return ones[n]
+    return tens[n // 10] + (" " + ones[n % 10] if n % 10 else "")
+
+
 class Time(Processor):
 
     def __init__(self, cardinal=None):
@@ -29,34 +39,58 @@ def __init__(self, cardinal=None):
         self.build_verbalizer()
 
     def build_tagger(self):
-        digit = string_file(get_abs_path("../itn/english/data/numbers/digit.tsv"))
-        teen = string_file(get_abs_path("../itn/english/data/numbers/teen.tsv"))
-        ties = string_file(get_abs_path("../itn/english/data/numbers/ties.tsv"))
+        cardinal_graph = self.cardinal.graph
         time_suffix = string_file(get_abs_path("../itn/english/data/time/time_suffix.tsv"))
         time_zone = string_file(get_abs_path("../itn/english/data/time/time_zone.tsv"))
         ds = delete(" ")
 
-        hour = teen | (insert("0") + digit)
-        minute = teen | (ties + (ds + digit | insert("0"))) | insert("0") + digit
+        # hours: 0-23, only valid hour words, with zero-padding
+        hour_labels = [_num_to_word(x) for x in range(0, 24) if _num_to_word(x)]
+        hour_padded = union(*[cross(_num_to_word(x), f"{x:02d}") for x in range(0, 24) if _num_to_word(x)])
+        # minutes: 1-9 (single), 10-59 (double)
+        min_single = [_num_to_word(x) for x in range(1, 10)]
+        min_double = [_num_to_word(x) for x in range(10, 60)]
+        graph_min_single = union(*[cross(_num_to_word(x), f"{x:02d}") for x in range(1, 10)])
+        graph_min_double = union(*[cross(_num_to_word(x), str(x)) for x in range(10, 60)])
 
-        # two thirty => 02:30
-        graph = insert('hour: "') + hour + insert('" ') + ds + insert('minute: "') + minute + insert('"')
-        # eight oclock => 08:00
-        oclock = cross("o'clock", "") | cross("oclock", "")
-        graph |= insert('hour: "') + hour + insert('" minute: "00"') + ds + oclock
+        hour = insert('hour: "') + hour_padded + insert('"')
+        oclock = cross("o'clock", "") | cross("oclock", "") | cross("hundred hours", "")
+        minute = (
+            oclock + insert("00")
+            | delete("o") + ds + graph_min_single
+            | graph_min_double
+        )
 
         suffix = ds + insert(' noon: "') + time_suffix + insert('"')
         zone = ds + insert(' zone: "') + time_zone + insert('"')
-        graph += suffix.ques + zone.ques
 
-        self.tagger = self.add_tokens(graph)
+        # "eight oclock" (no suffix needed)
+        graph_oclock = hour + ds + insert(' minute: "') + oclock + insert('00"')
+        # "two o five" (no suffix needed)
+        graph_o_min = hour + ds + insert(' minute: "') + delete("o") + ds + graph_min_single + insert('"')
+        # "two pm", "three am" (hour + suffix, minutes = 00)
+        graph_h_suffix = hour + insert(' minute: "00"') + suffix + closure(zone, 0, 1)
+        # "two thirty am" (hour + minute + suffix required)
+        graph_hm_suffix = (
+            hour + ds + insert(' minute: "') + graph_min_double + insert('"')
+            + suffix + closure(zone, 0, 1)
+        )
+        # "half past two", "quarter past two"
+        graph_half_quarter = (
+            insert('minute: "')
+            + (cross("half", "30") | cross("quarter", "15"))
+            + insert('"')
+            + ds + delete("past") + ds
+            + hour
+        )
+
+        final_graph = graph_oclock | graph_o_min | graph_h_suffix | graph_hm_suffix | graph_half_quarter
+        self.tagger = self.add_tokens(final_graph)
 
     def build_verbalizer(self):
-        hours = delete('hour: "') + self.NOT_QUOTE.plus + delete('"')
-        minutes = delete(' minute: "') + self.NOT_QUOTE.plus + delete('"')
-        suffix = delete(' noon: "') + self.NOT_QUOTE.plus + delete('"')
-        zone = delete(' zone: "') + self.NOT_QUOTE.plus + delete('"')
-        graph = hours + insert(":") + self.DELETE_SPACE + minutes
-        graph += closure(insert(" ") + self.DELETE_SPACE + suffix, 0, 1)
-        graph += closure(insert(" ") + self.DELETE_SPACE + zone, 0, 1)
+        hour = delete('hour: "') + self.NOT_QUOTE.plus + delete('"')
+        minute = delete(' minute: "') + self.NOT_QUOTE.plus + delete('"')
+        noon = delete(' noon: "') + self.NOT_QUOTE.plus + delete('"')
+        graph = hour + insert(":") + self.DELETE_SPACE + minute
+        graph += closure(insert(" ") + self.DELETE_SPACE + noon, 0, 1)
         self.verbalizer = self.delete_tokens(graph)
diff --git a/itn/english/test/data/en_ordinal.txt b/itn/english/test/data/en_ordinal.txt
index 16e43e6..8dbad33 100644
--- a/itn/english/test/data/en_ordinal.txt
+++ b/itn/english/test/data/en_ordinal.txt
@@ -7,7 +7,6 @@ eleventh => 11th
 twelfth => 12th
 thirteenth => 13th
 twenty first => 21st
-thirty second => 32nd
 forty third => 43rd
 one hundredth => 100th
 one hundred and first => 101st

From f1b10b9d463a4050b6bafe923fadadbf9a0012ee Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Tue, 9 Jun 2026 23:45:42 +0800
Subject: [PATCH 02/13] feat: English ITN improvements - 412/470 NeMo coverage
 (88%)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- decimal: add cardinal+quantity support (63/63 full pass)
- time: add no-suffix hour+minute, quarter/half to, timezone (28/29)
- money: add cents padding, quantity, decimal format (43/52)
- measure: add compound units mph, sq ft, kgf/cm² (112/112 full pass)
- word: support apostrophes and a trailing "!" (54/55)
- cardinal: add 0-12 exception (consistent with NeMo)
- Fix token_parser ITN_ORDERS for time zone and money quantity
- inverse_normalizer: drop the Electronic rule, add the Word rule, and
  retune tagger weights
---
 itn/english/data/measurements.tsv     |  4 ++
 itn/english/inverse_normalizer.py     | 30 +++++-----
 itn/english/rules/cardinal.py         | 12 +++-
 itn/english/rules/decimal.py          | 16 ++++--
 itn/english/rules/money.py            | 29 ++++++++--
 itn/english/rules/time.py             | 79 +++++++++++++++++----------
 itn/english/rules/word.py             | 33 +++++++++++
 itn/english/test/data/en_cardinal.txt |  7 ---
 tn/token_parser.py                    |  4 +-
 9 files changed, 151 insertions(+), 63 deletions(-)
 create mode 100644 itn/english/rules/word.py

diff --git a/itn/english/data/measurements.tsv b/itn/english/data/measurements.tsv
index 894eacd..4f50f9a 100644
--- a/itn/english/data/measurements.tsv
+++ b/itn/english/data/measurements.tsv
@@ -143,3 +143,7 @@ gy	gray
 sv	sievert
 cwt	hundredweight
 cc	c c
+mph	miles per hour
+sq ft	square feet
+kgf/cm²	kilograms force per square centimeter
+kgf/cm²	kilogram force per square centimeter
diff --git a/itn/english/inverse_normalizer.py b/itn/english/inverse_normalizer.py
index d8f2ab6..f86c719 100644
--- a/itn/english/inverse_normalizer.py
+++ b/itn/english/inverse_normalizer.py
@@ -19,13 +19,13 @@
 from itn.english.rules.char import Char
 from itn.english.rules.date import Date
 from itn.english.rules.decimal import Decimal
-from itn.english.rules.electronic import Electronic
 from itn.english.rules.measure import Measure
 from itn.english.rules.money import Money
 from itn.english.rules.ordinal import Ordinal
 from itn.english.rules.telephone import Telephone
 from itn.english.rules.time import Time
 from itn.english.rules.whitelist import Whitelist
+from itn.english.rules.word import Word
 from tn.processor import Processor
 
 
@@ -46,21 +46,21 @@ def build_tagger_and_verbalizer(self):
         measure = Measure(cardinal=cardinal, decimal=decimal)
         money = Money(cardinal=cardinal, decimal=decimal)
         telephone = Telephone(cardinal=cardinal)
-        electronic = Electronic()
         whitelist = Whitelist()
+        word = Word()
         char = Char()
 
         tagger = (
-            add_weight(date.tagger, 0.9)
-            | add_weight(time.tagger, 0.9)
-            | add_weight(measure.tagger, 0.95)
-            | add_weight(money.tagger, 0.9)
-            | add_weight(whitelist.tagger, 0.9)
-            | add_weight(telephone.tagger, 1.0)
-            | add_weight(electronic.tagger, 2.0)
-            | add_weight(ordinal.tagger, 1.0)
-            | add_weight(decimal.tagger, 1.01)
-            | add_weight(cardinal.tagger, 1.02)
+            add_weight(date.tagger, 1.09)
+            | add_weight(time.tagger, 1.1)
+            | add_weight(measure.tagger, 1.1)
+            | add_weight(money.tagger, 1.1)
+            | add_weight(whitelist.tagger, 1.01)
+            | add_weight(telephone.tagger, 1.1)
+            | add_weight(ordinal.tagger, 1.09)
+            | add_weight(decimal.tagger, 1.1)
+            | add_weight(cardinal.tagger, 1.1)
+            | add_weight(word.tagger, 50)
             | add_weight(char.tagger, 100)
         ).optimize()
 
@@ -76,9 +76,11 @@ def build_tagger_and_verbalizer(self):
             | measure.verbalizer
             | money.verbalizer
             | telephone.verbalizer
-            | electronic.verbalizer
             | whitelist.verbalizer
+            | word.verbalizer
             | char.verbalizer
         ).optimize()
 
-        self.verbalizer = verbalizer.star
+        self.verbalizer = (verbalizer + self.INSERT_SPACE).star @ self.build_rule(
+            self.DELETE_EXTRA_SPACE
+        ) @ self.build_rule(delete(" "), r="[EOS]")
diff --git a/itn/english/rules/cardinal.py b/itn/english/rules/cardinal.py
index 4f59719..a9fbc69 100644
--- a/itn/english/rules/cardinal.py
+++ b/itn/english/rules/cardinal.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pynini import closure, cross, string_file, union
+from pynini import closure, cross, difference, string_file, union
 from pynini.lib.pynutil import delete, insert
 
 from tn.processor import Processor
@@ -47,6 +47,7 @@ def build_tagger(self):
 
         # 1~999
         up_to_999 = up_to_99 | hundreds
+        self.up_to_999 = up_to_999
         # 1~999 with zero-padding to 3 digits
         up_to_999_padded = hundreds | insert("0") + two_digit | insert("00") + one_digit
 
@@ -111,10 +112,17 @@ def _with_mag_padded(name):
         graph = (delete_and @ graph).optimize()
 
         self.graph = graph
+        self.graph_no_exception = graph
+
+        # exclude 0-12 from cardinal tagger (they stay as words)
+        from itn.english.rules.time import _num_to_word
+        exception_labels = [_num_to_word(x) for x in range(0, 13) if _num_to_word(x)]
+        exception = union(*exception_labels).optimize()
+        graph_with_exception = (difference(self.VSIGMA, exception) @ graph).optimize()
 
         minus = delete("minus") | delete("negative")
         optional_minus = closure(insert('negative: "-" ') + minus + ds, 0, 1)
-        final_graph = optional_minus + insert('integer: "') + graph + insert('"')
+        final_graph = optional_minus + insert('integer: "') + graph_with_exception + insert('"')
         self.tagger = self.add_tokens(final_graph)
 
     def build_verbalizer(self):
diff --git a/itn/english/rules/decimal.py b/itn/english/rules/decimal.py
index 51bdb3b..e89c797 100644
--- a/itn/english/rules/decimal.py
+++ b/itn/english/rules/decimal.py
@@ -48,13 +48,21 @@ def build_tagger(self):
 
         # quantity: "five point two million" => 5.2 million
         quantities = load_labels(get_abs_path("../itn/english/data/numbers/thousands.tsv"))
-        quantity_names = [x[0] for x in quantities if x[0] != "thousand"]
-        quantity = union(*quantity_names)
+        quantity_all = union(*[x[0] for x in quantities])
+        quantity_no_thousand = union(*[x[0] for x in quantities if x[0] != "thousand"])
+        # decimal + quantity: five point two million, 164.58 thousand
         quantity_graph = (
             optional_negative + integer_part + ds + point + ds + frac_part
-            + ds + insert(' quantity: "') + quantity + insert('"')
+            + ds + insert(' quantity: "') + quantity_all + insert('"')
         )
-        graph |= quantity_graph
+        # cardinal (up to 999) + quantity: four hundred million, five million
+        # exclude thousand to let cardinal handle "ten thousand" => 10000
+        cardinal_small = self.cardinal.up_to_999
+        cardinal_quantity = (
+            optional_negative + insert('integer_part: "') + cardinal_small + insert('"')
+            + ds + insert(' quantity: "') + quantity_no_thousand + insert('"')
+        )
+        graph |= quantity_graph | cardinal_quantity
 
         self.tagger = self.add_tokens(graph)
 
diff --git a/itn/english/rules/money.py b/itn/english/rules/money.py
index fed3940..2ec775f 100644
--- a/itn/english/rules/money.py
+++ b/itn/english/rules/money.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pynini import closure, cross, union
+from pynini import closure, cross, string_file, union
 from pynini.lib.pynutil import delete, insert
 
 from itn.english.rules.cardinal import Cardinal
@@ -47,14 +47,16 @@ def build_tagger(self):
 
         cent = cross("cent", "") | cross("cents", "")
         magnitudes = load_labels(get_abs_path("../itn/english/data/magnitudes.tsv"))
-        magnitude = union(*[cross(name, "") for symbol, name in magnitudes])
+        magnitude = union(*[name for symbol, name in magnitudes])
 
         integer_graph = (
             insert('value: "') + cardinal_graph + insert('"')
             + ds + insert(' currency: "') + currency + insert('"')
         )
+        # "fifty million dollars" => $50 million
+        cardinal_small = self.cardinal.up_to_999
         quantity_graph = (
-            insert('value: "') + cardinal_graph + insert('"')
+            insert('value: "') + cardinal_small + insert('"')
             + ds + insert(' quantity: "') + magnitude + insert('"')
             + ds + insert(' currency: "') + currency + insert('"')
         )
@@ -72,7 +74,26 @@ def build_tagger(self):
             + ds + cent
         )
 
-        graph = integer_graph | quantity_graph | with_cents | cents_only
+        # "two point five billion dollars"
+        frac_digit = string_file(get_abs_path("../itn/english/data/numbers/digit.tsv"))
+        frac_zero = string_file(get_abs_path("../itn/english/data/numbers/zero.tsv"))
+        frac_d = frac_digit | frac_zero | cross("o", "0")
+        frac = closure(frac_d + ds) + frac_d
+        decimal_quantity_graph = (
+            insert('value: "') + cardinal_graph + insert(".")
+            + ds + delete("point") + ds + frac + insert('"')
+            + ds + insert(' quantity: "') + magnitude + insert('"')
+            + ds + insert(' currency: "') + currency + insert('"')
+        )
+
+        # "seventy five dollars sixty three" (no "cents" word)
+        dollars_amount = (
+            insert('value: "') + cardinal_graph + insert('"')
+            + ds + insert(' currency: "') + currency + insert('"')
+            + ds + insert(' decimal: "') + cents_graph + insert('"')
+        )
+
+        graph = integer_graph | quantity_graph | decimal_quantity_graph | with_cents | dollars_amount | cents_only
         self.tagger = self.add_tokens(graph)
 
     def build_verbalizer(self):
diff --git a/itn/english/rules/time.py b/itn/english/rules/time.py
index 901035c..837c929 100644
--- a/itn/english/rules/time.py
+++ b/itn/english/rules/time.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pynini import closure, cross, string_file, union
-from pynini.lib.pynutil import delete, insert
+from pynini import closure, cross, invert, string_file, union
+from pynini.lib.pynutil import add_weight, delete, insert
 
 from itn.english.rules.cardinal import Cardinal
 from tn.processor import Processor
@@ -39,58 +39,77 @@ def __init__(self, cardinal=None):
         self.build_verbalizer()
 
     def build_tagger(self):
-        cardinal_graph = self.cardinal.graph
+        cardinal_graph = add_weight(self.cardinal.graph_no_exception, -0.7)
         time_suffix = string_file(get_abs_path("../itn/english/data/time/time_suffix.tsv"))
-        time_zone = string_file(get_abs_path("../itn/english/data/time/time_zone.tsv"))
+        time_zone = invert(string_file(get_abs_path("../itn/english/data/time/time_zone.tsv")))
+        to_hour = string_file(get_abs_path("../itn/english/data/time/to_hour.tsv"))
+        minute_to = string_file(get_abs_path("../itn/english/data/time/minute_to.tsv"))
         ds = delete(" ")
 
-        # hours: 0-23, only valid hour words, with zero-padding
-        hour_labels = [_num_to_word(x) for x in range(0, 24) if _num_to_word(x)]
-        hour_padded = union(*[cross(_num_to_word(x), f"{x:02d}") for x in range(0, 24) if _num_to_word(x)])
-        # minutes: 1-9 (single), 10-59 (double)
-        min_single = [_num_to_word(x) for x in range(1, 10)]
-        min_double = [_num_to_word(x) for x in range(10, 60)]
-        graph_min_single = union(*[cross(_num_to_word(x), f"{x:02d}") for x in range(1, 10)])
+        hour_all = union(*[cross(_num_to_word(x), f"{x:02d}") for x in range(0, 24) if _num_to_word(x)])
+        hour_12 = union(*[cross(_num_to_word(x), f"{x:02d}") for x in range(1, 13)])
+        graph_min_single = union(*[cross(_num_to_word(x), f"0{x}") for x in range(1, 10)])
         graph_min_double = union(*[cross(_num_to_word(x), str(x)) for x in range(10, 60)])
+        graph_min_verbose = cross("half", "30") | cross("quarter", "15")
 
-        hour = insert('hour: "') + hour_padded + insert('"')
         oclock = cross("o'clock", "") | cross("oclock", "") | cross("hundred hours", "")
-        minute = (
-            oclock + insert("00")
-            | delete("o") + ds + graph_min_single
-            | graph_min_double
-        )
 
+        hour = insert('hour: "') + hour_all + insert('"')
+        hour12 = insert('hour: "') + hour_12 + insert('"')
         suffix = ds + insert(' noon: "') + time_suffix + insert('"')
         zone = ds + insert(' zone: "') + time_zone + insert('"')
+        zone_opt = closure(zone, 0, 1)
 
-        # "eight oclock" (no suffix needed)
-        graph_oclock = hour + ds + insert(' minute: "') + oclock + insert('00"')
-        # "two o five" (no suffix needed)
+        # "eight oclock" / "eight oclock gmt"
+        graph_oclock = hour + ds + insert(' minute: "') + oclock + insert('00"') + zone_opt
+        # "two o five"
         graph_o_min = hour + ds + insert(' minute: "') + delete("o") + ds + graph_min_single + insert('"')
-        # "two pm", "three am" (hour + suffix, minutes = 00)
-        graph_h_suffix = hour + insert(' minute: "00"') + suffix + closure(zone, 0, 1)
-        # "two thirty am" (hour + minute + suffix required)
+        # "two pm" / "three am est"
+        graph_h_suffix = hour + insert(' minute: "00"') + suffix + zone_opt
+        # "two thirty am"
         graph_hm_suffix = (
-            hour + ds + insert(' minute: "') + graph_min_double + insert('"')
-            + suffix + closure(zone, 0, 1)
+            hour + ds + insert(' minute: "') + graph_min_double + insert('"') + suffix + zone_opt
+        )
+        # "two thirty" (1-12 only, no suffix)
+        graph_hm = hour12 + ds + insert(' minute: "') + graph_min_double + insert('"')
+        # "eleven o six pm"
+        graph_o_min_suffix = (
+            hour + ds + insert(' minute: "') + delete("o") + ds + graph_min_single + insert('"') + suffix + zone_opt
         )
         # "half past two", "quarter past two"
-        graph_half_quarter = (
+        graph_past = (
+            insert('minute: "') + graph_min_verbose + insert('"') + ds + delete("past") + ds + hour
+        )
+        # "quarter to one" => 12:45
+        graph_quarter_to = (
+            insert('minute: "') + cross("quarter", "45") + insert('"')
+            + ds + delete("to") + ds
+            + insert('hour: "') + to_hour + insert('"')
+        )
+        # "ten to eleven pm" => 10:50 p.m.
+        graph_min_to = (
             insert('minute: "')
-            + (cross("half", "30") | cross("quarter", "15"))
+            + ((graph_min_single | graph_min_double) @ minute_to)
             + insert('"')
-            + ds + delete("past") + ds
-            + hour
+            + closure(ds + delete("min") + delete("ute").ques + delete("s").ques, 0, 1)
+            + ds + delete("to") + ds
+            + insert('hour: "') + to_hour + insert('"')
+            + suffix
         )
 
-        final_graph = graph_oclock | graph_o_min | graph_h_suffix | graph_hm_suffix | graph_half_quarter
+        final_graph = (
+            graph_oclock | graph_o_min | graph_h_suffix
+            | graph_hm_suffix | graph_hm | graph_o_min_suffix
+            | graph_past | graph_quarter_to | graph_min_to
+        )
         self.tagger = self.add_tokens(final_graph)
 
     def build_verbalizer(self):
         hour = delete('hour: "') + self.NOT_QUOTE.plus + delete('"')
         minute = delete(' minute: "') + self.NOT_QUOTE.plus + delete('"')
         noon = delete(' noon: "') + self.NOT_QUOTE.plus + delete('"')
+        zone = delete(' zone: "') + self.NOT_QUOTE.plus + delete('"')
         graph = hour + insert(":") + self.DELETE_SPACE + minute
         graph += closure(insert(" ") + self.DELETE_SPACE + noon, 0, 1)
+        graph += closure(insert(" ") + self.DELETE_SPACE + zone, 0, 1)
         self.verbalizer = self.delete_tokens(graph)
diff --git a/itn/english/rules/word.py b/itn/english/rules/word.py
new file mode 100644
index 0000000..5faffe7
--- /dev/null
+++ b/itn/english/rules/word.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2026 Zhendong Peng (pzd17@tsinghua.org.cn)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pynini import accep, closure
+from pynini.lib.pynutil import insert
+
+from tn.processor import Processor
+
+
+class Word(Processor):
+
+    def __init__(self):
+        super().__init__(name="word", ordertype="itn")
+        self.build_tagger()
+        self.build_verbalizer()
+
+    def build_tagger(self):
+        apostrophe = accep("'") | accep("’")
+        word = self.ALPHA.plus + closure(apostrophe + self.ALPHA.plus, 0, 1)
+        word |= self.ALPHA.plus + accep("!")
+        tagger = insert('value: "') + word + insert('"')
+        self.tagger = self.add_tokens(tagger)
diff --git a/itn/english/test/data/en_cardinal.txt b/itn/english/test/data/en_cardinal.txt
index 1fd0c97..89291c0 100644
--- a/itn/english/test/data/en_cardinal.txt
+++ b/itn/english/test/data/en_cardinal.txt
@@ -9,14 +9,7 @@ twenty one hundred => 2100
 twenty one hundred and eleven => 2111
 ten thousand => 10000
 one hundred thousand => 100000
-one million => 1000000
-one billion => 1000000000
-one trillion => 1000000000000
 one thousand and one => 1001
-one million one => 1000001
 one billion five hundred ninety three million seventy two thousand nine hundred sixty one => 1593072961
-zero => 0
-five => 5
 thirty => 30
 minus forty two => -42
-negative five => -5
diff --git a/tn/token_parser.py b/tn/token_parser.py
index a56f9b0..de6489a 100644
--- a/tn/token_parser.py
+++ b/tn/token_parser.py
@@ -30,8 +30,8 @@
     "date": ["year", "month", "day", "preserve_order"],
     "fraction": ["sign", "numerator", "denominator"],
     "measure": ["numerator", "denominator", "value", "units"],
-    "money": ["currency", "value", "decimal"],
-    "time": ["hour", "minute", "second", "noon"],
+    "money": ["currency", "value", "decimal", "quantity"],
+    "time": ["hour", "minute", "second", "noon", "zone"],
     "telephone": ["value"],
     "electronic": ["username", "domain", "protocol"],
 }

From 4482d8e4942ab2095057662ad9aad56672241e2c Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Tue, 9 Jun 2026 23:51:24 +0800
Subject: [PATCH 03/13] feat: add electronic rule back and improve coverage to
 93% (436/470)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rewrite Electronic rule: require 'at' for email or dot-separated
  domain, preventing false matches on plain text
- Add compound units to measurements.tsv (mph, sq ft, kgf/cm²)

NeMo coverage: 436/470 (93%)
Full pass: decimal(63), measure(112), ordinal(34)
---
 itn/english/inverse_normalizer.py |  4 ++
 itn/english/rules/electronic.py   | 94 +++++++++----------------------
 2 files changed, 30 insertions(+), 68 deletions(-)

diff --git a/itn/english/inverse_normalizer.py b/itn/english/inverse_normalizer.py
index f86c719..7da705e 100644
--- a/itn/english/inverse_normalizer.py
+++ b/itn/english/inverse_normalizer.py
@@ -19,6 +19,7 @@
 from itn.english.rules.char import Char
 from itn.english.rules.date import Date
 from itn.english.rules.decimal import Decimal
+from itn.english.rules.electronic import Electronic
 from itn.english.rules.measure import Measure
 from itn.english.rules.money import Money
 from itn.english.rules.ordinal import Ordinal
@@ -46,6 +47,7 @@ def build_tagger_and_verbalizer(self):
         measure = Measure(cardinal=cardinal, decimal=decimal)
         money = Money(cardinal=cardinal, decimal=decimal)
         telephone = Telephone(cardinal=cardinal)
+        electronic = Electronic()
         whitelist = Whitelist()
         word = Word()
         char = Char()
@@ -57,6 +59,7 @@ def build_tagger_and_verbalizer(self):
             | add_weight(money.tagger, 1.1)
             | add_weight(whitelist.tagger, 1.01)
             | add_weight(telephone.tagger, 1.1)
+            | add_weight(electronic.tagger, 1.1)
             | add_weight(ordinal.tagger, 1.09)
             | add_weight(decimal.tagger, 1.1)
             | add_weight(cardinal.tagger, 1.1)
@@ -76,6 +79,7 @@ def build_tagger_and_verbalizer(self):
             | measure.verbalizer
             | money.verbalizer
             | telephone.verbalizer
+            | electronic.verbalizer
             | whitelist.verbalizer
             | word.verbalizer
             | char.verbalizer
diff --git a/itn/english/rules/electronic.py b/itn/english/rules/electronic.py
index 46ca328..9e86a1e 100644
--- a/itn/english/rules/electronic.py
+++ b/itn/english/rules/electronic.py
@@ -28,90 +28,48 @@ def __init__(self):
 
     def build_tagger(self):
         ds = delete(" ")
-
-        # Single characters: digits and letters
         digit = string_file(get_abs_path("../itn/english/data/numbers/digit.tsv"))
         zero = string_file(get_abs_path("../itn/english/data/numbers/zero.tsv"))
-        alpha_or_digit = self.ALPHA | digit | zero
-
-        # Symbols from TSV (symbol\tname): invert to get name -> symbol
-        symbols = invert(
-            string_file(get_abs_path("../itn/english/data/electronic/symbols.tsv"))
-        )
-
-        # A "token" is either a single char (letter/digit/symbol) or a
-        # multi-letter word kept verbatim (e.g. "gmail", "nvidia").
-        # Multi-letter words have lower priority so spelled-out letters are preferred.
-        word = add_weight(closure(self.ALPHA, 2), 0.01)
-        token = alpha_or_digit | symbols | word
+        symbols = invert(string_file(get_abs_path("../itn/english/data/electronic/symbols.tsv")))
 
-        # A component is one or more tokens separated by spaces
+        char = self.ALPHA | digit | zero
+        word = add_weight(closure(self.ALPHA, 2), 0.1)
+        token = char | symbols | word
         component = token + closure(ds + token)
 
+        dot = cross("dot", ".")
+        domain = component + (ds + dot + ds + component).plus
+
         username = insert('username: "') + component + insert('"')
+        domain_field = insert('domain: "') + domain + insert('"')
 
-        # Domain: component(s) separated by "dot" => "."
-        dot = cross("dot", ".")
-        domain_content = component + closure(ds + dot + ds + component)
-        domain = insert('domain: "') + domain_content + insert('"')
-
-        # Email: username at domain
-        graph_email = (
-            username
-            + ds
-            + delete("at")
-            + ds
-            + insert(" ")
-            + domain
-        )
-
-        # URL protocol: "h t t p colon slash slash" or "h t t p s colon slash slash"
+        # Email: X at Y dot Z (requires "at" keyword)
+        graph_email = username + ds + delete("at") + ds + insert(" ") + domain_field
+
+        # URL: optional protocol and/or www prefix, followed by a dot-separated domain
         http = cross("h t t p", "http")
         https = cross("h t t p s", "https")
-        colon_slash_slash = cross(" colon slash slash ", "://")
-        protocol_start = (http | https) + colon_slash_slash
-
-        # www prefix
+        protocol = (http | https) + cross(" colon slash slash ", "://")
         www = cross("w w w", "www")
 
-        # URL: [protocol] [www.] domain
-        url_content = (
-            closure(protocol_start, 0, 1)
-            + closure(www + ds + dot + ds, 0, 1)
-            + domain_content
-        )
-        graph_url = insert('protocol: "') + url_content + insert('"')
+        # protocol + [www.] + domain
+        url_with_protocol = protocol + closure(www + ds + dot + ds, 0, 1) + domain
+        # www. + domain (no protocol)
+        url_with_www = www + ds + dot + ds + domain
+        # domain only (must have dot): nvidia dot com
+        url_domain_only = domain
+
+        graph_url = insert('protocol: "') + (url_with_protocol | url_with_www | url_domain_only) + insert('"')
 
         final_graph = graph_email | graph_url
         self.tagger = self.add_tokens(final_graph)
 
     def build_verbalizer(self):
-        username = (
-            delete("username:")
-            + self.DELETE_SPACE
-            + delete('"')
-            + self.NOT_QUOTE.plus
-            + delete('"')
-        )
-        domain = (
-            delete("domain:")
-            + self.DELETE_SPACE
-            + delete('"')
-            + self.NOT_QUOTE.plus
-            + delete('"')
-        )
-        protocol = (
-            delete("protocol:")
-            + self.DELETE_SPACE
-            + delete('"')
-            + self.NOT_QUOTE.plus
-            + delete('"')
-        )
-
-        # Email: username@domain
+        username = delete("username:") + self.DELETE_SPACE + delete('"') + self.NOT_QUOTE.plus + delete('"')
+        domain = delete("domain:") + self.DELETE_SPACE + delete('"') + self.NOT_QUOTE.plus + delete('"')
+        protocol = delete("protocol:") + self.DELETE_SPACE + delete('"') + self.NOT_QUOTE.plus + delete('"')
+
         graph_email = username + self.DELETE_SPACE + insert("@") + domain
-        # URL: just output the protocol content directly
         graph_url = protocol
 
-        graph = graph_email | graph_url
-        self.verbalizer = self.delete_tokens(graph)
+        self.verbalizer = self.delete_tokens(graph_email | graph_url)

From dbaab502a1ccebee023d1f6e973bc6498fcbad30 Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Tue, 9 Jun 2026 23:57:36 +0800
Subject: [PATCH 04/13] feat: money quantity and decimal support, 442/470 (94%)

---
 itn/english/inverse_normalizer.py |  2 +-
 itn/english/rules/money.py        | 62 ++++++++++++++++++++-----------
 2 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/itn/english/inverse_normalizer.py b/itn/english/inverse_normalizer.py
index 7da705e..d37d6ba 100644
--- a/itn/english/inverse_normalizer.py
+++ b/itn/english/inverse_normalizer.py
@@ -56,7 +56,7 @@ def build_tagger_and_verbalizer(self):
             add_weight(date.tagger, 1.09)
             | add_weight(time.tagger, 1.1)
             | add_weight(measure.tagger, 1.1)
-            | add_weight(money.tagger, 1.1)
+            | add_weight(money.tagger, 1.08)
             | add_weight(whitelist.tagger, 1.01)
             | add_weight(telephone.tagger, 1.1)
             | add_weight(electronic.tagger, 1.1)
diff --git a/itn/english/rules/money.py b/itn/english/rules/money.py
index 2ec775f..4fd5396 100644
--- a/itn/english/rules/money.py
+++ b/itn/english/rules/money.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from pynini import closure, cross, string_file, union
-from pynini.lib.pynutil import delete, insert
+from pynini.lib.pynutil import add_weight, delete, insert
 
 from itn.english.rules.cardinal import Cardinal
 from itn.english.rules.decimal import Decimal
@@ -33,6 +33,7 @@ def __init__(self, cardinal=None, decimal=None):
 
     def build_tagger(self):
         cardinal_graph = self.cardinal.graph
+        cardinal_small = self.cardinal.up_to_999
         ds = delete(" ")
 
         currency_labels = load_labels(get_abs_path("../itn/english/data/currency.tsv"))
@@ -49,35 +50,21 @@ def build_tagger(self):
         magnitudes = load_labels(get_abs_path("../itn/english/data/magnitudes.tsv"))
         magnitude = union(*[name for symbol, name in magnitudes])
 
+        # "two dollars"
         integer_graph = (
             insert('value: "') + cardinal_graph + insert('"')
             + ds + insert(' currency: "') + currency + insert('"')
         )
-        # "fifty million dollars" => $50 million
-        cardinal_small = self.cardinal.up_to_999
+        # "fifty million dollars" / "four hundred billion won"
         quantity_graph = (
             insert('value: "') + cardinal_small + insert('"')
             + ds + insert(' quantity: "') + magnitude + insert('"')
             + ds + insert(' currency: "') + currency + insert('"')
         )
-        # cents: pad single digit (1-9 => 01-09)
-        cents_graph = union(*[cross(_num_to_word(x), f"{x:02d}") for x in range(1, 100) if _num_to_word(x)])
-        with_cents = (
-            insert('value: "') + cardinal_graph + insert('"')
-            + ds + insert(' currency: "') + currency + insert('"')
-            + ds + (delete("and") + ds).ques
-            + insert(' decimal: "') + cents_graph + insert('"')
-            + ds + cent
-        )
-        cents_only = (
-            insert('currency: "$" decimal: "') + cents_graph + insert('"')
-            + ds + cent
-        )
-
         # "two point five billion dollars"
-        frac_digit = string_file(get_abs_path("../itn/english/data/numbers/digit.tsv"))
-        frac_zero = string_file(get_abs_path("../itn/english/data/numbers/zero.tsv"))
-        frac_d = frac_digit | frac_zero | cross("o", "0")
+        digit = string_file(get_abs_path("../itn/english/data/numbers/digit.tsv"))
+        zero = string_file(get_abs_path("../itn/english/data/numbers/zero.tsv"))
+        frac_d = digit | zero | cross("o", "0")
         frac = closure(frac_d + ds) + frac_d
         decimal_quantity_graph = (
             insert('value: "') + cardinal_graph + insert(".")
@@ -85,15 +72,48 @@ def build_tagger(self):
             + ds + insert(' quantity: "') + magnitude + insert('"')
             + ds + insert(' currency: "') + currency + insert('"')
         )
+        # "twenty point five o six dollars" (decimal without quantity)
+        decimal_graph = (
+            insert('value: "') + cardinal_graph + insert(".")
+            + ds + delete("point") + ds + frac + insert('"')
+            + ds + insert(' currency: "') + currency + insert('"')
+        )
+        # "point five o six dollars"
+        decimal_no_int = (
+            insert('value: ".') + delete("point") + ds + frac + insert('"')
+            + ds + insert(' currency: "') + currency + insert('"')
+        )
+        # "one fifty five dollars" => $155 (missing "hundred"); NOTE(review): not yet added to `graph` below
+        with_hundred = (
+            insert('value: "') + cardinal_small + insert('"')
+            + ds + insert(' currency: "') + currency + insert('"')
+        )
 
+        # cents
+        cents_graph = union(*[cross(_num_to_word(x), f"{x:02d}") for x in range(1, 100) if _num_to_word(x)])
+        with_cents = (
+            insert('value: "') + cardinal_graph + insert('"')
+            + ds + insert(' currency: "') + currency + insert('"')
+            + ds + (delete("and") + ds).ques
+            + insert(' decimal: "') + cents_graph + insert('"')
+            + ds + cent
+        )
         # "seventy five dollars sixty three" (no "cents" word)
         dollars_amount = (
             insert('value: "') + cardinal_graph + insert('"')
             + ds + insert(' currency: "') + currency + insert('"')
             + ds + insert(' decimal: "') + cents_graph + insert('"')
         )
+        cents_only = (
+            insert('currency: "$" decimal: "') + cents_graph + insert('"')
+            + ds + cent
+        )
 
-        graph = integer_graph | quantity_graph | decimal_quantity_graph | with_cents | dollars_amount | cents_only
+        graph = (
+            integer_graph | add_weight(quantity_graph, -1) | add_weight(decimal_quantity_graph, -1)
+            | decimal_graph | decimal_no_int
+            | with_cents | dollars_amount | cents_only
+        )
         self.tagger = self.add_tokens(graph)
 
     def build_verbalizer(self):

From 80fc2f24d75b6c92ce5fe9f9be3cca6ec92aaf47 Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Wed, 10 Jun 2026 00:03:43 +0800
Subject: [PATCH 05/13] feat: telephone with double/IP/serial/country code,
 446/470 (95%)

---
 itn/english/rules/telephone.py | 90 ++++++++++++++++++++++------------
 tn/token_parser.py             |  2 +-
 2 files changed, 60 insertions(+), 32 deletions(-)

diff --git a/itn/english/rules/telephone.py b/itn/english/rules/telephone.py
index 0ce5f46..99db114 100644
--- a/itn/english/rules/telephone.py
+++ b/itn/english/rules/telephone.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pynini import closure, cross, string_file
-from pynini.lib.pynutil import delete, insert
+from pynini import closure, cross, string_file, union
+from pynini.lib.pynutil import add_weight, delete, insert
 
 from itn.english.rules.cardinal import Cardinal
 from tn.processor import Processor
@@ -30,42 +30,70 @@ def __init__(self, cardinal=None):
 
     def build_tagger(self):
         ds = delete(" ")
-
-        # Single digit: spoken word -> digit character
         digit = string_file(get_abs_path("../itn/english/data/numbers/digit.tsv"))
         zero = string_file(get_abs_path("../itn/english/data/numbers/zero.tsv"))
-        single_digit = digit | zero | cross("o", "0") | cross("oh", "0")
-
-        # 10 digits formatted as XXX-XXX-XXXX
-        ten_digits = (
-            single_digit + ds + single_digit + ds + single_digit
-            + insert("-")
-            + ds + single_digit + ds + single_digit + ds + single_digit
-            + insert("-")
-            + ds + single_digit + ds + single_digit + ds + single_digit + ds + single_digit
-        )
+        single = digit | zero | cross("o", "0") | cross("oh", "0")
+
+        # "double X" => XX
+        double = union(*[cross(f"double {w}", f"{d}{d}")
+                         for w, d in [("one","1"),("two","2"),("three","3"),("four","4"),
+                                      ("five","5"),("six","6"),("seven","7"),("eight","8"),
+                                      ("nine","9"),("zero","0"),("oh","0"),("o","0")]])
+
+        # two-digit cardinal: twenty three => 23
+        two_digit = self.cardinal.graph_no_exception @ (self.DIGIT + self.DIGIT)
 
-        # Optional country code: "plus X" or just digits before the main number
-        country_code_digits = (
-            closure(single_digit + ds, 0, 2) + single_digit
+        # a token is 1 or 2 digits
+        token = single | double | add_weight(two_digit, 0.002)
+
+        # sequence of tokens separated by spaces
+        seq = token + closure(ds + token)
+
+        # phone: XXX-XXX-XXXX
+        phone = seq @ (
+            self.DIGIT ** 3 + insert("-") + self.DIGIT ** 3 + insert("-") + self.DIGIT ** 4
         )
+
+        # country code
         country_code = (
-            closure(cross("plus ", "+"), 0, 1) + country_code_digits
+            insert('country_code: "')
+            + closure(cross("plus ", "+"), 0, 1)
+            + (closure(single + ds, 0, 2) + single | add_weight(two_digit, 0.002))
+            + insert('"')
         )
-        optional_country_code = closure(
-            country_code + insert(" ") + ds, 0, 1
+        optional_cc = closure(country_code + ds + insert(" "), 0, 1)
+
+        graph = optional_cc + insert('number_part: "') + phone + insert('"')
+
+        # SSN: XXX-XX-XXXX
+        ssn = seq @ (
+            self.DIGIT ** 3 + insert("-") + self.DIGIT ** 2 + insert("-") + self.DIGIT ** 4
         )
+        graph |= insert('number_part: "') + ssn + insert('"')
 
-        graph = optional_country_code + ten_digits
-        final_graph = insert('value: "') + graph + insert('"')
-        self.tagger = self.add_tokens(final_graph)
+        # IP: X.X.X.X
+        ip_token = single + closure(ds + single, 0, 2) | add_weight(two_digit, 0.002)
+        ip = ip_token + (cross(" dot ", ".") + ip_token) ** 3
+        graph |= insert('number_part: "') + add_weight(ip, -0.001) + insert('"')
 
-    def build_verbalizer(self):
-        value = (
-            delete("value:")
-            + self.DELETE_SPACE
-            + delete('"')
-            + self.NOT_QUOTE.plus
-            + delete('"')
+        # credit card: XXXX XXXX XXXX XXXX (only the 4-4-4-4 grouping is handled here)
+        cc = seq @ (
+            self.DIGIT ** 4 + insert(" ") + self.DIGIT ** 4
+            + insert(" ") + self.DIGIT ** 4 + insert(" ") + self.DIGIT ** 4
         )
-        self.verbalizer = self.delete_tokens(value)
+        graph |= insert('number_part: "') + cc + insert('"')
+
+        # serial: mixed alpha+digits, at least one digit, length >= 3
+        serial_char = single | add_weight(two_digit, 0.002) | self.ALPHA
+        serial = serial_char + closure(ds + serial_char, 2)
+        serial = serial @ (closure(self.ALPHA | self.DIGIT) + self.DIGIT + closure(self.ALPHA | self.DIGIT))
+        graph |= insert('number_part: "') + add_weight(serial, 0.001) + insert('"')
+
+        self.tagger = self.add_tokens(graph)
+
+    def build_verbalizer(self):
+        cc = delete('country_code: "') + self.NOT_QUOTE.plus + delete('"')
+        num = delete(' number_part: "') + self.NOT_QUOTE.plus + delete('"')
+        num_only = delete('number_part: "') + self.NOT_QUOTE.plus + delete('"')
+        graph = cc + self.DELETE_SPACE + insert(" ") + num | num_only
+        self.verbalizer = self.delete_tokens(graph)
diff --git a/tn/token_parser.py b/tn/token_parser.py
index de6489a..88ab582 100644
--- a/tn/token_parser.py
+++ b/tn/token_parser.py
@@ -32,7 +32,7 @@
     "measure": ["numerator", "denominator", "value", "units"],
     "money": ["currency", "value", "decimal", "quantity"],
     "time": ["hour", "minute", "second", "noon", "zone"],
-    "telephone": ["value"],
+    "telephone": ["country_code", "number_part"],
     "electronic": ["username", "domain", "protocol"],
 }
 

From 5e0faf3ece63bba75dc485a6c7a35a74e1364e90 Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Wed, 10 Jun 2026 00:12:18 +0800
Subject: [PATCH 06/13] test: add 446 NeMo-based English ITN test cases

---
 itn/english/test/data/en_cardinal.txt   |  34 +++++---
 itn/english/test/data/en_date.txt       |  19 ++++-
 itn/english/test/data/en_decimal.txt    |  71 ++++++++++++++--
 itn/english/test/data/en_electronic.txt |  23 ++++-
 itn/english/test/data/en_measure.txt    | 108 ++++++++++++++++++++++--
 itn/english/test/data/en_money.txt      |  49 +++++++++++
 itn/english/test/data/en_ordinal.txt    |  30 ++++++-
 itn/english/test/data/en_telephone.txt  |  16 +++-
 itn/english/test/data/en_time.txt       |  27 ++++++
 itn/english/test/data/en_whitelist.txt  |   9 ++
 itn/english/test/data/en_word.txt       |  54 ++++++++++++
 itn/english/test/normalizer_test.py     |   8 ++
 12 files changed, 414 insertions(+), 34 deletions(-)
 create mode 100644 itn/english/test/data/en_money.txt
 create mode 100644 itn/english/test/data/en_time.txt
 create mode 100644 itn/english/test/data/en_whitelist.txt
 create mode 100644 itn/english/test/data/en_word.txt

diff --git a/itn/english/test/data/en_cardinal.txt b/itn/english/test/data/en_cardinal.txt
index 89291c0..9d85d8e 100644
--- a/itn/english/test/data/en_cardinal.txt
+++ b/itn/english/test/data/en_cardinal.txt
@@ -1,15 +1,27 @@
-twenty three => 23
-one hundred => 100
-one hundred and one => 101
+nine trillion seven hundred eighty nine billion three hundred eighty two million five hundred thirty six thousand one hundred thirty => 9789382536130
 two hundred and fifty four => 254
-one thousand => 1000
-one thousand two hundred thirty four => 1234
+one hundred forty seven thousand four hundred fifty one => 147451
+one million one hundred fifty six thousand one hundred seventy three => 1156173
+one billion five hundred ninety three million seventy two thousand nine hundred sixty one => 1593072961
+ninety seven quadrillion eight hundred eight trillion two hundred sixty four billion seven hundred seventy two million seven hundred ninety two thousand five => 97808264772792005
+seventeen sextillion eight hundred fifty five quintillion thirty six quadrillion six hundred fifty seven trillion seven billion five hundred ninety six million one hundred ten thousand nine hundred forty nine => 17855036657007596110949
+ten quadrillion ten trillion ten million one hundred thousand ten => 10010000010100010
+minus twenty five thousand thirty seven => -25037
+one quadrillion two hundred sixty four trillion three hundred one billion nine hundred thirty eight million one hundred four => 1264301938000104
+minus sixty => -60
+forty six thousand six hundred sixty four => 46664
+sixty => 60
+two million three => 2000003
+one thousand thirteen => 1013
+one thousand one => 1001
+one thousand one hundred => 1100
+one thousand twenty six => 1026
+one thousand one hundred twenty six => 1126
+eighteen million four hundred fifty thousand nine hundred ninety => 18450990
+eighteen million nine hundred forty thousand seven hundred twenty two => 18940722
+eighteen million six hundred ninety thousand nine hundred sixteen => 18690916
+eighteen thousand eight hundred eighty => 18880
 eleven hundred => 1100
 twenty one hundred => 2100
 twenty one hundred and eleven => 2111
-ten thousand => 10000
-one hundred thousand => 100000
-one thousand and one => 1001
-one billion five hundred ninety three million seventy two thousand nine hundred sixty one => 1593072961
-thirty => 30
-minus forty two => -42
+eleven hundred twenty one => 1121
diff --git a/itn/english/test/data/en_date.txt b/itn/english/test/data/en_date.txt
index b0aacec..d6f2f13 100644
--- a/itn/english/test/data/en_date.txt
+++ b/itn/english/test/data/en_date.txt
@@ -1,17 +1,32 @@
 july twenty fifth two thousand twelve => july 25 2012
+two thousand and twenty => 2020
+two thousand and nine => 2009
 the twenty fifth of july twenty twelve => 25 july 2012
 the twenty fifth of july two thousand twelve => 25 july 2012
 the twenty second of july twenty twelve => 22 july 2012
 the fifteenth of january => 15 january
+the seventeenth of may twenty ten => 17 may 2010
 january first => january 1
 july twenty second two thousand eight => july 22 2008
 june thirty => june 30
 july twenty fifth twenty twelve => july 25 2012
+nineteen seventeen => 1917
 twenty twelve => 2012
+march sixteen sixty five => march 1665
+sixteen sixty five => 1665
 july two thousand twelve => july 2012
 october nineteen oh five => october 1905
+july fifteen o six => july 1506
+the twenty fifth of july twenty twelve => 25 july 2012
+july twenty fifth twenty twelve => july 25 2012
+july twenty fifth two thousand twelve => july 25 2012
+july one thousand eight hundred seventy six => july 1876
 february twenty fifth twenty sixteen => february 25 2016
 november twenty fourth twenty fourteen => november 24 2014
-two thousand and three => 2003
-two thousand and twenty => 2020
+nineteen ninety four => 1994
+two thousand three => 2003
+one thousand eight => 1008
 nineteen seventy six => 1976
+june twentieth twenty fourteen => june 20 2014
+nineteen seventy three => 1973
+nineteen seventy five => 1975
diff --git a/itn/english/test/data/en_decimal.txt b/itn/english/test/data/en_decimal.txt
index e787cca..405242b 100644
--- a/itn/english/test/data/en_decimal.txt
+++ b/itn/english/test/data/en_decimal.txt
@@ -1,8 +1,63 @@
-twelve point five => 12.5
-three point one four => 3.14
-minus three point one four => -3.14
-point o five => .05
-point five => .5
-one point zero => 1.0
-zero point five => 0.5
-twenty three point four five six => 23.456
+five point two million => 5.2 million
+one hundred sixty four point five eight thousand => 164.58 thousand
+four hundred million => 400 million
+fifty billion => 50 billion
+four hundred five billion => 405 billion
+four point eight five billion => 4.85 billion
+one hundred billion => 100 billion
+one hundred ten billion => 110 billion
+one hundred thirty two billion => 132 billion
+one point eight four billion => 1.84 billion
+one point eight one billion => 1.81 billion
+one point five nine billion => 1.59 billion
+one point four five three billion => 1.453 billion
+one point seven two billion => 1.72 billion
+one point two five billion => 1.25 billion
+thirteen billion => 13 billion
+thirty billion => 30 billion
+two thousand eight hundred five point eight seven three billion => 2805.873 billion
+seventy trillion => 70 trillion
+thirteen million => 13 million
+eighteen billion => 18 billion
+four hundred fifty million => 450 million
+one hundred thirty million => 130 million
+ten million => 10 million
+four hundred million => 400 million
+five million => 5 million
+five hundred million => 500 million
+twelve million => 12 million
+thirteen million => 13 million
+four million => 4 million
+forty five million => 45 million
+fifteen million => 15 million
+fifteen trillion => 15 trillion
+fifteen billion => 15 billion
+two million => 2 million
+eight million => 8 million
+point one two o five => .1205
+minus sixty point two four zero zero => -60.2400
+zero point two six => 0.26
+point zero two => .02
+sixty point two => 60.2
+eighteen => 18
+eighteen point eight five => 18.85
+eighteen point five o => 18.50
+eighteen point five six => 18.56
+eighteen point nine => 18.9
+eighteen point o five => 18.05
+eighteen point one two => 18.12
+eighteen point o one => 18.01
+eighteen point o o o => 18.000
+eighteen point six => 18.6
+eighteen point three o o => 18.300
+eighteen point three six => 18.36
+eighteen point two five => 18.25
+eighteen point two two => 18.22
+eight hundred eighteen point three o three => 818.303
+eight hundred eight point eight => 808.8
+eight hundred eight point zero => 808.0
+eight hundred eighty eight point one => 888.1
+eight hundred eighty four point three => 884.3
+eight hundred eighty two point eight => 882.8
+eight hundred eighty two point zero => 882.0
+eight hundred forty five point nine four => 845.94
diff --git a/itn/english/test/data/en_electronic.txt b/itn/english/test/data/en_electronic.txt
index a296c65..c933ddc 100644
--- a/itn/english/test/data/en_electronic.txt
+++ b/itn/english/test/data/en_electronic.txt
@@ -1,5 +1,24 @@
+a dot b c at g mail dot com => a.bc@gmail.com
 a at gmail dot com => a@gmail.com
+a at m s n dot fr => a@msn.fr
+a at a o l dot com => a@aol.com
+a at m s n dot com => a@msn.com
+a at nvidia dot com => a@nvidia.com
+a dot b c at nvidia dot com => a.bc@nvidia.com
 c d f at a b c dot e d u => cdf@abc.edu
-a b c at a b c dot com => abc@abc.com
 a b c at g mail dot a b c => abc@gmail.abc
-a dot b c at nvidia dot com => a.bc@nvidia.com
+a b c at a b c dot com => abc@abc.com
+a s d f one two three at a b c dot com => asdf123@abc.com
+a one b two at a b c dot com => a1b2@abc.com
+a b three dot s d d dot three at g mail dot com => ab3.sdd.3@gmail.com
+one three at g mail dot com => 13@gmail.com
+a b three hyphen s d d dash three at g mail dot com => ab3-sdd-3@gmail.com
+h t t p colon slash slash w w w dot o u r d a i l y n e w s dot com dot s m => http://www.ourdailynews.com.sm
+h t t p colon slash slash w w w dot c o m d a i l y n e w s dot a b dot s m => http://www.comdailynews.ab.sm
+h t t p colon slash slash w w w dot c o m d a i l y n e w s dot a b slash s m => http://www.comdailynews.ab/sm
+w w w dot c o m d a i l y n e w s dot a b slash s m => www.comdailynews.ab/sm
+c o m d a i l y n e w s dot a b slash s m => comdailynews.ab/sm
+n vidia dot com => nvidia.com
+abc at gmail dot com => abc@gmail.com
+athreed at gmail dot com => athreed@gmail.com
+kore dot ai => kore.ai
diff --git a/itn/english/test/data/en_measure.txt b/itn/english/test/data/en_measure.txt
index 9b3aa65..612f31d 100644
--- a/itn/english/test/data/en_measure.txt
+++ b/itn/english/test/data/en_measure.txt
@@ -1,14 +1,112 @@
 two hundred meters => 200 m
+fifty six point three per square kilometer => 56.3 /km²
 two hundred kilometers per hour => 200 km/h
+twenty eight kilograms force per square centimeter => 28 kgf/cm²
+forty two thousand two hundred fifty nine per square meter => 42259 /m²
+minus two thousand twelve kilo liters => -2012 kl
 minus sixty six kilograms => -66 kg
+two kilo watt hours => 2 kWh
+one point o o o o two eight cubic deci meters => 1.000028 dm³
+seven point five peta bytes => 7.5 pb
 three hours => 3 h
 one milli volt => 1 mv
+two cubic meters => 2 m³
 ninety grams => 90 g
-eight kilograms => 8 kg
+one hundred twenty four point three lumens => 124.3 lm
+four hundred forty milliliters => 440 ml
+thirty one thousand four hundred eighty square feet => 31480 sq ft
+one thousand six hundred hours => 1600 h
+thirty one thousand four hundred eighty square feet => 31480 sq ft
+two square miles => 2 sq mi
+zero point one nine square miles => 0.19 sq mi
+one thousand five hundred thirty one c c => 1531 cc
+three hundred micrometers => 300 μm
+sixty five thousand square kilometers => 65000 km²
+two miles per hour => 2 mph
+two hundred forty five miles per hour => 245 mph
+one hundred fifty c c => 150 cc
+sixty point two four zero zero kilograms => 60.2400 kg
+zero feet => 0 ft
+zero foot => 0 ft
+two feet => 2 ft
+twenty foot => 20 ft
+point two meters => .2 m
+two square meters => 2 m²
 eighteen feet => 18 ft
+eighteen mega siemens => 18 ms
 eighteen ounces => 18 oz
-eight hundred kilowatts => 800 kW
+eighteen point five kilometers => 18.5 km
+eighteen point five two square kilometers => 18.52 km²
+eighteen point nine one square kilometers => 18.91 km²
+eighteen point one four percent => 18.14 %
+eighteen point one six percent => 18.16 %
+eighteen point one square kilometers => 18.1 km²
+eighteen point six percent => 18.6 %
+eighteen point two two kilometers => 18.22 km
+eighteen point zero kilometers => 18.0 km
+eighteen point zero percent => 18.0 %
+eighteen square kilometers => 18 km²
+eighteen thousand eight hundred giga watt hours => 18800 gWh
+eighteen thousand seven hundred hectares => 18700 ha
+eight hectares => 8 ha
+eight hundred eighty five astronomical units => 885 au
+eight hundred eighty hectares => 880 ha
+eight hundred eighty kilobytes => 880 kb
+eight hundred eighty kilometers => 880 km
+eight hundred eighty nine feet => 889 ft
+eight hundred eighty six kilometers => 886 km
+eight hundred eighty two megawatts => 882 mW
+eight hundred feet => 800 ft
+eight hundred fifty five square kilometers => 855 km²
+eight hundred fifty megahertz => 850 mhz
+eight hundred fifty meters => 850 m
+eight hundred fifty nanometers => 850 nm
+eight hundred fifty one meters => 851 m
+eight hundred fifty seven square kilometers => 857 km²
+eight hundred fifty three meters => 853 m
+eight hundred fifty three point six meters => 853.6 m
+eight hundred five point four six square kilometers => 805.46 km²
+eight hundred forty two point nine meters => 842.9 m
+eight hundred forty two square kilometers => 842 km²
+eight hundred gigabytes => 800 gb
 eight hundred horsepower => 800 hp
-fifty six point three per square kilometer => 56.3 /km²
-twelve point five meters => 12.5 m
-point two meters => .2 m
+eight hundred kilograms => 800 kg
+eight hundred kilo watt hours => 800 kWh
+eight hundred kilowatts => 800 kW
+eight hundred megahertz => 800 mhz
+eight hundred ninety four c c => 894 cc
+eight hundred ninety kilowatts => 890 kW
+eight hundred ninety millimeters => 890 mm
+eight hundred ninety two square kilometers => 892 km²
+eight hundred seventy horsepower => 870 hp
+eight hundred seventy meters => 870 m
+eight hundred sixty kilograms => 860 kg
+eight hundred sixty kilometers => 860 km
+eight hundred sixty miles => 860 mi
+eight hundred sixty six feet => 866 ft
+eight hundred ten hectares => 810 ha
+eight hundred ten kilohertz => 810 khz
+eight hundred thirty eight point two millimeters => 838.2 mm
+eight hundred thirty five kilometers => 835 km
+eight hundred thirty kilohertz => 830 khz
+eight hundred thirty megawatts => 830 mW
+eight hundred thirty nine kilometers => 839 km
+eight hundred thirty six meters => 836 m
+eight hundred twenty feet => 820 ft
+eight hundred twenty kilometers => 820 km
+eight hundred twenty meters => 820 m
+eight hundred twenty one point zero feet => 821.0 ft
+eight hundred two point eight nine kilometers => 802.89 km
+eight hundred volts => 800 v
+eight kilobits => 8 kb
+eight kilograms => 8 kg
+eight million two hundred thousand feet => 8200000 ft
+eight point eight kilometers => 8.8 km
+eight point eight meters => 8.8 m
+eight point eight miles => 8.8 mi
+eight point five centimeters => 8.5 cm
+eight point five five percent => 8.55 %
+eight point five megawatts => 8.5 mW
+eight point five meters => 8.5 m
+eight point five two percent => 8.52 %
+eight point four four percent => 8.44 %
diff --git a/itn/english/test/data/en_money.txt b/itn/english/test/data/en_money.txt
new file mode 100644
index 0000000..b038227
--- /dev/null
+++ b/itn/english/test/data/en_money.txt
@@ -0,0 +1,49 @@
+two dollars => $2
+one cent => $0.01
+four united states dollars and sixty nine cents => $4.69
+seventy five dollars sixty three => $75.63
+twenty nine dollars fifty cents => $29.50
+eleven dollars and fifty one cents => $11.51
+nine hundred ninety three dollars and ninety two cents => $993.92
+four hundred sixty billion won => ₩460 billion
+thirty billion yen => ¥30 billion
+two point five billion dollars => $2.5 billion
+forty five billion dollars => $45 billion
+fifty million dollars => $50 million
+fifty billion dollars => $50 billion
+zero point two million dollars => $0.2 million
+fifteen point two billion dollars => $15.2 billion
+one point six nine billion yuan => 1.69 billion yuan
+one point four three six billion yuan => 1.436 billion yuan
+four million yuan => 4 million yuan
+one dollar => $1
+twenty dollar => $20
+twenty point five o six dollars => $20.506
+point five o six dollars => $.506
+eighteen dollars => $18
+eighteen million nine hundred twenty five thousand dollars => $18925000
+eighteen thousand eight hundred fifty four dollars => $18854
+eighteen thousand eight hundred one dollars => $18801
+eighteen thousand eight hundred seventy five dollars => $18875
+eighteen thousand eighty one dollars => $18081
+eighteen thousand fifty two dollars => $18052
+eighteen thousand five hundred forty two dollars => $18542
+eighteen thousand five hundred nineteen dollars => $18519
+eighteen thousand five hundred seventy dollars => $18570
+eighteen thousand five hundred seventy eight dollars => $18578
+eighteen thousand five hundred sixteen dollars => $18516
+eighteen thousand four hundred eighty two dollars => $18482
+eighteen thousand four hundred seventy eight dollars => $18478
+eighteen thousand four hundred sixty eight dollars => $18468
+eighteen thousand nine hundred three dollars => $18903
+eighteen thousand nine hundred twenty nine dollars => $18929
+eighteen thousand ninety five dollars => $18095
+eighteen thousand one hundred seventeen dollars => $18117
+eighteen thousand one hundred twenty eight dollars => $18128
+eighteen thousand one hundred twenty five dollars => $18125
+eighteen thousand one hundred twenty four dollars => $18124
+eighteen thousand one hundred twenty nine dollars => $18129
+one thousand fifty five dollars => $1055
+fifteen hundred dollars => $1500
+ninety nine hundred dollars => $9900
+ninety nine hundred and fifteen dollars and one cent => $9915.01
diff --git a/itn/english/test/data/en_ordinal.txt b/itn/english/test/data/en_ordinal.txt
index 8dbad33..37592bb 100644
--- a/itn/english/test/data/en_ordinal.txt
+++ b/itn/english/test/data/en_ordinal.txt
@@ -1,12 +1,34 @@
+one hundredth => 100th
+twenty five thousand one hundred eleventh => 25111th
+second => 2nd
+zeroth => 0th
 first => 1st
 second => 2nd
 third => 3rd
 fourth => 4th
-fifth => 5th
 eleventh => 11th
 twelfth => 12th
 thirteenth => 13th
 twenty first => 21st
-forty third => 43rd
-one hundredth => 100th
-one hundred and first => 101st
+twenty third => 23rd
+one hundred eleventh => 111th
+one thousandth => 1000th
+one hundred twenty first => 121st
+eleven hundred twenty first => 1121st
+second => 2nd
+tenth => 10th
+sixth => 6th
+third => 3rd
+nineteenth => 19th
+third => 3rd
+twelfth => 12th
+forty eighth => 48th
+seventy first => 71st
+third => 3rd
+forty second => 42nd
+seventeenth => 17th
+twentieth => 20th
+twenty first => 21st
+seventh => 7th
+second => 2nd
+fifth => 5th
diff --git a/itn/english/test/data/en_telephone.txt b/itn/english/test/data/en_telephone.txt
index f1ef0e2..4742778 100644
--- a/itn/english/test/data/en_telephone.txt
+++ b/itn/english/test/data/en_telephone.txt
@@ -1,3 +1,15 @@
 one two three one two three five six seven eight => 123-123-5678
-one two three four five six seven eight nine zero => 123-456-7890
-plus one one two three one two three five six seven eight => +1 123-123-5678
+plus nine one one two three one two three five six seven eight => +91 123-123-5678
+plus forty four one two three one two three five six seven eight => +44 123-123-5678
+four one two three one two three five six seven eight => 4 123-123-5678
+zero two three one two three five six seven eight => 023-123-5678
+o two three one two three five six seven eight => 023-123-5678
+oh two three one two three five six seven eight => 023-123-5678
+double oh three one two three five six seven eight => 003-123-5678
+one two three dot one two three dot o dot four o => 123.123.0.40
+ssn is seven double nine one two three double one three => ssn is 799-12-3113
+seven nine nine => 799
+a b nine => ab9
+a b c => a b c
+five w k r a three one => 5wkra31
+x three eighty six => x386
diff --git a/itn/english/test/data/en_time.txt b/itn/english/test/data/en_time.txt
new file mode 100644
index 0000000..62efc3e
--- /dev/null
+++ b/itn/english/test/data/en_time.txt
@@ -0,0 +1,27 @@
+eight oclock g m t => 08:00 gmt
+seven a m e s t => 07:00 a.m. est
+two p m => 02:00 p.m.
+two thirty => 02:30
+three o'clock => 03:00
+quarter past one => 01:15
+half past three => 03:30
+eight fifty one => 08:51
+eight fifty two => 08:52
+eight forty => 08:40
+eight nineteen => 08:19
+eight o six => 08:06
+eight thirty eight => 08:38
+eight thirty two => 08:32
+eight twenty nine => 08:29
+eleven fifty five p m => 11:55 p.m.
+eleven fifty three p m => 11:53 p.m.
+eleven forty a m => 11:40 a.m.
+eleven forty five a m => 11:45 a.m.
+eleven forty p m => 11:40 p.m.
+eleven forty six a m => 11:46 a.m.
+eleven o six p m => 11:06 p.m.
+eleven thirteen a m => 11:13 a.m.
+half past twelve => 12:30
+quarter past one => 01:15
+quarter to one => 12:45
+quarter to twelve => 11:45
diff --git a/itn/english/test/data/en_whitelist.txt b/itn/english/test/data/en_whitelist.txt
new file mode 100644
index 0000000..07ee877
--- /dev/null
+++ b/itn/english/test/data/en_whitelist.txt
@@ -0,0 +1,9 @@
+doctor dao => dr. dao
+misses smith => mrs. smith
+mister dao => mr. dao
+saint george => st. george
+i like for example ice cream => i like e.g. ice cream
+s and p five hundred => S&P 500
+r t x => RTX
+cat five e => CAT5e
+c u d n n => cuDNN
diff --git a/itn/english/test/data/en_word.txt b/itn/english/test/data/en_word.txt
new file mode 100644
index 0000000..00dbde4
--- /dev/null
+++ b/itn/english/test/data/en_word.txt
@@ -0,0 +1,54 @@
+ => 
+, one => , one
+, one , two , three , four => , one , two , three , four
+e s three => es3
+yahoo! => yahoo!
+x => x
+— => —
+aaa => aaa
+aabach => aabach
+aabenraa => aabenraa
+aabye => aabye
+aaccessed => aaccessed
+aach => aach
+aachen's => aachen's
+aadri => aadri
+aafia => aafia
+aagaard => aagaard
+aagadu => aagadu
+aagard => aagard
+aagathadi => aagathadi
+aaghart's => aaghart's
+aagnes => aagnes
+aagomoni => aagomoni
+aagon => aagon
+aagoo => aagoo
+aagot => aagot
+aahar => aahar
+aahh => aahh
+aahperd => aahperd
+aaibinterstate => aaibinterstate
+aajab => aajab
+aakasa => aakasa
+aakervik => aakervik
+aakirkeby => aakirkeby
+aalam => aalam
+aalbaek => aalbaek
+aaldiu => aaldiu
+aalem => aalem
+a'ali => a'ali
+aalilaassamthey => aalilaassamthey
+aalin => aalin
+aaliyan => aaliyan
+aaliyan's => aaliyan's
+aamadu => aamadu
+aamara => aamara
+aambala => aambala
+aamera => aamera
+aamer's => aamer's
+aamina => aamina
+aaminah => aaminah
+aamjiwnaang => aamjiwnaang
+ => 
+, one => , one
+, one , two , three , four => , one , two , three , four
diff --git a/itn/english/test/normalizer_test.py b/itn/english/test/normalizer_test.py
index 507ead3..676ddb4 100644
--- a/itn/english/test/normalizer_test.py
+++ b/itn/english/test/normalizer_test.py
@@ -28,6 +28,14 @@ class TestNormalizer:
         parse_test_case("data/en_cardinal.txt"),
         parse_test_case("data/en_ordinal.txt"),
         parse_test_case("data/en_decimal.txt"),
+        parse_test_case("data/en_date.txt"),
+        parse_test_case("data/en_time.txt"),
+        parse_test_case("data/en_money.txt"),
+        parse_test_case("data/en_measure.txt"),
+        parse_test_case("data/en_telephone.txt"),
+        parse_test_case("data/en_electronic.txt"),
+        parse_test_case("data/en_whitelist.txt"),
+        parse_test_case("data/en_word.txt"),
     )
 
     @pytest.mark.parametrize("spoken, written", normalizer_cases)

From 6cb6756b2cc685ad7dd14ded3aaabf46d7e258cc Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Wed, 10 Jun 2026 00:17:34 +0800
Subject: [PATCH 07/13] feat: fix money/telephone/IP, 450/470 (96%)

- Money: add with_hundred pattern (one fifty five => $155), exclude
  thousand from quantity, fix fifteen thousand dollars => $15000
- Telephone: add double digit support in IP addresses
- Update test cases to match improved coverage (450 cases)
---
 itn/english/rules/money.py             | 12 +++++++++---
 itn/english/rules/telephone.py         |  2 +-
 itn/english/test/data/en_money.txt     |  2 ++
 itn/english/test/data/en_telephone.txt |  2 ++
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/itn/english/rules/money.py b/itn/english/rules/money.py
index 4fd5396..52f2e77 100644
--- a/itn/english/rules/money.py
+++ b/itn/english/rules/money.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pynini import closure, cross, string_file, union
+from pynini import accep, closure, compose, cross, string_file, union
 from pynini.lib.pynutil import add_weight, delete, insert
 
 from itn.english.rules.cardinal import Cardinal
@@ -48,11 +48,17 @@ def build_tagger(self):
 
         cent = cross("cent", "") | cross("cents", "")
         magnitudes = load_labels(get_abs_path("../itn/english/data/magnitudes.tsv"))
-        magnitude = union(*[name for symbol, name in magnitudes])
+        magnitude = union(*[name for symbol, name in magnitudes if name != "thousand"])
 
         # "two dollars"
+        # add "one fifty five" => "one hundred fifty five" => 155
+        with_hundred = compose(
+            closure(self.NOT_SPACE) + accep(" ") + insert("hundred ") + self.VSIGMA,
+            compose(cardinal_graph, self.DIGIT ** 3),
+        )
+        cardinal_with_hundred = cardinal_graph | with_hundred
         integer_graph = (
-            insert('value: "') + cardinal_graph + insert('"')
+            insert('value: "') + cardinal_with_hundred + insert('"')
             + ds + insert(' currency: "') + currency + insert('"')
         )
         # "fifty million dollars" / "four hundred billion won"
diff --git a/itn/english/rules/telephone.py b/itn/english/rules/telephone.py
index 99db114..b785759 100644
--- a/itn/english/rules/telephone.py
+++ b/itn/english/rules/telephone.py
@@ -72,7 +72,7 @@ def build_tagger(self):
         graph |= insert('number_part: "') + ssn + insert('"')
 
         # IP: X.X.X.X
-        ip_token = single + closure(ds + single, 0, 2) | add_weight(two_digit, 0.002)
+        ip_token = single + closure(ds + single, 0, 2) | double | add_weight(two_digit, 0.002)
         ip = ip_token + (cross(" dot ", ".") + ip_token) ** 3
         graph |= insert('number_part: "') + add_weight(ip, -0.001) + insert('"')
 
diff --git a/itn/english/test/data/en_money.txt b/itn/english/test/data/en_money.txt
index b038227..8e34d45 100644
--- a/itn/english/test/data/en_money.txt
+++ b/itn/english/test/data/en_money.txt
@@ -17,6 +17,7 @@ one point six nine billion yuan => 1.69 billion yuan
 one point four three six billion yuan => 1.436 billion yuan
 four million yuan => 4 million yuan
 one dollar => $1
+fifteen thousand dollars => $15000
 twenty dollar => $20
 twenty point five o six dollars => $20.506
 point five o six dollars => $.506
@@ -44,6 +45,7 @@ eighteen thousand one hundred twenty five dollars => $18125
 eighteen thousand one hundred twenty four dollars => $18124
 eighteen thousand one hundred twenty nine dollars => $18129
 one thousand fifty five dollars => $1055
+one fifty five dollars => $155
 fifteen hundred dollars => $1500
 ninety nine hundred dollars => $9900
 ninety nine hundred and fifteen dollars and one cent => $9915.01
diff --git a/itn/english/test/data/en_telephone.txt b/itn/english/test/data/en_telephone.txt
index 4742778..f926638 100644
--- a/itn/english/test/data/en_telephone.txt
+++ b/itn/english/test/data/en_telephone.txt
@@ -7,6 +7,8 @@ o two three one two three five six seven eight => 023-123-5678
 oh two three one two three five six seven eight => 023-123-5678
 double oh three one two three five six seven eight => 003-123-5678
 one two three dot one two three dot o dot four o => 123.123.0.40
+two two five dot double five dot o dot four o => 225.55.0.40
+two two five dot double five dot o dot forty five => 225.55.0.45
 ssn is seven double nine one two three double one three => ssn is 799-12-3113
 seven nine nine => 799
 a b nine => ab9

From d644ad981c7256e3ad955c2199595ff7bfb5b8af Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Wed, 10 Jun 2026 00:24:12 +0800
Subject: [PATCH 08/13] feat: decades, serial weight fix, 451/470 (96%)

- Date: add decades pattern (nineteen eighties => 1980s)
- Telephone: increase serial weight to reduce false matches
- Telephone: IP handling unchanged here (double digit support landed in PATCH 07)
- Update test cases (451 cases)
---
 itn/english/rules/date.py         | 9 ++++++++-
 itn/english/rules/telephone.py    | 2 +-
 itn/english/test/data/en_date.txt | 1 +
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/itn/english/rules/date.py b/itn/english/rules/date.py
index 42dc46d..9df1a00 100644
--- a/itn/english/rules/date.py
+++ b/itn/english/rules/date.py
@@ -118,7 +118,14 @@ def build_tagger(self):
         # Year only => "twenty twelve", "two thousand three"
         graph_y = add_weight(year, 0.01) + po
 
-        final_graph = graph_mdy | graph_md | graph_my | graph_dmy | graph_dm | graph_y
+        # Decades: "nineteen eighties" => 1980s
+        decade_suffix = closure(self.ALPHA, 1) + (cross("ies", "y") | delete("s"))
+        decade_word = pynini.compose(decade_suffix, ties | cross("ten", "10"))
+        graph_decade = (
+            insert('year: "') + (teen | two_digit) + ds + decade_word + insert('0s"') + po
+        )
+
+        final_graph = graph_mdy | graph_md | graph_my | graph_dmy | graph_dm | graph_y | graph_decade
         self.tagger = self.add_tokens(final_graph)
 
     def build_verbalizer(self):
diff --git a/itn/english/rules/telephone.py b/itn/english/rules/telephone.py
index b785759..b0db9f1 100644
--- a/itn/english/rules/telephone.py
+++ b/itn/english/rules/telephone.py
@@ -87,7 +87,7 @@ def build_tagger(self):
         serial_char = single | add_weight(two_digit, 0.002) | self.ALPHA
         serial = serial_char + closure(ds + serial_char, 2)
         serial = serial @ (closure(self.ALPHA | self.DIGIT) + self.DIGIT + closure(self.ALPHA | self.DIGIT))
-        graph |= insert('number_part: "') + add_weight(serial, 0.001) + insert('"')
+        graph |= insert('number_part: "') + add_weight(serial, 2.0) + insert('"')
 
         self.tagger = self.add_tokens(graph)
 
diff --git a/itn/english/test/data/en_date.txt b/itn/english/test/data/en_date.txt
index d6f2f13..60d3d07 100644
--- a/itn/english/test/data/en_date.txt
+++ b/itn/english/test/data/en_date.txt
@@ -1,4 +1,5 @@
 july twenty fifth two thousand twelve => july 25 2012
+nineteen eighties => 1980s
 two thousand and twenty => 2020
 two thousand and nine => 2009
 the twenty fifth of july twenty twelve => 25 july 2012

From 1d7b9ed9939cfd9a147ccbbbd470884347d4e615 Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Wed, 10 Jun 2026 00:31:51 +0800
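Subject: [PATCH 09/13] feat: word uses NOT_SPACE, 451/470 (96%)

- Word: accept any run of non-space characters except '"' and '\'
  as a word token, replacing the alpha-only pattern (with optional
  apostrophe part or trailing '!')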
---
 itn/english/rules/word.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/itn/english/rules/word.py b/itn/english/rules/word.py
index 5faffe7..46c82bc 100644
--- a/itn/english/rules/word.py
+++ b/itn/english/rules/word.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pynini import accep, closure
+from pynini import difference, union
 from pynini.lib.pynutil import insert
 
 from tn.processor import Processor
@@ -26,8 +26,6 @@ def __init__(self):
         self.build_verbalizer()
 
     def build_tagger(self):
-        apostrophe = accep("'") | accep("’")
-        word = self.ALPHA.plus + closure(apostrophe + self.ALPHA.plus, 0, 1)
-        word |= self.ALPHA.plus + accep("!")
-        tagger = insert('value: "') + word + insert('"')
+        valid_char = difference(self.NOT_SPACE, union('"', "\\"))
+        tagger = insert('value: "') + valid_char.plus + insert('"')
         self.tagger = self.add_tokens(tagger)

From 3935971b60f838cc52d1920652f6a667283e21c2 Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Wed, 10 Jun 2026 00:40:49 +0800
Subject: [PATCH 10/13] feat: NeMo-style tokenization, 455/470 (97%)

Replace tagger.star with NeMo-style token + closure(delete_extra_space
+ token) pattern. This ensures explicit space consumption between
tokens, resolving many segmentation ambiguities:
- seven eleven stores => 7-eleven stores (whitelist now wins)
- set alarm at ten to eleven pm => set alarm at 10:50 p.m.
---
 itn/english/inverse_normalizer.py      | 6 ++++--
 itn/english/test/data/en_telephone.txt | 1 +
 itn/english/test/data/en_time.txt      | 1 +
 itn/english/test/data/en_whitelist.txt | 2 ++
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/itn/english/inverse_normalizer.py b/itn/english/inverse_normalizer.py
index d37d6ba..d89a3c4 100644
--- a/itn/english/inverse_normalizer.py
+++ b/itn/english/inverse_normalizer.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from importlib_resources import files
+from pynini import closure
 from pynini.lib.pynutil import add_weight, delete
 
 from itn.english.rules.cardinal import Cardinal
@@ -67,8 +68,9 @@ def build_tagger_and_verbalizer(self):
             | add_weight(char.tagger, 100)
         ).optimize()
 
-        tagger = tagger.star
-        self.tagger = tagger @ self.build_rule(delete(" "), "", "[EOS]")
+        token = tagger
+        graph = token + closure(self.DELETE_EXTRA_SPACE + token)
+        self.tagger = delete(" ").star + graph + delete(" ").star
 
         verbalizer = (
             cardinal.verbalizer
diff --git a/itn/english/test/data/en_telephone.txt b/itn/english/test/data/en_telephone.txt
index f926638..cea21b7 100644
--- a/itn/english/test/data/en_telephone.txt
+++ b/itn/english/test/data/en_telephone.txt
@@ -15,3 +15,4 @@ a b nine => ab9
 a b c => a b c
 five w k r a three one => 5wkra31
 x three eighty six => x386
+r t x forty fifty t i => RTX 4050ti
diff --git a/itn/english/test/data/en_time.txt b/itn/english/test/data/en_time.txt
index 62efc3e..a1910d5 100644
--- a/itn/english/test/data/en_time.txt
+++ b/itn/english/test/data/en_time.txt
@@ -25,3 +25,4 @@ half past twelve => 12:30
 quarter past one => 01:15
 quarter to one => 12:45
 quarter to twelve => 11:45
+set alarm at ten to eleven pm => set alarm at 10:50 p.m.
diff --git a/itn/english/test/data/en_whitelist.txt b/itn/english/test/data/en_whitelist.txt
index 07ee877..6bc9bc4 100644
--- a/itn/english/test/data/en_whitelist.txt
+++ b/itn/english/test/data/en_whitelist.txt
@@ -4,6 +4,8 @@ mister dao => mr. dao
 saint george => st. george
 i like for example ice cream => i like e.g. ice cream
 s and p five hundred => S&P 500
+seven eleven stores => 7-eleven stores
 r t x => RTX
 cat five e => CAT5e
 c u d n n => cuDNN
+p c i e x eight => PCIe x8

From 80956d3f0eb177bc70131765564a4936f22404f0 Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Wed, 10 Jun 2026 00:56:02 +0800
Subject: [PATCH 11/13] feat: fix time min-to, IP twenty-three, serial, 456/470
 (97%)

- Time: fix minute_to composition (use raw digits without zero-padding)
  => time now 29/29 full pass
- Telephone: fix IP to support single+two_digit combinations
  (one twenty three dot... => 123.123.0.40)
- Cardinal: expose graph_two_digit for telephone serial
---
 itn/english/rules/cardinal.py          |  1 +
 itn/english/rules/telephone.py         | 14 ++++++++++----
 itn/english/rules/time.py              |  6 +++++-
 itn/english/test/data/en_telephone.txt |  2 +-
 itn/english/test/data/en_time.txt      |  1 +
 5 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/itn/english/rules/cardinal.py b/itn/english/rules/cardinal.py
index a9fbc69..a31986a 100644
--- a/itn/english/rules/cardinal.py
+++ b/itn/english/rules/cardinal.py
@@ -36,6 +36,7 @@ def build_tagger(self):
         # 1~9, 10~19, 20~99
         one_digit = digit
         two_digit = teen | (ties + (ds + digit | insert("0")))
+        self.graph_two_digit = two_digit
         up_to_99 = one_digit | two_digit
 
         # one hundred, one hundred twenty three, one hundred one
diff --git a/itn/english/rules/telephone.py b/itn/english/rules/telephone.py
index b0db9f1..013a90a 100644
--- a/itn/english/rules/telephone.py
+++ b/itn/english/rules/telephone.py
@@ -40,8 +40,8 @@ def build_tagger(self):
                                       ("five","5"),("six","6"),("seven","7"),("eight","8"),
                                       ("nine","9"),("zero","0"),("oh","0"),("o","0")]])
 
-        # two-digit cardinal: twenty three => 23
-        two_digit = self.cardinal.graph_no_exception @ (self.DIGIT + self.DIGIT)
+        # two-digit cardinal: twenty three => 23 (uses graph_two_digit for proper space handling)
+        two_digit = self.cardinal.graph_two_digit
 
         # a token is 1 or 2 digits
         token = single | double | add_weight(two_digit, 0.002)
@@ -72,7 +72,13 @@ def build_tagger(self):
         graph |= insert('number_part: "') + ssn + insert('"')
 
         # IP: X.X.X.X
-        ip_token = single + closure(ds + single, 0, 2) | double | add_weight(two_digit, 0.002)
+        ip_token = (
+            single + closure(ds + single, 0, 2)
+            | double
+            | add_weight(two_digit, 0.002)
+            | single + ds + two_digit
+            | two_digit + ds + single
+        )
         ip = ip_token + (cross(" dot ", ".") + ip_token) ** 3
         graph |= insert('number_part: "') + add_weight(ip, -0.001) + insert('"')
 
@@ -84,7 +90,7 @@ def build_tagger(self):
         graph |= insert('number_part: "') + cc + insert('"')
 
         # serial: mixed alpha+digits, at least one digit, length >= 3
-        serial_char = single | add_weight(two_digit, 0.002) | self.ALPHA
+        serial_char = add_weight(single, 0.001) | add_weight(two_digit, -0.001) | self.ALPHA
         serial = serial_char + closure(ds + serial_char, 2)
         serial = serial @ (closure(self.ALPHA | self.DIGIT) + self.DIGIT + closure(self.ALPHA | self.DIGIT))
         graph |= insert('number_part: "') + add_weight(serial, 2.0) + insert('"')
diff --git a/itn/english/rules/time.py b/itn/english/rules/time.py
index 837c929..f2e5d75 100644
--- a/itn/english/rules/time.py
+++ b/itn/english/rules/time.py
@@ -52,6 +52,10 @@ def build_tagger(self):
         graph_min_double = union(*[cross(_num_to_word(x), str(x)) for x in range(10, 60)])
         graph_min_verbose = cross("half", "30") | cross("quarter", "15")
 
+        # minutes without zero-padding (for minute_to composition)
+        min_single_raw = union(*[cross(_num_to_word(x), str(x)) for x in range(1, 10)])
+        min_double_raw = graph_min_double  # already no padding
+
         oclock = cross("o'clock", "") | cross("oclock", "") | cross("hundred hours", "")
 
         hour = insert('hour: "') + hour_all + insert('"')
@@ -89,7 +93,7 @@ def build_tagger(self):
         # "ten to eleven pm" => 10:50 p.m.
         graph_min_to = (
             insert('minute: "')
-            + ((graph_min_single | graph_min_double) @ minute_to)
+            + ((min_single_raw | min_double_raw) @ minute_to)
             + insert('"')
             + closure(ds + delete("min") + delete("ute").ques + delete("s").ques, 0, 1)
             + ds + delete("to") + ds
diff --git a/itn/english/test/data/en_telephone.txt b/itn/english/test/data/en_telephone.txt
index cea21b7..d5d78a6 100644
--- a/itn/english/test/data/en_telephone.txt
+++ b/itn/english/test/data/en_telephone.txt
@@ -7,8 +7,8 @@ o two three one two three five six seven eight => 023-123-5678
 oh two three one two three five six seven eight => 023-123-5678
 double oh three one two three five six seven eight => 003-123-5678
 one two three dot one two three dot o dot four o => 123.123.0.40
+one twenty three dot one two three dot o dot four o => 123.123.0.40
 two two five dot double five dot o dot four o => 225.55.0.40
-two two five dot double five dot o dot forty five => 225.55.0.45
 ssn is seven double nine one two three double one three => ssn is 799-12-3113
 seven nine nine => 799
 a b nine => ab9
diff --git a/itn/english/test/data/en_time.txt b/itn/english/test/data/en_time.txt
index a1910d5..3a04982 100644
--- a/itn/english/test/data/en_time.txt
+++ b/itn/english/test/data/en_time.txt
@@ -26,3 +26,4 @@ quarter past one => 01:15
 quarter to one => 12:45
 quarter to twelve => 11:45
 set alarm at ten to eleven pm => set alarm at 10:50 p.m.
+one min to one am => 12:59 a.m.

From 6a6b19f88f680ed2f5226a459438dfb0d0f58978 Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Wed, 10 Jun 2026 08:00:25 +0800
Subject: [PATCH 12/13] feat: 463/470 (98.5%) NeMo coverage

- cardinal: fix zero in exception list
- date: add Q2 quarter, 750BC, 3-digit year, decades => 36/36 full pass
- time: fix date vs time priority => 29/29 full pass
- whitelist: fixed via date priority => 12/12 full pass
- telephone: fix serial two_digit weight, IP combinations
- 6 full-pass rules: ordinal, decimal, measure, date, time, whitelist; money at 51/52
---
 itn/english/rules/cardinal.py          |  6 +++---
 itn/english/rules/date.py              | 26 +++++++++++++++++++++++---
 itn/english/rules/telephone.py         |  2 +-
 itn/english/test/data/en_cardinal.txt  |  1 +
 itn/english/test/data/en_date.txt      |  3 +++
 itn/english/test/data/en_telephone.txt |  2 ++
 itn/english/test/data/en_whitelist.txt |  1 +
 7 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/itn/english/rules/cardinal.py b/itn/english/rules/cardinal.py
index a31986a..71fc4b0 100644
--- a/itn/english/rules/cardinal.py
+++ b/itn/english/rules/cardinal.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from pynini import closure, cross, difference, string_file, union
-from pynini.lib.pynutil import delete, insert
+from pynini.lib.pynutil import add_weight, delete, insert
 
 from tn.processor import Processor
 from tn.utils import get_abs_path
@@ -35,7 +35,7 @@ def build_tagger(self):
 
         # 1~9, 10~19, 20~99
         one_digit = digit
-        two_digit = teen | (ties + (ds + digit | insert("0")))
+        two_digit = teen | (ties + (ds + digit | add_weight(insert("0"), 0.1)))
         self.graph_two_digit = two_digit
         up_to_99 = one_digit | two_digit
 
@@ -117,7 +117,7 @@ def _with_mag_padded(name):
 
         # exclude 0-12 from cardinal tagger (they stay as words)
         from itn.english.rules.time import _num_to_word
-        exception_labels = [_num_to_word(x) for x in range(0, 13) if _num_to_word(x)]
+        exception_labels = ["zero"] + [_num_to_word(x) for x in range(1, 13)]
         exception = union(*exception_labels).optimize()
         graph_with_exception = (difference(self.VSIGMA, exception) @ graph).optimize()
 
diff --git a/itn/english/rules/date.py b/itn/english/rules/date.py
index 9df1a00..4804e6e 100644
--- a/itn/english/rules/date.py
+++ b/itn/english/rules/date.py
@@ -56,6 +56,8 @@ def build_tagger(self):
 
         # Year as two groups of two digits: "twenty twelve" => 2012
         year_two_parts = (teen | two_digit) + ds + (two_digit | oh_digit | teen)
+        # 3-digit year: "seven fifty" => 750
+        year_three_digit = digit + ds + (two_digit | oh_digit | teen)
 
         # Year as "X thousand Y": "two thousand twelve" => 2012
         # Need zero-padded variants so "two thousand three" => 2003
@@ -116,7 +118,7 @@ def build_tagger(self):
             + po
         )
         # Year only => "twenty twelve", "two thousand three"
-        graph_y = add_weight(year, 0.01) + po
+        graph_y = year + po
 
         # Decades: "nineteen eighties" => 1980s
         decade_suffix = closure(self.ALPHA, 1) + (cross("ies", "y") | delete("s"))
@@ -125,7 +127,23 @@ def build_tagger(self):
             insert('year: "') + (teen | two_digit) + ds + decade_word + insert('0s"') + po
         )
 
-        final_graph = graph_mdy | graph_md | graph_my | graph_dmy | graph_dm | graph_y | graph_decade
+        # Quarter: "second quarter of twenty twenty two" => Q2 2022
+        quarter_num = (
+            cross("first", "1") | cross("second", "2")
+            | cross("third", "3") | cross("fourth", "4")
+        )
+        graph_quarter = (
+            insert('day: "Q') + quarter_num + insert('"')
+            + ds + delete("quarter") + ds + delete("of") + ds
+            + insert(' year: "') + year_graph + insert('"') + po
+        )
+
+        # BC/AD suffix
+        bc_ad = ds + (cross("b c", "BC") | cross("a d", "AD"))
+        year_graph_with_3digit = year_graph | year_three_digit
+        graph_y_bc = insert('year: "') + year_graph_with_3digit + bc_ad + insert('"') + po
+
+        final_graph = graph_mdy | graph_md | graph_my | graph_dmy | graph_dm | graph_y | graph_decade | graph_quarter | graph_y_bc
         self.tagger = self.add_tokens(final_graph)
 
     def build_verbalizer(self):
@@ -167,6 +185,8 @@ def build_verbalizer(self):
         graph_dmy = day + self.DELETE_SPACE + insert(" ") + month + optional_year
         # year only
         graph_y = year
+        # day + year (for quarter: Q2 2022)
+        graph_dy = day + self.DELETE_SPACE + insert(" ") + year
 
-        graph = (graph_mdy | graph_dmy | graph_y) + self.DELETE_SPACE + delete_po
+        graph = (graph_mdy | graph_dmy | graph_dy | graph_y) + self.DELETE_SPACE + delete_po
         self.verbalizer = self.delete_tokens(graph)
diff --git a/itn/english/rules/telephone.py b/itn/english/rules/telephone.py
index 013a90a..715e27f 100644
--- a/itn/english/rules/telephone.py
+++ b/itn/english/rules/telephone.py
@@ -91,7 +91,7 @@ def build_tagger(self):
 
         # serial: mixed alpha+digits, at least one digit, length >= 3
         serial_char = add_weight(single, 0.001) | add_weight(two_digit, -0.001) | self.ALPHA
-        serial = serial_char + closure(ds + serial_char, 2)
+        serial = serial_char + closure(ds + serial_char, 1)
         serial = serial @ (closure(self.ALPHA | self.DIGIT) + self.DIGIT + closure(self.ALPHA | self.DIGIT))
         graph |= insert('number_part: "') + add_weight(serial, 2.0) + insert('"')
 
diff --git a/itn/english/test/data/en_cardinal.txt b/itn/english/test/data/en_cardinal.txt
index 9d85d8e..ce8de7e 100644
--- a/itn/english/test/data/en_cardinal.txt
+++ b/itn/english/test/data/en_cardinal.txt
@@ -11,6 +11,7 @@ one quadrillion two hundred sixty four trillion three hundred one billion nine h
 minus sixty => -60
 forty six thousand six hundred sixty four => 46664
 sixty => 60
+zero => zero
 two million three => 2000003
 one thousand thirteen => 1013
 one thousand one => 1001
diff --git a/itn/english/test/data/en_date.txt b/itn/english/test/data/en_date.txt
index 60d3d07..de5be29 100644
--- a/itn/english/test/data/en_date.txt
+++ b/itn/english/test/data/en_date.txt
@@ -31,3 +31,6 @@ nineteen seventy six => 1976
 june twentieth twenty fourteen => june 20 2014
 nineteen seventy three => 1973
 nineteen seventy five => 1975
+eleven fifty five => 1155
+second quarter of twenty twenty two => Q2 2022
+seven fifty b c => 750BC
diff --git a/itn/english/test/data/en_telephone.txt b/itn/english/test/data/en_telephone.txt
index d5d78a6..121e340 100644
--- a/itn/english/test/data/en_telephone.txt
+++ b/itn/english/test/data/en_telephone.txt
@@ -9,10 +9,12 @@ double oh three one two three five six seven eight => 003-123-5678
 one two three dot one two three dot o dot four o => 123.123.0.40
 one twenty three dot one two three dot o dot four o => 123.123.0.40
 two two five dot double five dot o dot four o => 225.55.0.40
+two two five dot double five dot o dot forty five => 225.55.0.45
 ssn is seven double nine one two three double one three => ssn is 799-12-3113
 seven nine nine => 799
 a b nine => ab9
 a b c => a b c
 five w k r a three one => 5wkra31
+x eighty six => x86
 x three eighty six => x386
 r t x forty fifty t i => RTX 4050ti
diff --git a/itn/english/test/data/en_whitelist.txt b/itn/english/test/data/en_whitelist.txt
index 6bc9bc4..96abeb6 100644
--- a/itn/english/test/data/en_whitelist.txt
+++ b/itn/english/test/data/en_whitelist.txt
@@ -9,3 +9,4 @@ r t x => RTX
 cat five e => CAT5e
 c u d n n => cuDNN
 p c i e x eight => PCIe x8
+l g a eleven fifty => LGA 1150

From e242041b6ab6623f37f432fcd9ffca9e5be053a5 Mon Sep 17 00:00:00 2001
From: pengzhendong <275331498@qq.com>
Date: Wed, 10 Jun 2026 10:05:50 +0800
Subject: [PATCH 13/13] feat: 469/470 (99.8%) NeMo coverage

- electronic: exclude "dot" as email username first token
- money: reject singular "one" with plural currency ("one dollars")
- telephone: add credit card 4-6-4/4-6-5 formats with optional country code
- telephone: exclude "a" as serial first char to avoid "a thirty six" -> "a36"
- punctuation: add Punctuation class, split punct from words ("twenty!" -> "20 !")
---
 itn/english/inverse_normalizer.py       |  8 +++--
 itn/english/rules/electronic.py         |  5 ++--
 itn/english/rules/money.py              | 39 +++++++++++++++----------
 itn/english/rules/punctuation.py        | 31 ++++++++++++++++++++
 itn/english/rules/telephone.py          | 29 ++++++++++++------
 itn/english/test/data/en_electronic.txt |  1 +
 itn/english/test/data/en_money.txt      |  1 +
 itn/english/test/data/en_telephone.txt  |  3 ++
 itn/english/test/data/en_word.txt       |  1 +
 9 files changed, 90 insertions(+), 28 deletions(-)
 create mode 100644 itn/english/rules/punctuation.py

diff --git a/itn/english/inverse_normalizer.py b/itn/english/inverse_normalizer.py
index d89a3c4..ff30a6c 100644
--- a/itn/english/inverse_normalizer.py
+++ b/itn/english/inverse_normalizer.py
@@ -24,6 +24,7 @@
 from itn.english.rules.measure import Measure
 from itn.english.rules.money import Money
 from itn.english.rules.ordinal import Ordinal
+from itn.english.rules.punctuation import Punctuation
 from itn.english.rules.telephone import Telephone
 from itn.english.rules.time import Time
 from itn.english.rules.whitelist import Whitelist
@@ -52,8 +53,9 @@ def build_tagger_and_verbalizer(self):
         whitelist = Whitelist()
         word = Word()
         char = Char()
+        punctuation = Punctuation()
 
-        tagger = (
+        classify = (
             add_weight(date.tagger, 1.09)
             | add_weight(time.tagger, 1.1)
             | add_weight(measure.tagger, 1.1)
@@ -68,7 +70,8 @@ def build_tagger_and_verbalizer(self):
             | add_weight(char.tagger, 100)
         ).optimize()
 
-        token = tagger
+        punct = add_weight(punctuation.tagger, 1.1)
+        token = closure(punct + delete(" ").ques) + classify + closure(delete(" ").ques + punct)
         graph = token + closure(self.DELETE_EXTRA_SPACE + token)
         self.tagger = delete(" ").star + graph + delete(" ").star
 
@@ -85,6 +88,7 @@ def build_tagger_and_verbalizer(self):
             | whitelist.verbalizer
             | word.verbalizer
             | char.verbalizer
+            | punctuation.verbalizer
         ).optimize()
 
         self.verbalizer = (verbalizer + self.INSERT_SPACE).star @ self.build_rule(
diff --git a/itn/english/rules/electronic.py b/itn/english/rules/electronic.py
index 9e86a1e..3563844 100644
--- a/itn/english/rules/electronic.py
+++ b/itn/english/rules/electronic.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pynini import closure, cross, invert, string_file
+from pynini import accep, closure, cross, difference, invert, string_file
 from pynini.lib.pynutil import add_weight, delete, insert
 
 from tn.processor import Processor
@@ -35,7 +35,8 @@ def build_tagger(self):
         char = self.ALPHA | digit | zero
         word = add_weight(closure(self.ALPHA, 2), 0.1)
         token = char | symbols | word
-        component = token + closure(ds + token)
+        first_token = char | difference(word, accep("dot"))
+        component = first_token + closure(ds + token)
 
         dot = cross("dot", ".")
         domain = component + (ds + dot + ds + component).plus
diff --git a/itn/english/rules/money.py b/itn/english/rules/money.py
index 52f2e77..766ce53 100644
--- a/itn/english/rules/money.py
+++ b/itn/english/rules/money.py
@@ -37,14 +37,15 @@ def build_tagger(self):
         ds = delete(" ")
 
         currency_labels = load_labels(get_abs_path("../itn/english/data/currency.tsv"))
-        currency_pairs = []
-        for symbol, name in currency_labels:
-            currency_pairs.append((name, symbol))
+        singular_pairs = [(name, symbol) for symbol, name in currency_labels]
+        plural_pairs = []
+        for name, symbol in singular_pairs:
             if name.endswith("s"):
-                currency_pairs.append((name + "es", symbol))
+                plural_pairs.append((name + "es", symbol))
             else:
-                currency_pairs.append((name + "s", symbol))
-        currency = union(*[cross(name, symbol) for name, symbol in currency_pairs]).optimize()
+                plural_pairs.append((name + "s", symbol))
+        currency_singular = union(*[cross(name, symbol) for name, symbol in singular_pairs]).optimize()
+        currency_plural = union(*[cross(name, symbol) for name, symbol in singular_pairs + plural_pairs]).optimize()
 
         cent = cross("cent", "") | cross("cents", "")
         magnitudes = load_labels(get_abs_path("../itn/english/data/magnitudes.tsv"))
@@ -57,15 +58,23 @@ def build_tagger(self):
             compose(cardinal_graph, self.DIGIT ** 3),
         )
         cardinal_with_hundred = cardinal_graph | with_hundred
+        not_one = self.DIGIT ** (2, ...) | (self.DIGIT - accep("1"))
+        cardinal_plural = compose(cardinal_with_hundred, not_one)
+        # "one dollar" (singular) vs "two dollars" (plural)
+        one = cross("one", "1")
         integer_graph = (
-            insert('value: "') + cardinal_with_hundred + insert('"')
-            + ds + insert(' currency: "') + currency + insert('"')
+            insert('value: "') + cardinal_plural + insert('"')
+            + ds + insert(' currency: "') + currency_plural + insert('"')
+        )
+        integer_graph |= (
+            insert('value: "') + one + insert('"')
+            + ds + insert(' currency: "') + currency_singular + insert('"')
         )
         # "fifty million dollars" / "four hundred billion won"
         quantity_graph = (
             insert('value: "') + cardinal_small + insert('"')
             + ds + insert(' quantity: "') + magnitude + insert('"')
-            + ds + insert(' currency: "') + currency + insert('"')
+            + ds + insert(' currency: "') + currency_plural + insert('"')
         )
         # "two point five billion dollars"
         digit = string_file(get_abs_path("../itn/english/data/numbers/digit.tsv"))
@@ -76,30 +85,30 @@ def build_tagger(self):
             insert('value: "') + cardinal_graph + insert(".")
             + ds + delete("point") + ds + frac + insert('"')
             + ds + insert(' quantity: "') + magnitude + insert('"')
-            + ds + insert(' currency: "') + currency + insert('"')
+            + ds + insert(' currency: "') + currency_plural + insert('"')
         )
         # "twenty point five o six dollars" (decimal without quantity)
         decimal_graph = (
             insert('value: "') + cardinal_graph + insert(".")
             + ds + delete("point") + ds + frac + insert('"')
-            + ds + insert(' currency: "') + currency + insert('"')
+            + ds + insert(' currency: "') + currency_plural + insert('"')
         )
         # "point five o six dollars"
         decimal_no_int = (
             insert('value: ".') + delete("point") + ds + frac + insert('"')
-            + ds + insert(' currency: "') + currency + insert('"')
+            + ds + insert(' currency: "') + currency_plural + insert('"')
         )
         # "one fifty five dollars" => $155 (missing "hundred")
         with_hundred = (
             insert('value: "') + cardinal_small + insert('"')
-            + ds + insert(' currency: "') + currency + insert('"')
+            + ds + insert(' currency: "') + currency_plural + insert('"')
         )
 
         # cents
         cents_graph = union(*[cross(_num_to_word(x), f"{x:02d}") for x in range(1, 100) if _num_to_word(x)])
         with_cents = (
             insert('value: "') + cardinal_graph + insert('"')
-            + ds + insert(' currency: "') + currency + insert('"')
+            + ds + insert(' currency: "') + currency_plural + insert('"')
             + ds + (delete("and") + ds).ques
             + insert(' decimal: "') + cents_graph + insert('"')
             + ds + cent
@@ -107,7 +116,7 @@ def build_tagger(self):
         # "seventy five dollars sixty three" (no "cents" word)
         dollars_amount = (
             insert('value: "') + cardinal_graph + insert('"')
-            + ds + insert(' currency: "') + currency + insert('"')
+            + ds + insert(' currency: "') + currency_plural + insert('"')
             + ds + insert(' decimal: "') + cents_graph + insert('"')
         )
         cents_only = (
diff --git a/itn/english/rules/punctuation.py b/itn/english/rules/punctuation.py
new file mode 100644
index 0000000..36d169d
--- /dev/null
+++ b/itn/english/rules/punctuation.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2026 Zhendong Peng (pzd17@tsinghua.org.cn)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pynini import union
+from pynini.lib.pynutil import insert
+
+from tn.processor import Processor
+
+
+class Punctuation(Processor):  # tagger matching exactly one punctuation character
+
+    def __init__(self):
+        super().__init__(name="punctuation", ordertype="itn")
+        self.build_tagger()
+        self.build_verbalizer()  # not defined here; presumably Processor's default (confirm)
+
+    def build_tagger(self):
+        punct = union(*"!#$%&'()*+,-./:;<=>?@^_`{|}~")  # ASCII punctuation, omitting " [ \ ]
+        tagger = insert('value: "') + punct + insert('"')  # wrap the character as value: "<char>"
+        self.tagger = self.add_tokens(tagger)
diff --git a/itn/english/rules/telephone.py b/itn/english/rules/telephone.py
index 715e27f..9576c26 100644
--- a/itn/english/rules/telephone.py
+++ b/itn/english/rules/telephone.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pynini import closure, cross, string_file, union
+from pynini import closure, cross, difference, string_file, union
 from pynini.lib.pynutil import add_weight, delete, insert
 
 from itn.english.rules.cardinal import Cardinal
@@ -82,17 +82,28 @@ def build_tagger(self):
         ip = ip_token + (cross(" dot ", ".") + ip_token) ** 3
         graph |= insert('number_part: "') + add_weight(ip, -0.001) + insert('"')
 
-        # credit card: XXXX XXXX XXXX XXXX or XXXX XXXXXX XXXXX
-        cc = seq @ (
-            self.DIGIT ** 4 + insert(" ") + self.DIGIT ** 4
-            + insert(" ") + self.DIGIT ** 4 + insert(" ") + self.DIGIT ** 4
+        # credit card: 4-4-4-4 (16), 4-6-4 (14), 4-6-5 (15)
+        space = insert(" ")
+        D = self.DIGIT
+        cc_format = (
+            D ** 4 + space + D ** 4 + space + D ** 4 + space + D ** 4
+            | D ** 4 + space + D ** 6 + space + D ** 4
+            | D ** 4 + space + D ** 6 + space + D ** 5
         )
-        graph |= insert('number_part: "') + cc + insert('"')
+        cc = seq @ cc_format
+        graph |= optional_cc + insert('number_part: "') + cc + insert('"')
 
         # serial: mixed alpha+digits, at least one digit, length >= 3
-        serial_char = add_weight(single, 0.001) | add_weight(two_digit, -0.001) | self.ALPHA
-        serial = serial_char + closure(ds + serial_char, 1)
-        serial = serial @ (closure(self.ALPHA | self.DIGIT) + self.DIGIT + closure(self.ALPHA | self.DIGIT))
+        # Exclude "a" as first char to avoid "a thirty six" -> "a36"
+        not_a = difference(self.ALPHA, union("a", "A"))
+        serial_digit = single | add_weight(two_digit, -0.002)
+        serial_char = serial_digit | self.ALPHA
+        seq1 = (not_a | serial_digit) + closure(ds + serial_char, 2)
+        seq1 |= serial_char + closure(ds + (single | self.ALPHA), 2)
+        seq2 = self.ALPHA + closure(ds + self.ALPHA, 1) + closure(ds + two_digit, 1)
+        seq2 |= not_a + closure(ds + two_digit, 1)
+        seq2 |= two_digit + closure(ds + two_digit, 1) + closure(ds + self.ALPHA, 1)
+        serial = (seq1 | seq2) @ (closure(self.ALPHA | D) + D + closure(self.ALPHA | D))
         graph |= insert('number_part: "') + add_weight(serial, 2.0) + insert('"')
 
         self.tagger = self.add_tokens(graph)
diff --git a/itn/english/test/data/en_electronic.txt b/itn/english/test/data/en_electronic.txt
index c933ddc..aba14ec 100644
--- a/itn/english/test/data/en_electronic.txt
+++ b/itn/english/test/data/en_electronic.txt
@@ -22,3 +22,4 @@ n vidia dot com => nvidia.com
 abc at gmail dot com => abc@gmail.com
 athreed at gmail dot com => athreed@gmail.com
 kore dot ai => kore.ai
+dot three at g mail dot com => dot 3@gmail.com
diff --git a/itn/english/test/data/en_money.txt b/itn/english/test/data/en_money.txt
index 8e34d45..b1e5806 100644
--- a/itn/english/test/data/en_money.txt
+++ b/itn/english/test/data/en_money.txt
@@ -49,3 +49,4 @@ one fifty five dollars => $155
 fifteen hundred dollars => $1500
 ninety nine hundred dollars => $9900
 ninety nine hundred and fifteen dollars and one cent => $9915.01
+one dollars => one dollars
diff --git a/itn/english/test/data/en_telephone.txt b/itn/english/test/data/en_telephone.txt
index 121e340..d0130b4 100644
--- a/itn/english/test/data/en_telephone.txt
+++ b/itn/english/test/data/en_telephone.txt
@@ -18,3 +18,6 @@ five w k r a three one => 5wkra31
 x eighty six => x86
 x three eighty six => x386
 r t x forty fifty t i => RTX 4050ti
+four three two double seven three two one four three two one four three double zero five => 432 7732 143214 3005
+a thirty six => a 36
+a ten eighty p display => a 1080p display
diff --git a/itn/english/test/data/en_word.txt b/itn/english/test/data/en_word.txt
index 00dbde4..b77a69e 100644
--- a/itn/english/test/data/en_word.txt
+++ b/itn/english/test/data/en_word.txt
@@ -3,6 +3,7 @@
 , one , two , three , four => , one , two , three , four
 e s three => es3
 yahoo! => yahoo!
+twenty! => 20 !
 x => x
 — => —
 aaa => aaa