You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
abstract = {Scene text spotting aims to detect and recognize text in real-world images, where instances are often short, fragmented, or visually ambiguous. Existing methods primarily rely on visual cues and implicitly capture local character dependencies, but they overlook the benefits of external linguistic knowledge. Prior attempts to integrate language models either adapt language modeling objectives without external knowledge or apply pretrained models that are misaligned with the word-level granularity of scene text. We propose TiCLS, an end-to-end text spotter that explicitly incorporates external linguistic knowledge from a character-level pretrained language model. TiCLS introduces a linguistic decoder that fuses visual and linguistic features, yet can be initialized by a pretrained language model, enabling robust recognition of ambiguous or fragmented text. Experiments on ICDAR 2015 and Total-Text demonstrate that TiCLS achieves state-of-the-art performance, validating the effectiveness of PLM-guided linguistic integration for scene text spotting.},
6
+
author = {Leeje Jang and Yijun Lin and Yao-Yi Chiang and Jerod Weinman},
7
+
howpublished = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
8
+
primaryclass = {cs.CV},
9
+
title = {{TiCLS}: Tightly Coupled Language Text Spotter},
author = {Leeje Jang and Yao-Yi Chiang and Angela M Hastings and Patimaporn Pungchanchaikul and Martha B Lucas and Emily C Schultz and Jeffrey P Louie and Mohamed Estai and Wen-Chen Wang and Ryan HL Ip and Boyen Huang},
16
+
howpublished = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
17
+
title = {{OMNI-Dent}: Towards an Accessible and Explainable {AI} Framework for Automated Dental Diagnosis},
Copy file name to clipboardExpand all lines: publications.bib
+3-3Lines changed: 3 additions & 3 deletions
Original file line number
Diff line number
Diff line change
@@ -1,5 +1,5 @@
1
1
% AUTO-GENERATED FILE — DO NOT EDIT
2
-
% Updated on 2026-03-23T20:41:39Z
2
+
% Updated on 2026-03-23T21:09:10Z
3
3
4
4
@inproceedings{10.1007/978-3-032-04617-8_3,
5
5
abstract = {Historical maps contain valuable, detailed survey data often unavailable elsewhere. Automatically extracting linear objects, such as fault lines, from scanned historical maps benefits diverse application areas, such as mining resource prediction. However, existing models encounter challenges in capturing adequate image context and spatial context. Insufficient image context leads to false detections by failing to distinguish desired linear objects from others with similar appearances. Meanwhile, insufficient spatial context hampers the accurate delineation of elongated, slender-shaped linear objects. This paper introduces the Linear Object Detection TRansformer (LDTR), which directly generates accurate vector graphs for linear objects from scanned map images. LDTR leverages multi-scale deformable attention to capture representative image context, reducing false detections. Furthermore, LDTR's innovative N-hop connectivity component explicitly encourages interactions among nodes within an N-hop neighborhood, enabling the model to learn sufficient spatial context for generating graphs with accurate connectivity. Experiments show that LDTR improves detection precision by 6{\%} and enhances line connectivity by 20{\%} over state-of-the-art baselines.},
author = {Leeje Jang and Yao-Yi Chiang and Angela M Hastings and Patimaporn Pungchanchaikul and Martha B Lucas and Emily C Schultz and Jeffrey P Louie and Mohamed Estai and Wen-Chen Wang and Ryan HL Ip and Boyen Huang},
1297
-
howpublished = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer …},
1297
+
howpublished = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
1298
1298
title = {{OMNI-Dent}: Towards an Accessible and Explainable {AI} Framework for Automated Dental Diagnosis},
abstract = {Scene text spotting aims to detect and recognize text in real-world images, where instances are often short, fragmented, or visually ambiguous. Existing methods primarily rely on visual cues and implicitly capture local character dependencies, but they overlook the benefits of external linguistic knowledge. Prior attempts to integrate language models either adapt language modeling objectives without external knowledge or apply pretrained models that are misaligned with the word-level granularity of scene text. We propose TiCLS, an end-to-end text spotter that explicitly incorporates external linguistic knowledge from a character-level pretrained language model. TiCLS introduces a linguistic decoder that fuses visual and linguistic features, yet can be initialized by a pretrained language model, enabling robust recognition of ambiguous or fragmented text. Experiments on ICDAR 2015 and Total-Text demonstrate that TiCLS achieves state-of-the-art performance, validating the effectiveness of PLM-guided linguistic integration for scene text spotting.},
1305
1305
author = {Leeje Jang and Yijun Lin and Yao-Yi Chiang and Jerod Weinman},
1306
-
howpublished = {arXiv},
1306
+
howpublished = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
1307
1307
primaryclass = {cs.CV},
1308
1308
title = {{TiCLS}: Tightly Coupled Language Text Spotter},
0 commit comments