Automated weekly lab webpage update (2026-03-23 16:36:55)

KCL Default · KCL Default · commit b0191bdca453 · 2026-03-23T16:36:55.000-05:00
diff --git a/_data/pub/dynamic.bib b/_data/pub/dynamic.bib
@@ -1,5 +1,5 @@
 % AUTO-GENERATED FILE. DO NOT EDIT.
-% Updated on 2026-03-23T20:41:38Z
+% Updated on 2026-03-23T21:36:00Z
 
 @inproceedings{10.1007/978-3-032-04617-8_3,
   abstract = {Historical maps contain valuable, detailed survey data often unavailable elsewhere. Automatically extracting linear objects, such as fault lines, from scanned historical maps benefits diverse application areas, such as mining resource prediction. However, existing models encounter challenges in capturing adequate image context and spatial context. Insufficient image context leads to false detections by failing to distinguish desired linear objects from others with similar appearances. Meanwhile, insufficient spatial context hampers the accurate delineation of elongated, slender-shaped linear objects. This paper introduces the Linear Object Detection TRansformer (LDTR), which directly generates accurate vector graphs for linear objects from scanned map images. LDTR leverages multi-scale deformable attention to capture representative image context, reducing false detections. Furthermore, LDTR's innovative N-hop connectivity component explicitly encourages interactions among nodes within an N-hop neighborhood, enabling the model to learn sufficient spatial context for generating graphs with accurate connectivity. Experiments show that LDTR improves detection precision by 6{\%} and enhances line connectivity by 20{\%} over state-of-the-art baselines.},
@@ -330,14 +330,6 @@ @misc{Chiang2025_geoanomaly_detection_towards_finding_needles_of
   year = {2025}
 }
 
-@misc{Jang2026_omnident_towards_an_accessible_and_explainable,
-  author = {Leeje Jang and Yao-Yi Chiang and Angela M Hastings and Patimaporn Pungchanchaikul and Martha B Lucas and Emily C Schultz and Jeffrey P Louie and Mohamed Estai and Wen-Chen Wang and Ryan HL Ip and Boyen Huang},
-  howpublished = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer …},
-  title = {OMNI-Dent: Towards an Accessible and Explainable AI Framework for Automated Dental Diagnosis},
-  url = {https://openaccess.thecvf.com/content/WACV2026W/P2P/html/Jang_OMNI-Dent_Towards_an_Accessible_and_Explainable_AI_Framework_for_Automated_WACVW_2026_paper.html},
-  year = {2026}
-}
-
 @misc{Jang2026_ticls_tightly_coupled_language_text_spotter,
   abstract = {Scene text spotting aims to detect and recognize text in real-world images, where instances are often short, fragmented, or visually ambiguous. Existing methods primarily rely on visual cues and implicitly capture local character dependencies, but they overlook the benefits of external linguistic knowledge. Prior attempts to integrate language models either adapt language modeling objectives without external knowledge or apply pretrained models that are misaligned with the word-level granularity of scene text. We propose TiCLS, an end-to-end text spotter that explicitly incorporates external linguistic knowledge from a character-level pretrained language model. TiCLS introduces a linguistic decoder that fuses visual and linguistic features, yet can be initialized by a pretrained language model, enabling robust recognition of ambiguous or fragmented text. Experiments on ICDAR 2015 and Total-Text demonstrate that TiCLS achieves state-of-the-art performance, validating the effectiveness of PLM-guided linguistic integration for scene text spotting.},
   author = {Leeje Jang and Yijun Lin and Yao-Yi Chiang and Jerod Weinman},
@@ -348,6 +340,16 @@ @misc{Jang2026_ticls_tightly_coupled_language_text_spotter
   year = {2026}
 }
 
+@inproceedings{Jang_2026_WACV,
+  author = {Jang, Leeje and Chiang, Yao-Yi and Hastings, Angela M. and Pungchanchaikul, Patimaporn and Lucas, Martha B. and Schultz, Emily C. and Louie, Jeffrey P. and Estai, Mohamed and Wang, Wen-Chen and Ip, Ryan H.L and Huang, Boyen},
+  booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) Workshops},
+  month = {March},
+  pages = {415-424},
+  title = {OMNI-Dent: Towards an Accessible and Explainable AI Framework for Automated Dental Diagnosis},
+  url = {https://openaccess.thecvf.com/content/WACV2026W/P2P/html/Jang_OMNI-Dent_Towards_an_Accessible_and_Explainable_AI_Framework_for_Automated_WACVW_2026_paper.html},
+  year = {2026}
+}
+
 @misc{Jelinski2025_finescale_soil_mapping_in_alaska_with,
   author = {Nicolas A Jelinski and Yao-Yi Chiang and Timm Nawrocki and Matt Macander and Sue Ives and Grunwald Sabine and Colby Brungard and Theresa Chen and Yijun Lin},
   howpublished = {ACM SIGSPATIAL 2025},
diff --git a/_data/pub/override.bib b/_data/pub/override.bib
@@ -9,13 +9,4 @@ @misc{Jang2026_ticls_tightly_coupled_language_text_spotter
   title = {TiCLS : Tightly Coupled Language Text Spotter},
   url = {https://arxiv.org/api/hSim6Wn9a5IS6tFjvGu5mnDLxXo},
   year = {2026}
-}
-
-@InProceedings{Jang2026_omnident_towards_an_accessible_and_explainable,
-    author    = {Jang, Leeje and Chiang, Yao-Yi and Hastings, Angela M. and Pungchanchaikul, Patimaporn and Lucas, Martha B. and Schultz, Emily C. and Louie, Jeffrey P. and Estai, Mohamed and Wang, Wen-Chen and Ip, Ryan H.L and Huang, Boyen},
-    title     = {OMNI-Dent: Towards an Accessible and Explainable AI Framework for Automated Dental Diagnosis},
-    booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) Workshops},
-    month     = {March},
-    year      = {2026},
-    pages     = {415-424}
 }
diff --git a/publications.bib b/publications.bib
@@ -1,5 +1,5 @@
 % AUTO-GENERATED FILE — DO NOT EDIT
-% Updated on 2026-03-23T21:10:36Z
+% Updated on 2026-03-23T21:36:55Z
 
 @inproceedings{10.1007/978-3-032-04617-8_3,
   abstract = {Historical maps contain valuable, detailed survey data often unavailable elsewhere. Automatically extracting linear objects, such as fault lines, from scanned historical maps benefits diverse application areas, such as mining resource prediction. However, existing models encounter challenges in capturing adequate image context and spatial context. Insufficient image context leads to false detections by failing to distinguish desired linear objects from others with similar appearances. Meanwhile, insufficient spatial context hampers the accurate delineation of elongated, slender-shaped linear objects. This paper introduces the Linear Object Detection TRansformer (LDTR), which directly generates accurate vector graphs for linear objects from scanned map images. LDTR leverages multi-scale deformable attention to capture representative image context, reducing false detections. Furthermore, LDTR's innovative N-hop connectivity component explicitly encourages interactions among nodes within an N-hop neighborhood, enabling the model to learn sufficient spatial context for generating graphs with accurate connectivity. Experiments show that LDTR improves detection precision by 6{\%} and enhances line connectivity by 20{\%} over state-of-the-art baselines.},
@@ -1292,15 +1292,6 @@ @inproceedings{Jaiswal2014-km
   year = {2014}
 }
 
-@inproceedings{Jang2026_omnident_towards_an_accessible_and_explainable,
-  author = {Jang, Leeje and Chiang, Yao-Yi and Hastings, Angela M. and Pungchanchaikul, Patimaporn and Lucas, Martha B. and Schultz, Emily C. and Louie, Jeffrey P. and Estai, Mohamed and Wang, Wen-Chen and Ip, Ryan H.L and Huang, Boyen},
-  booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) Workshops},
-  month = {March},
-  pages = {415-424},
-  title = {OMNI-Dent: Towards an Accessible and Explainable AI Framework for Automated Dental Diagnosis},
-  year = {2026}
-}
-
 @misc{Jang2026_ticls_tightly_coupled_language_text_spotter,
   abstract = {Scene text spotting aims to detect and recognize text in real-world images, where instances are often short, fragmented, or visually ambiguous. Existing methods primarily rely on visual cues and implicitly capture local character dependencies, but they overlook the benefits of external linguistic knowledge. Prior attempts to integrate language models either adapt language modeling objectives without external knowledge or apply pretrained models that are misaligned with the word-level granularity of scene text. We propose TiCLS, an end-to-end text spotter that explicitly incorporates external linguistic knowledge from a character-level pretrained language model. TiCLS introduces a linguistic decoder that fuses visual and linguistic features, yet can be initialized by a pretrained language model, enabling robust recognition of ambiguous or fragmented text. Experiments on ICDAR 2015 and Total-Text demonstrate that TiCLS achieves state-of-the-art performance, validating the effectiveness of PLM-guided linguistic integration for scene text spotting.},
   author = {Leeje Jang and Yijun Lin and Yao-Yi Chiang and Jerod Weinman},
@@ -1311,6 +1302,16 @@ @misc{Jang2026_ticls_tightly_coupled_language_text_spotter
   year = {2026}
 }
 
+@inproceedings{Jang_2026_WACV,
+  author = {Jang, Leeje and Chiang, Yao-Yi and Hastings, Angela M. and Pungchanchaikul, Patimaporn and Lucas, Martha B. and Schultz, Emily C. and Louie, Jeffrey P. and Estai, Mohamed and Wang, Wen-Chen and Ip, Ryan H.L and Huang, Boyen},
+  booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) Workshops},
+  month = {March},
+  pages = {415-424},
+  title = {OMNI-Dent: Towards an Accessible and Explainable AI Framework for Automated Dental Diagnosis},
+  url = {https://openaccess.thecvf.com/content/WACV2026W/P2P/html/Jang_OMNI-Dent_Towards_an_Accessible_and_Explainable_AI_Framework_for_Automated_WACVW_2026_paper.html},
+  year = {2026}
+}
+
 @misc{Jelinski2025_finescale_soil_mapping_in_alaska_with,
   author = {Nicolas A Jelinski and Yao-Yi Chiang and Timm Nawrocki and Matt Macander and Sue Ives and Grunwald Sabine and Colby Brungard and Theresa Chen and Yijun Lin},
   howpublished = {ACM SIGSPATIAL 2025},
diff --git a/scripts/__pycache__/utils.cpython-38.pyc b/scripts/__pycache__/utils.cpython-38.pyc
diff --git a/scripts/update_pubs.py b/scripts/update_pubs.py
@@ -14,7 +14,7 @@
                            seq_ratio, token_set_ratio,       # Fuzzy matching
                            extract_doi, extract_arxiv_id,    # DOI stuff
                            make_bib_key,  # bibtex creation
-                           bibtex_to_fields, get_bibtex_with_fallback,    # Default grab
+                           bibtex_to_fields, get_bibtex_with_fallback, cv_family_bibtex_by_url,   # Default grab
                            arxiv_api_query_by_id, arxiv_find_best_by_title,     # Things for arxiv
                            acm_bibtex_by_doi, springer_bibtex_by_doi, crossref_bibtex_by_doi,    # Outer bibtex
                            crossref_bibtex_transform, crossref_search_best,     # Crossref grab
@@ -82,15 +82,27 @@ def main(scholar_id:str,
 
         print(f"Filling bibtex for {idx+1}: {title}")
 
-        # Try filling with scholarly
+        # Prefer BibTeX exposed directly on CV-family paper pages.
+        cv_family_bibtex = ""
+        cv_family_fields = {}
+        try:
+            cv_family_bibtex = cv_family_bibtex_by_url(link, title=title)
+            if cv_family_bibtex:
+                cv_family_fields = bibtex_to_fields(cv_family_bibtex)
+        except Exception:
+            cv_family_bibtex = ""
+            cv_family_fields = {}
+
+        # Fall back to scholarly when no conference-page BibTeX is available.
         scholar_bibtex = ""
         scholar_fields = {}
-        try:
-            scholar_bibtex = get_bibtex_with_fallback(p_full, title=title)
-            scholar_fields = bibtex_to_fields(scholar_bibtex)
-        except Exception:   # If either fails
-            scholar_bibtex = ""
-            scholar_fields = {}
+        if not cv_family_bibtex:
+            try:
+                scholar_bibtex = get_bibtex_with_fallback(p_full, title=title)
+                scholar_fields = bibtex_to_fields(scholar_bibtex)
+            except Exception:   # If either fails
+                scholar_bibtex = ""
+                scholar_fields = {}
 
         is_arxiv = False
         if ARXIV_WORD_RE.search(venue or ""):
@@ -133,8 +145,13 @@ def main(scholar_id:str,
         entry = {}
 
         # Try getting DOI
-        doi = extract_doi([link, scholar_bibtex,
-                           json.dumps(scholar_fields, ensure_ascii=False)])
+        doi = extract_doi([
+            link,
+            cv_family_bibtex,
+            scholar_bibtex,
+            json.dumps(cv_family_fields, ensure_ascii=False),
+            json.dumps(scholar_fields, ensure_ascii=False),
+        ])
 
         # ACM
         if doi and doi.startswith("10.1145/"):
@@ -229,6 +246,16 @@ def main(scholar_id:str,
             if cr_abs:
                 entry["abstract"] = cr_abs
 
+        if not entry and cv_family_bibtex:
+            entry = build_other_bib_entry(
+                cv_family_bibtex,
+                title_fallback=title,
+                venue_fallback=venue,
+                year_fallback=year,
+                link_fallback=link,
+                abstract_fallback="",
+            )
+
         # Fall back to Scholarly bibtex if they have that
         if not entry and scholar_bibtex:
             entry = build_other_bib_entry(
@@ -272,4 +299,4 @@ def main(scholar_id:str,
 
     main(scholar_id=args.scholar_id,
          year_window=args.year_window,
-         outpath=args.outpath)
+         outpath=args.outpath)
diff --git a/scripts/utils.py b/scripts/utils.py
@@ -241,6 +241,106 @@ def parse_first_bibtex_entry(bibtex_str: str) -> dict:
         "(KHTML, like Gecko) Chrome/120 Safari/537.36"
     )
 }
+
+CV_FAMILY_HOST_RE = re.compile(  # anchored at the start: http(s)://, optional "www.", then one of three hosts
+    r"^https?://(?:www\.)?(?:openaccess\.thecvf\.com|thecvf\.com|ecva\.net)/",
+    re.I,
+)
+CV_FAMILY_CONF_RE = re.compile(r"(?:CVPR|ICCV|ECCV|WACV)", re.I)  # conference acronym, any position, any case
+BIBTEX_ENTRY_START_RE = re.compile(  # "@" + a listed entry type + optional whitespace + the opening "{"
+    r"@(?:article|book|booklet|conference|inbook|incollection|inproceedings|manual|"
+    r"mastersthesis|misc|phdthesis|proceedings|techreport|unpublished)\s*\{",
+    re.I,
+)
+
+def is_cv_family_url(url: str) -> bool:  # True only when the cleaned URL is non-empty and BOTH regexes match
+    url = normalize_ws(url)
+    return bool(url and CV_FAMILY_HOST_RE.search(url) and CV_FAMILY_CONF_RE.search(url))
+
+def cv_family_candidate_urls(url: str) -> List[str]:
+    """Return the pages worth probing for BibTeX, best first; [] when `url` is not a CV-family URL."""
+    url = normalize_ws(url)
+    if not url or not is_cv_family_url(url):
+        return []
+    candidates = [url]
+
+    # CVF PDF links map cleanly to paper HTML pages, which expose the BibTeX block.
+    if re.search(r"/papers/.+_paper\.pdf(?:\?.*)?$", url, re.I):
+        html_url = re.sub(r"/papers/", "/html/", url, flags=re.I)
+        html_url = re.sub(r"_paper\.pdf(?:\?.*)?$", "_paper.html", html_url, flags=re.I)
+        if html_url not in candidates:
+            # HTML goes first: the fetcher rejects PDF responses, so requesting the PDF first is wasted work.
+            candidates.insert(0, html_url)
+    return candidates
+
+def extract_bibtex_entries(text: str) -> List[str]:
+    """Return every brace-balanced BibTeX entry found in `text`, after HTML-entity decoding."""
+    decoded = html.unescape(text or "")
+    found = []
+    for header in BIBTEX_ENTRY_START_RE.finditer(decoded):
+        begin = header.start()
+        # Scanning starts on the last character of the match; nesting counts unclosed "{".
+        nesting = 0
+        for offset in range(header.end() - 1, len(decoded)):
+            symbol = decoded[offset]
+            if symbol == "{":
+                nesting += 1
+            elif symbol == "}":
+                nesting -= 1
+                if nesting == 0:
+                    # Braces are balanced again: keep the slice and stop scanning this entry.
+                    candidate = decoded[begin:offset + 1].strip()
+                    if candidate:
+                        found.append(candidate)
+                    break
+        # Reaching the end of the text without balancing the braces adds nothing for this match.
+
+    return found
+
+def cv_family_bibtex_by_url(url: str, title: str = "") -> str:
+    """Return one BibTeX entry scraped from the candidate pages for `url`, or an empty string."""
+    pages = cv_family_candidate_urls(url)
+    if not pages:
+        return ""
+    wanted = normalize_title(title)
+
+    for page in pages:
+        # A request error, a non-200 status or a PDF Content-Type all move on to the next candidate.
+        try:
+            resp = requests.get(page, headers=UA_HEADERS, timeout=25)
+            usable = resp.status_code == 200 and "pdf" not in (resp.headers.get("Content-Type") or "").lower()
+        except Exception:
+            usable = False
+        if not usable:
+            continue
+
+        found = extract_bibtex_entries(resp.text)
+        if not found:
+            continue
+
+        # With no title to compare against, a page holding exactly one entry is taken as-is.
+        lone = len(found) == 1
+        if lone and not wanted:
+            return found[0]
+
+        # Track the entry whose title (via normalize_title) gets the highest seq_ratio score.
+        top_entry, top_score = "", 0.0
+        for raw in found:
+            parsed_title = normalize_title(bibtex_to_fields(raw).get("title") or "")
+            if not parsed_title:
+                continue
+            similarity = seq_ratio(wanted, parsed_title) if wanted else 0.0
+            if similarity > top_score:
+                top_entry, top_score = raw, similarity
+
+        if top_entry and top_score >= 0.88:
+            return top_entry
+        # A single-entry page is still accepted when its score falls below the 0.88 cut-off.
+        if lone:
+            return found[0]
+
+    return ""
+
 # ACM
 def acm_bibtex_by_doi(doi: str) -> str:
     """
@@ -565,4 +665,4 @@ def get_bibtex_with_fallback(p_full: dict, title: str) -> str:
         return scholarly.bibtex(pub2) or ""
     except Exception:
         return ""
-    
+