From ccac508a436e01c8b859d504be17f492cb5a3363 Mon Sep 17 00:00:00 2001
From: a <b>
Date: Wed, 10 Jun 2026 12:13:43 +0800
Subject: [PATCH 1/2] preserve rowspan/colspan when bypass_tables is enabled

---
 crawl4ai/html2text/__init__.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/crawl4ai/html2text/__init__.py b/crawl4ai/html2text/__init__.py
index efbe27018..c1e90a3f3 100644
--- a/crawl4ai/html2text/__init__.py
+++ b/crawl4ai/html2text/__init__.py
@@ -703,7 +703,13 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
                     self.soft_br()
                 if tag in ["td", "th"]:
                     if start:
-                        self.o("<{}>\n\n".format(tag))
+                        # Preserve rowspan and colspan attributes
+                        extra = ""
+                        for attr_name in ("rowspan", "colspan"):
+                            val = attrs.get(attr_name)
+                            if val is not None:
+                                extra += ' {}="{}"'.format(attr_name, val)
+                        self.o("<{}{}>\n\n".format(tag, extra))
                     else:
                         self.o("\n</{}>".format(tag))
                 else:

From a5c04beed09804a2fdc6f3da2a93190cba5a35e7 Mon Sep 17 00:00:00 2001
From: a <b>
Date: Wed, 10 Jun 2026 12:16:04 +0800
Subject: [PATCH 2/2] add test for rowspan/colspan

---
 tests/bypass_tables/test_add_span.py | 50 ++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 tests/bypass_tables/test_add_span.py

diff --git a/tests/bypass_tables/test_add_span.py b/tests/bypass_tables/test_add_span.py
new file mode 100644
index 000000000..1e790eaeb
--- /dev/null
+++ b/tests/bypass_tables/test_add_span.py
@@ -0,0 +1,50 @@
+import asyncio
+import pathlib
+
+from crawl4ai.async_configs import BrowserConfig,CrawlerRunConfig
+from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
+from crawl4ai import DefaultTableExtraction
+from crawl4ai import AsyncWebCrawler,CacheMode
+from crawl4ai.content_filter_strategy import PruningContentFilter
+
+target_url = "https://en.wikipedia.org/wiki/List_of_prime_ministers_of_India"
+md_file = pathlib.Path(__file__).parent.absolute().joinpath('test.md').absolute()
+
+
+# Browser settings passed to AsyncWebCrawler in main()
+browser_config = BrowserConfig(
+    headless=True,
+    user_agent_mode='random',
+)
+
+
+
+prune_filter = PruningContentFilter(
+    threshold=0.8,
+    threshold_type="dynamic",
+)
+
+# Per-run settings passed to crawler.arun() in main(); enables the bypass_tables markdown option
+run_config = CrawlerRunConfig(
+    magic=True,
+    markdown_generator=DefaultMarkdownGenerator(
+        content_source = "cleaned_html",
+        options={
+            'bypass_tables': True,
+        }
+    ),
+    cache_mode=CacheMode.BYPASS,
+    css_selector='table.wikitable',
+    table_extraction= DefaultTableExtraction()
+)
+
+async def main():
+    """Crawl target_url, print its markdown and tables, and write the markdown to md_file."""
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(url=target_url, config=run_config)
+        print(result.markdown)
+        print(result.tables)
+        md_file.write_text(result.markdown, encoding="utf-8")  # explicit UTF-8: the locale default can fail on non-ASCII page text
+
+if __name__ == "__main__":
+    asyncio.run(main())
\ No newline at end of file