unclecode · HW618 · Jun 10, 2026 · Jun 10, 2026
diff --git a/crawl4ai/html2text/__init__.py b/crawl4ai/html2text/__init__.py
@@ -703,7 +703,13 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
                     self.soft_br()
                 if tag in ["td", "th"]:
                     if start:
-                        self.o("<{}>\n\n".format(tag))
+                        # Preserve rowspan and colspan attributes
+                        extra = ""
+                        for attr_name in ("rowspan", "colspan"):
+                            val = attrs.get(attr_name)
+                            if val is not None:
+                                extra += ' {}="{}"'.format(attr_name, val)
+                        self.o("<{}{}>\n\n".format(tag, extra))
                     else:
                         self.o("\n</{}>".format(tag))
                 else:

diff --git a/tests/bypass_tables/test_add_span.py b/tests/bypass_tables/test_add_span.py
@@ -0,0 +1,50 @@
+import asyncio
+import pathlib
+
+from crawl4ai.async_configs import BrowserConfig,CrawlerRunConfig
+from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
+from crawl4ai import DefaultTableExtraction
+from crawl4ai import AsyncWebCrawler,CacheMode
+from crawl4ai.content_filter_strategy import PruningContentFilter
+
+# Page to crawl, and the file (test.md beside this script) that receives the markdown.
+target_url = "https://en.wikipedia.org/wiki/List_of_prime_ministers_of_India"
+md_file = pathlib.Path(__file__).parent.absolute() / 'test.md'
+
+
+# Browser settings handed to AsyncWebCrawler.
+browser_config = BrowserConfig(
+    user_agent_mode='random',
+    headless=True,
+)
+
+
+# NOTE(review): prune_filter is built here but nothing visible in this script
+# uses it — confirm whether it was meant to be wired into the markdown generator.
+prune_filter = PruningContentFilter(threshold_type="dynamic", threshold=0.8)
+
+
+# Per-run settings handed to crawler.arun(): CSS selector 'table.wikitable',
+# CacheMode.BYPASS, and a markdown generator sourced from "cleaned_html" with
+# the 'bypass_tables' option set.
+run_config = CrawlerRunConfig(
+    magic=True,
+    markdown_generator=DefaultMarkdownGenerator(
+        content_source="cleaned_html",
+        options={'bypass_tables': True},
+    ),
+    cache_mode=CacheMode.BYPASS,
+    css_selector='table.wikitable',
+    table_extraction=DefaultTableExtraction(),
+)
+
+async def main():
+    """Crawl target_url, print its markdown and tables, and save the markdown to md_file."""
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(url=target_url, config=run_config)
+        print(result.markdown)
+        print(result.tables)
+        md_file.write_text(result.markdown, encoding='utf-8')  # locale default may reject non-ASCII
+
+if __name__ == "__main__":
+    asyncio.run(main())