diff --git a/lib/docs/filters/polars/clean_html.rb b/lib/docs/filters/polars/clean_html.rb
new file mode 100644
index 0000000000..e270c8d985
--- /dev/null
+++ b/lib/docs/filters/polars/clean_html.rb
@@ -0,0 +1,39 @@
+module Docs
+  class Polars
+    # Strips the pydata-sphinx-theme page chrome from scraped Polars pages and
+    # tags the remaining code blocks as Python.
+    class CleanHtmlFilter < Filter
+      # Theme chrome that survives the container extraction or sits inside the
+      # article (sidebars, in-page TOC, prev/next nav, footer), plus heading
+      # anchors and forms.
+      CHROME_SELECTORS = %w[
+        .bd-sidebar-primary
+        .bd-sidebar-secondary
+        .bd-toc
+        .bd-header-article
+        .prev-next-area
+        .prev-next-footer
+        .bd-footer
+        .headerlink
+        form
+      ].freeze
+
+      # Cleans the current document in place and returns it.
+      def call
+        css(*CHROME_SELECTORS).remove
+
+        # Drop banner/logo imagery on the landing page.
+        css('img').remove if root_page?
+
+        # Make sure every code block is tagged so Prism highlights it as Python.
+        css('.highlight pre').each do |node|
+          # Re-assigning the text content swaps the nested markup for plain text.
+          node.content = node.content
+          node['data-language'] = 'python'
+        end
+
+        doc
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/polars/entries.rb b/lib/docs/filters/polars/entries.rb
new file mode 100644
index 0000000000..ab881f40ce
--- /dev/null
+++ b/lib/docs/filters/polars/entries.rb
@@ -0,0 +1,77 @@
+module Docs
+  class Polars
+    # Derives the index entry name and type for each scraped Polars page.
+    class EntriesFilter < Docs::EntriesFilter
+      # Map the leading path segment of a reference page to a human readable
+      # type. The Polars reference is laid out as <section>/... under the base
+      # url (e.g. dataframe/api/polars.DataFrame.count.html). Top-level members
+      # (plain functions, datatypes, IO, config, ...) instead live flat under
+      # api/ and are classified by name in #classify_api.
+      SECTION_TYPES = {
+        'dataframe' => 'DataFrame',
+        'lazyframe' => 'LazyFrame',
+        'series' => 'Series',
+        'expressions' => 'Expressions',
+        'functions' => 'Functions',
+        'selectors' => 'Selectors',
+        'datatypes' => 'Data Types',
+        'datatype_expr' => 'Data Types',
+        'config' => 'Config',
+        'io' => 'Input/output',
+        'sql' => 'SQL',
+        'exceptions' => 'Exceptions',
+        'testing' => 'Testing',
+        'catalog' => 'Catalog',
+        'metadata' => 'Metadata',
+        'schema' => 'Schema',
+        'plugins' => 'Plugins'
+      }.freeze
+
+      # Headerlink anchor characters ("#" or a pilcrow), with any surrounding
+      # whitespace, at the very end of a heading.
+      HEADERLINK_SUFFIX = %r{\s*[#\u{00B6}]+\s*\z}
+
+      # Returns the text of the page's <h1> without the trailing headerlink
+      # anchor. Assumes the page has an <h1>.
+      def get_name
+        # This runs before clean_html removes the headerlink, so strip its
+        # anchor character off the heading.
+        at_css('h1').content.strip.sub(HEADERLINK_SUFFIX, '')
+      end
+
+      # Returns 'Manual' for the root page and for unknown sections, the
+      # SECTION_TYPES label for sectioned pages, and a name-based label for
+      # pages stored flat under api/.
+      def get_type
+        return 'Manual' if root_page?
+
+        segment = slug.split('/').first
+        return classify_api(get_name) if segment == 'api'
+
+        SECTION_TYPES[segment] || 'Manual'
+      end
+
+      private
+
+      # Classifies a member stored flat under api/ (top-level polars.* objects)
+      # from its heading text. Patterns are tried top to bottom; anything left
+      # unmatched counts as a plain function.
+      def classify_api(name)
+        case name
+        when %r{\Apolars\.datatypes\.} then 'Data Types'
+        when %r{\Apolars\.Config\b} then 'Config'
+        when %r{\Apolars\.exceptions\.} then 'Exceptions'
+        when %r{\Apolars\.testing\.} then 'Testing'
+        when %r{\Apolars\.(?:api|plugins)\.} then 'Plugins'
+        when %r{\Apolars\.io\.} then 'Input/output'
+        when %r{\Apolars\.DataFrame\.} then 'DataFrame'
+        when %r{\Apolars\.LazyFrame\.} then 'LazyFrame'
+        when %r{\Apolars\.(?:read_|scan_|write_|from_)}, %r{\Apolars\.json_normalize\b}
+          'Input/output'
+        else
+          'Functions'
+        end
+      end
+    end
+  end
+end
diff --git a/lib/docs/scrapers/polars.rb b/lib/docs/scrapers/polars.rb
new file mode 100644
index 0000000000..fa332be7ca
--- /dev/null
+++ b/lib/docs/scrapers/polars.rb
@@ -0,0 +1,39 @@
+module Docs
+  # Scraper for the Polars Python API reference (a Sphinx site that uses
+  # pydata-sphinx-theme).
+  class Polars < UrlScraper
+    self.name = 'Polars'
+    self.type = 'sphinx'
+    self.release = '1.41.0'
+    self.base_url = 'https://docs.pola.rs/api/python/stable/reference/'
+    self.root_path = 'index.html'
+    self.links = {
+      home: 'https://pola.rs/',
+      code: 'https://github.com/pola-rs/polars'
+    }
+
+    # polars/entries is listed ahead of the clean_html filters: it reads the
+    # headings before the headerlink anchors are removed.
+    html_filters.push 'polars/entries', 'sphinx/clean_html', 'polars/clean_html'
+
+    # pydata-sphinx-theme keeps the page content in the article body.
+    options[:container] = 'article.bd-article'
+
+    options[:skip_patterns] = [/_changelog/, /whatsnew/]
+
+    # https://github.com/pola-rs/polars/blob/main/LICENSE
+    # Each notice ends in <br> because HTML collapses plain newlines.
+    options[:attribution] = <<-HTML
+      © 2020 Ritchie Vink<br>
+      © 2022 Polars contributors<br>
+      Licensed under the MIT License.
+    HTML
+
+    # Polars tags both Rust (rs-*) and Python (py-*) releases in the same repo.
+    # The tags API only lists recent Rust ones, but the latest GitHub release is
+    # always the Python one, so use that and drop the py- prefix.
+    def get_latest_version(opts)
+      get_latest_github_release('pola-rs', 'polars', opts).sub(/\Apy-/, '')
+    end
+  end
+end
diff --git a/public/icons/docs/polars/16.png b/public/icons/docs/polars/16.png
new file mode 100644
index 0000000000..c005950aa8
Binary files /dev/null and b/public/icons/docs/polars/16.png differ
diff --git a/public/icons/docs/polars/16@2x.png b/public/icons/docs/polars/16@2x.png
new file mode 100644
index 0000000000..940e5427f8
Binary files /dev/null and b/public/icons/docs/polars/16@2x.png differ
diff --git a/public/icons/docs/polars/SOURCE b/public/icons/docs/polars/SOURCE
new file mode 100644
index 0000000000..c63c050877
--- /dev/null
+++ b/public/icons/docs/polars/SOURCE
@@ -0,0 +1 @@
+https://raw.githubusercontent.com/pola-rs/polars-static/master/icons/favicon-32x32.png