Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions lib/docs/filters/polars/clean_html.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
module Docs
  class Polars
    # Post-processes Polars reference pages: removes leftover theme elements
    # and marks code blocks for Python syntax highlighting.
    class CleanHtmlFilter < Filter
      # pydata-sphinx-theme chrome that survives the container extraction or
      # sits inside the article (sidebars, in-page TOC, prev/next nav, footer),
      # plus heading permalinks and forms.
      THEME_CHROME = %w[
        .bd-sidebar-primary
        .bd-sidebar-secondary
        .bd-toc
        .bd-header-article
        .prev-next-area
        .prev-next-footer
        .bd-footer
        .headerlink
        form
      ].freeze

      # Cleans the current document in place and returns it.
      def call
        css(*THEME_CHROME).remove

        # The landing page only carries banner/logo imagery; drop all of it.
        css('img').remove if root_page?

        css('.highlight pre').each do |pre|
          # Re-assigning the text content replaces any child markup with plain
          # text (presumably the highlighter's own spans).
          pre.content = pre.content
          # Tag every block so Prism highlights it as Python.
          pre['data-language'] = 'python'
        end

        doc
      end
    end
  end
end
64 changes: 64 additions & 0 deletions lib/docs/filters/polars/entries.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
module Docs
  class Polars
    # Derives the index entry name and type for each Polars reference page.
    class EntriesFilter < Docs::EntriesFilter
      # Human readable type for each leading path segment of a reference page.
      # The Polars reference is laid out as <section>/... under the base url
      # (e.g. dataframe/api/polars.DataFrame.count.html). Pages stored flat
      # under api/ (plain functions, datatypes, IO, config, ...) are not listed
      # here; they are classified by name via API_TYPE_RULES.
      SECTION_TYPES = {
        'dataframe' => 'DataFrame',
        'lazyframe' => 'LazyFrame',
        'series' => 'Series',
        'expressions' => 'Expressions',
        'functions' => 'Functions',
        'selectors' => 'Selectors',
        'datatypes' => 'Data Types',
        'datatype_expr' => 'Data Types',
        'config' => 'Config',
        'io' => 'Input/output',
        'sql' => 'SQL',
        'exceptions' => 'Exceptions',
        'testing' => 'Testing',
        'catalog' => 'Catalog',
        'metadata' => 'Metadata',
        'schema' => 'Schema',
        'plugins' => 'Plugins'
      }.freeze

      # Ordered [pattern, type] pairs for pages stored flat under api/.
      # The first pattern matching the entry name wins, so the order matters:
      # e.g. polars.DataFrame.write_* must hit the DataFrame rule before the
      # generic read_/scan_/write_/from_ rule.
      API_TYPE_RULES = [
        [%r{\Apolars\.datatypes\.}, 'Data Types'],
        [%r{\Apolars\.Config\b}, 'Config'],
        [%r{\Apolars\.exceptions\.}, 'Exceptions'],
        [%r{\Apolars\.testing\.}, 'Testing'],
        [%r{\Apolars\.(api|plugins)\.}, 'Plugins'],
        [%r{\Apolars\.io\.}, 'Input/output'],
        [%r{\Apolars\.DataFrame\.}, 'DataFrame'],
        [%r{\Apolars\.LazyFrame\.}, 'LazyFrame'],
        [%r{\Apolars\.(read_|scan_|write_|from_)}, 'Input/output'],
        [%r{\Apolars\.json_normalize\b}, 'Input/output']
      ].freeze

      # Returns the page's <h1> text without the trailing permalink marker.
      def get_name
        heading = at_css('h1').content.strip
        # This filter runs before clean_html removes the headerlink, so the
        # anchor character ("#" or the pilcrow) is still at the end of the text.
        heading.sub(%r{\s*[#\u{00B6}]+\s*\z}, '')
      end

      # Returns the entry type: 'Manual' for the root page and for unknown
      # sections, a name-based type for api/ pages, otherwise the type mapped
      # from the first path segment of the slug.
      def get_type
        return 'Manual' if root_page?

        section = slug.split('/').first
        if section == 'api'
          classify_api(get_name)
        else
          SECTION_TYPES.fetch(section, 'Manual')
        end
      end

      private

      # Classifies a top-level polars.* member by its qualified name, falling
      # back to 'Functions' when no rule matches.
      def classify_api(name)
        rule = API_TYPE_RULES.find { |pattern, _type| pattern.match?(name) }
        rule ? rule.last : 'Functions'
      end
    end
  end
end
34 changes: 34 additions & 0 deletions lib/docs/scrapers/polars.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
module Docs
  # Scraper definition for the Polars Python API reference.
  class Polars < UrlScraper
    self.name = 'Polars'
    self.type = 'sphinx'
    self.release = '1.41.0'
    self.base_url = 'https://docs.pola.rs/api/python/stable/reference/'
    self.root_path = 'index.html'
    self.links = { home: 'https://pola.rs/', code: 'https://github.com/pola-rs/polars' }

    # Entries are extracted first, then the generic Sphinx cleanup runs,
    # followed by the Polars-specific cleanup.
    html_filters.push(
      'polars/entries',
      'sphinx/clean_html',
      'polars/clean_html'
    )

    # pydata-sphinx-theme keeps the page content in the article body.
    options[:container] = 'article.bd-article'

    # Pages whose path matches any of these patterns are not scraped.
    options[:skip_patterns] = [
      /_changelog/,
      /whatsnew/
    ]

    # https://github.com/pola-rs/polars/blob/main/LICENSE
    options[:attribution] = <<~HTML
      &copy; 2020 Ritchie Vink<br>
      &copy; 2022 Polars contributors<br>
      Licensed under the MIT License.
    HTML

    # Returns the latest Python release version of Polars.
    #
    # Polars tags both Rust (rs-*) and Python (py-*) releases in the same repo.
    # The tags API only lists recent Rust ones, but the latest GitHub release is
    # always the Python one, so that release is used and its py- prefix dropped.
    def get_latest_version(opts)
      release_tag = get_latest_github_release('pola-rs', 'polars', opts)
      release_tag.delete_prefix('py-')
    end
  end
end
Binary file added public/icons/docs/polars/16.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added public/icons/docs/polars/16@2x.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions public/icons/docs/polars/SOURCE
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://raw.githubusercontent.com/pola-rs/polars-static/master/icons/favicon-32x32.png
Loading