From 497f5901f1410f74ae3e99ae8f76ff74b93922fc Mon Sep 17 00:00:00 2001 From: Markus Koller Date: Sat, 13 Jun 2026 14:58:12 +0200 Subject: [PATCH 1/3] elixir: update HTML structure for section headings --- lib/docs/filters/elixir/clean_html.rb | 2 +- lib/docs/filters/elixir/entries.rb | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/docs/filters/elixir/clean_html.rb b/lib/docs/filters/elixir/clean_html.rb index 2d656ff65d..e3607ddfab 100644 --- a/lib/docs/filters/elixir/clean_html.rb +++ b/lib/docs/filters/elixir/clean_html.rb @@ -47,7 +47,7 @@ def api end end - css('h1 a.icon-action[title="View Source"]').each do |node| + css('.top-heading a.icon-action[title="View Source"]').each do |node| node['class'] = 'source' node.content = "Source" end diff --git a/lib/docs/filters/elixir/entries.rb b/lib/docs/filters/elixir/entries.rb index 640f65e19a..8097d4a983 100644 --- a/lib/docs/filters/elixir/entries.rb +++ b/lib/docs/filters/elixir/entries.rb @@ -7,19 +7,23 @@ def get_name end def get_type - section = at_css('h1 a.source').attr('href').match('elixir/pages/([^/]+)/')&.captures&.first + section = at_css('.top-heading a.source')&.attr('href')&.match('elixir/pages/([^/]+)/')&.captures&.first if section == "mix-and-otp" return "Mix & OTP" elsif section return section.gsub("-", " ").capitalize end - name = at_css('h1 span').text + # Sometimes the heading includes additional text in a tag, + # in which case the main text is wrapped in a + # e.g. 
https://elixir.hexdocs.pm/Exception.html + heading = at_css('.top-heading h1 span') || at_css('.top-heading h1') + name = heading.text case name.split(' ').first when 'mix' then 'Mix Tasks' when 'Changelog' then 'References' else - case at_css('h1 small').try(:content) + case at_css('.top-heading h1 small').try(:content) when 'exception' 'Exceptions' when 'protocol' From 1a8386f8f8d31a1a48ddfc31f7ce6ce4cd6f7f6e Mon Sep 17 00:00:00 2001 From: Markus Koller Date: Sat, 13 Jun 2026 14:59:21 +0200 Subject: [PATCH 2/3] Update Elixir documentation (1.20.1) Also switch to the new URL layout from hexdocs.pm, the old paths now all redirect so we can also change this for older Elixir versions. --- lib/docs/scrapers/elixir.rb | 220 +++++++++++++++++++----------------- 1 file changed, 116 insertions(+), 104 deletions(-) diff --git a/lib/docs/scrapers/elixir.rb b/lib/docs/scrapers/elixir.rb index 14b3c1645c..8f545342d2 100644 --- a/lib/docs/scrapers/elixir.rb +++ b/lib/docs/scrapers/elixir.rb @@ -17,52 +17,64 @@ class Elixir < UrlScraper options[:root_title] = 'Elixir' options[:attribution] = <<-HTML - © 2012-2024 The Elixir Team
+ © 2012-2026 The Elixir Team
Licensed under the Apache License, Version 2.0. HTML def initial_urls - [ "https://hexdocs.pm/elixir/#{self.class.release}/introduction.html", - "https://hexdocs.pm/eex/#{self.class.release}/EEx.html", - "https://hexdocs.pm/ex_unit/#{self.class.release}/ExUnit.html", - "https://hexdocs.pm/iex/#{self.class.release}/IEx.html", - "https://hexdocs.pm/logger/#{self.class.release}/Logger.html", - "https://hexdocs.pm/mix/#{self.class.release}/Mix.html" ] + [ "https://elixir.hexdocs.pm/#{self.class.release}/introduction.html", + "https://eex.hexdocs.pm/#{self.class.release}/EEx.html", + "https://ex-unit.hexdocs.pm/#{self.class.release}/ExUnit.html", + "https://iex.hexdocs.pm/#{self.class.release}/IEx.html", + "https://logger.hexdocs.pm/#{self.class.release}/Logger.html", + "https://mix.hexdocs.pm/#{self.class.release}/Mix.html" ] + end + + version '1.20' do + self.release = '1.20.1' + self.base_urls = [ + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/" + ] end version '1.18' do self.release = '1.18.1' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/" + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/" ] end version '1.17' do self.release = '1.17.2' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - 
"https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/" + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/" ] end version '1.16' do self.release = '1.16.3' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/" + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/" ] end @@ -71,12 +83,12 @@ def initial_urls version '1.15' do self.release = '1.15.4' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end @@ -84,12 +96,12 @@ def initial_urls version '1.14' do self.release = '1.14.1' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + 
"https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end @@ -97,12 +109,12 @@ def initial_urls version '1.13' do self.release = '1.13.4' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end @@ -110,12 +122,12 @@ def initial_urls version '1.12' do self.release = '1.12.0' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end @@ -123,12 +135,12 @@ def initial_urls version '1.11' do self.release = '1.11.2' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + 
"https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end @@ -136,12 +148,12 @@ def initial_urls version '1.10' do self.release = '1.10.4' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end @@ -149,12 +161,12 @@ def initial_urls version '1.9' do self.release = '1.9.4' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end @@ -162,12 +174,12 @@ def initial_urls version '1.8' do self.release = '1.8.2' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + 
"https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end @@ -175,12 +187,12 @@ def initial_urls version '1.7' do self.release = '1.7.4' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end @@ -188,12 +200,12 @@ def initial_urls version '1.6' do self.release = '1.6.6' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end @@ -201,12 +213,12 @@ def initial_urls version '1.5' do self.release = '1.5.3' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + 
"https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end @@ -214,12 +226,12 @@ def initial_urls version '1.4' do self.release = '1.4.5' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end @@ -227,18 +239,18 @@ def initial_urls version '1.3' do self.release = '1.3.4' self.base_urls = [ - "https://hexdocs.pm/elixir/#{release}/", - "https://hexdocs.pm/eex/#{release}/", - "https://hexdocs.pm/ex_unit/#{release}/", - "https://hexdocs.pm/iex/#{release}/", - "https://hexdocs.pm/logger/#{release}/", - "https://hexdocs.pm/mix/#{release}/", + "https://elixir.hexdocs.pm/#{release}/", + "https://eex.hexdocs.pm/#{release}/", + "https://ex-unit.hexdocs.pm/#{release}/", + "https://iex.hexdocs.pm/#{release}/", + "https://logger.hexdocs.pm/#{release}/", + "https://mix.hexdocs.pm/#{release}/", 'https://elixir-lang.org/getting-started/' ] end def get_latest_version(opts) - doc = fetch_doc('https://hexdocs.pm/elixir/api-reference.html', opts) + doc = fetch_doc('https://elixir.hexdocs.pm/api-reference.html', opts) doc.at_css('.sidebar-projectVersion').content.strip[1..-1] end end From 88880a8a2139fa31d05cbdec6001631ae1fb4df6 Mon Sep 17 00:00:00 2001 From: Markus Koller Date: Sat, 13 Jun 2026 15:00:32 +0200 Subject: [PATCH 3/3] Update Phoenix documentation (1.8.8) Also switch to the new URL layout from hexdocs.pm, which requires using multiple base URLs. 
Other tweaks: - Fix the `:skip_patterns` so that pages whose names merely start with `js` are no longer excluded - Drop the other `:skip_patterns`, these don't seem to be needed anymore - Drop the `:only_patterns` so we can include guides with their section titles --- lib/docs/filters/elixir/entries.rb | 10 +++++- lib/docs/scrapers/phoenix.rb | 50 ++++++++++++++++++------------ 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/lib/docs/filters/elixir/entries.rb b/lib/docs/filters/elixir/entries.rb index 8097d4a983..df8db120e1 100644 --- a/lib/docs/filters/elixir/entries.rb +++ b/lib/docs/filters/elixir/entries.rb @@ -7,11 +7,19 @@ def get_name end def get_type + # Use section titles for Elixir docs section = at_css('.top-heading a.source')&.attr('href')&.match('elixir/pages/([^/]+)/')&.captures&.first if section == "mix-and-otp" return "Mix & OTP" elsif section - return section.gsub("-", " ").capitalize + return section.gsub(/[-_]/, " ").capitalize + end + + # Use section titles for guides + guide = at_css('.top-heading a.source')&.attr('href')&.match('guides/(?:([^/]+)/)?') + if guide + section = guide.captures.first || "Guides" + return section.gsub(/[-_]/, " ").capitalize end # Sometimes the heading includes additional text in a tag, diff --git a/lib/docs/scrapers/phoenix.rb b/lib/docs/scrapers/phoenix.rb index d115ef053e..7e7018c4c6 100644 --- a/lib/docs/scrapers/phoenix.rb +++ b/lib/docs/scrapers/phoenix.rb @@ -1,34 +1,44 @@ module Docs class Phoenix < UrlScraper + include MultipleBaseUrls + self.type = 'elixir' - self.release = '1.6.11' - self.base_url = 'https://hexdocs.pm/' - self.root_path = 'phoenix/Phoenix.html' - self.initial_paths = %w( - phoenix/api-reference.html - ecto/api-reference.html - phoenix_html/api-reference.html - phoenix_live_view/api-reference.html - phoenix_pubsub/api-reference.html - plug/api-reference.html) + self.release = '1.8.8' + self.root_path = 'overview.html' self.links = { home: 'http://www.phoenixframework.org', code: 
'https://github.com/phoenixframework/phoenix' } + self.base_urls = %w( + https://phoenix.hexdocs.pm/ + https://ecto.hexdocs.pm/ + https://phoenix-html.hexdocs.pm/ + https://phoenix-live-view.hexdocs.pm/ + https://phoenix-pubsub.hexdocs.pm/ + https://plug.hexdocs.pm/ + ) + + def initial_urls + %w( + https://phoenix.hexdocs.pm/overview.html + https://ecto.hexdocs.pm/Ecto.html + https://phoenix-html.hexdocs.pm/Phoenix.HTML.html + https://phoenix-live-view.hexdocs.pm/welcome.html + https://phoenix-pubsub.hexdocs.pm/Phoenix.PubSub.html + https://plug.hexdocs.pm/readme.html + ) + end + html_filters.push 'elixir/clean_html', 'elixir/entries' options[:container] = '#content' - options[:skip_patterns] = [/extra-api-reference/, /js/, /\d+\.\d+\.\d+/] - options[:only_patterns] = [ - /\Aphoenix\//, - /\Aecto\//, - /\Aphoenix_pubsub\//, - /\Aphoenix_html\//, - /\Aphoenix_live_view\//, - /\Aplug\// - ] + # Filter docs for JS libraries, these use a different HTML layout. + # e.g. https://phoenix.hexdocs.pm/js/ and https://phoenix-live-view.hexdocs.pm/1.2.0/js/ + # + # Only match on `js` directories so we still catch normal pages like https://phoenix-live-view.hexdocs.pm/js-interop.html + options[:skip_patterns] = [%r{(\A|/)js/}] options[:attribution] = -> (filter) { if filter.slug.start_with?('ecto') @@ -56,7 +66,7 @@ class Phoenix < UrlScraper } def get_latest_version(opts) - doc = fetch_doc('https://hexdocs.pm/phoenix/Phoenix.html', opts) + doc = fetch_doc('https://phoenix.hexdocs.pm/api-reference.html', opts) doc.at_css('.sidebar-projectVersion').content.strip[1..-1] end end