diff --git a/lib/percy.rb b/lib/percy.rb
index 08d4971..8e9b915 100644
--- a/lib/percy.rb
+++ b/lib/percy.rb
@@ -1,11 +1,36 @@
 require 'uri'
 require 'json'
+require 'set'
 require 'version'
 require 'net/http'
 require 'selenium-webdriver'
 require_relative 'driver_metadata'
 
 module Percy
+  # Default max nesting depth for cross-origin iframe recursion. Bounds the cost
+  # of pathological pages and prevents runaway recursion on cyclic frame trees.
+  DEFAULT_MAX_FRAME_DEPTH = 10
+
+  # Absolute ceiling on iframe nesting depth, regardless of user config. Mirrors
+  # the canonical sibling SDKs (Nightwatch/Protractor shims cap at 25).
+  HARD_MAX_FRAME_DEPTH = 25
+
+  # Iframe src prefixes / sentinels we never attempt to switch into -- these
+  # represent either browser-internal documents, non-HTTP URI schemes, or
+  # placeholder values that have no meaningful CORS content to capture.
+  UNSUPPORTED_IFRAME_SRCS = %w[
+    about: chrome: chrome-extension: devtools: edge: opera: view-source:
+    data: javascript: blob: vbscript: file:
+  ].freeze
+
+  # Raised when a nested-frame restoration step fails and we can no longer
+  # trust that subsequent driver.switch_to / find_elements calls will resolve
+  # against the correct frame context. `partial_capture` carries the iframes
+  # captured before the loss; it stays nil unless the raiser assigns it.
+  class PercyContextLost < StandardError
+    attr_accessor :partial_capture
+  end
+
   CLIENT_INFO = "percy-selenium-ruby/#{VERSION}".freeze
   ENV_INFO = "selenium/#{Selenium::WebDriver::VERSION} ruby/#{RUBY_VERSION}".freeze
 
@@ -79,6 +104,9 @@ def self.snapshot(driver, name, options = {})
     begin
       percy_dom_script = fetch_percy_dom
       driver.execute_script(percy_dom_script)
+      # Expose closed shadow roots via CDP before serialization so PercyDOM
+      # can pierce them. No-op for non-Chromium drivers / when CDP fails.
+      expose_closed_shadow_roots(driver)
       dom_snapshot = if responsive_snapshot_capture?(options)
         capture_responsive_dom(driver, options, percy_dom_script: percy_dom_script)
       else
@@ -108,6 +136,101 @@ def self.snapshot(driver, name, options = {})
     end
   end
 
+  # Use CDP to discover closed shadow roots and expose them to
+  # PercyDOM.serialize(). Closed shadow roots are inaccessible from JavaScript
+  # (element.shadowRoot is null), but CDP's DOM domain can pierce them. For
+  # each closed shadow root we resolve both the host and the shadow root to JS
+  # objects, then store the shadow in a host-keyed WeakMap that clone-dom.js
+  # reads during serialization. CDP calls: DOM.getDocument once (deep+pierce),
+  # then resolveNode + resolveNode + Runtime.callFunctionOn per pair.
+  #
+  # Best-effort throughout: malformed nodes are skipped while walking, and each
+  # pair is exposed independently, so one stale node does not prevent the
+  # remaining roots from being registered. Whatever fails simply isn't
+  # captured (the pre-existing behavior for closed shadow DOM).
+  #
+  # @param driver [Object] WebDriver; a no-op unless it responds to execute_cdp
+  # @return [void]
+  def self.expose_closed_shadow_roots(driver)
+    return unless driver.respond_to?(:execute_cdp)
+
+    begin
+      result = driver.execute_cdp('DOM.getDocument', depth: -1, pierce: true)
+    rescue StandardError => e
+      log("CDP unavailable for closed shadow root discovery: #{e}", 'debug')
+      return
+    end
+
+    # Read a CDP field from a string- or symbol-keyed Hash; nil for non-Hashes.
+    field = ->(hash, key) { hash.is_a?(Hash) ? (hash[key] || hash[key.to_sym]) : nil }
+
+    root = field.call(result, 'root')
+    return unless root
+
+    closed_pairs = []
+    walker = lambda do |node|
+      # `return` only exits this lambda. Non-Hash nodes (nil included) are
+      # skipped so a malformed payload cannot raise out of the walk.
+      return unless node.is_a?(Hash)
+      # Skip nodes inside child frame documents -- cross-frame closed shadow
+      # roots are not yet supported (their execution context lacks the WeakMap)
+      return if field.call(node, 'contentDocument')
+
+      shadow_roots = field.call(node, 'shadowRoots')
+      if shadow_roots.is_a?(Array)
+        shadow_roots.each do |sr|
+          if field.call(sr, 'shadowRootType') == 'closed'
+            closed_pairs << {
+              host_backend_id: field.call(node, 'backendNodeId'),
+              shadow_backend_id: field.call(sr, 'backendNodeId'),
+            }
+          end
+          walker.call(sr)
+        end
+      end
+
+      children = field.call(node, 'children')
+      children.each(&walker) if children.is_a?(Array)
+    end
+    walker.call(root)
+
+    return if closed_pairs.empty?
+
+    log("Found #{closed_pairs.length} closed shadow root(s), exposing via CDP", 'debug')
+
+    begin
+      driver.execute_script(
+        'window.__percyClosedShadowRoots = window.__percyClosedShadowRoots || new WeakMap();',
+      )
+    rescue StandardError => e
+      # Non-fatal -- closed shadow DOM just won't be captured
+      log("Could not expose closed shadow roots via CDP: #{e}", 'debug')
+      return
+    end
+
+    closed_pairs.each do |pair|
+      begin
+        host = driver.execute_cdp('DOM.resolveNode', backendNodeId: pair[:host_backend_id])
+        shadow = driver.execute_cdp('DOM.resolveNode', backendNodeId: pair[:shadow_backend_id])
+        host_id = field.call(field.call(host, 'object'), 'objectId')
+        shadow_id = field.call(field.call(shadow, 'object'), 'objectId')
+        next unless host_id && shadow_id
+
+        driver.execute_cdp(
+          'Runtime.callFunctionOn',
+          functionDeclaration:
+            'function(shadowRoot) { window.__percyClosedShadowRoots.set(this, shadowRoot); }',
+          objectId: host_id,
+          arguments: [{objectId: shadow_id}],
+        )
+      rescue StandardError => e
+        # Non-fatal -- this closed shadow root just won't be captured; keep
+        # going so the remaining pairs are still exposed.
+        log("Could not expose closed shadow roots via CDP: #{e}", 'debug')
+      end
+    end
+  end
+
   def self.get_browser_instance(driver)
     if driver.respond_to?(:driver) && driver.driver.respond_to?(:browser)
       return driver.driver.browser.manage
@@ -184,50 +307,276 @@ def self.get_serialized_dom(driver, options, percy_dom_script: nil)
     # wait_for_ready upstream, not a PercyDOM.serialize argument.
     serialize_options = options.reject { |k, _| k.to_s == 'readiness' }
     dom_snapshot = driver.execute_script("return PercyDOM.serialize(#{serialize_options.to_json})")
+    # Guard against a non-Hash serialize result (nil/old PercyDOM) before we
+    # index into it below for readiness_diagnostics / corsIframes / cookies.
+    # Matches canonical Nightwatch behaviour (`if (!domSnapshot) domSnapshot = {}`).
+    dom_snapshot = {} unless dom_snapshot.is_a?(Hash)
     # `!nil?` preserves legitimate falsy returns like {} ("gate ran, no
     # notable diagnostics").
     if !readiness_diagnostics.nil?
&& dom_snapshot.is_a?(Hash)
       dom_snapshot['readiness_diagnostics'] = readiness_diagnostics
     end
+    if percy_dom_script
+      max_depth = resolve_max_frame_depth(options, @cli_config)
+      ignore_selectors = resolve_ignore_selectors(options, @cli_config)
+      ctx = {
+        max_frame_depth: max_depth,
+        ignore_selectors: ignore_selectors,
+        serialize_options: options, # NOTE(review): unfiltered options (still has 'readiness'), not serialize_options -- confirm intent
+        percy_dom_script: percy_dom_script,
+      }
+      processed = capture_cors_iframes(driver, ctx)
+      dom_snapshot['corsIframes'] = processed if processed.any?
+    end
+
+    dom_snapshot['cookies'] = get_browser_instance(driver).all_cookies
+    dom_snapshot
+  end
+
+  # Top-level entry: enumerate page iframes, drop the ones we never enter
+  # (browser-internal, srcdoc, same-origin, ignored), recurse into the rest via
+  # process_frame_tree. A PercyContextLost from a deeper frame stops sibling
+  # iteration but keeps prior captures; other StandardErrors log and return [].
+  def self.capture_cors_iframes(driver, ctx)
+    page_url = driver.current_url
+    page_origin = begin
+      get_origin(page_url)
+    rescue StandardError
+      nil
+    end
+    iframes_meta = enumerate_iframes(driver, ctx[:ignore_selectors])
+    return [] if iframes_meta.empty?
+
+    cors = []
+    iframes_meta.each do |meta|
+      next if should_skip_iframe?(meta, page_origin)
+
+      element = find_iframe_by_percy_id(driver, meta['percyElementId'])
+      next if element.nil?
+
+      begin
+        entries = process_frame_tree(driver, element, meta, 1,
+                                     Set.new([page_url].compact), ctx,)
+        cors.concat(entries) if entries.any?
+      rescue PercyContextLost => e
+        log('Aborting further nested CORS capture due to lost frame context', 'debug')
+        cors.concat(e.partial_capture) if e.partial_capture&.any?
+        break
+      end
+    end
+
+    cors
+  rescue StandardError => e
+    log("Failed to process cross-origin iframes: #{e}", 'debug')
     begin
-      page_origin = get_origin(driver.current_url)
-      iframes = percy_dom_script ? driver.find_elements(:tag_name, 'iframe') : []
-      if iframes.any?
-        processed_frames = []
-        iframes.each do |frame|
-          frame_src = frame.attribute('src')
-          next if unsupported_iframe_src?(frame_src)
+      driver.switch_to.default_content
+    rescue StandardError
+      nil
+    end
+    []
+  end
 
-          begin
-            frame_origin = get_origin(URI.join(driver.current_url, frame_src).to_s)
-          rescue StandardError => e
-            log("Skipping iframe \"#{frame_src}\": #{e}", 'debug')
-            next
-          end
+  # Runs the enumeration script in the page; returns its Array result, else [].
+  def self.enumerate_iframes(driver, ignore_selectors)
+    result = driver.execute_script("return #{enumerate_iframes_script(ignore_selectors)}")
+    result.is_a?(Array) ? result : []
+  rescue StandardError => e
+    log("Failed to enumerate iframes: #{e}", 'debug')
+    []
+  end
 
-          next if frame_origin == page_origin
+  # Look up an