Skip to content

[Bug] MATERIALIZED VIEW Partition Compensation Rewriting FailSummary #59225

@ScalaFirst

Description

@ScalaFirst

Search before asking

  • I had searched in the issues and found no similar issues.

Version

3.1.3

What's Wrong?

Image

select base table group by xxx, can not use MaterializedViewRewrite.

FailSummary: View struct info is invalid, need compensate union all, but can not, because the query structInfo.

show create table "base table" is :

CREATE TABLE stat_engine (
  event_hour datetime NOT NULL,
  channel_placement_name varchar(255) NULL,
  host varchar(255) NULL,
  bucket varchar(255) NULL,
  mid varchar(255) NULL,
  user_id varchar(255) NULL,
  user_name varchar(255) NULL,
  app_id varchar(255) NULL,
  app_name varchar(255) NULL,
  app_placement_id varchar(255) NULL,
  app_placement_name varchar(255) NULL,
  app_bundle varchar(255) NULL,
  app_version varchar(255) NULL,
  channel_id varchar(255) NULL,
  channel_name varchar(255) NULL,
  channel_placement_id varchar(255) NULL,
  ad_type varchar(255) NULL,
  os varchar(50) NULL,
  dlp varchar(255) NULL,
  lpp varchar(255) NULL,
  bct varchar(255) NULL,
  campaign_id varchar(255) NULL,
  make varchar(255) NULL,
  model varchar(255) NULL,
  pub_bid_floor_range varchar(255) NULL,
  adv_bid_floor_range varchar(255) NULL,
  pub_bid_price_range varchar(255) NULL,
  adv_bid_price_range varchar(255) NULL,
  request_num bigint NULL DEFAULT "0",
  fill_num bigint NULL DEFAULT "0",
  pub_bid_floor decimal(38,4) NULL DEFAULT "0.0",
  adv_bid_floor decimal(38,4) NULL DEFAULT "0.0",
  adv_bid_price decimal(38,4) NULL DEFAULT "0.0",
  pub_bid_price decimal(38,4) NULL DEFAULT "0.0",
  ssp_request_num bigint NULL DEFAULT "0",
  ssp_forward_num bigint NULL DEFAULT "0",
  ssp_forbidden_num bigint NULL DEFAULT "0",
  ssp_timeout_num bigint NULL DEFAULT "0",
  ssp_fill_num bigint NULL DEFAULT "0",
  impr_num bigint NULL DEFAULT "0",
  raw_impr_num bigint NULL DEFAULT "0",
  click_num bigint NULL DEFAULT "0",
  raw_click_num bigint NULL DEFAULT "0",
  drop_click_num bigint NULL DEFAULT "0",
  raw_drop_click_num bigint NULL DEFAULT "0",
  invoke_num bigint NULL DEFAULT "0",
  raw_invoke_num bigint NULL DEFAULT "0",
  income decimal(38,4) NULL DEFAULT "0.0",
  upstream_income decimal(38,4) NULL DEFAULT "0.0",
  earnings decimal(38,4) NULL DEFAULT "0.0",
  upstream_ratio decimal(38,4) NULL DEFAULT "0.0",
  qh_invoke_num bigint NULL DEFAULT "0",
  qh_dau_invoke_num bigint NULL DEFAULT "0",
  qh_no_client_cvr_num bigint NULL DEFAULT "0",
  qh_delayed_attributed_purchase_num bigint NULL DEFAULT "0",
  qh_delayed_unattributed_purchase_num bigint NULL DEFAULT "0",
  qh_no_delay_attributed_purchase_num bigint NULL DEFAULT "0",
  qh_no_delay_invoke_num bigint NULL DEFAULT "0",
  qh_client_cvr_attributed_purchase_num bigint NULL DEFAULT "0" COMMENT "",
  qh_flash_mac_num bigint NULL DEFAULT "0" COMMENT "",
  br_callback bigint NULL DEFAULT "0" COMMENT "",
  cvr79_num bigint NULL DEFAULT "0" COMMENT "",
  qh_flash110_num bigint NULL DEFAULT "0" COMMENT "",
  INDEX idx_user_name (user_name) USING INVERTED,
  INDEX idx_channel_name (channel_name) USING INVERTED,
  INDEX idx_channel_placement_name (channel_placement_name) USING INVERTED,
  INDEX idx_app_placement_id (app_placement_id) USING INVERTED,
  INDEX idx_app_bundle (app_bundle) USING INVERTED,
  INDEX idx_channel_placement_id (channel_placement_id) USING INVERTED,
  INDEX idx_app_placement_name (app_placement_name) USING INVERTED
) ENGINE=OLAP
DUPLICATE KEY(event_hour, channel_placement_name)
AUTO PARTITION BY RANGE (date_trunc(event_hour, 'hour'))
(...,PARTITION p20251222080000 VALUES [('2025-12-22 08:00:00'), ('2025-12-22 09:00:00')),...)
DISTRIBUTED BY HASH(app_placement_id, app_bundle, channel_placement_id) BUCKETS 16
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"min_load_replica_num" = "-1",
"is_being_synced" = "false",
"storage_medium" = "hdd",
"storage_format" = "V2",
"inverted_index_storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "10000",
"group_commit_data_bytes" = "134217728"
;

show MATERIALIZED VIEW is :

CREATE MATERIALIZED VIEW IF NOT EXISTS `dwd_view_stat_engine_agg_hour_demension_normal` 
BUILD IMMEDIATE REFRESH AUTO ON SCHEDULE EVERY 1 hour starts '2025-12-22 19:10:00'
DUPLICATE KEY(`event_hour`,`channel_placement_name`,`host`)
PARTITION BY (event_hour)
DISTRIBUTED BY HASH(`channel_placement_name`, `app_placement_name`) BUCKETS AUTO
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"grace_period" = "300",
"use_for_rewrite" = "true",
"workload_group" = "scheduler_group"
) as 
SELECT
    date_trunc(event_hour,'hour') as event_hour,                      -- 1  
    channel_placement_name,          -- 2
    any_value(host) as host,                            -- 3
    any_value(bucket) as bucket,                          -- 4
    mid,                             -- 5
    user_id,  -- 6
    user_name,-- 7
    app_id,                          -- 8
    app_name,                        -- 9
    app_placement_id,                --10
    app_placement_name,              --11
    app_bundle,                      --12
    app_version,                     --13
    channel_id,                      --14
    channel_name,                    --15
    channel_placement_id,            --16
    ad_type,                         --17
    os,                              --18
    dlp,                             --19
    lpp,                             --20
    bct,                             --21
    campaign_id,                     --22
    any_value(make) as make,                            --23
    any_value(model) as model,                           --24
    pub_bid_floor_range,             --25
    adv_bid_floor_range,             --26
    pub_bid_price_range,             --27
    adv_bid_price_range,             --28
    SUM(request_num)              AS request_num,
    SUM(fill_num)                 AS fill_num,
    SUM(pub_bid_floor)            AS pub_bid_floor,
    SUM(adv_bid_floor)            AS adv_bid_floor,
    SUM(adv_bid_price)            AS adv_bid_price,
    SUM(pub_bid_price)            AS pub_bid_price,
    SUM(ssp_request_num)          AS ssp_request_num,
    SUM(ssp_forward_num)          AS ssp_forward_num,
    SUM(ssp_forbidden_num)        AS ssp_forbidden_num,
    SUM(ssp_timeout_num)          AS ssp_timeout_num,
    SUM(ssp_fill_num)             AS ssp_fill_num,
    SUM(impr_num)                 AS impr_num,
    SUM(raw_impr_num)             AS raw_impr_num,
    SUM(click_num)                AS click_num,
    SUM(raw_click_num)            AS raw_click_num,
    SUM(drop_click_num)           AS drop_click_num,
    SUM(raw_drop_click_num)       AS raw_drop_click_num,
    SUM(invoke_num)               AS invoke_num,
    SUM(raw_invoke_num)           AS raw_invoke_num,
    SUM(income)                   AS income,
    SUM(upstream_income)          AS upstream_income,
    SUM(earnings)                 AS earnings,
    SUM(upstream_ratio)           AS upstream_ratio,
    SUM(qh_invoke_num)            AS qh_invoke_num,
    SUM(qh_dau_invoke_num)        AS qh_dau_invoke_num,
    SUM(qh_no_client_cvr_num)     AS qh_no_client_cvr_num,
    SUM(qh_delayed_attributed_purchase_num)    AS qh_delayed_attributed_purchase_num,
    SUM(qh_delayed_unattributed_purchase_num)  AS qh_delayed_unattributed_purchase_num,
    SUM(qh_no_delay_attributed_purchase_num)   AS qh_no_delay_attributed_purchase_num,
    SUM(qh_no_delay_invoke_num)   AS qh_no_delay_invoke_num,
    SUM(qh_client_cvr_attributed_purchase_num) AS qh_client_cvr_attributed_purchase_num,
    SUM(qh_flash_mac_num)         AS qh_flash_mac_num,
    SUM(br_callback)              AS br_callback,
    SUM(cvr79_num)                AS cvr79_num,
    SUM(qh_flash110_num)          AS qh_flash110_num
FROM stat_engine
GROUP BY
    date_trunc(event_hour,'hour'),
    channel_placement_name,
    mid,
    user_id,  -- 6
    user_name,-- 7
    app_id,
    app_name,
    app_placement_id,
    app_placement_name,
    app_bundle,
    app_version,
    channel_id,
    channel_name,
    channel_placement_id,
    ad_type,
    os,
    dlp,
    lpp,
    bct,
    campaign_id,
    pub_bid_floor_range,
    adv_bid_floor_range,
    pub_bid_price_range,
    adv_bid_price_range
;

select SQL is:

explain SELECT
((SUM(income)/SUM(ssp_forward_num))*1000000) AS sumrequ-83f,
SUM(income) AS sum_income,
((SUM(income)/SUM(impr_num))*1000) AS suminco-4f4,
((SUM(earnings)/SUM(impr_num))*1000) AS suminco-5f8,
SUM(earnings) AS sum_earnings,
SUM(request_num) AS sumimpr-604,
SUM(fill_num) AS sumimpr-850,
((SUM(fill_num)/SUM(request_num))*100) AS sumfill-233,
SUM(impr_num) AS count,
((SUM(impr_num)/SUM(fill_num))*100) AS sumimpr-52a,
SUM(click_num) AS sumimpr-c22,
((SUM(adv_bid_price)/SUM(ssp_fill_num))/100) AS sum_adv_bid_price,
(SUM(income)-SUM(upstream_income)) AS sumearn-78f,
((SUM(income)-SUM(earnings))-SUM(upstream_income)) AS suminco-19e,
(1-(SUM(earnings)/SUM(income))) AS suminco-1e0
FROM stat_engine
GROUP BY ''
;

What You Expected?

query will use Partition Compensation Rewrite.

How to Reproduce?

No response

Anything Else?

by the way, when query filter refreshed partition, the query rewrite will take successed . such as "where event_hour < date_trunc(now() - interval 2 hour) -- rewrite success because view has refreshed at one hours ago"

Are you willing to submit PR?

  • Yes I am willing to submit a PR!

Code of Conduct

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions