diff --git a/backend/api/metadata_registry.py b/backend/api/metadata_registry.py index 6fe49f3..bde78dd 100644 --- a/backend/api/metadata_registry.py +++ b/backend/api/metadata_registry.py @@ -61,6 +61,16 @@ "Income values are in nominal dollars (not inflation-adjusted across years)." ], }, + "unemployment_rate": { + "source": ( + "U.S. Census Bureau, American Community Survey 5-Year Estimates " + "(Table B23025)" + ), + "lastUpdated": "2023", + "caveats": [ + "Estimates for small geographies may have high margins of error." + ], + }, "qcew_employment": { "source": ( "U.S. Bureau of Labor Statistics, " diff --git a/backend/api/routes/post_routes/post_acs5_db.py b/backend/api/routes/post_routes/post_acs5_db.py index 3a4bf1d..2507a8b 100644 --- a/backend/api/routes/post_routes/post_acs5_db.py +++ b/backend/api/routes/post_routes/post_acs5_db.py @@ -1,5 +1,6 @@ import logging +import pandas as pd from fastapi import APIRouter from api.metadata_registry import get_metadata @@ -11,83 +12,336 @@ router = APIRouter() +def _aggregate_to_state(df: pd.DataFrame, average=False) -> pd.DataFrame: + """Aggregate county-level data to state level by summing Value and averaging Percent.""" + if df.empty: + return df + # Sum the Value column (population counts) and average the Percent column + agg_dict = {} + for col in df.columns: + if col in ['year', 'Section', 'Variable']: + agg_dict[col] = 'first' + elif average and col == 'Value': + agg_dict[col] = 'mean' + elif col == 'Value': + agg_dict[col] = 'sum' + elif col == 'Percent': + agg_dict[col] = 'mean' + + result = df.groupby(['year', 'Section', 'Variable'], + as_index=False).agg(agg_dict) + # Round percent to 1 decimal + if 'Percent' in result.columns: + result['Percent'] = result['Percent'].round(1) + return result + + +# Demographics @router.post("/load/acs5-db/tidy/demographics") async def tidy_demographics(request: FilterRequest): - rows = DB.execute( - """ - SELECT year, Section, Variable, Value, Percent - FROM b10_census - WHERE NAME = ? - AND CAST(year AS INTEGER) BETWEEN ? AND ? - ORDER BY year, Section, Variable - """, - [request.name, request.year_min, request.year_max], - ).df() + # If requesting Vermont (state-level), aggregate all counties + if request.name.lower() == "vermont": + rows = DB.execute( + """ + SELECT year, Section, Variable, Value, Percent + FROM b10_census + WHERE geo_type = 'county' + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year, Section, Variable + """, + [request.year_min, request.year_max], + ).df() + rows = _aggregate_to_state(rows) + else: + rows = DB.execute( + """ + SELECT year, Section, Variable, Value, Percent + FROM b10_census + WHERE NAME = ? + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year, Section, Variable + """, + [request.name, request.year_min, request.year_max], + ).df() return make_response(data=rows, metadata=get_metadata("demographics")) +# Education @router.post("/load/acs5-db/tidy/education") async def tidy_education(request: FilterRequest): - rows = DB.execute( - """ - SELECT year, Section, Variable, Value, Percent - FROM b15003_education - WHERE NAME = ? - AND CAST(year AS INTEGER) BETWEEN ? AND ? - ORDER BY year, Variable - """, - [request.name, request.year_min, request.year_max], - ).df() + if request.name.lower() == "vermont": + rows = DB.execute( + """ + SELECT year, Section, Variable, Value, Percent + FROM b15003_education + WHERE geo_type = 'county' + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year, Variable + """, + [request.year_min, request.year_max], + ).df() + rows = _aggregate_to_state(rows) + else: + rows = DB.execute( + """ + SELECT year, Section, Variable, Value, Percent + FROM b15003_education + WHERE NAME = ? + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year, Variable + """, + [request.name, request.year_min, request.year_max], + ).df() return make_response(data=rows, metadata=get_metadata("education")) +# Housing (TODO: Fix statewide aggregation for housing variables that are not counts, e.g. median rent) @router.post("/load/acs5-db/tidy/housing") async def tidy_housing(request: FilterRequest): - rows = DB.execute( - """ - SELECT year, Section, Variable, Value, Percent - FROM b_housing - WHERE NAME = ? - AND CAST(year AS INTEGER) BETWEEN ? AND ? - ORDER BY year, Variable - """, - [request.name, request.year_min, request.year_max], - ).df() + if request.name.lower() == "vermont": + rows = DB.execute( + """ + SELECT year, Section, Variable, Value, Percent + FROM b_housing + WHERE geo_type = 'county' + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year, Variable + """, + [request.year_min, request.year_max], + ).df() + rows = _aggregate_to_state(rows, ) + else: + rows = DB.execute( + """ + SELECT year, Section, Variable, Value, Percent + FROM b_housing + WHERE NAME = ? + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year, Variable + """, + [request.name, request.year_min, request.year_max], + ).df() return make_response(data=rows, metadata=get_metadata("housing")) +# Labor Force @router.post("/load/acs5-db/tidy/labor-force") async def tidy_labor_force(request: FilterRequest): - rows = DB.execute( - """ - SELECT year, Section, Variable, Value, Percent - FROM b_economic - WHERE NAME = ? - AND Section = 'Labor Force' - AND CAST(year AS INTEGER) BETWEEN ? AND ? - ORDER BY year, Variable - """, - [request.name, request.year_min, request.year_max], - ).df() + if request.name.lower() == "vermont": + rows = DB.execute( + """ + SELECT year, Section, Variable, Value, Percent + FROM b_economic + WHERE geo_type = 'county' + AND Section = 'Labor Force' + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year, Variable + """, + [request.year_min, request.year_max], + ).df() + rows = _aggregate_to_state(rows) + else: + rows = DB.execute( + """ + SELECT year, Section, Variable, Value, Percent + FROM b_economic + WHERE NAME = ? + AND Section = 'Labor Force' + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year, Variable + """, + [request.name, request.year_min, request.year_max], + ).df() return make_response(data=rows, metadata=get_metadata("labor_force")) +# Income @router.post("/load/acs5-db/tidy/income") async def tidy_income(request: FilterRequest): - rows = DB.execute( - """ - SELECT year, Section, Variable, Value, Percent - FROM b_economic - WHERE NAME = ? - AND Section = 'Income' - AND CAST(year AS INTEGER) BETWEEN ? AND ? - ORDER BY year, Variable - """, - [request.name, request.year_min, request.year_max], - ).df() + if request.name.lower() == "vermont": + rows = DB.execute( + """ + SELECT year, Section, Variable, Value, Percent + FROM b_economic + WHERE geo_type = 'county' + AND Section = 'Income' + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year, Variable + """, + [request.year_min, request.year_max], + ).df() + rows = _aggregate_to_state(rows, average=True) + else: + rows = DB.execute( + """ + SELECT year, Section, Variable, Value, Percent + FROM b_economic + WHERE NAME = ? + AND Section = 'Income' + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year, Variable + """, + [request.name, request.year_min, request.year_max], + ).df() return make_response(data=rows, metadata=get_metadata("income")) +# Median Age +@router.post("/load/acs5-db/tidy/demographics/median-age") +async def tidy_median_age(request: FilterRequest): + # If requesting Vermont (state-level), aggregate all counties + if request.name.lower() == "vermont": + rows = DB.execute( + """ + SELECT year, Section, Variable, Value + FROM b10_census + WHERE geo_type = 'county' AND Variable = 'Median Age' + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year, Section, Variable + """, + [request.year_min, request.year_max], + ).df() + rows = _aggregate_to_state(rows, average=True) + else: + rows = DB.execute( + """ + SELECT year, Section, Variable, Value, Percent + FROM b10_census + WHERE Variable = 'Median Age' AND NAME = ? + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year, Section, Variable + """, + [request.name, request.year_min, request.year_max], + ).df() + return make_response(data=rows, metadata=get_metadata("demographics")) + + +# Unemployment Rate +@router.post("/load/acs5-db/tidy/unemployment-rate") +async def tidy_unemployment_rate(request: FilterRequest): + if request.name.lower() == "vermont": + rows = DB.execute( + """ + SELECT year, Unemployment_Rate AS Value, Unemployment_Rate AS Percent + FROM unemployment_rate + WHERE NAME LIKE '%County, Vermont' + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year + """, + [request.year_min, request.year_max], + ).df() + # Average unemployment rate across counties for state level + if not rows.empty: + rows = rows.groupby(['year'], as_index=False).agg( + {'Value': 'mean', 'Percent': 'mean'}) + rows['NAME'] = 'Vermont' + elif request.name.lower().endswith(" county, vermont") and request.name.count(',') == 1: + # County-level: aggregate town-level data for the specified county + rows = DB.execute( + """ + SELECT year, Unemployment_Rate AS Value, Unemployment_Rate AS Percent + FROM unemployment_rate + WHERE NAME LIKE ? + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year + """, + [f"%{request.name}%", request.year_min, request.year_max], + ).df() + if not rows.empty: + rows = rows.groupby(['year'], as_index=False).agg( + {'Value': 'mean', 'Percent': 'mean'}) + rows['NAME'] = request.name + elif request.name.count(',') >= 2: + # Town-level: names in unemployment_rate include suffixes like "city" or "town" + town_name, rest = request.name.split(',', 1) + rows = DB.execute( + """ + SELECT year, NAME, Unemployment_Rate AS Value, Unemployment_Rate AS Percent + FROM unemployment_rate + WHERE NAME LIKE ? + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year + """, + [f"{town_name.strip()}%{rest.strip()}", + request.year_min, request.year_max], + ).df() + else: + rows = DB.execute( + """ + SELECT year, NAME, Unemployment_Rate AS Value, Unemployment_Rate AS Percent + FROM unemployment_rate + WHERE NAME = ? + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year + """, + [request.name, request.year_min, request.year_max], + ).df() + return make_response(data=rows, metadata=get_metadata("unemployment_rate")) + + +# Median Earnings +@router.post("/load/acs5-db/tidy/median-earnings") +async def tidy_median_earnings(request: FilterRequest): + if request.name.lower() == "vermont": + rows = DB.execute( + """ + SELECT year, estimate AS Value, variable AS Variable + FROM median_earnings + WHERE NAME LIKE '%County, Vermont' + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year + """, + [request.year_min, request.year_max], + ).df() + # Average median earnings across counties for state level + if not rows.empty: + rows = rows.groupby(['year', 'Variable'], as_index=False).agg( + {'Value': 'mean'}) + rows['NAME'] = 'Vermont' + elif request.name.lower().endswith(" county, vermont") and request.name.count(',') == 1: + # County-level: aggregate town-level data for the specified county + rows = DB.execute( + """ + SELECT year, estimate AS Value, variable AS Variable + FROM median_earnings + WHERE NAME LIKE ? + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year + """, + [f"%{request.name}%", request.year_min, request.year_max], + ).df() + if not rows.empty: + rows = rows.groupby(['year', 'Variable'], as_index=False).agg( + {'Value': 'mean'}) + rows['NAME'] = request.name + elif request.name.count(',') >= 2: + town_name, rest = request.name.split(',', 1) + rows = DB.execute( + """ + SELECT year, NAME, estimate AS Value, variable AS Variable + FROM median_earnings + WHERE NAME LIKE ? + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year + """, + [f"{town_name.strip()}%{rest.strip()}", + request.year_min, request.year_max], + ).df() + else: + rows = DB.execute( + """ + SELECT year, NAME, estimate AS Value, variable AS Variable + FROM median_earnings + WHERE NAME = ? + AND CAST(year AS INTEGER) BETWEEN ? AND ? + ORDER BY year + """, + [request.name, request.year_min, request.year_max], + ).df() + + return make_response(data=rows, metadata=get_metadata("median_earnings")) + + # --------------------------------------------------------------------------- # DP-series combined explorer (DP02 / DP03 / DP04 / DP05) # --------------------------------------------------------------------------- diff --git a/backend/api/routes/post_routes/post_qcew.py b/backend/api/routes/post_routes/post_qcew.py index 48fd704..94de3be 100644 --- a/backend/api/routes/post_routes/post_qcew.py +++ b/backend/api/routes/post_routes/post_qcew.py @@ -22,25 +22,40 @@ @router.post("/load/qcew/employment") async def employment_by_sector(request: FilterRequest): - county = (request.filters or {}).get("County", [None])[0] + county = (request.filters or {}).get("County", [None])[ + 0] if request.filters else None - query = """ - SELECT year, quarter, quarter_label, sector, employment_4qma - FROM qcew - WHERE sector != 'Total' - {county_filter} - ORDER BY year, quarter, sector - """ - if county: - rows: pd.DataFrame = DB.execute( - query.format(county_filter="AND County = ?"), [county] - ).df() + # For state-level (no county specified), aggregate all counties + if not county and (not request.name or request.name.lower() == "vermont"): + query = """ + SELECT year, quarter, quarter_label, sector, employment_4qma + FROM qcew + WHERE sector != 'Total' + ORDER BY year, quarter, sector + """ + rows: pd.DataFrame = DB.execute(query).df() + elif county: + query = """ + SELECT year, quarter, quarter_label, sector, employment_4qma + FROM qcew + WHERE sector != 'Total' + AND County = ? + ORDER BY year, quarter, sector + """ + rows: pd.DataFrame = DB.execute(query, [county]).df() else: - rows = DB.execute(query.format(county_filter="")).df() + # No county and not Vermont state-level - return empty + return make_response(data=[], metadata=get_metadata("qcew_employment")) if rows.empty: return make_response(data=[], metadata=get_metadata("qcew_employment")) + # For state-level, aggregate by summing employment across counties + if not county and (not request.name or request.name.lower() == "vermont"): + rows = rows.groupby(['year', 'quarter', 'quarter_label', 'sector'], as_index=False).agg({ + 'employment_4qma': 'sum' + }) + # Pivot to wide format: one row per quarter_label, one column per sector wide = rows.pivot_table( index=["year", "quarter", "quarter_label"], diff --git a/design/plots.md b/design/plots.md index 0f0361b..57cb361 100644 --- a/design/plots.md +++ b/design/plots.md @@ -40,6 +40,18 @@ These only render when a table-primary `ChartItem` is toggled to Chart view. The **Variables:** `Total Housing Units` (left axis, count), `Renter-Occupied Units` (left axis, count), `Median Home Value` (right axis, dollars). **Plot:** three-line `LineChart` with dual Y-axes. Left axis formatted with `toLocaleString()`; right axis in `$Xk`. +### `UnemploymentTrendChart` +**Variables:** `Unemployment Rate` (left axis, Percentage) +**Plot:** one-line `LineChart` with one Y-axis. Dual-line comparisons are available. + +### `EarningsTrendChart` +**Variables:** `Median Earnings` (left axis, $ Amount) +**Plot:** Three-line `LineChart` (Male vs Female vs All Workers) with one Y-axis. Comparisons are available. + +### `MedianAgeTrendChart` +**Variables:** `Median Age` (left axis, Age in years) +**Plot:** One-line `LineChart` with one Y-axis. Dual-line comparisons are available. + --- ## Employment Area Chart diff --git a/frontend/src/components/Charts/TrendCharts.tsx b/frontend/src/components/Charts/TrendCharts.tsx index db46f86..a40e044 100644 --- a/frontend/src/components/Charts/TrendCharts.tsx +++ b/frontend/src/components/Charts/TrendCharts.tsx @@ -121,6 +121,82 @@ export const DemographicsTrendChart = ({ ); }; + +// --------------------------------------------------------------------------- +// Demographics: Median Age Chart +// --------------------------------------------------------------------------- +export const MedianAgeTrendChart = ({ + chart, +}: { + chart: ChartItem; +}) => { + const data = chart.data as any[]; + const compareData = (chart.compareData ?? []) as any[]; + if (!data || data.length === 0) return null; + + const years = Array.from(new Set(data.map((r) => r.year))).sort(); + const labels = chart.chartParams?.legendLabels as + | [string, string] + | undefined; + const cmpName = labels?.[1] ?? 'Comparison'; + + const buildPoint = (rows: any[], year: number) => { + const find = (label: string) => + rows.find((r) => r.year === year && r.Variable === label)?.Value ?? null; + return {'Median Age': find('Median Age')}; + }; + + const plotData = years.map((year) => ({ + year, + ...buildPoint(data, year), + ...(compareData.length > 0 + ? { + 'Median Age (cmp)': buildPoint(compareData, year)['Median Age'], + } + : {}), + })); + + return ( + <> + {compareData.length > 0 && } + + + + + Number(value).toFixed(0)} /> + val != null ? `${Number(val).toFixed(1)} years` : '—'}/> + + + {compareData.length > 0 && ( + <> + + + )} + + + + ); +}; + // --------------------------------------------------------------------------- // Education: all attainment levels except "Some College, No Degree" // --------------------------------------------------------------------------- @@ -322,6 +398,202 @@ export const HousingTrendChart = ({ ); }; + +// --------------------------------------------------------------------------- +// Economics: Unemployment Rate +// --------------------------------------------------------------------------- + +export const UnemploymentTrendChart = ({ + chart, +}: { + chart: ChartItem; +}) => { + const data = chart.data as any[]; + const compareData = (chart.compareData ?? []) as any[]; + if (!data || data.length === 0) return null; + + const years = Array.from(new Set(data.map((r) => r.year))).sort(); + const labels = chart.chartParams?.legendLabels as + | [string, string] + | undefined; + const cmpName = labels?.[1] ?? 'Comparison'; + + const buildPoint = (rows: any[], year: number) => { + const row = rows.find((r) => r.year === year); + return { + 'Unemployment Rate': row?.Value ?? null, + }; + }; + + const plotData = years.map((year) => ({ + year, + ...buildPoint(data, year), + ...(compareData.length > 0 + ? { + 'Unemployment Rate (cmp)': buildPoint(compareData, year)['Unemployment Rate'], + } + : {}), + })); + + return ( + <> + {compareData.length > 0 && } + + + + + + (val != null ? `${val}%` : '—')} /> + + + {compareData.length > 0 && ( + <> + + + )} + + + + ); +}; + +// --------------------------------------------------------------------------- +// Economics: Median Earnings (Male vs Female vs All Workers) +// --------------------------------------------------------------------------- + +export const EarningsTrendChart = ({ + chart, +}: { + chart: ChartItem; +}) => { + const data = chart.data as any[]; + const compareData = (chart.compareData ?? []) as any[]; + if (!data || data.length === 0) return null; + + const years = Array.from(new Set(data.map((r) => r.year))).sort(); + const labels = chart.chartParams?.legendLabels as + | [string, string] + | undefined; + const cmpName = labels?.[1] ?? 'Comparison'; + + const buildPoint = (rows: any[], year: number) => { + const find = (label: string) => + rows.find( + (r) => String(r.year) === String(year) && + r.Variable === label + )?.Value ?? null; + return { + 'Male Full-Time Workers': find('DP03_0093'), + 'Female Full-Time Workers': find('DP03_0094'), + 'All Workers': find('DP03_0092'), + }; + }; + + const plotData = years.map((year) => ({ + year, + ...buildPoint(data, year), + ...(compareData.length > 0 + ? { + 'Male Full-Time Workers (cmp)': buildPoint(compareData, year)['Male Full-Time Workers'], + 'Female Full-Time Workers (cmp)': buildPoint(compareData, year)['Female Full-Time Workers'], + 'All Workers (cmp)': buildPoint(compareData, year)['All Workers'], + } + : {}), + })); + return ( + <> + {compareData.length > 0 && } + + + + + + `$${(v / 1000).toFixed(0)}k`} /> + + value != null? `$${Number(value).toLocaleString('en-US', {maximumFractionDigits: 0,})}`: '—'} /> + + + + + {compareData.length > 0 && ( + <> + + + + + )} + + + + ); +}; + // --------------------------------------------------------------------------- // Generic two-location trend chart for the DP-combined explorer // data: [{year, Value}] for side A diff --git a/frontend/src/components/Charts/configs/ChartDefs.tsx b/frontend/src/components/Charts/configs/ChartDefs.tsx index 46a69c5..c51d005 100644 --- a/frontend/src/components/Charts/configs/ChartDefs.tsx +++ b/frontend/src/components/Charts/configs/ChartDefs.tsx @@ -90,6 +90,21 @@ export const chartDefs: ChartDef[] = [ extraParams: { year_min: 2010, year_max: 2023 }, }, }, + { + id: 'median_age', + title: 'Median Age', + url: `${BASE_API_URL}/load/acs5-db/tidy/demographics/median-age`, + xField: '', + yField: '', + subtype: 'renderTableEstimates', + trendChart: 'MedianAgeTrendChart', + categories: ['Demographics'], + filterKey: '', + dataKey: '', + tableConfig: { + extraParams: { year_min: 2010, year_max: 2023 }, + }, + }, { id: 'education', title: 'Educational Attainment — Percent', @@ -179,6 +194,38 @@ export const chartDefs: ChartDef[] = [ extraParams: { year_min: 2010, year_max: 2023 }, }, }, + // Unemployment Rate + { + id: 'unemployment_rate', + title: 'Unemployment Rate — Percent', + url: `${BASE_API_URL}/load/acs5-db/tidy/unemployment-rate`, + xField: '', + yField: '', + subtype: 'renderTable', + trendChart: 'UnemploymentTrendChart', + categories: ['Labor & Economy'], + filterKey: '', + dataKey: '', + tableConfig: { + extraParams: { year_min: 2010, year_max: 2023 }, + }, + }, + // Median Earnings + { + id: 'earnings', + title: 'Median Earnings - Value', + url: `${BASE_API_URL}/load/acs5-db/tidy/median-earnings`, + xField: '', + yField: '', + subtype: 'renderTableEstimates', + trendChart: 'EarningsTrendChart', + categories: ['Labor & Economy'], + filterKey: '', + dataKey: '', + tableConfig: { + extraParams: { year_min: 2010, year_max: 2023 }, + }, + }, // Employment (QCEW quarterly, stacked by sector) { id: 'employment', diff --git a/frontend/src/components/Charts/index.tsx b/frontend/src/components/Charts/index.tsx index 3e49ebd..20c7e68 100644 --- a/frontend/src/components/Charts/index.tsx +++ b/frontend/src/components/Charts/index.tsx @@ -13,8 +13,11 @@ export { } from './DemographicsTable'; export { DemographicsTrendChart, + MedianAgeTrendChart, EducationTrendChart, HousingTrendChart, + UnemploymentTrendChart, + EarningsTrendChart, DPTrendChart, } from './TrendCharts'; export { EmploymentAreaChart } from './EmploymentAreaChart';