|
416 | 416 | "description": "QRF imputation for age, gender, and earnings split", |
417 | 417 | "details": "Imputes AGEDP1-3, AGERANGE, EARNSPLIT, and GENDER from matched PUF demographic records", |
418 | 418 | "source_file": "policyengine_us_data/datasets/puf/puf.py" |
| 419 | + }, |
| 420 | + { |
| 421 | + "id": "mortgage_convert", |
| 422 | + "label": "Structural Mortgage Conversion", |
| 423 | + "node_type": "process", |
| 424 | + "description": "Convert deductible mortgage interest into structural mortgage balances, interest, and origination-year inputs", |
| 425 | + "details": "Preserves current-law deductible mortgage and total interest deductions while deriving first-lien, secondary acquisition-debt, and non-mortgage residual interest inputs", |
| 426 | + "source_file": "policyengine_us_data/utils/mortgage_interest.py" |
419 | 427 | } |
420 | 428 | ], |
421 | 429 | "edges": [ |
|
534 | 542 | }, |
535 | 543 | { |
536 | 544 | "source": "impute_puf_demographics", |
| 545 | + "target": "mortgage_convert", |
| 546 | + "edge_type": "data_flow" |
| 547 | + }, |
| 548 | + { |
| 549 | + "source": "mortgage_convert", |
537 | 550 | "target": "out_puf", |
538 | 551 | "edge_type": "produces_artifact", |
539 | 552 | "label": "puf_2024.h5" |
|
581 | 594 | "id": 2, |
582 | 595 | "label": "Stage 2", |
583 | 596 | "title": "Stage 2: Extended CPS (PUF Clone)", |
584 | | - "description": "Merge CPS + PUF via cloning, impute 64 income vars + 51 override vars via QRF", |
| 597 | + "description": "Merge CPS + PUF via cloning, rematch clone features, QRF-impute incomes and CPS-only vars, then finalize Extended CPS inputs", |
585 | 598 | "country": "us", |
586 | 599 | "nodes": [ |
587 | 600 | { |
|
602 | 615 | "node_type": "input", |
603 | 616 | "description": "Census block populations" |
604 | 617 | }, |
| 618 | + { |
| 619 | + "id": "in_scf_s2", |
| 620 | + "label": "SCF_2022", |
| 621 | + "node_type": "input", |
| 622 | + "description": "From Stage 0 (mortgage-balance donor sample)" |
| 623 | + }, |
605 | 624 | { |
606 | 625 | "id": "geo_assign_s2", |
607 | 626 | "label": "Geography Assignment", |
|
620 | 639 | "node_type": "utility", |
621 | 640 | "description": "fit_predict() for sequential imputation" |
622 | 641 | }, |
| 642 | + { |
| 643 | + "id": "util_knn_s2", |
| 644 | + "label": "sklearn NearestNeighbors", |
| 645 | + "node_type": "utility", |
| 646 | + "description": "Role-aware donor matching on standardized clone predictors" |
| 647 | + }, |
623 | 648 | { |
624 | 649 | "id": "record_double", |
625 | 650 | "label": "Record Doubling", |
|
634 | 659 | "description": "64 income variables \u2014 training on PUF ~20K records, 7 demographic predictors", |
635 | 660 | "source_file": "policyengine_us_data/calibration/puf_impute.py" |
636 | 661 | }, |
637 | | - { |
638 | | - "id": "qrf_pass2", |
639 | | - "label": "QRF Pass 2: Override Imputation", |
640 | | - "node_type": "process", |
641 | | - "description": "Replace the PUF clone half with second-stage CPS-only QRF outputs", |
642 | | - "details": "Keeps original CPS donor values in the first half and maps person-level predictions onto each target entity before splicing", |
643 | | - "source_file": "policyengine_us_data/datasets/cps/extended_cps.py" |
644 | | - }, |
645 | 662 | { |
646 | 663 | "id": "retire_impute", |
647 | 664 | "label": "Retirement Contribution Imputation", |
|
663 | 680 | "description": "Retirement/Disability/Survivors/Dependents \u2014 scaled to match PUF total", |
664 | 681 | "source_file": "policyengine_us_data/calibration/puf_impute.py" |
665 | 682 | }, |
| 683 | + { |
| 684 | + "id": "clone_features", |
| 685 | + "label": "Clone Feature Rematching", |
| 686 | + "node_type": "process", |
| 687 | + "description": "kNN donor rematch of clone-half sex, race, Hispanic status, and occupation fields", |
| 688 | + "details": "Matches within tax-unit roles using demographics plus imputed income, then derives overtime and tipped-occupation inputs from donor occupations when available", |
| 689 | + "source_file": "policyengine_us_data/datasets/cps/extended_cps.py" |
| 690 | + }, |
666 | 691 | { |
667 | 692 | "id": "cps_only", |
668 | 693 | "label": "CPS-Only Variable Re-imputation", |
|
671 | 696 | "details": "Trains on CPS persons and predicts clone-half values from demographics plus PUF-imputed income, then applies retirement and ORG domain constraints", |
672 | 697 | "source_file": "policyengine_us_data/datasets/cps/extended_cps.py" |
673 | 698 | }, |
| 699 | + { |
| 700 | + "id": "qrf_pass2", |
| 701 | + "label": "QRF Pass 2: Override Imputation", |
| 702 | + "node_type": "process", |
| 703 | + "description": "Replace the PUF clone half with second-stage CPS-only QRF outputs", |
| 704 | + "details": "Keeps original CPS donor values in the first half, maps person-level predictions onto each target entity, and rebuilds capped childcare on the clone half", |
| 705 | + "source_file": "policyengine_us_data/datasets/cps/extended_cps.py" |
| 706 | + }, |
| 707 | + { |
| 708 | + "id": "mortgage_hints", |
| 709 | + "label": "Mortgage Balance Hint Imputation", |
| 710 | + "node_type": "process", |
| 711 | + "description": "Impute tax-unit mortgage balance hints from SCF donor balances", |
| 712 | + "details": "Fits a weighted QRF on SCF mortgage holders, predicts first-lien and secondary acquisition-debt balance hints, and enforces conservative nonnegative ordering", |
| 713 | + "source_file": "policyengine_us_data/utils/mortgage_interest.py" |
| 714 | + }, |
| 715 | + { |
| 716 | + "id": "mortgage_convert", |
| 717 | + "label": "Structural Mortgage Conversion", |
| 718 | + "node_type": "process", |
| 719 | + "description": "Convert deductible mortgage interest into structural mortgage balances, interest, and origination-year inputs", |
| 720 | + "details": "Preserves current-law deductible mortgage and total interest deductions while deriving first-lien, secondary acquisition-debt, and non-mortgage residual interest inputs", |
| 721 | + "source_file": "policyengine_us_data/utils/mortgage_interest.py" |
| 722 | + }, |
674 | 723 | { |
675 | 724 | "id": "formula_drop", |
676 | 725 | "label": "Formula Variable Dropping", |
|
717 | 766 | }, |
718 | 767 | { |
719 | 768 | "source": "qrf_pass1", |
720 | | - "target": "qrf_pass2", |
721 | | - "edge_type": "data_flow" |
722 | | - }, |
723 | | - { |
724 | | - "source": "qrf_pass2", |
725 | 769 | "target": "retire_impute", |
726 | 770 | "edge_type": "data_flow" |
727 | 771 | }, |
728 | 772 | { |
729 | | - "source": "qrf_pass2", |
| 773 | + "source": "qrf_pass1", |
730 | 774 | "target": "weeks_impute", |
731 | 775 | "edge_type": "data_flow" |
732 | 776 | }, |
|
742 | 786 | }, |
743 | 787 | { |
744 | 788 | "source": "ss_reconcile", |
| 789 | + "target": "clone_features", |
| 790 | + "edge_type": "data_flow" |
| 791 | + }, |
| 792 | + { |
| 793 | + "source": "clone_features", |
745 | 794 | "target": "cps_only", |
746 | 795 | "edge_type": "data_flow" |
747 | 796 | }, |
748 | 797 | { |
749 | 798 | "source": "cps_only", |
| 799 | + "target": "qrf_pass2", |
| 800 | + "edge_type": "data_flow" |
| 801 | + }, |
| 802 | + { |
| 803 | + "source": "qrf_pass2", |
| 804 | + "target": "mortgage_hints", |
| 805 | + "edge_type": "data_flow" |
| 806 | + }, |
| 807 | + { |
| 808 | + "source": "in_scf_s2", |
| 809 | + "target": "mortgage_hints", |
| 810 | + "edge_type": "data_flow", |
| 811 | + "label": "SCF donor sample" |
| 812 | + }, |
| 813 | + { |
| 814 | + "source": "mortgage_hints", |
| 815 | + "target": "mortgage_convert", |
| 816 | + "edge_type": "data_flow" |
| 817 | + }, |
| 818 | + { |
| 819 | + "source": "mortgage_convert", |
750 | 820 | "target": "formula_drop", |
751 | 821 | "edge_type": "data_flow" |
752 | 822 | }, |
|
762 | 832 | }, |
763 | 833 | { |
764 | 834 | "source": "util_qrf_s2", |
765 | | - "target": "qrf_pass2", |
| 835 | + "target": "cps_only", |
766 | 836 | "edge_type": "uses_utility" |
767 | 837 | }, |
768 | 838 | { |
769 | 839 | "source": "util_qrf_s2", |
770 | | - "target": "cps_only", |
| 840 | + "target": "mortgage_hints", |
| 841 | + "edge_type": "uses_utility" |
| 842 | + }, |
| 843 | + { |
| 844 | + "source": "util_knn_s2", |
| 845 | + "target": "clone_features", |
771 | 846 | "edge_type": "uses_utility" |
772 | 847 | } |
773 | 848 | ], |
|
2006 | 2081 | } |
2007 | 2082 | ], |
2008 | 2083 | "metadata": { |
2009 | | - "total_nodes": 156, |
2010 | | - "total_edges": 159 |
| 2084 | + "total_nodes": 162, |
| 2085 | + "total_edges": 165 |
2011 | 2086 | } |
2012 | 2087 | } |
0 commit comments