diff --git a/public/processdata/harmony_process_data.ipynb b/public/processdata/harmony_process_data.ipynb new file mode 100644 index 0000000..85c53ef --- /dev/null +++ b/public/processdata/harmony_process_data.ipynb @@ -0,0 +1,867 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Survey Data Harmonization Pipeline\n", + "\n" + ], + "metadata": { + "id": "IzrkAMChJwwk" + } + }, + { + "cell_type": "markdown", + "source": [ + "A comprehensive notebook for harmonizing multiple survey datasets\n", + "while maintaining full transparency and reproducibility.\n", + "\n", + "SETUP INSTRUCTIONS:\n", + "1. Run the Google Drive mount cell (for Colab users)\n", + "2. Configure your surveys in SURVEYS_CONFIG\n", + "3. Define your mapping rules in MAPPINGS\n", + "4. Run the processing pipeline\n", + "5. Export your analysis-ready dataset\n", + "\n", + "For detailed guidance, see the configuration examples below." + ], + "metadata": { + "id": "Hgrtv0kfMl4n" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Google Colab Users\n", + "\n", + "Run the following cells to:\n", + "\n", + "1. Install dependencies\n", + "2. Mount Google Drive and set the working directory." + ], + "metadata": { + "id": "6hLqGFZQM8jt" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VwCmmrQYJq1c" + }, + "outputs": [], + "source": [ + "!pip -q install pandas numpy openpyxl matplotlib" + ] + }, + { + "cell_type": "code", + "source": [ + "# Mount Google Drive and set the working directory (Colab users only).\n", + "# When running locally, the google.colab import inside the try block fails and the except branch runs instead.\n", + "import os\n", + "\n", + "# Set your working directory - CUSTOMIZE THIS PATH\n", + "project_folder = \"/content/drive/MyDrive/Colab Notebooks\"\n", + "\n", + "try:\n", + " from google.colab import drive\n", + " drive.mount('/content/drive')\n", + " os.chdir(project_folder)\n", + " print(\"Google Drive mounted successfully\")\n", + "except ImportError:\n", + " print(\"ℹ️ Running locally - Google Drive mount not needed\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "E1QLjKbmLW7Y", + "outputId": "00eb24c8-0e32-4669-daeb-b30307120a82" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n", + "✅ Google Drive mounted successfully\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Import Libraries" + ], + "metadata": { + "id": "a7W4nD7wN5Rw" + } + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "from pathlib import Path\n", + "import glob\n", + "from typing import Dict, List, Union, Optional, Tuple, Any\n", + "import warnings\n", + "warnings.filterwarnings('ignore')" + ], + "metadata": { + "id": "zBG3ULpGN7hU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Configuration Section - Customize this for your data.\n", + "\n", + "Each survey entry supports:\n", + "\n", + "1. **path**: File path. (supports wildcards like *.csv for multiple files)\n", + "2. **instrument**: Name of the survey instrument.\n", + "3. 
**id_col**: Column name containing participant IDs.\n", + "4. **item_prefixes**: List of prefixes for item columns. (optional)\n", + "5. **sheet_name**: For Excel files, specify sheet name. (optional)\n", + "6. **skip_rows**: Number of rows to skip at top of file. (optional)\n", + "7. **reverse_items**: List of items to reverse-score. (optional)\n", + "8. **missing_values**: Custom missing value codes. (optional)\n", + "\n", + "#### FLEXIBLE PATH OPTIONS:\n", + "- Single file: \"data/survey1.csv\"\n", + "- Multiple files: \"data/wave*.csv\" (uses glob pattern)\n", + "- Different folders: [\"folder1/survey.csv\", \"folder2/survey.xlsx\"]" + ], + "metadata": { + "id": "fI6OZC5wQPbE" + } + }, + { + "cell_type": "code", + "source": [ + "SURVEYS_CONFIG = [\n", + " {\n", + " 'path': 'data/baselinefile.csv', # Single baseline file\n", + " 'instrument': 'SCARED',\n", + " 'id_col': 'participant_id',\n", + " 'item_prefixes': ['SCARED_'],\n", + " 'reverse_items': [], # Add item numbers if any need reverse scoring\n", + " 'missing_values': [-999, 'N/A', '']\n", + " },\n", + " {\n", + " 'path': 'data/multiple_files*.xlsx', # Multiple waves using wildcard\n", + " 'instrument': 'GAD7',\n", + " 'id_col': 'ID',\n", + " 'sheet_name': 'Data', # Specific sheet in Excel\n", + " 'item_prefixes': ['GAD7_', 'gad_'],\n", + " 'skip_rows': 1, # Skip header row if needed\n", + " 'missing_values': [-999, 99]\n", + " },\n", + " {\n", + " 'path': ['data/specificOne.csv', 'data/specificTwo.csv'], # Explicit file list\n", + " 'instrument': 'PHQ9',\n", + " 'id_col': 'subject_id',\n", + " 'item_prefixes': ['PHQ9_', 'phq_'],\n", + " 'reverse_items': [],\n", + " 'missing_values': [-999]\n", + " }\n", + "]" + ], + "metadata": { + "id": "nxHhZwUCTr-P" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "#### Mappings\n", + "\n", + "Define how to recode survey responses to binary (0/1)\n", + "\n", + "**MAPPING INSTRUCTIONS:**\n", + "\n", + "Define how your survey responses should be converted to binary (0/1) format.\n", + "\n", + "**STRUCTURE:**\n", + "```\n", + "{instrument_name: {item: {original_value: new_value}}}\n", + "```\n", + "- Use 'DEFAULT' for instrument-wide rules\n", + "- Add specific items to override defaults\n", + "- Values can be strings, numbers, or mixed\n", + "\n", + "**COMMON MAPPING PATTERNS:**\n", + "\n", + "PATTERN 1 - Likert scales (0-3 scale):\n", + "\n", + "```\n", + "'DEFAULT': {0: 0, 1: 0, 2: 1, 3: 1} # Split at midpoint\n", + "```\n", + "\n", + "PATTERN 2 - Yes/No responses:\n", + "\n", + "```\n", + "'DEFAULT': {'Yes': 1, 'No': 0, 'Y': 1, 'N': 0}\n", + "```\n", + "\n", + "PATTERN 3 - Frequency scales:\n", + "\n", + "```\n", + "'DEFAULT': {\n", + " 'Never': 0, 'Rarely': 0,\n", + " 'Sometimes': 1, 'Often': 1, 'Always': 1\n", + "}\n", + "```\n", + "\n", + "PATTERN 4 - Conservative threshold (only highest = 1):\n", + "\n", + "```\n", + "'DEFAULT': {0: 0, 1: 0, 2: 0, 3: 1}\n", + "```\n", + "\n", + "***SETUP GUIDE FOR MAPPINGS:***\n", + "1. Replace 'Survey_A', 'Survey_B' etc. with YOUR instrument names (must match SURVEYS_CONFIG)\n", + "2. Update the DEFAULT mappings to match YOUR response scales\n", + "3. Add item-specific overrides where needed\n", + "\n", + "Diagnostic function: run print_unmapped_values() to see which values still need a mapping."
+ ], + "metadata": { + "id": "sit7J3oRTtRB" + } + }, + { + "cell_type": "code", + "source": [ + "MAPPINGS = {\n", + " # Survey A: Generic Likert scale (0-3)\n", + " 'Survey_A': {\n", + " 'DEFAULT': {\n", + " 0: 0, '0': 0, # Not at all → 0\n", + " 1: 0, '1': 0, # A little → 0\n", + " 2: 1, '2': 1, # Moderately → 1\n", + " 3: 1, '3': 1 # A lot → 1\n", + " },\n", + " # Override for specific items if needed:\n", + " # 'Q5': {0: 0, 1: 0, 2: 0, 3: 1} # More conservative threshold\n", + " },\n", + "\n", + " # Survey B: Frequency scale\n", + " 'Survey_B': {\n", + " 'DEFAULT': {\n", + " 0: 0, '0': 0, 'Never': 0,\n", + " 1: 0, '1': 0, 'Rarely': 0,\n", + " 2: 1, '2': 1, 'Sometimes': 1,\n", + " 3: 1, '3': 1, 'Often': 1,\n", + " 4: 1, '4': 1, 'Always': 1\n", + " }\n", + " },\n", + "\n", + " # Survey C: Mixed response types\n", + " 'Survey_C': {\n", + " 'DEFAULT': {\n", + " 0: 0, '0': 0, 'No': 0, 'False': 0,\n", + " 1: 1, '1': 1, 'Yes': 1, 'True': 1\n", + " },\n", + " # Specific overrides for certain items:\n", + " 'scale_10': { # Different threshold for this item\n", + " 0: 0, 1: 0, 2: 0, 3: 1, 4: 1, 5: 1\n", + " }\n", + " }\n", + "}\n", + "\n", + "# SETUP GUIDE FOR MAPPINGS:\n", + "# 1. Replace 'Survey_A', 'Survey_B' etc. with YOUR instrument names (must match SURVEYS_CONFIG)\n", + "# 2. Update the DEFAULT mappings to match YOUR response scales\n", + "# 3. Add item-specific overrides where needed\n", + "# 4. Use the diagnostic function print_unmapped_values() to see what values need mapping\n", + "\n", + "# Global fallback for unmapped values (applied when no specific mapping exists)\n", + "GLOBAL_DEFAULT_MAPPING = {\n", + " 'Yes': 1, 'No': 0,\n", + " 'True': 1, 'False': 0,\n", + " 1: 1, 0: 0\n", + "}" + ], + "metadata": { + "id": "0W_LwSZGKTuj" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Run the following function to validate the configuration you have set up." 
+ ], + "metadata": { + "id": "vW7-BBZJRRFz" + } + }, + { + "cell_type": "code", + "source": [ + "def validate_config():\n", + " \"\"\"Validate the survey configuration and provide helpful error messages.\"\"\"\n", + " print(\" Validating survey configuration...\")\n", + "\n", + " errors = []\n", + " warnings_list = []\n", + "\n", + " for i, survey in enumerate(SURVEYS_CONFIG):\n", + " survey_name = survey.get('instrument', f'Survey {i+1}')\n", + "\n", + " # Check required fields\n", + " if 'path' not in survey:\n", + " errors.append(f\" {survey_name}: Missing 'path' field\")\n", + " if 'instrument' not in survey:\n", + " errors.append(f\" {survey_name}: Missing 'instrument' field\")\n", + " if 'id_col' not in survey:\n", + " errors.append(f\" {survey_name}: Missing 'id_col' field\")\n", + "\n", + " # Check if files exist\n", + " if 'path' in survey:\n", + " path = survey['path']\n", + " if isinstance(path, list):\n", + " for p in path:\n", + " if not any(Path(p).parent.glob(Path(p).name)):\n", + " warnings_list.append(f\"⚠️ {survey_name}: File not found: {p}\")\n", + " elif isinstance(path, str):\n", + " if '*' in path:\n", + " matches = glob.glob(path)\n", + " if not matches:\n", + " warnings_list.append(f\"⚠️ {survey_name}: No files match pattern: {path}\")\n", + " else:\n", + " print(f\"✅ {survey_name}: Found {len(matches)} files matching: {path}\")\n", + " else:\n", + " if not Path(path).exists():\n", + " warnings_list.append(f\"⚠️ {survey_name}: File not found: {path}\")\n", + "\n", + " if errors:\n", + " print(\"\\n CONFIGURATION ERRORS (must fix):\")\n", + " for error in errors:\n", + " print(error)\n", + " return False\n", + "\n", + " if warnings_list:\n", + " print(\"\\n WARNINGS (check these):\")\n", + " for warning in warnings_list:\n", + " print(warning)\n", + "\n", + " print(\"\\n Configuration validation complete!\")\n", + " return True" + ], + "metadata": { + "id": "AeYh37wURQfm" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Utility Functions" + ], + "metadata": { + "id": "GHHkIL1JWsGJ" + } + }, + { + "cell_type": "code", + "source": [ + "def expand_file_paths(path: Union[str, List[str]]) -> List[str]:\n", + " \"\"\"Expand file paths, handling wildcards and lists.\"\"\"\n", + " if isinstance(path, list):\n", + " all_files = []\n", + " for p in path:\n", + " if '*' in p:\n", + " all_files.extend(glob.glob(p))\n", + " else:\n", + " all_files.append(p)\n", + " return all_files\n", + " elif '*' in path:\n", + " return glob.glob(path)\n", + " else:\n", + " return [path]\n", + "\n", + "def read_table(file_path: str, **kwargs) -> pd.DataFrame:\n", + " \"\"\"Universal file reader supporting CSV and Excel with robust error handling.\"\"\"\n", + " path = Path(file_path)\n", + "\n", + " try:\n", + " if path.suffix.lower() in ['.xlsx', '.xls']:\n", + " df = pd.read_excel(file_path, **kwargs)\n", + " elif path.suffix.lower() == '.csv':\n", + " # Try different encodings and separators\n", + " encodings = ['utf-8', 'latin-1', 'cp1252']\n", + " separators = [',', ';', '\\t']\n", + "\n", + " df = None\n", + " for encoding in encodings:\n", + " for sep in separators:\n", + " try:\n", + " df = pd.read_csv(file_path, encoding=encoding, sep=sep, **kwargs)\n", + " if df.shape[1] > 1: # Successfully parsed multiple columns\n", + " break\n", + " except:\n", + " continue\n", + " if df is not None and df.shape[1] > 1:\n", + " break\n", + "\n", + " if df is None:\n", + " raise ValueError(f\"Could not parse CSV file: {file_path}\")\n", + " 
else:\n", + " raise ValueError(f\"Unsupported file format: {path.suffix}\")\n", + "\n", + " print(f\" Loaded {file_path}: {df.shape[0]} rows × {df.shape[1]} columns\")\n", + " return df\n", + "\n", + " except Exception as e:\n", + " print(f\" Error loading {file_path}: {str(e)}\")\n", + " raise\n", + "\n", + "def detect_item_columns(df: pd.DataFrame,\n", + " prefixes: Optional[List[str]] = None,\n", + " force_auto_detect: bool = False) -> List[str]:\n", + " \"\"\"\n", + " Intelligently detect survey item columns.\n", + "\n", + " Args:\n", + " df: DataFrame to analyze\n", + " prefixes: List of column prefixes to look for\n", + " force_auto_detect: If True, ignore prefixes and auto-detect based on data patterns\n", + " \"\"\"\n", + " if prefixes and not force_auto_detect:\n", + " # Look for columns matching any prefix\n", + " item_cols = []\n", + " for col in df.columns:\n", + " if any(str(col).startswith(prefix) for prefix in prefixes):\n", + " item_cols.append(col)\n", + "\n", + " if item_cols:\n", + " print(f\" Found {len(item_cols)} columns with specified prefixes\")\n", + " return item_cols\n", + "\n", + " # Auto-detection fallback\n", + " print(\" Auto-detecting item columns...\")\n", + " item_cols = []\n", + "\n", + " for col in df.columns:\n", + " # Skip obvious non-item columns\n", + " if str(col).lower() in ['id', 'participant_id', 'subject_id', 'age', 'gender', 'date']:\n", + " continue\n", + "\n", + " # Look for columns with limited unique values (likely Likert scales)\n", + " unique_vals = df[col].dropna().nunique()\n", + " if 2 <= unique_vals <= 10: # Typical range for survey items\n", + " item_cols.append(col)\n", + "\n", + " print(f\" Auto-detected {len(item_cols)} potential item columns\")\n", + " if len(item_cols) > 50:\n", + " print(\" Warning: Many columns detected - you may want to specify prefixes\")\n", + "\n", + " return item_cols\n", + "\n", + "def handle_missing_values(df: pd.DataFrame, missing_values: List = None) -> pd.DataFrame:\n", + " \"\"\"Replace custom missing value codes with NaN.\"\"\"\n", + " if missing_values:\n", + " df = df.replace(missing_values, np.nan)\n", + " return df\n", + "\n", + "def reverse_score_items(df: pd.DataFrame, reverse_items: List[str], max_value: int = None) -> pd.DataFrame:\n", + " \"\"\"Reverse score specified items.\"\"\"\n", + " if not reverse_items:\n", + " return df\n", + "\n", + " df_copy = df.copy()\n", + " for item in reverse_items:\n", + " if item in df_copy.columns:\n", + " if max_value is None:\n", + " # Auto-detect max value\n", + " max_val = df_copy[item].max()\n", + " else:\n", + " max_val = max_value\n", + "\n", + " df_copy[item] = max_val - df_copy[item]\n", + " print(f\" Reverse scored {item} (max value: {max_val})\")\n", + "\n", + " return df_copy\n", + "\n", + "def apply_mapping(df: pd.DataFrame, item_cols: List[str],\n", + " instrument_mappings: Dict, instrument: str) -> Tuple[pd.DataFrame, Dict]:\n", + " \"\"\"Apply mapping rules to convert responses to binary.\"\"\"\n", + " df_mapped = df.copy()\n", + " mapping_log = {}\n", + "\n", + " default_mapping = instrument_mappings.get('DEFAULT', {})\n", + "\n", + " for col in item_cols:\n", + " col_mapping = instrument_mappings.get(col, default_mapping)\n", + " combined_mapping = {**GLOBAL_DEFAULT_MAPPING, **default_mapping, **col_mapping}\n", + "\n", + " if combined_mapping:\n", + " original_values = df[col].dropna().unique()\n", + " df_mapped[col] = df[col].map(combined_mapping)\n", + "\n", + " # Log what was mapped\n", + " mapped_values = {k: v for k, v in 
combined_mapping.items() if k in original_values}\n", + " mapping_log[col] = {\n", + " 'mapped_explicitly': bool(mapped_values),\n", + " 'mapping': mapped_values,\n", + " 'unmapped_values': list(set(original_values) - set(combined_mapping.keys()))\n", + " }\n", + "\n", + " return df_mapped, mapping_log\n", + "\n", + "def auto_binarize_fallback(df: pd.DataFrame, item_cols: List[str], mapping_log: Dict) -> Tuple[pd.DataFrame, Dict]:\n", + " \"\"\"Apply automatic binarization for unmapped values.\"\"\"\n", + " df_binary = df.copy()\n", + "\n", + " for col in item_cols:\n", + " unmapped_values = mapping_log.get(col, {}).get('unmapped_values', [])\n", + "\n", + " if unmapped_values:\n", + " print(f\" Auto-binarizing {col}: {unmapped_values}\")\n", + "\n", + " # Sort unique non-null values\n", + " unique_vals = sorted([v for v in df[col].dropna().unique() if pd.notna(v)])\n", + "\n", + " if len(unique_vals) > 1:\n", + " # Split at midpoint\n", + " midpoint = len(unique_vals) / 2\n", + "\n", + " for i, val in enumerate(unique_vals):\n", + " binary_val = 0 if i < midpoint else 1\n", + " df_binary.loc[df_binary[col] == val, col] = binary_val\n", + "\n", + " mapping_log[col]['auto_binarized'] = True\n", + " mapping_log[col]['auto_mapping'] = {val: (0 if i < midpoint else 1)\n", + " for i, val in enumerate(unique_vals)}\n", + "\n", + " return df_binary, mapping_log\n", + "\n", + "def create_transparency_catalog(all_mapping_logs: Dict) -> pd.DataFrame:\n", + " \"\"\"Create a transparency catalog showing all mapping decisions.\"\"\"\n", + " catalog_data = []\n", + "\n", + " for instrument, mapping_log in all_mapping_logs.items():\n", + " for item, log in mapping_log.items():\n", + " catalog_data.append({\n", + " 'instrument': instrument,\n", + " 'item': item,\n", + " 'mapping_type': 'explicit' if log.get('mapped_explicitly') else 'auto',\n", + " 'original_values': str(log.get('mapping', {}).keys()) if log.get('mapping') else 'auto',\n", + " 'binary_mapping': str(log.get('mapping', {})) or str(log.get('auto_mapping', {})),\n", + " 'had_unmapped_values': bool(log.get('unmapped_values'))\n", + " })\n", + "\n", + " return pd.DataFrame(catalog_data)\n", + "\n", + "def to_long_format(df: pd.DataFrame, item_cols: List[str],\n", + " id_col: str, instrument: str) -> pd.DataFrame:\n", + " \"\"\"Convert survey data to long format.\"\"\"\n", + " # Melt to long format\n", + " long_df = df.melt(\n", + " id_vars=[id_col],\n", + " value_vars=item_cols,\n", + " var_name='item',\n", + " value_name='value'\n", + " )\n", + "\n", + " # Add instrument column\n", + " long_df['instrument'] = instrument\n", + "\n", + " # Extract item numbers if possible\n", + " long_df['item_no'] = long_df['item'].str.extract(r'(\\d+)$').astype('Int64')\n", + "\n", + " # Rename id column to standard name\n", + " long_df = long_df.rename(columns={id_col: 'participant_id'})\n", + "\n", + " # Remove missing values\n", + " long_df = long_df.dropna(subset=['value'])\n", + "\n", + " return long_df[['participant_id', 'item', 'item_no', 'value', 'instrument']]\n" + ], + "metadata": { + "id": "_MXKRrR8KABN" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Main Processing Pipeline" + ], + "metadata": { + "id": "zx088HoEK0k-" + } + }, + { + "cell_type": "code", + "source": [ + "def process_all_surveys():\n", + " \"\"\"Main processing function that orchestrates the entire pipeline.\"\"\"\n", + "\n", + " if not validate_config():\n", + " print(\" Please fix configuration errors before proceeding.\")\n", 
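+ "        # Stop here: the rest of the pipeline assumes SURVEYS_CONFIG paths and required fields are valid\n",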
+ " return None, None\n", + "\n", + " all_long_data = []\n", + " all_mapping_logs = {}\n", + "\n", + " print(\"\\n Starting survey processing pipeline...\\n\")\n", + "\n", + " for survey_config in SURVEYS_CONFIG:\n", + " instrument = survey_config['instrument']\n", + " print(f\"\\n{'='*50}\")\n", + " print(f\"Processing: {instrument}\")\n", + " print(f\"{'='*50}\")\n", + "\n", + " # Expand file paths\n", + " file_paths = expand_file_paths(survey_config['path'])\n", + "\n", + " if not file_paths:\n", + " print(f\" No files found for {instrument}\")\n", + " continue\n", + "\n", + " # Process each file\n", + " instrument_data = []\n", + " for file_path in file_paths:\n", + " print(f\"\\n Processing file: {file_path}\")\n", + "\n", + " # Load data\n", + " read_kwargs = {}\n", + " if 'sheet_name' in survey_config:\n", + " read_kwargs['sheet_name'] = survey_config['sheet_name']\n", + " if 'skip_rows' in survey_config:\n", + " read_kwargs['skiprows'] = survey_config['skip_rows']\n", + "\n", + " df = read_table(file_path, **read_kwargs)\n", + "\n", + " # Handle missing values\n", + " df = handle_missing_values(df, survey_config.get('missing_values', []))\n", + "\n", + " # Verify ID column exists\n", + " id_col = survey_config['id_col']\n", + " if id_col not in df.columns:\n", + " print(f\" ID column '{id_col}' not found in {file_path}\")\n", + " print(f\"Available columns: {list(df.columns)}\")\n", + " continue\n", + "\n", + " # Detect item columns\n", + " item_cols = detect_item_columns(\n", + " df,\n", + " survey_config.get('item_prefixes'),\n", + " survey_config.get('force_auto_detect', False)\n", + " )\n", + "\n", + " if not item_cols:\n", + " print(f\" No item columns detected in {file_path}\")\n", + " continue\n", + "\n", + " print(f\" Processing {len(item_cols)} items\")\n", + "\n", + " # Reverse score items if needed\n", + " df = reverse_score_items(df, survey_config.get('reverse_items', []))\n", + "\n", + " # Apply mappings\n", + " instrument_mappings = MAPPINGS.get(instrument, {})\n", + " df_mapped, mapping_log = apply_mapping(df, item_cols, instrument_mappings, instrument)\n", + "\n", + " # Auto-binarize fallback\n", + " df_binary, mapping_log = auto_binarize_fallback(df_mapped, item_cols, mapping_log)\n", + "\n", + " # Convert to long format\n", + " long_data = to_long_format(df_binary, item_cols, id_col, instrument)\n", + " instrument_data.append(long_data)\n", + "\n", + " print(f\" Processed {len(long_data)} participant×item observations\")\n", + "\n", + " # Combine all files for this instrument\n", + " if instrument_data:\n", + " combined_instrument_data = pd.concat(instrument_data, ignore_index=True)\n", + " all_long_data.append(combined_instrument_data)\n", + " all_mapping_logs[instrument] = mapping_log\n", + "\n", + " print(f\" Total for {instrument}: {len(combined_instrument_data)} observations\")\n", + "\n", + " # Combine all instruments\n", + " if all_long_data:\n", + " combined_long = pd.concat(all_long_data, ignore_index=True)\n", + " catalog = create_transparency_catalog(all_mapping_logs)\n", + "\n", + " print(f\"\\n PIPELINE COMPLETE!\")\n", + " print(f\" Final dataset: {len(combined_long)} total observations\")\n", + " print(f\" Participants: {combined_long['participant_id'].nunique()}\")\n", + " print(f\" Instruments: {combined_long['instrument'].nunique()}\")\n", + " print(f\" Items: {combined_long['item'].nunique()}\")\n", + "\n", + " return combined_long, catalog\n", + " else:\n", + " print(\" No data processed successfully\")\n", + " return None, None\n", + "\n", + 
"def export_results(combined_long: pd.DataFrame, catalog: pd.DataFrame,\n", + " output_dir: str = \"outputs\"):\n", + " \"\"\"Export the final results with multiple format options.\"\"\"\n", + "\n", + " # Create output directory\n", + " Path(output_dir).mkdir(exist_ok=True)\n", + "\n", + " # Primary CSV export\n", + " csv_path = Path(output_dir) / \"analysis_ready_long.csv\"\n", + " combined_long.to_csv(csv_path, index=False)\n", + " print(f\" Exported primary dataset: {csv_path}\")\n", + "\n", + " # Excel workbook with multiple sheets\n", + " excel_path = Path(output_dir) / \"analysis_ready_outputs.xlsx\"\n", + " with pd.ExcelWriter(excel_path, engine='openpyxl') as writer:\n", + " combined_long.to_excel(writer, sheet_name='long', index=False)\n", + " catalog.to_excel(writer, sheet_name='catalog', index=False)\n", + "\n", + " # Summary statistics\n", + " summary = combined_long.groupby('instrument')['value'].agg([\n", + " 'count', 'sum', 'mean'\n", + " ]).round(3)\n", + " summary['prevalence_%'] = (summary['mean'] * 100).round(1)\n", + " summary.to_excel(writer, sheet_name='summary')\n", + "\n", + " print(f\" Exported Excel workbook: {excel_path}\")\n", + "\n", + " return csv_path, excel_path" + ], + "metadata": { + "id": "fRt4SbjLSJOv" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Diagnostic and Helper Functions" + ], + "metadata": { + "id": "LN5RglUpStO6" + } + }, + { + "cell_type": "code", + "source": [ + "def print_unmapped_values():\n", + " \"\"\"Print all unique values that need mapping - helpful for configuration.\"\"\"\n", + " print(\" Scanning for unmapped values in your data...\\n\")\n", + "\n", + " for survey_config in SURVEYS_CONFIG:\n", + " instrument = survey_config['instrument']\n", + " print(f\"\\n{instrument}:\")\n", + " print(\"-\" * 30)\n", + "\n", + " file_paths = expand_file_paths(survey_config['path'])\n", + " for file_path in file_paths[:1]: # Just check first file\n", + " try:\n", + " df = read_table(file_path)\n", + " item_cols = detect_item_columns(df, survey_config.get('item_prefixes'))\n", + "\n", + " for col in item_cols[:5]: # Show first 5 items\n", + " unique_vals = df[col].dropna().unique()\n", + " print(f\" {col}: {sorted(unique_vals)}\")\n", + " except Exception as e:\n", + " print(f\" Error reading {file_path}: {e}\")\n", + "\n", + "def quick_data_preview():\n", + " \"\"\"Show a quick preview of your data structure.\"\"\"\n", + " print(\" Quick data preview:\\n\")\n", + "\n", + " for survey_config in SURVEYS_CONFIG:\n", + " instrument = survey_config['instrument']\n", + " print(f\"{instrument}:\")\n", + "\n", + " file_paths = expand_file_paths(survey_config['path'])\n", + " if file_paths:\n", + " try:\n", + " df = read_table(file_paths[0])\n", + " print(f\" Shape: {df.shape}\")\n", + " print(f\" Columns: {list(df.columns)}\")\n", + " print(f\" ID column '{survey_config['id_col']}' exists: {survey_config['id_col'] in df.columns}\")\n", + " print()\n", + " except Exception as e:\n", + " print(f\" Error: {e}\\n\")" + ], + "metadata": { + "id": "IehUzGT7Sr5V" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Final Execution" + ], + "metadata": { + "id": "fEPEnUrpS7zL" + } + }, + { + "cell_type": "code", + "source": [ + "# Run validation and preview\n", + "print(\" Survey Data Harmonization Pipeline\")\n", + "print(\"=\" * 50)\n", + "\n", + "# Uncomment these lines for diagnostics:\n", + "# quick_data_preview()\n", + "# print_unmapped_values()\n", + 
"\n", + "# Process all surveys\n", + "combined_long, catalog = process_all_surveys()\n", + "\n", + "# Export results\n", + "if combined_long is not None:\n", + " csv_path, excel_path = export_results(combined_long, catalog)\n", + "\n", + " print(f\"\\n SUCCESS! Your harmonized dataset is ready:\")\n", + " print(f\" Primary file: {csv_path}\")\n", + " print(f\" Full workbook: {excel_path}\")\n", + " print(f\"\\nThe 'long' format dataset contains:\")\n", + " print(f\"- participant_id: Unique participant identifier\")\n", + " print(f\"- item: Original survey item name\")\n", + " print(f\"- item_no: Item number (if extractable)\")\n", + " print(f\"- value: Harmonized binary value (0/1)\")\n", + " print(f\"- instrument: Survey instrument name\")\n", + "\n", + " print(f\"\\n Check the 'catalog' sheet for transparency on all mapping decisions!\")" + ], + "metadata": { + "id": "xtJDV4cSS64Q" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/src/components/App.js b/src/components/App.js index ffe6735..f88b56b 100644 --- a/src/components/App.js +++ b/src/components/App.js @@ -317,6 +317,18 @@ function App() { XLSXutils.book_append_sheet(workbook, matches, "Matches"); XLSXutils.book_append_sheet(workbook, matrix, "Matrix"); XLSXwriteFile(workbook, "Harmony.xlsx"); + + // Add Jupyter Notebook download for processing results on local machine + downloadJupyterNotebook(); + }; + + const downloadJupyterNotebook = () => { + const link = document.createElement('a'); + link.href = process.env.PUBLIC_URL + '/processdata/harmony_process_data.ipynb'; + link.download = 'harmony_process_data.ipynb'; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); }; let theme = useMemo(