Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions statvar_imports/who_tuberculosis/treatmentoutcome_tb_hiv/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# WHO Treatment Outcome for TB and HIV

- source: https://data.who.int/indicators/i/DCDC2EB/625E736

- type of place: Country Data

- statvars: Health

- years: 2012 to 2023

- place_resolution: manual

### Release Frequency: P1Y

### How to run:

- To download the input file

`python3 tb_data_download_who.py`

- To process the input file

`python3 ../../../tools/statvar_importer/stat_var_processor.py --input_data=input_files/Tuberculosis_outcome_TB_HIV.csv --pv_map=tuberculosis_outcome_pvmap.csv --config_file=metadata.csv --output_path=output/tuberculosis_output --existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf`

#### Refresh type: Fully Autorefresh

Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"import_specifications": [
{
"import_name": "WHO_TuberculosisAndHIVTreatmentOutcome",
"curator_emails": [
"support@datacommons.org"
],
"provenance_url": "https://data.who.int/indicators/i/DCDC2EB/625E736",
"provenance_description": "Percentage of people with TB/HIV who started drug-susceptible TB treatment and whose treatment outcome was recorded as treatment success (cured or treatment completed), treatment failed, died, lost to follow-up, or not evaluated, within the reporting period.",
"scripts": [
"tb_data_download_who.py",
"../../../tools/statvar_importer/stat_var_processor.py --input_data=input_files/Tuberculosis_outcome_TB_HIV.csv --pv_map=tuberculosis_outcome_pvmap.csv --config_file=metadata.csv --output_path=output/tuberculosis_output --existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf"
],
"import_inputs": [
{
"template_mcf": "output/tuberculosis_output.tmcf",
"cleaned_csv": "output/tuberculosis_output.csv"
}
],
"source_files": [
"input_files/Tuberculosis_outcome_TB_HIV.csv"
],
"cron_schedule": "0 10 10 * *"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
config,value
mapped_rows,1
mapped_columns,6
output_columns,"observationDate,observationAbout,variableMeasured,value,unit,scalingFactor"
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import os
import requests
import io
import pandas as pd
import logging

# Root logger: report INFO and above, each record prefixed with its
# timestamp and severity.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

def _fetch_csv_text(url, description, params=None, timeout=120):
    """Return the body of a GET request to *url*; raise on a non-200 reply.

    Args:
        url: Address to request.
        description: Short label for the resource, used in the error message.
        params: Optional query-string parameters.
        timeout: Seconds handed to ``requests.get`` so a stalled server
            cannot block the job forever.

    Raises:
        RuntimeError: If the response status is anything other than 200.
    """
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code != 200:
        message = f"Failed to fetch {description}. HTTP {response.status_code}"
        logging.error(message)
        # Raise so the script exits non-zero; a plain return would let the
        # following pipeline step run on missing or stale input.
        raise RuntimeError(message)
    return response.text


def download_who_data(timeout=120):
    """Download the WHO TB/HIV treatment-outcome indicator and save it as CSV.

    Fetches the indicator rows from the WHO xMart API, attaches an ``iso3``
    code to each row by joining on the country name against the WHO TB
    notifications dataset, and writes the result to
    ``input_files/Tuberculosis_outcome_TB_HIV.csv``.

    Args:
        timeout: Seconds handed to each HTTP request.

    Raises:
        RuntimeError: If either download returns a status other than 200.
    """
    # 1. Get the indicator data from the API, filtered by indicator ID.
    api_url = "https://xmart-api-public.who.int/DATA_/RELAY_TB_DATA"
    params = {
        "$filter": "IND_ID eq 'DCDC2EB625E736'",
        "$format": "csv",
    }

    logging.info("1. Fetching clean percentage data from WHO API...")
    api_text = _fetch_csv_text(api_url, "API data", params=params,
                               timeout=timeout)
    api_df = pd.read_csv(io.StringIO(api_text))

    # 2. Get the country -> iso3 lookup from the master database.
    logging.info("2. Fetching country iso3 codes from WHO master database...")
    master_url = "https://extranet.who.int/tme/generateCSV.asp?ds=notifications"
    master_text = _fetch_csv_text(master_url, "master data", timeout=timeout)

    # Only 'country' (the join key) and 'iso3' are needed; the dataset
    # repeats each pair on many rows, so collapse the duplicates.
    geo_columns = ['country', 'iso3']
    master_df = pd.read_csv(io.StringIO(master_text),
                            usecols=geo_columns).drop_duplicates()

    # 3. Merge the two datasets on the country name.
    logging.info("3. Merging data and formatting...")
    # The API uses uppercase 'COUNTRY', the master uses lowercase 'country'.
    merged_df = pd.merge(api_df, master_df,
                         left_on='COUNTRY', right_on='country', how='left')

    # Drop the lowercase 'country' column that was only used for joining.
    merged_df = merged_df.drop(columns=['country'])

    # A left join keeps rows whose name has no match and leaves iso3 empty;
    # report them so they can be resolved instead of going unnoticed.
    unmatched = merged_df.loc[merged_df['iso3'].isna(),
                              'COUNTRY'].dropna().unique()
    if len(unmatched) > 0:
        logging.warning("No iso3 code found for %d country name(s): %s",
                        len(unmatched),
                        ", ".join(sorted(map(str, unmatched))))

    # Keep only the needed columns, with iso3 right after the country name.
    final_columns = [
        'IND_ID', 'INDICATOR_NAME', 'DISAGGR_1', 'YEAR', 'COUNTRY', 'iso3',
        'VALUE',
    ]
    merged_df = merged_df[final_columns]

    # 4. Save to CSV, creating the output folder if needed.
    output_dir = "input_files"
    filename = os.path.join(output_dir, "Tuberculosis_outcome_TB_HIV.csv")
    os.makedirs(output_dir, exist_ok=True)

    # Save without the pandas index column.
    merged_df.to_csv(filename, index=False)
    logging.info(f"Success! Data saved locally as '{filename}'")

# Run the download only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    download_who_data()
Loading