Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ydata-profiling/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Automate Python Data Analysis With YData Profiling

This folder provides the code examples for the Real Python tutorial [Automate Python Data Analysis With YData Profiling](https://realpython.com/ydata-profiling-eda/).
14 changes: 14 additions & 0 deletions ydata-profiling/airport_comparison.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import pandas as pd
from ydata_profiling import ProfileReport

# Load the flight sample that both airport profiles are built from.
df = pd.read_csv("flight_data_2024_sample.csv")

# Boolean masks selecting rows by their "origin" airport code.
from_lax = df["origin"].eq("LAX")
from_atl = df["origin"].eq("ATL")

# Split into flights originating from LAX and ATL.
df_lax = df.loc[from_lax]
df_atl = df.loc[from_atl]

# One titled profile per airport subset.
lax_profile = ProfileReport(df_lax, title="LAX Flights")
atl_profile = ProfileReport(df_atl, title="ATL Flights")

# Compare the LAX profile against the ATL profile and export the result.
comparison = lax_profile.compare(atl_profile)
comparison.to_file("airport_comparison.html")
10,001 changes: 10,001 additions & 0 deletions ydata-profiling/flight_data_2024_sample.csv

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions ydata-profiling/flight_report_v1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import pandas as pd
from ydata_profiling import ProfileReport

# Read the sample flight data into a DataFrame.
df = pd.read_csv("flight_data_2024_sample.csv")

# Profile the whole DataFrame with the library defaults, then write the
# report to an HTML file.
profile = ProfileReport(df)
profile.to_file(output_file="flight_report.html")
16 changes: 16 additions & 0 deletions ydata-profiling/flight_report_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import pandas as pd
from ydata_profiling import ProfileReport

# Read the sample flight data into a DataFrame.
df = pd.read_csv("flight_data_2024_sample.csv")

# Human-readable descriptions, keyed by column name, to attach to the report.
column_descriptions = {
    "origin": "Airport code where the flight originated",
    "dest": "Airport code of flight destination",
    "crs_dep_time": "Scheduled departure time at origin (hhmm)",
}

# The descriptions are handed over under the "descriptions" key of the
# `variables` argument.
variable_settings = {"descriptions": column_descriptions}

profile = ProfileReport(df, variables=variable_settings)
profile.to_file("documented_report.html")
13 changes: 13 additions & 0 deletions ydata-profiling/flight_report_v3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pandas as pd
from ydata_profiling import ProfileReport

# Read the sample flight data into a DataFrame.
df = pd.read_csv("flight_data_2024_sample.csv")

# Option 1: Generate a minimal report
profile = ProfileReport(df, minimal=True)
profile.to_file("minimal_report.html")

# Option 2: Sample your data before profiling
# DataFrame.sample() draws without replacement by default and raises
# ValueError when n exceeds the number of rows, so cap the sample size at
# the size of the DataFrame. For data with at least 10,000 rows this is
# identical to sampling exactly 10,000 rows.
sample_size = min(10_000, len(df))
# A fixed random_state keeps the sampled rows reproducible across runs.
df_sample = df.sample(n=sample_size, random_state=42)
profile = ProfileReport(df_sample)
profile.to_file("sampled_report.html")
13 changes: 13 additions & 0 deletions ydata-profiling/flight_time_series.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pandas as pd
from ydata_profiling import ProfileReport

# Read the sample flight data and convert the "fl_date" column to datetimes
# so the report can be sorted on it.
df = pd.read_csv("flight_data_2024_sample.csv")
df["fl_date"] = pd.to_datetime(df["fl_date"])

# Report settings: a title, time-series mode switched on, and rows ordered
# by the "fl_date" column.
report_options = {
    "title": "Flight Delay Report",
    "tsmode": True,
    "sortby": "fl_date",
}

profile = ProfileReport(df, **report_options)
profile.to_file("flight_timeseries_report.html")