Ridgeline Plot

ridgeline_plot.py
# %%
"""Ridgeline plot of the per-year distribution of CO2 concentration (ppm).

Reads ``co2-trends.csv`` from ``DATA_PATH``, keeps the rows whose ``year``
lies within ``YEAR_RANGE``, estimates one Gaussian KDE of ``ppm`` per year,
and stacks the resulting density curves on vertically offset baselines.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import gaussian_kde

from src.config import DATA_PATH

# %%
# Years to include (both ends inclusive, see ``Series.between`` below)
YEAR_RANGE = (2000, 2022)

# X-axis resolution: number of grid points each density is evaluated at
N_POINTS = 200

# X-axis extent (in ppm) over which the densities are evaluated
PPM_RANGE = (365, 423)

# Overlap in the ridgeline plot: consecutive baselines sit (1 - OVERLAP) apart
OVERLAP = 0.85

# %%
# Read data from file
data = pd.read_csv(DATA_PATH / "co2-trends.csv")

# Limit data to between the specified years and columns to year and ppm.
# Also drop missing.
co2 = data.loc[lambda x: x["year"].between(*YEAR_RANGE), ["year", "ppm"]].dropna()

# %%
x = np.linspace(*PPM_RANGE, N_POINTS)

# One density curve per year that actually has data. Iterating the groupby
# yields the groups in sorted key order, so labels, baselines and curves are
# guaranteed to line up even if some year in YEAR_RANGE has no rows.
density_by_year = {
    year: gaussian_kde(ppm)(x) for year, ppm in co2.groupby("year")["ppm"]
}
years = np.array(list(density_by_year))
densities = np.stack(list(density_by_year.values()))

# Each ridge is its density curve lifted onto its own baseline.
y_baseline = np.arange(len(years)) * (1 - OVERLAP)
ys = y_baseline[:, np.newaxis] + densities

# %%
fig, ax = plt.subplots(figsize=(6, 8), constrained_layout=True)

for b, y in zip(y_baseline, ys):
    # Only fill where the curve rises visibly above its baseline, so the
    # flat tails of a ridge do not paint over the ridges underneath it.
    ax.fill_between(x, b, y, where=~np.isclose(b, y), color="tomato")
    ax.plot(x, y, c="black")

# One tick per ridge, labelled with the year it represents.
ax.set_yticks(y_baseline)
ax.set_yticklabels(years)
ax.set_xlabel("Parts per Million")
ax.set_ylabel("Year")

ax.margins(x=0, y=0.01)
ax.tick_params(axis="both", bottom=False, left=False)

# Vertical grid lines only, drawn behind the ridges.
ax.set_axisbelow(True)
ax.grid(True, axis="x")

# Hide the frame on all four sides.
for side in ("top", "right", "bottom", "left"):
    ax.spines[side].set_visible(False)

# CO2 takes a subscript; mathtext renders it independently of the font.
fig.suptitle(
    "Distribution of CO$_2$ Concentration per Year", fontsize=18, color="gray"
)

Ridgeline Plot