Révision | 122e257ab677afde702863fd5c1bf7e8b20206e2 |
---|---|
Taille | 1,723 octets |
l'heure | 2024-09-04 22:41:45 |
Auteur | Lorenzo Isella |
Message de Log | ChatGPT translation of my R code to calculate the kpi by DG grow. |
import pandas as pd
import matplotlib.pyplot as plt
# Define the Herfindahl-Hirschman Index (HHI) function
def hhi_index(x):
    """Return the Herfindahl-Hirschman Index of the values in ``x``.

    Each value is divided by the total of ``x`` to obtain its share, and
    the squared shares are summed.  The result is on the 0-1 scale (1.0
    when a single entry holds the whole total); any rescaling, e.g. to
    the 0-10,000 convention, is left to the caller.

    Args:
        x: Object supporting ``.sum()``, element-wise division and ``**``
            (e.g. a pandas Series) holding raw amounts or shares.

    Returns:
        The sum of the squared shares.
    """
    shares = x / x.sum()
    return (shares**2).sum()
# Load the dataset.  The file name marks the input as a gzip-compressed tar
# archive, so it is opened with compression='tar' (supported since pandas
# 1.5); compression='gzip' would only strip the gzip layer and hand the raw
# tar header to the CSV parser.  The archive must contain exactly one data
# file.
df = pd.read_csv("../../input/scb_data_for_figures-19-02-2024.tar.gz", compression='tar')
# Clean column names (convert to lowercase and replace spaces with underscores)
df.columns = df.columns.str.lower().str.replace(' ', '_')
# Collapse the data to one row per (expenditure year, member state):
# the summed aid element and the 'first' GDP entry of each group
df_hhi = (
    df.groupby(['expenditure_year', 'member_state_2_letter_codes'])
    .agg(
        exp_eur_bn=pd.NamedAgg(column='aid_element_eur_bn', aggfunc='sum'),
        ms_gdp_eur_bn=pd.NamedAgg(column='gdp_eur_bn', aggfunc='first'),
    )
    .reset_index()
)
# Per expenditure year, compute the HHI of aid and of GDP across member
# states, multiplied by 1e4 (the 0-10,000 scale).  hhi_index() already
# converts its input to shares, so the raw amounts are passed directly, and
# named aggregation is used instead of groupby().apply(), whose handling of
# the grouping column is deprecated in recent pandas.
df_hhi = (
    df_hhi.groupby('expenditure_year')
    .agg(
        hhi_aid=('exp_eur_bn', lambda amounts: hhi_index(amounts) * 1e4),
        hhi_gdp=('ms_gdp_eur_bn', lambda amounts: hhi_index(amounts) * 1e4),
    )
    .reset_index()
)
# Plot the yearly HHI of aid and of GDP on a single figure
plt.figure(figsize=(10, 6))
# Give each series its own colour so the two curves can be told apart
plt.plot(df_hhi['expenditure_year'], df_hhi['hhi_aid'], 'b-', label='Year vs HHI Aid')  # Blue line: aid
plt.plot(df_hhi['expenditure_year'], df_hhi['hhi_gdp'], 'g-', label='Year vs HHI GDP')  # Green line: GDP
# Add labels and title (both series share the y-axis, so it is named generically)
plt.xlabel('Expenditure Year')
plt.ylabel('HHI')
plt.title('HHI of Aid and GDP Over Time')
# Display the legend and plot
plt.legend()
plt.show()