Cebu Solid Waste Analysis

📅 2021-10-31


[47]
import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib as mpl
import matplotlib.pyplot as plt
# Default figure size (width, height in inches) for every plot in the notebook.
sns.set(rc={'figure.figsize': (8, 5)})
[48]
# Load the barangay-level census table from a whitespace-delimited text file.
# `sep=r'\s+'` is the documented replacement for `delim_whitespace=True`,
# which pandas deprecated in 2.2.
demogs2 = pd.read_csv('data/census_brgy.txt', sep=r'\s+')
demogs2.head()
barangay pop_2000 pop_2010 annual_growth_rate hhold_pop num_hholds ave_hhold_size median_age land_area pop_density
0 Adlaon 2,847 3,647 2.51 3,647 848 4.3 23 7.46 489
1 Agsungot 1,746 1,981 1.27 1,981 461 4.3 23 6.29 315
2 Apas 15,492 22,566 3.83 22,463 4,925 4.6 24 0.49 46,053
3 Bacayan 8,604 14,021 5.00 14,021 3,286 4.3 24 1.22 11,493
4 Banilad 5,220 9,903 6.61 9,299 2,003 4.6 25 1.51 6,558
[49]
# Count columns are read as strings because they carry thousands separators
# (e.g. "2,847"); strip the commas and cast each one to int64.
int_cols = ['pop_2000', 'pop_2010', 'hhold_pop', 'num_hholds']
int_cols_dict = dict.fromkeys(int_cols, 'int64')
for col in int_cols:
    demogs2[col] = demogs2[col].str.replace(',', '').astype(int_cols_dict[col])

# Normalise barangay names: trim whitespace and turn underscores into spaces.
demogs2['barangay'] = demogs2['barangay'].str.strip().str.replace('_', ' ')

# Recompute density from the cleaned numeric columns, overwriting the value
# that came with the file.
demogs2['pop_density'] = demogs2['pop_2010'] / demogs2['land_area']

[50]
# Load the household waste survey from a whitespace-delimited text file.
# `sep=r'\s+'` is the documented replacement for `delim_whitespace=True`,
# which pandas deprecated in 2.2.
data = pd.read_csv("data/cebu_hh_survey_2011.txt", sep=r'\s+')
# Barangay names use underscores in place of spaces in the file; restore the
# spaces so they line up with the cleaned census names.
data['barangay'] = data['barangay'].str.replace('_', ' ')
data.head()

barangay hh_id hh_size organic paper plastic metal others total av_kg_per_day av_g_per_day
0 Capitol Site H/H/1 7 5.25 2.25 2.25 0.0 0.0 9.75 1.39 1,390
1 Capitol Site H/H/2 4 6.50 2.50 2.25 0.0 5.0 16.25 2.32 2,320
2 Capitol Site H/H/3 6 6.50 2.00 2.00 0.0 7.0 17.50 2.50 2,500
3 Capitol Site H/H/4 3 8.25 2.00 2.25 0.0 0.0 12.50 1.79 1,790
4 Capitol Site H/H/5 9 9.00 2.00 2.00 0.0 0.0 13.00 1.86 1,860
[51]
# Attach the barangay-level census attributes to every surveyed household.
# The inner join keeps only households whose barangay appears in the census table.
demog_cols = ['barangay', 'pop_2010', 'pop_density', 'num_hholds', 'land_area', 'ave_hhold_size']
merged = data.merge(demogs2[demog_cols], how='inner', on='barangay')
merged.head()
barangay hh_id hh_size organic paper plastic metal others total av_kg_per_day av_g_per_day pop_2010 pop_density num_hholds land_area ave_hhold_size
0 Capitol Site H/H/1 7 5.25 2.25 2.25 0.0 0.0 9.75 1.39 1,390 15308 18668.292683 3801 0.82 4.0
1 Capitol Site H/H/2 4 6.50 2.50 2.25 0.0 5.0 16.25 2.32 2,320 15308 18668.292683 3801 0.82 4.0
2 Capitol Site H/H/3 6 6.50 2.00 2.00 0.0 7.0 17.50 2.50 2,500 15308 18668.292683 3801 0.82 4.0
3 Capitol Site H/H/4 3 8.25 2.00 2.25 0.0 0.0 12.50 1.79 1,790 15308 18668.292683 3801 0.82 4.0
4 Capitol Site H/H/5 9 9.00 2.00 2.00 0.0 0.0 13.00 1.86 1,860 15308 18668.292683 3801 0.82 4.0
[52]
# Distribution of the per-household average daily waste over all survey rows.
ax = sns.histplot(x='av_kg_per_day', data=data, bins=50)
ax.set(xlabel='Average Waste Generation (kg/day)')
plt.show()
[53]
# Median of the per-household average daily waste (kg/day) over the merged rows.
merged['av_kg_per_day'].median()
1.14

On average, organic waste makes up the largest share of the waste generated.

[54]
# Reshape the survey to long form: one row per (household, waste type) pair
# holding that category's weight.
waste_categories = ['organic', 'plastic', 'metal', 'paper', 'others']
waste_type = data.melt(
    id_vars=['barangay', 'hh_id'],
    value_vars=waste_categories,
    var_name='waste_type',
    value_name='weight',
)

fig, axs = plt.subplots(1, 2, figsize=(15, 8))
left_ax, right_ax = axs

# Left panel: spread of household weights for each waste type.
sns.boxplot(x='waste_type', y='weight', data=waste_type, palette='Set3', ax=left_ax)
left_ax.set_ylabel('Waste Generated in a week (kg)')
left_ax.set_xlabel('Waste Type')

# Right panel: each waste type's percentage of its barangay's total weight.
# The totals are broadcast back to every row, so `pct` repeats within each
# (barangay, waste_type) group.
waste_type['sum_wt'] = waste_type.groupby(['barangay', 'waste_type'])['weight'].transform('sum')
waste_type['wt_total_brgy'] = waste_type.groupby(['barangay'])['weight'].transform('sum')
waste_type['pct'] = waste_type['sum_wt'] / waste_type['wt_total_brgy'] * 100

# Collapse to one row per (barangay, waste_type) so each percentage is
# counted once when used as a histogram weight.
pct_by_brgy = waste_type[['barangay', 'waste_type', 'pct']].drop_duplicates()
g = sns.histplot(
    pct_by_brgy,
    x='barangay',
    hue='waste_type',
    weights='pct',
    multiple='stack',
    palette='Set3',
    shrink=0.8,
    ax=right_ax,
)
right_ax.set_ylabel('% Total Waste Generation in a Week')
right_ax.set_xlabel('Barangay')
right_ax.tick_params(axis='x', labelsize=8.5)
plt.show()

[55]
# Share of the total surveyed weight contributed by each waste type.
# The grand total is computed once and the per-type sums are vectorised,
# rather than calling `groupby.apply` with a lambda that recomputes the total
# for every group (and triggers the pandas >= 2.2 warning about applying on
# grouping columns).
total_weight = waste_type['weight'].sum()
average_fraction = waste_type.groupby('waste_type')['weight'].sum() / total_weight
# Keep the Series unnamed: the revenue cell calls `reset_index()` and renames
# the resulting column `0` to 'fraction'.
average_fraction.name = None
average_fraction
waste_type
metal      0.018975
organic    0.380777
others     0.260897
paper      0.163185
plastic    0.176166
dtype: float64
[56]
# Estimate monthly revenue from collecting priced waste types from a share of
# all households.
market_share = 0.1

# Listed price per waste type; 'organic' has no price and therefore drops out
# of the merge below.
# NOTE(review): the price units are not shown here (the printout reports PHP) — confirm.
average_price = pd.DataFrame({
    'waste_type': ['metal', 'paper', 'plastic', 'others'],
    'price': [7.7, 10.25, 4., 1.5],
})
# Effective price is taken as half the listed price.
# NOTE(review): the rationale for the 0.5 factor is not shown here — confirm.
average_price['eff_price'] = 0.5 * average_price['price']

# Typical household waste per month: median daily figure times 30 days.
ave_waste_generation_mo = 30 * merged['av_kg_per_day'].median()

fraction_by_type = average_fraction.reset_index().rename(columns={0: 'fraction'})
price_waste_type = fraction_by_type.merge(average_price, on='waste_type')
price_waste_type['revenue_hh_mo'] = (
    price_waste_type['fraction'] * ave_waste_generation_mo * price_waste_type['eff_price']
)

total_num_hholds = demogs2['num_hholds'].sum() * market_share
ave_revenue_per_hh_mo = price_waste_type['revenue_hh_mo'].sum()

# Truncate to whole households before scaling the per-household revenue.
households_served = int(total_num_hholds)
print('number of households to service:', households_served)
print('Estimated Revenue per month:', round(households_served * ave_revenue_per_hh_mo, 2), 'PHP')
number of households to service: 19546 Estimated Revenue per month: 974220.56 PHP
[57]
# Show the effective price per waste type (rows follow `average_price` order).
average_price.eff_price
0    3.850
1    5.125
2    2.000
3    0.750
Name: eff_price, dtype: float64
[58]
# Sanity check of the serviced-household count after dropping duplicate
# (barangay, num_hholds) rows. Uses `market_share` instead of a repeated
# literal 0.1 so the check tracks the assumption made in the revenue cell.
demogs2[['barangay', 'num_hholds']].drop_duplicates()['num_hholds'].sum() * market_share
19546.100000000002
[59]
# Scale each waste type's share of total weight by 34.
# NOTE(review): 34 looks like a rounded monthly household total (median
# 1.14 kg/day * 30 = 34.2, cf. `ave_waste_generation_mo`) — confirm.
average_fraction * 34
waste_type
metal       0.645150
organic    12.946409
others      8.870509
paper       5.548289
plastic     5.989644
dtype: float64
[60]
# Per-barangay spread of household average daily waste.
ax = sns.boxplot(x='barangay', y='av_kg_per_day', data=data, palette='Set3')
ax.set(ylabel='Average Waste Generation (kg/day)')
plt.show()
[61]
# Per-barangay spread of surveyed household sizes.
ax = sns.boxplot(x='barangay', y='hh_size', data=data, palette='Set3')
ax.set_xlabel('Barangay')
ax.set_ylabel('Household size')
plt.show()

We look at how household size relates to total waste generation.

[62]
# Household size against total surveyed waste, with barangay encoded by both
# colour and marker shape.
ax = sns.scatterplot(
    x='hh_size',
    y='total',
    data=data,
    hue='barangay',
    style='barangay',
    s=100,
    alpha=0.8,
)
ax.set_xlabel('Household size')
ax.set_ylabel('Total Waste generation  in a week (kg)')
plt.show()

There seems to be no clear relationship between the population of a locale and its total waste generation.

[63]
# Mean and standard deviation of household daily waste per barangay.
# NOTE(review): `tmp` is reassigned inside the plotting loop of the next cell
# before it is read, so this cell looks like leftover exploration — confirm.
tmp = (
    merged
    .groupby(['barangay', 'pop_2010'], as_index=False)
    .agg({'av_kg_per_day': ['mean', 'std']})
)
[64]
# Compare each barangay's mean household waste generation against four
# barangay-level census attributes, one scatter panel per attribute.
# The attribute list is named `x_vars` so it does not shadow the builtin `vars`.
x_vars = ['pop_2010', 'land_area', 'pop_density', 'ave_hhold_size']
x_labels = ['2010 Population', 'Land area (km^2)', 'Population Density (person/km^2)', 'Average Household Size']
fig, axs = plt.subplots(2, 2, figsize=(10, 7), sharey=True)
for ax, v, x_lab in zip(axs.flat, x_vars, x_labels):
    # Grouping by (barangay, v) carries the attribute through the aggregation.
    # Presumably each barangay has a single value of v, giving one point per
    # barangay — verify against the census table.
    # Named aggregation yields flat column names directly.
    tmp = merged.groupby(['barangay', v], as_index=False).agg(
        av_kg_per_daymean=('av_kg_per_day', 'mean'),
        av_kg_per_daystd=('av_kg_per_day', 'std'),
    )
    sns.scatterplot(x=v, y='av_kg_per_daymean', data=tmp, s=200,
                    hue='barangay', style='barangay', ax=ax)
    # Hide the per-panel legend; a single shared legend is added to the figure below.
    ax.get_legend().set_visible(False)
    ax.set_xlabel(x_lab)
    ax.set_ylabel(None)
    if v in ['pop_2010', 'pop_density']:
        # Large counts read better with thousands separators.
        ax.xaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))

# Take the legend entries from the last panel explicitly rather than relying
# on the loop variable leaking out of the loop.
handles, labels = axs[-1, -1].get_legend_handles_labels()
fig.supylabel('Average Weight Generation (kg/day)')
fig.legend(handles, labels, loc='lower center', ncol=4, bbox_to_anchor=(0.5, -0.075))
fig.tight_layout()
plt.show()