Alden's Web Nook

[47]

import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib as mpl
import matplotlib.pyplot as plt
# Default figure size for every plot in this notebook.
# `sns.set` is only an alias of `sns.set_theme`, which is the preferred interface.
sns.set_theme(
    rc={
        'figure.figsize': (8, 5),
    }
)

[48]

# Barangay-level census table, stored as whitespace-delimited text.
# sep=r'\s+' replaces the deprecated delim_whitespace=True and parses identically.
demogs2 = pd.read_csv('data/census_brgy.txt', sep=r'\s+')
demogs2.head()

	barangay	pop_2000	pop_2010	annual_growth_rate	hhold_pop	num_hholds	ave_hhold_size	median_age	land_area	pop_density
0	Adlaon	2,847	3,647	2.51	3,647	848	4.3	23	7.46	489
1	Agsungot	1,746	1,981	1.27	1,981	461	4.3	23	6.29	315
2	Apas	15,492	22,566	3.83	22,463	4,925	4.6	24	0.49	46,053
3	Bacayan	8,604	14,021	5.00	14,021	3,286	4.3	24	1.22	11,493
4	Banilad	5,220	9,903	6.61	9,299	2,003	4.6	25	1.51	6,558

[49]

# Count columns are read as strings with thousands separators (e.g. "15,492");
# strip the commas and cast each one to int64.
int_cols = ['pop_2000', 'pop_2010', 'hhold_pop', 'num_hholds']
int_cols_dict = dict.fromkeys(int_cols, 'int64')
for col in int_cols:
    demogs2[col] = demogs2[col].str.replace(',', '').astype(int_cols_dict[col])

# Barangay names use underscores in place of spaces in the raw file.
demogs2['barangay'] = demogs2['barangay'].str.strip().str.replace('_', ' ')

# Recompute density from the cleaned 2010 population and the land area,
# overwriting the pop_density column that came from the file.
demogs2['pop_density'] = demogs2['pop_2010'].div(demogs2['land_area'])

[50]

# Household waste survey, stored as whitespace-delimited text.
# sep=r'\s+' replaces the deprecated delim_whitespace=True and parses identically.
data = pd.read_csv("data/cebu_hh_survey_2011.txt", sep=r'\s+')
# Barangay names use underscores in place of spaces in the raw file.
data['barangay'] = data.barangay.str.replace('_', ' ')
data.head()

	barangay	hh_id	hh_size	organic	paper	plastic	others	total	av_kg_per_day	av_g_per_day
0	Capitol Site	H/H/1	7	5.25	2.25	2.25	0.0	9.75	1.39	1,390
1	Capitol Site	H/H/2	4	6.50	2.50	2.25	5.0	16.25	2.32	2,320
2	Capitol Site	H/H/3	6	6.50	2.00	2.00	7.0	17.50	2.50	2,500
3	Capitol Site	H/H/4	3	8.25	2.00	2.25	0.0	12.50	1.79	1,790
4	Capitol Site	H/H/5	9	9.00	2.00	2.00	0.0	13.00	1.86	1,860

[51]

# Attach barangay-level census figures to every surveyed household.
# Inner join: survey rows whose barangay has no match in the census table are dropped.
census_cols = ['barangay', 'pop_2010', 'pop_density', 'num_hholds', 'land_area', 'ave_hhold_size']
merged = data.merge(demogs2[census_cols], on='barangay', how='inner')
merged.head()

	barangay	hh_id	hh_size	organic	paper	plastic	others	total	av_kg_per_day	av_g_per_day	pop_2010	pop_density	num_hholds	land_area	ave_hhold_size
0	Capitol Site	H/H/1	7	5.25	2.25	2.25	0.0	9.75	1.39	1,390	15308	18668.292683	3801	0.82	4.0
1	Capitol Site	H/H/2	4	6.50	2.50	2.25	5.0	16.25	2.32	2,320	15308	18668.292683	3801	0.82	4.0
2	Capitol Site	H/H/3	6	6.50	2.00	2.00	7.0	17.50	2.50	2,500	15308	18668.292683	3801	0.82	4.0
3	Capitol Site	H/H/4	3	8.25	2.00	2.25	0.0	12.50	1.79	1,790	15308	18668.292683	3801	0.82	4.0
4	Capitol Site	H/H/5	9	9.00	2.00	2.00	0.0	13.00	1.86	1,860	15308	18668.292683	3801	0.82	4.0

[52]

# Distribution of per-household average daily waste across the whole survey.
ax = sns.histplot(x='av_kg_per_day', bins=50, data=data)
ax.set(xlabel='Average Waste Generation (kg/day)')
plt.show()

[53]

# Median average daily waste over the merged (survey + census) rows.
merged['av_kg_per_day'].median()

1.14

On average, organic waste makes up the largest share of the waste generated.

[54]

# Reshape the survey to long form: one row per (household, waste type).
# NOTE(review): the data.head() preview shows no 'metal' column — confirm the
# survey file actually contains one.
waste_type = data.melt(
    id_vars=['barangay', 'hh_id'],
    value_vars=['organic', 'plastic', 'metal', 'paper', 'others'],
    var_name='waste_type',
    value_name='weight',
)
fig, axs = plt.subplots(1, 2, figsize=(15, 8))

# Left panel: spread of household weights for each waste type.
sns.boxplot(x='waste_type', y='weight', data=waste_type, palette='Set3', ax=axs[0])
axs[0].set(xlabel='Waste Type', ylabel='Waste Generated in a week (kg)')

# Right panel: each waste type's percentage of its barangay's total weight.
waste_type['sum_wt'] = (
    waste_type.groupby(['barangay', 'waste_type'])['weight'].transform('sum')
)
waste_type['wt_total_brgy'] = (
    waste_type.groupby(['barangay'])['weight'].transform('sum')
)
waste_type['pct'] = waste_type['sum_wt'] / waste_type['wt_total_brgy'] * 100

# pct repeats on every household row, so keep one row per (barangay, waste type)
# and draw it as a weighted, stacked histogram.
pct_per_brgy = waste_type[['barangay', 'waste_type', 'pct']].drop_duplicates()
g = sns.histplot(
    pct_per_brgy,
    x='barangay',
    hue='waste_type',
    weights='pct',
    multiple='stack',
    palette='Set3',
    shrink=0.8,
    ax=axs[1],
)
axs[1].set(xlabel='Barangay', ylabel='% Total Waste Generation in a Week')
axs[1].tick_params(axis='x', labelsize=8.5)
plt.show()

[55]

# Fraction of the total surveyed weight contributed by each waste type.
# groupby.apply returns an unnamed Series indexed by waste_type.
total_weight = waste_type['weight'].sum()
average_fraction = waste_type.groupby('waste_type').apply(
    lambda grp: grp['weight'].sum() / total_weight
)
average_fraction

waste_type
metal      0.018975
organic    0.380777
others     0.260897
paper      0.163185
plastic    0.176166
dtype: float64

[56]

# --- Monthly revenue estimate from recyclables ---
market_share = 0.1    # fraction of all census households assumed to be serviced
# NOTE(review): 0.5 presumably discounts the listed buying price — confirm its source.
price_factor = 0.5
days_per_month = 30   # scales the daily figure to a month

# Listed price per waste type. 'organic' has no entry, so the inner merge below
# leaves it out of the revenue.
# NOTE(review): prices are presumably PHP per kg — confirm.
average_price = pd.DataFrame({
    'waste_type': ['metal', 'paper', 'plastic', 'others'],
    'price': [7.7, 10.25, 4., 1.5]
})
average_price['eff_price'] = average_price.price * price_factor

# Median (not mean) daily waste per household, scaled to one month.
ave_waste_generation_mo = merged.av_kg_per_day.median() * days_per_month

# Name the fraction column explicitly instead of relying on the unnamed Series
# landing in a column labelled 0 after reset_index().
price_waste_type = pd.merge(
    average_fraction.rename('fraction').reset_index(),
    average_price,
    on='waste_type',
)
price_waste_type['revenue_hh_mo'] = (
    price_waste_type['fraction']
    * ave_waste_generation_mo
    * price_waste_type['eff_price']
)

total_num_hholds = demogs2['num_hholds'].sum() * market_share
ave_revenue_per_hh_mo = price_waste_type['revenue_hh_mo'].sum()
print('number of households to service:', int(total_num_hholds))
print('Estimated Revenue per month:', round(int(total_num_hholds) * ave_revenue_per_hh_mo,2), 'PHP')

number of households to service: 19546
Estimated Revenue per month: 974220.56 PHP

[57]

# Effective price for each priced waste type.
average_price.eff_price

0    3.850
1    5.125
2    2.000
3    0.750
Name: eff_price, dtype: float64

[58]

# Sanity check: serviced-household count after dropping duplicate
# (barangay, num_hholds) rows. Uses market_share so it cannot drift from the estimate.
demogs2[['barangay', 'num_hholds']].drop_duplicates()['num_hholds'].sum() * market_share

19546.100000000002

[59]

# Scale each waste-type fraction by 34.
# NOTE(review): the meaning of 34 is not stated anywhere in the notebook — confirm.
average_fraction.mul(34)

waste_type
metal       0.645150
organic    12.946409
others      8.870509
paper       5.548289
plastic     5.989644
dtype: float64

[60]

# Spread of per-household average daily waste within each barangay.
ax = sns.boxplot(x='barangay', y='av_kg_per_day', data=data, palette='Set3')
ax.set(ylabel='Average Waste Generation (kg/day)')
plt.show()

[61]

# Spread of household sizes within each barangay.
ax = sns.boxplot(x='barangay', y='hh_size', data=data, palette='Set3')
ax.set_xlabel('Barangay')
ax.set_ylabel('Household size')
plt.show()

We now look at how household size relates to total waste generation.

[62]

# Weekly total waste against household size; colour and marker both encode the barangay.
ax = sns.scatterplot(
    x='hh_size',
    y='total',
    hue='barangay',
    style='barangay',
    s=100,
    alpha=0.8,
    data=data,
)
ax.set_xlabel('Household size')
ax.set_ylabel('Total Waste generation  in a week (kg)')
plt.show()

There seems to be no clear relationship between the population of a locale and its total waste generation.

[63]

# Mean and standard deviation of daily waste per (barangay, 2010 population) pair.
# The result has two-level column labels because a list of aggregations is applied.
tmp = (
    merged
    .groupby(['barangay', 'pop_2010'], as_index=False)
    .agg({'av_kg_per_day': ['mean', 'std']})
)

[64]

# 2x2 grid: mean daily waste per barangay against four census variables.
# Named x_vars rather than `vars` so the builtin vars() is not shadowed.
x_vars = ['pop_2010', 'land_area', 'pop_density', 'ave_hhold_size']
x_labels = ['2010 Population', 'Land area (km^2)', 'Population Density (person/km^2)', 'Average Household Size']
fig, axs = plt.subplots(2, 2, figsize=(10, 7), sharey=True)
for ax, v, x_lab in zip(axs.reshape(-1), x_vars, x_labels):
    # Grouping on (barangay, v) carries the census variable through the aggregation.
    # Named aggregation yields flat column names directly.
    tmp = merged.groupby(['barangay', v], as_index=False).agg(
        av_kg_per_daymean=('av_kg_per_day', 'mean'),
        av_kg_per_daystd=('av_kg_per_day', 'std'),
    )
    sns.scatterplot(
        x=v, y='av_kg_per_daymean', data=tmp, s=200,
        hue='barangay', style='barangay', ax=ax,
    )
    # Hide the per-panel legend; one shared legend is added to the figure below.
    ax.get_legend().set_visible(False)
    ax.set_xlabel(x_lab)
    ax.set_ylabel(None)
    if v in ['pop_2010', 'pop_density']:
        # Thousands separators on the large-valued axes.
        ax.xaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))

# Every panel uses the same barangay hue/style encoding, so the last panel's
# handles serve as the shared legend.
handles, labels = axs[-1, -1].get_legend_handles_labels()
fig.supylabel('Average Weight Generation (kg/day)')
fig.legend(handles, labels, loc='lower center', ncol=4, bbox_to_anchor=(0.5, -0.075))
fig.tight_layout()
plt.show()

Alden Cabajar

Cebu Solid Waste Analysis

📅 2021-10-31