[47]
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Default figure size (inches) for every plot in this notebook.
sns.set(rc={'figure.figsize': (8, 5)})
[48]
# Barangay-level census table; the file is whitespace-delimited.
# sep=r'\s+' is the documented replacement for delim_whitespace=True,
# which is deprecated since pandas 2.2.
demogs2 = pd.read_csv('data/census_brgy.txt', sep=r'\s+')
demogs2.head()
barangay | pop_2000 | pop_2010 | annual_growth_rate | hhold_pop | num_hholds | ave_hhold_size | median_age | land_area | pop_density | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Adlaon | 2,847 | 3,647 | 2.51 | 3,647 | 848 | 4.3 | 23 | 7.46 | 489 |
1 | Agsungot | 1,746 | 1,981 | 1.27 | 1,981 | 461 | 4.3 | 23 | 6.29 | 315 |
2 | Apas | 15,492 | 22,566 | 3.83 | 22,463 | 4,925 | 4.6 | 24 | 0.49 | 46,053 |
3 | Bacayan | 8,604 | 14,021 | 5.00 | 14,021 | 3,286 | 4.3 | 24 | 1.22 | 11,493 |
4 | Banilad | 5,220 | 9,903 | 6.61 | 9,299 | 2,003 | 4.6 | 25 | 1.51 | 6,558 |
[49]
# Count columns are read as strings with thousands separators (e.g. "2,847");
# strip the commas and cast each one to int64.
int_cols = ['pop_2000', 'pop_2010', 'hhold_pop', 'num_hholds']
int_cols_dict = dict.fromkeys(int_cols, 'int64')
for col, dtype in int_cols_dict.items():
    demogs2[col] = demogs2[col].str.replace(',', '').astype(dtype)

# Normalise barangay names: trim whitespace and turn underscores into spaces.
demogs2['barangay'] = demogs2['barangay'].str.strip().str.replace('_', ' ')

# Recompute population density from the 2010 population and land area,
# overwriting the pop_density column that came with the file.
demogs2['pop_density'] = demogs2['pop_2010'] / demogs2['land_area']
[50]
# Household survey table; the file is whitespace-delimited.
# sep=r'\s+' is the documented replacement for delim_whitespace=True,
# which is deprecated since pandas 2.2.
data = pd.read_csv("data/cebu_hh_survey_2011.txt", sep=r'\s+')
# Replace underscores in barangay names with spaces.
data['barangay'] = data['barangay'].str.replace('_', ' ')
data.head()
barangay | hh_id | hh_size | organic | paper | plastic | metal | others | total | av_kg_per_day | av_g_per_day | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | Capitol Site | H/H/1 | 7 | 5.25 | 2.25 | 2.25 | 0.0 | 0.0 | 9.75 | 1.39 | 1,390 |
1 | Capitol Site | H/H/2 | 4 | 6.50 | 2.50 | 2.25 | 0.0 | 5.0 | 16.25 | 2.32 | 2,320 |
2 | Capitol Site | H/H/3 | 6 | 6.50 | 2.00 | 2.00 | 0.0 | 7.0 | 17.50 | 2.50 | 2,500 |
3 | Capitol Site | H/H/4 | 3 | 8.25 | 2.00 | 2.25 | 0.0 | 0.0 | 12.50 | 1.79 | 1,790 |
4 | Capitol Site | H/H/5 | 9 | 9.00 | 2.00 | 2.00 | 0.0 | 0.0 | 13.00 | 1.86 | 1,860 |
[51]
# Attach barangay-level demographics to every surveyed household.
# Inner join: households whose barangay has no match in demogs2 are dropped.
merged = data.merge(
    demogs2[[
        'barangay',
        'pop_2010',
        'pop_density',
        'num_hholds',
        'land_area',
        'ave_hhold_size',
    ]],
    how='inner',
    on='barangay',
)
merged.head()
barangay | hh_id | hh_size | organic | paper | plastic | metal | others | total | av_kg_per_day | av_g_per_day | pop_2010 | pop_density | num_hholds | land_area | ave_hhold_size | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Capitol Site | H/H/1 | 7 | 5.25 | 2.25 | 2.25 | 0.0 | 0.0 | 9.75 | 1.39 | 1,390 | 15308 | 18668.292683 | 3801 | 0.82 | 4.0 |
1 | Capitol Site | H/H/2 | 4 | 6.50 | 2.50 | 2.25 | 0.0 | 5.0 | 16.25 | 2.32 | 2,320 | 15308 | 18668.292683 | 3801 | 0.82 | 4.0 |
2 | Capitol Site | H/H/3 | 6 | 6.50 | 2.00 | 2.00 | 0.0 | 7.0 | 17.50 | 2.50 | 2,500 | 15308 | 18668.292683 | 3801 | 0.82 | 4.0 |
3 | Capitol Site | H/H/4 | 3 | 8.25 | 2.00 | 2.25 | 0.0 | 0.0 | 12.50 | 1.79 | 1,790 | 15308 | 18668.292683 | 3801 | 0.82 | 4.0 |
4 | Capitol Site | H/H/5 | 9 | 9.00 | 2.00 | 2.00 | 0.0 | 0.0 | 13.00 | 1.86 | 1,860 | 15308 | 18668.292683 | 3801 | 0.82 | 4.0 |
[52]
# Distribution of av_kg_per_day across the surveyed households.
ax = sns.histplot(data, x='av_kg_per_day', bins=50)
ax.set(xlabel='Average Waste Generation (kg/day)')
plt.show()
[53]
# Median of av_kg_per_day over the merged household rows.
merged['av_kg_per_day'].median()
1.14
On average, organic waste makes up the largest share of the waste generated.
[54]
# Long format: one row per (household, waste type) holding that type's weight.
waste_type = data.melt(
    id_vars=['barangay', 'hh_id'],
    value_vars=['organic', 'plastic', 'metal', 'paper', 'others'],
    var_name='waste_type',
    value_name='weight',
)

fig, axs = plt.subplots(1, 2, figsize=(15, 8))

# Left panel: spread of household weights for each waste type.
sns.boxplot(x='waste_type', y='weight', data=waste_type, palette='Set3', ax=axs[0])
axs[0].set(xlabel='Waste Type', ylabel='Waste Generated in a week (kg)')

# Right panel: percentage of each barangay's total weight per waste type.
# sum_wt        -> weight per (barangay, waste type)
# wt_total_brgy -> weight per barangay
# pct           -> sum_wt as a percentage of wt_total_brgy
waste_type = waste_type.assign(
    sum_wt=waste_type.groupby(['barangay', 'waste_type'])['weight'].transform('sum'),
    wt_total_brgy=waste_type.groupby('barangay')['weight'].transform('sum'),
).assign(pct=lambda df: df['sum_wt'] / df['wt_total_brgy'] * 100)

# Every household row repeats its barangay/type percentage, so de-duplicate
# before using pct as the bar weight; stacked bars then add up per barangay.
g = sns.histplot(
    data=waste_type[['barangay', 'waste_type', 'pct']].drop_duplicates(),
    x='barangay',
    weights='pct',
    hue='waste_type',
    multiple='stack',
    shrink=0.8,
    palette='Set3',
    ax=axs[1],
)
axs[1].set(xlabel='Barangay', ylabel='% Total Waste Generation in a Week')
axs[1].tick_params(axis='x', labelsize=8.5)
plt.show()
[55]
# Fraction of the total surveyed weight contributed by each waste type.
# Summing per group and dividing once avoids DataFrameGroupBy.apply, which
# warns about operating on the grouping column since pandas 2.2, and avoids
# recomputing the grand total for every group.
average_fraction = (
    waste_type.groupby('waste_type')['weight'].sum() / waste_type['weight'].sum()
)
# The apply-based version produced an unnamed Series; keep it unnamed so that
# reset_index() still yields the column label 0 that is renamed to 'fraction'.
average_fraction.name = None
average_fraction
waste_type
metal 0.018975
organic 0.380777
others 0.260897
paper 0.163185
plastic 0.176166
dtype: float64
[56]
# Fraction of all households assumed to be serviced.
market_share = 0.1

# Price per waste type. 'organic' has no entry here.
# NOTE(review): units are not stated; presumably PHP per kg — confirm.
average_price = pd.DataFrame(
    {
        'waste_type': ['metal', 'paper', 'plastic', 'others'],
        'price': [7.7, 10.25, 4., 1.5],
    }
)
# Only half of the listed price is counted as the effective price.
# NOTE(review): the rationale for the 0.5 factor is not given — confirm.
average_price['eff_price'] = average_price['price'] * 0.5

# Monthly waste per household: median daily figure times 30 days.
ave_waste_generation_mo = merged['av_kg_per_day'].median() * 30

# Pair each waste type's fraction with its price. The default inner merge
# drops waste types that have no price entry.
price_waste_type = (
    average_fraction
    .reset_index()
    .rename(columns={0:'fraction'})
    .merge(average_price, on='waste_type')
)
price_waste_type['revenue_hh_mo'] = (
    price_waste_type['fraction'] * ave_waste_generation_mo * price_waste_type['eff_price']
)

total_num_hholds = demogs2['num_hholds'].sum() * market_share
ave_revenue_per_hh_mo = price_waste_type['revenue_hh_mo'].sum()

print('number of households to service:', int(total_num_hholds))
print(
    'Estimated Revenue per month:',
    round(int(total_num_hholds) * ave_revenue_per_hh_mo,2),
    'PHP',
)
number of households to service: 19546
Estimated Revenue per month: 974220.56 PHP
[57]
# Inspect the effective price used for each priced waste type.
average_price.eff_price
0 3.850
1 5.125
2 2.000
3 0.750
Name: eff_price, dtype: float64
[58]
# Cross-check of the serviced-household count on de-duplicated
# (barangay, num_hholds) rows. Uses market_share rather than a repeated
# literal 0.1 so this check cannot drift from the revenue estimate.
demogs2[['barangay', 'num_hholds']].drop_duplicates()['num_hholds'].sum() * market_share
19546.100000000002
[59]
# Scale each waste-type fraction by 34.
# NOTE(review): 34 looks like a rounded ave_waste_generation_mo
# (median kg/day * 30) — confirm, and consider using the variable instead.
average_fraction.mul(34)
waste_type
metal 0.645150
organic 12.946409
others 8.870509
paper 5.548289
plastic 5.989644
dtype: float64
[60]
# Per-barangay spread of av_kg_per_day.
ax = sns.boxplot(x='barangay', y='av_kg_per_day', data=data, palette='Set3')
ax.set(ylabel='Average Waste Generation (kg/day)')
plt.show()
[61]
# Per-barangay spread of household size.
ax = sns.boxplot(x='barangay', y='hh_size', data=data, palette='Set3')
ax.set_xlabel('Barangay')
ax.set_ylabel('Household size')
plt.show()
Next, we look at how household size relates to total waste generation.
[62]
# Household size against total weekly waste; barangay is encoded by both
# marker colour and marker shape.
ax = sns.scatterplot(
    x='hh_size',
    y='total',
    hue='barangay',
    style='barangay',
    data=data,
    s=100,
    alpha=0.8,
)
ax.set_xlabel('Household size')
ax.set_ylabel('Total Waste generation in a week (kg)')
plt.show()
There seems to be no clear relationship between the population of a locale and its total waste generation.
[63]
# Mean and standard deviation of av_kg_per_day for each barangay, keeping
# pop_2010 alongside. The result has two-level column labels because of the
# list-valued aggregation spec.
tmp = (
    merged
    .groupby(['barangay', 'pop_2010'], as_index=False)
    .agg({'av_kg_per_day': ['mean', 'std']})
)
[64]
# Barangay-level variables to plot against mean household waste generation,
# paired positionally with their x-axis labels. Named x_vars (not `vars`) so
# the builtin vars() is not shadowed.
x_vars = ['pop_2010', 'land_area', 'pop_density', 'ave_hhold_size']
x_labels = ['2010 Population', 'Land area (km^2)', 'Population Density (person/km^2)', 'Average Household Size']

fig, axs = plt.subplots(2, 2, figsize=(10, 7), sharey=True)

for ax, v, x_lab in zip(axs.flat, x_vars, x_labels):
    # One row per barangay: the variable's value plus mean/std of av_kg_per_day.
    tmp = (
        merged
        .groupby(['barangay', v], as_index=False)
        .agg({'av_kg_per_day': ['mean', 'std']})
    )
    # Flatten the two-level column labels by concatenation, e.g.
    # ('av_kg_per_day', 'mean') -> 'av_kg_per_daymean'.
    tmp.columns = [''.join(col) for col in tmp.columns]

    sns.scatterplot(x=v, y='av_kg_per_daymean', data=tmp, s=200,
                    hue='barangay', style='barangay', ax=ax)
    # Hide the per-panel legend; a single figure-level legend is added below.
    ax.get_legend().set_visible(False)
    ax.set_xlabel(x_lab)
    # Clear the per-panel y label; the figure-level label below replaces it.
    ax.set_ylabel(None)
    if v in ['pop_2010', 'pop_density']:
        # Thousands separators on the large-valued axes.
        ax.xaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))

# Each panel is drawn with the same hue/style grouping, so the handles of the
# last panel are used for the shared legend.
handles, labels = ax.get_legend_handles_labels()

fig.supylabel('Average Weight Generation (kg/day)')
fig.legend(handles, labels, loc='lower center', ncol=4, bbox_to_anchor=(0.5, -0.075))
fig.tight_layout()
plt.show()