In [1]:
import matplotlib.pyplot as plt
plt.style.use('classic')
%matplotlib inline
import seaborn as sns
import pandas as pd 
import numpy as np
In [2]:
# Load the World Bank population estimates CSV.
# A forward slash is used as the path separator: the original backslash form
# ("WBPopulation\P...") is an invalid string escape and only resolves on Windows,
# whereas "/" is accepted on Windows, macOS and Linux alike.
df = pd.read_csv("WBPopulation/Population-EstimatesData.csv")
df.head() #Sample of original data
Out[2]:
Country Name Country Code Indicator Name Indicator Code 1960 1961 1962 1963 1964 1965 ... 2042 2043 2044 2045 2046 2047 2048 2049 2050 Unnamed: 95
0 Arab World ARB Age dependency ratio (% of working-age populat... SP.POP.DPND 88.205810 89.644734 90.944498 92.063631 92.892881 93.364459 ... 54.715564 54.856725 55.017692 55.194098 55.392757 55.588676 55.782619 55.974144 56.162680 NaN
1 Arab World ARB Age dependency ratio, old SP.POP.DPND.OL 6.602746 6.714313 6.809043 6.884778 6.937449 6.965510 ... 13.404670 13.783196 14.174138 14.576902 14.970161 15.364431 15.763190 16.170089 16.587507 NaN
2 Arab World ARB Age dependency ratio, young SP.POP.DPND.YG 81.465388 82.775832 83.945660 84.928415 85.637945 86.037480 ... 40.751346 40.556722 40.367825 40.179692 40.016288 39.845605 39.665364 39.471969 39.262090 NaN
3 Arab World ARB Age population, age 0, female, interpolated SP.POP.AG00.FE.IN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 Arab World ARB Age population, age 0, male, interpolated SP.POP.AG00.MA.IN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 96 columns

In [3]:
# Number of distinct 'Country Name' values in the raw data.
# NOTE(review): the sample output above includes "Arab World", so this count
# presumably covers regional/aggregate entries as well as countries - confirm.
df.loc[:, 'Country Name'].nunique()
Out[3]:
259

Compare the mean annual population growth rates from 1960 to 2017 for the World, ASEAN and Singapore. Create a separate data set for each of the 10 ASEAN countries and one combined data set to represent ASEAN.

In [4]:
# 10 ASEAN Countries, 10 sets
def _country_rows(country_name):
    """Return the rows of ``df`` for a single country, re-indexed from 0.

    Parameters
    ----------
    country_name : str
        Exact value to match in the 'Country Name' column.

    Returns
    -------
    pandas.DataFrame
        The matching rows without the 'Country Name' column (constant after
        filtering) and without the 'Unnamed: 95' column (shown as NaN in the
        sample output; presumably a trailing-delimiter artefact - confirm).

    Raises
    ------
    KeyError
        If either dropped column is missing from ``df``.
    """
    rows = df[df['Country Name'] == country_name]
    # drop=True discards the old index instead of materialising it as a column
    # that then has to be dropped again.
    return rows.reset_index(drop=True).drop(columns=['Country Name', 'Unnamed: 95'])


df_Sg = _country_rows('Singapore')
df_My = _country_rows('Malaysia')
df_Indo = _country_rows('Indonesia')
df_Php = _country_rows('Philippines')
df_Th = _country_rows('Thailand')
df_Vet = _country_rows('Vietnam')
df_Cam = _country_rows('Cambodia')
df_Mym = _country_rows('Myanmar')
df_Bru = _country_rows('Brunei Darussalam')
df_Lao = _country_rows('Lao PDR')
In [5]:
# Create a data set for ASEAN from member countries.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
df_ASEAN = pd.concat([df_Sg, df_My, df_Indo, df_Php, df_Th, df_Vet, df_Cam, df_Mym, df_Bru, df_Lao])
# Extract the population growth data and clean it up
df_ASEAN1 = df_ASEAN[df_ASEAN['Indicator Name'] == 'Population growth (annual %)']
ASEAN_1 = df_ASEAN1.reset_index(drop=True)
# Drop columns that are empty for every member country, then round to 2 decimals.
ASEAN_1 = ASEAN_1.dropna(axis='columns', how='all').round(2)
# Per-year average across the member countries, capped at 2 decimal places.
# numeric_only=True skips the text columns (country code, indicator name/code);
# without it pandas >= 2.0 raises TypeError instead of silently ignoring them.
# NOTE(review): no explicit year cut-off is applied here - the series ends at 2017
# only if the dropna above removed every later column; confirm against the data.
ASEAN_mean = ASEAN_1.mean(numeric_only=True).round(2)
In [6]:
# Do same for the world's averages up to 2017
# NOTE(review): this averages every row of the raw data, which includes aggregate
# entries such as "Arab World" (see the sample output), so the result is an
# unweighted mean over all entries rather than a population-weighted world rate -
# confirm this is the intended definition of "World".
World_1 = (
    df[df['Indicator Name'] == 'Population growth (annual %)']
    .reset_index(drop=True)             # remove the old index
    .dropna(axis='columns', how='all')  # drop columns with no data at all
)
# Keep the descriptive (non-year) columns plus year columns up to 2017.
# Selecting by label instead of by position (iloc[:, 0:62]) keeps the cut-off
# correct even if the number of leading columns or surviving years changes.
world_columns = [col for col in World_1.columns
                 if not str(col).isdigit() or int(col) <= 2017]
World_1 = World_1[world_columns]
# numeric_only=True skips the text columns; pandas >= 2.0 raises TypeError otherwise.
World_mean = World_1.mean(numeric_only=True).round(2)
In [7]:
# Also do the same for the Singapore's averages up to 2017
df_Sg1 = (
    df_Sg[df_Sg['Indicator Name'] == 'Population growth (annual %)']
    .reset_index(drop=True)
    .dropna(axis='columns', how='all')
)
# Keep the descriptive (non-year) columns plus year columns up to 2017.
# df_Sg has only three leading text columns ('Country Name' was dropped when it
# was built), so the former positional slice iloc[:, 0:62] could reach one year
# past 2017; selecting by label makes the cut-off exact.
sg_columns = [col for col in df_Sg1.columns
              if not str(col).isdigit() or int(col) <= 2017]
df_Sg1 = df_Sg1[sg_columns]
# numeric_only=True skips the text columns; pandas >= 2.0 raises TypeError otherwise.
Sg_mean = df_Sg1.mean(numeric_only=True).round(2)
In [8]:
# Line chart of the three mean growth-rate series (matplotlib with seaborn styling).
sns.set()

# Tick labels come from the Singapore series index; each series is plotted by position.
MyXLabel = Sg_mean.index
SgAvg = Sg_mean.values
W_Avg = World_mean.values
A_Avg = ASEAN_mean.values

fig, ax = plt.subplots()
for series_values, line_colour, series_name in (
    (SgAvg, "green", "Singapore"),
    (W_Avg, "red", "World"),
    (A_Avg, "blue", "ASEAN"),
):
    ax.plot(series_values, color=line_colour, linewidth=1, linestyle="solid", label=series_name)

# Only the left limit is set; the right limit is left to autoscaling.
ax.set_xlim(left=-3)

# One tick every third position, labelled with the matching index entry.
ax.set_xticks(np.arange(0, 60, step=3))
ax.set_xticklabels(MyXLabel[0:60:3], rotation=90)
ax.set_xlabel('Years')
ax.set_ylabel('%')
ax.set_title('Annual Population Growth % from 1960 To 2017')
ax.legend(ncol=3, loc='upper right')
plt.show()