{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exercise: Age Groups"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"ages = np.random.randint(0,100,20)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([39, 80, 38, 11, 49, 60, 56, 32, 5, 53, 50, 60, 43, 24, 63, 62, 41,\n",
" 37, 31, 20])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ages"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pure Python\n",
"\n",
"One option would be to group the data into categories and storing the results in a `dict` object"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Sort list into Groups\n",
"groups = {\n",
" '0 - 9' : [],\n",
" '10 - 19' : [],\n",
" '20 - 29' : [],\n",
" '30 - 39' : [],\n",
" '40 - 49' : [],\n",
" '50 - 59' : [],\n",
" '60 - 69' : [],\n",
" '70 - 79' : [],\n",
" '80 - 89' : [],\n",
" '90 - 99' : [],\n",
"} \n",
"for age in ages:\n",
" if age < 10:\n",
" groups['0 - 9'].append(age)\n",
" elif age < 20:\n",
" groups['10 - 19'].append(age)\n",
" elif age < 30:\n",
" groups['20 - 29'].append(age)\n",
" elif age < 40:\n",
" groups['30 - 39'].append(age)\n",
" elif age < 50:\n",
" groups['40 - 49'].append(age)\n",
" elif age < 60:\n",
" groups['50 - 59'].append(age)\n",
" elif age < 70:\n",
" groups['60 - 69'].append(age)\n",
" elif age < 80:\n",
" groups['70 - 79'].append(age)\n",
" elif age < 90:\n",
" groups['80 - 89'].append(age)\n",
" elif age < 100:\n",
" groups['90 - 99'].append(age)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'0 - 9': [5],\n",
" '10 - 19': [11],\n",
" '20 - 29': [24, 20],\n",
" '30 - 39': [39, 38, 32, 37, 31],\n",
" '40 - 49': [49, 43, 41],\n",
" '50 - 59': [56, 53, 50],\n",
" '60 - 69': [60, 60, 63, 62],\n",
" '70 - 79': [],\n",
" '80 - 89': [80],\n",
" '90 - 99': []}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"groups"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Pandas"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"data = pd.DataFrame(ages, columns=['ages'])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ages | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 39 | \n",
"
\n",
" \n",
" 1 | \n",
" 80 | \n",
"
\n",
" \n",
" 2 | \n",
" 38 | \n",
"
\n",
" \n",
" 3 | \n",
" 11 | \n",
"
\n",
" \n",
" 4 | \n",
" 49 | \n",
"
\n",
" \n",
" 5 | \n",
" 60 | \n",
"
\n",
" \n",
" 6 | \n",
" 56 | \n",
"
\n",
" \n",
" 7 | \n",
" 32 | \n",
"
\n",
" \n",
" 8 | \n",
" 5 | \n",
"
\n",
" \n",
" 9 | \n",
" 53 | \n",
"
\n",
" \n",
" 10 | \n",
" 50 | \n",
"
\n",
" \n",
" 11 | \n",
" 60 | \n",
"
\n",
" \n",
" 12 | \n",
" 43 | \n",
"
\n",
" \n",
" 13 | \n",
" 24 | \n",
"
\n",
" \n",
" 14 | \n",
" 63 | \n",
"
\n",
" \n",
" 15 | \n",
" 62 | \n",
"
\n",
" \n",
" 16 | \n",
" 41 | \n",
"
\n",
" \n",
" 17 | \n",
" 37 | \n",
"
\n",
" \n",
" 18 | \n",
" 31 | \n",
"
\n",
" \n",
" 19 | \n",
" 20 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ages\n",
"0 39\n",
"1 80\n",
"2 38\n",
"3 11\n",
"4 49\n",
"5 60\n",
"6 56\n",
"7 32\n",
"8 5\n",
"9 53\n",
"10 50\n",
"11 60\n",
"12 43\n",
"13 24\n",
"14 63\n",
"15 62\n",
"16 41\n",
"17 37\n",
"18 31\n",
"19 20"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"labels = [\"{0} - {1}\".format(i, i + 9) for i in range(0, 100, 10)]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"data['groups'] = pd.cut(data.ages, range(0, 101, 10), right=False, labels=labels)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ages | \n",
" groups | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 39 | \n",
" 30 - 39 | \n",
"
\n",
" \n",
" 1 | \n",
" 80 | \n",
" 80 - 89 | \n",
"
\n",
" \n",
" 2 | \n",
" 38 | \n",
" 30 - 39 | \n",
"
\n",
" \n",
" 3 | \n",
" 11 | \n",
" 10 - 19 | \n",
"
\n",
" \n",
" 4 | \n",
" 49 | \n",
" 40 - 49 | \n",
"
\n",
" \n",
" 5 | \n",
" 60 | \n",
" 60 - 69 | \n",
"
\n",
" \n",
" 6 | \n",
" 56 | \n",
" 50 - 59 | \n",
"
\n",
" \n",
" 7 | \n",
" 32 | \n",
" 30 - 39 | \n",
"
\n",
" \n",
" 8 | \n",
" 5 | \n",
" 0 - 9 | \n",
"
\n",
" \n",
" 9 | \n",
" 53 | \n",
" 50 - 59 | \n",
"
\n",
" \n",
" 10 | \n",
" 50 | \n",
" 50 - 59 | \n",
"
\n",
" \n",
" 11 | \n",
" 60 | \n",
" 60 - 69 | \n",
"
\n",
" \n",
" 12 | \n",
" 43 | \n",
" 40 - 49 | \n",
"
\n",
" \n",
" 13 | \n",
" 24 | \n",
" 20 - 29 | \n",
"
\n",
" \n",
" 14 | \n",
" 63 | \n",
" 60 - 69 | \n",
"
\n",
" \n",
" 15 | \n",
" 62 | \n",
" 60 - 69 | \n",
"
\n",
" \n",
" 16 | \n",
" 41 | \n",
" 40 - 49 | \n",
"
\n",
" \n",
" 17 | \n",
" 37 | \n",
" 30 - 39 | \n",
"
\n",
" \n",
" 18 | \n",
" 31 | \n",
" 30 - 39 | \n",
"
\n",
" \n",
" 19 | \n",
" 20 | \n",
" 20 - 29 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ages groups\n",
"0 39 30 - 39\n",
"1 80 80 - 89\n",
"2 38 30 - 39\n",
"3 11 10 - 19\n",
"4 49 40 - 49\n",
"5 60 60 - 69\n",
"6 56 50 - 59\n",
"7 32 30 - 39\n",
"8 5 0 - 9\n",
"9 53 50 - 59\n",
"10 50 50 - 59\n",
"11 60 60 - 69\n",
"12 43 40 - 49\n",
"13 24 20 - 29\n",
"14 63 60 - 69\n",
"15 62 60 - 69\n",
"16 41 40 - 49\n",
"17 37 30 - 39\n",
"18 31 30 - 39\n",
"19 20 20 - 29"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}