{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Exercise: Age Groups" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "ages = np.random.randint(0, 100, 20)" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([39, 80, 38, 11, 49, 60, 56, 32, 5, 53, 50, 60, 43, 24, 63, 62, 41,\n", " 37, 31, 20])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ages" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Pure Python\n", "\n", "One option would be to group the data into categories and store the results in a `dict` object." ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Sort the ages into ten-year groups keyed by a 'low - high' label.\n", "BIN_WIDTH = 10\n", "decade_labels = [\n", "    '{0} - {1}'.format(low, low + BIN_WIDTH - 1)\n", "    for low in range(0, 100, BIN_WIDTH)\n", "]\n", "groups = {label: [] for label in decade_labels}\n", "\n", "for age in ages:\n", "    # Integer division maps 0-9 to 0, 10-19 to 1, ..., 90-99 to 9.\n", "    decade = age // BIN_WIDTH\n", "    # Ages outside 0-99 belong to none of the ten groups, so they are skipped\n", "    # instead of being filed under the first or last label.\n", "    if 0 <= decade < len(decade_labels):\n", "        groups[decade_labels[decade]].append(age)" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'0 - 9': [5],\n", " '10 - 19': [11],\n", " '20 - 29': [24, 20],\n", " '30 - 39': [39, 38, 32, 37, 31],\n", " '40 - 49': [49, 43, 41],\n", " '50 - 59': [56, 53, 50],\n", " '60 - 69': [60, 60, 63, 62],\n", " '70 - 79': [],\n", " '80 - 89': [80],\n", " '90 - 99': []}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "groups" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Pandas" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "data = pd.DataFrame(ages, columns=['ages'])" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ages
039
180
238
311
449
560
656
732
85
953
1050
1160
1243
1324
1463
1562
1641
1737
1831
1920
\n", "
" ], "text/plain": [ " ages\n", "0 39\n", "1 80\n", "2 38\n", "3 11\n", "4 49\n", "5 60\n", "6 56\n", "7 32\n", "8 5\n", "9 53\n", "10 50\n", "11 60\n", "12 43\n", "13 24\n", "14 63\n", "15 62\n", "16 41\n", "17 37\n", "18 31\n", "19 20" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# One 'low - high' label per ten-year bin: '0 - 9', '10 - 19', ..., '90 - 99'.\n", "labels = [f\"{low} - {low + 9}\" for low in range(0, 100, 10)]" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Bin edges 0, 10, ..., 100 with right=False give half-open intervals\n", "# [0, 10), [10, 20), ..., [90, 100), so an age of 20 is labelled '20 - 29'.\n", "data['groups'] = pd.cut(\n", "    data['ages'],\n", "    bins=range(0, 101, 10),\n", "    right=False,\n", "    labels=labels,\n", ")" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agesgroups
03930 - 39
18080 - 89
23830 - 39
31110 - 19
44940 - 49
56060 - 69
65650 - 59
73230 - 39
850 - 9
95350 - 59
105050 - 59
116060 - 69
124340 - 49
132420 - 29
146360 - 69
156260 - 69
164140 - 49
173730 - 39
183130 - 39
192020 - 29
\n", "
" ], "text/plain": [ " ages groups\n", "0 39 30 - 39\n", "1 80 80 - 89\n", "2 38 30 - 39\n", "3 11 10 - 19\n", "4 49 40 - 49\n", "5 60 60 - 69\n", "6 56 50 - 59\n", "7 32 30 - 39\n", "8 5 0 - 9\n", "9 53 50 - 59\n", "10 50 50 - 59\n", "11 60 60 - 69\n", "12 43 40 - 49\n", "13 24 20 - 29\n", "14 63 60 - 69\n", "15 62 60 - 69\n", "16 41 40 - 49\n", "17 37 30 - 39\n", "18 31 30 - 39\n", "19 20 20 - 29" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }