2894 lines
77 KiB
Plaintext
2894 lines
77 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Indexing and selecting data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"%matplotlib inline\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"try:\n",
|
|
" import seaborn\n",
|
|
"except ImportError:\n",
|
|
" pass"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>country</th>\n",
|
|
" <th>population</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>30510</td>\n",
|
|
" <td>Brussels</td>\n",
|
|
" <td>Belgium</td>\n",
|
|
" <td>11.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>Paris</td>\n",
|
|
" <td>France</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>Germany</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>Netherlands</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>London</td>\n",
|
|
" <td>United Kingdom</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital country population\n",
|
|
"0 30510 Brussels Belgium 11.3\n",
|
|
"1 671308 Paris France 64.3\n",
|
|
"2 357050 Berlin Germany 81.3\n",
|
|
"3 41526 Amsterdam Netherlands 16.9\n",
|
|
"4 244820 London United Kingdom 64.9"
|
|
]
|
|
},
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# redefining the example objects\n",
|
|
"\n",
|
|
"# series\n",
|
|
"population = pd.Series({'Germany': 81.3, 'Belgium': 11.3, 'France': 64.3, \n",
|
|
" 'United Kingdom': 64.9, 'Netherlands': 16.9})\n",
|
|
"\n",
|
|
"# dataframe\n",
|
|
"data = {'country': ['Belgium', 'France', 'Germany', 'Netherlands', 'United Kingdom'],\n",
|
|
" 'population': [11.3, 64.3, 81.3, 16.9, 64.9],\n",
|
|
" 'area': [30510, 671308, 357050, 41526, 244820],\n",
|
|
" 'capital': ['Brussels', 'Paris', 'Berlin', 'Amsterdam', 'London']}\n",
|
|
"countries = pd.DataFrame(data)\n",
|
|
"countries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Setting the index to the country names:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Belgium</th>\n",
|
|
" <td>30510</td>\n",
|
|
" <td>Brussels</td>\n",
|
|
" <td>11.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>Paris</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>London</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population\n",
|
|
"country \n",
|
|
"Belgium 30510 Brussels 11.3\n",
|
|
"France 671308 Paris 64.3\n",
|
|
"Germany 357050 Berlin 81.3\n",
|
|
"Netherlands 41526 Amsterdam 16.9\n",
|
|
"United Kingdom 244820 London 64.9"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries = countries.set_index('country')\n",
|
|
"countries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Some notes on selecting data\n",
|
|
"\n",
|
|
"One of pandas' basic features is the labeling of rows and columns, but this makes indexing also a bit more complex compared to numpy. We now have to distuinguish between:\n",
|
|
"\n",
|
|
"- selection by label\n",
|
|
"- selection by position."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### `data[]` provides some convenience shortcuts "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"For a DataFrame, basic indexing selects the columns.\n",
|
|
"\n",
|
|
"Selecting a single column:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"country\n",
|
|
"Belgium 30510\n",
|
|
"France 671308\n",
|
|
"Germany 357050\n",
|
|
"Netherlands 41526\n",
|
|
"United Kingdom 244820\n",
|
|
"Name: area, dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries['area']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"or multiple columns:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>population</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Belgium</th>\n",
|
|
" <td>30510</td>\n",
|
|
" <td>11.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area population\n",
|
|
"country \n",
|
|
"Belgium 30510 11.3\n",
|
|
"France 671308 64.3\n",
|
|
"Germany 357050 81.3\n",
|
|
"Netherlands 41526 16.9\n",
|
|
"United Kingdom 244820 64.9"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries[['area', 'population']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"But, slicing accesses the rows:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>Paris</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population\n",
|
|
"country \n",
|
|
"France 671308 Paris 64.3\n",
|
|
"Germany 357050 Berlin 81.3\n",
|
|
"Netherlands 41526 Amsterdam 16.9"
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries['France':'Netherlands']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-danger\">\n",
|
|
" <b>NOTE</b>: Unlike slicing in numpy, the end label is **included**.\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"So as a summary, `[]` provides the following convenience shortcuts:\n",
|
|
"\n",
|
|
"- Series: selecting a label: `s[label]`\n",
|
|
"- DataFrame: selecting a single or multiple columns: `df['col']` or `df[['col1', 'col2']]`\n",
|
|
"- DataFrame: slicing the rows: `df['row_label1':'row_label2']` or `df[mask]`"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Systematic indexing with `loc` and `iloc`"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"When using `[]` like above, you can only select from one axis at once (rows or columns, not both). For more advanced indexing, you have some extra attributes:\n",
|
|
" \n",
|
|
"* `loc`: selection by label\n",
|
|
"* `iloc`: selection by position\n",
|
|
"\n",
|
|
"These methods index the different dimensions of the frame:\n",
|
|
"\n",
|
|
"* `df.loc[row_indexer, column_indexer]`\n",
|
|
"* `df.iloc[row_indexer, column_indexer]`"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Selecting a single element:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"357050"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries.loc['Germany', 'area']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"But the row or column indexer can also be a list, slice, boolean array, .."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>population</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area population\n",
|
|
"country \n",
|
|
"France 671308 64.3\n",
|
|
"Germany 357050 81.3"
|
|
]
|
|
},
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries.loc['France':'Germany', ['area', 'population']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"---\n",
|
|
"Selecting by position with `iloc` works similar as indexing numpy arrays:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Belgium</th>\n",
|
|
" <td>Brussels</td>\n",
|
|
" <td>11.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>Paris</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" capital population\n",
|
|
"country \n",
|
|
"Belgium Brussels 11.3\n",
|
|
"France Paris 64.3"
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries.iloc[0:2,1:3]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"The different indexing methods can also be used to assign data:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"countries2 = countries.copy()\n",
|
|
"countries2.loc['Belgium':'Germany', 'population'] = 10"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Belgium</th>\n",
|
|
" <td>30510</td>\n",
|
|
" <td>Brussels</td>\n",
|
|
" <td>10.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>Paris</td>\n",
|
|
" <td>10.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>10.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>London</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population\n",
|
|
"country \n",
|
|
"Belgium 30510 Brussels 10.0\n",
|
|
"France 671308 Paris 10.0\n",
|
|
"Germany 357050 Berlin 10.0\n",
|
|
"Netherlands 41526 Amsterdam 16.9\n",
|
|
"United Kingdom 244820 London 64.9"
|
|
]
|
|
},
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries2"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Boolean indexing (filtering)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Often, you want to select rows based on a certain condition. This can be done with 'boolean indexing' (like a where clause in SQL). \n",
|
|
"\n",
|
|
"The indexer (or boolean mask) should be 1-dimensional and the same length as the thing being indexed."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"country\n",
|
|
"Belgium False\n",
|
|
"France True\n",
|
|
"Germany True\n",
|
|
"Netherlands False\n",
|
|
"United Kingdom True\n",
|
|
"Name: area, dtype: bool"
|
|
]
|
|
},
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries['area'] > 100000"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>Paris</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>London</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population\n",
|
|
"country \n",
|
|
"France 671308 Paris 64.3\n",
|
|
"Germany 357050 Berlin 81.3\n",
|
|
"United Kingdom 244820 London 64.9"
|
|
]
|
|
},
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries[countries['area'] > 100000]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"---\n",
|
|
"\n",
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: Add a column `density` with the population density (note: population column is expressed in millions)\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" <th>density</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Belgium</th>\n",
|
|
" <td>30510</td>\n",
|
|
" <td>Brussels</td>\n",
|
|
" <td>11.3</td>\n",
|
|
" <td>370.370370</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>Paris</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" <td>95.783158</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" <td>227.699202</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" <td>406.973944</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>London</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" <td>265.092721</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population density\n",
|
|
"country \n",
|
|
"Belgium 30510 Brussels 11.3 370.370370\n",
|
|
"France 671308 Paris 64.3 95.783158\n",
|
|
"Germany 357050 Berlin 81.3 227.699202\n",
|
|
"Netherlands 41526 Amsterdam 16.9 406.973944\n",
|
|
"United Kingdom 244820 London 64.9 265.092721"
|
|
]
|
|
},
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries['density'] = countries['population']*1000000 / countries['area']\n",
|
|
"countries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: Select the capital and the population column of those countries where the density is larger than 300\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Belgium</th>\n",
|
|
" <td>Brussels</td>\n",
|
|
" <td>11.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" capital population\n",
|
|
"country \n",
|
|
"Belgium Brussels 11.3\n",
|
|
"Netherlands Amsterdam 16.9"
|
|
]
|
|
},
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries.loc[countries['density'] > 300, ['capital', 'population']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: Add a column 'density_ratio' with the ratio of the density to the mean density\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" <th>density</th>\n",
|
|
" <th>density_ratio</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Belgium</th>\n",
|
|
" <td>30510</td>\n",
|
|
" <td>Brussels</td>\n",
|
|
" <td>11.3</td>\n",
|
|
" <td>370.370370</td>\n",
|
|
" <td>1.355755</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>Paris</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" <td>95.783158</td>\n",
|
|
" <td>0.350618</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" <td>227.699202</td>\n",
|
|
" <td>0.833502</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" <td>406.973944</td>\n",
|
|
" <td>1.489744</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>London</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" <td>265.092721</td>\n",
|
|
" <td>0.970382</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population density density_ratio\n",
|
|
"country \n",
|
|
"Belgium 30510 Brussels 11.3 370.370370 1.355755\n",
|
|
"France 671308 Paris 64.3 95.783158 0.350618\n",
|
|
"Germany 357050 Berlin 81.3 227.699202 0.833502\n",
|
|
"Netherlands 41526 Amsterdam 16.9 406.973944 1.489744\n",
|
|
"United Kingdom 244820 London 64.9 265.092721 0.970382"
|
|
]
|
|
},
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries['density_ratio'] = countries['density'] / countries['density'].mean()\n",
|
|
"countries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: Change the capital of the UK to Cambridge\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" <th>density</th>\n",
|
|
" <th>density_ratio</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Belgium</th>\n",
|
|
" <td>30510</td>\n",
|
|
" <td>Brussels</td>\n",
|
|
" <td>11.3</td>\n",
|
|
" <td>370.370370</td>\n",
|
|
" <td>1.355755</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>Paris</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" <td>95.783158</td>\n",
|
|
" <td>0.350618</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" <td>227.699202</td>\n",
|
|
" <td>0.833502</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" <td>406.973944</td>\n",
|
|
" <td>1.489744</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>Cambridge</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" <td>265.092721</td>\n",
|
|
" <td>0.970382</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population density density_ratio\n",
|
|
"country \n",
|
|
"Belgium 30510 Brussels 11.3 370.370370 1.355755\n",
|
|
"France 671308 Paris 64.3 95.783158 0.350618\n",
|
|
"Germany 357050 Berlin 81.3 227.699202 0.833502\n",
|
|
"Netherlands 41526 Amsterdam 16.9 406.973944 1.489744\n",
|
|
"United Kingdom 244820 Cambridge 64.9 265.092721 0.970382"
|
|
]
|
|
},
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries.loc['United Kingdom', 'capital'] = 'Cambridge'\n",
|
|
"countries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: Select all countries whose population density is between 100 and 300 people/km²\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" <th>density</th>\n",
|
|
" <th>density_ratio</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" <td>227.699202</td>\n",
|
|
" <td>0.833502</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>Cambridge</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" <td>265.092721</td>\n",
|
|
" <td>0.970382</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population density density_ratio\n",
|
|
"country \n",
|
|
"Germany 357050 Berlin 81.3 227.699202 0.833502\n",
|
|
"United Kingdom 244820 Cambridge 64.9 265.092721 0.970382"
|
|
]
|
|
},
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries[(countries['density'] > 100) & (countries['density'] < 300)]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"source": [
|
|
"## Some other useful methods: `isin` and string methods"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"The `isin` method of Series is very useful to select rows that may contain certain values:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"s = countries['capital']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"s.isin?"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"country\n",
|
|
"Belgium False\n",
|
|
"France False\n",
|
|
"Germany True\n",
|
|
"Netherlands False\n",
|
|
"United Kingdom False\n",
|
|
"Name: capital, dtype: bool"
|
|
]
|
|
},
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"s.isin(['Berlin', 'London'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"This can then be used to filter the dataframe with boolean indexing:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" <th>density</th>\n",
|
|
" <th>density_ratio</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" <td>227.699202</td>\n",
|
|
" <td>0.833502</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population density density_ratio\n",
|
|
"country \n",
|
|
"Germany 357050 Berlin 81.3 227.699202 0.833502"
|
|
]
|
|
},
|
|
"execution_count": 22,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries[countries['capital'].isin(['Berlin', 'London'])]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Let's say we want to select all data for which the capital starts with a 'B'. In Python, when having a string, we could use the `startswith` method:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"True"
|
|
]
|
|
},
|
|
"execution_count": 23,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"'Berlin'.startswith('B')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"In pandas, these are available on a Series through the `str` namespace:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"country\n",
|
|
"Belgium True\n",
|
|
"France False\n",
|
|
"Germany True\n",
|
|
"Netherlands False\n",
|
|
"United Kingdom False\n",
|
|
"Name: capital, dtype: bool"
|
|
]
|
|
},
|
|
"execution_count": 24,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries['capital'].str.startswith('B')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"For an overview of all string methods, see: http://pandas.pydata.org/pandas-docs/stable/api.html#string-handling"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: Select all countries that have capital names with more than 7 characters\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" <th>density</th>\n",
|
|
" <th>density_ratio</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Belgium</th>\n",
|
|
" <td>30510</td>\n",
|
|
" <td>Brussels</td>\n",
|
|
" <td>11.3</td>\n",
|
|
" <td>370.370370</td>\n",
|
|
" <td>1.355755</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" <td>406.973944</td>\n",
|
|
" <td>1.489744</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>Cambridge</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" <td>265.092721</td>\n",
|
|
" <td>0.970382</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population density density_ratio\n",
|
|
"country \n",
|
|
"Belgium 30510 Brussels 11.3 370.370370 1.355755\n",
|
|
"Netherlands 41526 Amsterdam 16.9 406.973944 1.489744\n",
|
|
"United Kingdom 244820 Cambridge 64.9 265.092721 0.970382"
|
|
]
|
|
},
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries[countries['capital'].str.len() > 7]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: Select all countries that have capital names that contain the character sequence 'am'\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" <th>density</th>\n",
|
|
" <th>density_ratio</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" <td>406.973944</td>\n",
|
|
" <td>1.489744</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>Cambridge</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" <td>265.092721</td>\n",
|
|
" <td>0.970382</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population density density_ratio\n",
|
|
"country \n",
|
|
"Netherlands 41526 Amsterdam 16.9 406.973944 1.489744\n",
|
|
"United Kingdom 244820 Cambridge 64.9 265.092721 0.970382"
|
|
]
|
|
},
|
|
"execution_count": 26,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries[countries['capital'].str.contains('am')]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Pitfall: chained indexing (and the 'SettingWithCopyWarning')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 27,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"countries.loc['Belgium', 'capital'] = 'Ghent' "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 28,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" <th>density</th>\n",
|
|
" <th>density_ratio</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Belgium</th>\n",
|
|
" <td>30510</td>\n",
|
|
" <td>Ghent</td>\n",
|
|
" <td>11.3</td>\n",
|
|
" <td>370.370370</td>\n",
|
|
" <td>1.355755</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>Paris</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" <td>95.783158</td>\n",
|
|
" <td>0.350618</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" <td>227.699202</td>\n",
|
|
" <td>0.833502</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" <td>406.973944</td>\n",
|
|
" <td>1.489744</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>Cambridge</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" <td>265.092721</td>\n",
|
|
" <td>0.970382</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population density density_ratio\n",
|
|
"country \n",
|
|
"Belgium 30510 Ghent 11.3 370.370370 1.355755\n",
|
|
"France 671308 Paris 64.3 95.783158 0.350618\n",
|
|
"Germany 357050 Berlin 81.3 227.699202 0.833502\n",
|
|
"Netherlands 41526 Amsterdam 16.9 406.973944 1.489744\n",
|
|
"United Kingdom 244820 Cambridge 64.9 265.092721 0.970382"
|
|
]
|
|
},
|
|
"execution_count": 28,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 29,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/joris/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: \n",
|
|
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
|
"\n",
|
|
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
|
|
" if __name__ == '__main__':\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"countries['capital']['Belgium'] = 'Antwerp' "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 30,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" <th>density</th>\n",
|
|
" <th>density_ratio</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Belgium</th>\n",
|
|
" <td>30510</td>\n",
|
|
" <td>Antwerp</td>\n",
|
|
" <td>11.3</td>\n",
|
|
" <td>370.370370</td>\n",
|
|
" <td>1.355755</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>Paris</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" <td>95.783158</td>\n",
|
|
" <td>0.350618</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" <td>227.699202</td>\n",
|
|
" <td>0.833502</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" <td>406.973944</td>\n",
|
|
" <td>1.489744</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>Cambridge</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" <td>265.092721</td>\n",
|
|
" <td>0.970382</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population density density_ratio\n",
|
|
"country \n",
|
|
"Belgium 30510 Antwerp 11.3 370.370370 1.355755\n",
|
|
"France 671308 Paris 64.3 95.783158 0.350618\n",
|
|
"Germany 357050 Berlin 81.3 227.699202 0.833502\n",
|
|
"Netherlands 41526 Amsterdam 16.9 406.973944 1.489744\n",
|
|
"United Kingdom 244820 Cambridge 64.9 265.092721 0.970382"
|
|
]
|
|
},
|
|
"execution_count": 30,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 31,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/joris/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: \n",
|
|
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
|
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
|
"\n",
|
|
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
|
|
" if __name__ == '__main__':\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"countries[countries['capital'] == 'Antwerp']['capital'] = 'Brussels' "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 32,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>area</th>\n",
|
|
" <th>capital</th>\n",
|
|
" <th>population</th>\n",
|
|
" <th>density</th>\n",
|
|
" <th>density_ratio</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>country</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Belgium</th>\n",
|
|
" <td>30510</td>\n",
|
|
" <td>Antwerp</td>\n",
|
|
" <td>11.3</td>\n",
|
|
" <td>370.370370</td>\n",
|
|
" <td>1.355755</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>France</th>\n",
|
|
" <td>671308</td>\n",
|
|
" <td>Paris</td>\n",
|
|
" <td>64.3</td>\n",
|
|
" <td>95.783158</td>\n",
|
|
" <td>0.350618</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Germany</th>\n",
|
|
" <td>357050</td>\n",
|
|
" <td>Berlin</td>\n",
|
|
" <td>81.3</td>\n",
|
|
" <td>227.699202</td>\n",
|
|
" <td>0.833502</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Netherlands</th>\n",
|
|
" <td>41526</td>\n",
|
|
" <td>Amsterdam</td>\n",
|
|
" <td>16.9</td>\n",
|
|
" <td>406.973944</td>\n",
|
|
" <td>1.489744</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>United Kingdom</th>\n",
|
|
" <td>244820</td>\n",
|
|
" <td>Cambridge</td>\n",
|
|
" <td>64.9</td>\n",
|
|
" <td>265.092721</td>\n",
|
|
" <td>0.970382</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" area capital population density density_ratio\n",
|
|
"country \n",
|
|
"Belgium 30510 Antwerp 11.3 370.370370 1.355755\n",
|
|
"France 671308 Paris 64.3 95.783158 0.350618\n",
|
|
"Germany 357050 Berlin 81.3 227.699202 0.833502\n",
|
|
"Netherlands 41526 Amsterdam 16.9 406.973944 1.489744\n",
|
|
"United Kingdom 244820 Cambridge 64.9 265.092721 0.970382"
|
|
]
|
|
},
|
|
"execution_count": 32,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"countries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"How to avoid this?\n",
|
|
"\n",
|
|
"* Use `loc` instead of chained indexing if possible!\n",
|
|
"* Or `copy` explicitly if you don't want to change the original data."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"source": [
|
|
"## More exercises!"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"For the quick ones among you, here are some more exercises with some larger dataframe with film data. These exercises are based on the [PyCon tutorial of Brandon Rhodes](https://github.com/brandon-rhodes/pycon-pandas-tutorial/) (so all credit to him!) and the datasets he prepared for that. You can download these data from here: [`titles.csv`](https://drive.google.com/open?id=0B3G70MlBnCgKajNMa1pfSzN6Q3M) and [`cast.csv`](https://drive.google.com/open?id=0B3G70MlBnCgKal9UYTJSR2ZhSW8) and put them in the `/data` folder."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 31,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>name</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>character</th>\n",
|
|
" <th>n</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Suuri illusioni</td>\n",
|
|
" <td>1985</td>\n",
|
|
" <td>Homo $</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Guests</td>\n",
|
|
" <td>22</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Gangsta Rap: The Glockumentary</td>\n",
|
|
" <td>2007</td>\n",
|
|
" <td>Too $hort</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Himself</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Menace II Society</td>\n",
|
|
" <td>1993</td>\n",
|
|
" <td>Too $hort</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Lew-Loc</td>\n",
|
|
" <td>27</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Porndogs: The Adventures of Sadie</td>\n",
|
|
" <td>2009</td>\n",
|
|
" <td>Too $hort</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Bosco</td>\n",
|
|
" <td>3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Stop Pepper Palmer</td>\n",
|
|
" <td>2014</td>\n",
|
|
" <td>Too $hort</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Himself</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" title year name type character n\n",
|
|
"0 Suuri illusioni 1985 Homo $ actor Guests 22\n",
|
|
"1 Gangsta Rap: The Glockumentary 2007 Too $hort actor Himself NaN\n",
|
|
"2 Menace II Society 1993 Too $hort actor Lew-Loc 27\n",
|
|
"3 Porndogs: The Adventures of Sadie 2009 Too $hort actor Bosco 3\n",
|
|
"4 Stop Pepper Palmer 2014 Too $hort actor Himself NaN"
|
|
]
|
|
},
|
|
"execution_count": 31,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"cast = pd.read_csv('data/cast.csv')\n",
|
|
"cast.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 32,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>The Rising Son</td>\n",
|
|
" <td>1990</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Ashes of Kukulcan</td>\n",
|
|
" <td>2016</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>The Thousand Plane Raid</td>\n",
|
|
" <td>1969</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Crucea de piatra</td>\n",
|
|
" <td>1993</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>The 86</td>\n",
|
|
" <td>2015</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" title year\n",
|
|
"0 The Rising Son 1990\n",
|
|
"1 Ashes of Kukulcan 2016\n",
|
|
"2 The Thousand Plane Raid 1969\n",
|
|
"3 Crucea de piatra 1993\n",
|
|
"4 The 86 2015"
|
|
]
|
|
},
|
|
"execution_count": 32,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"titles = pd.read_csv('data/titles.csv')\n",
|
|
"titles.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: How many movies are listed in the titles dataframe?\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"215981"
|
|
]
|
|
},
|
|
"execution_count": 33,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"len(titles)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: What are the earliest two films listed in the titles dataframe?\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 34,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>165182</th>\n",
|
|
" <td>Miss Jerry</td>\n",
|
|
" <td>1894</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>85708</th>\n",
|
|
" <td>Reproduction of the Corbett and Fitzsimmons Fight</td>\n",
|
|
" <td>1897</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" title year\n",
|
|
"165182 Miss Jerry 1894\n",
|
|
"85708 Reproduction of the Corbett and Fitzsimmons Fight 1897"
|
|
]
|
|
},
|
|
"execution_count": 34,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"titles.sort('year').head(2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: How many movies have the title \"Hamlet\"?\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 35,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"19"
|
|
]
|
|
},
|
|
"execution_count": 35,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"len(titles[titles.title == 'Hamlet'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: List all of the \"Treasure Island\" movies from earliest to most recent.\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>191379</th>\n",
|
|
" <td>Treasure Island</td>\n",
|
|
" <td>1918</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>47769</th>\n",
|
|
" <td>Treasure Island</td>\n",
|
|
" <td>1920</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>192917</th>\n",
|
|
" <td>Treasure Island</td>\n",
|
|
" <td>1934</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>90175</th>\n",
|
|
" <td>Treasure Island</td>\n",
|
|
" <td>1950</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>104714</th>\n",
|
|
" <td>Treasure Island</td>\n",
|
|
" <td>1972</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>103646</th>\n",
|
|
" <td>Treasure Island</td>\n",
|
|
" <td>1973</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>190792</th>\n",
|
|
" <td>Treasure Island</td>\n",
|
|
" <td>1985</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>166675</th>\n",
|
|
" <td>Treasure Island</td>\n",
|
|
" <td>1999</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" title year\n",
|
|
"191379 Treasure Island 1918\n",
|
|
"47769 Treasure Island 1920\n",
|
|
"192917 Treasure Island 1934\n",
|
|
"90175 Treasure Island 1950\n",
|
|
"104714 Treasure Island 1972\n",
|
|
"103646 Treasure Island 1973\n",
|
|
"190792 Treasure Island 1985\n",
|
|
"166675 Treasure Island 1999"
|
|
]
|
|
},
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"titles[titles.title == 'Treasure Island'].sort('year')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: How many movies were made from 1950 through 1959?\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"12120"
|
|
]
|
|
},
|
|
"execution_count": 37,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"t = titles\n",
|
|
"len(t[(t.year >= 1950) & (t.year <= 1959)])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"12120"
|
|
]
|
|
},
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"len(t[t.year // 10 == 195])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: How many roles in the movie \"Inception\" are NOT ranked by an \"n\" value?\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 39,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"22"
|
|
]
|
|
},
|
|
"execution_count": 39,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"c = cast\n",
|
|
"c = c[c.title == 'Inception']\n",
|
|
"c = c[c.n.isnull()]\n",
|
|
"len(c)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: But how many roles in the movie \"Inception\" did receive an \"n\" value?\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 40,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"51"
|
|
]
|
|
},
|
|
"execution_count": 40,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"c = cast\n",
|
|
"c = c[c.title == 'Inception']\n",
|
|
"c = c[c.n.notnull()]\n",
|
|
"len(c)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: Display the cast of \"North by Northwest\" in their correct \"n\"-value order, ignoring roles that did not earn a numeric \"n\" value.\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 41,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>name</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>character</th>\n",
|
|
" <th>n</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>778278</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Cary Grant</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Roger O. Thornhill</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3104905</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Eva Marie Saint</td>\n",
|
|
" <td>actress</td>\n",
|
|
" <td>Eve Kendall</td>\n",
|
|
" <td>2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1300582</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>James Mason</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Phillip Vandamm</td>\n",
|
|
" <td>3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2794420</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Jessie Royce Landis</td>\n",
|
|
" <td>actress</td>\n",
|
|
" <td>Clara Thornhill</td>\n",
|
|
" <td>4</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>317310</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Leo G. Carroll</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>The Professor</td>\n",
|
|
" <td>5</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2702399</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Josephine Hutchinson</td>\n",
|
|
" <td>actress</td>\n",
|
|
" <td>Mrs. Townsend</td>\n",
|
|
" <td>6</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1513974</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Philip Ober</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Lester Townsend</td>\n",
|
|
" <td>7</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1137555</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Martin Landau</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Leonard</td>\n",
|
|
" <td>8</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2181363</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Adam Williams</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Valerian</td>\n",
|
|
" <td>9</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1616877</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Edward Platt</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Victor Larrabee</td>\n",
|
|
" <td>10</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>594433</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Robert Ellenstein</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Licht</td>\n",
|
|
" <td>11</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2046317</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Les Tremayne</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Auctioneer</td>\n",
|
|
" <td>12</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>413825</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Philip Coolidge</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Dr. Cross</td>\n",
|
|
" <td>13</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1347170</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Patrick McVey</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Sergeant Flamm</td>\n",
|
|
" <td>14</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>182129</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Edward Binns</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Captain Junket</td>\n",
|
|
" <td>15</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1235786</th>\n",
|
|
" <td>North by Northwest</td>\n",
|
|
" <td>1959</td>\n",
|
|
" <td>Ken Lynch</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Charley - Chicago Policeman</td>\n",
|
|
" <td>16</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" title year name type \\\n",
|
|
"778278 North by Northwest 1959 Cary Grant actor \n",
|
|
"3104905 North by Northwest 1959 Eva Marie Saint actress \n",
|
|
"1300582 North by Northwest 1959 James Mason actor \n",
|
|
"2794420 North by Northwest 1959 Jessie Royce Landis actress \n",
|
|
"317310 North by Northwest 1959 Leo G. Carroll actor \n",
|
|
"2702399 North by Northwest 1959 Josephine Hutchinson actress \n",
|
|
"1513974 North by Northwest 1959 Philip Ober actor \n",
|
|
"1137555 North by Northwest 1959 Martin Landau actor \n",
|
|
"2181363 North by Northwest 1959 Adam Williams actor \n",
|
|
"1616877 North by Northwest 1959 Edward Platt actor \n",
|
|
"594433 North by Northwest 1959 Robert Ellenstein actor \n",
|
|
"2046317 North by Northwest 1959 Les Tremayne actor \n",
|
|
"413825 North by Northwest 1959 Philip Coolidge actor \n",
|
|
"1347170 North by Northwest 1959 Patrick McVey actor \n",
|
|
"182129 North by Northwest 1959 Edward Binns actor \n",
|
|
"1235786 North by Northwest 1959 Ken Lynch actor \n",
|
|
"\n",
|
|
" character n \n",
|
|
"778278 Roger O. Thornhill 1 \n",
|
|
"3104905 Eve Kendall 2 \n",
|
|
"1300582 Phillip Vandamm 3 \n",
|
|
"2794420 Clara Thornhill 4 \n",
|
|
"317310 The Professor 5 \n",
|
|
"2702399 Mrs. Townsend 6 \n",
|
|
"1513974 Lester Townsend 7 \n",
|
|
"1137555 Leonard 8 \n",
|
|
"2181363 Valerian 9 \n",
|
|
"1616877 Victor Larrabee 10 \n",
|
|
"594433 Licht 11 \n",
|
|
"2046317 Auctioneer 12 \n",
|
|
"413825 Dr. Cross 13 \n",
|
|
"1347170 Sergeant Flamm 14 \n",
|
|
"182129 Captain Junket 15 \n",
|
|
"1235786 Charley - Chicago Policeman 16 "
|
|
]
|
|
},
|
|
"execution_count": 41,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"c = cast\n",
|
|
"c = c[c.title == 'North by Northwest']\n",
|
|
"c = c[c.n.notnull()]\n",
|
|
"c = c.sort('n')\n",
|
|
"c"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: How many roles were credited in the silent 1921 version of Hamlet?\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 42,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"9"
|
|
]
|
|
},
|
|
"execution_count": 42,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"c = cast\n",
|
|
"c = c[(c.title == 'Hamlet') & (c.year == 1921)]\n",
|
|
"len(c)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"<div class=\"alert alert-success\">\n",
|
|
" <b>EXERCISE</b>: List the supporting roles (having n=2) played by Cary Grant in the 1940s, in order by year.\n",
|
|
"</div>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 43,
|
|
"metadata": {
|
|
"clear_cell": true,
|
|
"collapsed": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>name</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>character</th>\n",
|
|
" <th>n</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>778275</th>\n",
|
|
" <td>My Favorite Wife</td>\n",
|
|
" <td>1940</td>\n",
|
|
" <td>Cary Grant</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Nick</td>\n",
|
|
" <td>2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>778285</th>\n",
|
|
" <td>Penny Serenade</td>\n",
|
|
" <td>1941</td>\n",
|
|
" <td>Cary Grant</td>\n",
|
|
" <td>actor</td>\n",
|
|
" <td>Roger Adams</td>\n",
|
|
" <td>2</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" title year name type character n\n",
|
|
"778275 My Favorite Wife 1940 Cary Grant actor Nick 2\n",
|
|
"778285 Penny Serenade 1941 Cary Grant actor Roger Adams 2"
|
|
]
|
|
},
|
|
"execution_count": 43,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"c = cast\n",
|
|
"c = c[c.name == 'Cary Grant']\n",
|
|
"c = c[c.year // 10 == 194]\n",
|
|
"c = c[c.n == 2]\n",
|
|
"c = c.sort('year')\n",
|
|
"c"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"celltoolbar": "Nbtutor - export exercises",
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.5.2"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0
|
|
}
|