In [1]:

import numpy as np
import pandas as pd

In [2]:

# Sample of daily opening prices, one row per (ticker, date) pair.
# Dates are given as plain strings here.
df = pd.DataFrame(
    [
        ('AAPL', '2015-12-30', 426.23),
        ('AAPL', '2015-12-31', 427.81),
        ('MSFT', '2015-12-30', 42.3),
        ('IBM', '2015-12-30', 101.65),
        ('YHOO', '2015-12-30', 35.53),
    ],
    columns=['ticker', 'date', 'open'],
)

Exercise:

Select the open column as a Series using attribute lookup
Select the open column as a Series using dict-style lookup
Select the date column as a DataFrame

In [3]:

# Attribute lookup: the `open` column as a Series.
res1a = df.open

# Dict-style lookup with a single label: the same column, again as a Series.
res1b = df['open']

# Indexing with a *list* of labels returns a DataFrame rather than a Series.
# The exercise asks for the `date` column here.
res1c = df[['date']]

Exercise:

Select the date and open columns for all rows whose ticker is AAPL.
Assign to the variable df1 a new DataFrame with ticker as the index.
Assign to the variable df2 a new DataFrame with date as the index. Create this DataFrame from df1 with a single statement.
Sort df2 by the index values.

In [4]:

# AAPL rows only, restricted to the `date` and `open` columns.
res2a = df.loc[df['ticker'].eq('AAPL'), ['date', 'open']]

# New frame indexed by ticker; set_index returns a new object by default.
df1 = df.set_index(keys='ticker')

# Built from df1 in a single statement: move `ticker` back into a regular
# column, then promote `date` to the index.
df2 = df1.reset_index(drop=False).set_index(keys='date')

In [5]:

# Order the rows of df2 by their index labels (the dates), ascending.
df2_sorted = df2.sort_index(axis=0, ascending=True)

Exercise:

Create a copy of df called df3. Add a new column of NaNs to df3 called close. Set close equal to open in every row where open is greater than 100.
Sort df3 by its close values.

In [6]:

# Work on a copy so the original `df` is not modified by this exercise.
df3 = df.copy()

# Initialise the new column with NaN, as the exercise asks.
df3['close'] = np.nan

# `open` values above 100, still labelled by their original row index.
gt100 = df3.loc[df3['open'] > 100, 'open']

# Write only the matching rows; every other row keeps its NaN.
# Label-based assignment is used instead of `df3.close = ...` because
# attribute assignment silently creates a plain Python attribute (not a
# column) whenever the column does not already exist.
df3.loc[gt100.index, 'close'] = gt100

# Final exercise step: sort by `close`. sort_values returns a new frame, so
# `df3` keeps its original row order for the display below.
df3_sorted = df3.sort_values('close')

df3

Out[6]:

	ticker	date	open	close
0	AAPL	2015-12-30	426.23	426.23
1	AAPL	2015-12-31	427.81	427.81
2	MSFT	2015-12-30	42.30	NaN
3	IBM	2015-12-30	101.65	101.65
4	YHOO	2015-12-30	35.53	NaN