In [2]:
import pandas
In [3]:
# Location of the pharmaceutical drug spending CSV hosted on DataHub.
DATA_URL = "https://pkgstore.datahub.io/core/pharmaceutical-drug-spending/data_csv/data/db46fb3c420e7100e1d2b1f973e2cbcd/data_csv.csv"

# Download the CSV and parse it into a DataFrame used by every later cell.
csvfile = pandas.read_csv(DATA_URL)
In [4]:
# Preview the first five rows to check the columns and value formats.
csvfile.head(n=5)
Out[4]:
LOCATION TIME PC_HEALTHXP PC_GDP USD_CAP FLAG_CODES TOTAL_SPEND
0 AUS 1971 15.992 0.727 35.720 NaN 462.11
1 AUS 1972 15.091 0.686 36.056 NaN 475.11
2 AUS 1973 15.117 0.681 39.871 NaN 533.47
3 AUS 1974 14.771 0.755 47.559 NaN 652.65
4 AUS 1975 11.849 0.682 47.561 NaN 660.76
In [5]:
# Confirm that the loaded object is a pandas DataFrame.
type(csvfile)
Out[5]:
pandas.core.frame.DataFrame
In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
csvfile.describe()
Out[6]:
TIME PC_HEALTHXP PC_GDP USD_CAP TOTAL_SPEND
count 1036.000000 1036.000000 1036.000000 1036.000000 1036.000000
mean 1996.809846 16.406307 1.170948 295.046989 11765.423118
std 12.498330 6.389064 0.465635 209.152134 34878.286123
min 1970.000000 5.545000 0.207000 3.160000 6.290000
25% 1987.000000 11.516250 0.768250 111.098500 728.180000
50% 1999.000000 14.967500 1.141500 266.332000 2349.570000
75% 2007.000000 20.590500 1.523250 446.109500 7778.180000
max 2016.000000 40.239000 2.797000 1162.399000 373009.910000
In [7]:
# Positional row slice: rows 5 through 14 (the end bound is exclusive).
csvfile.iloc[5:15]
Out[7]:
LOCATION TIME PC_HEALTHXP PC_GDP USD_CAP FLAG_CODES TOTAL_SPEND
5 AUS 1976 10.920 0.630 46.908 NaN 658.26
6 AUS 1977 10.087 0.613 47.649 NaN 676.23
7 AUS 1978 9.958 0.591 50.799 NaN 729.37
8 AUS 1979 8.981 0.523 49.766 NaN 722.30
9 AUS 1980 9.263 0.540 56.972 NaN 837.03
10 AUS 1981 9.388 0.548 65.390 NaN 976.08
11 AUS 1982 9.248 0.564 67.606 NaN 1026.12
12 AUS 1983 9.589 0.580 74.083 NaN 1138.58
13 AUS 1984 9.465 0.569 77.270 NaN 1201.08
14 AUS 1985 9.560 0.580 84.134 NaN 1325.78
In [8]:
# Keep only rows whose PC_HEALTHXP is below 10, then skip the first match
# and show the next four (positions 1 through 4 of the filtered frame).
(
    csvfile
    .query('PC_HEALTHXP < 10')
    .iloc[1:5]
)
Out[8]:
LOCATION TIME PC_HEALTHXP PC_GDP USD_CAP FLAG_CODES TOTAL_SPEND
8 AUS 1979 8.981 0.523 49.766 NaN 722.30
9 AUS 1980 9.263 0.540 56.972 NaN 837.03
10 AUS 1981 9.388 0.548 65.390 NaN 976.08
11 AUS 1982 9.248 0.564 67.606 NaN 1026.12
In [9]:
import altair
In [10]:
a = [1, 2, 3, 4, 5]

# Step through the even positions (0, 2, 4) directly rather than testing
# every index, printing the element found at each one.
for i in range(0, len(a), 2):
    v = a[i]
    print(v)
1
3
5
In [11]:
# Show the first five rows again as a reminder of the available columns.
csvfile.head(n=5)
Out[11]:
LOCATION TIME PC_HEALTHXP PC_GDP USD_CAP FLAG_CODES TOTAL_SPEND
0 AUS 1971 15.992 0.727 35.720 NaN 462.11
1 AUS 1972 15.091 0.686 36.056 NaN 475.11
2 AUS 1973 15.117 0.681 39.871 NaN 533.47
3 AUS 1974 14.771 0.755 47.559 NaN 652.65
4 AUS 1975 11.849 0.682 47.561 NaN 660.76

Now we will visualise the data as a scatter chart of `TOTAL_SPEND` against `PC_HEALTHXP`, because plotting is a quick way to spot patterns that the tables above do not show.

In [12]:
# Scatter plot of the spending data: one point per row of csvfile,
# positioned by PC_HEALTHXP on the x axis and TOTAL_SPEND on the y axis.
altair.Chart(csvfile).mark_point().encode(
    x=altair.X('PC_HEALTHXP'),
    y=altair.Y('TOTAL_SPEND'),
)
Out[12]:
In [13]:
# Show the installed Altair version, for reproducibility.
altair.__version__
Out[13]:
'2.0.0rc2'