# define a function to download the data if not already downloaded, and then read it in
import pandas_datareader
from pandas import DataFrame, Series
import pandas as pd
import os
from urllib.request import urlretrieve
def getdata_read_or_download(filename, source_URL, force_download=False):
    '''Read a CSV file into pandas, downloading it first when needed.

    The data is read from the local file `filename`. If that file does not
    exist, or if `force_download` is set, it is first fetched from
    `source_URL` and stored at `filename`, then read.

    The download is written to a temporary sibling file (`filename` plus
    ".part") and only moved over `filename` once the transfer has finished,
    so a failed or interrupted download never replaces a good local copy
    with a truncated one.

    Parameters:
    ===========
    filename : string (or path-like)
        location of the already-downloaded data; a relative path is
        resolved against the current working directory
    source_URL : string
        location of the data on the internet
    force_download : boolean (optional)
        if true, re-download the data even when the local file exists
        (use this when the local copy is corrupt or out of date)

    Returns:
    ========
    dataframe : pandas dataframe
        the contents of the CSV file, as parsed by `pd.read_csv`

    Raises:
    =======
    Whatever `urlretrieve` raises when the download fails (for example
    `urllib.error.URLError`), and whatever `pd.read_csv` raises when the
    local file cannot be parsed.
    '''
    target_path = os.fspath(filename)
    if force_download or not os.path.exists(target_path):
        # Stage the download next to the target so that os.replace stays on
        # one filesystem and swaps the finished file in as a single step.
        partial_path = target_path + ".part"
        try:
            urlretrieve(source_URL, partial_path)
            os.replace(partial_path, target_path)
        finally:
            # After a successful replace the staging file is already gone;
            # after a failure, remove whatever partial data was written.
            if os.path.exists(partial_path):
                os.remove(partial_path)
    return pd.read_csv(target_path)
# NOTE(review): `!command` runs in a temporary subshell, so this `cd` presumably
# does not change the notebook's working directory (`%cd` would). The target is
# also named after the function defined above rather than an obvious directory.
# Confirm whether this line is needed at all.
!cd getdata_read_or_download
# Print the working directory that relative filenames such as "pandp.csv"
# are resolved against.
! pwd
/Users/delong/Dropbox/jupyter notebook files (.ipynb)/2017-08-05-delong-jupyter
# Test: load the 2014 data. The file is fetched from the web only when
# "pandp.csv" is not already present locally.
ccuds_pandp_data = getdata_read_or_download(
    "pandp.csv",
    "http://delong.typepad.com/2017-08-15-distance-to-frontier-2014-3.csv",
)
# Alternative that skips the local copy and reads straight from the URL:
# ccuds_pandp_data = pd.read_csv(
# 'http://delong.typepad.com/2017-08-15-distance-to-frontier-2014-3.csv')
# Test: print the column names, dtypes and non-null counts to check that
# the data arrived in a comprehensible format...
ccuds_pandp_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 189 entries, 0 to 188
Data columns (total 4 columns):
code                               189 non-null object
country                            189 non-null object
distance_to_frontier_2014          189 non-null float64
national_income_per_capita_2014    189 non-null int64
dtypes: float64(1), int64(1), object(2)
memory usage: 6.0+ KB
# Test: check to see if the data looks like the right data...
# (a bare expression on the last line of a notebook cell is rendered as
# that cell's output)
ccuds_pandp_data
| code | country | distance_to_frontier_2014 | national_income_per_capita_2014 |
---|---|---|---|---|
0 | TCD | Chad | 32.06 | 2141 |
1 | CAF | Central African Republic | 32.75 | 578 |
2 | ERI | Eritrea | 32.81 | 1140 |
3 | SSD | South Sudan | 34.07 | 2574 |
4 | LBY | Libya | 35.43 | 14887 |
5 | ZAR | Congo, Dem. Rep. | 37.80 | 768 |
6 | VEN | Venezuela, RB | 38.81 | 16666 |
7 | COG | Congo, Rep. | 40.24 | 5905 |
8 | AFG | Afghanistan | 40.78 | 1877 |
9 | HTI | Haiti | 42.82 | 1670 |
10 | NER | Niger | 42.89 | 923 |
11 | AGO | Angola | 43.03 | 7546 |
12 | MMR | Myanmar | 43.14 | 3727 |
13 | SEN | Senegal | 44.07 | 2206 |
14 | GNB | Guinea-Bissau | 44.14 | 1363 |
15 | BEN | Benin | 44.19 | 1779 |
16 | TMP | Timor-Leste | 44.71 | 2173 |
17 | ZWE | Zimbabwe | 44.87 | 1773 |
18 | TJK | Tajikistan | 44.88 | 2533 |
19 | GIN | Guinea | 44.94 | 1179 |
20 | NIGE | Nigeria | 45.01 | 5607 |
21 | BFA | Burkina Faso | 45.41 | 1606 |
22 | TGO | Togo | 45.52 | 1387 |
23 | BANG | Bangladesh | 45.57 | 2991 |
24 | MRT | Mauritania | 46.23 | 3718 |
25 | MDG | Madagascar | 46.41 | 1371 |
26 | SYR | Syrian Arab Republic | 46.41 | 5105 |
27 | SUR | Suriname | 46.46 | 16649 |
28 | ETH | Ethiopia | 47.56 | 1432 |
29 | GNQ | Equatorial Guinea | 47.76 | 30783 |
... | ... | ... | ... | ... |
159 | BEL | Belgium | 74.23 | 40885 |
160 | PRT | Portugal | 74.54 | 26055 |
161 | ARE | United Arab Emirates | 75.26 | 60578 |
162 | CHE | Switzerland | 75.57 | 55776 |
163 | LTU | Lithuania | 75.88 | 25708 |
164 | THA | Thailand | 75.99 | 13986 |
165 | EST | Estonia | 76.29 | 25865 |
166 | NLD | Netherlands | 77.10 | 45281 |
167 | MUS | Mauritius | 77.18 | 17731 |
168 | AUT | Austria | 77.84 | 43906 |
169 | LVA | Latvia | 78.24 | 22460 |
170 | JAP | Japan | 78.39 | 35635 |
171 | TWN | Taiwan, China | 79.24 | 41376 |
172 | DEU | Germany | 79.55 | 43444 |
173 | CAN | Canada | 80.52 | 42817 |
174 | MYS | Malaysia | 81.20 | 23579 |
175 | AUS | Australia | 81.69 | 43219 |
176 | GEO | Georgia | 82.09 | 7233 |
177 | US | United States | 82.14 | 52118 |
178 | FIN | Finland | 82.18 | 38569 |
179 | SWE | Sweden | 82.51 | 44029 |
180 | ISL | Iceland | 83.04 | 41237 |
181 | IRL | Ireland | 83.07 | 46633 |
182 | NOR | Norway | 83.56 | 64020 |
183 | KOR | Korea, Rep. | 83.92 | 33629 |
184 | GBR | United Kingdom | 85.61 | 37614 |
185 | DNK | Denmark | 85.71 | 42777 |
186 | HKG | Hong Kong SAR, China | 88.67 | 52552 |
187 | NZL | New Zealand | 89.32 | 33538 |
188 | SGP | Singapore | 91.24 | 78958 |
189 rows × 4 columns