import pandas as pd
import matplotlib.pyplot as plt
# Wikipedia article whose components table lists the Dow Jones 30 constituents.
url = "https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average"

# read_html returns a list of DataFrames; `match` keeps only the tables whose
# text contains "Symbol".
tables = pd.read_html(io=url, match=r'Symbol')

# There should be only one table.
print(len(tables))
1
# The first (and expected only) matching table is the constituents list.
dj30 = tables[0]
# Preview the first five rows of the raw scrape.
dj30.head(n=5)
Company | Exchange | Symbol | Industry | Date added | Notes | Index weighting | |
---|---|---|---|---|---|---|---|
0 | 3M | NYSE | MMM | Conglomerate | 1976-08-09 | As Minnesota Mining and Manufacturing | 2.88% |
1 | American Express | NYSE | AXP | Financial services | 1982-08-30 | NaN | 3.56% |
2 | Amgen | NASDAQ | AMGN | Biopharmaceutical | 2020-08-31 | NaN | 4.88% |
3 | Apple | NASDAQ | AAPL | Information technology | 2015-03-19 | NaN | 3.15% |
4 | Boeing | NYSE | BA | Aerospace and defense | 1987-03-12 | NaN | 3.40% |
# Shorten the scraped column headers to the short names used from here on.
dj30.rename(
    columns={
        'Company': 'name',
        'Exchange': 'exchg',
        'Symbol': 'ticker',
        'Industry': 'industry',
        'Date added': 'date_added',
        'Index weighting': 'weight',
    },
    inplace=True,
)
# Strip the literal percent sign. regex=False states the literal match
# explicitly instead of relying on the pandas-version-dependent default.
dj30['weight'] = dj30['weight'].str.replace('%', '', regex=False)
# The free-text notes are not used in the rest of the analysis. A list is the
# conventional list-like for `columns`.
dj30.drop(columns=['Notes'], inplace=True)
dj30.head()
name | exchg | ticker | industry | date_added | weight | |
---|---|---|---|---|---|---|
0 | 3M | NYSE | MMM | Conglomerate | 1976-08-09 | 2.88 |
1 | American Express | NYSE | AXP | Financial services | 1982-08-30 | 3.56 |
2 | Amgen | NASDAQ | AMGN | Biopharmaceutical | 2020-08-31 | 4.88 |
3 | Apple | NASDAQ | AAPL | Information technology | 2015-03-19 | 3.15 |
4 | Boeing | NYSE | BA | Aerospace and defense | 1987-03-12 | 3.40 |
# Cast the weight column (percentage text with the '%' already removed) to
# float so it can be sorted and plotted numerically. A direct cast is used:
# going through str first would turn a missing value such as None into the
# unparsable text 'None'.
dj30['weight'] = dj30['weight'].astype(float)
# Show the resulting column dtypes to confirm `weight` is now float64.
dj30.dtypes
name object exchg object ticker object industry object date_added object weight float64 dtype: object
# Number of constituents listed on each exchange, largest group first.
(
    dj30.groupby('exchg')[['ticker']]
    .count()
    .sort_values('ticker', ascending=False)
)
ticker | |
---|---|
exchg | |
NYSE | 23 |
NASDAQ | 7 |
# Bar chart of the number of constituents per exchange, tallest bar first.
(
    dj30.groupby('exchg')[['ticker']]
    .count()
    .sort_values('ticker', ascending=False)
    .plot(kind='bar', figsize=(6, 4), grid=True, legend=False)
)
plt.xticks(rotation=0)  # keep the exchange names horizontal
plt.xlabel('Stock Exchanges', fontsize=15)
plt.show()
# Number of constituents in each industry, most common industry first.
(
    dj30.groupby('industry')[['ticker']]
    .count()
    .sort_values('ticker', ascending=False)
)
ticker | |
---|---|
industry | |
Information technology | 5 |
Financial services | 4 |
Retailing | 2 |
Pharmaceutical industry | 2 |
Conglomerate | 2 |
Aerospace and defense | 1 |
Home Improvement | 1 |
Semiconductor industry | 1 |
Petroleum industry | 1 |
Managed health care | 1 |
Insurance | 1 |
Food industry | 1 |
Biopharmaceutical | 1 |
Fast-moving consumer goods | 1 |
Drink industry | 1 |
Construction and Mining | 1 |
Clothing industry | 1 |
Chemical industry | 1 |
Broadcasting and entertainment | 1 |
Telecommunications industry | 1 |
# Bar chart of the number of constituents per industry, tallest bar first.
# The figure is wide because there is one bar per industry label.
(
    dj30.groupby('industry')[['ticker']]
    .count()
    .sort_values('ticker', ascending=False)
    .plot(kind='bar', figsize=(25, 5), grid=True, legend=False)
)
plt.xticks(rotation=60, fontsize=15)  # slant the long industry names
plt.xlabel('Industry', fontsize=20)
plt.show()
# Preview the first five rows of the cleaned table.
dj30.head(n=5)
name | exchg | ticker | industry | date_added | weight | |
---|---|---|---|---|---|---|
0 | 3M | NYSE | MMM | Conglomerate | 1976-08-09 | 2.88 |
1 | American Express | NYSE | AXP | Financial services | 1982-08-30 | 3.56 |
2 | Amgen | NASDAQ | AMGN | Biopharmaceutical | 2020-08-31 | 4.88 |
3 | Apple | NASDAQ | AAPL | Information technology | 2015-03-19 | 3.15 |
4 | Boeing | NYSE | BA | Aerospace and defense | 1987-03-12 | 3.40 |
# Ten heaviest constituents by index weight, one bar per company name.
(
    dj30.sort_values('weight', ascending=False)
    .head(10)
    .plot(kind='bar', x='name', y='weight', legend=False, figsize=(10, 4))
)
plt.xticks(rotation=60, fontsize=12)
plt.xlabel('Top 10 weights in Dow Jones 30', fontsize=15)
plt.ylabel('weight (%)', fontsize=12)
plt.show()
# List the constituents from longest-standing to most recently added.
# date_added holds YYYY-MM-DD text (dtype object), so the plain string sort
# is also chronological.
dj30.sort_values(by='date_added', ascending=True)
name | exchg | ticker | industry | date_added | weight | |
---|---|---|---|---|---|---|
22 | Procter & Gamble | NYSE | PG | Fast-moving consumer goods | 1932-05-26 | 3.15 |
0 | 3M | NYSE | MMM | Conglomerate | 1976-08-09 | 2.88 |
19 | Merck | NYSE | MRK | Pharmaceutical industry | 1979-06-29 | 1.65 |
14 | IBM | NYSE | IBM | Information technology | 1979-06-29 | 2.69 |
1 | American Express | NYSE | AXP | Financial services | 1982-08-30 | 3.56 |
18 | McDonald's | NYSE | MCD | Food industry | 1985-10-30 | 4.89 |
4 | Boeing | NYSE | BA | Aerospace and defense | 1987-03-12 | 3.40 |
8 | Coca-Cola | NYSE | KO | Drink industry | 1987-03-12 | 1.28 |
5 | Caterpillar | NYSE | CAT | Construction and Mining | 1991-05-06 | 4.19 |
9 | Disney | NYSE | DIS | Broadcasting and entertainment | 1991-05-06 | 2.32 |
17 | JPMorgan Chase | NYSE | JPM | Financial services | 1991-05-06 | 2.45 |
16 | Johnson & Johnson | NYSE | JNJ | Pharmaceutical industry | 1997-03-17 | 3.60 |
29 | Walmart | NYSE | WMT | Retailing | 1997-03-17 | 3.04 |
15 | Intel | NASDAQ | INTC | Semiconductor industry | 1999-11-01 | 0.91 |
12 | Home Depot | NYSE | HD | Home Improvement | 1999-11-01 | 5.90 |
20 | Microsoft | NASDAQ | MSFT | Information technology | 1999-11-01 | 5.43 |
26 | Verizon | NYSE | VZ | Telecommunications industry | 2004-04-08 | 0.97 |
6 | Chevron | NYSE | CVX | Petroleum industry | 2008-02-19 | 3.05 |
7 | Cisco | NASDAQ | CSCO | Information technology | 2009-06-08 | 1.00 |
24 | Travelers | NYSE | TRV | Insurance | 2009-06-08 | 3.38 |
25 | UnitedHealth | NYSE | UNH | Managed health care | 2012-09-24 | 10.14 |
11 | Goldman Sachs | NYSE | GS | Financial services | 2013-09-20 | 6.22 |
21 | Nike | NYSE | NKE | Clothing industry | 2013-09-20 | 2.48 |
27 | Visa | NYSE | V | Financial services | 2013-09-20 | 4.06 |
3 | Apple | NASDAQ | AAPL | Information technology | 2015-03-19 | 3.15 |
28 | Walgreens Boots Alliance | NASDAQ | WBA | Retailing | 2018-06-26 | 0.88 |
10 | Dow | NYSE | DOW | Chemical industry | 2019-04-02 | 1.32 |
13 | Honeywell | NASDAQ | HON | Conglomerate | 2020-08-31 | 3.71 |
2 | Amgen | NASDAQ | AMGN | Biopharmaceutical | 2020-08-31 | 4.88 |
23 | Salesforce | NYSE | CRM | Information technology | 2020-08-31 | 3.38 |
# Persist the cleaned constituents table to dj30.csv in the working directory.
# With to_csv's defaults the row index is written as the first column.
dj30.to_csv(path_or_buf='dj30.csv')
# Preview the first five rows of the frame that was just saved.
dj30.head(n=5)
name | exchg | ticker | industry | date_added | weight | |
---|---|---|---|---|---|---|
0 | 3M | NYSE | MMM | Conglomerate | 1976-08-09 | 2.88 |
1 | American Express | NYSE | AXP | Financial services | 1982-08-30 | 3.56 |
2 | Amgen | NASDAQ | AMGN | Biopharmaceutical | 2020-08-31 | 4.88 |
3 | Apple | NASDAQ | AAPL | Information technology | 2015-03-19 | 3.15 |
4 | Boeing | NYSE | BA | Aerospace and defense | 1987-03-12 | 3.40 |