This project focuses on applying machine learning techniques to stock market analysis and trading. The goal is to leverage natural language processing (NLP) of financial news combined with quantitative trading algorithms to generate actionable insights for making profitable trades. Specifically, the project aims to analyze sentiment in news headlines about 30 companies in the Dow Jones Industrial Average (DJIA) index, and correlate this sentiment with actual stock price movements over corresponding time periods. Sentiment refers to the tone of the text - whether it is positive, negative or neutral about a company. By quantifying and correlating sentiment signals in headlines with price changes, the system can identify predictive indicators.
These sentiment-based indicators can then be combined with traditional technical analysis strategies, like detecting trends and momentum in price charts, to generate more robust trading recommendations. The project implements a technical trading algorithm called MACD (Moving Average Convergence Divergence) which detects crossover buy and sell signals in price charts. By merging sentiment analysis predictions with the MACD technical signals, the system can produce 1-5 recommendations for each stock ranging from "Strong Buy" to "Strong Sell". The recommendations are projected 1 week into the future based on current market conditions.
# Importing necessary libraries
import requests
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import finnhub
import sqlite3
from datetime import datetime, timedelta, timezone
from tqdm import tqdm
tqdm.pandas()
### DATE SETTINGS ###
# Function to convert date string to Unix timestamp
def unix_timestamp_from_date(date_str, date_format="%Y-%m-%d"):
dt = datetime.strptime(date_str, date_format)
unix_timestamp = dt.replace(tzinfo=timezone.utc).timestamp()
return int(unix_timestamp)
# Get the current date and time
current_datetime = datetime.now()
# Format the current date
# current_date = current_datetime.strftime("%Y-%m-%d")
# Set the current date to predefined date
# current_date = datetime.now().strftime("%Y-%m-%d")
current_date = '2023-09-01'
# Calculate the date 30 days ago from the current date
from_datetime = current_datetime - timedelta(days=30)
from_datetime
# Format the from_date
from_date_str = from_datetime.strftime("%Y-%m-%d")
# Convert to Unix timestamps
from_date = unix_timestamp_from_date(from_date_str)
to_date = unix_timestamp_from_date(current_date)
### FINNHUB API SETTINGS ###
# API key setup (replace 'YOUR_API_KEY' with the actual API key)
api_key = 'API_KEY'
# DJIA Tickers and Companies
symbols = ['AAPL', 'MSFT', 'JNJ', 'PG', 'V', 'RTX', 'UNH', 'VZ', 'CSCO', 'KO',
'DOW', 'TRV', 'JPM', 'INTC', 'WBA', 'CVX', 'CAT', 'MMM', 'GS',
'NKE', 'HD', 'MRK', 'DIS', 'IBM', 'MCD', 'BA', 'AMGN', 'CRM', 'XOM', 'PFE']
companies = ['Apple', 'Microsoft', 'Johnson & Johnson', 'Procter & Gamble',
'Visa', 'Raytheon', 'UnitedHealth', 'Verizon', 'Cisco', 'Coca-Cola',
'Dow Chemical', 'Travelers', 'JPMorgan Chase', 'Intel', 'Walgreens',
'Chevron', 'Caterpillar', '3M', 'Goldman Sachs', 'Nike', 'Home Depot',
'Merck', 'Disney', 'IBM', 'McDonalds', 'Boeing', 'Amgen', 'Salesforce',
'Exxon Mobil', 'Pfizer']
# Initialize an empty list to store news headlines as DataFrames
df_list = []
# Loop through each DJIA company
for symbol, company in zip(symbols, companies):
# API request to get news headlines
url = f'https://finnhub.io/api/v1/company-news?symbol={symbol}&from={current_date}&to={current_date}&token={api_key}'
response = requests.get(url)
# Error checking for API response
if response.status_code != 200:
print(f"Failed to get data for {symbol}")
continue
# Extract news headlines
news_data = response.json()
news_df = pd.DataFrame(news_data)
# Convert Unix timestamps to human-readable datetime
news_df['datetime'] = pd.to_datetime(news_df['datetime'], unit='s')
# Add 'company' and 'symbol' columns
news_df['company'] = company
news_df['symbol'] = symbol
# Keep only the columns we need
# news_df = news_df[['company', 'symbol', 'datetime', 'headline']]
# Add to list of DataFrames
df_list.append(news_df)
# Concatenate all the collected DataFrames
df_headlines = pd.concat(df_list, ignore_index=True)
# Create new column 'analysis' by concatenating 'headline' and 'summary'
df_headlines['analysis'] = df_headlines['headline'] + ' ' + df_headlines['summary']
### SENTIMENT ANALYSIS SETTINGS ###
# Initialize finBERT model and tokenizer
# For finBERT, we used the model identifier from Hugging Face
model_name = 'ProsusAI/finbert'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Function to compute sentiment
def compute_sentiment(headline):
inputs = tokenizer(headline, return_tensors="pt", max_length=512, truncation=True)
with torch.no_grad():
outputs = model(**inputs)
logits = outputs.logits
sentiment = torch.softmax(logits, dim=1).numpy()
# Assuming 0: negative, 1: neutral, 2: positive
return ['negative', 'neutral', 'positive'][sentiment.argmax()]
# Reminder: df_headlines is our DataFrame with news headlines
# Add a new column for sentiment
df_headlines['sentiment'] = df_headlines['analysis'].progress_apply(compute_sentiment)
print(f"Data for {current_date} has been fetched and inserted into the dataframe.")
/shared-libs/python3.9/py/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm Downloading (…)okenizer_config.json: 100%|██████████| 252/252 [00:00<00:00, 53.2kB/s] Downloading (…)lve/main/config.json: 100%|██████████| 758/758 [00:00<00:00, 153kB/s] Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 737kB/s] Downloading (…)cial_tokens_map.json: 100%|██████████| 112/112 [00:00<00:00, 108kB/s] Downloading pytorch_model.bin: 100%|██████████| 438M/438M [00:01<00:00, 366MB/s] 100%|██████████| 474/474 [00:38<00:00, 12.37it/s]Data for 2023-09-01 has been fetched and inserted into the dataframe.
display(df_headlines)
category | datetime | headline | id | image | related | source | summary | url | company | symbol | analysis | sentiment | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | company | 2023-09-01 23:13:08 | Exclusive-Arm signs up big tech firms for IPO ... | 122382404 | https://media.zenfs.com/en/reuters-finance.com... | AAPL | Yahoo | NEW YORK (Reuters) -Customers of Arm Holdings ... | https://finnhub.io/api/news?id=4cd8bf8f9821bfe... | Apple | AAPL | Exclusive-Arm signs up big tech firms for IPO ... | negative |
1 | company | 2023-09-01 23:08:56 | SoftBank Lines Up Apple, Nvidia as Strategic A... | 122382018 | https://s.yimg.com/ny/api/res/1.2/HorrfiXzXAyV... | AAPL | Yahoo | (Bloomberg) -- SoftBank Group Corp. has lined ... | https://finnhub.io/api/news?id=376135bb5ff5d10... | Apple | AAPL | SoftBank Lines Up Apple, Nvidia as Strategic A... | positive |
2 | company | 2023-09-01 22:00:37 | Tech suppliers in China skip seasonal hiring r... | 122385415 | https://s.yimg.com/cv/apiv2/social/images/yaho... | AAPL | Yahoo | The Amazon supplier has not had to make any sp... | https://finnhub.io/api/news?id=ef6e62f55088bc3... | Apple | AAPL | Tech suppliers in China skip seasonal hiring r... | neutral |
3 | company | 2023-09-01 21:45:17 | Apple (AAPL) Outpaces Stock Market Gains: What... | 122385416 | https://media.zenfs.com/en/zacks.com/3496571a8... | AAPL | Yahoo | Apple (AAPL) closed at $189.46 in the latest t... | https://finnhub.io/api/news?id=ea85159d6abc081... | Apple | AAPL | Apple (AAPL) Outpaces Stock Market Gains: What... | negative |
4 | company | 2023-09-01 21:36:00 | Globalstar Satellites Could Score for Small De... | 122378763 | https://s.yimg.com/ny/api/res/1.2/fcqof7cpXY5H... | AAPL | Yahoo | Moves in the satellite company's share price p... | https://finnhub.io/api/news?id=780b55e2afd6e44... | Apple | AAPL | Globalstar Satellites Could Score for Small De... | negative |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
469 | company | 2023-09-01 08:22:00 | Horizon Therapeutics gains after FTC settles w... | 122368387 | PFE | Seeking Alpha | Looking for stock market analysis and research... | https://finnhub.io/api/news?id=e45fd97f6120653... | Pfizer | PFE | Horizon Therapeutics gains after FTC settles w... | positive | |
470 | company | 2023-09-01 07:59:00 | Horizon Therapeutics gains amid reports FTC se... | 122368388 | PFE | Seeking Alpha | Looking for stock market analysis and research... | https://finnhub.io/api/news?id=eb8ebe3b7547b3c... | Pfizer | PFE | Horizon Therapeutics gains amid reports FTC se... | negative | |
471 | company | 2023-09-01 07:46:02 | EU authorises use of adapted Pfizer/BioNtech v... | 122357955 | https://s.yimg.com/cv/apiv2/social/images/yaho... | PFE | Yahoo | The European Commission has authorised an upda... | https://finnhub.io/api/news?id=88a5a0a9076c24e... | Pfizer | PFE | EU authorises use of adapted Pfizer/BioNtech v... | positive |
472 | company | 2023-09-01 05:59:00 | Pfizer, BioNTech granted EU nod for new Omicro... | 122377748 | PFE | Seeking Alpha | Looking for stock market analysis and research... | https://finnhub.io/api/news?id=700f854610d9880... | Pfizer | PFE | Pfizer, BioNTech granted EU nod for new Omicro... | positive | |
473 | company | 2023-09-01 01:00:00 | Noteworthy Friday Option Activity: PFE, C, GOOG | 122380060 | PFE | Stock Options Channel | Looking for stock market analysis and research... | https://finnhub.io/api/news?id=57d1995f05d1154... | Pfizer | PFE | Noteworthy Friday Option Activity: PFE, C, GOO... | positive |
474 rows × 13 columns
# Convert 'datetime' to date
df_headlines['date'] = pd.to_datetime(df_headlines['datetime'], unit='s').dt.date
# Group by 'company' and 'date', then count sentiment occurrences
grouped_df = df_headlines.groupby(['company', 'date', 'sentiment']).size().reset_index(name='count')
# Initialize an empty DataFrame to store final results
result_df = pd.DataFrame()
# Loop through unique companies and dates
for name, group in grouped_df.groupby(['company', 'date']):
company, date = name
total_count = group['count'].sum()
# Calculate sentiment score
sentiment_score = ""
for idx, row in group.iterrows():
if row['count'] / total_count > 0.5:
sentiment_score = row['sentiment']
break
if not sentiment_score:
sentiment_score = "neutral"
# Append to result DataFrame
result_df = result_df.append({'company': company, 'date': date, 'sentiment_score': sentiment_score}, ignore_index=True)
display(result_df)
company | date | sentiment_score | |
---|---|---|---|
0 | 3M | 2023-09-01 | positive |
1 | Amgen | 2023-09-01 | positive |
2 | Apple | 2023-09-01 | positive |
3 | Boeing | 2023-09-01 | neutral |
4 | Caterpillar | 2023-09-01 | negative |
5 | Chevron | 2023-09-01 | neutral |
6 | Cisco | 2023-09-01 | positive |
7 | Coca-Cola | 2023-09-01 | positive |
8 | Disney | 2023-09-01 | neutral |
9 | Dow Chemical | 2023-09-01 | neutral |
10 | Exxon Mobil | 2023-09-01 | neutral |
11 | Goldman Sachs | 2023-09-01 | positive |
12 | Home Depot | 2023-09-01 | neutral |
13 | IBM | 2023-09-01 | positive |
14 | Intel | 2023-09-01 | positive |
15 | JPMorgan Chase | 2023-09-01 | positive |
16 | Johnson & Johnson | 2023-09-01 | positive |
17 | McDonalds | 2023-09-01 | positive |
18 | Merck | 2023-09-01 | neutral |
19 | Microsoft | 2023-09-01 | positive |
20 | Nike | 2023-09-01 | positive |
21 | Pfizer | 2023-09-01 | positive |
22 | Procter & Gamble | 2023-09-01 | negative |
23 | Raytheon | 2023-09-01 | neutral |
24 | Salesforce | 2023-09-01 | positive |
25 | Travelers | 2023-09-01 | negative |
26 | UnitedHealth | 2023-09-01 | neutral |
27 | Verizon | 2023-09-01 | positive |
28 | Visa | 2023-09-01 | neutral |
29 | Walgreens | 2023-09-01 | neutral |
import finnhub
import os
import time
from datetime import datetime, timezone
from datetime import date
from zoneinfo import ZoneInfo
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sqlite3
# Function to convert date string to Unix timestamp
def unix_timestamp_from_date(date_str, date_format="%Y-%m-%d"):
dt = datetime.strptime(date_str, date_format)
unix_timestamp = dt.replace(tzinfo=timezone.utc).timestamp()
return int(unix_timestamp)
start_date = '2023-08-01'
#current_date = '2023-09-01'
# Convert to Unix timestamps
from_date = unix_timestamp_from_date(start_date)
to_date = unix_timestamp_from_date(current_date)
# DJIA Tickers and Companies
symbols = ['AAPL', 'MSFT', 'JNJ', 'PG', 'V', 'RTX', 'UNH', 'VZ', 'CSCO', 'KO',
'DOW', 'TRV', 'JPM', 'INTC', 'WBA', 'CVX', 'CAT', 'MMM', 'GS',
'NKE', 'HD', 'MRK', 'DIS', 'IBM', 'MCD', 'BA', 'AMGN', 'CRM', 'XOM', 'PFE']
companies = ['Apple', 'Microsoft', 'Johnson & Johnson', 'Procter & Gamble',
'Visa', 'Raytheon', 'UnitedHealth', 'Verizon', 'Cisco', 'Coca-Cola',
'Dow Chemical', 'Travelers', 'JPMorgan Chase', 'Intel', 'Walgreens',
'Chevron', 'Caterpillar', '3M', 'Goldman Sachs', 'Nike', 'Home Depot',
'Merck', 'Disney', 'IBM', 'McDonalds', 'Boeing', 'Amgen', 'Salesforce',
'Exxon Mobil', 'Pfizer']
# Set up client
finnhub_client = finnhub.Client(api_key='API_KEY')
df_list = []
resolution = 'D'
# make request and print
for symbol, company in zip(symbols, companies):
res = finnhub_client.stock_candles(
symbol,
resolution,
from_date,
to_date
)
# Ditch the status code
try:
res.pop('s')
except KeyError as e:
print("Already ditched status code")
stock_data = res
stock_df = pd.DataFrame(stock_data)
# Convert Unix timestamps to human-readable datetime
stock_df['t'] = pd.to_datetime(stock_df['t'], unit='s')
# Add 'company' and 'symbol' columns
stock_df['company'] = company
stock_df['symbol'] = symbol
stock_df['prices'] = stock_df.pop('c')
stock_df['dates'] = stock_df.pop('t')
# Keep only the columns we need
stock_df = stock_df[['company', 'symbol', 'dates', 'prices']]
# Add to list of DataFrames
df_list.append(stock_df)
df_stock_data = pd.concat(df_list, ignore_index=True)
display(df_stock_data)
company | symbol | dates | prices | |
---|---|---|---|---|
0 | Apple | AAPL | 2023-08-01 | 195.605 |
1 | Apple | AAPL | 2023-08-02 | 192.580 |
2 | Apple | AAPL | 2023-08-03 | 191.170 |
3 | Apple | AAPL | 2023-08-04 | 181.990 |
4 | Apple | AAPL | 2023-08-07 | 178.850 |
... | ... | ... | ... | ... |
715 | Pfizer | PFE | 2023-08-28 | 36.210 |
716 | Pfizer | PFE | 2023-08-29 | 36.150 |
717 | Pfizer | PFE | 2023-08-30 | 35.900 |
718 | Pfizer | PFE | 2023-08-31 | 35.380 |
719 | Pfizer | PFE | 2023-09-01 | 35.780 |
720 rows × 4 columns
df_stock_data['Daily Return'] = df_stock_data.groupby('symbol')['prices'].pct_change()
df_stock_data.sort_values(['symbol', 'dates'], inplace=True)
# Define the short-term and long-term periods for EMA
short_term = 12
long_term = 26
# Calculate the short-term and long-term exponential moving averages (EMAs)
df_stock_data['ShortEMA'] = df_stock_data.groupby('symbol')['Daily Return'].transform(lambda x: x.ewm(span=short_term).mean())
df_stock_data['LongEMA'] = df_stock_data.groupby('symbol')['Daily Return'].transform(lambda x: x.ewm(span=long_term).mean())
# Calculate the MACD line (the difference between short-term and long-term EMAs)
df_stock_data['MACD'] = df_stock_data['ShortEMA'] - df_stock_data['LongEMA']
# Define the signal line period
signal_period = 9
# Calculate the signal line (9-day EMA of the MACD)
df_stock_data['SignalLine'] = df_stock_data.groupby('symbol')['MACD'].transform(lambda x: x.ewm(span=signal_period).mean())
# Calculate the MACD histogram (the difference between MACD and Signal Line)
df_stock_data['MACD_Histogram'] = df_stock_data['MACD'] - df_stock_data['SignalLine']
df_stock_data['Delta Histogram'] = df_stock_data.groupby('symbol')['MACD_Histogram'].pct_change()
def get_stock_recommendation(row):
if row['Delta Histogram'] > 0.00:
return "Buy"
else:
return "Sell"
df_stock_data['stock_rec'] = df_stock_data.apply(get_stock_recommendation, axis=1)
# Sort by date and ticker
#df_stock_data = df_stock_data.sort_values(by=['symbol', 'dates'])
# Drop duplicates keeping last occurrence
df_stock_data = df_stock_data.drop_duplicates(subset='company', keep='last')
display(df_stock_data)
company | symbol | dates | prices | Daily Return | ShortEMA | LongEMA | MACD | SignalLine | MACD_Histogram | Delta Histogram | stock_rec | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
23 | Apple | AAPL | 2023-09-01 | 189.46 | 0.008463 | 0.006467 | 0.003095 | 0.003372 | 0.003035 | 0.000337 | -0.414676 | Sell |
647 | Amgen | AMGN | 2023-09-01 | 256.71 | 0.001443 | -0.000339 | 0.001466 | -0.001805 | -0.002154 | 0.000349 | 2.415509 | Buy |
623 | Boeing | BA | 2023-09-01 | 223.40 | -0.002812 | -0.002538 | -0.002502 | -0.000037 | 0.000285 | -0.000321 | -0.158081 | Sell |
407 | Caterpillar | CAT | 2023-09-01 | 286.25 | 0.018212 | 0.004967 | 0.002225 | 0.002742 | 0.001584 | 0.001157 | 1.122546 | Buy |
671 | Salesforce | CRM | 2023-09-01 | 221.53 | 0.000316 | 0.006914 | 0.003505 | 0.003408 | 0.002646 | 0.000762 | -0.592881 | Sell |
215 | Cisco | CSCO | 2023-09-01 | 57.84 | 0.008544 | 0.005881 | 0.005032 | 0.000849 | 0.000511 | 0.000338 | 0.249152 | Buy |
383 | Chevron | CVX | 2023-09-01 | 164.30 | 0.019863 | 0.004139 | 0.002160 | 0.001979 | 0.000674 | 0.001306 | 1.989457 | Buy |
551 | Disney | DIS | 2023-09-01 | 81.64 | -0.024379 | -0.006020 | -0.004606 | -0.001414 | -0.000346 | -0.001068 | -8.142865 | Sell |
263 | Dow Chemical | DOW | 2023-09-01 | 55.29 | 0.013380 | 0.002495 | 0.001066 | 0.001429 | 0.000887 | 0.000542 | -4.757586 | Sell |
455 | Goldman Sachs | GS | 2023-09-01 | 327.40 | -0.000946 | -0.000374 | -0.001975 | 0.001601 | 0.001526 | 0.000075 | -0.751903 | Sell |
503 | Home Depot | HD | 2023-09-01 | 333.08 | 0.008417 | 0.002531 | 0.001256 | 0.001275 | 0.000638 | 0.000636 | 0.588479 | Buy |
575 | IBM | IBM | 2023-09-01 | 147.94 | 0.007560 | 0.003462 | 0.002454 | 0.001008 | 0.000840 | 0.000168 | -4.152970 | Sell |
335 | Intel | INTC | 2023-09-01 | 36.61 | 0.041833 | 0.011753 | 0.006045 | 0.005707 | 0.002679 | 0.003029 | 0.793822 | Buy |
71 | Johnson & Johnson | JNJ | 2023-09-01 | 160.48 | -0.007422 | -0.005262 | -0.003950 | -0.001313 | -0.001131 | -0.000181 | 0.102452 | Buy |
311 | JPMorgan Chase | JPM | 2023-09-01 | 146.82 | 0.003349 | -0.001874 | -0.002421 | 0.000546 | 0.000431 | 0.000115 | -1.434142 | Sell |
239 | Coca-Cola | KO | 2023-09-01 | 59.31 | -0.008691 | -0.002913 | -0.002134 | -0.000779 | -0.000026 | -0.000753 | 0.484531 | Buy |
599 | McDonalds | MCD | 2023-09-01 | 280.94 | -0.000747 | -0.001454 | -0.001417 | -0.000036 | 0.000223 | -0.000260 | -0.336243 | Sell |
431 | 3M | MMM | 2023-09-01 | 106.95 | 0.002625 | 0.006066 | 0.002624 | 0.003442 | 0.002888 | 0.000554 | -0.585099 | Sell |
527 | Merck | MRK | 2023-09-01 | 109.84 | 0.007891 | 0.001089 | 0.001501 | -0.000413 | -0.000377 | -0.000036 | -0.948195 | Sell |
47 | Microsoft | MSFT | 2023-09-01 | 328.66 | 0.002746 | 0.001777 | 0.000697 | 0.001081 | 0.001193 | -0.000113 | -0.067368 | Sell |
479 | Nike | NKE | 2023-09-01 | 102.36 | 0.006391 | 0.000420 | -0.001615 | 0.002036 | 0.000776 | 0.001260 | 0.009254 | Buy |
719 | Pfizer | PFE | 2023-09-01 | 35.78 | 0.011306 | -0.001179 | -0.000430 | -0.000749 | -0.000736 | -0.000013 | -0.988759 | Sell |
95 | Procter & Gamble | PG | 2023-09-01 | 154.51 | 0.001101 | 0.000716 | 0.000057 | 0.000659 | 0.000442 | 0.000217 | -0.281676 | Sell |
143 | Raytheon | RTX | 2023-09-01 | 86.28 | 0.002789 | 0.000973 | 0.000213 | 0.000760 | 0.000604 | 0.000156 | 0.429147 | Buy |
287 | Travelers | TRV | 2023-09-01 | 162.30 | 0.006636 | 0.000011 | -0.001317 | 0.001328 | 0.000760 | 0.000568 | 1.231905 | Buy |
167 | UnitedHealth | UNH | 2023-09-01 | 476.24 | -0.000713 | -0.004915 | -0.003673 | -0.001242 | -0.000627 | -0.000615 | -0.513259 | Sell |
119 | Visa | V | 2023-09-01 | 248.11 | 0.009891 | 0.003441 | 0.002471 | 0.000970 | 0.000625 | 0.000345 | -8.793524 | Sell |
191 | Verizon | VZ | 2023-09-01 | 34.86 | -0.003431 | 0.004667 | 0.003470 | 0.001198 | 0.001270 | -0.000072 | -1.096585 | Sell |
359 | Walgreens | WBA | 2023-09-01 | 23.43 | -0.074279 | -0.017044 | -0.013230 | -0.003814 | -0.000976 | -0.002838 | -3.490151 | Sell |
695 | Exxon Mobil | XOM | 2023-09-01 | 113.52 | 0.020955 | 0.006182 | 0.004116 | 0.002065 | 0.000593 | 0.001472 | 0.995660 | Buy |
# Convert 'dates' to date
df_stock_data['dates'] = pd.to_datetime(df_stock_data['dates'], unit='s').dt.date
display(df_stock_data)
company | symbol | dates | prices | Daily Return | ShortEMA | LongEMA | MACD | SignalLine | MACD_Histogram | Delta Histogram | stock_rec | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
23 | Apple | AAPL | 2023-09-01 | 189.46 | 0.008463 | 0.006467 | 0.003095 | 0.003372 | 0.003035 | 0.000337 | -0.414676 | Sell |
647 | Amgen | AMGN | 2023-09-01 | 256.71 | 0.001443 | -0.000339 | 0.001466 | -0.001805 | -0.002154 | 0.000349 | 2.415509 | Buy |
623 | Boeing | BA | 2023-09-01 | 223.40 | -0.002812 | -0.002538 | -0.002502 | -0.000037 | 0.000285 | -0.000321 | -0.158081 | Sell |
407 | Caterpillar | CAT | 2023-09-01 | 286.25 | 0.018212 | 0.004967 | 0.002225 | 0.002742 | 0.001584 | 0.001157 | 1.122546 | Buy |
671 | Salesforce | CRM | 2023-09-01 | 221.53 | 0.000316 | 0.006914 | 0.003505 | 0.003408 | 0.002646 | 0.000762 | -0.592881 | Sell |
215 | Cisco | CSCO | 2023-09-01 | 57.84 | 0.008544 | 0.005881 | 0.005032 | 0.000849 | 0.000511 | 0.000338 | 0.249152 | Buy |
383 | Chevron | CVX | 2023-09-01 | 164.30 | 0.019863 | 0.004139 | 0.002160 | 0.001979 | 0.000674 | 0.001306 | 1.989457 | Buy |
551 | Disney | DIS | 2023-09-01 | 81.64 | -0.024379 | -0.006020 | -0.004606 | -0.001414 | -0.000346 | -0.001068 | -8.142865 | Sell |
263 | Dow Chemical | DOW | 2023-09-01 | 55.29 | 0.013380 | 0.002495 | 0.001066 | 0.001429 | 0.000887 | 0.000542 | -4.757586 | Sell |
455 | Goldman Sachs | GS | 2023-09-01 | 327.40 | -0.000946 | -0.000374 | -0.001975 | 0.001601 | 0.001526 | 0.000075 | -0.751903 | Sell |
503 | Home Depot | HD | 2023-09-01 | 333.08 | 0.008417 | 0.002531 | 0.001256 | 0.001275 | 0.000638 | 0.000636 | 0.588479 | Buy |
575 | IBM | IBM | 2023-09-01 | 147.94 | 0.007560 | 0.003462 | 0.002454 | 0.001008 | 0.000840 | 0.000168 | -4.152970 | Sell |
335 | Intel | INTC | 2023-09-01 | 36.61 | 0.041833 | 0.011753 | 0.006045 | 0.005707 | 0.002679 | 0.003029 | 0.793822 | Buy |
71 | Johnson & Johnson | JNJ | 2023-09-01 | 160.48 | -0.007422 | -0.005262 | -0.003950 | -0.001313 | -0.001131 | -0.000181 | 0.102452 | Buy |
311 | JPMorgan Chase | JPM | 2023-09-01 | 146.82 | 0.003349 | -0.001874 | -0.002421 | 0.000546 | 0.000431 | 0.000115 | -1.434142 | Sell |
239 | Coca-Cola | KO | 2023-09-01 | 59.31 | -0.008691 | -0.002913 | -0.002134 | -0.000779 | -0.000026 | -0.000753 | 0.484531 | Buy |
599 | McDonalds | MCD | 2023-09-01 | 280.94 | -0.000747 | -0.001454 | -0.001417 | -0.000036 | 0.000223 | -0.000260 | -0.336243 | Sell |
431 | 3M | MMM | 2023-09-01 | 106.95 | 0.002625 | 0.006066 | 0.002624 | 0.003442 | 0.002888 | 0.000554 | -0.585099 | Sell |
527 | Merck | MRK | 2023-09-01 | 109.84 | 0.007891 | 0.001089 | 0.001501 | -0.000413 | -0.000377 | -0.000036 | -0.948195 | Sell |
47 | Microsoft | MSFT | 2023-09-01 | 328.66 | 0.002746 | 0.001777 | 0.000697 | 0.001081 | 0.001193 | -0.000113 | -0.067368 | Sell |
479 | Nike | NKE | 2023-09-01 | 102.36 | 0.006391 | 0.000420 | -0.001615 | 0.002036 | 0.000776 | 0.001260 | 0.009254 | Buy |
719 | Pfizer | PFE | 2023-09-01 | 35.78 | 0.011306 | -0.001179 | -0.000430 | -0.000749 | -0.000736 | -0.000013 | -0.988759 | Sell |
95 | Procter & Gamble | PG | 2023-09-01 | 154.51 | 0.001101 | 0.000716 | 0.000057 | 0.000659 | 0.000442 | 0.000217 | -0.281676 | Sell |
143 | Raytheon | RTX | 2023-09-01 | 86.28 | 0.002789 | 0.000973 | 0.000213 | 0.000760 | 0.000604 | 0.000156 | 0.429147 | Buy |
287 | Travelers | TRV | 2023-09-01 | 162.30 | 0.006636 | 0.000011 | -0.001317 | 0.001328 | 0.000760 | 0.000568 | 1.231905 | Buy |
167 | UnitedHealth | UNH | 2023-09-01 | 476.24 | -0.000713 | -0.004915 | -0.003673 | -0.001242 | -0.000627 | -0.000615 | -0.513259 | Sell |
119 | Visa | V | 2023-09-01 | 248.11 | 0.009891 | 0.003441 | 0.002471 | 0.000970 | 0.000625 | 0.000345 | -8.793524 | Sell |
191 | Verizon | VZ | 2023-09-01 | 34.86 | -0.003431 | 0.004667 | 0.003470 | 0.001198 | 0.001270 | -0.000072 | -1.096585 | Sell |
359 | Walgreens | WBA | 2023-09-01 | 23.43 | -0.074279 | -0.017044 | -0.013230 | -0.003814 | -0.000976 | -0.002838 | -3.490151 | Sell |
695 | Exxon Mobil | XOM | 2023-09-01 | 113.52 | 0.020955 | 0.006182 | 0.004116 | 0.002065 | 0.000593 | 0.001472 | 0.995660 | Buy |
clean_stock_df = df_stock_data[['company', 'symbol', 'dates', 'stock_rec']]
clean_stock_df['primary_key'] = clean_stock_df['company'] + '_' + clean_stock_df['dates'].astype(str)
display(clean_stock_df)
/tmp/ipykernel_144/1530004256.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy clean_stock_df['primary_key'] = clean_stock_df['company'] + '_' + clean_stock_df['dates'].astype(str)
company | symbol | dates | stock_rec | primary_key | |
---|---|---|---|---|---|
23 | Apple | AAPL | 2023-09-01 | Sell | Apple_2023-09-01 |
647 | Amgen | AMGN | 2023-09-01 | Buy | Amgen_2023-09-01 |
623 | Boeing | BA | 2023-09-01 | Sell | Boeing_2023-09-01 |
407 | Caterpillar | CAT | 2023-09-01 | Buy | Caterpillar_2023-09-01 |
671 | Salesforce | CRM | 2023-09-01 | Sell | Salesforce_2023-09-01 |
215 | Cisco | CSCO | 2023-09-01 | Buy | Cisco_2023-09-01 |
383 | Chevron | CVX | 2023-09-01 | Buy | Chevron_2023-09-01 |
551 | Disney | DIS | 2023-09-01 | Sell | Disney_2023-09-01 |
263 | Dow Chemical | DOW | 2023-09-01 | Sell | Dow Chemical_2023-09-01 |
455 | Goldman Sachs | GS | 2023-09-01 | Sell | Goldman Sachs_2023-09-01 |
503 | Home Depot | HD | 2023-09-01 | Buy | Home Depot_2023-09-01 |
575 | IBM | IBM | 2023-09-01 | Sell | IBM_2023-09-01 |
335 | Intel | INTC | 2023-09-01 | Buy | Intel_2023-09-01 |
71 | Johnson & Johnson | JNJ | 2023-09-01 | Buy | Johnson & Johnson_2023-09-01 |
311 | JPMorgan Chase | JPM | 2023-09-01 | Sell | JPMorgan Chase_2023-09-01 |
239 | Coca-Cola | KO | 2023-09-01 | Buy | Coca-Cola_2023-09-01 |
599 | McDonalds | MCD | 2023-09-01 | Sell | McDonalds_2023-09-01 |
431 | 3M | MMM | 2023-09-01 | Sell | 3M_2023-09-01 |
527 | Merck | MRK | 2023-09-01 | Sell | Merck_2023-09-01 |
47 | Microsoft | MSFT | 2023-09-01 | Sell | Microsoft_2023-09-01 |
479 | Nike | NKE | 2023-09-01 | Buy | Nike_2023-09-01 |
719 | Pfizer | PFE | 2023-09-01 | Sell | Pfizer_2023-09-01 |
95 | Procter & Gamble | PG | 2023-09-01 | Sell | Procter & Gamble_2023-09-01 |
143 | Raytheon | RTX | 2023-09-01 | Buy | Raytheon_2023-09-01 |
287 | Travelers | TRV | 2023-09-01 | Buy | Travelers_2023-09-01 |
167 | UnitedHealth | UNH | 2023-09-01 | Sell | UnitedHealth_2023-09-01 |
119 | Visa | V | 2023-09-01 | Sell | Visa_2023-09-01 |
191 | Verizon | VZ | 2023-09-01 | Sell | Verizon_2023-09-01 |
359 | Walgreens | WBA | 2023-09-01 | Sell | Walgreens_2023-09-01 |
695 | Exxon Mobil | XOM | 2023-09-01 | Buy | Exxon Mobil_2023-09-01 |
clean_result_df = result_df[['company', 'date', 'sentiment_score']]
clean_result_df['primary_key'] = clean_result_df['company'] + '_' + clean_result_df['date'].astype(str)
display(clean_result_df)
company | date | sentiment_score | primary_key | |
---|---|---|---|---|
0 | 3M | 2023-09-01 | positive | 3M_2023-09-01 |
1 | Amgen | 2023-09-01 | positive | Amgen_2023-09-01 |
2 | Apple | 2023-09-01 | positive | Apple_2023-09-01 |
3 | Boeing | 2023-09-01 | neutral | Boeing_2023-09-01 |
4 | Caterpillar | 2023-09-01 | negative | Caterpillar_2023-09-01 |
5 | Chevron | 2023-09-01 | neutral | Chevron_2023-09-01 |
6 | Cisco | 2023-09-01 | positive | Cisco_2023-09-01 |
7 | Coca-Cola | 2023-09-01 | positive | Coca-Cola_2023-09-01 |
8 | Disney | 2023-09-01 | neutral | Disney_2023-09-01 |
9 | Dow Chemical | 2023-09-01 | neutral | Dow Chemical_2023-09-01 |
10 | Exxon Mobil | 2023-09-01 | neutral | Exxon Mobil_2023-09-01 |
11 | Goldman Sachs | 2023-09-01 | positive | Goldman Sachs_2023-09-01 |
12 | Home Depot | 2023-09-01 | neutral | Home Depot_2023-09-01 |
13 | IBM | 2023-09-01 | positive | IBM_2023-09-01 |
14 | Intel | 2023-09-01 | positive | Intel_2023-09-01 |
15 | JPMorgan Chase | 2023-09-01 | positive | JPMorgan Chase_2023-09-01 |
16 | Johnson & Johnson | 2023-09-01 | positive | Johnson & Johnson_2023-09-01 |
17 | McDonalds | 2023-09-01 | positive | McDonalds_2023-09-01 |
18 | Merck | 2023-09-01 | neutral | Merck_2023-09-01 |
19 | Microsoft | 2023-09-01 | positive | Microsoft_2023-09-01 |
20 | Nike | 2023-09-01 | positive | Nike_2023-09-01 |
21 | Pfizer | 2023-09-01 | positive | Pfizer_2023-09-01 |
22 | Procter & Gamble | 2023-09-01 | negative | Procter & Gamble_2023-09-01 |
23 | Raytheon | 2023-09-01 | neutral | Raytheon_2023-09-01 |
24 | Salesforce | 2023-09-01 | positive | Salesforce_2023-09-01 |
25 | Travelers | 2023-09-01 | negative | Travelers_2023-09-01 |
26 | UnitedHealth | 2023-09-01 | neutral | UnitedHealth_2023-09-01 |
27 | Verizon | 2023-09-01 | positive | Verizon_2023-09-01 |
28 | Visa | 2023-09-01 | neutral | Visa_2023-09-01 |
29 | Walgreens | 2023-09-01 | neutral | Walgreens_2023-09-01 |
merged_df = pd.merge(clean_stock_df, clean_result_df, how='inner', on='primary_key')
merged_df = merged_df[['primary_key', 'symbol', 'date', 'sentiment_score', 'stock_rec']]
display(merged_df)
primary_key | symbol | date | sentiment_score | stock_rec | |
---|---|---|---|---|---|
0 | Apple_2023-09-01 | AAPL | 2023-09-01 | positive | Sell |
1 | Amgen_2023-09-01 | AMGN | 2023-09-01 | positive | Buy |
2 | Boeing_2023-09-01 | BA | 2023-09-01 | neutral | Sell |
3 | Caterpillar_2023-09-01 | CAT | 2023-09-01 | negative | Buy |
4 | Salesforce_2023-09-01 | CRM | 2023-09-01 | positive | Sell |
5 | Cisco_2023-09-01 | CSCO | 2023-09-01 | positive | Buy |
6 | Chevron_2023-09-01 | CVX | 2023-09-01 | neutral | Buy |
7 | Disney_2023-09-01 | DIS | 2023-09-01 | neutral | Sell |
8 | Dow Chemical_2023-09-01 | DOW | 2023-09-01 | neutral | Sell |
9 | Goldman Sachs_2023-09-01 | GS | 2023-09-01 | positive | Sell |
10 | Home Depot_2023-09-01 | HD | 2023-09-01 | neutral | Buy |
11 | IBM_2023-09-01 | IBM | 2023-09-01 | positive | Sell |
12 | Intel_2023-09-01 | INTC | 2023-09-01 | positive | Buy |
13 | Johnson & Johnson_2023-09-01 | JNJ | 2023-09-01 | positive | Buy |
14 | JPMorgan Chase_2023-09-01 | JPM | 2023-09-01 | positive | Sell |
15 | Coca-Cola_2023-09-01 | KO | 2023-09-01 | positive | Buy |
16 | McDonalds_2023-09-01 | MCD | 2023-09-01 | positive | Sell |
17 | 3M_2023-09-01 | MMM | 2023-09-01 | positive | Sell |
18 | Merck_2023-09-01 | MRK | 2023-09-01 | neutral | Sell |
19 | Microsoft_2023-09-01 | MSFT | 2023-09-01 | positive | Sell |
20 | Nike_2023-09-01 | NKE | 2023-09-01 | positive | Buy |
21 | Pfizer_2023-09-01 | PFE | 2023-09-01 | positive | Sell |
22 | Procter & Gamble_2023-09-01 | PG | 2023-09-01 | negative | Sell |
23 | Raytheon_2023-09-01 | RTX | 2023-09-01 | neutral | Buy |
24 | Travelers_2023-09-01 | TRV | 2023-09-01 | negative | Buy |
25 | UnitedHealth_2023-09-01 | UNH | 2023-09-01 | neutral | Sell |
26 | Visa_2023-09-01 | V | 2023-09-01 | neutral | Sell |
27 | Verizon_2023-09-01 | VZ | 2023-09-01 | positive | Sell |
28 | Walgreens_2023-09-01 | WBA | 2023-09-01 | neutral | Sell |
29 | Exxon Mobil_2023-09-01 | XOM | 2023-09-01 | neutral | Buy |
def total_rec(row):
if row['stock_rec'] == 'Buy' and row['sentiment_score']=='positive':
return "Strong Buy"
elif row['stock_rec'] == 'Buy' and row['sentiment_score']=='neutral':
return "Buy"
elif row['stock_rec'] == 'Buy' and row['sentiment_score']=='negative':
return "Hold"
elif row['stock_rec'] == 'Sell' and row['sentiment_score']=='positive':
return "Hold"
elif row['stock_rec'] == 'Sell' and row['sentiment_score']=='neutral':
return "Sell"
else:
return "Strong Sell"
merged_df['total_rec'] = merged_df.apply(total_rec, axis=1)
display(merged_df)
primary_key | symbol | date | sentiment_score | stock_rec | total_rec | |
---|---|---|---|---|---|---|
0 | Apple_2023-09-01 | AAPL | 2023-09-01 | positive | Sell | Hold |
1 | Amgen_2023-09-01 | AMGN | 2023-09-01 | positive | Buy | Strong Buy |
2 | Boeing_2023-09-01 | BA | 2023-09-01 | neutral | Sell | Sell |
3 | Caterpillar_2023-09-01 | CAT | 2023-09-01 | negative | Buy | Hold |
4 | Salesforce_2023-09-01 | CRM | 2023-09-01 | positive | Sell | Hold |
5 | Cisco_2023-09-01 | CSCO | 2023-09-01 | positive | Buy | Strong Buy |
6 | Chevron_2023-09-01 | CVX | 2023-09-01 | neutral | Buy | Buy |
7 | Disney_2023-09-01 | DIS | 2023-09-01 | neutral | Sell | Sell |
8 | Dow Chemical_2023-09-01 | DOW | 2023-09-01 | neutral | Sell | Sell |
9 | Goldman Sachs_2023-09-01 | GS | 2023-09-01 | positive | Sell | Hold |
10 | Home Depot_2023-09-01 | HD | 2023-09-01 | neutral | Buy | Buy |
11 | IBM_2023-09-01 | IBM | 2023-09-01 | positive | Sell | Hold |
12 | Intel_2023-09-01 | INTC | 2023-09-01 | positive | Buy | Strong Buy |
13 | Johnson & Johnson_2023-09-01 | JNJ | 2023-09-01 | positive | Buy | Strong Buy |
14 | JPMorgan Chase_2023-09-01 | JPM | 2023-09-01 | positive | Sell | Hold |
15 | Coca-Cola_2023-09-01 | KO | 2023-09-01 | positive | Buy | Strong Buy |
16 | McDonalds_2023-09-01 | MCD | 2023-09-01 | positive | Sell | Hold |
17 | 3M_2023-09-01 | MMM | 2023-09-01 | positive | Sell | Hold |
18 | Merck_2023-09-01 | MRK | 2023-09-01 | neutral | Sell | Sell |
19 | Microsoft_2023-09-01 | MSFT | 2023-09-01 | positive | Sell | Hold |
20 | Nike_2023-09-01 | NKE | 2023-09-01 | positive | Buy | Strong Buy |
21 | Pfizer_2023-09-01 | PFE | 2023-09-01 | positive | Sell | Hold |
22 | Procter & Gamble_2023-09-01 | PG | 2023-09-01 | negative | Sell | Strong Sell |
23 | Raytheon_2023-09-01 | RTX | 2023-09-01 | neutral | Buy | Buy |
24 | Travelers_2023-09-01 | TRV | 2023-09-01 | negative | Buy | Hold |
25 | UnitedHealth_2023-09-01 | UNH | 2023-09-01 | neutral | Sell | Sell |
26 | Visa_2023-09-01 | V | 2023-09-01 | neutral | Sell | Sell |
27 | Verizon_2023-09-01 | VZ | 2023-09-01 | positive | Sell | Hold |
28 | Walgreens_2023-09-01 | WBA | 2023-09-01 | neutral | Sell | Sell |
29 | Exxon Mobil_2023-09-01 | XOM | 2023-09-01 | neutral | Buy | Buy |