# Importing necessary libraries
import requests
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import finnhub
import sqlite3
from datetime import datetime, timedelta, timezone
from tqdm import tqdm

tqdm.pandas()

### DATE SETTINGS ###

# Function to convert date string to Unix timestamp (midnight UTC)
def unix_timestamp_from_date(date_str, date_format="%Y-%m-%d"):
    dt = datetime.strptime(date_str, date_format)
    unix_timestamp = dt.replace(tzinfo=timezone.utc).timestamp()
    return int(unix_timestamp)

# Use the current date...
# current_date = datetime.now().strftime("%Y-%m-%d")
# ...or pin the analysis to a predefined date
current_date = '2023-09-01'

# Calculate the date 30 days before the chosen current date
# (anchored to current_date so the window stays consistent when the date is pinned)
from_datetime = datetime.strptime(current_date, "%Y-%m-%d") - timedelta(days=30)

# Format the from_date
from_date_str = from_datetime.strftime("%Y-%m-%d")

# Convert to Unix timestamps
from_date = unix_timestamp_from_date(from_date_str)
to_date = unix_timestamp_from_date(current_date)

### FINNHUB API SETTINGS ###

# API key setup (replace 'API_KEY' with the actual API key)
api_key = 'API_KEY'

# DJIA Tickers and Companies
symbols = ['AAPL', 'MSFT', 'JNJ', 'PG', 'V', 'RTX', 'UNH', 'VZ', 'CSCO', 'KO',
           'DOW', 'TRV', 'JPM', 'INTC', 'WBA', 'CVX', 'CAT', 'MMM', 'GS', 'NKE',
           'HD', 'MRK', 'DIS', 'IBM', 'MCD', 'BA', 'AMGN', 'CRM', 'XOM', 'PFE']
companies = ['Apple', 'Microsoft', 'Johnson & Johnson', 'Procter & Gamble', 'Visa',
             'Raytheon', 'UnitedHealth', 'Verizon', 'Cisco', 'Coca-Cola',
             'Dow Chemical', 'Travelers', 'JPMorgan Chase', 'Intel', 'Walgreens',
             'Chevron', 'Caterpillar', '3M', 'Goldman Sachs', 'Nike', 'Home Depot',
             'Merck', 'Disney', 'IBM', 'McDonalds', 'Boeing', 'Amgen',
             'Salesforce', 'Exxon Mobil', 'Pfizer']

# Initialize an empty list to store news headlines as DataFrames
df_list = []

# Loop through each DJIA company
for symbol, company in zip(symbols, companies):
    # API request to get news headlines
    url = (f'https://finnhub.io/api/v1/company-news'
           f'?symbol={symbol}&from={current_date}&to={current_date}&token={api_key}')
    response = requests.get(url)

    # Error checking for API response
    if response.status_code != 200:
        print(f"Failed to get data for {symbol}")
        continue

    # Extract news headlines; skip symbols with no stories for the day,
    # otherwise the column accesses below raise KeyError on an empty frame
    news_data = response.json()
    if not news_data:
        print(f"No news returned for {symbol}")
        continue
    news_df = pd.DataFrame(news_data)

    # Convert Unix timestamps to human-readable datetime
    news_df['datetime'] = pd.to_datetime(news_df['datetime'], unit='s')

    # Add 'company' and 'symbol' columns
    news_df['company'] = company
    news_df['symbol'] = symbol

    # Keep only the columns we need
    # news_df = news_df[['company', 'symbol', 'datetime', 'headline']]

    # Add to list of DataFrames
    df_list.append(news_df)

# Concatenate all the collected DataFrames
df_headlines = pd.concat(df_list, ignore_index=True)

# Create new column 'analysis' by concatenating 'headline' and 'summary'
df_headlines['analysis'] = df_headlines['headline'] + ' ' + df_headlines['summary']

### SENTIMENT ANALYSIS SETTINGS ###

# Initialize finBERT model and tokenizer
# For finBERT, we used the model identifier from Hugging Face
model_name = 'ProsusAI/finbert'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
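# Quick sanity checks (illustrative additions, not part of the original pipeline).
# unix_timestamp_from_date should map midnight UTC to the expected epoch second,
# and finBERT's class order is best read from the model config rather than assumed:
assert unix_timestamp_from_date('2023-09-01') == 1693526400  # 2023-09-01 00:00:00 UTC
print(model.config.id2label)  # {0: 'positive', 1: 'negative', 2: 'neutral'} at the time of writing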
# Function to compute sentiment
def compute_sentiment(headline):
    inputs = tokenizer(headline, return_tensors="pt", max_length=512,
                       truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    probabilities = torch.softmax(logits, dim=1).numpy()
    # Map the winning class index to its label via the model config
    # (for ProsusAI/finbert the order is positive, negative, neutral,
    # so hard-coding negative/neutral/positive would mislabel results)
    return model.config.id2label[int(probabilities.argmax())]

# Reminder: df_headlines is our DataFrame with news headlines
# Add a new column for sentiment
df_headlines['sentiment'] = df_headlines['analysis'].progress_apply(compute_sentiment)

print(f"Data for {current_date} has been fetched and inserted into the dataframe.")
display(df_headlines)

# Convert 'datetime' to date (the column is already datetime64, so no unit is needed)
df_headlines['date'] = pd.to_datetime(df_headlines['datetime']).dt.date

# Group by 'company' and 'date', then count sentiment occurrences
grouped_df = df_headlines.groupby(['company', 'date', 'sentiment']).size().reset_index(name='count')

# Collect per-company, per-date sentiment scores in a list of rows
# (DataFrame.append was removed in pandas 2.0, so we build the frame at the end)
result_rows = []

# Loop through unique companies and dates
for name, group in grouped_df.groupby(['company', 'date']):
    company, date = name
    total_count = group['count'].sum()

    # A sentiment wins the day if it accounts for more than half of the
    # headlines; otherwise the day is scored neutral
    sentiment_score = ""
    for idx, row in group.iterrows():
        if row['count'] / total_count > 0.5:
            sentiment_score = row['sentiment']
            break
    if not sentiment_score:
        sentiment_score = "neutral"

    result_rows.append({'company': company, 'date': date, 'sentiment_score': sentiment_score})

result_df = pd.DataFrame(result_rows)
display(result_df)

import finnhub
import os
import time
from datetime import datetime, timezone
from datetime import date
from zoneinfo import ZoneInfo
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sqlite3

# Function to convert date string to Unix timestamp
def unix_timestamp_from_date(date_str, date_format="%Y-%m-%d"):
    dt = datetime.strptime(date_str, date_format)
    unix_timestamp = dt.replace(tzinfo=timezone.utc).timestamp()
    return int(unix_timestamp)

start_date = '2023-08-01'
# current_date = '2023-09-01'

# Convert to Unix timestamps
from_date = unix_timestamp_from_date(start_date)
to_date = unix_timestamp_from_date(current_date)

# DJIA Tickers and Companies
symbols = ['AAPL', 'MSFT', 'JNJ', 'PG', 'V', 'RTX', 'UNH', 'VZ', 'CSCO', 'KO',
           'DOW', 'TRV', 'JPM', 'INTC', 'WBA', 'CVX', 'CAT', 'MMM', 'GS', 'NKE',
           'HD', 'MRK', 'DIS', 'IBM', 'MCD', 'BA', 'AMGN', 'CRM', 'XOM', 'PFE']
companies = ['Apple', 'Microsoft', 'Johnson & Johnson', 'Procter & Gamble', 'Visa',
             'Raytheon', 'UnitedHealth', 'Verizon', 'Cisco', 'Coca-Cola',
             'Dow Chemical', 'Travelers', 'JPMorgan Chase', 'Intel', 'Walgreens',
             'Chevron', 'Caterpillar', '3M', 'Goldman Sachs', 'Nike', 'Home Depot',
             'Merck', 'Disney', 'IBM', 'McDonalds', 'Boeing', 'Amgen',
             'Salesforce', 'Exxon Mobil', 'Pfizer']

# Set up client
finnhub_client = finnhub.Client(api_key='API_KEY')

df_list = []
resolution = 'D'

# Request daily candles for each ticker
for symbol, company in zip(symbols, companies):
    res = finnhub_client.stock_candles(symbol, resolution, from_date, to_date)

    # Skip tickers for which Finnhub returned no candles
    if res.get('s') == 'no_data':
        print(f"No candle data for {symbol}")
        continue

    # Ditch the status code
    try:
        res.pop('s')
    except KeyError:
        print("Already ditched status code")

    stock_data = res
    stock_df = pd.DataFrame(stock_data)

    # Convert Unix timestamps to human-readable datetime
    stock_df['t'] = pd.to_datetime(stock_df['t'], unit='s')

    # Add 'company' and 'symbol' columns
    stock_df['company'] = company
    stock_df['symbol'] = symbol

    # Rename the close price and timestamp columns
    stock_df['prices'] = stock_df.pop('c')
    stock_df['dates'] = stock_df.pop('t')

    # Keep only the columns we need
    stock_df = stock_df[['company', 'symbol', 'dates', 'prices']]

    # Add to list of DataFrames
    df_list.append(stock_df)
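# Sanity check (an illustrative addition, not in the original script): confirm
# every ticker actually produced candles before concatenating, since skipped
# symbols would otherwise drop out of the analysis silently.
fetched = {df['symbol'].iloc[0] for df in df_list if not df.empty}
missing = set(symbols) - fetched
if missing:
    print(f"No candle data returned for: {sorted(missing)}")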
df_stock_data = pd.concat(df_list, ignore_index=True)
display(df_stock_data)

# Sort by ticker and date first so the per-ticker returns below are computed
# in chronological order
df_stock_data.sort_values(['symbol', 'dates'], inplace=True)
df_stock_data['Daily Return'] = df_stock_data.groupby('symbol')['prices'].pct_change()

# Define the short-term and long-term periods for EMA
short_term = 12
long_term = 26

# Calculate the short-term and long-term exponential moving averages (EMAs)
df_stock_data['ShortEMA'] = df_stock_data.groupby('symbol')['Daily Return'].transform(lambda x: x.ewm(span=short_term).mean())
df_stock_data['LongEMA'] = df_stock_data.groupby('symbol')['Daily Return'].transform(lambda x: x.ewm(span=long_term).mean())

# Calculate the MACD line (the difference between short-term and long-term EMAs)
df_stock_data['MACD'] = df_stock_data['ShortEMA'] - df_stock_data['LongEMA']

# Define the signal line period
signal_period = 9

# Calculate the signal line (9-day EMA of the MACD)
df_stock_data['SignalLine'] = df_stock_data.groupby('symbol')['MACD'].transform(lambda x: x.ewm(span=signal_period).mean())

# Calculate the MACD histogram (the difference between MACD and Signal Line)
df_stock_data['MACD_Histogram'] = df_stock_data['MACD'] - df_stock_data['SignalLine']

# Day-over-day change in the histogram per ticker
df_stock_data['Delta Histogram'] = df_stock_data.groupby('symbol')['MACD_Histogram'].pct_change()

# A rising histogram reads as "Buy"; anything else (including the first,
# NaN row per ticker) reads as "Sell"
def get_stock_recommendation(row):
    if row['Delta Histogram'] > 0.00:
        return "Buy"
    else:
        return "Sell"

df_stock_data['stock_rec'] = df_stock_data.apply(get_stock_recommendation, axis=1)

# Drop duplicates keeping the last (most recent) occurrence per company
df_stock_data = df_stock_data.drop_duplicates(subset='company', keep='last')
display(df_stock_data)

# Convert 'dates' to date (the column is already datetime64)
df_stock_data['dates'] = df_stock_data['dates'].dt.date
display(df_stock_data)

# Copy the slices before adding the key column to avoid SettingWithCopyWarning
clean_stock_df = df_stock_data[['company', 'symbol', 'dates', 'stock_rec']].copy()
clean_stock_df['primary_key'] = clean_stock_df['company'] + '_' + clean_stock_df['dates'].astype(str)
display(clean_stock_df)

clean_result_df = result_df[['company', 'date', 'sentiment_score']].copy()
clean_result_df['primary_key'] = clean_result_df['company'] + '_' + clean_result_df['date'].astype(str)
display(clean_result_df)

# Inner join on the company_date key so only rows with both a technical
# signal and a sentiment score survive
merged_df = pd.merge(clean_stock_df, clean_result_df, how='inner', on='primary_key')
merged_df = merged_df[['primary_key', 'symbol', 'date', 'sentiment_score', 'stock_rec']]
display(merged_df)

# Combine the MACD signal and the news sentiment into a single recommendation
def total_rec(row):
    if row['stock_rec'] == 'Buy' and row['sentiment_score'] == 'positive':
        return "Strong Buy"
    elif row['stock_rec'] == 'Buy' and row['sentiment_score'] == 'neutral':
        return "Buy"
    elif row['stock_rec'] == 'Buy' and row['sentiment_score'] == 'negative':
        return "Hold"
    elif row['stock_rec'] == 'Sell' and row['sentiment_score'] == 'positive':
        return "Hold"
    elif row['stock_rec'] == 'Sell' and row['sentiment_score'] == 'neutral':
        return "Sell"
    else:
        return "Strong Sell"

merged_df['total_rec'] = merged_df.apply(total_rec, axis=1)
display(merged_df)
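# sqlite3 is imported above but never used; a minimal sketch of persisting the
# final recommendations locally (the database file and table names here are
# assumptions, not part of the original script):
conn = sqlite3.connect('recommendations.db')
merged_df.assign(date=merged_df['date'].astype(str)).to_sql(
    'recommendations', conn, if_exists='replace', index=False)
conn.close()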