# Exploratory analysis and K-Means segmentation of AT&T (ticker 'T') daily
# stock data downloaded from Yahoo Finance.
#
# NOTE: the original notebook cell ran `!pip install -q yfinance`. That is an
# IPython shell escape, not Python, so install the dependency beforehand
# (`pip install yfinance`, or `%pip install -q yfinance` in a notebook cell).

# Importing necessary libraries
from math import pi

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yfinance as yf
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# (summary column, subplot title) for each panel of the cluster dashboard,
# listed in row-major order of the 2x2 grid.
DASHBOARD_PANELS = (
    ('Adj Close', 'Average Adjusted Close Price by Cluster'),
    ('Volume', 'Average Volume by Cluster'),
    ('7_day_MA_Close', 'Average 7-day MA Close by Cluster'),
    ('30_day_MA_Close', 'Average 30-day MA Close by Cluster'),
)


def plot_cluster_dashboard(summary):
    """Draw a 2x2 grid of bar charts, one per entry in DASHBOARD_PANELS.

    Args:
        summary: DataFrame indexed by cluster (id or name) that contains
            every column named in DASHBOARD_PANELS.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    # axes.flat walks the grid row by row, matching DASHBOARD_PANELS order.
    for ax, (column, panel_title) in zip(axes.flat, DASHBOARD_PANELS):
        sns.barplot(x=summary.index, y=column, data=summary, ax=ax)
        ax.set_title(panel_title)
    plt.tight_layout()
    plt.show()


def create_radar_chart(data, title):
    """Show a filled radar (polar) chart for one row of values.

    Args:
        data: pandas Series; its index supplies the axis labels and its
            values the radius plotted on each axis.
        title: Text displayed above the chart.
    """
    labels = data.index
    num_vars = len(labels)

    # One evenly spaced angle per variable; repeat the first point at the
    # end so the filled polygon closes.
    angles = np.linspace(0, 2 * pi, num_vars, endpoint=False).tolist()
    angles += angles[:1]
    values = data.tolist()
    values += values[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    ax.fill(angles, values, color='blue', alpha=0.25)
    ax.set_yticklabels([])
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(labels)
    ax.set_title(title, size=14, color='blue', y=1.1)
    plt.show()


# Downloading historical data for AT&T (a major telecom company).
# auto_adjust=False is passed explicitly so that the 'Adj Close' column used
# below is present regardless of the installed yfinance version's default.
data = yf.download('T', start='2020-01-01', end='2023-01-01', auto_adjust=False)
if data.empty:
    raise RuntimeError("yfinance returned no data for ticker 'T'")
# Some yfinance releases return (field, ticker) MultiIndex columns even for
# a single ticker; keep only the field level so data['Close'] is a Series.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)
print(data.head())

# Plotting the distribution of stock prices (Close and Adj Close).
# seaborn builds the legend from the column names; overriding it with
# plt.legend([...]) can attach the labels to the wrong artists.
plt.figure(figsize=(12, 6))
sns.histplot(data[['Close', 'Adj Close']], kde=True, element='step')
plt.title('Distribution of Stock Prices')
plt.xlabel('Stock Price')
plt.ylabel('Frequency')
plt.show()

# Calculating 7-day and 30-day moving averages for 'Close' and 'Volume'
for column in ('Close', 'Volume'):
    for window in (7, 30):
        data[f'{window}_day_MA_{column}'] = (
            data[column].rolling(window=window).mean()
        )

# Displaying the first few rows of the modified dataset
print(data.head())

# Dropping rows with NaN values (the first 29 rows lack a 30-day average).
# .copy() makes this an independent frame so adding 'Cluster' below does not
# raise SettingWithCopyWarning.
data_clustering = data.dropna().copy()

# Selecting features for clustering
features = ['7_day_MA_Close', '30_day_MA_Close',
            '7_day_MA_Volume', '30_day_MA_Volume']
X = data_clustering[features]

# K-Means uses Euclidean distance. Share volumes are typically orders of
# magnitude larger than prices, so without standardisation the price
# features would have almost no influence on the clusters.
X_scaled = StandardScaler().fit_transform(X)

# Fitting KMeans model (n_init pinned because the library default differs
# between scikit-learn versions).
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0).fit(X_scaled)

# Adding cluster labels to the data used for clustering
data_clustering['Cluster'] = kmeans.labels_

# Displaying the first few rows of data with cluster labels
print(data_clustering.head())

# Grouping the data by cluster and calculating the mean for each feature
cluster_summary = data_clustering.groupby('Cluster').mean()

# Displaying the statistical summary of each cluster
print(cluster_summary)

# Creating a dashboard to visualize cluster insights
plot_cluster_dashboard(cluster_summary)

# Renaming cluster labels for better interpretation.
# NOTE(review): K-Means label ids are arbitrary, so check cluster_summary
# before trusting that each name matches the cluster it is attached to.
cluster_names = {
    0: 'Stable Segment',
    1: 'Active/Premium Segment',
    2: 'Volatile/Riskier Segment',
}
cluster_summary = cluster_summary.rename(index=cluster_names)

# Creating a dashboard with renamed cluster labels
plot_cluster_dashboard(cluster_summary)

# Normalizing the data for radar chart (min-max per column). A column that
# is identical across clusters has zero range; dividing by 1 instead maps it
# to 0 rather than NaN.
column_min = cluster_summary.min()
column_range = (cluster_summary.max() - column_min).replace(0, 1)
cluster_summary_normalized = (cluster_summary - column_min) / column_range

# Creating radar charts for each cluster
for cluster, row in cluster_summary_normalized.iterrows():
    create_radar_chart(row, f'Radar Chart for {cluster}')