#!/usr/bin/env python
# coding: utf-8

# # Python Intermediate: Creating a SimpleFrame Class

# # Designing Our Class
#
# SimpleFrame should make it easy for us to load, preview, manipulate, and make calculations with our data.
#
# To preview our data, we’ll need to:
# - Be able to view the first five rows
# - Be able to view the shape of our data
#
# To manipulate our data, we’ll need to:
# - Add new columns
# - Be able to apply values to columns
# - Be able to subset our data
#
# To make calculations, we’ll need to:
# - Finding the minimum
# - Finding the maximum
# - Finding the mean
# - Finding the standard deviation

# # Translating our words into objects
#
# - SimpleFrame -> Class
# - Load -> Method
# - Data -> Attribute
# - Columns -> Attribute
#
# ## Preview
#
# - View the first five rows -> Method
# - View num of rows/cols of our data -> Method
#
# ## Manipulate
#
# - Add new columns -> Method
# - Apply values to columns -> Method
# - Subset our data -> Method
#
# ## Calculations
#
# - Minimum -> Method
# - Maximum -> Method
# - Mean -> Method
# - Standard deviation -> Method

# In[2]:


import csv
from statistics import mean, stdev, median, mode


class SimpleFrame():
    '''
    A minimal table backed by a CSV file.

    After read_data() has run, ``data`` is a list of rows (each a list of
    strings as produced by csv.reader) with the header row at index 0, and
    ``columns`` is that header row (the very same list object, so columns
    added later show up in both).
    '''

    def __init__(self, filename):
        '''
        Stores the path of the CSV file; nothing is read until read_data()
        '''
        self.filename = filename

    def read_data(self):
        '''
        Reads and opens the data
        '''
        # newline="" is what the csv module asks for so that quoted fields
        # containing line breaks are parsed correctly; the with-block makes
        # sure the file handle is closed again.
        with open(self.filename, "r", newline="") as f:
            self.data = list(csv.reader(f))
        # An empty file has no header row to expose.
        self.columns = self.data[0] if self.data else []

    def _column_index(self, column_name):
        '''
        Returns the position of column_name in the header row

        Raises KeyError when the header has no such column.
        '''
        matches = [pos for pos, col in enumerate(self.data[0])
                   if col == column_name]
        if not matches:
            raise KeyError("Unknown column: {name!r}".format(name=column_name))
        # If a name occurs more than once (e.g. after new_column was called
        # twice with the same name) the last occurrence is used.
        return matches[-1]

    @staticmethod
    def _as_numbers(values):
        '''
        Returns values converted to floats, or None if any value is not numeric
        '''
        try:
            return [float(value) for value in values]
        except (TypeError, ValueError):
            return None

    def _extreme(self, column, pick):
        '''
        Shared implementation of minimum() and maximum()

        pick is the builtin min or max. Returns
        [row[1], row[2], row[column_index]] for the selected data row.
        '''
        column_index = self._column_index(column)
        rows = self.data[1:]
        values = [row[column_index] for row in rows]
        # CSV cells are strings; comparing them directly would order
        # "9" after "64238". Compare as numbers whenever the whole column is
        # numeric and fall back to plain string comparison otherwise.
        numbers = self._as_numbers(values)
        keys = values if numbers is None else numbers
        best = pick(range(len(rows)), key=keys.__getitem__)
        row = rows[best]
        return [row[1], row[2], row[column_index]]

    def head(self):
        '''
        Displays the first five rows
        '''
        # The header row is part of self.data, so it is one of the five.
        return self.data[:5]

    def shape(self):
        '''
        Returns [number of rows, number of columns]

        The row count includes the header row; the column count is the
        length of the header row.
        '''
        return [len(self.data), len(self.data[0])]

    def new_column(self, column_name):
        '''
        Appends a column: the name goes into the header, 'NA' into every other row
        '''
        for pos, row in enumerate(self.data):
            row.append(column_name if pos == 0 else 'NA')

    def apply(self, column_name, new_value):
        '''
        Sets the given column to new_value in every data row

        Raises KeyError when the column does not exist.
        '''
        column_index = self._column_index(column_name)
        for row in self.data[1:]:
            row[column_index] = new_value

    def subset(self, column_name, row_value):
        '''
        Returns the column_name value of every data row containing row_value

        A row is selected when row_value equals any one of its cells, not
        only the cell in column_name. Raises KeyError when the column does
        not exist.
        '''
        column_index = self._column_index(column_name)
        return [row[column_index] for row in self.data[1:] if row_value in row]

    def summary_stats(self, column_name):
        '''
        Prints the mean, standard deviation and median of a numeric column

        Raises KeyError when the column does not exist and ValueError when a
        cell cannot be converted to a number. statistics.stdev needs at
        least two data rows.
        '''
        column_index = self._column_index(column_name)
        # CSV cells are strings, so convert before doing arithmetic.
        num_data = [float(row[column_index]) for row in self.data[1:]]

        m = mean(num_data)
        std = stdev(num_data)
        # Not named "median": that would shadow the imported function.
        med = median(num_data)

        print("Mean is {mean}".format(mean= m))
        print("Standard Deviation is {std}".format(std= std))
        print("Median is {median}".format(median= med))

    def minimum(self, column):
        '''
        Returns [second cell, third cell, value] of the row with the smallest value

        Values are compared as numbers when every cell of the column is
        numeric, otherwise as strings. The second and third cells are
        presumably the track name and artist in the demo data (see the
        results below); confirm against the CSV header.
        Raises KeyError for an unknown column and ValueError when there are
        no data rows.
        '''
        return self._extreme(column, min)

    def maximum(self, column):
        '''
        Returns [second cell, third cell, value] of the row with the largest value

        Values are compared as numbers when every cell of the column is
        numeric, otherwise as strings. The second and third cells are
        presumably the track name and artist in the demo data (see the
        results below); confirm against the CSV header.
        Raises KeyError for an unknown column and ValueError when there are
        no data rows.
        '''
        return self._extreme(column, max)


# Demo from the original notebook. Guarded so that importing SimpleFrame from
# another module does not require music_data.csv to be present.
if __name__ == "__main__":
    s = SimpleFrame("music_data.csv")
    s.read_data()

    # In the notebook these bare expressions displayed their values.
    s.shape()
    s.columns

    s.new_column('hello')
    s.subset("Artist","Shakira")

    print(s.maximum("Streams"))
    print(s.minimum("Streams"))


# # Results
#
# The song that had the highest number of streams in one day was Despacito by Luis Fonsi with 64238 streams.
#
# The song that had the lowest number of streams in one day was Por Fin Te Encontre by Cali Y El Dandee with 1993.
#