import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas import Series
import statsmodels #FOR NEXT STEP -- RUNNING REGRESSIONS 
import statsmodels.api as sm
import statsmodels.formula.api as smf   #FOR USING 'R'-STYLE FORMULAS FOR REGRESSIONS

# Display settings: show every column when a DataFrame is printed
# (None removes the column limit) and render floats with two decimals.
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.2f' % x)

# Report the library versions in use. print() is called with a single
# argument so the statement behaves the same on Python 2 and Python 3.
print(pd.__version__)
print(statsmodels.__version__)

# Load worksheet 'Sheet1' of fb.xls; header=0 takes the column names from
# the first row of the sheet.
df = pd.read_excel('fb.xls', 'Sheet1', header=0)
print(len(df))
# Outside a notebook cell a bare expression displays nothing, so the
# inspection steps below are printed explicitly.
print(df.head(2))

# Column labels, first as an Index and then as a plain list.
print(df.columns)
print(df.columns.tolist())

# Columns retained for the rest of the analysis; everything else is dropped.
keep_columns = [
    'hashtag_count', 'like_count_14days', 'comment_count_14days',
    'share_count_14days', 'source', 'source_External', 'video_dummy',
    'picture_dummy', 'Total_Revenue', 'Log_of_Total_Revenue',
    'followers_count', 'Log_of_Followers',
]
df = df[keep_columns]

# Row count, column count and a two-row preview of the reduced frame.
print(len(df))
print(len(df.columns))
print(df.head(2))

# Compute the summary statistics once and reuse them; df is not modified
# in this section, so every view below derives from the same result.
summary = df.describe()
print(summary)

# `DataFrame.describe?` is IPython-only syntax and a SyntaxError in plain
# Python; help() shows the same docstring in any interpreter.
help(DataFrame.describe)

# Attributes and methods available on the describe() result.
print(dir(summary))

# Same statistics rounded to two decimals.
rounded = np.round(summary, 2)
print(rounded)

# Transposed: one row per variable, one column per statistic.
print(rounded.T)

# Transposed and restricted to a subset of the statistics.
print(rounded.T[['count', 'mean', 'std', 'min', 'max']])

# Alternative spelling: .transpose() is the long form of .T
print(rounded.transpose())

# Statistics kept in the exported tables, in output column order.
stat_columns = ['count', 'mean', 'std', 'min', 'max']

# Compute the statistics once; df is not modified in this section.
full_stats = df.describe()

# NOTE: the three writes below all target 'summary stats.csv'; each one
# overwrites the previous, so only the last (rounded) version remains.

# Unrounded: no float_format is passed, so to_csv does not round the
# values (the display.float_format option only affects printed output).
full_stats.transpose().to_csv('summary stats.csv', sep=',')

# Unrounded, restricted to the selected statistics.
full_stats.transpose()[stat_columns].to_csv('summary stats.csv', sep=',')

# Rounded to two decimal places before writing.
np.round(full_stats, 2).T[stat_columns].to_csv('summary stats.csv', sep=',')

# Same rounded table for a subset of the variables, written to its own file.
cols = ['hashtag_count', 'like_count_14days']
partial_stats = np.round(df[cols].describe(), 2)
partial_stats.T[stat_columns].to_csv('summary stats (partial).csv', sep=',')