#!/usr/bin/env python
# coding: utf-8

# #### Author: 马肖
# #### E-Mail: maxiaoscut@aliyun.com
# #### GitHub: https://github.com/Albertsr

# #### Generate the experimental dataset

# In[1]:

import datetime as dt

import numpy as np
import pandas as pd

from create_time_feature import create_time_feature as ctf

# Generate the timestamps: `size` points ending now, spaced slightly more
# than one day apart so that the time of day drifts across the sample.
today = dt.datetime.today()
size = 600
full_time = pd.date_range(end=today, freq='1D 45min 13s 451521us', periods=size)

# Generate the transaction amounts (seeded so the amounts are reproducible).
np.random.seed(size)
consume_num = np.random.uniform(0, 1000, size)

# strftime("%Y-%m-%d") extracts the date part of each timestamp as a string;
# pd.DatetimeIndex (or pd.to_datetime) turns those strings back into a
# date-only index.
consume_date = pd.DatetimeIndex(full_time.strftime("%Y-%m-%d"))

# Build the data frame: indexed by date, with the full timestamp kept as a column.
columns = {"Amount": consume_num, "Time": full_time}
sales = pd.DataFrame(columns, index=consume_date)
sales.head()


# In[2]:

# NOTE(review): ctf is defined in another module; the cells below read
# "Season" and "Time_Range" from `sales`, so it presumably adds those columns
# to `sales` in place -- confirm against create_time_feature.
ctf(sales, sales['Time'], 3).tail(5)


# #### Analysis combining time-based and numeric features

# In[3]:

# Total amount for one month, selected via partial-string indexing.
# The month is derived from the data rather than hard-coded: the index always
# ends at today's date, so a fixed month such as "2018-05" eventually falls
# outside the generated range and raises KeyError.
latest_month = sales.index.max().strftime("%Y-%m")
print(sales.loc[latest_month, "Amount"].sum())


# In[4]:

# Quarterly (3-month) sum and mean.
# Only numeric columns are aggregated: the datetime "Time" column cannot be
# summed, and recent pandas versions raise instead of silently dropping it.
# String aggregator names replace np.sum/np.mean, whose use in agg is
# deprecated; the resulting column labels ("sum", "mean") are the same.
sales.select_dtypes("number").resample("3M").agg(["sum", "mean"])


# In[5]:

# Sum and mean of Amount for every Season x Time_Range combination.
sales.pivot_table(
    index=["Season"],
    values=["Amount"],
    columns=["Time_Range"],
    aggfunc=["sum", "mean"],
)


# In[6]:

# One-hot encode the time-of-day bucket.
# To attach the indicator columns to the frame: sales.join(time_sub_dummies)
time_sub_dummies = pd.get_dummies(sales["Time_Range"])
sales["Time_Range"].value_counts()