import plotly.graph_objects as go
import plotly as py
from plotly.subplots import make_subplots
import pandas as pd
import plotly.express as px
import os
import plotly.io as pio
from IPython.display import Image
# Make SVG the default format for static image export through Kaleido.
pio.kaleido.scope.default_format = "svg"

# Directory that holds the input CSV and receives the exported images.
# File names are appended with "+", so the value must end with a separator.
# The backslash has to be doubled: a single one before the closing quote
# escapes the quote and leaves the string literal unterminated.
path = 'D:\\'
df = pd.read_csv(path + '项目数据.csv', encoding='gbk')

# Dashboard canvas, 2 x 2 grid:
#   row 1: a "domain" cell (pie) and a "polar" cell,
#   row 2: a single "xy" cell spanning both columns.
fig = make_subplots(
    rows=2,
    cols=2,
    column_widths=[0.5, 0.5],
    row_heights=[0.5, 0.5],
    specs=[
        [{"type": "domain"}, {"type": "polar"}],
        [{"type": "xy", "colspan": 2}, None],
    ],
    subplot_titles=("<b>性别</b>", "<b>时段</b>", "<b>年龄</b>"),
)

# Number of rows per distinct GENDER value.
feature = 'GENDER'
data = df[feature].value_counts()
data
M 233935 F 123874 Name: GENDER, dtype: int64
# Pie chart of the GENDER split. Slice sizes are read from the value_counts()
# result held in `data` instead of being retyped by hand, so the chart cannot
# drift from the CSV. A code that is absent from the data counts as 0.
gender_names = {'M': '男', 'F': '女'}
labels = list(gender_names.values())
values = [int(data.get(code, 0)) for code in gender_names]
subfigureGender = go.Pie(
    labels=labels,
    values=values,
    textinfo='label+percent',
    insidetextorientation='radial',
)

# Build the standalone figure once, export it, and leave it as the final
# expression so a notebook cell displays it.
testfigure = go.Figure(subfigureGender)
testfigure.write_image(path + "性别.svg", width=350, height=350, scale=1)
testfigure
# Number of rows per distinct DEPARTHOUR value, most frequent first
# (the sort options below are the value_counts() defaults, spelled out).
feature = "DEPARTHOUR"
data = df[feature].value_counts(sort=True, ascending=False)
data
8 32139 7 29878 9 29280 13 26040 15 24824 10 24529 12 24302 11 24104 14 23992 16 22337 17 20193 18 17431 19 14943 6 13293 20 12411 21 9930 22 6505 23 2587 5 1429 4 503 1 459 0 446 2 438 3 420 Name: DEPARTHOUR, dtype: int64
# Turn the counts Series into a two-column frame with fixed column names:
# "index" holds the distinct values and `feature` holds their counts.
# Both names are set explicitly because pandas >= 2.0 names the
# value_counts() result "count" and names its index after the source column,
# so a bare reset_index() would not produce an "index" column there.
data = data.rename_axis('index').rename(feature).reset_index()
data
| | index | DEPARTHOUR |
---|---|---|
0 | 8 | 32139 |
1 | 7 | 29878 |
2 | 9 | 29280 |
3 | 13 | 26040 |
4 | 15 | 24824 |
5 | 10 | 24529 |
6 | 12 | 24302 |
7 | 11 | 24104 |
8 | 14 | 23992 |
9 | 16 | 22337 |
10 | 17 | 20193 |
11 | 18 | 17431 |
12 | 19 | 14943 |
13 | 6 | 13293 |
14 | 20 | 12411 |
15 | 21 | 9930 |
16 | 22 | 6505 |
17 | 23 | 2587 |
18 | 5 | 1429 |
19 | 4 | 503 |
20 | 1 | 459 |
21 | 0 | 446 |
22 | 2 | 438 |
23 | 3 | 420 |
# Look at the column of distinct values before sorting on it.
data.loc[:, "index"]
0 8 1 7 2 9 3 13 4 15 5 10 6 12 7 11 8 14 9 16 10 17 11 18 12 19 13 6 14 20 15 21 16 22 17 23 18 5 19 4 20 1 21 0 22 2 23 3 Name: index, dtype: int64
# Reorder the rows by the "index" column, ascending, so they are no longer
# in count order.
data = data.sort_values("index", ascending=True)
data
| | index | DEPARTHOUR |
---|---|---|
21 | 0 | 446 |
20 | 1 | 459 |
22 | 2 | 438 |
23 | 3 | 420 |
19 | 4 | 503 |
18 | 5 | 1429 |
13 | 6 | 13293 |
1 | 7 | 29878 |
0 | 8 | 32139 |
2 | 9 | 29280 |
5 | 10 | 24529 |
7 | 11 | 24104 |
6 | 12 | 24302 |
3 | 13 | 26040 |
8 | 14 | 23992 |
4 | 15 | 24824 |
9 | 16 | 22337 |
10 | 17 | 20193 |
11 | 18 | 17431 |
12 | 19 | 14943 |
14 | 20 | 12411 |
15 | 21 | 9930 |
16 | 22 | 6505 |
17 | 23 | 2587 |
# Polar trace of the DEPARTHOUR counts: the "index" values, cast to strings,
# go on the angular axis and the counts on the radial axis.
subfigureDepartTime = go.Scatterpolar(
    r=data[feature],
    theta=data["index"].astype('str'),
    name="出发时段",
    fill='toself',
)

# Standalone preview of the trace: radial axis visible, legend hidden.
subfigureDepartTime_f = go.Figure(subfigureDepartTime)
subfigureDepartTime_f.update_layout(
    showlegend=False,
    polar={"radialaxis": {"visible": True}},
)
subfigureDepartTime_f.show()
# Number of rows per distinct ARRIVEHOUR_S value, most frequent first
# (the sort options below are the value_counts() defaults, spelled out).
feature = "ARRIVEHOUR_S"
data = df[feature].value_counts(sort=True, ascending=False)
data
15 25908 18 25808 17 25201 14 25189 16 25117 19 24612 20 24139 21 23060 13 22924 22 21581 23 20283 12 19716 11 18058 10 15756 9 13002 0 12789 8 6800 1 5703 7 2761 6 1640 5 854 2 611 4 492 3 409 Name: ARRIVEHOUR_S, dtype: int64
# Turn the counts Series into a two-column frame with fixed column names:
# "index" holds the distinct values and `feature` holds their counts.
# Both names are set explicitly because pandas >= 2.0 names the
# value_counts() result "count" and names its index after the source column,
# so a bare reset_index() would not produce an "index" column there.
data = data.rename_axis('index').rename(feature).reset_index()
data
| | index | ARRIVEHOUR_S |
---|---|---|
0 | 15 | 25908 |
1 | 18 | 25808 |
2 | 17 | 25201 |
3 | 14 | 25189 |
4 | 16 | 25117 |
5 | 19 | 24612 |
6 | 20 | 24139 |
7 | 21 | 23060 |
8 | 13 | 22924 |
9 | 22 | 21581 |
10 | 23 | 20283 |
11 | 12 | 19716 |
12 | 11 | 18058 |
13 | 10 | 15756 |
14 | 9 | 13002 |
15 | 0 | 12789 |
16 | 8 | 6800 |
17 | 1 | 5703 |
18 | 7 | 2761 |
19 | 6 | 1640 |
20 | 5 | 854 |
21 | 2 | 611 |
22 | 4 | 492 |
23 | 3 | 409 |
# Look at the column of distinct values before sorting on it.
data.loc[:, "index"]
0 15 1 18 2 17 3 14 4 16 5 19 6 20 7 21 8 13 9 22 10 23 11 12 12 11 13 10 14 9 15 0 16 8 17 1 18 7 19 6 20 5 21 2 22 4 23 3 Name: index, dtype: int64
# Reorder the rows by the "index" column, ascending, so they are no longer
# in count order.
data = data.sort_values("index", ascending=True)
data
| | index | ARRIVEHOUR_S |
---|---|---|
15 | 0 | 12789 |
17 | 1 | 5703 |
21 | 2 | 611 |
23 | 3 | 409 |
22 | 4 | 492 |
20 | 5 | 854 |
19 | 6 | 1640 |
18 | 7 | 2761 |
16 | 8 | 6800 |
14 | 9 | 13002 |
13 | 10 | 15756 |
12 | 11 | 18058 |
11 | 12 | 19716 |
8 | 13 | 22924 |
3 | 14 | 25189 |
0 | 15 | 25908 |
4 | 16 | 25117 |
2 | 17 | 25201 |
1 | 18 | 25808 |
5 | 19 | 24612 |
6 | 20 | 24139 |
7 | 21 | 23060 |
9 | 22 | 21581 |
10 | 23 | 20283 |
# Polar trace of the ARRIVEHOUR_S counts, drawn in light salmon: the "index"
# values, cast to strings, go on the angular axis and the counts on the
# radial axis.
subfigureArriveTime = go.Scatterpolar(
    r=data[feature],
    theta=data["index"].astype('str'),
    name="到达时段",
    marker=dict(color="lightsalmon"),
    fill='toself',
)

# Standalone preview of the trace: radial axis visible, legend hidden.
subfigureArriveTime_f = go.Figure(subfigureArriveTime)
subfigureArriveTime_f.update_layout(
    showlegend=False,
    polar={"radialaxis": {"visible": True}},
)
subfigureArriveTime_f.show()
# Put the departure and arrival traces on one polar figure, export it as a
# 350 x 350 SVG next to the input data, and display it.
data = [
    subfigureDepartTime,
    subfigureArriveTime,
]
testfigure = go.Figure(data=data)
testfigure.write_image(
    path + "时段.svg",
    width=350,
    height=350,
    scale=1,
)
testfigure
# Histogram of AGE, limited to rows where the value is present and below 100.
feature = 'AGE'
data = df.loc[df[feature].notna() & (df[feature] < 100), feature]
subfigureAge = go.Histogram(
    x=data,
    name="年龄",
    marker=dict(color="lightsalmon"),
)

# Standalone copy with a titled x axis, exported as a 500 x 350 SVG and
# displayed.
testfigure = go.Figure(subfigureAge)
layout = go.Layout(xaxis=dict(title='年龄'))
testfigure.update_layout(layout)
testfigure.write_image(
    path + "年龄.svg",
    width=500,
    height=350,
    scale=1,
)
testfigure
# Place every trace in its subplot cell in one call: the pie at (1, 1), both
# polar traces sharing (1, 2), and the histogram in the row-2 cell at (2, 1).
fig.add_traces(
    [subfigureGender, subfigureDepartTime, subfigureArriveTime, subfigureAge],
    rows=[1, 1, 1, 2],
    cols=[1, 2, 2, 1],
)
# Write the assembled dashboard to a standalone HTML file.
py.offline.plot(fig, filename="用户画像特征.html")
'用户画像特征.html'