import pandas as pd
# Load the ICU data file into a DataFrame, then preview its first five rows.
health = pd.read_csv(filepath_or_buffer='ICU.csv.txt')
health.head(n=5)
Unnamed: 0 | ID | Survive | Age | AgeGroup | Sex | Infection | SysBP | Pulse | Emergency | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 4 | 0 | 87 | 3 | 1 | 1 | 80 | 96 | 1 |
1 | 2 | 8 | 1 | 27 | 1 | 1 | 1 | 142 | 88 | 1 |
2 | 3 | 12 | 1 | 59 | 2 | 0 | 0 | 112 | 80 | 1 |
3 | 4 | 14 | 1 | 77 | 3 | 0 | 0 | 100 | 70 | 0 |
4 | 5 | 27 | 0 | 76 | 3 | 1 | 1 | 128 | 90 | 1 |
Dữ liệu xuất phát từ một nghiên cứu mô tả cắt ngang, khảo sát huyết áp và mạch của 200 bệnh nhân shock nhiễm khuẩn trong khoa ICU. Câu hỏi nghiên cứu giả định của chúng ta là: So sánh huyết áp tâm thu (SysBP) giữa 2 phân nhóm bệnh nhân Tử vong và Sống sót.
# Mean of every remaining column within each Survive group (0 and 1).
health.groupby(by='Survive').mean()
Unnamed: 0 | ID | Age | AgeGroup | Sex | Infection | SysBP | Pulse | Emergency | |
---|---|---|---|---|---|---|---|---|---|
Survive | |||||||||
0 | 90.4500 | 395.9500 | 65.125 | 2.325 | 0.400 | 0.600 | 118.82500 | 100.625 | 0.95000 |
1 | 103.0125 | 457.0375 | 55.650 | 1.950 | 0.375 | 0.375 | 135.64375 | 98.500 | 0.68125 |
# The mean SysBP of patients who did not survive (118.8) is lower than that of patients who survived (135.6).
## Next step: hypothesis testing to check whether this difference is statistically significant (later)
# Bar graph of SysBP: non-survivors vs. survivors
import matplotlib.pyplot as plt
import numpy as np
# Compare the two outcome groups directly: one bar per Survive value (0 / 1)
# whose height is that group's mean SysBP. Plotting the raw SysBP column
# indexed by Survive would instead draw one bar per row, each labelled only
# 0 or 1, which does not show the between-group difference.
health.groupby('Survive')['SysBP'].mean().plot.bar()
plt.ylabel('Mean SysBP')
plt.show()
<Figure size 640x480 with 1 Axes>
# Keep only the rows where Survive == 0, then draw one SysBP bar per row
# (the x-axis is indexed by the Survive column).
dead = health.loc[health['Survive'].eq(0)]
dead.set_index('Survive')['SysBP'].plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x291fb037400>
# Summary statistics (count, mean, std, min, quartiles, max) of SysBP
# for the Survive == 0 rows.
dead.loc[:, 'SysBP'].describe()
count 40.000000 mean 118.825000 std 41.080838 min 36.000000 25% 89.000000 50% 126.000000 75% 140.000000 max 256.000000 Name: SysBP, dtype: float64
# Keep only the rows where Survive == 1, then draw one SysBP bar per row
# (the x-axis is indexed by the Survive column).
alive = health.loc[health['Survive'].eq(1)]
alive.set_index('Survive')['SysBP'].plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x291fb131208>
# Summary statistics (count, mean, std, min, quartiles, max) of SysBP
# for the Survive == 1 rows.
alive.loc[:, 'SysBP'].describe()
count 160.000000 mean 135.643750 std 29.801513 min 48.000000 25% 112.000000 50% 132.000000 75% 154.000000 max 224.000000 Name: SysBP, dtype: float64
# Line plot of SysBP against ID for the Survive == 0 subset; plt.show()
# renders it before the next plot is drawn.
df_plot = dead.set_index('ID')['SysBP'].plot(kind='line')
plt.show()
# Same line plot for the Survive == 1 subset; df_plot is rebound to the
# object returned by this second plot call.
df_plot = alive.set_index('ID')['SysBP'].plot(kind='line')
plt.show()
# Re-select the Survive == 1 rows and convert them to a plain NumPy array
# (one array row per DataFrame row, column labels dropped).
alive = health.loc[health['Survive'].eq(1)]
np.array(alive)
array([[ 2, 8, 1, ..., 142, 88, 1], [ 3, 12, 1, ..., 112, 80, 1], [ 4, 14, 1, ..., 100, 70, 0], ..., [198, 924, 1, ..., 162, 100, 1], [199, 925, 1, ..., 100, 88, 1], [200, 929, 1, ..., 122, 84, 1]], dtype=int64)
# Convert the Survive == 1 rows to a NumPy array in a single expression,
# without binding the filtered DataFrame to a name.
np.array(health.loc[health['Survive'].eq(1)])
array([[ 2, 8, 1, ..., 142, 88, 1], [ 3, 12, 1, ..., 112, 80, 1], [ 4, 14, 1, ..., 100, 70, 0], ..., [198, 924, 1, ..., 162, 100, 1], [199, 925, 1, ..., 100, 88, 1], [200, 929, 1, ..., 122, 84, 1]], dtype=int64)