首頁資訊 python大數據分析

python大數據分析

來源：泰然健康網 時間：2024年11月24日 04:02

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

!pip install xgboost

import pandas as pd

import numpy as np

import seaborn as sns

import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder

import warnings

import xgboost

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestRegressor,RandomForestClassifier

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Silence every warning category so library notices do not clutter the output.
warnings.filterwarnings(action='ignore')

# Select the sans-serif font used by matplotlib
# (presumably so the Chinese titles and labels render -- confirm on the target machine).
plt.rcParams.update({'font.sans-serif': ['Microsoft YaHei']})

# Load the dataset; the CSV is decoded as GBK.
df = pd.read_csv(
    '/home/mw/input/data1581/Sleep_health_and_lifestyle_dataset.csv',
    encoding='gbk',
)

# First rows, then column dtypes / non-null counts.
df.head()
df.info()

# Count fully duplicated rows (the value is only displayed when this is the
# last expression of a notebook cell).
df.duplicated().sum()

# Work on a copy so the raw frame `df` keeps its original text labels.
df_new = df.copy()

# Collect the object-dtype columns, except the blood-pressure column, which is
# split into two integer columns below instead of being label-encoded.
cat_cols = []
for each in df_new.columns.tolist():
    if df_new[each].dtype == 'object' and each != '血壓':
        cat_cols.append(each)
        print(df_new[each].value_counts().to_frame())

# Replace each categorical column with its integer label encoding.
# The scraped source had lost the `for col in cat_cols:` header, leaving `col`
# undefined; it is restored here.
for col in cat_cols:
    le = LabelEncoder()
    le.fit(df_new[col])
    df_new[col] = le.transform(df_new[col])
df_new.head()

# Split the "a/b" blood-pressure strings on '/' into two integer columns and
# append them to the frame.
xueya = df_new['血壓'].str.split('/', expand=True)
xueya.columns = ['高壓', '低壓']
xueya = xueya.astype(int)
df_new = pd.concat([df_new, xueya], axis=1)
df_new.info()

# New figure for the count plot drawn by the statements that follow.
plt.figure(figsize=(12, 8))

# Count plots on the raw frame `df`, grouped by gender on the x axis.
# NOTE(review): the column key and title contained the extraction artifact
# "(yè)"; it is removed here -- confirm the key matches the CSV header exactly.

# Gender vs. occupation (drawn on the figure opened just before this section).
sns.countplot(x='性別', hue='職業', data=df, palette='Set3')
plt.title('男女及從事職業情況', fontsize=20)
plt.show()

# Gender vs. sleep disorder.
plt.figure(figsize=(12, 8))
sns.countplot(x='性別', hue='睡眠障礙', data=df, palette='Set3')
plt.title('男女睡眠障礙情況', fontsize=20)
plt.show()

# Gender vs. BMI category.
plt.figure(figsize=(12, 8))
sns.countplot(x='性別', hue='BMI', data=df, palette='Set3')
plt.title('男女BMI情況', fontsize=20)
plt.show()

# New figure for the histogram drawn by the statements that follow.
plt.figure(figsize=(12, 8))

# Each plot below is a density-normalised histogram with a KDE curve of the
# same column overlaid on the same axes.

# Age (drawn on the figure opened just before this section).
plt.hist(df['年齡'], density=True, bins=15,
         color=plt.cm.RdBu(0.6), edgecolor=plt.cm.RdBu(0.7))
df['年齡'].plot(kind='kde')
plt.title('年齡分布', fontsize=20)
plt.show()
# Observed age range (displayed only as a notebook cell result).
df['年齡'].min(), df['年齡'].max()

# Sleep duration.
plt.figure(figsize=(12, 8))
plt.hist(df['睡眠時長'], density=True, bins=15,
         color=plt.cm.RdBu(0.6), edgecolor=plt.cm.RdBu(0.7))
df['睡眠時長'].plot(kind='kde')
plt.title('睡眠時間分布', fontsize=20)
plt.show()

# Sleep quality (only 6 bins are used for this column).
plt.figure(figsize=(12, 8))
plt.hist(df['睡眠質量'], density=True, bins=6,
         color=plt.cm.RdBu(0.6), edgecolor=plt.cm.RdBu(0.7))
df['睡眠質量'].plot(kind='kde')
plt.title('睡眠質量分布', fontsize=20)
plt.show()

# Heart rate.
plt.figure(figsize=(12, 8))
plt.hist(df['心率'], density=True, bins=15,
         color=plt.cm.RdBu(0.6), edgecolor=plt.cm.RdBu(0.7))
df['心率'].plot(kind='kde')
plt.title('心率分布', fontsize=20)
plt.show()

# New figure for the histogram drawn by the statements that follow.
plt.figure(figsize=(12, 8))

# Both blood-pressure columns (taken from the preprocessed frame `df_new`) are
# drawn on one figure: a density histogram plus KDE curve for each, in
# different shades of the RdBu colormap, with a legend to tell them apart.
plt.hist(df_new['高壓'], density=True, bins=15,
         color=plt.cm.RdBu(0.6), edgecolor=plt.cm.RdBu(0.7), label='高壓')
df_new['高壓'].plot(kind='kde', label='高壓')
plt.hist(df_new['低壓'], density=True, bins=15,
         color=plt.cm.RdBu(0.3), edgecolor=plt.cm.RdBu(0.2), label='低壓')
df_new['低壓'].plot(kind='kde', label='低壓')
plt.title('血壓分布', fontsize=20)
plt.legend()
plt.show()
# Extremes of both columns (displayed only as a notebook cell result).
df_new['高壓'].max(), df_new['高壓'].min(), df_new['低壓'].min(), df_new['低壓'].max()

# New figure for the histogram drawn by the statements that follow.
plt.figure(figsize=(12, 8))

# Density histograms with an overlaid KDE curve for three more columns.
# NOTE(review): the steps column key and title contained the extraction
# artifact "(shù)"; it is removed here -- confirm the key matches the CSV header.

# Physical activity level (drawn on the figure opened just before this section).
plt.hist(df['身體活動水平'], density=True, bins=15,
         color=plt.cm.RdBu(0.6), edgecolor=plt.cm.RdBu(0.7))
df['身體活動水平'].plot(kind='kde')
plt.title('身體活動水平分布', fontsize=20)
plt.show()
# Observed range (displayed only as a notebook cell result).
df['身體活動水平'].min(), df['身體活動水平'].max()

# Stress level.
plt.figure(figsize=(12, 8))
plt.hist(df['壓力水平'], density=True, bins=15,
         color=plt.cm.RdBu(0.6), edgecolor=plt.cm.RdBu(0.7))
df['壓力水平'].plot(kind='kde')
plt.title('壓力水平分布', fontsize=20)
plt.show()

# Daily steps.
plt.figure(figsize=(12, 8))
plt.hist(df['每日步數'], density=True, bins=15,
         color=plt.cm.RdBu(0.6), edgecolor=plt.cm.RdBu(0.7))
df['每日步數'].plot(kind='kde')
plt.title('每日步數分布', fontsize=20)
plt.show()

# Pairwise scatter matrix over every column except the first one.
sns.pairplot(df_new[df_new.columns.tolist()[1:]])

# New, square figure for the heatmap drawn by the statements that follow.
plt.figure(figsize=(12, 12))

# Correlation heatmap over every column except the first one.
# `df_new` still contains the raw blood-pressure strings at this point, and
# DataFrame.corr() raises on non-numeric columns in pandas >= 2.0, so restrict
# to numeric columns explicitly (older pandas dropped them silently, giving
# the same matrix). The matrix is computed once and reused for the tick labels.
corr = df_new.iloc[:, 1:].select_dtypes(include='number').corr()
corr_labels = corr.columns.tolist()
plt.imshow(corr, cmap='Blues')
plt.xticks(range(len(corr_labels)), corr_labels, rotation=45)
plt.yticks(range(len(corr_labels)), corr_labels, rotation=45)
plt.colorbar()
plt.show()

# Columns treated as prediction targets by the modelling steps that follow.
target = ['睡眠時長', '睡眠質量', '睡眠障礙']

# Remove the identifier and the raw blood-pressure string column before
# modelling (the blood pressure was already split into two integer columns).
df_new.drop(columns=['ID'], inplace=True)
df_new.drop(columns=['血壓'], inplace=True)

# Features are all columns that are not one of the targets; this does not
# change between iterations, so it is built once.
X = df_new.iloc[:, ~df_new.columns.isin(target)]

# Fit one random-forest regressor for each of the first two targets and show
# its feature importances as a single-column heatmap.
for i, target_name in enumerate(target[:2]):
    y = df_new[target_name]
    model = RandomForestRegressor()
    model.fit(X, y)
    print('在' + target_name + '作為因變量時，各因素重要性為：')
    plt.figure(figsize=(8, 8))
    plt.subplot(2, 1, i + 1)
    # reshape(-1, 1) turns the importance vector into one column for imshow.
    plt.imshow(model.feature_importances_.reshape(-1, 1))
    plt.yticks(range(len(X.columns.tolist())), X.columns.tolist())
    plt.xticks(range(1))
    plt.xlabel(target_name)
    plt.colorbar()
    plt.show()

# Third target, used by the classifier fitted right after this section.
y = df_new[target[2]]

# Fit a random-forest classifier on every non-target column (`y` was set to
# the third target just before this section) and show its feature importances
# as a single-column heatmap.
X = df_new.iloc[:, ~df_new.columns.isin(target)]
model1 = RandomForestClassifier()
model1.fit(X, y)
# reshape(-1, 1) turns the importance vector into one column for imshow.
plt.imshow(model1.feature_importances_.reshape(-1, 1))
plt.yticks(range(len(X.columns.tolist())), X.columns.tolist())
plt.xticks(range(1))
plt.xlabel(target[2])
plt.colorbar()
plt.show()

# New, narrow figure for the box plot drawn by the statements that follow.
plt.figure(figsize=(4, 8))

# Box plots per gender, using the preprocessed frame `df_new`.

# Physical activity level (drawn on the figure opened just before this section).
sns.boxplot(x='性別', y='身體活動水平', palette='Set3', data=df_new)
plt.title('不同性別身體活動水平的箱型圖分析', fontsize=15)
plt.show()

# Stress level.
plt.figure(figsize=(4, 8))
sns.boxplot(x='性別', y='壓力水平', palette='Set3', data=df_new)
plt.title('不同性別壓力水平的箱型圖分析', fontsize=15)
plt.show()

# Heart rate. The scraped source read `lt.figure`, which would raise
# NameError; the missing "p" is restored.
plt.figure(figsize=(4, 8))
sns.boxplot(x='性別', y='心率', palette='Set3', data=df_new)
plt.title('不同性別心率的箱型圖分析', fontsize=15)
plt.show()

# Both blood-pressure columns are drawn on the same axes.
plt.figure(figsize=(12, 8))
sns.boxplot(x='性別', y='高壓', palette='Set3', data=df_new)
sns.boxplot(x='性別', y='低壓', palette='Set3', data=df_new)
plt.title('不同性別血壓的箱型圖分析', fontsize=15)
plt.show()

# Feature matrix for the classifier trained next: everything except the label.
X = df_new.drop(columns=['睡眠障礙'])

# Label column for the sleep-disorder classifier (`X` was built just before
# this section).
y = df_new[['睡眠障礙']]

# 70/30 train/test split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, train_size=0.7, random_state=42)

# Train a gradient-boosted classifier with default settings and predict the
# held-out rows.
model2 = xgboost.XGBClassifier()
model2.fit(X_train, y_train)
y_pred = model2.predict(X_test)
# Encoded class values present in the label column (notebook display only).
df_new['睡眠障礙'].unique()

cm = confusion_matrix(y_test, y_pred)
# NOTE(review): this mapping assumes the label encoder assigned 0/1/2 in this
# order -- confirm against the encoder's classes_.
label_mapping = {0: '失眠', 1: '無', 2: '睡眠呼吸暫停'}

# Text rendering of the confusion matrix: one line per true class, one
# tab-separated "count (predicted label)" cell per predicted class.
# Fixes relative to the scraped source: the cell separator is a real tab
# (it was the literal characters ")t"), and the row suffix shows the true
# label's name rather than its integer key.
for i, true_label in enumerate(label_mapping.values()):
    row = ''
    for j, pred_label in enumerate(label_mapping.values()):
        row += f'{cm[i, j]} ({pred_label})\t'
    print(f'{row} | {true_label}')

print(classification_report(y_test, y_pred, target_names=['失眠', '無', '睡眠呼吸暫停']))

# Draw the confusion matrix `cm` as an annotated heatmap.

# `label_names` was used but never defined in the scraped source; use the same
# class names, in the same order, as the classification report.
label_names = ['失眠', '無', '睡眠呼吸暫停']

fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
ax.figure.colorbar(im, ax=ax)
ax.set(
    xticks=np.arange(cm.shape[1]),
    yticks=np.arange(cm.shape[0]),
    xticklabels=label_names,
    yticklabels=label_names,
    title='Confusion matrix',
    ylabel='True label',
    xlabel='Predicted label',
)

# Write each count into its cell; cells above half the maximum count get white
# text, the rest get black text.
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, format(cm[i, j], 'd'),
                ha="center", va="center",
                color="white" if cm[i, j] > thresh else "black")

fig.tight_layout()
plt.show()

網址: python大數據分析 http://www.u1s5d6.cn/newsview46260.html

91高清中文字幕|亚洲无码网站网址|欧美一区二区乱伦|a乱码精品一区二区三|成人一区二区毛片|国产日韩精品视频短片|不卡无码无需播放器|鲁噜精品免费视频|wwwh日韩中出|精品五月婷婷无码

python大數據分析

推薦資訊

從出汗看健康出汗透露你的健康信號

早上怎么喝水最健康？

91高清中文字幕|亚洲无码网站网址|欧美一区二区乱伦|a乱码精品一区二区三|成人一区二区毛片|国产日韩精品视频短片|不卡无码无需播放器|鲁噜精品免费视频|wwwh日韩中出|精品五月婷婷无码

python大數據分析

推薦資訊

從出汗看健康 出汗透露你的健康信號

早上怎么喝水最健康？

從出汗看健康出汗透露你的健康信號