0

我有一个由合并数据构成的 DataFrame(df),共 4 列。我想针对其中 3 列分别绘制一组箱线图,并用最后一列作为 y 轴。但是,这些箱线图相互重叠。我认为也不能使用 `hue`(色调)参数,因为我想按列来区分箱线图,而不是按某一列中的共同取值来区分。使用 `positions` 参数也许可行,但我不确定。

数据框

箱线图相互重叠

谢谢

# Question code: bin three duration columns into 24-wide intervals and draw
# AE_450 boxplots grouped by each binning, all on one shared axes.
df = pd.read_csv("C:\\Users\\VeraFranke\\Desktop\\Complete_hourly_df.dat", parse_dates=True, infer_datetime_format=True)
# Use the first column as the index and convert it to datetimes.
df = df.set_index(df.columns[0])
df.index = pd.to_datetime(df.index)
print(df.index)

# Bin edges 0, 24, ..., 240, i.e. ten intervals that are each `width` wide.
width = 24
bins = np.arange(0,241, width)
print(bins)

# Assign every row to an interval, separately for each of the three columns.
df['binned_ocean'] = pd.cut(df.ocean_time, bins=bins)
df['binned_land'] = pd.cut(df.land_time, bins=bins) 
df['binned_ice'] = pd.cut(df.time_over_ice, bins=bins) # labels=['1','2','3','4','5','6','7','8']) #[-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75, 1.]) #, precision=0, include_lowest=True)

# Slice out the value column and the three binned columns ...
df1 = df[['AE_450', 'binned_ocean']]
df2 = df[['binned_land']]
df3 = df[['binned_ice']]

# ... and put them back together side by side (column-wise).
cdf = pd.concat([df1, df2, df3], axis=1, sort=False)    

cdf = cdf[['AE_450','binned_ocean','binned_land','binned_ice']]

# Three grouped boxplots are requested on the same `ax`; only the first call
# passes explicit `positions` (ten values, one per interval).
# NOTE(review): the land/ice calls presumably fall back to default positions
# and therefore draw over the first set of boxes - confirm against the plot.
ax = cdf.boxplot(by='binned_ocean', positions = [1, 2,3,4,5,7,8,13,15,18], return_type='axes')
cdf.boxplot(by='binned_land', ax=ax)
cdf.boxplot(by='binned_ice', ax=ax)
# CONCATENATE
# Earlier attempts (melt to long form, one seaborn boxplot per column), kept
# commented out:
#mdf = pd.melt(cdf, id_vars=[''], var_name=['Number'])  

#sns.boxplot(x=cdf.binned_ocean, y=df.AE_450, palette="Set3", showfliers=False, whis=[5, 95])
#sns.boxplot(x=df.binned_land, y=df.AE_450, hue=df.binned_land, palette="Set3", showfliers=False, whis=[5, 95])
#sns.boxplot(x=df.binned_ice, y=df.AE_450, hue=df.binned_ice, palette="Set3", showfliers=False, whis=[5, 95])

# Fix the y-range and label the current axes.
plt.ylim(-1,5)
plt.xlabel('Time over Ocean',size=15)
plt.ylabel('Scattering Ångström Exponent',size=15)
plt.title("Boxplots: Data separated into bins of 24 hours width (whis 5 & 95)", size=25)

plt.show()

我设法解决了这个问题:

# Reshape the data to long form (one row per observation and surface type) and
# draw a single seaborn boxplot in which `hue` separates ocean / land / ice
# within every time bin, so the boxes sit side by side.

# Use the first column as the index and convert it to datetimes.
df = df.set_index(df.columns[0])
df.index = pd.to_datetime(df.index)
print(df.index)

# Bin edges 0, 24, ..., 240, i.e. ten intervals that are each `width` wide.
width = 24
bins = np.arange(0, 241, width)
print(bins)

# Legend label -> name of the column that is binned for that label.
# Insertion order determines the order of the hue levels in the plot.
surface_columns = {
    'ocean': 'ocean_time',
    'land': 'land_time',
    'ice': 'time_over_ice',
}

# `assign` returns a new frame, so nothing is written into a slice of `df`
# (no SettingWithCopyWarning) and `df` itself gets no scratch column.
frames = [
    df[['AE_450']].assign(binned=pd.cut(df[column], bins=bins), type=label)
    for label, column in surface_columns.items()
]

# Stack the three frames; ignore_index gives the result one unique 0..n-1
# index instead of repeating every label once per surface type.
result = pd.concat(frames, ignore_index=True)

plt.figure(figsize=(20, 10))

# Whiskers at the 5th/95th percentiles; outlier markers are hidden.
b = sns.boxplot(
    data=result,
    x='binned',
    y='AE_450',
    hue='type',
    palette="Set3",
    showfliers=False,
    whis=[5, 95],
)
b.axes.set_title("Binned data: Ångström Exponent vs time over different land types", fontsize=20)
b.set_xlabel("Hours spent above type time", fontsize=20)
b.set_ylabel("Ångström Exponent", fontsize=20)
b.tick_params(labelsize=15)

plt.ylim(-1.2, 5.2)

plt.show()
4

0 回答 0