我想让我的函数输出数组 `data` 中的异常值。我已经画了一张图来显示异常值,但我希望函数也能给出这些异常值的实际数值——也就是说,我希望代码能输出值 220。我该怎么做?我的代码哪里出了问题?我觉得我的距离(`dist`)有些不对劲。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
# Select matplotlib's 'ggplot' style sheet for plotting.
style.use('ggplot')
# Small sample table of daily web statistics; one list entry per day.
web_stats = {
    'Day': [1, 2, 3, 4, 5, 6],
    'Visitors': [43, 53, 34, 45, 64, 34],
    'Bounce_Rate': [65, 72, 62, 64, 54, 220],
}
df = pd.DataFrame(web_stats)

# Copy the bounce-rate column out of the frame as a plain NumPy array.
data = np.array(df['Bounce_Rate'])
def find_outlier(data, q1, q3):
    """Flag values lying on or outside the 1.5 * IQR fences.

    Args:
        data: A scalar or a NumPy array of values to test.
        q1: Lower quartile used to build the fences.
        q3: Upper quartile used to build the fences.

    Returns:
        A boolean for scalar input, or an element-wise boolean array for
        array input. True where ``data <= q1 - 1.5 * (q3 - q1)`` or
        ``data >= q3 + 1.5 * (q3 - q1)``; both fences are inclusive.
    """
    iqr = q3 - q1
    lower = q1 - 1.5 * iqr
    upper = q3 + 1.5 * iqr
    # Use ``|`` rather than ``or``: ``or`` needs a single truth value and
    # raises ValueError on arrays, while ``|`` handles scalars and arrays.
    return (data <= lower) | (data >= upper)
def find_indices(data):
    """Return the positions of the IQR outliers in ``data``.

    The 25th and 75th percentiles of ``data`` are computed once, then each
    element is tested individually with ``find_outlier``.

    Args:
        data: A sequence or NumPy array of numbers (intended to be
            one-dimensional, since elements are tested one at a time).

    Returns:
        A list of integer indices, in ascending order, of the elements
        flagged by ``find_outlier``. An empty list when ``data`` is empty.
    """
    # Percentiles of an empty sample are undefined, so nothing can be flagged.
    if np.size(data) == 0:
        return []
    q1 = np.percentile(data, 25)
    q3 = np.percentile(data, 75)
    return [
        ind
        for ind, value in enumerate(data)
        if find_outlier(value, q1, q3)
    ]
dist = data

# Store the result under its own name: rebinding ``find_indices`` to the
# returned list would shadow the function and make any later call fail.
outlier_indices = find_indices(dist)

# Index the array with the outlier positions to get the outlying values
# themselves, and print them so they are available outside the chart.
outlier_values = data[outlier_indices]
print(outlier_values)

fig = plt.figure()
ax = fig.add_subplot(111)  # 1x1 grid, first subplot
ax.plot(dist, 'b-', label='distances')
# Overlay the outliers as red markers at their original positions.
ax.plot(
    outlier_indices,
    outlier_values,
    'ro',
    markersize=7,
    label='outliers',
)
ax.legend(loc='best')