import pandas as pd
import numpy as np
import random
import copy
import feather
import plotly.graph_objects as go
import plotly.express as px
import panel as pn
import holoviews as hv
import geoviews as gv
import geoviews.feature as gf
import cartopy
import cartopy.feature as cf
from geoviews import opts
from cartopy import crs as ccrs
import hvplot.pandas # noqa
import colorcet as cc
from colorcet.plotting import swatch
hv.extension("bokeh","plotly")
我有一个名为 test 的数据框:
Out[5]:
age age_band car_ins_renew_month people_type
0 NaN NaN NaN sign_up_only
1 61.0 55-64 7.0 active_interest
2 NaN NaN NaN sign_up_only
3 55.0 55-64 8.0 previous_customer
4 NaN NaN NaN sign_up_only
... ... ... ... ...
107627 42.0 35-44 6.0 previous_customer
107628 73.0 65+ 7.0 previous_customer
107629 NaN NaN NaN sign_up_only
107630 NaN NaN NaN sign_up_only
107631 NaN NaN NaN sign_up_only
[107632 rows x 4 columns]
In [6]: test.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107632 entries, 0 to 107631
Data columns (total 4 columns):
age 73289 non-null float32
age_band 73289 non-null category
car_ins_renew_month 64290 non-null float32
people_type 107632 non-null category
dtypes: category(2), float32(2)
memory usage: 1.0 MB
对于整个 test 数据框,我可以使用 hvplot 成功地为 age 生成直方图(带有 age_band 悬停数据):
In [7]: test.hvplot.hist(
...: y="age",by=["age_band"],
...: bins=[18,25,35,45,55,65,74],
...: xticks=[(21.5,"18-24"),(30,"25-34"),(40,"35-44"),(50,"45-54"),(60,"55-64"),(69.5,"65-74")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
也可以为 car_ins_renew_month 生成直方图:
test.hvplot.hist(
...: y="car_ins_renew_month",
...: bins=[1,2,3,4,5,6,7,8,9,10,11,12,13],
...: xticks=[(1.5,"JAN"),(2.5,"FEB"),(3.5,"MAR"),(4.5,"APR"),(5.5,"MAY"),(6.5,"JUN"),(7.5,"JUL"),(8.5,"AUG"),(9.5,"SEP"),(10.5,"OCT"),(11.5,"NOV"),(12.5,"DEC")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
但是,对于 test 中 people_type 等于 previous_customer 的子集:
In [11]: test_prev_cust = test.loc[test["people_type"]=="previous_customer"]
虽然我可以成功地为 car_ins_renew_month 属性生成直方图:
In [13]: test_prev_cust.hvplot.hist(
...: y="car_ins_renew_month",
...: bins=[1,2,3,4,5,6,7,8,9,10,11,12,13],
...: xticks=[(1.5,"JAN"),(2.5,"FEB"),(3.5,"MAR"),(4.5,"APR"),(5.5,"MAY"),(6.5,"JUN"),(7.5,"JUL"),(8.5,"AUG"),(9.5,"SEP"),(10.5,"OCT"),(11.5,"NOV"),(12.5,"DEC")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
但当我尝试为 age 属性生成直方图时,出现以下错误:
In [14]: test_prev_cust.hvplot.hist(
...: y="age",by=["age_band"],
...: bins=[18,25,35,45,55,65,74],
...: xticks=[(21.5,"18-24"),(30,"25-34"),(40,"35-44"),(50,"45-54"),(60,"55-64"),(69.5,"65-74")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
---------------------------------------------------------------------------
DataError Traceback (most recent call last)
<ipython-input-100-b2108cee586d> in <module>
7 color="teal",legend=False,
8 line_width=4,line_color="w",
----> 9 width=650,height=280
10 )
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/plotting/core.py in hist(self, y, by, **kwds)
399 The HoloViews representation of the plot.
400 """
--> 401 return self(kind='hist', x=None, y=y, by=by, **kwds)
402
403 def kde(self, y=None, by=None, **kwds):
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/plotting/core.py in __call__(self, x, y, kind, **kwds)
70 return pn.panel(plot, **panel_dict)
71
---> 72 return self._get_converter(x, y, kind, **kwds)(kind, x, y)
73
74 def _get_converter(self, x=None, y=None, kind=None, **kwds):
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/converter.py in __call__(self, kind, x, y)
942 obj = DynamicMap(cbcallable, streams=[self.stream])
943 else:
--> 944 obj = method(x, y)
945
946 if self.crs and self.project:
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/converter.py in hist(self, x, y, data)
1383 if self.by:
1384 hist = hists = histogram(
-> 1385 ds.groupby(self.by), dimension=y, **hist_opts
1386 )
1387 hist = hists.last
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/param/parameterized.py in __new__(class_, *args, **params)
2810 inst = class_.instance()
2811 inst.param._set_name(class_.__name__)
-> 2812 return inst.__call__(*args,**params)
2813
2814 def __call__(self,*args,**kw):
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in __call__(self, element, **kwargs)
162 elif 'streams' not in kwargs:
163 kwargs['streams'] = self.p.streams
--> 164 return element.apply(self, **kwargs)
165
166
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/accessors.py in __call__(self, function, streams, link_inputs, dynamic, **kwargs)
113 for k, v in self._obj.data.items():
114 new_val = v.apply(function, dynamic=dynamic, streams=streams,
--> 115 link_inputs=link_inputs, **kwargs)
116 if new_val is not None:
117 mapped.append((k, new_val))
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/accessors.py in __call__(self, function, streams, link_inputs, dynamic, **kwargs)
108 if hasattr(function, 'dynamic'):
109 inner_kwargs['dynamic'] = False
--> 110 return function(self._obj, **inner_kwargs)
111 elif self._obj._deep_indexable:
112 mapped = []
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in __call__(self, element, **kwargs)
159 for k, el in element.items()])
160 elif isinstance(element, ViewableElement):
--> 161 return self._apply(element)
162 elif 'streams' not in kwargs:
163 kwargs['streams'] = self.p.streams
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in _apply(self, element, key)
119 for hook in self._preprocess_hooks:
120 kwargs.update(hook(self, element))
--> 121 ret = self._process(element, key)
122 for hook in self._postprocess_hooks:
123 ret = hook(self, ret, **kwargs)
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/operation/element.py in _process(self, element, key)
657 hist *= edges[1]-edges[0]
658 return Histogram((edges, hist), kdims=[element.get_dimension(selected_dim)],
--> 659 label=element.label, **params)
660
661
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/element/chart.py in __init__(self, data, edges, **params)
196 elif isinstance(data, tuple) and len(data) == 2 and len(data[0])+1 == len(data[1]):
197 data = data[::-1]
--> 198 super(Histogram, self).__init__(data, **params)
199
200
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/data/__init__.py in __init__(self, data, kdims, vdims, **kwargs)
209 validate_vdims = kwargs.pop('_validate_vdims', True)
210 initialized = Interface.initialize(type(self), data, kdims, vdims,
--> 211 datatype=kwargs.get('datatype'))
212 (data, self.interface, dims, extra_kws) = initialized
213 super(Dataset, self).__init__(data, **dict(kwargs, **dict(dims, **extra_kws)))
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/data/interface.py in initialize(cls, eltype, data, kdims, vdims, datatype)
252 % (intfc.__name__, e))
253 error = ' '.join([error, priority_error])
--> 254 raise DataError(error)
255
256 return data, interface, dims, extra_kws
DataError: None of the available storage backends were able to support the supplied data format.
我知道可以使用 hvplot 成功地为 test 数据框的子集生成直方图,因为对于 car_ins_renew_month 我做到了,并且在 people_type 等于 active_interest 时,我也能够为 age 做到这一点。我只是无法在 people_type 等于 previous_customer 时为 age 生成直方图。
我确实注意到数据框 test_prev_cust 的一个特点:在 age_band 的以下两个类别中没有任何人:
In [18]: test_prev_cust["age_band"].value_counts()
Out[18]:
45-54 13457
55-64 10369
35-44 8760
65+ 7801
25-34 0
18-24 0
Name: age_band, dtype: int64
这可能是我的问题的原因吗?如果是这样,那么有没有办法解决这个问题,并且仍然在我的图中包含 age_band 悬停数据?
谢谢
软件版本:
bokeh 1.4.0 py37_0
cartopy 0.17.0 py37haea56ea_1
colorcet 2.0.2 py_0 pyviz
feather-format 0.4.0 py_1003 conda-forge
geoviews 1.6.5 py_0 pyviz
holoviews 1.12.6 py_0 pyviz
hvplot 0.5.2 py_0 pyviz
jupyter 1.0.0 py37_7
matplotlib 3.1.1 py37h54f8f79_0
notebook 6.0.2 py37_0
numpy 1.17.3 py37h4174a10_0
pandas 0.25.3 py37h0a44026_0
panel 0.7.0 py_0 pyviz
plotly 4.3.0 py_0 plotly
plotly_express 0.4.1 py_0 plotly
python 3.7.5 h359304d_0
seaborn 0.9.0 pyh91ea838_1
我在 os x Catalina 上,使用最新版本的 Firefox,我正在使用 Jupyter 笔记本。