1
import pandas as pd
import numpy as np
import random
import copy
import feather
import plotly.graph_objects as go
import plotly.express as px
import panel as pn
import holoviews as hv
import geoviews as gv
import geoviews.feature as gf
import cartopy
import cartopy.feature as cf
from geoviews import opts
from cartopy import crs as ccrs
import hvplot.pandas # noqa
import colorcet as cc
from colorcet.plotting import swatch
hv.extension("bokeh","plotly")

我有一个名为 test 的数据框:

Out[5]: 
         age age_band  car_ins_renew_month        people_type
0        NaN      NaN                  NaN       sign_up_only
1       61.0    55-64                  7.0    active_interest
2        NaN      NaN                  NaN       sign_up_only
3       55.0    55-64                  8.0  previous_customer
4        NaN      NaN                  NaN       sign_up_only
...      ...      ...                  ...                ...
107627  42.0    35-44                  6.0  previous_customer
107628  73.0      65+                  7.0  previous_customer
107629   NaN      NaN                  NaN       sign_up_only
107630   NaN      NaN                  NaN       sign_up_only
107631   NaN      NaN                  NaN       sign_up_only

[107632 rows x 4 columns]
In [6]: test.info()                                                             
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107632 entries, 0 to 107631
Data columns (total 4 columns):
age                    73289 non-null float32
age_band               73289 non-null category
car_ins_renew_month    64290 non-null float32
people_type            107632 non-null category
dtypes: category(2), float32(2)
memory usage: 1.0 MB

对于整个test数据框,我可以使用 hvplot 成功生成直方图:

age(带有悬停数据age_band):

In [7]: test.hvplot.hist( 
   ...:             y="age",by=["age_band"], 
   ...:             bins=[18,25,35,45,55,65,74], 
   ...:             xticks=[(21.5,"18-24"),(30,"25-34"),(40,"35-44"),(50,"45-54"),(60,"55-64"),(69.5,"65-74")], 
   ...:             color="teal",legend=False, 
   ...:             line_width=4,line_color="w", 
   ...:             width=650,height=280 
   ...: )

在此处输入图像描述

car_ins_renew_month

        test.hvplot.hist( 
   ...:             y="car_ins_renew_month", 
   ...:             bins=[1,2,3,4,5,6,7,8,9,10,11,12,13], 
   ...:             xticks=[(1.5,"JAN"),(2.5,"FEB"),(3.5,"MAR"),(4.5,"APR"),(5.5,"MAY"),(6.5,"JUN"),(7.5,"JUL"),(8.5,"AUG"),(9.5,"SEP"),(10.5,"OCT"),(11.5,"NOV"),(12.5,"DEC")], 
   ...:             color="teal",legend=False, 
   ...:             line_width=4,line_color="w", 
   ...:             width=650,height=280 
   ...: ) 

在此处输入图像描述

但是,对于 test 中 people_type 等于 previous_customer 的子集:

In [11]: test_prev_cust = test.loc[test["people_type"]=="previous_customer"]

虽然我可以成功地为 car_ins_renew_month 属性生成直方图:

In [13]: test_prev_cust.hvplot.hist( 
    ...:             y="car_ins_renew_month", 
    ...:             bins=[1,2,3,4,5,6,7,8,9,10,11,12,13], 
    ...:             xticks=[(1.5,"JAN"),(2.5,"FEB"),(3.5,"MAR"),(4.5,"APR"),(5.5,"MAY"),(6.5,"JUN"),(7.5,"JUL"),(8.5,"AUG"),(9.5,"SEP"),(10.5,"OCT"),(11.5,"NOV"),(12.5,"DEC")], 
    ...:             color="teal",legend=False, 
    ...:             line_width=4,line_color="w", 
    ...:             width=650,height=280 
    ...: )  

在此处输入图像描述

当我尝试为age属性生成直方图时,出现以下错误:

In [14]: test_prev_cust.hvplot.hist( 
    ...:             y="age",by=["age_band"], 
    ...:             bins=[18,25,35,45,55,65,74], 
    ...:             xticks=[(21.5,"18-24"),(30,"25-34"),(40,"35-44"),(50,"45-54"),(60,"55-64"),(69.5,"65-74")], 
    ...:             color="teal",legend=False, 
    ...:             line_width=4,line_color="w", 
    ...:             width=650,height=280 
    ...: ) 
---------------------------------------------------------------------------
DataError                                 Traceback (most recent call last)
<ipython-input-100-b2108cee586d> in <module>
      7             color="teal",legend=False,
      8             line_width=4,line_color="w",
----> 9             width=650,height=280
     10 )

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/plotting/core.py in hist(self, y, by, **kwds)
    399             The HoloViews representation of the plot.
    400         """
--> 401         return self(kind='hist', x=None, y=y, by=by, **kwds)
    402 
    403     def kde(self, y=None, by=None, **kwds):

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/plotting/core.py in __call__(self, x, y, kind, **kwds)
     70                 return pn.panel(plot, **panel_dict)
     71 
---> 72         return self._get_converter(x, y, kind, **kwds)(kind, x, y)
     73 
     74     def _get_converter(self, x=None, y=None, kind=None, **kwds):

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/converter.py in __call__(self, kind, x, y)
    942                 obj = DynamicMap(cbcallable, streams=[self.stream])
    943             else:
--> 944                 obj = method(x, y)
    945 
    946         if self.crs and self.project:

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/converter.py in hist(self, x, y, data)
   1383             if self.by:
   1384                 hist = hists = histogram(
-> 1385                     ds.groupby(self.by), dimension=y, **hist_opts
   1386                 )
   1387                 hist = hists.last

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/param/parameterized.py in __new__(class_, *args, **params)
   2810         inst = class_.instance()
   2811         inst.param._set_name(class_.__name__)
-> 2812         return inst.__call__(*args,**params)
   2813 
   2814     def __call__(self,*args,**kw):

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in __call__(self, element, **kwargs)
    162         elif 'streams' not in kwargs:
    163             kwargs['streams'] = self.p.streams
--> 164         return element.apply(self, **kwargs)
    165 
    166 

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/accessors.py in __call__(self, function, streams, link_inputs, dynamic, **kwargs)
    113             for k, v in self._obj.data.items():
    114                 new_val = v.apply(function, dynamic=dynamic, streams=streams,
--> 115                                   link_inputs=link_inputs, **kwargs)
    116                 if new_val is not None:
    117                     mapped.append((k, new_val))

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/accessors.py in __call__(self, function, streams, link_inputs, dynamic, **kwargs)
    108             if hasattr(function, 'dynamic'):
    109                 inner_kwargs['dynamic'] = False
--> 110             return function(self._obj, **inner_kwargs)
    111         elif self._obj._deep_indexable:
    112             mapped = []

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in __call__(self, element, **kwargs)
    159                                       for k, el in element.items()])
    160             elif isinstance(element, ViewableElement):
--> 161                 return self._apply(element)
    162         elif 'streams' not in kwargs:
    163             kwargs['streams'] = self.p.streams

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in _apply(self, element, key)
    119         for hook in self._preprocess_hooks:
    120             kwargs.update(hook(self, element))
--> 121         ret = self._process(element, key)
    122         for hook in self._postprocess_hooks:
    123             ret = hook(self, ret, **kwargs)

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/operation/element.py in _process(self, element, key)
    657                 hist *= edges[1]-edges[0]
    658         return Histogram((edges, hist), kdims=[element.get_dimension(selected_dim)],
--> 659                          label=element.label, **params)
    660 
    661 

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/element/chart.py in __init__(self, data, edges, **params)
    196         elif isinstance(data, tuple) and len(data) == 2 and len(data[0])+1 == len(data[1]):
    197             data = data[::-1]
--> 198         super(Histogram, self).__init__(data, **params)
    199 
    200 

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/data/__init__.py in __init__(self, data, kdims, vdims, **kwargs)
    209         validate_vdims = kwargs.pop('_validate_vdims', True)
    210         initialized = Interface.initialize(type(self), data, kdims, vdims,
--> 211                                            datatype=kwargs.get('datatype'))
    212         (data, self.interface, dims, extra_kws) = initialized
    213         super(Dataset, self).__init__(data, **dict(kwargs, **dict(dims, **extra_kws)))

~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/data/interface.py in initialize(cls, eltype, data, kdims, vdims, datatype)
    252                                   % (intfc.__name__, e))
    253                 error = ' '.join([error, priority_error])
--> 254             raise DataError(error)
    255 
    256         return data, interface, dims, extra_kws

DataError: None of the available storage backends were able to support the supplied data format.

我知道可以使用 hvplot 成功地为数据框 test 的子集生成 car_ins_renew_month 和 age 的直方图,因为当 people_type 等于 active_interest 时我能够做到这一点。我只是无法为 people_type 等于 previous_customer 的子集生成 age 的直方图。

我确实注意到一件事:数据框 test_prev_cust 在 age_band 的以下两个类别中没有任何人:

In [18]: test_prev_cust["age_band"].value_counts()
Out[18]: 
45-54    13457
55-64    10369
35-44     8760
65+       7801
25-34        0
18-24        0
Name: age_band, dtype: int64

这可能是我的问题的原因吗?如果是这样,那么有没有办法解决这个问题,并且仍然在我的图中包含 age_band 悬停数据?

谢谢


软件版本:

bokeh                     1.4.0                    py37_0    
cartopy                   0.17.0           py37haea56ea_1   
colorcet                  2.0.2                      py_0    pyviz
feather-format            0.4.0                   py_1003    conda-forge 
geoviews                  1.6.5                      py_0    pyviz
holoviews                 1.12.6                     py_0    pyviz 
hvplot                    0.5.2                      py_0    pyviz   
jupyter                   1.0.0                    py37_7  
matplotlib                3.1.1            py37h54f8f79_0     
notebook                  6.0.2                    py37_0   
numpy                     1.17.3           py37h4174a10_0  
pandas                    0.25.3           py37h0a44026_0  
panel                     0.7.0                      py_0    pyviz
plotly                    4.3.0                      py_0    plotly
plotly_express            0.4.1                      py_0    plotly
python                    3.7.5                h359304d_0  
seaborn                   0.9.0              pyh91ea838_1  

我在 os x Catalina 上,使用最新版本的 Firefox,我正在使用 Jupyter 笔记本。

4

1 回答 1

1

问题的原因是:您的变量 age_band 是分类(category)类型,其中某些类别的计数为 0,而您又将它与关键字 by=['age_band'] 一起使用。

您可以尝试将 age_band 转换为字符串(例如 test_prev_cust["age_band"].astype(str)),但在这种情况下,我认为创建条形图更好:

age_band_group = df.groupby(['age_band']
    ).agg(count=('age', np.size)
    ).fillna(0)

age_band_group.hvplot.bar(color='teal')
于 2020-01-15T20:53:27.797 回答