我正在尝试在 Altair 中重新创建 pyLDAvis 图表。我有一个包含很多 transform 的 Vega-Lite(VL)规范,但我无法将其转换为 Altair 代码。感谢这里和这里的回答,它们帮助我走到了这一步。
我想我已经接近了,但我收到以下错误:
altair.vegalite.v4.schema.channels.ColorValue, validating 'additionalProperties'
Additional properties are not allowed ('selection' was unexpected)
最后,我最关心的是我是否已将所有 transforms 从 VL 正确地转换为 Altair。
非常感谢任何帮助,因为我认为这将是对 NLP/主题建模社区的一个很好的贡献。
import altair as alt
import pandas as pd
import numpy as np
# Minimal sample of the term/topic table, one list per column (six rows).
# The first two rows (Category 'Default') carry no topic information, so
# their x, y, topics, cluster and Freq_y entries are NaN.
data = {
    'Term':         ['algorithm', 'learning', 'learning', 'algorithm', 'algorithm', 'learning'],
    'Freq_x':       [1330, 1353, 304.42, 296.69, 157.59, 140.35],
    'Total':        [1330, 1353, 1353.7, 1330.47, 1330.47, 1353.7],
    'Category':     ['Default', 'Default', 'Topic1', 'Topic1', 'Topic2', 'Topic2'],
    'logprob':      [30.0, 27.0, -5.116, -5.1418, -5.4112, -5.5271],
    'loglift':      [30.0, 27.0, 0.0975, 0.0891, -0.1803, -0.3135],
    'saliency_ind': [0, 3, 76, 77, 181, 186],
    'x':            [np.nan, np.nan, -0.0080, -0.0080, -0.0053, -0.0053],
    'y':            [np.nan, np.nan, -0.0056, -0.0056, 0.0003, 0.0003],
    'topics':       [np.nan, np.nan, 1.0, 1.0, 2.0, 2.0],
    'cluster':      [np.nan, np.nan, 1.0, 1.0, 1.0, 1.0],
    'Freq_y':       [np.nan, np.nan, 20.39, 20.39, 14.18, 14.18],
}
df = pd.DataFrame(data=data)
# Single selection projected onto the 'Category' field. With empty='none'
# an empty selection matches no marks.
pts = alt.selection_single(fields=['Category'], empty='none')

# Scatter layer: one circle per Category (via the detail channel), placed at
# the mean x / mean y of its rows and sized by Freq_y. No data is attached
# here; the chart expects it to be supplied by an enclosing chart.
points = (
    alt.Chart()
    .mark_circle(tooltip=True)
    .encode(
        x=alt.X('mean(x)'),
        y=alt.Y('mean(y)'),
        size=alt.Size('Freq_y'),
        detail=alt.Detail('Category'),
        tooltip=['topics', 'cluster'],
        # Marks matched by the selection take the first colour value,
        # every other mark the second.
        color=alt.condition(pts, alt.value('#F28E2B'), alt.value('#4E79A7')),
    )
    .add_selection(pts)
)
# Base chart holding the data transforms shared by both bar layers.
#
# The transforms have to be part of the charts that actually draw the bars.
# Layering a mark-less chart next to them (`trans + b1`) does not work: a
# layer member without a mark is not a valid unit spec, and transforms on one
# layer are never applied to its sibling layers. Both bar charts are therefore
# derived from `trans` below.
trans = alt.Chart().transform_joinaggregate(
    max_fx='max(Freq_x)'
).transform_calculate(
    # Selection names are auto-generated, so read the name from `pts`
    # instead of hard-coding one. Falls back to an empty array while
    # nothing is selected.
    filterCategory=f"{pts.name}['Category'] ? {pts.name}['Category'] : []"
).transform_calculate(
    # Keep Freq_x only for rows whose Category is currently selected.
    filtered_Freq_x="indexof(datum.filterCategory,datum['Category']) > -1 ? datum['Freq_x'] : null"
).transform_window(
    Sorted='rank()',
    # Sort fields in transforms are plain field names; the ':Q' type
    # shorthand is only understood inside encodings.
    sort=[alt.SortField(field='filtered_Freq_x', order='descending')]
)

# Order the terms on the y axis by the rank computed in the window transform.
term_axis = alt.Y('Term', sort=alt.EncodingSortField(field='Sorted', order='ascending'))

# Background bars: overall frequency of every term.
b1 = trans.mark_bar().encode(
    x='Freq_x',
    y=term_axis,
    tooltip=['Total'],
)

# Foreground bars: frequency within the selected Category only.
b2 = trans.mark_bar(color='#F28E2B').encode(
    x='filtered_Freq_x:Q',
    y=term_axis,
    tooltip=['Total'],
)

# Each layer already carries the transforms, so no extra layering with
# `trans` is needed; the names are kept for any later references.
bars_1 = b1
bars_2 = b2

# The data is attached once at the top level and inherited by every sub-chart.
alt.hconcat(points, bars_1 + bars_2, data=df).resolve_legend(
    color="independent",
    size="independent"
)