python - 在 python 中使用贝叶斯模型进行预测的问题

Question

我正在使用简单的泰坦尼克数据集，通过贝叶斯网络预测乘客是否幸存。虽然我能够通过结构学习得到网络结构，但是在我把测试数据集传给贝叶斯模型进行预测之后，它抛出了 KeyError，尽管我认为数据已经以正确的格式传入。请参阅贝叶斯模型文档：https://pgmpy.org/_modules/pgmpy/models/BayesianModel.html

from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD, DiscreteFactor 
from pgmpy.inference import BeliefPropagation
from pgmpy.inference import VariableElimination
from pgmpy.estimators import MaximumLikelihoodEstimator,BayesianEstimator,ConstraintBasedEstimator,HillClimbSearch, BicScore,K2Score,ExhaustiveSearch


import numpy as np
import pandas as pd
import networkx as nx 
import matplotlib.pyplot as plt 
import seaborn as sns

# Network structure (obtained via structure learning), given as directed
# (parent, child) edges.
BN_Model = BayesianModel([
    ('Embarked', 'Fare'),
    ('Fare', 'Pclass'),
    ('Parch', 'Age'),
    ('Parch', 'Fare'),
    ('Parch', 'SibSp'),
    ('Parch', 'Sex'),
    ('Pclass', 'Survived'),
    ('Pclass', 'Age'),
    ('Sex', 'Survived'),
    ('SibSp', 'Fare'),
    ('SibSp', 'Sex'),
])
nx.draw_networkx(BN_Model, with_labels=True)
plt.show()

# Estimate the CPDs from the training frame (`train` is loaded elsewhere).
BN_Model.fit(train, estimator=MaximumLikelihoodEstimator)

# Fill missing fares in the test frame with the column mean.
test['Fare'] = test['Fare'].replace(np.nan, test['Fare'].mean())

# NOTE(review): this call is what raises the KeyError shown in the traceback.
# The traceback ends in a state-name lookup (`name_to_no[var][state_name]`),
# so an evidence value that never occurred in `train` has no known state.
# 7.8292 is presumably a raw 'Fare' value; continuous columns such as 'Fare'
# likely need to be binned identically in train and test before fit/predict.
x = BN_Model.predict(test[['Embarked', 'Fare', 'Parch', 'Pclass', 'Sex', 'SibSp']])


---------------------------------------------------------------------------
_RemoteTraceback                          Traceback (most recent call last)
_RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py", line 418, in _process_worker
    r = call_item()
  File "/opt/conda/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py", line 272, in __call__
    return self.fn(*self.args, **self.kwargs)
  File "/opt/conda/lib/python3.6/site-packages/joblib/_parallel_backends.py", line 608, in __call__
    return self.func(*args, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/joblib/parallel.py", line 256, in __call__
    for func, args, kwargs in self.items]
  File "/opt/conda/lib/python3.6/site-packages/joblib/parallel.py", line 256, in <listcomp>
    for func, args, kwargs in self.items]
  File "/opt/conda/lib/python3.6/site-packages/pgmpy/inference/ExactInference.py", line 370, in map_query
    show_progress=show_progress,
  File "/opt/conda/lib/python3.6/site-packages/pgmpy/inference/ExactInference.py", line 157, in _variable_elimination
    working_factors = self._get_working_factors(evidence)
  File "/opt/conda/lib/python3.6/site-packages/pgmpy/inference/ExactInference.py", line 44, in _get_working_factors
    [(evidence_var, evidence[evidence_var])], inplace=False
  File "/opt/conda/lib/python3.6/site-packages/pgmpy/factors/discrete/DiscreteFactor.py", line 428, in reduce
    (var, self.get_state_no(var, state_name)) for var, state_name in values
  File "/opt/conda/lib/python3.6/site-packages/pgmpy/factors/discrete/DiscreteFactor.py", line 428, in <listcomp>
    (var, self.get_state_no(var, state_name)) for var, state_name in values
  File "/opt/conda/lib/python3.6/site-packages/pgmpy/utils/state_name.py", line 74, in get_state_no
    return self.name_to_no[var][state_name]
KeyError: 7.8292
"""

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
<ipython-input-105-37e427dce88d> in <module>
----> 1 x=BN_Model.predict(test[['Embarked','Fare' ,'Parch', 'Pclass', 'Sex', 'SibSp']])
      2 
      3 

/opt/conda/lib/python3.6/site-packages/pgmpy/models/BayesianModel.py in predict(self, data, n_jobs)
    592             )
    593             for index, data_point in tqdm(
--> 594                 data_unique.iterrows(), total=data_unique.shape[0]
    595             )
    596         )

/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in __call__(self, iterable)
   1015 
   1016             with self._backend.retrieval_context():
-> 1017                 self.retrieve()
   1018             # Make sure that we get a last message telling us we are done
   1019             elapsed_time = time.time() - self._start_time

/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in retrieve(self)
    907             try:
    908                 if getattr(self._backend, 'supports_timeout', False):
--> 909                     self._output.extend(job.get(timeout=self.timeout))
    910                 else:
    911                     self._output.extend(job.get())

/opt/conda/lib/python3.6/site-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
    560         AsyncResults.get from multiprocessing."""
    561         try:
--> 562             return future.result(timeout=timeout)
    563         except LokyTimeoutError:
    564             raise TimeoutError()

/opt/conda/lib/python3.6/concurrent/futures/_base.py in result(self, timeout)
    430                 raise CancelledError()
    431             elif self._state == FINISHED:
--> 432                 return self.__get_result()
    433             else:
    434                 raise TimeoutError()

/opt/conda/lib/python3.6/concurrent/futures/_base.py in __get_result(self)
    382     def __get_result(self):
    383         if self._exception:
--> 384             raise self._exception
    385         else:
    386             return self._result

KeyError: 7.8292

'''

score 0 · Accepted Answer

您描述的问题本质上也是推理，建议尝试使用 bnlearn 进行推理。这篇博客给出了结构学习和推理的分步指南。

安装环境：

# Create and activate a dedicated conda environment (Python 3.8) named env_bnlearn.
conda create -n env_bnlearn python=3.8
conda activate env_bnlearn

# Install bnlearn into the active environment.
pip install bnlearn

现在您可以像这样对幸存者进行推断：

import bnlearn as bn

# Load titanic dataset containing mixed variables
df_raw = bn.import_example(data='titanic')

# Pre-processing of the input dataset.
# Two frames are returned; going by the names, `dfhot` is the one-hot
# encoding and `dfnum` the numerically encoded frame.
dfhot, dfnum = bn.df2onehot(df_raw)

# Structure learning
DAG = bn.structure_learning.fit(dfnum)

# Plot
G = bn.plot(DAG)

# Parameter learning.
# Fit on `dfnum`, the same frame the DAG was learned from; no variable
# named `df` exists in this script.
model = bn.parameter_learning.fit(DAG, dfnum)

# Print CPDs
bn.print_CPD(model)

# Make inference: query 'Survived' given evidence on 'Sex' and 'Pclass'.
# NOTE(review): the evidence values are presumably the encoded states from
# `dfnum` -- confirm the encoding before interpreting the result.
q = bn.inference.fit(model, variables=['Survived'], evidence={'Sex': 0, 'Pclass': 1})

print(q.values)
print(q.df)

更多示例可以在这里找到。

score 0 · Accepted Answer

对于预测，最好使用 sklearn 库。尽管 pgmpy 包含贝叶斯功能，但它的目标与您所描述的不同。

对于预测，我将使用以下库：

# The PyPI distribution is named scikit-learn; `sklearn` is only the import
# name, and the deprecated `sklearn` alias package no longer installs.
pip install scikit-learn
pip install df2onehot
pip install classeval

进行预测的建议：

import classeval
import df2onehot
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Import titanic dataset
X = df2onehot.import_example()

# Separate the target from the features (pop also removes the column from X).
y = X.pop('Survived')

# Make one-hot, remove numeric variables and features that contain less than 2 samples.
X = df2onehot.df2onehot(X, y_min=2)['onehot']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Multinomial Naive Bayes: everything is one-hot encoded now, so it is perhaps
# the most appropriate choice if you decide to go for Bayes.
model = MultinomialNB()

# Train the model using the training set
model.fit(X_train, y_train)

# Predict output: hard labels and per-class probabilities
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)

# Evaluate results.
# predict_proba columns follow model.classes_ (sorted labels), so column 1 is
# the probability of the positive class (Survived == 1). classeval.eval
# expects these probabilities, not the hard labels or the class-0 column.
# NOTE(review): confirm against the installed classeval version's signature.
results = classeval.eval(y_test.values.astype(bool), y_proba[:, 1])
classeval.plot(results)

python - 在 python 中使用贝叶斯模型进行预测的问题

2 回答 2

Related

Reference