采纳 Jacobs 的建议，我编写了一个使用 pomegranate 的实现示例：
import pomegranate
import numpy
import sklearn
import sklearn.datasets
#-------------------------------------------------------------------------------
#Build a reproducible two-moons toy data set (convenient for examples).
#shuffle=False leaves the rows in generation order; the slices below treat the
#first 100 rows as one moon and the remaining rows as the other
#(presumably 100 points per moon -- confirm against make_moons).
Xmoon, ymoon = sklearn.datasets.make_moons(
    n_samples=200,
    shuffle=False,
    noise=0.05,
    random_state=0,
)
Moon1, Moon2 = Xmoon[:100], Xmoon[100:]
#The mixture model is trained on every point from both moons.
MoonsDataSet = Xmoon
#Per-point sample weights: the first 100 points (moon1) get weight 1 and the
#last 100 points (moon2) get weight 10, so moon2 counts much more heavily.
MoonWeights = numpy.repeat([1.0, 10.0], 100)
#Create the Gaussian mixture model with pomegranate, fitted from the samples.
model = pomegranate.gmm.GeneralMixtureModel.from_samples(
    #Distribution type: either a single distribution class or a list of them
    pomegranate.MultivariateGaussianDistribution,
    #Number of mixture components; required when a single class is passed above
    n_components=6,
    #Data layout: one row per point-coordinate, one column per dimension
    X=MoonsDataSet,
)
#Train the model a second time, now passing extra fitting parameters.
model.fit(
    #Data layout: one row per coordinate, one column per dimension
    X=MoonsDataSet,
    #One weight per point-coordinate
    weights=MoonWeights,
    #Lower this value for a better fit at the cost of a longer run
    #(sklearn likes better/slower fits than pomegranate by default)
    stop_threshold=0.001,
)
#Wrap the model object into a probability density python function
# f(x_vector)
def GaussianMixtureModelFunction(Point):
    """Evaluate the fitted mixture model at ``Point``.

    Args:
        Point: A single coordinate vector (e.g. ``[x, y]``) or anything
            ``numpy.array`` accepts. It is promoted to a 2-D array before
            being handed to the model, so a lone point becomes a 1-row batch.

    Returns:
        Whatever ``model.probability`` returns for that batch -- presumably
        an array holding one probability density per input row (confirm
        against the pomegranate documentation).
    """
    # The model is called with 2-D input (rows are points), so a single
    # vector is wrapped into a 1-row matrix first.
    return model.probability(numpy.atleast_2d(numpy.array(Point)))
#Evaluate the mixture model at one point (the origin) and show the result:
ExampleProbability = GaussianMixtureModelFunction(numpy.array([0, 0]))
print('ExampleProbability', ExampleProbability)