在 DEAP 开发人员的帮助下,我能够解决这个问题。对于那些发现自己处于我的位置的人,下面是 DEAP GP 算法的一些工作代码,该算法将 DataFrame 的两列中的值优化为 0。示例问题显然是微不足道且无用的;它是 DEAP 在 DataFrames 上工作的一个简单示例。
import operator
import math
import random
import numpy as np
import pandas as pd
from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp
def add_5(input_df):
return input_df + 5.
def subtract_5(input_df):
return input_df - 5.
def multiply_5(input_df):
return input_df * 5.
def divide_5(input_df):
return input_df / 5.
pset = gp.PrimitiveSet('MAIN', 1)
pset.addPrimitive(add_5, 1)
pset.addPrimitive(subtract_5, 1)
pset.addPrimitive(multiply_5, 1)
pset.addPrimitive(divide_5, 1)
creator.create('FitnessMin', base.Fitness, weights=(-1.0,))
creator.create('Individual', gp.PrimitiveTree, fitness=creator.FitnessMin)
toolbox = base.Toolbox()
toolbox.register('expr', gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register('individual', tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register('population', tools.initRepeat, list, toolbox.individual)
toolbox.register('compile', gp.compile, pset=pset)
def evalSymbReg(individual, points):
# Transform the tree expression in a callable function
func = toolbox.compile(expr=individual)
result = func(points)
return abs(result.column1.sum() + result.column2.sum()),
toolbox.register('evaluate', evalSymbReg, points=pd.DataFrame({'column1': [125] * 500, 'column2': [125] * 500}))
toolbox.register('select', tools.selTournament, tournsize=3)
toolbox.register('mate', gp.cxOnePoint)
toolbox.register('expr_mut', gp.genFull, min_=0, max_=2)
toolbox.register('mutate', gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
if __name__ == '__main__':
pop = toolbox.population(n=100)
hof = tools.HallOfFame(1)
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register('avg', np.mean)
stats.register('min', np.min)
stats.register('max', np.max)
pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 20, stats=stats, halloffame=hof)