9 遗传算法概览

目标函数可以选择性地接受 step、budget 和 generations 参数。

step - 同一个目标函数会被运行 evaluation_early_stop_steps 次，每次运行时当前步数会以整数形式传给函数。（例如，可以用它来取得交叉验证中的某一折。）

budget - 在迭代过程中变化的参数，以介于 0 和 1 之间的浮点数传递给目标函数。如果某个个体上次评估时的 budget 小于当前 budget，则会对其重新评估。这在训练早期使用较小数据集时很有用。

generations - 对应于当前代数的整数。

输入 [1]

已复制！





#knapsack problem
import numpy as np
import tpot
import random
import matplotlib.pyplot as plt
from dask.distributed import Client, LocalCluster

class SubsetSelector(tpot.individual.BaseIndividual):
    """Individual whose genome is a subset of a fixed pool of candidate values.

    Attributes:
        values: set holding the full pool of selectable items.
        subsets: set holding the items currently selected by this individual.
        k: number of items drawn for the initial random subset, and the
            maximum number of items added or removed by a single mutation.
        mutation_list: mutation callables tried (in random order) by `mutate`.
        crossover_list: crossover callables tried (in random order) by
            `crossover`.
    """

    def __init__(self,
                 values,
                 initial_set=None,
                 k=1,  # step size: items drawn initially / changed per mutation
                 ):
        """Build the individual.

        Args:
            values: either an int ``n`` (the pool becomes ``0..n-1``) or an
                iterable of hashable items forming the pool.
            initial_set: optional iterable used as the starting subset. When
                omitted, ``k`` items are drawn (with replacement, then
                de-duplicated) from the pool.
            k: step size, see the class docstring.
        """
        if isinstance(values, int):
            self.values = set(range(0, values))
        else:
            self.values = set(values)

        if initial_set is None:
            # Sample from the materialized pool rather than the raw argument:
            # `values` may be an int, which random.choices cannot index.
            self.subsets = set(random.choices(list(self.values), k=k))
        else:
            self.subsets = set(initial_set)

        self.k = k

        self.mutation_list = [self._mutate_add, self._mutate_remove]
        self.crossover_list = [self._crossover_swap]

    def mutate(self, rng=None):
        """Apply the first mutation (tried in random order) that succeeds.

        `rng` is accepted but not used; randomness comes from the global
        `random` module.

        Returns:
            True if some mutation changed this individual, False otherwise.
        """
        mutation_list_copy = self.mutation_list.copy()
        random.shuffle(mutation_list_copy)
        for func in mutation_list_copy:
            if func():
                return True
        return False

    def crossover(self, ind2, rng=None):
        """Apply the first crossover (tried in random order) that succeeds.

        `rng` is accepted but not used; randomness comes from the global
        `random` module.

        Returns:
            True if some crossover was applied with `ind2`, False otherwise.
        """
        crossover_list_copy = self.crossover_list.copy()
        random.shuffle(crossover_list_copy)
        for func in crossover_list_copy:
            if func(ind2):
                return True
        return False

    def _mutate_add(self):
        """Add up to `k` not-yet-selected items; return whether any were added."""
        not_included = list(self.values.difference(self.subsets))
        if not not_included:
            return False
        # min() keeps random.sample valid when fewer than k items remain,
        # including the case where exactly one item is left to add.
        self.subsets.update(random.sample(not_included, k=min(self.k, len(not_included))))
        return True

    def _mutate_remove(self):
        """Remove up to `k` selected items, always keeping at least one.

        Returns:
            True if items were removed, False if the subset has at most one
            item and nothing could be removed.
        """
        if len(self.subsets) <= 1:
            return False
        to_drop = random.sample(list(self.subsets), k=min(self.k, len(self.subsets) - 1))
        self.subsets = self.subsets - set(to_drop)
        return True

    def _crossover_swap(self, ss2):
        """Randomly redistribute the items on which the two parents differ.

        Every item present in exactly one of the two subsets is removed from
        both and then given to one of them chosen at random. Both `self` and
        `ss2` are modified in place.

        Returns:
            False if the two subsets are identical (nothing to swap), True
            otherwise.
        """
        diffs = self.subsets.symmetric_difference(ss2.subsets)

        if len(diffs) == 0:
            return False
        for v in diffs:
            self.subsets.discard(v)
            ss2.subsets.discard(v)
            random.choice([self.subsets, ss2.subsets]).add(v)

        return True

    def unique_id(self):
        """Return a string key built from the sorted selected items."""
        return str(tuple(sorted(self.subsets)))

def individual_generator():
    """Endlessly yield fresh SubsetSelector individuals.

    Each individual's pool is the index range ``0 .. len(values) - 1`` of the
    module-level `values` array, re-read on every iteration.
    """
    while True:
        item_indices = np.arange(len(values))
        yield SubsetSelector(values=item_indices)


# Random knapsack instance: one value and one weight per item, so both arrays
# share the same length and can be indexed by the same item indices.
n_items = 100
values = np.random.randint(200, size=n_items)  # integer values in [0, 200)
weights = np.random.random(n_items) * 10       # float weights in [0, 10)
max_weight = 50  # capacity limit checked in simple_objective

def simple_objective(ind, **kwargs):
    """Score an individual as ``(total value, total weight)``.

    The selected indices in ``ind.subsets`` index into the module-level
    `values` and `weights` arrays. An empty selection scores ``(0, 0)``; a
    selection heavier than `max_weight` keeps its weight but has its value
    forced to 0. Extra keyword arguments are accepted and ignored.
    """
    chosen = np.array(list(ind.subsets))
    if not len(chosen):
        return 0, 0

    weight_sum = np.sum(weights[chosen])
    value_sum = np.sum(values[chosen])

    # Overweight selections earn nothing but still report their weight.
    return (0 if weight_sum > max_weight else value_sum), weight_sum

# simple_objective returns two numbers per individual; these are their labels
# and the per-objective weights handed to the evolver.
# NOTE(review): presumably +1 means "maximize" and -1 "minimize" when
# bigger_is_better is True -- confirm against the TPOT documentation.
objective_names = ["Value", "Weight"]
objective_function_weights = [1, -1]

evolver = tpot.evolvers.BaseEvolver(
    individual_generator=individual_generator(),
    objective_functions=[simple_objective],
    objective_function_weights=objective_function_weights,
    bigger_is_better=True,
    population_size=100,
    objective_names=objective_names,
    generations=100,
    n_jobs=32,
    verbose=1,
)

evolver.optimize()
#背包问题 import numpy as np import tpot import random import matplotlib.pyplot as plt from dask.distributed import Client, LocalCluster class SubsetSelector(tpot.individual.BaseIndividual): def __init__( self, values, initial_set = None, k=1, #洗牌的步长 ): if isinstance(values, int): self.values = set(range(0,values)) else: self.values = set(values) if initial_set is None: self.subsets = set(random.choices(values, k=k)) else: self.subsets = set(initial_set) self.k = k self.mutation_list = [self._mutate_add, self._mutate_remove] self.crossover_list = [self._crossover_swap] def mutate(self, rng=None): mutation_list_copy = self.mutation_list.copy() random.shuffle(mutation_list_copy) for func in mutation_list_copy: if func(): return True return False def crossover(self, ind2, rng=None): crossover_list_copy = self.crossover_list.copy() random.shuffle(crossover_list_copy) for func in crossover_list_copy: if func(ind2): return True return False def _mutate_add(self,): not_included = list(self.values.difference(self.subsets)) if len(not_included) > 1: self.subsets.update(random.sample(not_included, k=min(self.k, len(not_included)))) return True else: return False def _mutate_remove(self,): if len(self.subsets) > 1: self.subsets = self.subsets - set(random.sample(list(self.subsets), k=min(self.k, len(self.subsets)-1) )) def _crossover_swap(self, ss2): diffs = self.subsets.symmetric_difference(ss2.subsets) if len(diffs) == 0: return False for v in diffs: self.subsets.discard(v) ss2.subsets.discard(v) random.choice([self.subsets, ss2.subsets]).add(v) return True def unique_id(self): return str(tuple(sorted(self.subsets))) def individual_generator(): while True: yield SubsetSelector(values=np.arange(len(values))) values = np.random.randint(200,size=100) weights = np.random.random(200)*10 max_weight = 50 def simple_objective(ind, **kwargs): subset = np.array(list(ind.subsets)) if len(subset) == 0: return 0, 0 total_weight = np.sum(weights[subset]) total_value = np.sum(values[subset]) 
if total_weight > max_weight: total_value = 0 return total_value, total_weight objective_names = ["Value", "Weight"] objective_function_weights = [1,-1] evolver = tpot.evolvers.BaseEvolver( individual_generator=individual_generator(), objective_functions=[simple_objective], objective_function_weights = objective_function_weights, bigger_is_better = True, population_size= 100, objective_names = objective_names, generations= 100, n_jobs=32, verbose = 1, ) evolver.optimize()

/opt/anaconda3/envs/tpotenv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
Generation: 100%|██████████| 100/100 [01:43<00:00,  1.03s/it]

输入 [2]

已复制！





# Build the results table from a copy: reset_index()/rename() return new
# frames, so the evolver's own evaluated_individuals table is left untouched
# and this cell can be re-run safely.
final_population_results = (
    evolver.population.evaluated_individuals
    .reset_index()
    .rename(columns={'index': 'Selected Index'})
)

# Row label of the highest "Value" among all evaluated individuals.
best_idx = final_population_results["Value"].idxmax()
best_individual = final_population_results.loc[best_idx, 'Individual']
print("best subset", best_individual.subsets)
print("Best value {0}, weight {1}".format(final_population_results.loc[best_idx, "Value"],final_population_results.loc[best_idx, "Weight"]))
print()

print("All results")
# Bare expression: displayed as the cell's output in a notebook.
final_population_results
final_population_results = evolver.population.evaluated_individuals final_population_results.reset_index(inplace=True) final_population_results = final_population_results.rename(columns = {'index':'选中索引'}) best_idx = final_population_results["Value"].idxmax() best_individual = final_population_results.loc[best_idx]['Individual'] print("最佳子集", best_individual.subsets) print("最佳价值 {0}, 重量 {1}".format(final_population_results.loc[best_idx, "Value"],final_population_results.loc[best_idx, "Weight"])) print() print("所有结果") final_population_results

best subset {1, 8, 9, 16, 17, 22, 23, 24, 28, 29, 31, 42, 43, 48, 50, 61, 62, 68, 80, 89, 91, 97, 98}
Best value 3070.0, weight 49.01985602703945

All results

输出 [2]

	选定索引	值	权重	父代	变异函数	个体	代	提交时间戳	完成时间戳	评估错误	帕累托前沿
0	(40,)	89.0	9.883465	NaN	NaN	<__main__.SubsetSelector object at 0x32aa80eb0>	0.0	1.740209e+09	1.740209e+09	None	NaN
1	(45,)	116.0	6.643557	NaN	NaN	<__main__.SubsetSelector object at 0x32aa83b50>	0.0	1.740209e+09	1.740209e+09	None	NaN
2	(52,)	172.0	9.273163	NaN	NaN	<__main__.SubsetSelector object at 0x32aa81210>	0.0	1.740209e+09	1.740209e+09	None	NaN
3	(33,)	112.0	1.594347	NaN	NaN	<__main__.SubsetSelector object at 0x32aa838e0>	0.0	1.740209e+09	1.740209e+09	None	NaN
4	(37,)	90.0	3.273826	NaN	NaN	<__main__.SubsetSelector object at 0x32aa83e50>	0.0	1.740209e+09	1.740209e+09	None	NaN
...	...	...	...	...	...	...	...	...	...	...	...
9995	(1, 9, 16, 23, 24, 31, 77, 79)	998.0	11.622582	((1, 9, 16, 17, 23, 24, 31, 77), (1, 9, 16, 17...	ind_mutate	<__main__.SubsetSelector object at 0x3a739b010>	99.0	1.740209e+09	1.740209e+09	None	NaN
9996	(1, 8, 9, 16, 22, 23, 24, 28, 29, 31, 48, 49, ...	0.0	51.400433	((1, 8, 9, 16, 17, 22, 23, 24, 28, 29, 31, 48,...	ind_mutate	<__main__.SubsetSelector object at 0x3af9a4460>	99.0	1.740209e+09	1.740209e+09	None	NaN
9997	(1, 4, 8, 9, 16, 17, 23, 24, 31, 49, 68, 77, 8...	1728.0	15.997430	((1, 4, 8, 9, 16, 17, 23, 24, 31, 68, 77, 88, ...	ind_mutate	<__main__.SubsetSelector object at 0x3aa303430>	99.0	1.740209e+09	1.740209e+09	None	1.0
9998	(8, 9, 17, 23, 24, 25, 31, 51, 77)	972.0	11.991547	((8, 9, 17, 23, 24, 31, 77, 88), (8, 9, 17, 23...	ind_mutate	<__main__.SubsetSelector object at 0x3a7399600>	99.0	1.740209e+09	1.740209e+09	None	NaN
9999	(8, 23, 24, 73, 79)	648.0	12.109013	((8, 16, 17, 23, 24), (8, 16, 17, 23, 24))	ind_mutate	<__main__.SubsetSelector object at 0x3a88d4430>	99.0	1.740209e+09	1.740209e+09	None	NaN

10000 行 × 11 列

输入 [3]

已复制！





from scipy.stats import binned_statistic_2d

# Plot the mean generation of the individuals falling in each
# (Weight, Value) bin.
y = final_population_results["Value"]
x = final_population_results["Weight"]
c = final_population_results["Generation"]

# Keep the default 0-100 / 0-3000 window, but widen it when the data exceeds
# it; with fixed bounds, points beyond the last bin edge are silently dropped
# from the statistic.
x_max = max(100, float(x.max()))
y_max = max(3000, float(y.max()))

x_bins = np.linspace(0, x_max, 100)
y_bins = np.linspace(0, y_max, 100)

ret = binned_statistic_2d(x, y, c, statistic=np.mean, bins=[x_bins, y_bins])

fig, ax1 = plt.subplots(1, 1, figsize=(12, 4))

# 0.9 * x_max / y_max equals the previous fixed aspect (.03) for the default
# 100 x 3000 window, so the image keeps the same proportions.
im = ax1.imshow(ret.statistic.T, origin='lower', extent=(0, x_max, 0, y_max), vmin=0, vmax=100, aspect=0.9 * x_max / y_max)
ax1.set_xlabel("Weight")
ax1.set_ylabel("Value")
ax1.set_title("Binned Average Generation")

cbar = fig.colorbar(im,)
cbar.set_label('Generation')
plt.tight_layout()
from scipy.stats import binned_statistic_2d y = final_population_results["Value"] x = final_population_results["Weight"] c = final_population_results["Generation"] x_bins = np.linspace(0, 100, 100) y_bins = np.linspace(0, 3000, 100) ret = binned_statistic_2d(x, y, c, statistic=np.mean, bins=[x_bins, y_bins]) fig, ax1 = plt.subplots(1, 1, figsize=(12, 4)) im = ax1.imshow(ret.statistic.T, origin='lower', extent=(0,100,0,3000), vmin=0, vmax=100, aspect=.03) ax1.set_xlabel("重量") ax1.set_ylabel("价值") ax1.set_title("分箱平均代数") cbar = fig.colorbar(im,) cbar.set_label('代数') plt.tight_layout()

No description has been provided for this image