9 遗传算法概览
目标函数可以选择性地接受 step、budget 和 generations 参数。
step - 同一个目标函数将被运行 evaluation_early_stop_steps 次,每次运行时当前步数会以整数形式传入函数(例如,可用于每一步只评估交叉验证中的一个折)。
budget - 在迭代过程中变化的参数,以 0 到 1 之间的浮点数形式传入目标函数。如果某个个体上次评估时使用的 budget 小于当前 budget,则会对其重新评估。这在训练早期使用较小数据集时很有用。
generations - 对应于当前代数的整数。
输入 [1]
已复制!
#knapsack problem
import numpy as np
import tpot
import random
import matplotlib.pyplot as plt
from dask.distributed import Client, LocalCluster
class SubsetSelector(tpot.individual.BaseIndividual):
    """Individual that encodes a subset of a fixed pool of candidate values.

    Attributes:
        values: set of every candidate that may be selected.
        subsets: set of the candidates currently selected.
        k: number of candidates added or removed by one mutation (also the
            number of random draws used to build the initial subset).
        mutation_list: mutation operators tried by ``mutate``.
        crossover_list: crossover operators tried by ``crossover``.
    """

    def __init__(
        self,
        values,
        initial_set=None,
        k=1,  # step size for shuffling
    ):
        """Build the candidate pool and the initial selection.

        Args:
            values: an int ``n`` (pool becomes ``range(0, n)``) or an
                iterable of hashable candidates.
            initial_set: iterable of initially selected candidates. When
                None, ``k`` candidates are drawn at random (with
                replacement, so fewer than ``k`` may remain after
                de-duplication).
            k: mutation step size.
        """
        if isinstance(values, int):
            self.values = set(range(0, values))
        else:
            self.values = set(values)

        if initial_set is None:
            # Draw from the normalised pool rather than the raw argument so
            # that the integer form of `values` works as well.
            self.subsets = set(random.choices(list(self.values), k=k))
        else:
            self.subsets = set(initial_set)

        self.k = k
        self.mutation_list = [self._mutate_add, self._mutate_remove]
        self.crossover_list = [self._crossover_swap]

    def mutate(self, rng=None):
        """Apply the first mutation operator that succeeds, in random order.

        ``rng`` is accepted but not used; the module-level ``random`` drives
        all randomness in this class.

        Returns:
            True if an operator changed the individual, otherwise False.
        """
        operators = self.mutation_list.copy()
        random.shuffle(operators)
        for operator in operators:
            if operator():
                return True
        return False

    def crossover(self, ind2, rng=None):
        """Apply the first crossover operator that succeeds, in random order.

        Args:
            ind2: the other individual; it must expose ``subsets`` and may be
                modified in place by the operator.
            rng: accepted but not used.

        Returns:
            True if an operator was applied, otherwise False.
        """
        operators = self.crossover_list.copy()
        random.shuffle(operators)
        for operator in operators:
            if operator(ind2):
                return True
        return False

    def _mutate_add(self):
        """Add up to ``k`` not-yet-selected candidates; False if none remain."""
        not_included = list(self.values.difference(self.subsets))
        if not not_included:
            return False
        self.subsets.update(
            random.sample(not_included, k=min(self.k, len(not_included)))
        )
        return True

    def _mutate_remove(self):
        """Remove up to ``k`` selected candidates, always keeping at least one.

        Returns:
            True if candidates were removed, False if the subset has a single
            element (or none) and therefore cannot shrink.
        """
        if len(self.subsets) <= 1:
            return False
        n_remove = min(self.k, len(self.subsets) - 1)
        self.subsets = self.subsets - set(
            random.sample(list(self.subsets), k=n_remove)
        )
        # Report success so mutate() stops after this operator.
        return True

    def _crossover_swap(self, ss2):
        """Randomly redistribute the elements the two parents disagree on.

        Every element present in exactly one of the two subsets is removed
        from both and then given to one of them chosen at random. Both
        ``self`` and ``ss2`` are modified in place.

        Returns:
            False if the two subsets are identical, otherwise True.
        """
        diffs = self.subsets.symmetric_difference(ss2.subsets)
        if not diffs:
            return False
        for v in diffs:
            self.subsets.discard(v)
            ss2.subsets.discard(v)
            random.choice([self.subsets, ss2.subsets]).add(v)
        return True

    def unique_id(self):
        """Return the sorted selection rendered as a tuple string."""
        return str(tuple(sorted(self.subsets)))
def individual_generator():
    """Endlessly yield fresh SubsetSelector individuals.

    Each individual's candidate pool is the index range of the module-level
    ``values`` array, i.e. ``0 .. len(values) - 1``.
    """
    while True:
        item_indices = np.arange(len(values))
        yield SubsetSelector(values=item_indices)
# Random knapsack instance: one value and one weight per item.
values = np.random.randint(200, size=100)  # integer item values in [0, 200)
# Size the weights from `values` so both arrays always describe the same items.
weights = np.random.random(len(values)) * 10  # float item weights in [0, 10)
max_weight = 50  # capacity used by the objective function
def simple_objective(ind, **kwargs):
    """Score a subset individual as ``(total value, total weight)``.

    Args:
        ind: individual exposing ``subsets``, a collection of item indices
            into the module-level ``values`` and ``weights`` arrays.
        **kwargs: accepted and ignored.

    Returns:
        ``(0, 0)`` for an empty subset. Otherwise the summed value and summed
        weight of the selected items, with the value replaced by 0 when the
        weight exceeds ``max_weight``.
    """
    chosen = np.array(list(ind.subsets))
    if chosen.size == 0:
        return 0, 0

    weight_sum = np.sum(weights[chosen])
    value_sum = np.sum(values[chosen])

    # An overweight selection keeps its weight but is worth nothing.
    return (0 if weight_sum > max_weight else value_sum), weight_sum
# One name and one weight per value returned by simple_objective.
objective_names = ["Value", "Weight"]
# NOTE(review): presumably a positive weight means "maximise" and a negative
# one "minimise" -- confirm against the TPOT evolver documentation.
objective_function_weights = [1, -1]

evolver = tpot.evolvers.BaseEvolver(
    individual_generator=individual_generator(),
    objective_functions=[simple_objective],
    objective_function_weights=objective_function_weights,
    objective_names=objective_names,
    bigger_is_better=True,
    population_size=100,
    generations=100,
    n_jobs=32,  # NOTE(review): hard-coded worker count; adjust to the machine
    verbose=1,
)

# Run the evolutionary search.
evolver.optimize()
# knapsack problem
import numpy as np
import tpot
import random
import matplotlib.pyplot as plt
from dask.distributed import Client, LocalCluster


class SubsetSelector(tpot.individual.BaseIndividual):
    """Individual that encodes a subset of a fixed pool of candidate values.

    ``values`` holds every candidate, ``subsets`` the currently selected
    ones, and ``k`` the number of candidates a single mutation adds/removes.
    """

    def __init__(
        self,
        values,
        initial_set=None,
        k=1,  # step size for shuffling
    ):
        """Build the pool from an int ``n`` (``range(0, n)``) or an iterable.

        When ``initial_set`` is None the selection starts from ``k`` random
        draws (with replacement) from the pool.
        """
        if isinstance(values, int):
            self.values = set(range(0, values))
        else:
            self.values = set(values)

        if initial_set is None:
            # Draw from the normalised pool so the integer form of `values`
            # works as well.
            self.subsets = set(random.choices(list(self.values), k=k))
        else:
            self.subsets = set(initial_set)

        self.k = k
        self.mutation_list = [self._mutate_add, self._mutate_remove]
        self.crossover_list = [self._crossover_swap]

    def mutate(self, rng=None):
        """Apply the first mutation that succeeds, in random order.

        ``rng`` is accepted but unused. Returns True if a mutation applied.
        """
        operators = self.mutation_list.copy()
        random.shuffle(operators)
        for operator in operators:
            if operator():
                return True
        return False

    def crossover(self, ind2, rng=None):
        """Apply the first crossover that succeeds, in random order.

        ``ind2`` may be modified in place; ``rng`` is accepted but unused.
        Returns True if a crossover applied.
        """
        operators = self.crossover_list.copy()
        random.shuffle(operators)
        for operator in operators:
            if operator(ind2):
                return True
        return False

    def _mutate_add(self):
        """Add up to ``k`` not-yet-selected candidates; False if none remain."""
        not_included = list(self.values.difference(self.subsets))
        if not not_included:
            return False
        self.subsets.update(
            random.sample(not_included, k=min(self.k, len(not_included)))
        )
        return True

    def _mutate_remove(self):
        """Remove up to ``k`` candidates, keeping at least one; False if it cannot shrink."""
        if len(self.subsets) <= 1:
            return False
        n_remove = min(self.k, len(self.subsets) - 1)
        self.subsets = self.subsets - set(
            random.sample(list(self.subsets), k=n_remove)
        )
        # Report success so mutate() stops after this operator.
        return True

    def _crossover_swap(self, ss2):
        """Randomly reassign each element held by exactly one parent.

        Modifies both ``self`` and ``ss2`` in place. Returns False when the
        two subsets are identical, otherwise True.
        """
        diffs = self.subsets.symmetric_difference(ss2.subsets)
        if not diffs:
            return False
        for v in diffs:
            self.subsets.discard(v)
            ss2.subsets.discard(v)
            random.choice([self.subsets, ss2.subsets]).add(v)
        return True

    def unique_id(self):
        """Return the sorted selection rendered as a tuple string."""
        return str(tuple(sorted(self.subsets)))


def individual_generator():
    """Endlessly yield individuals whose pool is the index range of ``values``."""
    while True:
        yield SubsetSelector(values=np.arange(len(values)))


# Random knapsack instance: one value and one weight per item.
values = np.random.randint(200, size=100)
# Size the weights from `values` so both arrays describe the same items.
weights = np.random.random(len(values)) * 10
max_weight = 50


def simple_objective(ind, **kwargs):
    """Return ``(total value, total weight)`` of the selected items.

    An empty subset scores ``(0, 0)``; a selection heavier than
    ``max_weight`` has its value replaced by 0. ``kwargs`` are ignored.
    """
    subset = np.array(list(ind.subsets))
    if len(subset) == 0:
        return 0, 0
    total_weight = np.sum(weights[subset])
    total_value = np.sum(values[subset])
    if total_weight > max_weight:
        total_value = 0
    return total_value, total_weight


objective_names = ["Value", "Weight"]
objective_function_weights = [1, -1]

evolver = tpot.evolvers.BaseEvolver(
    individual_generator=individual_generator(),
    objective_functions=[simple_objective],
    objective_function_weights=objective_function_weights,
    bigger_is_better=True,
    population_size=100,
    objective_names=objective_names,
    generations=100,
    n_jobs=32,  # NOTE(review): hard-coded worker count; adjust to the machine
    verbose=1,
)
evolver.optimize()
/opt/anaconda3/envs/tpotenv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm Generation: 100%|██████████| 100/100 [01:43<00:00, 1.03s/it]
输入 [2]
已复制!
# Build the results table from a reset-index copy instead of resetting the
# evolver's own table in place, so re-running this cell does not keep adding
# index columns to evolver.population.evaluated_individuals.
final_population_results = (
    evolver.population.evaluated_individuals
    .reset_index()
    .rename(columns={'index': 'Selected Index'})
)

# Row with the highest "Value" objective.
best_idx = final_population_results["Value"].idxmax()
best_individual = final_population_results.loc[best_idx, 'Individual']

print("best subset", best_individual.subsets)
print("Best value {0}, weight {1}".format(
    final_population_results.loc[best_idx, "Value"],
    final_population_results.loc[best_idx, "Weight"],
))
print()
print("All results")
# Bare expression: displayed as the cell output in a notebook.
final_population_results
# Build the results table from a reset-index copy instead of resetting the
# evolver's own table in place, so re-running this cell does not keep adding
# index columns to evolver.population.evaluated_individuals.
final_population_results = (
    evolver.population.evaluated_individuals
    .reset_index()
    .rename(columns={'index': '选中索引'})
)

# Row with the highest "Value" objective.
best_idx = final_population_results["Value"].idxmax()
best_individual = final_population_results.loc[best_idx, 'Individual']

print("最佳子集", best_individual.subsets)
print("最佳价值 {0}, 重量 {1}".format(
    final_population_results.loc[best_idx, "Value"],
    final_population_results.loc[best_idx, "Weight"],
))
print()
print("所有结果")
# Bare expression: displayed as the cell output in a notebook.
final_population_results
best subset {1, 8, 9, 16, 17, 22, 23, 24, 28, 29, 31, 42, 43, 48, 50, 61, 62, 68, 80, 89, 91, 97, 98} Best value 3070.0, weight 49.01985602703945 All results
输出 [2]
| | 选定索引 | 值 | 权重 | 父代 | 变异函数 | 个体 | 代 | 提交时间戳 | 完成时间戳 | 评估错误 | 帕累托前沿 |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | (40,) | 89.0 | 9.883465 | NaN | NaN | <__main__.SubsetSelector object at 0x32aa80eb0> | 0.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
1 | (45,) | 116.0 | 6.643557 | NaN | NaN | <__main__.SubsetSelector object at 0x32aa83b50> | 0.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
2 | (52,) | 172.0 | 9.273163 | NaN | NaN | <__main__.SubsetSelector object at 0x32aa81210> | 0.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
3 | (33,) | 112.0 | 1.594347 | NaN | NaN | <__main__.SubsetSelector object at 0x32aa838e0> | 0.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
4 | (37,) | 90.0 | 3.273826 | NaN | NaN | <__main__.SubsetSelector object at 0x32aa83e50> | 0.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9995 | (1, 9, 16, 23, 24, 31, 77, 79) | 998.0 | 11.622582 | ((1, 9, 16, 17, 23, 24, 31, 77), (1, 9, 16, 17... | ind_mutate | <__main__.SubsetSelector object at 0x3a739b010> | 99.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
9996 | (1, 8, 9, 16, 22, 23, 24, 28, 29, 31, 48, 49, ... | 0.0 | 51.400433 | ((1, 8, 9, 16, 17, 22, 23, 24, 28, 29, 31, 48,... | ind_mutate | <__main__.SubsetSelector object at 0x3af9a4460> | 99.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
9997 | (1, 4, 8, 9, 16, 17, 23, 24, 31, 49, 68, 77, 8... | 1728.0 | 15.997430 | ((1, 4, 8, 9, 16, 17, 23, 24, 31, 68, 77, 88, ... | ind_mutate | <__main__.SubsetSelector object at 0x3aa303430> | 99.0 | 1.740209e+09 | 1.740209e+09 | None | 1.0 |
9998 | (8, 9, 17, 23, 24, 25, 31, 51, 77) | 972.0 | 11.991547 | ((8, 9, 17, 23, 24, 31, 77, 88), (8, 9, 17, 23... | ind_mutate | <__main__.SubsetSelector object at 0x3a7399600> | 99.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
9999 | (8, 23, 24, 73, 79) | 648.0 | 12.109013 | ((8, 16, 17, 23, 24), (8, 16, 17, 23, 24)) | ind_mutate | <__main__.SubsetSelector object at 0x3a88d4430> | 99.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
10000 行 × 11 列
输入 [3]
已复制!
from scipy.stats import binned_statistic_2d

# Columns of the results table: weight on x, value on y, generation as colour.
x = final_population_results["Weight"]
y = final_population_results["Value"]
c = final_population_results["Generation"]

# 100 evenly spaced bin edges per axis (99 bins each).
x_bins = np.linspace(0, 100, 100)
y_bins = np.linspace(0, 3000, 100)

# Mean generation of the individuals that fall into each (weight, value) bin.
ret = binned_statistic_2d(
    x,
    y,
    c,
    statistic=np.mean,
    bins=[x_bins, y_bins],
)

fig, ax1 = plt.subplots(1, 1, figsize=(12, 4))
# Transposed so that weight runs along the horizontal axis.
im = ax1.imshow(
    ret.statistic.T,
    origin='lower',
    extent=(0, 100, 0, 3000),
    vmin=0,
    vmax=100,
    aspect=0.03,
)
ax1.set_title("Binned Average Generation")
ax1.set_xlabel("Weight")
ax1.set_ylabel("Value")

cbar = fig.colorbar(im)
cbar.set_label('Generation')
plt.tight_layout()
from scipy.stats import binned_statistic_2d

# Columns of the results table: weight on x, value on y, generation as colour.
x = final_population_results["Weight"]
y = final_population_results["Value"]
c = final_population_results["Generation"]

# 100 evenly spaced bin edges per axis (99 bins each).
x_bins = np.linspace(0, 100, 100)
y_bins = np.linspace(0, 3000, 100)

# Mean generation of the individuals that fall into each (weight, value) bin.
ret = binned_statistic_2d(
    x,
    y,
    c,
    statistic=np.mean,
    bins=[x_bins, y_bins],
)

fig, ax1 = plt.subplots(1, 1, figsize=(12, 4))
# Transposed so that weight runs along the horizontal axis.
im = ax1.imshow(
    ret.statistic.T,
    origin='lower',
    extent=(0, 100, 0, 3000),
    vmin=0,
    vmax=100,
    aspect=0.03,
)
ax1.set_title("分箱平均代数")
ax1.set_xlabel("重量")
ax1.set_ylabel("价值")

cbar = fig.colorbar(im)
cbar.set_label('代数')
plt.tight_layout()