Genetic Optimization
In computer science and operations research, a genetic algorithm (GA) is a metaheuristic inspired by the process of natural selection that belongs to the larger class of evolutionary algorithms (EA). Genetic algorithms are commonly used to generate high-quality solutions to optimization and search problems by relying on biologically inspired operators such as mutation, crossover and selection. Some examples of GA applications include optimizing decision trees for better performance, automatically solving Sudoku puzzles, and hyperparameter optimization.
Import
Example
from sklearn.metrics import log_loss
"""
define your own objective function,
make sure the function receives five parameters (model, X_train, y_train, X_valid, y_valid),
fit your model and return the objective value !
"""
def objective_function_topass(model, X_train, y_train, X_valid, y_valid):
    """Fit ``model`` on the training data and return its validation log loss.

    Parameters
    ----------
    model : object
        Model exposing ``fit(X, y)`` and ``predict_proba(X)``.
    X_train, y_train
        Training samples and targets passed to ``model.fit``.
    X_valid, y_valid
        Validation samples and targets used to score the fitted model.

    Returns
    -------
    The value of ``log_loss(y_valid, model.predict_proba(X_valid))``.
    """
    model.fit(X_train, y_train)
    # Log loss is computed from the predicted probabilities, not the labels.
    P = log_loss(y_valid, model.predict_proba(X_valid))
    return P
# Import the genetic-algorithm feature selector from zoofs.
from zoofs import GeneticOptimization
# Create the optimizer object: it receives the objective function defined
# above, runs for 20 iterations with a population of 20, and minimize=True
# asks for the objective value (log loss here) to be made as small as possible.
algo_object=GeneticOptimization(objective_function_topass,n_iteration=20,
population_size=20,selective_pressure=2,elitism=2,
mutation_rate=0.05,minimize=True)
# The model handed to the objective function; here a LightGBM classifier
# created with its default settings.
import lightgbm as lgb
lgb_model = lgb.LGBMClassifier()
# Fit the algorithm. X_train, y_train, X_valid and y_valid are not defined in
# this snippet and must be supplied by the caller.
algo_object.fit(lgb_model,X_train, y_train,X_valid, y_valid, verbose=True)
# Plot the objective score history of the run.
algo_object.plot_history()
# Extract the best feature set found during the run.
algo_object.best_feature_list
Methods
__init__(self, objective_function, n_iteration=1000, timeout=None, population_size=20, selective_pressure=2, elitism=2, mutation_rate=0.05, minimize=True, logger=None, **kwargs)
special
Parameters:
Name | Type | Description | Default |
---|---|---|---|
objective_function |
user made function of the signature 'func(model,X_train,y_train,X_test,y_test)' |
The function must return a value, that needs to be minimized/maximized. |
required |
n_iteration |
int |
Number of times the optimization algorithm will run |
1000 |
timeout |
int |
Stop operation after the given number of second(s). If this argument is set to None, the operation is executed without time limitation and n_iteration is followed |
None |
population_size |
int, default=20 |
Total size of the population |
20 |
selective_pressure |
int, default=2 |
measure of reproductive opportunities for each organism in the population |
2 |
elitism |
int, default=2 |
number of top individuals to be considered as elites |
2 |
mutation_rate |
float, default=0.05 |
rate of mutation in the population's gene |
0.05 |
minimize |
bool, default=True |
Defines if the objective value is to be maximized or minimized |
True |
logger |
Logger or None, optional (default=None) |
Accepts a `logging.Logger` instance. |
None |
**kwargs |
None |
Any extra keyword argument for objective_function |
{} |
Attributes:
Name | Type | Description |
---|---|---|
best_feature_list |
ndarray of shape (n_features) |
list of features with the best result of the entire run |
Source code in zoofs\geneticoptimization.py
def __init__(self,
             objective_function,
             n_iteration: int = 1000,
             timeout: int = None,
             population_size=20,
             selective_pressure=2,
             elitism=2,
             mutation_rate=0.05,
             minimize=True,
             logger=None,
             **kwargs):
    """
    Store the genetic-algorithm settings and initialise the base optimizer.

    Parameters
    ----------
    objective_function : user made function of the signature 'func(model,X_train,y_train,X_test,y_test)'
        The function must return a value, that needs to be minimized/maximized.

    n_iteration : int, default=1000
        Number of times the optimization algorithm will run

    timeout : int or None, default=None
        Stop operation after the given number of second(s).
        If this argument is set to None, the operation is executed without
        time limitation and n_iteration is followed

    population_size : int, default=20
        Total size of the population

    selective_pressure : int, default=2
        measure of reproductive opportunities for each organism in the population

    elitism : int, default=2
        number of top individuals to be considered as elites

    mutation_rate : float, default=0.05
        rate of mutation in the population's gene

    minimize : bool, default=True
        Defines if the objective value is to be maximized or minimized

    logger : Logger or None, optional (default=None)
        - accepts `logging.Logger` instance.

    **kwargs
        Any extra keyword argument for objective_function

    Attributes
    ----------
    best_feature_list : ndarray of shape (n_features)
        list of features with the best result of the entire run
    """
    # Shared settings are forwarded positionally to the parent initialiser.
    super().__init__(objective_function, n_iteration, timeout, population_size, minimize, logger, **kwargs)
    # The number of generations is the same value as n_iteration.
    self.n_generations = n_iteration
    self.selective_pressure = selective_pressure
    self.elitism = elitism
    self.mutation_rate = mutation_rate
fit(self, model, X_train, y_train, X_valid, y_valid, verbose=True)
Parameters:
Name | Type | Description | Default |
---|---|---|---|
model |
machine learning model's object |
machine learning model's object |
required |
X_train |
pandas.core.frame.DataFrame of shape (n_samples, n_features) |
Training input samples to be used for machine learning model |
required |
y_train |
pandas.core.frame.DataFrame or pandas.core.series.Series of shape (n_samples) |
The target values (class labels in classification, real numbers in regression). |
required |
X_valid |
pandas.core.frame.DataFrame of shape (n_samples, n_features) |
Validation input samples |
required |
y_valid |
pandas.core.frame.DataFrame or pandas.core.series.Series of shape (n_samples) |
The target values (class labels in classification, real numbers in regression). |
required |
verbose |
bool,default=True |
Print results for iterations |
True |
Source code in zoofs\geneticoptimization.py
def fit(self, model, X_train, y_train, X_valid, y_valid, verbose=True):
    """
    Run the optimization loop and return the best feature list found.

    Parameters
    ----------
    model : machine learning model's object
        machine learning model's object

    X_train : pandas.core.frame.DataFrame of shape (n_samples, n_features)
        Training input samples to be used for machine learning model

    y_train : pandas.core.frame.DataFrame or pandas.core.series.Series of shape (n_samples)
        The target values (class labels in classification, real numbers in regression).

    X_valid : pandas.core.frame.DataFrame of shape (n_samples, n_features)
        Validation input samples

    y_valid : pandas.core.frame.DataFrame or pandas.core.series.Series of shape (n_samples)
        The target values (class labels in classification, real numbers in regression).

    verbose : bool,default=True
        Print results for iterations

    Returns
    -------
    best_feature_list : list
        Column names of ``X_train`` at the positions where ``self.best_dim``
        is non-zero once the loop ends. Also stored on
        ``self.best_feature_list``.

    Warns
    -----
    UserWarning
        Emitted when ``self.timeout`` seconds have elapsed before all
        generations were processed; the loop stops early in that case.
    """
    self._check_params(model, X_train, y_train, X_valid, y_valid)

    # Reset per-run state.
    self.feature_score_hash = {}
    self.feature_list = np.array(list(X_train.columns))
    self.best_results_per_iteration = {}
    # NOTE(review): best_score is +inf here and reset to -inf right after
    # initialize_population; kept in case that helper reads it -- confirm.
    self.best_score = np.inf
    self.best_dim = np.ones(X_train.shape[1])

    self.initialize_population(X_train)
    self.best_score = -1 * float(np.inf)
    self.best_scores = []

    # A deadline exists only when a timeout was configured.
    deadline = None if self.timeout is None else time.time() + self.timeout

    for i in range(self.n_generations):
        if deadline is not None and time.time() > deadline:
            warnings.warn("Timeout occured")
            break

        self._select_individuals(model, X_train, y_train, X_valid, y_valid)
        self._produce_next_generation()
        self.best_scores.append(self.best_score)

        self._iteration_objective_score_monitor(i)
        self._verbose_results(verbose, i)

    # Translate the best mask into the corresponding column names.
    self.best_feature_list = list(
        self.feature_list[np.where(self.best_dim)[0]])
    return self.best_feature_list
plot_history(self)
inherited
Plot objective score history
Source code in zoofs\geneticoptimization.py
def plot_history(self):
    """
    Plot objective score history

    Builds a table from ``self.best_results_per_iteration`` (one row per
    iteration) and shows a figure with the per-iteration objective score
    as markers and the running best score as a line with markers.
    """
    history = pd.DataFrame.from_dict(self.best_results_per_iteration).T
    history = history.reset_index()
    history.columns = ['iteration', 'best_score',
                       'objective_score', 'selected_features']

    iterations = history['iteration']
    objective_trace = go.Scatter(x=iterations, y=history['objective_score'],
                                 mode='markers', name='objective_score')
    best_trace = go.Scatter(x=iterations, y=history['best_score'],
                            mode='lines+markers',
                            name='best_score')

    fig = go.Figure()
    # Traces are added in the original order: objective scores, then best.
    fig.add_trace(objective_trace)
    fig.add_trace(best_trace)
    fig.update_xaxes(title_text='Iteration')
    fig.update_yaxes(title_text='objective_score')
    fig.update_layout(
        title="Optimization History Plot")
    fig.show()