# Harris Hawk Optimization

## Example
```python
from sklearn.metrics import log_loss

# Define your own objective function. Make sure it receives five
# parameters, fits your model, and returns the objective value.
def objective_function_topass(model, X_train, y_train, X_valid, y_valid):
    model.fit(X_train, y_train)
    P = log_loss(y_valid, model.predict_proba(X_valid))
    return P

# import an algorithm
from zoofs import HarrisHawkOptimization

# create an object of the algorithm
algo_object = HarrisHawkOptimization(objective_function_topass,
                                     n_iteration=20,
                                     population_size=20,
                                     minimize=True)

import lightgbm as lgb
lgb_model = lgb.LGBMClassifier()

# fit the algorithm
algo_object.fit(lgb_model, X_train, y_train, X_valid, y_valid, verbose=True)

# plot the results
algo_object.plot_history()

# extract the best feature set
algo_object.best_feature_list
```
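The snippet above assumes `X_train`, `y_train`, `X_valid`, and `y_valid` already exist. A minimal sketch of producing them; the breast-cancer toy dataset is purely illustrative, and any pandas DataFrame/Series pair works:

```python
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Illustrative data preparation; zoofs expects pandas inputs
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=0)
```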
## Methods
### `__init__(self, objective_function, n_iteration=1000, timeout=None, population_size=50, minimize=True, beta=0.5, logger=None, **kwargs)` *(special)*
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `objective_function` | user-made function of the signature `func(model, X_train, y_train, X_valid, y_valid)` | User-defined function that returns the objective value | required |
| `population_size` | int | Total size of the population | 50 |
| `n_iteration` | int | Number of times the Harris Hawk Optimization algorithm will run | 1000 |
| `timeout` | int | Stop the operation after the given number of second(s). If set to None, the operation executes without a time limit and `n_iteration` is followed | None |
| `minimize` | bool | Defines whether the objective value is to be minimized (True) or maximized (False) | True |
| `beta` | float | Beta value for the random Lévy walk | 0.5 |
| `logger` | Logger or None | Accepts a `logging.Logger` instance | None |
| `**kwargs` | | Any extra keyword arguments for `objective_function` | {} |
Attributes:

| Name | Type | Description |
|---|---|---|
| `best_feature_list` | ndarray of shape (n_features,) | List of features with the best result of the entire run |
Source code in zoofs\harrishawkoptimization.py
```python
def __init__(self,
             objective_function,
             n_iteration: int = 1000,
             timeout: int = None,
             population_size=50,
             minimize=True,
             beta=0.5,
             logger=None,
             **kwargs):
    """
    Parameters
    ----------
    objective_function: user-made function of the signature 'func(model, X_train, y_train, X_valid, y_valid)'
        User-defined function that returns the objective value
    population_size: int, default=50
        Total size of the population
    n_iteration: int, default=1000
        Number of times the Harris Hawk Optimization algorithm will run
    timeout: int, default=None
        Stop operation after the given number of second(s).
        If this argument is set to None, the operation is executed without
        time limitation and n_iteration is followed.
    minimize: bool, default=True
        Defines if the objective value is to be minimized (True) or maximized (False)
    beta: float, default=0.5
        Beta value for the random Levy walk
    logger: Logger or None, optional (default=None)
        - accepts a `logging.Logger` instance
    **kwargs
        Any extra keyword arguments for objective_function

    Attributes
    ----------
    best_feature_list : ndarray of shape (n_features,)
        List of features with the best result of the entire run
    """
    super().__init__(objective_function, n_iteration, timeout,
                     population_size, minimize, logger, **kwargs)
    self.beta = beta
```
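Because any extra keyword arguments passed to the constructor are forwarded to `objective_function` on every evaluation, objective-level settings can travel through `**kwargs`. A hedged sketch: `penalized_objective` and its `penalty` argument are hypothetical, not part of zoofs:

```python
from sklearn.metrics import log_loss
from zoofs import HarrisHawkOptimization

# Hypothetical objective with an extra keyword argument; `penalty` is not
# part of zoofs, it arrives via the constructor's **kwargs on each call.
def penalized_objective(model, X_train, y_train, X_valid, y_valid, penalty=0.0):
    model.fit(X_train, y_train)
    loss = log_loss(y_valid, model.predict_proba(X_valid))
    # Discourage large feature subsets by penalizing the subset size
    return loss + penalty * X_train.shape[1]

algo_object = HarrisHawkOptimization(penalized_objective,
                                     n_iteration=20,
                                     population_size=20,
                                     minimize=True,
                                     penalty=0.01)
```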
### `fit(self, model, X_train, y_train, X_valid, y_valid, verbose=True)`
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `model` | machine learning model's object | The object to be used for fitting on the train data | required |
| `X_train` | pandas.core.frame.DataFrame of shape (n_samples, n_features) | Training input samples to be used for the machine learning model | required |
| `y_train` | pandas.core.frame.DataFrame or pandas.core.series.Series of shape (n_samples,) | The target values (class labels in classification, real numbers in regression) | required |
| `X_valid` | pandas.core.frame.DataFrame of shape (n_samples, n_features) | Validation input samples | required |
| `y_valid` | pandas.core.frame.DataFrame or pandas.core.series.Series of shape (n_samples,) | The target values (class labels in classification, real numbers in regression) | required |
| `verbose` | bool | Print results for each iteration | True |
Source code in zoofs\harrishawkoptimization.py
```python
def fit(self, model, X_train, y_train, X_valid, y_valid, verbose=True):
    """
    Parameters
    ----------
    model: machine learning model's object
        The object to be used for fitting on the train data
    X_train: pandas.core.frame.DataFrame of shape (n_samples, n_features)
        Training input samples to be used for the machine learning model
    y_train: pandas.core.frame.DataFrame or pandas.core.series.Series of shape (n_samples,)
        The target values (class labels in classification, real numbers in regression).
    X_valid: pandas.core.frame.DataFrame of shape (n_samples, n_features)
        Validation input samples
    y_valid: pandas.core.frame.DataFrame or pandas.core.series.Series of shape (n_samples,)
        The target values (class labels in classification, real numbers in regression).
    verbose: bool, default=True
        Print results for each iteration
    """
    self._check_params(model, X_train, y_train, X_valid, y_valid)

    self.feature_score_hash = {}
    self.feature_list = np.array(list(X_train.columns))
    self.best_results_per_iteration = {}
    self.best_score = np.inf
    self.best_dim = np.ones(X_train.shape[1])

    self.initialize_population(X_train)

    if self.timeout is not None:
        timeout_upper_limit = time.time() + self.timeout
    else:
        timeout_upper_limit = time.time()

    for i in range(self.n_iteration):
        if (self.timeout is not None) and (time.time() > timeout_upper_limit):
            warnings.warn("Timeout occurred")
            break

        # Log a warning if any individual in the population ends up
        # having zero selected features
        self._check_individuals()

        self.fitness_scores = self._evaluate_fitness(
            model, X_train, y_train, X_valid, y_valid)
        self.gbest_individual = self.best_dim
        self.iteration_objective_score_monitor(i)

        # Escaping energy of the prey decays over the iterations
        self.e_0 = -1 + 2 * np.random.random(size=(self.population_size))
        self.e = 2 * self.e_0 * (1 - ((i + 1) / self.n_iteration))

        # |e| >= 1: exploration phase
        self.exploration_individuals_indexes = np.where(np.abs(self.e) >= 1)[0]
        self._exploration_phase()

        # |e| < 1: exploitation phase (besiege strategies)
        self.exploitation_individuals_indexes = np.where(np.abs(self.e) < 1)[0]
        self.r = np.random.random(len(self.exploitation_individuals_indexes))
        self.exploitation_energy = self.e[self.exploitation_individuals_indexes]
        self._soft_besiege()
        self._hard_besiege()
        self._soft_besiege_with_dives(model, X_train, y_train, X_valid, y_valid)
        self._hard_besiege_with_dives(model, X_train, y_train, X_valid, y_valid)

        self.verbose_results(verbose, i)
        self.best_feature_list = list(
            self.feature_list[np.where(self.best_dim)[0]])
    return self.best_feature_list
```
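Since `fit` returns `best_feature_list`, a natural follow-up is retraining a fresh model on the selected subset. A short sketch that continues the example above:

```python
import lightgbm as lgb

# fit() returns the selected feature names, so a final model can be
# retrained on that subset directly (names continue the example above).
selected = algo_object.fit(lgb_model, X_train, y_train, X_valid, y_valid,
                           verbose=False)
final_model = lgb.LGBMClassifier()
final_model.fit(X_train[selected], y_train)
```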
### `plot_history(self)` *(inherited)*

Plot objective score history.
Source code in zoofs\harrishawkoptimization.py
```python
def plot_history(self):
    """
    Plot objective score history
    """
    res = pd.DataFrame.from_dict(self.best_results_per_iteration).T
    res.reset_index(inplace=True)
    res.columns = ['iteration', 'best_score',
                   'objective_score', 'selected_features']
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=res['iteration'], y=res['objective_score'],
                             mode='markers', name='objective_score'))
    fig.add_trace(go.Scatter(x=res['iteration'], y=res['best_score'],
                             mode='lines+markers', name='best_score'))
    fig.update_xaxes(title_text='Iteration')
    fig.update_yaxes(title_text='objective_score')
    fig.update_layout(title="Optimization History Plot")
    fig.show()
```
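The history that `plot_history` draws comes from the `best_results_per_iteration` attribute, so the same table can be exported for custom analysis. A minimal sketch mirroring the transformation in the source above:

```python
import pandas as pd

# Rebuild the per-iteration table exactly as plot_history() does,
# then persist it for plotting or inspection outside plotly.
history = pd.DataFrame.from_dict(algo_object.best_results_per_iteration).T
history.reset_index(inplace=True)
history.columns = ['iteration', 'best_score',
                   'objective_score', 'selected_features']
history.to_csv('hho_history.csv', index=False)
```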