Gravitational Algorithm
The Gravitational Algorithm is based on the law of gravity and mass interactions. In this algorithm, the searcher agents are a collection of masses that interact with each other according to Newtonian gravity and the laws of motion.
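At each iteration, every agent's binary feature mask is pulled toward the better-scoring agents with a force proportional to their fitness-derived masses, scaled by a gravity constant that decays over the run. The following is a minimal, self-contained sketch of one such update step (illustrative only, not the zoofs source; names such as g0, eps and kbest mirror the parameters documented below):

# Illustrative sketch of one binary-GSA style update step (not part of zoofs)
import numpy as np

rng = np.random.default_rng(0)
pop, n_features = 6, 10
X = rng.integers(0, 2, size=(pop, n_features)).astype(float)  # candidate feature masks
V = np.zeros_like(X)                                          # velocities
fitness = rng.random(pop)                                     # pretend objective values (lower is better)

g0, eps, t, T, kbest = 100.0, 0.5, 0, 50, 3
G = g0 * (1 - (t + 1) / T)                                    # gravity decays over iterations
q = (fitness - fitness.max()) / (fitness.min() - fitness.max())
M = q / q.sum()                                               # normalized "masses" from fitness

acc = np.zeros_like(X)
for j in np.argsort(fitness)[:kbest]:                         # only the kbest agents exert force
    diff = X[j] - X
    dist = np.linalg.norm(diff, axis=1, keepdims=True) + eps
    acc += rng.random() * G * M[j] * diff / dist

V = rng.random((pop, 1)) * V + acc                            # inertia plus gravitational acceleration
flip = rng.random(X.shape) <= np.abs(np.tanh(V))              # transfer function gives flip probability
X = np.where(flip, 1 - X, X)                                  # flip the selected feature bits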
Import
Example
from sklearn.metrics import log_loss
"""
define your own objective function,
make sure the function receives the model and four data parameters,
fit your model and return the objective value!
"""
def objective_function_topass(model, X_train, y_train, X_valid, y_valid):
    model.fit(X_train, y_train)
    P = log_loss(y_valid, model.predict_proba(X_valid))
    return P

# import an algorithm!
from zoofs import GravitationalOptimization
# create object of algorithm
algo_object = GravitationalOptimization(objective_function_topass,
                                        n_iteration=50,
                                        population_size=50,
                                        g0=100,
                                        eps=0.5,
                                        minimize=True)
import lightgbm as lgb
lgb_model = lgb.LGBMClassifier()
# fit the algorithm
algo_object.fit(lgb_model, X_train, y_train, X_valid, y_valid, verbose=True)
# plot your results
algo_object.plot_history()
# extract the best feature set
algo_object.best_feature_list
Methods
__init__(self, objective_function, n_iteration=1000, timeout=None, population_size=50, g0=100, eps=0.5, minimize=True, logger=None, **kwargs)
Parameters:
Name | Type | Description | Default
---|---|---|---
objective_function | user-made function with the signature 'func(model,X_train,y_train,X_test,y_test)' | The function must return a value that is to be minimized/maximized. | required
n_iteration | int | Number of times the optimization algorithm will run | 1000
timeout | int | Stop the operation after the given number of seconds. If set to None, the operation is executed without a time limit and n_iteration is followed. | None
population_size | int | Total size of the population | 50
g0 | float | Gravitational strength constant | 100
eps | float | Distance constant | 0.5
minimize | bool | Defines whether the objective value is to be minimized or maximized | True
logger | Logger or None | Accepts a `logging.Logger` instance | None
**kwargs | dict | Any extra keyword arguments for objective_function | {}
Attributes:
Name | Type | Description
---|---|---
best_feature_list | ndarray of shape (n_features) | List of features with the best result of the entire run
Source code in zoofs\gravitationaloptimization.py
def __init__(self,
             objective_function,
             n_iteration: int = 1000,
             timeout: int = None,
             population_size=50,
             g0=100,
             eps=0.5,
             minimize=True,
             logger=None,
             **kwargs):
    """
    Parameters
    ----------
    objective_function : user made function of the signature 'func(model,X_train,y_train,X_test,y_test)'
        The function must return a value, that needs to be minimized/maximized.
    n_iteration : int, default=1000
        Number of time the Optimization algorithm will run
    timeout: int = None
        Stop operation after the given number of second(s).
        If this argument is set to None, the operation is executed without time limitation and n_iteration is followed
    population_size : int, default=50
        Total size of the population
    g0 : float, default=100
        gravitational strength constant
    eps : float, default=0.5
        distance constant
    minimize : bool, default=True
        Defines if the objective value is to be maximized or minimized
    logger: Logger or None, optional (default=None)
        - accepts `logging.Logger` instance.
    **kwargs
        Any extra keyword argument for objective_function

    Attributes
    ----------
    best_feature_list : ndarray of shape (n_features)
        list of features with the best result of the entire run
    """
    super().__init__(objective_function, n_iteration, timeout, population_size, minimize, logger, **kwargs)
    self.g0 = g0
    self.eps = eps
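Extra keyword arguments given to the constructor are forwarded to the objective function. A minimal sketch under that documented **kwargs behavior; the penalty argument below is hypothetical and added only for illustration:

from sklearn.metrics import log_loss
from zoofs import GravitationalOptimization

def penalized_objective(model, X_train, y_train, X_valid, y_valid, penalty=0.0):
    # penalty is a hypothetical extra argument, forwarded through **kwargs;
    # it discourages large feature subsets on top of the validation log loss
    model.fit(X_train, y_train)
    return log_loss(y_valid, model.predict_proba(X_valid)) + penalty * X_train.shape[1]

algo_object = GravitationalOptimization(penalized_objective,
                                        n_iteration=30,
                                        timeout=60,        # stop after ~60 seconds even if iterations remain
                                        population_size=30,
                                        g0=100,
                                        eps=0.5,
                                        minimize=True,
                                        penalty=0.01)      # forwarded to penalized_objective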
fit(self, model, X_train, y_train, X_valid, y_valid, verbose=True)
Parameters:
Name | Type | Description | Default
---|---|---|---
model | machine learning model's object | Machine learning model's object | required
X_train | pandas.core.frame.DataFrame of shape (n_samples, n_features) | Training input samples to be used for the machine learning model | required
y_train | pandas.core.frame.DataFrame or pandas.core.series.Series of shape (n_samples) | The target values (class labels in classification, real numbers in regression). | required
X_valid | pandas.core.frame.DataFrame of shape (n_samples, n_features) | Validation input samples | required
y_valid | pandas.core.frame.DataFrame or pandas.core.series.Series of shape (n_samples) | The target values (class labels in classification, real numbers in regression). | required
verbose | bool | Print results for iterations | True
Source code in zoofs\gravitationaloptimization.py
def fit(self, model, X_train, y_train, X_valid, y_valid, verbose=True):
    """
    Parameters
    ----------
    model : machine learning model's object
        machine learning model's object
    X_train : pandas.core.frame.DataFrame of shape (n_samples, n_features)
        Training input samples to be used for machine learning model
    y_train : pandas.core.frame.DataFrame or pandas.core.series.Series of shape (n_samples)
        The target values (class labels in classification, real numbers in regression).
    X_valid : pandas.core.frame.DataFrame of shape (n_samples, n_features)
        Validation input samples
    y_valid : pandas.core.frame.DataFrame or pandas.core.series.Series of shape (n_samples)
        The target values (class labels in classification, real numbers in regression).
    verbose : bool,default=True
        Print results for iterations
    """
    self._check_params(model, X_train, y_train, X_valid, y_valid)

    self.feature_score_hash = {}
    self.feature_list = np.array(list(X_train.columns))
    self.best_results_per_iteration = {}
    self.best_score = np.inf
    self.best_dim = np.ones(X_train.shape[1])

    self.initialize_population(X_train)
    self.velocities = np.zeros((self.population_size, X_train.shape[1]))
    kbest = sorted([int(x) for x in np.linspace(
        1, self.population_size - 1, self.n_iteration)], reverse=True)

    if (self.timeout is not None):
        timeout_upper_limit = time.time() + self.timeout
    else:
        timeout_upper_limit = time.time()

    for iteration in range(self.n_iteration):
        if (self.timeout is not None) & (time.time() > timeout_upper_limit):
            warnings.warn("Timeout occured")
            break

        self.fitness_scores = self._evaluate_fitness(
            model, X_train, y_train, X_valid, y_valid)
        self.iteration_objective_score_monitor(iteration)

        self.gi = self.g0 * (1 - ((iteration + 1) / self.n_iteration))
        self.fitness_scores_numpy = np.array(self.fitness_scores)
        self.qi = np.array(self.fitness_scores_numpy - self.fitness_scores_numpy.max()) / (
            self.fitness_scores_numpy.min() - self.fitness_scores_numpy.max())
        self.Mi = self.qi / self.qi.sum()

        kbest_v = kbest[iteration]
        best_iteration_individuals = self.individuals[np.argsort(self.fitness_scores)[:kbest_v]]
        best_iteration_individuals_masses = self.Mi[np.argsort(self.fitness_scores)[:kbest_v]]

        self.interim_acc = np.zeros((self.population_size, X_train.shape[1]))
        for single_individual, single_individual_mass in zip(best_iteration_individuals, best_iteration_individuals_masses):
            self.interim_acc = np.random.random() * (self.individuals - single_individual) * (self.gi * single_individual_mass) * np.repeat(
                (1 / (((self.individuals - single_individual) ** 2).sum(axis=1) ** 0.5 + self.eps)),
                X_train.shape[1]).reshape(self.population_size, X_train.shape[1])

        self.velocities = self.interim_acc + self.velocities * np.random.random((self.population_size, 1))
        self.velocities = np.where(self.velocities > 6, 6, self.velocities)
        self.velocities = np.where(self.velocities < -6, -6, self.velocities)

        self.individuals = np.where(np.random.uniform(size=(self.population_size, X_train.shape[1])) <= np.tanh(self.velocities),
                                    1 - self.individuals, self.individuals)
        self.verbose_results(verbose, iteration)
        self.best_feature_list = list(self.feature_list[np.where(self.best_dim)[0]])
    return self.best_feature_list
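Since fit returns best_feature_list, the selected column names can be used directly to retrain a final model. A short sketch reusing the objects from the Example section above:

# Continuing the example above: retrain on the selected feature subset
selected = algo_object.fit(lgb_model, X_train, y_train, X_valid, y_valid, verbose=False)
final_model = lgb.LGBMClassifier()
final_model.fit(X_train[selected], y_train)
print(log_loss(y_valid, final_model.predict_proba(X_valid[selected])))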
plot_history(self)
inherited
Plot objective score history
Source code in zoofs\gravitationaloptimization.py
def plot_history(self):
    """
    Plot objective score history
    """
    res = pd.DataFrame.from_dict(self.best_results_per_iteration).T
    res.reset_index(inplace=True)
    res.columns = ['iteration', 'best_score',
                   'objective_score', 'selected_features']
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=res['iteration'], y=res['objective_score'],
                             mode='markers', name='objective_score'))
    fig.add_trace(go.Scatter(x=res['iteration'], y=res['best_score'],
                             mode='lines+markers',
                             name='best_score'))
    fig.update_xaxes(title_text='Iteration')
    fig.update_yaxes(title_text='objective_score')
    fig.update_layout(
        title="Optimization History Plot")
    fig.show()