From 069f530c0df2b3d62a33072da5ee530fc7f5848b Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Thu, 24 Aug 2023 17:22:23 -0400 Subject: [PATCH 01/25] NSGA-II Crowding Distance --- .../non_dominant_sorting_crowding_distance.py | 123 ++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 NSGA-II/non_dominant_sorting_crowding_distance.py diff --git a/NSGA-II/non_dominant_sorting_crowding_distance.py b/NSGA-II/non_dominant_sorting_crowding_distance.py new file mode 100644 index 0000000..86e3859 --- /dev/null +++ b/NSGA-II/non_dominant_sorting_crowding_distance.py @@ -0,0 +1,123 @@ +import numpy + +population = numpy.array([[20, 2.2], + [60, 4.4], + [65, 3.5], + [15, 4.4], + [55, 4.5], + [50, 1.8], + [80, 4.0], + [25, 4.6]]) + +def non_dominant_sorting(curr_set): + # List of the members of the current dominant front/set. + dominant_set = [] + # List of the non-members of the current dominant front/set. + non_dominant_set = [] + for idx1, sol1 in enumerate(curr_set): + # Flag indicates whether the solution is a member of the current dominant set. + is_dominant = True + for idx2, sol2 in enumerate(curr_set): + if idx1 == idx2: + continue + # Zipping the 2 solutions so the corresponding genes are in the same list. + # The returned array is of size (N, 2) where N is the number of genes. + b = numpy.array(list(zip(sol1, sol2))) + + #TODO Consider repacing < by > for maximization problems. + # Checking for if any solution dominates the current solution by applying the 2 conditions. + # le_eq: All elements must be True. + # le: Only 1 element must be True. + le_eq = b[:, 1] <= b[:, 0] + le = b[:, 1] < b[:, 0] + + # If the 2 conditions hold, then a solution dominates the current solution. + # The current solution is not considered a member of the dominant set. + if le_eq.all() and le.any(): + # print(f"{sol2} dominates {sol1}") + # Set the is_dominant flag to False to not insert the current solution in the current dominant set. 
+ # Instead, insert it into the non-dominant set. + is_dominant = False + non_dominant_set.append(sol1) + break + else: + # Reaching here means the solution does not dominant the current solution. + # print(f"{sol2} does not dominate {sol1}") + pass + + # If the flag is True, then no solution dominates the current solution. + if is_dominant: + dominant_set.append(sol1) + + # Return the dominant and non-dominant sets. + return dominant_set, non_dominant_set + +dominant_set = [] +non_dominant_set = population.copy() +while len(non_dominant_set) > 0: + d1, non_dominant_set = non_dominant_sorting(non_dominant_set) + dominant_set.append(numpy.array(d1)) + +for i, s in enumerate(dominant_set): + print(f'Dominant Front Set {i+1}:\n{s}') + +print("\n\n\n--------------------") +def crowding_distance(front): + # An empty list holding info about the objectives of each solution. The info includes the objective value and crowding distance. + obj_crowding_dist_list = [] + # Loop through the objectives to calculate the crowding distance of each solution across all objectives. + for obj_idx in range(front.shape[1]): + obj = front[:, obj_idx] + # This variable has a nested list where each child list zip the following together: + # 1) The index of the objective value. + # 2) The objective value. + # 3) Initialize the crowding distance by zero. + obj = list(zip(range(len(obj)), obj, [0]*len(obj))) + obj = [list(element) for element in obj] + # This variable is the sorted version where sorting is done by the objective value (second element). + # Note that the first element is still the original objective index before sorting. + obj_sorted = sorted(obj, key=lambda x: x[1]) + + # Get the minimum and maximum values for the current objective. + obj_min_val = obj_sorted[0][1] + obj_max_val = obj_sorted[-1][1] + denominator = obj_max_val - obj_min_val + # To avoid division by zero, set the denominator to a tiny value. 
+ if denominator == 0: + denominator = 0.0000001 + + # Set the crowding distance to the first and last solutions (after being sorted) to infinity. + inf_val = 999999999 + # crowding_distance[0] = inf_val + obj_sorted[0][2] = inf_val + # crowding_distance[-1] = inf_val + obj_sorted[-1][2] = inf_val + + # If there are only 2 solutions in the current front, then do not proceed. + # The crowding distance for such 2 solutions is infinity. + if len(obj_sorted) <= 2: + break + + for idx in range(1, len(obj_sorted)-1): + # Calculate the crowding distance. + crowding_dist = obj_sorted[idx+1][1] - obj_sorted[idx-1][1] + crowding_dist = crowding_dist / denominator + # Insert the crowding distance back into the list to override the initial zero. + obj_sorted[idx][2] = crowding_dist + + # Sort the objective by the original index at index 0 of the each child list. + obj_sorted = sorted(obj_sorted, key=lambda x: x[0]) + obj_crowding_dist_list.append(obj_sorted) + + obj_crowding_dist_list = numpy.array(obj_crowding_dist_list) + crowding_dist = numpy.array([obj_crowding_dist_list[idx, :, 2] for idx in range(len(obj_crowding_dist_list))]) + crowding_dist_sum = numpy.sum(crowding_dist, axis=0) + + return obj_crowding_dist_list, crowding_dist_sum + +# Fetch the current front. 
+front = dominant_set[1] +obj_crowding_distance_list, crowding_distance_sum = crowding_distance(front) + +print(obj_crowding_distance_list) +print(f'\nSum of Crowd Dists\n{crowding_distance_sum}') From cf4dc636f8a6149860f9ec15370aaa9473d57ca0 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 30 Aug 2023 16:14:15 -0400 Subject: [PATCH 02/25] Update crowding distance --- NSGA-II/non_dominant_sorting_crowding_distance.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/NSGA-II/non_dominant_sorting_crowding_distance.py b/NSGA-II/non_dominant_sorting_crowding_distance.py index 86e3859..7f00eaa 100644 --- a/NSGA-II/non_dominant_sorting_crowding_distance.py +++ b/NSGA-II/non_dominant_sorting_crowding_distance.py @@ -79,20 +79,20 @@ def crowding_distance(front): obj_sorted = sorted(obj, key=lambda x: x[1]) # Get the minimum and maximum values for the current objective. - obj_min_val = obj_sorted[0][1] - obj_max_val = obj_sorted[-1][1] + obj_min_val = min(population[:, obj_idx]) + obj_max_val = max(population[:, obj_idx]) denominator = obj_max_val - obj_min_val # To avoid division by zero, set the denominator to a tiny value. if denominator == 0: denominator = 0.0000001 - + # Set the crowding distance to the first and last solutions (after being sorted) to infinity. - inf_val = 999999999 + inf_val = float('inf') # crowding_distance[0] = inf_val obj_sorted[0][2] = inf_val # crowding_distance[-1] = inf_val obj_sorted[-1][2] = inf_val - + # If there are only 2 solutions in the current front, then do not proceed. # The crowding distance for such 2 solutions is infinity. 
if len(obj_sorted) <= 2: From d8461fa16a73f14321500606136d0f9ecd7bf349 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 30 Aug 2023 22:38:33 -0400 Subject: [PATCH 03/25] Update crowding distance --- .../non_dominant_sorting_crowding_distance.py | 187 +++++++++++++----- 1 file changed, 133 insertions(+), 54 deletions(-) diff --git a/NSGA-II/non_dominant_sorting_crowding_distance.py b/NSGA-II/non_dominant_sorting_crowding_distance.py index 7f00eaa..532db83 100644 --- a/NSGA-II/non_dominant_sorting_crowding_distance.py +++ b/NSGA-II/non_dominant_sorting_crowding_distance.py @@ -1,73 +1,139 @@ import numpy -population = numpy.array([[20, 2.2], - [60, 4.4], - [65, 3.5], - [15, 4.4], - [55, 4.5], - [50, 1.8], - [80, 4.0], - [25, 4.6]]) - -def non_dominant_sorting(curr_set): - # List of the members of the current dominant front/set. - dominant_set = [] - # List of the non-members of the current dominant front/set. - non_dominant_set = [] - for idx1, sol1 in enumerate(curr_set): - # Flag indicates whether the solution is a member of the current dominant set. - is_dominant = True - for idx2, sol2 in enumerate(curr_set): +population_fitness = numpy.array([[20, 2.2], + [60, 4.4], + [65, 3.5], + [15, 4.4], + [55, 4.5], + [50, 1.8], + [80, 4.0], + [25, 4.6]]) + +def get_non_dominated_set(curr_solutions): + """ + Get the set of non-dominated solutions from the current set of solutions. + + Parameters + ---------- + curr_solutions : TYPE + The set of solutions to find its non-dominated set. + + Returns + ------- + dominated_set : TYPE + A set of the dominated solutions. + non_dominated_set : TYPE + A set of the non-dominated set. + + """ + # List of the members of the current dominated pareto front/set. + dominated_set = [] + # List of the non-members of the current dominated pareto front/set. + non_dominated_set = [] + for idx1, sol1 in enumerate(curr_solutions): + # Flag indicates whether the solution is a member of the current dominated set. 
+ is_dominated = True + for idx2, sol2 in enumerate(curr_solutions): if idx1 == idx2: continue # Zipping the 2 solutions so the corresponding genes are in the same list. # The returned array is of size (N, 2) where N is the number of genes. - b = numpy.array(list(zip(sol1, sol2))) - + two_solutions = numpy.array(list(zip(sol1[1], sol2[1]))) + #TODO Consider repacing < by > for maximization problems. # Checking for if any solution dominates the current solution by applying the 2 conditions. - # le_eq: All elements must be True. - # le: Only 1 element must be True. - le_eq = b[:, 1] <= b[:, 0] - le = b[:, 1] < b[:, 0] + # le_eq (less than or equal): All elements must be True. + # le (less than): Only 1 element must be True. + le_eq = two_solutions[:, 1] <= two_solutions[:, 0] + le = two_solutions[:, 1] < two_solutions[:, 0] # If the 2 conditions hold, then a solution dominates the current solution. - # The current solution is not considered a member of the dominant set. + # The current solution is not considered a member of the dominated set. if le_eq.all() and le.any(): - # print(f"{sol2} dominates {sol1}") - # Set the is_dominant flag to False to not insert the current solution in the current dominant set. - # Instead, insert it into the non-dominant set. - is_dominant = False - non_dominant_set.append(sol1) + # Set the is_dominated flag to False to NOT insert the current solution in the current dominated set. + # Instead, insert it into the non-dominated set. + is_dominated = False + non_dominated_set.append(sol1) break else: - # Reaching here means the solution does not dominant the current solution. - # print(f"{sol2} does not dominate {sol1}") + # Reaching here means the solution does not dominate the current solution. pass # If the flag is True, then no solution dominates the current solution. - if is_dominant: - dominant_set.append(sol1) + if is_dominated: + dominated_set.append(sol1) + + # Return the dominated and non-dominated sets. 
+ return dominated_set, non_dominated_set + +def non_dominated_sorting(population_fitness): + """ + Apply the non-dominant sorting over the population_fitness to create sets of non-dominaned solutions. + + Parameters + ---------- + population_fitness : TYPE + An array of the population fitness across all objective function. - # Return the dominant and non-dominant sets. - return dominant_set, non_dominant_set + Returns + ------- + non_dominated_sets : TYPE + An array of the non-dominated sets. -dominant_set = [] -non_dominant_set = population.copy() -while len(non_dominant_set) > 0: - d1, non_dominant_set = non_dominant_sorting(non_dominant_set) - dominant_set.append(numpy.array(d1)) + """ + # A list of all non-dominated sets. + non_dominated_sets = [] -for i, s in enumerate(dominant_set): - print(f'Dominant Front Set {i+1}:\n{s}') + # The remaining set to be explored for non-dominance. + # Initially it is set to the entire population. + # The solutions of each non-dominated set are removed after each iteration. + remaining_set = population_fitness.copy() + + # Zipping the solution index with the solution's fitness. + # This helps to easily identify the index of each solution. + # Each element has: + # 1) The index of the solution. + # 2) An array of the fitness values of this solution across all objectives. + # remaining_set = numpy.array(list(zip(range(0, population_fitness.shape[0]), non_dominated_set))) + remaining_set = list(zip(range(0, population_fitness.shape[0]), remaining_set)) + while len(remaining_set) > 0: + # Get the current non-dominated set of solutions. + d1, remaining_set = get_non_dominated_set(curr_solutions=remaining_set) + non_dominated_sets.append(numpy.array(d1, dtype=object)) + return non_dominated_sets + +def crowding_distance(pareto_front): + """ + Calculate the crowding dstance for all solutions in the current pareto front. + + Parameters + ---------- + pareto_front : TYPE + The set of solutions in the current pareto front. 
+ + Returns + ------- + obj_crowding_dist_list : TYPE + A nested list of the values for all objectives alongside their crowding distance. + crowding_dist_sum : TYPE + A list of the sum of crowding distances across all objectives for each solution. + """ + + # Each solution in the pareto front has 2 elements: + # 1) The index of the solution in the population. + # 2) A list of the fitness values for all objectives of the solution. + # Before proceeding, remove the indices from each solution in the pareto front. + pareto_front = numpy.array([pareto_front[idx] for idx in range(pareto_front.shape[0])]) + + # If there is only 1 solution, then return empty arrays for the crowding distance. + if pareto_front.shape[0] == 1: + return numpy.array([]), numpy.array([]) -print("\n\n\n--------------------") -def crowding_distance(front): # An empty list holding info about the objectives of each solution. The info includes the objective value and crowding distance. obj_crowding_dist_list = [] # Loop through the objectives to calculate the crowding distance of each solution across all objectives. - for obj_idx in range(front.shape[1]): - obj = front[:, obj_idx] + for obj_idx in range(pareto_front.shape[1]): + obj = pareto_front[:, obj_idx] # This variable has a nested list where each child list zip the following together: # 1) The index of the objective value. # 2) The objective value. @@ -79,8 +145,8 @@ def crowding_distance(front): obj_sorted = sorted(obj, key=lambda x: x[1]) # Get the minimum and maximum values for the current objective. - obj_min_val = min(population[:, obj_idx]) - obj_max_val = max(population[:, obj_idx]) + obj_min_val = min(population_fitness[:, obj_idx]) + obj_max_val = max(population_fitness[:, obj_idx]) denominator = obj_max_val - obj_min_val # To avoid division by zero, set the denominator to a tiny value. 
if denominator == 0: @@ -93,7 +159,7 @@ def crowding_distance(front): # crowding_distance[-1] = inf_val obj_sorted[-1][2] = inf_val - # If there are only 2 solutions in the current front, then do not proceed. + # If there are only 2 solutions in the current pareto front, then do not proceed. # The crowding distance for such 2 solutions is infinity. if len(obj_sorted) <= 2: break @@ -113,11 +179,24 @@ def crowding_distance(front): crowding_dist = numpy.array([obj_crowding_dist_list[idx, :, 2] for idx in range(len(obj_crowding_dist_list))]) crowding_dist_sum = numpy.sum(crowding_dist, axis=0) + # An array of the sum of crowding distances across all objectives. + # Each row has 2 elements: + # 1) The index of the solution. + # 2) The sum of all crowding distances for all objective of the solution. + crowding_dist_sum = numpy.array(list(zip(obj_crowding_dist_list[0, :, 0], crowding_dist_sum))) + crowding_dist_sum = sorted(crowding_dist_sum, key=lambda x: x[1], reverse=True) + return obj_crowding_dist_list, crowding_dist_sum -# Fetch the current front. -front = dominant_set[1] -obj_crowding_distance_list, crowding_distance_sum = crowding_distance(front) +non_dominated_sets = non_dominated_sorting(population_fitness) + +# for i, s in enumerate(non_dominated_sets): +# print(f'dominated Pareto Front Set {i+1}:\n{s}') +# print("\n\n\n--------------------") + +# Fetch the current pareto front. 
+pareto_front = non_dominated_sets[1][:, 1] +obj_crowding_distance_list, crowding_distance_sum = crowding_distance(pareto_front) print(obj_crowding_distance_list) -print(f'\nSum of Crowd Dists\n{crowding_distance_sum}') +print(f'\nSorted Sum of Crowd Dists\n{crowding_distance_sum}') From 912a1d1083d6ed431d29dca19d38004b46906dd2 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sat, 2 Sep 2023 11:41:24 -0400 Subject: [PATCH 04/25] Update crowding distance --- .../non_dominant_sorting_crowding_distance.py | 288 ++++++++++++++++-- 1 file changed, 267 insertions(+), 21 deletions(-) diff --git a/NSGA-II/non_dominant_sorting_crowding_distance.py b/NSGA-II/non_dominant_sorting_crowding_distance.py index 532db83..60f1895 100644 --- a/NSGA-II/non_dominant_sorting_crowding_distance.py +++ b/NSGA-II/non_dominant_sorting_crowding_distance.py @@ -68,7 +68,7 @@ def get_non_dominated_set(curr_solutions): def non_dominated_sorting(population_fitness): """ - Apply the non-dominant sorting over the population_fitness to create sets of non-dominaned solutions. + Apply the non-dominant sorting over the population_fitness to create the pareto fronts based on non-dominaned sorting of the solutions. Parameters ---------- @@ -77,12 +77,12 @@ def non_dominated_sorting(population_fitness): Returns ------- - non_dominated_sets : TYPE - An array of the non-dominated sets. + pareto_fronts : TYPE + An array of the pareto fronts. """ # A list of all non-dominated sets. - non_dominated_sets = [] + pareto_fronts = [] # The remaining set to be explored for non-dominance. # Initially it is set to the entire population. @@ -96,11 +96,25 @@ def non_dominated_sorting(population_fitness): # 2) An array of the fitness values of this solution across all objectives. 
# remaining_set = numpy.array(list(zip(range(0, population_fitness.shape[0]), non_dominated_set))) remaining_set = list(zip(range(0, population_fitness.shape[0]), remaining_set)) + + # A list mapping the index of each pareto front to the set of solutions in this front. + solutions_fronts_indices = [-1]*len(remaining_set) + solutions_fronts_indices = numpy.array(solutions_fronts_indices) + + # Index of the current pareto front. + front_index = -1 while len(remaining_set) > 0: + front_index += 1 + # Get the current non-dominated set of solutions. - d1, remaining_set = get_non_dominated_set(curr_solutions=remaining_set) - non_dominated_sets.append(numpy.array(d1, dtype=object)) - return non_dominated_sets + pareto_front, remaining_set = get_non_dominated_set(curr_solutions=remaining_set) + pareto_front = numpy.array(pareto_front, dtype=object) + pareto_fronts.append(pareto_front) + + solutions_indices = pareto_front[:, 0].astype(int) + solutions_fronts_indices[solutions_indices] = front_index + + return pareto_fronts, solutions_fronts_indices def crowding_distance(pareto_front): """ @@ -117,23 +131,28 @@ def crowding_distance(pareto_front): A nested list of the values for all objectives alongside their crowding distance. crowding_dist_sum : TYPE A list of the sum of crowding distances across all objectives for each solution. + crowding_dist_front_sorted_indices : TYPE + The indices of the solutions (relative to the current front) sorted by the crowding distance. + crowding_dist_pop_sorted_indices : TYPE + The indices of the solutions (relative to the population) sorted by the crowding distance. """ # Each solution in the pareto front has 2 elements: # 1) The index of the solution in the population. # 2) A list of the fitness values for all objectives of the solution. # Before proceeding, remove the indices from each solution in the pareto front. 
- pareto_front = numpy.array([pareto_front[idx] for idx in range(pareto_front.shape[0])]) + pareto_front_no_indices = numpy.array([pareto_front[:, 1][idx] for idx in range(pareto_front.shape[0])]) # If there is only 1 solution, then return empty arrays for the crowding distance. - if pareto_front.shape[0] == 1: - return numpy.array([]), numpy.array([]) + if pareto_front_no_indices.shape[0] == 1: + # There is only 1 index. + return numpy.array([]), numpy.array([]), numpy.array([0]), pareto_front[:, 0].astype(int) # An empty list holding info about the objectives of each solution. The info includes the objective value and crowding distance. obj_crowding_dist_list = [] # Loop through the objectives to calculate the crowding distance of each solution across all objectives. - for obj_idx in range(pareto_front.shape[1]): - obj = pareto_front[:, obj_idx] + for obj_idx in range(pareto_front_no_indices.shape[1]): + obj = pareto_front_no_indices[:, obj_idx] # This variable has a nested list where each child list zip the following together: # 1) The index of the objective value. # 2) The objective value. @@ -186,17 +205,244 @@ def crowding_distance(pareto_front): crowding_dist_sum = numpy.array(list(zip(obj_crowding_dist_list[0, :, 0], crowding_dist_sum))) crowding_dist_sum = sorted(crowding_dist_sum, key=lambda x: x[1], reverse=True) - return obj_crowding_dist_list, crowding_dist_sum + # The sorted solutions' indices by the crowding distance. + crowding_dist_front_sorted_indices = numpy.array(crowding_dist_sum)[:, 0] + crowding_dist_front_sorted_indices = crowding_dist_front_sorted_indices.astype(int) + # Note that such indices are relative to the front, NOT the population. + # It is mandatory to map such front indices to population indices before using them to refer to the population. 
+ crowding_dist_pop_sorted_indices = pareto_front[:, 0] + crowding_dist_pop_sorted_indices = crowding_dist_pop_sorted_indices[crowding_dist_front_sorted_indices] + crowding_dist_pop_sorted_indices = crowding_dist_pop_sorted_indices.astype(int) + + return obj_crowding_dist_list, crowding_dist_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices + +def tournament_selection_nsga2(self, + pareto_fronts, + solutions_fronts_indices, + num_parents): + + """ + Select the parents using the tournament selection technique for NSGA-II. + The traditional tournament selection uses the fitness values. But the tournament selection for NSGA-II uses non-dominated sorting and crowding distance. + Using non-dominated sorting, the solutions are distributed across pareto fronts. The fronts are given the indices 0, 1, 2, ..., N where N is the number of pareto fronts. The lower the index of the pareto front, the better its solutions. + To select the parents solutions, 2 solutions are selected randomly. If the 2 solutions are in different pareto fronts, then the solution comming from a pareto front with lower index is selected. + If 2 solutions are in the same pareto front, then crowding distance is calculated. The solution with the higher crowding distance is selected. + If the 2 solutions are in the same pareto front and have the same crowding distance, then a solution is randomly selected. + Later, the selected parents will mate to produce the offspring. + + It accepts 2 parameters: + -pareto_fronts: A nested array of all the pareto fronts. Each front has its solutions. + -solutions_fronts_indices: A list of the pareto front index of each solution in the current population. + -num_parents: The number of parents to be selected. + + It returns an array of the selected parents alongside their indices in the population. 
+ """ + + if self.gene_type_single == True: + parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0]) + else: + parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object) -non_dominated_sets = non_dominated_sorting(population_fitness) + # The indices of the selected parents. + parents_indices = [] -# for i, s in enumerate(non_dominated_sets): -# print(f'dominated Pareto Front Set {i+1}:\n{s}') + # Randomly generate pairs of indices to apply for NSGA-II tournament selection for selecting the parents solutions. + rand_indices = numpy.random.randint(low=0.0, + high=len(solutions_fronts_indices), + size=(num_parents, self.K_tournament)) + # rand_indices[0, 0] = 5 + # rand_indices[0, 1] = 3 + # rand_indices[1, 0] = 1 + # rand_indices[1, 1] = 6 + + for parent_num in range(num_parents): + # Return the indices of the current 2 solutions. + current_indices = rand_indices[parent_num] + # Return the front index of the 2 solutions. + parent_fronts_indices = solutions_fronts_indices[current_indices] + + if parent_fronts_indices[0] < parent_fronts_indices[1]: + # If the first solution is in a lower pareto front than the second, then select it. + selected_parent_idx = current_indices[0] + elif parent_fronts_indices[0] > parent_fronts_indices[1]: + # If the second solution is in a lower pareto front than the first, then select it. + selected_parent_idx = current_indices[1] + else: + # The 2 solutions are in the same pareto front. + # The selection is made using the crowding distance. + + # A list holding the crowding distance of the current 2 solutions. It is initialized to -1. + solutions_crowding_distance = [-1, -1] + + # Fetch the current pareto front. + pareto_front = pareto_fronts[parent_fronts_indices[0]] # Index 1 can also be used. + + # If there is only 1 solution in the pareto front, just return it without calculating the crowding distance (it is useless). 
+ if pareto_front.shape[0] == 1: + selected_parent_idx = current_indices[0] # Index 1 can also be used. + else: + # Reaching here means the pareto front has more than 1 solution. + + # Calculate the crowding distance of the solutions of the pareto front. + obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front.copy()) + + # This list has the sorted front-based indices for the solutions in the current pareto front. + crowding_dist_front_sorted_indices = list(crowding_dist_front_sorted_indices) + # This list has the sorted population-based indices for the solutions in the current pareto front. + crowding_dist_pop_sorted_indices = list(crowding_dist_pop_sorted_indices) + + # Return the indices of the solutions from the pareto front. + solution1_idx = crowding_dist_pop_sorted_indices.index(current_indices[0]) + solution2_idx = crowding_dist_pop_sorted_indices.index(current_indices[1]) + + # Fetch the crowding distance using the indices. + solutions_crowding_distance[0] = crowding_distance_sum[solution1_idx][1] + solutions_crowding_distance[1] = crowding_distance_sum[solution2_idx][1] + + # # Instead of using the crowding distance, we can select the solution that comes first in the list. + # # Its limitation is that it is biased towards the low indexed solution if the 2 solutions have the same crowding distance. + # if solution1_idx < solution2_idx: + # # Select the first solution if it has higher crowding distance. + # selected_parent_idx = current_indices[0] + # else: + # # Select the second solution if it has higher crowding distance. + # selected_parent_idx = current_indices[1] + + if solutions_crowding_distance[0] > solutions_crowding_distance[1]: + # Select the first solution if it has higher crowding distance. 
+ selected_parent_idx = current_indices[0] + elif solutions_crowding_distance[1] > solutions_crowding_distance[0]: + # Select the second solution if it has higher crowding distance. + selected_parent_idx = current_indices[1] + else: + # If the crowding distance is equal, select the parent randomly. + rand_num = numpy.random.uniform() + if rand_num < 0.5: + # If the random number is < 0.5, then select the first solution. + selected_parent_idx = current_indices[0] + else: + # If the random number is >= 0.5, then select the second solution. + selected_parent_idx = current_indices[1] + + # Insert the selected parent index. + parents_indices.append(selected_parent_idx) + # Insert the selected parent. + parents[parent_num, :] = self.population[selected_parent_idx, :].copy() + + # Make sure the parents indices is returned as a NumPy array. + return parents, numpy.array(parents_indices) + +def nsga2_selection(self, + pareto_fronts, + solutions_fronts_indices, + num_parents): + + """ + Select the parents using the Non-Dominated Sorting Genetic Algorithm II (NSGA-II). + The selection is done using non-dominated sorting and crowding distance. + Using non-dominated sorting, the solutions are distributed across pareto fronts. The fronts are given the indices 0, 1, 2, ..., N where N is the number of pareto fronts. The lower the index of the pareto front, the better its solutions. + The parents are selected from the lower pareto fronts and moving up until selecting the number of desired parents. + A solution from a pareto front X cannot be taken as a parent until all solutions in pareto front Y is selected given that Y < X. + For a pareto front X, if only a subset of its solutions is needed, then the corwding distance is used to determine which solutions to be selected from the front. The solution with the higher crowding distance is selected. + If the 2 solutions are in the same pareto front and have the same crowding distance, then a solution is randomly selected. 
+ Later, the selected parents will mate to produce the offspring. + + It accepts 2 parameters: + -pareto_fronts: A nested array of all the pareto fronts. Each front has its solutions. + -solutions_fronts_indices: A list of the pareto front index of each solution in the current population. + -num_parents: The number of parents to be selected. + + It returns an array of the selected parents alongside their indices in the population. + """ + + if self.gene_type_single == True: + parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0]) + else: + parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object) + + # The indices of the selected parents. + parents_indices = [] + + # The number of remaining parents to be selected. + num_remaining_parents = num_parents + + # A loop variable holding the index of the current pareto front. + pareto_front_idx = 0 + while num_remaining_parents != 0 and pareto_front_idx < len(pareto_fronts): + # Return the current pareto front. + current_pareto_front = pareto_fronts[pareto_front_idx] + # Check if the entire front fits into the parents array. + # If so, then insert all the solutions in the current front into the parents array. + if num_remaining_parents >= len(current_pareto_front): + for sol_idx in range(len(current_pareto_front)): + selected_solution_idx = current_pareto_front[sol_idx, 0] + # Insert the parent into the parents array. + parents[sol_idx, :] = self.population[selected_solution_idx, :].copy() + # Insert the index of the selected parent. + parents_indices.append(selected_solution_idx) + + # Decrement the number of remaining parents by the length of the pareto front. + num_remaining_parents -= len(current_pareto_front) + else: + # If only a subset of the front is needed, then use the crowding distance to sort the solutions and select only the number needed. + + # Calculate the crowding distance of the solutions of the pareto front. 
+ obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(current_pareto_front.copy()) + + for selected_solution_idx in crowding_dist_pop_sorted_indices[0:num_remaining_parents]: + # Insert the parent into the parents array. + parents[sol_idx, :] = self.population[selected_solution_idx, :].copy() + # Insert the index of the selected parent. + parents_indices.append(selected_solution_idx) + + # Decrement the number of remaining parents by the number of selected parents. + num_remaining_parents -= num_remaining_parents + + # Increase the pareto front index to take parents from the next front. + pareto_front_idx += 1 + + # Make sure the parents indices is returned as a NumPy array. + return parents, numpy.array(parents_indices) + + +pareto_fronts, solutions_fronts_indices = non_dominated_sorting(population_fitness) +# # print('\nsolutions_fronts_indices\n', solutions_fronts_indices) +# for i, s in enumerate(pareto_fronts): +# # print(f'Dominated Pareto Front Set {i+1}:\n{s}') +# print(f'Dominated Pareto Front Indices {i+1}:\n{s[:, 0]}') # print("\n\n\n--------------------") -# Fetch the current pareto front. 
-pareto_front = non_dominated_sets[1][:, 1] -obj_crowding_distance_list, crowding_distance_sum = crowding_distance(pareto_front) +class Object(object): + pass + +obj = Object() +obj.population = numpy.random.rand(8, 4) +obj.gene_type_single = True +obj.gene_type = [float, 0] +obj.K_tournament = 2 + +parents, parents_indices = tournament_selection_nsga2(self=obj, + pareto_fronts=pareto_fronts, + solutions_fronts_indices=solutions_fronts_indices, + num_parents=40) +print(f'Tournament Parent Selection for NSGA-II - Indices: \n{parents_indices}') + +parents, parents_indices = nsga2_selection(self=obj, + pareto_fronts=pareto_fronts, + solutions_fronts_indices=solutions_fronts_indices, + num_parents=40) +print(f'NSGA-II Parent Selection - Indices: \n{parents_indices}') + +# for idx in range(len(pareto_fronts)): +# # Fetch the current pareto front. +# pareto_front = pareto_fronts[idx] +# obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front.copy()) +# print('Front IDX', crowding_dist_front_sorted_indices) +# print('POP IDX ', crowding_dist_pop_sorted_indices) +# print(f'Sorted Sum of Crowd Dists\n{crowding_distance_sum}') -print(obj_crowding_distance_list) -print(f'\nSorted Sum of Crowd Dists\n{crowding_distance_sum}') +# # Fetch the current pareto front. 
+# pareto_front = pareto_fronts[0] +# obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front.copy()) +# print('\n', crowding_dist_pop_sorted_indices) +# print(f'Sorted Sum of Crowd Dists\n{crowding_distance_sum}') From 0ea3674830c77caed01c8381266c934431d700b8 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sat, 2 Sep 2023 14:54:54 -0400 Subject: [PATCH 05/25] Support of NSGA-II --- NSGA-II/non_dominant_sorting.py | 62 ------ .../non_dominant_sorting_crowding_distance.py | 120 +++++++--- pygad/__init__.py | 2 +- pygad/helper/__init__.py | 4 +- pygad/helper/nsga2.py | 210 ++++++++++++++++++ pygad/pygad.py | 4 + pygad/utils/parent_selection.py | 205 +++++++++++++++++ setup.py | 2 +- 8 files changed, 506 insertions(+), 103 deletions(-) delete mode 100644 NSGA-II/non_dominant_sorting.py create mode 100644 pygad/helper/nsga2.py diff --git a/NSGA-II/non_dominant_sorting.py b/NSGA-II/non_dominant_sorting.py deleted file mode 100644 index af677b8..0000000 --- a/NSGA-II/non_dominant_sorting.py +++ /dev/null @@ -1,62 +0,0 @@ -import numpy - -population = numpy.array([[20, 2.2], - [60, 4.4], - [65, 3.5], - [15, 4.4], - [55, 4.5], - [50, 1.8], - [80, 4.0], - [25, 4.6]]) - -def non_dominant_sorting(curr_set): - # List of the members of the current dominant front/set. - dominant_set = [] - # List of the non-members of the current dominant front/set. - non_dominant_set = [] - for idx1, sol1 in enumerate(curr_set): - # Flag indicates whether the solution is a member of the current dominant set. - is_dominant = True - for idx2, sol2 in enumerate(curr_set): - if idx1 == idx2: - continue - # Zipping the 2 solutions so the corresponding genes are in the same list. - # The returned array is of size (N, 2) where N is the number of genes. - b = numpy.array(list(zip(sol1, sol2))) - - #TODO Consider repacing < by > for maximization problems. 
- # Checking for if any solution dominates the current solution by applying the 2 conditions. - # le_eq: All elements must be True. - # le: Only 1 element must be True. - le_eq = b[:, 1] <= b[:, 0] - le = b[:, 1] < b[:, 0] - - # If the 2 conditions hold, then a solution dominates the current solution. - # The current solution is not considered a member of the dominant set. - if le_eq.all() and le.any(): - # print(f"{sol2} dominates {sol1}") - # Set the is_dominant flag to False to not insert the current solution in the current dominant set. - # Instead, insert it into the non-dominant set. - is_dominant = False - non_dominant_set.append(sol1) - break - else: - # Reaching here means the solution does not dominant the current solution. - # print(f"{sol2} does not dominate {sol1}") - pass - - # If the flag is True, then no solution dominates the current solution. - if is_dominant: - dominant_set.append(sol1) - - # Return the dominant and non-dominant sets. - return dominant_set, non_dominant_set - -dominant_set = [] -non_dominant_set = population.copy() -while len(non_dominant_set) > 0: - d1, non_dominant_set = non_dominant_sorting(non_dominant_set) - dominant_set.append(d1) - -for i, s in enumerate(dominant_set): - print(f'Dominant Front Set {i+1}:\n{s}') diff --git a/NSGA-II/non_dominant_sorting_crowding_distance.py b/NSGA-II/non_dominant_sorting_crowding_distance.py index 60f1895..4563cc1 100644 --- a/NSGA-II/non_dominant_sorting_crowding_distance.py +++ b/NSGA-II/non_dominant_sorting_crowding_distance.py @@ -1,13 +1,31 @@ import numpy -population_fitness = numpy.array([[20, 2.2], - [60, 4.4], - [65, 3.5], - [15, 4.4], - [55, 4.5], - [50, 1.8], - [80, 4.0], - [25, 4.6]]) +fitness = numpy.array([[20, 2.2], + [60, 4.4], + [65, 3.5], + [15, 4.4], + [55, 4.5], + [50, 1.8], + [80, 4.0], + [25, 4.6]]) + +# fitness = numpy.array([20, +# 60, +# 65, +# 15, +# 55, +# 50, +# 80, +# 25]) + +# fitness = numpy.array([[20], +# [60], +# [65], +# [15], +# [55], +# [50], +# [80], +# 
[25]]) def get_non_dominated_set(curr_solutions): """ @@ -66,13 +84,13 @@ def get_non_dominated_set(curr_solutions): # Return the dominated and non-dominated sets. return dominated_set, non_dominated_set -def non_dominated_sorting(population_fitness): +def non_dominated_sorting(fitness): """ - Apply the non-dominant sorting over the population_fitness to create the pareto fronts based on non-dominaned sorting of the solutions. + Apply the non-dominant sorting over the fitness to create the pareto fronts based on non-dominaned sorting of the solutions. Parameters ---------- - population_fitness : TYPE + fitness : TYPE An array of the population fitness across all objective function. Returns @@ -87,15 +105,15 @@ def non_dominated_sorting(population_fitness): # The remaining set to be explored for non-dominance. # Initially it is set to the entire population. # The solutions of each non-dominated set are removed after each iteration. - remaining_set = population_fitness.copy() + remaining_set = fitness.copy() # Zipping the solution index with the solution's fitness. # This helps to easily identify the index of each solution. # Each element has: # 1) The index of the solution. # 2) An array of the fitness values of this solution across all objectives. - # remaining_set = numpy.array(list(zip(range(0, population_fitness.shape[0]), non_dominated_set))) - remaining_set = list(zip(range(0, population_fitness.shape[0]), remaining_set)) + # remaining_set = numpy.array(list(zip(range(0, fitness.shape[0]), non_dominated_set))) + remaining_set = list(zip(range(0, fitness.shape[0]), remaining_set)) # A list mapping the index of each pareto front to the set of solutions in this front. 
solutions_fronts_indices = [-1]*len(remaining_set) @@ -116,7 +134,7 @@ def non_dominated_sorting(population_fitness): return pareto_fronts, solutions_fronts_indices -def crowding_distance(pareto_front): +def crowding_distance(pareto_front, fitness): """ Calculate the crowding dstance for all solutions in the current pareto front. @@ -124,6 +142,8 @@ def crowding_distance(pareto_front): ---------- pareto_front : TYPE The set of solutions in the current pareto front. + fitness : TYPE + The fitness of the current population. Returns ------- @@ -164,8 +184,8 @@ def crowding_distance(pareto_front): obj_sorted = sorted(obj, key=lambda x: x[1]) # Get the minimum and maximum values for the current objective. - obj_min_val = min(population_fitness[:, obj_idx]) - obj_max_val = max(population_fitness[:, obj_idx]) + obj_min_val = min(fitness[:, obj_idx]) + obj_max_val = max(fitness[:, obj_idx]) denominator = obj_max_val - obj_min_val # To avoid division by zero, set the denominator to a tiny value. if denominator == 0: @@ -217,9 +237,11 @@ def crowding_distance(pareto_front): return obj_crowding_dist_list, crowding_dist_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices def tournament_selection_nsga2(self, - pareto_fronts, - solutions_fronts_indices, - num_parents): + fitness, + num_parents + # pareto_fronts, + # solutions_fronts_indices, + ): """ Select the parents using the tournament selection technique for NSGA-II. @@ -231,9 +253,10 @@ def tournament_selection_nsga2(self, Later, the selected parents will mate to produce the offspring. It accepts 2 parameters: + -fitness: The fitness values for the current population. + -num_parents: The number of parents to be selected. -pareto_fronts: A nested array of all the pareto fronts. Each front has its solutions. -solutions_fronts_indices: A list of the pareto front index of each solution in the current population. - -num_parents: The number of parents to be selected. 
It returns an array of the selected parents alongside their indices in the population. """ @@ -246,6 +269,11 @@ def tournament_selection_nsga2(self, # The indices of the selected parents. parents_indices = [] + # TODO If there is only a single objective, each pareto front is expected to have only 1 solution. + # TODO Make a test to check for that behaviour. + # Find the pareto fronts and the solutions' indicies in each front. + pareto_fronts, solutions_fronts_indices = non_dominated_sorting(fitness) + # Randomly generate pairs of indices to apply for NSGA-II tournament selection for selecting the parents solutions. rand_indices = numpy.random.randint(low=0.0, high=len(solutions_fronts_indices), @@ -284,7 +312,8 @@ def tournament_selection_nsga2(self, # Reaching here means the pareto front has more than 1 solution. # Calculate the crowding distance of the solutions of the pareto front. - obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front.copy()) + obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front=pareto_front.copy(), + fitness=fitness) # This list has the sorted front-based indices for the solutions in the current pareto front. crowding_dist_front_sorted_indices = list(crowding_dist_front_sorted_indices) @@ -333,9 +362,11 @@ def tournament_selection_nsga2(self, return parents, numpy.array(parents_indices) def nsga2_selection(self, - pareto_fronts, - solutions_fronts_indices, - num_parents): + fitness, + num_parents + # pareto_fronts, + # solutions_fronts_indices + ): """ Select the parents using the Non-Dominated Sorting Genetic Algorithm II (NSGA-II). @@ -348,9 +379,10 @@ def nsga2_selection(self, Later, the selected parents will mate to produce the offspring. It accepts 2 parameters: + -fitness: The fitness values for the current population. 
+ -num_parents: The number of parents to be selected. -pareto_fronts: A nested array of all the pareto fronts. Each front has its solutions. -solutions_fronts_indices: A list of the pareto front index of each solution in the current population. - -num_parents: The number of parents to be selected. It returns an array of the selected parents alongside their indices in the population. """ @@ -363,6 +395,11 @@ def nsga2_selection(self, # The indices of the selected parents. parents_indices = [] + # TODO If there is only a single objective, each pareto front is expected to have only 1 solution. + # TODO Make a test to check for that behaviour. + # Find the pareto fronts and the solutions' indicies in each front. + pareto_fronts, solutions_fronts_indices = non_dominated_sorting(fitness) + # The number of remaining parents to be selected. num_remaining_parents = num_parents @@ -387,7 +424,8 @@ def nsga2_selection(self, # If only a subset of the front is needed, then use the crowding distance to sort the solutions and select only the number needed. # Calculate the crowding distance of the solutions of the pareto front. - obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(current_pareto_front.copy()) + obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front=current_pareto_front.copy(), + fitness=fitness) for selected_solution_idx in crowding_dist_pop_sorted_indices[0:num_remaining_parents]: # Insert the parent into the parents array. @@ -404,8 +442,10 @@ def nsga2_selection(self, # Make sure the parents indices is returned as a NumPy array. return parents, numpy.array(parents_indices) - -pareto_fronts, solutions_fronts_indices = non_dominated_sorting(population_fitness) +# TODO If there is only a single objective, each pareto front is expected to have only 1 solution. 
+# TODO Make a test to check for that behaviour. +# Find the pareto fronts and the solutions' indicies in each front. +pareto_fronts, solutions_fronts_indices = non_dominated_sorting(fitness) # # print('\nsolutions_fronts_indices\n', solutions_fronts_indices) # for i, s in enumerate(pareto_fronts): # # print(f'Dominated Pareto Front Set {i+1}:\n{s}') @@ -422,27 +462,33 @@ class Object(object): obj.K_tournament = 2 parents, parents_indices = tournament_selection_nsga2(self=obj, - pareto_fronts=pareto_fronts, - solutions_fronts_indices=solutions_fronts_indices, - num_parents=40) + fitness=fitness, + num_parents=4 + # pareto_fronts=pareto_fronts, + # solutions_fronts_indices=solutions_fronts_indices, + ) print(f'Tournament Parent Selection for NSGA-II - Indices: \n{parents_indices}') parents, parents_indices = nsga2_selection(self=obj, - pareto_fronts=pareto_fronts, - solutions_fronts_indices=solutions_fronts_indices, - num_parents=40) + fitness=fitness, + num_parents=4 + # pareto_fronts=pareto_fronts, + # solutions_fronts_indices=solutions_fronts_indices, + ) print(f'NSGA-II Parent Selection - Indices: \n{parents_indices}') # for idx in range(len(pareto_fronts)): # # Fetch the current pareto front. # pareto_front = pareto_fronts[idx] -# obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front.copy()) +# obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front=pareto_front.copy(), +# fitness=fitness) # print('Front IDX', crowding_dist_front_sorted_indices) # print('POP IDX ', crowding_dist_pop_sorted_indices) # print(f'Sorted Sum of Crowd Dists\n{crowding_distance_sum}') # # Fetch the current pareto front. 
# pareto_front = pareto_fronts[0] -# obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front.copy()) +# obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front=pareto_front.copy(), +# fitness=fitness) # print('\n', crowding_dist_pop_sorted_indices) # print(f'Sorted Sum of Crowd Dists\n{crowding_distance_sum}') diff --git a/pygad/__init__.py b/pygad/__init__.py index 9df8e5c..c098928 100644 --- a/pygad/__init__.py +++ b/pygad/__init__.py @@ -1,3 +1,3 @@ from .pygad import * # Relative import. -__version__ = "3.1.1" +__version__ = "3.2.0" diff --git a/pygad/helper/__init__.py b/pygad/helper/__init__.py index e781d27..3eebdb7 100644 --- a/pygad/helper/__init__.py +++ b/pygad/helper/__init__.py @@ -1,3 +1,3 @@ -from pygad.helper import unique +from pygad.helper import unique, nsga2 -__version__ = "1.1.0" \ No newline at end of file +__version__ = "1.2.0" \ No newline at end of file diff --git a/pygad/helper/nsga2.py b/pygad/helper/nsga2.py new file mode 100644 index 0000000..f21ec5b --- /dev/null +++ b/pygad/helper/nsga2.py @@ -0,0 +1,210 @@ +import numpy + +def get_non_dominated_set(curr_solutions): + """ + Get the set of non-dominated solutions from the current set of solutions. + + Parameters + ---------- + curr_solutions : TYPE + The set of solutions to find its non-dominated set. + + Returns + ------- + dominated_set : TYPE + A set of the dominated solutions. + non_dominated_set : TYPE + A set of the non-dominated set. + + """ + # List of the members of the current dominated pareto front/set. + dominated_set = [] + # List of the non-members of the current dominated pareto front/set. + non_dominated_set = [] + for idx1, sol1 in enumerate(curr_solutions): + # Flag indicates whether the solution is a member of the current dominated set. 
+ is_dominated = True + for idx2, sol2 in enumerate(curr_solutions): + if idx1 == idx2: + continue + # Zipping the 2 solutions so the corresponding genes are in the same list. + # The returned array is of size (N, 2) where N is the number of genes. + two_solutions = numpy.array(list(zip(sol1[1], sol2[1]))) + + #TODO Consider repacing < by > for maximization problems. + # Checking for if any solution dominates the current solution by applying the 2 conditions. + # le_eq (less than or equal): All elements must be True. + # le (less than): Only 1 element must be True. + le_eq = two_solutions[:, 1] <= two_solutions[:, 0] + le = two_solutions[:, 1] < two_solutions[:, 0] + + # If the 2 conditions hold, then a solution dominates the current solution. + # The current solution is not considered a member of the dominated set. + if le_eq.all() and le.any(): + # Set the is_dominated flag to False to NOT insert the current solution in the current dominated set. + # Instead, insert it into the non-dominated set. + is_dominated = False + non_dominated_set.append(sol1) + break + else: + # Reaching here means the solution does not dominate the current solution. + pass + + # If the flag is True, then no solution dominates the current solution. + if is_dominated: + dominated_set.append(sol1) + + # Return the dominated and non-dominated sets. + return dominated_set, non_dominated_set + +def non_dominated_sorting(fitness): + """ + Apply the non-dominant sorting over the fitness to create the pareto fronts based on non-dominaned sorting of the solutions. + + Parameters + ---------- + fitness : TYPE + An array of the population fitness across all objective function. + + Returns + ------- + pareto_fronts : TYPE + An array of the pareto fronts. + + """ + # A list of all non-dominated sets. + pareto_fronts = [] + + # The remaining set to be explored for non-dominance. + # Initially it is set to the entire population. 
+ # The solutions of each non-dominated set are removed after each iteration. + remaining_set = fitness.copy() + + # Zipping the solution index with the solution's fitness. + # This helps to easily identify the index of each solution. + # Each element has: + # 1) The index of the solution. + # 2) An array of the fitness values of this solution across all objectives. + # remaining_set = numpy.array(list(zip(range(0, fitness.shape[0]), non_dominated_set))) + remaining_set = list(zip(range(0, fitness.shape[0]), remaining_set)) + + # A list mapping the index of each pareto front to the set of solutions in this front. + solutions_fronts_indices = [-1]*len(remaining_set) + solutions_fronts_indices = numpy.array(solutions_fronts_indices) + + # Index of the current pareto front. + front_index = -1 + while len(remaining_set) > 0: + front_index += 1 + + # Get the current non-dominated set of solutions. + pareto_front, remaining_set = get_non_dominated_set(curr_solutions=remaining_set) + pareto_front = numpy.array(pareto_front, dtype=object) + pareto_fronts.append(pareto_front) + + solutions_indices = pareto_front[:, 0].astype(int) + solutions_fronts_indices[solutions_indices] = front_index + + return pareto_fronts, solutions_fronts_indices + +def crowding_distance(pareto_front, fitness): + """ + Calculate the crowding dstance for all solutions in the current pareto front. + + Parameters + ---------- + pareto_front : TYPE + The set of solutions in the current pareto front. + fitness : TYPE + The fitness of the current population. + + Returns + ------- + obj_crowding_dist_list : TYPE + A nested list of the values for all objectives alongside their crowding distance. + crowding_dist_sum : TYPE + A list of the sum of crowding distances across all objectives for each solution. + crowding_dist_front_sorted_indices : TYPE + The indices of the solutions (relative to the current front) sorted by the crowding distance. 
+ crowding_dist_pop_sorted_indices : TYPE + The indices of the solutions (relative to the population) sorted by the crowding distance. + """ + + # Each solution in the pareto front has 2 elements: + # 1) The index of the solution in the population. + # 2) A list of the fitness values for all objectives of the solution. + # Before proceeding, remove the indices from each solution in the pareto front. + pareto_front_no_indices = numpy.array([pareto_front[:, 1][idx] for idx in range(pareto_front.shape[0])]) + + # If there is only 1 solution, then return empty arrays for the crowding distance. + if pareto_front_no_indices.shape[0] == 1: + # There is only 1 index. + return numpy.array([]), numpy.array([]), numpy.array([0]), pareto_front[:, 0].astype(int) + + # An empty list holding info about the objectives of each solution. The info includes the objective value and crowding distance. + obj_crowding_dist_list = [] + # Loop through the objectives to calculate the crowding distance of each solution across all objectives. + for obj_idx in range(pareto_front_no_indices.shape[1]): + obj = pareto_front_no_indices[:, obj_idx] + # This variable has a nested list where each child list zip the following together: + # 1) The index of the objective value. + # 2) The objective value. + # 3) Initialize the crowding distance by zero. + obj = list(zip(range(len(obj)), obj, [0]*len(obj))) + obj = [list(element) for element in obj] + # This variable is the sorted version where sorting is done by the objective value (second element). + # Note that the first element is still the original objective index before sorting. + obj_sorted = sorted(obj, key=lambda x: x[1]) + + # Get the minimum and maximum values for the current objective. + obj_min_val = min(fitness[:, obj_idx]) + obj_max_val = max(fitness[:, obj_idx]) + denominator = obj_max_val - obj_min_val + # To avoid division by zero, set the denominator to a tiny value. 
+ if denominator == 0: + denominator = 0.0000001 + + # Set the crowding distance to the first and last solutions (after being sorted) to infinity. + inf_val = float('inf') + # crowding_distance[0] = inf_val + obj_sorted[0][2] = inf_val + # crowding_distance[-1] = inf_val + obj_sorted[-1][2] = inf_val + + # If there are only 2 solutions in the current pareto front, then do not proceed. + # The crowding distance for such 2 solutions is infinity. + if len(obj_sorted) <= 2: + break + + for idx in range(1, len(obj_sorted)-1): + # Calculate the crowding distance. + crowding_dist = obj_sorted[idx+1][1] - obj_sorted[idx-1][1] + crowding_dist = crowding_dist / denominator + # Insert the crowding distance back into the list to override the initial zero. + obj_sorted[idx][2] = crowding_dist + + # Sort the objective by the original index at index 0 of the each child list. + obj_sorted = sorted(obj_sorted, key=lambda x: x[0]) + obj_crowding_dist_list.append(obj_sorted) + + obj_crowding_dist_list = numpy.array(obj_crowding_dist_list) + crowding_dist = numpy.array([obj_crowding_dist_list[idx, :, 2] for idx in range(len(obj_crowding_dist_list))]) + crowding_dist_sum = numpy.sum(crowding_dist, axis=0) + + # An array of the sum of crowding distances across all objectives. + # Each row has 2 elements: + # 1) The index of the solution. + # 2) The sum of all crowding distances for all objective of the solution. + crowding_dist_sum = numpy.array(list(zip(obj_crowding_dist_list[0, :, 0], crowding_dist_sum))) + crowding_dist_sum = sorted(crowding_dist_sum, key=lambda x: x[1], reverse=True) + + # The sorted solutions' indices by the crowding distance. + crowding_dist_front_sorted_indices = numpy.array(crowding_dist_sum)[:, 0] + crowding_dist_front_sorted_indices = crowding_dist_front_sorted_indices.astype(int) + # Note that such indices are relative to the front, NOT the population. 
+ # It is mandatory to map such front indices to population indices before using them to refer to the population. + crowding_dist_pop_sorted_indices = pareto_front[:, 0] + crowding_dist_pop_sorted_indices = crowding_dist_pop_sorted_indices[crowding_dist_front_sorted_indices] + crowding_dist_pop_sorted_indices = crowding_dist_pop_sorted_indices.astype(int) + + return obj_crowding_dist_list, crowding_dist_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices diff --git a/pygad/pygad.py b/pygad/pygad.py index d9a7564..9514f43 100644 --- a/pygad/pygad.py +++ b/pygad/pygad.py @@ -870,6 +870,10 @@ def __init__(self, self.select_parents = self.random_selection elif (parent_selection_type == "tournament"): self.select_parents = self.tournament_selection + elif (parent_selection_type == "tournament_nsga2"): # Supported in PyGAD >= 3.2 + self.select_parents = self.tournament_selection_nsga2 + elif (parent_selection_type == "nsga2"): # Supported in PyGAD >= 3.2 + self.select_parents = self.nsga2_selection elif (parent_selection_type == "rank"): self.select_parents = self.rank_selection else: diff --git a/pygad/utils/parent_selection.py b/pygad/utils/parent_selection.py index 519c281..7e6538e 100644 --- a/pygad/utils/parent_selection.py +++ b/pygad/utils/parent_selection.py @@ -3,6 +3,7 @@ """ import numpy +from ..helper import nsga2 class ParentSelection: def steady_state_selection(self, fitness, num_parents): @@ -229,3 +230,207 @@ def stochastic_universal_selection(self, fitness, num_parents): break return parents, numpy.array(parents_indices) + + def tournament_selection_nsga2(self, + fitness, + num_parents + # pareto_fronts, + # solutions_fronts_indices, + ): + + """ + Select the parents using the tournament selection technique for NSGA-II. + The traditional tournament selection uses the fitness values. But the tournament selection for NSGA-II uses non-dominated sorting and crowding distance. 
+ Using non-dominated sorting, the solutions are distributed across pareto fronts. The fronts are given the indices 0, 1, 2, ..., N where N is the number of pareto fronts. The lower the index of the pareto front, the better its solutions. + To select the parents solutions, 2 solutions are selected randomly. If the 2 solutions are in different pareto fronts, then the solution comming from a pareto front with lower index is selected. + If 2 solutions are in the same pareto front, then crowding distance is calculated. The solution with the higher crowding distance is selected. + If the 2 solutions are in the same pareto front and have the same crowding distance, then a solution is randomly selected. + Later, the selected parents will mate to produce the offspring. + + It accepts 2 parameters: + -fitness: The fitness values for the current population. + -num_parents: The number of parents to be selected. + -pareto_fronts: A nested array of all the pareto fronts. Each front has its solutions. + -solutions_fronts_indices: A list of the pareto front index of each solution in the current population. + + It returns an array of the selected parents alongside their indices in the population. + """ + + if self.gene_type_single == True: + parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0]) + else: + parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object) + + # The indices of the selected parents. + parents_indices = [] + + # TODO If there is only a single objective, each pareto front is expected to have only 1 solution. + # TODO Make a test to check for that behaviour. + pareto_fronts, solutions_fronts_indices = nsga2.non_dominated_sorting(fitness) + + # Randomly generate pairs of indices to apply for NSGA-II tournament selection for selecting the parents solutions. 
+ rand_indices = numpy.random.randint(low=0.0, + high=len(solutions_fronts_indices), + size=(num_parents, self.K_tournament)) + # rand_indices[0, 0] = 5 + # rand_indices[0, 1] = 3 + # rand_indices[1, 0] = 1 + # rand_indices[1, 1] = 6 + + for parent_num in range(num_parents): + # Return the indices of the current 2 solutions. + current_indices = rand_indices[parent_num] + # Return the front index of the 2 solutions. + parent_fronts_indices = solutions_fronts_indices[current_indices] + + if parent_fronts_indices[0] < parent_fronts_indices[1]: + # If the first solution is in a lower pareto front than the second, then select it. + selected_parent_idx = current_indices[0] + elif parent_fronts_indices[0] > parent_fronts_indices[1]: + # If the second solution is in a lower pareto front than the first, then select it. + selected_parent_idx = current_indices[1] + else: + # The 2 solutions are in the same pareto front. + # The selection is made using the crowding distance. + + # A list holding the crowding distance of the current 2 solutions. It is initialized to -1. + solutions_crowding_distance = [-1, -1] + + # Fetch the current pareto front. + pareto_front = pareto_fronts[parent_fronts_indices[0]] # Index 1 can also be used. + + # If there is only 1 solution in the pareto front, just return it without calculating the crowding distance (it is useless). + if pareto_front.shape[0] == 1: + selected_parent_idx = current_indices[0] # Index 1 can also be used. + else: + # Reaching here means the pareto front has more than 1 solution. + + # Calculate the crowding distance of the solutions of the pareto front. + obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = nsga2.crowding_distance(pareto_front=pareto_front.copy(), + fitness=fitness) + + # This list has the sorted front-based indices for the solutions in the current pareto front. 
+ crowding_dist_front_sorted_indices = list(crowding_dist_front_sorted_indices) + # This list has the sorted population-based indices for the solutions in the current pareto front. + crowding_dist_pop_sorted_indices = list(crowding_dist_pop_sorted_indices) + + # Return the indices of the solutions from the pareto front. + solution1_idx = crowding_dist_pop_sorted_indices.index(current_indices[0]) + solution2_idx = crowding_dist_pop_sorted_indices.index(current_indices[1]) + + # Fetch the crowding distance using the indices. + solutions_crowding_distance[0] = crowding_distance_sum[solution1_idx][1] + solutions_crowding_distance[1] = crowding_distance_sum[solution2_idx][1] + + # # Instead of using the crowding distance, we can select the solution that comes first in the list. + # # Its limitation is that it is biased towards the low indexed solution if the 2 solutions have the same crowding distance. + # if solution1_idx < solution2_idx: + # # Select the first solution if it has higher crowding distance. + # selected_parent_idx = current_indices[0] + # else: + # # Select the second solution if it has higher crowding distance. + # selected_parent_idx = current_indices[1] + + if solutions_crowding_distance[0] > solutions_crowding_distance[1]: + # Select the first solution if it has higher crowding distance. + selected_parent_idx = current_indices[0] + elif solutions_crowding_distance[1] > solutions_crowding_distance[0]: + # Select the second solution if it has higher crowding distance. + selected_parent_idx = current_indices[1] + else: + # If the crowding distance is equal, select the parent randomly. + rand_num = numpy.random.uniform() + if rand_num < 0.5: + # If the random number is < 0.5, then select the first solution. + selected_parent_idx = current_indices[0] + else: + # If the random number is >= 0.5, then select the second solution. + selected_parent_idx = current_indices[1] + + # Insert the selected parent index. 
+ parents_indices.append(selected_parent_idx) + # Insert the selected parent. + parents[parent_num, :] = self.population[selected_parent_idx, :].copy() + + # Make sure the parents indices is returned as a NumPy array. + return parents, numpy.array(parents_indices) + + def nsga2_selection(self, + fitness, + num_parents + # pareto_fronts, + # solutions_fronts_indices + ): + + """ + Select the parents using the Non-Dominated Sorting Genetic Algorithm II (NSGA-II). + The selection is done using non-dominated sorting and crowding distance. + Using non-dominated sorting, the solutions are distributed across pareto fronts. The fronts are given the indices 0, 1, 2, ..., N where N is the number of pareto fronts. The lower the index of the pareto front, the better its solutions. + The parents are selected from the lower pareto fronts and moving up until selecting the number of desired parents. + A solution from a pareto front X cannot be taken as a parent until all solutions in pareto front Y is selected given that Y < X. + For a pareto front X, if only a subset of its solutions is needed, then the corwding distance is used to determine which solutions to be selected from the front. The solution with the higher crowding distance is selected. + If the 2 solutions are in the same pareto front and have the same crowding distance, then a solution is randomly selected. + Later, the selected parents will mate to produce the offspring. + + It accepts 2 parameters: + -fitness: The fitness values for the current population. + -num_parents: The number of parents to be selected. + -pareto_fronts: A nested array of all the pareto fronts. Each front has its solutions. + -solutions_fronts_indices: A list of the pareto front index of each solution in the current population. + + It returns an array of the selected parents alongside their indices in the population. 
+ """ + + if self.gene_type_single == True: + parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0]) + else: + parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object) + + # The indices of the selected parents. + parents_indices = [] + + # TODO If there is only a single objective, each pareto front is expected to have only 1 solution. + # TODO Make a test to check for that behaviour. + pareto_fronts, solutions_fronts_indices = nsga2.non_dominated_sorting(fitness) + + # The number of remaining parents to be selected. + num_remaining_parents = num_parents + + # A loop variable holding the index of the current pareto front. + pareto_front_idx = 0 + while num_remaining_parents != 0 and pareto_front_idx < len(pareto_fronts): + # Return the current pareto front. + current_pareto_front = pareto_fronts[pareto_front_idx] + # Check if the entire front fits into the parents array. + # If so, then insert all the solutions in the current front into the parents array. + if num_remaining_parents >= len(current_pareto_front): + for sol_idx in range(len(current_pareto_front)): + selected_solution_idx = current_pareto_front[sol_idx, 0] + # Insert the parent into the parents array. + parents[sol_idx, :] = self.population[selected_solution_idx, :].copy() + # Insert the index of the selected parent. + parents_indices.append(selected_solution_idx) + + # Decrement the number of remaining parents by the length of the pareto front. + num_remaining_parents -= len(current_pareto_front) + else: + # If only a subset of the front is needed, then use the crowding distance to sort the solutions and select only the number needed. + + # Calculate the crowding distance of the solutions of the pareto front. 
+ obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = nsga2.crowding_distance(pareto_front=current_pareto_front.copy(), + fitness=fitness) + + for selected_solution_idx in crowding_dist_pop_sorted_indices[0:num_remaining_parents]: + # Insert the parent into the parents array. + parents[sol_idx, :] = self.population[selected_solution_idx, :].copy() + # Insert the index of the selected parent. + parents_indices.append(selected_solution_idx) + + # Decrement the number of remaining parents by the number of selected parents. + num_remaining_parents -= num_remaining_parents + + # Increase the pareto front index to take parents from the next front. + pareto_front_idx += 1 + + # Make sure the parents indices is returned as a NumPy array. + return parents, numpy.array(parents_indices) diff --git a/setup.py b/setup.py index 85af7bf..9017723 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="pygad", - version="3.1.1", + version="3.2.0", author="Ahmed Fawzy Gad", install_requires=["numpy", "matplotlib", "cloudpickle",], author_email="ahmed.f.gad@gmail.com", From 4ef8cd5bcf8ee86b34db402dfbccc0ccf347aa51 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 3 Sep 2023 14:18:51 -0400 Subject: [PATCH 06/25] Support of NSGA-II --- pygad/pygad.py | 24 ++++++++++++++++++++++++ pygad/utils/mutation.py | 10 ++++++++-- pygad/utils/parent_selection.py | 14 ++++++++++---- 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/pygad/pygad.py b/pygad/pygad.py index 9514f43..7b6ab8c 100644 --- a/pygad/pygad.py +++ b/pygad/pygad.py @@ -1683,6 +1683,12 @@ def cal_pop_fitness(self): if self.fitness_batch_size in [1, None]: fitness = self.fitness_func(self, sol, sol_idx) if type(fitness) in GA.supported_int_float_types: + # The fitness function returns a single numeric value. + # This is a single-objective optimization problem. 
+ pass + elif type(fitness) in [list, tuple, numpy.ndarray]: + # The fitness function returns a list/tuple/numpy.ndarray. + # This is a multi-objective optimization problem. pass else: raise ValueError(f"The fitness function should return a number but the value {fitness} of type {type(fitness)} found.") @@ -1718,6 +1724,12 @@ def cal_pop_fitness(self): for index, fitness in zip(batch_indices, batch_fitness): if type(fitness) in GA.supported_int_float_types: + # The fitness function returns a single numeric value. + # This is a single-objective optimization problem. + pop_fitness[index] = fitness + elif type(fitness) in [list, tuple, numpy.ndarray]: + # The fitness function returns a list/tuple/numpy.ndarray. + # This is a multi-objective optimization problem. pop_fitness[index] = fitness else: raise ValueError(f"The fitness function should return a number but the value {fitness} of type {type(fitness)} found.") @@ -1779,6 +1791,12 @@ def cal_pop_fitness(self): if self.fitness_batch_size in [1, None]: for index, fitness in zip(solutions_to_submit_indices, executor.map(self.fitness_func, [self]*len(solutions_to_submit_indices), solutions_to_submit, solutions_to_submit_indices)): if type(fitness) in GA.supported_int_float_types: + # The fitness function returns a single numeric value. + # This is a single-objective optimization problem. + pop_fitness[index] = fitness + elif type(fitness) in [list, tuple, numpy.ndarray]: + # The fitness function returns a list/tuple/numpy.ndarray. + # This is a multi-objective optimization problem. pop_fitness[index] = fitness else: raise ValueError(f"The fitness function should return a number but the value {fitness} of type {type(fitness)} found.") @@ -1810,6 +1828,12 @@ def cal_pop_fitness(self): for index, fitness in zip(batch_indices, batch_fitness): if type(fitness) in GA.supported_int_float_types: + # The fitness function returns a single numeric value. + # This is a single-objective optimization problem. 
+ pop_fitness[index] = fitness + elif type(fitness) in [list, tuple, numpy.ndarray]: + # The fitness function returns a list/tuple/numpy.ndarray. + # This is a multi-objective optimization problem. pop_fitness[index] = fitness else: raise ValueError(f"The fitness function should return a number but the value ({fitness}) of type {type(fitness)} found.") diff --git a/pygad/utils/mutation.py b/pygad/utils/mutation.py index a564847..b7ba2c2 100644 --- a/pygad/utils/mutation.py +++ b/pygad/utils/mutation.py @@ -471,7 +471,13 @@ def adaptive_mutation_population_fitness(self, offspring): first_idx = len(parents_to_keep) last_idx = fitness.shape[0] - fitness[first_idx:last_idx] = [0]*(last_idx - first_idx) + if len(fitness.shape) > 1: + # TODO This is a multi-objective optimization problem. + # fitness[first_idx:last_idx] = [0]*(last_idx - first_idx) + raise ValueError('Edit adaptive mutation to work with multi-objective optimization problems.') + else: + # This is a single-objective optimization problem. + fitness[first_idx:last_idx] = [0]*(last_idx - first_idx) if self.fitness_batch_size in [1, None]: # Calculate the fitness for each individual solution. @@ -667,7 +673,7 @@ def adaptive_mutation_by_space(self, offspring): gene_type=self.gene_type, num_trials=10) return offspring - + def adaptive_mutation_randomly(self, offspring): """ diff --git a/pygad/utils/parent_selection.py b/pygad/utils/parent_selection.py index 7e6538e..59be45b 100644 --- a/pygad/utils/parent_selection.py +++ b/pygad/utils/parent_selection.py @@ -395,7 +395,9 @@ def nsga2_selection(self, # The number of remaining parents to be selected. num_remaining_parents = num_parents - + + # Index of the current parent. + current_parent_idx = 0 # A loop variable holding the index of the current pareto front. 
pareto_front_idx = 0 while num_remaining_parents != 0 and pareto_front_idx < len(pareto_fronts): @@ -407,10 +409,12 @@ def nsga2_selection(self, for sol_idx in range(len(current_pareto_front)): selected_solution_idx = current_pareto_front[sol_idx, 0] # Insert the parent into the parents array. - parents[sol_idx, :] = self.population[selected_solution_idx, :].copy() + parents[current_parent_idx, :] = self.population[selected_solution_idx, :].copy() # Insert the index of the selected parent. parents_indices.append(selected_solution_idx) - + # Increase the parent index. + current_parent_idx += 1 + # Decrement the number of remaining parents by the length of the pareto front. num_remaining_parents -= len(current_pareto_front) else: @@ -422,9 +426,11 @@ def nsga2_selection(self, for selected_solution_idx in crowding_dist_pop_sorted_indices[0:num_remaining_parents]: # Insert the parent into the parents array. - parents[sol_idx, :] = self.population[selected_solution_idx, :].copy() + parents[current_parent_idx, :] = self.population[selected_solution_idx, :].copy() # Insert the index of the selected parent. parents_indices.append(selected_solution_idx) + # Increase the parent index. + current_parent_idx += 1 # Decrement the number of remaining parents by the number of selected parents. 
num_remaining_parents -= num_remaining_parents From 3163d7f87c0dbd8f084e76a79920cf2ba0e5c713 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 3 Sep 2023 15:50:29 -0400 Subject: [PATCH 07/25] Support of NSGA-II --- pygad/helper/nsga2.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/pygad/helper/nsga2.py b/pygad/helper/nsga2.py index f21ec5b..11ad3ce 100644 --- a/pygad/helper/nsga2.py +++ b/pygad/helper/nsga2.py @@ -1,4 +1,5 @@ import numpy +import pygad def get_non_dominated_set(curr_solutions): """ @@ -208,3 +209,43 @@ def crowding_distance(pareto_front, fitness): crowding_dist_pop_sorted_indices = crowding_dist_pop_sorted_indices.astype(int) return obj_crowding_dist_list, crowding_dist_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices + +def sort_solutions_nsga2(fitness): + """ + Sort the solutions based on the fitness. + The sorting procedure differs based on whether the problem is single-objective or multi-objective optimization. + If it is multi-objective, then non-dominated sorting and crowding distance are applied. + At first, non-dominated sorting is applied to classify the solutions into pareto fronts. + Then the solutions inside each front are sorted using crowded distance. + The solutions inside pareto front X always come before those in front X+1. + + Parameters + ---------- + fitness : TYPE + The fitness of the entire population. + + Returns + ------- + solutions_sorted : TYPE + The indices of the sorted solutions. + + """ + if type(fitness[0]) in [list, tuple, numpy.ndarray]: + # Multi-objective optimization problem. + solutions_sorted = [] + # Split the solutions into pareto fronts using non-dominated sorting. + pareto_fronts, solutions_fronts_indices = non_dominated_sorting(fitness) + for pareto_front in pareto_fronts: + # Sort the solutions in the front using crowded distance. 
+ _, _, _, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front=pareto_front.copy(), + fitness=fitness) + crowding_dist_pop_sorted_indices = list(crowding_dist_pop_sorted_indices) + # Append the sorted solutions into the list. + solutions_sorted.extend(crowding_dist_pop_sorted_indices) + elif type(fitness[0]) in pygad.GA.supported_int_float_types: + # Single-objective optimization problem. + solutions_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) + # Reverse the sorted solutions so that the best solution comes first. + solutions_sorted.reverse() + + return solutions_sorted From 6aeb6851ee150634723b9578e37944db3bc12407 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 3 Sep 2023 22:26:01 -0400 Subject: [PATCH 08/25] Support of NSGA-II --- pygad/helper/nsga2.py | 7 +- pygad/pygad.py | 2 +- pygad/utils/mutation.py | 37 +++++++-- pygad/utils/parent_selection.py | 134 +++++++++++++++++++++++--------- pygad/visualize/__init__.py | 2 +- 5 files changed, 137 insertions(+), 45 deletions(-) diff --git a/pygad/helper/nsga2.py b/pygad/helper/nsga2.py index 11ad3ce..8f7401e 100644 --- a/pygad/helper/nsga2.py +++ b/pygad/helper/nsga2.py @@ -36,8 +36,8 @@ def get_non_dominated_set(curr_solutions): # Checking for if any solution dominates the current solution by applying the 2 conditions. # le_eq (less than or equal): All elements must be True. # le (less than): Only 1 element must be True. - le_eq = two_solutions[:, 1] <= two_solutions[:, 0] - le = two_solutions[:, 1] < two_solutions[:, 0] + le_eq = two_solutions[:, 1] >= two_solutions[:, 0] + le = two_solutions[:, 1] > two_solutions[:, 0] # If the 2 conditions hold, then a solution dominates the current solution. # The current solution is not considered a member of the dominated set. @@ -175,8 +175,9 @@ def crowding_distance(pareto_front, fitness): # If there are only 2 solutions in the current pareto front, then do not proceed. # The crowding distance for such 2 solutions is infinity. 
if len(obj_sorted) <= 2: + obj_crowding_dist_list.append(obj_sorted) break - + for idx in range(1, len(obj_sorted)-1): # Calculate the crowding distance. crowding_dist = obj_sorted[idx+1][1] - obj_sorted[idx-1][1] diff --git a/pygad/pygad.py b/pygad/pygad.py index 7b6ab8c..97205f9 100644 --- a/pygad/pygad.py +++ b/pygad/pygad.py @@ -2099,7 +2099,7 @@ def run(self): :] = self.last_generation_offspring_mutation else: self.last_generation_elitism, self.last_generation_elitism_indices = self.steady_state_selection(self.last_generation_fitness, - num_parents=self.keep_elitism) + num_parents=self.keep_elitism) self.population[0:self.last_generation_elitism.shape[0], :] = self.last_generation_elitism self.population[self.last_generation_elitism.shape[0]:, :] = self.last_generation_offspring_mutation diff --git a/pygad/utils/mutation.py b/pygad/utils/mutation.py index b7ba2c2..ef0e85f 100644 --- a/pygad/utils/mutation.py +++ b/pygad/utils/mutation.py @@ -474,7 +474,8 @@ def adaptive_mutation_population_fitness(self, offspring): if len(fitness.shape) > 1: # TODO This is a multi-objective optimization problem. # fitness[first_idx:last_idx] = [0]*(last_idx - first_idx) - raise ValueError('Edit adaptive mutation to work with multi-objective optimization problems.') + fitness[first_idx:last_idx] = numpy.zeros(shape=(last_idx - first_idx, fitness.shape[1])) + # raise ValueError('Edit adaptive mutation to work with multi-objective optimization problems.') else: # This is a single-objective optimization problem. fitness[first_idx:last_idx] = [0]*(last_idx - first_idx) @@ -514,7 +515,13 @@ def adaptive_mutation_population_fitness(self, offspring): for idx in range(batch_first_index, batch_last_index): fitness[idx] = fitness_temp[idx - batch_first_index] - average_fitness = numpy.mean(fitness) + if len(fitness.shape) > 1: + # TODO This is a multi-objective optimization problem. + # Calculate the average of each objective's fitness across all solutions in the population. 
+ average_fitness = numpy.mean(fitness, axis=0) + else: + # This is a single-objective optimization problem. + average_fitness = numpy.mean(fitness) return average_fitness, fitness[len(parents_to_keep):] @@ -690,10 +697,30 @@ def adaptive_mutation_randomly(self, offspring): # Adaptive random mutation changes one or more genes in each offspring randomly. # The number of genes to mutate depends on the solution's fitness value. for offspring_idx in range(offspring.shape[0]): - if offspring_fitness[offspring_idx] < average_fitness: - adaptive_mutation_num_genes = self.mutation_num_genes[0] + ## TODO Make edits to work with multi-objective optimization. + # Compare the fitness of each offspring to the average fitness of each objective function. + fitness_comparison = offspring_fitness[offspring_idx] < average_fitness + # Check if the problem is single or multi-objective optimization. + if type(fitness_comparison) is bool: + # Single-objective optimization problem. + if fitness_comparison: + adaptive_mutation_num_genes = self.mutation_num_genes[0] + else: + adaptive_mutation_num_genes = self.mutation_num_genes[1] else: - adaptive_mutation_num_genes = self.mutation_num_genes[1] + # Multi-objective optimization problem. + + # Get the sum of the pool array (result of comparison). + # True is considered 1 and False is 0. + fitness_comparison_sum = sum(fitness_comparison) + # Check if more than or equal to 50% of the objectives have fitness greater than the average. + # If True, then use the first percentage. + # If False, use the second percentage. 
+ if fitness_comparison_sum >= len(fitness_comparison)/2: + adaptive_mutation_num_genes = self.mutation_num_genes[0] + else: + adaptive_mutation_num_genes = self.mutation_num_genes[1] + mutation_indices = numpy.array(random.sample(range(0, self.num_genes), adaptive_mutation_num_genes)) for gene_idx in mutation_indices: diff --git a/pygad/utils/parent_selection.py b/pygad/utils/parent_selection.py index 59be45b..464dcbd 100644 --- a/pygad/utils/parent_selection.py +++ b/pygad/utils/parent_selection.py @@ -7,17 +7,23 @@ class ParentSelection: def steady_state_selection(self, fitness, num_parents): - + """ - Selects the parents using the steady-state selection technique. Later, these parents will mate to produce the offspring. + Selects the parents using the steady-state selection technique. + This is by sorting the solutions based on the fitness and select the best ones as parents. + Later, these parents will mate to produce the offspring. + It accepts 2 parameters: -fitness: The fitness values of the solutions in the current population. -num_parents: The number of parents to be selected. - It returns an array of the selected parents. + It returns: + -An array of the selected parents. + -The indices of the selected solutions. """ - fitness_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) - fitness_sorted.reverse() + # Return the indices of the sorted solutions (all solutions in the population). + # This function works with both single- and multi-objective optimization problems. + fitness_sorted = nsga2.sort_solutions_nsga2(fitness=fitness) # Selecting the best individuals in the current generation as parents for producing the offspring of the next generation. if self.gene_type_single == True: @@ -38,11 +44,14 @@ def rank_selection(self, fitness, num_parents): It accepts 2 parameters: -fitness: The fitness values of the solutions in the current population. -num_parents: The number of parents to be selected. - It returns an array of the selected parents. 
+ It returns: + -An array of the selected parents. + -The indices of the selected solutions. """ - # This has the index of each solution in the population. - fitness_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) + # Return the indices of the sorted solutions (all solutions in the population). + # This function works with both single- and multi-objective optimization problems. + fitness_sorted = nsga2.sort_solutions_nsga2(fitness=fitness) # Rank the solutions based on their fitness. The worst is gives the rank 1. The best has the rank N. rank = numpy.arange(1, self.sol_per_pop+1) @@ -74,7 +83,9 @@ def random_selection(self, fitness, num_parents): It accepts 2 parameters: -fitness: The fitness values of the solutions in the current population. -num_parents: The number of parents to be selected. - It returns an array of the selected parents. + It returns: + -An array of the selected parents. + -The indices of the selected solutions. """ if self.gene_type_single == True: @@ -96,25 +107,40 @@ def tournament_selection(self, fitness, num_parents): It accepts 2 parameters: -fitness: The fitness values of the solutions in the current population. -num_parents: The number of parents to be selected. - It returns an array of the selected parents. + It returns: + -An array of the selected parents. + -The indices of the selected solutions. """ - + + # Return the indices of the sorted solutions (all solutions in the population). + # This function works with both single- and multi-objective optimization problems. + fitness_sorted = nsga2.sort_solutions_nsga2(fitness=fitness) + if self.gene_type_single == True: parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0]) else: parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object) - + parents_indices = [] - + for parent_num in range(num_parents): + # Generate random indices for the candiadate solutions. 
rand_indices = numpy.random.randint(low=0.0, high=len(fitness), size=self.K_tournament) - K_fitnesses = fitness[rand_indices] - selected_parent_idx = numpy.where(K_fitnesses == numpy.max(K_fitnesses))[0][0] + # K_fitnesses = fitness[rand_indices] + # selected_parent_idx = numpy.where(K_fitnesses == numpy.max(K_fitnesses))[0][0] + + # Find the rank of the candidate solutions. The lower the rank, the better the solution. + rand_indices_rank = [fitness_sorted.index(rand_idx) for rand_idx in rand_indices] + # Select the solution with the lowest rank as a parent. + selected_parent_idx = rand_indices_rank.index(min(rand_indices_rank)) + + # Append the index of the selected parent. parents_indices.append(rand_indices[selected_parent_idx]) + # Insert the selected parent. parents[parent_num, :] = self.population[rand_indices[selected_parent_idx], :].copy() - + return parents, numpy.array(parents_indices) - + def roulette_wheel_selection(self, fitness, num_parents): """ @@ -122,9 +148,27 @@ def roulette_wheel_selection(self, fitness, num_parents): It accepts 2 parameters: -fitness: The fitness values of the solutions in the current population. -num_parents: The number of parents to be selected. - It returns an array of the selected parents. + It returns: + -An array of the selected parents. + -The indices of the selected solutions. """ - + + ## Make edits to work with multi-objective optimization. + ## The objective is to convert the fitness from M-D array to just 1D array. + ## There are 2 ways: + # 1) By summing the fitness values of each solution. + # 2) By using only 1 objective to create the roulette wheel and excluding the others. + + # Take the sum of the fitness values of each solution. + if len(fitness.shape) > 1: + # Multi-objective optimization problem. + # Sum the fitness values of each solution to reduce the fitness from M-D array to just 1D array. + fitness = numpy.sum(fitness, axis=1) + else: + # Single-objective optimization problem. 
+ pass + + # Reaching this step extends that fitness is a 1D array. fitness_sum = numpy.sum(fitness) if fitness_sum == 0: self.logger.error("Cannot proceed because the sum of fitness values is zero. Cannot divide by zero.") @@ -170,7 +214,9 @@ def wheel_cumulative_probs(self, probs, num_parents): probs_start[min_probs_idx] = curr curr = curr + probs[min_probs_idx] probs_end[min_probs_idx] = curr - probs[min_probs_idx] = 99999999999 + # Replace 99999999999 by float('inf') + # probs[min_probs_idx] = 99999999999 + probs[min_probs_idx] = float('inf') # Selecting the best individuals in the current generation as parents for producing the offspring of the next generation. if self.gene_type_single == True: @@ -187,14 +233,34 @@ def stochastic_universal_selection(self, fitness, num_parents): It accepts 2 parameters: -fitness: The fitness values of the solutions in the current population. -num_parents: The number of parents to be selected. - It returns an array of the selected parents. + It returns: + -An array of the selected parents. + -The indices of the selected solutions. """ + ## Make edits to work with multi-objective optimization. + ## The objective is to convert the fitness from M-D array to just 1D array. + ## There are 2 ways: + # 1) By summing the fitness values of each solution. + # 2) By using only 1 objective to create the roulette wheel and excluding the others. + + # Take the sum of the fitness values of each solution. + if len(fitness.shape) > 1: + # Multi-objective optimization problem. + # Sum the fitness values of each solution to reduce the fitness from M-D array to just 1D array. + fitness = numpy.sum(fitness, axis=1) + else: + # Single-objective optimization problem. + pass + + # Reaching this step extends that fitness is a 1D array. fitness_sum = numpy.sum(fitness) if fitness_sum == 0: self.logger.error("Cannot proceed because the sum of fitness values is zero. 
Cannot divide by zero.") raise ZeroDivisionError("Cannot proceed because the sum of fitness values is zero. Cannot divide by zero.") + probs = fitness / fitness_sum + probs_start = numpy.zeros(probs.shape, dtype=float) # An array holding the start values of the ranges of probabilities. probs_end = numpy.zeros(probs.shape, dtype=float) # An array holding the end values of the ranges of probabilities. @@ -206,7 +272,9 @@ def stochastic_universal_selection(self, fitness, num_parents): probs_start[min_probs_idx] = curr curr = curr + probs[min_probs_idx] probs_end[min_probs_idx] = curr - probs[min_probs_idx] = 99999999999 + # Replace 99999999999 by float('inf') + # probs[min_probs_idx] = 99999999999 + probs[min_probs_idx] = float('inf') pointers_distance = 1.0 / self.num_parents_mating # Distance between different pointers. first_pointer = numpy.random.uniform(low=0.0, @@ -234,8 +302,6 @@ def stochastic_universal_selection(self, fitness, num_parents): def tournament_selection_nsga2(self, fitness, num_parents - # pareto_fronts, - # solutions_fronts_indices, ): """ @@ -253,7 +319,9 @@ def tournament_selection_nsga2(self, -pareto_fronts: A nested array of all the pareto fronts. Each front has its solutions. -solutions_fronts_indices: A list of the pareto front index of each solution in the current population. - It returns an array of the selected parents alongside their indices in the population. + It returns: + -An array of the selected parents. + -The indices of the selected solutions. """ if self.gene_type_single == True: @@ -263,19 +331,15 @@ def tournament_selection_nsga2(self, # The indices of the selected parents. parents_indices = [] - + # TODO If there is only a single objective, each pareto front is expected to have only 1 solution. - # TODO Make a test to check for that behaviour. + # TODO Make a test to check for that behaviour and add it to the GitHub actions tests. 
pareto_fronts, solutions_fronts_indices = nsga2.non_dominated_sorting(fitness) # Randomly generate pairs of indices to apply for NSGA-II tournament selection for selecting the parents solutions. rand_indices = numpy.random.randint(low=0.0, high=len(solutions_fronts_indices), size=(num_parents, self.K_tournament)) - # rand_indices[0, 0] = 5 - # rand_indices[0, 1] = 3 - # rand_indices[1, 0] = 1 - # rand_indices[1, 1] = 6 for parent_num in range(num_parents): # Return the indices of the current 2 solutions. @@ -346,7 +410,7 @@ def tournament_selection_nsga2(self, else: # If the random number is >= 0.5, then select the second solution. selected_parent_idx = current_indices[1] - + # Insert the selected parent index. parents_indices.append(selected_parent_idx) # Insert the selected parent. @@ -358,8 +422,6 @@ def tournament_selection_nsga2(self, def nsga2_selection(self, fitness, num_parents - # pareto_fronts, - # solutions_fronts_indices ): """ @@ -378,7 +440,9 @@ def nsga2_selection(self, -pareto_fronts: A nested array of all the pareto fronts. Each front has its solutions. -solutions_fronts_indices: A list of the pareto front index of each solution in the current population. - It returns an array of the selected parents alongside their indices in the population. + It returns: + -An array of the selected parents. + -The indices of the selected solutions. 
""" if self.gene_type_single == True: diff --git a/pygad/visualize/__init__.py b/pygad/visualize/__init__.py index 6b79646..056dc67 100644 --- a/pygad/visualize/__init__.py +++ b/pygad/visualize/__init__.py @@ -1,3 +1,3 @@ from pygad.visualize import plot -__version__ = "1.0.0" \ No newline at end of file +__version__ = "1.1.0" \ No newline at end of file From a3dc00b3dd29b9802ae7cad3a284e386f1bd4621 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 3 Sep 2023 23:42:01 -0400 Subject: [PATCH 09/25] Support of NSGA-II --- pygad/visualize/plot.py | 99 ++++++++++++++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 21 deletions(-) diff --git a/pygad/visualize/plot.py b/pygad/visualize/plot.py index 9d6f70b..e00bfaf 100644 --- a/pygad/visualize/plot.py +++ b/pygad/visualize/plot.py @@ -5,6 +5,7 @@ import numpy import warnings import matplotlib.pyplot +import pygad class Plot: def plot_result(self, @@ -14,7 +15,7 @@ def plot_result(self, linewidth=3, font_size=14, plot_type="plot", - color="#3870FF", + color="#64f20c", save_dir=None): if not self.suppress_warnings: @@ -30,14 +31,15 @@ def plot_result(self, save_dir=save_dir) def plot_fitness(self, - title="PyGAD - Generation vs. Fitness", - xlabel="Generation", - ylabel="Fitness", - linewidth=3, - font_size=14, - plot_type="plot", - color="#3870FF", - save_dir=None): + title="PyGAD - Generation vs. Fitness", + xlabel="Generation", + ylabel="Fitness", + linewidth=3, + font_size=14, + plot_type="plot", + color="#64f20c", + label=None, + save_dir=None): """ Creates, shows, and returns a figure that summarizes how the fitness value evolved by generation. Can only be called after completing at least 1 generation. If no generation is completed, an exception is raised. @@ -47,9 +49,10 @@ def plot_fitness(self, xlabel: Label on the X-axis. ylabel: Label on the Y-axis. linewidth: Line width of the plot. Defaults to 3. - font_size: Font size for the labels and title. Defaults to 14. 
+ font_size: Font size for the labels and title. Defaults to 14. Can be a list/tuple/numpy.ndarray if the problem is multi-objective optimization. plot_type: Type of the plot which can be either "plot" (default), "scatter", or "bar". - color: Color of the plot which defaults to "#3870FF". + color: Color of the plot which defaults to "#64f20c". Can be a list/tuple/numpy.ndarray if the problem is multi-objective optimization. + label: The label used for the legend in the figures of multi-objective problems. It is not used for single-objective problems. save_dir: Directory to save the figure. Returns the figure. @@ -60,15 +63,69 @@ def plot_fitness(self, raise RuntimeError("The plot_fitness() (i.e. plot_result()) method can only be called after completing at least 1 generation but ({self.generations_completed}) is completed.") fig = matplotlib.pyplot.figure() - if plot_type == "plot": - matplotlib.pyplot.plot(self.best_solutions_fitness, linewidth=linewidth, color=color) - elif plot_type == "scatter": - matplotlib.pyplot.scatter(range(len(self.best_solutions_fitness)), self.best_solutions_fitness, linewidth=linewidth, color=color) - elif plot_type == "bar": - matplotlib.pyplot.bar(range(len(self.best_solutions_fitness)), self.best_solutions_fitness, linewidth=linewidth, color=color) + if len(self.best_solutions_fitness[0]) > 1: + # Multi-objective optimization problem. + if type(linewidth) in pygad.GA.supported_int_float_types: + linewidth = [linewidth] + linewidth.extend([linewidth[0]]*len(self.best_solutions_fitness[0])) + elif type(linewidth) in [list, tuple, numpy.ndarray]: + pass + + if type(color) is str: + color = [color] + color.extend([None]*len(self.best_solutions_fitness[0])) + elif type(color) in [list, tuple, numpy.ndarray]: + pass + + if label is None: + label = [None]*len(self.best_solutions_fitness[0]) + + # Loop through each objective to plot its fitness. 
+ for objective_idx in range(len(self.best_solutions_fitness[0])): + # Return the color, line width, and label of the current plot. + current_color = color[objective_idx] + current_linewidth = linewidth[objective_idx] + current_label = label[objective_idx] + # Return the fitness values for the current objective function across all best solutions acorss all generations. + fitness = numpy.array(self.best_solutions_fitness)[:, objective_idx] + if plot_type == "plot": + matplotlib.pyplot.plot(fitness, + linewidth=current_linewidth, + color=current_color, + label=current_label) + elif plot_type == "scatter": + matplotlib.pyplot.scatter(range(len(fitness)), + fitness, + linewidth=current_linewidth, + color=current_color, + label=current_label) + elif plot_type == "bar": + matplotlib.pyplot.bar(range(len(fitness)), + fitness, + linewidth=current_linewidth, + color=current_color, + label=current_label) + else: + # Single-objective optimization problem. + if plot_type == "plot": + matplotlib.pyplot.plot(self.best_solutions_fitness, + linewidth=linewidth, + color=color) + elif plot_type == "scatter": + matplotlib.pyplot.scatter(range(len(self.best_solutions_fitness)), + self.best_solutions_fitness, + linewidth=linewidth, + color=color) + elif plot_type == "bar": + matplotlib.pyplot.bar(range(len(self.best_solutions_fitness)), + self.best_solutions_fitness, + linewidth=linewidth, + color=color) matplotlib.pyplot.title(title, fontsize=font_size) matplotlib.pyplot.xlabel(xlabel, fontsize=font_size) matplotlib.pyplot.ylabel(ylabel, fontsize=font_size) + # Create a legend out of the labels. + matplotlib.pyplot.legend() if not save_dir is None: matplotlib.pyplot.savefig(fname=save_dir, @@ -84,7 +141,7 @@ def plot_new_solution_rate(self, linewidth=3, font_size=14, plot_type="plot", - color="#3870FF", + color="#64f20c", save_dir=None): """ @@ -97,7 +154,7 @@ def plot_new_solution_rate(self, linewidth: Line width of the plot. Defaults to 3. 
font_size: Font size for the labels and title. Defaults to 14. plot_type: Type of the plot which can be either "plot" (default), "scatter", or "bar". - color: Color of the plot which defaults to "#3870FF". + color: Color of the plot which defaults to "#64f20c". save_dir: Directory to save the figure. Returns the figure. @@ -154,7 +211,7 @@ def plot_genes(self, font_size=14, plot_type="plot", graph_type="plot", - fill_color="#3870FF", + fill_color="#64f20c", color="black", solutions="all", save_dir=None): @@ -172,7 +229,7 @@ def plot_genes(self, font_size: Font size for the labels and title. Defaults to 14. plot_type: Type of the plot which can be either "plot" (default), "scatter", or "bar". graph_type: Type of the graph which can be either "plot" (default), "boxplot", or "histogram". - fill_color: Fill color of the graph which defaults to "#3870FF". This has no effect if graph_type="plot". + fill_color: Fill color of the graph which defaults to "#64f20c". This has no effect if graph_type="plot". color: Color of the plot which defaults to "black". solutions: Defaults to "all" which means use all solutions. If "best" then only the best solutions are used. save_dir: Directory to save the figure. 
From cabfa6d7d0494f490f7bab809480081e039c4814 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 6 Sep 2023 15:52:53 -0400 Subject: [PATCH 10/25] Support of NSGA-II --- pygad/helper/__init__.py | 4 +- pygad/helper/nsga2.py | 252 ------------------------------- pygad/pygad.py | 2 + pygad/utils/__init__.py | 3 +- pygad/utils/crossover.py | 4 + pygad/utils/mutation.py | 3 + pygad/utils/nsga2.py | 257 ++++++++++++++++++++++++++++++++ pygad/utils/parent_selection.py | 35 +++-- 8 files changed, 290 insertions(+), 270 deletions(-) delete mode 100644 pygad/helper/nsga2.py create mode 100644 pygad/utils/nsga2.py diff --git a/pygad/helper/__init__.py b/pygad/helper/__init__.py index 3eebdb7..e781d27 100644 --- a/pygad/helper/__init__.py +++ b/pygad/helper/__init__.py @@ -1,3 +1,3 @@ -from pygad.helper import unique, nsga2 +from pygad.helper import unique -__version__ = "1.2.0" \ No newline at end of file +__version__ = "1.1.0" \ No newline at end of file diff --git a/pygad/helper/nsga2.py b/pygad/helper/nsga2.py deleted file mode 100644 index 8f7401e..0000000 --- a/pygad/helper/nsga2.py +++ /dev/null @@ -1,252 +0,0 @@ -import numpy -import pygad - -def get_non_dominated_set(curr_solutions): - """ - Get the set of non-dominated solutions from the current set of solutions. - - Parameters - ---------- - curr_solutions : TYPE - The set of solutions to find its non-dominated set. - - Returns - ------- - dominated_set : TYPE - A set of the dominated solutions. - non_dominated_set : TYPE - A set of the non-dominated set. - - """ - # List of the members of the current dominated pareto front/set. - dominated_set = [] - # List of the non-members of the current dominated pareto front/set. - non_dominated_set = [] - for idx1, sol1 in enumerate(curr_solutions): - # Flag indicates whether the solution is a member of the current dominated set. 
- is_dominated = True - for idx2, sol2 in enumerate(curr_solutions): - if idx1 == idx2: - continue - # Zipping the 2 solutions so the corresponding genes are in the same list. - # The returned array is of size (N, 2) where N is the number of genes. - two_solutions = numpy.array(list(zip(sol1[1], sol2[1]))) - - #TODO Consider repacing < by > for maximization problems. - # Checking for if any solution dominates the current solution by applying the 2 conditions. - # le_eq (less than or equal): All elements must be True. - # le (less than): Only 1 element must be True. - le_eq = two_solutions[:, 1] >= two_solutions[:, 0] - le = two_solutions[:, 1] > two_solutions[:, 0] - - # If the 2 conditions hold, then a solution dominates the current solution. - # The current solution is not considered a member of the dominated set. - if le_eq.all() and le.any(): - # Set the is_dominated flag to False to NOT insert the current solution in the current dominated set. - # Instead, insert it into the non-dominated set. - is_dominated = False - non_dominated_set.append(sol1) - break - else: - # Reaching here means the solution does not dominate the current solution. - pass - - # If the flag is True, then no solution dominates the current solution. - if is_dominated: - dominated_set.append(sol1) - - # Return the dominated and non-dominated sets. - return dominated_set, non_dominated_set - -def non_dominated_sorting(fitness): - """ - Apply the non-dominant sorting over the fitness to create the pareto fronts based on non-dominaned sorting of the solutions. - - Parameters - ---------- - fitness : TYPE - An array of the population fitness across all objective function. - - Returns - ------- - pareto_fronts : TYPE - An array of the pareto fronts. - - """ - # A list of all non-dominated sets. - pareto_fronts = [] - - # The remaining set to be explored for non-dominance. - # Initially it is set to the entire population. 
- # The solutions of each non-dominated set are removed after each iteration. - remaining_set = fitness.copy() - - # Zipping the solution index with the solution's fitness. - # This helps to easily identify the index of each solution. - # Each element has: - # 1) The index of the solution. - # 2) An array of the fitness values of this solution across all objectives. - # remaining_set = numpy.array(list(zip(range(0, fitness.shape[0]), non_dominated_set))) - remaining_set = list(zip(range(0, fitness.shape[0]), remaining_set)) - - # A list mapping the index of each pareto front to the set of solutions in this front. - solutions_fronts_indices = [-1]*len(remaining_set) - solutions_fronts_indices = numpy.array(solutions_fronts_indices) - - # Index of the current pareto front. - front_index = -1 - while len(remaining_set) > 0: - front_index += 1 - - # Get the current non-dominated set of solutions. - pareto_front, remaining_set = get_non_dominated_set(curr_solutions=remaining_set) - pareto_front = numpy.array(pareto_front, dtype=object) - pareto_fronts.append(pareto_front) - - solutions_indices = pareto_front[:, 0].astype(int) - solutions_fronts_indices[solutions_indices] = front_index - - return pareto_fronts, solutions_fronts_indices - -def crowding_distance(pareto_front, fitness): - """ - Calculate the crowding dstance for all solutions in the current pareto front. - - Parameters - ---------- - pareto_front : TYPE - The set of solutions in the current pareto front. - fitness : TYPE - The fitness of the current population. - - Returns - ------- - obj_crowding_dist_list : TYPE - A nested list of the values for all objectives alongside their crowding distance. - crowding_dist_sum : TYPE - A list of the sum of crowding distances across all objectives for each solution. - crowding_dist_front_sorted_indices : TYPE - The indices of the solutions (relative to the current front) sorted by the crowding distance. 
- crowding_dist_pop_sorted_indices : TYPE - The indices of the solutions (relative to the population) sorted by the crowding distance. - """ - - # Each solution in the pareto front has 2 elements: - # 1) The index of the solution in the population. - # 2) A list of the fitness values for all objectives of the solution. - # Before proceeding, remove the indices from each solution in the pareto front. - pareto_front_no_indices = numpy.array([pareto_front[:, 1][idx] for idx in range(pareto_front.shape[0])]) - - # If there is only 1 solution, then return empty arrays for the crowding distance. - if pareto_front_no_indices.shape[0] == 1: - # There is only 1 index. - return numpy.array([]), numpy.array([]), numpy.array([0]), pareto_front[:, 0].astype(int) - - # An empty list holding info about the objectives of each solution. The info includes the objective value and crowding distance. - obj_crowding_dist_list = [] - # Loop through the objectives to calculate the crowding distance of each solution across all objectives. - for obj_idx in range(pareto_front_no_indices.shape[1]): - obj = pareto_front_no_indices[:, obj_idx] - # This variable has a nested list where each child list zip the following together: - # 1) The index of the objective value. - # 2) The objective value. - # 3) Initialize the crowding distance by zero. - obj = list(zip(range(len(obj)), obj, [0]*len(obj))) - obj = [list(element) for element in obj] - # This variable is the sorted version where sorting is done by the objective value (second element). - # Note that the first element is still the original objective index before sorting. - obj_sorted = sorted(obj, key=lambda x: x[1]) - - # Get the minimum and maximum values for the current objective. - obj_min_val = min(fitness[:, obj_idx]) - obj_max_val = max(fitness[:, obj_idx]) - denominator = obj_max_val - obj_min_val - # To avoid division by zero, set the denominator to a tiny value. 
- if denominator == 0: - denominator = 0.0000001 - - # Set the crowding distance to the first and last solutions (after being sorted) to infinity. - inf_val = float('inf') - # crowding_distance[0] = inf_val - obj_sorted[0][2] = inf_val - # crowding_distance[-1] = inf_val - obj_sorted[-1][2] = inf_val - - # If there are only 2 solutions in the current pareto front, then do not proceed. - # The crowding distance for such 2 solutions is infinity. - if len(obj_sorted) <= 2: - obj_crowding_dist_list.append(obj_sorted) - break - - for idx in range(1, len(obj_sorted)-1): - # Calculate the crowding distance. - crowding_dist = obj_sorted[idx+1][1] - obj_sorted[idx-1][1] - crowding_dist = crowding_dist / denominator - # Insert the crowding distance back into the list to override the initial zero. - obj_sorted[idx][2] = crowding_dist - - # Sort the objective by the original index at index 0 of the each child list. - obj_sorted = sorted(obj_sorted, key=lambda x: x[0]) - obj_crowding_dist_list.append(obj_sorted) - - obj_crowding_dist_list = numpy.array(obj_crowding_dist_list) - crowding_dist = numpy.array([obj_crowding_dist_list[idx, :, 2] for idx in range(len(obj_crowding_dist_list))]) - crowding_dist_sum = numpy.sum(crowding_dist, axis=0) - - # An array of the sum of crowding distances across all objectives. - # Each row has 2 elements: - # 1) The index of the solution. - # 2) The sum of all crowding distances for all objective of the solution. - crowding_dist_sum = numpy.array(list(zip(obj_crowding_dist_list[0, :, 0], crowding_dist_sum))) - crowding_dist_sum = sorted(crowding_dist_sum, key=lambda x: x[1], reverse=True) - - # The sorted solutions' indices by the crowding distance. - crowding_dist_front_sorted_indices = numpy.array(crowding_dist_sum)[:, 0] - crowding_dist_front_sorted_indices = crowding_dist_front_sorted_indices.astype(int) - # Note that such indices are relative to the front, NOT the population. 
- # It is mandatory to map such front indices to population indices before using them to refer to the population. - crowding_dist_pop_sorted_indices = pareto_front[:, 0] - crowding_dist_pop_sorted_indices = crowding_dist_pop_sorted_indices[crowding_dist_front_sorted_indices] - crowding_dist_pop_sorted_indices = crowding_dist_pop_sorted_indices.astype(int) - - return obj_crowding_dist_list, crowding_dist_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices - -def sort_solutions_nsga2(fitness): - """ - Sort the solutions based on the fitness. - The sorting procedure differs based on whether the problem is single-objective or multi-objective optimization. - If it is multi-objective, then non-dominated sorting and crowding distance are applied. - At first, non-dominated sorting is applied to classify the solutions into pareto fronts. - Then the solutions inside each front are sorted using crowded distance. - The solutions inside pareto front X always come before those in front X+1. - - Parameters - ---------- - fitness : TYPE - The fitness of the entire population. - - Returns - ------- - solutions_sorted : TYPE - The indices of the sorted solutions. - - """ - if type(fitness[0]) in [list, tuple, numpy.ndarray]: - # Multi-objective optimization problem. - solutions_sorted = [] - # Split the solutions into pareto fronts using non-dominated sorting. - pareto_fronts, solutions_fronts_indices = non_dominated_sorting(fitness) - for pareto_front in pareto_fronts: - # Sort the solutions in the front using crowded distance. - _, _, _, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front=pareto_front.copy(), - fitness=fitness) - crowding_dist_pop_sorted_indices = list(crowding_dist_pop_sorted_indices) - # Append the sorted solutions into the list. - solutions_sorted.extend(crowding_dist_pop_sorted_indices) - elif type(fitness[0]) in pygad.GA.supported_int_float_types: - # Single-objective optimization problem. 
- solutions_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) - # Reverse the sorted solutions so that the best solution comes first. - solutions_sorted.reverse() - - return solutions_sorted diff --git a/pygad/pygad.py b/pygad/pygad.py index 97205f9..fc3e5b5 100644 --- a/pygad/pygad.py +++ b/pygad/pygad.py @@ -11,9 +11,11 @@ from pygad import visualize import sys +# Extend all the classes so that they can be referenced by just the `self` object of the `pygad.GA` class. class GA(utils.parent_selection.ParentSelection, utils.crossover.Crossover, utils.mutation.Mutation, + utils.nsga2.NSGA2, helper.unique.Unique, visualize.plot.Plot): diff --git a/pygad/utils/__init__.py b/pygad/utils/__init__.py index 3b8450b..95bf6e5 100644 --- a/pygad/utils/__init__.py +++ b/pygad/utils/__init__.py @@ -1,5 +1,6 @@ from pygad.utils import parent_selection from pygad.utils import crossover from pygad.utils import mutation +from pygad.utils import nsga2 -__version__ = "1.0.1" \ No newline at end of file +__version__ = "1.1.0" \ No newline at end of file diff --git a/pygad/utils/crossover.py b/pygad/utils/crossover.py index 6cc9a27..b03d361 100644 --- a/pygad/utils/crossover.py +++ b/pygad/utils/crossover.py @@ -6,6 +6,10 @@ import random class Crossover: + + def __init__(): + pass + def single_point_crossover(self, parents, offspring_size): """ diff --git a/pygad/utils/mutation.py b/pygad/utils/mutation.py index ef0e85f..2d6bed9 100644 --- a/pygad/utils/mutation.py +++ b/pygad/utils/mutation.py @@ -9,6 +9,9 @@ class Mutation: + def __init__(): + pass + def random_mutation(self, offspring): """ diff --git a/pygad/utils/nsga2.py b/pygad/utils/nsga2.py new file mode 100644 index 0000000..8b24f30 --- /dev/null +++ b/pygad/utils/nsga2.py @@ -0,0 +1,257 @@ +import numpy +import pygad + +class NSGA2: + def __init__(): + pass + + def get_non_dominated_set(self, curr_solutions): + """ + Get the set of non-dominated solutions from the current set of solutions. 
+ + Parameters + ---------- + curr_solutions : TYPE + The set of solutions to find its non-dominated set. + + Returns + ------- + dominated_set : TYPE + A set of the dominated solutions. + non_dominated_set : TYPE + A set of the non-dominated set. + + """ + # List of the members of the current dominated pareto front/set. + dominated_set = [] + # List of the non-members of the current dominated pareto front/set. + non_dominated_set = [] + for idx1, sol1 in enumerate(curr_solutions): + # Flag indicates whether the solution is a member of the current dominated set. + is_dominated = True + for idx2, sol2 in enumerate(curr_solutions): + if idx1 == idx2: + continue + # Zipping the 2 solutions so the corresponding genes are in the same list. + # The returned array is of size (N, 2) where N is the number of genes. + two_solutions = numpy.array(list(zip(sol1[1], sol2[1]))) + + #TODO Consider repacing < by > for maximization problems. + # Checking for if any solution dominates the current solution by applying the 2 conditions. + # le_eq (less than or equal): All elements must be True. + # le (less than): Only 1 element must be True. + le_eq = two_solutions[:, 1] >= two_solutions[:, 0] + le = two_solutions[:, 1] > two_solutions[:, 0] + + # If the 2 conditions hold, then a solution dominates the current solution. + # The current solution is not considered a member of the dominated set. + if le_eq.all() and le.any(): + # Set the is_dominated flag to False to NOT insert the current solution in the current dominated set. + # Instead, insert it into the non-dominated set. + is_dominated = False + non_dominated_set.append(sol1) + break + else: + # Reaching here means the solution does not dominate the current solution. + pass + + # If the flag is True, then no solution dominates the current solution. + if is_dominated: + dominated_set.append(sol1) + + # Return the dominated and non-dominated sets. 
+ return dominated_set, non_dominated_set + + def non_dominated_sorting(self, fitness): + """ + Apply the non-dominant sorting over the fitness to create the pareto fronts based on non-dominaned sorting of the solutions. + + Parameters + ---------- + fitness : TYPE + An array of the population fitness across all objective function. + + Returns + ------- + pareto_fronts : TYPE + An array of the pareto fronts. + + """ + # A list of all non-dominated sets. + pareto_fronts = [] + + # The remaining set to be explored for non-dominance. + # Initially it is set to the entire population. + # The solutions of each non-dominated set are removed after each iteration. + remaining_set = fitness.copy() + + # Zipping the solution index with the solution's fitness. + # This helps to easily identify the index of each solution. + # Each element has: + # 1) The index of the solution. + # 2) An array of the fitness values of this solution across all objectives. + # remaining_set = numpy.array(list(zip(range(0, fitness.shape[0]), non_dominated_set))) + remaining_set = list(zip(range(0, fitness.shape[0]), remaining_set)) + + # A list mapping the index of each pareto front to the set of solutions in this front. + solutions_fronts_indices = [-1]*len(remaining_set) + solutions_fronts_indices = numpy.array(solutions_fronts_indices) + + # Index of the current pareto front. + front_index = -1 + while len(remaining_set) > 0: + front_index += 1 + + # Get the current non-dominated set of solutions. + pareto_front, remaining_set = self.get_non_dominated_set(curr_solutions=remaining_set) + pareto_front = numpy.array(pareto_front, dtype=object) + pareto_fronts.append(pareto_front) + + solutions_indices = pareto_front[:, 0].astype(int) + solutions_fronts_indices[solutions_indices] = front_index + + return pareto_fronts, solutions_fronts_indices + + def crowding_distance(self, pareto_front, fitness): + """ + Calculate the crowding dstance for all solutions in the current pareto front. 
+ + Parameters + ---------- + pareto_front : TYPE + The set of solutions in the current pareto front. + fitness : TYPE + The fitness of the current population. + + Returns + ------- + obj_crowding_dist_list : TYPE + A nested list of the values for all objectives alongside their crowding distance. + crowding_dist_sum : TYPE + A list of the sum of crowding distances across all objectives for each solution. + crowding_dist_front_sorted_indices : TYPE + The indices of the solutions (relative to the current front) sorted by the crowding distance. + crowding_dist_pop_sorted_indices : TYPE + The indices of the solutions (relative to the population) sorted by the crowding distance. + """ + + # Each solution in the pareto front has 2 elements: + # 1) The index of the solution in the population. + # 2) A list of the fitness values for all objectives of the solution. + # Before proceeding, remove the indices from each solution in the pareto front. + pareto_front_no_indices = numpy.array([pareto_front[:, 1][idx] for idx in range(pareto_front.shape[0])]) + + # If there is only 1 solution, then return empty arrays for the crowding distance. + if pareto_front_no_indices.shape[0] == 1: + # There is only 1 index. + return numpy.array([]), numpy.array([]), numpy.array([0]), pareto_front[:, 0].astype(int) + + # An empty list holding info about the objectives of each solution. The info includes the objective value and crowding distance. + obj_crowding_dist_list = [] + # Loop through the objectives to calculate the crowding distance of each solution across all objectives. + for obj_idx in range(pareto_front_no_indices.shape[1]): + obj = pareto_front_no_indices[:, obj_idx] + # This variable has a nested list where each child list zip the following together: + # 1) The index of the objective value. + # 2) The objective value. + # 3) Initialize the crowding distance by zero. 
+ obj = list(zip(range(len(obj)), obj, [0]*len(obj))) + obj = [list(element) for element in obj] + # This variable is the sorted version where sorting is done by the objective value (second element). + # Note that the first element is still the original objective index before sorting. + obj_sorted = sorted(obj, key=lambda x: x[1]) + + # Get the minimum and maximum values for the current objective. + obj_min_val = min(fitness[:, obj_idx]) + obj_max_val = max(fitness[:, obj_idx]) + denominator = obj_max_val - obj_min_val + # To avoid division by zero, set the denominator to a tiny value. + if denominator == 0: + denominator = 0.0000001 + + # Set the crowding distance to the first and last solutions (after being sorted) to infinity. + inf_val = float('inf') + # crowding_distance[0] = inf_val + obj_sorted[0][2] = inf_val + # crowding_distance[-1] = inf_val + obj_sorted[-1][2] = inf_val + + # If there are only 2 solutions in the current pareto front, then do not proceed. + # The crowding distance for such 2 solutions is infinity. + if len(obj_sorted) <= 2: + obj_crowding_dist_list.append(obj_sorted) + break + + for idx in range(1, len(obj_sorted)-1): + # Calculate the crowding distance. + crowding_dist = obj_sorted[idx+1][1] - obj_sorted[idx-1][1] + crowding_dist = crowding_dist / denominator + # Insert the crowding distance back into the list to override the initial zero. + obj_sorted[idx][2] = crowding_dist + + # Sort the objective by the original index at index 0 of the each child list. + obj_sorted = sorted(obj_sorted, key=lambda x: x[0]) + obj_crowding_dist_list.append(obj_sorted) + + obj_crowding_dist_list = numpy.array(obj_crowding_dist_list) + crowding_dist = numpy.array([obj_crowding_dist_list[idx, :, 2] for idx in range(len(obj_crowding_dist_list))]) + crowding_dist_sum = numpy.sum(crowding_dist, axis=0) + + # An array of the sum of crowding distances across all objectives. + # Each row has 2 elements: + # 1) The index of the solution. 
+ # 2) The sum of all crowding distances for all objective of the solution. + crowding_dist_sum = numpy.array(list(zip(obj_crowding_dist_list[0, :, 0], crowding_dist_sum))) + crowding_dist_sum = sorted(crowding_dist_sum, key=lambda x: x[1], reverse=True) + + # The sorted solutions' indices by the crowding distance. + crowding_dist_front_sorted_indices = numpy.array(crowding_dist_sum)[:, 0] + crowding_dist_front_sorted_indices = crowding_dist_front_sorted_indices.astype(int) + # Note that such indices are relative to the front, NOT the population. + # It is mandatory to map such front indices to population indices before using them to refer to the population. + crowding_dist_pop_sorted_indices = pareto_front[:, 0] + crowding_dist_pop_sorted_indices = crowding_dist_pop_sorted_indices[crowding_dist_front_sorted_indices] + crowding_dist_pop_sorted_indices = crowding_dist_pop_sorted_indices.astype(int) + + return obj_crowding_dist_list, crowding_dist_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices + + def sort_solutions_nsga2(self, fitness): + """ + Sort the solutions based on the fitness. + The sorting procedure differs based on whether the problem is single-objective or multi-objective optimization. + If it is multi-objective, then non-dominated sorting and crowding distance are applied. + At first, non-dominated sorting is applied to classify the solutions into pareto fronts. + Then the solutions inside each front are sorted using crowded distance. + The solutions inside pareto front X always come before those in front X+1. + + Parameters + ---------- + fitness : TYPE + The fitness of the entire population. + + Returns + ------- + solutions_sorted : TYPE + The indices of the sorted solutions. + + """ + if type(fitness[0]) in [list, tuple, numpy.ndarray]: + # Multi-objective optimization problem. + solutions_sorted = [] + # Split the solutions into pareto fronts using non-dominated sorting. 
+ pareto_fronts, solutions_fronts_indices = self.non_dominated_sorting(fitness) + self.pareto_fronts = pareto_fronts.copy() + for pareto_front in pareto_fronts: + # Sort the solutions in the front using crowded distance. + _, _, _, crowding_dist_pop_sorted_indices = self.crowding_distance(pareto_front=pareto_front.copy(), + fitness=fitness) + crowding_dist_pop_sorted_indices = list(crowding_dist_pop_sorted_indices) + # Append the sorted solutions into the list. + solutions_sorted.extend(crowding_dist_pop_sorted_indices) + elif type(fitness[0]) in pygad.GA.supported_int_float_types: + # Single-objective optimization problem. + solutions_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) + # Reverse the sorted solutions so that the best solution comes first. + solutions_sorted.reverse() + + return solutions_sorted diff --git a/pygad/utils/parent_selection.py b/pygad/utils/parent_selection.py index 464dcbd..4016ca1 100644 --- a/pygad/utils/parent_selection.py +++ b/pygad/utils/parent_selection.py @@ -3,9 +3,12 @@ """ import numpy -from ..helper import nsga2 class ParentSelection: + + def __init__(): + pass + def steady_state_selection(self, fitness, num_parents): """ @@ -23,7 +26,7 @@ def steady_state_selection(self, fitness, num_parents): # Return the indices of the sorted solutions (all solutions in the population). # This function works with both single- and multi-objective optimization problems. - fitness_sorted = nsga2.sort_solutions_nsga2(fitness=fitness) + fitness_sorted = self.sort_solutions_nsga2(fitness=fitness) # Selecting the best individuals in the current generation as parents for producing the offspring of the next generation. if self.gene_type_single == True: @@ -51,7 +54,7 @@ def rank_selection(self, fitness, num_parents): # Return the indices of the sorted solutions (all solutions in the population). # This function works with both single- and multi-objective optimization problems. 
- fitness_sorted = nsga2.sort_solutions_nsga2(fitness=fitness) + fitness_sorted = self.sort_solutions_nsga2(fitness=fitness) # Rank the solutions based on their fitness. The worst is gives the rank 1. The best has the rank N. rank = numpy.arange(1, self.sol_per_pop+1) @@ -114,7 +117,7 @@ def tournament_selection(self, fitness, num_parents): # Return the indices of the sorted solutions (all solutions in the population). # This function works with both single- and multi-objective optimization problems. - fitness_sorted = nsga2.sort_solutions_nsga2(fitness=fitness) + fitness_sorted = self.sort_solutions_nsga2(fitness=fitness) if self.gene_type_single == True: parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0]) @@ -312,7 +315,7 @@ def tournament_selection_nsga2(self, If 2 solutions are in the same pareto front, then crowding distance is calculated. The solution with the higher crowding distance is selected. If the 2 solutions are in the same pareto front and have the same crowding distance, then a solution is randomly selected. Later, the selected parents will mate to produce the offspring. - + It accepts 2 parameters: -fitness: The fitness values for the current population. -num_parents: The number of parents to be selected. @@ -332,10 +335,11 @@ def tournament_selection_nsga2(self, # The indices of the selected parents. parents_indices = [] - # TODO If there is only a single objective, each pareto front is expected to have only 1 solution. + # If there is only a single objective, each pareto front is expected to have only 1 solution. # TODO Make a test to check for that behaviour and add it to the GitHub actions tests. 
- pareto_fronts, solutions_fronts_indices = nsga2.non_dominated_sorting(fitness) - + pareto_fronts, solutions_fronts_indices = self.non_dominated_sorting(fitness) + self.pareto_fronts = pareto_fronts.copy() + # Randomly generate pairs of indices to apply for NSGA-II tournament selection for selecting the parents solutions. rand_indices = numpy.random.randint(low=0.0, high=len(solutions_fronts_indices), @@ -362,7 +366,7 @@ def tournament_selection_nsga2(self, # Fetch the current pareto front. pareto_front = pareto_fronts[parent_fronts_indices[0]] # Index 1 can also be used. - + # If there is only 1 solution in the pareto front, just return it without calculating the crowding distance (it is useless). if pareto_front.shape[0] == 1: selected_parent_idx = current_indices[0] # Index 1 can also be used. @@ -370,7 +374,7 @@ def tournament_selection_nsga2(self, # Reaching here means the pareto front has more than 1 solution. # Calculate the crowding distance of the solutions of the pareto front. - obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = nsga2.crowding_distance(pareto_front=pareto_front.copy(), + obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = self.crowding_distance(pareto_front=pareto_front.copy(), fitness=fitness) # This list has the sorted front-based indices for the solutions in the current pareto front. @@ -452,10 +456,11 @@ def nsga2_selection(self, # The indices of the selected parents. parents_indices = [] - - # TODO If there is only a single objective, each pareto front is expected to have only 1 solution. + + # If there is only a single objective, each pareto front is expected to have only 1 solution. # TODO Make a test to check for that behaviour. 
- pareto_fronts, solutions_fronts_indices = nsga2.non_dominated_sorting(fitness) + pareto_fronts, solutions_fronts_indices = self.non_dominated_sorting(fitness) + self.pareto_fronts = pareto_fronts.copy() # The number of remaining parents to be selected. num_remaining_parents = num_parents @@ -485,8 +490,8 @@ def nsga2_selection(self, # If only a subset of the front is needed, then use the crowding distance to sort the solutions and select only the number needed. # Calculate the crowding distance of the solutions of the pareto front. - obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = nsga2.crowding_distance(pareto_front=current_pareto_front.copy(), - fitness=fitness) + obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = self.crowding_distance(pareto_front=current_pareto_front.copy(), + fitness=fitness) for selected_solution_idx in crowding_dist_pop_sorted_indices[0:num_remaining_parents]: # Insert the parent into the parents array. From 613efcbddbf5fb9fc93d64ba6c98805f82d462bf Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 6 Sep 2023 15:57:43 -0400 Subject: [PATCH 11/25] Support of NSGA-II --- pygad/pygad.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pygad/pygad.py b/pygad/pygad.py index fc3e5b5..d0a124c 100644 --- a/pygad/pygad.py +++ b/pygad/pygad.py @@ -1309,6 +1309,8 @@ def __init__(self, self.last_generation_elitism = None # Added in PyGAD 2.19.0. A NumPy array holding the indices of the elitism of the current generation. It works only if the 'keep_elitism' parameter has a non-zero value. self.last_generation_elitism_indices = None + # Supported in PyGAD 3.2.0. It holds the pareto fronts when solving a multi-objective problem. 
+ self.pareto_fronts = None except Exception as e: self.logger.exception(e) sys.exit(-1) From 3f71ec0c8f5d2b3bb8531b90eaf1e9f562dc23eb Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 6 Sep 2023 18:20:47 -0400 Subject: [PATCH 12/25] Support of NSGA-II --- pygad/utils/nsga2.py | 4 ++-- pygad/visualize/plot.py | 25 +++---------------------- 2 files changed, 5 insertions(+), 24 deletions(-) diff --git a/pygad/utils/nsga2.py b/pygad/utils/nsga2.py index 8b24f30..c6a93ba 100644 --- a/pygad/utils/nsga2.py +++ b/pygad/utils/nsga2.py @@ -64,7 +64,7 @@ def get_non_dominated_set(self, curr_solutions): def non_dominated_sorting(self, fitness): """ - Apply the non-dominant sorting over the fitness to create the pareto fronts based on non-dominaned sorting of the solutions. + Apply non-dominant sorting over the fitness to create the pareto fronts based on non-dominaned sorting of the solutions. Parameters ---------- @@ -114,7 +114,7 @@ def non_dominated_sorting(self, fitness): def crowding_distance(self, pareto_front, fitness): """ - Calculate the crowding dstance for all solutions in the current pareto front. + Calculate the crowding distance for all solutions in the current pareto front. Parameters ---------- diff --git a/pygad/visualize/plot.py b/pygad/visualize/plot.py index e00bfaf..a084951 100644 --- a/pygad/visualize/plot.py +++ b/pygad/visualize/plot.py @@ -3,32 +3,13 @@ """ import numpy -import warnings import matplotlib.pyplot import pygad class Plot: - def plot_result(self, - title="PyGAD - Generation vs. Fitness", - xlabel="Generation", - ylabel="Fitness", - linewidth=3, - font_size=14, - plot_type="plot", - color="#64f20c", - save_dir=None): - - if not self.suppress_warnings: - warnings.warn("Please use the plot_fitness() method instead of plot_result(). 
The plot_result() method will be removed in the future.") - - return self.plot_fitness(title=title, - xlabel=xlabel, - ylabel=ylabel, - linewidth=linewidth, - font_size=font_size, - plot_type=plot_type, - color=color, - save_dir=save_dir) + + def __init__(self): + pass def plot_fitness(self, title="PyGAD - Generation vs. Fitness", From 84c879d66e4ce47d68580962995bb8f0a8eaf6c4 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 6 Sep 2023 18:22:35 -0400 Subject: [PATCH 13/25] Update readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index faf8f30..2014f2d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # PyGAD: Genetic Algorithm in Python -[PyGAD](https://pypi.org/project/pygad) is an open-source easy-to-use Python 3 library for building the genetic algorithm and optimizing machine learning algorithms. It supports Keras and PyTorch. +[PyGAD](https://pypi.org/project/pygad) is an open-source easy-to-use Python 3 library for building the genetic algorithm and optimizing machine learning algorithms. It supports Keras and PyTorch. PyGAD supports optimizing both single-objective and multi-objective problems. Check documentation of the [PyGAD](https://pygad.readthedocs.io/en/latest). @@ -146,7 +146,7 @@ on_stop() # Example -Check the [PyGAD's documentation](https://pygad.readthedocs.io/en/latest/pygad.html) for information about the implementation of this example. +Check the [PyGAD's documentation](https://pygad.readthedocs.io/en/latest/pygad.html) for information about the implementation of this example. It solves a single-objective problem. 
```python import pygad From b7e3f7ee08b07302dd6561be96137f183fe55a28 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 6 Sep 2023 18:23:16 -0400 Subject: [PATCH 14/25] Update TOML file --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0f9ca5c..4f5055e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta" [project] name = "pygad" -version = "3.1.1" +version = "3.2.0" description = "PyGAD: A Python Library for Building the Genetic Algorithm and Training Machine Learning Algoithms (Keras & PyTorch)." readme = {file = "README.md", content-type = "text/markdown"} requires-python = ">=3" From 5122c6410c87bde788f80919eea33b454626afe8 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 6 Sep 2023 18:35:43 -0400 Subject: [PATCH 15/25] Update docs --- docs/source/conf.py | 2 +- docs/source/index.rst | 80 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 806bb02..9dccbea 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,7 +22,7 @@ author = 'Ahmed Fawzy Gad' # The full version, including alpha/beta/rc tags -release = '3.1.0' +release = '3.2.0' master_doc = 'index' diff --git a/docs/source/index.rst b/docs/source/index.rst index 3bd6ad6..92aacd7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -251,9 +251,52 @@ pygad Module - .. _header-n5: +More About pygad Module +=============== + + +.. toctree:: + :maxdepth: 4 + :caption: More About pygad Module TOC + + pygad_more.rst + + + + +.. _header-n6: + +utils Module +=============== + + +.. toctree:: + :maxdepth: 4 + :caption: utils Module TOC + + utils.rst + + + +.. _header-n7: + +visualize Module +=============== + + +.. toctree:: + :maxdepth: 4 + :caption: visualize Module TOC + + visualize.rst + + + + +.. 
_header-n8: + pygad.nn Module =============== @@ -268,7 +311,7 @@ pygad.nn Module -.. _header-n6: +.. _header-n9: pygad.gann Module ================= @@ -288,7 +331,7 @@ pygad.gann Module -.. _header-n7: +.. _header-n10: pygad.cnn Module ================= @@ -302,13 +345,7 @@ pygad.cnn Module - - - - - - -.. _header-n8: +.. _header-n11: pygad.gacnn Module ================= @@ -323,7 +360,7 @@ pygad.gacnn Module -.. _header-n9: +.. _header-n12: pygad.kerasga Module ================= @@ -338,7 +375,7 @@ pygad.kerasga Module -.. _header-n10: +.. _header-n13: pygad.torchga Module ================= @@ -353,20 +390,33 @@ pygad.torchga Module -.. _header-n11: +.. _header-n14: -More Information +Releases ================= .. toctree:: :maxdepth: 4 - :caption: More Information + :caption: Releases releases.rst +.. _header-n15: + +helper Module +================= + + +.. toctree:: + :maxdepth: 4 + :caption: helper Module TOC + + helper.rst + + Indices and tables From 21f4788a98f3dde1ce73f6a4ea4561e772f03b39 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 6 Sep 2023 18:39:16 -0400 Subject: [PATCH 16/25] Update docs --- docs/source/helper.rst | 11 + docs/source/index.rst | 12 +- docs/source/pygad.rst | 3361 ++---------------------------------- docs/source/pygad_more.rst | 2171 +++++++++++++++++++++++ docs/source/releases.rst | 61 + docs/source/utils.rst | 707 ++++++++ docs/source/visualize.rst | 449 +++++ 7 files changed, 3550 insertions(+), 3222 deletions(-) create mode 100644 docs/source/helper.rst create mode 100644 docs/source/pygad_more.rst create mode 100644 docs/source/utils.rst create mode 100644 docs/source/visualize.rst diff --git a/docs/source/helper.rst b/docs/source/helper.rst new file mode 100644 index 0000000..44df8cd --- /dev/null +++ b/docs/source/helper.rst @@ -0,0 +1,11 @@ +.. _pygadhelper-module: + +``pygad.helper`` Module +======================= + +This section of the PyGAD's library documentation discusses the +**pygad.helper** module. 
+ +Currently, this module has a submodule called ``unique`` that has a class +named ``Unique`` with some helper methods. Such methods help to check +and fix duplicate values in the genes of a solution. diff --git a/docs/source/index.rst b/docs/source/index.rst index 92aacd7..8d1edc7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -19,9 +19,10 @@ optimizing machine learning algorithms. It works with different types of crossover, mutation, and parent selection operators. `PyGAD `__ allows different types of problems to be optimized using the genetic algorithm -by customizing the fitness function. +by customizing the fitness function. It works with both single-objective +and multi-objective optimization problems. -.. figure:: https://user-images.githubusercontent.com/16560492/101267295-c74c0180-375f-11eb-9ad0-f8e37bd796ce.png +.. image:: https://user-images.githubusercontent.com/16560492/101267295-c74c0180-375f-11eb-9ad0-f8e37bd796ce.png :alt: *Logo designed by* `Asmaa @@ -108,6 +109,11 @@ equation. A very important step is to implement the fitness function that will be used for calculating the fitness value for each solution. Here is one. +If the fitness function returns a number, then the problem is +single-objective. If a ``list``, ``tuple``, or ``numpy.ndarray`` is +returned, then it is a multi-objective problem (applicable even if a +single element exists). + .. code:: python def fitness_func(ga_instance, solution, solution_idx): @@ -213,7 +219,7 @@ PyGAD's Modules 8. The ``visualize`` module to visualize the results. 9. The ``utils`` module contains the operators (crossover, mutation, - and parent selection). + and parent selection) and the NSGA-II code. 10. The ``helper`` module has some helper functions. diff --git a/docs/source/pygad.rst b/docs/source/pygad.rst index 83d0296..ce92704 100644 --- a/docs/source/pygad.rst +++ b/docs/source/pygad.rst @@ -5,7 +5,8 @@ This section of the PyGAD's library documentation discusses the ``pygad`` module. 
Using the ``pygad`` module, instances of the genetic algorithm can be -created, run, saved, and loaded. +created, run, saved, and loaded. Single-objective and multi-objective +optimization problems can be solved. .. _pygadga-class: @@ -34,13 +35,17 @@ The ``pygad.GA`` class constructor supports the following parameters: parents. - ``fitness_func``: Accepts a function/method and returns the fitness - value of the solution. If a function is passed, then it must accept 3 - parameters (1. the instance of the ``pygad.GA`` class, 2. a single - solution, and 3. its index in the population). If method, then it - accepts a fourth parameter representing the method's class instance. - Check the `Preparing the fitness_func + value(s) of the solution. If a function is passed, then it must + accept 3 parameters (1. the instance of the ``pygad.GA`` class, 2. a + single solution, and 3. its index in the population). If method, then + it accepts a fourth parameter representing the method's class + instance. Check the `Preparing the fitness_func Parameter `__ - section for information about creating such a function. + section for information about creating such a function. In `PyGAD + 3.2.0 `__, + multi-objective optimization is supported. To consider the problem as + multi-objective, just return a ``list``, ``tuple``, or + ``numpy.ndarray`` from the fitness function. - ``fitness_batch_size=None``: A new optional parameter called ``fitness_batch_size`` is supported to calculate the fitness function @@ -51,9 +56,9 @@ The ``pygad.GA`` class constructor supports the following parameters: ``1 < fitness_batch_size <= sol_per_pop``, then the solutions are grouped into batches of size ``fitness_batch_size`` and the fitness function is called once for each batch. Check the `Batch Fitness - Calculation `__ + Calculation `__ section for more details and examples. Added in from `PyGAD - 2.19.0 `__. + 2.19.0 `__. - ``initial_population``: A user-defined initial population. 
It is useful when the user wants to start the generations with a custom @@ -64,7 +69,7 @@ The ``pygad.GA`` class constructor supports the following parameters: exception is raised if the ``initial_population`` is ``None`` while any of the 2 parameters (``sol_per_pop`` or ``num_genes``) is also ``None``. Introduced in `PyGAD - 2.0.0 `__ + 2.0.0 `__ and higher. - ``sol_per_pop``: Number of solutions (i.e. chromosomes) within the @@ -79,30 +84,30 @@ The ``pygad.GA`` class constructor supports the following parameters: single data type that is applied to all genes or can specify the data type of each individual gene. It defaults to ``float`` which means all genes are of ``float`` data type. Starting from `PyGAD - 2.9.0 `__, + 2.9.0 `__, the ``gene_type`` parameter can be assigned to a numeric value of any of these types: ``int``, ``float``, and ``numpy.int/uint/float(8-64)``. Starting from `PyGAD - 2.14.0 `__, + 2.14.0 `__, it can be assigned to a ``list``, ``tuple``, or a ``numpy.ndarray`` which hold a data type for each gene (e.g. ``gene_type=[int, float, numpy.int8]``). This helps to control the data type of each individual gene. In `PyGAD - 2.15.0 `__, + 2.15.0 `__, a precision for the ``float`` data types can be specified (e.g. ``gene_type=[float, 2]``. - ``init_range_low=-4``: The lower value of the random range from which the gene values in the initial population are selected. ``init_range_low`` defaults to ``-4``. Available in `PyGAD - 1.0.20 `__ + 1.0.20 `__ and higher. This parameter has no action if the ``initial_population`` parameter exists. - ``init_range_high=4``: The upper value of the random range from which the gene values in the initial population are selected. ``init_range_high`` defaults to ``+4``. Available in `PyGAD - 1.0.20 `__ + 1.0.20 `__ and higher. This parameter has no action if the ``initial_population`` parameter exists. 
@@ -112,9 +117,9 @@ The ``pygad.GA`` class constructor supports the following parameters: ``rank`` (for rank selection), ``random`` (for random selection), and ``tournament`` (for tournament selection). A custom parent selection function can be passed starting from `PyGAD - 2.16.0 `__. + 2.16.0 `__. Check the `User-Defined Crossover, Mutation, and Parent Selection - Operators `__ + Operators `__ section for more details about building a user-defined parent selection function. @@ -125,15 +130,15 @@ The ``pygad.GA`` class constructor supports the following parameters: in the next population. Note that the value assigned to ``keep_parents`` cannot be ``< - 1`` or greater than the number of solutions within the population ``sol_per_pop``. Starting from `PyGAD - 2.18.0 `__, + 2.18.0 `__, this parameter have an effect only when the ``keep_elitism`` parameter is ``0``. Starting from `PyGAD - 2.20.0 `__, + 2.20.0 `__, the parents' fitness from the last generation will not be re-used if ``keep_parents=0``. - ``keep_elitism=1``: Added in `PyGAD - 2.18.0 `__. + 2.18.0 `__. It can take the value ``0`` or a positive integer that satisfies (``0 <= keep_elitism <= sol_per_pop``). It defaults to ``1`` which means only the best solution in the current generation is kept in the @@ -153,15 +158,15 @@ The ``pygad.GA`` class constructor supports the following parameters: ``two_points`` (for two points crossover), ``uniform`` (for uniform crossover), and ``scattered`` (for scattered crossover). Scattered crossover is supported from PyGAD - `2.9.0 `__ + `2.9.0 `__ and higher. It defaults to ``single_point``. A custom crossover function can be passed starting from `PyGAD - 2.16.0 `__. + 2.16.0 `__. Check the `User-Defined Crossover, Mutation, and Parent Selection - Operators `__ + Operators `__ section for more details about creating a user-defined crossover function. 
Starting from `PyGAD - 2.2.2 `__ + 2.2.2 `__ and higher, if ``crossover_type=None``, then the crossover step is bypassed which means no crossover is applied and thus no offspring will be created in the next generations. The next generation will use @@ -173,7 +178,7 @@ The ``pygad.GA`` class constructor supports the following parameters: 1.0 is generated. If this random value is less than or equal to the value assigned to the ``crossover_probability`` parameter, then the parent is selected. Added in `PyGAD - 2.5.0 `__ + 2.5.0 `__ and higher. - ``mutation_type="random"``: Type of the mutation operation. Supported @@ -182,23 +187,23 @@ The ``pygad.GA`` class constructor supports the following parameters: scramble mutation), and ``adaptive`` (for adaptive mutation). It defaults to ``random``. A custom mutation function can be passed starting from `PyGAD - 2.16.0 `__. + 2.16.0 `__. Check the `User-Defined Crossover, Mutation, and Parent Selection - Operators `__ + Operators `__ section for more details about creating a user-defined mutation function. Starting from `PyGAD - 2.2.2 `__ + 2.2.2 `__ and higher, if ``mutation_type=None``, then the mutation step is bypassed which means no mutation is applied and thus no changes are applied to the offspring created using the crossover operation. The offspring will be used unchanged in the next generation. ``Adaptive`` mutation is supported starting from `PyGAD - 2.10.0 `__. + 2.10.0 `__. For more information about adaptive mutation, go the the `Adaptive - Mutation `__ + Mutation `__ section. For example about using adaptive mutation, check the `Use Adaptive Mutation in - PyGAD `__ + PyGAD `__ section. - ``mutation_probability=None``: The probability of selecting a gene @@ -209,7 +214,7 @@ The ``pygad.GA`` class constructor supports the following parameters: parameter, then the gene is selected. If this parameter exists, then there is no need for the 2 parameters ``mutation_percent_genes`` and ``mutation_num_genes``. 
Added in `PyGAD - 2.5.0 `__ + 2.5.0 `__ and higher. - ``mutation_by_replacement=False``: An optional bool parameter. It @@ -219,9 +224,9 @@ The ``pygad.GA`` class constructor supports the following parameters: randomly generated value. If False, then it has no effect and random mutation works by adding the random value to the gene. Supported in `PyGAD - 2.2.2 `__ + 2.2.2 `__ and higher. Check the changes in `PyGAD - 2.2.2 `__ + 2.2.2 `__ under the Release History section for an example. - ``mutation_percent_genes="default"``: Percentage of genes to mutate. @@ -232,7 +237,7 @@ The ``pygad.GA`` class constructor supports the following parameters: ``mutation_num_genes`` parameter. The ``mutation_percent_genes`` parameter has no action if ``mutation_probability`` or ``mutation_num_genes`` exist. Starting from `PyGAD - 2.2.2 `__ + 2.2.2 `__ and higher, this parameter has no action if ``mutation_type`` is ``None``. @@ -240,7 +245,7 @@ The ``pygad.GA`` class constructor supports the following parameters: to ``None`` meaning that no number is specified. The ``mutation_num_genes`` parameter has no action if the parameter ``mutation_probability`` exists. Starting from `PyGAD - 2.2.2 `__ + 2.2.2 `__ and higher, this parameter has no action if ``mutation_type`` is ``None``. @@ -248,7 +253,7 @@ The ``pygad.GA`` class constructor supports the following parameters: ``random_mutation_min_val`` parameter specifies the start value of the range from which a random value is selected to be added to the gene. It defaults to ``-1``. Starting from `PyGAD - 2.2.2 `__ + 2.2.2 `__ and higher, this parameter has no action if ``mutation_type`` is ``None``. @@ -256,7 +261,7 @@ The ``pygad.GA`` class constructor supports the following parameters: ``random_mutation_max_val`` parameter specifies the end value of the range from which a random value is selected to be added to the gene. It defaults to ``+1``. 
Starting from `PyGAD - 2.2.2 `__ + 2.2.2 `__ and higher, this parameter has no action if ``mutation_type`` is ``None``. @@ -277,19 +282,19 @@ The ``pygad.GA`` class constructor supports the following parameters: ``init_range_high`` and its mutation value is selected randomly from the range specified by the 2 parameters ``random_mutation_min_val`` and ``random_mutation_max_val``. ``gene_space`` is added in `PyGAD - 2.5.0 `__. + 2.5.0 `__. Check the `Release History of PyGAD - 2.5.0 `__ + 2.5.0 `__ section of the documentation for more details. In `PyGAD - 2.9.0 `__, + 2.9.0 `__, NumPy arrays can be assigned to the ``gene_space`` parameter. In `PyGAD - 2.11.0 `__, + 2.11.0 `__, the ``gene_space`` parameter itself or any of its elements can be assigned to a dictionary to specify the lower and upper limits of the genes. For example, ``{'low': 2, 'high': 4}`` means the minimum and maximum values are 2 and 4, respectively. In `PyGAD - 2.15.0 `__, + 2.15.0 `__, a new key called ``"step"`` is supported to specify the step of moving from the start to the end of the range specified by the 2 existing keys ``"low"`` and ``"high"``. @@ -299,7 +304,7 @@ The ``pygad.GA`` class constructor supports the following parameters: it must accept a single parameter representing the instance of the genetic algorithm. If method, then it must accept 2 parameters where the second one refers to the method's object. Added in `PyGAD - 2.6.0 `__. + 2.6.0 `__. - ``on_fitness=None``: Accepts a function/method to be called after calculating the fitness values of all solutions in the population. If @@ -307,35 +312,35 @@ The ``pygad.GA`` class constructor supports the following parameters: solutions' fitness values 2) the instance of the genetic algorithm. If method, then it must accept 3 parameters where the third one refers to the method's object. Added in `PyGAD - 2.6.0 `__. + 2.6.0 `__. - ``on_parents=None``: Accepts a function/method to be called after selecting the parents that mates. 
If function, then it must accept 2 parameters: 1) the selected parents 2) the instance of the genetic algorithm If method, then it must accept 3 parameters where the third one refers to the method's object. Added in `PyGAD - 2.6.0 `__. + 2.6.0 `__. - ``on_crossover=None``: Accepts a function to be called each time the crossover operation is applied. This function must accept 2 parameters: the first one represents the instance of the genetic algorithm and the second one represents the offspring generated using crossover. Added in `PyGAD - 2.6.0 `__. + 2.6.0 `__. - ``on_mutation=None``: Accepts a function to be called each time the mutation operation is applied. This function must accept 2 parameters: the first one represents the instance of the genetic algorithm and the second one represents the offspring after applying the mutation. Added in `PyGAD - 2.6.0 `__. + 2.6.0 `__. - ``on_generation=None``: Accepts a function to be called after each generation. This function must accept a single parameter representing the instance of the genetic algorithm. If the function returned the string ``stop``, then the ``run()`` method stops without completing the other generations. Added in `PyGAD - 2.6.0 `__. + 2.6.0 `__. - ``on_stop=None``: Accepts a function to be called only once exactly before the genetic algorithm stops or when it completes all the @@ -343,13 +348,13 @@ The ``pygad.GA`` class constructor supports the following parameters: represents the instance of the genetic algorithm and the second one is a list of fitness values of the last population's solutions. Added in `PyGAD - 2.6.0 `__. + 2.6.0 `__. - ``delay_after_gen=0.0``: It accepts a non-negative number specifying the time in seconds to wait after a generation completes and before going to the next generation. It defaults to ``0.0`` which means no delay after the generation. Available in `PyGAD - 2.4.0 `__ + 2.4.0 `__ and higher. 
- ``save_best_solutions=False``: When ``True``, then the best solution @@ -357,25 +362,25 @@ The ``pygad.GA`` class constructor supports the following parameters: ``best_solutions``. If ``False`` (default), then no solutions are saved and the ``best_solutions`` attribute will be empty. Supported in `PyGAD - 2.9.0 `__. + 2.9.0 `__. - ``save_solutions=False``: If ``True``, then all solutions in each generation are appended into an attribute called ``solutions`` which is NumPy array. Supported in `PyGAD - 2.15.0 `__. + 2.15.0 `__. - ``suppress_warnings=False``: A bool parameter to control whether the warning messages are printed or not. It defaults to ``False``. - ``allow_duplicate_genes=True``: Added in `PyGAD - 2.13.0 `__. + 2.13.0 `__. If ``True``, then a solution/chromosome may have duplicate gene values. If ``False``, then each gene will have a unique value in its solution. - ``stop_criteria=None``: Some criteria to stop the evolution. Added in `PyGAD - 2.15.0 `__. + 2.15.0 `__. Each criterion is passed as ``str`` which has a stop word. The current 2 supported words are ``reach`` and ``saturate``. ``reach`` stops the ``run()`` method if the fitness value is equal to or @@ -387,7 +392,7 @@ The ``pygad.GA`` class constructor supports the following parameters: fitness does not change for 7 consecutive generations. - ``parallel_processing=None``: Added in `PyGAD - 2.17.0 `__. + 2.17.0 `__. If ``None`` (Default), this means no parallel processing is applied. It can accept a list/tuple of 2 elements [1) Can be either ``'process'`` or ``'thread'`` to indicate whether processes or @@ -399,11 +404,11 @@ The ``pygad.GA`` class constructor supports the following parameters: 5 threads which is equivalent to ``parallel_processing=["thread", 5]``. For more information, check the `Parallel Processing in - PyGAD `__ + PyGAD `__ section. - ``random_seed=None``: Added in `PyGAD - 2.18.0 `__. + 2.18.0 `__. 
It defines the random seed to be used by the random function generators (we use random functions in the NumPy and random modules). This helps to reproduce the same results by setting the same random @@ -415,9 +420,9 @@ The ``pygad.GA`` class constructor supports the following parameters: ``print()`` but logged. If ``logger=None``, then a logger is created that uses ``StreamHandler`` to logs the messages to the console. Added in `PyGAD - 3.0.0 `__. + 3.0.0 `__. Check the `Logging - Outputs `__ + Outputs `__ for more information. The user doesn't have to specify all of such parameters while creating @@ -504,7 +509,7 @@ Other Attributes - ``last_generation_fitness``: The fitness values of the solutions in the last generation. `Added in PyGAD - 2.12.0 `__. + 2.12.0 `__. - ``previous_generation_fitness``: At the end of each generation, the fitness of the most recent population is saved in the @@ -514,51 +519,56 @@ Other Attributes ``previous_generation_fitness`` attribute is used to fetch the pre-calculated fitness instead of calling the fitness function for already explored solutions. `Added in PyGAD - 2.16.2 `__. + 2.16.2 `__. - ``last_generation_parents``: The parents selected from the last generation. `Added in PyGAD - 2.12.0 `__. + 2.12.0 `__. - ``last_generation_offspring_crossover``: The offspring generated after applying the crossover in the last generation. `Added in PyGAD - 2.12.0 `__. + 2.12.0 `__. - ``last_generation_offspring_mutation``: The offspring generated after applying the mutation in the last generation. `Added in PyGAD - 2.12.0 `__. + 2.12.0 `__. - ``gene_type_single``: A flag that is set to ``True`` if the ``gene_type`` parameter is assigned to a single data type that is applied to all genes. If ``gene_type`` is assigned a ``list``, ``tuple``, or ``numpy.ndarray``, then the value of ``gene_type_single`` will be ``False``. `Added in PyGAD - 2.14.0 `__. + 2.14.0 `__. 
- ``last_generation_parents_indices``: This attribute holds the indices of the selected parents in the last generation. Supported in `PyGAD - 2.15.0 `__. + 2.15.0 `__. - ``last_generation_elitism``: This attribute holds the elitism of the last generation. It is effective only if the ``keep_elitism`` parameter has a non-zero value. Supported in `PyGAD - 2.18.0 `__. + 2.18.0 `__. - ``last_generation_elitism_indices``: This attribute holds the indices of the elitism of the last generation. It is effective only if the ``keep_elitism`` parameter has a non-zero value. Supported in `PyGAD - 2.19.0 `__. + 2.19.0 `__. - ``logger``: This attribute holds the logger from the ``logging`` module. Supported in `PyGAD - 3.0.0 `__. + 3.0.0 `__. - ``gene_space_unpacked``: This is the unpacked version of the ``gene_space`` parameter. For example, ``range(1, 5)`` is unpacked to ``[1, 2, 3, 4]``. For an infinite range like ``{'low': 2, 'high': 4}``, then it is unpacked to a limited number of values (e.g. 100). Supported in `PyGAD - 3.1.0 `__. + 3.1.0 `__. + +- ``pareto_fronts``: A new instance attribute named ``pareto_fronts`` + added to the ``pygad.GA`` instances that holds the pareto fronts when + solving a multi-objective problem. Supported in `PyGAD + 3.2.0 `__. Note that the attributes with names starting with ``last_generation_`` are updated after each generation. @@ -606,9 +616,9 @@ Other Methods - ``summary()``: Prints a Keras-like summary of the PyGAD lifecycle. This helps to have an overview of the architecture. Supported in `PyGAD - 2.19.0 `__. + 2.19.0 `__. Check the `Print Lifecycle - Summary `__ + Summary `__ section for more details and examples. The next sections discuss the methods available in the ``pygad.GA`` @@ -790,6 +800,25 @@ Selects the parents using the roulette wheel selection technique. Selects the parents using the stochastic universal selection technique. +.. 
_nsga2selection: + +``nsga2_selection()`` +~~~~~~~~~~~~~~~~~~~~~ + +Selects the parents for the NSGA-II algorithm to solve multi-objective +optimization problems. It selects the parents by ranking them based on +non-dominated sorting and crowding distance. + +.. _tournamentselectionnsga2: + +``tournament_selection_nsga2()`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Selects the parents for the NSGA-II algorithm to solve multi-objective +optimization problems. It selects the parents using the tournament +selection technique applied based on non-dominated sorting and crowding +distance. + Crossover Methods ----------------- @@ -927,13 +956,12 @@ It returns the following: Previously named ``plot_result()``, this method creates, shows, and returns a figure that summarizes how the fitness value evolves by -generation. It works only after completing at least 1 generation. +generation. -If no generation is completed (at least 1), an exception is raised. +It works only after completing at least 1 generation. If no generation +is completed (at least 1), an exception is raised. -Starting from `PyGAD -2.15.0 `__ -and higher, this method accepts the following parameters: +This method accepts the following parameters: 1. ``title``: Title of the figure. @@ -949,9 +977,14 @@ and higher, this method accepts the following parameters: 6. ``plot_type``: Type of the plot which can be either ``"plot"`` (default), ``"scatter"``, or ``"bar"``. -7. ``color``: Color of the plot which defaults to ``"#3870FF"``. +7. ``color``: Color of the plot which defaults to the greenish color + ``"#64f20c"``. -8. ``save_dir``: Directory to save the figure. +8. ``label``: The label used for the legend in the figures of + multi-objective problems. It is not used for single-objective + problems. It defaults to ``None`` which means no labels used. + +9. ``save_dir``: Directory to save the figure. .. 
_plotnewsolutionrate: @@ -961,10 +994,10 @@ and higher, this method accepts the following parameters: The ``plot_new_solution_rate()`` method creates, shows, and returns a figure that shows the number of new solutions explored in each generation. This method works only when ``save_solutions=True`` in the -constructor of the ``pygad.GA`` class. It also works only after -completing at least 1 generation. +constructor of the ``pygad.GA`` class. -If no generation is completed (at least 1), an exception is raised. +It works only after completing at least 1 generation. If no generation +is completed (at least 1), an exception is raised. This method accepts the following parameters: @@ -1004,8 +1037,7 @@ which helps to: This is controlled by the ``graph_type`` parameter. It works only after completing at least 1 generation. If no generation -is completed, an exception is raised. If no generation is completed (at -least 1), an exception is raised. +is completed (at least 1), an exception is raised. This method accepts the following parameters: @@ -1115,7 +1147,22 @@ PyGAD has a parameter called ``fitness_func`` that allows the user to specify a custom function/method to use when calculating the fitness. This function/method must be a maximization function/method so that a solution with a high fitness value returned is selected compared to a -solution with a low value. Doing that allows the user to freely use +solution with a low value. + +The fitness function is where the user can decide whether the +optimization problem is single-objective or multi-objective. + +- If the fitness function returns a numeric value, then the problem is + single-objective. The numeric data types supported by PyGAD are + listed in the ``supported_int_float_types`` variable of the + ``pygad.GA`` class. + +- If the fitness function returns a ``list``, ``tuple``, or + ``numpy.ndarray``, then the problem is multi-objective. 
Even if + there is only one element, the problem is still considered + multi-objective. + +Using a user-defined fitness function allows the user to freely use PyGAD to solve any problem by passing the appropriate fitness function/method. It is very important to understand the problem well for creating it. @@ -1145,6 +1192,9 @@ return a value that gets higher when the solution's output is closer to fitness = 1.0 / numpy.abs(output - desired_output) return fitness +Because the fitness function returns a numeric value, the problem +is single-objective. + Such a user-defined function must accept 3 parameters: 1. The instance of the ``pygad.GA`` class. This helps the user to fetch @@ -1357,59 +1407,6 @@ any property. print(loaded_ga_instance.best_solution()) -Crossover, Mutation, and Parent Selection -========================================= - -PyGAD supports different types for selecting the parents and applying -the crossover & mutation operators. More features will be added in the -future. To ask for a new feature, please check the ``Ask for Feature`` -section. - -Supported Crossover Operations ------------------------------- - -The supported crossover operations at this time are: - -1. Single point: Implemented using the ``single_point_crossover()`` - method. - -2. Two points: Implemented using the ``two_points_crossover()`` method. - -3. Uniform: Implemented using the ``uniform_crossover()`` method. - -Supported Mutation Operations ------------------------------ - -The supported mutation operations at this time are: - -1. Random: Implemented using the ``random_mutation()`` method. - -2. Swap: Implemented using the ``swap_mutation()`` method. - -3. Inversion: Implemented using the ``inversion_mutation()`` method. - -4. Scramble: Implemented using the ``scramble_mutation()`` method. - -Supported Parent Selection Operations -------------------------------------- - -The supported parent selection techniques at this time are: - -1.
Steady-state: Implemented using the ``steady_state_selection()`` - method. - -2. Roulette wheel: Implemented using the ``roulette_wheel_selection()`` - method. - -3. Stochastic universal: Implemented using the - ``stochastic_universal_selection()``\ method. - -4. Rank: Implemented using the ``rank_selection()`` method. - -5. Random: Implemented using the ``random_selection()`` method. - -6. Tournament: Implemented using the ``tournament_selection()`` method. - Life Cycle of PyGAD =================== @@ -1503,3088 +1500,14 @@ argument, here is the output. on_stop() -Adaptive Mutation -================= - -In the regular genetic algorithm, the mutation works by selecting a -single fixed mutation rate for all solutions regardless of their fitness -values. So, regardless on whether this solution has high or low quality, -the same number of genes are mutated all the time. - -The pitfalls of using a constant mutation rate for all solutions are -summarized in this paper `Libelli, S. Marsili, and P. Alba. "Adaptive -mutation in genetic algorithms." Soft computing 4.2 (2000): -76-80 `__ -as follows: - - The weak point of "classical" GAs is the total randomness of - mutation, which is applied equally to all chromosomes, irrespective - of their fitness. Thus a very good chromosome is equally likely to be - disrupted by mutation as a bad one. - - On the other hand, bad chromosomes are less likely to produce good - ones through crossover, because of their lack of building blocks, - until they remain unchanged. They would benefit the most from - mutation and could be used to spread throughout the parameter space - to increase the search thoroughness. So there are two conflicting - needs in determining the best probability of mutation. - - Usually, a reasonable compromise in the case of a constant mutation - is to keep the probability low to avoid disruption of good - chromosomes, but this would prevent a high mutation rate of - low-fitness chromosomes. 
Thus a constant probability of mutation - would probably miss both goals and result in a slow improvement of - the population. - -According to `Libelli, S. Marsili, and P. -Alba. `__ -work, the adaptive mutation solves the problems of constant mutation. - -Adaptive mutation works as follows: - -1. Calculate the average fitness value of the population (``f_avg``). - -2. For each chromosome, calculate its fitness value (``f``). - -3. If ``ff_avg``, then this solution is regarded as a high-quality - solution and thus the mutation rate should be kept low to avoid - disrupting this high quality solution. - -In PyGAD, if ``f=f_avg``, then the solution is regarded of high quality. - -The next figure summarizes the previous steps. - -.. image:: https://user-images.githubusercontent.com/16560492/103468973-e3c26600-4d2c-11eb-8af3-b3bb39b50540.jpg - :alt: - -This strategy is applied in PyGAD. - -Use Adaptive Mutation in PyGAD ------------------------------- - -In PyGAD 2.10.0, adaptive mutation is supported. To use it, just follow -the following 2 simple steps: - -1. In the constructor of the ``pygad.GA`` class, set - ``mutation_type="adaptive"`` to specify that the type of mutation is - adaptive. - -2. Specify the mutation rates for the low and high quality solutions - using one of these 3 parameters according to your preference: - ``mutation_probability``, ``mutation_num_genes``, and - ``mutation_percent_genes``. Please check the `documentation of each - of these - parameters `__ - for more information. - -When adaptive mutation is used, then the value assigned to any of the 3 -parameters can be of any of these data types: - -1. ``list`` - -2. ``tuple`` - -3. ``numpy.ndarray`` - -Whatever the data type used, the length of the ``list``, ``tuple``, or -the ``numpy.ndarray`` must be exactly 2. That is there are just 2 -values: - -1. The first value is the mutation rate for the low-quality solutions. - -2. The second value is the mutation rate for the high-quality solutions. 
- -PyGAD expects that the first value is higher than the second value and -thus a warning is printed in case the first value is lower than the -second one. - -Here are some examples to feed the mutation rates: - -.. code:: python - - # mutation_probability - mutation_probability = [0.25, 0.1] - mutation_probability = (0.35, 0.17) - mutation_probability = numpy.array([0.15, 0.05]) - - # mutation_num_genes - mutation_num_genes = [4, 2] - mutation_num_genes = (3, 1) - mutation_num_genes = numpy.array([7, 2]) - - # mutation_percent_genes - mutation_percent_genes = [25, 12] - mutation_percent_genes = (15, 8) - mutation_percent_genes = numpy.array([21, 13]) - -Assume that the average fitness is 12 and the fitness values of 2 -solutions are 15 and 7. If the mutation probabilities are specified as -follows: - -.. code:: python - - mutation_probability = [0.25, 0.1] - -Then the mutation probability of the first solution is 0.1 because its -fitness is 15 which is higher than the average fitness 12. The mutation -probability of the second solution is 0.25 because its fitness is 7 -which is lower than the average fitness 12. - -Here is an example that uses adaptive mutation. - -.. code:: python - - import pygad - import numpy - - function_inputs = [4,-2,3.5,5,-11,-4.7] # Function inputs. - desired_output = 44 # Function output. - - def fitness_func(ga_instance, solution, solution_idx): - # The fitness function calulates the sum of products between each input and its corresponding weight. - output = numpy.sum(solution*function_inputs) - # The value 0.000001 is used to avoid the Inf value when the denominator numpy.abs(output - desired_output) is 0.0. - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - # Creating an instance of the GA class inside the ga module. Some parameters are initialized within the constructor. 
- ga_instance = pygad.GA(num_generations=200, - fitness_func=fitness_func, - num_parents_mating=10, - sol_per_pop=20, - num_genes=len(function_inputs), - mutation_type="adaptive", - mutation_num_genes=(3, 1)) - - # Running the GA to optimize the parameters of the function. - ga_instance.run() - - ga_instance.plot_fitness(title="PyGAD with Adaptive Mutation", linewidth=5) - -.. _limit-the-gene-value-range-using-the-genespace-parameter: - -Limit the Gene Value Range using the ``gene_space`` Parameter -============================================================= - -In `PyGAD -2.11.0 `__, -the ``gene_space`` parameter supported a new feature to allow -customizing the range of accepted values for each gene. Let's take a -quick review of the ``gene_space`` parameter to build over it. - -The ``gene_space`` parameter allows the user to feed the space of values -of each gene. This way the accepted values for each gene is retracted to -the user-defined values. Assume there is a problem that has 3 genes -where each gene has different set of values as follows: - -1. Gene 1: ``[0.4, 12, -5, 21.2]`` - -2. Gene 2: ``[-2, 0.3]`` - -3. Gene 3: ``[1.2, 63.2, 7.4]`` - -Then, the ``gene_space`` for this problem is as given below. Note that -the order is very important. - -.. code:: python - - gene_space = [[0.4, 12, -5, 21.2], - [-2, 0.3], - [1.2, 63.2, 7.4]] - -In case all genes share the same set of values, then simply feed a -single list to the ``gene_space`` parameter as follows. In this case, -all genes can only take values from this list of 6 values. - -.. code:: python - - gene_space = [33, 7, 0.5, 95. 6.3, 0.74] - -The previous example restricts the gene values to just a set of fixed -number of discrete values. In case you want to use a range of discrete -values to the gene, then you can use the ``range()`` function. For -example, ``range(1, 7)`` means the set of allowed values for the gene -are ``1, 2, 3, 4, 5, and 6``. 
You can also use the ``numpy.arange()`` or -``numpy.linspace()`` functions for the same purpose. - -The previous discussion only works with a range of discrete values not -continuous values. In `PyGAD -2.11.0 `__, -the ``gene_space`` parameter can be assigned a dictionary that allows -the gene to have values from a continuous range. - -Assuming you want to restrict the gene within this half-open range [1 to -5) where 1 is included and 5 is not. Then simply create a dictionary -with 2 items where the keys of the 2 items are: - -1. ``'low'``: The minimum value in the range which is 1 in the example. - -2. ``'high'``: The maximum value in the range which is 5 in the example. - -The dictionary will look like that: - -.. code:: python - - {'low': 1, - 'high': 5} - -It is not acceptable to add more than 2 items in the dictionary or use -other keys than ``'low'`` and ``'high'``. - -For a 3-gene problem, the next code creates a dictionary for each gene -to restrict its values in a continuous range. For the first gene, it can -take any floating-point value from the range that starts from 1 -(inclusive) and ends at 5 (exclusive). - -.. code:: python - - gene_space = [{'low': 1, 'high': 5}, {'low': 0.3, 'high': 1.4}, {'low': -0.2, 'high': 4.5}] - -.. _more-about-the-genespace-parameter: - -More about the ``gene_space`` Parameter -======================================= - -The ``gene_space`` parameter customizes the space of values of each -gene. - -Assuming that all genes have the same global space which include the -values 0.3, 5.2, -4, and 8, then those values can be assigned to the -``gene_space`` parameter as a list, tuple, or range. Here is a list -assigned to this parameter. By doing that, then the gene values are -restricted to those assigned to the ``gene_space`` parameter. - -.. code:: python - - gene_space = [0.3, 5.2, -4, 8] - -If some genes have different spaces, then ``gene_space`` should accept a -nested list or tuple. In this case, the elements could be: - -1. 
Number (of ``int``, ``float``, or ``NumPy`` data types): A single - value to be assigned to the gene. This means this gene will have the - same value across all generations. - -2. ``list``, ``tuple``, ``numpy.ndarray``, or any range like ``range``, - ``numpy.arange()``, or ``numpy.linspace``: It holds the space for - each individual gene. But this space is usually discrete. That is - there is a set of finite values to select from. - -3. ``dict``: To sample a value for a gene from a continuous range. The - dictionary must have 2 mandatory keys which are ``"low"`` and - ``"high"`` in addition to an optional key which is ``"step"``. A - random value is returned between the values assigned to the items - with ``"low"`` and ``"high"`` keys. If the ``"step"`` exists, then - this works as the previous options (i.e. discrete set of values). - -4. ``None``: A gene with its space set to ``None`` is initialized - randomly from the range specified by the 2 parameters - ``init_range_low`` and ``init_range_high``. For mutation, its value - is mutated based on a random value from the range specified by the 2 - parameters ``random_mutation_min_val`` and - ``random_mutation_max_val``. If all elements in the ``gene_space`` - parameter are ``None``, the parameter will not have any effect. - -Assuming that a chromosome has 2 genes and each gene has a different -value space. Then the ``gene_space`` could be assigned a nested -list/tuple where each element determines the space of a gene. - -According to the next code, the space of the first gene is ``[0.4, -5]`` -which has 2 values and the space for the second gene is -``[0.5, -3.2, 8.8, -9]`` which has 4 values. - -.. code:: python - - gene_space = [[0.4, -5], [0.5, -3.2, 8.2, -9]] - -For a 2 gene chromosome, if the first gene space is restricted to the -discrete values from 0 to 4 and the second gene is restricted to the -values from 10 to 19, then it could be specified according to the next -code. - -.. 
code:: python - - gene_space = [range(5), range(10, 20)] - -The ``gene_space`` can also be assigned to a single range, as given -below, where the values of all genes are sampled from the same range. - -.. code:: python - - gene_space = numpy.arange(15) - -The ``gene_space`` can be assigned a dictionary to sample a value from a -continuous range. - -.. code:: python - - gene_space = {"low": 4, "high": 30} - -A step also can be assigned to the dictionary. This works as if a range -is used. - -.. code:: python - - gene_space = {"low": 4, "high": 30, "step": 2.5} - -.. - - Setting a ``dict`` like ``{"low": 0, "high": 10}`` in the - ``gene_space`` means that random values from the continuous range [0, - 10) are sampled. Note that ``0`` is included but ``10`` is not - included while sampling. Thus, the maximum value that could be - returned is less than ``10`` like ``9.9999``. But if the user decided - to round the genes using, for example, ``[float, 2]``, then this - value will become 10. So, the user should be careful to the inputs. - -If a ``None`` is assigned to only a single gene, then its value will be -randomly generated initially using the ``init_range_low`` and -``init_range_high`` parameters in the ``pygad.GA`` class's constructor. -During mutation, the value are sampled from the range defined by the 2 -parameters ``random_mutation_min_val`` and ``random_mutation_max_val``. -This is an example where the second gene is given a ``None`` value. - -.. code:: python - - gene_space = [range(5), None, numpy.linspace(10, 20, 300)] - -If the user did not assign the initial population to the -``initial_population`` parameter, the initial population is created -randomly based on the ``gene_space`` parameter. Moreover, the mutation -is applied based on this parameter. - -.. _how-mutation-works-with-the-genespace-parameter: - -How Mutation Works with the ``gene_space`` Parameter? 
------------------------------------------------------ - -If a gene has its static space defined in the ``gene_space`` parameter, -then mutation works by replacing the gene value by a value randomly -selected from the gene space. This happens for both ``int`` and -``float`` data types. - -For example, the following ``gene_space`` has the static space -``[1, 2, 3]`` defined for the first gene. So, this gene can only have a -value out of these 3 values. - -.. code:: python - - Gene space: [[1, 2, 3], - None] - Solution: [1, 5] - -For a solution like ``[1, -0.5, 4]``, then mutation happens for the -first gene by simply replacing its current value by a randomly selected -value (other than its current value if possible). So, the value 1 will -be replaced by either 2 or 3. - -For the second gene, its space is set to ``None``. So, traditional -mutation happens for this gene by: - -1. Generating a random value from the range defined by the - ``random_mutation_min_val`` and ``random_mutation_max_val`` - parameters. - -2. Adding this random value to the current gene's value. - -If its current value is 5 and the random value is ``-0.5``, then the new -value is 4.5. If the gene type is integer, then the value will be -rounded. - -Stop at Any Generation -====================== - -In `PyGAD -2.4.0 `__, -it is possible to stop the genetic algorithm after any generation. All -you need to do it to return the string ``"stop"`` in the callback -function ``on_generation``. When this callback function is implemented -and assigned to the ``on_generation`` parameter in the constructor of -the ``pygad.GA`` class, then the algorithm immediately stops after -completing its current generation. Let's discuss an example. - -Assume that the user wants to stop algorithm either after the 100 -generations or if a condition is met. The user may assign a value of 100 -to the ``num_generations`` parameter of the ``pygad.GA`` class -constructor. 
- -The condition that stops the algorithm is written in a callback function -like the one in the next code. If the fitness value of the best solution -exceeds 70, then the string ``"stop"`` is returned. - -.. code:: python - - def func_generation(ga_instance): - if ga_instance.best_solution()[1] >= 70: - return "stop" - -Stop Criteria -============= - -In `PyGAD -2.15.0 `__, -a new parameter named ``stop_criteria`` is added to the constructor of -the ``pygad.GA`` class. It helps to stop the evolution based on some -criteria. It can be assigned to one or more criterion. - -Each criterion is passed as ``str`` that consists of 2 parts: - -1. Stop word. - -2. Number. - -It takes this form: - -.. code:: python - - "word_num" - -The current 2 supported words are ``reach`` and ``saturate``. - -The ``reach`` word stops the ``run()`` method if the fitness value is -equal to or greater than a given fitness value. An example for ``reach`` -is ``"reach_40"`` which stops the evolution if the fitness is >= 40. - -``saturate`` stops the evolution if the fitness saturates for a given -number of consecutive generations. An example for ``saturate`` is -``"saturate_7"`` which means stop the ``run()`` method if the fitness -does not change for 7 consecutive generations. - -Here is an example that stops the evolution if either the fitness value -reached ``127.4`` or if the fitness saturates for ``15`` generations. - -.. 
code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, 9, 4] - desired_output = 44 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - - return fitness - - ga_instance = pygad.GA(num_generations=200, - sol_per_pop=10, - num_parents_mating=4, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - stop_criteria=["reach_127.4", "saturate_15"]) - - ga_instance.run() - print("Number of generations passed is {generations_completed}".format(generations_completed=ga_instance.generations_completed)) - -Elitism Selection -================= - -In `PyGAD -2.18.0 `__, -a new parameter called ``keep_elitism`` is supported. It accepts an -integer to define the number of elitism (i.e. best solutions) to keep in -the next generation. This parameter defaults to ``1`` which means only -the best solution is kept in the next generation. - -In the next example, the ``keep_elitism`` parameter in the constructor -of the ``pygad.GA`` class is set to 2. Thus, the best 2 solutions in -each generation are kept in the next generation. - -.. code:: python - - import numpy - import pygad - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / numpy.abs(output - desired_output) - return fitness - - ga_instance = pygad.GA(num_generations=2, - num_parents_mating=3, - fitness_func=fitness_func, - num_genes=6, - sol_per_pop=5, - keep_elitism=2) - - ga_instance.run() - -The value passed to the ``keep_elitism`` parameter must satisfy 2 -conditions: - -1. It must be ``>= 0``. - -2. It must be ``<= sol_per_pop``. That is its value cannot exceed the - number of solutions in the current population. 
- -In the previous example, if the ``keep_elitism`` parameter is set equal -to the value passed to the ``sol_per_pop`` parameter, which is 5, then -there will be no evolution at all as in the next figure. This is because -all the 5 solutions are used as elitism in the next generation and no -offspring will be created. - -.. code:: python +Examples +======== - ... +This section gives the complete code of some examples that use +``pygad``. Each subsection builds a different example. - ga_instance = pygad.GA(..., - sol_per_pop=5, - keep_elitism=5) - - ga_instance.run() - -.. image:: https://user-images.githubusercontent.com/16560492/189273225-67ffad41-97ab-45e1-9324-429705e17b20.png - :alt: - -Note that if the ``keep_elitism`` parameter is effective (i.e. is -assigned a positive integer, not zero), then the ``keep_parents`` -parameter will have no effect. Because the default value of the -``keep_elitism`` parameter is 1, then the ``keep_parents`` parameter has -no effect by default. The ``keep_parents`` parameter is only effective -when ``keep_elitism=0``. - -Random Seed -=========== - -In `PyGAD -2.18.0 `__, -a new parameter called ``random_seed`` is supported. Its value is used -as a seed for the random function generators. - -PyGAD uses random functions in these 2 libraries: - -1. NumPy - -2. random - -The ``random_seed`` parameter defaults to ``None`` which means no seed -is used. As a result, different random numbers are generated for each -run of PyGAD. - -If this parameter is assigned a proper seed, then the results will be -reproducible. In the next example, the integer 2 is used as a random -seed. - -.. 
code:: python - - import numpy - import pygad - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / numpy.abs(output - desired_output) - return fitness - - ga_instance = pygad.GA(num_generations=2, - num_parents_mating=3, - fitness_func=fitness_func, - sol_per_pop=5, - num_genes=6, - random_seed=2) - - ga_instance.run() - best_solution, best_solution_fitness, best_match_idx = ga_instance.best_solution() - print(best_solution) - print(best_solution_fitness) - -This is the best solution found and its fitness value. - -.. code:: - - [ 2.77249188 -4.06570662 0.04196872 -3.47770796 -0.57502138 -3.22775267] - 0.04872203136549972 - -After running the code again, it will find the same result. - -.. code:: - - [ 2.77249188 -4.06570662 0.04196872 -3.47770796 -0.57502138 -3.22775267] - 0.04872203136549972 - -Continue without Loosing Progress -================================= - -In `PyGAD -2.18.0 `__, -and thanks for `Felix Bernhard `__ for -opening `this GitHub -issue `__, -the values of these 4 instance attributes are no longer reset after each -call to the ``run()`` method. - -1. ``self.best_solutions`` - -2. ``self.best_solutions_fitness`` - -3. ``self.solutions`` - -4. ``self.solutions_fitness`` - -This helps the user to continue where the last run stopped without -loosing the values of these 4 attributes. - -Now, the user can save the model by calling the ``save()`` method. - -.. code:: python - - import pygad - - def fitness_func(ga_instance, solution, solution_idx): - ... - return fitness - - ga_instance = pygad.GA(...) - - ga_instance.run() - - ga_instance.plot_fitness() - - ga_instance.save("pygad_GA") - -Then the saved model is loaded by calling the ``load()`` function. After -calling the ``run()`` method over the loaded instance, then the data -from the previous 4 attributes are not reset but extended with the new -data. - -.. 
code:: python - - import pygad - - def fitness_func(ga_instance, solution, solution_idx): - ... - return fitness - - loaded_ga_instance = pygad.load("pygad_GA") - - loaded_ga_instance.run() - - loaded_ga_instance.plot_fitness() - -The plot created by the ``plot_fitness()`` method will show the data -collected from both the runs. - -Note that the 2 attributes (``self.best_solutions`` and -``self.best_solutions_fitness``) only work if the -``save_best_solutions`` parameter is set to ``True``. Also, the 2 -attributes (``self.solutions`` and ``self.solutions_fitness``) only work -if the ``save_solutions`` parameter is ``True``. - -Prevent Duplicates in Gene Values -================================= - -In `PyGAD -2.13.0 `__, -a new bool parameter called ``allow_duplicate_genes`` is supported to -control whether duplicates are supported in the chromosome or not. In -other words, whether 2 or more genes might have the same exact value. - -If ``allow_duplicate_genes=True`` (which is the default case), genes may -have the same value. If ``allow_duplicate_genes=False``, then no 2 genes -will have the same value given that there are enough unique values for -the genes. - -The next code gives an example to use the ``allow_duplicate_genes`` -parameter. A callback generation function is implemented to print the -population after each generation. - -.. code:: python - - import pygad - - def fitness_func(ga_instance, solution, solution_idx): - return 0 - - def on_generation(ga): - print("Generation", ga.generations_completed) - print(ga.population) - - ga_instance = pygad.GA(num_generations=5, - sol_per_pop=5, - num_genes=4, - mutation_num_genes=3, - random_mutation_min_val=-5, - random_mutation_max_val=5, - num_parents_mating=2, - fitness_func=fitness_func, - gene_type=int, - on_generation=on_generation, - allow_duplicate_genes=False) - ga_instance.run() - -Here are the population after the 5 generations. Note how there are no -duplicate values. - -.. 
code:: python - - Generation 1 - [[ 2 -2 -3 3] - [ 0 1 2 3] - [ 5 -3 6 3] - [-3 1 -2 4] - [-1 0 -2 3]] - Generation 2 - [[-1 0 -2 3] - [-3 1 -2 4] - [ 0 -3 -2 6] - [-3 0 -2 3] - [ 1 -4 2 4]] - Generation 3 - [[ 1 -4 2 4] - [-3 0 -2 3] - [ 4 0 -2 1] - [-4 0 -2 -3] - [-4 2 0 3]] - Generation 4 - [[-4 2 0 3] - [-4 0 -2 -3] - [-2 5 4 -3] - [-1 2 -4 4] - [-4 2 0 -3]] - Generation 5 - [[-4 2 0 -3] - [-1 2 -4 4] - [ 3 4 -4 0] - [-1 0 2 -2] - [-4 2 -1 1]] - -The ``allow_duplicate_genes`` parameter is configured with use with the -``gene_space`` parameter. Here is an example where each of the 4 genes -has the same space of values that consists of 4 values (1, 2, 3, and 4). - -.. code:: python - - import pygad - - def fitness_func(ga_instance, solution, solution_idx): - return 0 - - def on_generation(ga): - print("Generation", ga.generations_completed) - print(ga.population) - - ga_instance = pygad.GA(num_generations=1, - sol_per_pop=5, - num_genes=4, - num_parents_mating=2, - fitness_func=fitness_func, - gene_type=int, - gene_space=[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], - on_generation=on_generation, - allow_duplicate_genes=False) - ga_instance.run() - -Even that all the genes share the same space of values, no 2 genes -duplicate their values as provided by the next output. - -.. code:: python - - Generation 1 - [[2 3 1 4] - [2 3 1 4] - [2 4 1 3] - [2 3 1 4] - [1 3 2 4]] - Generation 2 - [[1 3 2 4] - [2 3 1 4] - [1 3 2 4] - [2 3 4 1] - [1 3 4 2]] - Generation 3 - [[1 3 4 2] - [2 3 4 1] - [1 3 4 2] - [3 1 4 2] - [3 2 4 1]] - Generation 4 - [[3 2 4 1] - [3 1 4 2] - [3 2 4 1] - [1 2 4 3] - [1 3 4 2]] - Generation 5 - [[1 3 4 2] - [1 2 4 3] - [2 1 4 3] - [1 2 4 3] - [1 2 4 3]] - -You should care of giving enough values for the genes so that PyGAD is -able to find alternatives for the gene value in case it duplicates with -another gene. - -There might be 2 duplicate genes where changing either of the 2 -duplicating genes will not solve the problem. 
For example, if -``gene_space=[[3, 0, 1], [4, 1, 2], [0, 2], [3, 2, 0]]`` and the -solution is ``[3 2 0 0]``, then the values of the last 2 genes -duplicate. There are no possible changes in the last 2 genes to solve -the problem. - -This problem can be solved by randomly changing one of the -non-duplicating genes that may make a room for a unique value in one the -2 duplicating genes. For example, by changing the second gene from 2 to -4, then any of the last 2 genes can take the value 2 and solve the -duplicates. The resultant gene is then ``[3 4 2 0]``. But this option is -not yet supported in PyGAD. - -Solve Duplicates using a Third Gene ------------------------------------ - -When ``allow_duplicate_genes=False`` and a user-defined ``gene_space`` -is used, it sometimes happen that there is no room to solve the -duplicates between the 2 genes by simply replacing the value of one gene -by another gene. In `PyGAD -3.1.0 `__, -the duplicates are solved by looking for a third gene that will help in -solving the duplicates. The following examples explain how it works. - -Example 1: - -Let's assume that this gene space is used and there is a solution with 2 -duplicate genes with the same value 4. - -.. code:: python - - Gene space: [[2, 3], - [3, 4], - [4, 5], - [5, 6]] - Solution: [3, 4, 4, 5] - -By checking the gene space, the second gene can have the values -``[3, 4]`` and the third gene can have the values ``[4, 5]``. To solve -the duplicates, we have the value of any of these 2 genes. - -If the value of the second gene changes from 4 to 3, then it will be -duplicate with the first gene. If we are to change the value of the -third gene from 4 to 5, then it will duplicate with the fourth gene. As -a conclusion, trying to just selecting a different gene value for either -the second or third genes will introduce new duplicating genes. 
- -When there are 2 duplicate genes but there is no way to solve their -duplicates, then the solution is to change a third gene that makes a -room to solve the duplicates between the 2 genes. - -In our example, duplicates between the second and third genes can be -solved by, for example,: - -- Changing the first gene from 3 to 2 then changing the second gene - from 4 to 3. - -- Or changing the fourth gene from 5 to 6 then changing the third gene - from 4 to 5. - -Generally, this is how to solve such duplicates: - -1. For any duplicate gene **GENE1**, select another value. - -2. Check which other gene **GENEX** has duplicate with this new value. - -3. Find if **GENEX** can have another value that will not cause any more - duplicates. If so, go to step 7. - -4. If all the other values of **GENEX** will cause duplicates, then try - another gene **GENEY**. - -5. Repeat steps 3 and 4 until exploring all the genes. - -6. If there is no possibility to solve the duplicates, then there is not - way to solve the duplicates and we have to keep the duplicate value. - -7. If a value for a gene **GENEM** is found that will not cause more - duplicates, then use this value for the gene **GENEM**. - -8. Replace the value of the gene **GENE1** by the old value of the gene - **GENEM**. This solves the duplicates. - -This is an example to solve the duplicate for the solution -``[3, 4, 4, 5]``: - -1. Let's use the second gene with value 4. Because the space of this - gene is ``[3, 4]``, then the only other value we can select is 3. - -2. The first gene also have the value 3. - -3. The first gene has another value 2 that will not cause more - duplicates in the solution. Then go to step 7. - -4. Skip. - -5. Skip. - -6. Skip. - -7. The value of the first gene 3 will be replaced by the new value 2. - The new solution is [2, 4, 4, 5]. - -8. Replace the value of the second gene 4 by the old value of the first - gene which is 3. The new solution is [2, 3, 4, 5]. The duplicate is - solved. 
- -Example 2: - -.. code:: python - - Gene space: [[0, 1], - [1, 2], - [2, 3], - [3, 4]] - Solution: [1, 2, 2, 3] - -The quick summary is: - -- Change the value of the first gene from 1 to 0. The solution becomes - [0, 2, 2, 3]. - -- Change the value of the second gene from 2 to 1. The solution becomes - [0, 1, 2, 3]. The duplicate is solved. - -User-Defined Crossover, Mutation, and Parent Selection Operators -================================================================ - -Previously, the user could select the type of the crossover, mutation, -and parent selection operators by assigning the name of the operator to -the following parameters of the ``pygad.GA`` class's constructor: - -1. ``crossover_type`` - -2. ``mutation_type`` - -3. ``parent_selection_type`` - -This way, the user could only use the built-in functions for each of these -operators. - -Starting from `PyGAD -2.16.0 `__, -the user can create custom crossover, mutation, and parent selection -operators and assign these functions to the above parameters. Thus, a -new operator can be plugged easily into the `PyGAD -Lifecycle `__. - -This is a sample code that does not use any custom function. - -.. code:: python - - import pygad - import numpy - - equation_inputs = [4,-2,3.5] - desired_output = 44 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func) - - ga_instance.run() - ga_instance.plot_fitness() - -This section describes the expected input parameters and outputs. For -simplicity, all of these custom functions accept the instance of the -``pygad.GA`` class as the last parameter.
- -User-Defined Crossover Operator -------------------------------- - -The user-defined crossover function is a Python function that accepts 3 -parameters: - -1. The selected parents. - -2. The size of the offspring as a tuple of 2 numbers: (the offspring - size, number of genes). - -3. The instance from the ``pygad.GA`` class. This instance helps to - retrieve any property like ``population``, ``gene_type``, - ``gene_space``, etc. - -This function should return a NumPy array of shape equal to the value -passed to the second parameter. - -The next code creates a template for the user-defined crossover -operator. You can use any names for the parameters. Note how a NumPy -array is returned. - -.. code:: python - - def crossover_func(parents, offspring_size, ga_instance): - offspring = ... - ... - return numpy.array(offspring) - -As an example, the next code creates a single-point crossover function. -By randomly generating a random point (i.e. index of a gene), the -function simply uses 2 parents to produce an offspring by copying the -genes before the point from the first parent and the remaining from the -second parent. - -.. code:: python - - def crossover_func(parents, offspring_size, ga_instance): - offspring = [] - idx = 0 - while len(offspring) != offspring_size[0]: - parent1 = parents[idx % parents.shape[0], :].copy() - parent2 = parents[(idx + 1) % parents.shape[0], :].copy() - - random_split_point = numpy.random.choice(range(offspring_size[1])) - - parent1[random_split_point:] = parent2[random_split_point:] - - offspring.append(parent1) - - idx += 1 - - return numpy.array(offspring) - -To use this user-defined function, simply assign its name to the -``crossover_type`` parameter in the constructor of the ``pygad.GA`` -class. The next code gives an example. In this case, the custom function -will be called in each generation rather than calling the built-in -crossover functions defined in PyGAD. - -.. 
code:: python - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - crossover_type=crossover_func) - -User-Defined Mutation Operator ------------------------------- - -A user-defined mutation function/operator can be created the same way a -custom crossover operator/function is created. Simply, it is a Python -function that accepts 2 parameters: - -1. The offspring to be mutated. - -2. The instance from the ``pygad.GA`` class. This instance helps to - retrieve any property like ``population``, ``gene_type``, - ``gene_space``, etc. - -The template for the user-defined mutation function is given in the next -code. According to the user preference, the function should make some -random changes to the genes. - -.. code:: python - - def mutation_func(offspring, ga_instance): - ... - return offspring - -The next code builds the random mutation where a single gene from each -chromosome is mutated by adding a random number between 0 and 1 to the -gene's value. - -.. code:: python - - def mutation_func(offspring, ga_instance): - - for chromosome_idx in range(offspring.shape[0]): - random_gene_idx = numpy.random.choice(range(offspring.shape[1])) - - offspring[chromosome_idx, random_gene_idx] += numpy.random.random() - - return offspring - -Here is how this function is assigned to the ``mutation_type`` -parameter. - -.. code:: python - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - crossover_type=crossover_func, - mutation_type=mutation_func) - -Note that there are other things to take into consideration like: - -- Making sure that each gene conforms to the data type(s) listed in the - ``gene_type`` parameter. - -- If the ``gene_space`` parameter is used, then the new value for the - gene should conform to the values/ranges listed. 
- -- Mutating a number of genes that conforms to the parameters - ``mutation_percent_genes``, ``mutation_probability``, and - ``mutation_num_genes``. - -- Whether mutation happens with or without replacement based on the - ``mutation_by_replacement`` parameter. - -- The minimum and maximum values from which a random value is generated - based on the ``random_mutation_min_val`` and - ``random_mutation_max_val`` parameters. - -- Whether duplicates are allowed or not in the chromosome based on the - ``allow_duplicate_genes`` parameter. - -and more. - -It all depends on your objective from building the mutation function. -You may neglect or take into account some of these considerations according to your -objective. - -User-Defined Parent Selection Operator --------------------------------------- - -There is not much to mention about building a user-defined parent selection -function as things are similar to building a crossover or mutation -function. Just create a Python function that accepts 3 parameters: - -1. The fitness values of the current population. - -2. The number of parents needed. - -3. The instance from the ``pygad.GA`` class. This instance helps to - retrieve any property like ``population``, ``gene_type``, - ``gene_space``, etc. - -The function should return 2 outputs: - -1. The selected parents as a NumPy array. Its shape is equal to (the - number of selected parents, ``num_genes``). Note that the number of - selected parents is equal to the value assigned to the second input - parameter. - -2. The indices of the selected parents inside the population. It is a 1D - array with length equal to the number of selected parents. - -The outputs must be of type ``numpy.ndarray``. - -Here is a template for building a custom parent selection function. - -.. code:: python - - def parent_selection_func(fitness, num_parents, ga_instance): - ... - return parents, fitness_sorted[:num_parents] - -The next code builds the steady-state parent selection where the best -parents are selected.
The number of parents is equal to the value in the -``num_parents`` parameter. - -.. code:: python - - def parent_selection_func(fitness, num_parents, ga_instance): - - fitness_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) - fitness_sorted.reverse() - - parents = numpy.empty((num_parents, ga_instance.population.shape[1])) - - for parent_num in range(num_parents): - parents[parent_num, :] = ga_instance.population[fitness_sorted[parent_num], :].copy() - - return parents, numpy.array(fitness_sorted[:num_parents]) - -Finally, the defined function is assigned to the -``parent_selection_type`` parameter as in the next code. - -.. code:: python - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - crossover_type=crossover_func, - mutation_type=mutation_func, - parent_selection_type=parent_selection_func) - -Example -------- - -By discussing how to customize the 3 operators, the next code uses the -previous 3 user-defined functions instead of the built-in functions. - -.. 
code:: python - - import pygad - import numpy - - equation_inputs = [4,-2,3.5] - desired_output = 44 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - - return fitness - - def parent_selection_func(fitness, num_parents, ga_instance): - - fitness_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) - fitness_sorted.reverse() - - parents = numpy.empty((num_parents, ga_instance.population.shape[1])) - - for parent_num in range(num_parents): - parents[parent_num, :] = ga_instance.population[fitness_sorted[parent_num], :].copy() - - return parents, numpy.array(fitness_sorted[:num_parents]) - - def crossover_func(parents, offspring_size, ga_instance): - - offspring = [] - idx = 0 - while len(offspring) != offspring_size[0]: - parent1 = parents[idx % parents.shape[0], :].copy() - parent2 = parents[(idx + 1) % parents.shape[0], :].copy() - - random_split_point = numpy.random.choice(range(offspring_size[1])) - - parent1[random_split_point:] = parent2[random_split_point:] - - offspring.append(parent1) - - idx += 1 - - return numpy.array(offspring) - - def mutation_func(offspring, ga_instance): - - for chromosome_idx in range(offspring.shape[0]): - random_gene_idx = numpy.random.choice(range(offspring.shape[1])) - - offspring[chromosome_idx, random_gene_idx] += numpy.random.random() - - return offspring - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - crossover_type=crossover_func, - mutation_type=mutation_func, - parent_selection_type=parent_selection_func) - - ga_instance.run() - ga_instance.plot_fitness() - -This is the same example but using methods instead of functions. - -..
code:: python - - import pygad - import numpy - - equation_inputs = [4,-2,3.5] - desired_output = 44 - - class Test: - def fitness_func(self, ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - - return fitness - - def parent_selection_func(self, fitness, num_parents, ga_instance): - - fitness_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) - fitness_sorted.reverse() - - parents = numpy.empty((num_parents, ga_instance.population.shape[1])) - - for parent_num in range(num_parents): - parents[parent_num, :] = ga_instance.population[fitness_sorted[parent_num], :].copy() - - return parents, numpy.array(fitness_sorted[:num_parents]) - - def crossover_func(self, parents, offspring_size, ga_instance): - - offspring = [] - idx = 0 - while len(offspring) != offspring_size[0]: - parent1 = parents[idx % parents.shape[0], :].copy() - parent2 = parents[(idx + 1) % parents.shape[0], :].copy() - - random_split_point = numpy.random.choice(range(offspring_size[1])) - - parent1[random_split_point:] = parent2[random_split_point:] - - offspring.append(parent1) - - idx += 1 - - return numpy.array(offspring) - - def mutation_func(self, offspring, ga_instance): - - for chromosome_idx in range(offspring.shape[0]): - random_gene_idx = numpy.random.choice(range(offspring.shape[1])) - - offspring[chromosome_idx, random_gene_idx] += numpy.random.random() - - return offspring - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=Test().fitness_func, - parent_selection_type=Test().parent_selection_func, - crossover_type=Test().crossover_func, - mutation_type=Test().mutation_func) - - ga_instance.run() - ga_instance.plot_fitness() - -..
_more-about-the-genetype-parameter: - -More about the ``gene_type`` Parameter -====================================== - -The ``gene_type`` parameter allows the user to control the data type for -all genes at once or each individual gene. In `PyGAD -2.15.0 `__, -the ``gene_type`` parameter also supports customizing the precision for -``float`` data types. As a result, the ``gene_type`` parameter helps to: - -1. Select a data type for all genes with or without precision. - -2. Select a data type for each individual gene with or without - precision. - -Let's discuss things by examples. - -Data Type for All Genes without Precision ------------------------------------------ - -The data type for all genes can be specified by assigning the numeric -data type directly to the ``gene_type`` parameter. This is an example to -make all genes of ``int`` data types. - -.. code:: python - - gene_type=int - -Given that the supported numeric data types of PyGAD include Python's -``int`` and ``float`` in addition to all numeric types of ``NumPy``, -then any of these types can be assigned to the ``gene_type`` parameter. - -If no precision is specified for a ``float`` data type, then the -complete floating-point number is kept. - -The next code uses an ``int`` data type for all genes where the genes in -the initial and final population are only integers. - -.. code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_type=int) - - print("Initial Population") - print(ga_instance.initial_population) - - ga_instance.run() - - print("Final Population") - print(ga_instance.population) - -.. 
code:: python - - Initial Population - [[ 1 -1 2 0 -3] - [ 0 -2 0 -3 -1] - [ 0 -1 -1 2 0] - [-2 3 -2 3 3] - [ 0 0 2 -2 -2]] - - Final Population - [[ 1 -1 2 2 0] - [ 1 -1 2 2 0] - [ 1 -1 2 2 0] - [ 1 -1 2 2 0] - [ 1 -1 2 2 0]] - -Data Type for All Genes with Precision --------------------------------------- - -A precision can only be specified for a ``float`` data type and cannot -be specified for integers. Here is an example to use a precision of 3 -for the ``float`` data type. In this case, all genes are of type -``float`` and their maximum precision is 3. - -.. code:: python - - gene_type=[float, 3] - -The next code prints the initial and final population where the -genes are of type ``float`` with precision 3. - -.. code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_type=[float, 3]) - - print("Initial Population") - print(ga_instance.initial_population) - - ga_instance.run() - - print("Final Population") - print(ga_instance.population) - -..
code:: python - - Initial Population - [[-2.417 -0.487 3.623 2.457 -2.362] - [-1.231 0.079 -1.63 1.629 -2.637] - [ 0.692 -2.098 0.705 0.914 -3.633] - [ 2.637 -1.339 -1.107 -0.781 -3.896] - [-1.495 1.378 -1.026 3.522 2.379]] - - Final Population - [[ 1.714 -1.024 3.623 3.185 -2.362] - [ 0.692 -1.024 3.623 3.185 -2.362] - [ 0.692 -1.024 3.623 3.375 -2.362] - [ 0.692 -1.024 4.041 3.185 -2.362] - [ 1.714 -0.644 3.623 3.185 -2.362]] - -Data Type for each Individual Gene without Precision ----------------------------------------------------- - -In `PyGAD -2.14.0 `__, -the ``gene_type`` parameter allows customizing the gene type for each -individual gene. This is by using a ``list``/``tuple``/``numpy.ndarray`` -with number of elements equal to the number of genes. For each element, -a type is specified for the corresponding gene. - -This is an example for a 5-gene problem where different types are -assigned to the genes. - -.. code:: python - - gene_type=[int, float, numpy.float16, numpy.int8, float] - -This is a complete code that prints the initial and final population for -a custom-gene data type. - -.. code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_type=[int, float, numpy.float16, numpy.int8, float]) - - print("Initial Population") - print(ga_instance.initial_population) - - ga_instance.run() - - print("Final Population") - print(ga_instance.population) - -.. 
code:: python - - Initial Population - [[0 0.8615522360026828 0.7021484375 -2 3.5301821368185866] - [-3 2.648189378595294 -3.830078125 1 -0.9586271572917742] - [3 3.7729827570110714 1.2529296875 -3 1.395741994211889] - [0 1.0490687178053282 1.51953125 -2 0.7243617940450235] - [0 -0.6550158436937226 -2.861328125 -2 1.8212734549263097]] - - Final Population - [[3 3.7729827570110714 2.055 0 0.7243617940450235] - [3 3.7729827570110714 1.458 0 -0.14638754050305036] - [3 3.7729827570110714 1.458 0 0.0869406120516778] - [3 3.7729827570110714 1.458 0 0.7243617940450235] - [3 3.7729827570110714 1.458 0 -0.14638754050305036]] - -Data Type for each Individual Gene with Precision -------------------------------------------------- - -The precision can also be specified for the ``float`` data types as in -the next line where the second gene precision is 2 and last gene -precision is 1. - -.. code:: python - - gene_type=[int, [float, 2], numpy.float16, numpy.int8, [float, 1]] - -This is a complete example where the initial and final populations are -printed where the genes comply with the data types and precisions -specified. - -.. code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_type=[int, [float, 2], numpy.float16, numpy.int8, [float, 1]]) - - print("Initial Population") - print(ga_instance.initial_population) - - ga_instance.run() - - print("Final Population") - print(ga_instance.population) - -.. 
code:: python - - Initial Population - [[-2 -1.22 1.716796875 -1 0.2] - [-1 -1.58 -3.091796875 0 -1.3] - [3 3.35 -0.107421875 1 -3.3] - [-2 -3.58 -1.779296875 0 0.6] - [2 -3.73 2.65234375 3 -0.5]] - - Final Population - [[2 -4.22 3.47 3 -1.3] - [2 -3.73 3.47 3 -1.3] - [2 -4.22 3.47 2 -1.3] - [2 -4.58 3.47 3 -1.3] - [2 -3.73 3.47 3 -1.3]] - -Visualization in PyGAD -====================== - -This section discusses the different options to visualize the results in -PyGAD through these methods: - -1. ``plot_fitness()`` - -2. ``plot_genes()`` - -3. ``plot_new_solution_rate()`` - -In the following code, the ``save_solutions`` flag is set to ``True`` -which means all solutions are saved in the ``solutions`` attribute. The -code runs for only 10 generations. - -.. code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2, 3.5, 8] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=10, - num_parents_mating=5, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_space=[range(1, 10), range(10, 20), range(15, 30), range(20, 40), range(25, 50), range(10, 30), range(20, 50)], - gene_type=int, - save_solutions=True) - - ga_instance.run() - -Let's explore how to visualize the results by the above mentioned -methods. - -.. _plotfitness-2: - -``plot_fitness()`` ------------------- - -The ``plot_fitness()`` method shows the fitness value for each -generation. - -.. _plottypeplot: - -``plot_type="plot"`` -~~~~~~~~~~~~~~~~~~~~ - -The simplest way to call this method is as follows leaving the -``plot_type`` with its default value ``"plot"`` to create a continuous -line connecting the fitness values across all generations: - -.. 
code:: python - - ga_instance.plot_fitness() - # ga_instance.plot_fitness(plot_type="plot") - -.. image:: https://user-images.githubusercontent.com/16560492/122472609-d02f5280-cf8e-11eb-88a7-f9366ff6e7c6.png - :alt: - -.. _plottypescatter: - -``plot_type="scatter"`` -~~~~~~~~~~~~~~~~~~~~~~~ - -The ``plot_type`` can also be set to ``"scatter"`` to create a scatter -graph with each individual fitness represented as a dot. The size of -these dots can be changed using the ``linewidth`` parameter. - -.. code:: python - - ga_instance.plot_fitness(plot_type="scatter") - -.. image:: https://user-images.githubusercontent.com/16560492/122473159-75e2c180-cf8f-11eb-942d-31279b286dbd.png - :alt: - -.. _plottypebar: - -``plot_type="bar"`` -~~~~~~~~~~~~~~~~~~~ - -The third value for the ``plot_type`` parameter is ``"bar"`` to create a -bar graph with each individual fitness represented as a bar. - -.. code:: python - - ga_instance.plot_fitness(plot_type="bar") - -.. image:: https://user-images.githubusercontent.com/16560492/122473340-b7736c80-cf8f-11eb-89c5-4f7db3b653cc.png - :alt: - -.. _plotnewsolutionrate-2: - -``plot_new_solution_rate()`` ----------------------------- - -The ``plot_new_solution_rate()`` method presents the number of new -solutions explored in each generation. This helps to figure out if the -genetic algorithm is able to find new solutions as an indication of more -possible evolution. If no new solutions are explored, this is an -indication that no further evolution is possible. - -The ``plot_new_solution_rate()`` method accepts the same parameters as -in the ``plot_fitness()`` method with 3 possible values for -``plot_type`` parameter. - -.. _plottypeplot-2: - -``plot_type="plot"`` -~~~~~~~~~~~~~~~~~~~~ - -The default value for the ``plot_type`` parameter is ``"plot"``. - -.. 
code:: python - - ga_instance.plot_new_solution_rate() - # ga_instance.plot_new_solution_rate(plot_type="plot") - -The next figure shows that, for example, generation 6 has the least -number of new solutions which is 4. The number of new solutions in the -first generation is always equal to the number of solutions in the -population (i.e. the value assigned to the ``sol_per_pop`` parameter in -the constructor of the ``pygad.GA`` class) which is 10 in this example. - -.. image:: https://user-images.githubusercontent.com/16560492/122475815-3322e880-cf93-11eb-9648-bf66f823234b.png - :alt: - -.. _plottypescatter-2: - -``plot_type="scatter"`` -~~~~~~~~~~~~~~~~~~~~~~~ - -The previous graph can be represented as scattered points by setting -``plot_type="scatter"``. - -.. code:: python - - ga_instance.plot_new_solution_rate(plot_type="scatter") - -.. image:: https://user-images.githubusercontent.com/16560492/122476108-adec0380-cf93-11eb-80ac-7588bf90492f.png - :alt: - -.. _plottypebar-2: - -``plot_type="bar"`` -~~~~~~~~~~~~~~~~~~~ - -By setting ``plot_type="bar"``, each value is represented as a -vertical bar. - -.. code:: python - - ga_instance.plot_new_solution_rate(plot_type="bar") - -.. image:: https://user-images.githubusercontent.com/16560492/122476173-c2c89700-cf93-11eb-9e77-d39737cd3a96.png - :alt: - -.. _plotgenes-2: - -``plot_genes()`` ----------------- - -The ``plot_genes()`` method is the third option to visualize the PyGAD -results. This method has 3 control variables: - -1. ``graph_type="plot"``: Can be ``"plot"`` (default), ``"boxplot"``, or - ``"histogram"``. - -2. ``plot_type="plot"``: Identical to the ``plot_type`` parameter - explored in the ``plot_fitness()`` and ``plot_new_solution_rate()`` - methods. - -3. ``solutions="all"``: Can be ``"all"`` (default) or ``"best"``. - -These 3 parameters control the style of the output figure. - -The ``graph_type`` parameter selects the type of the graph which helps -to explore the gene values as: - -1.
A normal plot. - -2. A histogram. - -3. A box and whisker plot. - -The ``plot_type`` parameter works only when the type of the graph is set -to ``"plot"``. - -The ``solutions`` parameter selects whether the genes come from all -solutions in the population or from just the best solutions. - -.. _graphtypeplot: - -``graph_type="plot"`` -~~~~~~~~~~~~~~~~~~~~~ - -When ``graph_type="plot"``, then the figure creates a normal graph where -the relationship between the gene values and the generation numbers is -represented as a continuous plot, scattered points, or bars. - -.. _plottypeplot-3: - -``plot_type="plot"`` -^^^^^^^^^^^^^^^^^^^^ - -Because the default value for both ``graph_type`` and ``plot_type`` is -``"plot"``, then all of the lines below creates the same figure. This -figure is helpful to know whether a gene value lasts for more -generations as an indication of the best value for this gene. For -example, the value 16 for the gene with index 5 (at column 2 and row 2 -of the next graph) lasted for 83 generations. - -.. code:: python - - ga_instance.plot_genes() - - ga_instance.plot_genes(graph_type="plot") - - ga_instance.plot_genes(plot_type="plot") - - ga_instance.plot_genes(graph_type="plot", - plot_type="plot") - -.. image:: https://user-images.githubusercontent.com/16560492/122477158-4a62d580-cf95-11eb-8c93-9b6e74cb814c.png - :alt: - -As the default value for the ``solutions`` parameter is ``"all"``, then -the following method calls generate the same plot. - -.. code:: python - - ga_instance.plot_genes(solutions="all") - - ga_instance.plot_genes(graph_type="plot", - solutions="all") - - ga_instance.plot_genes(plot_type="plot", - solutions="all") - - ga_instance.plot_genes(graph_type="plot", - plot_type="plot", - solutions="all") - -.. _plottypescatter-3: - -``plot_type="scatter"`` -^^^^^^^^^^^^^^^^^^^^^^^ - -The following calls of the ``plot_genes()`` method create the same -scatter plot. - -.. 
code:: python - - ga_instance.plot_genes(plot_type="scatter") - - ga_instance.plot_genes(graph_type="plot", - plot_type="scatter", - solutions='all') - -.. image:: https://user-images.githubusercontent.com/16560492/122477273-73836600-cf95-11eb-828f-f357c7b0f815.png - :alt: - -.. _plottypebar-3: - -``plot_type="bar"`` -^^^^^^^^^^^^^^^^^^^ - -.. code:: python - - ga_instance.plot_genes(plot_type="bar") - - ga_instance.plot_genes(graph_type="plot", - plot_type="bar", - solutions='all') - -.. image:: https://user-images.githubusercontent.com/16560492/122477370-99106f80-cf95-11eb-8643-865b55e6b844.png - :alt: - -.. _graphtypeboxplot: - -``graph_type="boxplot"`` -~~~~~~~~~~~~~~~~~~~~~~~~ - -By setting ``graph_type`` to ``"boxplot"``, a box and whisker graph -is created. Now, the ``plot_type`` parameter has no effect. - -The following 2 calls of the ``plot_genes()`` method create the same -figure as the default value for the ``solutions`` parameter is -``"all"``. - -.. code:: python - - ga_instance.plot_genes(graph_type="boxplot") - - ga_instance.plot_genes(graph_type="boxplot", - solutions='all') - -.. image:: https://user-images.githubusercontent.com/16560492/122479260-beeb4380-cf98-11eb-8f08-23707929b12c.png - :alt: - -.. _graphtypehistogram: - -``graph_type="histogram"`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For ``graph_type="histogram"``, a histogram is created for each gene. -Similar to ``graph_type="boxplot"``, the ``plot_type`` parameter has no -effect. - -The following 2 calls of the ``plot_genes()`` method create the same -figure as the default value for the ``solutions`` parameter is -``"all"``. - -.. code:: python - - ga_instance.plot_genes(graph_type="histogram") - - ga_instance.plot_genes(graph_type="histogram", - solutions='all') - -.. image:: https://user-images.githubusercontent.com/16560492/122477314-8007be80-cf95-11eb-9c95-da3f49204151.png - :alt: - -All the previous figures can be created for only the best solutions by -setting ``solutions="best"``.
- -Parallel Processing in PyGAD -============================ - -Starting from `PyGAD -2.17.0 `__, -parallel processing is supported. This section explains how to use -parallel processing in PyGAD. - -According to the `PyGAD -lifecycle `__, -only 2 operations can be parallelized: - -1. Population fitness calculation. - -2. Mutation. - -The reason is that the calculations in these 2 operations are -independent (i.e. each solution/chromosome is handled independently from -the others) and can be distributed across different processes or -threads. - -For the mutation operation, it does not do intensive calculations on the -CPU. Its calculations are simple like flipping the values of some genes -from 0 to 1 or adding a random value to some genes. So, it does not take -much CPU processing time. Experiments proved that parallelizing the -mutation operation across the solutions increases the time instead of -reducing it. This is because running multiple processes or threads adds -overhead to manage them. Thus, parallel processing cannot be applied on -the mutation operation. - -For the population fitness calculation, parallel processing can help -make a difference and reduce the processing time. But this is -conditional on the type of calculations done in the fitness function. If -the fitness function makes intensive calculations and takes much -processing time from the CPU, then it is probable that parallel -processing will help to cut down the overall time. - -This section explains how parallel processing works in PyGAD and how to -use parallel processing in PyGAD. - -How to Use Parallel Processing in PyGAD ---------------------------------------- - -Starting from `PyGAD -2.17.0 `__, -a new parameter called ``parallel_processing`` was added to the constructor -of the ``pygad.GA`` class. - -.. code:: python - - import pygad - ... - ga_instance = pygad.GA(..., - parallel_processing=...) - ...
- -This parameter allows the user to do the following: - -1. Enable parallel processing. - -2. Select whether processes or threads are used. - -3. Specify the number of processes or threads to be used. - -These are the 3 possible values for the ``parallel_processing`` parameter: - -1. ``None``: (Default) It means no parallel processing is used. - -2. A positive integer referring to the number of threads to be used - (i.e. threads, not processes, are used). - -3. ``list``/``tuple``: If a list or a tuple of exactly 2 elements is - assigned, then: - - 1. The first element can be either ``'process'`` or ``'thread'`` to - specify whether processes or threads are used, respectively. - - 2. The second element can be: - - 1. A positive integer to select the maximum number of processes or - threads to be used. - - 2. ``0`` to indicate that 0 processes or threads are used. It - means no parallel processing. This is identical to setting - ``parallel_processing=None``. - - 3. ``None`` to use the default value as calculated by the - ``concurrent.futures`` module. - -These are examples of the values assigned to the ``parallel_processing`` -parameter: - -- ``parallel_processing=4``: Because the parameter is assigned a - positive integer, this means parallel processing is activated where 4 - threads are used. - -- ``parallel_processing=["thread", 5]``: Use parallel processing with 5 - threads. This is identical to ``parallel_processing=5``. - -- ``parallel_processing=["process", 8]``: Use parallel processing with - 8 processes. - -- ``parallel_processing=["process", 0]``: As the second element is - given the value 0, this means do not use parallel processing. This is - identical to ``parallel_processing=None``. - -Examples --------- - -The examples will help you know the difference between using processes -and threads. Moreover, they will give an idea when parallel processing -would make a difference and reduce the time.
These are dummy examples -where the fitness function is made to always return 0. - -The first example uses 10 genes, 5 solutions in the population where -only 3 solutions mate, and 9999 generations. The fitness function uses a -``for`` loop with 100 iterations just to have some calculations. In the -constructor of the ``pygad.GA`` class, ``parallel_processing=None`` -means no parallel processing is used. - -.. code:: python - - import pygad - import time - - def fitness_func(ga_instance, solution, solution_idx): - for _ in range(99): - pass - return 0 - - ga_instance = pygad.GA(num_generations=9999, - num_parents_mating=3, - sol_per_pop=5, - num_genes=10, - fitness_func=fitness_func, - suppress_warnings=True, - parallel_processing=None) - - if __name__ == '__main__': - t1 = time.time() - - ga_instance.run() - - t2 = time.time() - print("Time is", t2-t1) - -When parallel processing is not used, the time it takes to run the -genetic algorithm is ``1.5`` seconds. - -In the comparison, let's do a second experiment where parallel -processing is used with 5 threads. In this case, it take ``5`` seconds. - -.. code:: python - - ... - ga_instance = pygad.GA(..., - parallel_processing=5) - ... - -For the third experiment, processes instead of threads are used. Also, -only 99 generations are used instead of 9999. The time it takes is -``99`` seconds. - -.. code:: python - - ... - ga_instance = pygad.GA(num_generations=99, - ..., - parallel_processing=["process", 5]) - ... - -This is the summary of the 3 experiments: - -1. No parallel processing & 9999 generations: 1.5 seconds. - -2. Parallel processing with 5 threads & 9999 generations: 5 seconds - -3. Parallel processing with 5 processes & 99 generations: 99 seconds - -Because the fitness function does not need much CPU time, the normal -processing takes the least time. Running processes for this simple -problem takes 99 compared to only 5 seconds for threads because managing -processes is much heavier than managing threads. 
Thus, most of the CPU -time is for swapping the processes instead of executing the code. - -In the second example, the loop makes 99999999 iterations and only 5 -generations are used. With no parallelization, it takes 22 seconds. - -.. code:: python - - import pygad - import time - - def fitness_func(ga_instance, solution, solution_idx): - for _ in range(99999999): - pass - return 0 - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=3, - sol_per_pop=5, - num_genes=10, - fitness_func=fitness_func, - suppress_warnings=True, - parallel_processing=None) - - if __name__ == '__main__': - t1 = time.time() - ga_instance.run() - t2 = time.time() - print("Time is", t2-t1) - -It takes 15 seconds when 10 processes are used. - -.. code:: python - - ... - ga_instance = pygad.GA(..., - parallel_processing=["process", 10]) - ... - -This is compared to 20 seconds when 10 threads are used. - -.. code:: python - - ... - ga_instance = pygad.GA(..., - parallel_processing=["thread", 10]) - ... - -Based on the second example, using parallel processing with 10 processes -takes the least time because there is much CPU work done. Generally, -processes are preferred over threads when most of the work in on the -CPU. Threads are preferred over processes in some situations like doing -input/output operations. - -*Before releasing* `PyGAD -2.17.0 `__\ *,* -`László -Fazekas `__ -*wrote an article to parallelize the fitness function with PyGAD. Check -it:* `How Genetic Algorithms Can Compete with Gradient Descent and -Backprop `__. - -Print Lifecycle Summary -======================= - -In `PyGAD -2.19.0 `__, -a new method called ``summary()`` is supported. It prints a Keras-like -summary of the PyGAD lifecycle showing the steps, callback functions, -parameters, etc. - -This method accepts the following parameters: - -- ``line_length=70``: An integer representing the length of the single - line in characters. - -- ``fill_character=" "``: A character to fill the lines. 
- -- ``line_character="-"``: A character for creating a line separator. - -- ``line_character2="="``: A secondary character to create a line - separator. - -- ``columns_equal_len=False``: The table rows are split into - equal-sized columns or split subjective to the width needed. - -- ``print_step_parameters=True``: Whether to print extra parameters - about each step inside the step. If ``print_step_parameters=False`` - and ``print_parameters_summary=True``, then the parameters of each - step are printed at the end of the table. - -- ``print_parameters_summary=True``: Whether to print parameters - summary at the end of the table. If ``print_step_parameters=False``, - then the parameters of each step are printed at the end of the table - too. - -This is a quick example to create a PyGAD example. - -.. code:: python - - import pygad - import numpy - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - def genetic_fitness(solution, solution_idx): - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - def on_gen(ga): - pass - - def on_crossover_callback(a, b): - pass - - ga_instance = pygad.GA(num_generations=100, - num_parents_mating=10, - sol_per_pop=20, - num_genes=len(function_inputs), - on_crossover=on_crossover_callback, - on_generation=on_gen, - parallel_processing=2, - stop_criteria="reach_10", - fitness_batch_size=4, - crossover_probability=0.4, - fitness_func=genetic_fitness) - -Then call the ``summary()`` method to print the summary with the default -parameters. Note that entries for the crossover and generation callback -function are created because their callback functions are implemented -through the ``on_crossover_callback()`` and ``on_gen()``, respectively. - -.. code:: python - - ga_instance.summary() - -.. 
code:: bash - - ---------------------------------------------------------------------- - PyGAD Lifecycle - ====================================================================== - Step Handler Output Shape - ====================================================================== - Fitness Function genetic_fitness() (1) - Fitness batch size: 4 - ---------------------------------------------------------------------- - Parent Selection steady_state_selection() (10, 6) - Number of Parents: 10 - ---------------------------------------------------------------------- - Crossover single_point_crossover() (10, 6) - Crossover probability: 0.4 - ---------------------------------------------------------------------- - On Crossover on_crossover_callback() None - ---------------------------------------------------------------------- - Mutation random_mutation() (10, 6) - Mutation Genes: 1 - Random Mutation Range: (-1.0, 1.0) - Mutation by Replacement: False - Allow Duplicated Genes: True - ---------------------------------------------------------------------- - On Generation on_gen() None - Stop Criteria: [['reach', 10.0]] - ---------------------------------------------------------------------- - ====================================================================== - Population Size: (20, 6) - Number of Generations: 100 - Initial Population Range: (-4, 4) - Keep Elitism: 1 - Gene DType: [, None] - Parallel Processing: ['thread', 2] - Save Best Solutions: False - Save Solutions: False - ====================================================================== - -We can set the ``print_step_parameters`` and -``print_parameters_summary`` parameters to ``False`` to not print the -parameters. - -.. code:: python - - ga_instance.summary(print_step_parameters=False, - print_parameters_summary=False) - -.. 
code:: bash - - ---------------------------------------------------------------------- - PyGAD Lifecycle - ====================================================================== - Step Handler Output Shape - ====================================================================== - Fitness Function genetic_fitness() (1) - ---------------------------------------------------------------------- - Parent Selection steady_state_selection() (10, 6) - ---------------------------------------------------------------------- - Crossover single_point_crossover() (10, 6) - ---------------------------------------------------------------------- - On Crossover on_crossover_callback() None - ---------------------------------------------------------------------- - Mutation random_mutation() (10, 6) - ---------------------------------------------------------------------- - On Generation on_gen() None - ---------------------------------------------------------------------- - ====================================================================== - -Logging Outputs -=============== - -In `PyGAD -3.0.0 `__, -the ``print()`` statement is no longer used and the outputs are printed -using the `logging `__ -module. A a new parameter called ``logger`` is supported to accept the -user-defined logger. - -.. code:: python - - import logging - - logger = ... - - ga_instance = pygad.GA(..., - logger=logger, - ...) - -The default value for this parameter is ``None``. If there is no logger -passed (i.e. ``logger=None``), then a default logger is created to log -the messages to the console exactly like how the ``print()`` statement -works. - -Some advantages of using the the -`logging `__ module -instead of the ``print()`` statement are: - -1. The user has more control over the printed messages specially if - there is a project that uses multiple modules where each module - prints its messages. A logger can organize the outputs. - -2. 
Using the proper ``Handler``, the user can log the output messages to - files and not only restricted to printing it to the console. So, it - is much easier to record the outputs. - -3. The format of the printed messages can be changed by customizing the - ``Formatter`` assigned to the Logger. - -This section gives some quick examples to use the ``logging`` module and -then gives an example to use the logger with PyGAD. - -Logging to the Console ----------------------- - -This is an example to create a logger to log the messages to the -console. - -.. code:: python - - import logging - - # Create a logger - logger = logging.getLogger(__name__) - - # Set the logger level to debug so that all the messages are printed. - logger.setLevel(logging.DEBUG) - - # Create a stream handler to log the messages to the console. - stream_handler = logging.StreamHandler() - - # Set the handler level to debug. - stream_handler.setLevel(logging.DEBUG) - - # Create a formatter - formatter = logging.Formatter('%(message)s') - - # Add the formatter to handler. - stream_handler.setFormatter(formatter) - - # Add the stream handler to the logger - logger.addHandler(stream_handler) - -Now, we can log messages to the console with the format specified in the -``Formatter``. - -.. code:: python - - logger.debug('Debug message.') - logger.info('Info message.') - logger.warning('Warn message.') - logger.error('Error message.') - logger.critical('Critical message.') - -The outputs are identical to those returned using the ``print()`` -statement. - -.. code:: - - Debug message. - Info message. - Warn message. - Error message. - Critical message. - -By changing the format of the output messages, we can have more -information about each message. - -.. code:: python - - formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') - -This is a sample output. - -.. code:: python - - 2023-04-03 18:46:27 DEBUG: Debug message. 
- 2023-04-03 18:46:27 INFO: Info message. - 2023-04-03 18:46:27 WARNING: Warn message. - 2023-04-03 18:46:27 ERROR: Error message. - 2023-04-03 18:46:27 CRITICAL: Critical message. - -Note that you may need to clear the handlers after finishing the -execution. This is to make sure no cached handlers are used in the next -run. If the cached handlers are not cleared, then a single output -message may be repeated. - -.. code:: python - - logger.handlers.clear() - -Logging to a File ------------------ - -This is another example to log the messages to a file named -``logfile.txt``. The formatter prints the following about each message: - -1. The date and time at which the message is logged. - -2. The log level. - -3. The message. - -4. The path of the file. - -5. The line number of the log message. - -.. code:: python - - import logging - - level = logging.DEBUG - name = 'logfile.txt' - - logger = logging.getLogger(name) - logger.setLevel(level) - - file_handler = logging.FileHandler(name, 'a+', 'utf-8') - file_handler.setLevel(logging.DEBUG) - file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s - %(pathname)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S') - file_handler.setFormatter(file_format) - logger.addHandler(file_handler) - -This is what the outputs look like. - -.. code:: python - - 2023-04-03 18:54:03 DEBUG: Debug message. - c:\users\agad069\desktop\logger\example2.py:46 - 2023-04-03 18:54:03 INFO: Info message. - c:\users\agad069\desktop\logger\example2.py:47 - 2023-04-03 18:54:03 WARNING: Warn message. - c:\users\agad069\desktop\logger\example2.py:48 - 2023-04-03 18:54:03 ERROR: Error message. - c:\users\agad069\desktop\logger\example2.py:49 - 2023-04-03 18:54:03 CRITICAL: Critical message. - c:\users\agad069\desktop\logger\example2.py:50 - -Consider clearing the handlers if necessary. - -.. 
code:: python - - logger.handlers.clear() - -Log to Both the Console and a File ----------------------------------- - -This is an example to create a single Logger associated with 2 handlers: - -1. A file handler. - -2. A stream handler. - -.. code:: python - - import logging - - level = logging.DEBUG - name = 'logfile.txt' - - logger = logging.getLogger(name) - logger.setLevel(level) - - file_handler = logging.FileHandler(name,'a+','utf-8') - file_handler.setLevel(logging.DEBUG) - file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s - %(pathname)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S') - file_handler.setFormatter(file_format) - logger.addHandler(file_handler) - - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_format = logging.Formatter('%(message)s') - console_handler.setFormatter(console_format) - logger.addHandler(console_handler) - -When a log message is executed, then it is both printed to the console -and saved in the ``logfile.txt``. - -Consider clearing the handlers if necessary. - -.. code:: python - - logger.handlers.clear() - -PyGAD Example -------------- - -To use the logger in PyGAD, just create your custom logger and pass it -to the ``logger`` parameter. - -.. 
code:: python - - import logging - import pygad - import numpy - - level = logging.DEBUG - name = 'logfile.txt' - - logger = logging.getLogger(name) - logger.setLevel(level) - - file_handler = logging.FileHandler(name,'a+','utf-8') - file_handler.setLevel(logging.DEBUG) - file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') - file_handler.setFormatter(file_format) - logger.addHandler(file_handler) - - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_format = logging.Formatter('%(message)s') - console_handler.setFormatter(console_format) - logger.addHandler(console_handler) - - equation_inputs = [4, -2, 8] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - def on_generation(ga_instance): - ga_instance.logger.info("Generation = {generation}".format(generation=ga_instance.generations_completed)) - ga_instance.logger.info("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=40, - num_parents_mating=2, - keep_parents=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - on_generation=on_generation, - logger=logger) - ga_instance.run() - - logger.handlers.clear() - -By executing this code, the logged messages are printed to the console -and also saved in the text file. - -.. 
code:: python - - 2023-04-03 19:04:27 INFO: Generation = 1 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038086960368076276 - 2023-04-03 19:04:27 INFO: Generation = 2 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038214871408010853 - 2023-04-03 19:04:27 INFO: Generation = 3 - 2023-04-03 19:04:27 INFO: Fitness = 0.0003832795907974678 - 2023-04-03 19:04:27 INFO: Generation = 4 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038398612055017196 - 2023-04-03 19:04:27 INFO: Generation = 5 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038442348890867516 - 2023-04-03 19:04:27 INFO: Generation = 6 - 2023-04-03 19:04:27 INFO: Fitness = 0.0003854406039137763 - 2023-04-03 19:04:27 INFO: Generation = 7 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038646083174063284 - 2023-04-03 19:04:27 INFO: Generation = 8 - 2023-04-03 19:04:27 INFO: Fitness = 0.0003875169193024936 - 2023-04-03 19:04:27 INFO: Generation = 9 - 2023-04-03 19:04:27 INFO: Fitness = 0.0003888816727311021 - 2023-04-03 19:04:27 INFO: Generation = 10 - 2023-04-03 19:04:27 INFO: Fitness = 0.000389832593101348 - -Solve Non-Deterministic Problems -================================ - -PyGAD can be used to solve both deterministic and non-deterministic -problems. Deterministic are those that return the same fitness for the -same solution. For non-deterministic problems, a different fitness value -would be returned for the same solution. - -By default, PyGAD settings are set to solve deterministic problems. -PyGAD can save the explored solutions and their fitness to reuse in the -future. These instances attributes can save the solutions: - -1. ``solutions``: Exists if ``save_solutions=True``. - -2. ``best_solutions``: Exists if ``save_best_solutions=True``. - -3. ``last_generation_elitism``: Exists if ``keep_elitism`` > 0. - -4. ``last_generation_parents``: Exists if ``keep_parents`` > 0 or - ``keep_parents=-1``. - -To configure PyGAD for non-deterministic problems, we have to disable -saving the previous solutions. 
This is done by setting these parameters: - -1. ``keep_elitism=0`` - -2. ``keep_parents=0`` - -3. ``save_solutions=False`` - -4. ``save_best_solutions=False`` - -.. code:: python - - import pygad - ... - ga_instance = pygad.GA(..., - keep_elitism=0, - keep_parents=0, - save_solutions=False, - save_best_solutions=False, - ...) - -This way PyGAD will not save any explored solution and thus the fitness -function has to be called for each individual solution. - -Reuse the Fitness instead of Calling the Fitness Function -========================================================= - -It may happen that a previously explored solution in generation X is -explored again in another generation Y (where Y > X). For some problems, -calling the fitness function takes much time. - -For deterministic problems, it is better to not call the fitness -function for already explored solutions. Instead, reuse the fitness -of the old solution. PyGAD supports some options to help you save time -calling the fitness function for a previously explored solution. - -The parameters explored in this section can be set in the constructor of -the ``pygad.GA`` class. - -The ``cal_pop_fitness()`` method of the ``pygad.GA`` class checks these -parameters to see if there is a possibility of reusing the fitness -instead of calling the fitness function. - -.. _1-savesolutions: - -1. ``save_solutions`` ---------------------- - -It defaults to ``False``. If set to ``True``, then the population of -each generation is saved into the ``solutions`` attribute of the -``pygad.GA`` instance. In other words, every single solution is saved in -the ``solutions`` attribute. - -.. _2-savebestsolutions: - -2. ``save_best_solutions`` --------------------------- - -It defaults to ``False``. If ``True``, then it only saves the best -solution in every generation. - -.. _3-keepelitism: - -3. ``keep_elitism`` -------------------- - -It accepts an integer and defaults to 1. 
If set to a positive integer, -then it keeps the elitism of one generation available in the next -generation. - -.. _4-keepparents: - -4. ``keep_parents`` -------------------- - -It accepts an integer and defaults to -1. If set to ``-1`` or a positive -integer, then it keeps the parents of one generation available in the -next generation. - -Why the Fitness Function is not Called for Solution at Index 0? -=============================================================== - -PyGAD has a parameter called ``keep_elitism`` which defaults to 1. This -parameter defines the number of best solutions in generation **X** to -keep in the next generation **X+1**. The best solutions are just copied -from generation **X** to generation **X+1** without making any change. - -.. code:: python - - ga_instance = pygad.GA(..., - keep_elitism=1, - ...) - -The best solutions are copied to the beginning of the population. If -``keep_elitism=1``, this means the best solution in generation X is kept -in the next generation X+1 at index 0 of the population. If -``keep_elitism=2``, this means the 2 best solutions in generation X are -kept in the next generation X+1 at indices 0 and 1 of the population of -generation X+1. - -Because the fitness values of these best solutions are already calculated in -generation X, then their fitness values will not be recalculated at -generation X+1 (i.e. the fitness function will not be called for these -solutions again). Instead, their fitness values are just reused. This is -why you see that no solution with index 0 is passed to the fitness -function. - -To force calling the fitness function for each solution in every -generation, consider setting ``keep_elitism`` and ``keep_parents`` to 0. -Moreover, keep the 2 parameters ``save_solutions`` and -``save_best_solutions`` at their default value ``False``. - -.. code:: python - - ga_instance = pygad.GA(..., - keep_elitism=0, - keep_parents=0, - save_solutions=False, - save_best_solutions=False, - ...) 
- -Batch Fitness Calculation -========================= - -In `PyGAD -2.19.0 `__, -a new optional parameter called ``fitness_batch_size`` is supported to -calculate the fitness function in batches. Thanks to `Linan -Qiu `__ for opening the `GitHub issue -#136 `__. - -Its values can be: - -- ``1`` or ``None``: If the ``fitness_batch_size`` parameter is - assigned the value ``1`` or ``None`` (default), then the normal flow - is used where the fitness function is called for each individual - solution. That is, if there are 15 solutions, then the fitness - function is called 15 times. - -- ``1 < fitness_batch_size <= sol_per_pop``: If the - ``fitness_batch_size`` parameter is assigned a value satisfying this - condition ``1 < fitness_batch_size <= sol_per_pop``, then the - solutions are grouped into batches of size ``fitness_batch_size`` and - the fitness function is called once for each batch. In this case, the - fitness function must return a list/tuple/numpy.ndarray with a length - equal to the number of solutions passed. - -.. _example-without-fitnessbatchsize-parameter: - -Example without ``fitness_batch_size`` Parameter ------------------------------------------------- - -This is an example where the ``fitness_batch_size`` parameter is given -the value ``None`` (which is the default value). This is equivalent to -using the value ``1``. In this case, the fitness function will be called -for each solution. This means the fitness function ``fitness_func`` will -receive only a single solution. This is an example of the passed -arguments to the fitness function: - -.. code:: - - solution: [ 2.52860734, -0.94178795, 2.97545704, 0.84131987, -3.78447118, 2.41008358] - solution_idx: 3 - -The fitness function also must return a single numeric value as the -fitness for the passed solution. - -As we have a population of ``20`` solutions, then the fitness function -is called 20 times per generation. 
For 5 generations, the fitness -function is called ``20*5 = 100`` times. In PyGAD, the fitness function -is called after the last generation too and this adds an additional 20 -calls. So, the total number of calls to the fitness function is -``20*5 + 20 = 120``. - -Note that the ``keep_elitism`` and ``keep_parents`` parameters are set -to ``0`` to make sure no fitness values are reused and to force calling -the fitness function for each individual solution. - -.. code:: python - - import pygad - import numpy - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - number_of_calls = 0 - - def fitness_func(ga_instance, solution, solution_idx): - global number_of_calls - number_of_calls = number_of_calls + 1 - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=10, - sol_per_pop=20, - fitness_func=fitness_func, - fitness_batch_size=None, - # fitness_batch_size=1, - num_genes=len(function_inputs), - keep_elitism=0, - keep_parents=0) - - ga_instance.run() - print(number_of_calls) - -.. code:: - - 120 - -.. _example-with-fitnessbatchsize-parameter: - -Example with ``fitness_batch_size`` Parameter ---------------------------------------------- - -This is an example where the ``fitness_batch_size`` parameter is used -and assigned the value ``4``. This means the solutions will be grouped -into batches of ``4`` solutions. The fitness function will be called -once for each batch (i.e. called once for each 4 solutions). - -This is an example of the arguments passed to it: - -.. 
code:: python - - solutions: - [[ 3.1129432 -0.69123589 1.93792414 2.23772968 -1.54616001 -0.53930799] - [ 3.38508121 0.19890812 1.93792414 2.23095014 -3.08955597 3.10194128] - [ 2.37079504 -0.88819803 2.97545704 1.41742256 -3.95594055 2.45028256] - [ 2.52860734 -0.94178795 2.97545704 0.84131987 -3.78447118 2.41008358]] - solutions_indices: - [16, 17, 18, 19] - -As we have 20 solutions, then there are ``20/4 = 5`` patches. As a -result, the fitness function is called only 5 times per generation -instead of 20. For each call to the fitness function, it receives a -batch of 4 solutions. - -As we have 5 generations, then the function will be called ``5*5 = 25`` -times. Given the call to the fitness function after the last generation, -then the total number of calls is ``5*5 + 5 = 30``. - -.. code:: python - - import pygad - import numpy - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - number_of_calls = 0 - - def fitness_func_batch(ga_instance, solutions, solutions_indices): - global number_of_calls - number_of_calls = number_of_calls + 1 - batch_fitness = [] - for solution in solutions: - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - batch_fitness.append(fitness) - return batch_fitness - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=10, - sol_per_pop=20, - fitness_func=fitness_func_batch, - fitness_batch_size=4, - num_genes=len(function_inputs), - keep_elitism=0, - keep_parents=0) - - ga_instance.run() - print(number_of_calls) - -.. code:: - - 30 - -When batch fitness calculation is used, then we saved ``120 - 30 = 90`` -calls to the fitness function. - -Use Functions and Methods to Build Fitness and Callbacks -======================================================== - -In PyGAD 2.19.0, it is possible to pass user-defined functions or -methods to the following parameters: - -1. ``fitness_func`` - -2. ``on_start`` - -3. ``on_fitness`` - -4. ``on_parents`` - -5. 
``on_crossover`` - -6. ``on_mutation`` - -7. ``on_generation`` - -8. ``on_stop`` - -This section gives 2 examples to assign these parameters user-defined: - -1. Functions. - -2. Methods. - -Assign Functions ----------------- - -This is a dummy example where the fitness function returns a random -value. Note that the instance of the ``pygad.GA`` class is passed as the -first parameter of all functions. - -.. code:: python - - import pygad - import numpy - - def fitness_func(ga_instanse, solution, solution_idx): - return numpy.random.rand() - - def on_start(ga_instanse): - print("on_start") - - def on_fitness(ga_instanse, last_gen_fitness): - print("on_fitness") - - def on_parents(ga_instanse, last_gen_parents): - print("on_parents") - - def on_crossover(ga_instanse, last_gen_offspring): - print("on_crossover") - - def on_mutation(ga_instanse, last_gen_offspring): - print("on_mutation") - - def on_generation(ga_instanse): - print("on_generation\n") - - def on_stop(ga_instanse, last_gen_fitness): - print("on_stop") - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=4, - sol_per_pop=10, - num_genes=2, - on_start=on_start, - on_fitness=on_fitness, - on_parents=on_parents, - on_crossover=on_crossover, - on_mutation=on_mutation, - on_generation=on_generation, - on_stop=on_stop, - fitness_func=fitness_func) - - ga_instance.run() - -Assign Methods --------------- - -The next example has all the methods defined inside the class ``Test``. -All of the methods accept an additional parameter representing the -method's object of the class ``Test``. - -All methods accept ``self`` as the first parameter and the instance of -the ``pygad.GA`` class as the second parameter. - -.. 
code:: python - - import pygad - import numpy - - class Test: - def fitness_func(self, ga_instanse, solution, solution_idx): - return numpy.random.rand() - - def on_start(self, ga_instanse): - print("on_start") - - def on_fitness(self, ga_instanse, last_gen_fitness): - print("on_fitness") - - def on_parents(self, ga_instanse, last_gen_parents): - print("on_parents") - - def on_crossover(self, ga_instanse, last_gen_offspring): - print("on_crossover") - - def on_mutation(self, ga_instanse, last_gen_offspring): - print("on_mutation") - - def on_generation(self, ga_instanse): - print("on_generation\n") - - def on_stop(self, ga_instanse, last_gen_fitness): - print("on_stop") - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=4, - sol_per_pop=10, - num_genes=2, - on_start=Test().on_start, - on_fitness=Test().on_fitness, - on_parents=Test().on_parents, - on_crossover=Test().on_crossover, - on_mutation=Test().on_mutation, - on_generation=Test().on_generation, - on_stop=Test().on_stop, - fitness_func=Test().fitness_func) - - ga_instance.run() - -.. _examples-2: - -Examples -======== - -This section gives the complete code of some examples that use -``pygad``. Each subsection builds a different example. - -Linear Model Optimization -------------------------- +Linear Model Optimization - Single Objective +-------------------------------------------- This example is discussed in the `Steps to Use PyGAD `__ diff --git a/docs/source/pygad_more.rst b/docs/source/pygad_more.rst new file mode 100644 index 0000000..a965391 --- /dev/null +++ b/docs/source/pygad_more.rst @@ -0,0 +1,2171 @@ +More About PyGAD +================ + +.. _limit-the-gene-value-range-using-the-genespace-parameter: + +Limit the Gene Value Range using the ``gene_space`` Parameter +============================================================= + +In `PyGAD +2.11.0 `__, +the ``gene_space`` parameter supported a new feature to allow +customizing the range of accepted values for each gene. 
Let's take a +quick review of the ``gene_space`` parameter to build over it. + +The ``gene_space`` parameter allows the user to feed the space of values +of each gene. This way the accepted values for each gene is retracted to +the user-defined values. Assume there is a problem that has 3 genes +where each gene has different set of values as follows: + +1. Gene 1: ``[0.4, 12, -5, 21.2]`` + +2. Gene 2: ``[-2, 0.3]`` + +3. Gene 3: ``[1.2, 63.2, 7.4]`` + +Then, the ``gene_space`` for this problem is as given below. Note that +the order is very important. + +.. code:: python + + gene_space = [[0.4, 12, -5, 21.2], + [-2, 0.3], + [1.2, 63.2, 7.4]] + +In case all genes share the same set of values, then simply feed a +single list to the ``gene_space`` parameter as follows. In this case, +all genes can only take values from this list of 6 values. + +.. code:: python + + gene_space = [33, 7, 0.5, 95. 6.3, 0.74] + +The previous example restricts the gene values to just a set of fixed +number of discrete values. In case you want to use a range of discrete +values to the gene, then you can use the ``range()`` function. For +example, ``range(1, 7)`` means the set of allowed values for the gene +are ``1, 2, 3, 4, 5, and 6``. You can also use the ``numpy.arange()`` or +``numpy.linspace()`` functions for the same purpose. + +The previous discussion only works with a range of discrete values not +continuous values. In `PyGAD +2.11.0 `__, +the ``gene_space`` parameter can be assigned a dictionary that allows +the gene to have values from a continuous range. + +Assuming you want to restrict the gene within this half-open range [1 to +5) where 1 is included and 5 is not. Then simply create a dictionary +with 2 items where the keys of the 2 items are: + +1. ``'low'``: The minimum value in the range which is 1 in the example. + +2. ``'high'``: The maximum value in the range which is 5 in the example. + +The dictionary will look like that: + +.. 
code:: python + + {'low': 1, + 'high': 5} + +It is not acceptable to add more than 2 items in the dictionary or use +other keys than ``'low'`` and ``'high'``. + +For a 3-gene problem, the next code creates a dictionary for each gene +to restrict its values in a continuous range. For the first gene, it can +take any floating-point value from the range that starts from 1 +(inclusive) and ends at 5 (exclusive). + +.. code:: python + + gene_space = [{'low': 1, 'high': 5}, {'low': 0.3, 'high': 1.4}, {'low': -0.2, 'high': 4.5}] + +.. _more-about-the-genespace-parameter: + +More about the ``gene_space`` Parameter +======================================= + +The ``gene_space`` parameter customizes the space of values of each +gene. + +Assuming that all genes have the same global space which include the +values 0.3, 5.2, -4, and 8, then those values can be assigned to the +``gene_space`` parameter as a list, tuple, or range. Here is a list +assigned to this parameter. By doing that, then the gene values are +restricted to those assigned to the ``gene_space`` parameter. + +.. code:: python + + gene_space = [0.3, 5.2, -4, 8] + +If some genes have different spaces, then ``gene_space`` should accept a +nested list or tuple. In this case, the elements could be: + +1. Number (of ``int``, ``float``, or ``NumPy`` data types): A single + value to be assigned to the gene. This means this gene will have the + same value across all generations. + +2. ``list``, ``tuple``, ``numpy.ndarray``, or any range like ``range``, + ``numpy.arange()``, or ``numpy.linspace``: It holds the space for + each individual gene. But this space is usually discrete. That is + there is a set of finite values to select from. + +3. ``dict``: To sample a value for a gene from a continuous range. The + dictionary must have 2 mandatory keys which are ``"low"`` and + ``"high"`` in addition to an optional key which is ``"step"``. 
A + random value is returned between the values assigned to the items + with ``"low"`` and ``"high"`` keys. If the ``"step"`` exists, then + this works as the previous options (i.e. discrete set of values). + +4. ``None``: A gene with its space set to ``None`` is initialized + randomly from the range specified by the 2 parameters + ``init_range_low`` and ``init_range_high``. For mutation, its value + is mutated based on a random value from the range specified by the 2 + parameters ``random_mutation_min_val`` and + ``random_mutation_max_val``. If all elements in the ``gene_space`` + parameter are ``None``, the parameter will not have any effect. + +Assuming that a chromosome has 2 genes and each gene has a different +value space. Then the ``gene_space`` could be assigned a nested +list/tuple where each element determines the space of a gene. + +According to the next code, the space of the first gene is ``[0.4, -5]`` +which has 2 values and the space for the second gene is +``[0.5, -3.2, 8.2, -9]`` which has 4 values. + +.. code:: python + + gene_space = [[0.4, -5], [0.5, -3.2, 8.2, -9]] + +For a 2 gene chromosome, if the first gene space is restricted to the +discrete values from 0 to 4 and the second gene is restricted to the +values from 10 to 19, then it could be specified according to the next +code. + +.. code:: python + + gene_space = [range(5), range(10, 20)] + +The ``gene_space`` can also be assigned to a single range, as given +below, where the values of all genes are sampled from the same range. + +.. code:: python + + gene_space = numpy.arange(15) + +The ``gene_space`` can be assigned a dictionary to sample a value from a +continuous range. + +.. code:: python + + gene_space = {"low": 4, "high": 30} + +A step also can be assigned to the dictionary. This works as if a range +is used. + +.. code:: python + + gene_space = {"low": 4, "high": 30, "step": 2.5} + +.. 
+ + Setting a ``dict`` like ``{"low": 0, "high": 10}`` in the + ``gene_space`` means that random values from the continuous range [0, + 10) are sampled. Note that ``0`` is included but ``10`` is not + included while sampling. Thus, the maximum value that could be + returned is less than ``10`` like ``9.9999``. But if the user decided + to round the genes using, for example, ``[float, 2]``, then this + value will become 10. So, the user should be careful to the inputs. + +If a ``None`` is assigned to only a single gene, then its value will be +randomly generated initially using the ``init_range_low`` and +``init_range_high`` parameters in the ``pygad.GA`` class's constructor. +During mutation, the value are sampled from the range defined by the 2 +parameters ``random_mutation_min_val`` and ``random_mutation_max_val``. +This is an example where the second gene is given a ``None`` value. + +.. code:: python + + gene_space = [range(5), None, numpy.linspace(10, 20, 300)] + +If the user did not assign the initial population to the +``initial_population`` parameter, the initial population is created +randomly based on the ``gene_space`` parameter. Moreover, the mutation +is applied based on this parameter. + +.. _how-mutation-works-with-the-genespace-parameter: + +How Mutation Works with the ``gene_space`` Parameter? +----------------------------------------------------- + +If a gene has its static space defined in the ``gene_space`` parameter, +then mutation works by replacing the gene value by a value randomly +selected from the gene space. This happens for both ``int`` and +``float`` data types. + +For example, the following ``gene_space`` has the static space +``[1, 2, 3]`` defined for the first gene. So, this gene can only have a +value out of these 3 values. + +.. 
code:: python + + Gene space: [[1, 2, 3], + None] + Solution: [1, 5] + +For a solution like ``[1, 5]``, mutation happens for the +first gene by simply replacing its current value by a randomly selected +value (other than its current value if possible). So, the value 1 will +be replaced by either 2 or 3. + +For the second gene, its space is set to ``None``. So, traditional +mutation happens for this gene by: + +1. Generating a random value from the range defined by the + ``random_mutation_min_val`` and ``random_mutation_max_val`` + parameters. + +2. Adding this random value to the current gene's value. + +If its current value is 5 and the random value is ``-0.5``, then the new +value is 4.5. If the gene type is integer, then the value will be +rounded. + +Stop at Any Generation +====================== + +In `PyGAD +2.4.0 `__, +it is possible to stop the genetic algorithm after any generation. All +you need to do is to return the string ``"stop"`` in the callback +function ``on_generation``. When this callback function is implemented +and assigned to the ``on_generation`` parameter in the constructor of +the ``pygad.GA`` class, then the algorithm immediately stops after +completing its current generation. Let's discuss an example. + +Assume that the user wants to stop the algorithm either after 100 +generations or if a condition is met. The user may assign a value of 100 +to the ``num_generations`` parameter of the ``pygad.GA`` class +constructor. + +The condition that stops the algorithm is written in a callback function +like the one in the next code. If the fitness value of the best solution +is 70 or higher, then the string ``"stop"`` is returned. + +.. code:: python + + def func_generation(ga_instance): + if ga_instance.best_solution()[1] >= 70: + return "stop" + +Stop Criteria +============= + +In `PyGAD +2.15.0 `__, +a new parameter named ``stop_criteria`` is added to the constructor of +the ``pygad.GA`` class. 
It helps to stop the evolution based on some +criteria. It can be assigned one or more criteria. + +Each criterion is passed as a ``str`` that consists of 2 parts: + +1. Stop word. + +2. Number. + +It takes this form: + +.. code:: python + + "word_num" + +The current 2 supported words are ``reach`` and ``saturate``. + +The ``reach`` word stops the ``run()`` method if the fitness value is +equal to or greater than a given fitness value. An example for ``reach`` +is ``"reach_40"`` which stops the evolution if the fitness is >= 40. + +``saturate`` stops the evolution if the fitness saturates for a given +number of consecutive generations. An example for ``saturate`` is +``"saturate_7"`` which means stop the ``run()`` method if the fitness +does not change for 7 consecutive generations. + +Here is an example that stops the evolution if either the fitness value +reached ``127.4`` or if the fitness saturates for ``15`` generations. + +.. code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, 9, 4] + desired_output = 44 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + + return fitness + + ga_instance = pygad.GA(num_generations=200, + sol_per_pop=10, + num_parents_mating=4, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + stop_criteria=["reach_127.4", "saturate_15"]) + + ga_instance.run() + print("Number of generations passed is {generations_completed}".format(generations_completed=ga_instance.generations_completed)) + +Elitism Selection +================= + +In `PyGAD +2.18.0 `__, +a new parameter called ``keep_elitism`` is supported. It accepts an +integer to define the number of elitism (i.e. best solutions) to keep in +the next generation. This parameter defaults to ``1`` which means only +the best solution is kept in the next generation. 
+ +In the next example, the ``keep_elitism`` parameter in the constructor +of the ``pygad.GA`` class is set to 2. Thus, the best 2 solutions in +each generation are kept in the next generation. + +.. code:: python + + import numpy + import pygad + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / numpy.abs(output - desired_output) + return fitness + + ga_instance = pygad.GA(num_generations=2, + num_parents_mating=3, + fitness_func=fitness_func, + num_genes=6, + sol_per_pop=5, + keep_elitism=2) + + ga_instance.run() + +The value passed to the ``keep_elitism`` parameter must satisfy 2 +conditions: + +1. It must be ``>= 0``. + +2. It must be ``<= sol_per_pop``. That is its value cannot exceed the + number of solutions in the current population. + +In the previous example, if the ``keep_elitism`` parameter is set equal +to the value passed to the ``sol_per_pop`` parameter, which is 5, then +there will be no evolution at all as in the next figure. This is because +all the 5 solutions are used as elitism in the next generation and no +offspring will be created. + +.. code:: python + + ... + + ga_instance = pygad.GA(..., + sol_per_pop=5, + keep_elitism=5) + + ga_instance.run() + +.. image:: https://user-images.githubusercontent.com/16560492/189273225-67ffad41-97ab-45e1-9324-429705e17b20.png + :alt: + +Note that if the ``keep_elitism`` parameter is effective (i.e. is +assigned a positive integer, not zero), then the ``keep_parents`` +parameter will have no effect. Because the default value of the +``keep_elitism`` parameter is 1, then the ``keep_parents`` parameter has +no effect by default. The ``keep_parents`` parameter is only effective +when ``keep_elitism=0``. + +Random Seed +=========== + +In `PyGAD +2.18.0 `__, +a new parameter called ``random_seed`` is supported. Its value is used +as a seed for the random function generators. 
+ +PyGAD uses random functions in these 2 libraries: + +1. NumPy + +2. random + +The ``random_seed`` parameter defaults to ``None`` which means no seed +is used. As a result, different random numbers are generated for each +run of PyGAD. + +If this parameter is assigned a proper seed, then the results will be +reproducible. In the next example, the integer 2 is used as a random +seed. + +.. code:: python + + import numpy + import pygad + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / numpy.abs(output - desired_output) + return fitness + + ga_instance = pygad.GA(num_generations=2, + num_parents_mating=3, + fitness_func=fitness_func, + sol_per_pop=5, + num_genes=6, + random_seed=2) + + ga_instance.run() + best_solution, best_solution_fitness, best_match_idx = ga_instance.best_solution() + print(best_solution) + print(best_solution_fitness) + +This is the best solution found and its fitness value. + +.. code:: + + [ 2.77249188 -4.06570662 0.04196872 -3.47770796 -0.57502138 -3.22775267] + 0.04872203136549972 + +After running the code again, it will find the same result. + +.. code:: + + [ 2.77249188 -4.06570662 0.04196872 -3.47770796 -0.57502138 -3.22775267] + 0.04872203136549972 + +Continue without Losing Progress +================================ + +In `PyGAD +2.18.0 `__, +and thanks to `Felix Bernhard `__ for +opening `this GitHub +issue `__, +the values of these 4 instance attributes are no longer reset after each +call to the ``run()`` method. + +1. ``self.best_solutions`` + +2. ``self.best_solutions_fitness`` + +3. ``self.solutions`` + +4. ``self.solutions_fitness`` + +This helps the user to continue where the last run stopped without +losing the values of these 4 attributes. + +Now, the user can save the model by calling the ``save()`` method. + +.. 
code:: python + + import pygad + + def fitness_func(ga_instance, solution, solution_idx): + ... + return fitness + + ga_instance = pygad.GA(...) + + ga_instance.run() + + ga_instance.plot_fitness() + + ga_instance.save("pygad_GA") + +Then the saved model is loaded by calling the ``load()`` function. After +calling the ``run()`` method over the loaded instance, then the data +from the previous 4 attributes are not reset but extended with the new +data. + +.. code:: python + + import pygad + + def fitness_func(ga_instance, solution, solution_idx): + ... + return fitness + + loaded_ga_instance = pygad.load("pygad_GA") + + loaded_ga_instance.run() + + loaded_ga_instance.plot_fitness() + +The plot created by the ``plot_fitness()`` method will show the data +collected from both the runs. + +Note that the 2 attributes (``self.best_solutions`` and +``self.best_solutions_fitness``) only work if the +``save_best_solutions`` parameter is set to ``True``. Also, the 2 +attributes (``self.solutions`` and ``self.solutions_fitness``) only work +if the ``save_solutions`` parameter is ``True``. + +Prevent Duplicates in Gene Values +================================= + +In `PyGAD +2.13.0 `__, +a new bool parameter called ``allow_duplicate_genes`` is supported to +control whether duplicates are supported in the chromosome or not. In +other words, whether 2 or more genes might have the same exact value. + +If ``allow_duplicate_genes=True`` (which is the default case), genes may +have the same value. If ``allow_duplicate_genes=False``, then no 2 genes +will have the same value given that there are enough unique values for +the genes. + +The next code gives an example to use the ``allow_duplicate_genes`` +parameter. A callback generation function is implemented to print the +population after each generation. + +.. 
code:: python + + import pygad + + def fitness_func(ga_instance, solution, solution_idx): + return 0 + + def on_generation(ga): + print("Generation", ga.generations_completed) + print(ga.population) + + ga_instance = pygad.GA(num_generations=5, + sol_per_pop=5, + num_genes=4, + mutation_num_genes=3, + random_mutation_min_val=-5, + random_mutation_max_val=5, + num_parents_mating=2, + fitness_func=fitness_func, + gene_type=int, + on_generation=on_generation, + allow_duplicate_genes=False) + ga_instance.run() + +Here are the population after the 5 generations. Note how there are no +duplicate values. + +.. code:: python + + Generation 1 + [[ 2 -2 -3 3] + [ 0 1 2 3] + [ 5 -3 6 3] + [-3 1 -2 4] + [-1 0 -2 3]] + Generation 2 + [[-1 0 -2 3] + [-3 1 -2 4] + [ 0 -3 -2 6] + [-3 0 -2 3] + [ 1 -4 2 4]] + Generation 3 + [[ 1 -4 2 4] + [-3 0 -2 3] + [ 4 0 -2 1] + [-4 0 -2 -3] + [-4 2 0 3]] + Generation 4 + [[-4 2 0 3] + [-4 0 -2 -3] + [-2 5 4 -3] + [-1 2 -4 4] + [-4 2 0 -3]] + Generation 5 + [[-4 2 0 -3] + [-1 2 -4 4] + [ 3 4 -4 0] + [-1 0 2 -2] + [-4 2 -1 1]] + +The ``allow_duplicate_genes`` parameter is configured with use with the +``gene_space`` parameter. Here is an example where each of the 4 genes +has the same space of values that consists of 4 values (1, 2, 3, and 4). + +.. code:: python + + import pygad + + def fitness_func(ga_instance, solution, solution_idx): + return 0 + + def on_generation(ga): + print("Generation", ga.generations_completed) + print(ga.population) + + ga_instance = pygad.GA(num_generations=1, + sol_per_pop=5, + num_genes=4, + num_parents_mating=2, + fitness_func=fitness_func, + gene_type=int, + gene_space=[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], + on_generation=on_generation, + allow_duplicate_genes=False) + ga_instance.run() + +Even that all the genes share the same space of values, no 2 genes +duplicate their values as provided by the next output. + +.. 
code:: python + + Generation 1 + [[2 3 1 4] + [2 3 1 4] + [2 4 1 3] + [2 3 1 4] + [1 3 2 4]] + Generation 2 + [[1 3 2 4] + [2 3 1 4] + [1 3 2 4] + [2 3 4 1] + [1 3 4 2]] + Generation 3 + [[1 3 4 2] + [2 3 4 1] + [1 3 4 2] + [3 1 4 2] + [3 2 4 1]] + Generation 4 + [[3 2 4 1] + [3 1 4 2] + [3 2 4 1] + [1 2 4 3] + [1 3 4 2]] + Generation 5 + [[1 3 4 2] + [1 2 4 3] + [2 1 4 3] + [1 2 4 3] + [1 2 4 3]] + +You should take care to give enough values for the genes so that PyGAD is +able to find alternatives for the gene value in case it duplicates with +another gene. + +There might be 2 duplicate genes where changing either of the 2 +duplicating genes will not solve the problem. For example, if +``gene_space=[[3, 0, 1], [4, 1, 2], [0, 2], [3, 2, 0]]`` and the +solution is ``[3 2 0 0]``, then the values of the last 2 genes +duplicate. There are no possible changes in the last 2 genes to solve +the problem. + +This problem can be solved by randomly changing one of the +non-duplicating genes that may make room for a unique value in one of the +2 duplicating genes. For example, by changing the second gene from 2 to +4, then any of the last 2 genes can take the value 2 and solve the +duplicates. The resultant gene is then ``[3 4 2 0]``. This option is +supported starting from PyGAD 3.1.0, as described in the next section. + +Solve Duplicates using a Third Gene +----------------------------------- + +When ``allow_duplicate_genes=False`` and a user-defined ``gene_space`` +is used, it sometimes happens that there is no room to solve the +duplicates between the 2 genes by simply replacing the value of one gene +by another gene. In `PyGAD +3.1.0 `__, +the duplicates are solved by looking for a third gene that will help in +solving the duplicates. The following examples explain how it works. + +Example 1: + +Let's assume that this gene space is used and there is a solution with 2 +duplicate genes with the same value 4. + +.. 
code:: python + + Gene space: [[2, 3], + [3, 4], + [4, 5], + [5, 6]] + Solution: [3, 4, 4, 5] + +By checking the gene space, the second gene can have the values +``[3, 4]`` and the third gene can have the values ``[4, 5]``. To solve +the duplicates, we have to change the value of one of these 2 genes. + +If the value of the second gene changes from 4 to 3, then it will be +duplicate with the first gene. If we are to change the value of the +third gene from 4 to 5, then it will duplicate with the fourth gene. In +conclusion, just selecting a different gene value for either +the second or third genes will introduce new duplicating genes. + +When there are 2 duplicate genes but there is no way to solve their +duplicates, then the solution is to change a third gene that makes +room to solve the duplicates between the 2 genes. + +In our example, duplicates between the second and third genes can be +solved by, for example: + +- Changing the first gene from 3 to 2 then changing the second gene + from 4 to 3. + +- Or changing the fourth gene from 5 to 6 then changing the third gene + from 4 to 5. + +Generally, this is how to solve such duplicates: + +1. For any duplicate gene **GENE1**, select another value. + +2. Check which other gene **GENEX** has duplicate with this new value. + +3. Find if **GENEX** can have another value that will not cause any more + duplicates. If so, go to step 7. + +4. If all the other values of **GENEX** will cause duplicates, then try + another gene **GENEY**. + +5. Repeat steps 3 and 4 until exploring all the genes. + +6. If there is no possibility to solve the duplicates, then there is no + way to solve the duplicates and we have to keep the duplicate value. + +7. If a value for a gene **GENEM** is found that will not cause more + duplicates, then use this value for the gene **GENEM**. + +8. Replace the value of the gene **GENE1** by the old value of the gene + **GENEM**. This solves the duplicates. 
+ +This is an example to solve the duplicate for the solution +``[3, 4, 4, 5]``: + +1. Let's use the second gene with value 4. Because the space of this + gene is ``[3, 4]``, then the only other value we can select is 3. + +2. The first gene also has the value 3. + +3. The first gene has another value 2 that will not cause more + duplicates in the solution. Then go to step 7. + +4. Skip. + +5. Skip. + +6. Skip. + +7. The value of the first gene 3 will be replaced by the new value 2. + The new solution is [2, 4, 4, 5]. + +8. Replace the value of the second gene 4 by the old value of the first + gene which is 3. The new solution is [2, 3, 4, 5]. The duplicate is + solved. + +Example 2: + +.. code:: python + + Gene space: [[0, 1], + [1, 2], + [2, 3], + [3, 4]] + Solution: [1, 2, 2, 3] + +The quick summary is: + +- Change the value of the first gene from 1 to 0. The solution becomes + [0, 2, 2, 3]. + +- Change the value of the second gene from 2 to 1. The solution becomes + [0, 1, 2, 3]. The duplicate is solved. + +.. _more-about-the-genetype-parameter: + +More about the ``gene_type`` Parameter +====================================== + +The ``gene_type`` parameter allows the user to control the data type for +all genes at once or each individual gene. In `PyGAD +2.15.0 `__, +the ``gene_type`` parameter also supports customizing the precision for +``float`` data types. As a result, the ``gene_type`` parameter helps to: + +1. Select a data type for all genes with or without precision. + +2. Select a data type for each individual gene with or without + precision. + +Let's discuss things by examples. + +Data Type for All Genes without Precision +----------------------------------------- + +The data type for all genes can be specified by assigning the numeric +data type directly to the ``gene_type`` parameter. This is an example to +make all genes of ``int`` data types. + +.. 
code:: python + + gene_type=int + +Given that the supported numeric data types of PyGAD include Python's +``int`` and ``float`` in addition to all numeric types of ``NumPy``, +then any of these types can be assigned to the ``gene_type`` parameter. + +If no precision is specified for a ``float`` data type, then the +complete floating-point number is kept. + +The next code uses an ``int`` data type for all genes where the genes in +the initial and final population are only integers. + +.. code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_type=int) + + print("Initial Population") + print(ga_instance.initial_population) + + ga_instance.run() + + print("Final Population") + print(ga_instance.population) + +.. code:: python + + Initial Population + [[ 1 -1 2 0 -3] + [ 0 -2 0 -3 -1] + [ 0 -1 -1 2 0] + [-2 3 -2 3 3] + [ 0 0 2 -2 -2]] + + Final Population + [[ 1 -1 2 2 0] + [ 1 -1 2 2 0] + [ 1 -1 2 2 0] + [ 1 -1 2 2 0] + [ 1 -1 2 2 0]] + +Data Type for All Genes with Precision +-------------------------------------- + +A precision can only be specified for a ``float`` data type and cannot +be specified for integers. Here is an example to use a precision of 3 +for the ``float`` data type. In this case, all genes are of type +``float`` and their maximum precision is 3. + +.. code:: python + + gene_type=[float, 3] + +The next code uses prints the initial and final population where the +genes are of type ``float`` with precision 3. + +.. 
code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_type=[float, 3]) + + print("Initial Population") + print(ga_instance.initial_population) + + ga_instance.run() + + print("Final Population") + print(ga_instance.population) + +.. code:: python + + Initial Population + [[-2.417 -0.487 3.623 2.457 -2.362] + [-1.231 0.079 -1.63 1.629 -2.637] + [ 0.692 -2.098 0.705 0.914 -3.633] + [ 2.637 -1.339 -1.107 -0.781 -3.896] + [-1.495 1.378 -1.026 3.522 2.379]] + + Final Population + [[ 1.714 -1.024 3.623 3.185 -2.362] + [ 0.692 -1.024 3.623 3.185 -2.362] + [ 0.692 -1.024 3.623 3.375 -2.362] + [ 0.692 -1.024 4.041 3.185 -2.362] + [ 1.714 -0.644 3.623 3.185 -2.362]] + +Data Type for each Individual Gene without Precision +---------------------------------------------------- + +In `PyGAD +2.14.0 `__, +the ``gene_type`` parameter allows customizing the gene type for each +individual gene. This is by using a ``list``/``tuple``/``numpy.ndarray`` +with number of elements equal to the number of genes. For each element, +a type is specified for the corresponding gene. + +This is an example for a 5-gene problem where different types are +assigned to the genes. + +.. code:: python + + gene_type=[int, float, numpy.float16, numpy.int8, float] + +This is a complete code that prints the initial and final population for +a custom-gene data type. + +.. 
code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_type=[int, float, numpy.float16, numpy.int8, float]) + + print("Initial Population") + print(ga_instance.initial_population) + + ga_instance.run() + + print("Final Population") + print(ga_instance.population) + +.. code:: python + + Initial Population + [[0 0.8615522360026828 0.7021484375 -2 3.5301821368185866] + [-3 2.648189378595294 -3.830078125 1 -0.9586271572917742] + [3 3.7729827570110714 1.2529296875 -3 1.395741994211889] + [0 1.0490687178053282 1.51953125 -2 0.7243617940450235] + [0 -0.6550158436937226 -2.861328125 -2 1.8212734549263097]] + + Final Population + [[3 3.7729827570110714 2.055 0 0.7243617940450235] + [3 3.7729827570110714 1.458 0 -0.14638754050305036] + [3 3.7729827570110714 1.458 0 0.0869406120516778] + [3 3.7729827570110714 1.458 0 0.7243617940450235] + [3 3.7729827570110714 1.458 0 -0.14638754050305036]] + +Data Type for each Individual Gene with Precision +------------------------------------------------- + +The precision can also be specified for the ``float`` data types as in +the next line where the second gene precision is 2 and last gene +precision is 1. + +.. code:: python + + gene_type=[int, [float, 2], numpy.float16, numpy.int8, [float, 1]] + +This is a complete example where the initial and final populations are +printed where the genes comply with the data types and precisions +specified. + +.. 
code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_type=[int, [float, 2], numpy.float16, numpy.int8, [float, 1]]) + + print("Initial Population") + print(ga_instance.initial_population) + + ga_instance.run() + + print("Final Population") + print(ga_instance.population) + +.. code:: python + + Initial Population + [[-2 -1.22 1.716796875 -1 0.2] + [-1 -1.58 -3.091796875 0 -1.3] + [3 3.35 -0.107421875 1 -3.3] + [-2 -3.58 -1.779296875 0 0.6] + [2 -3.73 2.65234375 3 -0.5]] + + Final Population + [[2 -4.22 3.47 3 -1.3] + [2 -3.73 3.47 3 -1.3] + [2 -4.22 3.47 2 -1.3] + [2 -4.58 3.47 3 -1.3] + [2 -3.73 3.47 3 -1.3]] + +Parallel Processing in PyGAD +============================ + +Starting from `PyGAD +2.17.0 `__, +parallel processing becomes supported. This section explains how to use +parallel processing in PyGAD. + +According to the `PyGAD +lifecycle `__, +parallel processing can be parallelized in only 2 operations: + +1. Population fitness calculation. + +2. Mutation. + +The reason is that the calculations in these 2 operations are +independent (i.e. each solution/chromosome is handled independently from +the others) and can be distributed across different processes or +threads. + +For the mutation operation, it does not do intensive calculations on the +CPU. Its calculations are simple like flipping the values of some genes +from 0 to 1 or adding a random value to some genes. So, it does not take +much CPU processing time. Experiments proved that parallelizing the +mutation operation across the solutions increases the time instead of +reducing it. 
This is because running multiple processes or threads adds +overhead to manage them. Thus, parallel processing cannot be applied on +the mutation operation. + +For the population fitness calculation, parallel processing can help +make a difference and reduce the processing time. But this is +conditional on the type of calculations done in the fitness function. If +the fitness function makes intensive calculations and takes much +processing time from the CPU, then it is probable that parallel +processing will help to cut down the overall time. + +This section explains how parallel processing works in PyGAD and how to +use parallel processing in PyGAD. + +How to Use Parallel Processing in PyGAD +--------------------------------------- + +Starting from `PyGAD +2.17.0 `__, +a new parameter called ``parallel_processing`` was added to the constructor +of the ``pygad.GA`` class. + +.. code:: python + + import pygad + ... + ga_instance = pygad.GA(..., + parallel_processing=...) + ... + +This parameter allows the user to do the following: + +1. Enable parallel processing. + +2. Select whether processes or threads are used. + +3. Specify the number of processes or threads to be used. + +These are 3 possible values for the ``parallel_processing`` parameter: + +1. ``None``: (Default) It means no parallel processing is used. + +2. A positive integer referring to the number of threads to be used + (i.e. threads, not processes, are used). + +3. ``list``/``tuple``: If a list or a tuple of exactly 2 elements is + assigned, then: + + 1. The first element can be either ``'process'`` or ``'thread'`` to + specify whether processes or threads are used, respectively. + + 2. The second element can be: + + 1. A positive integer to select the maximum number of processes or + threads to be used. + + 2. ``0`` to indicate that 0 processes or threads are used. It + means no parallel processing. This is identical to setting + ``parallel_processing=None``. + + 3. 
``None`` to use the default value as calculated by the + ``concurrent.futures module``. + +These are examples of the values assigned to the ``parallel_processing`` +parameter: + +- ``parallel_processing=4``: Because the parameter is assigned a + positive integer, this means parallel processing is activated where 4 + threads are used. + +- ``parallel_processing=["thread", 5]``: Use parallel processing with 5 + threads. This is identical to ``parallel_processing=5``. + +- ``parallel_processing=["process", 8]``: Use parallel processing with + 8 processes. + +- ``parallel_processing=["process", 0]``: As the second element is + given the value 0, this means do not use parallel processing. This is + identical to ``parallel_processing=None``. + +Examples +-------- + +The examples will help you know the difference between using processes +and threads. Moreover, it will give an idea when parallel processing +would make a difference and reduce the time. These are dummy examples +where the fitness function is made to always return 0. + +The first example uses 10 genes, 5 solutions in the population where +only 3 solutions mate, and 9999 generations. The fitness function uses a +``for`` loop with 100 iterations just to have some calculations. In the +constructor of the ``pygad.GA`` class, ``parallel_processing=None`` +means no parallel processing is used. + +.. code:: python + + import pygad + import time + + def fitness_func(ga_instance, solution, solution_idx): + for _ in range(99): + pass + return 0 + + ga_instance = pygad.GA(num_generations=9999, + num_parents_mating=3, + sol_per_pop=5, + num_genes=10, + fitness_func=fitness_func, + suppress_warnings=True, + parallel_processing=None) + + if __name__ == '__main__': + t1 = time.time() + + ga_instance.run() + + t2 = time.time() + print("Time is", t2-t1) + +When parallel processing is not used, the time it takes to run the +genetic algorithm is ``1.5`` seconds. 
+ +For comparison, let's do a second experiment where parallel +processing is used with 5 threads. In this case, it takes ``5`` seconds. + +.. code:: python + + ... + ga_instance = pygad.GA(..., + parallel_processing=5) + ... + +For the third experiment, processes instead of threads are used. Also, +only 99 generations are used instead of 9999. The time it takes is +``99`` seconds. + +.. code:: python + + ... + ga_instance = pygad.GA(num_generations=99, + ..., + parallel_processing=["process", 5]) + ... + +This is the summary of the 3 experiments: + +1. No parallel processing & 9999 generations: 1.5 seconds. + +2. Parallel processing with 5 threads & 9999 generations: 5 seconds. + +3. Parallel processing with 5 processes & 99 generations: 99 seconds. + +Because the fitness function does not need much CPU time, the normal +processing takes the least time. Running processes for this simple +problem takes 99 seconds compared to only 5 seconds for threads because managing +processes is much heavier than managing threads. Thus, most of the CPU +time is for swapping the processes instead of executing the code. + +In the second example, the loop makes 99999999 iterations and only 5 +generations are used. With no parallelization, it takes 22 seconds. + +.. code:: python + + import pygad + import time + + def fitness_func(ga_instance, solution, solution_idx): + for _ in range(99999999): + pass + return 0 + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=3, + sol_per_pop=5, + num_genes=10, + fitness_func=fitness_func, + suppress_warnings=True, + parallel_processing=None) + + if __name__ == '__main__': + t1 = time.time() + ga_instance.run() + t2 = time.time() + print("Time is", t2-t1) + +It takes 15 seconds when 10 processes are used. + +.. code:: python + + ... + ga_instance = pygad.GA(..., + parallel_processing=["process", 10]) + ... + +This is compared to 20 seconds when 10 threads are used. + +.. code:: python + + ... 
+ ga_instance = pygad.GA(..., + parallel_processing=["thread", 10]) + ... + +Based on the second example, using parallel processing with 10 processes +takes the least time because there is much CPU work done. Generally, +processes are preferred over threads when most of the work in on the +CPU. Threads are preferred over processes in some situations like doing +input/output operations. + +*Before releasing* `PyGAD +2.17.0 `__\ *,* +`László +Fazekas `__ +*wrote an article to parallelize the fitness function with PyGAD. Check +it:* `How Genetic Algorithms Can Compete with Gradient Descent and +Backprop `__. + +Print Lifecycle Summary +======================= + +In `PyGAD +2.19.0 `__, +a new method called ``summary()`` is supported. It prints a Keras-like +summary of the PyGAD lifecycle showing the steps, callback functions, +parameters, etc. + +This method accepts the following parameters: + +- ``line_length=70``: An integer representing the length of the single + line in characters. + +- ``fill_character=" "``: A character to fill the lines. + +- ``line_character="-"``: A character for creating a line separator. + +- ``line_character2="="``: A secondary character to create a line + separator. + +- ``columns_equal_len=False``: The table rows are split into + equal-sized columns or split subjective to the width needed. + +- ``print_step_parameters=True``: Whether to print extra parameters + about each step inside the step. If ``print_step_parameters=False`` + and ``print_parameters_summary=True``, then the parameters of each + step are printed at the end of the table. + +- ``print_parameters_summary=True``: Whether to print parameters + summary at the end of the table. If ``print_step_parameters=False``, + then the parameters of each step are printed at the end of the table + too. + +This is a quick example to create a PyGAD example. + +.. 
code:: python + + import pygad + import numpy + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + def genetic_fitness(solution, solution_idx): + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + def on_gen(ga): + pass + + def on_crossover_callback(a, b): + pass + + ga_instance = pygad.GA(num_generations=100, + num_parents_mating=10, + sol_per_pop=20, + num_genes=len(function_inputs), + on_crossover=on_crossover_callback, + on_generation=on_gen, + parallel_processing=2, + stop_criteria="reach_10", + fitness_batch_size=4, + crossover_probability=0.4, + fitness_func=genetic_fitness) + +Then call the ``summary()`` method to print the summary with the default +parameters. Note that entries for the crossover and generation callback +function are created because their callback functions are implemented +through the ``on_crossover_callback()`` and ``on_gen()``, respectively. + +.. code:: python + + ga_instance.summary() + +.. 
code:: bash + + ---------------------------------------------------------------------- + PyGAD Lifecycle + ====================================================================== + Step Handler Output Shape + ====================================================================== + Fitness Function genetic_fitness() (1) + Fitness batch size: 4 + ---------------------------------------------------------------------- + Parent Selection steady_state_selection() (10, 6) + Number of Parents: 10 + ---------------------------------------------------------------------- + Crossover single_point_crossover() (10, 6) + Crossover probability: 0.4 + ---------------------------------------------------------------------- + On Crossover on_crossover_callback() None + ---------------------------------------------------------------------- + Mutation random_mutation() (10, 6) + Mutation Genes: 1 + Random Mutation Range: (-1.0, 1.0) + Mutation by Replacement: False + Allow Duplicated Genes: True + ---------------------------------------------------------------------- + On Generation on_gen() None + Stop Criteria: [['reach', 10.0]] + ---------------------------------------------------------------------- + ====================================================================== + Population Size: (20, 6) + Number of Generations: 100 + Initial Population Range: (-4, 4) + Keep Elitism: 1 + Gene DType: [, None] + Parallel Processing: ['thread', 2] + Save Best Solutions: False + Save Solutions: False + ====================================================================== + +We can set the ``print_step_parameters`` and +``print_parameters_summary`` parameters to ``False`` to not print the +parameters. + +.. code:: python + + ga_instance.summary(print_step_parameters=False, + print_parameters_summary=False) + +.. 
code:: bash + + ---------------------------------------------------------------------- + PyGAD Lifecycle + ====================================================================== + Step Handler Output Shape + ====================================================================== + Fitness Function genetic_fitness() (1) + ---------------------------------------------------------------------- + Parent Selection steady_state_selection() (10, 6) + ---------------------------------------------------------------------- + Crossover single_point_crossover() (10, 6) + ---------------------------------------------------------------------- + On Crossover on_crossover_callback() None + ---------------------------------------------------------------------- + Mutation random_mutation() (10, 6) + ---------------------------------------------------------------------- + On Generation on_gen() None + ---------------------------------------------------------------------- + ====================================================================== + +Logging Outputs +=============== + +In `PyGAD +3.0.0 `__, +the ``print()`` statement is no longer used and the outputs are printed +using the `logging `__ +module. A a new parameter called ``logger`` is supported to accept the +user-defined logger. + +.. code:: python + + import logging + + logger = ... + + ga_instance = pygad.GA(..., + logger=logger, + ...) + +The default value for this parameter is ``None``. If there is no logger +passed (i.e. ``logger=None``), then a default logger is created to log +the messages to the console exactly like how the ``print()`` statement +works. + +Some advantages of using the the +`logging `__ module +instead of the ``print()`` statement are: + +1. The user has more control over the printed messages specially if + there is a project that uses multiple modules where each module + prints its messages. A logger can organize the outputs. + +2. 
Using the proper ``Handler``, the user can log the output messages to + files and not only restricted to printing it to the console. So, it + is much easier to record the outputs. + +3. The format of the printed messages can be changed by customizing the + ``Formatter`` assigned to the Logger. + +This section gives some quick examples to use the ``logging`` module and +then gives an example to use the logger with PyGAD. + +Logging to the Console +---------------------- + +This is an example to create a logger to log the messages to the +console. + +.. code:: python + + import logging + + # Create a logger + logger = logging.getLogger(__name__) + + # Set the logger level to debug so that all the messages are printed. + logger.setLevel(logging.DEBUG) + + # Create a stream handler to log the messages to the console. + stream_handler = logging.StreamHandler() + + # Set the handler level to debug. + stream_handler.setLevel(logging.DEBUG) + + # Create a formatter + formatter = logging.Formatter('%(message)s') + + # Add the formatter to handler. + stream_handler.setFormatter(formatter) + + # Add the stream handler to the logger + logger.addHandler(stream_handler) + +Now, we can log messages to the console with the format specified in the +``Formatter``. + +.. code:: python + + logger.debug('Debug message.') + logger.info('Info message.') + logger.warning('Warn message.') + logger.error('Error message.') + logger.critical('Critical message.') + +The outputs are identical to those returned using the ``print()`` +statement. + +.. code:: + + Debug message. + Info message. + Warn message. + Error message. + Critical message. + +By changing the format of the output messages, we can have more +information about each message. + +.. code:: python + + formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + +This is a sample output. + +.. code:: python + + 2023-04-03 18:46:27 DEBUG: Debug message. 
+ 2023-04-03 18:46:27 INFO: Info message. + 2023-04-03 18:46:27 WARNING: Warn message. + 2023-04-03 18:46:27 ERROR: Error message. + 2023-04-03 18:46:27 CRITICAL: Critical message. + +Note that you may need to clear the handlers after finishing the +execution. This is to make sure no cached handlers are used in the next +run. If the cached handlers are not cleared, then a single output +message may be logged more than once. + +.. code:: python + + logger.handlers.clear() + +Logging to a File +----------------- + +This is another example to log the messages to a file named +``logfile.txt``. The formatter prints the following about each message: + +1. The date and time at which the message is logged. + +2. The log level. + +3. The message. + +4. The path of the file. + +5. The line number of the log message. + +.. code:: python + + import logging + + level = logging.DEBUG + name = 'logfile.txt' + + logger = logging.getLogger(name) + logger.setLevel(level) + + file_handler = logging.FileHandler(name, 'a+', 'utf-8') + file_handler.setLevel(logging.DEBUG) + file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s - %(pathname)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S') + file_handler.setFormatter(file_format) + logger.addHandler(file_handler) + +This is what the outputs look like. + +.. code:: python + + 2023-04-03 18:54:03 DEBUG: Debug message. - c:\users\agad069\desktop\logger\example2.py:46 + 2023-04-03 18:54:03 INFO: Info message. - c:\users\agad069\desktop\logger\example2.py:47 + 2023-04-03 18:54:03 WARNING: Warn message. - c:\users\agad069\desktop\logger\example2.py:48 + 2023-04-03 18:54:03 ERROR: Error message. - c:\users\agad069\desktop\logger\example2.py:49 + 2023-04-03 18:54:03 CRITICAL: Critical message. - c:\users\agad069\desktop\logger\example2.py:50 + +Consider clearing the handlers if necessary. + +.. 
code:: python + + logger.handlers.clear() + +Log to Both the Console and a File +---------------------------------- + +This is an example to create a single Logger associated with 2 handlers: + +1. A file handler. + +2. A stream handler. + +.. code:: python + + import logging + + level = logging.DEBUG + name = 'logfile.txt' + + logger = logging.getLogger(name) + logger.setLevel(level) + + file_handler = logging.FileHandler(name,'a+','utf-8') + file_handler.setLevel(logging.DEBUG) + file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s - %(pathname)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S') + file_handler.setFormatter(file_format) + logger.addHandler(file_handler) + + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_format = logging.Formatter('%(message)s') + console_handler.setFormatter(console_format) + logger.addHandler(console_handler) + +When a log message is executed, then it is both printed to the console +and saved in the ``logfile.txt``. + +Consider clearing the handlers if necessary. + +.. code:: python + + logger.handlers.clear() + +PyGAD Example +------------- + +To use the logger in PyGAD, just create your custom logger and pass it +to the ``logger`` parameter. + +.. 
code:: python + + import logging + import pygad + import numpy + + level = logging.DEBUG + name = 'logfile.txt' + + logger = logging.getLogger(name) + logger.setLevel(level) + + file_handler = logging.FileHandler(name,'a+','utf-8') + file_handler.setLevel(logging.DEBUG) + file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + file_handler.setFormatter(file_format) + logger.addHandler(file_handler) + + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_format = logging.Formatter('%(message)s') + console_handler.setFormatter(console_format) + logger.addHandler(console_handler) + + equation_inputs = [4, -2, 8] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + def on_generation(ga_instance): + ga_instance.logger.info("Generation = {generation}".format(generation=ga_instance.generations_completed)) + ga_instance.logger.info("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=40, + num_parents_mating=2, + keep_parents=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + on_generation=on_generation, + logger=logger) + ga_instance.run() + + logger.handlers.clear() + +By executing this code, the logged messages are printed to the console +and also saved in the text file. + +.. 
code:: python + + 2023-04-03 19:04:27 INFO: Generation = 1 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038086960368076276 + 2023-04-03 19:04:27 INFO: Generation = 2 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038214871408010853 + 2023-04-03 19:04:27 INFO: Generation = 3 + 2023-04-03 19:04:27 INFO: Fitness = 0.0003832795907974678 + 2023-04-03 19:04:27 INFO: Generation = 4 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038398612055017196 + 2023-04-03 19:04:27 INFO: Generation = 5 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038442348890867516 + 2023-04-03 19:04:27 INFO: Generation = 6 + 2023-04-03 19:04:27 INFO: Fitness = 0.0003854406039137763 + 2023-04-03 19:04:27 INFO: Generation = 7 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038646083174063284 + 2023-04-03 19:04:27 INFO: Generation = 8 + 2023-04-03 19:04:27 INFO: Fitness = 0.0003875169193024936 + 2023-04-03 19:04:27 INFO: Generation = 9 + 2023-04-03 19:04:27 INFO: Fitness = 0.0003888816727311021 + 2023-04-03 19:04:27 INFO: Generation = 10 + 2023-04-03 19:04:27 INFO: Fitness = 0.000389832593101348 + +Solve Non-Deterministic Problems +================================ + +PyGAD can be used to solve both deterministic and non-deterministic +problems. Deterministic are those that return the same fitness for the +same solution. For non-deterministic problems, a different fitness value +would be returned for the same solution. + +By default, PyGAD settings are set to solve deterministic problems. +PyGAD can save the explored solutions and their fitness to reuse in the +future. These instances attributes can save the solutions: + +1. ``solutions``: Exists if ``save_solutions=True``. + +2. ``best_solutions``: Exists if ``save_best_solutions=True``. + +3. ``last_generation_elitism``: Exists if ``keep_elitism`` > 0. + +4. ``last_generation_parents``: Exists if ``keep_parents`` > 0 or + ``keep_parents=-1``. + +To configure PyGAD for non-deterministic problems, we have to disable +saving the previous solutions. 
This is done by setting these parameters: + +1. ``keep_elitism=0`` + +2. ``keep_parents=0`` + +3. ``save_solutions=False`` + +4. ``save_best_solutions=False`` + +.. code:: python + + import pygad + ... + ga_instance = pygad.GA(..., + keep_elitism=0, + keep_parents=0, + save_solutions=False, + save_best_solutions=False, + ...) + +This way PyGAD will not save any explored solution and thus the fitness +function has to be called for each individual solution. + +Reuse the Fitness instead of Calling the Fitness Function +========================================================= + +It may happen that a previously explored solution in generation X is +explored again in another generation Y (where Y > X). For some problems, +calling the fitness function takes much time. + +For deterministic problems, it is better to not call the fitness +function for an already explored solution. Instead, reuse the fitness +of the old solution. PyGAD supports some options to help you save time +calling the fitness function for a previously explored solution. + +The parameters explored in this section can be set in the constructor of +the ``pygad.GA`` class. + +The ``cal_pop_fitness()`` method of the ``pygad.GA`` class checks these +parameters to see if there is a possibility of reusing the fitness +instead of calling the fitness function. + +.. _1-savesolutions: + +1. ``save_solutions`` +--------------------- + +It defaults to ``False``. If set to ``True``, then the population of +each generation is saved into the ``solutions`` attribute of the +``pygad.GA`` instance. In other words, every single solution is saved in +the ``solutions`` attribute. + +.. _2-savebestsolutions: + +2. ``save_best_solutions`` +-------------------------- + +It defaults to ``False``. If ``True``, then it only saves the best +solution in every generation. + +.. _3-keepelitism: + +3. ``keep_elitism`` +------------------- + +It accepts an integer and defaults to 1. 
If set to a positive integer, +then it keeps the elitism of one generation available in the next +generation. + +.. _4-keepparents: + +4. ``keep_parents`` +------------------- + +It accepts an integer and defaults to -1. If set to ``-1`` or a positive +integer, then it keeps the parents of one generation available in the +next generation. + +Why the Fitness Function is not Called for Solution at Index 0? +=============================================================== + +PyGAD has a parameter called ``keep_elitism`` which defaults to 1. This +parameter defines the number of best solutions in generation **X** to +keep in the next generation **X+1**. The best solutions are just copied +from generation **X** to generation **X+1** without making any change. + +.. code:: python + + ga_instance = pygad.GA(..., + keep_elitism=1, + ...) + +The best solutions are copied at the beginning of the population. If +``keep_elitism=1``, this means the best solution in generation X is kept +in the next generation X+1 at index 0 of the population. If +``keep_elitism=2``, this means the 2 best solutions in generation X are +kept in the next generation X+1 at indices 0 and 1 of the population of +generation X+1. + +Because the fitness values of these best solutions are already calculated in +generation X, then their fitness values will not be recalculated at +generation X+1 (i.e. the fitness function will not be called for these +solutions again). Instead, their fitness values are just reused. This is +why you see that no solution with index 0 is passed to the fitness +function. + +To force calling the fitness function for each solution in every +generation, consider setting ``keep_elitism`` and ``keep_parents`` to 0. +Moreover, keep the 2 parameters ``save_solutions`` and +``save_best_solutions`` at their default value ``False``. + +.. code:: python + + ga_instance = pygad.GA(..., + keep_elitism=0, + keep_parents=0, + save_solutions=False, + save_best_solutions=False, + ...) 
+ +Batch Fitness Calculation +========================= + +In `PyGAD +2.19.0 `__, +a new optional parameter called ``fitness_batch_size`` is supported. It +makes it possible to pass the solutions to the fitness function and +calculate their fitness in batches. Thanks to `Linan +Qiu `__ for opening the `GitHub issue +#136 `__. + +Its values can be: + +- ``1`` or ``None``: If the ``fitness_batch_size`` parameter is + assigned the value ``1`` or ``None`` (default), then the normal flow + is used where the fitness function is called for each individual + solution. That is, if there are 15 solutions, then the fitness + function is called 15 times. + +- ``1 < fitness_batch_size <= sol_per_pop``: If the + ``fitness_batch_size`` parameter is assigned a value satisfying this + condition ``1 < fitness_batch_size <= sol_per_pop``, then the + solutions are grouped into batches of size ``fitness_batch_size`` and + the fitness function is called once for each batch. In this case, the + fitness function must return a list/tuple/numpy.ndarray with a length + equal to the number of solutions passed. + +.. _example-without-fitnessbatchsize-parameter: + +Example without ``fitness_batch_size`` Parameter +------------------------------------------------ + +This is an example where the ``fitness_batch_size`` parameter is given +the value ``None`` (which is the default value). This is equivalent to +using the value ``1``. In this case, the fitness function will be called +for each solution. This means the fitness function ``fitness_func`` will +receive only a single solution. This is an example of the passed +arguments to the fitness function: + +.. code:: + + solution: [ 2.52860734, -0.94178795, 2.97545704, 0.84131987, -3.78447118, 2.41008358] + solution_idx: 3 + +The fitness function also must return a single numeric value as the +fitness for the passed solution. + +As we have a population of ``20`` solutions, then the fitness function +is called 20 times per generation. 
For 5 generations, the fitness +function is called ``20*5 = 100`` times. In PyGAD, the fitness function +is called after the last generation too and this adds an additional 20 +calls. So, the total number of calls to the fitness function is +``20*5 + 20 = 120``. + +Note that the ``keep_elitism`` and ``keep_parents`` parameters are set +to ``0`` to make sure no fitness values are reused and to force calling +the fitness function for each individual solution. + +.. code:: python + + import pygad + import numpy + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + number_of_calls = 0 + + def fitness_func(ga_instance, solution, solution_idx): + global number_of_calls + number_of_calls = number_of_calls + 1 + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=10, + sol_per_pop=20, + fitness_func=fitness_func, + fitness_batch_size=None, + # fitness_batch_size=1, + num_genes=len(function_inputs), + keep_elitism=0, + keep_parents=0) + + ga_instance.run() + print(number_of_calls) + +.. code:: + + 120 + +.. _example-with-fitnessbatchsize-parameter: + +Example with ``fitness_batch_size`` Parameter +--------------------------------------------- + +This is an example where the ``fitness_batch_size`` parameter is used +and assigned the value ``4``. This means the solutions will be grouped +into batches of ``4`` solutions. The fitness function will be called +once for each batch (i.e. called once for each 4 solutions). + +This is an example of the arguments passed to it: + +.. 
code:: python + + solutions: + [[ 3.1129432 -0.69123589 1.93792414 2.23772968 -1.54616001 -0.53930799] + [ 3.38508121 0.19890812 1.93792414 2.23095014 -3.08955597 3.10194128] + [ 2.37079504 -0.88819803 2.97545704 1.41742256 -3.95594055 2.45028256] + [ 2.52860734 -0.94178795 2.97545704 0.84131987 -3.78447118 2.41008358]] + solutions_indices: + [16, 17, 18, 19] + +As we have 20 solutions, then there are ``20/4 = 5`` batches. As a +result, the fitness function is called only 5 times per generation +instead of 20. For each call to the fitness function, it receives a +batch of 4 solutions. + +As we have 5 generations, then the function will be called ``5*5 = 25`` +times. Given the call to the fitness function after the last generation, +then the total number of calls is ``5*5 + 5 = 30``. + +.. code:: python + + import pygad + import numpy + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + number_of_calls = 0 + + def fitness_func_batch(ga_instance, solutions, solutions_indices): + global number_of_calls + number_of_calls = number_of_calls + 1 + batch_fitness = [] + for solution in solutions: + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + batch_fitness.append(fitness) + return batch_fitness + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=10, + sol_per_pop=20, + fitness_func=fitness_func_batch, + fitness_batch_size=4, + num_genes=len(function_inputs), + keep_elitism=0, + keep_parents=0) + + ga_instance.run() + print(number_of_calls) + +.. code:: + + 30 + +When batch fitness calculation is used, we save ``120 - 30 = 90`` +calls to the fitness function. + +Use Functions and Methods to Build Fitness and Callbacks +======================================================== + +In PyGAD 2.19.0, it is possible to pass user-defined functions or +methods to the following parameters: + +1. ``fitness_func`` + +2. ``on_start`` + +3. ``on_fitness`` + +4. ``on_parents`` + +5. 
``on_crossover`` + +6. ``on_mutation`` + +7. ``on_generation`` + +8. ``on_stop`` + +This section gives 2 examples to assign these parameters user-defined: + +1. Functions. + +2. Methods. + +Assign Functions +---------------- + +This is a dummy example where the fitness function returns a random +value. Note that the instance of the ``pygad.GA`` class is passed as the +first parameter of all functions. + +.. code:: python + + import pygad + import numpy + + def fitness_func(ga_instanse, solution, solution_idx): + return numpy.random.rand() + + def on_start(ga_instanse): + print("on_start") + + def on_fitness(ga_instanse, last_gen_fitness): + print("on_fitness") + + def on_parents(ga_instanse, last_gen_parents): + print("on_parents") + + def on_crossover(ga_instanse, last_gen_offspring): + print("on_crossover") + + def on_mutation(ga_instanse, last_gen_offspring): + print("on_mutation") + + def on_generation(ga_instanse): + print("on_generation\n") + + def on_stop(ga_instanse, last_gen_fitness): + print("on_stop") + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=4, + sol_per_pop=10, + num_genes=2, + on_start=on_start, + on_fitness=on_fitness, + on_parents=on_parents, + on_crossover=on_crossover, + on_mutation=on_mutation, + on_generation=on_generation, + on_stop=on_stop, + fitness_func=fitness_func) + + ga_instance.run() + +Assign Methods +-------------- + +The next example has all the methods defined inside the class ``Test``. +All of the methods accept an additional parameter representing the +method's object of the class ``Test``. + +All methods accept ``self`` as the first parameter and the instance of +the ``pygad.GA`` class as the second parameter. + +.. 
code:: python + + import pygad + import numpy + + class Test: + def fitness_func(self, ga_instanse, solution, solution_idx): + return numpy.random.rand() + + def on_start(self, ga_instanse): + print("on_start") + + def on_fitness(self, ga_instanse, last_gen_fitness): + print("on_fitness") + + def on_parents(self, ga_instanse, last_gen_parents): + print("on_parents") + + def on_crossover(self, ga_instanse, last_gen_offspring): + print("on_crossover") + + def on_mutation(self, ga_instanse, last_gen_offspring): + print("on_mutation") + + def on_generation(self, ga_instanse): + print("on_generation\n") + + def on_stop(self, ga_instanse, last_gen_fitness): + print("on_stop") + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=4, + sol_per_pop=10, + num_genes=2, + on_start=Test().on_start, + on_fitness=Test().on_fitness, + on_parents=Test().on_parents, + on_crossover=Test().on_crossover, + on_mutation=Test().on_mutation, + on_generation=Test().on_generation, + on_stop=Test().on_stop, + fitness_func=Test().fitness_func) + + ga_instance.run() diff --git a/docs/source/releases.rst b/docs/source/releases.rst index 1e518ad..44e95a3 100644 --- a/docs/source/releases.rst +++ b/docs/source/releases.rst @@ -1399,6 +1399,67 @@ Release Date 20 June 2023 section `__. https://github.com/ahmedfgad/GeneticAlgorithmPython/discussions/198 +.. _pygad-320: + +PyGAD 3.2.0 +----------- + +Release Date 6 September 2023 + +1. A new module ``pygad.utils.nsga2`` is created that has the ``NSGA2`` + class that includes the functionalities of NSGA-II. The class has + these methods: 1) ``get_non_dominated_set()`` 2) + ``non_dominated_sorting()`` 3) ``crowding_distance()`` 4) + ``sort_solutions_nsga2()`` + +2. Support of multi-objective optimization using Non-Dominated Sorting + Genetic Algorithm II (NSGA-II) using the ``NSGA2`` class in the + ``pygad.utils.nsga2`` module. 
Just return a ``list``, ``tuple``, or + ``numpy.ndarray`` from the fitness function and the library will + consider the problem as multi-objective optimization. All the + objectives are expected to be maximization. + +3. The parent selection methods and adaptive mutation are edited to + support multi-objective optimization. + +4. Two new NSGA-II parent selection methods are supported in the + ``pygad.utils.parent_selection`` module: 1) Tournament selection for + NSGA-II 2) NSGA-II selection. + +5. The ``plot_fitness()`` method in the ``pygad.plot`` module has a new + optional parameter named ``label`` to accept the label of the plots. + This is only used for multi-objective problems. Otherwise, it is + ignored. It defaults to ``None`` and accepts a ``list``, ``tuple``, + or ``numpy.ndarray``. The labels are used in a legend inside the + plot. + +6. The default color in the methods of the ``pygad.plot`` module is + changed to the greenish ``#64f20c`` color. + +7. A new instance attribute named ``pareto_fronts`` added to the + ``pygad.GA`` instances that holds the pareto fronts when solving a + multi-objective problem. + +8. The ``gene_type`` accepts a ``list``, ``tuple``, or + ``numpy.ndarray`` for integer data types given that the precision is + set to ``None`` (e.g. ``gene_type=[float, [int, None]]``). + +9. In the ``cal_pop_fitness()`` method, the fitness value is re-used if + ``save_best_solutions=True`` and the solution is found in the + ``best_solutions`` attribute. These parameters also can help + re-using the fitness of a solution instead of calling the fitness + function: ``keep_elitism``, ``keep_parents``, and + ``save_solutions``. + +10. The value ``99999999999`` is replaced by ``float('inf')`` in the 2 + methods ``wheel_cumulative_probs()`` and + ``stochastic_universal_selection()`` inside the + ``pygad.utils.parent_selection.ParentSelection`` class. + +11. The ``plot_result()`` method in the ``pygad.visualize.plot.Plot`` + class is removed. 
Instead, please use the ``plot_fitness()`` if you + did not upgrade yet. + PyGAD Projects at GitHub ======================== diff --git a/docs/source/utils.rst b/docs/source/utils.rst new file mode 100644 index 0000000..d3a6951 --- /dev/null +++ b/docs/source/utils.rst @@ -0,0 +1,707 @@ +.. _pygadtorchga-module: + +``pygad.utils`` Module +====================== + +This section of the PyGAD's library documentation discusses the +**pygad.utils** module. + +PyGAD supports different types of operators for selecting the parents, +applying the crossover, and mutation. More features will be added in the +future. To ask for a new feature, please check the `Ask for +Feature `__ +section. + +The submodules in the ``pygad.utils`` module are: + +1. ``crossover``: Has the ``Crossover`` class that implements the + crossover operators. + +2. ``mutation``: Has the ``Mutation`` class that implements the mutation + operators. + +3. ``parent_selection``: Has the ``ParentSelection`` class that + implements the parent selection operators. + +4. ``nsga2``: Has the ``NSGA2`` class that implements the Non-Dominated + Sorting Genetic Algorithm II (NSGA-II). + +Note that the ``pygad.GA`` class extends all of these classes. So, the +user can access any of the methods in such classes directly by the +instance/object of the ``pygad.GA`` class. + +The next sections discuss each submodule. + +.. _pygadutilscrossover-submodule: + +``pygad.utils.crossover`` Submodule +=================================== + +The ``pygad.utils.crossover`` module has a class named ``Crossover`` +with the supported crossover operations which are: + +1. Single point: Implemented using the ``single_point_crossover()`` + method. + +2. Two points: Implemented using the ``two_points_crossover()`` method. + +3. Uniform: Implemented using the ``uniform_crossover()`` method. + +4. Scattered: Implemented using the ``scattered_crossover()`` method. + +All crossover methods accept these parameters: + +1. 
``parents``: The parents to mate for producing the offspring. + +2. ``offspring_size``: The size of the offspring to produce. + +.. _pygadutilsmutation-submodule: + +``pygad.utils.mutation`` Submodule +================================== + +The ``pygad.utils.mutation`` module has a class named ``Mutation`` with +the supported mutation operations which are: + +1. Random: Implemented using the ``random_mutation()`` method. + +2. Swap: Implemented using the ``swap_mutation()`` method. + +3. Inversion: Implemented using the ``inversion_mutation()`` method. + +4. Scramble: Implemented using the ``scramble_mutation()`` method. + +5. Adaptive: Implemented using the ``adaptive_mutation()`` method. + +All mutation methods accept this parameter: + +1. ``offspring``: The offspring to mutate. + +Adaptive Mutation +================= + +In the regular genetic algorithm, the mutation works by selecting a +single fixed mutation rate for all solutions regardless of their fitness +values. So, regardless of whether this solution has high or low quality, +the same number of genes are mutated all the time. + +The pitfalls of using a constant mutation rate for all solutions are +summarized in this paper `Libelli, S. Marsili, and P. Alba. "Adaptive +mutation in genetic algorithms." Soft computing 4.2 (2000): +76-80 `__ +as follows: + + The weak point of "classical" GAs is the total randomness of + mutation, which is applied equally to all chromosomes, irrespective + of their fitness. Thus a very good chromosome is equally likely to be + disrupted by mutation as a bad one. + + On the other hand, bad chromosomes are less likely to produce good + ones through crossover, because of their lack of building blocks, + until they remain unchanged. They would benefit the most from + mutation and could be used to spread throughout the parameter space + to increase the search thoroughness. So there are two conflicting + needs in determining the best probability of mutation. 
+ + Usually, a reasonable compromise in the case of a constant mutation + is to keep the probability low to avoid disruption of good + chromosomes, but this would prevent a high mutation rate of + low-fitness chromosomes. Thus a constant probability of mutation + would probably miss both goals and result in a slow improvement of + the population. + +According to `Libelli, S. Marsili, and P. +Alba. `__ +work, the adaptive mutation solves the problems of constant mutation. + +Adaptive mutation works as follows: + +1. Calculate the average fitness value of the population (``f_avg``). + +2. For each chromosome, calculate its fitness value (``f``). + +3. If ``f<f_avg``, then this solution is regarded as a low-quality + solution and thus the mutation rate should be kept high because this + would increase the quality of this solution. + +4. If ``f>f_avg``, then this solution is regarded as a high-quality + solution and thus the mutation rate should be kept low to avoid + disrupting this high quality solution. + +In PyGAD, if ``f=f_avg``, then the solution is regarded as high quality. + +The next figure summarizes the previous steps. + +.. image:: https://user-images.githubusercontent.com/16560492/103468973-e3c26600-4d2c-11eb-8af3-b3bb39b50540.jpg + :alt: + +This strategy is applied in PyGAD. + +Use Adaptive Mutation in PyGAD +------------------------------ + +In `PyGAD +2.10.0 `__, +adaptive mutation is supported. To use it, just follow the following 2 +simple steps: + +1. In the constructor of the ``pygad.GA`` class, set + ``mutation_type="adaptive"`` to specify that the type of mutation is + adaptive. + +2. Specify the mutation rates for the low and high quality solutions + using one of these 3 parameters according to your preference: + ``mutation_probability``, ``mutation_num_genes``, and + ``mutation_percent_genes``. Please check the `documentation of each + of these + parameters `__ + for more information. 
+ +When adaptive mutation is used, then the value assigned to any of the 3 +parameters can be of any of these data types: + +1. ``list`` + +2. ``tuple`` + +3. 
``numpy.ndarray`` + +Whatever the data type used, the length of the ``list``, ``tuple``, or +the ``numpy.ndarray`` must be exactly 2. That is there are just 2 +values: + +1. The first value is the mutation rate for the low-quality solutions. + +2. The second value is the mutation rate for the high-quality solutions. + +PyGAD expects that the first value is higher than the second value and +thus a warning is printed in case the first value is lower than the +second one. + +Here are some examples to feed the mutation rates: + +.. code:: python + + # mutation_probability + mutation_probability = [0.25, 0.1] + mutation_probability = (0.35, 0.17) + mutation_probability = numpy.array([0.15, 0.05]) + + # mutation_num_genes + mutation_num_genes = [4, 2] + mutation_num_genes = (3, 1) + mutation_num_genes = numpy.array([7, 2]) + + # mutation_percent_genes + mutation_percent_genes = [25, 12] + mutation_percent_genes = (15, 8) + mutation_percent_genes = numpy.array([21, 13]) + +Assume that the average fitness is 12 and the fitness values of 2 +solutions are 15 and 7. If the mutation probabilities are specified as +follows: + +.. code:: python + + mutation_probability = [0.25, 0.1] + +Then the mutation probability of the first solution is 0.1 because its +fitness is 15 which is higher than the average fitness 12. The mutation +probability of the second solution is 0.25 because its fitness is 7 +which is lower than the average fitness 12. + +Here is an example that uses adaptive mutation. + +.. code:: python + + import pygad + import numpy + + function_inputs = [4,-2,3.5,5,-11,-4.7] # Function inputs. + desired_output = 44 # Function output. + + def fitness_func(ga_instance, solution, solution_idx): + # The fitness function calulates the sum of products between each input and its corresponding weight. + output = numpy.sum(solution*function_inputs) + # The value 0.000001 is used to avoid the Inf value when the denominator numpy.abs(output - desired_output) is 0.0. 
+ fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + # Creating an instance of the GA class inside the ga module. Some parameters are initialized within the constructor. + ga_instance = pygad.GA(num_generations=200, + fitness_func=fitness_func, + num_parents_mating=10, + sol_per_pop=20, + num_genes=len(function_inputs), + mutation_type="adaptive", + mutation_num_genes=(3, 1)) + + # Running the GA to optimize the parameters of the function. + ga_instance.run() + + ga_instance.plot_fitness(title="PyGAD with Adaptive Mutation", linewidth=5) + +.. _pygadutilsparentselection-submodule: + +``pygad.utils.parent_selection`` Submodule +========================================== + +The ``pygad.utils.parent_selection`` module has a class named +``ParentSelection`` with the supported parent selection operations which +are: + +1. Steady-state: Implemented using the ``steady_state_selection()`` + method. + +2. Roulette wheel: Implemented using the ``roulette_wheel_selection()`` + method. + +3. Stochastic universal: Implemented using the + ``stochastic_universal_selection()``\ method. + +4. Rank: Implemented using the ``rank_selection()`` method. + +5. Random: Implemented using the ``random_selection()`` method. + +6. Tournament: Implemented using the ``tournament_selection()`` method. + +7. NSGA-II: Implemented using the ``nsga2_selection()`` method. + +8. NSGA-II Tournament: Implemented using the + ``tournament_nsga2_selection()`` method. + +All parent selection methods accept these parameters: + +1. ``fitness``: The fitness of the entire population. + +2. ``num_parents``: The number of parents to select. + +.. _pygadutilsnsga2-submodule: + +``pygad.utils.nsga2`` Submodule +=============================== + +The ``pygad.utils.nsga2`` module has a class named ``NSGA2`` that +implements NSGA-II. The methods inside this class are: + +1. 
``non_dominated_sorting()``: Returns all the pareto fronts by + applying non-dominated sorting over the solutions. + +2. ``get_non_dominated_set()``: Returns the set of non-dominated + solutions from the passed solutions. + +3. ``crowding_distance()``: Calculates the crowding distance for all + solutions in the current pareto front. + +4. ``sort_solutions_nsga2()``: Sort the solutions. If the problem is + single-objective, then the solutions are sorted by sorting the + fitness values of the population. If it is multi-objective, then + non-dominated sorting and crowding distance are applied to sort the + solutions. + +User-Defined Crossover, Mutation, and Parent Selection Operators +================================================================ + +Previously, the user could select the type of the crossover, mutation, +and parent selection operators by assigning the name of the operator to +the following parameters of the ``pygad.GA`` class's constructor: + +1. ``crossover_type`` + +2. ``mutation_type`` + +3. ``parent_selection_type`` + +This way, the user can only use the built-in functions for each of these +operators. + +Starting from `PyGAD +2.16.0 `__, +the user can create a custom crossover, mutation, and parent selection +operators and assign these functions to the above parameters. Thus, a +new operator can be plugged easily into the `PyGAD +Lifecycle `__. + +This is a sample code that does not use any custom function. + +.. 
code:: python + + import pygad + import numpy + + equation_inputs = [4,-2,3.5] + desired_output = 44 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func) + + ga_instance.run() + ga_instance.plot_fitness() + +This section describes the expected input parameters and outputs. For +simplicity, all of these custom functions all accept the instance of the +``pygad.GA`` class as the last parameter. + +User-Defined Crossover Operator +------------------------------- + +The user-defined crossover function is a Python function that accepts 3 +parameters: + +1. The selected parents. + +2. The size of the offspring as a tuple of 2 numbers: (the offspring + size, number of genes). + +3. The instance from the ``pygad.GA`` class. This instance helps to + retrieve any property like ``population``, ``gene_type``, + ``gene_space``, etc. + +This function should return a NumPy array of shape equal to the value +passed to the second parameter. + +The next code creates a template for the user-defined crossover +operator. You can use any names for the parameters. Note how a NumPy +array is returned. + +.. code:: python + + def crossover_func(parents, offspring_size, ga_instance): + offspring = ... + ... + return numpy.array(offspring) + +As an example, the next code creates a single-point crossover function. +By randomly generating a random point (i.e. index of a gene), the +function simply uses 2 parents to produce an offspring by copying the +genes before the point from the first parent and the remaining from the +second parent. + +.. 
code:: python + + def crossover_func(parents, offspring_size, ga_instance): + offspring = [] + idx = 0 + while len(offspring) != offspring_size[0]: + parent1 = parents[idx % parents.shape[0], :].copy() + parent2 = parents[(idx + 1) % parents.shape[0], :].copy() + + random_split_point = numpy.random.choice(range(offspring_size[1])) + + parent1[random_split_point:] = parent2[random_split_point:] + + offspring.append(parent1) + + idx += 1 + + return numpy.array(offspring) + +To use this user-defined function, simply assign its name to the +``crossover_type`` parameter in the constructor of the ``pygad.GA`` +class. The next code gives an example. In this case, the custom function +will be called in each generation rather than calling the built-in +crossover functions defined in PyGAD. + +.. code:: python + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + crossover_type=crossover_func) + +User-Defined Mutation Operator +------------------------------ + +A user-defined mutation function/operator can be created the same way a +custom crossover operator/function is created. Simply, it is a Python +function that accepts 2 parameters: + +1. The offspring to be mutated. + +2. The instance from the ``pygad.GA`` class. This instance helps to + retrieve any property like ``population``, ``gene_type``, + ``gene_space``, etc. + +The template for the user-defined mutation function is given in the next +code. According to the user preference, the function should make some +random changes to the genes. + +.. code:: python + + def mutation_func(offspring, ga_instance): + ... + return offspring + +The next code builds the random mutation where a single gene from each +chromosome is mutated by adding a random number between 0 and 1 to the +gene's value. + +.. 
code:: python + + def mutation_func(offspring, ga_instance): + + for chromosome_idx in range(offspring.shape[0]): + random_gene_idx = numpy.random.choice(range(offspring.shape[1])) + + offspring[chromosome_idx, random_gene_idx] += numpy.random.random() + + return offspring + +Here is how this function is assigned to the ``mutation_type`` +parameter. + +.. code:: python + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + crossover_type=crossover_func, + mutation_type=mutation_func) + +Note that there are other things to take into consideration like: + +- Making sure that each gene conforms to the data type(s) listed in the + ``gene_type`` parameter. + +- If the ``gene_space`` parameter is used, then the new value for the + gene should conform to the values/ranges listed. + +- Mutating a number of genes that conforms to the parameters + ``mutation_percent_genes``, ``mutation_probability``, and + ``mutation_num_genes``. + +- Whether mutation happens with or without replacement based on the + ``mutation_by_replacement`` parameter. + +- The minimum and maximum values from which a random value is generated + based on the ``random_mutation_min_val`` and + ``random_mutation_max_val`` parameters. + +- Whether duplicates are allowed or not in the chromosome based on the + ``allow_duplicate_genes`` parameter. + +and more. + +It all depends on your objective from building the mutation function. +You may neglect or consider some of the considerations according to your +objective. + +User-Defined Parent Selection Operator +-------------------------------------- + +Not much to mention about building a user-defined parent selection +function as things are similar to building a crossover or mutation +function. Just create a Python function that accepts 3 parameters: + +1. The fitness values of the current population. + +2. The number of parents needed. + +3. 
The instance from the ``pygad.GA`` class. This instance helps to + retrieve any property like ``population``, ``gene_type``, + ``gene_space``, etc. + +The function should return 2 outputs: + +1. The selected parents as a NumPy array. Its shape is equal to (the + number of selected parents, ``num_genes``). Note that the number of + selected parents is equal to the value assigned to the second input + parameter. + +2. The indices of the selected parents inside the population. It is a 1D + list with length equal to the number of selected parents. + +The outputs must be of type ``numpy.ndarray``. + +Here is a template for building a custom parent selection function. + +.. code:: python + + def parent_selection_func(fitness, num_parents, ga_instance): + ... + return parents, fitness_sorted[:num_parents] + +The next code builds the steady-state parent selection where the best +parents are selected. The number of parents is equal to the value in the +``num_parents`` parameter. + +.. code:: python + + def parent_selection_func(fitness, num_parents, ga_instance): + + fitness_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) + fitness_sorted.reverse() + + parents = numpy.empty((num_parents, ga_instance.population.shape[1])) + + for parent_num in range(num_parents): + parents[parent_num, :] = ga_instance.population[fitness_sorted[parent_num], :].copy() + + return parents, numpy.array(fitness_sorted[:num_parents]) + +Finally, the defined function is assigned to the +``parent_selection_type`` parameter as in the next code. + +.. code:: python + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + crossover_type=crossover_func, + mutation_type=mutation_func, + parent_selection_type=parent_selection_func) + +Example +------- + +By discussing how to customize the 3 operators, the next code uses the +previous 3 user-defined functions instead of the built-in functions. 
+ +.. code:: python + + import pygad + import numpy + + equation_inputs = [4,-2,3.5] + desired_output = 44 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + + return fitness + + def parent_selection_func(fitness, num_parents, ga_instance): + + fitness_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) + fitness_sorted.reverse() + + parents = numpy.empty((num_parents, ga_instance.population.shape[1])) + + for parent_num in range(num_parents): + parents[parent_num, :] = ga_instance.population[fitness_sorted[parent_num], :].copy() + + return parents, numpy.array(fitness_sorted[:num_parents]) + + def crossover_func(parents, offspring_size, ga_instance): + + offspring = [] + idx = 0 + while len(offspring) != offspring_size[0]: + parent1 = parents[idx % parents.shape[0], :].copy() + parent2 = parents[(idx + 1) % parents.shape[0], :].copy() + + random_split_point = numpy.random.choice(range(offspring_size[1])) + + parent1[random_split_point:] = parent2[random_split_point:] + + offspring.append(parent1) + + idx += 1 + + return numpy.array(offspring) + + def mutation_func(offspring, ga_instance): + + for chromosome_idx in range(offspring.shape[0]): + random_gene_idx = numpy.random.choice(range(offspring.shape[1])) + + offspring[chromosome_idx, random_gene_idx] += numpy.random.random() + + return offspring + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + crossover_type=crossover_func, + mutation_type=mutation_func, + parent_selection_type=parent_selection_func) + + ga_instance.run() + ga_instance.plot_fitness() + +This is the same example but using methods instead of functions. + +.. 
code:: python + + import pygad + import numpy + + equation_inputs = [4,-2,3.5] + desired_output = 44 + + class Test: + def fitness_func(self, ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + + return fitness + + def parent_selection_func(self, fitness, num_parents, ga_instance): + + fitness_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) + fitness_sorted.reverse() + + parents = numpy.empty((num_parents, ga_instance.population.shape[1])) + + for parent_num in range(num_parents): + parents[parent_num, :] = ga_instance.population[fitness_sorted[parent_num], :].copy() + + return parents, numpy.array(fitness_sorted[:num_parents]) + + def crossover_func(self, parents, offspring_size, ga_instance): + + offspring = [] + idx = 0 + while len(offspring) != offspring_size[0]: + parent1 = parents[idx % parents.shape[0], :].copy() + parent2 = parents[(idx + 1) % parents.shape[0], :].copy() + + random_split_point = numpy.random.choice(range(offspring_size[1])) + + parent1[random_split_point:] = parent2[random_split_point:] + + offspring.append(parent1) + + idx += 1 + + return numpy.array(offspring) + + def mutation_func(self, offspring, ga_instance): + + for chromosome_idx in range(offspring.shape[0]): + random_gene_idx = numpy.random.choice(range(offspring.shape[1])) + + offspring[chromosome_idx, random_gene_idx] += numpy.random.random() + + return offspring + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=Test().fitness_func, + parent_selection_type=Test().parent_selection_func, + crossover_type=Test().crossover_func, + mutation_type=Test().mutation_func) + + ga_instance.run() + ga_instance.plot_fitness() diff --git a/docs/source/visualize.rst b/docs/source/visualize.rst new file mode 100644 index 0000000..45dc1e4 --- /dev/null +++ b/docs/source/visualize.rst @@ -0,0 
+1,449 @@ +.. _pygadvisualize-module: + +``pygad.visualize`` Module +========================== + +This section of the PyGAD's library documentation discusses the +**pygad.visualize** module. It offers the methods for results +visualization in PyGAD. + +This section discusses the different options to visualize the results in +PyGAD through these methods: + +1. ``plot_fitness()``: Create plots for the fitness. + +2. ``plot_genes()``: Create plots for the genes. + +3. ``plot_new_solution_rate()``: Create plots for the new solution rate. + +In the following code, the ``save_solutions`` flag is set to ``True`` +which means all solutions are saved in the ``solutions`` attribute. The +code runs for only 10 generations. + +.. code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2, 3.5, 8] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=10, + num_parents_mating=5, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_space=[range(1, 10), range(10, 20), range(15, 30), range(20, 40), range(25, 50), range(10, 30), range(20, 50)], + gene_type=int, + save_solutions=True) + + ga_instance.run() + +Let's explore how to visualize the results by the above mentioned +methods. + +Fitness +======= + +.. _plotfitness: + +``plot_fitness()`` +------------------ + +The ``plot_fitness()`` method shows the fitness value for each +generation. It creates, shows, and returns a figure that summarizes how +the fitness value(s) evolve(s) by generation. + +It works only after completing at least 1 generation. If no generation +is completed (at least 1), an exception is raised. + +This method accepts the following parameters: + +1. ``title``: Title of the figure. + +2. ``xlabel``: X-axis label. + +3. 
``ylabel``: Y-axis label. + +4. ``linewidth``: Line width of the plot. Defaults to ``3``. + +5. ``font_size``: Font size for the labels and title. Defaults to + ``14``. + +6. ``plot_type``: Type of the plot which can be either ``"plot"`` + (default), ``"scatter"``, or ``"bar"``. + +7. ``color``: Color of the plot which defaults to the greenish color + ``"#64f20c"``. + +8. ``label``: The label used for the legend in the figures of + multi-objective problems. It is not used for single-objective + problems. It defaults to ``None`` which means no labels used. + +9. ``save_dir``: Directory to save the figure. + +.. _plottypeplot: + +``plot_type="plot"`` +~~~~~~~~~~~~~~~~~~~~ + +The simplest way to call this method is as follows leaving the +``plot_type`` with its default value ``"plot"`` to create a continuous +line connecting the fitness values across all generations: + +.. code:: python + + ga_instance.plot_fitness() + # ga_instance.plot_fitness(plot_type="plot") + +.. image:: https://user-images.githubusercontent.com/16560492/122472609-d02f5280-cf8e-11eb-88a7-f9366ff6e7c6.png + :alt: + +.. _plottypescatter: + +``plot_type="scatter"`` +~~~~~~~~~~~~~~~~~~~~~~~ + +The ``plot_type`` can also be set to ``"scatter"`` to create a scatter +graph with each individual fitness represented as a dot. The size of +these dots can be changed using the ``linewidth`` parameter. + +.. code:: python + + ga_instance.plot_fitness(plot_type="scatter") + +.. image:: https://user-images.githubusercontent.com/16560492/122473159-75e2c180-cf8f-11eb-942d-31279b286dbd.png + :alt: + +.. _plottypebar: + +``plot_type="bar"`` +~~~~~~~~~~~~~~~~~~~ + +The third value for the ``plot_type`` parameter is ``"bar"`` to create a +bar graph with each individual fitness represented as a bar. + +.. code:: python + + ga_instance.plot_fitness(plot_type="bar") + +.. 
image:: https://user-images.githubusercontent.com/16560492/122473340-b7736c80-cf8f-11eb-89c5-4f7db3b653cc.png + :alt: + +New Solution Rate +================= + +.. _plotnewsolutionrate: + +``plot_new_solution_rate()`` +---------------------------- + +The ``plot_new_solution_rate()`` method presents the number of new +solutions explored in each generation. This helps to figure out if the +genetic algorithm is able to find new solutions as an indication of more +possible evolution. If no new solutions are explored, this is an +indication that no further evolution is possible. + +It works only after completing at least 1 generation. If no generation +is completed (at least 1), an exception is raised. + +The ``plot_new_solution_rate()`` method accepts the same parameters as +in the ``plot_fitness()`` method (it also have 3 possible values for +``plot_type`` parameter). Here are all the parameters it accepts: + +1. ``title``: Title of the figure. + +2. ``xlabel``: X-axis label. + +3. ``ylabel``: Y-axis label. + +4. ``linewidth``: Line width of the plot. Defaults to ``3``. + +5. ``font_size``: Font size for the labels and title. Defaults to + ``14``. + +6. ``plot_type``: Type of the plot which can be either ``"plot"`` + (default), ``"scatter"``, or ``"bar"``. + +7. ``color``: Color of the plot which defaults to ``"#3870FF"``. + +8. ``save_dir``: Directory to save the figure. + +.. _plottypeplot-2: + +``plot_type="plot"`` +~~~~~~~~~~~~~~~~~~~~ + +The default value for the ``plot_type`` parameter is ``"plot"``. + +.. code:: python + + ga_instance.plot_new_solution_rate() + # ga_instance.plot_new_solution_rate(plot_type="plot") + +The next figure shows that, for example, generation 6 has the least +number of new solutions which is 4. The number of new solutions in the +first generation is always equal to the number of solutions in the +population (i.e. 
the value assigned to the ``sol_per_pop`` parameter in +the constructor of the ``pygad.GA`` class) which is 10 in this example. + +.. image:: https://user-images.githubusercontent.com/16560492/122475815-3322e880-cf93-11eb-9648-bf66f823234b.png + :alt: + +.. _plottypescatter-2: + +``plot_type="scatter"`` +~~~~~~~~~~~~~~~~~~~~~~~ + +The previous graph can be represented as scattered points by setting +``plot_type="scatter"``. + +.. code:: python + + ga_instance.plot_new_solution_rate(plot_type="scatter") + +.. image:: https://user-images.githubusercontent.com/16560492/122476108-adec0380-cf93-11eb-80ac-7588bf90492f.png + :alt: + +.. _plottypebar-2: + +``plot_type="bar"`` +~~~~~~~~~~~~~~~~~~~ + +By setting ``plot_type="bar"``, each value is represented as a +vertical bar. + +.. code:: python + + ga_instance.plot_new_solution_rate(plot_type="bar") + +.. image:: https://user-images.githubusercontent.com/16560492/122476173-c2c89700-cf93-11eb-9e77-d39737cd3a96.png + :alt: + +Genes +===== + +.. _plotgenes: + +``plot_genes()`` +---------------- + +The ``plot_genes()`` method is the third option to visualize the PyGAD +results. The ``plot_genes()`` method creates, shows, and returns a +figure that describes each gene. It has different options to create the +figures which helps to: + +1. Explore the gene value for each generation by creating a normal plot. + +2. Create a histogram for each gene. + +3. Create a boxplot. + +It works only after completing at least 1 generation. If no generation +is completed (at least 1), an exception is raised. + +This method accepts the following parameters: + +1. ``title``: Title of the figure. + +2. ``xlabel``: X-axis label. + +3. ``ylabel``: Y-axis label. + +4. ``linewidth``: Line width of the plot. Defaults to ``3``. + +5. ``font_size``: Font size for the labels and title. Defaults to + ``14``. + +6. 
``plot_type``: Type of the plot which can be either ``"plot"`` + (default), ``"scatter"``, or ``"bar"``. + +7. ``graph_type``: Type of the graph which can be either ``"plot"`` + (default), ``"boxplot"``, or ``"histogram"``. + +8. ``fill_color``: Fill color of the graph which defaults to + ``"#3870FF"``. This has no effect if ``graph_type="plot"``. + +9. ``color``: Color of the plot which defaults to ``"#3870FF"``. + +10. ``solutions``: Defaults to ``"all"`` which means use all solutions. + If ``"best"`` then only the best solutions are used. + +11. ``save_dir``: Directory to save the figure. + +This method has 3 control variables: + +1. ``graph_type="plot"``: Can be ``"plot"`` (default), ``"boxplot"``, or + ``"histogram"``. + +2. ``plot_type="plot"``: Identical to the ``plot_type`` parameter + explored in the ``plot_fitness()`` and ``plot_new_solution_rate()`` + methods. + +3. ``solutions="all"``: Can be ``"all"`` (default) or ``"best"``. + +These 3 parameters control the style of the output figure. + +The ``graph_type`` parameter selects the type of the graph which helps +to explore the gene values as: + +1. A normal plot. + +2. A histogram. + +3. A box and whisker plot. + +The ``plot_type`` parameter works only when the type of the graph is set +to ``"plot"``. + +The ``solutions`` parameter selects whether the genes come from all +solutions in the population or from just the best solutions. + +An exception is raised if: + +- ``solutions="all"`` while ``save_solutions=False`` in the constructor + of the ``pygad.GA`` class. + +- ``solutions="best"`` while ``save_best_solutions=False`` in the + constructor of the ``pygad.GA`` class. + +.. _graphtypeplot: + +``graph_type="plot"`` +~~~~~~~~~~~~~~~~~~~~~ + +When ``graph_type="plot"``, then the figure creates a normal graph where +the relationship between the gene values and the generation numbers is +represented as a continuous plot, scattered points, or bars. + +..
_plottypeplot-3: + +``plot_type="plot"`` +^^^^^^^^^^^^^^^^^^^^ + +Because the default value for both ``graph_type`` and ``plot_type`` is +``"plot"``, then all of the lines below create the same figure. This +figure is helpful to know whether a gene value lasts for more +generations as an indication of the best value for this gene. For +example, the value 16 for the gene with index 5 (at column 2 and row 2 +of the next graph) lasted for 83 generations. + +.. code:: python + + ga_instance.plot_genes() + + ga_instance.plot_genes(graph_type="plot") + + ga_instance.plot_genes(plot_type="plot") + + ga_instance.plot_genes(graph_type="plot", + plot_type="plot") + +.. image:: https://user-images.githubusercontent.com/16560492/122477158-4a62d580-cf95-11eb-8c93-9b6e74cb814c.png + :alt: + +As the default value for the ``solutions`` parameter is ``"all"``, then +the following method calls generate the same plot. + +.. code:: python + + ga_instance.plot_genes(solutions="all") + + ga_instance.plot_genes(graph_type="plot", + solutions="all") + + ga_instance.plot_genes(plot_type="plot", + solutions="all") + + ga_instance.plot_genes(graph_type="plot", + plot_type="plot", + solutions="all") + +.. _plottypescatter-3: + +``plot_type="scatter"`` +^^^^^^^^^^^^^^^^^^^^^^^ + +The following calls of the ``plot_genes()`` method create the same +scatter plot. + +.. code:: python + + ga_instance.plot_genes(plot_type="scatter") + + ga_instance.plot_genes(graph_type="plot", + plot_type="scatter", + solutions='all') + +.. image:: https://user-images.githubusercontent.com/16560492/122477273-73836600-cf95-11eb-828f-f357c7b0f815.png + :alt: + +.. _plottypebar-3: + +``plot_type="bar"`` +^^^^^^^^^^^^^^^^^^^ + +.. code:: python + + ga_instance.plot_genes(plot_type="bar") + + ga_instance.plot_genes(graph_type="plot", + plot_type="bar", + solutions='all') + +.. image:: https://user-images.githubusercontent.com/16560492/122477370-99106f80-cf95-11eb-8643-865b55e6b844.png + :alt: + +..
_graphtypeboxplot: + +``graph_type="boxplot"`` +~~~~~~~~~~~~~~~~~~~~~~~~ + +By setting ``graph_type`` to ``"boxplot"``, then a box and whisker graph +is created. Now, the ``plot_type`` parameter has no effect. + +The following 2 calls of the ``plot_genes()`` method create the same +figure as the default value for the ``solutions`` parameter is +``"all"``. + +.. code:: python + + ga_instance.plot_genes(graph_type="boxplot") + + ga_instance.plot_genes(graph_type="boxplot", + solutions='all') + +.. image:: https://user-images.githubusercontent.com/16560492/122479260-beeb4380-cf98-11eb-8f08-23707929b12c.png + :alt: + +.. _graphtypehistogram: + +``graph_type="histogram"`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For ``graph_type="histogram"``, a histogram is created for each gene. +Similar to ``graph_type="boxplot"``, the ``plot_type`` parameter has no +effect. + +The following 2 calls of the ``plot_genes()`` method create the same +figure as the default value for the ``solutions`` parameter is +``"all"``. + +.. code:: python + + ga_instance.plot_genes(graph_type="histogram") + + ga_instance.plot_genes(graph_type="histogram", + solutions='all') + +.. image:: https://user-images.githubusercontent.com/16560492/122477314-8007be80-cf95-11eb-9c95-da3f49204151.png + :alt: + +All the previous figures can be created for only the best solutions by +setting ``solutions="best"``.
From aab09bd3dc13daf676e9d14edfa5a7c15e4f59bc Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 6 Sep 2023 21:09:44 -0400 Subject: [PATCH 17/25] Run tests --- examples/example.py | 10 ++-- examples/example_multi_objective.py | 71 ++++++++++++++++++++++++++++ examples/genetic.pkl | Bin 0 -> 18276 bytes pygad/utils/mutation.py | 2 +- pygad/visualize/plot.py | 2 +- 5 files changed, 78 insertions(+), 7 deletions(-) create mode 100644 examples/example_multi_objective.py create mode 100644 examples/genetic.pkl diff --git a/examples/example.py b/examples/example.py index d73dac1..f22bc72 100644 --- a/examples/example.py +++ b/examples/example.py @@ -44,15 +44,15 @@ def on_generation(ga_instance): # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution(ga_instance.last_generation_fitness) -print("Parameters of the best solution : {solution}".format(solution=solution)) -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Parameters of the best solution : {solution}") +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") prediction = numpy.sum(numpy.array(function_inputs)*solution) -print("Predicted output based on the best solution : {prediction}".format(prediction=prediction)) +print(f"Predicted output based on the best solution : {prediction}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") # Saving the GA instance. filename = 'genetic' # The filename to which the instance is saved. The name is without extension. 
diff --git a/examples/example_multi_objective.py b/examples/example_multi_objective.py new file mode 100644 index 0000000..3fdf35a --- /dev/null +++ b/examples/example_multi_objective.py @@ -0,0 +1,71 @@ +import pygad +import numpy + +""" +Given these 2 functions: + y1 = f(w1:w6) = w1x1 + w2x2 + w3x3 + w4x4 + w5x5 + 6wx6 + y2 = f(w1:w6) = w1x7 + w2x8 + w3x9 + w4x10 + w5x11 + 6wx12 + where (x1,x2,x3,x4,x5,x6)=(4,-2,3.5,5,-11,-4.7) and y=50 + and (x7,x8,x9,x10,x11,x12)=(-2,0.7,-9,1.4,3,5) and y=30 +What are the best values for the 6 weights (w1 to w6)? We are going to use the genetic algorithm to optimize these 2 functions. +This is a multi-objective optimization problem. + +PyGAD considers the problem as multi-objective if the fitness function returns: + 1) List. + 2) Or tuple. + 3) Or numpy.ndarray. +""" + +function_inputs1 = [4,-2,3.5,5,-11,-4.7] # Function 1 inputs. +function_inputs2 = [-2,0.7,-9,1.4,3,5] # Function 2 inputs. +desired_output1 = 50 # Function 1 output. +desired_output2 = 30 # Function 2 output. + +def fitness_func(ga_instance, solution, solution_idx): + output1 = numpy.sum(solution*function_inputs1) + output2 = numpy.sum(solution*function_inputs2) + fitness1 = 1.0 / (numpy.abs(output1 - desired_output1) + 0.000001) + fitness2 = 1.0 / (numpy.abs(output2 - desired_output2) + 0.000001) + return [fitness1, fitness2] + +num_generations = 100 # Number of generations. +num_parents_mating = 10 # Number of solutions to be selected as parents in the mating pool. + +sol_per_pop = 20 # Number of solutions in the population. 
+num_genes = len(function_inputs1) + +last_fitness = 0 +def on_generation(ga_instance): + global last_fitness + print("Generation = {generation}".format(generation=ga_instance.generations_completed)) + print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) + print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) + last_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] + +ga_instance = pygad.GA(num_generations=num_generations, + num_parents_mating=num_parents_mating, + sol_per_pop=sol_per_pop, + num_genes=num_genes, + fitness_func=fitness_func, + parent_selection_type='nsga2', + on_generation=on_generation) + +# Running the GA to optimize the parameters of the function. +ga_instance.run() + +ga_instance.plot_fitness() + +# Returning the details of the best solution. +solution, solution_fitness, solution_idx = ga_instance.best_solution(ga_instance.last_generation_fitness) +print("Parameters of the best solution : {solution}".format(solution=solution)) +print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) +print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + +prediction = numpy.sum(numpy.array(function_inputs1)*solution) +print("Predicted output 1 based on the best solution : {prediction}".format(prediction=prediction)) +prediction = numpy.sum(numpy.array(function_inputs2)*solution) +print("Predicted output 2 based on the best solution : {prediction}".format(prediction=prediction)) + +if ga_instance.best_solution_generation != -1: + print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + diff --git a/examples/genetic.pkl b/examples/genetic.pkl new file mode 100644 index 
0000000000000000000000000000000000000000..c82c1568db19786ae91e66a8c3b13ef30b010c6b GIT binary patch literal 18276 zcmeG^30M=?)(HtA1j4>palxe)Dk4_vR&}OgMU9QVYOM=8PLhEMgd}DrA_^AkhB8{0 z`7K)eKl`+5pW3QVtM93uX>oa4sc5azy5WY3XvL~n*LNn#gb4E6?|c9I{(j#-;Y%|2 z%$a-7J=;C^o)ZfF|7%@4nd`*_+4C}SQ?l!mW&G0KWS6puGuQ$)fl*m(nVE!xWdei? zvo(`t0y7DEs2jSJC1tbOcUUIKfm=6e6RT&!DW~1;ASepU#T{07iel$6u{ll} zr_DAiX2`=Fgx!K0i5$X8!>wrCVzK37Ca2wEHsUmaWfE3`VvCq?;R2)VfQkWxhTi8f zntC9N6=JrUX$;2CBrxcN9rEbu)2B?R50o@#lB`ZC5DuNud3J&w4^vYcoMsDcwo)wP zpJB1#G|XC0FkxOSi~`Cn1dtI~KO*L|0yWvLX$D%IIUZ7VDkg}sSui`{!0a|VtBYdP z@C{u|C7_{G&0!f00Gt*V3s@!sp0(#C8*L6E87>Kj*$8aJG90!XgEJ!oSiY1^SjH>d zB090*!jl6^Ir`b*+7%lHN1*q5J2MCEy2_~^=|dw|sQ9Mv z%etHm4)}90ik{mrHD=H??sA!}d%LT0FaDC@edd30;v%=_(UCz@_Nl!6uYBdgI_Hcl z+&%pJiiZIzet5;9zfmm|=M-90ix%lF^f^1FA>M4%#mVdb*y^wdCp z{s#k+w?2}gYm+`+wX?E@o3v5$BSC*QrNWDUv-Gbgr~g>hq-#rsLLTlt_gf?UhVo6? zuzG6OT0eBhl}R0!O#SVJ@~x}OKHhGKs?mHK%JyB@1s{|AQ$c}F*$7|cPec9G z9oWKMA5<^j->c8%e9#XOIZwRv{r=jHTLa2Ny!!JZ`PLYw-Fd4|y*`NcBFfh~cF*?1 zbgfsPMfyekF|=xVtJBPS{)_V3yg6iD*!e(y#)Q?=bNPry@&y;;vUm74xg5|^@Us~` z*?C2C!F?v^6pXwNtJmv?fbV+h`q0T}*5Gj*@>G@v`VQhQB{7IyhMy|ViiB#bwAe)+3k zx%iE%cI@i*6T)|X(78G-7@d&yWA%vrYQFhhHhmIU;qf(v04Ve>SQ=qfB^Jnvv_qoc_FV}su=Q8J5{&0LWADngp$%s3b&0`_x zfU0d@gD>ke67UravJALhk4oX;EfV8{LwSDL8?6or#JlS)= z`Hgj2blXddxfD*tSB|vwF0VR+WZitdZ@^wZ-X0Y=vSyu@-?+w9<#?}-Yqd{bxa5kQ zk8l1)Wl7&q)Ea#!q_CAAKXz|vmC~w4Qzq1m8hB307al)Jb{Ja2olb1Q#cGs%!h!eS zx8Hq;{Jb#K`Z=%P<529$nt7g-b=XC1fc z1lIxEU3})S#nlNB`;7bS*mhY^n18g#fgq;_qjp((WNK z?`{3m@u3o}%)h*6U)ddQbcEl*;=G&4u^ZC^%LmEP^-0;^{CM**a%*1??dQ3VIW*0@ z`Td?dx&3F>EVu~$HmB4_`;1faRo#cYn%GLqFS;Lhw)pT}r1GmEziLYzchmXL$Z-|H zD6{BKUAhS@RyfgZPQ1afzM#9pK0(Qy;I`eVZ%XdEwfn zk1leD+x%3xvCUJYa@xt}yYe(>QB8;Wrz$QXJX6(O;lrKKNZ&&+M$UlCFEna>} ziT)*f_T^5!f>6WmO|@Oe$$0d*%Jjx-2avECX-5aG3q|)9wHd%}xsObIz$Sg%?+RDB zaN*WYpFiPVjT~uQuF~*5JGA<=bwDV;aNf(ktACd9h2Nh()Z^{z92&Lx{OE;;xziic zhn1dt%&iz1cEfJ*<2Q#t{P5J_a_;Qv>lsI9$WT+#!uCHT2BIvP;I?=&$z|`co~X(j zIOUkF*ks(org67Cp<81Ve2N6I2f_~s;&d7&(CDyHlx?yQ+{GF# 
zHmAvMHfCFhWS?u6(PEw`$sIR=+tR#E(4@^YQV645v0t=1dUVoe3zh8CX?&_UK(afK z0=H$h(pWtTgpZ0jf@2Hu!ra3!!M;zhOmzKyyTfL{4Q7j(&V%4jEwY9s`!Z3!gm`Z- z&EEFJoyiT@2@#fOe65J>8D8baWSJ=Mjc3OVp$H3Mq(!z0qqp!K-OX+YhXpJMd0D2z zaF+x|h>&CmW^q#<*WlBbnd~GYz3Lg=ILcMwaZ&XoliB1{3ZTU;@C@sWuZ)578-fGWhd{ zztq$OTDD4_FQ@&YWS`0l6ec;e75NHU*$mnyzgStEtU#Hs93f-%X`<4l_1nC^Zq@c1 z3+A#3WIIMdIdfPhRG`4M0We$bP8#BF1#W=P5EDU}9fS$9Icb2yT9?2CW#Slw<}_|K z5-g*J&>Hkh;A60OiJ47PSVrY}h6(UY6P{*9n|i0ih9dCL#lO zTEMvobBD*CX={K1Hj~peZ<1yL;I_@^!UU!Pjg-^jW?HP!5}go6ISuZ|DA=nB{LQq0 z-CNwlf=ZD+nLxlX=h$ryVdF-&6MPYAgpHj}IphgMQE6zf+ehmBhujs~e8gelRYs76 zF3IVf!_Ijpg^3WY!+_I9(!E^+0m3E~cmh4^84X;Ey5(+pe5o*5xaW0E&<=vO=rN$F zg{Giu4)>mqOiv)O1fASvTXL6ek+jSt&ywfMviwYb3*1Tev_J0d+aOJ>ey$sBT-u6e3D!mpLQ zEm*3@PRU6g(;%mjUy5qJjGRs`Bi}Dz$r*a`13fvDq{&%&a<-nFGoB@j0KT4yc17;) zoq7)Fxfypj@I06X1s((!_UU9osycknu-e?V1j-5v8s*Wh08jvmhT{M|6KWzXcpipl z&~QQk^JdfB+9{JUS^)+0$PBm4IN(MJ$0wlI;1XT8a%~QiUXurWTQ(=}b%*L=R?@u!w+vg5iWSmQZ2I@R0-y zc+qYhuV0ru3!{Rfz+uOTIxZY{7$L53Jx&QxMTGa3*eihLOw2P9g|*17Uc#Kn9RmBh zCym7wUCtve_B9C#`n-0Y9k21l$_Oj2T-N%wFKGY#4Lho89=rflTTABF{k@r6VD1s0 z1NvLqsqLxT;(W251KPJ|{Cl0VQhc%g5TI5ahps&_W2`S-e^k&j_Tv_Z1cDXKWgQYH zBu>#W3yf148o)9jhG*(VYZoX^XQpV{Y_l1yxTNSUgR^Mo?4#hZzw@W}-B zOs7KnC!_&HIJgu;AmQKx5=ow8j}TV}V)!RK8(80f-9Wgvx2$hd3qEb=8f%W zkoF#r@QePuvsWKAT42~Pxl7t*q}}-Ltu?(XkTFXbe&)>@d3!gis1LAbi@>p*nh*6j!2J~5)o4bO{H|i zBlf1~Z+PS{_TLnKQz;$s2wzMQh)4bnQ&=KBBK=~DK%8&G{)c?H_(9h8NNL;gO3~ksn><4kO`%uRBe>qR{Gz6{}@PIkKYLg#5jaN)a91|LC&P8YKFc z$KU0h{{e}6Yw@?v{z^1>`J2THmYqb>y0+`0%h`{N`fhvYi52$|`?O8rfjf^Nd&e8X zI-fp>yz3G>Vn?djkt%kiiXE{GNUGSWPk2fdJ5t3C6z@nCJ5t4tRIwvf>_`}`bt+Pwid3@YDT9^DbN=n}oaeAms@Rb#c3eN& zmMV7KKRTBxc0B(DK+J)OC0f5%L8M>lyZ>u|^eg>m|BJ-`yI<)`6+2SJj#wk~zfiIB EFIjlL6951J literal 0 HcmV?d00001 diff --git a/pygad/utils/mutation.py b/pygad/utils/mutation.py index 2d6bed9..75db685 100644 --- a/pygad/utils/mutation.py +++ b/pygad/utils/mutation.py @@ -704,7 +704,7 @@ def adaptive_mutation_randomly(self, offspring): # Compare the fitness of each offspring to the 
average fitness of each objective function. fitness_comparison = offspring_fitness[offspring_idx] < average_fitness # Check if the problem is single or multi-objective optimization. - if type(fitness_comparison) is bool: + if type(fitness_comparison) in [bool, numpy.bool_]: # Single-objective optimization problem. if fitness_comparison: adaptive_mutation_num_genes = self.mutation_num_genes[0] diff --git a/pygad/visualize/plot.py b/pygad/visualize/plot.py index a084951..7dffc4b 100644 --- a/pygad/visualize/plot.py +++ b/pygad/visualize/plot.py @@ -44,7 +44,7 @@ def plot_fitness(self, raise RuntimeError("The plot_fitness() (i.e. plot_result()) method can only be called after completing at least 1 generation but ({self.generations_completed}) is completed.") fig = matplotlib.pyplot.figure() - if len(self.best_solutions_fitness[0]) > 1: + if type(self.best_solutions_fitness[0]) in [list, tuple, numpy.ndarray] and len(self.best_solutions_fitness[0]) > 1: # Multi-objective optimization problem. 
if type(linewidth) in pygad.GA.supported_int_float_types: linewidth = [linewidth] From 900df5553cb5fe5da10e81ca9293c0d8b3926c7f Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 6 Sep 2023 21:36:23 -0400 Subject: [PATCH 18/25] Replace format() str method by f-string --- examples/KerasGA/XOR_classification.py | 14 +++++------ examples/KerasGA/cancer_dataset.py | 12 +++++----- examples/KerasGA/cancer_dataset_generator.py | 12 +++++----- examples/KerasGA/image_classification_CNN.py | 12 +++++----- .../KerasGA/image_classification_Dense.py | 12 +++++----- examples/KerasGA/regression_example.py | 12 +++++----- examples/TorchGA/XOR_classification.py | 14 +++++------ examples/TorchGA/image_classification_CNN.py | 12 +++++----- .../TorchGA/image_classification_Dense.py | 12 +++++----- examples/TorchGA/regression_example.py | 12 +++++----- examples/clustering/example_clustering_2.py | 6 ++--- examples/clustering/example_clustering_3.py | 6 ++--- examples/cnn/example_image_classification.py | 6 ++--- examples/example.py | 6 ++--- examples/example_logger.py | 4 ++-- examples/example_multi_objective.py | 18 +++++++------- .../gacnn/example_image_classification.py | 16 ++++++------- examples/gann/example_XOR_classification.py | 22 +++++++++--------- examples/gann/example_classification.py | 22 +++++++++--------- examples/gann/example_regression.py | 18 +++++++------- examples/gann/example_regression_fish.py | 18 +++++++------- examples/genetic.pkl | Bin 18276 -> 0 bytes examples/nn/example_XOR_classification.py | 6 ++--- examples/nn/example_classification.py | 6 ++--- examples/nn/example_regression.py | 2 +- examples/nn/example_regression_fish.py | 2 +- 26 files changed, 141 insertions(+), 141 deletions(-) delete mode 100644 examples/genetic.pkl diff --git a/examples/KerasGA/XOR_classification.py b/examples/KerasGA/XOR_classification.py index 3b0a1e1..2d7e4ee 100644 --- a/examples/KerasGA/XOR_classification.py +++ b/examples/KerasGA/XOR_classification.py @@ -16,8 +16,8 @@ def 
fitness_func(ga_instanse, solution, sol_idx): return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Build the keras model using the functional API. input_layer = tensorflow.keras.layers.Input(2) @@ -62,23 +62,23 @@ def on_generation(ga_instance): # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution() -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") predictions = pygad.kerasga.predict(model=model, solution=solution, data=data_inputs) -print("Predictions : \n", predictions) +print(f"Predictions : \n{predictions}") # Calculate the binary crossentropy for the trained model. bce = tensorflow.keras.losses.BinaryCrossentropy() -print("Binary Crossentropy : ", bce(data_outputs, predictions).numpy()) +print(f"Binary Crossentropy : {bce(data_outputs, predictions).numpy()}") # Calculate the classification accuracy for the trained model. 
ba = tensorflow.keras.metrics.BinaryAccuracy() ba.update_state(data_outputs, predictions) accuracy = ba.result().numpy() -print("Accuracy : ", accuracy) +print(f"Accuracy : {accuracy}") # model.compile(optimizer="Adam", loss="mse", metrics=["mae"]) diff --git a/examples/KerasGA/cancer_dataset.py b/examples/KerasGA/cancer_dataset.py index 5aceae6..f5e87d3 100644 --- a/examples/KerasGA/cancer_dataset.py +++ b/examples/KerasGA/cancer_dataset.py @@ -17,8 +17,8 @@ def fitness_func(ga_instanse, solution, sol_idx): return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(ga_instance.last_generation_fitness)[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # The dataset path. dataset_path = r'../data/Skin_Cancer_Dataset' @@ -71,8 +71,8 @@ def on_generation(ga_instance): # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution(ga_instance.last_generation_fitness) -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") predictions = pygad.kerasga.predict(model=model, solution=solution, @@ -81,13 +81,13 @@ def on_generation(ga_instance): # Calculate the categorical crossentropy for the trained model. cce = tensorflow.keras.losses.CategoricalCrossentropy() -print("Categorical Crossentropy : ", cce(data_outputs, predictions).numpy()) +print(f"Categorical Crossentropy : {cce(data_outputs, predictions).numpy()}") # Calculate the classification accuracy for the trained model. 
ca = tensorflow.keras.metrics.CategoricalAccuracy() ca.update_state(data_outputs, predictions) accuracy = ca.result().numpy() -print("Accuracy : ", accuracy) +print(f"Accuracy : {accuracy}") # model.compile(optimizer="Adam", loss="mse", metrics=["mae"]) diff --git a/examples/KerasGA/cancer_dataset_generator.py b/examples/KerasGA/cancer_dataset_generator.py index 3f8afeb..9746e90 100644 --- a/examples/KerasGA/cancer_dataset_generator.py +++ b/examples/KerasGA/cancer_dataset_generator.py @@ -16,8 +16,8 @@ def fitness_func(ga_instanse, solution, sol_idx): return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(ga_instance.last_generation_fitness)[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # The dataset path. dataset_path = r'../data/Skin_Cancer_Dataset' @@ -65,8 +65,8 @@ def on_generation(ga_instance): # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution(ga_instance.last_generation_fitness) -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") predictions = pygad.kerasga.predict(model=model, solution=solution, @@ -75,13 +75,13 @@ def on_generation(ga_instance): # Calculate the categorical crossentropy for the trained model. cce = tensorflow.keras.losses.CategoricalCrossentropy() -print("Categorical Crossentropy : ", cce(data_outputs, predictions).numpy()) +print(f"Categorical Crossentropy : {cce(data_outputs, predictions).numpy()}") # Calculate the classification accuracy for the trained model. 
ca = tensorflow.keras.metrics.CategoricalAccuracy() ca.update_state(data_outputs, predictions) accuracy = ca.result().numpy() -print("Accuracy : ", accuracy) +print(f"Accuracy : {accuracy}") # model.compile(optimizer="Adam", loss="mse", metrics=["mae"]) diff --git a/examples/KerasGA/image_classification_CNN.py b/examples/KerasGA/image_classification_CNN.py index 9fb4563..a8084ee 100644 --- a/examples/KerasGA/image_classification_CNN.py +++ b/examples/KerasGA/image_classification_CNN.py @@ -16,8 +16,8 @@ def fitness_func(ga_instanse, solution, sol_idx): return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Build the keras model using the functional API. input_layer = tensorflow.keras.layers.Input(shape=(100, 100, 3)) @@ -66,8 +66,8 @@ def on_generation(ga_instance): # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution() -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") predictions = pygad.kerasga.predict(model=model, solution=solution, @@ -76,13 +76,13 @@ def on_generation(ga_instance): # Calculate the categorical crossentropy for the trained model. cce = tensorflow.keras.losses.CategoricalCrossentropy() -print("Categorical Crossentropy : ", cce(data_outputs, predictions).numpy()) +print(f"Categorical Crossentropy : {cce(data_outputs, predictions).numpy()}") # Calculate the classification accuracy for the trained model. 
ca = tensorflow.keras.metrics.CategoricalAccuracy() ca.update_state(data_outputs, predictions) accuracy = ca.result().numpy() -print("Accuracy : ", accuracy) +print(f"Accuracy : {accuracy}") # model.compile(optimizer="Adam", loss="mse", metrics=["mae"]) diff --git a/examples/KerasGA/image_classification_Dense.py b/examples/KerasGA/image_classification_Dense.py index 002e36c..986282a 100644 --- a/examples/KerasGA/image_classification_Dense.py +++ b/examples/KerasGA/image_classification_Dense.py @@ -16,8 +16,8 @@ def fitness_func(ga_instanse, solution, sol_idx): return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Build the keras model using the functional API. input_layer = tensorflow.keras.layers.Input(360) @@ -57,8 +57,8 @@ def on_generation(ga_instance): # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution() -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") # Fetch the parameters of the best solution. predictions = pygad.kerasga.predict(model=model, @@ -68,13 +68,13 @@ def on_generation(ga_instance): # Calculate the categorical crossentropy for the trained model. cce = tensorflow.keras.losses.CategoricalCrossentropy() -print("Categorical Crossentropy : ", cce(data_outputs, predictions).numpy()) +print(f"Categorical Crossentropy : {cce(data_outputs, predictions).numpy()}") # Calculate the classification accuracy for the trained model. 
ca = tensorflow.keras.metrics.CategoricalAccuracy() ca.update_state(data_outputs, predictions) accuracy = ca.result().numpy() -print("Accuracy : ", accuracy) +print(f"Accuracy : {accuracy}") # model.compile(optimizer="Adam", loss="mse", metrics=["mae"]) diff --git a/examples/KerasGA/regression_example.py b/examples/KerasGA/regression_example.py index 2deec1f..11312c3 100644 --- a/examples/KerasGA/regression_example.py +++ b/examples/KerasGA/regression_example.py @@ -17,8 +17,8 @@ def fitness_func(ga_instanse, solution, sol_idx): return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Create the Keras model. input_layer = tensorflow.keras.layers.Input(3) @@ -61,17 +61,17 @@ def on_generation(ga_instance): # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution() -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") predictions = pygad.kerasga.predict(model=model, solution=solution, data=data_inputs) -print("Predictions : \n", predictions) +print(f"Predictions : \n{predictions}") mae = tensorflow.keras.losses.MeanAbsoluteError() abs_error = mae(data_outputs, predictions).numpy() -print("Absolute Error : ", abs_error) +print(f"Absolute Error : {abs_error}") # model.compile(optimizer="Adam", loss="mse", metrics=["mae"]) diff --git a/examples/TorchGA/XOR_classification.py b/examples/TorchGA/XOR_classification.py index f7a2f44..be2e9e8 100644 --- a/examples/TorchGA/XOR_classification.py +++ b/examples/TorchGA/XOR_classification.py @@ -14,8 +14,8 @@ def fitness_func(ga_instanse, solution, sol_idx): return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Create the PyTorch model. input_layer = torch.nn.Linear(2, 4) @@ -68,19 +68,19 @@ def on_generation(ga_instance): # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution() -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") predictions = pygad.torchga.predict(model=model, solution=solution, data=data_inputs) -print("Predictions : \n", predictions.detach().numpy()) +print(f"Predictions : \n{predictions.detach().numpy()}") # Calculate the binary crossentropy for the trained model. -print("Binary Crossentropy : ", loss_function(predictions, data_outputs).detach().numpy()) +print(f"Binary Crossentropy : {loss_function(predictions, data_outputs).detach().numpy()}") # Calculate the classification accuracy of the trained model. a = torch.max(predictions, axis=1) b = torch.max(data_outputs, axis=1) accuracy = torch.true_divide(torch.sum(a.indices == b.indices), len(data_outputs)) -print("Accuracy : ", accuracy.detach().numpy()) +print(f"Accuracy : {accuracy.detach().numpy()}") diff --git a/examples/TorchGA/image_classification_CNN.py b/examples/TorchGA/image_classification_CNN.py index baf1f1b..295cdc5 100644 --- a/examples/TorchGA/image_classification_CNN.py +++ b/examples/TorchGA/image_classification_CNN.py @@ -15,8 +15,8 @@ def fitness_func(ga_instanse, solution, sol_idx): return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Build the PyTorch model. input_layer = torch.nn.Conv2d(in_channels=3, out_channels=5, kernel_size=7) @@ -78,8 +78,8 @@ def on_generation(ga_instance): # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution() -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") predictions = pygad.torchga.predict(model=model, solution=solution, @@ -87,8 +87,8 @@ def on_generation(ga_instance): # print("Predictions : \n", predictions) # Calculate the crossentropy for the trained model. -print("Crossentropy : ", loss_function(predictions, data_outputs).detach().numpy()) +print(f"Crossentropy : {loss_function(predictions, data_outputs).detach().numpy()}") # Calculate the classification accuracy for the trained model. accuracy = torch.true_divide(torch.sum(torch.max(predictions, axis=1).indices == data_outputs), len(data_outputs)) -print("Accuracy : ", accuracy.detach().numpy()) +print(f"Accuracy : {accuracy.detach().numpy()}") diff --git a/examples/TorchGA/image_classification_Dense.py b/examples/TorchGA/image_classification_Dense.py index 91bb4c1..85e8b1f 100644 --- a/examples/TorchGA/image_classification_Dense.py +++ b/examples/TorchGA/image_classification_Dense.py @@ -15,8 +15,8 @@ def fitness_func(ga_instanse, solution, sol_idx): return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Build the PyTorch model using the functional API. input_layer = torch.nn.Linear(360, 50) @@ -64,8 +64,8 @@ def on_generation(ga_instance): # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution() -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") predictions = pygad.torchga.predict(model=model, solution=solution, @@ -73,8 +73,8 @@ def on_generation(ga_instance): # print("Predictions : \n", predictions) # Calculate the crossentropy loss of the trained model. -print("Crossentropy : ", loss_function(predictions, data_outputs).detach().numpy()) +print(f"Crossentropy : {loss_function(predictions, data_outputs).detach().numpy()}") # Calculate the classification accuracy for the trained model. accuracy = torch.true_divide(torch.sum(torch.max(predictions, axis=1).indices == data_outputs), len(data_outputs)) -print("Accuracy : ", accuracy.detach().numpy()) +print(f"Accuracy : {accuracy.detach().numpy()}") diff --git a/examples/TorchGA/regression_example.py b/examples/TorchGA/regression_example.py index 5bf2fc1..a7feb31 100644 --- a/examples/TorchGA/regression_example.py +++ b/examples/TorchGA/regression_example.py @@ -15,8 +15,8 @@ def fitness_func(ga_instanse, solution, sol_idx): return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Create the PyTorch model. input_layer = torch.nn.Linear(3, 2) @@ -64,13 +64,13 @@ def on_generation(ga_instance): # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution() -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") predictions = pygad.torchga.predict(model=model, solution=solution, data=data_inputs) -print("Predictions : \n", predictions.detach().numpy()) +print(f"Predictions : \n{predictions.detach().numpy()}") abs_error = loss_function(predictions, data_outputs) -print("Absolute Error : ", abs_error.detach().numpy()) +print(f"Absolute Error : {abs_error.detach().numpy()}") diff --git a/examples/clustering/example_clustering_2.py b/examples/clustering/example_clustering_2.py index 877e318..9b84654 100644 --- a/examples/clustering/example_clustering_2.py +++ b/examples/clustering/example_clustering_2.py @@ -107,9 +107,9 @@ def fitness_func(ga_instance, solution, solution_idx): ga_instance.run() best_solution, best_solution_fitness, best_solution_idx = ga_instance.best_solution() -print("Best solution is {bs}".format(bs=best_solution)) -print("Fitness of the best solution is {bsf}".format(bsf=best_solution_fitness)) -print("Best solution found after {gen} generations".format(gen=ga_instance.best_solution_generation)) +print(f"Best solution is {best_solution}") +print(f"Fitness of the best solution is {best_solution_fitness}") +print(f"Best solution found after {ga_instance.best_solution_generation} generations") cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist = cluster_data(best_solution, best_solution_idx) diff --git a/examples/clustering/example_clustering_3.py b/examples/clustering/example_clustering_3.py index 608d54b..5c0381d 100644 --- a/examples/clustering/example_clustering_3.py +++ b/examples/clustering/example_clustering_3.py @@ -119,9 +119,9 @@ def 
fitness_func(ga_instance, solution, solution_idx): ga_instance.run() best_solution, best_solution_fitness, best_solution_idx = ga_instance.best_solution() -print("Best solution is {bs}".format(bs=best_solution)) -print("Fitness of the best solution is {bsf}".format(bsf=best_solution_fitness)) -print("Best solution found after {gen} generations".format(gen=ga_instance.best_solution_generation)) +print(f"Best solution is {best_solution}") +print(f"Fitness of the best solution is {best_solution_fitness}") +print(f"Best solution found after {ga_instance.best_solution_generation} generations") cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist = cluster_data(best_solution, best_solution_idx) diff --git a/examples/cnn/example_image_classification.py b/examples/cnn/example_image_classification.py index 9b90a8b..13347ec 100644 --- a/examples/cnn/example_image_classification.py +++ b/examples/cnn/example_image_classification.py @@ -67,6 +67,6 @@ num_wrong = numpy.where(predictions != train_outputs)[0] num_correct = train_outputs.size - num_wrong.size accuracy = 100 * (num_correct/train_outputs.size) -print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) -print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) -print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) +print(f"Number of correct classifications : {num_correct}.") +print(f"Number of wrong classifications : {num_wrong.size}.") +print(f"Classification accuracy : {accuracy}.") diff --git a/examples/example.py b/examples/example.py index f22bc72..72d895c 100644 --- a/examples/example.py +++ b/examples/example.py @@ -25,9 +25,9 @@ def fitness_func(ga_instance, solution, solution_idx): last_fitness = 0 def on_generation(ga_instance): global last_fitness - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = 
{fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) - print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]}") + print(f"Change = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness}") last_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] ga_instance = pygad.GA(num_generations=num_generations, diff --git a/examples/example_logger.py b/examples/example_logger.py index d38a179..bbf44e9 100644 --- a/examples/example_logger.py +++ b/examples/example_logger.py @@ -29,8 +29,8 @@ def fitness_func(ga_instance, solution, solution_idx): return fitness def on_generation(ga_instance): - ga_instance.logger.info("Generation = {generation}".format(generation=ga_instance.generations_completed)) - ga_instance.logger.info("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) + ga_instance.logger.info(f"Generation = {ga_instance.generations_completed}") + ga_instance.logger.info(f"Fitness = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]}") ga_instance = pygad.GA(num_generations=10, sol_per_pop=40, diff --git a/examples/example_multi_objective.py b/examples/example_multi_objective.py index 3fdf35a..479ba9b 100644 --- a/examples/example_multi_objective.py +++ b/examples/example_multi_objective.py @@ -37,9 +37,9 @@ def fitness_func(ga_instance, solution, solution_idx): last_fitness = 0 def on_generation(ga_instance): global last_fitness - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = 
{fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) - print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]}") + print(f"Change = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness}") last_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] ga_instance = pygad.GA(num_generations=num_generations, @@ -57,15 +57,15 @@ def on_generation(ga_instance): # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution(ga_instance.last_generation_fitness) -print("Parameters of the best solution : {solution}".format(solution=solution)) -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Parameters of the best solution : {solution}") +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") prediction = numpy.sum(numpy.array(function_inputs1)*solution) -print("Predicted output 1 based on the best solution : {prediction}".format(prediction=prediction)) +print(f"Predicted output 1 based on the best solution : {prediction}") prediction = numpy.sum(numpy.array(function_inputs2)*solution) -print("Predicted output 2 based on the best solution : {prediction}".format(prediction=prediction)) +print(f"Predicted output 2 based on the best solution : {prediction}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} 
generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") diff --git a/examples/gacnn/example_image_classification.py b/examples/gacnn/example_image_classification.py index 32fa77c..daaec5a 100644 --- a/examples/gacnn/example_image_classification.py +++ b/examples/gacnn/example_image_classification.py @@ -108,21 +108,21 @@ def callback_generation(ga_instance): # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness) -print("Parameters of the best solution : {solution}".format(solution=solution)) -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Parameters of the best solution : {solution}") +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") # Predicting the outputs of the data using the best solution. 
predictions = GACNN_instance.population_networks[solution_idx].predict(data_inputs=data_inputs) -print("Predictions of the trained network : {predictions}".format(predictions=predictions)) +print(f"Predictions of the trained network : {predictions}") # Calculating some statistics num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) -print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) -print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) -print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) +print(f"Number of correct classifications : {num_correct}.") +print(f"Number of wrong classifications : {num_wrong.size}.") +print(f"Classification accuracy : {accuracy}.") diff --git a/examples/gann/example_XOR_classification.py b/examples/gann/example_XOR_classification.py index 2e3f465..b5ed6d7 100644 --- a/examples/gann/example_XOR_classification.py +++ b/examples/gann/example_XOR_classification.py @@ -25,9 +25,9 @@ def callback_generation(ga_instance): GANN_instance.update_population_trained_weights(population_trained_weights=population_matrices) - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) - print("Change = {change}".format(change=ga_instance.best_solution()[1] - last_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") + print(f"Change = {ga_instance.best_solution()[1] - last_fitness}") last_fitness = ga_instance.best_solution()[1].copy() @@ -107,22 +107,22 @@ def callback_generation(ga_instance): # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution() -print("Parameters of the best solution : {solution}".format(solution=solution)) -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Parameters of the best solution : {solution}") +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") # Predicting the outputs of the data using the best solution. predictions = pygad.nn.predict(last_layer=GANN_instance.population_networks[solution_idx], data_inputs=data_inputs) -print("Predictions of the trained network : {predictions}".format(predictions=predictions)) +print(f"Predictions of the trained network : {predictions}") # Calculating some statistics num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) -print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) -print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) -print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) \ No newline at end of file +print(f"Number of correct classifications : {num_correct}.") +print(f"Number of wrong classifications : {num_wrong.size}.") +print(f"Classification accuracy : {accuracy}.") \ No newline at end of file diff --git a/examples/gann/example_classification.py b/examples/gann/example_classification.py index b25a676..a1eb150 100644 --- a/examples/gann/example_classification.py 
+++ b/examples/gann/example_classification.py @@ -21,9 +21,9 @@ def callback_generation(ga_instance): GANN_instance.update_population_trained_weights(population_trained_weights=population_matrices) - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) - print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]}") + print(f"Change = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness}") last_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1].copy() @@ -95,22 +95,22 @@ def callback_generation(ga_instance): # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness) -print("Parameters of the best solution : {solution}".format(solution=solution)) -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Parameters of the best solution : {solution}") +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") # Predicting the outputs of the data using the best solution. 
predictions = pygad.nn.predict(last_layer=GANN_instance.population_networks[solution_idx], data_inputs=data_inputs) -print("Predictions of the trained network : {predictions}".format(predictions=predictions)) +print(f"Predictions of the trained network : {predictions}") # Calculating some statistics num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) -print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) -print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) -print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) \ No newline at end of file +print(f"Number of correct classifications : {num_correct}.") +print(f"Number of wrong classifications : {num_wrong.size}.") +print(f"Classification accuracy : {accuracy}.") \ No newline at end of file diff --git a/examples/gann/example_regression.py b/examples/gann/example_regression.py index f120bbf..427176e 100644 --- a/examples/gann/example_regression.py +++ b/examples/gann/example_regression.py @@ -20,9 +20,9 @@ def callback_generation(ga_instance): GANN_instance.update_population_trained_weights(population_trained_weights=population_matrices) - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) - print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]}") + print(f"Change = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness}") last_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1].copy() @@ -95,19 +95,19 @@ def callback_generation(ga_instance): # 
Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness) -print("Parameters of the best solution : {solution}".format(solution=solution)) -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Parameters of the best solution : {solution}") +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") # Predicting the outputs of the data using the best solution. predictions = pygad.nn.predict(last_layer=GANN_instance.population_networks[solution_idx], data_inputs=data_inputs, problem_type="regression") -print("Predictions of the trained network : {predictions}".format(predictions=predictions)) +print(f"Predictions of the trained network : {predictions}") # Calculating some statistics abs_error = numpy.mean(numpy.abs(predictions - data_outputs)) -print("Absolute error : {abs_error}.".format(abs_error=abs_error)) +print(f"Absolute error : {abs_error}.") diff --git a/examples/gann/example_regression_fish.py b/examples/gann/example_regression_fish.py index 4eb3c0b..3c5beaa 100644 --- a/examples/gann/example_regression_fish.py +++ b/examples/gann/example_regression_fish.py @@ -21,9 +21,9 @@ def callback_generation(ga_instance): GANN_instance.update_population_trained_weights(population_trained_weights=population_matrices) - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = 
{fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) - print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]}") + print(f"Change = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness}") last_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1].copy() @@ -96,19 +96,19 @@ def callback_generation(ga_instance): # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness) -print("Parameters of the best solution : {solution}".format(solution=solution)) -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Parameters of the best solution : {solution}") +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") # Predicting the outputs of the data using the best solution. 
predictions = pygad.nn.predict(last_layer=GANN_instance.population_networks[solution_idx], data_inputs=data_inputs, problem_type="regression") -print("Predictions of the trained network : {predictions}".format(predictions=predictions)) +print("Predictions of the trained network : {predictions}") # Calculating some statistics abs_error = numpy.mean(numpy.abs(predictions - data_outputs)) -print("Absolute error : {abs_error}.".format(abs_error=abs_error)) +print(f"Absolute error : {abs_error}.") diff --git a/examples/genetic.pkl b/examples/genetic.pkl deleted file mode 100644 index c82c1568db19786ae91e66a8c3b13ef30b010c6b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 18276 zcmeG^30M=?)(HtA1j4>palxe)Dk4_vR&}OgMU9QVYOM=8PLhEMgd}DrA_^AkhB8{0 z`7K)eKl`+5pW3QVtM93uX>oa4sc5azy5WY3XvL~n*LNn#gb4E6?|c9I{(j#-;Y%|2 z%$a-7J=;C^o)ZfF|7%@4nd`*_+4C}SQ?l!mW&G0KWS6puGuQ$)fl*m(nVE!xWdei? zvo(`t0y7DEs2jSJC1tbOcUUIKfm=6e6RT&!DW~1;ASepU#T{07iel$6u{ll} zr_DAiX2`=Fgx!K0i5$X8!>wrCVzK37Ca2wEHsUmaWfE3`VvCq?;R2)VfQkWxhTi8f zntC9N6=JrUX$;2CBrxcN9rEbu)2B?R50o@#lB`ZC5DuNud3J&w4^vYcoMsDcwo)wP zpJB1#G|XC0FkxOSi~`Cn1dtI~KO*L|0yWvLX$D%IIUZ7VDkg}sSui`{!0a|VtBYdP z@C{u|C7_{G&0!f00Gt*V3s@!sp0(#C8*L6E87>Kj*$8aJG90!XgEJ!oSiY1^SjH>d zB090*!jl6^Ir`b*+7%lHN1*q5J2MCEy2_~^=|dw|sQ9Mv z%etHm4)}90ik{mrHD=H??sA!}d%LT0FaDC@edd30;v%=_(UCz@_Nl!6uYBdgI_Hcl z+&%pJiiZIzet5;9zfmm|=M-90ix%lF^f^1FA>M4%#mVdb*y^wdCp z{s#k+w?2}gYm+`+wX?E@o3v5$BSC*QrNWDUv-Gbgr~g>hq-#rsLLTlt_gf?UhVo6? 
zuzG6OT0eBhl}R0!O#SVJ@~x}OKHhGKs?mHK%JyB@1s{|AQ$c}F*$7|cPec9G z9oWKMA5<^j->c8%e9#XOIZwRv{r=jHTLa2Ny!!JZ`PLYw-Fd4|y*`NcBFfh~cF*?1 zbgfsPMfyekF|=xVtJBPS{)_V3yg6iD*!e(y#)Q?=bNPry@&y;;vUm74xg5|^@Us~` z*?C2C!F?v^6pXwNtJmv?fbV+h`q0T}*5Gj*@>G@v`VQhQB{7IyhMy|ViiB#bwAe)+3k zx%iE%cI@i*6T)|X(78G-7@d&yWA%vrYQFhhHhmIU;qf(v04Ve>SQ=qfB^Jnvv_qoc_FV}su=Q8J5{&0LWADngp$%s3b&0`_x zfU0d@gD>ke67UravJALhk4oX;EfV8{LwSDL8?6or#JlS)= z`Hgj2blXddxfD*tSB|vwF0VR+WZitdZ@^wZ-X0Y=vSyu@-?+w9<#?}-Yqd{bxa5kQ zk8l1)Wl7&q)Ea#!q_CAAKXz|vmC~w4Qzq1m8hB307al)Jb{Ja2olb1Q#cGs%!h!eS zx8Hq;{Jb#K`Z=%P<529$nt7g-b=XC1fc z1lIxEU3})S#nlNB`;7bS*mhY^n18g#fgq;_qjp((WNK z?`{3m@u3o}%)h*6U)ddQbcEl*;=G&4u^ZC^%LmEP^-0;^{CM**a%*1??dQ3VIW*0@ z`Td?dx&3F>EVu~$HmB4_`;1faRo#cYn%GLqFS;Lhw)pT}r1GmEziLYzchmXL$Z-|H zD6{BKUAhS@RyfgZPQ1afzM#9pK0(Qy;I`eVZ%XdEwfn zk1leD+x%3xvCUJYa@xt}yYe(>QB8;Wrz$QXJX6(O;lrKKNZ&&+M$UlCFEna>} ziT)*f_T^5!f>6WmO|@Oe$$0d*%Jjx-2avECX-5aG3q|)9wHd%}xsObIz$Sg%?+RDB zaN*WYpFiPVjT~uQuF~*5JGA<=bwDV;aNf(ktACd9h2Nh()Z^{z92&Lx{OE;;xziic zhn1dt%&iz1cEfJ*<2Q#t{P5J_a_;Qv>lsI9$WT+#!uCHT2BIvP;I?=&$z|`co~X(j zIOUkF*ks(org67Cp<81Ve2N6I2f_~s;&d7&(CDyHlx?yQ+{GF# zHmAvMHfCFhWS?u6(PEw`$sIR=+tR#E(4@^YQV645v0t=1dUVoe3zh8CX?&_UK(afK z0=H$h(pWtTgpZ0jf@2Hu!ra3!!M;zhOmzKyyTfL{4Q7j(&V%4jEwY9s`!Z3!gm`Z- z&EEFJoyiT@2@#fOe65J>8D8baWSJ=Mjc3OVp$H3Mq(!z0qqp!K-OX+YhXpJMd0D2z zaF+x|h>&CmW^q#<*WlBbnd~GYz3Lg=ILcMwaZ&XoliB1{3ZTU;@C@sWuZ)578-fGWhd{ zztq$OTDD4_FQ@&YWS`0l6ec;e75NHU*$mnyzgStEtU#Hs93f-%X`<4l_1nC^Zq@c1 z3+A#3WIIMdIdfPhRG`4M0We$bP8#BF1#W=P5EDU}9fS$9Icb2yT9?2CW#Slw<}_|K z5-g*J&>Hkh;A60OiJ47PSVrY}h6(UY6P{*9n|i0ih9dCL#lO zTEMvobBD*CX={K1Hj~peZ<1yL;I_@^!UU!Pjg-^jW?HP!5}go6ISuZ|DA=nB{LQq0 z-CNwlf=ZD+nLxlX=h$ryVdF-&6MPYAgpHj}IphgMQE6zf+ehmBhujs~e8gelRYs76 zF3IVf!_Ijpg^3WY!+_I9(!E^+0m3E~cmh4^84X;Ey5(+pe5o*5xaW0E&<=vO=rN$F zg{Giu4)>mqOiv)O1fASvTXL6ek+jSt&ywfMviwYb3*1Tev_J0d+aOJ>ey$sBT-u6e3D!mpLQ zEm*3@PRU6g(;%mjUy5qJjGRs`Bi}Dz$r*a`13fvDq{&%&a<-nFGoB@j0KT4yc17;) zoq7)Fxfypj@I06X1s((!_UU9osycknu-e?V1j-5v8s*Wh08jvmhT{M|6KWzXcpipl 
z&~QQk^JdfB+9{JUS^)+0$PBm4IN(MJ$0wlI;1XT8a%~QiUXurWTQ(=}b%*L=R?@u!w+vg5iWSmQZ2I@R0-y zc+qYhuV0ru3!{Rfz+uOTIxZY{7$L53Jx&QxMTGa3*eihLOw2P9g|*17Uc#Kn9RmBh zCym7wUCtve_B9C#`n-0Y9k21l$_Oj2T-N%wFKGY#4Lho89=rflTTABF{k@r6VD1s0 z1NvLqsqLxT;(W251KPJ|{Cl0VQhc%g5TI5ahps&_W2`S-e^k&j_Tv_Z1cDXKWgQYH zBu>#W3yf148o)9jhG*(VYZoX^XQpV{Y_l1yxTNSUgR^Mo?4#hZzw@W}-B zOs7KnC!_&HIJgu;AmQKx5=ow8j}TV}V)!RK8(80f-9Wgvx2$hd3qEb=8f%W zkoF#r@QePuvsWKAT42~Pxl7t*q}}-Ltu?(XkTFXbe&)>@d3!gis1LAbi@>p*nh*6j!2J~5)o4bO{H|i zBlf1~Z+PS{_TLnKQz;$s2wzMQh)4bnQ&=KBBK=~DK%8&G{)c?H_(9h8NNL;gO3~ksn><4kO`%uRBe>qR{Gz6{}@PIkKYLg#5jaN)a91|LC&P8YKFc z$KU0h{{e}6Yw@?v{z^1>`J2THmYqb>y0+`0%h`{N`fhvYi52$|`?O8rfjf^Nd&e8X zI-fp>yz3G>Vn?djkt%kiiXE{GNUGSWPk2fdJ5t3C6z@nCJ5t4tRIwvf>_`}`bt+Pwid3@YDT9^DbN=n}oaeAms@Rb#c3eN& zmMV7KKRTBxc0B(DK+J)OC0f5%L8M>lyZ>u|^eg>m|BJ-`yI<)`6+2SJj#wk~zfiIB EFIjlL6951J diff --git a/examples/nn/example_XOR_classification.py b/examples/nn/example_XOR_classification.py index 5208679..f40d97f 100644 --- a/examples/nn/example_XOR_classification.py +++ b/examples/nn/example_XOR_classification.py @@ -46,6 +46,6 @@ num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) -print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) -print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) -print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) +print(f"Number of correct classifications : {num_correct}.") +print(f"Number of wrong classifications : {num_wrong.size}.") +print(f"Classification accuracy : {accuracy}.") \ No newline at end of file diff --git a/examples/nn/example_classification.py b/examples/nn/example_classification.py index 732b92d..ac5f97a 100644 --- a/examples/nn/example_classification.py +++ b/examples/nn/example_classification.py @@ -46,6 +46,6 @@ num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = 
data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) -print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) -print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) -print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) +print(f"Number of correct classifications : {num_correct}.") +print(f"Number of wrong classifications : {num_wrong.size}.") +print(f"Classification accuracy : {accuracy}.") diff --git a/examples/nn/example_regression.py b/examples/nn/example_regression.py index 5b9d990..3f0f5af 100644 --- a/examples/nn/example_regression.py +++ b/examples/nn/example_regression.py @@ -43,4 +43,4 @@ # Calculating some statistics abs_error = numpy.mean(numpy.abs(predictions - data_outputs)) -print("Absolute error : {abs_error}.".format(abs_error=abs_error)) +print(f"Absolute error : {abs_error}.") diff --git a/examples/nn/example_regression_fish.py b/examples/nn/example_regression_fish.py index a6aa550..49e6522 100644 --- a/examples/nn/example_regression_fish.py +++ b/examples/nn/example_regression_fish.py @@ -44,4 +44,4 @@ # Calculating some statistics abs_error = numpy.mean(numpy.abs(predictions - data_outputs)) -print("Absolute error : {abs_error}.".format(abs_error=abs_error)) \ No newline at end of file +print(f"Absolute error : {abs_error}.") \ No newline at end of file From 47c6298d209c1be6e5aa9f0f6169670441c2062f Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 6 Sep 2023 21:41:29 -0400 Subject: [PATCH 19/25] Replace format() str method by f-string --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 2014f2d..969efa9 100644 --- a/README.md +++ b/README.md @@ -183,9 +183,9 @@ num_genes = len(function_inputs) last_fitness = 0 def callback_generation(ga_instance): global last_fitness - print("Generation = 
{generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) - print("Change = {change}".format(change=ga_instance.best_solution()[1] - last_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") + print(f"Change = {ga_instance.best_solution()[1] - last_fitness}") last_fitness = ga_instance.best_solution()[1] # Creating an instance of the GA class inside the ga module. Some parameters are initialized within the constructor. @@ -204,15 +204,15 @@ ga_instance.plot_fitness() # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution() -print("Parameters of the best solution : {solution}".format(solution=solution)) -print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) -print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) +print(f"Parameters of the best solution : {solution}") +print(f"Fitness value of the best solution = {solution_fitness}") +print(f"Index of the best solution : {solution_idx}") prediction = numpy.sum(numpy.array(function_inputs)*solution) -print("Predicted output based on the best solution : {prediction}".format(prediction=prediction)) +print(f"Predicted output based on the best solution : {prediction}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") # Saving the GA instance. filename = 'genetic' # The filename to which the instance is saved. The name is without extension. 
From 5e9a5f1c0f345dbe5801318163f22995c882623d Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 6 Sep 2023 21:44:10 -0400 Subject: [PATCH 20/25] Replace format() str method by f-string --- tests/test_allow_duplicate_genes.py | 2 +- tests/test_crossover_mutation.py | 2 +- tests/test_gene_space.py | 2 +- tests/test_gene_space_allow_duplicate_genes.py | 2 +- tests/test_lifecycle_callbacks_calls.py | 8 ++++---- tests/test_number_fitness_function_calls.py | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_allow_duplicate_genes.py b/tests/test_allow_duplicate_genes.py index af7b652..f485419 100644 --- a/tests/test_allow_duplicate_genes.py +++ b/tests/test_allow_duplicate_genes.py @@ -56,7 +56,7 @@ def fitness_func(ga, solution, idx): print(solution) num_duplicates += num - print("Number of duplicates is {num_duplicates}.".format(num_duplicates=num_duplicates)) + print(f"Number of duplicates is {num_duplicates}.") return num_duplicates def test_number_duplicates_default(): diff --git a/tests/test_crossover_mutation.py b/tests/test_crossover_mutation.py index 757d912..6b964c2 100644 --- a/tests/test_crossover_mutation.py +++ b/tests/test_crossover_mutation.py @@ -66,7 +66,7 @@ def fitness_func(ga, solution, idx): comparison_result = numpy.array(comparison_result) result = numpy.all(comparison_result == True) - print("Comparison result is {result}".format(result=result)) + print(f"Comparison result is {result}") return result, ga_instance def test_no_crossover_no_mutation(): diff --git a/tests/test_gene_space.py b/tests/test_gene_space.py index 3a1b21f..c85a4e6 100644 --- a/tests/test_gene_space.py +++ b/tests/test_gene_space.py @@ -188,7 +188,7 @@ def fitness_func(ga, solution, idx): else: num_outside += 1 - print("Number of outside range is {num_outside}.".format(num_outside=num_outside)) + print(f"Number of outside range is {num_outside}.") return num_outside, ga_instance def test_gene_space_range(): diff --git 
a/tests/test_gene_space_allow_duplicate_genes.py b/tests/test_gene_space_allow_duplicate_genes.py index 4d78dea..c2eff96 100644 --- a/tests/test_gene_space_allow_duplicate_genes.py +++ b/tests/test_gene_space_allow_duplicate_genes.py @@ -121,7 +121,7 @@ def fitness_func(ga, solution, idx): else: num_outside += 1 - print("Number of outside range is {num_outside}.".format(num_outside=num_outside)) + print(f"Number of outside range is {num_outside}.") return num_outside, ga_instance def test_gene_space_range(): diff --git a/tests/test_lifecycle_callbacks_calls.py b/tests/test_lifecycle_callbacks_calls.py index a6cce83..b36f05d 100644 --- a/tests/test_lifecycle_callbacks_calls.py +++ b/tests/test_lifecycle_callbacks_calls.py @@ -75,8 +75,8 @@ def on_stop(ga_instance, last_population_fitness): # Use 'generations_completed' instead of 'num_generations' because the evolution may stops in the on_generation() callback. expected_num_callbacks_calls = 1 + ga_instance.generations_completed * 5 + 1 - print("Expected {expected_num_callbacks_calls}.".format(expected_num_callbacks_calls=expected_num_callbacks_calls)) - print("Actual {actual_num_callbacks_calls}.".format(actual_num_callbacks_calls=actual_num_callbacks_calls)) + print(f"Expected {expected_num_callbacks_calls}.") + print(f"Actual {actual_num_callbacks_calls}.") return actual_num_callbacks_calls, expected_num_callbacks_calls def number_lifecycle_callback_methods_calls(stop_criteria=None, @@ -154,8 +154,8 @@ def on_stop(self, ga_instance, last_population_fitness): # Use 'generations_completed' instead of 'num_generations' because the evolution may stops in the on_generation() callback. 
expected_num_callbacks_calls = 1 + ga_instance.generations_completed * 5 + 1 - print("Expected {expected_num_callbacks_calls}.".format(expected_num_callbacks_calls=expected_num_callbacks_calls)) - print("Actual {actual_num_callbacks_calls}.".format(actual_num_callbacks_calls=actual_num_callbacks_calls)) + print(f"Expected {expected_num_callbacks_calls}.") + print(f"Actual {actual_num_callbacks_calls}.") return actual_num_callbacks_calls, expected_num_callbacks_calls def test_number_lifecycle_callback_functions_calls(): diff --git a/tests/test_number_fitness_function_calls.py b/tests/test_number_fitness_function_calls.py index 2910c7b..4d03454 100644 --- a/tests/test_number_fitness_function_calls.py +++ b/tests/test_number_fitness_function_calls.py @@ -56,8 +56,8 @@ def fitness_func(ga, solution, idx): if mutation_type == "adaptive": expected_num_fitness_calls += num_generations * (sol_per_pop - keep_elitism) - print("Expected number of fitness function calls is {expected_num_fitness_calls}.".format(expected_num_fitness_calls=expected_num_fitness_calls)) - print("Actual number of fitness function calls is {actual_num_fitness_calls}.".format(actual_num_fitness_calls=actual_num_fitness_calls)) + print(f"Expected number of fitness function calls is {expected_num_fitness_calls}.") + print(f"Actual number of fitness function calls is {actual_num_fitness_calls}.") return actual_num_fitness_calls, expected_num_fitness_calls def test_number_calls_fitness_function_default_keep(): From 655fd15543a6c5117a550b3aba5663f89446696b Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 6 Sep 2023 22:09:17 -0400 Subject: [PATCH 21/25] Replace format() str method by f-string --- docs/source/cnn.rst | 12 +- docs/source/gacnn.rst | 38 +- docs/source/gann.rst | 100 +- docs/source/kerasga.rst | 74 +- docs/source/nn.rst | 28 +- docs/source/pygad.rst | 32 +- docs/source/pygad_more.rst | 4342 ++++++++++++++++++------------------ docs/source/torchga.rst | 36 +- 8 files changed, 2331 
insertions(+), 2331 deletions(-) diff --git a/docs/source/cnn.rst b/docs/source/cnn.rst index cb578bf..ce2bfe8 100644 --- a/docs/source/cnn.rst +++ b/docs/source/cnn.rst @@ -644,9 +644,9 @@ addition to the classification accuracy. num_wrong = numpy.where(predictions != train_outputs)[0] num_correct = train_outputs.size - num_wrong.size accuracy = 100 * (num_correct/train_outputs.size) - print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) - print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) - print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) + print(f"Number of correct classifications : {num_correct}.") + print(f"Number of wrong classifications : {num_wrong.size}.") + print(f"Classification accuracy : {accuracy}.") It is very important to note that it is not expected that the classification accuracy is high because no training algorithm is used. @@ -743,6 +743,6 @@ files before running this code. num_wrong = numpy.where(predictions != train_outputs)[0] num_correct = train_outputs.size - num_wrong.size accuracy = 100 * (num_correct/train_outputs.size) - print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) - print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) - print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) + print(f"Number of correct classifications : {num_correct}.") + print(f"Number of wrong classifications : {num_wrong.size}.") + print(f"Classification accuracy : {accuracy}.") diff --git a/docs/source/gacnn.rst b/docs/source/gacnn.rst index c9b6336..e9f89ba 100644 --- a/docs/source/gacnn.rst +++ b/docs/source/gacnn.rst @@ -405,7 +405,7 @@ solutions within the population. 
population_matrices = gacnn.population_as_matrices(population_networks=GACNN_instance.population_networks, population_vectors=ga_instance.population) GACNN_instance.update_population_trained_weights(population_trained_weights=population_matrices) - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) + print(f"Generation = {ga_instance.generations_completed}") After preparing the fitness and callback function, next is to create an instance of the ``pygad.GA`` class. @@ -462,7 +462,7 @@ be called to show how the fitness values evolve by generation. ga_instance.plot_fitness() -.. figure:: https://user-images.githubusercontent.com/16560492/83429675-ab744580-a434-11ea-8f21-9d3804b50d15.png +.. image:: https://user-images.githubusercontent.com/16560492/83429675-ab744580-a434-11ea-8f21-9d3804b50d15.png :alt: Information about the Best Solution @@ -483,9 +483,9 @@ Here is how such information is returned. .. code:: python solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Parameters of the best solution : {solution}".format(solution=solution)) - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Parameters of the best solution : {solution}") + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") .. code:: @@ -504,7 +504,7 @@ the labels correctly. .. code:: python predictions = pygad.cnn.predict(last_layer=GANN_instance.population_networks[solution_idx], data_inputs=data_inputs) - print("Predictions of the trained network : {predictions}".format(predictions=predictions)) + print(f"Predictions of the trained network : {predictions}") Calculating Some Statistics --------------------------- @@ -518,9 +518,9 @@ addition to the classification accuracy. 
num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) - print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) - print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) - print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) + print(f"Number of correct classifications : {num_correct}.") + print(f"Number of wrong classifications : {num_wrong.size}.") + print(f"Classification accuracy : {accuracy}.") .. code:: @@ -575,8 +575,8 @@ complete code is listed below. GACNN_instance.update_population_trained_weights(population_trained_weights=population_matrices) - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solutions_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solutions_fitness}") data_inputs = numpy.load("dataset_inputs.npy") data_outputs = numpy.load("dataset_outputs.npy") @@ -642,21 +642,21 @@ complete code is listed below. # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Parameters of the best solution : {solution}".format(solution=solution)) - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Parameters of the best solution : {solution}") + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") # Predicting the outputs of the data using the best solution. predictions = GACNN_instance.population_networks[solution_idx].predict(data_inputs=data_inputs) - print("Predictions of the trained network : {predictions}".format(predictions=predictions)) + print(f"Predictions of the trained network : {predictions}") # Calculating some statistics num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) - print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) - print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) - print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) + print(f"Number of correct classifications : {num_correct}.") + print(f"Number of wrong classifications : {num_wrong.size}.") + print(f"Classification accuracy : {accuracy}.") diff --git a/docs/source/gann.rst b/docs/source/gann.rst index a9b38ff..c3c85b3 100644 --- a/docs/source/gann.rst +++ b/docs/source/gann.rst @@ -458,8 +458,8 @@ solutions within the population. 
population_matrices = pygad.gann.population_as_matrices(population_networks=GANN_instance.population_networks, population_vectors=ga_instance.population) GANN_instance.update_population_trained_weights(population_trained_weights=population_matrices) - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") After preparing the fitness and callback function, next is to create an instance of the ``pygad.GA`` class. @@ -563,9 +563,9 @@ accuracy) is 100. .. code:: python solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Parameters of the best solution : {solution}".format(solution=solution)) - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Parameters of the best solution : {solution}") + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") .. code:: @@ -581,7 +581,7 @@ fitness value is reached after 182 generations. .. code:: python if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") .. code:: @@ -597,7 +597,7 @@ the labels correctly. .. code:: python predictions = pygad.nn.predict(last_layer=GANN_instance.population_networks[solution_idx], data_inputs=data_inputs) - print("Predictions of the trained network : {predictions}".format(predictions=predictions)) + print(f"Predictions of the trained network : {predictions}") .. 
code:: @@ -615,9 +615,9 @@ addition to the classification accuracy. num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) - print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) - print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) - print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) + print(f"Number of correct classifications : {num_correct}.") + print(f"Number of wrong classifications : {num_wrong.size}.") + print(f"Classification accuracy : {accuracy}.") .. code:: @@ -668,9 +668,9 @@ its complete code is listed below. GANN_instance.update_population_trained_weights(population_trained_weights=population_matrices) - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) - print("Change = {change}".format(change=ga_instance.best_solution()[1] - last_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") + print(f"Change = {ga_instance.best_solution()[1] - last_fitness}") last_fitness = ga_instance.best_solution()[1].copy() @@ -750,25 +750,25 @@ its complete code is listed below. # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Parameters of the best solution : {solution}".format(solution=solution)) - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Parameters of the best solution : {solution}") + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") # Predicting the outputs of the data using the best solution. predictions = pygad.nn.predict(last_layer=GANN_instance.population_networks[solution_idx], data_inputs=data_inputs) - print("Predictions of the trained network : {predictions}".format(predictions=predictions)) + print(f"Predictions of the trained network : {predictions}") # Calculating some statistics num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) - print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) - print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) - print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) + print(f"Number of correct classifications : {num_correct}.") + print(f"Number of wrong classifications : {num_wrong.size}.") + print(f"Classification accuracy : {accuracy}.") Image Classification -------------------- @@ -826,9 +826,9 @@ according to the ``num_neurons_output`` parameter of the 
GANN_instance.update_population_trained_weights(population_trained_weights=population_matrices) - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) - print("Change = {change}".format(change=ga_instance.best_solution()[1] - last_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") + print(f"Change = {ga_instance.best_solution()[1] - last_fitness}") last_fitness = ga_instance.best_solution()[1].copy() @@ -900,25 +900,25 @@ according to the ``num_neurons_output`` parameter of the # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Parameters of the best solution : {solution}".format(solution=solution)) - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Parameters of the best solution : {solution}") + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") # Predicting the outputs of the data using the best solution. 
predictions = pygad.nn.predict(last_layer=GANN_instance.population_networks[solution_idx], data_inputs=data_inputs) - print("Predictions of the trained network : {predictions}".format(predictions=predictions)) + print(f"Predictions of the trained network : {predictions}") # Calculating some statistics num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) - print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) - print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) - print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) + print(f"Number of correct classifications : {num_correct}.") + print(f"Number of wrong classifications : {num_wrong.size}.") + print(f"Classification accuracy : {accuracy}.") After training completes, here are the outputs of the print statements. The number of wrong classifications is only 1 and the accuracy is @@ -1002,9 +1002,9 @@ for regression. GANN_instance.update_population_trained_weights(population_trained_weights=population_matrices) - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) - print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]}") + print(f"Change = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness}") last_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1].copy() @@ -1077,22 +1077,22 @@ for regression. # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness) - print("Parameters of the best solution : {solution}".format(solution=solution)) - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Parameters of the best solution : {solution}") + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") # Predicting the outputs of the data using the best solution. predictions = pygad.nn.predict(last_layer=GANN_instance.population_networks[solution_idx], data_inputs=data_inputs, problem_type="regression") - print("Predictions of the trained network : {predictions}".format(predictions=predictions)) + print(f"Predictions of the trained network : {predictions}") # Calculating some statistics abs_error = numpy.mean(numpy.abs(predictions - data_outputs)) - print("Absolute error : {abs_error}.".format(abs_error=abs_error)) + print(f"Absolute error : {abs_error}.") The next figure shows how the fitness value changes for the generations used. @@ -1168,9 +1168,9 @@ Here is the complete code. 
GANN_instance.update_population_trained_weights(population_trained_weights=population_matrices) - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) - print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]}") + print(f"Change = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness}") last_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1].copy() @@ -1243,22 +1243,22 @@ Here is the complete code. # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness) - print("Parameters of the best solution : {solution}".format(solution=solution)) - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Parameters of the best solution : {solution}") + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") # Predicting the outputs of the data using the best solution. 
predictions = pygad.nn.predict(last_layer=GANN_instance.population_networks[solution_idx], data_inputs=data_inputs, problem_type="regression") - print("Predictions of the trained network : {predictions}".format(predictions=predictions)) + print(f"Predictions of the trained network : {predictions}") # Calculating some statistics abs_error = numpy.mean(numpy.abs(predictions - data_outputs)) - print("Absolute error : {abs_error}.".format(abs_error=abs_error)) + print(f"Absolute error : {abs_error}.") The next figure shows how the fitness value changes for the 500 generations used. diff --git a/docs/source/kerasga.rst b/docs/source/kerasga.rst index e1c8316..f39ffec 100644 --- a/docs/source/kerasga.rst +++ b/docs/source/kerasga.rst @@ -254,8 +254,8 @@ subsections discuss each part in the code. return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") input_layer = tensorflow.keras.layers.Input(3) dense_layer1 = tensorflow.keras.layers.Dense(5, activation="relu")(input_layer) @@ -296,18 +296,18 @@ subsections discuss each part in the code. # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") # Make prediction based on the best solution. 
predictions = pygad.kerasga.predict(model=model, solution=solution, data=data_inputs) - print("Predictions : \n", predictions) + print(f"Predictions : \n{predictions}") mae = tensorflow.keras.losses.MeanAbsoluteError() abs_error = mae(data_outputs, predictions).numpy() - print("Absolute Error : ", abs_error) + print(f"Absolute Error : {abs_error}") Create a Keras Model ~~~~~~~~~~~~~~~~~~~~ @@ -461,8 +461,8 @@ To get information about the best solution found by PyGAD, use the # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") .. code:: python @@ -478,7 +478,7 @@ return the model predictions based on the best solution. predictions = pygad.kerasga.predict(model=model, solution=solution, data=data_inputs) - print("Predictions : \n", predictions) + print(f"Predictions : \n{predictions}") .. code:: python @@ -494,7 +494,7 @@ The next code measures the trained model error. mae = tensorflow.keras.losses.MeanAbsoluteError() abs_error = mae(data_outputs, predictions).numpy() - print("Absolute Error : ", abs_error) + print(f"Absolute Error : {abs_error}") .. code:: @@ -527,8 +527,8 @@ previous example. return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Build the keras model using the functional API. input_layer = tensorflow.keras.layers.Input(2) @@ -573,14 +573,14 @@ previous example. 
# Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") # Make predictions based on the best solution. predictions = pygad.kerasga.predict(model=model, solution=solution, data=data_inputs) - print("Predictions : \n", predictions) + print(f"Predictions : \n{predictions}") # Calculate the binary crossentropy for the trained model. bce = tensorflow.keras.losses.BinaryCrossentropy() @@ -590,7 +590,7 @@ previous example. ba = tensorflow.keras.metrics.BinaryAccuracy() ba.update_state(data_outputs, predictions) accuracy = ba.result().numpy() - print("Accuracy : ", accuracy) + print(f"Accuracy : {accuracy}") Compared to the previous regression example, here are the changes: @@ -680,8 +680,8 @@ Here is the code. return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Build the keras model using the functional API. input_layer = tensorflow.keras.layers.Input(360) @@ -721,24 +721,24 @@ Here is the code. # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") # Make predictions based on the best solution. predictions = pygad.kerasga.predict(model=model, solution=solution, data=data_inputs) - # print("Predictions : \n", predictions) + # print(f"Predictions : \n{predictions}") # Calculate the categorical crossentropy for the trained model. cce = tensorflow.keras.losses.CategoricalCrossentropy() - print("Categorical Crossentropy : ", cce(data_outputs, predictions).numpy()) + print(f"Categorical Crossentropy : {cce(data_outputs, predictions).numpy()}") # Calculate the classification accuracy for the trained model. ca = tensorflow.keras.metrics.CategoricalAccuracy() ca.update_state(data_outputs, predictions) accuracy = ca.result().numpy() - print("Accuracy : ", accuracy) + print(f"Accuracy : {accuracy}") Compared to the previous binary classification example, this example has multiple classes (4) and thus the loss is measured using categorical @@ -827,8 +827,8 @@ Here is the complete code. return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Build the keras model using the functional API. input_layer = tensorflow.keras.layers.Input(shape=(100, 100, 3)) @@ -877,24 +877,24 @@ Here is the complete code. # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") # Make predictions based on the best solution. predictions = pygad.kerasga.predict(model=model, solution=solution, data=data_inputs) - # print("Predictions : \n", predictions) + # print(f"Predictions : \n{predictions}") # Calculate the categorical crossentropy for the trained model. cce = tensorflow.keras.losses.CategoricalCrossentropy() - print("Categorical Crossentropy : ", cce(data_outputs, predictions).numpy()) + print(f"Categorical Crossentropy : {cce(data_outputs, predictions).numpy()}") # Calculate the classification accuracy for the trained model. ca = tensorflow.keras.metrics.CategoricalAccuracy() ca.update_state(data_outputs, predictions) accuracy = ca.result().numpy() - print("Accuracy : ", accuracy) + print(f"Accuracy : {accuracy}") Compared to the previous example, the only change is that the architecture uses convolutional and max-pooling layers. The shape of @@ -1010,8 +1010,8 @@ more computational time. return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(ga_instance.last_generation_fitness)[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution(ga_instance.last_generation_fitness)[1]}") # The dataset path. dataset_path = r'../data/Skin_Cancer_Dataset' @@ -1059,20 +1059,20 @@ more computational time. # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution(ga_instance.last_generation_fitness) - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") predictions = pygad.kerasga.predict(model=model, solution=solution, data=train_generator) - # print("Predictions : \n", predictions) + # print(f"Predictions : \n{predictions}") # Calculate the categorical crossentropy for the trained model. cce = tensorflow.keras.losses.CategoricalCrossentropy() - print("Categorical Crossentropy : ", cce(data_outputs, predictions).numpy()) + print(f"Categorical Crossentropy : {cce(data_outputs, predictions).numpy()}") # Calculate the classification accuracy for the trained model. ca = tensorflow.keras.metrics.CategoricalAccuracy() ca.update_state(data_outputs, predictions) accuracy = ca.result().numpy() - print("Accuracy : ", accuracy) + print(f"Accuracy : {accuracy}") diff --git a/docs/source/nn.rst b/docs/source/nn.rst index c7902ec..26b0af6 100644 --- a/docs/source/nn.rst +++ b/docs/source/nn.rst @@ -181,7 +181,7 @@ points to the layer **N-2**, the layer **N-2** points to the layer **N-3**, and so on until reaching the end of the linked list which is layer 1 (input layer). -.. figure:: https://user-images.githubusercontent.com/16560492/81918975-816af880-95d7-11ea-83e3-34d14c3316db.jpg +.. image:: https://user-images.githubusercontent.com/16560492/81918975-816af880-95d7-11ea-83e3-34d14c3316db.jpg :alt: The one way linked list allows returning all properties of all layers in @@ -680,9 +680,9 @@ addition to the classification accuracy. 
num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) - print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) - print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) - print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) + print(f"Number of correct classifications : {num_correct}.") + print(f"Number of wrong classifications : {num_wrong.size}.") + print(f"Classification accuracy : {accuracy}.") It is very important to note that it is not expected that the classification accuracy is high because no training algorithm is used. @@ -746,9 +746,9 @@ has 2 neurons, one for each class. num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) - print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) - print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) - print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) + print(f"Number of correct classifications : {num_correct}.") + print(f"Number of wrong classifications : {num_wrong.size}.") + print(f"Classification accuracy : {accuracy}.") Image Classification -------------------- @@ -805,9 +805,9 @@ files before running this code. 
num_wrong = numpy.where(predictions != data_outputs)[0] num_correct = data_outputs.size - num_wrong.size accuracy = 100 * (num_correct/data_outputs.size) - print("Number of correct classifications : {num_correct}.".format(num_correct=num_correct)) - print("Number of wrong classifications : {num_wrong}.".format(num_wrong=num_wrong.size)) - print("Classification accuracy : {accuracy}.".format(accuracy=accuracy)) + print(f"Number of correct classifications : {num_correct}.") + print(f"Number of wrong classifications : {num_wrong.size}.") + print(f"Classification accuracy : {accuracy}.") Regression Example 1 -------------------- @@ -839,7 +839,7 @@ what to do to make the code works for regression: .. code:: python abs_error = numpy.mean(numpy.abs(predictions - data_outputs)) - print("Absolute error : {abs_error}.".format(abs_error=abs_error)) + print(f"Absolute error : {abs_error}.") Here is the complete code. Yet, there is no algorithm used to train the network and thus the network is expected to give bad results. Later, the @@ -886,7 +886,7 @@ classification networks. # Calculating some statistics abs_error = numpy.mean(numpy.abs(predictions - data_outputs)) - print("Absolute error : {abs_error}.".format(abs_error=abs_error)) + print(f"Absolute error : {abs_error}.") Regression Example 2 - Fish Weight Prediction --------------------------------------------- @@ -928,7 +928,7 @@ error is calculated. .. code:: python abs_error = numpy.mean(numpy.abs(predictions - data_outputs)) - print("Absolute error : {abs_error}.".format(abs_error=abs_error)) + print(f"Absolute error : {abs_error}.") Here is the complete code. @@ -973,4 +973,4 @@ Here is the complete code. 
# Calculating some statistics abs_error = numpy.mean(numpy.abs(predictions - data_outputs)) - print("Absolute error : {abs_error}.".format(abs_error=abs_error)) + print(f"Absolute error : {abs_error}.") diff --git a/docs/source/pygad.rst b/docs/source/pygad.rst index ce92704..69030f8 100644 --- a/docs/source/pygad.rst +++ b/docs/source/pygad.rst @@ -1361,9 +1361,9 @@ is returned using the ``best_solution()`` method. .. code:: python solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Parameters of the best solution : {solution}".format(solution=solution)) - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Parameters of the best solution : {solution}") + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") Using the ``best_solution_generation`` attribute of the instance from the ``pygad.GA`` class, the generation number at which the @@ -1372,7 +1372,7 @@ the ``pygad.GA`` class, the generation number at which the .. code:: python if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") .. _saving--loading-the-results: @@ -1543,9 +1543,9 @@ below. 
last_fitness = 0 def on_generation(ga_instance): global last_fitness - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) - print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]}") + print(f"Change = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness}") last_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] ga_instance = pygad.GA(num_generations=num_generations, @@ -1562,15 +1562,15 @@ below. # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution(ga_instance.last_generation_fitness) - print("Parameters of the best solution : {solution}".format(solution=solution)) - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Parameters of the best solution : {solution}") + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") prediction = numpy.sum(numpy.array(function_inputs)*solution) - print("Predicted output based on the best solution : {prediction}".format(prediction=prediction)) + print(f"Predicted output based on the best solution : {prediction}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after 
{ga_instance.best_solution_generation} generations.") # Saving the GA instance. filename = 'genetic' # The filename to which the instance is saved. The name is without extension. @@ -1691,7 +1691,7 @@ its code is listed below. def chromosome2img(vector, shape): if len(vector) != functools.reduce(operator.mul, shape): - raise ValueError("A vector of length {vector_length} into an array of shape {shape}.".format(vector_length=len(vector), shape=shape)) + raise ValueError(f"A vector of length {len(vector)} into an array of shape {shape}.") return numpy.reshape(a=vector, newshape=shape) @@ -1764,11 +1764,11 @@ Here is some information about the best solution. # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") if ga_instance.best_solution_generation != -1: - print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation)) + print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.") result = gari.chromosome2img(solution, target_im.shape) matplotlib.pyplot.imshow(result) diff --git a/docs/source/pygad_more.rst b/docs/source/pygad_more.rst index a965391..9943c25 100644 --- a/docs/source/pygad_more.rst +++ b/docs/source/pygad_more.rst @@ -1,2171 +1,2171 @@ -More About PyGAD -================ - -.. 
_limit-the-gene-value-range-using-the-genespace-parameter: - -Limit the Gene Value Range using the ``gene_space`` Parameter -============================================================= - -In `PyGAD -2.11.0 `__, -the ``gene_space`` parameter supported a new feature to allow -customizing the range of accepted values for each gene. Let's take a -quick review of the ``gene_space`` parameter to build over it. - -The ``gene_space`` parameter allows the user to feed the space of values -of each gene. This way the accepted values for each gene is retracted to -the user-defined values. Assume there is a problem that has 3 genes -where each gene has different set of values as follows: - -1. Gene 1: ``[0.4, 12, -5, 21.2]`` - -2. Gene 2: ``[-2, 0.3]`` - -3. Gene 3: ``[1.2, 63.2, 7.4]`` - -Then, the ``gene_space`` for this problem is as given below. Note that -the order is very important. - -.. code:: python - - gene_space = [[0.4, 12, -5, 21.2], - [-2, 0.3], - [1.2, 63.2, 7.4]] - -In case all genes share the same set of values, then simply feed a -single list to the ``gene_space`` parameter as follows. In this case, -all genes can only take values from this list of 6 values. - -.. code:: python - - gene_space = [33, 7, 0.5, 95. 6.3, 0.74] - -The previous example restricts the gene values to just a set of fixed -number of discrete values. In case you want to use a range of discrete -values to the gene, then you can use the ``range()`` function. For -example, ``range(1, 7)`` means the set of allowed values for the gene -are ``1, 2, 3, 4, 5, and 6``. You can also use the ``numpy.arange()`` or -``numpy.linspace()`` functions for the same purpose. - -The previous discussion only works with a range of discrete values not -continuous values. In `PyGAD -2.11.0 `__, -the ``gene_space`` parameter can be assigned a dictionary that allows -the gene to have values from a continuous range. 
- -Assuming you want to restrict the gene within this half-open range [1 to -5) where 1 is included and 5 is not. Then simply create a dictionary -with 2 items where the keys of the 2 items are: - -1. ``'low'``: The minimum value in the range which is 1 in the example. - -2. ``'high'``: The maximum value in the range which is 5 in the example. - -The dictionary will look like that: - -.. code:: python - - {'low': 1, - 'high': 5} - -It is not acceptable to add more than 2 items in the dictionary or use -other keys than ``'low'`` and ``'high'``. - -For a 3-gene problem, the next code creates a dictionary for each gene -to restrict its values in a continuous range. For the first gene, it can -take any floating-point value from the range that starts from 1 -(inclusive) and ends at 5 (exclusive). - -.. code:: python - - gene_space = [{'low': 1, 'high': 5}, {'low': 0.3, 'high': 1.4}, {'low': -0.2, 'high': 4.5}] - -.. _more-about-the-genespace-parameter: - -More about the ``gene_space`` Parameter -======================================= - -The ``gene_space`` parameter customizes the space of values of each -gene. - -Assuming that all genes have the same global space which include the -values 0.3, 5.2, -4, and 8, then those values can be assigned to the -``gene_space`` parameter as a list, tuple, or range. Here is a list -assigned to this parameter. By doing that, then the gene values are -restricted to those assigned to the ``gene_space`` parameter. - -.. code:: python - - gene_space = [0.3, 5.2, -4, 8] - -If some genes have different spaces, then ``gene_space`` should accept a -nested list or tuple. In this case, the elements could be: - -1. Number (of ``int``, ``float``, or ``NumPy`` data types): A single - value to be assigned to the gene. This means this gene will have the - same value across all generations. - -2. 
``list``, ``tuple``, ``numpy.ndarray``, or any range like ``range``, - ``numpy.arange()``, or ``numpy.linspace``: It holds the space for - each individual gene. But this space is usually discrete. That is - there is a set of finite values to select from. - -3. ``dict``: To sample a value for a gene from a continuous range. The - dictionary must have 2 mandatory keys which are ``"low"`` and - ``"high"`` in addition to an optional key which is ``"step"``. A - random value is returned between the values assigned to the items - with ``"low"`` and ``"high"`` keys. If the ``"step"`` exists, then - this works as the previous options (i.e. discrete set of values). - -4. ``None``: A gene with its space set to ``None`` is initialized - randomly from the range specified by the 2 parameters - ``init_range_low`` and ``init_range_high``. For mutation, its value - is mutated based on a random value from the range specified by the 2 - parameters ``random_mutation_min_val`` and - ``random_mutation_max_val``. If all elements in the ``gene_space`` - parameter are ``None``, the parameter will not have any effect. - -Assuming that a chromosome has 2 genes and each gene has a different -value space. Then the ``gene_space`` could be assigned a nested -list/tuple where each element determines the space of a gene. - -According to the next code, the space of the first gene is ``[0.4, -5]`` -which has 2 values and the space for the second gene is -``[0.5, -3.2, 8.8, -9]`` which has 4 values. - -.. code:: python - - gene_space = [[0.4, -5], [0.5, -3.2, 8.2, -9]] - -For a 2 gene chromosome, if the first gene space is restricted to the -discrete values from 0 to 4 and the second gene is restricted to the -values from 10 to 19, then it could be specified according to the next -code. - -.. code:: python - - gene_space = [range(5), range(10, 20)] - -The ``gene_space`` can also be assigned to a single range, as given -below, where the values of all genes are sampled from the same range. - -.. 
code:: python - - gene_space = numpy.arange(15) - -The ``gene_space`` can be assigned a dictionary to sample a value from a -continuous range. - -.. code:: python - - gene_space = {"low": 4, "high": 30} - -A step also can be assigned to the dictionary. This works as if a range -is used. - -.. code:: python - - gene_space = {"low": 4, "high": 30, "step": 2.5} - -.. - - Setting a ``dict`` like ``{"low": 0, "high": 10}`` in the - ``gene_space`` means that random values from the continuous range [0, - 10) are sampled. Note that ``0`` is included but ``10`` is not - included while sampling. Thus, the maximum value that could be - returned is less than ``10`` like ``9.9999``. But if the user decided - to round the genes using, for example, ``[float, 2]``, then this - value will become 10. So, the user should be careful to the inputs. - -If a ``None`` is assigned to only a single gene, then its value will be -randomly generated initially using the ``init_range_low`` and -``init_range_high`` parameters in the ``pygad.GA`` class's constructor. -During mutation, the value are sampled from the range defined by the 2 -parameters ``random_mutation_min_val`` and ``random_mutation_max_val``. -This is an example where the second gene is given a ``None`` value. - -.. code:: python - - gene_space = [range(5), None, numpy.linspace(10, 20, 300)] - -If the user did not assign the initial population to the -``initial_population`` parameter, the initial population is created -randomly based on the ``gene_space`` parameter. Moreover, the mutation -is applied based on this parameter. - -.. _how-mutation-works-with-the-genespace-parameter: - -How Mutation Works with the ``gene_space`` Parameter? ------------------------------------------------------ - -If a gene has its static space defined in the ``gene_space`` parameter, -then mutation works by replacing the gene value by a value randomly -selected from the gene space. This happens for both ``int`` and -``float`` data types. 
- -For example, the following ``gene_space`` has the static space -``[1, 2, 3]`` defined for the first gene. So, this gene can only have a -value out of these 3 values. - -.. code:: python - - Gene space: [[1, 2, 3], - None] - Solution: [1, 5] - -For a solution like ``[1, -0.5, 4]``, then mutation happens for the -first gene by simply replacing its current value by a randomly selected -value (other than its current value if possible). So, the value 1 will -be replaced by either 2 or 3. - -For the second gene, its space is set to ``None``. So, traditional -mutation happens for this gene by: - -1. Generating a random value from the range defined by the - ``random_mutation_min_val`` and ``random_mutation_max_val`` - parameters. - -2. Adding this random value to the current gene's value. - -If its current value is 5 and the random value is ``-0.5``, then the new -value is 4.5. If the gene type is integer, then the value will be -rounded. - -Stop at Any Generation -====================== - -In `PyGAD -2.4.0 `__, -it is possible to stop the genetic algorithm after any generation. All -you need to do it to return the string ``"stop"`` in the callback -function ``on_generation``. When this callback function is implemented -and assigned to the ``on_generation`` parameter in the constructor of -the ``pygad.GA`` class, then the algorithm immediately stops after -completing its current generation. Let's discuss an example. - -Assume that the user wants to stop algorithm either after the 100 -generations or if a condition is met. The user may assign a value of 100 -to the ``num_generations`` parameter of the ``pygad.GA`` class -constructor. - -The condition that stops the algorithm is written in a callback function -like the one in the next code. If the fitness value of the best solution -exceeds 70, then the string ``"stop"`` is returned. - -.. 
code:: python - - def func_generation(ga_instance): - if ga_instance.best_solution()[1] >= 70: - return "stop" - -Stop Criteria -============= - -In `PyGAD -2.15.0 `__, -a new parameter named ``stop_criteria`` is added to the constructor of -the ``pygad.GA`` class. It helps to stop the evolution based on some -criteria. It can be assigned to one or more criterion. - -Each criterion is passed as ``str`` that consists of 2 parts: - -1. Stop word. - -2. Number. - -It takes this form: - -.. code:: python - - "word_num" - -The current 2 supported words are ``reach`` and ``saturate``. - -The ``reach`` word stops the ``run()`` method if the fitness value is -equal to or greater than a given fitness value. An example for ``reach`` -is ``"reach_40"`` which stops the evolution if the fitness is >= 40. - -``saturate`` stops the evolution if the fitness saturates for a given -number of consecutive generations. An example for ``saturate`` is -``"saturate_7"`` which means stop the ``run()`` method if the fitness -does not change for 7 consecutive generations. - -Here is an example that stops the evolution if either the fitness value -reached ``127.4`` or if the fitness saturates for ``15`` generations. - -.. code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, 9, 4] - desired_output = 44 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - - return fitness - - ga_instance = pygad.GA(num_generations=200, - sol_per_pop=10, - num_parents_mating=4, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - stop_criteria=["reach_127.4", "saturate_15"]) - - ga_instance.run() - print("Number of generations passed is {generations_completed}".format(generations_completed=ga_instance.generations_completed)) - -Elitism Selection -================= - -In `PyGAD -2.18.0 `__, -a new parameter called ``keep_elitism`` is supported. 
It accepts an -integer to define the number of elitism (i.e. best solutions) to keep in -the next generation. This parameter defaults to ``1`` which means only -the best solution is kept in the next generation. - -In the next example, the ``keep_elitism`` parameter in the constructor -of the ``pygad.GA`` class is set to 2. Thus, the best 2 solutions in -each generation are kept in the next generation. - -.. code:: python - - import numpy - import pygad - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / numpy.abs(output - desired_output) - return fitness - - ga_instance = pygad.GA(num_generations=2, - num_parents_mating=3, - fitness_func=fitness_func, - num_genes=6, - sol_per_pop=5, - keep_elitism=2) - - ga_instance.run() - -The value passed to the ``keep_elitism`` parameter must satisfy 2 -conditions: - -1. It must be ``>= 0``. - -2. It must be ``<= sol_per_pop``. That is its value cannot exceed the - number of solutions in the current population. - -In the previous example, if the ``keep_elitism`` parameter is set equal -to the value passed to the ``sol_per_pop`` parameter, which is 5, then -there will be no evolution at all as in the next figure. This is because -all the 5 solutions are used as elitism in the next generation and no -offspring will be created. - -.. code:: python - - ... - - ga_instance = pygad.GA(..., - sol_per_pop=5, - keep_elitism=5) - - ga_instance.run() - -.. image:: https://user-images.githubusercontent.com/16560492/189273225-67ffad41-97ab-45e1-9324-429705e17b20.png - :alt: - -Note that if the ``keep_elitism`` parameter is effective (i.e. is -assigned a positive integer, not zero), then the ``keep_parents`` -parameter will have no effect. Because the default value of the -``keep_elitism`` parameter is 1, then the ``keep_parents`` parameter has -no effect by default. 
The ``keep_parents`` parameter is only effective -when ``keep_elitism=0``. - -Random Seed -=========== - -In `PyGAD -2.18.0 `__, -a new parameter called ``random_seed`` is supported. Its value is used -as a seed for the random function generators. - -PyGAD uses random functions in these 2 libraries: - -1. NumPy - -2. random - -The ``random_seed`` parameter defaults to ``None`` which means no seed -is used. As a result, different random numbers are generated for each -run of PyGAD. - -If this parameter is assigned a proper seed, then the results will be -reproducible. In the next example, the integer 2 is used as a random -seed. - -.. code:: python - - import numpy - import pygad - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / numpy.abs(output - desired_output) - return fitness - - ga_instance = pygad.GA(num_generations=2, - num_parents_mating=3, - fitness_func=fitness_func, - sol_per_pop=5, - num_genes=6, - random_seed=2) - - ga_instance.run() - best_solution, best_solution_fitness, best_match_idx = ga_instance.best_solution() - print(best_solution) - print(best_solution_fitness) - -This is the best solution found and its fitness value. - -.. code:: - - [ 2.77249188 -4.06570662 0.04196872 -3.47770796 -0.57502138 -3.22775267] - 0.04872203136549972 - -After running the code again, it will find the same result. - -.. code:: - - [ 2.77249188 -4.06570662 0.04196872 -3.47770796 -0.57502138 -3.22775267] - 0.04872203136549972 - -Continue without Loosing Progress -================================= - -In `PyGAD -2.18.0 `__, -and thanks for `Felix Bernhard `__ for -opening `this GitHub -issue `__, -the values of these 4 instance attributes are no longer reset after each -call to the ``run()`` method. - -1. ``self.best_solutions`` - -2. ``self.best_solutions_fitness`` - -3. ``self.solutions`` - -4. 
``self.solutions_fitness`` - -This helps the user to continue where the last run stopped without -loosing the values of these 4 attributes. - -Now, the user can save the model by calling the ``save()`` method. - -.. code:: python - - import pygad - - def fitness_func(ga_instance, solution, solution_idx): - ... - return fitness - - ga_instance = pygad.GA(...) - - ga_instance.run() - - ga_instance.plot_fitness() - - ga_instance.save("pygad_GA") - -Then the saved model is loaded by calling the ``load()`` function. After -calling the ``run()`` method over the loaded instance, then the data -from the previous 4 attributes are not reset but extended with the new -data. - -.. code:: python - - import pygad - - def fitness_func(ga_instance, solution, solution_idx): - ... - return fitness - - loaded_ga_instance = pygad.load("pygad_GA") - - loaded_ga_instance.run() - - loaded_ga_instance.plot_fitness() - -The plot created by the ``plot_fitness()`` method will show the data -collected from both the runs. - -Note that the 2 attributes (``self.best_solutions`` and -``self.best_solutions_fitness``) only work if the -``save_best_solutions`` parameter is set to ``True``. Also, the 2 -attributes (``self.solutions`` and ``self.solutions_fitness``) only work -if the ``save_solutions`` parameter is ``True``. - -Prevent Duplicates in Gene Values -================================= - -In `PyGAD -2.13.0 `__, -a new bool parameter called ``allow_duplicate_genes`` is supported to -control whether duplicates are supported in the chromosome or not. In -other words, whether 2 or more genes might have the same exact value. - -If ``allow_duplicate_genes=True`` (which is the default case), genes may -have the same value. If ``allow_duplicate_genes=False``, then no 2 genes -will have the same value given that there are enough unique values for -the genes. - -The next code gives an example to use the ``allow_duplicate_genes`` -parameter. 
A callback generation function is implemented to print the -population after each generation. - -.. code:: python - - import pygad - - def fitness_func(ga_instance, solution, solution_idx): - return 0 - - def on_generation(ga): - print("Generation", ga.generations_completed) - print(ga.population) - - ga_instance = pygad.GA(num_generations=5, - sol_per_pop=5, - num_genes=4, - mutation_num_genes=3, - random_mutation_min_val=-5, - random_mutation_max_val=5, - num_parents_mating=2, - fitness_func=fitness_func, - gene_type=int, - on_generation=on_generation, - allow_duplicate_genes=False) - ga_instance.run() - -Here are the population after the 5 generations. Note how there are no -duplicate values. - -.. code:: python - - Generation 1 - [[ 2 -2 -3 3] - [ 0 1 2 3] - [ 5 -3 6 3] - [-3 1 -2 4] - [-1 0 -2 3]] - Generation 2 - [[-1 0 -2 3] - [-3 1 -2 4] - [ 0 -3 -2 6] - [-3 0 -2 3] - [ 1 -4 2 4]] - Generation 3 - [[ 1 -4 2 4] - [-3 0 -2 3] - [ 4 0 -2 1] - [-4 0 -2 -3] - [-4 2 0 3]] - Generation 4 - [[-4 2 0 3] - [-4 0 -2 -3] - [-2 5 4 -3] - [-1 2 -4 4] - [-4 2 0 -3]] - Generation 5 - [[-4 2 0 -3] - [-1 2 -4 4] - [ 3 4 -4 0] - [-1 0 2 -2] - [-4 2 -1 1]] - -The ``allow_duplicate_genes`` parameter is configured with use with the -``gene_space`` parameter. Here is an example where each of the 4 genes -has the same space of values that consists of 4 values (1, 2, 3, and 4). - -.. 
code:: python - - import pygad - - def fitness_func(ga_instance, solution, solution_idx): - return 0 - - def on_generation(ga): - print("Generation", ga.generations_completed) - print(ga.population) - - ga_instance = pygad.GA(num_generations=1, - sol_per_pop=5, - num_genes=4, - num_parents_mating=2, - fitness_func=fitness_func, - gene_type=int, - gene_space=[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], - on_generation=on_generation, - allow_duplicate_genes=False) - ga_instance.run() - -Even that all the genes share the same space of values, no 2 genes -duplicate their values as provided by the next output. - -.. code:: python - - Generation 1 - [[2 3 1 4] - [2 3 1 4] - [2 4 1 3] - [2 3 1 4] - [1 3 2 4]] - Generation 2 - [[1 3 2 4] - [2 3 1 4] - [1 3 2 4] - [2 3 4 1] - [1 3 4 2]] - Generation 3 - [[1 3 4 2] - [2 3 4 1] - [1 3 4 2] - [3 1 4 2] - [3 2 4 1]] - Generation 4 - [[3 2 4 1] - [3 1 4 2] - [3 2 4 1] - [1 2 4 3] - [1 3 4 2]] - Generation 5 - [[1 3 4 2] - [1 2 4 3] - [2 1 4 3] - [1 2 4 3] - [1 2 4 3]] - -You should care of giving enough values for the genes so that PyGAD is -able to find alternatives for the gene value in case it duplicates with -another gene. - -There might be 2 duplicate genes where changing either of the 2 -duplicating genes will not solve the problem. For example, if -``gene_space=[[3, 0, 1], [4, 1, 2], [0, 2], [3, 2, 0]]`` and the -solution is ``[3 2 0 0]``, then the values of the last 2 genes -duplicate. There are no possible changes in the last 2 genes to solve -the problem. - -This problem can be solved by randomly changing one of the -non-duplicating genes that may make a room for a unique value in one the -2 duplicating genes. For example, by changing the second gene from 2 to -4, then any of the last 2 genes can take the value 2 and solve the -duplicates. The resultant gene is then ``[3 4 2 0]``. But this option is -not yet supported in PyGAD. 
- -Solve Duplicates using a Third Gene ------------------------------------ - -When ``allow_duplicate_genes=False`` and a user-defined ``gene_space`` -is used, it sometimes happen that there is no room to solve the -duplicates between the 2 genes by simply replacing the value of one gene -by another gene. In `PyGAD -3.1.0 `__, -the duplicates are solved by looking for a third gene that will help in -solving the duplicates. The following examples explain how it works. - -Example 1: - -Let's assume that this gene space is used and there is a solution with 2 -duplicate genes with the same value 4. - -.. code:: python - - Gene space: [[2, 3], - [3, 4], - [4, 5], - [5, 6]] - Solution: [3, 4, 4, 5] - -By checking the gene space, the second gene can have the values -``[3, 4]`` and the third gene can have the values ``[4, 5]``. To solve -the duplicates, we have the value of any of these 2 genes. - -If the value of the second gene changes from 4 to 3, then it will be -duplicate with the first gene. If we are to change the value of the -third gene from 4 to 5, then it will duplicate with the fourth gene. As -a conclusion, trying to just selecting a different gene value for either -the second or third genes will introduce new duplicating genes. - -When there are 2 duplicate genes but there is no way to solve their -duplicates, then the solution is to change a third gene that makes a -room to solve the duplicates between the 2 genes. - -In our example, duplicates between the second and third genes can be -solved by, for example,: - -- Changing the first gene from 3 to 2 then changing the second gene - from 4 to 3. - -- Or changing the fourth gene from 5 to 6 then changing the third gene - from 4 to 5. - -Generally, this is how to solve such duplicates: - -1. For any duplicate gene **GENE1**, select another value. - -2. Check which other gene **GENEX** has duplicate with this new value. - -3. Find if **GENEX** can have another value that will not cause any more - duplicates. 
If so, go to step 7. - -4. If all the other values of **GENEX** will cause duplicates, then try - another gene **GENEY**. - -5. Repeat steps 3 and 4 until exploring all the genes. - -6. If there is no possibility to solve the duplicates, then there is no - way to solve the duplicates and we have to keep the duplicate value. - -7. If a value for a gene **GENEM** is found that will not cause more - duplicates, then use this value for the gene **GENEM**. - -8. Replace the value of the gene **GENE1** by the old value of the gene - **GENEM**. This solves the duplicates. - -This is an example to solve the duplicate for the solution -``[3, 4, 4, 5]``: - -1. Let's use the second gene with value 4. Because the space of this - gene is ``[3, 4]``, then the only other value we can select is 3. - -2. The first gene also has the value 3. - -3. The first gene has another value 2 that will not cause more - duplicates in the solution. Then go to step 7. - -4. Skip. - -5. Skip. - -6. Skip. - -7. The value of the first gene 3 will be replaced by the new value 2. - The new solution is [2, 4, 4, 5]. - -8. Replace the value of the second gene 4 by the old value of the first - gene which is 3. The new solution is [2, 3, 4, 5]. The duplicate is - solved. - -Example 2: - -.. code:: python - - Gene space: [[0, 1], - [1, 2], - [2, 3], - [3, 4]] - Solution: [1, 2, 2, 3] - -The quick summary is: - -- Change the value of the first gene from 1 to 0. The solution becomes - [0, 2, 2, 3]. - -- Change the value of the second gene from 2 to 1. The solution becomes - [0, 1, 2, 3]. The duplicate is solved. - -.. _more-about-the-genetype-parameter: - -More about the ``gene_type`` Parameter -====================================== - -The ``gene_type`` parameter allows the user to control the data type for -all genes at once or each individual gene. In `PyGAD -2.15.0 `__, -the ``gene_type`` parameter also supports customizing the precision for -``float`` data types.
As a result, the ``gene_type`` parameter helps to: - -1. Select a data type for all genes with or without precision. - -2. Select a data type for each individual gene with or without - precision. - -Let's discuss things by examples. - -Data Type for All Genes without Precision ------------------------------------------ - -The data type for all genes can be specified by assigning the numeric -data type directly to the ``gene_type`` parameter. This is an example to -make all genes of ``int`` data types. - -.. code:: python - - gene_type=int - -Given that the supported numeric data types of PyGAD include Python's -``int`` and ``float`` in addition to all numeric types of ``NumPy``, -then any of these types can be assigned to the ``gene_type`` parameter. - -If no precision is specified for a ``float`` data type, then the -complete floating-point number is kept. - -The next code uses an ``int`` data type for all genes where the genes in -the initial and final population are only integers. - -.. code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_type=int) - - print("Initial Population") - print(ga_instance.initial_population) - - ga_instance.run() - - print("Final Population") - print(ga_instance.population) - -.. 
code:: python - - Initial Population - [[ 1 -1 2 0 -3] - [ 0 -2 0 -3 -1] - [ 0 -1 -1 2 0] - [-2 3 -2 3 3] - [ 0 0 2 -2 -2]] - - Final Population - [[ 1 -1 2 2 0] - [ 1 -1 2 2 0] - [ 1 -1 2 2 0] - [ 1 -1 2 2 0] - [ 1 -1 2 2 0]] - -Data Type for All Genes with Precision --------------------------------------- - -A precision can only be specified for a ``float`` data type and cannot -be specified for integers. Here is an example to use a precision of 3 -for the ``float`` data type. In this case, all genes are of type -``float`` and their maximum precision is 3. - -.. code:: python - - gene_type=[float, 3] - -The next code uses prints the initial and final population where the -genes are of type ``float`` with precision 3. - -.. code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_type=[float, 3]) - - print("Initial Population") - print(ga_instance.initial_population) - - ga_instance.run() - - print("Final Population") - print(ga_instance.population) - -.. 
code:: python - - Initial Population - [[-2.417 -0.487 3.623 2.457 -2.362] - [-1.231 0.079 -1.63 1.629 -2.637] - [ 0.692 -2.098 0.705 0.914 -3.633] - [ 2.637 -1.339 -1.107 -0.781 -3.896] - [-1.495 1.378 -1.026 3.522 2.379]] - - Final Population - [[ 1.714 -1.024 3.623 3.185 -2.362] - [ 0.692 -1.024 3.623 3.185 -2.362] - [ 0.692 -1.024 3.623 3.375 -2.362] - [ 0.692 -1.024 4.041 3.185 -2.362] - [ 1.714 -0.644 3.623 3.185 -2.362]] - -Data Type for each Individual Gene without Precision ----------------------------------------------------- - -In `PyGAD -2.14.0 `__, -the ``gene_type`` parameter allows customizing the gene type for each -individual gene. This is by using a ``list``/``tuple``/``numpy.ndarray`` -with number of elements equal to the number of genes. For each element, -a type is specified for the corresponding gene. - -This is an example for a 5-gene problem where different types are -assigned to the genes. - -.. code:: python - - gene_type=[int, float, numpy.float16, numpy.int8, float] - -This is a complete code that prints the initial and final population for -a custom-gene data type. - -.. code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_type=[int, float, numpy.float16, numpy.int8, float]) - - print("Initial Population") - print(ga_instance.initial_population) - - ga_instance.run() - - print("Final Population") - print(ga_instance.population) - -.. 
code:: python - - Initial Population - [[0 0.8615522360026828 0.7021484375 -2 3.5301821368185866] - [-3 2.648189378595294 -3.830078125 1 -0.9586271572917742] - [3 3.7729827570110714 1.2529296875 -3 1.395741994211889] - [0 1.0490687178053282 1.51953125 -2 0.7243617940450235] - [0 -0.6550158436937226 -2.861328125 -2 1.8212734549263097]] - - Final Population - [[3 3.7729827570110714 2.055 0 0.7243617940450235] - [3 3.7729827570110714 1.458 0 -0.14638754050305036] - [3 3.7729827570110714 1.458 0 0.0869406120516778] - [3 3.7729827570110714 1.458 0 0.7243617940450235] - [3 3.7729827570110714 1.458 0 -0.14638754050305036]] - -Data Type for each Individual Gene with Precision -------------------------------------------------- - -The precision can also be specified for the ``float`` data types as in -the next line where the second gene precision is 2 and last gene -precision is 1. - -.. code:: python - - gene_type=[int, [float, 2], numpy.float16, numpy.int8, [float, 1]] - -This is a complete example where the initial and final populations are -printed where the genes comply with the data types and precisions -specified. - -.. code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_type=[int, [float, 2], numpy.float16, numpy.int8, [float, 1]]) - - print("Initial Population") - print(ga_instance.initial_population) - - ga_instance.run() - - print("Final Population") - print(ga_instance.population) - -.. 
code:: python - - Initial Population - [[-2 -1.22 1.716796875 -1 0.2] - [-1 -1.58 -3.091796875 0 -1.3] - [3 3.35 -0.107421875 1 -3.3] - [-2 -3.58 -1.779296875 0 0.6] - [2 -3.73 2.65234375 3 -0.5]] - - Final Population - [[2 -4.22 3.47 3 -1.3] - [2 -3.73 3.47 3 -1.3] - [2 -4.22 3.47 2 -1.3] - [2 -4.58 3.47 3 -1.3] - [2 -3.73 3.47 3 -1.3]] - -Parallel Processing in PyGAD -============================ - -Starting from `PyGAD -2.17.0 `__, -parallel processing becomes supported. This section explains how to use -parallel processing in PyGAD. - -According to the `PyGAD -lifecycle `__, -parallel processing can be parallelized in only 2 operations: - -1. Population fitness calculation. - -2. Mutation. - -The reason is that the calculations in these 2 operations are -independent (i.e. each solution/chromosome is handled independently from -the others) and can be distributed across different processes or -threads. - -For the mutation operation, it does not do intensive calculations on the -CPU. Its calculations are simple like flipping the values of some genes -from 0 to 1 or adding a random value to some genes. So, it does not take -much CPU processing time. Experiments proved that parallelizing the -mutation operation across the solutions increases the time instead of -reducing it. This is because running multiple processes or threads adds -overhead to manage them. Thus, parallel processing cannot be applied on -the mutation operation. - -For the population fitness calculation, parallel processing can help -make a difference and reduce the processing time. But this is -conditional on the type of calculations done in the fitness function. If -the fitness function makes intensive calculations and takes much -processing time from the CPU, then it is probably that parallel -processing will help to cut down the overall time. 
- -This section explains how parallel processing works in PyGAD and how to -use parallel processing in PyGAD. - -How to Use Parallel Processing in PyGAD ---------------------------------------- - -Starting from `PyGAD -2.17.0 `__, -a new parameter called ``parallel_processing`` was added to the constructor -of the ``pygad.GA`` class. - -.. code:: python - - import pygad - ... - ga_instance = pygad.GA(..., - parallel_processing=...) - ... - -This parameter allows the user to do the following: - -1. Enable parallel processing. - -2. Select whether processes or threads are used. - -3. Specify the number of processes or threads to be used. - -These are 3 possible values for the ``parallel_processing`` parameter: - -1. ``None``: (Default) It means no parallel processing is used. - -2. A positive integer referring to the number of threads to be used - (i.e. threads, not processes, are used). - -3. ``list``/``tuple``: If a list or a tuple of exactly 2 elements is - assigned, then: - - 1. The first element can be either ``'process'`` or ``'thread'`` to - specify whether processes or threads are used, respectively. - - 2. The second element can be: - - 1. A positive integer to select the maximum number of processes or - threads to be used. - - 2. ``0`` to indicate that 0 processes or threads are used. It - means no parallel processing. This is identical to setting - ``parallel_processing=None``. - - 3. ``None`` to use the default value as calculated by the - ``concurrent.futures`` module. - -These are examples of the values assigned to the ``parallel_processing`` -parameter: - -- ``parallel_processing=4``: Because the parameter is assigned a - positive integer, this means parallel processing is activated where 4 - threads are used. - -- ``parallel_processing=["thread", 5]``: Use parallel processing with 5 - threads. This is identical to ``parallel_processing=5``. - -- ``parallel_processing=["process", 8]``: Use parallel processing with - 8 processes.
- -- ``parallel_processing=["process", 0]``: As the second element is - given the value 0, this means do not use parallel processing. This is - identical to ``parallel_processing=None``. - -Examples --------- - -The examples will help you know the difference between using processes -and threads. Moreover, it will give an idea when parallel processing -would make a difference and reduce the time. These are dummy examples -where the fitness function is made to always return 0. - -The first example uses 10 genes, 5 solutions in the population where -only 3 solutions mate, and 9999 generations. The fitness function uses a -``for`` loop with 99 iterations just to have some calculations. In the -constructor of the ``pygad.GA`` class, ``parallel_processing=None`` -means no parallel processing is used. - -.. code:: python - - import pygad - import time - - def fitness_func(ga_instance, solution, solution_idx): - for _ in range(99): - pass - return 0 - - ga_instance = pygad.GA(num_generations=9999, - num_parents_mating=3, - sol_per_pop=5, - num_genes=10, - fitness_func=fitness_func, - suppress_warnings=True, - parallel_processing=None) - - if __name__ == '__main__': - t1 = time.time() - - ga_instance.run() - - t2 = time.time() - print("Time is", t2-t1) - -When parallel processing is not used, the time it takes to run the -genetic algorithm is ``1.5`` seconds. - -For comparison, let's do a second experiment where parallel -processing is used with 5 threads. In this case, it takes ``5`` seconds. - -.. code:: python - - ... - ga_instance = pygad.GA(..., - parallel_processing=5) - ... - -For the third experiment, processes instead of threads are used. Also, -only 99 generations are used instead of 9999. The time it takes is -``99`` seconds. - -.. code:: python - - ... - ga_instance = pygad.GA(num_generations=99, - ..., - parallel_processing=["process", 5]) - ... - -This is the summary of the 3 experiments: - -1. No parallel processing & 9999 generations: 1.5 seconds. - -2.
Parallel processing with 5 threads & 9999 generations: 5 seconds. - -3. Parallel processing with 5 processes & 99 generations: 99 seconds. - -Because the fitness function does not need much CPU time, the normal -processing takes the least time. Running processes for this simple -problem takes 99 seconds compared to only 5 seconds for threads because managing -processes is much heavier than managing threads. Thus, most of the CPU -time is for swapping the processes instead of executing the code. - -In the second example, the loop makes 99999999 iterations and only 5 -generations are used. With no parallelization, it takes 22 seconds. - -.. code:: python - - import pygad - import time - - def fitness_func(ga_instance, solution, solution_idx): - for _ in range(99999999): - pass - return 0 - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=3, - sol_per_pop=5, - num_genes=10, - fitness_func=fitness_func, - suppress_warnings=True, - parallel_processing=None) - - if __name__ == '__main__': - t1 = time.time() - ga_instance.run() - t2 = time.time() - print("Time is", t2-t1) - -It takes 15 seconds when 10 processes are used. - -.. code:: python - - ... - ga_instance = pygad.GA(..., - parallel_processing=["process", 10]) - ... - -This is compared to 20 seconds when 10 threads are used. - -.. code:: python - - ... - ga_instance = pygad.GA(..., - parallel_processing=["thread", 10]) - ... - -Based on the second example, using parallel processing with 10 processes -takes the least time because there is much CPU work done. Generally, -processes are preferred over threads when most of the work is on the -CPU. Threads are preferred over processes in some situations like doing -input/output operations. - -*Before releasing* `PyGAD -2.17.0 `__\ *,* -`László -Fazekas `__ -*wrote an article to parallelize the fitness function with PyGAD. Check -it:* `How Genetic Algorithms Can Compete with Gradient Descent and -Backprop `__.
- -Print Lifecycle Summary -======================= - -In `PyGAD -2.19.0 `__, -a new method called ``summary()`` is supported. It prints a Keras-like -summary of the PyGAD lifecycle showing the steps, callback functions, -parameters, etc. - -This method accepts the following parameters: - -- ``line_length=70``: An integer representing the length of the single - line in characters. - -- ``fill_character=" "``: A character to fill the lines. - -- ``line_character="-"``: A character for creating a line separator. - -- ``line_character2="="``: A secondary character to create a line - separator. - -- ``columns_equal_len=False``: The table rows are split into - equal-sized columns or split subjective to the width needed. - -- ``print_step_parameters=True``: Whether to print extra parameters - about each step inside the step. If ``print_step_parameters=False`` - and ``print_parameters_summary=True``, then the parameters of each - step are printed at the end of the table. - -- ``print_parameters_summary=True``: Whether to print parameters - summary at the end of the table. If ``print_step_parameters=False``, - then the parameters of each step are printed at the end of the table - too. - -This is a quick example to create a PyGAD example. - -.. 
code:: python - - import pygad - import numpy - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - def genetic_fitness(solution, solution_idx): - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - def on_gen(ga): - pass - - def on_crossover_callback(a, b): - pass - - ga_instance = pygad.GA(num_generations=100, - num_parents_mating=10, - sol_per_pop=20, - num_genes=len(function_inputs), - on_crossover=on_crossover_callback, - on_generation=on_gen, - parallel_processing=2, - stop_criteria="reach_10", - fitness_batch_size=4, - crossover_probability=0.4, - fitness_func=genetic_fitness) - -Then call the ``summary()`` method to print the summary with the default -parameters. Note that entries for the crossover and generation callback -function are created because their callback functions are implemented -through the ``on_crossover_callback()`` and ``on_gen()``, respectively. - -.. code:: python - - ga_instance.summary() - -.. 
code:: bash - - ---------------------------------------------------------------------- - PyGAD Lifecycle - ====================================================================== - Step Handler Output Shape - ====================================================================== - Fitness Function genetic_fitness() (1) - Fitness batch size: 4 - ---------------------------------------------------------------------- - Parent Selection steady_state_selection() (10, 6) - Number of Parents: 10 - ---------------------------------------------------------------------- - Crossover single_point_crossover() (10, 6) - Crossover probability: 0.4 - ---------------------------------------------------------------------- - On Crossover on_crossover_callback() None - ---------------------------------------------------------------------- - Mutation random_mutation() (10, 6) - Mutation Genes: 1 - Random Mutation Range: (-1.0, 1.0) - Mutation by Replacement: False - Allow Duplicated Genes: True - ---------------------------------------------------------------------- - On Generation on_gen() None - Stop Criteria: [['reach', 10.0]] - ---------------------------------------------------------------------- - ====================================================================== - Population Size: (20, 6) - Number of Generations: 100 - Initial Population Range: (-4, 4) - Keep Elitism: 1 - Gene DType: [, None] - Parallel Processing: ['thread', 2] - Save Best Solutions: False - Save Solutions: False - ====================================================================== - -We can set the ``print_step_parameters`` and -``print_parameters_summary`` parameters to ``False`` to not print the -parameters. - -.. code:: python - - ga_instance.summary(print_step_parameters=False, - print_parameters_summary=False) - -.. 
code:: bash - - ---------------------------------------------------------------------- - PyGAD Lifecycle - ====================================================================== - Step Handler Output Shape - ====================================================================== - Fitness Function genetic_fitness() (1) - ---------------------------------------------------------------------- - Parent Selection steady_state_selection() (10, 6) - ---------------------------------------------------------------------- - Crossover single_point_crossover() (10, 6) - ---------------------------------------------------------------------- - On Crossover on_crossover_callback() None - ---------------------------------------------------------------------- - Mutation random_mutation() (10, 6) - ---------------------------------------------------------------------- - On Generation on_gen() None - ---------------------------------------------------------------------- - ====================================================================== - -Logging Outputs -=============== - -In `PyGAD -3.0.0 `__, -the ``print()`` statement is no longer used and the outputs are printed -using the `logging `__ -module. A a new parameter called ``logger`` is supported to accept the -user-defined logger. - -.. code:: python - - import logging - - logger = ... - - ga_instance = pygad.GA(..., - logger=logger, - ...) - -The default value for this parameter is ``None``. If there is no logger -passed (i.e. ``logger=None``), then a default logger is created to log -the messages to the console exactly like how the ``print()`` statement -works. - -Some advantages of using the the -`logging `__ module -instead of the ``print()`` statement are: - -1. The user has more control over the printed messages specially if - there is a project that uses multiple modules where each module - prints its messages. A logger can organize the outputs. - -2. 
Using the proper ``Handler``, the user can log the output messages to - files and not only restricted to printing it to the console. So, it - is much easier to record the outputs. - -3. The format of the printed messages can be changed by customizing the - ``Formatter`` assigned to the Logger. - -This section gives some quick examples to use the ``logging`` module and -then gives an example to use the logger with PyGAD. - -Logging to the Console ----------------------- - -This is an example to create a logger to log the messages to the -console. - -.. code:: python - - import logging - - # Create a logger - logger = logging.getLogger(__name__) - - # Set the logger level to debug so that all the messages are printed. - logger.setLevel(logging.DEBUG) - - # Create a stream handler to log the messages to the console. - stream_handler = logging.StreamHandler() - - # Set the handler level to debug. - stream_handler.setLevel(logging.DEBUG) - - # Create a formatter - formatter = logging.Formatter('%(message)s') - - # Add the formatter to handler. - stream_handler.setFormatter(formatter) - - # Add the stream handler to the logger - logger.addHandler(stream_handler) - -Now, we can log messages to the console with the format specified in the -``Formatter``. - -.. code:: python - - logger.debug('Debug message.') - logger.info('Info message.') - logger.warning('Warn message.') - logger.error('Error message.') - logger.critical('Critical message.') - -The outputs are identical to those returned using the ``print()`` -statement. - -.. code:: - - Debug message. - Info message. - Warn message. - Error message. - Critical message. - -By changing the format of the output messages, we can have more -information about each message. - -.. code:: python - - formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') - -This is a sample output. - -.. code:: python - - 2023-04-03 18:46:27 DEBUG: Debug message. 
- 2023-04-03 18:46:27 INFO: Info message. - 2023-04-03 18:46:27 WARNING: Warn message. - 2023-04-03 18:46:27 ERROR: Error message. - 2023-04-03 18:46:27 CRITICAL: Critical message. - -Note that you may need to clear the handlers after finishing the -execution. This is to make sure no cached handlers are used in the next -run. If the cached handlers are not cleared, then the single output -message may be repeated. - -.. code:: python - - logger.handlers.clear() - -Logging to a File ------------------ - -This is another example to log the messages to a file named -``logfile.txt``. The formatter prints the following about each message: - -1. The date and time at which the message is logged. - -2. The log level. - -3. The message. - -4. The path of the file. - -5. The line number of the log message. - -.. code:: python - - import logging - - level = logging.DEBUG - name = 'logfile.txt' - - logger = logging.getLogger(name) - logger.setLevel(level) - - file_handler = logging.FileHandler(name, 'a+', 'utf-8') - file_handler.setLevel(logging.DEBUG) - file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s - %(pathname)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S') - file_handler.setFormatter(file_format) - logger.addHandler(file_handler) - -This is what the outputs look like. - -.. code:: python - - 2023-04-03 18:54:03 DEBUG: Debug message. - c:\users\agad069\desktop\logger\example2.py:46 - 2023-04-03 18:54:03 INFO: Info message. - c:\users\agad069\desktop\logger\example2.py:47 - 2023-04-03 18:54:03 WARNING: Warn message. - c:\users\agad069\desktop\logger\example2.py:48 - 2023-04-03 18:54:03 ERROR: Error message. - c:\users\agad069\desktop\logger\example2.py:49 - 2023-04-03 18:54:03 CRITICAL: Critical message. - c:\users\agad069\desktop\logger\example2.py:50 - -Consider clearing the handlers if necessary. - -..
code:: python - - logger.handlers.clear() - -Log to Both the Console and a File ----------------------------------- - -This is an example to create a single Logger associated with 2 handlers: - -1. A file handler. - -2. A stream handler. - -.. code:: python - - import logging - - level = logging.DEBUG - name = 'logfile.txt' - - logger = logging.getLogger(name) - logger.setLevel(level) - - file_handler = logging.FileHandler(name,'a+','utf-8') - file_handler.setLevel(logging.DEBUG) - file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s - %(pathname)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S') - file_handler.setFormatter(file_format) - logger.addHandler(file_handler) - - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_format = logging.Formatter('%(message)s') - console_handler.setFormatter(console_format) - logger.addHandler(console_handler) - -When a log message is executed, then it is both printed to the console -and saved in the ``logfile.txt``. - -Consider clearing the handlers if necessary. - -.. code:: python - - logger.handlers.clear() - -PyGAD Example -------------- - -To use the logger in PyGAD, just create your custom logger and pass it -to the ``logger`` parameter. - -.. 
code:: python - - import logging - import pygad - import numpy - - level = logging.DEBUG - name = 'logfile.txt' - - logger = logging.getLogger(name) - logger.setLevel(level) - - file_handler = logging.FileHandler(name,'a+','utf-8') - file_handler.setLevel(logging.DEBUG) - file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') - file_handler.setFormatter(file_format) - logger.addHandler(file_handler) - - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_format = logging.Formatter('%(message)s') - console_handler.setFormatter(console_format) - logger.addHandler(console_handler) - - equation_inputs = [4, -2, 8] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - def on_generation(ga_instance): - ga_instance.logger.info("Generation = {generation}".format(generation=ga_instance.generations_completed)) - ga_instance.logger.info("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=40, - num_parents_mating=2, - keep_parents=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - on_generation=on_generation, - logger=logger) - ga_instance.run() - - logger.handlers.clear() - -By executing this code, the logged messages are printed to the console -and also saved in the text file. - -.. 
code:: python - - 2023-04-03 19:04:27 INFO: Generation = 1 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038086960368076276 - 2023-04-03 19:04:27 INFO: Generation = 2 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038214871408010853 - 2023-04-03 19:04:27 INFO: Generation = 3 - 2023-04-03 19:04:27 INFO: Fitness = 0.0003832795907974678 - 2023-04-03 19:04:27 INFO: Generation = 4 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038398612055017196 - 2023-04-03 19:04:27 INFO: Generation = 5 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038442348890867516 - 2023-04-03 19:04:27 INFO: Generation = 6 - 2023-04-03 19:04:27 INFO: Fitness = 0.0003854406039137763 - 2023-04-03 19:04:27 INFO: Generation = 7 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038646083174063284 - 2023-04-03 19:04:27 INFO: Generation = 8 - 2023-04-03 19:04:27 INFO: Fitness = 0.0003875169193024936 - 2023-04-03 19:04:27 INFO: Generation = 9 - 2023-04-03 19:04:27 INFO: Fitness = 0.0003888816727311021 - 2023-04-03 19:04:27 INFO: Generation = 10 - 2023-04-03 19:04:27 INFO: Fitness = 0.000389832593101348 - -Solve Non-Deterministic Problems -================================ - -PyGAD can be used to solve both deterministic and non-deterministic -problems. Deterministic are those that return the same fitness for the -same solution. For non-deterministic problems, a different fitness value -would be returned for the same solution. - -By default, PyGAD settings are set to solve deterministic problems. -PyGAD can save the explored solutions and their fitness to reuse in the -future. These instances attributes can save the solutions: - -1. ``solutions``: Exists if ``save_solutions=True``. - -2. ``best_solutions``: Exists if ``save_best_solutions=True``. - -3. ``last_generation_elitism``: Exists if ``keep_elitism`` > 0. - -4. ``last_generation_parents``: Exists if ``keep_parents`` > 0 or - ``keep_parents=-1``. - -To configure PyGAD for non-deterministic problems, we have to disable -saving the previous solutions. 
This is by setting these parameters: - -1. ``keep_elitism=0`` - -2. ``keep_parents=0`` - -3. ``save_solutions=False`` - -4. ``save_best_solutions=False`` - -.. code:: python - - import pygad - ... - ga_instance = pygad.GA(..., - keep_elitism=0, - keep_parents=0, - save_solutions=False, - save_best_solutions=False, - ...) - -This way PyGAD will not save any explored solution and thus the fitness -function has to be called for each individual solution. - -Reuse the Fitness instead of Calling the Fitness Function -========================================================= - -It may happen that a previously explored solution in generation X is -explored again in another generation Y (where Y > X). For some problems, -calling the fitness function takes much time. - -For deterministic problems, it is better to not call the fitness -function for an already explored solution. Instead, reuse the fitness -of the old solution. PyGAD supports some options to help you save time -calling the fitness function for a previously explored solution. - -The parameters explored in this section can be set in the constructor of -the ``pygad.GA`` class. - -The ``cal_pop_fitness()`` method of the ``pygad.GA`` class checks these -parameters to see if there is a possibility of reusing the fitness -instead of calling the fitness function. - -.. _1-savesolutions: - -1. ``save_solutions`` ---------------------- - -It defaults to ``False``. If set to ``True``, then the population of -each generation is saved into the ``solutions`` attribute of the -``pygad.GA`` instance. In other words, every single solution is saved in -the ``solutions`` attribute. - -.. _2-savebestsolutions: - -2. ``save_best_solutions`` --------------------------- - -It defaults to ``False``. If ``True``, then it only saves the best -solution in every generation. - -.. _3-keepelitism: - -3. ``keep_elitism`` -------------------- - -It accepts an integer and defaults to 1.
If set to a positive integer, -then it keeps the elitism of one generation available in the next -generation. - -.. _4-keepparents: - -4. ``keep_parents`` -------------------- - -It accepts an integer and defaults to -1. It set to ``-1`` or a positive -integer, then it keeps the parents of one generation available in the -next generation. - -Why the Fitness Function is not Called for Solution at Index 0? -=============================================================== - -PyGAD has a parameter called ``keep_elitism`` which defaults to 1. This -parameter defines the number of best solutions in generation **X** to -keep in the next generation **X+1**. The best solutions are just copied -from generation **X** to generation **X+1** without making any change. - -.. code:: python - - ga_instance = pygad.GA(..., - keep_elitism=1, - ...) - -The best solutions are copied at the beginning of the population. If -``keep_elitism=1``, this means the best solution in generation X is kept -in the next generation X+1 at index 0 of the population. If -``keep_elitism=2``, this means the 2 best solutions in generation X are -kept in the next generation X+1 at indices 0 and 1 of the population of -generation 1. - -Because the fitness of these best solutions are already calculated in -generation X, then their fitness values will not be recalculated at -generation X+1 (i.e. the fitness function will not be called for these -solutions again). Instead, their fitness values are just reused. This is -why you see that no solution with index 0 is passed to the fitness -function. - -To force calling the fitness function for each solution in every -generation, consider setting ``keep_elitism`` and ``keep_parents`` to 0. -Moreover, keep the 2 parameters ``save_solutions`` and -``save_best_solutions`` to their default value ``False``. - -.. code:: python - - ga_instance = pygad.GA(..., - keep_elitism=0, - keep_parents=0, - save_solutions=False, - save_best_solutions=False, - ...) 
- -Batch Fitness Calculation -========================= - -In `PyGAD -2.19.0 `__, -a new optional parameter called ``fitness_batch_size`` is supported. A -new optional parameter called ``fitness_batch_size`` is supported to -calculate the fitness function in batches. Thanks to `Linan -Qiu `__ for opening the `GitHub issue -#136 `__. - -Its values can be: - -- ``1`` or ``None``: If the ``fitness_batch_size`` parameter is - assigned the value ``1`` or ``None`` (default), then the normal flow - is used where the fitness function is called for each individual - solution. That is if there are 15 solutions, then the fitness - function is called 15 times. - -- ``1 < fitness_batch_size <= sol_per_pop``: If the - ``fitness_batch_size`` parameter is assigned a value satisfying this - condition ``1 < fitness_batch_size <= sol_per_pop``, then the - solutions are grouped into batches of size ``fitness_batch_size`` and - the fitness function is called once for each batch. In this case, the - fitness function must return a list/tuple/numpy.ndarray with a length - equal to the number of solutions passed. - -.. _example-without-fitnessbatchsize-parameter: - -Example without ``fitness_batch_size`` Parameter ------------------------------------------------- - -This is an example where the ``fitness_batch_size`` parameter is given -the value ``None`` (which is the default value). This is equivalent to -using the value ``1``. In this case, the fitness function will be called -for each solution. This means the fitness function ``fitness_func`` will -receive only a single solution. This is an example of the passed -arguments to the fitness function: - -.. code:: - - solution: [ 2.52860734, -0.94178795, 2.97545704, 0.84131987, -3.78447118, 2.41008358] - solution_idx: 3 - -The fitness function also must return a single numeric value as the -fitness for the passed solution. - -As we have a population of ``20`` solutions, then the fitness function -is called 20 times per generation. 
For 5 generations, then the fitness -function is called ``20*5 = 100`` times. In PyGAD, the fitness function -is called after the last generation too and this adds additional 20 -times. So, the total number of calls to the fitness function is -``20*5 + 20 = 120``. - -Note that the ``keep_elitism`` and ``keep_parents`` parameters are set -to ``0`` to make sure no fitness values are reused and to force calling -the fitness function for each individual solution. - -.. code:: python - - import pygad - import numpy - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - number_of_calls = 0 - - def fitness_func(ga_instance, solution, solution_idx): - global number_of_calls - number_of_calls = number_of_calls + 1 - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=10, - sol_per_pop=20, - fitness_func=fitness_func, - fitness_batch_size=None, - # fitness_batch_size=1, - num_genes=len(function_inputs), - keep_elitism=0, - keep_parents=0) - - ga_instance.run() - print(number_of_calls) - -.. code:: - - 120 - -.. _example-with-fitnessbatchsize-parameter: - -Example with ``fitness_batch_size`` Parameter ---------------------------------------------- - -This is an example where the ``fitness_batch_size`` parameter is used -and assigned the value ``4``. This means the solutions will be grouped -into batches of ``4`` solutions. The fitness function will be called -once for each patch (i.e. called once for each 4 solutions). - -This is an example of the arguments passed to it: - -.. 
code:: python - - solutions: - [[ 3.1129432 -0.69123589 1.93792414 2.23772968 -1.54616001 -0.53930799] - [ 3.38508121 0.19890812 1.93792414 2.23095014 -3.08955597 3.10194128] - [ 2.37079504 -0.88819803 2.97545704 1.41742256 -3.95594055 2.45028256] - [ 2.52860734 -0.94178795 2.97545704 0.84131987 -3.78447118 2.41008358]] - solutions_indices: - [16, 17, 18, 19] - -As we have 20 solutions, then there are ``20/4 = 5`` patches. As a -result, the fitness function is called only 5 times per generation -instead of 20. For each call to the fitness function, it receives a -batch of 4 solutions. - -As we have 5 generations, then the function will be called ``5*5 = 25`` -times. Given the call to the fitness function after the last generation, -then the total number of calls is ``5*5 + 5 = 30``. - -.. code:: python - - import pygad - import numpy - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - number_of_calls = 0 - - def fitness_func_batch(ga_instance, solutions, solutions_indices): - global number_of_calls - number_of_calls = number_of_calls + 1 - batch_fitness = [] - for solution in solutions: - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - batch_fitness.append(fitness) - return batch_fitness - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=10, - sol_per_pop=20, - fitness_func=fitness_func_batch, - fitness_batch_size=4, - num_genes=len(function_inputs), - keep_elitism=0, - keep_parents=0) - - ga_instance.run() - print(number_of_calls) - -.. code:: - - 30 - -When batch fitness calculation is used, then we saved ``120 - 30 = 90`` -calls to the fitness function. - -Use Functions and Methods to Build Fitness and Callbacks -======================================================== - -In PyGAD 2.19.0, it is possible to pass user-defined functions or -methods to the following parameters: - -1. ``fitness_func`` - -2. ``on_start`` - -3. ``on_fitness`` - -4. ``on_parents`` - -5. 
``on_crossover`` - -6. ``on_mutation`` - -7. ``on_generation`` - -8. ``on_stop`` - -This section gives 2 examples to assign these parameters user-defined: - -1. Functions. - -2. Methods. - -Assign Functions ----------------- - -This is a dummy example where the fitness function returns a random -value. Note that the instance of the ``pygad.GA`` class is passed as the -last parameter of all functions. - -.. code:: python - - import pygad - import numpy - - def fitness_func(ga_instanse, solution, solution_idx): - return numpy.random.rand() - - def on_start(ga_instanse): - print("on_start") - - def on_fitness(ga_instanse, last_gen_fitness): - print("on_fitness") - - def on_parents(ga_instanse, last_gen_parents): - print("on_parents") - - def on_crossover(ga_instanse, last_gen_offspring): - print("on_crossover") - - def on_mutation(ga_instanse, last_gen_offspring): - print("on_mutation") - - def on_generation(ga_instanse): - print("on_generation\n") - - def on_stop(ga_instanse, last_gen_fitness): - print("on_stop") - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=4, - sol_per_pop=10, - num_genes=2, - on_start=on_start, - on_fitness=on_fitness, - on_parents=on_parents, - on_crossover=on_crossover, - on_mutation=on_mutation, - on_generation=on_generation, - on_stop=on_stop, - fitness_func=fitness_func) - - ga_instance.run() - -Assign Methods --------------- - -The next example has all the method defined inside the class ``Test``. -All of the methods accept an additional parameter representing the -method's object of the class ``Test``. - -All methods accept ``self`` as the first parameter and the instance of -the ``pygad.GA`` class as the last parameter. - -.. 
code:: python - - import pygad - import numpy - - class Test: - def fitness_func(self, ga_instanse, solution, solution_idx): - return numpy.random.rand() - - def on_start(self, ga_instanse): - print("on_start") - - def on_fitness(self, ga_instanse, last_gen_fitness): - print("on_fitness") - - def on_parents(self, ga_instanse, last_gen_parents): - print("on_parents") - - def on_crossover(self, ga_instanse, last_gen_offspring): - print("on_crossover") - - def on_mutation(self, ga_instanse, last_gen_offspring): - print("on_mutation") - - def on_generation(self, ga_instanse): - print("on_generation\n") - - def on_stop(self, ga_instanse, last_gen_fitness): - print("on_stop") - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=4, - sol_per_pop=10, - num_genes=2, - on_start=Test().on_start, - on_fitness=Test().on_fitness, - on_parents=Test().on_parents, - on_crossover=Test().on_crossover, - on_mutation=Test().on_mutation, - on_generation=Test().on_generation, - on_stop=Test().on_stop, - fitness_func=Test().fitness_func) - - ga_instance.run() +More About PyGAD +================ + +.. _limit-the-gene-value-range-using-the-genespace-parameter: + +Limit the Gene Value Range using the ``gene_space`` Parameter +============================================================= + +In `PyGAD +2.11.0 `__, +the ``gene_space`` parameter supported a new feature to allow +customizing the range of accepted values for each gene. Let's take a +quick review of the ``gene_space`` parameter to build over it. + +The ``gene_space`` parameter allows the user to feed the space of values +of each gene. This way the accepted values for each gene is retracted to +the user-defined values. Assume there is a problem that has 3 genes +where each gene has different set of values as follows: + +1. Gene 1: ``[0.4, 12, -5, 21.2]`` + +2. Gene 2: ``[-2, 0.3]`` + +3. Gene 3: ``[1.2, 63.2, 7.4]`` + +Then, the ``gene_space`` for this problem is as given below. Note that +the order is very important. 
+ +.. code:: python + + gene_space = [[0.4, 12, -5, 21.2], + [-2, 0.3], + [1.2, 63.2, 7.4]] + +In case all genes share the same set of values, then simply feed a +single list to the ``gene_space`` parameter as follows. In this case, +all genes can only take values from this list of 6 values. + +.. code:: python + + gene_space = [33, 7, 0.5, 95, 6.3, 0.74] + +The previous example restricts the gene values to just a set of fixed +number of discrete values. In case you want to use a range of discrete +values to the gene, then you can use the ``range()`` function. For +example, ``range(1, 7)`` means the set of allowed values for the gene +are ``1, 2, 3, 4, 5, and 6``. You can also use the ``numpy.arange()`` or +``numpy.linspace()`` functions for the same purpose. + +The previous discussion only works with a range of discrete values not +continuous values. In `PyGAD +2.11.0 `__, +the ``gene_space`` parameter can be assigned a dictionary that allows +the gene to have values from a continuous range. + +Assuming you want to restrict the gene within this half-open range [1 to +5) where 1 is included and 5 is not. Then simply create a dictionary +with 2 items where the keys of the 2 items are: + +1. ``'low'``: The minimum value in the range which is 1 in the example. + +2. ``'high'``: The maximum value in the range which is 5 in the example. + +The dictionary will look like that: + +.. code:: python + + {'low': 1, + 'high': 5} + +It is not acceptable to add more than 2 items in the dictionary or use +other keys than ``'low'`` and ``'high'``. + +For a 3-gene problem, the next code creates a dictionary for each gene +to restrict its values in a continuous range. For the first gene, it can +take any floating-point value from the range that starts from 1 +(inclusive) and ends at 5 (exclusive). + +.. code:: python + + gene_space = [{'low': 1, 'high': 5}, {'low': 0.3, 'high': 1.4}, {'low': -0.2, 'high': 4.5}] + +..
_more-about-the-genespace-parameter: + +More about the ``gene_space`` Parameter +======================================= + +The ``gene_space`` parameter customizes the space of values of each +gene. + +Assuming that all genes have the same global space which include the +values 0.3, 5.2, -4, and 8, then those values can be assigned to the +``gene_space`` parameter as a list, tuple, or range. Here is a list +assigned to this parameter. By doing that, then the gene values are +restricted to those assigned to the ``gene_space`` parameter. + +.. code:: python + + gene_space = [0.3, 5.2, -4, 8] + +If some genes have different spaces, then ``gene_space`` should accept a +nested list or tuple. In this case, the elements could be: + +1. Number (of ``int``, ``float``, or ``NumPy`` data types): A single + value to be assigned to the gene. This means this gene will have the + same value across all generations. + +2. ``list``, ``tuple``, ``numpy.ndarray``, or any range like ``range``, + ``numpy.arange()``, or ``numpy.linspace``: It holds the space for + each individual gene. But this space is usually discrete. That is + there is a set of finite values to select from. + +3. ``dict``: To sample a value for a gene from a continuous range. The + dictionary must have 2 mandatory keys which are ``"low"`` and + ``"high"`` in addition to an optional key which is ``"step"``. A + random value is returned between the values assigned to the items + with ``"low"`` and ``"high"`` keys. If the ``"step"`` exists, then + this works as the previous options (i.e. discrete set of values). + +4. ``None``: A gene with its space set to ``None`` is initialized + randomly from the range specified by the 2 parameters + ``init_range_low`` and ``init_range_high``. For mutation, its value + is mutated based on a random value from the range specified by the 2 + parameters ``random_mutation_min_val`` and + ``random_mutation_max_val``. 
If all elements in the ``gene_space`` + parameter are ``None``, the parameter will not have any effect. + +Assume that a chromosome has 2 genes and each gene has a different +value space. Then the ``gene_space`` could be assigned a nested +list/tuple where each element determines the space of a gene. + +According to the next code, the space of the first gene is ``[0.4, -5]`` +which has 2 values and the space for the second gene is +``[0.5, -3.2, 8.2, -9]`` which has 4 values. + +.. code:: python + + gene_space = [[0.4, -5], [0.5, -3.2, 8.2, -9]] + +For a 2 gene chromosome, if the first gene space is restricted to the +discrete values from 0 to 4 and the second gene is restricted to the +values from 10 to 19, then it could be specified according to the next +code. + +.. code:: python + + gene_space = [range(5), range(10, 20)] + +The ``gene_space`` can also be assigned to a single range, as given +below, where the values of all genes are sampled from the same range. + +.. code:: python + + gene_space = numpy.arange(15) + +The ``gene_space`` can be assigned a dictionary to sample a value from a +continuous range. + +.. code:: python + + gene_space = {"low": 4, "high": 30} + +A step also can be assigned to the dictionary. This works as if a range +is used. + +.. code:: python + + gene_space = {"low": 4, "high": 30, "step": 2.5} + +.. + + Setting a ``dict`` like ``{"low": 0, "high": 10}`` in the + ``gene_space`` means that random values from the continuous range [0, + 10) are sampled. Note that ``0`` is included but ``10`` is not + included while sampling. Thus, the maximum value that could be + returned is less than ``10`` like ``9.9999``. But if the user decided + to round the genes using, for example, ``[float, 2]``, then this + value will become 10. So, the user should be careful with the inputs.
+ +If a ``None`` is assigned to only a single gene, then its value will be +randomly generated initially using the ``init_range_low`` and +``init_range_high`` parameters in the ``pygad.GA`` class's constructor. +During mutation, the value is sampled from the range defined by the 2 +parameters ``random_mutation_min_val`` and ``random_mutation_max_val``. +This is an example where the second gene is given a ``None`` value. + +.. code:: python + + gene_space = [range(5), None, numpy.linspace(10, 20, 300)] + +If the user did not assign the initial population to the +``initial_population`` parameter, the initial population is created +randomly based on the ``gene_space`` parameter. Moreover, the mutation +is applied based on this parameter. + +.. _how-mutation-works-with-the-genespace-parameter: + +How Mutation Works with the ``gene_space`` Parameter? +----------------------------------------------------- + +If a gene has its static space defined in the ``gene_space`` parameter, +then mutation works by replacing the gene value by a value randomly +selected from the gene space. This happens for both ``int`` and +``float`` data types. + +For example, the following ``gene_space`` has the static space +``[1, 2, 3]`` defined for the first gene. So, this gene can only have a +value out of these 3 values. + +.. code:: python + + Gene space: [[1, 2, 3], + None] + Solution: [1, 5] + +For a solution like ``[1, 5]``, mutation happens for the +first gene by simply replacing its current value by a randomly selected +value (other than its current value if possible). So, the value 1 will +be replaced by either 2 or 3. + +For the second gene, its space is set to ``None``. So, traditional +mutation happens for this gene by: + +1. Generating a random value from the range defined by the + ``random_mutation_min_val`` and ``random_mutation_max_val`` + parameters. + +2. Adding this random value to the current gene's value.
+ +If its current value is 5 and the random value is ``-0.5``, then the new +value is 4.5. If the gene type is integer, then the value will be +rounded. + +Stop at Any Generation +====================== + +In `PyGAD +2.4.0 `__, +it is possible to stop the genetic algorithm after any generation. All +you need to do is to return the string ``"stop"`` in the callback +function ``on_generation``. When this callback function is implemented +and assigned to the ``on_generation`` parameter in the constructor of +the ``pygad.GA`` class, then the algorithm immediately stops after +completing its current generation. Let's discuss an example. + +Assume that the user wants to stop the algorithm either after 100 +generations or if a condition is met. The user may assign a value of 100 +to the ``num_generations`` parameter of the ``pygad.GA`` class +constructor. + +The condition that stops the algorithm is written in a callback function +like the one in the next code. If the fitness value of the best solution +reaches or exceeds 70, then the string ``"stop"`` is returned. + +.. code:: python + + def func_generation(ga_instance): + if ga_instance.best_solution()[1] >= 70: + return "stop" + +Stop Criteria +============= + +In `PyGAD +2.15.0 `__, +a new parameter named ``stop_criteria`` is added to the constructor of +the ``pygad.GA`` class. It helps to stop the evolution based on some +criteria. It can be assigned one or more criteria. + +Each criterion is passed as ``str`` that consists of 2 parts: + +1. Stop word. + +2. Number. + +It takes this form: + +.. code:: python + + "word_num" + +The current 2 supported words are ``reach`` and ``saturate``. + +The ``reach`` word stops the ``run()`` method if the fitness value is +equal to or greater than a given fitness value. An example for ``reach`` +is ``"reach_40"`` which stops the evolution if the fitness is >= 40. + +``saturate`` stops the evolution if the fitness saturates for a given +number of consecutive generations.
An example for ``saturate`` is +``"saturate_7"`` which means stop the ``run()`` method if the fitness +does not change for 7 consecutive generations. + +Here is an example that stops the evolution if either the fitness value +reached ``127.4`` or if the fitness saturates for ``15`` generations. + +.. code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, 9, 4] + desired_output = 44 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + + return fitness + + ga_instance = pygad.GA(num_generations=200, + sol_per_pop=10, + num_parents_mating=4, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + stop_criteria=["reach_127.4", "saturate_15"]) + + ga_instance.run() + print(f"Number of generations passed is {ga_instance.generations_completed}") + +Elitism Selection +================= + +In `PyGAD +2.18.0 `__, +a new parameter called ``keep_elitism`` is supported. It accepts an +integer to define the number of elitism (i.e. best solutions) to keep in +the next generation. This parameter defaults to ``1`` which means only +the best solution is kept in the next generation. + +In the next example, the ``keep_elitism`` parameter in the constructor +of the ``pygad.GA`` class is set to 2. Thus, the best 2 solutions in +each generation are kept in the next generation. + +.. code:: python + + import numpy + import pygad + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / numpy.abs(output - desired_output) + return fitness + + ga_instance = pygad.GA(num_generations=2, + num_parents_mating=3, + fitness_func=fitness_func, + num_genes=6, + sol_per_pop=5, + keep_elitism=2) + + ga_instance.run() + +The value passed to the ``keep_elitism`` parameter must satisfy 2 +conditions: + +1. 
It must be ``>= 0``. + +2. It must be ``<= sol_per_pop``. That is its value cannot exceed the + number of solutions in the current population. + +In the previous example, if the ``keep_elitism`` parameter is set equal +to the value passed to the ``sol_per_pop`` parameter, which is 5, then +there will be no evolution at all as in the next figure. This is because +all the 5 solutions are used as elitism in the next generation and no +offspring will be created. + +.. code:: python + + ... + + ga_instance = pygad.GA(..., + sol_per_pop=5, + keep_elitism=5) + + ga_instance.run() + +.. image:: https://user-images.githubusercontent.com/16560492/189273225-67ffad41-97ab-45e1-9324-429705e17b20.png + :alt: + +Note that if the ``keep_elitism`` parameter is effective (i.e. is +assigned a positive integer, not zero), then the ``keep_parents`` +parameter will have no effect. Because the default value of the +``keep_elitism`` parameter is 1, then the ``keep_parents`` parameter has +no effect by default. The ``keep_parents`` parameter is only effective +when ``keep_elitism=0``. + +Random Seed +=========== + +In `PyGAD +2.18.0 `__, +a new parameter called ``random_seed`` is supported. Its value is used +as a seed for the random function generators. + +PyGAD uses random functions in these 2 libraries: + +1. NumPy + +2. random + +The ``random_seed`` parameter defaults to ``None`` which means no seed +is used. As a result, different random numbers are generated for each +run of PyGAD. + +If this parameter is assigned a proper seed, then the results will be +reproducible. In the next example, the integer 2 is used as a random +seed. + +.. 
code:: python + + import numpy + import pygad + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / numpy.abs(output - desired_output) + return fitness + + ga_instance = pygad.GA(num_generations=2, + num_parents_mating=3, + fitness_func=fitness_func, + sol_per_pop=5, + num_genes=6, + random_seed=2) + + ga_instance.run() + best_solution, best_solution_fitness, best_match_idx = ga_instance.best_solution() + print(best_solution) + print(best_solution_fitness) + +This is the best solution found and its fitness value. + +.. code:: + + [ 2.77249188 -4.06570662 0.04196872 -3.47770796 -0.57502138 -3.22775267] + 0.04872203136549972 + +After running the code again, it will find the same result. + +.. code:: + + [ 2.77249188 -4.06570662 0.04196872 -3.47770796 -0.57502138 -3.22775267] + 0.04872203136549972 + +Continue without Loosing Progress +================================= + +In `PyGAD +2.18.0 `__, +and thanks to `Felix Bernhard `__ for +opening `this GitHub +issue `__, +the values of these 4 instance attributes are no longer reset after each +call to the ``run()`` method. + +1. ``self.best_solutions`` + +2. ``self.best_solutions_fitness`` + +3. ``self.solutions`` + +4. ``self.solutions_fitness`` + +This helps the user to continue where the last run stopped without +losing the values of these 4 attributes. + +Now, the user can save the model by calling the ``save()`` method. + +.. code:: python + + import pygad + + def fitness_func(ga_instance, solution, solution_idx): + ... + return fitness + + ga_instance = pygad.GA(...) + + ga_instance.run() + + ga_instance.plot_fitness() + + ga_instance.save("pygad_GA") + +Then the saved model is loaded by calling the ``load()`` function. After +calling the ``run()`` method over the loaded instance, then the data +from the previous 4 attributes are not reset but extended with the new +data. + +..
code:: python + + import pygad + + def fitness_func(ga_instance, solution, solution_idx): + ... + return fitness + + loaded_ga_instance = pygad.load("pygad_GA") + + loaded_ga_instance.run() + + loaded_ga_instance.plot_fitness() + +The plot created by the ``plot_fitness()`` method will show the data +collected from both the runs. + +Note that the 2 attributes (``self.best_solutions`` and +``self.best_solutions_fitness``) only work if the +``save_best_solutions`` parameter is set to ``True``. Also, the 2 +attributes (``self.solutions`` and ``self.solutions_fitness``) only work +if the ``save_solutions`` parameter is ``True``. + +Prevent Duplicates in Gene Values +================================= + +In `PyGAD +2.13.0 `__, +a new bool parameter called ``allow_duplicate_genes`` is supported to +control whether duplicates are supported in the chromosome or not. In +other words, whether 2 or more genes might have the same exact value. + +If ``allow_duplicate_genes=True`` (which is the default case), genes may +have the same value. If ``allow_duplicate_genes=False``, then no 2 genes +will have the same value given that there are enough unique values for +the genes. + +The next code gives an example to use the ``allow_duplicate_genes`` +parameter. A callback generation function is implemented to print the +population after each generation. + +.. code:: python + + import pygad + + def fitness_func(ga_instance, solution, solution_idx): + return 0 + + def on_generation(ga): + print("Generation", ga.generations_completed) + print(ga.population) + + ga_instance = pygad.GA(num_generations=5, + sol_per_pop=5, + num_genes=4, + mutation_num_genes=3, + random_mutation_min_val=-5, + random_mutation_max_val=5, + num_parents_mating=2, + fitness_func=fitness_func, + gene_type=int, + on_generation=on_generation, + allow_duplicate_genes=False) + ga_instance.run() + +Here are the population after the 5 generations. Note how there are no +duplicate values. + +.. 
code:: python + + Generation 1 + [[ 2 -2 -3 3] + [ 0 1 2 3] + [ 5 -3 6 3] + [-3 1 -2 4] + [-1 0 -2 3]] + Generation 2 + [[-1 0 -2 3] + [-3 1 -2 4] + [ 0 -3 -2 6] + [-3 0 -2 3] + [ 1 -4 2 4]] + Generation 3 + [[ 1 -4 2 4] + [-3 0 -2 3] + [ 4 0 -2 1] + [-4 0 -2 -3] + [-4 2 0 3]] + Generation 4 + [[-4 2 0 3] + [-4 0 -2 -3] + [-2 5 4 -3] + [-1 2 -4 4] + [-4 2 0 -3]] + Generation 5 + [[-4 2 0 -3] + [-1 2 -4 4] + [ 3 4 -4 0] + [-1 0 2 -2] + [-4 2 -1 1]] + +The ``allow_duplicate_genes`` parameter can also be used with the +``gene_space`` parameter. Here is an example where each of the 4 genes +has the same space of values that consists of 4 values (1, 2, 3, and 4). + +.. code:: python + + import pygad + + def fitness_func(ga_instance, solution, solution_idx): + return 0 + + def on_generation(ga): + print("Generation", ga.generations_completed) + print(ga.population) + + ga_instance = pygad.GA(num_generations=5, + sol_per_pop=5, + num_genes=4, + num_parents_mating=2, + fitness_func=fitness_func, + gene_type=int, + gene_space=[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], + on_generation=on_generation, + allow_duplicate_genes=False) + ga_instance.run() + +Even though all the genes share the same space of values, no 2 genes +duplicate their values as shown in the next output. + +.. code:: python + + Generation 1 + [[2 3 1 4] + [2 3 1 4] + [2 4 1 3] + [2 3 1 4] + [1 3 2 4]] + Generation 2 + [[1 3 2 4] + [2 3 1 4] + [1 3 2 4] + [2 3 4 1] + [1 3 4 2]] + Generation 3 + [[1 3 4 2] + [2 3 4 1] + [1 3 4 2] + [3 1 4 2] + [3 2 4 1]] + Generation 4 + [[3 2 4 1] + [3 1 4 2] + [3 2 4 1] + [1 2 4 3] + [1 3 4 2]] + Generation 5 + [[1 3 4 2] + [1 2 4 3] + [2 1 4 3] + [1 2 4 3] + [1 2 4 3]] + +You should take care to give enough values for the genes so that PyGAD is +able to find alternatives for the gene value in case it duplicates with +another gene. + +There might be 2 duplicate genes where changing either of the 2 +duplicating genes will not solve the problem.
For example, if +``gene_space=[[3, 0, 1], [4, 1, 2], [0, 2], [3, 2, 0]]`` and the +solution is ``[3 2 0 0]``, then the values of the last 2 genes +duplicate. There are no possible changes in the last 2 genes to solve +the problem. + +This problem can be solved by randomly changing one of the +non-duplicating genes that may make room for a unique value in one of the +2 duplicating genes. For example, by changing the second gene from 2 to +4, then any of the last 2 genes can take the value 2 and solve the +duplicates. The resultant gene is then ``[3 4 2 0]``. But this option is +not yet supported in PyGAD. + +Solve Duplicates using a Third Gene +----------------------------------- + +When ``allow_duplicate_genes=False`` and a user-defined ``gene_space`` +is used, it sometimes happens that there is no room to solve the +duplicates between the 2 genes by simply replacing the value of one gene +by another gene. In `PyGAD +3.1.0 `__, +the duplicates are solved by looking for a third gene that will help in +solving the duplicates. The following examples explain how it works. + +Example 1: + +Let's assume that this gene space is used and there is a solution with 2 +duplicate genes with the same value 4. + +.. code:: python + + Gene space: [[2, 3], + [3, 4], + [4, 5], + [5, 6]] + Solution: [3, 4, 4, 5] + +By checking the gene space, the second gene can have the values +``[3, 4]`` and the third gene can have the values ``[4, 5]``. To solve +the duplicates, we have to change the value of one of these 2 genes. + +If the value of the second gene changes from 4 to 3, then it will +duplicate with the first gene. If we are to change the value of the +third gene from 4 to 5, then it will duplicate with the fourth gene. As +a conclusion, trying to just select a different gene value for either +the second or third genes will introduce new duplicating genes.
+ +When there are 2 duplicate genes but there is no way to solve their +duplicates, then the solution is to change a third gene that makes +room to solve the duplicates between the 2 genes. + +In our example, duplicates between the second and third genes can be +solved by, for example: + +- Changing the first gene from 3 to 2 then changing the second gene + from 4 to 3. + +- Or changing the fourth gene from 5 to 6 then changing the third gene + from 4 to 5. + +Generally, this is how to solve such duplicates: + +1. For any duplicate gene **GENE1**, select another value. + +2. Check which other gene **GENEX** has a duplicate with this new value. + +3. Find if **GENEX** can have another value that will not cause any more + duplicates. If so, go to step 7. + +4. If all the other values of **GENEX** will cause duplicates, then try + another gene **GENEY**. + +5. Repeat steps 3 and 4 until exploring all the genes. + +6. If there is no possibility to solve the duplicates, then there is no + way to solve the duplicates and we have to keep the duplicate value. + +7. If a value for a gene **GENEM** is found that will not cause more + duplicates, then use this value for the gene **GENEM**. + +8. Replace the value of the gene **GENE1** by the old value of the gene + **GENEM**. This solves the duplicates. + +This is an example to solve the duplicate for the solution +``[3, 4, 4, 5]``: + +1. Let's use the second gene with value 4. Because the space of this + gene is ``[3, 4]``, then the only other value we can select is 3. + +2. The first gene also has the value 3. + +3. The first gene has another value 2 that will not cause more + duplicates in the solution. Then go to step 7. + +4. Skip. + +5. Skip. + +6. Skip. + +7. The value of the first gene 3 will be replaced by the new value 2. + The new solution is [2, 4, 4, 5]. + +8. Replace the value of the second gene 4 by the old value of the first + gene which is 3. The new solution is [2, 3, 4, 5]. The duplicate is + solved.
+ +Example 2: + +.. code:: python + + Gene space: [[0, 1], + [1, 2], + [2, 3], + [3, 4]] + Solution: [1, 2, 2, 3] + +The quick summary is: + +- Change the value of the first gene from 1 to 0. The solution becomes + [0, 2, 2, 3]. + +- Change the value of the second gene from 2 to 1. The solution becomes + [0, 1, 2, 3]. The duplicate is solved. + +.. _more-about-the-genetype-parameter: + +More about the ``gene_type`` Parameter +====================================== + +The ``gene_type`` parameter allows the user to control the data type for +all genes at once or each individual gene. In `PyGAD +2.15.0 `__, +the ``gene_type`` parameter also supports customizing the precision for +``float`` data types. As a result, the ``gene_type`` parameter helps to: + +1. Select a data type for all genes with or without precision. + +2. Select a data type for each individual gene with or without + precision. + +Let's discuss things by examples. + +Data Type for All Genes without Precision +----------------------------------------- + +The data type for all genes can be specified by assigning the numeric +data type directly to the ``gene_type`` parameter. This is an example to +make all genes of ``int`` data types. + +.. code:: python + + gene_type=int + +Given that the supported numeric data types of PyGAD include Python's +``int`` and ``float`` in addition to all numeric types of ``NumPy``, +then any of these types can be assigned to the ``gene_type`` parameter. + +If no precision is specified for a ``float`` data type, then the +complete floating-point number is kept. + +The next code uses an ``int`` data type for all genes where the genes in +the initial and final population are only integers. + +.. 
code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_type=int) + + print("Initial Population") + print(ga_instance.initial_population) + + ga_instance.run() + + print("Final Population") + print(ga_instance.population) + +.. code:: python + + Initial Population + [[ 1 -1 2 0 -3] + [ 0 -2 0 -3 -1] + [ 0 -1 -1 2 0] + [-2 3 -2 3 3] + [ 0 0 2 -2 -2]] + + Final Population + [[ 1 -1 2 2 0] + [ 1 -1 2 2 0] + [ 1 -1 2 2 0] + [ 1 -1 2 2 0] + [ 1 -1 2 2 0]] + +Data Type for All Genes with Precision +-------------------------------------- + +A precision can only be specified for a ``float`` data type and cannot +be specified for integers. Here is an example to use a precision of 3 +for the ``float`` data type. In this case, all genes are of type +``float`` and their maximum precision is 3. + +.. code:: python + + gene_type=[float, 3] + +The next code uses prints the initial and final population where the +genes are of type ``float`` with precision 3. + +.. 
code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_type=[float, 3]) + + print("Initial Population") + print(ga_instance.initial_population) + + ga_instance.run() + + print("Final Population") + print(ga_instance.population) + +.. code:: python + + Initial Population + [[-2.417 -0.487 3.623 2.457 -2.362] + [-1.231 0.079 -1.63 1.629 -2.637] + [ 0.692 -2.098 0.705 0.914 -3.633] + [ 2.637 -1.339 -1.107 -0.781 -3.896] + [-1.495 1.378 -1.026 3.522 2.379]] + + Final Population + [[ 1.714 -1.024 3.623 3.185 -2.362] + [ 0.692 -1.024 3.623 3.185 -2.362] + [ 0.692 -1.024 3.623 3.375 -2.362] + [ 0.692 -1.024 4.041 3.185 -2.362] + [ 1.714 -0.644 3.623 3.185 -2.362]] + +Data Type for each Individual Gene without Precision +---------------------------------------------------- + +In `PyGAD +2.14.0 `__, +the ``gene_type`` parameter allows customizing the gene type for each +individual gene. This is by using a ``list``/``tuple``/``numpy.ndarray`` +with number of elements equal to the number of genes. For each element, +a type is specified for the corresponding gene. + +This is an example for a 5-gene problem where different types are +assigned to the genes. + +.. code:: python + + gene_type=[int, float, numpy.float16, numpy.int8, float] + +This is a complete code that prints the initial and final population for +a custom-gene data type. + +.. 
code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_type=[int, float, numpy.float16, numpy.int8, float]) + + print("Initial Population") + print(ga_instance.initial_population) + + ga_instance.run() + + print("Final Population") + print(ga_instance.population) + +.. code:: python + + Initial Population + [[0 0.8615522360026828 0.7021484375 -2 3.5301821368185866] + [-3 2.648189378595294 -3.830078125 1 -0.9586271572917742] + [3 3.7729827570110714 1.2529296875 -3 1.395741994211889] + [0 1.0490687178053282 1.51953125 -2 0.7243617940450235] + [0 -0.6550158436937226 -2.861328125 -2 1.8212734549263097]] + + Final Population + [[3 3.7729827570110714 2.055 0 0.7243617940450235] + [3 3.7729827570110714 1.458 0 -0.14638754050305036] + [3 3.7729827570110714 1.458 0 0.0869406120516778] + [3 3.7729827570110714 1.458 0 0.7243617940450235] + [3 3.7729827570110714 1.458 0 -0.14638754050305036]] + +Data Type for each Individual Gene with Precision +------------------------------------------------- + +The precision can also be specified for the ``float`` data types as in +the next line where the second gene precision is 2 and last gene +precision is 1. + +.. code:: python + + gene_type=[int, [float, 2], numpy.float16, numpy.int8, [float, 1]] + +This is a complete example where the initial and final populations are +printed where the genes comply with the data types and precisions +specified. + +.. 
code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_type=[int, [float, 2], numpy.float16, numpy.int8, [float, 1]]) + + print("Initial Population") + print(ga_instance.initial_population) + + ga_instance.run() + + print("Final Population") + print(ga_instance.population) + +.. code:: python + + Initial Population + [[-2 -1.22 1.716796875 -1 0.2] + [-1 -1.58 -3.091796875 0 -1.3] + [3 3.35 -0.107421875 1 -3.3] + [-2 -3.58 -1.779296875 0 0.6] + [2 -3.73 2.65234375 3 -0.5]] + + Final Population + [[2 -4.22 3.47 3 -1.3] + [2 -3.73 3.47 3 -1.3] + [2 -4.22 3.47 2 -1.3] + [2 -4.58 3.47 3 -1.3] + [2 -3.73 3.47 3 -1.3]] + +Parallel Processing in PyGAD +============================ + +Starting from `PyGAD +2.17.0 `__, +parallel processing becomes supported. This section explains how to use +parallel processing in PyGAD. + +According to the `PyGAD +lifecycle `__, +parallel processing can be parallelized in only 2 operations: + +1. Population fitness calculation. + +2. Mutation. + +The reason is that the calculations in these 2 operations are +independent (i.e. each solution/chromosome is handled independently from +the others) and can be distributed across different processes or +threads. + +For the mutation operation, it does not do intensive calculations on the +CPU. Its calculations are simple like flipping the values of some genes +from 0 to 1 or adding a random value to some genes. So, it does not take +much CPU processing time. Experiments proved that parallelizing the +mutation operation across the solutions increases the time instead of +reducing it. 
This is because running multiple processes or threads adds +overhead to manage them. Thus, parallel processing is not applied to +the mutation operation. + +For the population fitness calculation, parallel processing can help +make a difference and reduce the processing time. But this is +conditional on the type of calculations done in the fitness function. If +the fitness function makes intensive calculations and takes much +processing time from the CPU, then it is probable that parallel +processing will help to cut down the overall time. + +This section explains how parallel processing works in PyGAD and how to +use parallel processing in PyGAD. + +How to Use Parallel Processing in PyGAD +--------------------------------------- + +Starting from `PyGAD +2.17.0 `__, +a new parameter called ``parallel_processing`` was added to the constructor +of the ``pygad.GA`` class. + +.. code:: python + + import pygad + ... + ga_instance = pygad.GA(..., + parallel_processing=...) + ... + +This parameter allows the user to do the following: + +1. Enable parallel processing. + +2. Select whether processes or threads are used. + +3. Specify the number of processes or threads to be used. + +These are 3 possible values for the ``parallel_processing`` parameter: + +1. ``None``: (Default) It means no parallel processing is used. + +2. A positive integer referring to the number of threads to be used + (i.e. threads, not processes, are used). + +3. ``list``/``tuple``: If a list or a tuple of exactly 2 elements is + assigned, then: + + 1. The first element can be either ``'process'`` or ``'thread'`` to + specify whether processes or threads are used, respectively. + + 2. The second element can be: + + 1. A positive integer to select the maximum number of processes or + threads to be used. + + 2. ``0`` to indicate that 0 processes or threads are used. It + means no parallel processing. This is identical to setting + ``parallel_processing=None``. + + 3.
``None`` to use the default value as calculated by the + ``concurrent.futures`` module. + +These are examples of the values assigned to the ``parallel_processing`` +parameter: + +- ``parallel_processing=4``: Because the parameter is assigned a + positive integer, this means parallel processing is activated where 4 + threads are used. + +- ``parallel_processing=["thread", 5]``: Use parallel processing with 5 + threads. This is identical to ``parallel_processing=5``. + +- ``parallel_processing=["process", 8]``: Use parallel processing with + 8 processes. + +- ``parallel_processing=["process", 0]``: As the second element is + given the value 0, this means do not use parallel processing. This is + identical to ``parallel_processing=None``. + +Examples +-------- + +The examples will help you know the difference between using processes +and threads. Moreover, they will give an idea of when parallel processing +would make a difference and reduce the time. These are dummy examples +where the fitness function is made to always return 0. + +The first example uses 10 genes, 5 solutions in the population where +only 3 solutions mate, and 9999 generations. The fitness function uses a +``for`` loop with 99 iterations just to have some calculations. In the +constructor of the ``pygad.GA`` class, ``parallel_processing=None`` +means no parallel processing is used. + +.. code:: python + + import pygad + import time + + def fitness_func(ga_instance, solution, solution_idx): + for _ in range(99): + pass + return 0 + + ga_instance = pygad.GA(num_generations=9999, + num_parents_mating=3, + sol_per_pop=5, + num_genes=10, + fitness_func=fitness_func, + suppress_warnings=True, + parallel_processing=None) + + if __name__ == '__main__': + t1 = time.time() + + ga_instance.run() + + t2 = time.time() + print("Time is", t2-t1) + +When parallel processing is not used, the time it takes to run the +genetic algorithm is ``1.5`` seconds.
+ +For comparison, let's do a second experiment where parallel +processing is used with 5 threads. In this case, it takes ``5`` seconds. + +.. code:: python + + ... + ga_instance = pygad.GA(..., + parallel_processing=5) + ... + +For the third experiment, processes instead of threads are used. Also, +only 99 generations are used instead of 9999. The time it takes is +``99`` seconds. + +.. code:: python + + ... + ga_instance = pygad.GA(num_generations=99, + ..., + parallel_processing=["process", 5]) + ... + +This is the summary of the 3 experiments: + +1. No parallel processing & 9999 generations: 1.5 seconds. + +2. Parallel processing with 5 threads & 9999 generations: 5 seconds. + +3. Parallel processing with 5 processes & 99 generations: 99 seconds. + +Because the fitness function does not need much CPU time, the normal +processing takes the least time. Running processes for this simple +problem takes 99 seconds compared to only 5 seconds for threads because managing +processes is much heavier than managing threads. Thus, most of the CPU +time is for swapping the processes instead of executing the code. + +In the second example, the loop makes 99999999 iterations and only 5 +generations are used. With no parallelization, it takes 22 seconds. + +.. code:: python + + import pygad + import time + + def fitness_func(ga_instance, solution, solution_idx): + for _ in range(99999999): + pass + return 0 + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=3, + sol_per_pop=5, + num_genes=10, + fitness_func=fitness_func, + suppress_warnings=True, + parallel_processing=None) + + if __name__ == '__main__': + t1 = time.time() + ga_instance.run() + t2 = time.time() + print("Time is", t2-t1) + +It takes 15 seconds when 10 processes are used. + +.. code:: python + + ... + ga_instance = pygad.GA(..., + parallel_processing=["process", 10]) + ... + +This is compared to 20 seconds when 10 threads are used. + +.. code:: python + + ...
+ ga_instance = pygad.GA(..., + parallel_processing=["thread", 10]) + ... + +Based on the second example, using parallel processing with 10 processes +takes the least time because there is much CPU work done. Generally, +processes are preferred over threads when most of the work is on the +CPU. Threads are preferred over processes in some situations like doing +input/output operations. + +*Before releasing* `PyGAD +2.17.0 `__\ *,* +`László +Fazekas `__ +*wrote an article to parallelize the fitness function with PyGAD. Check +it:* `How Genetic Algorithms Can Compete with Gradient Descent and +Backprop `__. + +Print Lifecycle Summary +======================= + +In `PyGAD +2.19.0 `__, +a new method called ``summary()`` is supported. It prints a Keras-like +summary of the PyGAD lifecycle showing the steps, callback functions, +parameters, etc. + +This method accepts the following parameters: + +- ``line_length=70``: An integer representing the length of the single + line in characters. + +- ``fill_character=" "``: A character to fill the lines. + +- ``line_character="-"``: A character for creating a line separator. + +- ``line_character2="="``: A secondary character to create a line + separator. + +- ``columns_equal_len=False``: Whether the table rows are split into + equal-sized columns or split according to the width needed. + +- ``print_step_parameters=True``: Whether to print extra parameters + about each step inside the step. If ``print_step_parameters=False`` + and ``print_parameters_summary=True``, then the parameters of each + step are printed at the end of the table. + +- ``print_parameters_summary=True``: Whether to print parameters + summary at the end of the table. If ``print_step_parameters=False``, + then the parameters of each step are printed at the end of the table + too. + +This is a quick example that creates a PyGAD instance. + +..
code:: python + + import pygad + import numpy + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + def genetic_fitness(solution, solution_idx): + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + def on_gen(ga): + pass + + def on_crossover_callback(a, b): + pass + + ga_instance = pygad.GA(num_generations=100, + num_parents_mating=10, + sol_per_pop=20, + num_genes=len(function_inputs), + on_crossover=on_crossover_callback, + on_generation=on_gen, + parallel_processing=2, + stop_criteria="reach_10", + fitness_batch_size=4, + crossover_probability=0.4, + fitness_func=genetic_fitness) + +Then call the ``summary()`` method to print the summary with the default +parameters. Note that entries for the crossover and generation callback +function are created because their callback functions are implemented +through the ``on_crossover_callback()`` and ``on_gen()``, respectively. + +.. code:: python + + ga_instance.summary() + +.. 
code:: bash + + ---------------------------------------------------------------------- + PyGAD Lifecycle + ====================================================================== + Step Handler Output Shape + ====================================================================== + Fitness Function genetic_fitness() (1) + Fitness batch size: 4 + ---------------------------------------------------------------------- + Parent Selection steady_state_selection() (10, 6) + Number of Parents: 10 + ---------------------------------------------------------------------- + Crossover single_point_crossover() (10, 6) + Crossover probability: 0.4 + ---------------------------------------------------------------------- + On Crossover on_crossover_callback() None + ---------------------------------------------------------------------- + Mutation random_mutation() (10, 6) + Mutation Genes: 1 + Random Mutation Range: (-1.0, 1.0) + Mutation by Replacement: False + Allow Duplicated Genes: True + ---------------------------------------------------------------------- + On Generation on_gen() None + Stop Criteria: [['reach', 10.0]] + ---------------------------------------------------------------------- + ====================================================================== + Population Size: (20, 6) + Number of Generations: 100 + Initial Population Range: (-4, 4) + Keep Elitism: 1 + Gene DType: [, None] + Parallel Processing: ['thread', 2] + Save Best Solutions: False + Save Solutions: False + ====================================================================== + +We can set the ``print_step_parameters`` and +``print_parameters_summary`` parameters to ``False`` to not print the +parameters. + +.. code:: python + + ga_instance.summary(print_step_parameters=False, + print_parameters_summary=False) + +.. 
code:: bash + + ---------------------------------------------------------------------- + PyGAD Lifecycle + ====================================================================== + Step Handler Output Shape + ====================================================================== + Fitness Function genetic_fitness() (1) + ---------------------------------------------------------------------- + Parent Selection steady_state_selection() (10, 6) + ---------------------------------------------------------------------- + Crossover single_point_crossover() (10, 6) + ---------------------------------------------------------------------- + On Crossover on_crossover_callback() None + ---------------------------------------------------------------------- + Mutation random_mutation() (10, 6) + ---------------------------------------------------------------------- + On Generation on_gen() None + ---------------------------------------------------------------------- + ====================================================================== + +Logging Outputs +=============== + +In `PyGAD +3.0.0 `__, +the ``print()`` statement is no longer used and the outputs are printed +using the `logging `__ +module. A new parameter called ``logger`` is supported to accept the +user-defined logger. + +.. code:: python + + import logging + + logger = ... + + ga_instance = pygad.GA(..., + logger=logger, + ...) + +The default value for this parameter is ``None``. If there is no logger +passed (i.e. ``logger=None``), then a default logger is created to log +the messages to the console exactly like how the ``print()`` statement +works. + +Some advantages of using the +`logging `__ module +instead of the ``print()`` statement are: + +1. The user has more control over the printed messages especially if + there is a project that uses multiple modules where each module + prints its messages. A logger can organize the outputs. + +2.
Using the proper ``Handler``, the user can log the output messages to + files and not only restricted to printing it to the console. So, it + is much easier to record the outputs. + +3. The format of the printed messages can be changed by customizing the + ``Formatter`` assigned to the Logger. + +This section gives some quick examples to use the ``logging`` module and +then gives an example to use the logger with PyGAD. + +Logging to the Console +---------------------- + +This is an example to create a logger to log the messages to the +console. + +.. code:: python + + import logging + + # Create a logger + logger = logging.getLogger(__name__) + + # Set the logger level to debug so that all the messages are printed. + logger.setLevel(logging.DEBUG) + + # Create a stream handler to log the messages to the console. + stream_handler = logging.StreamHandler() + + # Set the handler level to debug. + stream_handler.setLevel(logging.DEBUG) + + # Create a formatter + formatter = logging.Formatter('%(message)s') + + # Add the formatter to handler. + stream_handler.setFormatter(formatter) + + # Add the stream handler to the logger + logger.addHandler(stream_handler) + +Now, we can log messages to the console with the format specified in the +``Formatter``. + +.. code:: python + + logger.debug('Debug message.') + logger.info('Info message.') + logger.warning('Warn message.') + logger.error('Error message.') + logger.critical('Critical message.') + +The outputs are identical to those returned using the ``print()`` +statement. + +.. code:: + + Debug message. + Info message. + Warn message. + Error message. + Critical message. + +By changing the format of the output messages, we can have more +information about each message. + +.. code:: python + + formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + +This is a sample output. + +.. code:: python + + 2023-04-03 18:46:27 DEBUG: Debug message. 
+ 2023-04-03 18:46:27 INFO: Info message. + 2023-04-03 18:46:27 WARNING: Warn message. + 2023-04-03 18:46:27 ERROR: Error message. + 2023-04-03 18:46:27 CRITICAL: Critical message. + +Note that you may need to clear the handlers after finishing the +execution. This is to make sure no cached handlers are used in the next +run. If the cached handlers are not cleared, then a single output +message may be repeated. + +.. code:: python + + logger.handlers.clear() + +Logging to a File +----------------- + +This is another example to log the messages to a file named +``logfile.txt``. The formatter prints the following about each message: + +1. The date and time at which the message is logged. + +2. The log level. + +3. The message. + +4. The path of the file. + +5. The line number of the log message. + +.. code:: python + + import logging + + level = logging.DEBUG + name = 'logfile.txt' + + logger = logging.getLogger(name) + logger.setLevel(level) + + file_handler = logging.FileHandler(name, 'a+', 'utf-8') + file_handler.setLevel(logging.DEBUG) + file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s - %(pathname)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S') + file_handler.setFormatter(file_format) + logger.addHandler(file_handler) + +This is what the outputs look like. + +.. code:: python + + 2023-04-03 18:54:03 DEBUG: Debug message. - c:\users\agad069\desktop\logger\example2.py:46 + 2023-04-03 18:54:03 INFO: Info message. - c:\users\agad069\desktop\logger\example2.py:47 + 2023-04-03 18:54:03 WARNING: Warn message. - c:\users\agad069\desktop\logger\example2.py:48 + 2023-04-03 18:54:03 ERROR: Error message. - c:\users\agad069\desktop\logger\example2.py:49 + 2023-04-03 18:54:03 CRITICAL: Critical message. - c:\users\agad069\desktop\logger\example2.py:50 + +Consider clearing the handlers if necessary. + +..
code:: python + + logger.handlers.clear() + +Log to Both the Console and a File +---------------------------------- + +This is an example to create a single Logger associated with 2 handlers: + +1. A file handler. + +2. A stream handler. + +.. code:: python + + import logging + + level = logging.DEBUG + name = 'logfile.txt' + + logger = logging.getLogger(name) + logger.setLevel(level) + + file_handler = logging.FileHandler(name,'a+','utf-8') + file_handler.setLevel(logging.DEBUG) + file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s - %(pathname)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S') + file_handler.setFormatter(file_format) + logger.addHandler(file_handler) + + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_format = logging.Formatter('%(message)s') + console_handler.setFormatter(console_format) + logger.addHandler(console_handler) + +When a log message is executed, then it is both printed to the console +and saved in the ``logfile.txt``. + +Consider clearing the handlers if necessary. + +.. code:: python + + logger.handlers.clear() + +PyGAD Example +------------- + +To use the logger in PyGAD, just create your custom logger and pass it +to the ``logger`` parameter. + +.. 
code:: python + + import logging + import pygad + import numpy + + level = logging.DEBUG + name = 'logfile.txt' + + logger = logging.getLogger(name) + logger.setLevel(level) + + file_handler = logging.FileHandler(name,'a+','utf-8') + file_handler.setLevel(logging.DEBUG) + file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + file_handler.setFormatter(file_format) + logger.addHandler(file_handler) + + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_format = logging.Formatter('%(message)s') + console_handler.setFormatter(console_format) + logger.addHandler(console_handler) + + equation_inputs = [4, -2, 8] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + def on_generation(ga_instance): + ga_instance.logger.info(f"Generation = {ga_instance.generations_completed}") + ga_instance.logger.info(f"Fitness = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]}") + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=40, + num_parents_mating=2, + keep_parents=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + on_generation=on_generation, + logger=logger) + ga_instance.run() + + logger.handlers.clear() + +By executing this code, the logged messages are printed to the console +and also saved in the text file. + +.. 
code:: python + + 2023-04-03 19:04:27 INFO: Generation = 1 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038086960368076276 + 2023-04-03 19:04:27 INFO: Generation = 2 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038214871408010853 + 2023-04-03 19:04:27 INFO: Generation = 3 + 2023-04-03 19:04:27 INFO: Fitness = 0.0003832795907974678 + 2023-04-03 19:04:27 INFO: Generation = 4 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038398612055017196 + 2023-04-03 19:04:27 INFO: Generation = 5 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038442348890867516 + 2023-04-03 19:04:27 INFO: Generation = 6 + 2023-04-03 19:04:27 INFO: Fitness = 0.0003854406039137763 + 2023-04-03 19:04:27 INFO: Generation = 7 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038646083174063284 + 2023-04-03 19:04:27 INFO: Generation = 8 + 2023-04-03 19:04:27 INFO: Fitness = 0.0003875169193024936 + 2023-04-03 19:04:27 INFO: Generation = 9 + 2023-04-03 19:04:27 INFO: Fitness = 0.0003888816727311021 + 2023-04-03 19:04:27 INFO: Generation = 10 + 2023-04-03 19:04:27 INFO: Fitness = 0.000389832593101348 + +Solve Non-Deterministic Problems +================================ + +PyGAD can be used to solve both deterministic and non-deterministic +problems. Deterministic are those that return the same fitness for the +same solution. For non-deterministic problems, a different fitness value +would be returned for the same solution. + +By default, PyGAD settings are set to solve deterministic problems. +PyGAD can save the explored solutions and their fitness to reuse in the +future. These instances attributes can save the solutions: + +1. ``solutions``: Exists if ``save_solutions=True``. + +2. ``best_solutions``: Exists if ``save_best_solutions=True``. + +3. ``last_generation_elitism``: Exists if ``keep_elitism`` > 0. + +4. ``last_generation_parents``: Exists if ``keep_parents`` > 0 or + ``keep_parents=-1``. + +To configure PyGAD for non-deterministic problems, we have to disable +saving the previous solutions. 
This is by setting these parameters: + +1. ``keep_elitism=0`` + +2. ``keep_parents=0`` + +3. ``save_solutions=False`` + +4. ``save_best_solutions=False`` + +.. code:: python + + import pygad + ... + ga_instance = pygad.GA(..., + keep_elitism=0, + keep_parents=0, + save_solutions=False, + save_best_solutions=False, + ...) + +This way PyGAD will not save any explored solution and thus the fitness +function has to be called for each individual solution. + +Reuse the Fitness instead of Calling the Fitness Function +========================================================= + +It may happen that a previously explored solution in generation X is +explored again in another generation Y (where Y > X). For some problems, +calling the fitness function takes much time. + +For deterministic problems, it is better to not call the fitness +function for an already explored solution. Instead, reuse the fitness +of the old solution. PyGAD supports some options to help you save the time +of calling the fitness function for a previously explored solution. + +The parameters explored in this section can be set in the constructor of +the ``pygad.GA`` class. + +The ``cal_pop_fitness()`` method of the ``pygad.GA`` class checks these +parameters to see if there is a possibility of reusing the fitness +instead of calling the fitness function. + +.. _1-savesolutions: + +1. ``save_solutions`` +--------------------- + +It defaults to ``False``. If set to ``True``, then the population of +each generation is saved into the ``solutions`` attribute of the +``pygad.GA`` instance. In other words, every single solution is saved in +the ``solutions`` attribute. + +.. _2-savebestsolutions: + +2. ``save_best_solutions`` +-------------------------- + +It defaults to ``False``. If ``True``, then it only saves the best +solution in every generation. + +.. _3-keepelitism: + +3. ``keep_elitism`` +------------------- + +It accepts an integer and defaults to 1.
If set to a positive integer, +then it keeps the elitism of one generation available in the next +generation. + +.. _4-keepparents: + +4. ``keep_parents`` +------------------- + +It accepts an integer and defaults to -1. If set to ``-1`` or a positive +integer, then it keeps the parents of one generation available in the +next generation. + +Why the Fitness Function is not Called for Solution at Index 0? +=============================================================== + +PyGAD has a parameter called ``keep_elitism`` which defaults to 1. This +parameter defines the number of best solutions in generation **X** to +keep in the next generation **X+1**. The best solutions are just copied +from generation **X** to generation **X+1** without making any change. + +.. code:: python + + ga_instance = pygad.GA(..., + keep_elitism=1, + ...) + +The best solutions are copied at the beginning of the population. If +``keep_elitism=1``, this means the best solution in generation X is kept +in the next generation X+1 at index 0 of the population. If +``keep_elitism=2``, this means the 2 best solutions in generation X are +kept in the next generation X+1 at indices 0 and 1 of the population of +generation X+1. + +Because the fitness of these best solutions is already calculated in +generation X, then their fitness values will not be recalculated at +generation X+1 (i.e. the fitness function will not be called for these +solutions again). Instead, their fitness values are just reused. This is +why you see that no solution with index 0 is passed to the fitness +function. + +To force calling the fitness function for each solution in every +generation, consider setting ``keep_elitism`` and ``keep_parents`` to 0. +Moreover, keep the 2 parameters ``save_solutions`` and +``save_best_solutions`` to their default value ``False``. + +.. code:: python + + ga_instance = pygad.GA(..., + keep_elitism=0, + keep_parents=0, + save_solutions=False, + save_best_solutions=False, + ...) 
+ +Batch Fitness Calculation +========================= + +In `PyGAD +2.19.0 `__, +a new optional parameter called ``fitness_batch_size`` is supported. +This parameter makes it possible to +calculate the fitness function in batches. Thanks to `Linan +Qiu `__ for opening the `GitHub issue +#136 `__. + +Its values can be: + +- ``1`` or ``None``: If the ``fitness_batch_size`` parameter is + assigned the value ``1`` or ``None`` (default), then the normal flow + is used where the fitness function is called for each individual + solution. That is if there are 15 solutions, then the fitness + function is called 15 times. + +- ``1 < fitness_batch_size <= sol_per_pop``: If the + ``fitness_batch_size`` parameter is assigned a value satisfying this + condition ``1 < fitness_batch_size <= sol_per_pop``, then the + solutions are grouped into batches of size ``fitness_batch_size`` and + the fitness function is called once for each batch. In this case, the + fitness function must return a list/tuple/numpy.ndarray with a length + equal to the number of solutions passed. + +.. _example-without-fitnessbatchsize-parameter: + +Example without ``fitness_batch_size`` Parameter +------------------------------------------------ + +This is an example where the ``fitness_batch_size`` parameter is given +the value ``None`` (which is the default value). This is equivalent to +using the value ``1``. In this case, the fitness function will be called +for each solution. This means the fitness function ``fitness_func`` will +receive only a single solution. This is an example of the passed +arguments to the fitness function: + +.. code:: + + solution: [ 2.52860734, -0.94178795, 2.97545704, 0.84131987, -3.78447118, 2.41008358] + solution_idx: 3 + +The fitness function also must return a single numeric value as the +fitness for the passed solution. + +As we have a population of ``20`` solutions, then the fitness function +is called 20 times per generation. 
For 5 generations, the fitness +function is called ``20*5 = 100`` times. In PyGAD, the fitness function +is called after the last generation too and this adds an additional 20 +calls. So, the total number of calls to the fitness function is +``20*5 + 20 = 120``. + +Note that the ``keep_elitism`` and ``keep_parents`` parameters are set +to ``0`` to make sure no fitness values are reused and to force calling +the fitness function for each individual solution. + +.. code:: python + + import pygad + import numpy + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + number_of_calls = 0 + + def fitness_func(ga_instance, solution, solution_idx): + global number_of_calls + number_of_calls = number_of_calls + 1 + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=10, + sol_per_pop=20, + fitness_func=fitness_func, + fitness_batch_size=None, + # fitness_batch_size=1, + num_genes=len(function_inputs), + keep_elitism=0, + keep_parents=0) + + ga_instance.run() + print(number_of_calls) + +.. code:: + + 120 + +.. _example-with-fitnessbatchsize-parameter: + +Example with ``fitness_batch_size`` Parameter +--------------------------------------------- + +This is an example where the ``fitness_batch_size`` parameter is used +and assigned the value ``4``. This means the solutions will be grouped +into batches of ``4`` solutions. The fitness function will be called +once for each batch (i.e. called once for each 4 solutions). + +This is an example of the arguments passed to it: + +.. 
code:: python + + solutions: + [[ 3.1129432 -0.69123589 1.93792414 2.23772968 -1.54616001 -0.53930799] + [ 3.38508121 0.19890812 1.93792414 2.23095014 -3.08955597 3.10194128] + [ 2.37079504 -0.88819803 2.97545704 1.41742256 -3.95594055 2.45028256] + [ 2.52860734 -0.94178795 2.97545704 0.84131987 -3.78447118 2.41008358]] + solutions_indices: + [16, 17, 18, 19] + +As we have 20 solutions, then there are ``20/4 = 5`` batches. As a +result, the fitness function is called only 5 times per generation +instead of 20. For each call to the fitness function, it receives a +batch of 4 solutions. + +As we have 5 generations, then the function will be called ``5*5 = 25`` +times. Given the call to the fitness function after the last generation, +then the total number of calls is ``5*5 + 5 = 30``. + +.. code:: python + + import pygad + import numpy + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + number_of_calls = 0 + + def fitness_func_batch(ga_instance, solutions, solutions_indices): + global number_of_calls + number_of_calls = number_of_calls + 1 + batch_fitness = [] + for solution in solutions: + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + batch_fitness.append(fitness) + return batch_fitness + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=10, + sol_per_pop=20, + fitness_func=fitness_func_batch, + fitness_batch_size=4, + num_genes=len(function_inputs), + keep_elitism=0, + keep_parents=0) + + ga_instance.run() + print(number_of_calls) + +.. code:: + + 30 + +When batch fitness calculation is used, we save ``120 - 30 = 90`` +calls to the fitness function. + +Use Functions and Methods to Build Fitness and Callbacks +======================================================== + +In PyGAD 2.19.0, it is possible to pass user-defined functions or +methods to the following parameters: + +1. ``fitness_func`` + +2. ``on_start`` + +3. ``on_fitness`` + +4. ``on_parents`` + +5. 
``on_crossover`` + +6. ``on_mutation`` + +7. ``on_generation`` + +8. ``on_stop`` + +This section gives 2 examples of assigning these parameters to user-defined: + +1. Functions. + +2. Methods. + +Assign Functions +---------------- + +This is a dummy example where the fitness function returns a random +value. Note that the instance of the ``pygad.GA`` class is passed as the +first parameter of all functions. + +.. code:: python + + import pygad + import numpy + + def fitness_func(ga_instanse, solution, solution_idx): + return numpy.random.rand() + + def on_start(ga_instanse): + print("on_start") + + def on_fitness(ga_instanse, last_gen_fitness): + print("on_fitness") + + def on_parents(ga_instanse, last_gen_parents): + print("on_parents") + + def on_crossover(ga_instanse, last_gen_offspring): + print("on_crossover") + + def on_mutation(ga_instanse, last_gen_offspring): + print("on_mutation") + + def on_generation(ga_instanse): + print("on_generation\n") + + def on_stop(ga_instanse, last_gen_fitness): + print("on_stop") + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=4, + sol_per_pop=10, + num_genes=2, + on_start=on_start, + on_fitness=on_fitness, + on_parents=on_parents, + on_crossover=on_crossover, + on_mutation=on_mutation, + on_generation=on_generation, + on_stop=on_stop, + fitness_func=fitness_func) + + ga_instance.run() + +Assign Methods +-------------- + +The next example has all the methods defined inside the class ``Test``. +All of the methods accept an additional parameter representing the +method's object of the class ``Test``. + +All methods accept ``self`` as the first parameter and the instance of +the ``pygad.GA`` class as the second parameter. + +.. 
code:: python + + import pygad + import numpy + + class Test: + def fitness_func(self, ga_instanse, solution, solution_idx): + return numpy.random.rand() + + def on_start(self, ga_instanse): + print("on_start") + + def on_fitness(self, ga_instanse, last_gen_fitness): + print("on_fitness") + + def on_parents(self, ga_instanse, last_gen_parents): + print("on_parents") + + def on_crossover(self, ga_instanse, last_gen_offspring): + print("on_crossover") + + def on_mutation(self, ga_instanse, last_gen_offspring): + print("on_mutation") + + def on_generation(self, ga_instanse): + print("on_generation\n") + + def on_stop(self, ga_instanse, last_gen_fitness): + print("on_stop") + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=4, + sol_per_pop=10, + num_genes=2, + on_start=Test().on_start, + on_fitness=Test().on_fitness, + on_parents=Test().on_parents, + on_crossover=Test().on_crossover, + on_mutation=Test().on_mutation, + on_generation=Test().on_generation, + on_stop=Test().on_stop, + fitness_func=Test().fitness_func) + + ga_instance.run() diff --git a/docs/source/torchga.rst b/docs/source/torchga.rst index 6d35d4b..27825e8 100644 --- a/docs/source/torchga.rst +++ b/docs/source/torchga.rst @@ -212,8 +212,8 @@ subsections discuss each part in the code. return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Create the PyTorch model. input_layer = torch.nn.Linear(3, 5) @@ -261,8 +261,8 @@ subsections discuss each part in the code. # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") # Make predictions based on the best solution. predictions = pygad.torchga.predict(model=model, @@ -415,8 +415,8 @@ To get information about the best solution found by PyGAD, use the # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") .. code:: python @@ -478,8 +478,8 @@ previous example. return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Create the PyTorch model. input_layer = torch.nn.Linear(2, 4) @@ -531,8 +531,8 @@ previous example. # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") # Make predictions based on the best solution. 
predictions = pygad.torchga.predict(model=model, @@ -639,8 +639,8 @@ Here is the code. return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Build the PyTorch model using the functional API. input_layer = torch.nn.Linear(360, 50) @@ -688,8 +688,8 @@ Here is the code. # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") # Fetch the parameters of the best solution. best_solution_weights = torchga.model_weights_as_dict(model=model, @@ -784,8 +784,8 @@ Here is the complete code. return solution_fitness def on_generation(ga_instance): - print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) + print(f"Generation = {ga_instance.generations_completed}") + print(f"Fitness = {ga_instance.best_solution()[1]}") # Build the PyTorch model. input_layer = torch.nn.Conv2d(in_channels=3, out_channels=5, kernel_size=7) @@ -847,8 +847,8 @@ Here is the complete code. # Returning the details of the best solution. 
solution, solution_fitness, solution_idx = ga_instance.best_solution() - print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) - print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) + print(f"Fitness value of the best solution = {solution_fitness}") + print(f"Index of the best solution : {solution_idx}") # Make predictions based on the best solution. predictions = pygad.torchga.predict(model=model, From 8cbb3074253152eb8b50b581ab8e6153fd3e2d86 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Thu, 7 Sep 2023 15:41:07 -0400 Subject: [PATCH 22/25] Update docs and NSGA edits --- docs/source/helper.rst | 22 +- docs/source/pygad.rst | 205 +- docs/source/pygad_more.rst | 4488 ++++++++++++++------------- docs/source/releases.rst | 42 +- examples/example_multi_objective.py | 2 +- pygad/pygad.py | 8 +- pygad/utils/nsga2.py | 29 +- pygad/utils/parent_selection.py | 22 +- 8 files changed, 2506 insertions(+), 2312 deletions(-) diff --git a/docs/source/helper.rst b/docs/source/helper.rst index 44df8cd..dddfaac 100644 --- a/docs/source/helper.rst +++ b/docs/source/helper.rst @@ -7,5 +7,23 @@ This section of the PyGAD's library documentation discusses the **pygad.helper** module. Yet, this module has a submodule called ``unique`` that has a class -named ``Unique`` with some helper methods. Such methods help to check -and fix duplicate values in the genes of a solution. +named ``Unique`` with the following helper methods. Such methods help to +check and fix duplicate values in the genes of a solution. + +- ``solve_duplicate_genes_randomly()``: Solves the duplicates in a + solution by randomly selecting new values for the duplicating genes. + +- ``solve_duplicate_genes_by_space()``: Solves the duplicates in a + solution by selecting values for the duplicating genes from the gene + space + +- ``unique_int_gene_from_range()``: Finds a unique integer value for + the gene. 
+ +- ``unique_genes_by_space()``: Loops through all the duplicating genes + to find unique values from their gene spaces to solve the + duplicates. For each duplicating gene, a call to the + ``unique_gene_by_space()`` is made. + +- ``unique_gene_by_space()``: Returns a unique gene value for a single + gene based on its value space to solve the duplicates. diff --git a/docs/source/pygad.rst b/docs/source/pygad.rst index 69030f8..9681ca9 100644 --- a/docs/source/pygad.rst +++ b/docs/source/pygad.rst @@ -595,24 +595,6 @@ Other Methods - ``adaptive_mutation_population_fitness()``: Returns the average fitness value used in the adaptive mutation to filter the solutions. -- ``solve_duplicate_genes_randomly()``: Solves the duplicates in a - solution by randomly selecting new values for the duplicating genes. - -- ``solve_duplicate_genes_by_space()``: Solves the duplicates in a - solution by selecting values for the duplicating genes from the gene - space - -- ``unique_int_gene_from_range()``: Finds a unique integer value for - the gene. - -- ``unique_genes_by_space()``: Loops through all the duplicating genes - to find unique values that from their gene spaces to solve the - duplicates. For each duplicating gene, a call to the - ``unique_gene_by_space()`` is made. - -- ``unique_gene_by_space()``: Returns a unique gene value for a single - gene based on its value space to solve the duplicates. - - ``summary()``: Prints a Keras-like summary of the PyGAD lifecycle. This helps to have an overview of the architecture. Supported in `PyGAD @@ -961,31 +943,6 @@ generation. It works only after completing at least 1 generation. If no generation is completed (at least 1), an exception is raised. -This method accepts the following parameters: - -1. ``title``: Title of the figure. - -2. ``xlabel``: X-axis label. - -3. ``ylabel``: Y-axis label. - -4. ``linewidth``: Line width of the plot. Defaults to ``3``. - -5. ``font_size``: Font size for the labels and title. 
Defaults to - ``14``. - -6. ``plot_type``: Type of the plot which can be either ``"plot"`` - (default), ``"scatter"``, or ``"bar"``. - -7. ``color``: Color of the plot which defaults to the greenish color - ``"#64f20c"``. - -8. ``label``: The label used for the legend in the figures of - multi-objective problems. It is not used for single-objective - problems. It defaults to ``None`` which means no labels used. - -9. ``save_dir``: Directory to save the figure. - .. _plotnewsolutionrate: ``plot_new_solution_rate()`` @@ -999,26 +956,6 @@ constructor of the ``pygad.GA`` class. It works only after completing at least 1 generation. If no generation is completed (at least 1), an exception is raised. -This method accepts the following parameters: - -1. ``title``: Title of the figure. - -2. ``xlabel``: X-axis label. - -3. ``ylabel``: Y-axis label. - -4. ``linewidth``: Line width of the plot. Defaults to ``3``. - -5. ``font_size``: Font size for the labels and title. Defaults to - ``14``. - -6. ``plot_type``: Type of the plot which can be either ``"plot"`` - (default), ``"scatter"``, or ``"bar"``. - -7. ``color``: Color of the plot which defaults to ``"#3870FF"``. - -8. ``save_dir``: Directory to save the figure. - .. _plotgenes: ``plot_genes()`` @@ -1039,43 +976,6 @@ This is controlled by the ``graph_type`` parameter. It works only after completing at least 1 generation. If no generation is completed (at least 1), an exception is raised. -This method accepts the following parameters: - -1. ``title``: Title of the figure. - -2. ``xlabel``: X-axis label. - -3. ``ylabel``: Y-axis label. - -4. ``linewidth``: Line width of the plot. Defaults to ``3``. - -5. ``font_size``: Font size for the labels and title. Defaults to - ``14``. - -6. ``plot_type``: Type of the plot which can be either ``"plot"`` - (default), ``"scatter"``, or ``"bar"``. - -7. ``graph_type``: Type of the graph which can be either ``"plot"`` - (default), ``"boxplot"``, or ``"histogram"``. - -8. 
``fill_color``: Fill color of the graph which defaults to - ``"#3870FF"``. This has no effect if ``graph_type="plot"``. - -9. ``color``: Color of the plot which defaults to ``"#3870FF"``. - -10. ``solutions``: Defaults to ``"all"`` which means use all solutions. - If ``"best"`` then only the best solutions are used. - -11. ``save_dir``: Directory to save the figure. - -An exception is raised if: - -- ``solutions="all"`` while ``save_solutions=False`` in the constructor - of the ``pygad.GA`` class. . - -- ``solutions="best"`` while ``save_best_solutions=False`` in the - constructor of the ``pygad.GA`` class. . - ``save()`` ---------- @@ -1160,7 +1060,8 @@ optimization problem is single-objective or multi-objective. - If the fitness function returns a ``list``, ``tuple``, or ``numpy.ndarray``, then the problem is single-objective. Even if there is only one element, the problem is still considered - multi-objective. + multi-objective. Each element represents the fitness value of its + corresponding objective. Using a user-defined fitness function allows the user to freely use PyGAD to solve any problem by passing the appropriate fitness @@ -1580,6 +1481,108 @@ below. loaded_ga_instance = pygad.load(filename=filename) loaded_ga_instance.plot_fitness() +Linear Model Optimization - Multi-Objective +------------------------------------------- + +This is a multi-objective optimization example that optimizes these 2 +functions: + +1. ``y1 = f(w1:w6) = w1x1 + w2x2 + w3x3 + w4x4 + w5x5 + w6x6`` + +2. ``y2 = f(w1:w6) = w1x7 + w2x8 + w3x9 + w4x10 + w5x11 + w6x12`` + +Where: + +1. ``(x1,x2,x3,x4,x5,x6)=(4,-2,3.5,5,-11,-4.7)`` and ``y1=50`` + +2. ``(x7,x8,x9,x10,x11,x12)=(-2,0.7,-9,1.4,3,5)`` and ``y2=30`` + +The 2 functions use the same parameters (weights) ``w1`` to ``w6``. + +The goal is to use PyGAD to find the optimal values for such weights +that satisfy the 2 functions ``y1`` and ``y2``. 
+ +To use PyGAD to solve multi-objective problems, the only adjustment is +to return a ``list``, ``tuple``, or ``numpy.ndarray`` from the fitness +function. Each element represents the fitness of an objective in order. +That is, the first element is the fitness of the first objective, the +second element is the fitness for the second objective, and so on. + +.. code:: python + + import pygad + import numpy + + """ + Given these 2 functions: + y1 = f(w1:w6) = w1x1 + w2x2 + w3x3 + w4x4 + w5x5 + w6x6 + y2 = f(w1:w6) = w1x7 + w2x8 + w3x9 + w4x10 + w5x11 + w6x12 + where (x1,x2,x3,x4,x5,x6)=(4,-2,3.5,5,-11,-4.7) and y1=50 + and (x7,x8,x9,x10,x11,x12)=(-2,0.7,-9,1.4,3,5) and y2=30 + What are the best values for the 6 weights (w1 to w6)? We are going to use the genetic algorithm to optimize these 2 functions. + This is a multi-objective optimization problem. + + PyGAD considers the problem as multi-objective if the fitness function returns: + 1) List. + 2) Or tuple. + 3) Or numpy.ndarray. + """ + + function_inputs1 = [4,-2,3.5,5,-11,-4.7] # Function 1 inputs. + function_inputs2 = [-2,0.7,-9,1.4,3,5] # Function 2 inputs. + desired_output1 = 50 # Function 1 output. + desired_output2 = 30 # Function 2 output. 
+ + def fitness_func(ga_instance, solution, solution_idx): + output1 = numpy.sum(solution*function_inputs1) + output2 = numpy.sum(solution*function_inputs2) + fitness1 = 1.0 / (numpy.abs(output1 - desired_output1) + 0.000001) + fitness2 = 1.0 / (numpy.abs(output2 - desired_output2) + 0.000001) + return [fitness1, fitness2] + + num_generations = 100 + num_parents_mating = 10 + + sol_per_pop = 20 + num_genes = len(function_inputs1) + + ga_instance = pygad.GA(num_generations=num_generations, + num_parents_mating=num_parents_mating, + sol_per_pop=sol_per_pop, + num_genes=num_genes, + fitness_func=fitness_func, + parent_selection_type='nsga2') + + ga_instance.run() + + ga_instance.plot_fitness(label=['Obj 1', 'Obj 2']) + + solution, solution_fitness, solution_idx = ga_instance.best_solution(ga_instance.last_generation_fitness) + print(f"Parameters of the best solution : {solution}") + print(f"Fitness value of the best solution = {solution_fitness}") + + prediction = numpy.sum(numpy.array(function_inputs1)*solution) + print(f"Predicted output 1 based on the best solution : {prediction}") + prediction = numpy.sum(numpy.array(function_inputs2)*solution) + print(f"Predicted output 2 based on the best solution : {prediction}") + +This is the result of the print statements. The predicted outputs are +close to the desired outputs. + +.. code:: + + Parameters of the best solution : [ 0.79676439 -2.98823386 -4.12677662 5.70539445 -2.02797016 -1.07243922] + Fitness value of the best solution = [ 1.68090829 349.8591915 ] + Predicted output 1 based on the best solution : 50.59491545442283 + Predicted output 2 based on the best solution : 29.99714270722312 + +This is the figure created by the ``plot_fitness()`` method. The fitness +of the first objective has the green color. The blue color is used for +the second objective fitness. + +.. 
image:: https://github.com/ahmedfgad/GeneticAlgorithmPython/assets/16560492/7896f8d8-01c5-4ff9-8d15-52191c309b63 + :alt: + Reproducing Images ------------------ diff --git a/docs/source/pygad_more.rst b/docs/source/pygad_more.rst index 9943c25..3a97840 100644 --- a/docs/source/pygad_more.rst +++ b/docs/source/pygad_more.rst @@ -1,2171 +1,2317 @@ -More About PyGAD -================ - -.. _limit-the-gene-value-range-using-the-genespace-parameter: - -Limit the Gene Value Range using the ``gene_space`` Parameter -============================================================= - -In `PyGAD -2.11.0 `__, -the ``gene_space`` parameter supported a new feature to allow -customizing the range of accepted values for each gene. Let's take a -quick review of the ``gene_space`` parameter to build over it. - -The ``gene_space`` parameter allows the user to feed the space of values -of each gene. This way the accepted values for each gene is retracted to -the user-defined values. Assume there is a problem that has 3 genes -where each gene has different set of values as follows: - -1. Gene 1: ``[0.4, 12, -5, 21.2]`` - -2. Gene 2: ``[-2, 0.3]`` - -3. Gene 3: ``[1.2, 63.2, 7.4]`` - -Then, the ``gene_space`` for this problem is as given below. Note that -the order is very important. - -.. code:: python - - gene_space = [[0.4, 12, -5, 21.2], - [-2, 0.3], - [1.2, 63.2, 7.4]] - -In case all genes share the same set of values, then simply feed a -single list to the ``gene_space`` parameter as follows. In this case, -all genes can only take values from this list of 6 values. - -.. code:: python - - gene_space = [33, 7, 0.5, 95. 6.3, 0.74] - -The previous example restricts the gene values to just a set of fixed -number of discrete values. In case you want to use a range of discrete -values to the gene, then you can use the ``range()`` function. For -example, ``range(1, 7)`` means the set of allowed values for the gene -are ``1, 2, 3, 4, 5, and 6``. 
You can also use the ``numpy.arange()`` or -``numpy.linspace()`` functions for the same purpose. - -The previous discussion only works with a range of discrete values not -continuous values. In `PyGAD -2.11.0 `__, -the ``gene_space`` parameter can be assigned a dictionary that allows -the gene to have values from a continuous range. - -Assuming you want to restrict the gene within this half-open range [1 to -5) where 1 is included and 5 is not. Then simply create a dictionary -with 2 items where the keys of the 2 items are: - -1. ``'low'``: The minimum value in the range which is 1 in the example. - -2. ``'high'``: The maximum value in the range which is 5 in the example. - -The dictionary will look like that: - -.. code:: python - - {'low': 1, - 'high': 5} - -It is not acceptable to add more than 2 items in the dictionary or use -other keys than ``'low'`` and ``'high'``. - -For a 3-gene problem, the next code creates a dictionary for each gene -to restrict its values in a continuous range. For the first gene, it can -take any floating-point value from the range that starts from 1 -(inclusive) and ends at 5 (exclusive). - -.. code:: python - - gene_space = [{'low': 1, 'high': 5}, {'low': 0.3, 'high': 1.4}, {'low': -0.2, 'high': 4.5}] - -.. _more-about-the-genespace-parameter: - -More about the ``gene_space`` Parameter -======================================= - -The ``gene_space`` parameter customizes the space of values of each -gene. - -Assuming that all genes have the same global space which include the -values 0.3, 5.2, -4, and 8, then those values can be assigned to the -``gene_space`` parameter as a list, tuple, or range. Here is a list -assigned to this parameter. By doing that, then the gene values are -restricted to those assigned to the ``gene_space`` parameter. - -.. code:: python - - gene_space = [0.3, 5.2, -4, 8] - -If some genes have different spaces, then ``gene_space`` should accept a -nested list or tuple. In this case, the elements could be: - -1. 
Number (of ``int``, ``float``, or ``NumPy`` data types): A single - value to be assigned to the gene. This means this gene will have the - same value across all generations. - -2. ``list``, ``tuple``, ``numpy.ndarray``, or any range like ``range``, - ``numpy.arange()``, or ``numpy.linspace``: It holds the space for - each individual gene. But this space is usually discrete. That is - there is a set of finite values to select from. - -3. ``dict``: To sample a value for a gene from a continuous range. The - dictionary must have 2 mandatory keys which are ``"low"`` and - ``"high"`` in addition to an optional key which is ``"step"``. A - random value is returned between the values assigned to the items - with ``"low"`` and ``"high"`` keys. If the ``"step"`` exists, then - this works as the previous options (i.e. discrete set of values). - -4. ``None``: A gene with its space set to ``None`` is initialized - randomly from the range specified by the 2 parameters - ``init_range_low`` and ``init_range_high``. For mutation, its value - is mutated based on a random value from the range specified by the 2 - parameters ``random_mutation_min_val`` and - ``random_mutation_max_val``. If all elements in the ``gene_space`` - parameter are ``None``, the parameter will not have any effect. - -Assuming that a chromosome has 2 genes and each gene has a different -value space. Then the ``gene_space`` could be assigned a nested -list/tuple where each element determines the space of a gene. - -According to the next code, the space of the first gene is ``[0.4, -5]`` -which has 2 values and the space for the second gene is -``[0.5, -3.2, 8.8, -9]`` which has 4 values. - -.. code:: python - - gene_space = [[0.4, -5], [0.5, -3.2, 8.2, -9]] - -For a 2 gene chromosome, if the first gene space is restricted to the -discrete values from 0 to 4 and the second gene is restricted to the -values from 10 to 19, then it could be specified according to the next -code. - -.. 
code:: python - - gene_space = [range(5), range(10, 20)] - -The ``gene_space`` can also be assigned to a single range, as given -below, where the values of all genes are sampled from the same range. - -.. code:: python - - gene_space = numpy.arange(15) - -The ``gene_space`` can be assigned a dictionary to sample a value from a -continuous range. - -.. code:: python - - gene_space = {"low": 4, "high": 30} - -A step also can be assigned to the dictionary. This works as if a range -is used. - -.. code:: python - - gene_space = {"low": 4, "high": 30, "step": 2.5} - -.. - - Setting a ``dict`` like ``{"low": 0, "high": 10}`` in the - ``gene_space`` means that random values from the continuous range [0, - 10) are sampled. Note that ``0`` is included but ``10`` is not - included while sampling. Thus, the maximum value that could be - returned is less than ``10`` like ``9.9999``. But if the user decided - to round the genes using, for example, ``[float, 2]``, then this - value will become 10. So, the user should be careful to the inputs. - -If a ``None`` is assigned to only a single gene, then its value will be -randomly generated initially using the ``init_range_low`` and -``init_range_high`` parameters in the ``pygad.GA`` class's constructor. -During mutation, the value are sampled from the range defined by the 2 -parameters ``random_mutation_min_val`` and ``random_mutation_max_val``. -This is an example where the second gene is given a ``None`` value. - -.. code:: python - - gene_space = [range(5), None, numpy.linspace(10, 20, 300)] - -If the user did not assign the initial population to the -``initial_population`` parameter, the initial population is created -randomly based on the ``gene_space`` parameter. Moreover, the mutation -is applied based on this parameter. - -.. _how-mutation-works-with-the-genespace-parameter: - -How Mutation Works with the ``gene_space`` Parameter? 
------------------------------------------------------ - -If a gene has its static space defined in the ``gene_space`` parameter, -then mutation works by replacing the gene value by a value randomly -selected from the gene space. This happens for both ``int`` and -``float`` data types. - -For example, the following ``gene_space`` has the static space -``[1, 2, 3]`` defined for the first gene. So, this gene can only have a -value out of these 3 values. - -.. code:: python - - Gene space: [[1, 2, 3], - None] - Solution: [1, 5] - -For a solution like ``[1, 5]``, mutation happens for the -first gene by simply replacing its current value by a randomly selected -value (other than its current value if possible). So, the value 1 will -be replaced by either 2 or 3. - -For the second gene, its space is set to ``None``. So, traditional -mutation happens for this gene by: - -1. Generating a random value from the range defined by the - ``random_mutation_min_val`` and ``random_mutation_max_val`` - parameters. - -2. Adding this random value to the current gene's value. - -If its current value is 5 and the random value is ``-0.5``, then the new -value is 4.5. If the gene type is integer, then the value will be -rounded. - -Stop at Any Generation -====================== - -In `PyGAD -2.4.0 `__, -it is possible to stop the genetic algorithm after any generation. All -you need to do is to return the string ``"stop"`` in the callback -function ``on_generation``. When this callback function is implemented -and assigned to the ``on_generation`` parameter in the constructor of -the ``pygad.GA`` class, then the algorithm immediately stops after -completing its current generation. Let's discuss an example. - -Assume that the user wants to stop the algorithm either after 100 -generations or if a condition is met. The user may assign a value of 100 -to the ``num_generations`` parameter of the ``pygad.GA`` class -constructor. 
- -The condition that stops the algorithm is written in a callback function -like the one in the next code. If the fitness value of the best solution -is at least 70, then the string ``"stop"`` is returned. - -.. code:: python - - def func_generation(ga_instance): - if ga_instance.best_solution()[1] >= 70: - return "stop" - -Stop Criteria -============= - -In `PyGAD -2.15.0 `__, -a new parameter named ``stop_criteria`` is added to the constructor of -the ``pygad.GA`` class. It helps to stop the evolution based on some -criteria. It can be assigned one or more criteria. - -Each criterion is passed as a ``str`` that consists of 2 parts: - -1. Stop word. - -2. Number. - -It takes this form: - -.. code:: python - - "word_num" - -The current 2 supported words are ``reach`` and ``saturate``. - -The ``reach`` word stops the ``run()`` method if the fitness value is -equal to or greater than a given fitness value. An example for ``reach`` -is ``"reach_40"`` which stops the evolution if the fitness is >= 40. - -``saturate`` stops the evolution if the fitness saturates for a given -number of consecutive generations. An example for ``saturate`` is -``"saturate_7"`` which means stop the ``run()`` method if the fitness -does not change for 7 consecutive generations. - -Here is an example that stops the evolution if either the fitness value -reaches ``127.4`` or if the fitness saturates for ``15`` generations. - -.. 
code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, 9, 4] - desired_output = 44 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - - return fitness - - ga_instance = pygad.GA(num_generations=200, - sol_per_pop=10, - num_parents_mating=4, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - stop_criteria=["reach_127.4", "saturate_15"]) - - ga_instance.run() - print(f"Number of generations passed is {ga_instance.generations_completed}") - -Elitism Selection -================= - -In `PyGAD -2.18.0 `__, -a new parameter called ``keep_elitism`` is supported. It accepts an -integer to define the number of elitism (i.e. best solutions) to keep in -the next generation. This parameter defaults to ``1`` which means only -the best solution is kept in the next generation. - -In the next example, the ``keep_elitism`` parameter in the constructor -of the ``pygad.GA`` class is set to 2. Thus, the best 2 solutions in -each generation are kept in the next generation. - -.. code:: python - - import numpy - import pygad - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / numpy.abs(output - desired_output) - return fitness - - ga_instance = pygad.GA(num_generations=2, - num_parents_mating=3, - fitness_func=fitness_func, - num_genes=6, - sol_per_pop=5, - keep_elitism=2) - - ga_instance.run() - -The value passed to the ``keep_elitism`` parameter must satisfy 2 -conditions: - -1. It must be ``>= 0``. - -2. It must be ``<= sol_per_pop``. That is its value cannot exceed the - number of solutions in the current population. 
- -In the previous example, if the ``keep_elitism`` parameter is set equal -to the value passed to the ``sol_per_pop`` parameter, which is 5, then -there will be no evolution at all as in the next figure. This is because -all the 5 solutions are used as elitism in the next generation and no -offspring will be created. - -.. code:: python - - ... - - ga_instance = pygad.GA(..., - sol_per_pop=5, - keep_elitism=5) - - ga_instance.run() - -.. image:: https://user-images.githubusercontent.com/16560492/189273225-67ffad41-97ab-45e1-9324-429705e17b20.png - :alt: - -Note that if the ``keep_elitism`` parameter is effective (i.e. is -assigned a positive integer, not zero), then the ``keep_parents`` -parameter will have no effect. Because the default value of the -``keep_elitism`` parameter is 1, then the ``keep_parents`` parameter has -no effect by default. The ``keep_parents`` parameter is only effective -when ``keep_elitism=0``. - -Random Seed -=========== - -In `PyGAD -2.18.0 `__, -a new parameter called ``random_seed`` is supported. Its value is used -as a seed for the random function generators. - -PyGAD uses random functions in these 2 libraries: - -1. NumPy - -2. random - -The ``random_seed`` parameter defaults to ``None`` which means no seed -is used. As a result, different random numbers are generated for each -run of PyGAD. - -If this parameter is assigned a proper seed, then the results will be -reproducible. In the next example, the integer 2 is used as a random -seed. - -.. 
code:: python - - import numpy - import pygad - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / numpy.abs(output - desired_output) - return fitness - - ga_instance = pygad.GA(num_generations=2, - num_parents_mating=3, - fitness_func=fitness_func, - sol_per_pop=5, - num_genes=6, - random_seed=2) - - ga_instance.run() - best_solution, best_solution_fitness, best_match_idx = ga_instance.best_solution() - print(best_solution) - print(best_solution_fitness) - -This is the best solution found and its fitness value. - -.. code:: - - [ 2.77249188 -4.06570662 0.04196872 -3.47770796 -0.57502138 -3.22775267] - 0.04872203136549972 - -After running the code again, it will find the same result. - -.. code:: - - [ 2.77249188 -4.06570662 0.04196872 -3.47770796 -0.57502138 -3.22775267] - 0.04872203136549972 - -Continue without Losing Progress -================================ - -In `PyGAD -2.18.0 `__, -and thanks to `Felix Bernhard `__ for -opening `this GitHub -issue `__, -the values of these 4 instance attributes are no longer reset after each -call to the ``run()`` method. - -1. ``self.best_solutions`` - -2. ``self.best_solutions_fitness`` - -3. ``self.solutions`` - -4. ``self.solutions_fitness`` - -This helps the user to continue where the last run stopped without -losing the values of these 4 attributes. - -Now, the user can save the model by calling the ``save()`` method. - -.. code:: python - - import pygad - - def fitness_func(ga_instance, solution, solution_idx): - ... - return fitness - - ga_instance = pygad.GA(...) - - ga_instance.run() - - ga_instance.plot_fitness() - - ga_instance.save("pygad_GA") - -Then the saved model is loaded by calling the ``load()`` function. After -calling the ``run()`` method over the loaded instance, then the data -from the previous 4 attributes are not reset but extended with the new -data. - -.. 
code:: python - - import pygad - - def fitness_func(ga_instance, solution, solution_idx): - ... - return fitness - - loaded_ga_instance = pygad.load("pygad_GA") - - loaded_ga_instance.run() - - loaded_ga_instance.plot_fitness() - -The plot created by the ``plot_fitness()`` method will show the data -collected from both the runs. - -Note that the 2 attributes (``self.best_solutions`` and -``self.best_solutions_fitness``) only work if the -``save_best_solutions`` parameter is set to ``True``. Also, the 2 -attributes (``self.solutions`` and ``self.solutions_fitness``) only work -if the ``save_solutions`` parameter is ``True``. - -Prevent Duplicates in Gene Values -================================= - -In `PyGAD -2.13.0 `__, -a new bool parameter called ``allow_duplicate_genes`` is supported to -control whether duplicates are supported in the chromosome or not. In -other words, whether 2 or more genes might have the same exact value. - -If ``allow_duplicate_genes=True`` (which is the default case), genes may -have the same value. If ``allow_duplicate_genes=False``, then no 2 genes -will have the same value given that there are enough unique values for -the genes. - -The next code gives an example to use the ``allow_duplicate_genes`` -parameter. A callback generation function is implemented to print the -population after each generation. - -.. code:: python - - import pygad - - def fitness_func(ga_instance, solution, solution_idx): - return 0 - - def on_generation(ga): - print("Generation", ga.generations_completed) - print(ga.population) - - ga_instance = pygad.GA(num_generations=5, - sol_per_pop=5, - num_genes=4, - mutation_num_genes=3, - random_mutation_min_val=-5, - random_mutation_max_val=5, - num_parents_mating=2, - fitness_func=fitness_func, - gene_type=int, - on_generation=on_generation, - allow_duplicate_genes=False) - ga_instance.run() - -Here are the population after the 5 generations. Note how there are no -duplicate values. - -.. 
code:: python - - Generation 1 - [[ 2 -2 -3 3] - [ 0 1 2 3] - [ 5 -3 6 3] - [-3 1 -2 4] - [-1 0 -2 3]] - Generation 2 - [[-1 0 -2 3] - [-3 1 -2 4] - [ 0 -3 -2 6] - [-3 0 -2 3] - [ 1 -4 2 4]] - Generation 3 - [[ 1 -4 2 4] - [-3 0 -2 3] - [ 4 0 -2 1] - [-4 0 -2 -3] - [-4 2 0 3]] - Generation 4 - [[-4 2 0 3] - [-4 0 -2 -3] - [-2 5 4 -3] - [-1 2 -4 4] - [-4 2 0 -3]] - Generation 5 - [[-4 2 0 -3] - [-1 2 -4 4] - [ 3 4 -4 0] - [-1 0 2 -2] - [-4 2 -1 1]] - -The ``allow_duplicate_genes`` parameter is configured with use with the -``gene_space`` parameter. Here is an example where each of the 4 genes -has the same space of values that consists of 4 values (1, 2, 3, and 4). - -.. code:: python - - import pygad - - def fitness_func(ga_instance, solution, solution_idx): - return 0 - - def on_generation(ga): - print("Generation", ga.generations_completed) - print(ga.population) - - ga_instance = pygad.GA(num_generations=1, - sol_per_pop=5, - num_genes=4, - num_parents_mating=2, - fitness_func=fitness_func, - gene_type=int, - gene_space=[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], - on_generation=on_generation, - allow_duplicate_genes=False) - ga_instance.run() - -Even that all the genes share the same space of values, no 2 genes -duplicate their values as provided by the next output. - -.. code:: python - - Generation 1 - [[2 3 1 4] - [2 3 1 4] - [2 4 1 3] - [2 3 1 4] - [1 3 2 4]] - Generation 2 - [[1 3 2 4] - [2 3 1 4] - [1 3 2 4] - [2 3 4 1] - [1 3 4 2]] - Generation 3 - [[1 3 4 2] - [2 3 4 1] - [1 3 4 2] - [3 1 4 2] - [3 2 4 1]] - Generation 4 - [[3 2 4 1] - [3 1 4 2] - [3 2 4 1] - [1 2 4 3] - [1 3 4 2]] - Generation 5 - [[1 3 4 2] - [1 2 4 3] - [2 1 4 3] - [1 2 4 3] - [1 2 4 3]] - -You should care of giving enough values for the genes so that PyGAD is -able to find alternatives for the gene value in case it duplicates with -another gene. - -There might be 2 duplicate genes where changing either of the 2 -duplicating genes will not solve the problem. 
For example, if -``gene_space=[[3, 0, 1], [4, 1, 2], [0, 2], [3, 2, 0]]`` and the -solution is ``[3 2 0 0]``, then the values of the last 2 genes -duplicate. There are no possible changes in the last 2 genes to solve -the problem. - -This problem can be solved by randomly changing one of the -non-duplicating genes that may make room for a unique value in one of the -2 duplicating genes. For example, by changing the second gene from 2 to -4, then any of the last 2 genes can take the value 2 and solve the -duplicates. The resultant solution is then ``[3 4 2 0]``. But this option is -not yet supported in PyGAD. - -Solve Duplicates using a Third Gene ------------------------------------ - -When ``allow_duplicate_genes=False`` and a user-defined ``gene_space`` -is used, it sometimes happens that there is no room to solve the -duplicates between the 2 genes by simply replacing the value of one gene -by another gene. In `PyGAD -3.1.0 `__, -the duplicates are solved by looking for a third gene that will help in -solving the duplicates. The following examples explain how it works. - -Example 1: - -Let's assume that this gene space is used and there is a solution with 2 -duplicate genes with the same value 4. - -.. code:: python - - Gene space: [[2, 3], - [3, 4], - [4, 5], - [5, 6]] - Solution: [3, 4, 4, 5] - -By checking the gene space, the second gene can have the values -``[3, 4]`` and the third gene can have the values ``[4, 5]``. To solve -the duplicates, we have to change the value of either of these 2 genes. - -If the value of the second gene changes from 4 to 3, then it will be -a duplicate of the first gene. If we are to change the value of the -third gene from 4 to 5, then it will duplicate with the fourth gene. As -a conclusion, just selecting a different gene value for either -the second or third genes will introduce new duplicating genes. 
- -When there are 2 duplicate genes but there is no way to solve their -duplicates, then the solution is to change a third gene that makes -room to solve the duplicates between the 2 genes. - -In our example, duplicates between the second and third genes can be -solved by, for example: - -- Changing the first gene from 3 to 2 then changing the second gene - from 4 to 3. - -- Or changing the fourth gene from 5 to 6 then changing the third gene - from 4 to 5. - -Generally, this is how to solve such duplicates: - -1. For any duplicate gene **GENE1**, select another value. - -2. Check which other gene **GENEX** duplicates this new value. - -3. Find if **GENEX** can have another value that will not cause any more - duplicates. If so, go to step 7. - -4. If all the other values of **GENEX** will cause duplicates, then try - another gene **GENEY**. - -5. Repeat steps 3 and 4 until exploring all the genes. - -6. If there is no possibility to solve the duplicates, then there is no - way to solve the duplicates and we have to keep the duplicate value. - -7. If a value for a gene **GENEM** is found that will not cause more - duplicates, then use this value for the gene **GENEM**. - -8. Replace the value of the gene **GENE1** by the old value of the gene - **GENEM**. This solves the duplicates. - -This is an example to solve the duplicate for the solution -``[3, 4, 4, 5]``: - -1. Let's use the second gene with value 4. Because the space of this - gene is ``[3, 4]``, then the only other value we can select is 3. - -2. The first gene also has the value 3. - -3. The first gene has another value 2 that will not cause more - duplicates in the solution. Then go to step 7. - -4. Skip. - -5. Skip. - -6. Skip. - -7. The value of the first gene 3 will be replaced by the new value 2. - The new solution is [2, 4, 4, 5]. - -8. Replace the value of the second gene 4 by the old value of the first - gene which is 3. The new solution is [2, 3, 4, 5]. The duplicate is - solved. 
- -Example 2: - -.. code:: python - - Gene space: [[0, 1], - [1, 2], - [2, 3], - [3, 4]] - Solution: [1, 2, 2, 3] - -The quick summary is: - -- Change the value of the first gene from 1 to 0. The solution becomes - [0, 2, 2, 3]. - -- Change the value of the second gene from 2 to 1. The solution becomes - [0, 1, 2, 3]. The duplicate is solved. - -.. _more-about-the-genetype-parameter: - -More about the ``gene_type`` Parameter -====================================== - -The ``gene_type`` parameter allows the user to control the data type for -all genes at once or each individual gene. In `PyGAD -2.15.0 `__, -the ``gene_type`` parameter also supports customizing the precision for -``float`` data types. As a result, the ``gene_type`` parameter helps to: - -1. Select a data type for all genes with or without precision. - -2. Select a data type for each individual gene with or without - precision. - -Let's discuss things by examples. - -Data Type for All Genes without Precision ------------------------------------------ - -The data type for all genes can be specified by assigning the numeric -data type directly to the ``gene_type`` parameter. This is an example to -make all genes of ``int`` data types. - -.. code:: python - - gene_type=int - -Given that the supported numeric data types of PyGAD include Python's -``int`` and ``float`` in addition to all numeric types of ``NumPy``, -then any of these types can be assigned to the ``gene_type`` parameter. - -If no precision is specified for a ``float`` data type, then the -complete floating-point number is kept. - -The next code uses an ``int`` data type for all genes where the genes in -the initial and final population are only integers. - -.. 
code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_type=int) - - print("Initial Population") - print(ga_instance.initial_population) - - ga_instance.run() - - print("Final Population") - print(ga_instance.population) - -.. code:: python - - Initial Population - [[ 1 -1 2 0 -3] - [ 0 -2 0 -3 -1] - [ 0 -1 -1 2 0] - [-2 3 -2 3 3] - [ 0 0 2 -2 -2]] - - Final Population - [[ 1 -1 2 2 0] - [ 1 -1 2 2 0] - [ 1 -1 2 2 0] - [ 1 -1 2 2 0] - [ 1 -1 2 2 0]] - -Data Type for All Genes with Precision --------------------------------------- - -A precision can only be specified for a ``float`` data type and cannot -be specified for integers. Here is an example to use a precision of 3 -for the ``float`` data type. In this case, all genes are of type -``float`` and their maximum precision is 3. - -.. code:: python - - gene_type=[float, 3] - -The next code uses prints the initial and final population where the -genes are of type ``float`` with precision 3. - -.. 
code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_type=[float, 3]) - - print("Initial Population") - print(ga_instance.initial_population) - - ga_instance.run() - - print("Final Population") - print(ga_instance.population) - -.. code:: python - - Initial Population - [[-2.417 -0.487 3.623 2.457 -2.362] - [-1.231 0.079 -1.63 1.629 -2.637] - [ 0.692 -2.098 0.705 0.914 -3.633] - [ 2.637 -1.339 -1.107 -0.781 -3.896] - [-1.495 1.378 -1.026 3.522 2.379]] - - Final Population - [[ 1.714 -1.024 3.623 3.185 -2.362] - [ 0.692 -1.024 3.623 3.185 -2.362] - [ 0.692 -1.024 3.623 3.375 -2.362] - [ 0.692 -1.024 4.041 3.185 -2.362] - [ 1.714 -0.644 3.623 3.185 -2.362]] - -Data Type for each Individual Gene without Precision ----------------------------------------------------- - -In `PyGAD -2.14.0 `__, -the ``gene_type`` parameter allows customizing the gene type for each -individual gene. This is by using a ``list``/``tuple``/``numpy.ndarray`` -with number of elements equal to the number of genes. For each element, -a type is specified for the corresponding gene. - -This is an example for a 5-gene problem where different types are -assigned to the genes. - -.. code:: python - - gene_type=[int, float, numpy.float16, numpy.int8, float] - -This is a complete code that prints the initial and final population for -a custom-gene data type. - -.. 
code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_type=[int, float, numpy.float16, numpy.int8, float]) - - print("Initial Population") - print(ga_instance.initial_population) - - ga_instance.run() - - print("Final Population") - print(ga_instance.population) - -.. code:: python - - Initial Population - [[0 0.8615522360026828 0.7021484375 -2 3.5301821368185866] - [-3 2.648189378595294 -3.830078125 1 -0.9586271572917742] - [3 3.7729827570110714 1.2529296875 -3 1.395741994211889] - [0 1.0490687178053282 1.51953125 -2 0.7243617940450235] - [0 -0.6550158436937226 -2.861328125 -2 1.8212734549263097]] - - Final Population - [[3 3.7729827570110714 2.055 0 0.7243617940450235] - [3 3.7729827570110714 1.458 0 -0.14638754050305036] - [3 3.7729827570110714 1.458 0 0.0869406120516778] - [3 3.7729827570110714 1.458 0 0.7243617940450235] - [3 3.7729827570110714 1.458 0 -0.14638754050305036]] - -Data Type for each Individual Gene with Precision -------------------------------------------------- - -The precision can also be specified for the ``float`` data types as in -the next line where the second gene precision is 2 and last gene -precision is 1. - -.. code:: python - - gene_type=[int, [float, 2], numpy.float16, numpy.int8, [float, 1]] - -This is a complete example where the initial and final populations are -printed where the genes comply with the data types and precisions -specified. - -.. 
code:: python - - import pygad - import numpy - - equation_inputs = [4, -2, 3.5, 8, -2] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - gene_type=[int, [float, 2], numpy.float16, numpy.int8, [float, 1]]) - - print("Initial Population") - print(ga_instance.initial_population) - - ga_instance.run() - - print("Final Population") - print(ga_instance.population) - -.. code:: python - - Initial Population - [[-2 -1.22 1.716796875 -1 0.2] - [-1 -1.58 -3.091796875 0 -1.3] - [3 3.35 -0.107421875 1 -3.3] - [-2 -3.58 -1.779296875 0 0.6] - [2 -3.73 2.65234375 3 -0.5]] - - Final Population - [[2 -4.22 3.47 3 -1.3] - [2 -3.73 3.47 3 -1.3] - [2 -4.22 3.47 2 -1.3] - [2 -4.58 3.47 3 -1.3] - [2 -3.73 3.47 3 -1.3]] - -Parallel Processing in PyGAD -============================ - -Starting from `PyGAD -2.17.0 `__, -parallel processing becomes supported. This section explains how to use -parallel processing in PyGAD. - -According to the `PyGAD -lifecycle `__, -parallel processing can be parallelized in only 2 operations: - -1. Population fitness calculation. - -2. Mutation. - -The reason is that the calculations in these 2 operations are -independent (i.e. each solution/chromosome is handled independently from -the others) and can be distributed across different processes or -threads. - -For the mutation operation, it does not do intensive calculations on the -CPU. Its calculations are simple like flipping the values of some genes -from 0 to 1 or adding a random value to some genes. So, it does not take -much CPU processing time. Experiments proved that parallelizing the -mutation operation across the solutions increases the time instead of -reducing it. 
This is because running multiple processes or threads adds -overhead to manage them. Thus, parallel processing cannot be applied on -the mutation operation. - -For the population fitness calculation, parallel processing can help -make a difference and reduce the processing time. But this is -conditional on the type of calculations done in the fitness function. If -the fitness function makes intensive calculations and takes much -processing time from the CPU, then it is probable that parallel -processing will help to cut down the overall time. - -This section explains how parallel processing works in PyGAD and how to -use parallel processing in PyGAD. - -How to Use Parallel Processing in PyGAD ---------------------------------------- - -Starting from `PyGAD -2.17.0 `__, -a new parameter called ``parallel_processing`` was added to the constructor -of the ``pygad.GA`` class. - -.. code:: python - - import pygad - ... - ga_instance = pygad.GA(..., - parallel_processing=...) - ... - -This parameter allows the user to do the following: - -1. Enable parallel processing. - -2. Select whether processes or threads are used. - -3. Specify the number of processes or threads to be used. - -These are 3 possible values for the ``parallel_processing`` parameter: - -1. ``None``: (Default) It means no parallel processing is used. - -2. A positive integer referring to the number of threads to be used - (i.e. threads, not processes, are used). - -3. ``list``/``tuple``: If a list or a tuple of exactly 2 elements is - assigned, then: - - 1. The first element can be either ``'process'`` or ``'thread'`` to - specify whether processes or threads are used, respectively. - - 2. The second element can be: - - 1. A positive integer to select the maximum number of processes or - threads to be used. - - 2. ``0`` to indicate that 0 processes or threads are used. It - means no parallel processing. This is identical to setting - ``parallel_processing=None``. - - 3. 
``None`` to use the default value as calculated by the - ``concurrent.futures module``. - -These are examples of the values assigned to the ``parallel_processing`` -parameter: - -- ``parallel_processing=4``: Because the parameter is assigned a - positive integer, this means parallel processing is activated where 4 - threads are used. - -- ``parallel_processing=["thread", 5]``: Use parallel processing with 5 - threads. This is identical to ``parallel_processing=5``. - -- ``parallel_processing=["process", 8]``: Use parallel processing with - 8 processes. - -- ``parallel_processing=["process", 0]``: As the second element is - given the value 0, this means do not use parallel processing. This is - identical to ``parallel_processing=None``. - -Examples --------- - -The examples will help you know the difference between using processes -and threads. Moreover, it will give an idea when parallel processing -would make a difference and reduce the time. These are dummy examples -where the fitness function is made to always return 0. - -The first example uses 10 genes, 5 solutions in the population where -only 3 solutions mate, and 9999 generations. The fitness function uses a -``for`` loop with 100 iterations just to have some calculations. In the -constructor of the ``pygad.GA`` class, ``parallel_processing=None`` -means no parallel processing is used. - -.. code:: python - - import pygad - import time - - def fitness_func(ga_instance, solution, solution_idx): - for _ in range(99): - pass - return 0 - - ga_instance = pygad.GA(num_generations=9999, - num_parents_mating=3, - sol_per_pop=5, - num_genes=10, - fitness_func=fitness_func, - suppress_warnings=True, - parallel_processing=None) - - if __name__ == '__main__': - t1 = time.time() - - ga_instance.run() - - t2 = time.time() - print("Time is", t2-t1) - -When parallel processing is not used, the time it takes to run the -genetic algorithm is ``1.5`` seconds. 
- -For comparison, let's do a second experiment where parallel -processing is used with 5 threads. In this case, it takes ``5`` seconds. - -.. code:: python - - ... - ga_instance = pygad.GA(..., - parallel_processing=5) - ... - -For the third experiment, processes instead of threads are used. Also, -only 99 generations are used instead of 9999. The time it takes is -``99`` seconds. - -.. code:: python - - ... - ga_instance = pygad.GA(num_generations=99, - ..., - parallel_processing=["process", 5]) - ... - -This is the summary of the 3 experiments: - -1. No parallel processing & 9999 generations: 1.5 seconds. - -2. Parallel processing with 5 threads & 9999 generations: 5 seconds. - -3. Parallel processing with 5 processes & 99 generations: 99 seconds. - -Because the fitness function does not need much CPU time, the normal -processing takes the least time. Running processes for this simple -problem takes 99 seconds compared to only 5 seconds for threads because managing -processes is much heavier than managing threads. Thus, most of the CPU -time is for swapping the processes instead of executing the code. - -In the second example, the loop makes 99999999 iterations and only 5 -generations are used. With no parallelization, it takes 22 seconds. - -.. code:: python - - import pygad - import time - - def fitness_func(ga_instance, solution, solution_idx): - for _ in range(99999999): - pass - return 0 - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=3, - sol_per_pop=5, - num_genes=10, - fitness_func=fitness_func, - suppress_warnings=True, - parallel_processing=None) - - if __name__ == '__main__': - t1 = time.time() - ga_instance.run() - t2 = time.time() - print("Time is", t2-t1) - -It takes 15 seconds when 10 processes are used. - -.. code:: python - - ... - ga_instance = pygad.GA(..., - parallel_processing=["process", 10]) - ... - -This is compared to 20 seconds when 10 threads are used. - -.. code:: python - - ... 
- ga_instance = pygad.GA(..., - parallel_processing=["thread", 10]) - ... - -Based on the second example, using parallel processing with 10 processes -takes the least time because there is much CPU work done. Generally, -processes are preferred over threads when most of the work is on the -CPU. Threads are preferred over processes in some situations like doing -input/output operations. - -*Before releasing* `PyGAD -2.17.0 `__\ *,* -`László -Fazekas `__ -*wrote an article on parallelizing the fitness function with PyGAD. Check -it:* `How Genetic Algorithms Can Compete with Gradient Descent and -Backprop `__. - -Print Lifecycle Summary -======================= - -In `PyGAD -2.19.0 `__, -a new method called ``summary()`` is supported. It prints a Keras-like -summary of the PyGAD lifecycle showing the steps, callback functions, -parameters, etc. - -This method accepts the following parameters: - -- ``line_length=70``: An integer representing the length of the single - line in characters. - -- ``fill_character=" "``: A character to fill the lines. - -- ``line_character="-"``: A character for creating a line separator. - -- ``line_character2="="``: A secondary character to create a line - separator. - -- ``columns_equal_len=False``: The table rows are split into - equal-sized columns or split according to the width needed. - -- ``print_step_parameters=True``: Whether to print extra parameters - about each step inside the step. If ``print_step_parameters=False`` - and ``print_parameters_summary=True``, then the parameters of each - step are printed at the end of the table. - -- ``print_parameters_summary=True``: Whether to print parameters - summary at the end of the table. If ``print_step_parameters=False``, - then the parameters of each step are printed at the end of the table - too. - -This is a quick example to create a PyGAD instance. - -.. 
code:: python - - import pygad - import numpy - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - def genetic_fitness(solution, solution_idx): - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - def on_gen(ga): - pass - - def on_crossover_callback(a, b): - pass - - ga_instance = pygad.GA(num_generations=100, - num_parents_mating=10, - sol_per_pop=20, - num_genes=len(function_inputs), - on_crossover=on_crossover_callback, - on_generation=on_gen, - parallel_processing=2, - stop_criteria="reach_10", - fitness_batch_size=4, - crossover_probability=0.4, - fitness_func=genetic_fitness) - -Then call the ``summary()`` method to print the summary with the default -parameters. Note that entries for the crossover and generation callback -function are created because their callback functions are implemented -through the ``on_crossover_callback()`` and ``on_gen()``, respectively. - -.. code:: python - - ga_instance.summary() - -.. 
code:: bash - - ---------------------------------------------------------------------- - PyGAD Lifecycle - ====================================================================== - Step Handler Output Shape - ====================================================================== - Fitness Function genetic_fitness() (1) - Fitness batch size: 4 - ---------------------------------------------------------------------- - Parent Selection steady_state_selection() (10, 6) - Number of Parents: 10 - ---------------------------------------------------------------------- - Crossover single_point_crossover() (10, 6) - Crossover probability: 0.4 - ---------------------------------------------------------------------- - On Crossover on_crossover_callback() None - ---------------------------------------------------------------------- - Mutation random_mutation() (10, 6) - Mutation Genes: 1 - Random Mutation Range: (-1.0, 1.0) - Mutation by Replacement: False - Allow Duplicated Genes: True - ---------------------------------------------------------------------- - On Generation on_gen() None - Stop Criteria: [['reach', 10.0]] - ---------------------------------------------------------------------- - ====================================================================== - Population Size: (20, 6) - Number of Generations: 100 - Initial Population Range: (-4, 4) - Keep Elitism: 1 - Gene DType: [, None] - Parallel Processing: ['thread', 2] - Save Best Solutions: False - Save Solutions: False - ====================================================================== - -We can set the ``print_step_parameters`` and -``print_parameters_summary`` parameters to ``False`` to not print the -parameters. - -.. code:: python - - ga_instance.summary(print_step_parameters=False, - print_parameters_summary=False) - -.. 
code:: bash - - ---------------------------------------------------------------------- - PyGAD Lifecycle - ====================================================================== - Step Handler Output Shape - ====================================================================== - Fitness Function genetic_fitness() (1) - ---------------------------------------------------------------------- - Parent Selection steady_state_selection() (10, 6) - ---------------------------------------------------------------------- - Crossover single_point_crossover() (10, 6) - ---------------------------------------------------------------------- - On Crossover on_crossover_callback() None - ---------------------------------------------------------------------- - Mutation random_mutation() (10, 6) - ---------------------------------------------------------------------- - On Generation on_gen() None - ---------------------------------------------------------------------- - ====================================================================== - -Logging Outputs -=============== - -In `PyGAD -3.0.0 `__, -the ``print()`` statement is no longer used and the outputs are printed -using the `logging `__ -module. A a new parameter called ``logger`` is supported to accept the -user-defined logger. - -.. code:: python - - import logging - - logger = ... - - ga_instance = pygad.GA(..., - logger=logger, - ...) - -The default value for this parameter is ``None``. If there is no logger -passed (i.e. ``logger=None``), then a default logger is created to log -the messages to the console exactly like how the ``print()`` statement -works. - -Some advantages of using the the -`logging `__ module -instead of the ``print()`` statement are: - -1. The user has more control over the printed messages specially if - there is a project that uses multiple modules where each module - prints its messages. A logger can organize the outputs. - -2. 
Using the proper ``Handler``, the user can log the output messages to - files and not only restricted to printing it to the console. So, it - is much easier to record the outputs. - -3. The format of the printed messages can be changed by customizing the - ``Formatter`` assigned to the Logger. - -This section gives some quick examples to use the ``logging`` module and -then gives an example to use the logger with PyGAD. - -Logging to the Console ----------------------- - -This is an example to create a logger to log the messages to the -console. - -.. code:: python - - import logging - - # Create a logger - logger = logging.getLogger(__name__) - - # Set the logger level to debug so that all the messages are printed. - logger.setLevel(logging.DEBUG) - - # Create a stream handler to log the messages to the console. - stream_handler = logging.StreamHandler() - - # Set the handler level to debug. - stream_handler.setLevel(logging.DEBUG) - - # Create a formatter - formatter = logging.Formatter('%(message)s') - - # Add the formatter to handler. - stream_handler.setFormatter(formatter) - - # Add the stream handler to the logger - logger.addHandler(stream_handler) - -Now, we can log messages to the console with the format specified in the -``Formatter``. - -.. code:: python - - logger.debug('Debug message.') - logger.info('Info message.') - logger.warning('Warn message.') - logger.error('Error message.') - logger.critical('Critical message.') - -The outputs are identical to those returned using the ``print()`` -statement. - -.. code:: - - Debug message. - Info message. - Warn message. - Error message. - Critical message. - -By changing the format of the output messages, we can have more -information about each message. - -.. code:: python - - formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') - -This is a sample output. - -.. code:: python - - 2023-04-03 18:46:27 DEBUG: Debug message. 
- 2023-04-03 18:46:27 INFO: Info message. - 2023-04-03 18:46:27 WARNING: Warn message. - 2023-04-03 18:46:27 ERROR: Error message. - 2023-04-03 18:46:27 CRITICAL: Critical message. - -Note that you may need to clear the handlers after finishing the -execution. This is to make sure no cached handlers are used in the next -run. If the cached handlers are not cleared, then the single output -message may be repeated. - -.. code:: python - - logger.handlers.clear() - -Logging to a File ------------------ - -This is another example to log the messages to a file named -``logfile.txt``. The formatter prints the following about each message: - -1. The date and time at which the message is logged. - -2. The log level. - -3. The message. - -4. The path of the file. - -5. The lone number of the log message. - -.. code:: python - - import logging - - level = logging.DEBUG - name = 'logfile.txt' - - logger = logging.getLogger(name) - logger.setLevel(level) - - file_handler = logging.FileHandler(name, 'a+', 'utf-8') - file_handler.setLevel(logging.DEBUG) - file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s - %(pathname)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S') - file_handler.setFormatter(file_format) - logger.addHandler(file_handler) - -This is how the outputs look like. - -.. code:: python - - 2023-04-03 18:54:03 DEBUG: Debug message. - c:\users\agad069\desktop\logger\example2.py:46 - 2023-04-03 18:54:03 INFO: Info message. - c:\users\agad069\desktop\logger\example2.py:47 - 2023-04-03 18:54:03 WARNING: Warn message. - c:\users\agad069\desktop\logger\example2.py:48 - 2023-04-03 18:54:03 ERROR: Error message. - c:\users\agad069\desktop\logger\example2.py:49 - 2023-04-03 18:54:03 CRITICAL: Critical message. - c:\users\agad069\desktop\logger\example2.py:50 - -Consider clearing the handlers if necessary. - -.. 
code:: python - - logger.handlers.clear() - -Log to Both the Console and a File ----------------------------------- - -This is an example to create a single Logger associated with 2 handlers: - -1. A file handler. - -2. A stream handler. - -.. code:: python - - import logging - - level = logging.DEBUG - name = 'logfile.txt' - - logger = logging.getLogger(name) - logger.setLevel(level) - - file_handler = logging.FileHandler(name,'a+','utf-8') - file_handler.setLevel(logging.DEBUG) - file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s - %(pathname)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S') - file_handler.setFormatter(file_format) - logger.addHandler(file_handler) - - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_format = logging.Formatter('%(message)s') - console_handler.setFormatter(console_format) - logger.addHandler(console_handler) - -When a log message is executed, then it is both printed to the console -and saved in the ``logfile.txt``. - -Consider clearing the handlers if necessary. - -.. code:: python - - logger.handlers.clear() - -PyGAD Example -------------- - -To use the logger in PyGAD, just create your custom logger and pass it -to the ``logger`` parameter. - -.. 
code:: python - - import logging - import pygad - import numpy - - level = logging.DEBUG - name = 'logfile.txt' - - logger = logging.getLogger(name) - logger.setLevel(level) - - file_handler = logging.FileHandler(name,'a+','utf-8') - file_handler.setLevel(logging.DEBUG) - file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') - file_handler.setFormatter(file_format) - logger.addHandler(file_handler) - - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.INFO) - console_format = logging.Formatter('%(message)s') - console_handler.setFormatter(console_format) - logger.addHandler(console_handler) - - equation_inputs = [4, -2, 8] - desired_output = 2671.1234 - - def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - def on_generation(ga_instance): - ga_instance.logger.info(f"Generation = {ga_instance.generations_completed}") - ga_instance.logger.info(f"Fitness = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]}") - - ga_instance = pygad.GA(num_generations=10, - sol_per_pop=40, - num_parents_mating=2, - keep_parents=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - on_generation=on_generation, - logger=logger) - ga_instance.run() - - logger.handlers.clear() - -By executing this code, the logged messages are printed to the console -and also saved in the text file. - -.. 
code:: python - - 2023-04-03 19:04:27 INFO: Generation = 1 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038086960368076276 - 2023-04-03 19:04:27 INFO: Generation = 2 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038214871408010853 - 2023-04-03 19:04:27 INFO: Generation = 3 - 2023-04-03 19:04:27 INFO: Fitness = 0.0003832795907974678 - 2023-04-03 19:04:27 INFO: Generation = 4 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038398612055017196 - 2023-04-03 19:04:27 INFO: Generation = 5 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038442348890867516 - 2023-04-03 19:04:27 INFO: Generation = 6 - 2023-04-03 19:04:27 INFO: Fitness = 0.0003854406039137763 - 2023-04-03 19:04:27 INFO: Generation = 7 - 2023-04-03 19:04:27 INFO: Fitness = 0.00038646083174063284 - 2023-04-03 19:04:27 INFO: Generation = 8 - 2023-04-03 19:04:27 INFO: Fitness = 0.0003875169193024936 - 2023-04-03 19:04:27 INFO: Generation = 9 - 2023-04-03 19:04:27 INFO: Fitness = 0.0003888816727311021 - 2023-04-03 19:04:27 INFO: Generation = 10 - 2023-04-03 19:04:27 INFO: Fitness = 0.000389832593101348 - -Solve Non-Deterministic Problems -================================ - -PyGAD can be used to solve both deterministic and non-deterministic -problems. Deterministic are those that return the same fitness for the -same solution. For non-deterministic problems, a different fitness value -would be returned for the same solution. - -By default, PyGAD settings are set to solve deterministic problems. -PyGAD can save the explored solutions and their fitness to reuse in the -future. These instances attributes can save the solutions: - -1. ``solutions``: Exists if ``save_solutions=True``. - -2. ``best_solutions``: Exists if ``save_best_solutions=True``. - -3. ``last_generation_elitism``: Exists if ``keep_elitism`` > 0. - -4. ``last_generation_parents``: Exists if ``keep_parents`` > 0 or - ``keep_parents=-1``. - -To configure PyGAD for non-deterministic problems, we have to disable -saving the previous solutions. 
This is by setting these parameters: - -1. ``keep_elisitm=0`` - -2. ``keep_parents=0`` - -3. ``keep_solutions=False`` - -4. ``keep_best_solutions=False`` - -.. code:: python - - import pygad - ... - ga_instance = pygad.GA(..., - keep_elitism=0, - keep_parents=0, - save_solutions=False, - save_best_solutions=False, - ...) - -This way PyGAD will not save any explored solution and thus the fitness -function have to be called for each individual solution. - -Reuse the Fitness instead of Calling the Fitness Function -========================================================= - -It may happen that a previously explored solution in generation X is -explored again in another generation Y (where Y > X). For some problems, -calling the fitness function takes much time. - -For deterministic problems, it is better to not call the fitness -function for an already explored solutions. Instead, reuse the fitness -of the old solution. PyGAD supports some options to help you save time -calling the fitness function for a previously explored solution. - -The parameters explored in this section can be set in the constructor of -the ``pygad.GA`` class. - -The ``cal_pop_fitness()`` method of the ``pygad.GA`` class checks these -parameters to see if there is a possibility of reusing the fitness -instead of calling the fitness function. - -.. _1-savesolutions: - -1. ``save_solutions`` ---------------------- - -It defaults to ``False``. If set to ``True``, then the population of -each generation is saved into the ``solutions`` attribute of the -``pygad.GA`` instance. In other words, every single solution is saved in -the ``solutions`` attribute. - -.. _2-savebestsolutions: - -2. ``save_best_solutions`` --------------------------- - -It defaults to ``False``. If ``True``, then it only saves the best -solution in every generation. - -.. _3-keepelitism: - -3. ``keep_elitism`` -------------------- - -It accepts an integer and defaults to 1. 
If set to a positive integer, -then it keeps the elitism of one generation available in the next -generation. - -.. _4-keepparents: - -4. ``keep_parents`` -------------------- - -It accepts an integer and defaults to -1. It set to ``-1`` or a positive -integer, then it keeps the parents of one generation available in the -next generation. - -Why the Fitness Function is not Called for Solution at Index 0? -=============================================================== - -PyGAD has a parameter called ``keep_elitism`` which defaults to 1. This -parameter defines the number of best solutions in generation **X** to -keep in the next generation **X+1**. The best solutions are just copied -from generation **X** to generation **X+1** without making any change. - -.. code:: python - - ga_instance = pygad.GA(..., - keep_elitism=1, - ...) - -The best solutions are copied at the beginning of the population. If -``keep_elitism=1``, this means the best solution in generation X is kept -in the next generation X+1 at index 0 of the population. If -``keep_elitism=2``, this means the 2 best solutions in generation X are -kept in the next generation X+1 at indices 0 and 1 of the population of -generation 1. - -Because the fitness of these best solutions are already calculated in -generation X, then their fitness values will not be recalculated at -generation X+1 (i.e. the fitness function will not be called for these -solutions again). Instead, their fitness values are just reused. This is -why you see that no solution with index 0 is passed to the fitness -function. - -To force calling the fitness function for each solution in every -generation, consider setting ``keep_elitism`` and ``keep_parents`` to 0. -Moreover, keep the 2 parameters ``save_solutions`` and -``save_best_solutions`` to their default value ``False``. - -.. code:: python - - ga_instance = pygad.GA(..., - keep_elitism=0, - keep_parents=0, - save_solutions=False, - save_best_solutions=False, - ...) 
- -Batch Fitness Calculation -========================= - -In `PyGAD -2.19.0 `__, -a new optional parameter called ``fitness_batch_size`` is supported. A -new optional parameter called ``fitness_batch_size`` is supported to -calculate the fitness function in batches. Thanks to `Linan -Qiu `__ for opening the `GitHub issue -#136 `__. - -Its values can be: - -- ``1`` or ``None``: If the ``fitness_batch_size`` parameter is - assigned the value ``1`` or ``None`` (default), then the normal flow - is used where the fitness function is called for each individual - solution. That is if there are 15 solutions, then the fitness - function is called 15 times. - -- ``1 < fitness_batch_size <= sol_per_pop``: If the - ``fitness_batch_size`` parameter is assigned a value satisfying this - condition ``1 < fitness_batch_size <= sol_per_pop``, then the - solutions are grouped into batches of size ``fitness_batch_size`` and - the fitness function is called once for each batch. In this case, the - fitness function must return a list/tuple/numpy.ndarray with a length - equal to the number of solutions passed. - -.. _example-without-fitnessbatchsize-parameter: - -Example without ``fitness_batch_size`` Parameter ------------------------------------------------- - -This is an example where the ``fitness_batch_size`` parameter is given -the value ``None`` (which is the default value). This is equivalent to -using the value ``1``. In this case, the fitness function will be called -for each solution. This means the fitness function ``fitness_func`` will -receive only a single solution. This is an example of the passed -arguments to the fitness function: - -.. code:: - - solution: [ 2.52860734, -0.94178795, 2.97545704, 0.84131987, -3.78447118, 2.41008358] - solution_idx: 3 - -The fitness function also must return a single numeric value as the -fitness for the passed solution. - -As we have a population of ``20`` solutions, then the fitness function -is called 20 times per generation. 
For 5 generations, then the fitness -function is called ``20*5 = 100`` times. In PyGAD, the fitness function -is called after the last generation too and this adds additional 20 -times. So, the total number of calls to the fitness function is -``20*5 + 20 = 120``. - -Note that the ``keep_elitism`` and ``keep_parents`` parameters are set -to ``0`` to make sure no fitness values are reused and to force calling -the fitness function for each individual solution. - -.. code:: python - - import pygad - import numpy - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - number_of_calls = 0 - - def fitness_func(ga_instance, solution, solution_idx): - global number_of_calls - number_of_calls = number_of_calls + 1 - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=10, - sol_per_pop=20, - fitness_func=fitness_func, - fitness_batch_size=None, - # fitness_batch_size=1, - num_genes=len(function_inputs), - keep_elitism=0, - keep_parents=0) - - ga_instance.run() - print(number_of_calls) - -.. code:: - - 120 - -.. _example-with-fitnessbatchsize-parameter: - -Example with ``fitness_batch_size`` Parameter ---------------------------------------------- - -This is an example where the ``fitness_batch_size`` parameter is used -and assigned the value ``4``. This means the solutions will be grouped -into batches of ``4`` solutions. The fitness function will be called -once for each patch (i.e. called once for each 4 solutions). - -This is an example of the arguments passed to it: - -.. 
code:: python - - solutions: - [[ 3.1129432 -0.69123589 1.93792414 2.23772968 -1.54616001 -0.53930799] - [ 3.38508121 0.19890812 1.93792414 2.23095014 -3.08955597 3.10194128] - [ 2.37079504 -0.88819803 2.97545704 1.41742256 -3.95594055 2.45028256] - [ 2.52860734 -0.94178795 2.97545704 0.84131987 -3.78447118 2.41008358]] - solutions_indices: - [16, 17, 18, 19] - -As we have 20 solutions, then there are ``20/4 = 5`` patches. As a -result, the fitness function is called only 5 times per generation -instead of 20. For each call to the fitness function, it receives a -batch of 4 solutions. - -As we have 5 generations, then the function will be called ``5*5 = 25`` -times. Given the call to the fitness function after the last generation, -then the total number of calls is ``5*5 + 5 = 30``. - -.. code:: python - - import pygad - import numpy - - function_inputs = [4,-2,3.5,5,-11,-4.7] - desired_output = 44 - - number_of_calls = 0 - - def fitness_func_batch(ga_instance, solutions, solutions_indices): - global number_of_calls - number_of_calls = number_of_calls + 1 - batch_fitness = [] - for solution in solutions: - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - batch_fitness.append(fitness) - return batch_fitness - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=10, - sol_per_pop=20, - fitness_func=fitness_func_batch, - fitness_batch_size=4, - num_genes=len(function_inputs), - keep_elitism=0, - keep_parents=0) - - ga_instance.run() - print(number_of_calls) - -.. code:: - - 30 - -When batch fitness calculation is used, then we saved ``120 - 30 = 90`` -calls to the fitness function. - -Use Functions and Methods to Build Fitness and Callbacks -======================================================== - -In PyGAD 2.19.0, it is possible to pass user-defined functions or -methods to the following parameters: - -1. ``fitness_func`` - -2. ``on_start`` - -3. ``on_fitness`` - -4. ``on_parents`` - -5. 
``on_crossover`` - -6. ``on_mutation`` - -7. ``on_generation`` - -8. ``on_stop`` - -This section gives 2 examples to assign these parameters user-defined: - -1. Functions. - -2. Methods. - -Assign Functions ----------------- - -This is a dummy example where the fitness function returns a random -value. Note that the instance of the ``pygad.GA`` class is passed as the -last parameter of all functions. - -.. code:: python - - import pygad - import numpy - - def fitness_func(ga_instanse, solution, solution_idx): - return numpy.random.rand() - - def on_start(ga_instanse): - print("on_start") - - def on_fitness(ga_instanse, last_gen_fitness): - print("on_fitness") - - def on_parents(ga_instanse, last_gen_parents): - print("on_parents") - - def on_crossover(ga_instanse, last_gen_offspring): - print("on_crossover") - - def on_mutation(ga_instanse, last_gen_offspring): - print("on_mutation") - - def on_generation(ga_instanse): - print("on_generation\n") - - def on_stop(ga_instanse, last_gen_fitness): - print("on_stop") - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=4, - sol_per_pop=10, - num_genes=2, - on_start=on_start, - on_fitness=on_fitness, - on_parents=on_parents, - on_crossover=on_crossover, - on_mutation=on_mutation, - on_generation=on_generation, - on_stop=on_stop, - fitness_func=fitness_func) - - ga_instance.run() - -Assign Methods --------------- - -The next example has all the method defined inside the class ``Test``. -All of the methods accept an additional parameter representing the -method's object of the class ``Test``. - -All methods accept ``self`` as the first parameter and the instance of -the ``pygad.GA`` class as the last parameter. - -.. 
code:: python - - import pygad - import numpy - - class Test: - def fitness_func(self, ga_instanse, solution, solution_idx): - return numpy.random.rand() - - def on_start(self, ga_instanse): - print("on_start") - - def on_fitness(self, ga_instanse, last_gen_fitness): - print("on_fitness") - - def on_parents(self, ga_instanse, last_gen_parents): - print("on_parents") - - def on_crossover(self, ga_instanse, last_gen_offspring): - print("on_crossover") - - def on_mutation(self, ga_instanse, last_gen_offspring): - print("on_mutation") - - def on_generation(self, ga_instanse): - print("on_generation\n") - - def on_stop(self, ga_instanse, last_gen_fitness): - print("on_stop") - - ga_instance = pygad.GA(num_generations=5, - num_parents_mating=4, - sol_per_pop=10, - num_genes=2, - on_start=Test().on_start, - on_fitness=Test().on_fitness, - on_parents=Test().on_parents, - on_crossover=Test().on_crossover, - on_mutation=Test().on_mutation, - on_generation=Test().on_generation, - on_stop=Test().on_stop, - fitness_func=Test().fitness_func) - - ga_instance.run() +More About PyGAD +================ + +Multi-Objective Optimization +============================ + +In `PyGAD +3.2.0 `__, +the library supports multi-objective optimization using the +non-dominated sorting genetic algorithm II (NSGA-II). The code is +exactly similar to the regular code used for single-objective +optimization except for 1 difference. It is the return value of the +fitness function. + +In single-objective optimization, the fitness function returns a single +numeric value. In this example, the variable ``fitness`` is expected to +be a numeric value. + +.. code:: python + + def fitness_func(ga_instance, solution, solution_idx): + ... + return fitness + +But in multi-objective optimization, the fitness function returns any of +these data types: + +1. ``list`` + +2. ``tuple`` + +3. ``numpy.ndarray`` + +.. code:: python + + def fitness_func(ga_instance, solution, solution_idx): + ... 
+ return [fitness1, fitness2, ..., fitnessN] + +Whenever the fitness function returns an iterable of these data types, +then the problem is considered multi-objective. This holds even if there +is a single element in the returned iterable. + +Other than the fitness function, everything else could be the same in +both single and multi-objective problems. + +But it is recommended to use one of these 2 parent selection operators +to solve multi-objective problems: + +1. ``nsga2``: This selects the parents based on non-dominated sorting + and crowding distance. + +2. ``tournament_nsga2``: This selects the parents using tournament + selection which uses non-dominated sorting and crowding distance to + rank the solutions. + +This is a multi-objective optimization example that optimizes these 2 +linear functions: + +1. ``y1 = f(w1:w6) = w1x1 + w2x2 + w3x3 + w4x4 + w5x5 + w6x6`` + +2. ``y2 = f(w1:w6) = w1x7 + w2x8 + w3x9 + w4x10 + w5x11 + w6x12`` + +Where: + +1. ``(x1,x2,x3,x4,x5,x6)=(4,-2,3.5,5,-11,-4.7)`` and ``y=50`` + +2. ``(x7,x8,x9,x10,x11,x12)=(-2,0.7,-9,1.4,3,5)`` and ``y=30`` + +The 2 functions use the same parameters (weights) ``w1`` to ``w6``. + +The goal is to use PyGAD to find the optimal values for such weights +that satisfy the 2 functions ``y1`` and ``y2``. + +.. code:: python + + import pygad + import numpy + + """ + Given these 2 functions: + y1 = f(w1:w6) = w1x1 + w2x2 + w3x3 + w4x4 + w5x5 + w6x6 + y2 = f(w1:w6) = w1x7 + w2x8 + w3x9 + w4x10 + w5x11 + w6x12 + where (x1,x2,x3,x4,x5,x6)=(4,-2,3.5,5,-11,-4.7) and y=50 + and (x7,x8,x9,x10,x11,x12)=(-2,0.7,-9,1.4,3,5) and y=30 + What are the best values for the 6 weights (w1 to w6)? We are going to use the genetic algorithm to optimize these 2 functions. + This is a multi-objective optimization problem. + + PyGAD considers the problem as multi-objective if the fitness function returns: + 1) List. + 2) Or tuple. + 3) Or numpy.ndarray. + """ + + function_inputs1 = [4,-2,3.5,5,-11,-4.7] # Function 1 inputs.
+ function_inputs2 = [-2,0.7,-9,1.4,3,5] # Function 2 inputs. + desired_output1 = 50 # Function 1 output. + desired_output2 = 30 # Function 2 output. + + def fitness_func(ga_instance, solution, solution_idx): + output1 = numpy.sum(solution*function_inputs1) + output2 = numpy.sum(solution*function_inputs2) + fitness1 = 1.0 / (numpy.abs(output1 - desired_output1) + 0.000001) + fitness2 = 1.0 / (numpy.abs(output2 - desired_output2) + 0.000001) + return [fitness1, fitness2] + + num_generations = 100 + num_parents_mating = 10 + + sol_per_pop = 20 + num_genes = len(function_inputs1) + + ga_instance = pygad.GA(num_generations=num_generations, + num_parents_mating=num_parents_mating, + sol_per_pop=sol_per_pop, + num_genes=num_genes, + fitness_func=fitness_func, + parent_selection_type='nsga2') + + ga_instance.run() + + ga_instance.plot_fitness(label=['Obj 1', 'Obj 2']) + + solution, solution_fitness, solution_idx = ga_instance.best_solution(ga_instance.last_generation_fitness) + print(f"Parameters of the best solution : {solution}") + print(f"Fitness value of the best solution = {solution_fitness}") + + prediction = numpy.sum(numpy.array(function_inputs1)*solution) + print(f"Predicted output 1 based on the best solution : {prediction}") + prediction = numpy.sum(numpy.array(function_inputs2)*solution) + print(f"Predicted output 2 based on the best solution : {prediction}") + +This is the result of the print statements. The predicted outputs are +close to the desired outputs. + +.. code:: + + Parameters of the best solution : [ 0.79676439 -2.98823386 -4.12677662 5.70539445 -2.02797016 -1.07243922] + Fitness value of the best solution = [ 1.68090829 349.8591915 ] + Predicted output 1 based on the best solution : 50.59491545442283 + Predicted output 2 based on the best solution : 29.99714270722312 + +This is the figure created by the ``plot_fitness()`` method. The fitness +of the first objective has the green color. The blue color is used for +the second objective fitness. 
+ +.. image:: https://github.com/ahmedfgad/GeneticAlgorithmPython/assets/16560492/7896f8d8-01c5-4ff9-8d15-52191c309b63 + :alt: + +.. _limit-the-gene-value-range-using-the-genespace-parameter: + +Limit the Gene Value Range using the ``gene_space`` Parameter +============================================================= + +In `PyGAD +2.11.0 `__, +the ``gene_space`` parameter supported a new feature to allow +customizing the range of accepted values for each gene. Let's take a +quick review of the ``gene_space`` parameter to build over it. + +The ``gene_space`` parameter allows the user to feed the space of values +of each gene. This way the accepted values for each gene are restricted to +the user-defined values. Assume there is a problem that has 3 genes +where each gene has a different set of values as follows: + +1. Gene 1: ``[0.4, 12, -5, 21.2]`` + +2. Gene 2: ``[-2, 0.3]`` + +3. Gene 3: ``[1.2, 63.2, 7.4]`` + +Then, the ``gene_space`` for this problem is as given below. Note that +the order is very important. + +.. code:: python + + gene_space = [[0.4, 12, -5, 21.2], + [-2, 0.3], + [1.2, 63.2, 7.4]] + +In case all genes share the same set of values, then simply feed a +single list to the ``gene_space`` parameter as follows. In this case, +all genes can only take values from this list of 6 values. + +.. code:: python + + gene_space = [33, 7, 0.5, 95, 6.3, 0.74] + +The previous example restricts the gene values to just a set of fixed +number of discrete values. In case you want to use a range of discrete +values to the gene, then you can use the ``range()`` function. For +example, ``range(1, 7)`` means the set of allowed values for the gene +are ``1, 2, 3, 4, 5, and 6``. You can also use the ``numpy.arange()`` or +``numpy.linspace()`` functions for the same purpose. + +The previous discussion only works with a range of discrete values not +continuous values.
In `PyGAD +2.11.0 `__, +the ``gene_space`` parameter can be assigned a dictionary that allows +the gene to have values from a continuous range. + +Assuming you want to restrict the gene within this half-open range [1 to +5) where 1 is included and 5 is not. Then simply create a dictionary +with 2 items where the keys of the 2 items are: + +1. ``'low'``: The minimum value in the range which is 1 in the example. + +2. ``'high'``: The maximum value in the range which is 5 in the example. + +The dictionary will look like that: + +.. code:: python + + {'low': 1, + 'high': 5} + +It is not acceptable to add more than 2 items in the dictionary or use +other keys than ``'low'`` and ``'high'``. + +For a 3-gene problem, the next code creates a dictionary for each gene +to restrict its values in a continuous range. For the first gene, it can +take any floating-point value from the range that starts from 1 +(inclusive) and ends at 5 (exclusive). + +.. code:: python + + gene_space = [{'low': 1, 'high': 5}, {'low': 0.3, 'high': 1.4}, {'low': -0.2, 'high': 4.5}] + +.. _more-about-the-genespace-parameter: + +More about the ``gene_space`` Parameter +======================================= + +The ``gene_space`` parameter customizes the space of values of each +gene. + +Assuming that all genes have the same global space which include the +values 0.3, 5.2, -4, and 8, then those values can be assigned to the +``gene_space`` parameter as a list, tuple, or range. Here is a list +assigned to this parameter. By doing that, then the gene values are +restricted to those assigned to the ``gene_space`` parameter. + +.. code:: python + + gene_space = [0.3, 5.2, -4, 8] + +If some genes have different spaces, then ``gene_space`` should accept a +nested list or tuple. In this case, the elements could be: + +1. Number (of ``int``, ``float``, or ``NumPy`` data types): A single + value to be assigned to the gene. This means this gene will have the + same value across all generations. + +2. 
``list``, ``tuple``, ``numpy.ndarray``, or any range like ``range``, + ``numpy.arange()``, or ``numpy.linspace``: It holds the space for + each individual gene. But this space is usually discrete. That is + there is a set of finite values to select from. + +3. ``dict``: To sample a value for a gene from a continuous range. The + dictionary must have 2 mandatory keys which are ``"low"`` and + ``"high"`` in addition to an optional key which is ``"step"``. A + random value is returned between the values assigned to the items + with ``"low"`` and ``"high"`` keys. If the ``"step"`` exists, then + this works as the previous options (i.e. discrete set of values). + +4. ``None``: A gene with its space set to ``None`` is initialized + randomly from the range specified by the 2 parameters + ``init_range_low`` and ``init_range_high``. For mutation, its value + is mutated based on a random value from the range specified by the 2 + parameters ``random_mutation_min_val`` and + ``random_mutation_max_val``. If all elements in the ``gene_space`` + parameter are ``None``, the parameter will not have any effect. + +Assume that a chromosome has 2 genes and each gene has a different +value space. Then the ``gene_space`` could be assigned a nested +list/tuple where each element determines the space of a gene. + +According to the next code, the space of the first gene is ``[0.4, -5]`` +which has 2 values and the space for the second gene is +``[0.5, -3.2, 8.2, -9]`` which has 4 values. + +.. code:: python + + gene_space = [[0.4, -5], [0.5, -3.2, 8.2, -9]] + +For a 2 gene chromosome, if the first gene space is restricted to the +discrete values from 0 to 4 and the second gene is restricted to the +values from 10 to 19, then it could be specified according to the next +code. + +.. code:: python + + gene_space = [range(5), range(10, 20)] + +The ``gene_space`` can also be assigned to a single range, as given +below, where the values of all genes are sampled from the same range. + +.. 
code:: python + + gene_space = numpy.arange(15) + +The ``gene_space`` can be assigned a dictionary to sample a value from a +continuous range. + +.. code:: python + + gene_space = {"low": 4, "high": 30} + +A step also can be assigned to the dictionary. This works as if a range +is used. + +.. code:: python + + gene_space = {"low": 4, "high": 30, "step": 2.5} + +.. + + Setting a ``dict`` like ``{"low": 0, "high": 10}`` in the + ``gene_space`` means that random values from the continuous range [0, + 10) are sampled. Note that ``0`` is included but ``10`` is not + included while sampling. Thus, the maximum value that could be + returned is less than ``10`` like ``9.9999``. But if the user decided + to round the genes using, for example, ``[float, 2]``, then this + value will become 10. So, the user should be careful to the inputs. + +If a ``None`` is assigned to only a single gene, then its value will be +randomly generated initially using the ``init_range_low`` and +``init_range_high`` parameters in the ``pygad.GA`` class's constructor. +During mutation, the value are sampled from the range defined by the 2 +parameters ``random_mutation_min_val`` and ``random_mutation_max_val``. +This is an example where the second gene is given a ``None`` value. + +.. code:: python + + gene_space = [range(5), None, numpy.linspace(10, 20, 300)] + +If the user did not assign the initial population to the +``initial_population`` parameter, the initial population is created +randomly based on the ``gene_space`` parameter. Moreover, the mutation +is applied based on this parameter. + +.. _how-mutation-works-with-the-genespace-parameter: + +How Mutation Works with the ``gene_space`` Parameter? +----------------------------------------------------- + +If a gene has its static space defined in the ``gene_space`` parameter, +then mutation works by replacing the gene value by a value randomly +selected from the gene space. This happens for both ``int`` and +``float`` data types. 
+ +For example, the following ``gene_space`` has the static space +``[1, 2, 3]`` defined for the first gene. So, this gene can only have a +value out of these 3 values. + +.. code:: python + + Gene space: [[1, 2, 3], + None] + Solution: [1, 5] + +For a solution like ``[1, 5]``, mutation happens for the +first gene by simply replacing its current value by a randomly selected +value (other than its current value if possible). So, the value 1 will +be replaced by either 2 or 3. + +For the second gene, its space is set to ``None``. So, traditional +mutation happens for this gene by: + +1. Generating a random value from the range defined by the + ``random_mutation_min_val`` and ``random_mutation_max_val`` + parameters. + +2. Adding this random value to the current gene's value. + +If its current value is 5 and the random value is ``-0.5``, then the new +value is 4.5. If the gene type is integer, then the value will be +rounded. + +Stop at Any Generation +====================== + +In `PyGAD +2.4.0 `__, +it is possible to stop the genetic algorithm after any generation. All +you need to do is to return the string ``"stop"`` in the callback +function ``on_generation``. When this callback function is implemented +and assigned to the ``on_generation`` parameter in the constructor of +the ``pygad.GA`` class, then the algorithm immediately stops after +completing its current generation. Let's discuss an example. + +Assume that the user wants to stop the algorithm either after 100 +generations or if a condition is met. The user may assign a value of 100 +to the ``num_generations`` parameter of the ``pygad.GA`` class +constructor. + +The condition that stops the algorithm is written in a callback function +like the one in the next code. If the fitness value of the best solution +is 70 or higher, then the string ``"stop"`` is returned. + +.. 
code:: python + + def func_generation(ga_instance): + if ga_instance.best_solution()[1] >= 70: + return "stop" + +Stop Criteria +============= + +In `PyGAD +2.15.0 `__, +a new parameter named ``stop_criteria`` is added to the constructor of +the ``pygad.GA`` class. It helps to stop the evolution based on some +criteria. It can be assigned to one or more criterion. + +Each criterion is passed as ``str`` that consists of 2 parts: + +1. Stop word. + +2. Number. + +It takes this form: + +.. code:: python + + "word_num" + +The current 2 supported words are ``reach`` and ``saturate``. + +The ``reach`` word stops the ``run()`` method if the fitness value is +equal to or greater than a given fitness value. An example for ``reach`` +is ``"reach_40"`` which stops the evolution if the fitness is >= 40. + +``saturate`` stops the evolution if the fitness saturates for a given +number of consecutive generations. An example for ``saturate`` is +``"saturate_7"`` which means stop the ``run()`` method if the fitness +does not change for 7 consecutive generations. + +Here is an example that stops the evolution if either the fitness value +reached ``127.4`` or if the fitness saturates for ``15`` generations. + +.. code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, 9, 4] + desired_output = 44 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + + return fitness + + ga_instance = pygad.GA(num_generations=200, + sol_per_pop=10, + num_parents_mating=4, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + stop_criteria=["reach_127.4", "saturate_15"]) + + ga_instance.run() + print(f"Number of generations passed is {ga_instance.generations_completed}") + +Elitism Selection +================= + +In `PyGAD +2.18.0 `__, +a new parameter called ``keep_elitism`` is supported. 
It accepts an +integer to define the number of elitism (i.e. best solutions) to keep in +the next generation. This parameter defaults to ``1`` which means only +the best solution is kept in the next generation. + +In the next example, the ``keep_elitism`` parameter in the constructor +of the ``pygad.GA`` class is set to 2. Thus, the best 2 solutions in +each generation are kept in the next generation. + +.. code:: python + + import numpy + import pygad + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / numpy.abs(output - desired_output) + return fitness + + ga_instance = pygad.GA(num_generations=2, + num_parents_mating=3, + fitness_func=fitness_func, + num_genes=6, + sol_per_pop=5, + keep_elitism=2) + + ga_instance.run() + +The value passed to the ``keep_elitism`` parameter must satisfy 2 +conditions: + +1. It must be ``>= 0``. + +2. It must be ``<= sol_per_pop``. That is its value cannot exceed the + number of solutions in the current population. + +In the previous example, if the ``keep_elitism`` parameter is set equal +to the value passed to the ``sol_per_pop`` parameter, which is 5, then +there will be no evolution at all as in the next figure. This is because +all the 5 solutions are used as elitism in the next generation and no +offspring will be created. + +.. code:: python + + ... + + ga_instance = pygad.GA(..., + sol_per_pop=5, + keep_elitism=5) + + ga_instance.run() + +.. image:: https://user-images.githubusercontent.com/16560492/189273225-67ffad41-97ab-45e1-9324-429705e17b20.png + :alt: + +Note that if the ``keep_elitism`` parameter is effective (i.e. is +assigned a positive integer, not zero), then the ``keep_parents`` +parameter will have no effect. Because the default value of the +``keep_elitism`` parameter is 1, then the ``keep_parents`` parameter has +no effect by default. 
The ``keep_parents`` parameter is only effective +when ``keep_elitism=0``. + +Random Seed +=========== + +In `PyGAD +2.18.0 `__, +a new parameter called ``random_seed`` is supported. Its value is used +as a seed for the random function generators. + +PyGAD uses random functions in these 2 libraries: + +1. NumPy + +2. random + +The ``random_seed`` parameter defaults to ``None`` which means no seed +is used. As a result, different random numbers are generated for each +run of PyGAD. + +If this parameter is assigned a proper seed, then the results will be +reproducible. In the next example, the integer 2 is used as a random +seed. + +.. code:: python + + import numpy + import pygad + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / numpy.abs(output - desired_output) + return fitness + + ga_instance = pygad.GA(num_generations=2, + num_parents_mating=3, + fitness_func=fitness_func, + sol_per_pop=5, + num_genes=6, + random_seed=2) + + ga_instance.run() + best_solution, best_solution_fitness, best_match_idx = ga_instance.best_solution() + print(best_solution) + print(best_solution_fitness) + +This is the best solution found and its fitness value. + +.. code:: + + [ 2.77249188 -4.06570662 0.04196872 -3.47770796 -0.57502138 -3.22775267] + 0.04872203136549972 + +After running the code again, it will find the same result. + +.. code:: + + [ 2.77249188 -4.06570662 0.04196872 -3.47770796 -0.57502138 -3.22775267] + 0.04872203136549972 + +Continue without Loosing Progress +================================= + +In `PyGAD +2.18.0 `__, +and thanks for `Felix Bernhard `__ for +opening `this GitHub +issue `__, +the values of these 4 instance attributes are no longer reset after each +call to the ``run()`` method. + +1. ``self.best_solutions`` + +2. ``self.best_solutions_fitness`` + +3. ``self.solutions`` + +4. 
``self.solutions_fitness`` + +This helps the user to continue where the last run stopped without +losing the values of these 4 attributes. + +Now, the user can save the model by calling the ``save()`` method. + +.. code:: python + + import pygad + + def fitness_func(ga_instance, solution, solution_idx): + ... + return fitness + + ga_instance = pygad.GA(...) + + ga_instance.run() + + ga_instance.plot_fitness() + + ga_instance.save("pygad_GA") + +Then the saved model is loaded by calling the ``load()`` function. After +calling the ``run()`` method on the loaded instance, the data +in the previous 4 attributes is not reset but extended with the new +data. + +.. code:: python + + import pygad + + def fitness_func(ga_instance, solution, solution_idx): + ... + return fitness + + loaded_ga_instance = pygad.load("pygad_GA") + + loaded_ga_instance.run() + + loaded_ga_instance.plot_fitness() + +The plot created by the ``plot_fitness()`` method will show the data +collected from both the runs. + +Note that the 2 attributes (``self.best_solutions`` and +``self.best_solutions_fitness``) only work if the +``save_best_solutions`` parameter is set to ``True``. Also, the 2 +attributes (``self.solutions`` and ``self.solutions_fitness``) only work +if the ``save_solutions`` parameter is ``True``. + +Prevent Duplicates in Gene Values +================================= + +In `PyGAD +2.13.0 `__, +a new bool parameter called ``allow_duplicate_genes`` is supported to +control whether duplicates are allowed in the chromosome or not. In +other words, whether 2 or more genes might have the same exact value. + +If ``allow_duplicate_genes=True`` (which is the default case), genes may +have the same value. If ``allow_duplicate_genes=False``, then no 2 genes +will have the same value given that there are enough unique values for +the genes. + +The next code gives an example to use the ``allow_duplicate_genes`` +parameter. 
A callback generation function is implemented to print the +population after each generation. + +.. code:: python + + import pygad + + def fitness_func(ga_instance, solution, solution_idx): + return 0 + + def on_generation(ga): + print("Generation", ga.generations_completed) + print(ga.population) + + ga_instance = pygad.GA(num_generations=5, + sol_per_pop=5, + num_genes=4, + mutation_num_genes=3, + random_mutation_min_val=-5, + random_mutation_max_val=5, + num_parents_mating=2, + fitness_func=fitness_func, + gene_type=int, + on_generation=on_generation, + allow_duplicate_genes=False) + ga_instance.run() + +Here are the population after the 5 generations. Note how there are no +duplicate values. + +.. code:: python + + Generation 1 + [[ 2 -2 -3 3] + [ 0 1 2 3] + [ 5 -3 6 3] + [-3 1 -2 4] + [-1 0 -2 3]] + Generation 2 + [[-1 0 -2 3] + [-3 1 -2 4] + [ 0 -3 -2 6] + [-3 0 -2 3] + [ 1 -4 2 4]] + Generation 3 + [[ 1 -4 2 4] + [-3 0 -2 3] + [ 4 0 -2 1] + [-4 0 -2 -3] + [-4 2 0 3]] + Generation 4 + [[-4 2 0 3] + [-4 0 -2 -3] + [-2 5 4 -3] + [-1 2 -4 4] + [-4 2 0 -3]] + Generation 5 + [[-4 2 0 -3] + [-1 2 -4 4] + [ 3 4 -4 0] + [-1 0 2 -2] + [-4 2 -1 1]] + +The ``allow_duplicate_genes`` parameter is configured with use with the +``gene_space`` parameter. Here is an example where each of the 4 genes +has the same space of values that consists of 4 values (1, 2, 3, and 4). + +.. 
code:: python + + import pygad + + def fitness_func(ga_instance, solution, solution_idx): + return 0 + + def on_generation(ga): + print("Generation", ga.generations_completed) + print(ga.population) + + ga_instance = pygad.GA(num_generations=5, + sol_per_pop=5, + num_genes=4, + num_parents_mating=2, + fitness_func=fitness_func, + gene_type=int, + gene_space=[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], + on_generation=on_generation, + allow_duplicate_genes=False) + ga_instance.run() + +Even though all the genes share the same space of values, no 2 genes +duplicate their values as shown by the next output. + +.. code:: python + + Generation 1 + [[2 3 1 4] + [2 3 1 4] + [2 4 1 3] + [2 3 1 4] + [1 3 2 4]] + Generation 2 + [[1 3 2 4] + [2 3 1 4] + [1 3 2 4] + [2 3 4 1] + [1 3 4 2]] + Generation 3 + [[1 3 4 2] + [2 3 4 1] + [1 3 4 2] + [3 1 4 2] + [3 2 4 1]] + Generation 4 + [[3 2 4 1] + [3 1 4 2] + [3 2 4 1] + [1 2 4 3] + [1 3 4 2]] + Generation 5 + [[1 3 4 2] + [1 2 4 3] + [2 1 4 3] + [1 2 4 3] + [1 2 4 3]] + +You should take care to give enough values for the genes so that PyGAD is +able to find alternatives for the gene value in case it duplicates with +another gene. + +There might be 2 duplicate genes where changing either of the 2 +duplicating genes will not solve the problem. For example, if +``gene_space=[[3, 0, 1], [4, 1, 2], [0, 2], [3, 2, 0]]`` and the +solution is ``[3 2 0 0]``, then the values of the last 2 genes +duplicate. There are no possible changes in the last 2 genes to solve +the problem. + +This problem can be solved by randomly changing one of the +non-duplicating genes that may make room for a unique value in one of the +2 duplicating genes. For example, by changing the second gene from 2 to +4, then any of the last 2 genes can take the value 2 and solve the +duplicates. The resultant solution is then ``[3 4 2 0]``. But this option is +not yet supported in PyGAD. 
+ +Solve Duplicates using a Third Gene +----------------------------------- + +When ``allow_duplicate_genes=False`` and a user-defined ``gene_space`` +is used, it sometimes happens that there is no room to solve the +duplicates between the 2 genes by simply replacing the value of one gene +by another gene. In `PyGAD +3.1.0 `__, +the duplicates are solved by looking for a third gene that will help in +solving the duplicates. The following examples explain how it works. + +Example 1: + +Let's assume that this gene space is used and there is a solution with 2 +duplicate genes with the same value 4. + +.. code:: python + + Gene space: [[2, 3], + [3, 4], + [4, 5], + [5, 6]] + Solution: [3, 4, 4, 5] + +By checking the gene space, the second gene can have the values +``[3, 4]`` and the third gene can have the values ``[4, 5]``. To solve +the duplicates, we have to change the value of either of these 2 genes. + +If the value of the second gene changes from 4 to 3, then it will +duplicate with the first gene. If we are to change the value of the +third gene from 4 to 5, then it will duplicate with the fourth gene. As +a conclusion, just selecting a different gene value for either +the second or third genes will introduce new duplicating genes. + +When there are 2 duplicate genes but there is no way to solve their +duplicates, then the solution is to change a third gene that makes +room to solve the duplicates between the 2 genes. + +In our example, duplicates between the second and third genes can be +solved by, for example: + +- Changing the first gene from 3 to 2 then changing the second gene + from 4 to 3. + +- Or changing the fourth gene from 5 to 6 then changing the third gene + from 4 to 5. + +Generally, this is how to solve such duplicates: + +1. For any duplicate gene **GENE1**, select another value. + +2. Check which other gene **GENEX** duplicates with this new value. + +3. Find if **GENEX** can have another value that will not cause any more + duplicates. 
If so, go to step 7. + +4. If all the other values of **GENEX** will cause duplicates, then try + another gene **GENEY**. + +5. Repeat steps 3 and 4 until all the genes are explored. + +6. If there is no possibility to solve the duplicates, then there is no + way to solve the duplicates and we have to keep the duplicate value. + +7. If a value for a gene **GENEM** is found that will not cause more + duplicates, then use this value for the gene **GENEM**. + +8. Replace the value of the gene **GENE1** by the old value of the gene + **GENEM**. This solves the duplicates. + +This is an example to solve the duplicate for the solution +``[3, 4, 4, 5]``: + +1. Let's use the second gene with value 4. Because the space of this + gene is ``[3, 4]``, then the only other value we can select is 3. + +2. The first gene also has the value 3. + +3. The first gene has another value 2 that will not cause more + duplicates in the solution. Then go to step 7. + +4. Skip. + +5. Skip. + +6. Skip. + +7. The value of the first gene 3 will be replaced by the new value 2. + The new solution is [2, 4, 4, 5]. + +8. Replace the value of the second gene 4 by the old value of the first + gene which is 3. The new solution is [2, 3, 4, 5]. The duplicate is + solved. + +Example 2: + +.. code:: python + + Gene space: [[0, 1], + [1, 2], + [2, 3], + [3, 4]] + Solution: [1, 2, 2, 3] + +The quick summary is: + +- Change the value of the first gene from 1 to 0. The solution becomes + [0, 2, 2, 3]. + +- Change the value of the second gene from 2 to 1. The solution becomes + [0, 1, 2, 3]. The duplicate is solved. + +.. _more-about-the-genetype-parameter: + +More about the ``gene_type`` Parameter +====================================== + +The ``gene_type`` parameter allows the user to control the data type for +all genes at once or each individual gene. In `PyGAD +2.15.0 `__, +the ``gene_type`` parameter also supports customizing the precision for +``float`` data types. 
As a result, the ``gene_type`` parameter helps to: + +1. Select a data type for all genes with or without precision. + +2. Select a data type for each individual gene with or without + precision. + +Let's discuss things by examples. + +Data Type for All Genes without Precision +----------------------------------------- + +The data type for all genes can be specified by assigning the numeric +data type directly to the ``gene_type`` parameter. This is an example to +make all genes of ``int`` data types. + +.. code:: python + + gene_type=int + +Given that the supported numeric data types of PyGAD include Python's +``int`` and ``float`` in addition to all numeric types of ``NumPy``, +then any of these types can be assigned to the ``gene_type`` parameter. + +If no precision is specified for a ``float`` data type, then the +complete floating-point number is kept. + +The next code uses an ``int`` data type for all genes where the genes in +the initial and final population are only integers. + +.. code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_type=int) + + print("Initial Population") + print(ga_instance.initial_population) + + ga_instance.run() + + print("Final Population") + print(ga_instance.population) + +.. 
code:: python + + Initial Population + [[ 1 -1 2 0 -3] + [ 0 -2 0 -3 -1] + [ 0 -1 -1 2 0] + [-2 3 -2 3 3] + [ 0 0 2 -2 -2]] + + Final Population + [[ 1 -1 2 2 0] + [ 1 -1 2 2 0] + [ 1 -1 2 2 0] + [ 1 -1 2 2 0] + [ 1 -1 2 2 0]] + +Data Type for All Genes with Precision +-------------------------------------- + +A precision can only be specified for a ``float`` data type and cannot +be specified for integers. Here is an example to use a precision of 3 +for the ``float`` data type. In this case, all genes are of type +``float`` and their maximum precision is 3. + +.. code:: python + + gene_type=[float, 3] + +The next code uses prints the initial and final population where the +genes are of type ``float`` with precision 3. + +.. code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_type=[float, 3]) + + print("Initial Population") + print(ga_instance.initial_population) + + ga_instance.run() + + print("Final Population") + print(ga_instance.population) + +.. 
code:: python + + Initial Population + [[-2.417 -0.487 3.623 2.457 -2.362] + [-1.231 0.079 -1.63 1.629 -2.637] + [ 0.692 -2.098 0.705 0.914 -3.633] + [ 2.637 -1.339 -1.107 -0.781 -3.896] + [-1.495 1.378 -1.026 3.522 2.379]] + + Final Population + [[ 1.714 -1.024 3.623 3.185 -2.362] + [ 0.692 -1.024 3.623 3.185 -2.362] + [ 0.692 -1.024 3.623 3.375 -2.362] + [ 0.692 -1.024 4.041 3.185 -2.362] + [ 1.714 -0.644 3.623 3.185 -2.362]] + +Data Type for each Individual Gene without Precision +---------------------------------------------------- + +In `PyGAD +2.14.0 `__, +the ``gene_type`` parameter allows customizing the gene type for each +individual gene. This is by using a ``list``/``tuple``/``numpy.ndarray`` +with number of elements equal to the number of genes. For each element, +a type is specified for the corresponding gene. + +This is an example for a 5-gene problem where different types are +assigned to the genes. + +.. code:: python + + gene_type=[int, float, numpy.float16, numpy.int8, float] + +This is a complete code that prints the initial and final population for +a custom-gene data type. + +.. code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_type=[int, float, numpy.float16, numpy.int8, float]) + + print("Initial Population") + print(ga_instance.initial_population) + + ga_instance.run() + + print("Final Population") + print(ga_instance.population) + +.. 
code:: python + + Initial Population + [[0 0.8615522360026828 0.7021484375 -2 3.5301821368185866] + [-3 2.648189378595294 -3.830078125 1 -0.9586271572917742] + [3 3.7729827570110714 1.2529296875 -3 1.395741994211889] + [0 1.0490687178053282 1.51953125 -2 0.7243617940450235] + [0 -0.6550158436937226 -2.861328125 -2 1.8212734549263097]] + + Final Population + [[3 3.7729827570110714 2.055 0 0.7243617940450235] + [3 3.7729827570110714 1.458 0 -0.14638754050305036] + [3 3.7729827570110714 1.458 0 0.0869406120516778] + [3 3.7729827570110714 1.458 0 0.7243617940450235] + [3 3.7729827570110714 1.458 0 -0.14638754050305036]] + +Data Type for each Individual Gene with Precision +------------------------------------------------- + +The precision can also be specified for the ``float`` data types as in +the next line where the second gene precision is 2 and last gene +precision is 1. + +.. code:: python + + gene_type=[int, [float, 2], numpy.float16, numpy.int8, [float, 1]] + +This is a complete example where the initial and final populations are +printed where the genes comply with the data types and precisions +specified. + +.. code:: python + + import pygad + import numpy + + equation_inputs = [4, -2, 3.5, 8, -2] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + gene_type=[int, [float, 2], numpy.float16, numpy.int8, [float, 1]]) + + print("Initial Population") + print(ga_instance.initial_population) + + ga_instance.run() + + print("Final Population") + print(ga_instance.population) + +.. 
code:: python + + Initial Population + [[-2 -1.22 1.716796875 -1 0.2] + [-1 -1.58 -3.091796875 0 -1.3] + [3 3.35 -0.107421875 1 -3.3] + [-2 -3.58 -1.779296875 0 0.6] + [2 -3.73 2.65234375 3 -0.5]] + + Final Population + [[2 -4.22 3.47 3 -1.3] + [2 -3.73 3.47 3 -1.3] + [2 -4.22 3.47 2 -1.3] + [2 -4.58 3.47 3 -1.3] + [2 -3.73 3.47 3 -1.3]] + +Parallel Processing in PyGAD +============================ + +Starting from `PyGAD +2.17.0 `__, +parallel processing is supported. This section explains how to use +parallel processing in PyGAD. + +According to the `PyGAD +lifecycle `__, +only 2 operations can be parallelized: + +1. Population fitness calculation. + +2. Mutation. + +The reason is that the calculations in these 2 operations are +independent (i.e. each solution/chromosome is handled independently from +the others) and can be distributed across different processes or +threads. + +For the mutation operation, it does not do intensive calculations on the +CPU. Its calculations are simple like flipping the values of some genes +from 0 to 1 or adding a random value to some genes. So, it does not take +much CPU processing time. Experiments proved that parallelizing the +mutation operation across the solutions increases the time instead of +reducing it. This is because running multiple processes or threads adds +overhead to manage them. Thus, parallel processing cannot be applied to +the mutation operation. + +For the population fitness calculation, parallel processing can help +make a difference and reduce the processing time. But this is +conditional on the type of calculations done in the fitness function. If +the fitness function makes intensive calculations and takes much +processing time from the CPU, then it is probable that parallel +processing will help to cut down the overall time. 
+ +This section explains how parallel processing works in PyGAD and how to +use parallel processing in PyGAD. + +How to Use Parallel Processing in PyGAD +--------------------------------------- + +Starting from `PyGAD +2.17.0 `__, +a new parameter called ``parallel_processing`` is added to the constructor +of the ``pygad.GA`` class. + +.. code:: python + + import pygad + ... + ga_instance = pygad.GA(..., + parallel_processing=...) + ... + +This parameter allows the user to do the following: + +1. Enable parallel processing. + +2. Select whether processes or threads are used. + +3. Specify the number of processes or threads to be used. + +These are the 3 possible values for the ``parallel_processing`` parameter: + +1. ``None``: (Default) It means no parallel processing is used. + +2. A positive integer referring to the number of threads to be used + (i.e. threads, not processes, are used). + +3. ``list``/``tuple``: If a list or a tuple of exactly 2 elements is + assigned, then: + + 1. The first element can be either ``'process'`` or ``'thread'`` to + specify whether processes or threads are used, respectively. + + 2. The second element can be: + + 1. A positive integer to select the maximum number of processes or + threads to be used. + + 2. ``0`` to indicate that 0 processes or threads are used. It + means no parallel processing. This is identical to setting + ``parallel_processing=None``. + + 3. ``None`` to use the default value as calculated by the + ``concurrent.futures`` module. + +These are examples of the values assigned to the ``parallel_processing`` +parameter: + +- ``parallel_processing=4``: Because the parameter is assigned a + positive integer, this means parallel processing is activated where 4 + threads are used. + +- ``parallel_processing=["thread", 5]``: Use parallel processing with 5 + threads. This is identical to ``parallel_processing=5``. + +- ``parallel_processing=["process", 8]``: Use parallel processing with + 8 processes. 
+ +- ``parallel_processing=["process", 0]``: As the second element is + given the value 0, this means do not use parallel processing. This is + identical to ``parallel_processing=None``. + +Examples +-------- + +The examples will help you know the difference between using processes +and threads. Moreover, they will give an idea of when parallel processing +would make a difference and reduce the time. These are dummy examples +where the fitness function is made to always return 0. + +The first example uses 10 genes, 5 solutions in the population where +only 3 solutions mate, and 9999 generations. The fitness function uses a +``for`` loop with 99 iterations just to have some calculations. In the +constructor of the ``pygad.GA`` class, ``parallel_processing=None`` +means no parallel processing is used. + +.. code:: python + + import pygad + import time + + def fitness_func(ga_instance, solution, solution_idx): + for _ in range(99): + pass + return 0 + + ga_instance = pygad.GA(num_generations=9999, + num_parents_mating=3, + sol_per_pop=5, + num_genes=10, + fitness_func=fitness_func, + suppress_warnings=True, + parallel_processing=None) + + if __name__ == '__main__': + t1 = time.time() + + ga_instance.run() + + t2 = time.time() + print("Time is", t2-t1) + +When parallel processing is not used, the time it takes to run the +genetic algorithm is ``1.5`` seconds. + +For comparison, let's do a second experiment where parallel +processing is used with 5 threads. In this case, it takes ``5`` seconds. + +.. code:: python + + ... + ga_instance = pygad.GA(..., + parallel_processing=5) + ... + +For the third experiment, processes instead of threads are used. Also, +only 99 generations are used instead of 9999. The time it takes is +``99`` seconds. + +.. code:: python + + ... + ga_instance = pygad.GA(num_generations=99, + ..., + parallel_processing=["process", 5]) + ... + +This is the summary of the 3 experiments: + +1. No parallel processing & 9999 generations: 1.5 seconds. + +2.
Parallel processing with 5 threads & 9999 generations: 5 seconds. + +3. Parallel processing with 5 processes & 99 generations: 99 seconds. + +Because the fitness function does not need much CPU time, the normal +processing takes the least time. Running processes for this simple +problem takes 99 seconds compared to only 5 seconds for threads because managing +processes is much heavier than managing threads. Thus, most of the CPU +time is for swapping the processes instead of executing the code. + +In the second example, the loop makes 99999999 iterations and only 5 +generations are used. With no parallelization, it takes 22 seconds. + +.. code:: python + + import pygad + import time + + def fitness_func(ga_instance, solution, solution_idx): + for _ in range(99999999): + pass + return 0 + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=3, + sol_per_pop=5, + num_genes=10, + fitness_func=fitness_func, + suppress_warnings=True, + parallel_processing=None) + + if __name__ == '__main__': + t1 = time.time() + ga_instance.run() + t2 = time.time() + print("Time is", t2-t1) + +It takes 15 seconds when 10 processes are used. + +.. code:: python + + ... + ga_instance = pygad.GA(..., + parallel_processing=["process", 10]) + ... + +This is compared to 20 seconds when 10 threads are used. + +.. code:: python + + ... + ga_instance = pygad.GA(..., + parallel_processing=["thread", 10]) + ... + +Based on the second example, using parallel processing with 10 processes +takes the least time because there is much CPU work done. Generally, +processes are preferred over threads when most of the work is on the +CPU. Threads are preferred over processes in some situations like doing +input/output operations. + +*Before releasing* `PyGAD +2.17.0 `__\ *,* +`László +Fazekas `__ +*wrote an article on parallelizing the fitness function with PyGAD. Check +it:* `How Genetic Algorithms Can Compete with Gradient Descent and +Backprop `__.
+ +Print Lifecycle Summary +======================= + +In `PyGAD +2.19.0 `__, +a new method called ``summary()`` is supported. It prints a Keras-like +summary of the PyGAD lifecycle showing the steps, callback functions, +parameters, etc. + +This method accepts the following parameters: + +- ``line_length=70``: An integer representing the length of a single + line in characters. + +- ``fill_character=" "``: A character to fill the lines. + +- ``line_character="-"``: A character for creating a line separator. + +- ``line_character2="="``: A secondary character to create a line + separator. + +- ``columns_equal_len=False``: Whether the table rows are split into + equal-sized columns or split according to the width needed. + +- ``print_step_parameters=True``: Whether to print extra parameters + about each step inside the step. If ``print_step_parameters=False`` + and ``print_parameters_summary=True``, then the parameters of each + step are printed at the end of the table. + +- ``print_parameters_summary=True``: Whether to print parameters + summary at the end of the table. If ``print_step_parameters=False``, + then the parameters of each step are printed at the end of the table + too. + +This is a quick example that creates an instance of the ``pygad.GA`` class. + +..
code:: python + + import pygad + import numpy + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + def genetic_fitness(solution, solution_idx): + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + def on_gen(ga): + pass + + def on_crossover_callback(a, b): + pass + + ga_instance = pygad.GA(num_generations=100, + num_parents_mating=10, + sol_per_pop=20, + num_genes=len(function_inputs), + on_crossover=on_crossover_callback, + on_generation=on_gen, + parallel_processing=2, + stop_criteria="reach_10", + fitness_batch_size=4, + crossover_probability=0.4, + fitness_func=genetic_fitness) + +Then call the ``summary()`` method to print the summary with the default +parameters. Note that entries for the crossover and generation callback +function are created because their callback functions are implemented +through the ``on_crossover_callback()`` and ``on_gen()``, respectively. + +.. code:: python + + ga_instance.summary() + +.. 
code:: bash + + ---------------------------------------------------------------------- + PyGAD Lifecycle + ====================================================================== + Step Handler Output Shape + ====================================================================== + Fitness Function genetic_fitness() (1) + Fitness batch size: 4 + ---------------------------------------------------------------------- + Parent Selection steady_state_selection() (10, 6) + Number of Parents: 10 + ---------------------------------------------------------------------- + Crossover single_point_crossover() (10, 6) + Crossover probability: 0.4 + ---------------------------------------------------------------------- + On Crossover on_crossover_callback() None + ---------------------------------------------------------------------- + Mutation random_mutation() (10, 6) + Mutation Genes: 1 + Random Mutation Range: (-1.0, 1.0) + Mutation by Replacement: False + Allow Duplicated Genes: True + ---------------------------------------------------------------------- + On Generation on_gen() None + Stop Criteria: [['reach', 10.0]] + ---------------------------------------------------------------------- + ====================================================================== + Population Size: (20, 6) + Number of Generations: 100 + Initial Population Range: (-4, 4) + Keep Elitism: 1 + Gene DType: [, None] + Parallel Processing: ['thread', 2] + Save Best Solutions: False + Save Solutions: False + ====================================================================== + +We can set the ``print_step_parameters`` and +``print_parameters_summary`` parameters to ``False`` to not print the +parameters. + +.. code:: python + + ga_instance.summary(print_step_parameters=False, + print_parameters_summary=False) + +.. 
code:: bash + + ---------------------------------------------------------------------- + PyGAD Lifecycle + ====================================================================== + Step Handler Output Shape + ====================================================================== + Fitness Function genetic_fitness() (1) + ---------------------------------------------------------------------- + Parent Selection steady_state_selection() (10, 6) + ---------------------------------------------------------------------- + Crossover single_point_crossover() (10, 6) + ---------------------------------------------------------------------- + On Crossover on_crossover_callback() None + ---------------------------------------------------------------------- + Mutation random_mutation() (10, 6) + ---------------------------------------------------------------------- + On Generation on_gen() None + ---------------------------------------------------------------------- + ====================================================================== + +Logging Outputs +=============== + +In `PyGAD +3.0.0 `__, +the ``print()`` statement is no longer used and the outputs are printed +using the `logging `__ +module. A new parameter called ``logger`` is supported to accept the +user-defined logger. + +.. code:: python + + import logging + + logger = ... + + ga_instance = pygad.GA(..., + logger=logger, + ...) + +The default value for this parameter is ``None``. If there is no logger +passed (i.e. ``logger=None``), then a default logger is created to log +the messages to the console exactly like how the ``print()`` statement +works. + +Some advantages of using the +`logging `__ module +instead of the ``print()`` statement are: + +1. The user has more control over the printed messages, especially if + there is a project that uses multiple modules where each module + prints its messages. A logger can organize the outputs. + +2.
Using the proper ``Handler``, the user can log the output messages to + files and not only restricted to printing it to the console. So, it + is much easier to record the outputs. + +3. The format of the printed messages can be changed by customizing the + ``Formatter`` assigned to the Logger. + +This section gives some quick examples to use the ``logging`` module and +then gives an example to use the logger with PyGAD. + +Logging to the Console +---------------------- + +This is an example to create a logger to log the messages to the +console. + +.. code:: python + + import logging + + # Create a logger + logger = logging.getLogger(__name__) + + # Set the logger level to debug so that all the messages are printed. + logger.setLevel(logging.DEBUG) + + # Create a stream handler to log the messages to the console. + stream_handler = logging.StreamHandler() + + # Set the handler level to debug. + stream_handler.setLevel(logging.DEBUG) + + # Create a formatter + formatter = logging.Formatter('%(message)s') + + # Add the formatter to handler. + stream_handler.setFormatter(formatter) + + # Add the stream handler to the logger + logger.addHandler(stream_handler) + +Now, we can log messages to the console with the format specified in the +``Formatter``. + +.. code:: python + + logger.debug('Debug message.') + logger.info('Info message.') + logger.warning('Warn message.') + logger.error('Error message.') + logger.critical('Critical message.') + +The outputs are identical to those returned using the ``print()`` +statement. + +.. code:: + + Debug message. + Info message. + Warn message. + Error message. + Critical message. + +By changing the format of the output messages, we can have more +information about each message. + +.. code:: python + + formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + +This is a sample output. + +.. code:: python + + 2023-04-03 18:46:27 DEBUG: Debug message. 
+ 2023-04-03 18:46:27 INFO: Info message. + 2023-04-03 18:46:27 WARNING: Warn message. + 2023-04-03 18:46:27 ERROR: Error message. + 2023-04-03 18:46:27 CRITICAL: Critical message. + +Note that you may need to clear the handlers after finishing the +execution. This is to make sure no cached handlers are used in the next +run. If the cached handlers are not cleared, then a single output +message may be repeated. + +.. code:: python + + logger.handlers.clear() + +Logging to a File +----------------- + +This is another example to log the messages to a file named +``logfile.txt``. The formatter prints the following about each message: + +1. The date and time at which the message is logged. + +2. The log level. + +3. The message. + +4. The path of the file. + +5. The line number of the log message. + +.. code:: python + + import logging + + level = logging.DEBUG + name = 'logfile.txt' + + logger = logging.getLogger(name) + logger.setLevel(level) + + file_handler = logging.FileHandler(name, 'a+', 'utf-8') + file_handler.setLevel(logging.DEBUG) + file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s - %(pathname)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S') + file_handler.setFormatter(file_format) + logger.addHandler(file_handler) + +This is what the outputs look like. + +.. code:: python + + 2023-04-03 18:54:03 DEBUG: Debug message. - c:\users\agad069\desktop\logger\example2.py:46 + 2023-04-03 18:54:03 INFO: Info message. - c:\users\agad069\desktop\logger\example2.py:47 + 2023-04-03 18:54:03 WARNING: Warn message. - c:\users\agad069\desktop\logger\example2.py:48 + 2023-04-03 18:54:03 ERROR: Error message. - c:\users\agad069\desktop\logger\example2.py:49 + 2023-04-03 18:54:03 CRITICAL: Critical message. - c:\users\agad069\desktop\logger\example2.py:50 + +Consider clearing the handlers if necessary. + +..
code:: python + + logger.handlers.clear() + +Log to Both the Console and a File +---------------------------------- + +This is an example to create a single Logger associated with 2 handlers: + +1. A file handler. + +2. A stream handler. + +.. code:: python + + import logging + + level = logging.DEBUG + name = 'logfile.txt' + + logger = logging.getLogger(name) + logger.setLevel(level) + + file_handler = logging.FileHandler(name,'a+','utf-8') + file_handler.setLevel(logging.DEBUG) + file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s - %(pathname)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S') + file_handler.setFormatter(file_format) + logger.addHandler(file_handler) + + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_format = logging.Formatter('%(message)s') + console_handler.setFormatter(console_format) + logger.addHandler(console_handler) + +When a log message is executed, then it is both printed to the console +and saved in the ``logfile.txt``. + +Consider clearing the handlers if necessary. + +.. code:: python + + logger.handlers.clear() + +PyGAD Example +------------- + +To use the logger in PyGAD, just create your custom logger and pass it +to the ``logger`` parameter. + +.. 
code:: python + + import logging + import pygad + import numpy + + level = logging.DEBUG + name = 'logfile.txt' + + logger = logging.getLogger(name) + logger.setLevel(level) + + file_handler = logging.FileHandler(name,'a+','utf-8') + file_handler.setLevel(logging.DEBUG) + file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + file_handler.setFormatter(file_format) + logger.addHandler(file_handler) + + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_format = logging.Formatter('%(message)s') + console_handler.setFormatter(console_format) + logger.addHandler(console_handler) + + equation_inputs = [4, -2, 8] + desired_output = 2671.1234 + + def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + def on_generation(ga_instance): + ga_instance.logger.info(f"Generation = {ga_instance.generations_completed}") + ga_instance.logger.info(f"Fitness = {ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]}") + + ga_instance = pygad.GA(num_generations=10, + sol_per_pop=40, + num_parents_mating=2, + keep_parents=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + on_generation=on_generation, + logger=logger) + ga_instance.run() + + logger.handlers.clear() + +By executing this code, the logged messages are printed to the console +and also saved in the text file. + +.. 
code:: python + + 2023-04-03 19:04:27 INFO: Generation = 1 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038086960368076276 + 2023-04-03 19:04:27 INFO: Generation = 2 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038214871408010853 + 2023-04-03 19:04:27 INFO: Generation = 3 + 2023-04-03 19:04:27 INFO: Fitness = 0.0003832795907974678 + 2023-04-03 19:04:27 INFO: Generation = 4 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038398612055017196 + 2023-04-03 19:04:27 INFO: Generation = 5 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038442348890867516 + 2023-04-03 19:04:27 INFO: Generation = 6 + 2023-04-03 19:04:27 INFO: Fitness = 0.0003854406039137763 + 2023-04-03 19:04:27 INFO: Generation = 7 + 2023-04-03 19:04:27 INFO: Fitness = 0.00038646083174063284 + 2023-04-03 19:04:27 INFO: Generation = 8 + 2023-04-03 19:04:27 INFO: Fitness = 0.0003875169193024936 + 2023-04-03 19:04:27 INFO: Generation = 9 + 2023-04-03 19:04:27 INFO: Fitness = 0.0003888816727311021 + 2023-04-03 19:04:27 INFO: Generation = 10 + 2023-04-03 19:04:27 INFO: Fitness = 0.000389832593101348 + +Solve Non-Deterministic Problems +================================ + +PyGAD can be used to solve both deterministic and non-deterministic +problems. Deterministic are those that return the same fitness for the +same solution. For non-deterministic problems, a different fitness value +would be returned for the same solution. + +By default, PyGAD settings are set to solve deterministic problems. +PyGAD can save the explored solutions and their fitness to reuse in the +future. These instances attributes can save the solutions: + +1. ``solutions``: Exists if ``save_solutions=True``. + +2. ``best_solutions``: Exists if ``save_best_solutions=True``. + +3. ``last_generation_elitism``: Exists if ``keep_elitism`` > 0. + +4. ``last_generation_parents``: Exists if ``keep_parents`` > 0 or + ``keep_parents=-1``. + +To configure PyGAD for non-deterministic problems, we have to disable +saving the previous solutions. 
This is by setting these parameters: + +1. ``keep_elitism=0`` + +2. ``keep_parents=0`` + +3. ``save_solutions=False`` + +4. ``save_best_solutions=False`` + +.. code:: python + + import pygad + ... + ga_instance = pygad.GA(..., + keep_elitism=0, + keep_parents=0, + save_solutions=False, + save_best_solutions=False, + ...) + +This way PyGAD will not save any explored solution and thus the fitness +function has to be called for each individual solution. + +Reuse the Fitness instead of Calling the Fitness Function +========================================================= + +It may happen that a previously explored solution in generation X is +explored again in another generation Y (where Y > X). For some problems, +calling the fitness function takes much time. + +For deterministic problems, it is better to not call the fitness +function for already explored solutions. Instead, reuse the fitness +of the old solution. PyGAD supports some options to help you save time +calling the fitness function for a previously explored solution. + +The parameters explored in this section can be set in the constructor of +the ``pygad.GA`` class. + +The ``cal_pop_fitness()`` method of the ``pygad.GA`` class checks these +parameters to see if there is a possibility of reusing the fitness +instead of calling the fitness function. + +.. _1-savesolutions: + +1. ``save_solutions`` +--------------------- + +It defaults to ``False``. If set to ``True``, then the population of +each generation is saved into the ``solutions`` attribute of the +``pygad.GA`` instance. In other words, every single solution is saved in +the ``solutions`` attribute. + +.. _2-savebestsolutions: + +2. ``save_best_solutions`` +-------------------------- + +It defaults to ``False``. If ``True``, then it only saves the best +solution in every generation. + +.. _3-keepelitism: + +3. ``keep_elitism`` +------------------- + +It accepts an integer and defaults to 1.
If set to a positive integer, +then it keeps the elitism of one generation available in the next +generation. + +.. _4-keepparents: + +4. ``keep_parents`` +------------------- + +It accepts an integer and defaults to -1. If set to ``-1`` or a positive +integer, then it keeps the parents of one generation available in the +next generation. + +Why the Fitness Function is not Called for Solution at Index 0? +=============================================================== + +PyGAD has a parameter called ``keep_elitism`` which defaults to 1. This +parameter defines the number of best solutions in generation **X** to +keep in the next generation **X+1**. The best solutions are just copied +from generation **X** to generation **X+1** without making any change. + +.. code:: python + + ga_instance = pygad.GA(..., + keep_elitism=1, + ...) + +The best solutions are copied at the beginning of the population. If +``keep_elitism=1``, this means the best solution in generation X is kept +in the next generation X+1 at index 0 of the population. If +``keep_elitism=2``, this means the 2 best solutions in generation X are +kept in the next generation X+1 at indices 0 and 1 of the population of +generation X+1. + +Because the fitness values of these best solutions are already calculated in +generation X, their fitness values will not be recalculated at +generation X+1 (i.e. the fitness function will not be called for these +solutions again). Instead, their fitness values are just reused. This is +why you see that no solution with index 0 is passed to the fitness +function. + +To force calling the fitness function for each solution in every +generation, consider setting ``keep_elitism`` and ``keep_parents`` to 0. +Moreover, keep the 2 parameters ``save_solutions`` and +``save_best_solutions`` at their default value ``False``. + +.. code:: python + + ga_instance = pygad.GA(..., + keep_elitism=0, + keep_parents=0, + save_solutions=False, + save_best_solutions=False, + ...)
+ +Batch Fitness Calculation +========================= + +In `PyGAD +2.19.0 `__, +a new optional parameter called ``fitness_batch_size`` is supported +to calculate the fitness function in batches rather than for one +solution at a time. Thanks to `Linan +Qiu `__ for opening the `GitHub issue +#136 `__. + +Its values can be: + +- ``1`` or ``None``: If the ``fitness_batch_size`` parameter is + assigned the value ``1`` or ``None`` (default), then the normal flow + is used where the fitness function is called for each individual + solution. That is, if there are 15 solutions, then the fitness + function is called 15 times. + +- ``1 < fitness_batch_size <= sol_per_pop``: If the + ``fitness_batch_size`` parameter is assigned a value satisfying this + condition ``1 < fitness_batch_size <= sol_per_pop``, then the + solutions are grouped into batches of size ``fitness_batch_size`` and + the fitness function is called once for each batch. In this case, the + fitness function must return a list/tuple/numpy.ndarray with a length + equal to the number of solutions passed. + +.. _example-without-fitnessbatchsize-parameter: + +Example without ``fitness_batch_size`` Parameter +------------------------------------------------ + +This is an example where the ``fitness_batch_size`` parameter is given +the value ``None`` (which is the default value). This is equivalent to +using the value ``1``. In this case, the fitness function will be called +for each solution. This means the fitness function ``fitness_func`` will +receive only a single solution. This is an example of the passed +arguments to the fitness function: + +.. code:: + + solution: [ 2.52860734, -0.94178795, 2.97545704, 0.84131987, -3.78447118, 2.41008358] + solution_idx: 3 + +The fitness function also must return a single numeric value as the +fitness for the passed solution. + +As we have a population of ``20`` solutions, the fitness function +is called 20 times per generation.
For 5 generations, the fitness +function is called ``20*5 = 100`` times. In PyGAD, the fitness function +is called after the last generation too and this adds an additional 20 +calls. So, the total number of calls to the fitness function is +``20*5 + 20 = 120``. + +Note that the ``keep_elitism`` and ``keep_parents`` parameters are set +to ``0`` to make sure no fitness values are reused and to force calling +the fitness function for each individual solution. + +.. code:: python + + import pygad + import numpy + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + number_of_calls = 0 + + def fitness_func(ga_instance, solution, solution_idx): + global number_of_calls + number_of_calls = number_of_calls + 1 + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=10, + sol_per_pop=20, + fitness_func=fitness_func, + fitness_batch_size=None, + # fitness_batch_size=1, + num_genes=len(function_inputs), + keep_elitism=0, + keep_parents=0) + + ga_instance.run() + print(number_of_calls) + +.. code:: + + 120 + +.. _example-with-fitnessbatchsize-parameter: + +Example with ``fitness_batch_size`` Parameter +--------------------------------------------- + +This is an example where the ``fitness_batch_size`` parameter is used +and assigned the value ``4``. This means the solutions will be grouped +into batches of ``4`` solutions. The fitness function will be called +once for each batch (i.e. called once for every 4 solutions). + +This is an example of the arguments passed to it: + +..
code:: python + + solutions: + [[ 3.1129432 -0.69123589 1.93792414 2.23772968 -1.54616001 -0.53930799] + [ 3.38508121 0.19890812 1.93792414 2.23095014 -3.08955597 3.10194128] + [ 2.37079504 -0.88819803 2.97545704 1.41742256 -3.95594055 2.45028256] + [ 2.52860734 -0.94178795 2.97545704 0.84131987 -3.78447118 2.41008358]] + solutions_indices: + [16, 17, 18, 19] + +As we have 20 solutions, there are ``20/4 = 5`` batches. As a +result, the fitness function is called only 5 times per generation +instead of 20. For each call to the fitness function, it receives a +batch of 4 solutions. + +As we have 5 generations, the function will be called ``5*5 = 25`` +times. Given the call to the fitness function after the last generation, +the total number of calls is ``5*5 + 5 = 30``. + +.. code:: python + + import pygad + import numpy + + function_inputs = [4,-2,3.5,5,-11,-4.7] + desired_output = 44 + + number_of_calls = 0 + + def fitness_func_batch(ga_instance, solutions, solutions_indices): + global number_of_calls + number_of_calls = number_of_calls + 1 + batch_fitness = [] + for solution in solutions: + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + batch_fitness.append(fitness) + return batch_fitness + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=10, + sol_per_pop=20, + fitness_func=fitness_func_batch, + fitness_batch_size=4, + num_genes=len(function_inputs), + keep_elitism=0, + keep_parents=0) + + ga_instance.run() + print(number_of_calls) + +.. code:: + + 30 + +When batch fitness calculation is used, we save ``120 - 30 = 90`` +calls to the fitness function. + +Use Functions and Methods to Build Fitness and Callbacks +======================================================== + +In PyGAD 2.19.0, it is possible to pass user-defined functions or +methods to the following parameters: + +1. ``fitness_func`` + +2. ``on_start`` + +3. ``on_fitness`` + +4. ``on_parents`` + +5.
``on_crossover`` + +6. ``on_mutation`` + +7. ``on_generation`` + +8. ``on_stop`` + +This section gives 2 examples of assigning these parameters user-defined: + +1. Functions. + +2. Methods. + +Assign Functions +---------------- + +This is a dummy example where the fitness function returns a random +value. Note that the instance of the ``pygad.GA`` class is passed as the +first parameter of all functions. + +.. code:: python + + import pygad + import numpy + + def fitness_func(ga_instanse, solution, solution_idx): + return numpy.random.rand() + + def on_start(ga_instanse): + print("on_start") + + def on_fitness(ga_instanse, last_gen_fitness): + print("on_fitness") + + def on_parents(ga_instanse, last_gen_parents): + print("on_parents") + + def on_crossover(ga_instanse, last_gen_offspring): + print("on_crossover") + + def on_mutation(ga_instanse, last_gen_offspring): + print("on_mutation") + + def on_generation(ga_instanse): + print("on_generation\n") + + def on_stop(ga_instanse, last_gen_fitness): + print("on_stop") + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=4, + sol_per_pop=10, + num_genes=2, + on_start=on_start, + on_fitness=on_fitness, + on_parents=on_parents, + on_crossover=on_crossover, + on_mutation=on_mutation, + on_generation=on_generation, + on_stop=on_stop, + fitness_func=fitness_func) + + ga_instance.run() + +Assign Methods +-------------- + +The next example has all the methods defined inside the class ``Test``. +All of the methods accept an additional parameter representing the +method's object of the class ``Test``. + +All methods accept ``self`` as the first parameter and the instance of +the ``pygad.GA`` class as the second parameter. + +..
code:: python + + import pygad + import numpy + + class Test: + def fitness_func(self, ga_instanse, solution, solution_idx): + return numpy.random.rand() + + def on_start(self, ga_instanse): + print("on_start") + + def on_fitness(self, ga_instanse, last_gen_fitness): + print("on_fitness") + + def on_parents(self, ga_instanse, last_gen_parents): + print("on_parents") + + def on_crossover(self, ga_instanse, last_gen_offspring): + print("on_crossover") + + def on_mutation(self, ga_instanse, last_gen_offspring): + print("on_mutation") + + def on_generation(self, ga_instanse): + print("on_generation\n") + + def on_stop(self, ga_instanse, last_gen_fitness): + print("on_stop") + + ga_instance = pygad.GA(num_generations=5, + num_parents_mating=4, + sol_per_pop=10, + num_genes=2, + on_start=Test().on_start, + on_fitness=Test().on_fitness, + on_parents=Test().on_parents, + on_crossover=Test().on_crossover, + on_mutation=Test().on_mutation, + on_generation=Test().on_generation, + on_stop=Test().on_stop, + fitness_func=Test().fitness_func) + + ga_instance.run() diff --git a/docs/source/releases.rst b/docs/source/releases.rst index 44e95a3..1ca8181 100644 --- a/docs/source/releases.rst +++ b/docs/source/releases.rst @@ -250,7 +250,7 @@ Release date: 19 July 2020 values. It is useful if the gene space is restricted to a certain range or to discrete values. For more information, check the `More about the ``gene_space`` - Parameter `__ + Parameter `__ section. Thanks to `Prof. Tamer A. Farrag `__ for requesting this useful feature. @@ -416,7 +416,7 @@ Release Date: 03 January 2021 2. Support of adaptive mutation where the mutation rate is determined by the fitness value of each solution. Read the `Adaptive - Mutation `__ + Mutation `__ section for more details. Also, read this paper: `Libelli, S. Marsili, and P. Alba. "Adaptive mutation in genetic algorithms." 
Soft computing 4.2 (2000): @@ -525,7 +525,7 @@ Release Date: 16 February 2021 to 5 (exclusive) while the second one has values between 0.2 (inclusive) and 0.85 (exclusive). For more information, please check the `Limit the Gene Value - Range `__ + Range `__ section of the documentation. 2. The ``plot_result()`` method returns the figure so that the user can @@ -594,7 +594,7 @@ Release Date: 12 March 2021 solution/chromosome may have duplicate gene values. If ``False``, then each gene will have a unique value in its solution. Check the `Prevent Duplicates in Gene - Values `__ + Values `__ section for more details. 2. The ``last_generation_fitness`` is updated at the end of each @@ -624,7 +624,7 @@ Release Date: 19 May 2021 Previously, the ``gene_type`` can be assigned only to a single data type that is applied for all genes. For more information, check the `More about the ``gene_type`` - Parameter `__ + Parameter `__ section. Thanks to `Rainer Engel `__ for asking about this feature in `this @@ -721,7 +721,7 @@ Release Date: 17 June 2021 ``{"low": 0, "high": 30, "step": 2}`` to have only even values for the gene(s) starting from 0 to 30. For more information, check the `More about the ``gene_space`` - Parameter `__ + Parameter `__ section. https://github.com/ahmedfgad/GeneticAlgorithmPython/discussions/48 @@ -794,7 +794,7 @@ Release Date: 17 June 2021 ``[float, 2]`` forces a gene with a value like ``0.1234`` to be ``0.12``. For more information, check the `More about the ``gene_type`` - Parameter `__ + Parameter `__ section. .. _pygad-2151: @@ -829,7 +829,7 @@ Release Date: 19 June 2021 ``pygad.GA`` class to create a custom mutation, crossover, and parent selection operators. Check the `User-Defined Crossover, Mutation, and Parent Selection - Operators `__ + Operators `__ section for more details. https://github.com/ahmedfgad/GeneticAlgorithmPython/discussions/50 @@ -994,7 +994,7 @@ Release Date: 8 July 2022 issue `#78 `__ at GitHub. 
Check the `Parallel Processing in - PyGAD `__ + PyGAD `__ section for more information and examples. .. _pygad-2180: @@ -1021,7 +1021,7 @@ Release Date: 9 September 2022 the ``run()`` method is called, as the old fitness value are shown on the graph alongside with the new fitness values. Read more in this section: `Continue without Loosing - Progress `__ + Progress `__ 4. Thanks `Prof. Fernando Jiménez Barrionuevo `__ (Dept. of Information and @@ -1037,7 +1037,7 @@ Release Date: 9 September 2022 ``keep_elitism``. It defaults to 1 which means for each generation keep only the best solution in the next generation. If assigned 0, then it has no effect. Read more in this section: `Elitism - Selection `__. + Selection `__. https://github.com/ahmedfgad/GeneticAlgorithmPython/issues/74 7. A new instance attribute named ``last_generation_elitism`` added to @@ -1048,7 +1048,7 @@ Release Date: 9 September 2022 https://github.com/ahmedfgad/GeneticAlgorithmPython/issues/70 and `Prof. Fernando Jiménez Barrionuevo `__. Read more in this section: `Random - Seed `__. + Seed `__. 9. Editing the ``pygad.TorchGA`` module to make sure the tensor data is moved from GPU to CPU. Thanks to Rasmus Johansson for opening this @@ -1226,7 +1226,7 @@ Release Date 8 April 2023 the console and text file instead of using the ``print()`` function. This is by assigning the ``logging.Logger`` to the new ``logger`` parameter. Check the `Logging - Outputs `__ + Outputs `__ for more information. 8. A new instance attribute called ``logger`` to save the logger. @@ -1235,14 +1235,14 @@ Release Date 8 April 2023 a new parameter that refers to the instance of the ``pygad.GA`` class. Check this for an example: `Use Functions and Methods to Build Fitness Function and - Callbacks `__. + Callbacks `__. https://github.com/ahmedfgad/GeneticAlgorithmPython/issues/163 10. Update the documentation to include an example of using functions and methods to calculate the fitness and build callbacks. 
Check this for more details: `Use Functions and Methods to Build Fitness Function and - Callbacks `__. + Callbacks `__. https://github.com/ahmedfgad/GeneticAlgorithmPython/pull/92#issuecomment-1443635003 11. Validate the value passed to the ``initial_population`` parameter. @@ -1362,7 +1362,7 @@ Release Date 20 June 2023 value of one gene by another gene. This release tries to solve such duplicates by looking for a third gene that will help in solving the duplicates. Check `this - section `__ + section `__ for more information. 14. Use probabilities to select parents using the rank parent selection @@ -1396,7 +1396,7 @@ Release Date 20 June 2023 19. The documentation is updated to explain how mutation works when ``gene_space`` is used with ``int`` or ``float`` data types. Check `this - section `__. + section `__. https://github.com/ahmedfgad/GeneticAlgorithmPython/discussions/198 .. _pygad-320: @@ -1410,14 +1410,18 @@ Release Date 6 September 2023 class that includes the functionalities of NSGA-II. The class has these methods: 1) ``get_non_dominated_set()`` 2) ``non_dominated_sorting()`` 3) ``crowding_distance()`` 4) - ``sort_solutions_nsga2()`` + ``sort_solutions_nsga2()``. Check `this + section `__ + for an example. 2. Support of multi-objective optimization using Non-Dominated Sorting Genetic Algorithm II (NSGA-II) using the ``NSGA2`` class in the ``pygad.utils.nsga2`` module. Just return a ``list``, ``tuple``, or ``numpy.ndarray`` from the fitness function and the library will consider the problem as multi-objective optimization. All the - objectives are expected to be maximization. + objectives are expected to be maximization. Check `this + section `__ + for an example. 3. The parent selection methods and adaptive mutation are edited to support multi-objective optimization. 
diff --git a/examples/example_multi_objective.py b/examples/example_multi_objective.py index 479ba9b..b6053a2 100644 --- a/examples/example_multi_objective.py +++ b/examples/example_multi_objective.py @@ -53,7 +53,7 @@ def on_generation(ga_instance): # Running the GA to optimize the parameters of the function. ga_instance.run() -ga_instance.plot_fitness() +ga_instance.plot_fitness(labels=['Obj 1', 'Obj 2']) # Returning the details of the best solution. solution, solution_fitness, solution_idx = ga_instance.best_solution(ga_instance.last_generation_fitness) diff --git a/pygad/pygad.py b/pygad/pygad.py index d0a124c..db17751 100644 --- a/pygad/pygad.py +++ b/pygad/pygad.py @@ -1695,7 +1695,7 @@ def cal_pop_fitness(self): # This is a multi-objective optimization problem. pass else: - raise ValueError(f"The fitness function should return a number but the value {fitness} of type {type(fitness)} found.") + raise ValueError(f"The fitness function should return a number or an iterable (list, tuple, or numpy.ndarray) but the value {fitness} of type {type(fitness)} found.") else: # Reaching this point means that batch processing is in effect to calculate the fitness values. # Do not continue the loop as no fitness is calculated. The fitness will be calculated later in batch mode. @@ -1736,7 +1736,7 @@ def cal_pop_fitness(self): # This is a multi-objective optimization problem. pop_fitness[index] = fitness else: - raise ValueError(f"The fitness function should return a number but the value {fitness} of type {type(fitness)} found.") + raise ValueError(f"The fitness function should return a number or an iterable (list, tuple, or numpy.ndarray) but the value {fitness} of type {type(fitness)} found.") else: # Calculating the fitness value of each solution in the current population. for sol_idx, sol in enumerate(self.population): @@ -1803,7 +1803,7 @@ def cal_pop_fitness(self): # This is a multi-objective optimization problem. 
pop_fitness[index] = fitness else: - raise ValueError(f"The fitness function should return a number but the value {fitness} of type {type(fitness)} found.") + raise ValueError(f"The fitness function should return a number or an iterable (list, tuple, or numpy.ndarray) but the value {fitness} of type {type(fitness)} found.") else: # Reaching this block means that batch processing is used. The fitness values are calculated in batches. @@ -1840,7 +1840,7 @@ def cal_pop_fitness(self): # This is a multi-objective optimization problem. pop_fitness[index] = fitness else: - raise ValueError(f"The fitness function should return a number but the value ({fitness}) of type {type(fitness)} found.") + raise ValueError(f"The fitness function should return a number or an iterable (list, tuple, or numpy.ndarray) but the value ({fitness}) of type {type(fitness)} found.") pop_fitness = numpy.array(pop_fitness) except Exception as ex: diff --git a/pygad/utils/nsga2.py b/pygad/utils/nsga2.py index c6a93ba..2b56b34 100644 --- a/pygad/utils/nsga2.py +++ b/pygad/utils/nsga2.py @@ -2,6 +2,7 @@ import pygad class NSGA2: + def __init__(): pass @@ -32,20 +33,21 @@ def get_non_dominated_set(self, curr_solutions): for idx2, sol2 in enumerate(curr_solutions): if idx1 == idx2: continue + # Zipping the 2 solutions so the corresponding genes are in the same list. # The returned array is of size (N, 2) where N is the number of genes. two_solutions = numpy.array(list(zip(sol1[1], sol2[1]))) - - #TODO Consider repacing < by > for maximization problems. - # Checking for if any solution dominates the current solution by applying the 2 conditions. - # le_eq (less than or equal): All elements must be True. - # le (less than): Only 1 element must be True. - le_eq = two_solutions[:, 1] >= two_solutions[:, 0] - le = two_solutions[:, 1] > two_solutions[:, 0] - + + # Use < for minimization problems and > for maximization problems. 
+ # Checking if any solution dominates the current solution by applying the 2 conditions. + # gr_eq (greater than or equal): All elements must be True. + # gr (greater than): Only 1 element must be True. + gr_eq = two_solutions[:, 1] >= two_solutions[:, 0] + gr = two_solutions[:, 1] > two_solutions[:, 0] + # If the 2 conditions hold, then a solution dominates the current solution. # The current solution is not considered a member of the dominated set. - if le_eq.all() and le.any(): + if gr_eq.all() and gr.any(): # Set the is_dominated flag to False to NOT insert the current solution in the current dominated set. # Instead, insert it into the non-dominated set. is_dominated = False @@ -77,6 +79,15 @@ def non_dominated_sorting(self, fitness): An array of the pareto fronts. """ + + # Verify that the problem is multi-objective optimization as non-dominated sorting is only applied to multi-objective problems. + if type(fitness[0]) in [list, tuple, numpy.ndarray]: + pass + elif type(fitness[0]) in self.supported_int_float_types: + raise TypeError('Non-dominated sorting is only applied when optimizing multi-objective problems.\n\nBut a single-objective optimization problem found as the fitness function returns a single numeric value.\n\nTo use multi-objective optimization, consider returning an iterable of any of these data types:\n1)list\n2)tuple\n3)numpy.ndarray') + else: + raise TypeError(f'Non-dominated sorting is only applied when optimizing multi-objective problems. \n\nTo use multi-objective optimization, consider returning an iterable of any of these data types:\n1)list\n2)tuple\n3)numpy.ndarray\n\nBut the data type {type(fitness[0])} found.') + # A list of all non-dominated sets. 
pareto_fronts = [] diff --git a/pygad/utils/parent_selection.py b/pygad/utils/parent_selection.py index 4016ca1..ac293c2 100644 --- a/pygad/utils/parent_selection.py +++ b/pygad/utils/parent_selection.py @@ -331,7 +331,13 @@ def tournament_selection_nsga2(self, parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0]) else: parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object) - + + # Verify that the problem is multi-objective optimization as the tournament NSGA-II selection is only applied to multi-objective problems. + if type(fitness[0]) in [list, tuple, numpy.ndarray]: + pass + elif type(fitness[0]) in self.supported_int_float_types: + raise ValueError('The tournament NSGA-II parent selection operator is only applied when optimizing multi-objective problems.\n\nBut a single-objective optimization problem found as the fitness function returns a single numeric value.\n\nTo use multi-objective optimization, consider returning an iterable of any of these data types:\n1)list\n2)tuple\n3)numpy.ndarray') + # The indices of the selected parents. parents_indices = [] @@ -427,7 +433,7 @@ def nsga2_selection(self, fitness, num_parents ): - + """ Select the parents using the Non-Dominated Sorting Genetic Algorithm II (NSGA-II). The selection is done using non-dominated sorting and crowding distance. @@ -443,17 +449,23 @@ def nsga2_selection(self, -num_parents: The number of parents to be selected. -pareto_fronts: A nested array of all the pareto fronts. Each front has its solutions. -solutions_fronts_indices: A list of the pareto front index of each solution in the current population. - + It returns: -An array of the selected parents. -The indices of the selected solutions. 
""" - + if self.gene_type_single == True: parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0]) else: parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object) - + + # Verify that the problem is multi-objective optimization as the NSGA-II selection is only applied to multi-objective problems. + if type(fitness[0]) in [list, tuple, numpy.ndarray]: + pass + elif type(fitness[0]) in self.supported_int_float_types: + raise ValueError('The NSGA-II parent selection operator is only applied when optimizing multi-objective problems.\n\nBut a single-objective optimization problem found as the fitness function returns a single numeric value.\n\nTo use multi-objective optimization, consider returning an iterable of any of these data types:\n1)list\n2)tuple\n3)numpy.ndarray') + # The indices of the selected parents. parents_indices = [] From fedac6bd530e831d4b268ee2bb15d09c158b9e91 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Thu, 7 Sep 2023 15:46:00 -0400 Subject: [PATCH 23/25] Update docs --- docs/source/pygad_more.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/pygad_more.rst b/docs/source/pygad_more.rst index 3a97840..33c3442 100644 --- a/docs/source/pygad_more.rst +++ b/docs/source/pygad_more.rst @@ -359,10 +359,10 @@ value out of these 3 values. None] Solution: [1, 5] -For a solution like ``[1, -0.5, 4]``, then mutation happens for the -first gene by simply replacing its current value by a randomly selected -value (other than its current value if possible). So, the value 1 will -be replaced by either 2 or 3. +For a solution like ``[1, 5]``, then mutation happens for the first gene +by simply replacing its current value by a randomly selected value +(other than its current value if possible). So, the value 1 will be +replaced by either 2 or 3. For the second gene, its space is set to ``None``. 
So, traditional mutation happens for this gene by: From d6890c5e24366361f6d269759a05e08df91d38fd Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Thu, 7 Sep 2023 20:26:14 -0400 Subject: [PATCH 24/25] Update docs --- docs/source/index.rst | 44 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 8d1edc7..baecf3c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -300,9 +300,23 @@ visualize Module - .. _header-n8: +helper Module +=============== + + +.. toctree:: + :maxdepth: 4 + :caption: helper Module TOC + + helper.rst + + + + +.. _header-n9: + pygad.nn Module =============== @@ -317,7 +331,7 @@ pygad.nn Module -.. _header-n9: +.. _header-n10: pygad.gann Module ================= @@ -337,7 +351,7 @@ pygad.gann Module -.. _header-n10: +.. _header-n11: pygad.cnn Module ================= @@ -351,7 +365,7 @@ pygad.cnn Module -.. _header-n11: +.. _header-n12: pygad.gacnn Module ================= @@ -366,7 +380,7 @@ pygad.gacnn Module -.. _header-n12: +.. _header-n13: pygad.kerasga Module ================= @@ -381,7 +395,7 @@ pygad.kerasga Module -.. _header-n13: +.. _header-n14: pygad.torchga Module ================= @@ -394,9 +408,7 @@ pygad.torchga Module torchga.rst - - -.. _header-n14: +.. _header-n15: Releases ================= @@ -410,20 +422,6 @@ Releases -.. _header-n15: - -helper Module -================= - - -.. 
toctree:: - :maxdepth: 4 - :caption: helper Module TOC - - helper.rst - - - Indices and tables ================== From d50f118367a380868b064a21baff56a141573382 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Thu, 7 Sep 2023 20:26:44 -0400 Subject: [PATCH 25/25] Remove NSGA-II temp files --- .../non_dominant_sorting_crowding_distance.py | 494 ------------------ 1 file changed, 494 deletions(-) delete mode 100644 NSGA-II/non_dominant_sorting_crowding_distance.py diff --git a/NSGA-II/non_dominant_sorting_crowding_distance.py b/NSGA-II/non_dominant_sorting_crowding_distance.py deleted file mode 100644 index 4563cc1..0000000 --- a/NSGA-II/non_dominant_sorting_crowding_distance.py +++ /dev/null @@ -1,494 +0,0 @@ -import numpy - -fitness = numpy.array([[20, 2.2], - [60, 4.4], - [65, 3.5], - [15, 4.4], - [55, 4.5], - [50, 1.8], - [80, 4.0], - [25, 4.6]]) - -# fitness = numpy.array([20, -# 60, -# 65, -# 15, -# 55, -# 50, -# 80, -# 25]) - -# fitness = numpy.array([[20], -# [60], -# [65], -# [15], -# [55], -# [50], -# [80], -# [25]]) - -def get_non_dominated_set(curr_solutions): - """ - Get the set of non-dominated solutions from the current set of solutions. - - Parameters - ---------- - curr_solutions : TYPE - The set of solutions to find its non-dominated set. - - Returns - ------- - dominated_set : TYPE - A set of the dominated solutions. - non_dominated_set : TYPE - A set of the non-dominated set. - - """ - # List of the members of the current dominated pareto front/set. - dominated_set = [] - # List of the non-members of the current dominated pareto front/set. - non_dominated_set = [] - for idx1, sol1 in enumerate(curr_solutions): - # Flag indicates whether the solution is a member of the current dominated set. - is_dominated = True - for idx2, sol2 in enumerate(curr_solutions): - if idx1 == idx2: - continue - # Zipping the 2 solutions so the corresponding genes are in the same list. - # The returned array is of size (N, 2) where N is the number of genes. 
- two_solutions = numpy.array(list(zip(sol1[1], sol2[1]))) - - #TODO Consider repacing < by > for maximization problems. - # Checking for if any solution dominates the current solution by applying the 2 conditions. - # le_eq (less than or equal): All elements must be True. - # le (less than): Only 1 element must be True. - le_eq = two_solutions[:, 1] <= two_solutions[:, 0] - le = two_solutions[:, 1] < two_solutions[:, 0] - - # If the 2 conditions hold, then a solution dominates the current solution. - # The current solution is not considered a member of the dominated set. - if le_eq.all() and le.any(): - # Set the is_dominated flag to False to NOT insert the current solution in the current dominated set. - # Instead, insert it into the non-dominated set. - is_dominated = False - non_dominated_set.append(sol1) - break - else: - # Reaching here means the solution does not dominate the current solution. - pass - - # If the flag is True, then no solution dominates the current solution. - if is_dominated: - dominated_set.append(sol1) - - # Return the dominated and non-dominated sets. - return dominated_set, non_dominated_set - -def non_dominated_sorting(fitness): - """ - Apply the non-dominant sorting over the fitness to create the pareto fronts based on non-dominaned sorting of the solutions. - - Parameters - ---------- - fitness : TYPE - An array of the population fitness across all objective function. - - Returns - ------- - pareto_fronts : TYPE - An array of the pareto fronts. - - """ - # A list of all non-dominated sets. - pareto_fronts = [] - - # The remaining set to be explored for non-dominance. - # Initially it is set to the entire population. - # The solutions of each non-dominated set are removed after each iteration. - remaining_set = fitness.copy() - - # Zipping the solution index with the solution's fitness. - # This helps to easily identify the index of each solution. - # Each element has: - # 1) The index of the solution. 
- # 2) An array of the fitness values of this solution across all objectives. - # remaining_set = numpy.array(list(zip(range(0, fitness.shape[0]), non_dominated_set))) - remaining_set = list(zip(range(0, fitness.shape[0]), remaining_set)) - - # A list mapping the index of each pareto front to the set of solutions in this front. - solutions_fronts_indices = [-1]*len(remaining_set) - solutions_fronts_indices = numpy.array(solutions_fronts_indices) - - # Index of the current pareto front. - front_index = -1 - while len(remaining_set) > 0: - front_index += 1 - - # Get the current non-dominated set of solutions. - pareto_front, remaining_set = get_non_dominated_set(curr_solutions=remaining_set) - pareto_front = numpy.array(pareto_front, dtype=object) - pareto_fronts.append(pareto_front) - - solutions_indices = pareto_front[:, 0].astype(int) - solutions_fronts_indices[solutions_indices] = front_index - - return pareto_fronts, solutions_fronts_indices - -def crowding_distance(pareto_front, fitness): - """ - Calculate the crowding dstance for all solutions in the current pareto front. - - Parameters - ---------- - pareto_front : TYPE - The set of solutions in the current pareto front. - fitness : TYPE - The fitness of the current population. - - Returns - ------- - obj_crowding_dist_list : TYPE - A nested list of the values for all objectives alongside their crowding distance. - crowding_dist_sum : TYPE - A list of the sum of crowding distances across all objectives for each solution. - crowding_dist_front_sorted_indices : TYPE - The indices of the solutions (relative to the current front) sorted by the crowding distance. - crowding_dist_pop_sorted_indices : TYPE - The indices of the solutions (relative to the population) sorted by the crowding distance. - """ - - # Each solution in the pareto front has 2 elements: - # 1) The index of the solution in the population. - # 2) A list of the fitness values for all objectives of the solution. 
- # Before proceeding, remove the indices from each solution in the pareto front. - pareto_front_no_indices = numpy.array([pareto_front[:, 1][idx] for idx in range(pareto_front.shape[0])]) - - # If there is only 1 solution, then return empty arrays for the crowding distance. - if pareto_front_no_indices.shape[0] == 1: - # There is only 1 index. - return numpy.array([]), numpy.array([]), numpy.array([0]), pareto_front[:, 0].astype(int) - - # An empty list holding info about the objectives of each solution. The info includes the objective value and crowding distance. - obj_crowding_dist_list = [] - # Loop through the objectives to calculate the crowding distance of each solution across all objectives. - for obj_idx in range(pareto_front_no_indices.shape[1]): - obj = pareto_front_no_indices[:, obj_idx] - # This variable has a nested list where each child list zip the following together: - # 1) The index of the objective value. - # 2) The objective value. - # 3) Initialize the crowding distance by zero. - obj = list(zip(range(len(obj)), obj, [0]*len(obj))) - obj = [list(element) for element in obj] - # This variable is the sorted version where sorting is done by the objective value (second element). - # Note that the first element is still the original objective index before sorting. - obj_sorted = sorted(obj, key=lambda x: x[1]) - - # Get the minimum and maximum values for the current objective. - obj_min_val = min(fitness[:, obj_idx]) - obj_max_val = max(fitness[:, obj_idx]) - denominator = obj_max_val - obj_min_val - # To avoid division by zero, set the denominator to a tiny value. - if denominator == 0: - denominator = 0.0000001 - - # Set the crowding distance to the first and last solutions (after being sorted) to infinity. - inf_val = float('inf') - # crowding_distance[0] = inf_val - obj_sorted[0][2] = inf_val - # crowding_distance[-1] = inf_val - obj_sorted[-1][2] = inf_val - - # If there are only 2 solutions in the current pareto front, then do not proceed. 
- # The crowding distance for such 2 solutions is infinity. - if len(obj_sorted) <= 2: - break - - for idx in range(1, len(obj_sorted)-1): - # Calculate the crowding distance. - crowding_dist = obj_sorted[idx+1][1] - obj_sorted[idx-1][1] - crowding_dist = crowding_dist / denominator - # Insert the crowding distance back into the list to override the initial zero. - obj_sorted[idx][2] = crowding_dist - - # Sort the objective by the original index at index 0 of the each child list. - obj_sorted = sorted(obj_sorted, key=lambda x: x[0]) - obj_crowding_dist_list.append(obj_sorted) - - obj_crowding_dist_list = numpy.array(obj_crowding_dist_list) - crowding_dist = numpy.array([obj_crowding_dist_list[idx, :, 2] for idx in range(len(obj_crowding_dist_list))]) - crowding_dist_sum = numpy.sum(crowding_dist, axis=0) - - # An array of the sum of crowding distances across all objectives. - # Each row has 2 elements: - # 1) The index of the solution. - # 2) The sum of all crowding distances for all objective of the solution. - crowding_dist_sum = numpy.array(list(zip(obj_crowding_dist_list[0, :, 0], crowding_dist_sum))) - crowding_dist_sum = sorted(crowding_dist_sum, key=lambda x: x[1], reverse=True) - - # The sorted solutions' indices by the crowding distance. - crowding_dist_front_sorted_indices = numpy.array(crowding_dist_sum)[:, 0] - crowding_dist_front_sorted_indices = crowding_dist_front_sorted_indices.astype(int) - # Note that such indices are relative to the front, NOT the population. - # It is mandatory to map such front indices to population indices before using them to refer to the population. 
- crowding_dist_pop_sorted_indices = pareto_front[:, 0] - crowding_dist_pop_sorted_indices = crowding_dist_pop_sorted_indices[crowding_dist_front_sorted_indices] - crowding_dist_pop_sorted_indices = crowding_dist_pop_sorted_indices.astype(int) - - return obj_crowding_dist_list, crowding_dist_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices - -def tournament_selection_nsga2(self, - fitness, - num_parents - # pareto_fronts, - # solutions_fronts_indices, - ): - - """ - Select the parents using the tournament selection technique for NSGA-II. - The traditional tournament selection uses the fitness values. But the tournament selection for NSGA-II uses non-dominated sorting and crowding distance. - Using non-dominated sorting, the solutions are distributed across pareto fronts. The fronts are given the indices 0, 1, 2, ..., N where N is the number of pareto fronts. The lower the index of the pareto front, the better its solutions. - To select the parents solutions, 2 solutions are selected randomly. If the 2 solutions are in different pareto fronts, then the solution comming from a pareto front with lower index is selected. - If 2 solutions are in the same pareto front, then crowding distance is calculated. The solution with the higher crowding distance is selected. - If the 2 solutions are in the same pareto front and have the same crowding distance, then a solution is randomly selected. - Later, the selected parents will mate to produce the offspring. - - It accepts 2 parameters: - -fitness: The fitness values for the current population. - -num_parents: The number of parents to be selected. - -pareto_fronts: A nested array of all the pareto fronts. Each front has its solutions. - -solutions_fronts_indices: A list of the pareto front index of each solution in the current population. - - It returns an array of the selected parents alongside their indices in the population. 
- """ - - if self.gene_type_single == True: - parents = numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0]) - else: - parents = numpy.empty((num_parents, self.population.shape[1]), dtype=object) - - # The indices of the selected parents. - parents_indices = [] - - # TODO If there is only a single objective, each pareto front is expected to have only 1 solution. - # TODO Make a test to check for that behaviour. - # Find the pareto fronts and the solutions' indicies in each front. - pareto_fronts, solutions_fronts_indices = non_dominated_sorting(fitness) - - # Randomly generate pairs of indices to apply for NSGA-II tournament selection for selecting the parents solutions. - rand_indices = numpy.random.randint(low=0.0, - high=len(solutions_fronts_indices), - size=(num_parents, self.K_tournament)) - # rand_indices[0, 0] = 5 - # rand_indices[0, 1] = 3 - # rand_indices[1, 0] = 1 - # rand_indices[1, 1] = 6 - - for parent_num in range(num_parents): - # Return the indices of the current 2 solutions. - current_indices = rand_indices[parent_num] - # Return the front index of the 2 solutions. - parent_fronts_indices = solutions_fronts_indices[current_indices] - - if parent_fronts_indices[0] < parent_fronts_indices[1]: - # If the first solution is in a lower pareto front than the second, then select it. - selected_parent_idx = current_indices[0] - elif parent_fronts_indices[0] > parent_fronts_indices[1]: - # If the second solution is in a lower pareto front than the first, then select it. - selected_parent_idx = current_indices[1] - else: - # The 2 solutions are in the same pareto front. - # The selection is made using the crowding distance. - - # A list holding the crowding distance of the current 2 solutions. It is initialized to -1. - solutions_crowding_distance = [-1, -1] - - # Fetch the current pareto front. - pareto_front = pareto_fronts[parent_fronts_indices[0]] # Index 1 can also be used. 
- - # If there is only 1 solution in the pareto front, just return it without calculating the crowding distance (it is useless). - if pareto_front.shape[0] == 1: - selected_parent_idx = current_indices[0] # Index 1 can also be used. - else: - # Reaching here means the pareto front has more than 1 solution. - - # Calculate the crowding distance of the solutions of the pareto front. - obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front=pareto_front.copy(), - fitness=fitness) - - # This list has the sorted front-based indices for the solutions in the current pareto front. - crowding_dist_front_sorted_indices = list(crowding_dist_front_sorted_indices) - # This list has the sorted population-based indices for the solutions in the current pareto front. - crowding_dist_pop_sorted_indices = list(crowding_dist_pop_sorted_indices) - - # Return the indices of the solutions from the pareto front. - solution1_idx = crowding_dist_pop_sorted_indices.index(current_indices[0]) - solution2_idx = crowding_dist_pop_sorted_indices.index(current_indices[1]) - - # Fetch the crowding distance using the indices. - solutions_crowding_distance[0] = crowding_distance_sum[solution1_idx][1] - solutions_crowding_distance[1] = crowding_distance_sum[solution2_idx][1] - - # # Instead of using the crowding distance, we can select the solution that comes first in the list. - # # Its limitation is that it is biased towards the low indexed solution if the 2 solutions have the same crowding distance. - # if solution1_idx < solution2_idx: - # # Select the first solution if it has higher crowding distance. - # selected_parent_idx = current_indices[0] - # else: - # # Select the second solution if it has higher crowding distance. 
def nsga2_selection(self,
                    fitness,
                    num_parents):

    """
    Select the parents using the Non-Dominated Sorting Genetic Algorithm II (NSGA-II).
    The selection is done using non-dominated sorting and crowding distance.
    Using non-dominated sorting, the solutions are distributed across pareto fronts. The fronts are given the indices 0, 1, 2, ..., N where N is the number of pareto fronts. The lower the index of the pareto front, the better its solutions.
    The parents are selected from the lower pareto fronts and moving up until selecting the number of desired parents.
    A solution from a pareto front X cannot be taken as a parent until all solutions in pareto front Y are selected given that Y < X.
    For a pareto front X, if only a subset of its solutions is needed, then the crowding distance is used to determine which solutions are selected from the front: the first entries of the population indices returned by crowding_distance() are taken (expected to be the solutions with the higher crowding distance).
    Later, the selected parents will mate to produce the offspring.

    It accepts 2 parameters:
        -fitness: The fitness values for the current population.
        -num_parents: The number of parents to be selected.

    It returns an array of the selected parents alongside their indices in the population.
    If all the pareto fronts together hold fewer than num_parents solutions, the trailing rows of the returned parents array are left uninitialized.
    """

    # Use the common gene data type when there is one. Otherwise, fall back to the generic object type.
    # The explicit comparison with True is kept on purpose to not change how non-boolean flags are treated.
    parents_dtype = self.gene_type[0] if self.gene_type_single == True else object
    parents = numpy.empty((num_parents, self.population.shape[1]), dtype=parents_dtype)

    # The indices of the selected parents.
    parents_indices = []

    # TODO If there is only a single objective, each pareto front is expected to have only 1 solution.
    # TODO Make a test to check for that behaviour.
    # Find the pareto fronts. The second returned value (the front index of each solution) is not needed here.
    pareto_fronts, _ = non_dominated_sorting(fitness)

    # The number of remaining parents to be selected.
    num_remaining_parents = num_parents

    # The row in the parents array where the next selected parent is inserted.
    # A running index is needed because the position of a solution inside its front says nothing about its row in the parents array.
    # Reusing the in-front position makes every front overwrite the rows filled by the previous fronts.
    current_parent_idx = 0

    for current_pareto_front in pareto_fronts:
        if num_remaining_parents == 0:
            # All the requested parents are already selected.
            break

        if num_remaining_parents >= len(current_pareto_front):
            # The entire front fits into the parents array.
            # The first column of the front holds the indices of its solutions in the population.
            selected_solutions_indices = current_pareto_front[:, 0]
        else:
            # Only a subset of the front is needed. Use the crowding distance to decide which solutions to take.
            # Only the last returned value (the population indices sorted by crowding distance) is used.
            _, _, _, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front=current_pareto_front.copy(),
                                                                          fitness=fitness)
            selected_solutions_indices = crowding_dist_pop_sorted_indices[0:num_remaining_parents]

        for selected_solution_idx in selected_solutions_indices:
            # Insert the parent into the next free row of the parents array.
            parents[current_parent_idx, :] = self.population[selected_solution_idx, :].copy()
            # Insert the index of the selected parent.
            parents_indices.append(selected_solution_idx)
            current_parent_idx += 1

        # Decrement the number of remaining parents by the number of parents just selected.
        num_remaining_parents -= len(selected_solutions_indices)

    # Make sure the parents indices is returned as a NumPy array.
    return parents, numpy.array(parents_indices)

# TODO If there is only a single objective, each pareto front is expected to have only 1 solution.
# TODO Make a test to check for that behaviour.
# Find the pareto fronts and the solutions' indices in each front.
# Distribute the solutions across the pareto fronts using their fitness values.
pareto_fronts, solutions_fronts_indices = non_dominated_sorting(fitness)
# # print('\nsolutions_fronts_indices\n', solutions_fronts_indices)
# for i, s in enumerate(pareto_fronts):
#     # print(f'Dominated Pareto Front Set {i+1}:\n{s}')
#     print(f'Dominated Pareto Front Indices {i+1}:\n{s[:, 0]}')
# print("\n\n\n--------------------")


# An empty class used only to carry attributes. Its instance is passed as the `self` argument of the selection functions below.
class Object:
    pass


# Build the stand-in instance and attach the attributes in a single step.
# K_tournament is presumably read by tournament_selection_nsga2(). Its body is not fully visible here.
obj = Object()
vars(obj).update(
    population=numpy.random.rand(8, 4),
    gene_type_single=True,
    gene_type=[float, 0],
    K_tournament=2,
)

# Select 4 parents using the tournament selection for NSGA-II and report their indices.
parents, parents_indices = tournament_selection_nsga2(
    self=obj,
    fitness=fitness,
    num_parents=4,
    # pareto_fronts=pareto_fronts,
    # solutions_fronts_indices=solutions_fronts_indices,
)
print(f'Tournament Parent Selection for NSGA-II - Indices: \n{parents_indices}')

# Select 4 parents using the NSGA-II selection and report their indices.
parents, parents_indices = nsga2_selection(
    self=obj,
    fitness=fitness,
    num_parents=4,
    # pareto_fronts=pareto_fronts,
    # solutions_fronts_indices=solutions_fronts_indices,
)
print(f'NSGA-II Parent Selection - Indices: \n{parents_indices}')

# Disabled exploration code: the crowding distance of every pareto front.
# for idx in range(len(pareto_fronts)):
#     # Fetch the current pareto front.
#     pareto_front = pareto_fronts[idx]
#     obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front=pareto_front.copy(),
#                                                                                                                                                  fitness=fitness)
#     print('Front IDX', crowding_dist_front_sorted_indices)
#     print('POP IDX ', crowding_dist_pop_sorted_indices)
#     print(f'Sorted Sum of Crowd Dists\n{crowding_distance_sum}')

# Disabled exploration code: the crowding distance of the first pareto front only.
# # Fetch the current pareto front.
# pareto_front = pareto_fronts[0]
# obj_crowding_distance_list, crowding_distance_sum, crowding_dist_front_sorted_indices, crowding_dist_pop_sorted_indices = crowding_distance(pareto_front=pareto_front.copy(),
#                                                                                                                                              fitness=fitness)
# print('\n', crowding_dist_pop_sorted_indices)
# print(f'Sorted Sum of Crowd Dists\n{crowding_distance_sum}')