Source code for simba.analog_discovery.simba_analog_discovery

import numpy as np


class AnalogDiscovery:
    """Ranking utilities used for analog discovery."""

    @staticmethod
    def compute_ranking(similarities_mces, similarities_ed, max_value_2_int=5):
        """Rank each row's candidates by rounded MCES, then by edit distance.

        Within every row the candidates are ordered ascending by three keys:

        1. the MCES value rounded to the nearest integer (primary),
        2. the edit-distance value (breaks ties in the rounded MCES),
        3. the raw, unrounded MCES value (breaks any remaining ties).

        The resulting 0-based rank ``r`` of each candidate is mapped to the
        score ``1 - r / n_candidates``, so the candidate that sorts first
        receives 1.0 and the one that sorts last receives ``1 / n_candidates``.

        Parameters
        ----------
        similarities_mces : array-like, shape (n_rows, n_candidates)
            MCES values; lower values are ranked first.
        similarities_ed : array-like, shape (n_rows, n_candidates)
            Edit-distance values; lower values are ranked first among
            candidates sharing the same rounded MCES.
        max_value_2_int : int, optional
            Not used by the computation; kept so existing callers that pass
            it keep working.

        Returns
        -------
        numpy.ndarray
            Float array with the same shape as the inputs holding the
            normalized ranking scores.

        Raises
        ------
        ValueError
            If the inputs are not two-dimensional or their shapes differ.
        """
        mces = np.asarray(similarities_mces)
        edit_distance = np.asarray(similarities_ed)

        # Fail loudly on mismatched inputs: silently truncating to the shorter
        # one would leave unprocessed rows at rank 0, i.e. a top score of 1.0.
        if mces.ndim != 2 or mces.shape != edit_distance.shape:
            raise ValueError(
                "similarities_mces and similarities_ed must be 2-D arrays of "
                f"the same shape, got {mces.shape} and {edit_distance.shape}"
            )

        mces_rounded = np.round(mces)

        # np.lexsort treats the LAST key as the primary one; axis=-1 sorts the
        # candidates of every row independently in a single call.
        order = np.lexsort((mces, edit_distance, mces_rounded), axis=-1)

        # `order` holds a permutation per row; its argsort is the inverse
        # permutation, i.e. the 0-based rank of each candidate.
        ranks = np.argsort(order, axis=-1)

        # Normalize so that rank 0 maps to 1.0.
        return 1 - ranks / mces.shape[1]