2929from pm4py .util import exec_utils
3030from pm4py .util import string_distance
3131from pm4py .util import typing
32- from pm4py .util import constants , xes_constants
32+ from pm4py .util import constants , xes_constants , thread_utils
3333import pandas as pd
3434from pm4py .objects .conversion .log import converter as log_converter
3535
@@ -46,7 +46,32 @@ def apply(
4646 parameters : Optional [Dict [Union [str , Parameters ], Any ]] = None ,
4747) -> typing .ListAlignments :
4848 """
49- Aligns each trace of the first log against the second log, minimizing the edit distance
49+ Aligns each trace of the first log against the second log, minimizing the edit distance.
50+
51+ Parameters
52+ --------------
53+ log1
54+ First log
55+ log2
56+ Second log
57+ parameters
58+ Parameters of the algorithm
59+
60+ Returns
61+ ---------------
62+ aligned_traces
63+ List that contains, for each trace of the first log, the corresponding alignment
64+ """
65+ return apply_log (log1 , log2 , parameters = parameters )
66+
67+
68+ def apply_log (
69+ log1 : EventLog ,
70+ log2 : EventLog ,
71+ parameters : Optional [Dict [Union [str , Parameters ], Any ]] = None ,
72+ ) -> typing .ListAlignments :
73+ """
74+ Aligns each trace of the first log against the second log, minimizing the edit distance.
5075
5176 Parameters
5277 --------------
@@ -80,8 +105,6 @@ def apply(
80105 Parameters .PERFORM_ANTI_ALIGNMENT , parameters , False
81106 )
82107
83- aligned_traces = []
84-
85108 # form a mapping dictionary associating each activity of the two logs to
86109 # an ASCII character
87110 mapping = log_regex .form_encoding_dictio_from_two_logs (
@@ -102,23 +125,33 @@ def apply(
102125 else :
103126 list_encodings = sorted (list_encodings , key = lambda x : len (x ))
104127
105- # keeps an alignment cache (to avoid re-calculating the same edit
106- # distances :) )
107- cache_align = {}
128+ # keeps an alignment cache (to avoid re-calculating the same edit distances)
129+ cache_align : Dict [str , typing .AlignmentResult ] = {}
108130
109131 best_worst_cost = min (len (x ) for x in list_encodings )
110132
133+ # encode all traces once and keep track of the unique encodings
134+ encoded_traces : List [str ] = []
135+ unique_encoded_traces : Set [str ] = set ()
111136 for trace in log1 :
112- # gets the alignment
113- align_result = align_trace (
114- trace ,
115- list_encodings ,
116- set_encodings ,
117- mapping ,
118- cache_align = cache_align ,
119- parameters = parameters ,
137+ encoded_trace = log_regex .get_encoded_trace (
138+ trace , mapping , parameters = parameters
120139 )
121- aligned_traces .append (align_result )
140+ encoded_traces .append (encoded_trace )
141+ unique_encoded_traces .add (encoded_trace )
142+
143+ thm = thread_utils .Pm4pyThreadManager ()
144+ f = lambda x : __perform_trace_alignment (x , list_encodings , set_encodings , mapping , cache_align = cache_align , parameters = parameters )
145+
146+ # perform the alignment per encoded trace
147+ for encoded_trace in unique_encoded_traces :
148+ thm .submit (f , encoded_trace )
149+ thm .join ()
150+
151+ # map each trace back to its alignment result
152+ aligned_traces = [
153+ cache_align [encoded_trace ] for encoded_trace in encoded_traces
154+ ]
122155
123156 # assign fitness to traces
124157 for index , align in enumerate (aligned_traces ):
@@ -142,7 +175,7 @@ def apply(
142175 return aligned_traces
143176
144177
145- def align_trace (
178+ def apply_trace (
146179 trace : Trace ,
147180 list_encodings : List [str ],
148181 set_encodings : Set [str ],
@@ -151,7 +184,7 @@ def align_trace(
151184 parameters : Optional [Dict [Union [str , Parameters ], Any ]] = None ,
152185) -> typing .AlignmentResult :
153186 """
154- Aligns a trace against a list of traces, minimizing the edit distance
187+ Aligns a trace against a list of traces, minimizing the edit distance.
155188
156189 Parameters
157190 --------------
@@ -177,8 +210,58 @@ def align_trace(
177210 if parameters is None :
178211 parameters = {}
179212
180- # keeps an alignment cache (to avoid re-calculating the same edit
181- # distances :) )
213+ if cache_align is None :
214+ cache_align = {}
215+
216+ # encode the current trace using the mapping dictionary, then align it
217+ encoded_trace = log_regex .get_encoded_trace (
218+ trace , mapping , parameters = parameters
219+ )
220+ return __perform_trace_alignment (
221+ encoded_trace ,
222+ list_encodings ,
223+ set_encodings ,
224+ mapping ,
225+ cache_align = cache_align ,
226+ parameters = parameters ,
227+ )
228+
229+
def align_trace(
    trace: Trace,
    list_encodings: List[str],
    set_encodings: Set[str],
    mapping: Dict[str, str],
    cache_align: Optional[Dict[Any, Any]] = None,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None,
) -> typing.AlignmentResult:
    """
    Legacy entry point kept for backwards compatibility: forwards every
    argument unchanged to apply_trace and returns its result.

    Parameters
    --------------
    trace
        Trace to align
    list_encodings
        List of encoded traces to align against
    set_encodings
        Set of encoded traces to align against
    mapping
        Mapping dictionary (activity -> character) used for the encoding
    cache_align
        (Optional) alignment cache, handed over to apply_trace as-is
    parameters
        Parameters of the algorithm

    Returns
    ---------------
    aligned_trace
        Whatever apply_trace returns for the given arguments
    """
    # the optional arguments are passed by keyword, exactly as received
    # (None is forwarded untouched; defaults are resolved by apply_trace)
    optional_arguments = {
        "cache_align": cache_align,
        "parameters": parameters,
    }
    alignment = apply_trace(
        trace, list_encodings, set_encodings, mapping, **optional_arguments
    )
    return alignment
249+
250+
251+ def __perform_trace_alignment (
252+ encoded_trace : str ,
253+ list_encodings : List [str ],
254+ set_encodings : Set [str ],
255+ mapping : Dict [str , str ],
256+ cache_align : Optional [Dict [Any , Any ]] = None ,
257+ parameters : Optional [Dict [Union [str , Parameters ], Any ]] = None ,
258+ ) -> typing .AlignmentResult :
259+ """
260+ Aligns an encoded trace against a list of encoded traces, minimizing the edit distance.
261+ """
262+ if parameters is None :
263+ parameters = {}
264+
182265 if cache_align is None :
183266 cache_align = {}
184267
@@ -191,54 +274,48 @@ def align_trace(
191274 else string_distance .argmin_levenshtein
192275 )
193276
194- # encode the current trace using the mapping dictionary
195- encoded_trace = log_regex .get_encoded_trace (
196- trace , mapping , parameters = parameters
197- )
198277 inv_mapping = {y : x for x , y in mapping .items ()}
199278
200279 if encoded_trace not in cache_align :
201280 if not anti_alignment and encoded_trace in set_encodings :
202- # the trace is already in the encodings. we don't need to calculate
203- # any edit distance
281+ # the trace is already in the encodings. we don't need to calculate any edit distance
204282 argmin_dist = encoded_trace
205283 else :
206- # finds the encoded trace of the other log that is at minimal
207- # distance
284+ # finds the encoded trace of the other log that is at minimal distance
208285 argmin_dist = comparison_function (encoded_trace , list_encodings )
209286
210287 seq_match = difflib .SequenceMatcher (
211288 None , encoded_trace , argmin_dist
212289 ).get_matching_blocks ()
213290 i = 0
214291 j = 0
215- align_trace = []
292+ aligned_moves = []
216293 total_cost = 0
217294 for el in seq_match :
218295 while i < el .a :
219- align_trace .append ((inv_mapping [encoded_trace [i ]], ">>" ))
296+ aligned_moves .append ((inv_mapping [encoded_trace [i ]], ">>" ))
220297 total_cost += align_utils .STD_MODEL_LOG_MOVE_COST
221- i = i + 1
298+ i += 1
222299 while j < el .b :
223- align_trace .append ((">>" , inv_mapping [argmin_dist [j ]]))
300+ aligned_moves .append ((">>" , inv_mapping [argmin_dist [j ]]))
224301 total_cost += align_utils .STD_MODEL_LOG_MOVE_COST
225- j = j + 1
226- for z in range (el .size ):
227- align_trace .append (
302+ j += 1
303+ for _ in range (el .size ):
304+ aligned_moves .append (
228305 (
229306 inv_mapping [encoded_trace [i ]],
230307 inv_mapping [argmin_dist [j ]],
231308 )
232309 )
233- i = i + 1
234- j = j + 1
310+ i += 1
311+ j += 1
235312
236- align = {"alignment" : align_trace , "cost" : total_cost }
313+ align = {"alignment" : aligned_moves , "cost" : total_cost }
237314 # saves the alignment in the cache
238315 cache_align [encoded_trace ] = align
239316 return align
240- else :
241- return cache_align [encoded_trace ]
317+
318+ return cache_align [encoded_trace ]
242319
243320
244321def project_log_on_variant (
0 commit comments