Skip to content

Commit f38abe7

Browse files
Merge remote-tracking branch 'upstream/hotfixes' into release
2 parents ac25bf5 + f90b39a commit f38abe7

31 files changed

Lines changed: 896 additions & 331 deletions

File tree

pm4py/algo/conformance/alignments/decomposed/variants/recompos_maximal.py

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
from pm4py.util import variants_util
4040

4141
from enum import Enum
42-
from pm4py.util import constants, nx_utils
42+
from pm4py.util import constants, nx_utils, thread_utils
4343

4444
from typing import Optional, Dict, Any, Union
4545
from pm4py.objects.log.obj import EventLog
@@ -233,23 +233,13 @@ def apply_log(log, list_nets, parameters=None):
233233

234234
all_alignments = [None] * len(variants_to_process) # Pre-allocate result list
235235

236-
# Serial processing
237-
max_align_time = exec_utils.get_param_value(
238-
Parameters.PARAM_MAX_ALIGN_TIME, parameters, sys.maxsize
239-
)
240-
start_time = time.time()
236+
thm = thread_utils.Pm4pyThreadManager()
237+
def f(x, y, z):
    # Worker submitted to the thread manager once per entry of variants_to_process.
    # x[1] is handed to apply_trace as the trace to align against the nets in y;
    # x[2] is that entry's slot in the pre-allocated result list z.
    # Assign by index: list.insert() would shift the None placeholders and grow
    # the list, so a result would no longer sit at its own variant's index.
    z[x[2]] = apply_trace(x[1], y, parameters=parameters)
    if progress is not None:
        progress.update()
241238

242239
for variant_info in variants_to_process:
243-
this_time = time.time()
244-
if this_time - start_time <= max_align_time:
245-
alignment = apply_trace(variant_info[1], list_nets, parameters=parameters)
246-
else:
247-
alignment = None
248-
249-
all_alignments[variant_info[2]] = alignment
240+
thm.submit(f, variant_info, list_nets, all_alignments)
250241

251-
if progress is not None:
252-
progress.update()
242+
thm.join()
253243

254244
# Map alignments back to original traces
255245
al_idx = {}

pm4py/algo/conformance/alignments/dfg/variants/classic.py

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
import uuid
2525
from enum import Enum
2626

27-
from pm4py.util import constants, xes_constants, exec_utils, pandas_utils
27+
from pm4py.util import constants, xes_constants, exec_utils, pandas_utils, thread_utils
2828
from pm4py.util import variants_util
2929
from pm4py.objects.petri_net.utils import align_utils
3030
from pm4py.objects.log.obj import EventLog, Trace, Event
@@ -156,13 +156,7 @@ def apply_log(log, dfg, sa, ea, parameters=None):
156156
case_id_key = exec_utils.get_param_value(
157157
Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME
158158
)
159-
traces = [
160-
tuple(x)
161-
for x in log.groupby(case_id_key)[activity_key]
162-
.agg(list)
163-
.to_dict()
164-
.values()
165-
]
159+
traces = pandas_utils.get_traces(log, case_id_key, activity_key)
166160
else:
167161
log = log_converter.apply(
168162
log,
@@ -171,29 +165,39 @@ def apply_log(log, dfg, sa, ea, parameters=None):
171165
)
172166
traces = [tuple(x[activity_key] for x in trace) for trace in log]
173167

168+
unique_traces = set(traces)
169+
170+
thm = thread_utils.Pm4pyThreadManager()
171+
f = lambda x, y: __compute_alignments(x, dfg, sa, ea, y, al_empty_cost, parameters)
172+
173+
for trace_act in unique_traces:
174+
thm.submit(f, trace_act, align_dict)
175+
176+
thm.join()
177+
174178
for trace_act in traces:
175-
if trace_act in align_dict:
176-
aligned_traces.append(align_dict[trace_act])
177-
else:
178-
log_move_cost_function = exec_utils.get_param_value(
179-
Parameters.LOG_MOVE_COST_FUNCTION,
180-
parameters,
181-
{x: align_utils.STD_MODEL_LOG_MOVE_COST for x in trace_act},
182-
)
183-
trace_bwc_cost = sum(log_move_cost_function[x] for x in trace_act)
184-
al_tr = __apply_list_activities(
185-
trace_act, dfg, sa, ea, parameters=parameters
186-
)
187-
al_tr["fitness"] = 1.0 - al_tr["cost"] / (
188-
al_empty_cost + trace_bwc_cost
189-
)
190-
al_tr["bwc"] = al_empty_cost + trace_bwc_cost
191-
align_dict[trace_act] = al_tr
192-
aligned_traces.append(align_dict[trace_act])
179+
aligned_traces.append(align_dict[trace_act])
193180

194181
return aligned_traces
195182

196183

184+
def __compute_alignments(trace_act, dfg, sa, ea, align_dict, al_empty_cost, parameters):
    """
    Aligns one sequence of activities against the DFG and stores the result
    in align_dict, using the sequence itself as the key.

    Parameters
    --------------
    trace_act
        Sequence of activities (used as dictionary key, so it must be hashable)
    dfg
        DFG passed through to __apply_list_activities
    sa
        Passed through to __apply_list_activities (presumably the start activities)
    ea
        Passed through to __apply_list_activities (presumably the end activities)
    align_dict
        Dictionary that receives the alignment of trace_act (modified in place)
    al_empty_cost
        Cost added to the log-move costs of the trace to obtain the best-worst cost
    parameters
        Parameters of the algorithm (may contain Parameters.LOG_MOVE_COST_FUNCTION)

    Returns
    --------------
    None
        The alignment is written into align_dict
    """
    # by default every activity of the trace gets the standard log move cost
    default_move_costs = {
        act: align_utils.STD_MODEL_LOG_MOVE_COST for act in trace_act
    }
    move_costs = exec_utils.get_param_value(
        Parameters.LOG_MOVE_COST_FUNCTION, parameters, default_move_costs
    )
    log_moves_cost = sum(move_costs[act] for act in trace_act)

    alignment = __apply_list_activities(
        trace_act, dfg, sa, ea, parameters=parameters
    )

    # best-worst cost: the empty-trace cost plus a log move for every event
    best_worst_cost = al_empty_cost + log_moves_cost
    alignment["fitness"] = 1.0 - alignment["cost"] / best_worst_cost
    alignment["bwc"] = best_worst_cost

    align_dict[trace_act] = alignment
199+
200+
197201
def apply_trace(trace, dfg, sa, ea, parameters=None):
198202
"""
199203
Applies the alignment algorithm provided a trace of a log, and a *connected* DFG

pm4py/algo/conformance/alignments/edit_distance/variants/edit_distance.py

Lines changed: 117 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from pm4py.util import exec_utils
3030
from pm4py.util import string_distance
3131
from pm4py.util import typing
32-
from pm4py.util import constants, xes_constants
32+
from pm4py.util import constants, xes_constants, thread_utils
3333
import pandas as pd
3434
from pm4py.objects.conversion.log import converter as log_converter
3535

@@ -46,7 +46,32 @@ def apply(
4646
parameters: Optional[Dict[Union[str, Parameters], Any]] = None,
4747
) -> typing.ListAlignments:
4848
"""
49-
Aligns each trace of the first log against the second log, minimizing the edit distance
49+
Aligns each trace of the first log against the second log, minimizing the edit distance.
50+
51+
Parameters
52+
--------------
53+
log1
54+
First log
55+
log2
56+
Second log
57+
parameters
58+
Parameters of the algorithm
59+
60+
Returns
61+
---------------
62+
aligned_traces
63+
List that contains, for each trace of the first log, the corresponding alignment
64+
"""
65+
return apply_log(log1, log2, parameters=parameters)
66+
67+
68+
def apply_log(
69+
log1: EventLog,
70+
log2: EventLog,
71+
parameters: Optional[Dict[Union[str, Parameters], Any]] = None,
72+
) -> typing.ListAlignments:
73+
"""
74+
Aligns each trace of the first log against the second log, minimizing the edit distance.
5075
5176
Parameters
5277
--------------
@@ -80,8 +105,6 @@ def apply(
80105
Parameters.PERFORM_ANTI_ALIGNMENT, parameters, False
81106
)
82107

83-
aligned_traces = []
84-
85108
# form a mapping dictionary associating each activity of the two logs to
86109
# an ASCII character
87110
mapping = log_regex.form_encoding_dictio_from_two_logs(
@@ -102,23 +125,33 @@ def apply(
102125
else:
103126
list_encodings = sorted(list_encodings, key=lambda x: len(x))
104127

105-
# keeps an alignment cache (to avoid re-calculating the same edit
106-
# distances :) )
107-
cache_align = {}
128+
# keeps an alignment cache (to avoid re-calculating the same edit distances)
129+
cache_align: Dict[str, typing.AlignmentResult] = {}
108130

109131
best_worst_cost = min(len(x) for x in list_encodings)
110132

133+
# encode all traces once and keep track of the unique encodings
134+
encoded_traces: List[str] = []
135+
unique_encoded_traces: Set[str] = set()
111136
for trace in log1:
112-
# gets the alignment
113-
align_result = align_trace(
114-
trace,
115-
list_encodings,
116-
set_encodings,
117-
mapping,
118-
cache_align=cache_align,
119-
parameters=parameters,
137+
encoded_trace = log_regex.get_encoded_trace(
138+
trace, mapping, parameters=parameters
120139
)
121-
aligned_traces.append(align_result)
140+
encoded_traces.append(encoded_trace)
141+
unique_encoded_traces.add(encoded_trace)
142+
143+
thm = thread_utils.Pm4pyThreadManager()
144+
f = lambda x: __perform_trace_alignment(x, list_encodings, set_encodings, mapping, cache_align=cache_align, parameters=parameters)
145+
146+
# perform the alignment per encoded trace
147+
for encoded_trace in unique_encoded_traces:
148+
thm.submit(f, encoded_trace)
149+
thm.join()
150+
151+
# map each trace back to its alignment result
152+
aligned_traces = [
153+
cache_align[encoded_trace] for encoded_trace in encoded_traces
154+
]
122155

123156
# assign fitness to traces
124157
for index, align in enumerate(aligned_traces):
@@ -142,7 +175,7 @@ def apply(
142175
return aligned_traces
143176

144177

145-
def align_trace(
178+
def apply_trace(
146179
trace: Trace,
147180
list_encodings: List[str],
148181
set_encodings: Set[str],
@@ -151,7 +184,7 @@ def align_trace(
151184
parameters: Optional[Dict[Union[str, Parameters], Any]] = None,
152185
) -> typing.AlignmentResult:
153186
"""
154-
Aligns a trace against a list of traces, minimizing the edit distance
187+
Aligns a trace against a list of traces, minimizing the edit distance.
155188
156189
Parameters
157190
--------------
@@ -177,8 +210,58 @@ def align_trace(
177210
if parameters is None:
178211
parameters = {}
179212

180-
# keeps an alignment cache (to avoid re-calculating the same edit
181-
# distances :) )
213+
if cache_align is None:
214+
cache_align = {}
215+
216+
# encode the current trace using the mapping dictionary, then align it
217+
encoded_trace = log_regex.get_encoded_trace(
218+
trace, mapping, parameters=parameters
219+
)
220+
return __perform_trace_alignment(
221+
encoded_trace,
222+
list_encodings,
223+
set_encodings,
224+
mapping,
225+
cache_align=cache_align,
226+
parameters=parameters,
227+
)
228+
229+
230+
def align_trace(
    trace: Trace,
    list_encodings: List[str],
    set_encodings: Set[str],
    mapping: Dict[str, str],
    cache_align: Optional[Dict[Any, Any]] = None,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None,
) -> typing.AlignmentResult:
    """
    Backwards-compatible alias of apply_trace.

    Every argument is forwarded unchanged to apply_trace, and its result is
    returned as-is, so code that still calls the former name keeps working.

    Parameters
    --------------
    trace
        Trace to align
    list_encodings
        Forwarded to apply_trace
    set_encodings
        Forwarded to apply_trace
    mapping
        Forwarded to apply_trace
    cache_align
        Optional alignment cache, forwarded to apply_trace
    parameters
        Parameters of the algorithm, forwarded to apply_trace

    Returns
    --------------
    alignment
        The value returned by apply_trace
    """
    alignment = apply_trace(
        trace,
        list_encodings,
        set_encodings,
        mapping,
        cache_align=cache_align,
        parameters=parameters,
    )
    return alignment
249+
250+
251+
def __perform_trace_alignment(
252+
encoded_trace: str,
253+
list_encodings: List[str],
254+
set_encodings: Set[str],
255+
mapping: Dict[str, str],
256+
cache_align: Optional[Dict[Any, Any]] = None,
257+
parameters: Optional[Dict[Union[str, Parameters], Any]] = None,
258+
) -> typing.AlignmentResult:
259+
"""
260+
Aligns an encoded trace against a list of encoded traces, minimizing the edit distance.
261+
"""
262+
if parameters is None:
263+
parameters = {}
264+
182265
if cache_align is None:
183266
cache_align = {}
184267

@@ -191,54 +274,48 @@ def align_trace(
191274
else string_distance.argmin_levenshtein
192275
)
193276

194-
# encode the current trace using the mapping dictionary
195-
encoded_trace = log_regex.get_encoded_trace(
196-
trace, mapping, parameters=parameters
197-
)
198277
inv_mapping = {y: x for x, y in mapping.items()}
199278

200279
if encoded_trace not in cache_align:
201280
if not anti_alignment and encoded_trace in set_encodings:
202-
# the trace is already in the encodings. we don't need to calculate
203-
# any edit distance
281+
# the trace is already in the encodings. we don't need to calculate any edit distance
204282
argmin_dist = encoded_trace
205283
else:
206-
# finds the encoded trace of the other log that is at minimal
207-
# distance
284+
# finds the encoded trace of the other log that is at minimal distance
208285
argmin_dist = comparison_function(encoded_trace, list_encodings)
209286

210287
seq_match = difflib.SequenceMatcher(
211288
None, encoded_trace, argmin_dist
212289
).get_matching_blocks()
213290
i = 0
214291
j = 0
215-
align_trace = []
292+
aligned_moves = []
216293
total_cost = 0
217294
for el in seq_match:
218295
while i < el.a:
219-
align_trace.append((inv_mapping[encoded_trace[i]], ">>"))
296+
aligned_moves.append((inv_mapping[encoded_trace[i]], ">>"))
220297
total_cost += align_utils.STD_MODEL_LOG_MOVE_COST
221-
i = i + 1
298+
i += 1
222299
while j < el.b:
223-
align_trace.append((">>", inv_mapping[argmin_dist[j]]))
300+
aligned_moves.append((">>", inv_mapping[argmin_dist[j]]))
224301
total_cost += align_utils.STD_MODEL_LOG_MOVE_COST
225-
j = j + 1
226-
for z in range(el.size):
227-
align_trace.append(
302+
j += 1
303+
for _ in range(el.size):
304+
aligned_moves.append(
228305
(
229306
inv_mapping[encoded_trace[i]],
230307
inv_mapping[argmin_dist[j]],
231308
)
232309
)
233-
i = i + 1
234-
j = j + 1
310+
i += 1
311+
j += 1
235312

236-
align = {"alignment": align_trace, "cost": total_cost}
313+
align = {"alignment": aligned_moves, "cost": total_cost}
237314
# saves the alignment in the cache
238315
cache_align[encoded_trace] = align
239316
return align
240-
else:
241-
return cache_align[encoded_trace]
317+
318+
return cache_align[encoded_trace]
242319

243320

244321
def project_log_on_variant(

0 commit comments

Comments
 (0)