From ea1db86f15e73e8cd44c68d7e7604f6ebe283511 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Thu, 16 Oct 2025 22:08:03 +0200 Subject: [PATCH 01/82] Add our user agent to 120.uploader (#255) Resolves issue #254 * [benchmarks] Add our user agent to 120.uploader * [benchmarks] Avoid potential resource leak --- benchmarks/100.webapps/120.uploader/python/function.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/benchmarks/100.webapps/120.uploader/python/function.py b/benchmarks/100.webapps/120.uploader/python/function.py index c13656d0c..d9fb88c9a 100755 --- a/benchmarks/100.webapps/120.uploader/python/function.py +++ b/benchmarks/100.webapps/120.uploader/python/function.py @@ -8,9 +8,10 @@ from . import storage client = storage.storage.get_instance() +SEBS_USER_AGENT = "SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2" def handler(event): - + bucket = event.get('bucket').get('bucket') output_prefix = event.get('bucket').get('output') url = event.get('object').get('url') @@ -18,7 +19,11 @@ def handler(event): download_path = '/tmp/{}'.format(name) process_begin = datetime.datetime.now() - urllib.request.urlretrieve(url, filename=download_path) + req = urllib.request.Request(url) + req.add_header('User-Agent', SEBS_USER_AGENT) + with open(download_path, 'wb') as f: + with urllib.request.urlopen(req) as response: + f.write(response.read()) size = os.path.getsize(download_path) process_end = datetime.datetime.now() From e201761771b5714e449c58f07941fa54b629ec7c Mon Sep 17 00:00:00 2001 From: Zisen Liu <29354199+rabbull@users.noreply.github.com> Date: Thu, 16 Oct 2025 22:19:34 +0200 Subject: [PATCH 02/82] Minor Bug Fixes on GCP (#252) * bugfix * add timeout for gcp function deployment --- .gitignore | 4 +++- benchmarks/100.webapps/120.uploader/python/function.py | 1 - benchmarks/wrappers/gcp/nodejs/storage.js | 2 +- config/systems.json | 5 ----- sebs/gcp/gcp.py | 4 ++++ 5 files changed, 8 insertions(+), 8 
deletions(-) diff --git a/.gitignore b/.gitignore index 0712f6d7b..274165ed8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,6 @@ cache* minio-volume scylladb-volume - # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -188,3 +187,6 @@ cache # IntelliJ IDEA files .idea *.iml + +# MacOS Finder +**/.DS_Store \ No newline at end of file diff --git a/benchmarks/100.webapps/120.uploader/python/function.py b/benchmarks/100.webapps/120.uploader/python/function.py index d9fb88c9a..d032bbdb6 100755 --- a/benchmarks/100.webapps/120.uploader/python/function.py +++ b/benchmarks/100.webapps/120.uploader/python/function.py @@ -1,7 +1,6 @@ import datetime import os -import uuid import urllib.request diff --git a/benchmarks/wrappers/gcp/nodejs/storage.js b/benchmarks/wrappers/gcp/nodejs/storage.js index fd67a4ace..859c693a5 100644 --- a/benchmarks/wrappers/gcp/nodejs/storage.js +++ b/benchmarks/wrappers/gcp/nodejs/storage.js @@ -20,7 +20,7 @@ class gcp_storage { upload(container, file, filepath) { let bucket = this.storage.bucket(container); let uniqueName = this.unique_name(file); - let options = {destination: uniqueName}; + let options = {destination: uniqueName, resumable: false}; return [uniqueName, bucket.upload(filepath, options)]; }; diff --git a/config/systems.json b/config/systems.json index 5a4077a23..5a38b4965 100644 --- a/config/systems.json +++ b/config/systems.json @@ -196,7 +196,6 @@ "python": { "base_images": { "x64": { - "3.7": "ubuntu:22.04", "3.8": "ubuntu:22.04", "3.9": "ubuntu:22.04", "3.10": "ubuntu:22.04", @@ -228,10 +227,6 @@ "nodejs": { "base_images": { "x64": { - "10": "ubuntu:18.04", - "12": "ubuntu:18.04", - "14": "ubuntu:18.04", - "16": "ubuntu:18.04", "18": "ubuntu:22.04", "20": "ubuntu:22.04" } diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 187d8cda8..eb94eca3f 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -323,12 +323,16 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) 
self.function_client.projects().locations().functions().get(name=full_func_name) ) deployed = False + begin = time.time() while not deployed: status_res = our_function_req.execute() if status_res["status"] == "ACTIVE": deployed = True else: time.sleep(3) + if time.time() - begin > 300: # wait 5 minutes; TODO: make it configurable + self.logging.error(f"Failed to deploy function: {function.name}") + raise RuntimeError(f"Deployment timeout!") self.logging.info(f"Function {function.name} - deployed!") invoke_url = status_res["httpsTrigger"]["url"] From b54b46c645b6ca8c3cee301177d3960cbced0bad Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Fri, 17 Oct 2025 14:37:47 +0200 Subject: [PATCH 03/82] [dev] Linting --- sebs/gcp/gcp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index eb94eca3f..6525034c2 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -332,7 +332,7 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) time.sleep(3) if time.time() - begin > 300: # wait 5 minutes; TODO: make it configurable self.logging.error(f"Failed to deploy function: {function.name}") - raise RuntimeError(f"Deployment timeout!") + raise RuntimeError("Deployment timeout!") self.logging.info(f"Function {function.name} - deployed!") invoke_url = status_res["httpsTrigger"]["url"] From 550cc8cc033e07f3550b6c78e38116052df9b16a Mon Sep 17 00:00:00 2001 From: McLavish Date: Thu, 30 Oct 2025 08:53:26 +0100 Subject: [PATCH 04/82] added bert as a test inference benchmark --- .../412.language-bert/config.json | 6 + .../400.inference/412.language-bert/input.py | 31 ++++ .../412.language-bert/python/function.py | 152 ++++++++++++++++++ .../412.language-bert/python/init.sh | 3 + .../412.language-bert/python/package.sh | 35 ++++ .../412.language-bert/python/requirements.txt | 3 + .../python/requirements.txt.3.10 | 3 + .../python/requirements.txt.3.11 | 3 + .../python/requirements.txt.3.8 | 3 + 
.../python/requirements.txt.3.9 | 3 + docs/benchmarks.md | 6 +- sebs/regression.py | 1 + 12 files changed, 248 insertions(+), 1 deletion(-) create mode 100644 benchmarks/400.inference/412.language-bert/config.json create mode 100644 benchmarks/400.inference/412.language-bert/input.py create mode 100644 benchmarks/400.inference/412.language-bert/python/function.py create mode 100755 benchmarks/400.inference/412.language-bert/python/init.sh create mode 100644 benchmarks/400.inference/412.language-bert/python/package.sh create mode 100644 benchmarks/400.inference/412.language-bert/python/requirements.txt create mode 100644 benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 create mode 100644 benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 create mode 100644 benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 create mode 100644 benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 diff --git a/benchmarks/400.inference/412.language-bert/config.json b/benchmarks/400.inference/412.language-bert/config.json new file mode 100644 index 000000000..94ede7925 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 512, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/400.inference/412.language-bert/input.py b/benchmarks/400.inference/412.language-bert/input.py new file mode 100644 index 000000000..f3daa83bc --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/input.py @@ -0,0 +1,31 @@ +import os + + +def buckets_count(): + # model bucket and text bucket + return (2, 0) + + +def upload_files(data_root, data_dir, upload_func): + for root, _, files in os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + filepath = os.path.join(root, file) + relative_key = os.path.join(prefix, file) + upload_func(0, relative_key, filepath) + + +def generate_input(data_dir, size, 
benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): + model_archive = "bert-tiny-onnx.tar.gz" + upload_func(0, model_archive, os.path.join(data_dir, "model", model_archive)) + + text_filename = "sentences.jsonl" + upload_func(1, text_filename, os.path.join(data_dir, "text", text_filename)) + + input_config = {"object": {}, "bucket": {}} + input_config["object"]["model"] = model_archive + input_config["object"]["input"] = text_filename + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["model"] = input_paths[0] + input_config["bucket"]["text"] = input_paths[1] + return input_config diff --git a/benchmarks/400.inference/412.language-bert/python/function.py b/benchmarks/400.inference/412.language-bert/python/function.py new file mode 100644 index 000000000..3f0761428 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/function.py @@ -0,0 +1,152 @@ +import datetime +import json +import os +import tarfile +import uuid +from typing import Dict, List, Optional + +import numpy as np +import onnxruntime as ort +from tokenizers import Tokenizer + +from . import storage + +client = storage.storage.get_instance() + +MODEL_ARCHIVE = "bert-tiny-onnx.tar.gz" +MODEL_DIRECTORY = "/tmp/bert_language_model" +MODEL_SUBDIR = "bert-tiny-onnx" + +_session: Optional[ort.InferenceSession] = None +_tokenizer: Optional[Tokenizer] = None +_labels: Optional[Dict[int, str]] = None + + +def _ensure_model(bucket: str, model_prefix: str): + """ + Lazily download and initialize the ONNX model and tokenizer. 
+ """ + global _session, _tokenizer, _labels + + model_path = os.path.join(MODEL_DIRECTORY, MODEL_SUBDIR) + model_download_begin = datetime.datetime.now() + model_download_end = model_download_begin + + if _session is None or _tokenizer is None or _labels is None: + if not os.path.exists(model_path): + os.makedirs(MODEL_DIRECTORY, exist_ok=True) + archive_path = os.path.join("/tmp", f"{uuid.uuid4()}-{MODEL_ARCHIVE}") + client.download(bucket, os.path.join(model_prefix, MODEL_ARCHIVE), archive_path) + model_download_end = datetime.datetime.now() + + with tarfile.open(archive_path, "r:gz") as tar: + tar.extractall(MODEL_DIRECTORY) + os.remove(archive_path) + else: + model_download_begin = datetime.datetime.now() + model_download_end = model_download_begin + + model_process_begin = datetime.datetime.now() + tokenizer_path = os.path.join(model_path, "tokenizer.json") + _tokenizer = Tokenizer.from_file(tokenizer_path) + _tokenizer.enable_truncation(max_length=128) + _tokenizer.enable_padding(length=128) + + label_map_path = os.path.join(model_path, "label_map.json") + with open(label_map_path, "r") as f: + raw_labels = json.load(f) + _labels = {int(idx): label for idx, label in raw_labels.items()} + + onnx_path = os.path.join(model_path, "model.onnx") + _session = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"]) + model_process_end = datetime.datetime.now() + else: + model_process_begin = datetime.datetime.now() + model_process_end = model_process_begin + + model_download_time = (model_download_end - model_download_begin) / datetime.timedelta( + microseconds=1 + ) + model_process_time = (model_process_end - model_process_begin) / datetime.timedelta( + microseconds=1 + ) + + return model_download_time, model_process_time + + +def _prepare_inputs(sentences: List[str]): + assert _tokenizer is not None + + encodings = _tokenizer.encode_batch(sentences) + + input_ids = np.array([enc.ids for enc in encodings], dtype=np.int64) + attention_mask = 
np.array([enc.attention_mask for enc in encodings], dtype=np.int64) + token_type_ids = np.array( + [enc.type_ids if enc.type_ids else [0] * len(enc.ids) for enc in encodings], + dtype=np.int64, + ) + + return { + "input_ids": input_ids, + "attention_mask": attention_mask, + "token_type_ids": token_type_ids, + } + + +def _softmax(logits: np.ndarray) -> np.ndarray: + shifted = logits - np.max(logits, axis=1, keepdims=True) + exp = np.exp(shifted) + return exp / np.sum(exp, axis=1, keepdims=True) + + +def handler(event): + bucket = event.get("bucket", {}).get("bucket") + model_prefix = event.get("bucket", {}).get("model") + text_prefix = event.get("bucket", {}).get("text") + text_key = event.get("object", {}).get("input") + + download_begin = datetime.datetime.now() + text_download_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(text_key)}") + client.download(bucket, os.path.join(text_prefix, text_key), text_download_path) + download_end = datetime.datetime.now() + + model_download_time, model_process_time = _ensure_model(bucket, model_prefix) + assert _session is not None and _labels is not None and _tokenizer is not None + + with open(text_download_path, "r") as f: + sentences = [json.loads(line)["text"] for line in f if line.strip()] + + os.remove(text_download_path) + + inference_begin = datetime.datetime.now() + inputs = _prepare_inputs(sentences) + outputs = _session.run(None, inputs) + logits = outputs[0] + probabilities = _softmax(logits) + inference_end = datetime.datetime.now() + + results = [] + for sentence, probs in zip(sentences, probabilities): + label_idx = int(np.argmax(probs)) + label = _labels.get(label_idx, str(label_idx)) + results.append( + { + "text": sentence, + "label": label, + "confidence": float(probs[label_idx]), + "raw_scores": probs.tolist(), + } + ) + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + compute_time = (inference_end - inference_begin) / 
datetime.timedelta(microseconds=1) + + return { + "result": {"predictions": results}, + "measurement": { + "download_time": download_time + model_download_time, + "compute_time": compute_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + }, + } diff --git a/benchmarks/400.inference/412.language-bert/python/init.sh b/benchmarks/400.inference/412.language-bert/python/init.sh new file mode 100755 index 000000000..160852abe --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/init.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +# No additional initialization required for the BERT inference benchmark. diff --git a/benchmarks/400.inference/412.language-bert/python/package.sh b/benchmarks/400.inference/412.language-bert/python/package.sh new file mode 100644 index 000000000..edb27ebe0 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/package.sh @@ -0,0 +1,35 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . -type d -name "bin" -not -path "*/torch/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . 
-name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +TORCH_DIR=".python_packages/lib/site-packages/torch" +if [ -d "$1/${TORCH_DIR}" ]; then + cd $1 + zip -qr torch.zip ${TORCH_DIR} + rm -rf ${TORCH_DIR} + cd ${CUR_DIR} + echo "Torch-zipped size $(du -sh $1 | cut -f1)" +fi diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt b/benchmarks/400.inference/412.language-bert/python/requirements.txt new file mode 100644 index 000000000..b692be1f7 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 new file mode 100644 index 000000000..b692be1f7 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 new file mode 100644 index 000000000..b692be1f7 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 new file mode 100644 index 000000000..b692be1f7 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 new file mode 100644 index 000000000..b692be1f7 --- /dev/null +++ 
b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime==1.16.3 +tokenizers==0.13.3 diff --git a/docs/benchmarks.md b/docs/benchmarks.md index e292a4b04..73056c86c 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -10,6 +10,7 @@ | Multimedia | 220.video-processing | Python | x64, arm64 | Add a watermark and generate gif of a video file. | | Utilities | 311.compression | Python | x64, arm64 | Create a .zip file for a group of files in storage and return to user to download. | | Inference | 411.image-recognition | Python | x64 | Image recognition with ResNet and pytorch. | +| Inference | 412.language-bert | Python | x64 | Sentence classification with a compact BERT model served via ONNX Runtime. | | Scientific | 501.graph-pagerank | Python | x64, arm64 | PageRank implementation with igraph. | | Scientific | 502.graph-mst | Python | x64, arm64 | Minimum spanning tree (MST) implementation with igraph. | | Scientific | 503.graph-bfs | Python | x64, arm64 | Breadth-first search (BFS) implementation with igraph. | @@ -70,6 +71,10 @@ It implements the .zip file creation with the help of the `shutil` standard libr The benchmark is inspired by MLPerf and implements image recognition with Resnet50. It downloads the input and model from the storage and uses the CPU-only `pytorch` library in Python. +### Language Inference + +This benchmark runs sequence classification with a compact BERT model exported to ONNX. The function downloads the model archive and text samples from storage, tokenizes the sentences, executes the ONNX Runtime session, and returns the predicted labels together with confidences. + ## Scientific ### Graph PageRank, BFS, MST @@ -87,4 +92,3 @@ This benchmark is inspired by the [DNAVisualization](https://github.com/Benjamin ## Applications **(WiP)** Coming soon! 
- diff --git a/sebs/regression.py b/sebs/regression.py index 579760a1c..e0eaf7f4f 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -21,6 +21,7 @@ "220.video-processing", "311.compression", "411.image-recognition", + "412.language-bert", "501.graph-pagerank", "502.graph-mst", "503.graph-bfs", From f9c3817f0992b849f972d38144b4ad92053bb83b Mon Sep 17 00:00:00 2001 From: McLavish Date: Mon, 3 Nov 2025 22:11:05 +0100 Subject: [PATCH 05/82] hotfix to enable gpu capabilities --- sebs/local/local.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sebs/local/local.py b/sebs/local/local.py index 32b9f9ffb..4bfa069a7 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -221,8 +221,9 @@ def _start_container( container_kwargs["command"] = f"/bin/bash /sebs/run_server.sh {port}" container_kwargs["ports"] = {f"{port}/tcp": port} - - container = self._docker_client.containers.run(**container_kwargs) + + from docker.types import DeviceRequest + container = self._docker_client.containers.run(**container_kwargs, device_requests=[DeviceRequest(driver="nvidia", count=-1, capabilities=[["gpu"]])], ) pid: Optional[int] = None if self.measurements_enabled and self._memory_measurement_path is not None: From 0f93b6608007844099f5f13f177a24b3bc6c2b2b Mon Sep 17 00:00:00 2001 From: McLavish Date: Tue, 4 Nov 2025 15:38:39 +0100 Subject: [PATCH 06/82] added pre-commit hooks for linting and formatting --- .pre-commit-config.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..3389b494a --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,21 @@ +# .pre-commit-config.yaml +repos: + - repo: local + hooks: + - id: flake8-local + name: flake8 (project env) + entry: flake8 + language: system + args: ["--config=.flake8.cfg"] + types: [python] + files: ^sebs/ + - repo: local + hooks: + - id: 
black-check-local + name: black --check (project env) + entry: black + language: system + args: ["--config=.black.toml", "--check", "--diff"] + types: [python] + files: ^sebs/ + From b965d7bd302782d130766c7bdc6a2938928627e0 Mon Sep 17 00:00:00 2001 From: McLavish Date: Tue, 4 Nov 2025 16:34:14 +0100 Subject: [PATCH 07/82] linting and formatting setting for whoever uses vscode + black + flake8 extensions --- .vscode/settings.json | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..127ae8a76 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,15 @@ +{ + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true + }, + + "black-formatter.importStrategy": "fromEnvironment", + "black-formatter.path": [], + "black-formatter.args": ["--config=.black.toml"], + + "flake8.importStrategy": "fromEnvironment", + "flake8.path": [], + "flake8.args": ["--config=.flake8.cfg"], + "flake8.enabled": true +} From e9916db6ea0a5e7cd019487d90b2b315ee0f2073 Mon Sep 17 00:00:00 2001 From: McLavish Date: Tue, 4 Nov 2025 17:25:51 +0100 Subject: [PATCH 08/82] reformatted local file so it passes linting/format --- .pre-commit-config.yaml | 6 ++++-- sebs/local/local.py | 8 ++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3389b494a..22e59d275 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,8 +4,9 @@ repos: hooks: - id: flake8-local name: flake8 (project env) + language: python + additional_dependencies: ["flake8==7.1.1"] entry: flake8 - language: system args: ["--config=.flake8.cfg"] types: [python] files: ^sebs/ @@ -13,8 +14,9 @@ repos: hooks: - id: black-check-local name: black --check (project env) + language: python + additional_dependencies: ["black==22.8.0"] entry: black - language: system args: 
["--config=.black.toml", "--check", "--diff"] types: [python] files: ^sebs/ diff --git a/sebs/local/local.py b/sebs/local/local.py index 4bfa069a7..7f49974e5 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -221,9 +221,13 @@ def _start_container( container_kwargs["command"] = f"/bin/bash /sebs/run_server.sh {port}" container_kwargs["ports"] = {f"{port}/tcp": port} - + from docker.types import DeviceRequest - container = self._docker_client.containers.run(**container_kwargs, device_requests=[DeviceRequest(driver="nvidia", count=-1, capabilities=[["gpu"]])], ) + + container = self._docker_client.containers.run( + **container_kwargs, + device_requests=[DeviceRequest(driver="nvidia", count=-1, capabilities=[["gpu"]])], + ) pid: Optional[int] = None if self.measurements_enabled and self._memory_measurement_path is not None: From 813af03cce3261e1a5cd52d8bc47842bbea08474 Mon Sep 17 00:00:00 2001 From: McLavish Date: Tue, 4 Nov 2025 17:48:16 +0100 Subject: [PATCH 09/82] bert now uses gpu --- benchmarks/400.inference/412.language-bert/input.py | 4 +++- .../400.inference/412.language-bert/python/function.py | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/benchmarks/400.inference/412.language-bert/input.py b/benchmarks/400.inference/412.language-bert/input.py index f3daa83bc..9af7ecb56 100644 --- a/benchmarks/400.inference/412.language-bert/input.py +++ b/benchmarks/400.inference/412.language-bert/input.py @@ -15,7 +15,9 @@ def upload_files(data_root, data_dir, upload_func): upload_func(0, relative_key, filepath) -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): model_archive = "bert-tiny-onnx.tar.gz" upload_func(0, model_archive, os.path.join(data_dir, "model", model_archive)) diff --git a/benchmarks/400.inference/412.language-bert/python/function.py 
b/benchmarks/400.inference/412.language-bert/python/function.py index 3f0761428..b3d6efe12 100644 --- a/benchmarks/400.inference/412.language-bert/python/function.py +++ b/benchmarks/400.inference/412.language-bert/python/function.py @@ -58,7 +58,11 @@ def _ensure_model(bucket: str, model_prefix: str): _labels = {int(idx): label for idx, label in raw_labels.items()} onnx_path = os.path.join(model_path, "model.onnx") - _session = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"]) + available_providers = ort.get_available_providers() + execution_providers = ["CUDAExecutionProvider"] if "CUDAExecutionProvider" in available_providers else [] + execution_providers.append("CPUExecutionProvider") + # Prefer GPU execution when available, otherwise fall back to CPU. + _session = ort.InferenceSession(onnx_path, providers=execution_providers) model_process_end = datetime.datetime.now() else: model_process_begin = datetime.datetime.now() From 3a96f0433db368dcdf87be1d13175bd4f5a7ff22 Mon Sep 17 00:00:00 2001 From: McLavish Date: Tue, 4 Nov 2025 18:13:33 +0100 Subject: [PATCH 10/82] changed data repo to be OUR forked data repo --- .gitmodules | 2 +- install.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 4feae9bfb..c33a17880 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,4 @@ url = https://github.com/mcopik/pypapi.git [submodule "benchmarks-data"] path = benchmarks-data - url = https://github.com/spcl/serverless-benchmarks-data.git + url = https://github.com/McLavish/serverless-benchmarks-data-dphpc.git diff --git a/install.py b/install.py index 57f047d23..b856e45b7 100755 --- a/install.py +++ b/install.py @@ -86,7 +86,7 @@ def execute(cmd, cwd=None): execute(f"git pull", cwd=data_dir) # clone else: - execute(f"git clone https://github.com/spcl/serverless-benchmarks-data.git {data_dir}") + execute(f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}") else: raise 
error @@ -99,4 +99,3 @@ def execute(cmd, cwd=None): execute("python3 setup.py build") execute("python3 pypapi/papi_build.py") os.chdir(cur_dir) - From d4d5d30ab57f99946d19247f3bf60c9c1d5c4eeb Mon Sep 17 00:00:00 2001 From: Russellpang Date: Wed, 5 Nov 2025 01:16:51 +0100 Subject: [PATCH 11/82] change data loading path to own forked repo --- install.py | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/install.py b/install.py index 57f047d23..784b0ab28 100755 --- a/install.py +++ b/install.py @@ -5,28 +5,46 @@ import subprocess parser = argparse.ArgumentParser(description="Install SeBS and dependencies.") -parser.add_argument('--venv', metavar='DIR', type=str, default="python-venv", help='destination of local Python virtual environment') -parser.add_argument('--python-path', metavar='DIR', type=str, default="python3", help='Path to local Python installation.') +parser.add_argument( + "--venv", + metavar="DIR", + type=str, + default="python-venv", + help="destination of local Python virtual environment", +) +parser.add_argument( + "--python-path", + metavar="DIR", + type=str, + default="python3", + help="Path to local Python installation.", +) for deployment in ["aws", "azure", "gcp", "openwhisk"]: - parser.add_argument(f"--{deployment}", action="store_const", const=True, default=True, dest=deployment) - parser.add_argument(f"--no-{deployment}", action="store_const", const=False, default=True, dest=deployment) + parser.add_argument( + f"--{deployment}", action="store_const", const=True, default=True, dest=deployment + ) + parser.add_argument( + f"--no-{deployment}", action="store_const", const=False, default=True, dest=deployment + ) for deployment in ["local"]: - parser.add_argument(f"--{deployment}", action="store_const", default=True, const=True, dest=deployment) + parser.add_argument( + f"--{deployment}", action="store_const", default=True, const=True, dest=deployment + ) 
parser.add_argument(f"--no-{deployment}", action="store_const", const=False, dest=deployment) parser.add_argument("--with-pypapi", action="store_true") args = parser.parse_args() + def execute(cmd, cwd=None): - ret = subprocess.run( - cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, cwd=cwd - ) + ret = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, cwd=cwd) if ret.returncode: raise RuntimeError( "Running {} failed!\n Output: {}".format(cmd, ret.stdout.decode("utf-8")) ) return ret.stdout.decode("utf-8") -env_dir=args.venv + +env_dir = args.venv if not os.path.exists(env_dir): print("Creating Python virtualenv at {}".format(env_dir)) @@ -86,7 +104,9 @@ def execute(cmd, cwd=None): execute(f"git pull", cwd=data_dir) # clone else: - execute(f"git clone https://github.com/spcl/serverless-benchmarks-data.git {data_dir}") + execute( + f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}" + ) else: raise error @@ -99,4 +119,3 @@ def execute(cmd, cwd=None): execute("python3 setup.py build") execute("python3 pypapi/papi_build.py") os.chdir(cur_dir) - From 1b7deb7c03c86bc6491e8da47964feb59c6bc920 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Wed, 5 Nov 2025 01:21:34 +0100 Subject: [PATCH 12/82] change data loading path to own forked repo --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 4feae9bfb..362530502 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,4 @@ url = https://github.com/mcopik/pypapi.git [submodule "benchmarks-data"] path = benchmarks-data - url = https://github.com/spcl/serverless-benchmarks-data.git + url = https://github.com/McLavish/serverless-benchmarks-data-dphpc.git \ No newline at end of file From 668652c58a794bb14411032f1564063d8da6bb12 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Wed, 5 Nov 2025 02:23:21 +0100 Subject: [PATCH 13/82] update benchmark function --- 
.../413.image-classification/config.json | 6 + .../413.image-classification/input.py | 51 ++++++ .../python/function.py | 156 ++++++++++++++++++ .../python/imagenet_class_index.json | 1 + .../413.image-classification/python/init.sh | 10 ++ .../python/package.sh | 32 ++++ .../python/requirements.txt | 4 + .../python/requirements.txt.3.10 | 5 + .../python/requirements.txt.3.11 | 5 + .../python/requirements.txt.3.6 | 4 + .../python/requirements.txt.3.7 | 4 + .../python/requirements.txt.3.8 | 3 + .../python/requirements.txt.3.9 | 3 + 13 files changed, 284 insertions(+) create mode 100644 benchmarks/400.inference/413.image-classification/config.json create mode 100644 benchmarks/400.inference/413.image-classification/input.py create mode 100644 benchmarks/400.inference/413.image-classification/python/function.py create mode 100755 benchmarks/400.inference/413.image-classification/python/imagenet_class_index.json create mode 100755 benchmarks/400.inference/413.image-classification/python/init.sh create mode 100644 benchmarks/400.inference/413.image-classification/python/package.sh create mode 100644 benchmarks/400.inference/413.image-classification/python/requirements.txt create mode 100644 benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 create mode 100644 benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 create mode 100644 benchmarks/400.inference/413.image-classification/python/requirements.txt.3.6 create mode 100644 benchmarks/400.inference/413.image-classification/python/requirements.txt.3.7 create mode 100644 benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 create mode 100644 benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 diff --git a/benchmarks/400.inference/413.image-classification/config.json b/benchmarks/400.inference/413.image-classification/config.json new file mode 100644 index 000000000..94ede7925 --- /dev/null +++ 
b/benchmarks/400.inference/413.image-classification/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 512, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/400.inference/413.image-classification/input.py b/benchmarks/400.inference/413.image-classification/input.py new file mode 100644 index 000000000..e97d38057 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/input.py @@ -0,0 +1,51 @@ +import glob, os + + +def buckets_count(): + return (2, 0) + + +def upload_files(data_root, data_dir, upload_func): + + for root, dirs, files in os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + file_name = prefix + "/" + file + filepath = os.path.join(root, file) + upload_func(0, file_name, filepath) + + +""" + Generate test, small and large workload for compression test. + + :param data_dir: directory where benchmark data is placed + :param size: workload size + :param input_buckets: input storage containers for this benchmark + :param output_buckets: + :param upload_func: upload function taking three params(bucket_idx, key, filepath) +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + + # upload model + model_name = "resnet50.tar.gz" + upload_func(0, model_name, os.path.join(data_dir, "model", model_name)) + + input_images = [] + resnet_path = os.path.join(data_dir, "fake-resnet") + with open(os.path.join(resnet_path, "val_map.txt"), "r") as f: + for line in f: + img, img_class = line.split() + input_images.append((img, img_class)) + upload_func(1, img, os.path.join(resnet_path, img)) + + input_config = {"object": {}, "bucket": {}} + input_config["object"]["model"] = model_name + input_config["object"]["input"] = input_images[0][0] + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[1] + input_config["bucket"]["model"] = input_paths[0] + return input_config diff 
--git a/benchmarks/400.inference/413.image-classification/python/function.py b/benchmarks/400.inference/413.image-classification/python/function.py new file mode 100644 index 000000000..1ee9d653a --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/function.py @@ -0,0 +1,156 @@ +import datetime, json, os, tarfile +from pathlib import Path + +from PIL import Image +import torch +from torchvision import transforms +from torchvision.models import resnet50 + +# ---------- Config ---------- +# Optional env overrides; event fields take precedence if provided +ENV_MODEL_PATH = os.getenv("MODEL_PATH") # /abs/path/resnet50.tar.gz or .pth/.pt +ENV_IMAGE_PATH = os.getenv("IMAGE_PATH") # /abs/path/test.jpg +USE_AMP = True # autocast for faster inference on CUDA +# ---------------------------- + +SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__)) +class_idx = json.load(open(os.path.join(SCRIPT_DIR, "imagenet_class_index.json"), "r")) +idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))] + +DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") +torch.backends.cudnn.benchmark = True + +model = None # cache across invocations (same as your original) + + +def _extract_pth_from_tar(tar_path: str, out_dir: str = "/tmp/resnet50_unpack") -> str: + """Extract .tar.gz/.tgz and return the first .pth/.pt found.""" + out = Path(out_dir) + out.mkdir(parents=True, exist_ok=True) + with tarfile.open(tar_path, "r:gz") as tar: + tar.extractall(out) + for ext in ("*.pth", "*.pt"): + found = list(out.rglob(ext)) + if found: + return str(found[0]) + raise FileNotFoundError(f"No .pth/.pt found in archive: {tar_path}") + + +def _load_resnet50_from_path(model_path: str) -> torch.nn.Module: + """Load torchvision ResNet-50 from a local .tar.gz or .pth/.pt (CPU), then return it.""" + if model_path.endswith((".tar.gz", ".tgz")): + weight_path = _extract_pth_from_tar(model_path) + else: + weight_path = model_path + + ckpt = torch.load(weight_path, 
map_location="cpu") + if isinstance(ckpt, dict): + state = ckpt.get("state_dict", ckpt.get("model", ckpt)) + if not isinstance(state, dict): + state = ckpt + if len(state) > 0 and next(iter(state)).startswith("module."): + state = {k.replace("module.", "", 1): v for k, v in state.items()} + m = resnet50(pretrained=False) + m.load_state_dict(state, strict=False) + m.eval() + return m + elif isinstance(ckpt, torch.nn.Module): + ckpt.eval() + return ckpt + else: + raise TypeError(f"Unsupported checkpoint type: {type(ckpt)}") + + +def _maybe_sync(): + if DEVICE.type == "cuda": + torch.cuda.synchronize() + + +def handler(event): + """ + Accepts local paths via event (preferred for your benchmark runner): + event = { + "local_model_archive": "/abs/path/resnet50.tar.gz" or ".pth", + "local_image_path": "/abs/path/image.jpg" + } + Falls back to env MODEL_PATH / IMAGE_PATH if not provided. + Returns the SAME structure as your existing function.py. + """ + if not torch.cuda.is_available(): + raise RuntimeError("CUDA not available. 
Run on a GPU machine/container.") + + # -------- resolve inputs -------- + model_path = event.get("local_model_archive") or ENV_MODEL_PATH + image_path = event.get("local_image_path") or ENV_IMAGE_PATH + assert model_path, "Provide local_model_archive in event or set MODEL_PATH" + assert image_path, "Provide local_image_path in event or set IMAGE_PATH" + + # -------- timings: image "download" (local -> count as zero) -------- + image_download_begin = datetime.datetime.now() + image_download_end = image_download_begin # local file, no download + + # -------- lazy model load (cache like your original) -------- + global model + if model is None: + model_download_begin = datetime.datetime.now() + model_download_end = model_download_begin # local file, no remote download + + model_process_begin = datetime.datetime.now() + # load on CPU, then move to GPU + m = _load_resnet50_from_path(model_path) + model = m.to(DEVICE, non_blocking=True).eval() + _maybe_sync() + model_process_end = datetime.datetime.now() + else: + # reuse cached model + model_download_begin = model_download_end = datetime.datetime.now() + model_process_begin = model_process_end = model_download_begin + + # -------- preprocess + inference on GPU (with proper sync) -------- + input_image = Image.open(image_path).convert("RGB") + preprocess = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ) + input_tensor = preprocess(input_image).unsqueeze(0) # [1,3,224,224] + + _maybe_sync() + process_begin = datetime.datetime.now() + with torch.inference_mode(): + x = input_tensor.to(DEVICE, non_blocking=True) + if USE_AMP and DEVICE.type == "cuda": + with torch.cuda.amp.autocast(): + y = model(x) + else: + y = model(x) + _maybe_sync() + process_end = datetime.datetime.now() + + # -------- postprocess -------- + probs = torch.softmax(y[0], dim=0) + idx = 
int(torch.argmax(probs).item()) + pred = idx2label[idx] + + # -------- SAME measurement keys (microseconds) -------- + download_time = (image_download_end - image_download_begin) / datetime.timedelta(microseconds=1) + model_download_time = (model_download_end - model_download_begin) / datetime.timedelta( + microseconds=1 + ) + model_process_time = (model_process_end - model_process_begin) / datetime.timedelta( + microseconds=1 + ) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + + return { + "result": {"idx": idx, "class": pred}, + "measurement": { + "download_time": download_time + model_download_time, + "compute_time": process_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + }, + } diff --git a/benchmarks/400.inference/413.image-classification/python/imagenet_class_index.json b/benchmarks/400.inference/413.image-classification/python/imagenet_class_index.json new file mode 100755 index 000000000..5fe0dfefc --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/imagenet_class_index.json @@ -0,0 +1 @@ +{"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", 
"European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": ["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", 
"centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], "113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", 
"European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": ["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": 
["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": ["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", 
"miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], "268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], 
"293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": ["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", 
"ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", "Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": 
["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": ["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": 
["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", "cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", 
"croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", "fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", 
"gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", "jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", 
"mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", "mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", 
"paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": ["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": 
["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], "785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": 
["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", "sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": 
["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", "water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": 
["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], "957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": 
["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]} \ No newline at end of file diff --git a/benchmarks/400.inference/413.image-classification/python/init.sh b/benchmarks/400.inference/413.image-classification/python/init.sh new file mode 100755 index 000000000..71a2e39c0 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/init.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +DIR=$1 +VERBOSE=$2 +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +path="${SCRIPT_DIR}/imagenet_class_index.json" +if [ "$VERBOSE" = true ]; then + echo "Update ${DIR} with json ${path}" +fi +cp ${path} ${DIR} diff --git a/benchmarks/400.inference/413.image-classification/python/package.sh b/benchmarks/400.inference/413.image-classification/python/package.sh new file mode 100644 index 000000000..038fac7c5 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/package.sh @@ -0,0 +1,32 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . 
-type d -name "bin" -not -path "*/torch/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . -name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then + zip -qr torch.zip $1/torch + rm -rf $1/torch + echo "Torch-zipped size $(du -sh ${CUR_DIR} | cut -f1)" +fi diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt b/benchmarks/400.inference/413.image-classification/python/requirements.txt new file mode 100644 index 000000000..d191dc6dd --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt @@ -0,0 +1,4 @@ +#torch==1.2.0+cpu +#torchvision==0.4.0+cpu +#https://download.pytorch.org/whl/cpu/torch-1.0.1.post2-cp37-cp37m-linux_x86_64.whl +#torch==1.0.1.post2+cpu diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 new file mode 100644 index 000000000..ab734881f --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 @@ -0,0 +1,5 @@ +pillow==10.3.0 +https://download.pytorch.org/whl/cpu/torch-1.11.0%2Bcpu-cp310-cp310-linux_x86_64.whl +torchvision==0.12 +# prevent installing numpy 2.0 +numpy==1.22.0 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 
b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 new file mode 100644 index 000000000..3288171f8 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 @@ -0,0 +1,5 @@ +pillow==10.3.0 +https://download.pytorch.org/whl/cpu/torch-2.0.0%2Bcpu-cp311-cp311-linux_x86_64.whl +torchvision==0.15.1 +# prevent installing numpy 2.0 +numpy==1.24.0 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.6 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.6 new file mode 100644 index 000000000..63409acaa --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.6 @@ -0,0 +1,4 @@ +Pillow==6.1 +numpy==1.16 +https://download.pytorch.org/whl/cpu/torch-1.0.1.post2-cp36-cp36m-linux_x86_64.whl +torchvision==0.2.1 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.7 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.7 new file mode 100644 index 000000000..54bddbd58 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.7 @@ -0,0 +1,4 @@ +Pillow==6.1 +numpy==1.16 +https://download.pytorch.org/whl/cpu/torch-1.0.1.post2-cp37-cp37m-linux_x86_64.whl +torchvision==0.2.1 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 new file mode 100644 index 000000000..7b873eafa --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 @@ -0,0 +1,3 @@ +numpy==1.18 +https://download.pytorch.org/whl/cpu/torch-1.4.0%2Bcpu-cp38-cp38-linux_x86_64.whl +torchvision==0.5 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 new file mode 100644 index 
000000000..c7fc0663e --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 @@ -0,0 +1,3 @@ +numpy==1.20 +https://download.pytorch.org/whl/cpu/torch-1.8.0%2Bcpu-cp39-cp39-linux_x86_64.whl +torchvision==0.9.0 From aae1023e3a03549c93f56f2c3adb0a1868a0fad4 Mon Sep 17 00:00:00 2001 From: McLavish Date: Wed, 5 Nov 2025 09:54:07 +0100 Subject: [PATCH 14/82] fix: replaced onnxruntime requirement from CPU to GPU. now it actually uses the gpu --- .../412.language-bert/python/function.py | 11 ++++++----- .../412.language-bert/python/requirements.txt | 2 +- .../412.language-bert/python/requirements.txt.3.10 | 2 +- .../412.language-bert/python/requirements.txt.3.11 | 2 +- .../412.language-bert/python/requirements.txt.3.8 | 2 +- .../412.language-bert/python/requirements.txt.3.9 | 2 +- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/benchmarks/400.inference/412.language-bert/python/function.py b/benchmarks/400.inference/412.language-bert/python/function.py index b3d6efe12..7e4f981ef 100644 --- a/benchmarks/400.inference/412.language-bert/python/function.py +++ b/benchmarks/400.inference/412.language-bert/python/function.py @@ -58,11 +58,12 @@ def _ensure_model(bucket: str, model_prefix: str): _labels = {int(idx): label for idx, label in raw_labels.items()} onnx_path = os.path.join(model_path, "model.onnx") - available_providers = ort.get_available_providers() - execution_providers = ["CUDAExecutionProvider"] if "CUDAExecutionProvider" in available_providers else [] - execution_providers.append("CPUExecutionProvider") - # Prefer GPU execution when available, otherwise fall back to CPU. 
- _session = ort.InferenceSession(onnx_path, providers=execution_providers) + + available = ort.get_available_providers() + if "CUDAExecutionProvider" not in available: + raise RuntimeError(f"CUDAExecutionProvider unavailable (have: {available})") + + _session = ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"]) model_process_end = datetime.datetime.now() else: model_process_begin = datetime.datetime.now() diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt b/benchmarks/400.inference/412.language-bert/python/requirements.txt index b692be1f7..67a8c1e18 100644 --- a/benchmarks/400.inference/412.language-bert/python/requirements.txt +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt @@ -1,3 +1,3 @@ numpy==1.24.4 -onnxruntime==1.16.3 +onnxruntime-gpu==1.16.3 tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 index b692be1f7..67a8c1e18 100644 --- a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 @@ -1,3 +1,3 @@ numpy==1.24.4 -onnxruntime==1.16.3 +onnxruntime-gpu==1.16.3 tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 index b692be1f7..67a8c1e18 100644 --- a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 @@ -1,3 +1,3 @@ numpy==1.24.4 -onnxruntime==1.16.3 +onnxruntime-gpu==1.16.3 tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 index b692be1f7..67a8c1e18 100644 --- 
a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 @@ -1,3 +1,3 @@ numpy==1.24.4 -onnxruntime==1.16.3 +onnxruntime-gpu==1.16.3 tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 index b692be1f7..67a8c1e18 100644 --- a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 @@ -1,3 +1,3 @@ numpy==1.24.4 -onnxruntime==1.16.3 +onnxruntime-gpu==1.16.3 tokenizers==0.13.3 From 25fd1d9616f1cb5ebb31207b9da011d73cc1f4bf Mon Sep 17 00:00:00 2001 From: McLavish Date: Wed, 5 Nov 2025 10:24:36 +0100 Subject: [PATCH 15/82] circleci mypy fix? --- .mypy.ini | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.mypy.ini b/.mypy.ini index e202650ed..636105bfa 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -3,6 +3,9 @@ [mypy-docker] ignore_missing_imports = True +[mypy-docker.*] +ignore_missing_imports = True + [mypy-tzlocal] ignore_missing_imports = True From d6c422745a9d2d399e316b5e58232054a8f11b06 Mon Sep 17 00:00:00 2001 From: McLavish Date: Wed, 5 Nov 2025 10:45:00 +0100 Subject: [PATCH 16/82] benchmarks is now flake8/black compliant. 
pre-commit hooks also check this --- .pre-commit-config.yaml | 4 +- .../000.microbenchmarks/010.sleep/input.py | 13 +-- .../010.sleep/python/function.py | 6 +- .../020.network-benchmark/input.py | 10 +- .../020.network-benchmark/python/function.py | 35 +++--- .../030.clock-synchronization/input.py | 13 ++- .../python/function.py | 41 +++---- .../040.server-reply/input.py | 13 +-- .../040.server-reply/python/function.py | 5 +- .../100.webapps/110.dynamic-html/input.py | 14 +-- .../110.dynamic-html/python/function.py | 19 ++-- benchmarks/100.webapps/120.uploader/input.py | 24 ++-- .../120.uploader/python/function.py | 39 ++++--- .../200.multimedia/210.thumbnailer/input.py | 33 +++--- .../210.thumbnailer/python/function.py | 54 +++++---- .../220.video-processing/input.py | 33 +++--- .../220.video-processing/python/function.py | 104 ++++++++++-------- .../300.utilities/311.compression/input.py | 26 +++-- .../311.compression/python/function.py | 47 ++++---- .../411.image-recognition/input.py | 39 ++++--- .../411.image-recognition/python/function.py | 86 ++++++++------- .../501.graph-pagerank/input.py | 13 +-- .../501.graph-pagerank/python/function.py | 17 +-- .../500.scientific/502.graph-mst/input.py | 13 +-- .../502.graph-mst/python/function.py | 17 +-- .../500.scientific/503.graph-bfs/input.py | 13 +-- .../503.graph-bfs/python/function.py | 17 +-- .../504.dna-visualisation/input.py | 21 ++-- .../504.dna-visualisation/python/function.py | 33 +++--- benchmarks/wrappers/aws/python/handler.py | 72 ++++++------ benchmarks/wrappers/aws/python/setup.py | 11 +- benchmarks/wrappers/aws/python/storage.py | 16 ++- benchmarks/wrappers/azure/python/handler.py | 81 ++++++++------ benchmarks/wrappers/azure/python/storage.py | 27 ++--- benchmarks/wrappers/gcp/python/handler.py | 76 +++++++------ benchmarks/wrappers/gcp/python/storage.py | 8 +- benchmarks/wrappers/local/python/storage.py | 25 ++--- .../wrappers/openwhisk/python/__main__.py | 20 ++-- 
benchmarks/wrappers/openwhisk/python/nosql.py | 5 +- benchmarks/wrappers/openwhisk/python/setup.py | 12 +- .../wrappers/openwhisk/python/storage.py | 21 ++-- 41 files changed, 628 insertions(+), 548 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 22e59d275..fce4c4da7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,7 +9,7 @@ repos: entry: flake8 args: ["--config=.flake8.cfg"] types: [python] - files: ^sebs/ + files: ^(sebs/|benchmarks/) - repo: local hooks: - id: black-check-local @@ -19,5 +19,5 @@ repos: entry: black args: ["--config=.black.toml", "--check", "--diff"] types: [python] - files: ^sebs/ + files: ^(sebs/|benchmarks/) diff --git a/benchmarks/000.microbenchmarks/010.sleep/input.py b/benchmarks/000.microbenchmarks/010.sleep/input.py index 041d2ba7f..af0427a6c 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/input.py +++ b/benchmarks/000.microbenchmarks/010.sleep/input.py @@ -1,12 +1,11 @@ +size_generators = {"test": 1, "small": 100, "large": 1000} -size_generators = { - 'test' : 1, - 'small' : 100, - 'large': 1000 -} def buckets_count(): return (0, 0) -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'sleep': size_generators[size] } + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"sleep": size_generators[size]} diff --git a/benchmarks/000.microbenchmarks/010.sleep/python/function.py b/benchmarks/000.microbenchmarks/010.sleep/python/function.py index 7dda59a57..64be15557 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/python/function.py +++ b/benchmarks/000.microbenchmarks/010.sleep/python/function.py @@ -1,9 +1,9 @@ - from time import sleep + def handler(event): # start timing - sleep_time = event.get('sleep') + sleep_time = event.get("sleep") sleep(sleep_time) - return { 'result': sleep_time } + return {"result": sleep_time} diff --git 
a/benchmarks/000.microbenchmarks/020.network-benchmark/input.py b/benchmarks/000.microbenchmarks/020.network-benchmark/input.py index 0d969bc74..8f43ffc5a 100644 --- a/benchmarks/000.microbenchmarks/020.network-benchmark/input.py +++ b/benchmarks/000.microbenchmarks/020.network-benchmark/input.py @@ -2,10 +2,12 @@ def buckets_count(): return 0, 1 -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): return { - 'bucket': { - 'bucket': benchmarks_bucket, - 'output': output_paths[0], + "bucket": { + "bucket": benchmarks_bucket, + "output": output_paths[0], }, } diff --git a/benchmarks/000.microbenchmarks/020.network-benchmark/python/function.py b/benchmarks/000.microbenchmarks/020.network-benchmark/python/function.py index eb8ccdcf2..58c376a2d 100644 --- a/benchmarks/000.microbenchmarks/020.network-benchmark/python/function.py +++ b/benchmarks/000.microbenchmarks/020.network-benchmark/python/function.py @@ -1,27 +1,26 @@ import csv -import json import os.path import socket from datetime import datetime -from time import sleep from . 
import storage + def handler(event): - request_id = event['request-id'] - address = event['server-address'] - port = event['server-port'] - repetitions = event['repetitions'] - output_bucket = event.get('bucket').get('bucket') - output_prefix = event.get('bucket').get('output') + request_id = event["request-id"] + address = event["server-address"] + port = event["server-port"] + repetitions = event["repetitions"] + output_bucket = event.get("bucket").get("bucket") + output_prefix = event.get("bucket").get("output") times = [] i = 0 socket.setdefaulttimeout(3) server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(('', 0)) - message = request_id.encode('utf-8') + server_socket.bind(("", 0)) + message = request_id.encode("utf-8") adr = (address, port) consecutive_failures = 0 while i < repetitions + 1: @@ -43,16 +42,16 @@ def handler(event): consecutive_failures = 0 server_socket.settimeout(2) server_socket.close() - + if consecutive_failures != 5: - with open('/tmp/data.csv', 'w', newline='') as csvfile: - writer = csv.writer(csvfile, delimiter=',') - writer.writerow(["id", "client_send", "client_rcv"]) + with open("/tmp/data.csv", "w", newline="") as csvfile: + writer = csv.writer(csvfile, delimiter=",") + writer.writerow(["id", "client_send", "client_rcv"]) for row in times: writer.writerow(row) - + client = storage.storage.get_instance() - filename = 'results-{}.csv'.format(request_id) - key = client.upload(output_bucket, os.path.join(output_prefix, filename), '/tmp/data.csv') + filename = "results-{}.csv".format(request_id) + key = client.upload(output_bucket, os.path.join(output_prefix, filename), "/tmp/data.csv") - return { 'result': key } + return {"result": key} diff --git a/benchmarks/000.microbenchmarks/030.clock-synchronization/input.py b/benchmarks/000.microbenchmarks/030.clock-synchronization/input.py index 427215380..8f43ffc5a 100644 --- 
a/benchmarks/000.microbenchmarks/030.clock-synchronization/input.py +++ b/benchmarks/000.microbenchmarks/030.clock-synchronization/input.py @@ -1,12 +1,13 @@ - - def buckets_count(): return 0, 1 -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): return { - 'bucket': { - 'bucket': benchmarks_bucket, - 'output': output_paths[0], + "bucket": { + "bucket": benchmarks_bucket, + "output": output_paths[0], }, } diff --git a/benchmarks/000.microbenchmarks/030.clock-synchronization/python/function.py b/benchmarks/000.microbenchmarks/030.clock-synchronization/python/function.py index 9ffd978ae..9cf93eccf 100644 --- a/benchmarks/000.microbenchmarks/030.clock-synchronization/python/function.py +++ b/benchmarks/000.microbenchmarks/030.clock-synchronization/python/function.py @@ -1,28 +1,27 @@ import csv -import json import os import socket from datetime import datetime -from time import sleep from . 
import storage + def handler(event): - request_id = event['request-id'] - address = event['server-address'] - port = event['server-port'] - repetitions = event['repetitions'] - output_bucket = event.get('bucket').get('bucket') - output_prefix = event.get('bucket').get('output') + request_id = event["request-id"] + address = event["server-address"] + port = event["server-port"] + repetitions = event["repetitions"] + output_bucket = event.get("bucket").get("bucket") + output_prefix = event.get("bucket").get("output") times = [] print("Starting communication with {}:{}".format(address, port)) i = 0 socket.setdefaulttimeout(4) server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(('', 0)) - message = request_id.encode('utf-8') + server_socket.bind(("", 0)) + message = request_id.encode("utf-8") adr = (address, port) consecutive_failures = 0 measurements_not_smaller = 0 @@ -43,11 +42,13 @@ def handler(event): if i > 0: times.append([i, send_begin, recv_end]) cur_time = recv_end - send_begin - print("Time {} Min Time {} NotSmaller {}".format(cur_time, cur_min, measurements_not_smaller)) + print( + "Time {} Min Time {} NotSmaller {}".format(cur_time, cur_min, measurements_not_smaller) + ) if cur_time > cur_min and cur_min > 0: measurements_not_smaller += 1 if measurements_not_smaller == repetitions: - message = "stop".encode('utf-8') + message = "stop".encode("utf-8") server_socket.sendto(message, adr) break else: @@ -57,18 +58,18 @@ def handler(event): consecutive_failures = 0 server_socket.settimeout(4) server_socket.close() - + if consecutive_failures != 5: - with open('/tmp/data.csv', 'w', newline='') as csvfile: - writer = csv.writer(csvfile, delimiter=',') - writer.writerow(["id", "client_send", "client_rcv"]) + with open("/tmp/data.csv", "w", newline="") as csvfile: + writer = csv.writer(csvfile, delimiter=",") + writer.writerow(["id", "client_send", "client_rcv"]) 
for row in times: writer.writerow(row) - + client = storage.storage.get_instance() - filename = 'results-{}.csv'.format(request_id) - key = client.upload(output_bucket, os.path.join(output_prefix, filename), '/tmp/data.csv') + filename = "results-{}.csv".format(request_id) + key = client.upload(output_bucket, os.path.join(output_prefix, filename), "/tmp/data.csv") else: key = None - return { 'result': {'bucket-key': key, 'timestamp': event['income-timestamp']} } + return {"result": {"bucket-key": key, "timestamp": event["income-timestamp"]}} diff --git a/benchmarks/000.microbenchmarks/040.server-reply/input.py b/benchmarks/000.microbenchmarks/040.server-reply/input.py index 041d2ba7f..af0427a6c 100644 --- a/benchmarks/000.microbenchmarks/040.server-reply/input.py +++ b/benchmarks/000.microbenchmarks/040.server-reply/input.py @@ -1,12 +1,11 @@ +size_generators = {"test": 1, "small": 100, "large": 1000} -size_generators = { - 'test' : 1, - 'small' : 100, - 'large': 1000 -} def buckets_count(): return (0, 0) -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'sleep': size_generators[size] } + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"sleep": size_generators[size]} diff --git a/benchmarks/000.microbenchmarks/040.server-reply/python/function.py b/benchmarks/000.microbenchmarks/040.server-reply/python/function.py index fb5b57aa3..4c2a294ba 100644 --- a/benchmarks/000.microbenchmarks/040.server-reply/python/function.py +++ b/benchmarks/000.microbenchmarks/040.server-reply/python/function.py @@ -1,11 +1,10 @@ - import socket -from time import sleep + def handler(event): # start timing - addr = (event.get('ip-address'), event.get('port')) + addr = (event.get("ip-address"), event.get("port")) socket.setdefaulttimeout(20) s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect(addr) diff --git 
a/benchmarks/100.webapps/110.dynamic-html/input.py b/benchmarks/100.webapps/110.dynamic-html/input.py index 98dac88b2..c20154ec3 100644 --- a/benchmarks/100.webapps/110.dynamic-html/input.py +++ b/benchmarks/100.webapps/110.dynamic-html/input.py @@ -1,11 +1,9 @@ +size_generators = {"test": 10, "small": 1000, "large": 100000} -size_generators = { - 'test' : 10, - 'small' : 1000, - 'large': 100000 -} -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - input_config = {'username': 'testname'} - input_config['random_len'] = size_generators[size] +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + input_config = {"username": "testname"} + input_config["random_len"] = size_generators[size] return input_config diff --git a/benchmarks/100.webapps/110.dynamic-html/python/function.py b/benchmarks/100.webapps/110.dynamic-html/python/function.py index 7c990f4eb..6f7b42bc5 100644 --- a/benchmarks/100.webapps/110.dynamic-html/python/function.py +++ b/benchmarks/100.webapps/110.dynamic-html/python/function.py @@ -1,22 +1,21 @@ -from datetime import datetime -from random import sample +from datetime import datetime +from random import sample from os import path -from time import time -import os from jinja2 import Template SCRIPT_DIR = path.abspath(path.join(path.dirname(__file__))) + def handler(event): # start timing - name = event.get('username') - size = event.get('random_len') + name = event.get("username") + size = event.get("random_len") cur_time = datetime.now() random_numbers = sample(range(0, 1000000), size) - template = Template( open(path.join(SCRIPT_DIR, 'templates', 'template.html'), 'r').read()) - html = template.render(username = name, cur_time = cur_time, random_numbers = random_numbers) + template = Template(open(path.join(SCRIPT_DIR, "templates", "template.html"), "r").read()) + html = template.render(username=name, cur_time=cur_time, 
random_numbers=random_numbers) # end timing - # dump stats - return {'result': html} + # dump stats + return {"result": html} diff --git a/benchmarks/100.webapps/120.uploader/input.py b/benchmarks/100.webapps/120.uploader/input.py index ce6169ccb..7aafb2b22 100644 --- a/benchmarks/100.webapps/120.uploader/input.py +++ b/benchmarks/100.webapps/120.uploader/input.py @@ -1,19 +1,25 @@ - url_generators = { # source: mlperf fake_imagenet.sh. 230 kB - 'test' : 'https://upload.wikimedia.org/wikipedia/commons/thumb/e/e7/Jammlich_crop.jpg/800px-Jammlich_crop.jpg', + "test": ( + "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e7/" + "Jammlich_crop.jpg/800px-Jammlich_crop.jpg" + ), # video: HPX source code, 6.7 MB - 'small': 'https://github.com/STEllAR-GROUP/hpx/archive/refs/tags/1.4.0.zip', + "small": "https://github.com/STEllAR-GROUP/hpx/archive/refs/tags/1.4.0.zip", # resnet model from pytorch. 98M - 'large': 'https://download.pytorch.org/models/resnet50-19c8e357.pth' + "large": "https://download.pytorch.org/models/resnet50-19c8e357.pth", } + def buckets_count(): return (0, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): - input_config = {'object': {}, 'bucket': {}} - input_config['object']['url'] = url_generators[size] - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['output'] = output_buckets[0] + +def generate_input( + data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func +): + input_config = {"object": {}, "bucket": {}} + input_config["object"]["url"] = url_generators[size] + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["output"] = output_buckets[0] return input_config diff --git a/benchmarks/100.webapps/120.uploader/python/function.py b/benchmarks/100.webapps/120.uploader/python/function.py index d032bbdb6..cb17131f1 100755 --- a/benchmarks/100.webapps/120.uploader/python/function.py +++ 
b/benchmarks/100.webapps/120.uploader/python/function.py @@ -1,26 +1,29 @@ - import datetime import os import urllib.request from . import storage + client = storage.storage.get_instance() -SEBS_USER_AGENT = "SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2" +SEBS_USER_AGENT = ( + "SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2" +) + def handler(event): - bucket = event.get('bucket').get('bucket') - output_prefix = event.get('bucket').get('output') - url = event.get('object').get('url') + bucket = event.get("bucket").get("bucket") + output_prefix = event.get("bucket").get("output") + url = event.get("object").get("url") name = os.path.basename(url) - download_path = '/tmp/{}'.format(name) + download_path = "/tmp/{}".format(name) process_begin = datetime.datetime.now() req = urllib.request.Request(url) - req.add_header('User-Agent', SEBS_USER_AGENT) - with open(download_path, 'wb') as f: + req.add_header("User-Agent", SEBS_USER_AGENT) + with open(download_path, "wb") as f: with urllib.request.urlopen(req) as response: f.write(response.read()) size = os.path.getsize(download_path) @@ -33,16 +36,12 @@ def handler(event): process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'url': url, - 'key': key_name - }, - 'measurement': { - 'download_time': 0, - 'download_size': 0, - 'upload_time': upload_time, - 'upload_size': size, - 'compute_time': process_time - } + "result": {"bucket": bucket, "url": url, "key": key_name}, + "measurement": { + "download_time": 0, + "download_size": 0, + "upload_time": upload_time, + "upload_size": size, + "compute_time": process_time, + }, } diff --git a/benchmarks/200.multimedia/210.thumbnailer/input.py b/benchmarks/200.multimedia/210.thumbnailer/input.py index 8943effed..6f04bfafb 100644 --- 
a/benchmarks/200.multimedia/210.thumbnailer/input.py +++ b/benchmarks/200.multimedia/210.thumbnailer/input.py @@ -1,9 +1,12 @@ -import glob, os +import glob +import os + def buckets_count(): return (1, 1) -''' + +""" Generate test, small and large workload for thumbnailer. :param data_dir: directory where benchmark data is placed @@ -11,19 +14,23 @@ def buckets_count(): :param input_buckets: input storage containers for this benchmark :param output_buckets: :param upload_func: upload function taking three params(bucket_idx, key, filepath) -''' -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): - for file in glob.glob(os.path.join(data_dir, '*.jpg')): + for file in glob.glob(os.path.join(data_dir, "*.jpg")): img = os.path.relpath(file, data_dir) upload_func(0, img, file) - #TODO: multiple datasets - input_config = {'object': {}, 'bucket': {}} - input_config['object']['key'] = img - input_config['object']['width'] = 200 - input_config['object']['height'] = 200 - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[0] - input_config['bucket']['output'] = output_paths[0] + # TODO: multiple datasets + input_config = {"object": {}, "bucket": {}} + input_config["object"]["key"] = img + input_config["object"]["width"] = 200 + input_config["object"]["height"] = 200 + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[0] + input_config["bucket"]["output"] = output_paths[0] return input_config diff --git a/benchmarks/200.multimedia/210.thumbnailer/python/function.py b/benchmarks/200.multimedia/210.thumbnailer/python/function.py index 20527067b..2df0a7bfb 100755 --- a/benchmarks/200.multimedia/210.thumbnailer/python/function.py +++ b/benchmarks/200.multimedia/210.thumbnailer/python/function.py @@ -1,44 
+1,45 @@ import datetime import io import os -import sys -import uuid from urllib.parse import unquote_plus from PIL import Image from . import storage + client = storage.storage.get_instance() # Disk-based solution -#def resize_image(image_path, resized_path, w, h): +# def resize_image(image_path, resized_path, w, h): # with Image.open(image_path) as image: # image.thumbnail((w,h)) # image.save(resized_path) + # Memory-based solution def resize_image(image_bytes, w, h): with Image.open(io.BytesIO(image_bytes)) as image: - image.thumbnail((w,h)) + image.thumbnail((w, h)) out = io.BytesIO() - image.save(out, format='jpeg') + image.save(out, format="jpeg") # necessary to rewind to the beginning of the buffer out.seek(0) return out + def handler(event): - - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - output_prefix = event.get('bucket').get('output') - key = unquote_plus(event.get('object').get('key')) - width = event.get('object').get('width') - height = event.get('object').get('height') + + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + output_prefix = event.get("bucket").get("output") + key = unquote_plus(event.get("object").get("key")) + width = event.get("object").get("width") + height = event.get("object").get("height") # UUID to handle multiple calls - #download_path = '/tmp/{}-{}'.format(uuid.uuid4(), key) - #upload_path = '/tmp/resized-{}'.format(key) - #client.download(input_bucket, key, download_path) - #resize_image(download_path, upload_path, width, height) - #client.upload(output_bucket, key, upload_path) + # download_path = '/tmp/{}-{}'.format(uuid.uuid4(), key) + # upload_path = '/tmp/resized-{}'.format(key) + # client.download(input_bucket, key, download_path) + # resize_image(download_path, upload_path, width, height) + # client.upload(output_bucket, key, upload_path) download_begin = datetime.datetime.now() img = client.download_stream(bucket, 
os.path.join(input_prefix, key)) download_end = datetime.datetime.now() @@ -56,15 +57,12 @@ def handler(event): upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'key': key_name - }, - 'measurement': { - 'download_time': download_time, - 'download_size': len(img), - 'upload_time': upload_time, - 'upload_size': resized_size, - 'compute_time': process_time - } + "result": {"bucket": bucket, "key": key_name}, + "measurement": { + "download_time": download_time, + "download_size": len(img), + "upload_time": upload_time, + "upload_size": resized_size, + "compute_time": process_time, + }, } diff --git a/benchmarks/200.multimedia/220.video-processing/input.py b/benchmarks/200.multimedia/220.video-processing/input.py index 6da31647f..86c7191cb 100644 --- a/benchmarks/200.multimedia/220.video-processing/input.py +++ b/benchmarks/200.multimedia/220.video-processing/input.py @@ -1,9 +1,12 @@ -import glob, os +import glob +import os + def buckets_count(): return (1, 1) -''' + +""" Generate test, small and large workload for thumbnailer. 
:param data_dir: directory where benchmark data is placed @@ -11,17 +14,21 @@ def buckets_count(): :param input_buckets: input storage containers for this benchmark :param output_buckets: :param upload_func: upload function taking three params(bucket_idx, key, filepath) -''' -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - for file in glob.glob(os.path.join(data_dir, '*.mp4')): +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + for file in glob.glob(os.path.join(data_dir, "*.mp4")): img = os.path.relpath(file, data_dir) upload_func(0, img, file) - #TODO: multiple datasets - input_config = {'object': {}, 'bucket': {}} - input_config['object']['key'] = img - input_config['object']['op'] = 'watermark' - input_config['object']['duration'] = 1 - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[0] - input_config['bucket']['output'] = output_paths[0] + # TODO: multiple datasets + input_config = {"object": {}, "bucket": {}} + input_config["object"]["key"] = img + input_config["object"]["op"] = "watermark" + input_config["object"]["duration"] = 1 + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[0] + input_config["bucket"]["output"] = output_paths[0] return input_config diff --git a/benchmarks/200.multimedia/220.video-processing/python/function.py b/benchmarks/200.multimedia/220.video-processing/python/function.py index 9f8a869aa..ab132ba2e 100755 --- a/benchmarks/200.multimedia/220.video-processing/python/function.py +++ b/benchmarks/200.multimedia/220.video-processing/python/function.py @@ -7,62 +7,84 @@ from . 
import storage + client = storage.storage.get_instance() SCRIPT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) + def call_ffmpeg(args): - ret = subprocess.run([os.path.join(SCRIPT_DIR, 'ffmpeg', 'ffmpeg'), '-y'] + args, - #subprocess might inherit Lambda's input for some reason - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ret = subprocess.run( + [os.path.join(SCRIPT_DIR, "ffmpeg", "ffmpeg"), "-y"] + args, + # subprocess might inherit Lambda's input for some reason + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, ) if ret.returncode != 0: - print('Invocation of ffmpeg failed!') - print('Out: ', ret.stdout.decode('utf-8')) + print("Invocation of ffmpeg failed!") + print("Out: ", ret.stdout.decode("utf-8")) raise RuntimeError() + # https://superuser.com/questions/556029/how-do-i-convert-a-video-to-gif-using-ffmpeg-with-reasonable-quality def to_gif(video, duration, event): - output = '/tmp/processed-{}.gif'.format(os.path.basename(video)) - call_ffmpeg(["-i", video, - "-t", - "{0}".format(duration), - "-vf", - "fps=10,scale=320:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse", - "-loop", "0", - output]) + output = "/tmp/processed-{}.gif".format(os.path.basename(video)) + call_ffmpeg( + [ + "-i", + video, + "-t", + "{0}".format(duration), + "-vf", + "fps=10,scale=320:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse", + "-loop", + "0", + output, + ] + ) return output + # https://devopstar.com/2019/01/28/serverless-watermark-using-aws-lambda-layers-ffmpeg/ def watermark(video, duration, event): - output = '/tmp/processed-{}'.format(os.path.basename(video)) + output = "/tmp/processed-{}".format(os.path.basename(video)) watermark_file = os.path.dirname(os.path.realpath(__file__)) - call_ffmpeg([ - "-i", video, - "-i", os.path.join(watermark_file, os.path.join('resources', 'watermark.png')), - "-t", "{0}".format(duration), - "-filter_complex", 
"overlay=main_w/2-overlay_w/2:main_h/2-overlay_h/2", - output]) + call_ffmpeg( + [ + "-i", + video, + "-i", + os.path.join(watermark_file, os.path.join("resources", "watermark.png")), + "-t", + "{0}".format(duration), + "-filter_complex", + "overlay=main_w/2-overlay_w/2:main_h/2-overlay_h/2", + output, + ] + ) return output + def transcode_mp3(video, duration, event): pass -operations = { 'transcode' : transcode_mp3, 'extract-gif' : to_gif, 'watermark' : watermark } + +operations = {"transcode": transcode_mp3, "extract-gif": to_gif, "watermark": watermark} + def handler(event): - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - output_prefix = event.get('bucket').get('output') - key = event.get('object').get('key') - duration = event.get('object').get('duration') - op = event.get('object').get('op') - download_path = '/tmp/{}'.format(key) + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + output_prefix = event.get("bucket").get("output") + key = event.get("object").get("key") + duration = event.get("object").get("duration") + op = event.get("object").get("op") + download_path = "/tmp/{}".format(key) # Restore executable permission - ffmpeg_binary = os.path.join(SCRIPT_DIR, 'ffmpeg', 'ffmpeg') + ffmpeg_binary = os.path.join(SCRIPT_DIR, "ffmpeg", "ffmpeg") # needed on Azure but read-only filesystem on AWS try: st = os.stat(ffmpeg_binary) @@ -89,16 +111,12 @@ def handler(event): upload_time = (upload_stop - upload_begin) / datetime.timedelta(microseconds=1) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'key': upload_key - }, - 'measurement': { - 'download_time': download_time, - 'download_size': download_size, - 'upload_time': upload_time, - 'upload_size': upload_size, - 'compute_time': process_time - } - } - + "result": {"bucket": bucket, "key": upload_key}, + "measurement": { + "download_time": 
download_time, + "download_size": download_size, + "upload_time": upload_time, + "upload_size": upload_size, + "compute_time": process_time, + }, + } diff --git a/benchmarks/300.utilities/311.compression/input.py b/benchmarks/300.utilities/311.compression/input.py index 5f88bc91a..e9e706bd5 100644 --- a/benchmarks/300.utilities/311.compression/input.py +++ b/benchmarks/300.utilities/311.compression/input.py @@ -1,4 +1,5 @@ -import glob, os +import os + def buckets_count(): return (1, 1) @@ -9,11 +10,12 @@ def upload_files(data_root, data_dir, upload_func): for root, dirs, files in os.walk(data_dir): prefix = os.path.relpath(root, data_root) for file in files: - file_name = prefix + '/' + file + file_name = prefix + "/" + file filepath = os.path.join(root, file) upload_func(0, file_name, filepath) -''' + +""" Generate test, small and large workload for compression test. :param data_dir: directory where benchmark data is placed @@ -21,8 +23,12 @@ def upload_files(data_root, data_dir, upload_func): :param input_buckets: input storage containers for this benchmark :param output_buckets: :param upload_func: upload function taking three params(bucket_idx, key, filepath) -''' -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): # upload different datasets datasets = [] @@ -30,9 +36,9 @@ def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, datasets.append(dir) upload_files(data_dir, os.path.join(data_dir, dir), upload_func) - input_config = {'object': {}, 'bucket': {}} - input_config['object']['key'] = datasets[0] - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[0] - input_config['bucket']['output'] = output_paths[0] + input_config = {"object": {}, "bucket": {}} + input_config["object"]["key"] = datasets[0] + 
input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[0] + input_config["bucket"]["output"] = output_paths[0] return input_config diff --git a/benchmarks/300.utilities/311.compression/python/function.py b/benchmarks/300.utilities/311.compression/python/function.py index f758e14e4..8ceb52d2f 100755 --- a/benchmarks/300.utilities/311.compression/python/function.py +++ b/benchmarks/300.utilities/311.compression/python/function.py @@ -1,13 +1,13 @@ import datetime -import io import os import shutil import uuid -import zlib from . import storage + client = storage.storage.get_instance() + def parse_directory(directory): size = 0 @@ -16,13 +16,14 @@ def parse_directory(directory): size += os.path.getsize(os.path.join(root, file)) return size + def handler(event): - - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - output_prefix = event.get('bucket').get('output') - key = event.get('object').get('key') - download_path = '/tmp/{}-{}'.format(key, uuid.uuid4()) + + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + output_prefix = event.get("bucket").get("output") + key = event.get("object").get("key") + download_path = "/tmp/{}-{}".format(key, uuid.uuid4()) os.makedirs(download_path) s3_download_begin = datetime.datetime.now() @@ -31,29 +32,27 @@ def handler(event): size = parse_directory(download_path) compress_begin = datetime.datetime.now() - shutil.make_archive(os.path.join(download_path, key), 'zip', root_dir=download_path) + shutil.make_archive(os.path.join(download_path, key), "zip", root_dir=download_path) compress_end = datetime.datetime.now() s3_upload_begin = datetime.datetime.now() - archive_name = '{}.zip'.format(key) + archive_name = "{}.zip".format(key) archive_size = os.path.getsize(os.path.join(download_path, archive_name)) - key_name = client.upload(bucket, os.path.join(output_prefix, archive_name), 
os.path.join(download_path, archive_name)) + key_name = client.upload( + bucket, os.path.join(output_prefix, archive_name), os.path.join(download_path, archive_name) + ) s3_upload_stop = datetime.datetime.now() download_time = (s3_download_stop - s3_download_begin) / datetime.timedelta(microseconds=1) upload_time = (s3_upload_stop - s3_upload_begin) / datetime.timedelta(microseconds=1) process_time = (compress_end - compress_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'key': key_name - }, - 'measurement': { - 'download_time': download_time, - 'download_size': size, - 'upload_time': upload_time, - 'upload_size': archive_size, - 'compute_time': process_time - } - } - + "result": {"bucket": bucket, "key": key_name}, + "measurement": { + "download_time": download_time, + "download_size": size, + "upload_time": upload_time, + "upload_size": archive_size, + "compute_time": process_time, + }, + } diff --git a/benchmarks/400.inference/411.image-recognition/input.py b/benchmarks/400.inference/411.image-recognition/input.py index 45d7215a6..c5ce190d0 100644 --- a/benchmarks/400.inference/411.image-recognition/input.py +++ b/benchmarks/400.inference/411.image-recognition/input.py @@ -1,18 +1,21 @@ -import glob, os +import os + def buckets_count(): return (2, 0) + def upload_files(data_root, data_dir, upload_func): for root, dirs, files in os.walk(data_dir): prefix = os.path.relpath(root, data_root) for file in files: - file_name = prefix + '/' + file + file_name = prefix + "/" + file filepath = os.path.join(root, file) upload_func(0, file_name, filepath) -''' + +""" Generate test, small and large workload for compression test. 
:param data_dir: directory where benchmark data is placed @@ -20,25 +23,29 @@ def upload_files(data_root, data_dir, upload_func): :param input_buckets: input storage containers for this benchmark :param output_buckets: :param upload_func: upload function taking three params(bucket_idx, key, filepath) -''' -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): # upload model - model_name = 'resnet50-19c8e357.pth' - upload_func(0, model_name, os.path.join(data_dir, 'model', model_name)) + model_name = "resnet50-19c8e357.pth" + upload_func(0, model_name, os.path.join(data_dir, "model", model_name)) input_images = [] - resnet_path = os.path.join(data_dir, 'fake-resnet') - with open(os.path.join(resnet_path, 'val_map.txt'), 'r') as f: + resnet_path = os.path.join(data_dir, "fake-resnet") + with open(os.path.join(resnet_path, "val_map.txt"), "r") as f: for line in f: img, img_class = line.split() input_images.append((img, img_class)) upload_func(1, img, os.path.join(resnet_path, img)) - - input_config = {'object': {}, 'bucket': {}} - input_config['object']['model'] = model_name - input_config['object']['input'] = input_images[0][0] - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[1] - input_config['bucket']['model'] = input_paths[0] + + input_config = {"object": {}, "bucket": {}} + input_config["object"]["model"] = model_name + input_config["object"]["input"] = input_images[0][0] + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[1] + input_config["bucket"]["model"] = input_paths[0] return input_config diff --git a/benchmarks/400.inference/411.image-recognition/python/function.py b/benchmarks/400.inference/411.image-recognition/python/function.py index 411386419..0cfa1c57f 100644 --- 
a/benchmarks/400.inference/411.image-recognition/python/function.py +++ b/benchmarks/400.inference/411.image-recognition/python/function.py @@ -1,14 +1,20 @@ - -import datetime, json, os, uuid +import datetime +import json +import os +import uuid # Extract zipped torch model - used in Python 3.8 and 3.9 # The reason is that torch versions supported for these Python # versions are too large for Lambda packages. -if os.path.exists('function/torch.zip'): - import zipfile, sys +if os.path.exists("function/torch.zip"): + import sys + import zipfile + # we cannot write to the read-only filesystem - zipfile.ZipFile('function/torch.zip').extractall('/tmp/') - sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages')) + zipfile.ZipFile("function/torch.zip").extractall("/tmp/") + sys.path.append( + os.path.join(os.path.dirname(__file__), "/tmp/.python_packages/lib/site-packages") + ) from PIL import Image import torch @@ -16,21 +22,23 @@ from torchvision.models import resnet50 from . 
import storage + client = storage.storage.get_instance() SCRIPT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) -class_idx = json.load(open(os.path.join(SCRIPT_DIR, "imagenet_class_index.json"), 'r')) +class_idx = json.load(open(os.path.join(SCRIPT_DIR, "imagenet_class_index.json"), "r")) idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))] model = None + def handler(event): - - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - model_prefix = event.get('bucket').get('model') - key = event.get('object').get('input') - model_key = event.get('object').get('model') - download_path = '/tmp/{}-{}'.format(key, uuid.uuid4()) + + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + model_prefix = event.get("bucket").get("model") + key = event.get("object").get("input") + model_key = event.get("object").get("model") + download_path = "/tmp/{}-{}".format(key, uuid.uuid4()) image_download_begin = datetime.datetime.now() image_path = download_path @@ -40,7 +48,7 @@ def handler(event): global model if not model: model_download_begin = datetime.datetime.now() - model_path = os.path.join('/tmp', model_key) + model_path = os.path.join("/tmp", model_key) client.download(bucket, os.path.join(model_prefix, model_key), model_path) model_download_end = datetime.datetime.now() model_process_begin = datetime.datetime.now() @@ -53,36 +61,38 @@ def handler(event): model_download_end = model_download_begin model_process_begin = datetime.datetime.now() model_process_end = model_process_begin - + process_begin = datetime.datetime.now() input_image = Image.open(image_path) - preprocess = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ]) + preprocess = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + 
transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ) input_tensor = preprocess(input_image) - input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model + input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model output = model(input_batch) _, index = torch.max(output, 1) - # The output has unnormalized scores. To get probabilities, you can run a softmax on it. - prob = torch.nn.functional.softmax(output[0], dim=0) - _, indices = torch.sort(output, descending = True) ret = idx2label[index] process_end = datetime.datetime.now() - download_time = (image_download_end- image_download_begin) / datetime.timedelta(microseconds=1) - model_download_time = (model_download_end - model_download_begin) / datetime.timedelta(microseconds=1) - model_process_time = (model_process_end - model_process_begin) / datetime.timedelta(microseconds=1) + download_time = (image_download_end - image_download_begin) / datetime.timedelta(microseconds=1) + model_download_time = (model_download_end - model_download_begin) / datetime.timedelta( + microseconds=1 + ) + model_process_time = (model_process_end - model_process_begin) / datetime.timedelta( + microseconds=1 + ) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': {'idx': index.item(), 'class': ret}, - 'measurement': { - 'download_time': download_time + model_download_time, - 'compute_time': process_time + model_process_time, - 'model_time': model_process_time, - 'model_download_time': model_download_time - } - } - + "result": {"idx": index.item(), "class": ret}, + "measurement": { + "download_time": download_time + model_download_time, + "compute_time": process_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + }, + } diff --git a/benchmarks/500.scientific/501.graph-pagerank/input.py 
b/benchmarks/500.scientific/501.graph-pagerank/input.py index e20a6dcd1..a4ab10fb8 100644 --- a/benchmarks/500.scientific/501.graph-pagerank/input.py +++ b/benchmarks/500.scientific/501.graph-pagerank/input.py @@ -1,8 +1,7 @@ -size_generators = { - 'test' : 10, - 'small' : 10000, - 'large': 100000 -} +size_generators = {"test": 10, "small": 10000, "large": 100000} -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'size': size_generators[size], 'seed': 42} + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/500.scientific/501.graph-pagerank/python/function.py b/benchmarks/500.scientific/501.graph-pagerank/python/function.py index 0e462e9b4..461fc14a9 100755 --- a/benchmarks/500.scientific/501.graph-pagerank/python/function.py +++ b/benchmarks/500.scientific/501.graph-pagerank/python/function.py @@ -1,9 +1,10 @@ import datetime import igraph + def handler(event): - size = event.get('size') + size = event.get("size") if "seed" in event: import random @@ -17,13 +18,15 @@ def handler(event): result = graph.pagerank() process_end = datetime.datetime.now() - graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta( + microseconds=1 + ) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': result[0], - 'measurement': { - 'graph_generating_time': graph_generating_time, - 'compute_time': process_time - } + "result": result[0], + "measurement": { + "graph_generating_time": graph_generating_time, + "compute_time": process_time, + }, } diff --git a/benchmarks/500.scientific/502.graph-mst/input.py b/benchmarks/500.scientific/502.graph-mst/input.py index e20a6dcd1..a4ab10fb8 100644 
--- a/benchmarks/500.scientific/502.graph-mst/input.py +++ b/benchmarks/500.scientific/502.graph-mst/input.py @@ -1,8 +1,7 @@ -size_generators = { - 'test' : 10, - 'small' : 10000, - 'large': 100000 -} +size_generators = {"test": 10, "small": 10000, "large": 100000} -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'size': size_generators[size], 'seed': 42} + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/500.scientific/502.graph-mst/python/function.py b/benchmarks/500.scientific/502.graph-mst/python/function.py index b63fbdce2..69ad77678 100755 --- a/benchmarks/500.scientific/502.graph-mst/python/function.py +++ b/benchmarks/500.scientific/502.graph-mst/python/function.py @@ -1,9 +1,10 @@ import datetime import igraph + def handler(event): - size = event.get('size') + size = event.get("size") if "seed" in event: import random @@ -17,13 +18,15 @@ def handler(event): result = graph.spanning_tree(None, False) process_end = datetime.datetime.now() - graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta( + microseconds=1 + ) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': result[0], - 'measurement': { - 'graph_generating_time': graph_generating_time, - 'compute_time': process_time - } + "result": result[0], + "measurement": { + "graph_generating_time": graph_generating_time, + "compute_time": process_time, + }, } diff --git a/benchmarks/500.scientific/503.graph-bfs/input.py b/benchmarks/500.scientific/503.graph-bfs/input.py index e20a6dcd1..a4ab10fb8 100644 --- a/benchmarks/500.scientific/503.graph-bfs/input.py +++ 
b/benchmarks/500.scientific/503.graph-bfs/input.py @@ -1,8 +1,7 @@ -size_generators = { - 'test' : 10, - 'small' : 10000, - 'large': 100000 -} +size_generators = {"test": 10, "small": 10000, "large": 100000} -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'size': size_generators[size], 'seed': 42} + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/500.scientific/503.graph-bfs/python/function.py b/benchmarks/500.scientific/503.graph-bfs/python/function.py index 18423ae1a..51a37346b 100755 --- a/benchmarks/500.scientific/503.graph-bfs/python/function.py +++ b/benchmarks/500.scientific/503.graph-bfs/python/function.py @@ -1,9 +1,10 @@ import datetime import igraph + def handler(event): - size = event.get('size') + size = event.get("size") if "seed" in event: import random @@ -17,13 +18,15 @@ def handler(event): result = graph.bfs(0) process_end = datetime.datetime.now() - graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta( + microseconds=1 + ) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': result, - 'measurement': { - 'graph_generating_time': graph_generating_time, - 'compute_time': process_time - } + "result": result, + "measurement": { + "graph_generating_time": graph_generating_time, + "compute_time": process_time, + }, } diff --git a/benchmarks/500.scientific/504.dna-visualisation/input.py b/benchmarks/500.scientific/504.dna-visualisation/input.py index a9f376ea2..ea26f48c0 100644 --- a/benchmarks/500.scientific/504.dna-visualisation/input.py +++ b/benchmarks/500.scientific/504.dna-visualisation/input.py @@ -1,16 +1,21 @@ -import glob, os +import glob 
+import os + def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - for file in glob.glob(os.path.join(data_dir, '*.fasta')): +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + + for file in glob.glob(os.path.join(data_dir, "*.fasta")): data = os.path.relpath(file, data_dir) upload_func(0, data, file) - input_config = {'object': {}, 'bucket': {}} - input_config['object']['key'] = data - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[0] - input_config['bucket']['output'] = output_paths[0] + input_config = {"object": {}, "bucket": {}} + input_config["object"]["key"] = data + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[0] + input_config["bucket"]["output"] = output_paths[0] return input_config diff --git a/benchmarks/500.scientific/504.dna-visualisation/python/function.py b/benchmarks/500.scientific/504.dna-visualisation/python/function.py index 8362a73a1..ca9f5975e 100755 --- a/benchmarks/500.scientific/504.dna-visualisation/python/function.py +++ b/benchmarks/500.scientific/504.dna-visualisation/python/function.py @@ -1,17 +1,23 @@ -import datetime, io, json, os +import datetime +import io +import json +import os + # using https://squiggle.readthedocs.io/en/latest/ from squiggle import transform from . 
import storage + client = storage.storage.get_instance() + def handler(event): - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - output_prefix = event.get('bucket').get('output') - key = event.get('object').get('key') - download_path = '/tmp/{}'.format(key) + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + output_prefix = event.get("bucket").get("output") + key = event.get("object").get("key") + download_path = "/tmp/{}".format(key) download_begin = datetime.datetime.now() client.download(bucket, os.path.join(input_prefix, key), download_path) @@ -34,13 +40,10 @@ def handler(event): process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'key': key_name - }, - 'measurement': { - 'download_time': download_time, - 'compute_time': process_time, - 'upload_time': process_time - } + "result": {"bucket": bucket, "key": key_name}, + "measurement": { + "download_time": download_time, + "compute_time": process_time, + "upload_time": upload_time, + }, } diff --git a/benchmarks/wrappers/aws/python/handler.py b/benchmarks/wrappers/aws/python/handler.py index 907b2c612..f5a1d4195 100644 --- a/benchmarks/wrappers/aws/python/handler.py +++ b/benchmarks/wrappers/aws/python/handler.py @@ -1,39 +1,46 @@ - -import datetime, io, json, os, sys, uuid +import datetime +import io +import json +import os +import sys +import uuid # Add current directory to allow location of packages -sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) # TODO: usual trigger # implement support for S3 and others + + def handler(event, context): income_timestamp = datetime.datetime.now().timestamp() # HTTP trigger with API Gateaway - if 'body' in event: - event = json.loads(event['body']) + if "body" in event: + 
event = json.loads(event["body"]) req_id = context.aws_request_id - event['request-id'] = req_id - event['income-timestamp'] = income_timestamp + event["request-id"] = req_id + event["income-timestamp"] = income_timestamp begin = datetime.datetime.now() from function import function + ret = function.handler(event) end = datetime.datetime.now() - log_data = { - 'output': ret['result'] - } - if 'measurement' in ret: - log_data['measurement'] = ret['measurement'] - if 'logs' in event: - log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + log_data = {"output": ret["result"]} + if "measurement" in ret: + log_data["measurement"] = ret["measurement"] + if "logs" in event: + log_data["time"] = (end - begin) / datetime.timedelta(microseconds=1) results_begin = datetime.datetime.now() from function import storage + storage_inst = storage.storage.get_instance() - b = event.get('logs').get('bucket') - storage_inst.upload_stream(b, '{}.json'.format(req_id), - io.BytesIO(json.dumps(log_data).encode('utf-8'))) + b = event.get("logs").get("bucket") + storage_inst.upload_stream( + b, "{}.json".format(req_id), io.BytesIO(json.dumps(log_data).encode("utf-8")) + ) results_end = datetime.datetime.now() results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) else: @@ -41,14 +48,14 @@ def handler(event, context): # cold test is_cold = False - fname = os.path.join('/tmp', 'cold_run') + fname = os.path.join("/tmp", "cold_run") if not os.path.exists(fname): is_cold = True container_id = str(uuid.uuid4())[0:8] - with open(fname, 'a') as f: + with open(fname, "a") as f: f.write(container_id) else: - with open(fname, 'r') as f: + with open(fname, "r") as f: container_id = f.read() cold_start_var = "" @@ -56,16 +63,17 @@ def handler(event, context): cold_start_var = os.environ["cold_start"] return { - 'statusCode': 200, - 'body': json.dumps({ - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'results_time': results_time, - 
'is_cold': is_cold, - 'result': log_data, - 'request_id': context.aws_request_id, - 'cold_start_var': cold_start_var, - 'container_id': container_id, - }) + "statusCode": 200, + "body": json.dumps( + { + "begin": begin.strftime("%s.%f"), + "end": end.strftime("%s.%f"), + "results_time": results_time, + "is_cold": is_cold, + "result": log_data, + "request_id": context.aws_request_id, + "cold_start_var": cold_start_var, + "container_id": container_id, + } + ), } - diff --git a/benchmarks/wrappers/aws/python/setup.py b/benchmarks/wrappers/aws/python/setup.py index b3d878351..016974465 100644 --- a/benchmarks/wrappers/aws/python/setup.py +++ b/benchmarks/wrappers/aws/python/setup.py @@ -2,14 +2,13 @@ from glob import glob from pkg_resources import parse_requirements -with open('requirements.txt') as f: +with open("requirements.txt") as f: requirements = [str(r) for r in parse_requirements(f)] setup( - name='function', + name="function", install_requires=requirements, - packages=['function'], - package_dir={'function': '.'}, - package_data={'function': glob('**', recursive=True)}, + packages=["function"], + package_dir={"function": "."}, + package_data={"function": glob("**", recursive=True)}, ) - diff --git a/benchmarks/wrappers/aws/python/storage.py b/benchmarks/wrappers/aws/python/storage.py index 4be0025e8..50875fbfc 100644 --- a/benchmarks/wrappers/aws/python/storage.py +++ b/benchmarks/wrappers/aws/python/storage.py @@ -10,16 +10,14 @@ class storage: client = None def __init__(self): - self.client = boto3.client('s3') + self.client = boto3.client("s3") @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, bucket, file, filepath): key_name = storage.unique_name(file) @@ -31,8 
+29,8 @@ def download(self, bucket, file, filepath): def download_directory(self, bucket, prefix, path): objects = self.client.list_objects_v2(Bucket=bucket, Prefix=prefix) - for obj in objects['Contents']: - file_name = obj['Key'] + for obj in objects["Contents"]: + file_name = obj["Key"] path_to_file = os.path.dirname(file_name) os.makedirs(os.path.join(path, path_to_file), exist_ok=True) self.download(bucket, file_name, os.path.join(path, file_name)) @@ -46,7 +44,7 @@ def download_stream(self, bucket, file): data = io.BytesIO() self.client.download_fileobj(bucket, file, data) return data.getbuffer() - + def get_instance(): if storage.instance is None: storage.instance = storage() diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler.py index 88e44baf6..964fc2fde 100644 --- a/benchmarks/wrappers/azure/python/handler.py +++ b/benchmarks/wrappers/azure/python/handler.py @@ -1,52 +1,60 @@ - -import datetime, io, json, os, uuid +import datetime +import io +import json +import os +import uuid import azure.functions as func -if 'NOSQL_STORAGE_DATABASE' in os.environ: +if "NOSQL_STORAGE_DATABASE" in os.environ: from . import nosql nosql.nosql.get_instance( - os.environ['NOSQL_STORAGE_DATABASE'], - os.environ['NOSQL_STORAGE_URL'], - os.environ['NOSQL_STORAGE_CREDS'] + os.environ["NOSQL_STORAGE_DATABASE"], + os.environ["NOSQL_STORAGE_URL"], + os.environ["NOSQL_STORAGE_CREDS"], ) -if 'STORAGE_CONNECTION_STRING' in os.environ: +if "STORAGE_CONNECTION_STRING" in os.environ: from . 
import storage - client = storage.storage.get_instance(os.environ['STORAGE_CONNECTION_STRING']) + + client = storage.storage.get_instance(os.environ["STORAGE_CONNECTION_STRING"]) + # TODO: usual trigger # implement support for blob and others + + def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: income_timestamp = datetime.datetime.now().timestamp() req_json = req.get_json() - req_json['request-id'] = context.invocation_id - req_json['income-timestamp'] = income_timestamp + req_json["request-id"] = context.invocation_id + req_json["income-timestamp"] = income_timestamp begin = datetime.datetime.now() # We are deployed in the same directory from . import function + ret = function.handler(req_json) end = datetime.datetime.now() - log_data = { - 'output': ret['result'] - } - if 'measurement' in ret: - log_data['measurement'] = ret['measurement'] - if 'logs' in req_json: - log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + log_data = {"output": ret["result"]} + if "measurement" in ret: + log_data["measurement"] = ret["measurement"] + if "logs" in req_json: + log_data["time"] = (end - begin) / datetime.timedelta(microseconds=1) results_begin = datetime.datetime.now() from . 
import storage + storage_inst = storage.storage.get_instance() - b = req_json.get('logs').get('bucket') + b = req_json.get("logs").get("bucket") req_id = context.invocation_id - storage_inst.upload_stream(b, '{}.json'.format(req_id), - io.BytesIO(json.dumps(log_data).encode('utf-8'))) + storage_inst.upload_stream( + b, "{}.json".format(req_id), io.BytesIO(json.dumps(log_data).encode("utf-8")) + ) results_end = datetime.datetime.now() results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) else: @@ -54,14 +62,14 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: # cold test is_cold = False - fname = os.path.join('/tmp','cold_run') + fname = os.path.join("/tmp", "cold_run") if not os.path.exists(fname): is_cold = True container_id = str(uuid.uuid4())[0:8] - with open(fname, 'a') as f: + with open(fname, "a") as f: f.write(container_id) else: - with open(fname, 'r') as f: + with open(fname, "r") as f: container_id = f.read() is_cold_worker = False @@ -73,17 +81,18 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: is_cold_worker = True return func.HttpResponse( - json.dumps({ - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'results_time': results_time, - 'result': log_data, - 'is_cold': is_cold, - 'is_cold_worker': is_cold_worker, - 'container_id': container_id, - 'environ_container_id': os.environ['CONTAINER_NAME'], - 'request_id': context.invocation_id - }), - mimetype="application/json" + json.dumps( + { + "begin": begin.strftime("%s.%f"), + "end": end.strftime("%s.%f"), + "results_time": results_time, + "result": log_data, + "is_cold": is_cold, + "is_cold_worker": is_cold_worker, + "container_id": container_id, + "environ_container_id": os.environ["CONTAINER_NAME"], + "request_id": context.invocation_id, + } + ), + mimetype="application/json", ) - diff --git a/benchmarks/wrappers/azure/python/storage.py b/benchmarks/wrappers/azure/python/storage.py index 
42b129c89..fabd8e6a1 100644 --- a/benchmarks/wrappers/azure/python/storage.py +++ b/benchmarks/wrappers/azure/python/storage.py @@ -1,10 +1,10 @@ - import os import uuid from typing import Optional from azure.storage.blob import BlobServiceClient + class storage: instance = None client = None @@ -15,20 +15,18 @@ def __init__(self, connection_string: str): @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, container, file, filepath): - with open(filepath, 'rb') as data: + with open(filepath, "rb") as data: return self.upload_stream(container, file, data) def download(self, container, file, filepath): - with open(filepath, 'wb') as download_file: - download_file.write( self.download_stream(container, file) ) - + with open(filepath, "wb") as download_file: + download_file.write(self.download_stream(container, file)) + def download_directory(self, container, prefix, path): client = self.client.get_container_client(container=container) objects = client.list_blobs(name_starts_with=prefix) @@ -37,20 +35,17 @@ def download_directory(self, container, prefix, path): path_to_file = os.path.dirname(file_name) os.makedirs(os.path.join(path, path_to_file), exist_ok=True) self.download(container, file_name, os.path.join(path, file_name)) - + def upload_stream(self, container, file, data): key_name = storage.unique_name(file) - client = self.client.get_blob_client( - container=container, - blob=key_name - ) + client = self.client.get_blob_client(container=container, blob=key_name) client.upload_blob(data) return key_name def download_stream(self, container, file): client = self.client.get_blob_client(container=container, blob=file) return client.download_blob().readall() - + @staticmethod def 
get_instance(connection_string: Optional[str] = None): if storage.instance is None: diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index 9b6989611..57e1d000b 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -1,44 +1,46 @@ -import datetime, io, json, os, uuid, sys +import datetime +import io +import json +import os +import sys +import uuid -sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) # This variable is defined by SeBS during function creation. -if 'NOSQL_STORAGE_DATABASE' in os.environ: +if "NOSQL_STORAGE_DATABASE" in os.environ: from function import nosql - nosql.nosql.get_instance( - os.environ['NOSQL_STORAGE_DATABASE'] - ) + nosql.nosql.get_instance(os.environ["NOSQL_STORAGE_DATABASE"]) def handler(req): income_timestamp = datetime.datetime.now().timestamp() - req_id = req.headers.get('Function-Execution-Id') - + req_id = req.headers.get("Function-Execution-Id") req_json = req.get_json() - req_json['request-id'] = req_id - req_json['income-timestamp'] = income_timestamp + req_json["request-id"] = req_id + req_json["income-timestamp"] = income_timestamp begin = datetime.datetime.now() # We are deployed in the same directorygit status from function import function + ret = function.handler(req_json) end = datetime.datetime.now() - - log_data = { - 'output': ret['result'] - } - if 'measurement' in ret: - log_data['measurement'] = ret['measurement'] - if 'logs' in req_json: - log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + log_data = {"output": ret["result"]} + if "measurement" in ret: + log_data["measurement"] = ret["measurement"] + if "logs" in req_json: + log_data["time"] = (end - begin) / datetime.timedelta(microseconds=1) results_begin = datetime.datetime.now() from function import 
storage + storage_inst = storage.storage.get_instance() - b = req_json.get('logs').get('bucket') - storage_inst.upload_stream(b, '{}.json'.format(req_id), - io.BytesIO(json.dumps(log_data).encode('utf-8'))) + b = req_json.get("logs").get("bucket") + storage_inst.upload_stream( + b, "{}.json".format(req_id), io.BytesIO(json.dumps(log_data).encode("utf-8")) + ) results_end = datetime.datetime.now() results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) else: @@ -46,27 +48,33 @@ def handler(req): # cold test is_cold = False - fname = os.path.join('/tmp', 'cold_run') + fname = os.path.join("/tmp", "cold_run") if not os.path.exists(fname): is_cold = True container_id = str(uuid.uuid4())[0:8] - with open(fname, 'a') as f: + with open(fname, "a") as f: f.write(container_id) else: - with open(fname, 'r') as f: + with open(fname, "r") as f: container_id = f.read() cold_start_var = "" if "cold_start" in os.environ: cold_start_var = os.environ["cold_start"] - return json.dumps({ - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'results_time': results_time, - 'is_cold': is_cold, - 'result': log_data, - 'request_id': req_id, - 'cold_start_var': cold_start_var, - 'container_id': container_id, - }), 200, {'ContentType': 'application/json'} + return ( + json.dumps( + { + "begin": begin.strftime("%s.%f"), + "end": end.strftime("%s.%f"), + "results_time": results_time, + "is_cold": is_cold, + "result": log_data, + "request_id": req_id, + "cold_start_var": cold_start_var, + "container_id": container_id, + } + ), + 200, + {"ContentType": "application/json"}, + ) diff --git a/benchmarks/wrappers/gcp/python/storage.py b/benchmarks/wrappers/gcp/python/storage.py index 81163cb34..70f182618 100644 --- a/benchmarks/wrappers/gcp/python/storage.py +++ b/benchmarks/wrappers/gcp/python/storage.py @@ -15,11 +15,9 @@ def __init__(self): @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return 
'{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, bucket, file, filepath): key_name = storage.unique_name(file) diff --git a/benchmarks/wrappers/local/python/storage.py b/benchmarks/wrappers/local/python/storage.py index b44968408..d25583a13 100644 --- a/benchmarks/wrappers/local/python/storage.py +++ b/benchmarks/wrappers/local/python/storage.py @@ -1,32 +1,28 @@ -import io import os import uuid import minio + class storage: instance = None client = None def __init__(self): - if 'MINIO_ADDRESS' in os.environ: - address = os.environ['MINIO_ADDRESS'] - access_key = os.environ['MINIO_ACCESS_KEY'] - secret_key = os.environ['MINIO_SECRET_KEY'] + if "MINIO_ADDRESS" in os.environ: + address = os.environ["MINIO_ADDRESS"] + access_key = os.environ["MINIO_ACCESS_KEY"] + secret_key = os.environ["MINIO_SECRET_KEY"] self.client = minio.Minio( - address, - access_key=access_key, - secret_key=secret_key, - secure=False) + address, access_key=access_key, secret_key=secret_key, secure=False + ) @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, bucket, file, filepath): key_name = storage.unique_name(file) @@ -55,4 +51,3 @@ def get_instance(): if storage.instance is None: storage.instance = storage() return storage.instance - diff --git a/benchmarks/wrappers/openwhisk/python/__main__.py b/benchmarks/wrappers/openwhisk/python/__main__.py index 3ae44f9c2..3833bff8c 100644 --- a/benchmarks/wrappers/openwhisk/python/__main__.py +++ b/benchmarks/wrappers/openwhisk/python/__main__.py @@ -2,24 
+2,30 @@ import datetime import os + def main(args): logging.getLogger().setLevel(logging.INFO) begin = datetime.datetime.now() - args['request-id'] = os.getenv('__OW_ACTIVATION_ID') - args['income-timestamp'] = begin.timestamp() + args["request-id"] = os.getenv("__OW_ACTIVATION_ID") + args["income-timestamp"] = begin.timestamp() - for arg in ["MINIO_STORAGE_CONNECTION_URL", "MINIO_STORAGE_ACCESS_KEY", "MINIO_STORAGE_SECRET_KEY"]: + for arg in [ + "MINIO_STORAGE_CONNECTION_URL", + "MINIO_STORAGE_ACCESS_KEY", + "MINIO_STORAGE_SECRET_KEY", + ]: os.environ[arg] = args[arg] del args[arg] key_list = list(args.keys()) for arg in key_list: - if 'NOSQL_STORAGE_' in arg: + if "NOSQL_STORAGE_" in arg: os.environ[arg] = args[arg] del args[arg] try: from function import function + ret = function.handler(args) end = datetime.datetime.now() logging.info("Function result: {}".format(ret)) @@ -38,7 +44,7 @@ def main(args): return { "begin": begin.strftime("%s.%f"), "end": end.strftime("%s.%f"), - "request_id": os.getenv('__OW_ACTIVATION_ID'), + "request_id": os.getenv("__OW_ACTIVATION_ID"), "results_time": results_time, "is_cold": is_cold, "result": log_data, @@ -49,7 +55,7 @@ def main(args): return { "begin": begin.strftime("%s.%f"), "end": end.strftime("%s.%f"), - "request_id": os.getenv('__OW_ACTIVATION_ID'), + "request_id": os.getenv("__OW_ACTIVATION_ID"), "results_time": results_time, - "result": f"Error - invocation failed! Reason: {e}" + "result": f"Error - invocation failed! 
Reason: {e}", } diff --git a/benchmarks/wrappers/openwhisk/python/nosql.py b/benchmarks/wrappers/openwhisk/python/nosql.py index da8245009..4a8676d36 100644 --- a/benchmarks/wrappers/openwhisk/python/nosql.py +++ b/benchmarks/wrappers/openwhisk/python/nosql.py @@ -5,6 +5,7 @@ import boto3 from botocore.client import Config + class nosql: instance: Optional["nosql"] = None @@ -14,14 +15,14 @@ def __init__(self): if environ["NOSQL_STORAGE_TYPE"] != "scylladb": raise RuntimeError(f"Unsupported NoSQL storage type: {environ['NOSQL_STORAGE_TYPE']}!") - config = Config(connect_timeout=5, retries={'max_attempts': 0}) + config = Config(connect_timeout=5, retries={"max_attempts": 0}) self.client = boto3.resource( "dynamodb", region_name="None", aws_access_key_id="None", aws_secret_access_key="None", endpoint_url=f"http://{environ['NOSQL_STORAGE_ENDPOINT']}", - config=config + config=config, ) self._tables = {} diff --git a/benchmarks/wrappers/openwhisk/python/setup.py b/benchmarks/wrappers/openwhisk/python/setup.py index b942d059b..016974465 100644 --- a/benchmarks/wrappers/openwhisk/python/setup.py +++ b/benchmarks/wrappers/openwhisk/python/setup.py @@ -2,13 +2,13 @@ from glob import glob from pkg_resources import parse_requirements -with open('requirements.txt') as f: +with open("requirements.txt") as f: requirements = [str(r) for r in parse_requirements(f)] setup( - name='function', + name="function", install_requires=requirements, - packages=['function'], - package_dir={'function': '.'}, - package_data={'function': glob('**', recursive=True)}, -) \ No newline at end of file + packages=["function"], + package_dir={"function": "."}, + package_data={"function": glob("**", recursive=True)}, +) diff --git a/benchmarks/wrappers/openwhisk/python/storage.py b/benchmarks/wrappers/openwhisk/python/storage.py index 76c7e3e8e..09b9e78a7 100644 --- a/benchmarks/wrappers/openwhisk/python/storage.py +++ b/benchmarks/wrappers/openwhisk/python/storage.py @@ -1,8 +1,8 @@ +import logging 
import os import uuid -import json + import minio -import logging class storage: @@ -25,14 +25,14 @@ def __init__(self): maxsize=10, retries=urllib3.Retry( total=5, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504] - ) + ), ) self.client = minio.Minio( os.getenv("MINIO_STORAGE_CONNECTION_URL"), access_key=os.getenv("MINIO_STORAGE_ACCESS_KEY"), secret_key=os.getenv("MINIO_STORAGE_SECRET_KEY"), secure=False, - http_client=mgr + http_client=mgr, ) except Exception as e: logging.info(e) @@ -41,12 +41,9 @@ def __init__(self): @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) - + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, bucket, file, filepath): key_name = storage.unique_name(file) @@ -64,9 +61,7 @@ def download_directory(self, bucket, prefix, path): def upload_stream(self, bucket, file, bytes_data): key_name = storage.unique_name(file) - self.client.put_object( - bucket, key_name, bytes_data, bytes_data.getbuffer().nbytes - ) + self.client.put_object(bucket, key_name, bytes_data, bytes_data.getbuffer().nbytes) return key_name def download_stream(self, bucket, file): From 27b14d691b1911e84003be31c07810e07c52a824 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Wed, 5 Nov 2025 17:55:59 +0100 Subject: [PATCH 17/82] add linalg benchmarks --- .../600.linearalgebra/601.matmul/config.json | 6 ++ .../600.linearalgebra/601.matmul/input.py | 7 ++ .../601.matmul/python/function.py | 59 ++++++++++++++++ .../601.matmul/python/requirements.txt | 1 + .../601.matmul/python/requirements.txt.3.10 | 0 .../601.matmul/python/requirements.txt.3.11 | 0 .../601.matmul/python/requirements.txt.3.12 | 0 .../601.matmul/python/requirements.txt.3.7 | 0 .../601.matmul/python/requirements.txt.3.8 | 0 .../601.matmul/python/requirements.txt.3.9 | 0 
.../python/requirements.txt.arm.3.8 | 0 .../python/requirements.txt.arm.3.9 | 0 .../600.linearalgebra/602.axpy/config.json | 6 ++ .../600.linearalgebra/602.axpy/input.py | 7 ++ .../602.axpy/python/function.py | 59 ++++++++++++++++ .../602.axpy/python/requirements.txt | 1 + .../602.axpy/python/requirements.txt.3.10 | 0 .../602.axpy/python/requirements.txt.3.11 | 0 .../602.axpy/python/requirements.txt.3.12 | 0 .../602.axpy/python/requirements.txt.3.7 | 0 .../602.axpy/python/requirements.txt.3.8 | 0 .../602.axpy/python/requirements.txt.3.9 | 0 .../602.axpy/python/requirements.txt.arm.3.8 | 0 .../602.axpy/python/requirements.txt.arm.3.9 | 0 .../603.jacobi2d/config.json | 6 ++ .../600.linearalgebra/603.jacobi2d/input.py | 7 ++ .../603.jacobi2d/python/function.py | 69 +++++++++++++++++++ .../603.jacobi2d/python/requirements.txt | 1 + .../603.jacobi2d/python/requirements.txt.3.10 | 0 .../603.jacobi2d/python/requirements.txt.3.11 | 0 .../603.jacobi2d/python/requirements.txt.3.12 | 0 .../603.jacobi2d/python/requirements.txt.3.7 | 0 .../603.jacobi2d/python/requirements.txt.3.8 | 0 .../603.jacobi2d/python/requirements.txt.3.9 | 0 .../python/requirements.txt.arm.3.8 | 0 .../python/requirements.txt.arm.3.9 | 0 .../604.cholesky/config.json | 6 ++ .../600.linearalgebra/604.cholesky/input.py | 7 ++ .../604.cholesky/python/function.py | 57 +++++++++++++++ .../604.cholesky/python/requirements.txt | 1 + .../604.cholesky/python/requirements.txt.3.10 | 0 .../604.cholesky/python/requirements.txt.3.11 | 0 .../604.cholesky/python/requirements.txt.3.12 | 0 .../604.cholesky/python/requirements.txt.3.7 | 0 .../604.cholesky/python/requirements.txt.3.8 | 0 .../604.cholesky/python/requirements.txt.3.9 | 0 .../python/requirements.txt.arm.3.8 | 0 .../python/requirements.txt.arm.3.9 | 0 48 files changed, 300 insertions(+) create mode 100644 benchmarks/600.linearalgebra/601.matmul/config.json create mode 100644 benchmarks/600.linearalgebra/601.matmul/input.py create mode 100755 
benchmarks/600.linearalgebra/601.matmul/python/function.py create mode 100755 benchmarks/600.linearalgebra/601.matmul/python/requirements.txt create mode 100644 benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.10 create mode 100644 benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.11 create mode 100644 benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.12 create mode 100755 benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.7 create mode 100755 benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.8 create mode 100755 benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.9 create mode 100644 benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.arm.3.8 create mode 100644 benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.arm.3.9 create mode 100644 benchmarks/600.linearalgebra/602.axpy/config.json create mode 100644 benchmarks/600.linearalgebra/602.axpy/input.py create mode 100755 benchmarks/600.linearalgebra/602.axpy/python/function.py create mode 100755 benchmarks/600.linearalgebra/602.axpy/python/requirements.txt create mode 100644 benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.10 create mode 100644 benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.11 create mode 100644 benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.12 create mode 100755 benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.7 create mode 100755 benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.8 create mode 100755 benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.9 create mode 100644 benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.arm.3.8 create mode 100644 benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.arm.3.9 create mode 100644 benchmarks/600.linearalgebra/603.jacobi2d/config.json create mode 100644 benchmarks/600.linearalgebra/603.jacobi2d/input.py create mode 100755 
benchmarks/600.linearalgebra/603.jacobi2d/python/function.py create mode 100755 benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt create mode 100644 benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.10 create mode 100644 benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.11 create mode 100644 benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.12 create mode 100755 benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.7 create mode 100755 benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.8 create mode 100755 benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.9 create mode 100644 benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.8 create mode 100644 benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.9 create mode 100644 benchmarks/600.linearalgebra/604.cholesky/config.json create mode 100644 benchmarks/600.linearalgebra/604.cholesky/input.py create mode 100755 benchmarks/600.linearalgebra/604.cholesky/python/function.py create mode 100755 benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt create mode 100644 benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.10 create mode 100644 benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.11 create mode 100644 benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.12 create mode 100755 benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.7 create mode 100755 benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.8 create mode 100755 benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.9 create mode 100644 benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.8 create mode 100644 benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.9 diff --git a/benchmarks/600.linearalgebra/601.matmul/config.json 
b/benchmarks/600.linearalgebra/601.matmul/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/601.matmul/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/601.matmul/input.py b/benchmarks/600.linearalgebra/601.matmul/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/601.matmul/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/601.matmul/python/function.py b/benchmarks/600.linearalgebra/601.matmul/python/function.py new file mode 100755 index 000000000..c3aefa7c6 --- /dev/null +++ b/benchmarks/600.linearalgebra/601.matmul/python/function.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +import sys, json, math, torch +import datetime + + +def initialize_torch(NI, NJ, NK, dtype=torch.float32, device="cuda"): + alpha = torch.tensor(1.5, dtype=dtype, device=device) + beta = torch.tensor(1.2, dtype=dtype, device=device) + i = torch.arange(NI, device=device) + j = torch.arange(NJ, device=device) + k = torch.arange(NK, device=device) + C = ((i[:, None] * j[None, :] + 1) % NI).to(dtype) / NI + A = ((i[:, None] * (k[None, :] + 1)) % NK).to(dtype) / NK + B = ((k[:, None] * (j[None, :] + 2)) % NJ).to(dtype) / NJ + return alpha, beta, C, A, B + + +def kernel_gemm(alpha, beta, C, A, B, reps=1): + torch.cuda.synchronize() + _ = alpha * (A @ B) + beta * C # warmup + torch.cuda.synchronize() + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + start.record() + for _ in range(reps): + C = alpha * (A @ B) + beta * C + end.record() + torch.cuda.synchronize() + return C, 
float(start.elapsed_time(end)) # ms for all reps + + +def handler(event): + + size = event.get("size") + if "seed" in event: + import random + + random.seed(event["seed"]) + + matrix_generating_begin = datetime.datetime.now() + alpha, beta, C, A, B = initialize_torch(size, size, size, dtype=torch.float32, device="cuda") + matrix_generating_end = datetime.datetime.now() + + matmul_begin = datetime.datetime.now() + C_out, gpu_ms = kernel_gemm(alpha, beta, C, A, B, reps=1) + matmul_end = datetime.datetime.now() + + matrix_generating_time = (matrix_generating_end - matrix_generating_begin) / datetime.timedelta( + microseconds=1 + ) + matmul_time = (matmul_end - matmul_begin) / datetime.timedelta(microseconds=1) + + return { + # "result": result[0], + "measurement": { + "graph_generating_time": matrix_generating_time, + "compute_time": matmul_time, + }, + } diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git 
a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/config.json b/benchmarks/600.linearalgebra/602.axpy/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/602.axpy/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/602.axpy/input.py b/benchmarks/600.linearalgebra/602.axpy/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/602.axpy/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/602.axpy/python/function.py b/benchmarks/600.linearalgebra/602.axpy/python/function.py new file mode 100755 index 000000000..c3aefa7c6 --- /dev/null +++ b/benchmarks/600.linearalgebra/602.axpy/python/function.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +import sys, json, math, torch +import datetime + + +def initialize_torch(NI, NJ, NK, 
dtype=torch.float32, device="cuda"): + alpha = torch.tensor(1.5, dtype=dtype, device=device) + beta = torch.tensor(1.2, dtype=dtype, device=device) + i = torch.arange(NI, device=device) + j = torch.arange(NJ, device=device) + k = torch.arange(NK, device=device) + C = ((i[:, None] * j[None, :] + 1) % NI).to(dtype) / NI + A = ((i[:, None] * (k[None, :] + 1)) % NK).to(dtype) / NK + B = ((k[:, None] * (j[None, :] + 2)) % NJ).to(dtype) / NJ + return alpha, beta, C, A, B + + +def kernel_gemm(alpha, beta, C, A, B, reps=1): + torch.cuda.synchronize() + _ = alpha * (A @ B) + beta * C # warmup + torch.cuda.synchronize() + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + start.record() + for _ in range(reps): + C = alpha * (A @ B) + beta * C + end.record() + torch.cuda.synchronize() + return C, float(start.elapsed_time(end)) # ms for all reps + + +def handler(event): + + size = event.get("size") + if "seed" in event: + import random + + random.seed(event["seed"]) + + matrix_generating_begin = datetime.datetime.now() + alpha, beta, C, A, B = initialize_torch(size, size, size, dtype=torch.float32, device="cuda") + matrix_generating_end = datetime.datetime.now() + + matmul_begin = datetime.datetime.now() + C_out, gpu_ms = kernel_gemm(alpha, beta, C, A, B, reps=1) + matmul_end = datetime.datetime.now() + + matrix_generating_time = (matrix_generating_end - matrix_generating_begin) / datetime.timedelta( + microseconds=1 + ) + matmul_time = (matmul_end - matmul_begin) / datetime.timedelta(microseconds=1) + + return { + # "result": result[0], + "measurement": { + "graph_generating_time": matrix_generating_time, + "compute_time": matmul_time, + }, + } diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 
diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/config.json b/benchmarks/600.linearalgebra/603.jacobi2d/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/603.jacobi2d/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/input.py 
b/benchmarks/600.linearalgebra/603.jacobi2d/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/603.jacobi2d/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py b/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py new file mode 100755 index 000000000..e212cf8c5 --- /dev/null +++ b/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +import sys, json, math, torch +import datetime + + +def initialize_torch(N, dtype=torch.float32, device="cuda"): + i = torch.arange(N, device=device, dtype=dtype).view(-1, 1) + j = torch.arange(N, device=device, dtype=dtype).view(1, -1) + + A = i * (j + 2) / N + B = i * (j + 3) / N + return A, B + + +def kernel_jacobi2d(A, B, iters=50): + torch.cuda.synchronize() + # warmup + if A.shape[0] > 2 and A.shape[1] > 2: + B_inner = 0.2 * (A[1:-1, 1:-1] + A[1:-1, :-2] + A[1:-1, 2:] + A[2:, 1:-1] + A[:-2, 1:-1]) + B[1:-1, 1:-1].copy_(B_inner) + + A_inner = 0.2 * (B[1:-1, 1:-1] + B[1:-1, :-2] + B[1:-1, 2:] + B[2:, 1:-1] + B[:-2, 1:-1]) + A[1:-1, 1:-1].copy_(A_inner) + torch.cuda.synchronize() + + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + for _ in range(iters): + B_inner = 0.2 * (A[1:-1, 1:-1] + A[1:-1, :-2] + A[1:-1, 2:] + A[2:, 1:-1] + A[:-2, 1:-1]) + B[1:-1, 1:-1].copy_(B_inner) + + A_inner = 0.2 * (B[1:-1, 1:-1] + B[1:-1, :-2] + B[1:-1, 2:] + B[2:, 1:-1] + B[:-2, 1:-1]) + A[1:-1, 1:-1].copy_(A_inner) + end_evt.record() + torch.cuda.synchronize() + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return A, B, gpu_ms + + +def handler(event): + + size = event.get("size") + if "seed" 
in event: + import random + + random.seed(event["seed"]) + + matrix_generating_begin = datetime.datetime.now() + A, B = initialize_torch(size, dtype=torch.float32, device="cuda") + matrix_generating_end = datetime.datetime.now() + + matmul_begin = datetime.datetime.now() + A_out, B_out, gpu_ms = kernel_jacobi2d(A, B, iters=50) + matmul_end = datetime.datetime.now() + + matrix_generating_time = (matrix_generating_end - matrix_generating_begin) / datetime.timedelta( + microseconds=1 + ) + matmul_time = (matmul_end - matmul_begin) / datetime.timedelta(microseconds=1) + + return { + # "result": result[0], + "measurement": { + "graph_generating_time": matrix_generating_time, + "compute_time": matmul_time, + "gpu_time": gpu_ms, + }, + } diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.8
b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/config.json b/benchmarks/600.linearalgebra/604.cholesky/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/604.cholesky/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/604.cholesky/input.py b/benchmarks/600.linearalgebra/604.cholesky/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/604.cholesky/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/function.py b/benchmarks/600.linearalgebra/604.cholesky/python/function.py new file mode 100755 index 000000000..f01983ecf --- /dev/null +++ b/benchmarks/600.linearalgebra/604.cholesky/python/function.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +import sys, json, torch, datetime + + +def initialize_torch(N, dtype=torch.float32, device="cuda"): + j = 
torch.arange(N, device=device) + v = (torch.remainder(-j, N).to(dtype) / N) + 1 + + L = v.expand(N, -1).clone() + L = torch.tril(L) + L.fill_diagonal_(1.0) + + A = L @ L.transpose(-1, -2) + return A + + +def kernel_cholesky(A): + torch.cuda.synchronize() + _ = torch.linalg.cholesky(A) # warmup + torch.cuda.synchronize() + + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + for _ in range(A.size(0)): + L = torch.linalg.cholesky(A) + end_evt.record() + torch.cuda.synchronize() + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return L, gpu_ms + + +def handler(event): + size = event.get("size") + if "seed" in event: + import random + + random.seed(event["seed"]) + + gen_begin = datetime.datetime.now() + A = initialize_torch(size, dtype=torch.float32, device="cuda") + gen_end = datetime.datetime.now() + + comp_begin = datetime.datetime.now() + L, gpu_ms = kernel_cholesky(A) + comp_end = datetime.datetime.now() + + gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1) + comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1) + + return { + "measurement": { + "graph_generating_time": gen_us, + "compute_time": comp_us, + "gpu_time": gpu_ms, + } + } diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git 
a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb From ace23355b27ef073c0f0ef3aa0d8db8fef36cf1c Mon Sep 17 00:00:00 2001 From: Russellpang Date: Wed, 5 Nov 2025 18:05:07 +0100 Subject: [PATCH 18/82] add linalg benchmarks --- .../601.matmul/python/function.py | 3 + .../602.axpy/python/function.py | 69 +++++++++---------- .../603.jacobi2d/python/function.py | 3 + .../604.cholesky/python/function.py | 3 + 4 files changed, 43 insertions(+), 35 deletions(-) diff --git a/benchmarks/600.linearalgebra/601.matmul/python/function.py b/benchmarks/600.linearalgebra/601.matmul/python/function.py index c3aefa7c6..33c06ae2e 100755 --- a/benchmarks/600.linearalgebra/601.matmul/python/function.py +++ b/benchmarks/600.linearalgebra/601.matmul/python/function.py @@ -37,6 +37,9 @@ def handler(event): random.seed(event["seed"]) + 
seed = event.get("seed", 42) + seed = int(seed) + matrix_generating_begin = datetime.datetime.now() alpha, beta, C, A, B = initialize_torch(size, size, size, dtype=torch.float32, device="cuda") matrix_generating_end = datetime.datetime.now() diff --git a/benchmarks/600.linearalgebra/602.axpy/python/function.py b/benchmarks/600.linearalgebra/602.axpy/python/function.py index c3aefa7c6..7f55d540b 100755 --- a/benchmarks/600.linearalgebra/602.axpy/python/function.py +++ b/benchmarks/600.linearalgebra/602.axpy/python/function.py @@ -1,59 +1,58 @@ #!/usr/bin/env python3 -import sys, json, math, torch -import datetime +import sys, json, torch, datetime -def initialize_torch(NI, NJ, NK, dtype=torch.float32, device="cuda"): - alpha = torch.tensor(1.5, dtype=dtype, device=device) - beta = torch.tensor(1.2, dtype=dtype, device=device) - i = torch.arange(NI, device=device) - j = torch.arange(NJ, device=device) - k = torch.arange(NK, device=device) - C = ((i[:, None] * j[None, :] + 1) % NI).to(dtype) / NI - A = ((i[:, None] * (k[None, :] + 1)) % NK).to(dtype) / NK - B = ((k[:, None] * (j[None, :] + 2)) % NJ).to(dtype) / NJ - return alpha, beta, C, A, B +def initialize_torch(N, dtype=torch.float32, device="cuda", seed=42): + if seed is not None: + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + alpha = torch.randn((), dtype=dtype, device=device) + x = torch.randn(N, dtype=dtype, device=device) + y = torch.randn(N, dtype=dtype, device=device) + return alpha, x, y -def kernel_gemm(alpha, beta, C, A, B, reps=1): +def kernel_axpy(alpha, x, y, reps=100): torch.cuda.synchronize() - _ = alpha * (A @ B) + beta * C # warmup + _ = alpha * x + y # warmup torch.cuda.synchronize() - start = torch.cuda.Event(enable_timing=True) - end = torch.cuda.Event(enable_timing=True) - start.record() + + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() for _ in range(reps): - C = alpha * (A @ B) + beta * C - end.record() 
+ y = alpha * x + y + end_evt.record() torch.cuda.synchronize() - return C, float(start.elapsed_time(end)) # ms for all reps + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return y, gpu_ms def handler(event): - size = event.get("size") if "seed" in event: import random random.seed(event["seed"]) - matrix_generating_begin = datetime.datetime.now() - alpha, beta, C, A, B = initialize_torch(size, size, size, dtype=torch.float32, device="cuda") - matrix_generating_end = datetime.datetime.now() + seed = event.get("seed", 42) + seed = int(seed) + + gen_begin = datetime.datetime.now() + alpha, x, y = initialize_torch(size, dtype=torch.float32, device="cuda", seed=seed) + gen_end = datetime.datetime.now() - matmul_begin = datetime.datetime.now() - C_out, gpu_ms = kernel_gemm(alpha, beta, C, A, B, reps=1) - matmul_end = datetime.datetime.now() + comp_begin = datetime.datetime.now() + y_out, gpu_ms = kernel_axpy(alpha, x, y, reps=100) + comp_end = datetime.datetime.now() - matrix_generating_time = (matrix_generating_end - matrix_generating_begin) / datetime.timedelta( - microseconds=1 - ) - matmul_time = (matmul_end - matmul_begin) / datetime.timedelta(microseconds=1) + gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1) + comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1) return { - # "result": result[0], "measurement": { - "graph_generating_time": matrix_generating_time, - "compute_time": matmul_time, - }, + "graph_generating_time": gen_us, + "compute_time": comp_us, + "gpu_time": gpu_ms, + } } diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py b/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py index e212cf8c5..5f10c44f7 100755 --- a/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py +++ b/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py @@ -46,6 +46,9 @@ def handler(event): random.seed(event["seed"]) + seed = event.get("seed", 42) + seed = int(seed) + matrix_generating_begin 
= datetime.datetime.now() A, B = initialize_torch(size, dtype=torch.float32, device="cuda") matrix_generating_end = datetime.datetime.now() diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/function.py b/benchmarks/600.linearalgebra/604.cholesky/python/function.py index f01983ecf..c0776d568 100755 --- a/benchmarks/600.linearalgebra/604.cholesky/python/function.py +++ b/benchmarks/600.linearalgebra/604.cholesky/python/function.py @@ -37,6 +37,9 @@ def handler(event): random.seed(event["seed"]) + seed = event.get("seed", 42) + seed = int(seed) + gen_begin = datetime.datetime.now() A = initialize_torch(size, dtype=torch.float32, device="cuda") gen_end = datetime.datetime.now() From ad3023dc02fd14a888e10c663b76692f50066449 Mon Sep 17 00:00:00 2001 From: McLavish Date: Wed, 5 Nov 2025 20:50:24 +0100 Subject: [PATCH 19/82] changed CI/CD to run linting on the benchmarks folder ONLY. disabled mypy checks --- .circleci/config.yml | 14 +++++++------- .pre-commit-config.yaml | 7 +++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a841c1584..77243ae8a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -21,18 +21,18 @@ jobs: - run: command: | . python-venv/bin/activate - black sebs --check --config .black.toml + black benchmarks --check --config .black.toml name: Python code formatting with black - run: command: | . python-venv/bin/activate - flake8 sebs --config=.flake8.cfg --tee --output-file flake-reports + flake8 benchmarks --config=.flake8.cfg --tee --output-file flake-reports name: Python code lint with flake8 - - run: - command: | - . python-venv/bin/activate - mypy sebs --config-file=.mypy.ini - name: Python static code verification with mypy + # - run: + # command: | + # . 
python-venv/bin/activate + # mypy sebs --config-file=.mypy.ini + # name: Python static code verification with mypy - store_artifacts: path: flake-reports destination: flake-reports diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fce4c4da7..58f8adb8d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,4 +20,11 @@ repos: args: ["--config=.black.toml", "--check", "--diff"] types: [python] files: ^(sebs/|benchmarks/) + # - repo: local + # hooks: + # - id: mypy-local + # name: mypy (project venv) + # language: system + # entry: bash -lc 'python -m mypy --config-file=.mypy.ini sebs' + # types: [python] From 52f30c038a5639884e7e7eaeeba1ba57eeb327e1 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Thu, 6 Nov 2025 02:08:53 +0100 Subject: [PATCH 20/82] fix typo --- .../601.matmul/python/function.py | 2 +- .../602.axpy/python/function.py | 2 +- .../603.jacobi2d/python/function.py | 2 +- .../604.cholesky/python/function.py | 2 +- config/local_deployment.json | 125 ++++++++++++++++++ out_storage.json | 33 +++++ 6 files changed, 162 insertions(+), 4 deletions(-) create mode 100644 config/local_deployment.json create mode 100644 out_storage.json diff --git a/benchmarks/600.linearalgebra/601.matmul/python/function.py b/benchmarks/600.linearalgebra/601.matmul/python/function.py index 33c06ae2e..ee1ceaff7 100755 --- a/benchmarks/600.linearalgebra/601.matmul/python/function.py +++ b/benchmarks/600.linearalgebra/601.matmul/python/function.py @@ -56,7 +56,7 @@ def handler(event): return { # "result": result[0], "measurement": { - "graph_generating_time": matrix_generating_time, + "generating_time": matrix_generating_time, "compute_time": matmul_time, }, } diff --git a/benchmarks/600.linearalgebra/602.axpy/python/function.py b/benchmarks/600.linearalgebra/602.axpy/python/function.py index 7f55d540b..9c31c05bd 100755 --- a/benchmarks/600.linearalgebra/602.axpy/python/function.py +++ b/benchmarks/600.linearalgebra/602.axpy/python/function.py @@ 
-51,7 +51,7 @@ def handler(event): return { "measurement": { - "graph_generating_time": gen_us, + "generating_time": gen_us, "compute_time": comp_us, "gpu_time": gpu_ms, } diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py b/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py index 5f10c44f7..b83230f04 100755 --- a/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py +++ b/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py @@ -65,7 +65,7 @@ def handler(event): return { # "result": result[0], "measurement": { - "graph_generating_time": matrix_generating_time, + "generating_time": matrix_generating_time, "compute_time": matmul_time, "gpu_time": gpu_ms, }, diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/function.py b/benchmarks/600.linearalgebra/604.cholesky/python/function.py index c0776d568..537015e56 100755 --- a/benchmarks/600.linearalgebra/604.cholesky/python/function.py +++ b/benchmarks/600.linearalgebra/604.cholesky/python/function.py @@ -53,7 +53,7 @@ def handler(event): return { "measurement": { - "graph_generating_time": gen_us, + "generating_time": gen_us, "compute_time": comp_us, "gpu_time": gpu_ms, } diff --git a/config/local_deployment.json b/config/local_deployment.json new file mode 100644 index 000000000..d89b3a968 --- /dev/null +++ b/config/local_deployment.json @@ -0,0 +1,125 @@ +{ + "experiments": { + "deployment": "local", + "update_code": false, + "update_storage": false, + "download_results": false, + "architecture": "x64", + "container_deployment": true, + "runtime": { + "language": "python", + "version": "3.11" + }, + "type": "invocation-overhead", + "perf-cost": { + "benchmark": "601.matmul", + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "repetitions": 50, + "concurrent-invocations": 50, + "memory-sizes": [ + 128, + 256 + ] + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + 
"invocation-overhead": { + "repetitions": 5, + "N": 20, + "type": "payload", + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20 + }, + "eviction-model": { + "invocations": 1, + "function_copy_idx": 0, + "repetitions": 5, + "sleep": 1 + } + }, + "deployment": { + "name": "local", + "aws": { + "region": "us-east-1", + "lambda-role": "" + }, + "azure": { + "region": "westeurope" + }, + "gcp": { + "region": "europe-west1", + "project_name": "", + "credentials": "" + }, + "local": { + "storage": { + "object": { + "type": "minio", + "minio": { + "address": "192.168.1.101:9011", + "mapped_port": 9011, + "access_key": "Ux22nOcFoUaZAnmg-kULuIzvXmiFNVRiZkzcOaDawpU", + "secret_key": "9bb8b39326b0fde8bdfa5d013f743c03e57d146f10433fe2fe3ccce7225078d5", + "instance_id": "f337f509c0375ca4457f815cc0f67352088f6093053ba33c1e45aca0012e0a9f", + "output_buckets": [], + "input_buckets": [], + "version": "RELEASE.2024-07-16T23-46-41Z", + "data_volume": "minio-volume", + "type": "minio" + } + }, + "nosql": { + "type": "scylladb", + "scylladb": { + "address": "192.168.1.101:9012", + "mapped_port": 9012, + "alternator_port": 8000, + "access_key": "None", + "secret_key": "None", + "instance_id": "c7be7d211bb211b92f41afb073f75635d9f5f2a222abeb6d9b17673b02d079ca", + "region": "None", + "cpus": 1, + "memory": "750", + "version": "6.0", + "data_volume": "scylladb-volume" + } + } + } + }, + "openwhisk": { + "shutdownStorage": false, + "removeCluster": false, + "wskBypassSecurity": "true", + "wskExec": "wsk", + "experimentalManifest": false, + "docker_registry": { + "registry": "", + "username": "", + "password": "" + }, + "storage": { + "address": "", + "mapped_port": -1, + "access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio" + } + } + } +} diff --git a/out_storage.json b/out_storage.json new file mode 100644 index 000000000..16c13dba6 
--- /dev/null +++ b/out_storage.json @@ -0,0 +1,33 @@ +{ + "object": { + "type": "minio", + "minio": { + "address": "172.17.0.2:9000", + "mapped_port": 9011, + "access_key": "vTIGFqQKDU9CVlE_eFkJ7kZFt823CoiiG1GRgxLFczc", + "secret_key": "01872a84cd3ec4af4b897cc57fa515ca7a704a5e4557b5ecde5b98fe41ecc489", + "instance_id": "39a39aa73d44cee61a627a73fecd962f8fdcdbc415f70f702d850eff2afae3a3", + "output_buckets": [], + "input_buckets": [], + "version": "RELEASE.2024-07-16T23-46-41Z", + "data_volume": "minio-volume", + "type": "minio" + } + }, + "nosql": { + "type": "scylladb", + "scylladb": { + "address": "172.17.0.3:8000", + "mapped_port": 9012, + "alternator_port": 8000, + "access_key": "None", + "secret_key": "None", + "instance_id": "b302608abce0d96e1518260ff38c366bbe0dfe279935c521ef682d740d84fe69", + "region": "None", + "cpus": 1, + "memory": "750", + "version": "6.0", + "data_volume": "scylladb-volume" + } + } +} \ No newline at end of file From 4efff4d794abc3ded0835acb7d02287d70066f19 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Thu, 6 Nov 2025 02:43:33 +0100 Subject: [PATCH 21/82] update code --- .../python/function.py | 198 ++++++++---------- 1 file changed, 91 insertions(+), 107 deletions(-) diff --git a/benchmarks/400.inference/413.image-classification/python/function.py b/benchmarks/400.inference/413.image-classification/python/function.py index 1ee9d653a..7c241d525 100644 --- a/benchmarks/400.inference/413.image-classification/python/function.py +++ b/benchmarks/400.inference/413.image-classification/python/function.py @@ -1,141 +1,119 @@ -import datetime, json, os, tarfile -from pathlib import Path +import datetime, json, os, uuid + +# Extract zipped torch model - used in Python 3.8 and 3.9 +if os.path.exists("function/torch.zip"): + import zipfile, sys + + zipfile.ZipFile("function/torch.zip").extractall("/tmp/") + sys.path.append( + os.path.join(os.path.dirname(__file__), "/tmp/.python_packages/lib/site-packages") + ) from PIL import Image import torch 
from torchvision import transforms from torchvision.models import resnet50 -# ---------- Config ---------- -# Optional env overrides; event fields take precedence if provided -ENV_MODEL_PATH = os.getenv("MODEL_PATH") # /abs/path/resnet50.tar.gz or .pth/.pt -ENV_IMAGE_PATH = os.getenv("IMAGE_PATH") # /abs/path/test.jpg -USE_AMP = True # autocast for faster inference on CUDA -# ---------------------------- +from . import storage -SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__)) +client = storage.storage.get_instance() + +SCRIPT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) class_idx = json.load(open(os.path.join(SCRIPT_DIR, "imagenet_class_index.json"), "r")) idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))] -DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -torch.backends.cudnn.benchmark = True - -model = None # cache across invocations (same as your original) +model = None +device = "cuda" if torch.cuda.is_available() else "cpu" -def _extract_pth_from_tar(tar_path: str, out_dir: str = "/tmp/resnet50_unpack") -> str: - """Extract .tar.gz/.tgz and return the first .pth/.pt found.""" - out = Path(out_dir) - out.mkdir(parents=True, exist_ok=True) - with tarfile.open(tar_path, "r:gz") as tar: - tar.extractall(out) - for ext in ("*.pth", "*.pt"): - found = list(out.rglob(ext)) - if found: - return str(found[0]) - raise FileNotFoundError(f"No .pth/.pt found in archive: {tar_path}") - - -def _load_resnet50_from_path(model_path: str) -> torch.nn.Module: - """Load torchvision ResNet-50 from a local .tar.gz or .pth/.pt (CPU), then return it.""" - if model_path.endswith((".tar.gz", ".tgz")): - weight_path = _extract_pth_from_tar(model_path) - else: - weight_path = model_path - - ckpt = torch.load(weight_path, map_location="cpu") - if isinstance(ckpt, dict): - state = ckpt.get("state_dict", ckpt.get("model", ckpt)) - if not isinstance(state, dict): - state = ckpt - if len(state) > 0 and 
next(iter(state)).startswith("module."): - state = {k.replace("module.", "", 1): v for k, v in state.items()} - m = resnet50(pretrained=False) - m.load_state_dict(state, strict=False) - m.eval() - return m - elif isinstance(ckpt, torch.nn.Module): - ckpt.eval() - return ckpt - else: - raise TypeError(f"Unsupported checkpoint type: {type(ckpt)}") - - -def _maybe_sync(): - if DEVICE.type == "cuda": - torch.cuda.synchronize() +def handler(event): + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + model_prefix = event.get("bucket").get("model") + key = event.get("object").get("input") + model_key = event.get("object").get("model") + download_path = "/tmp/{}-{}".format(key, uuid.uuid4()) -def handler(event): - """ - Accepts local paths via event (preferred for your benchmark runner): - event = { - "local_model_archive": "/abs/path/resnet50.tar.gz" or ".pth", - "local_image_path": "/abs/path/image.jpg" - } - Falls back to env MODEL_PATH / IMAGE_PATH if not provided. - Returns the SAME structure as your existing function.py. - """ - if not torch.cuda.is_available(): - raise RuntimeError("CUDA not available. 
Run on a GPU machine/container.") - - # -------- resolve inputs -------- - model_path = event.get("local_model_archive") or ENV_MODEL_PATH - image_path = event.get("local_image_path") or ENV_IMAGE_PATH - assert model_path, "Provide local_model_archive in event or set MODEL_PATH" - assert image_path, "Provide local_image_path in event or set IMAGE_PATH" - - # -------- timings: image "download" (local -> count as zero) -------- + # --- Download image --- image_download_begin = datetime.datetime.now() - image_download_end = image_download_begin # local file, no download + image_path = download_path + client.download(bucket, os.path.join(input_prefix, key), download_path) + image_download_end = datetime.datetime.now() - # -------- lazy model load (cache like your original) -------- global model if model is None: + # --- Download weights --- model_download_begin = datetime.datetime.now() - model_download_end = model_download_begin # local file, no remote download + model_path = os.path.join("/tmp", model_key) + client.download(bucket, os.path.join(model_prefix, model_key), model_path) + model_download_end = datetime.datetime.now() + # --- Load model (CPU), then move to GPU --- model_process_begin = datetime.datetime.now() - # load on CPU, then move to GPU - m = _load_resnet50_from_path(model_path) - model = m.to(DEVICE, non_blocking=True).eval() - _maybe_sync() + model = resnet50(pretrained=False) + state = torch.load(model_path, map_location="cpu") # robust for CPU-saved checkpoints + # handle checkpoints that wrap state dict: + state = state.get("state_dict", state) + model.load_state_dict(state) + model.eval() + model.to(device) + # speed on cuDNN-convolutional nets + if device == "cuda": + torch.backends.cudnn.benchmark = True model_process_end = datetime.datetime.now() else: - # reuse cached model + # model already cached model_download_begin = model_download_end = datetime.datetime.now() - model_process_begin = model_process_end = model_download_begin + 
model_process_begin = model_process_end = datetime.datetime.now() - # -------- preprocess + inference on GPU (with proper sync) -------- + # --- Preprocess (CPU) --- + process_begin = datetime.datetime.now() input_image = Image.open(image_path).convert("RGB") preprocess = transforms.Compose( [ transforms.Resize(256), transforms.CenterCrop(224), - transforms.ToTensor(), + transforms.ToTensor(), # [0,1], CHW transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ] ) - input_tensor = preprocess(input_image).unsqueeze(0) # [1,3,224,224] + input_tensor = preprocess(input_image) # CPU tensor + input_batch = input_tensor.unsqueeze(0).to(device, non_blocking=True) # NCHW on GPU + + # --- Inference (GPU) --- + with torch.no_grad(): + # Ensure wall-clock timing includes GPU work + if device == "cuda": + torch.cuda.synchronize() + # GPU event timing (kernel time) + start_evt = end_evt = None + if device == "cuda": + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + + output = model(input_batch) # logits [1,1000] + + if device == "cuda": + end_evt.record() + torch.cuda.synchronize() + + # compute top-1 / top-5 on CPU + probs = torch.nn.functional.softmax(output, dim=1) + conf, index = torch.max(probs, 1) + # make Python types + top1_idx = index.item() + top1_conf = float(conf.item()) + # (optional) top-5 + _, top5_idx = torch.topk(probs, k=5, dim=1) + top5_idx = top5_idx[0].tolist() + + ret = idx2label[top1_idx] # <- use .item() result - _maybe_sync() - process_begin = datetime.datetime.now() - with torch.inference_mode(): - x = input_tensor.to(DEVICE, non_blocking=True) - if USE_AMP and DEVICE.type == "cuda": - with torch.cuda.amp.autocast(): - y = model(x) - else: - y = model(x) - _maybe_sync() process_end = datetime.datetime.now() - # -------- postprocess -------- - probs = torch.softmax(y[0], dim=0) - idx = int(torch.argmax(probs).item()) - pred = idx2label[idx] - - # -------- SAME 
measurement keys (microseconds) -------- + # timings download_time = (image_download_end - image_download_begin) / datetime.timedelta(microseconds=1) model_download_time = (model_download_end - model_download_begin) / datetime.timedelta( microseconds=1 @@ -145,12 +123,18 @@ def handler(event): ) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + # optional precise GPU kernel time (ms) + gpu_time_ms = 0.0 + if start_evt is not None and end_evt is not None: + gpu_time_ms = float(start_evt.elapsed_time(end_evt)) # milliseconds + return { - "result": {"idx": idx, "class": pred}, + "result": {"idx": top1_idx, "class": ret, "confidence": top1_conf, "top5_idx": top5_idx}, "measurement": { - "download_time": download_time + model_download_time, - "compute_time": process_time + model_process_time, - "model_time": model_process_time, - "model_download_time": model_download_time, + "download_time": download_time + model_download_time, # µs + "compute_time": process_time + model_process_time, # µs (wall time, includes GPU) + "model_time": model_process_time, # µs + "model_download_time": model_download_time, # µs + "gpu_time_ms": round(gpu_time_ms, 3), # extra: CUDA kernel time }, } From adf54a5edf58e8b11d395d8e24726d3e52c5b88d Mon Sep 17 00:00:00 2001 From: McLavish Date: Thu, 6 Nov 2025 18:29:32 +0100 Subject: [PATCH 22/82] migrated from CircleCI to Github Actions --- .circleci/config.yml | 81 ----------------------- .github/ISSUE_TEMPLATE/workflows/lint.yml | 55 +++++++++++++++ 2 files changed, 55 insertions(+), 81 deletions(-) delete mode 100644 .circleci/config.yml create mode 100644 .github/ISSUE_TEMPLATE/workflows/lint.yml diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 77243ae8a..000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,81 +0,0 @@ -version: 2.1 - -orbs: - python: circleci/python@1.4.0 - -jobs: - linting: - executor: python/default - steps: - - checkout - - restore_cache: - key: 
deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - - run: - command: | - sudo apt update && sudo apt install libcurl4-openssl-dev - name: Install curl-config from Ubuntu APT - - run: - command: | - python3 install.py --aws --azure --gcp --no-local - name: Install pip dependencies - - run: - command: | - . python-venv/bin/activate - black benchmarks --check --config .black.toml - name: Python code formatting with black - - run: - command: | - . python-venv/bin/activate - flake8 benchmarks --config=.flake8.cfg --tee --output-file flake-reports - name: Python code lint with flake8 - # - run: - # command: | - # . python-venv/bin/activate - # mypy sebs --config-file=.mypy.ini - # name: Python static code verification with mypy - - store_artifacts: - path: flake-reports - destination: flake-reports - test-aws: - executor: python/default - steps: - - checkout - - setup_remote_docker - - restore_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - - run: - command: | - if [[ -d $HOME/docker ]]; - then - ls $HOME/docker/*.tar.gz | xargs -I {file} sh -c "zcat {file} | docker load"; - else - docker pull mcopik/serverless-benchmarks:build.aws.python.3.7 - docker pull mcopik/serverless-benchmarks:build.aws.nodejs.12.x - fi - name: Load Docker images - - run: - command: | - python3 install.py --aws - name: Install pip dependencies - - run: - command: | - mkdir -p $HOME/docker - docker images mcopik/serverless-benchmarks --filter='dangling=false' --format '{{.Repository}}:{{.Tag}} {{.ID}}' |\ - xargs -n 2 -t sh -c 'test -e $HOME/docker/$1.tar.gz || docker save $0 | gzip -2 > $HOME/docker/$1.tar.gz' - name: Save Docker images - - save_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - paths: - - "sebs-virtualenv" - - $HOME/docker - - run: - command: | - . 
sebs-virtualenv/bin/activate - tests/test_runner.py --deployment aws - name: Execute AWS tests - -workflows: - main: - jobs: - - linting - diff --git a/.github/ISSUE_TEMPLATE/workflows/lint.yml b/.github/ISSUE_TEMPLATE/workflows/lint.yml new file mode 100644 index 000000000..6cb6444bd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/workflows/lint.yml @@ -0,0 +1,55 @@ +name: Lint + +on: + push: + pull_request: + +jobs: + linting: + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v4 + + - name: Set up Python + id: setup-python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Cache virtualenv + uses: actions/cache@v4 + with: + path: python-venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('requirements.txt') }}-${{ github.ref_name }} + restore-keys: | + venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('requirements.txt') }}- + venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}- + + - name: Install system packages + run: | + sudo apt-get update + sudo apt-get install -y libcurl4-openssl-dev + + - name: Install Python dependencies (via install.py) + run: | + python3 install.py --aws --azure --gcp --no-local + + - name: Black (check) + run: | + . python-venv/bin/activate + black benchmarks --check --config .black.toml + + - name: Flake8 (lint) + run: | + . 
python-venv/bin/activate + # write to file and echo to stdout (requires flake8 with --tee support) + flake8 benchmarks --config=.flake8.cfg --tee --output-file flake-reports + + - name: Upload flake report + if: always() + uses: actions/upload-artifact@v4 + with: + name: flake-reports + path: flake-reports From 67772e2e5a48c6dcddebe426952279a9e876356f Mon Sep 17 00:00:00 2001 From: McLavish Date: Thu, 6 Nov 2025 18:40:24 +0100 Subject: [PATCH 23/82] fixed workflow directory --- .github/{ISSUE_TEMPLATE => }/workflows/lint.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{ISSUE_TEMPLATE => }/workflows/lint.yml (100%) diff --git a/.github/ISSUE_TEMPLATE/workflows/lint.yml b/.github/workflows/lint.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/workflows/lint.yml rename to .github/workflows/lint.yml From 8f02b6679a25240c1192eab07187a3491ae6122f Mon Sep 17 00:00:00 2001 From: McLavish Date: Thu, 6 Nov 2025 19:00:02 +0100 Subject: [PATCH 24/82] pip dependencies take too long --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 6cb6444bd..1043be62e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -34,7 +34,7 @@ jobs: - name: Install Python dependencies (via install.py) run: | - python3 install.py --aws --azure --gcp --no-local + python3 install.py --no-aws --no-azure --no-gcp --no-openwhisk --no-local - name: Black (check) run: | From e06985cf48c86dab3357c77c6d704dd55e921572 Mon Sep 17 00:00:00 2001 From: McLavish Date: Mon, 10 Nov 2025 19:36:28 +0100 Subject: [PATCH 25/82] new benchmark data --- benchmarks-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks-data b/benchmarks-data index 6a17a460f..25c2bb40b 160000 --- a/benchmarks-data +++ b/benchmarks-data @@ -1 +1 @@ -Subproject commit 6a17a460f289e166abb47ea6298fb939e80e8beb +Subproject commit 
25c2bb40b8bde342395534b534ba62f8f0ff3549 From 037f6c34859683012e8838cedb4570d66b15f31f Mon Sep 17 00:00:00 2001 From: xipang Date: Wed, 12 Nov 2025 20:24:53 +0100 Subject: [PATCH 26/82] Bring folder from other-branch --- .../412.language-bert/config.json | 6 + .../400.inference/412.language-bert/input.py | 33 ++++ .../412.language-bert/python/function.py | 157 ++++++++++++++++++ .../412.language-bert/python/init.sh | 3 + .../412.language-bert/python/package.sh | 35 ++++ .../412.language-bert/python/requirements.txt | 3 + .../python/requirements.txt.3.10 | 3 + .../python/requirements.txt.3.11 | 3 + .../python/requirements.txt.3.8 | 3 + .../python/requirements.txt.3.9 | 3 + 10 files changed, 249 insertions(+) create mode 100644 benchmarks/400.inference/412.language-bert/config.json create mode 100644 benchmarks/400.inference/412.language-bert/input.py create mode 100644 benchmarks/400.inference/412.language-bert/python/function.py create mode 100755 benchmarks/400.inference/412.language-bert/python/init.sh create mode 100644 benchmarks/400.inference/412.language-bert/python/package.sh create mode 100644 benchmarks/400.inference/412.language-bert/python/requirements.txt create mode 100644 benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 create mode 100644 benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 create mode 100644 benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 create mode 100644 benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 diff --git a/benchmarks/400.inference/412.language-bert/config.json b/benchmarks/400.inference/412.language-bert/config.json new file mode 100644 index 000000000..94ede7925 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 512, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/400.inference/412.language-bert/input.py 
b/benchmarks/400.inference/412.language-bert/input.py new file mode 100644 index 000000000..9af7ecb56 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/input.py @@ -0,0 +1,33 @@ +import os + + +def buckets_count(): + # model bucket and text bucket + return (2, 0) + + +def upload_files(data_root, data_dir, upload_func): + for root, _, files in os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + filepath = os.path.join(root, file) + relative_key = os.path.join(prefix, file) + upload_func(0, relative_key, filepath) + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + model_archive = "bert-tiny-onnx.tar.gz" + upload_func(0, model_archive, os.path.join(data_dir, "model", model_archive)) + + text_filename = "sentences.jsonl" + upload_func(1, text_filename, os.path.join(data_dir, "text", text_filename)) + + input_config = {"object": {}, "bucket": {}} + input_config["object"]["model"] = model_archive + input_config["object"]["input"] = text_filename + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["model"] = input_paths[0] + input_config["bucket"]["text"] = input_paths[1] + return input_config diff --git a/benchmarks/400.inference/412.language-bert/python/function.py b/benchmarks/400.inference/412.language-bert/python/function.py new file mode 100644 index 000000000..7e4f981ef --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/function.py @@ -0,0 +1,157 @@ +import datetime +import json +import os +import tarfile +import uuid +from typing import Dict, List, Optional + +import numpy as np +import onnxruntime as ort +from tokenizers import Tokenizer + +from . 
import storage + +client = storage.storage.get_instance() + +MODEL_ARCHIVE = "bert-tiny-onnx.tar.gz" +MODEL_DIRECTORY = "/tmp/bert_language_model" +MODEL_SUBDIR = "bert-tiny-onnx" + +_session: Optional[ort.InferenceSession] = None +_tokenizer: Optional[Tokenizer] = None +_labels: Optional[Dict[int, str]] = None + + +def _ensure_model(bucket: str, model_prefix: str): + """ + Lazily download and initialize the ONNX model and tokenizer. + """ + global _session, _tokenizer, _labels + + model_path = os.path.join(MODEL_DIRECTORY, MODEL_SUBDIR) + model_download_begin = datetime.datetime.now() + model_download_end = model_download_begin + + if _session is None or _tokenizer is None or _labels is None: + if not os.path.exists(model_path): + os.makedirs(MODEL_DIRECTORY, exist_ok=True) + archive_path = os.path.join("/tmp", f"{uuid.uuid4()}-{MODEL_ARCHIVE}") + client.download(bucket, os.path.join(model_prefix, MODEL_ARCHIVE), archive_path) + model_download_end = datetime.datetime.now() + + with tarfile.open(archive_path, "r:gz") as tar: + tar.extractall(MODEL_DIRECTORY) + os.remove(archive_path) + else: + model_download_begin = datetime.datetime.now() + model_download_end = model_download_begin + + model_process_begin = datetime.datetime.now() + tokenizer_path = os.path.join(model_path, "tokenizer.json") + _tokenizer = Tokenizer.from_file(tokenizer_path) + _tokenizer.enable_truncation(max_length=128) + _tokenizer.enable_padding(length=128) + + label_map_path = os.path.join(model_path, "label_map.json") + with open(label_map_path, "r") as f: + raw_labels = json.load(f) + _labels = {int(idx): label for idx, label in raw_labels.items()} + + onnx_path = os.path.join(model_path, "model.onnx") + + available = ort.get_available_providers() + if "CUDAExecutionProvider" not in available: + raise RuntimeError(f"CUDAExecutionProvider unavailable (have: {available})") + + _session = ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"]) + model_process_end = 
datetime.datetime.now() + else: + model_process_begin = datetime.datetime.now() + model_process_end = model_process_begin + + model_download_time = (model_download_end - model_download_begin) / datetime.timedelta( + microseconds=1 + ) + model_process_time = (model_process_end - model_process_begin) / datetime.timedelta( + microseconds=1 + ) + + return model_download_time, model_process_time + + +def _prepare_inputs(sentences: List[str]): + assert _tokenizer is not None + + encodings = _tokenizer.encode_batch(sentences) + + input_ids = np.array([enc.ids for enc in encodings], dtype=np.int64) + attention_mask = np.array([enc.attention_mask for enc in encodings], dtype=np.int64) + token_type_ids = np.array( + [enc.type_ids if enc.type_ids else [0] * len(enc.ids) for enc in encodings], + dtype=np.int64, + ) + + return { + "input_ids": input_ids, + "attention_mask": attention_mask, + "token_type_ids": token_type_ids, + } + + +def _softmax(logits: np.ndarray) -> np.ndarray: + shifted = logits - np.max(logits, axis=1, keepdims=True) + exp = np.exp(shifted) + return exp / np.sum(exp, axis=1, keepdims=True) + + +def handler(event): + bucket = event.get("bucket", {}).get("bucket") + model_prefix = event.get("bucket", {}).get("model") + text_prefix = event.get("bucket", {}).get("text") + text_key = event.get("object", {}).get("input") + + download_begin = datetime.datetime.now() + text_download_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(text_key)}") + client.download(bucket, os.path.join(text_prefix, text_key), text_download_path) + download_end = datetime.datetime.now() + + model_download_time, model_process_time = _ensure_model(bucket, model_prefix) + assert _session is not None and _labels is not None and _tokenizer is not None + + with open(text_download_path, "r") as f: + sentences = [json.loads(line)["text"] for line in f if line.strip()] + + os.remove(text_download_path) + + inference_begin = datetime.datetime.now() + inputs = 
_prepare_inputs(sentences) + outputs = _session.run(None, inputs) + logits = outputs[0] + probabilities = _softmax(logits) + inference_end = datetime.datetime.now() + + results = [] + for sentence, probs in zip(sentences, probabilities): + label_idx = int(np.argmax(probs)) + label = _labels.get(label_idx, str(label_idx)) + results.append( + { + "text": sentence, + "label": label, + "confidence": float(probs[label_idx]), + "raw_scores": probs.tolist(), + } + ) + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1) + + return { + "result": {"predictions": results}, + "measurement": { + "download_time": download_time + model_download_time, + "compute_time": compute_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + }, + } diff --git a/benchmarks/400.inference/412.language-bert/python/init.sh b/benchmarks/400.inference/412.language-bert/python/init.sh new file mode 100755 index 000000000..160852abe --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/init.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +# No additional initialization required for the BERT inference benchmark. diff --git a/benchmarks/400.inference/412.language-bert/python/package.sh b/benchmarks/400.inference/412.language-bert/python/package.sh new file mode 100644 index 000000000..edb27ebe0 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/package.sh @@ -0,0 +1,35 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . 
-type d -name "bin" -not -path "*/torch/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . -name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +TORCH_DIR=".python_packages/lib/site-packages/torch" +if [ -d "$1/${TORCH_DIR}" ]; then + cd $1 + zip -qr torch.zip ${TORCH_DIR} + rm -rf ${TORCH_DIR} + cd ${CUR_DIR} + echo "Torch-zipped size $(du -sh $1 | cut -f1)" +fi diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt b/benchmarks/400.inference/412.language-bert/python/requirements.txt new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 diff --git 
a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 From 377d949c57999970dd65134b4d839a113efd3e14 Mon Sep 17 00:00:00 2001 From: xipang Date: Wed, 12 Nov 2025 20:31:53 +0100 Subject: [PATCH 27/82] update code --- benchmarks-data | 2 +- .../python/function.py | 215 ++++++++++-------- config/local_deployment.tmp | 0 3 files changed, 116 insertions(+), 101 deletions(-) create mode 100644 config/local_deployment.tmp diff --git a/benchmarks-data b/benchmarks-data index 6a17a460f..25c2bb40b 160000 --- a/benchmarks-data +++ b/benchmarks-data @@ -1 +1 @@ -Subproject commit 6a17a460f289e166abb47ea6298fb939e80e8beb +Subproject commit 25c2bb40b8bde342395534b534ba62f8f0ff3549 diff --git a/benchmarks/400.inference/413.image-classification/python/function.py b/benchmarks/400.inference/413.image-classification/python/function.py index 7c241d525..71a14fe79 100644 --- a/benchmarks/400.inference/413.image-classification/python/function.py +++ b/benchmarks/400.inference/413.image-classification/python/function.py @@ -1,13 +1,17 @@ -import datetime, json, os, uuid +import datetime +import json +import os +import uuid +from typing import List, Optional, Tuple # Extract zipped torch model - used in Python 3.8 and 3.9 -if os.path.exists("function/torch.zip"): - import zipfile, sys +# if 
os.path.exists("function/torch.zip"): +# import zipfile, sys - zipfile.ZipFile("function/torch.zip").extractall("/tmp/") - sys.path.append( - os.path.join(os.path.dirname(__file__), "/tmp/.python_packages/lib/site-packages") - ) +# zipfile.ZipFile("function/torch.zip").extractall("/tmp/") +# sys.path.append( +# os.path.join(os.path.dirname(__file__), "/tmp/.python_packages/lib/site-packages") +# ) from PIL import Image import torch @@ -22,119 +26,130 @@ class_idx = json.load(open(os.path.join(SCRIPT_DIR, "imagenet_class_index.json"), "r")) idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))] -model = None -device = "cuda" if torch.cuda.is_available() else "cpu" - - -def handler(event): - bucket = event.get("bucket").get("bucket") - input_prefix = event.get("bucket").get("input") - model_prefix = event.get("bucket").get("model") - key = event.get("object").get("input") - model_key = event.get("object").get("model") - - download_path = "/tmp/{}-{}".format(key, uuid.uuid4()) - - # --- Download image --- - image_download_begin = datetime.datetime.now() - image_path = download_path - client.download(bucket, os.path.join(input_prefix, key), download_path) - image_download_end = datetime.datetime.now() +MODEL_DIRECTORY = "resnet50.tar.gz" +_model: Optional[torch.nn.Module] = None +_model_key: Optional[str] = None +_device = "cuda" if torch.cuda.is_available() else "cpu" +_preprocess = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] +) + + +def _ensure_model(bucket: str, model_prefix: str, model_key: str) -> Tuple[float, float]: + """ + Lazily download and load the ResNet model so repeated invocations stay warm. 
+ """ + global _model, _model_key + + model_download_begin = datetime.datetime.now() + model_download_end = model_download_begin + model_process_begin = datetime.datetime.now() + model_process_end = model_process_begin + + if _model is None or _model_key != model_key: + os.makedirs(MODEL_DIRECTORY, exist_ok=True) + weights_name = os.path.basename(model_key) + weights_path = os.path.join(MODEL_DIRECTORY, weights_name) + + if not os.path.exists(weights_path): + client.download(bucket, os.path.join(model_prefix, model_key), weights_path) + model_download_end = datetime.datetime.now() + else: + model_download_begin = datetime.datetime.now() + model_download_end = model_download_begin - global model - if model is None: - # --- Download weights --- - model_download_begin = datetime.datetime.now() - model_path = os.path.join("/tmp", model_key) - client.download(bucket, os.path.join(model_prefix, model_key), model_path) - model_download_end = datetime.datetime.now() - - # --- Load model (CPU), then move to GPU --- model_process_begin = datetime.datetime.now() model = resnet50(pretrained=False) - state = torch.load(model_path, map_location="cpu") # robust for CPU-saved checkpoints - # handle checkpoints that wrap state dict: + state = torch.load(weights_path, map_location="cpu") state = state.get("state_dict", state) model.load_state_dict(state) model.eval() - model.to(device) - # speed on cuDNN-convolutional nets - if device == "cuda": + model.to(_device) + if _device == "cuda": torch.backends.cudnn.benchmark = True + _model = model + _model_key = model_key model_process_end = datetime.datetime.now() - else: - # model already cached - model_download_begin = model_download_end = datetime.datetime.now() - model_process_begin = model_process_end = datetime.datetime.now() - - # --- Preprocess (CPU) --- - process_begin = datetime.datetime.now() - input_image = Image.open(image_path).convert("RGB") - preprocess = transforms.Compose( - [ - transforms.Resize(256), - 
transforms.CenterCrop(224), - transforms.ToTensor(), # [0,1], CHW - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] + + return ( + (model_download_end - model_download_begin) / datetime.timedelta(microseconds=1), + (model_process_end - model_process_begin) / datetime.timedelta(microseconds=1), ) - input_tensor = preprocess(input_image) # CPU tensor - input_batch = input_tensor.unsqueeze(0).to(device, non_blocking=True) # NCHW on GPU - # --- Inference (GPU) --- + +def _prepare_tensor(image_path: str) -> torch.Tensor: + image = Image.open(image_path).convert("RGB") + tensor = _preprocess(image).unsqueeze(0) + return tensor.to(_device, non_blocking=True) + + +def _run_inference(batch: torch.Tensor) -> Tuple[int, float, List[int], float]: + assert _model is not None + + gpu_time_ms = 0.0 + start_evt = end_evt = None + if _device == "cuda": + torch.cuda.synchronize() + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + with torch.no_grad(): - # Ensure wall-clock timing includes GPU work - if device == "cuda": - torch.cuda.synchronize() - # GPU event timing (kernel time) - start_evt = end_evt = None - if device == "cuda": - start_evt = torch.cuda.Event(enable_timing=True) - end_evt = torch.cuda.Event(enable_timing=True) - start_evt.record() - - output = model(input_batch) # logits [1,1000] - - if device == "cuda": - end_evt.record() - torch.cuda.synchronize() - - # compute top-1 / top-5 on CPU + output = _model(batch) + + if _device == "cuda" and start_evt and end_evt: + end_evt.record() + torch.cuda.synchronize() + gpu_time_ms = float(start_evt.elapsed_time(end_evt)) + probs = torch.nn.functional.softmax(output, dim=1) conf, index = torch.max(probs, 1) - # make Python types - top1_idx = index.item() - top1_conf = float(conf.item()) - # (optional) top-5 _, top5_idx = torch.topk(probs, k=5, dim=1) - top5_idx = top5_idx[0].tolist() - ret = idx2label[top1_idx] # <- use 
.item() result + return index.item(), float(conf.item()), top5_idx[0].tolist(), gpu_time_ms - process_end = datetime.datetime.now() - # timings - download_time = (image_download_end - image_download_begin) / datetime.timedelta(microseconds=1) - model_download_time = (model_download_end - model_download_begin) / datetime.timedelta( - microseconds=1 - ) - model_process_time = (model_process_end - model_process_begin) / datetime.timedelta( - microseconds=1 - ) - process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) +def handler(event): + bucket = event.get("bucket", {}).get("bucket") + input_prefix = event.get("bucket", {}).get("input") + model_prefix = event.get("bucket", {}).get("model") + key = event.get("object", {}).get("input") + model_key = event.get("object", {}).get("model") - # optional precise GPU kernel time (ms) - gpu_time_ms = 0.0 - if start_evt is not None and end_evt is not None: - gpu_time_ms = float(start_evt.elapsed_time(end_evt)) # milliseconds + download_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(key)}") + image_download_begin = datetime.datetime.now() + client.download(bucket, os.path.join(input_prefix, key), download_path) + image_download_end = datetime.datetime.now() + + model_download_time, model_process_time = _ensure_model(bucket, model_prefix, model_key) + + inference_begin = datetime.datetime.now() + input_batch = _prepare_tensor(download_path) + top1_idx, top1_conf, top5_idx, gpu_time_ms = _run_inference(input_batch) + inference_end = datetime.datetime.now() + + os.remove(download_path) + + download_time = (image_download_end - image_download_begin) / datetime.timedelta(microseconds=1) + compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1) return { - "result": {"idx": top1_idx, "class": ret, "confidence": top1_conf, "top5_idx": top5_idx}, + "result": { + "idx": top1_idx, + "class": idx2label[top1_idx], + "confidence": top1_conf, + "top5_idx": top5_idx, + }, 
"measurement": { - "download_time": download_time + model_download_time, # µs - "compute_time": process_time + model_process_time, # µs (wall time, includes GPU) - "model_time": model_process_time, # µs - "model_download_time": model_download_time, # µs - "gpu_time_ms": round(gpu_time_ms, 3), # extra: CUDA kernel time + "download_time": download_time + model_download_time, + "compute_time": compute_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + "gpu_time_ms": round(gpu_time_ms, 3), }, } diff --git a/config/local_deployment.tmp b/config/local_deployment.tmp new file mode 100644 index 000000000..e69de29bb From 8dd8a6e8fec20df291f0ddffe60f3473dce903f6 Mon Sep 17 00:00:00 2001 From: xipang Date: Wed, 12 Nov 2025 21:59:12 +0100 Subject: [PATCH 28/82] modify code and requirements --- .../413.image-classification/input.py | 2 +- .../python/function.py | 171 ++++++++++-------- .../python/requirements.txt | 7 +- .../python/requirements.txt.3.10 | 6 +- .../python/requirements.txt.3.11 | 6 +- .../python/requirements.txt.3.6 | 7 +- .../python/requirements.txt.3.7 | 7 +- .../python/requirements.txt.3.8 | 6 +- .../python/requirements.txt.3.9 | 6 +- "eval \"$(ssh-agent -s)\"" | 7 + "eval \"$(ssh-agent -s)\".pub" | 1 + 11 files changed, 125 insertions(+), 101 deletions(-) create mode 100644 "eval \"$(ssh-agent -s)\"" create mode 100644 "eval \"$(ssh-agent -s)\".pub" diff --git a/benchmarks/400.inference/413.image-classification/input.py b/benchmarks/400.inference/413.image-classification/input.py index e97d38057..6ee2fdd08 100644 --- a/benchmarks/400.inference/413.image-classification/input.py +++ b/benchmarks/400.inference/413.image-classification/input.py @@ -35,7 +35,7 @@ def generate_input( upload_func(0, model_name, os.path.join(data_dir, "model", model_name)) input_images = [] - resnet_path = os.path.join(data_dir, "fake-resnet") + resnet_path = os.path.join(data_dir, "data") with open(os.path.join(resnet_path, 
"val_map.txt"), "r") as f: for line in f: img, img_class = line.split() diff --git a/benchmarks/400.inference/413.image-classification/python/function.py b/benchmarks/400.inference/413.image-classification/python/function.py index 71a14fe79..48e837f70 100644 --- a/benchmarks/400.inference/413.image-classification/python/function.py +++ b/benchmarks/400.inference/413.image-classification/python/function.py @@ -1,22 +1,14 @@ import datetime import json import os +import shutil +import tarfile import uuid from typing import List, Optional, Tuple -# Extract zipped torch model - used in Python 3.8 and 3.9 -# if os.path.exists("function/torch.zip"): -# import zipfile, sys - -# zipfile.ZipFile("function/torch.zip").extractall("/tmp/") -# sys.path.append( -# os.path.join(os.path.dirname(__file__), "/tmp/.python_packages/lib/site-packages") -# ) - +import numpy as np +import onnxruntime as ort from PIL import Image -import torch -from torchvision import transforms -from torchvision.models import resnet50 from . 
import storage @@ -26,92 +18,122 @@ class_idx = json.load(open(os.path.join(SCRIPT_DIR, "imagenet_class_index.json"), "r")) idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))] -MODEL_DIRECTORY = "resnet50.tar.gz" -_model: Optional[torch.nn.Module] = None -_model_key: Optional[str] = None -_device = "cuda" if torch.cuda.is_available() else "cpu" -_preprocess = transforms.Compose( - [ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] -) +MODEL_ARCHIVE = "resnet50.tar.gz" +MODEL_DIRECTORY = "/tmp/image_classification_model" +MODEL_SUBDIR = "resnet50" + +_session: Optional[ort.InferenceSession] = None +_session_input: Optional[str] = None +_session_output: Optional[str] = None +_cached_model_key: Optional[str] = None + +_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32) +_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32) def _ensure_model(bucket: str, model_prefix: str, model_key: str) -> Tuple[float, float]: """ - Lazily download and load the ResNet model so repeated invocations stay warm. + Lazily download, extract, and initialize the ONNX ResNet model. 
""" - global _model, _model_key + global _session, _session_input, _session_output, _cached_model_key + effective_model_key = model_key or MODEL_ARCHIVE model_download_begin = datetime.datetime.now() model_download_end = model_download_begin - model_process_begin = datetime.datetime.now() - model_process_end = model_process_begin - if _model is None or _model_key != model_key: + if _session is None or _cached_model_key != effective_model_key: + archive_basename = os.path.basename(effective_model_key) + archive_path = os.path.join("/tmp", f"{uuid.uuid4()}-{archive_basename}") + model_dir = os.path.join(MODEL_DIRECTORY, MODEL_SUBDIR) + + if os.path.exists(model_dir): + shutil.rmtree(model_dir) os.makedirs(MODEL_DIRECTORY, exist_ok=True) - weights_name = os.path.basename(model_key) - weights_path = os.path.join(MODEL_DIRECTORY, weights_name) - if not os.path.exists(weights_path): - client.download(bucket, os.path.join(model_prefix, model_key), weights_path) - model_download_end = datetime.datetime.now() - else: - model_download_begin = datetime.datetime.now() - model_download_end = model_download_begin + client.download(bucket, os.path.join(model_prefix, effective_model_key), archive_path) + model_download_end = datetime.datetime.now() + + with tarfile.open(archive_path, "r:gz") as tar: + tar.extractall(MODEL_DIRECTORY) + os.remove(archive_path) model_process_begin = datetime.datetime.now() - model = resnet50(pretrained=False) - state = torch.load(weights_path, map_location="cpu") - state = state.get("state_dict", state) - model.load_state_dict(state) - model.eval() - model.to(_device) - if _device == "cuda": - torch.backends.cudnn.benchmark = True - _model = model - _model_key = model_key + onnx_path = os.path.join(model_dir, "model.onnx") + if not os.path.exists(onnx_path): + raise FileNotFoundError(f"Expected ONNX model at {onnx_path}") + + available = ort.get_available_providers() + if "CUDAExecutionProvider" not in available: + raise 
RuntimeError(f"CUDAExecutionProvider unavailable (providers: {available})") + + _session = ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"]) + _session_input = _session.get_inputs()[0].name + _session_output = _session.get_outputs()[0].name + _cached_model_key = effective_model_key model_process_end = datetime.datetime.now() + else: + model_process_begin = datetime.datetime.now() + model_process_end = model_process_begin - return ( - (model_download_end - model_download_begin) / datetime.timedelta(microseconds=1), - (model_process_end - model_process_begin) / datetime.timedelta(microseconds=1), + model_download_time = (model_download_end - model_download_begin) / datetime.timedelta( + microseconds=1 ) + model_process_time = (model_process_end - model_process_begin) / datetime.timedelta( + microseconds=1 + ) + + return model_download_time, model_process_time + + +def _resize_shorter_side(image: Image.Image, size: int) -> Image.Image: + width, height = image.size + if width < height: + new_width = size + new_height = int(round(size * height / width)) + else: + new_height = size + new_width = int(round(size * width / height)) + resample = getattr(Image, "Resampling", Image).BILINEAR + return image.resize((new_width, new_height), resample=resample) + + +def _center_crop(image: Image.Image, size: int) -> Image.Image: + width, height = image.size + left = max(0, int(round((width - size) / 2))) + top = max(0, int(round((height - size) / 2))) + right = left + size + bottom = top + size + return image.crop((left, top, right, bottom)) -def _prepare_tensor(image_path: str) -> torch.Tensor: +def _prepare_tensor(image_path: str) -> np.ndarray: image = Image.open(image_path).convert("RGB") - tensor = _preprocess(image).unsqueeze(0) - return tensor.to(_device, non_blocking=True) + image = _resize_shorter_side(image, 256) + image = _center_crop(image, 224) + np_image = np.asarray(image).astype(np.float32) / 255.0 + np_image = (np_image - _MEAN) / _STD + np_image 
= np.transpose(np_image, (2, 0, 1)) + return np_image[np.newaxis, :] -def _run_inference(batch: torch.Tensor) -> Tuple[int, float, List[int], float]: - assert _model is not None - gpu_time_ms = 0.0 - start_evt = end_evt = None - if _device == "cuda": - torch.cuda.synchronize() - start_evt = torch.cuda.Event(enable_timing=True) - end_evt = torch.cuda.Event(enable_timing=True) - start_evt.record() +def _softmax(logits: np.ndarray) -> np.ndarray: + shifted = logits - np.max(logits, axis=1, keepdims=True) + exp = np.exp(shifted) + return exp / np.sum(exp, axis=1, keepdims=True) - with torch.no_grad(): - output = _model(batch) - if _device == "cuda" and start_evt and end_evt: - end_evt.record() - torch.cuda.synchronize() - gpu_time_ms = float(start_evt.elapsed_time(end_evt)) +def _run_inference(batch: np.ndarray) -> Tuple[int, float, List[int]]: + assert _session is not None and _session_input is not None and _session_output is not None - probs = torch.nn.functional.softmax(output, dim=1) - conf, index = torch.max(probs, 1) - _, top5_idx = torch.topk(probs, k=5, dim=1) + outputs = _session.run([_session_output], {_session_input: batch}) + logits = outputs[0] + probs = _softmax(logits) + top1_idx = int(np.argmax(probs, axis=1)[0]) + top1_conf = float(probs[0, top1_idx]) + top5_idx = np.argsort(probs[0])[::-1][:5].tolist() - return index.item(), float(conf.item()), top5_idx[0].tolist(), gpu_time_ms + return top1_idx, top1_conf, top5_idx def handler(event): @@ -130,13 +152,14 @@ def handler(event): inference_begin = datetime.datetime.now() input_batch = _prepare_tensor(download_path) - top1_idx, top1_conf, top5_idx, gpu_time_ms = _run_inference(input_batch) + top1_idx, top1_conf, top5_idx = _run_inference(input_batch) inference_end = datetime.datetime.now() os.remove(download_path) download_time = (image_download_end - image_download_begin) / datetime.timedelta(microseconds=1) compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1) + 
#gpu_time_ms = 0.0 return { "result": { @@ -150,6 +173,6 @@ def handler(event): "compute_time": compute_time + model_process_time, "model_time": model_process_time, "model_download_time": model_download_time, - "gpu_time_ms": round(gpu_time_ms, 3), + #"gpu_time_ms": round(gpu_time_ms, 3), }, } diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt b/benchmarks/400.inference/413.image-classification/python/requirements.txt index d191dc6dd..cbbddaca6 100644 --- a/benchmarks/400.inference/413.image-classification/python/requirements.txt +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt @@ -1,4 +1,3 @@ -#torch==1.2.0+cpu -#torchvision==0.4.0+cpu -#https://download.pytorch.org/whl/cpu/torch-1.0.1.post2-cp37-cp37m-linux_x86_64.whl -#torch==1.0.1.post2+cpu +pillow==10.3.0 +numpy==1.24.4 +onnxruntime-gpu==1.16.3 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 index ab734881f..cbbddaca6 100644 --- a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 @@ -1,5 +1,3 @@ pillow==10.3.0 -https://download.pytorch.org/whl/cpu/torch-1.11.0%2Bcpu-cp310-cp310-linux_x86_64.whl -torchvision==0.12 -# prevent installing numpy 2.0 -numpy==1.22.0 +numpy==1.24.4 +onnxruntime-gpu==1.16.3 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 index 3288171f8..cbbddaca6 100644 --- a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 @@ -1,5 +1,3 @@ pillow==10.3.0 -https://download.pytorch.org/whl/cpu/torch-2.0.0%2Bcpu-cp311-cp311-linux_x86_64.whl -torchvision==0.15.1 -# 
prevent installing numpy 2.0 -numpy==1.24.0 +numpy==1.24.4 +onnxruntime-gpu==1.16.3 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.6 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.6 index 63409acaa..cd198a130 100644 --- a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.6 +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.6 @@ -1,4 +1,3 @@ -Pillow==6.1 -numpy==1.16 -https://download.pytorch.org/whl/cpu/torch-1.0.1.post2-cp36-cp36m-linux_x86_64.whl -torchvision==0.2.1 +Pillow==6.1.0 +numpy==1.19.5 +onnxruntime-gpu==1.8.2 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.7 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.7 index 54bddbd58..cd198a130 100644 --- a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.7 +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.7 @@ -1,4 +1,3 @@ -Pillow==6.1 -numpy==1.16 -https://download.pytorch.org/whl/cpu/torch-1.0.1.post2-cp37-cp37m-linux_x86_64.whl -torchvision==0.2.1 +Pillow==6.1.0 +numpy==1.19.5 +onnxruntime-gpu==1.8.2 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 index 7b873eafa..cbbddaca6 100644 --- a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 @@ -1,3 +1,3 @@ -numpy==1.18 -https://download.pytorch.org/whl/cpu/torch-1.4.0%2Bcpu-cp38-cp38-linux_x86_64.whl -torchvision==0.5 +pillow==10.3.0 +numpy==1.24.4 +onnxruntime-gpu==1.16.3 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 index c7fc0663e..cbbddaca6 100644 --- 
a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 @@ -1,3 +1,3 @@ -numpy==1.20 -https://download.pytorch.org/whl/cpu/torch-1.8.0%2Bcpu-cp39-cp39-linux_x86_64.whl -torchvision==0.9.0 +pillow==10.3.0 +numpy==1.24.4 +onnxruntime-gpu==1.16.3 diff --git "a/eval \"$(ssh-agent -s)\"" "b/eval \"$(ssh-agent -s)\"" new file mode 100644 index 000000000..892647ba0 --- /dev/null +++ "b/eval \"$(ssh-agent -s)\"" @@ -0,0 +1,7 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW +QyNTUxOQAAACB1mtWWL4bmQmqRMvbVqjz3ZUKX7NeYfMzA7Wf+31DqtAAAAKA7gCOkO4Aj +pAAAAAtzc2gtZWQyNTUxOQAAACB1mtWWL4bmQmqRMvbVqjz3ZUKX7NeYfMzA7Wf+31DqtA +AAAEBZdoiktY5L2ikHyUK4JfoeaTTX1KBHCtB+muQV2Y68SXWa1ZYvhuZCapEy9tWqPPdl +Qpfs15h8zMDtZ/7fUOq0AAAAGXJ1c3NlbGxwYW5nMDUwM0BnbWFpbC5jb20BAgME +-----END OPENSSH PRIVATE KEY----- diff --git "a/eval \"$(ssh-agent -s)\".pub" "b/eval \"$(ssh-agent -s)\".pub" new file mode 100644 index 000000000..c616a46e0 --- /dev/null +++ "b/eval \"$(ssh-agent -s)\".pub" @@ -0,0 +1 @@ +ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHWa1ZYvhuZCapEy9tWqPPdlQpfs15h8zMDtZ/7fUOq0 russellpang0503@gmail.com From de15075aff66ae1d010d5e5fc3605bacdd7080c0 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Wed, 12 Nov 2025 22:01:26 +0100 Subject: [PATCH 29/82] unfinished new fuc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sync: image-classification requirements + add 605.lu benchmark - Resolve conflicts in 413.image-classification/python/requirements* - Drop py3.6/py3.7 variants removed upstream; keep/update 3.8–3.11 - Add new 600.linearalgebra/605.lu benchmark (config, input, function, reqs) - Rename local_deployment.tmp -> 600.linearalgebra/605.lu/config.json - Update local_deployment.json; add out_benchmark*.json; update out_storage.json --- .../python/requirements.txt | 8 ++- 
.../python/requirements.txt.3.10 | 7 ++- .../python/requirements.txt.3.11 | 7 ++- .../python/requirements.txt.3.12 | 4 ++ .../python/requirements.txt.3.6 | 3 - .../python/requirements.txt.3.7 | 3 - .../python/requirements.txt.3.8 | 8 ++- .../python/requirements.txt.3.9 | 7 ++- .../python/requirements.txt.arm.3.8 | 5 ++ .../python/requirements.txt.arm.3.9 | 4 ++ .../600.linearalgebra/605.lu/config.json | 6 ++ benchmarks/600.linearalgebra/605.lu/input.py | 7 +++ .../605.lu/python/function.py | 60 +++++++++++++++++++ .../605.lu/python/requirements.txt | 1 + .../605.lu/python/requirements.txt.3.10 | 0 .../605.lu/python/requirements.txt.3.11 | 0 .../605.lu/python/requirements.txt.3.12 | 0 .../605.lu/python/requirements.txt.3.7 | 0 .../605.lu/python/requirements.txt.3.8 | 0 .../605.lu/python/requirements.txt.3.9 | 0 .../605.lu/python/requirements.txt.arm.3.8 | 0 .../605.lu/python/requirements.txt.arm.3.9 | 0 config/local_deployment.json | 18 +++--- out_benchmark.json | 40 +++++++++++++ out_benchmark_bert.json | 47 +++++++++++++++ out_storage.json | 8 +-- 26 files changed, 209 insertions(+), 34 deletions(-) mode change 100644 => 100755 benchmarks/400.inference/413.image-classification/python/requirements.txt create mode 100644 benchmarks/400.inference/413.image-classification/python/requirements.txt.3.12 delete mode 100644 benchmarks/400.inference/413.image-classification/python/requirements.txt.3.6 delete mode 100644 benchmarks/400.inference/413.image-classification/python/requirements.txt.3.7 mode change 100644 => 100755 benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 mode change 100644 => 100755 benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 create mode 100644 benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.8 create mode 100644 benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.9 create mode 100644 benchmarks/600.linearalgebra/605.lu/config.json 
create mode 100644 benchmarks/600.linearalgebra/605.lu/input.py create mode 100755 benchmarks/600.linearalgebra/605.lu/python/function.py create mode 100755 benchmarks/600.linearalgebra/605.lu/python/requirements.txt rename config/local_deployment.tmp => benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.10 (100%) create mode 100644 benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.11 create mode 100644 benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.12 create mode 100755 benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.7 create mode 100755 benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.8 create mode 100755 benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.9 create mode 100644 benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.8 create mode 100644 benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.9 create mode 100644 out_benchmark.json create mode 100644 out_benchmark_bert.json diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt b/benchmarks/400.inference/413.image-classification/python/requirements.txt old mode 100644 new mode 100755 index cbbddaca6..01d9a45b4 --- a/benchmarks/400.inference/413.image-classification/python/requirements.txt +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt @@ -1,3 +1,5 @@ -pillow==10.3.0 -numpy==1.24.4 -onnxruntime-gpu==1.16.3 +numpy>=1.22,<2.0 +pillow>=9.5,<10.0 +torch==2.4.1 +torchvision==0.19.1 +typing-extensions>=4.8 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 index cbbddaca6..96299cb57 100644 --- a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 @@ -1,3 +1,4 @@ -pillow==10.3.0 -numpy==1.24.4 -onnxruntime-gpu==1.16.3 
+numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 index cbbddaca6..96299cb57 100644 --- a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 @@ -1,3 +1,4 @@ -pillow==10.3.0 -numpy==1.24.4 -onnxruntime-gpu==1.16.3 +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.12 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.12 new file mode 100644 index 000000000..96299cb57 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.12 @@ -0,0 +1,4 @@ +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.6 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.6 deleted file mode 100644 index cd198a130..000000000 --- a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.6 +++ /dev/null @@ -1,3 +0,0 @@ -Pillow==6.1.0 -numpy==1.19.5 -onnxruntime-gpu==1.8.2 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.7 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.7 deleted file mode 100644 index cd198a130..000000000 --- a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.7 +++ /dev/null @@ -1,3 +0,0 @@ -Pillow==6.1.0 -numpy==1.19.5 -onnxruntime-gpu==1.8.2 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 old mode 100644 new mode 100755 index cbbddaca6..01d9a45b4 --- 
a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 @@ -1,3 +1,5 @@ -pillow==10.3.0 -numpy==1.24.4 -onnxruntime-gpu==1.16.3 +numpy>=1.22,<2.0 +pillow>=9.5,<10.0 +torch==2.4.1 +torchvision==0.19.1 +typing-extensions>=4.8 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 old mode 100644 new mode 100755 index cbbddaca6..96299cb57 --- a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 @@ -1,3 +1,4 @@ -pillow==10.3.0 -numpy==1.24.4 -onnxruntime-gpu==1.16.3 +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.8 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..01d9a45b4 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.8 @@ -0,0 +1,5 @@ +numpy>=1.22,<2.0 +pillow>=9.5,<10.0 +torch==2.4.1 +torchvision==0.19.1 +typing-extensions>=4.8 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.9 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..96299cb57 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.9 @@ -0,0 +1,4 @@ +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/600.linearalgebra/605.lu/config.json b/benchmarks/600.linearalgebra/605.lu/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/605.lu/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + 
"languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/605.lu/input.py b/benchmarks/600.linearalgebra/605.lu/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/605.lu/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/605.lu/python/function.py b/benchmarks/600.linearalgebra/605.lu/python/function.py new file mode 100755 index 000000000..ea3de4d19 --- /dev/null +++ b/benchmarks/600.linearalgebra/605.lu/python/function.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +import sys, json, torch, datetime + + +def initialize_torch(N, dtype=torch.float32, device="cuda"): + col = torch.arange(N, device=device) + base = (torch.remainder(-col, N).to(dtype) / N) + 1 + + A = torch.tril(base.expand(N, N)).clone() + + A.fill_diagonal_(torch.tensor(1.0, dtype=dtype, device=device)) + + A = A @ A.T + return A + + +def kernel_cholesky(A): + torch.cuda.synchronize() + _ = torch.linalg.cholesky(A) # warmup + torch.cuda.synchronize() + + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + for _ in range(A.size(0)): + L = torch.linalg.cholesky(A) + end_evt.record() + torch.cuda.synchronize() + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return L, gpu_ms + + +def handler(event): + size = event.get("size") + if "seed" in event: + import random + + random.seed(event["seed"]) + + seed = event.get("seed", 42) + seed = int(seed) + + gen_begin = datetime.datetime.now() + A = initialize_torch(size, dtype=torch.float32, device="cuda") + gen_end = datetime.datetime.now() + + comp_begin = datetime.datetime.now() + L, gpu_ms = kernel_cholesky(A) + comp_end = datetime.datetime.now() + + gen_us = 
(gen_end - gen_begin) / datetime.timedelta(microseconds=1) + comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1) + + return { + "measurement": { + "generating_time": gen_us, + "compute_time": comp_us, + "gpu_time": gpu_ms, + } + } diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/config/local_deployment.tmp b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.10 similarity index 100% rename from config/local_deployment.tmp rename to benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.10 diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git 
a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/config/local_deployment.json b/config/local_deployment.json index d89b3a968..773b43943 100644 --- a/config/local_deployment.json +++ b/config/local_deployment.json @@ -1,18 +1,18 @@ { "experiments": { - "deployment": "local", + "deployment": "aws", "update_code": false, "update_storage": false, "download_results": false, - "architecture": "x64", + "architecture": "arm64", "container_deployment": true, "runtime": { "language": "python", - "version": "3.11" + "version": "3.8" }, "type": "invocation-overhead", "perf-cost": { - "benchmark": "601.matmul", + "benchmark": "110.dynamic-html", "experiments": [ "cold", "warm", @@ -51,7 +51,7 @@ } }, "deployment": { - "name": "local", + "name": "aws", "aws": { "region": "us-east-1", "lambda-role": "" @@ -71,9 +71,9 @@ "minio": { "address": "192.168.1.101:9011", "mapped_port": 9011, - "access_key": "Ux22nOcFoUaZAnmg-kULuIzvXmiFNVRiZkzcOaDawpU", - "secret_key": "9bb8b39326b0fde8bdfa5d013f743c03e57d146f10433fe2fe3ccce7225078d5", - "instance_id": "f337f509c0375ca4457f815cc0f67352088f6093053ba33c1e45aca0012e0a9f", + "access_key": "-5X8s-Wf3pQfjnc7kMAlr9HYX96jIMx3P7GSd55IBAY", + "secret_key": "c9e3b900a8d363f9907af7057fb5a8e35cb14ad24e9e474e75b8139323717fdc", + "instance_id": "26785ccd17e17e72255c255d00756a9eaa14b1aeb60a52527b97cfb33eece9e5", "output_buckets": [], "input_buckets": [], "version": "RELEASE.2024-07-16T23-46-41Z", @@ -89,7 +89,7 @@ "alternator_port": 8000, "access_key": "None", "secret_key": "None", - "instance_id": "c7be7d211bb211b92f41afb073f75635d9f5f2a222abeb6d9b17673b02d079ca", + "instance_id": "28fb7af28043b633b33b4b5999b48c14767f717ed76a73515ff8c68f253baeb1", "region": "None", "cpus": 1, "memory": "750", diff --git a/out_benchmark.json b/out_benchmark.json new file mode 100644 index 000000000..cf96b5c5d --- 
/dev/null +++ b/out_benchmark.json @@ -0,0 +1,40 @@ +{ + "functions": [ + { + "benchmark": "110.dynamic-html", + "config": { + "architecture": "x64", + "memory": 128, + "runtime": { + "language": "python", + "version": "3.8" + }, + "timeout": 10 + }, + "hash": "c9fbe3e5e85e119e20d6a72651ef23b0", + "instance_id": "be83782894ea459ea3328d4d3b4aeb7173acec54f2c1cebb354f97459b318eef", + "name": "sebs-local-f2033fb2-110.dynamic-html-python-3.8", + "port": 9000, + "triggers": [], + "url": "172.17.0.4:9000" + } + ], + "inputs": [ + { + "random_len": 10, + "username": "testname" + } + ], + "storage": { + "access_key": "FC95zkJKAUbfmXU3ci5lc0SFWhDWYrSgx0nRNycthcY", + "address": "172.17.0.2:9000", + "data_volume": "minio-volume", + "input_buckets": [], + "instance_id": "d14d6dc800ce2f976cc19dc4dd85d2010b65709266f5ae0f1a0c157d338134ec", + "mapped_port": 9011, + "output_buckets": [], + "secret_key": "65eb94d63b0191bf765864ffbd4ee58cc1eb852bdc3a5ca463bca2e7c5915aec", + "type": "minio", + "version": "RELEASE.2024-07-16T23-46-41Z" + } +} \ No newline at end of file diff --git a/out_benchmark_bert.json b/out_benchmark_bert.json new file mode 100644 index 000000000..c706062a2 --- /dev/null +++ b/out_benchmark_bert.json @@ -0,0 +1,47 @@ +{ + "functions": [ + { + "benchmark": "413.image-classification", + "config": { + "architecture": "x64", + "memory": 512, + "runtime": { + "language": "python", + "version": "3.8" + }, + "timeout": 60 + }, + "hash": "445383b434f036f9520743532c7bc0b1", + "instance_id": "a59d2b025927d6350363787bfbab3e7d44b90e0575cd72b939dd9cee291dd63c", + "name": "sebs-local-f2033fb2-413.image-classification-python-3.8", + "port": 9000, + "triggers": [], + "url": "172.17.0.4:9000" + } + ], + "inputs": [ + { + "bucket": { + "bucket": "sebs-benchmarks-local-f2033fb2", + "input": "413.image-classification-1-input", + "model": "413.image-classification-0-input" + }, + "object": { + "input": "800px-Porsche_991_silver_IAA.jpg", + "model": "resnet50.tar.gz" + } + } + ], + 
"storage": { + "access_key": "-5X8s-Wf3pQfjnc7kMAlr9HYX96jIMx3P7GSd55IBAY", + "address": "192.168.1.101:9011", + "data_volume": "minio-volume", + "input_buckets": [], + "instance_id": "26785ccd17e17e72255c255d00756a9eaa14b1aeb60a52527b97cfb33eece9e5", + "mapped_port": 9011, + "output_buckets": [], + "secret_key": "c9e3b900a8d363f9907af7057fb5a8e35cb14ad24e9e474e75b8139323717fdc", + "type": "minio", + "version": "RELEASE.2024-07-16T23-46-41Z" + } +} \ No newline at end of file diff --git a/out_storage.json b/out_storage.json index 16c13dba6..6c4a8e799 100644 --- a/out_storage.json +++ b/out_storage.json @@ -4,9 +4,9 @@ "minio": { "address": "172.17.0.2:9000", "mapped_port": 9011, - "access_key": "vTIGFqQKDU9CVlE_eFkJ7kZFt823CoiiG1GRgxLFczc", - "secret_key": "01872a84cd3ec4af4b897cc57fa515ca7a704a5e4557b5ecde5b98fe41ecc489", - "instance_id": "39a39aa73d44cee61a627a73fecd962f8fdcdbc415f70f702d850eff2afae3a3", + "access_key": "-5X8s-Wf3pQfjnc7kMAlr9HYX96jIMx3P7GSd55IBAY", + "secret_key": "c9e3b900a8d363f9907af7057fb5a8e35cb14ad24e9e474e75b8139323717fdc", + "instance_id": "26785ccd17e17e72255c255d00756a9eaa14b1aeb60a52527b97cfb33eece9e5", "output_buckets": [], "input_buckets": [], "version": "RELEASE.2024-07-16T23-46-41Z", @@ -22,7 +22,7 @@ "alternator_port": 8000, "access_key": "None", "secret_key": "None", - "instance_id": "b302608abce0d96e1518260ff38c366bbe0dfe279935c521ef682d740d84fe69", + "instance_id": "28fb7af28043b633b33b4b5999b48c14767f717ed76a73515ff8c68f253baeb1", "region": "None", "cpus": 1, "memory": "750", From 30068797db9ac4b32c8af80ce3b0cf6e2b7027e4 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Wed, 12 Nov 2025 22:28:23 +0100 Subject: [PATCH 30/82] add new functions --- .../605.lu/python/function.py | 27 +++++-- .../600.linearalgebra/606.spmv/config.json | 6 ++ .../600.linearalgebra/606.spmv/input.py | 7 ++ .../606.spmv/python/function.py | 70 +++++++++++++++++++ .../606.spmv/python/requirements.txt | 1 + .../606.spmv/python/requirements.txt.3.10 | 0 
.../606.spmv/python/requirements.txt.3.11 | 0 .../606.spmv/python/requirements.txt.3.12 | 0 .../606.spmv/python/requirements.txt.3.7 | 0 .../606.spmv/python/requirements.txt.3.8 | 0 .../606.spmv/python/requirements.txt.3.9 | 0 .../606.spmv/python/requirements.txt.arm.3.8 | 0 .../606.spmv/python/requirements.txt.arm.3.9 | 0 13 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 benchmarks/600.linearalgebra/606.spmv/config.json create mode 100644 benchmarks/600.linearalgebra/606.spmv/input.py create mode 100755 benchmarks/600.linearalgebra/606.spmv/python/function.py create mode 100755 benchmarks/600.linearalgebra/606.spmv/python/requirements.txt create mode 100644 benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.10 create mode 100644 benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.11 create mode 100644 benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.12 create mode 100755 benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.7 create mode 100755 benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.8 create mode 100755 benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.9 create mode 100644 benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.8 create mode 100644 benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.9 diff --git a/benchmarks/600.linearalgebra/605.lu/python/function.py b/benchmarks/600.linearalgebra/605.lu/python/function.py index ea3de4d19..492153bb7 100755 --- a/benchmarks/600.linearalgebra/605.lu/python/function.py +++ b/benchmarks/600.linearalgebra/605.lu/python/function.py @@ -14,20 +14,37 @@ def initialize_torch(N, dtype=torch.float32, device="cuda"): return A -def kernel_cholesky(A): +def _kernel_lu(B: torch.Tensor) -> torch.Tensor: + n = B.shape[0] + for i in range(n): + for j in range(i): + B[i, j] = B[i, j] - (B[i, :j] @ B[:j, j]) + B[i, j] = B[i, j] / B[j, j] + for j in range(i, n): + B[i, j] = B[i, j] - (B[i, :i] @ 
B[:i, j]) + return B + + +def kernel(A: torch.Tensor): torch.cuda.synchronize() - _ = torch.linalg.cholesky(A) # warmup + + _ = _kernel_lu(A.clone()) # Warm-up + torch.cuda.synchronize() start_evt = torch.cuda.Event(enable_timing=True) end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + B = None for _ in range(A.size(0)): - L = torch.linalg.cholesky(A) + B = _kernel_lu(A.clone()) end_evt.record() + torch.cuda.synchronize() + gpu_ms = float(start_evt.elapsed_time(end_evt)) - return L, gpu_ms + return B, gpu_ms def handler(event): @@ -45,7 +62,7 @@ def handler(event): gen_end = datetime.datetime.now() comp_begin = datetime.datetime.now() - L, gpu_ms = kernel_cholesky(A) + B, gpu_ms = kernel(A) comp_end = datetime.datetime.now() gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1) diff --git a/benchmarks/600.linearalgebra/606.spmv/config.json b/benchmarks/600.linearalgebra/606.spmv/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/606.spmv/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/606.spmv/input.py b/benchmarks/600.linearalgebra/606.spmv/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/606.spmv/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/606.spmv/python/function.py b/benchmarks/600.linearalgebra/606.spmv/python/function.py new file mode 100755 index 000000000..16e8744d1 --- /dev/null +++ b/benchmarks/600.linearalgebra/606.spmv/python/function.py @@ -0,0 +1,70 @@ +import sys, json, math, torch +import datetime + + +def initialize_torch(N, density=0.01, 
def initialize_torch(N, density=0.01, dtype=torch.float32, device="cuda", seed=42):
    """Create a random N x N CSR sparse matrix (~density fraction nnz) and a dense vector.

    The RNG is seeded first, then rows, columns and values are drawn in that
    fixed order, so results are reproducible for a given seed.
    """
    if seed is not None:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    nnz = int(N * N * density)
    rows = torch.randint(0, N, (nnz,), device=device)
    cols = torch.randint(0, N, (nnz,), device=device)
    vals = torch.randn(nnz, dtype=dtype, device=device)

    coo = torch.sparse_coo_tensor(
        torch.stack([rows, cols]), vals, (N, N), dtype=dtype, device=device
    )
    # CSR is the layout torch.sparse.mm performs best with for SpMV.
    csr = coo.to_sparse_csr()

    x = torch.randn(N, dtype=dtype, device=device)
    return csr, x


def kernel_spmv(A, x, reps=100):
    """Time `reps` sparse matrix-vector products y = A @ x with CUDA events."""
    torch.cuda.synchronize()
    colvec = x.unsqueeze(1)  # mm needs a column matrix; view, not a copy
    _ = torch.sparse.mm(A, colvec).squeeze()  # warm-up, excluded from timing
    torch.cuda.synchronize()

    begin = torch.cuda.Event(enable_timing=True)
    finish = torch.cuda.Event(enable_timing=True)
    begin.record()
    for _ in range(reps):
        result = torch.sparse.mm(A, colvec).squeeze()
    finish.record()
    torch.cuda.synchronize()
    return result, float(begin.elapsed_time(finish))


def handler(event):
    """SeBS entry point: build the sparse problem and time the SpMV kernel."""
    size = event.get("size")
    density = event.get("density", 0.01)  # default: 1% of entries are nonzero

    seed = 42
    if "seed" in event:
        import random

        random.seed(event["seed"])
        seed = int(event.get("seed", 42))

    gen_begin = datetime.datetime.now()
    A, x = initialize_torch(size, density=density, dtype=torch.float32, device="cuda", seed=seed)
    gen_end = datetime.datetime.now()

    comp_begin = datetime.datetime.now()
    y_out, gpu_ms = kernel_spmv(A, x, reps=100)
    comp_end = datetime.datetime.now()

    gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1)
    comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1)

    return {
        "measurement": {
            "generating_time": gen_us,
            "compute_time": comp_us,
            "gpu_time": gpu_ms,
        }
    }
b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb From d224ddc7c08798dab46185758644bb6196123da0 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Thu, 13 Nov 2025 01:57:44 +0100 Subject: [PATCH 31/82] add new functions --- 
.../600.linearalgebra/606.spmv/input.py | 2 +- .../600.linearalgebra/607.fw/config.json | 6 ++ benchmarks/600.linearalgebra/607.fw/input.py | 7 ++ .../607.fw/python/function.py | 71 +++++++++++++++++++ .../607.fw/python/requirements.txt | 1 + .../607.fw/python/requirements.txt.3.10 | 0 .../607.fw/python/requirements.txt.3.11 | 0 .../607.fw/python/requirements.txt.3.12 | 0 .../607.fw/python/requirements.txt.3.7 | 0 .../607.fw/python/requirements.txt.3.8 | 0 .../607.fw/python/requirements.txt.3.9 | 0 .../607.fw/python/requirements.txt.arm.3.8 | 0 .../607.fw/python/requirements.txt.arm.3.9 | 0 13 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 benchmarks/600.linearalgebra/607.fw/config.json create mode 100644 benchmarks/600.linearalgebra/607.fw/input.py create mode 100755 benchmarks/600.linearalgebra/607.fw/python/function.py create mode 100755 benchmarks/600.linearalgebra/607.fw/python/requirements.txt create mode 100644 benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.10 create mode 100644 benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.11 create mode 100644 benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.12 create mode 100755 benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.7 create mode 100755 benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.8 create mode 100755 benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.9 create mode 100644 benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.8 create mode 100644 benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.9 diff --git a/benchmarks/600.linearalgebra/606.spmv/input.py b/benchmarks/600.linearalgebra/606.spmv/input.py index 79ff6f5cb..e0f215890 100644 --- a/benchmarks/600.linearalgebra/606.spmv/input.py +++ b/benchmarks/600.linearalgebra/606.spmv/input.py @@ -4,4 +4,4 @@ def generate_input( data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func ): - return {"size": 
def initialize_torch(N, dtype=torch.int32, device="cuda", seed=42):
    """Build a deterministic N x N path-weight matrix for Floyd-Warshall.

    Entry (i, j) starts at (i*j) % 7 + 1; entries whose index sum falls in
    one of the residue classes below are set to 999, acting as "no direct
    edge". Generation itself is deterministic; the seed only pins torch's
    global RNG state for parity with the other 600.x benchmarks.
    """
    if seed is not None:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    i, j = torch.meshgrid(
        torch.arange(N, device=device), torch.arange(N, device=device), indexing="ij"
    )
    path = ((i * j) % 7 + 1).to(dtype)

    mask = ((i + j) % 13 == 0) | ((i + j) % 7 == 0) | ((i + j) % 11 == 0)
    # masked_fill expects a plain scalar fill value (the original wrapped 999
    # in a tensor, which only works via implicit conversion); 999 is exactly
    # representable in both int32 and float32.
    path = path.masked_fill(mask, 999)
    return path


def kernel_fw(path):
    """Floyd-Warshall all-pairs shortest paths, timed with CUDA events.

    NOTE: the timed pass updates ``path`` in place, so the caller's tensor is
    overwritten with the shortest-path matrix (which is also returned).
    """
    # Warm-up pass on a clone so one-time kernel launch costs are excluded
    # and the timed input is left untouched until the measured run.
    torch.cuda.synchronize()
    warm = path.clone()
    n = warm.size(0)
    for k in range(n):
        for i in range(n):
            warm[i, :] = torch.minimum(warm[i, :], warm[i, k] + warm[k, :])
    torch.cuda.synchronize()

    start_evt = torch.cuda.Event(enable_timing=True)
    end_evt = torch.cuda.Event(enable_timing=True)
    start_evt.record()
    n = path.size(0)
    for k in range(n):
        for i in range(n):
            path[i, :] = torch.minimum(path[i, :], path[i, k] + path[k, :])
    end_evt.record()
    torch.cuda.synchronize()
    gpu_ms = float(start_evt.elapsed_time(end_evt))
    return path, gpu_ms


def handler(event):
    """SeBS entry point: build the graph matrix and time the FW kernel."""
    size = event.get("size")

    seed = 42
    if "seed" in event:
        import random

        random.seed(event["seed"])
        seed = int(event["seed"])

    gen_begin = datetime.datetime.now()
    # NOTE(review): float32 overrides initialize_torch's int32 default —
    # presumably for GPU floating-point throughput; confirm this is intended.
    path = initialize_torch(size, dtype=torch.float32, device="cuda", seed=seed)
    gen_end = datetime.datetime.now()

    comp_begin = datetime.datetime.now()
    path_out, gpu_ms = kernel_fw(path)
    comp_end = datetime.datetime.now()

    gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1)
    comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1)

    return {
        "measurement": {
            "generating_time": gen_us,
            "compute_time": comp_us,
            "gpu_time": gpu_ms,
        }
    }
b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb From 921f3217a65f041f53528ed415d4c34c2b27a5a5 Mon Sep 17 00:00:00 2001 From: McLavish Date: Thu, 13 Nov 2025 10:30:03 +0100 Subject: [PATCH 32/82] added recommender benchmark --- .../400.inference/413.recommendation/input.py | 30 ++++ .../413.recommendation/python/function.py | 144 ++++++++++++++++++ .../413.recommendation/python/init.sh | 3 + .../413.recommendation/python/package.sh | 4 + .../python/requirements.txt | 1 + .../python/requirements.txt.3.10 | 1 + .../python/requirements.txt.3.11 | 1 + .../python/requirements.txt.3.8 | 1 + .../python/requirements.txt.3.9 | 1 + docs/benchmarks.md | 5 + sebs/regression.py | 1 + 11 files changed, 192 insertions(+) create mode 100644 benchmarks/400.inference/413.recommendation/input.py create mode 100644 benchmarks/400.inference/413.recommendation/python/function.py create mode 100644 benchmarks/400.inference/413.recommendation/python/init.sh create mode 100644 benchmarks/400.inference/413.recommendation/python/package.sh create mode 100644 benchmarks/400.inference/413.recommendation/python/requirements.txt create mode 100644 
import os


def buckets_count():
    """Two input buckets (model, request trace), no output buckets."""
    return (2, 0)


def upload_files(data_root, data_dir, upload_func):
    """Upload every file under data_dir, keyed by its path relative to data_root."""
    for directory, _, filenames in os.walk(data_dir):
        rel = os.path.relpath(directory, data_root)
        for name in filenames:
            upload_func(0, os.path.join(rel, name), os.path.join(directory, name))


def generate_input(
    data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func
):
    """Upload the model and request trace, then return a config describing where they live."""
    model_file = "dlrm_tiny.pt"
    requests_file = "requests.jsonl"

    # Bucket 0 carries the model, bucket 1 the request trace.
    upload_func(0, model_file, os.path.join(data_dir, "model", model_file))
    upload_func(1, requests_file, os.path.join(data_dir, "data", requests_file))

    return {
        "object": {"model": model_file, "requests": requests_file},
        "bucket": {
            "bucket": benchmarks_bucket,
            "model": input_paths[0],
            "requests": input_paths[1],
        },
    }
import storage + +client = storage.storage.get_instance() + +MODEL_FILE = "dlrm_tiny.pt" +MODEL_CACHE = "/tmp/dlrm_gpu_model" + +_model = None +_device = torch.device("cpu") + + +class TinyDLRM(nn.Module): + def __init__(self, num_users, num_items, num_categories, embed_dim=8): + super().__init__() + self.user_emb = nn.Embedding(num_users, embed_dim) + self.item_emb = nn.Embedding(num_items, embed_dim) + self.category_emb = nn.Embedding(num_categories, embed_dim) + in_dim = embed_dim * 3 + 2 + hidden = 16 + self.mlp = nn.Sequential( + nn.Linear(in_dim, hidden), + nn.ReLU(), + nn.Linear(hidden, 1), + ) + + def forward(self, user_id, item_id, category_id, dense): + features = torch.cat( + [ + self.user_emb(user_id), + self.item_emb(item_id), + self.category_emb(category_id), + dense, + ], + dim=-1, + ) + return torch.sigmoid(self.mlp(features)) + + +def _select_device(): + if torch.cuda.is_available(): + return torch.device("cuda") + raise RuntimeError("CUDA is not available") + return torch.device("cpu") + + +def _load_model(bucket, prefix): + global _model, _device + + if _model is not None: + return 0.0, 0.0 + + download_begin = datetime.datetime.now() + os.makedirs(MODEL_CACHE, exist_ok=True) + tmp_path = os.path.join("/tmp", f"{uuid.uuid4()}-{MODEL_FILE}") + client.download(bucket, os.path.join(prefix, MODEL_FILE), tmp_path) + download_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + checkpoint = torch.load(tmp_path, map_location="cpu") + meta = checkpoint["meta"] + _device = _select_device() + model = TinyDLRM( + meta["num_users"], meta["num_items"], meta["num_categories"], meta["embed_dim"] + ) + model.load_state_dict(checkpoint["state_dict"]) + model.to(_device) + model.eval() + _model = model + os.remove(tmp_path) + process_end = datetime.datetime.now() + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + return 
download_time, process_time + + +def _prepare_batch(requests): + user_ids = torch.tensor([req["user_id"] for req in requests], dtype=torch.long, device=_device) + item_ids = torch.tensor([req["item_id"] for req in requests], dtype=torch.long, device=_device) + category_ids = torch.tensor( + [req["category_id"] for req in requests], dtype=torch.long, device=_device + ) + dense = torch.tensor( + [req.get("dense", [0.0, 0.0]) for req in requests], dtype=torch.float32, device=_device + ) + return user_ids, item_ids, category_ids, dense + + +def handler(event): + bucket = event.get("bucket", {}).get("bucket") + model_prefix = event.get("bucket", {}).get("model") + requests_prefix = event.get("bucket", {}).get("requests") + requests_key = event.get("object", {}).get("requests") + + download_begin = datetime.datetime.now() + req_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(requests_key)}") + client.download(bucket, os.path.join(requests_prefix, requests_key), req_path) + download_end = datetime.datetime.now() + + model_download_time, model_process_time = _load_model(bucket, model_prefix) + + with open(req_path, "r") as f: + payloads = [json.loads(line) for line in f if line.strip()] + os.remove(req_path) + + inference_begin = datetime.datetime.now() + user_ids, item_ids, category_ids, dense = _prepare_batch(payloads) + + with torch.no_grad(): + scores = _model(user_ids, item_ids, category_ids, dense).squeeze(-1).tolist() + inference_end = datetime.datetime.now() + + predictions = [] + for req, score in zip(payloads, scores): + predictions.append( + { + "user_id": req["user_id"], + "item_id": req["item_id"], + "category_id": req["category_id"], + "score": score, + "device": str(_device), + } + ) + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1) + + return { + "result": {"predictions": predictions}, + "measurement": { + 
"download_time": download_time + model_download_time, + "compute_time": compute_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + }, + } diff --git a/benchmarks/400.inference/413.recommendation/python/init.sh b/benchmarks/400.inference/413.recommendation/python/init.sh new file mode 100644 index 000000000..f42329404 --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/init.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +# No additional initialization required for GPU recommendation benchmark. diff --git a/benchmarks/400.inference/413.recommendation/python/package.sh b/benchmarks/400.inference/413.recommendation/python/package.sh new file mode 100644 index 000000000..64e9deacb --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/package.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +PACKAGE_DIR=$1 +echo "DLRM GPU package size $(du -sh $1 | cut -f1)" diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt b/benchmarks/400.inference/413.recommendation/python/requirements.txt new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.10 b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.10 new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.10 @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.11 b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.11 new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.11 @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.8 
b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.8 new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.8 @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.9 b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.9 new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.9 @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 73056c86c..6977672d6 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -11,6 +11,7 @@ | Utilities | 311.compression | Python | x64, arm64 | Create a .zip file for a group of files in storage and return to user to download. | | Inference | 411.image-recognition | Python | x64 | Image recognition with ResNet and pytorch. | | Inference | 412.language-bert | Python | x64 | Sentence classification with a compact BERT model served via ONNX Runtime. | +| Inference | 413.recommendation | Python | x64 | GPU DLRM-inspired recommender scoring implemented in PyTorch. | | Scientific | 501.graph-pagerank | Python | x64, arm64 | PageRank implementation with igraph. | | Scientific | 502.graph-mst | Python | x64, arm64 | Minimum spanning tree (MST) implementation with igraph. | | Scientific | 503.graph-bfs | Python | x64, arm64 | Breadth-first search (BFS) implementation with igraph. | @@ -75,6 +76,10 @@ The benchmark is inspired by MLPerf and implements image recognition with Resnet This benchmark runs sequence classification with a compact BERT model exported to ONNX. The function downloads the model archive and text samples from storage, tokenizes the sentences, executes the ONNX Runtime session, and returns the predicted labels together with confidences. 
+### Recommendation + +Inspired by MLPerf’s DLRM v2, this benchmark ships a tiny PyTorch DLRM model that optionally runs on CUDA when available. The function downloads the model and request batch, moves the network to GPU if possible, performs batched inference, and reports recommendation scores alongside timing measurements. + ## Scientific ### Graph PageRank, BFS, MST diff --git a/sebs/regression.py b/sebs/regression.py index e0eaf7f4f..01dc8d071 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -22,6 +22,7 @@ "311.compression", "411.image-recognition", "412.language-bert", + "413.recommendation", "501.graph-pagerank", "502.graph-mst", "503.graph-bfs", From 4fca4aa2526a7ae9402a4b52b242d169a815c0a4 Mon Sep 17 00:00:00 2001 From: McLavish Date: Thu, 13 Nov 2025 15:39:34 +0100 Subject: [PATCH 33/82] changed data submodule to use ssh and not https --- .gitmodules | 2 +- benchmarks-data | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index c33a17880..0969aa83a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,4 @@ url = https://github.com/mcopik/pypapi.git [submodule "benchmarks-data"] path = benchmarks-data - url = https://github.com/McLavish/serverless-benchmarks-data-dphpc.git + url = git@github.com:McLavish/serverless-benchmarks-data-dphpc.git diff --git a/benchmarks-data b/benchmarks-data index 25c2bb40b..fbb693d2e 160000 --- a/benchmarks-data +++ b/benchmarks-data @@ -1 +1 @@ -Subproject commit 25c2bb40b8bde342395534b534ba62f8f0ff3549 +Subproject commit fbb693d2efc4538d4c3514c0e3567a516a53dd8c From 26dfcf4fc8bcc81e953ab6b3af1c41183f263631 Mon Sep 17 00:00:00 2001 From: jiahao luan Date: Sat, 15 Nov 2025 06:36:04 +0100 Subject: [PATCH 34/82] add channel_flow, compute, fft, and resnet of jax_npbench --- .pre-commit-config.yaml | 2 +- .../5xx.channel_flow_jax_npbench/config.json | 6 + .../5xx.channel_flow_jax_npbench/input.py | 17 ++ .../python/function.py | 279 ++++++++++++++++++ 
.../python/requirements.txt | 1 + .../5xx.compute_jax_npbench/config.json | 6 + .../5xx.compute_jax_npbench/input.py | 17 ++ .../python/function.py | 62 ++++ .../python/requirements.txt | 1 + .../config.json | 6 + .../input.py | 17 ++ .../python/function.py | 123 ++++++++ .../python/requirements.txt | 1 + 13 files changed, 537 insertions(+), 1 deletion(-) create mode 100644 benchmarks/500.scientific/5xx.channel_flow_jax_npbench/config.json create mode 100644 benchmarks/500.scientific/5xx.channel_flow_jax_npbench/input.py create mode 100644 benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/function.py create mode 100644 benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/requirements.txt create mode 100644 benchmarks/500.scientific/5xx.compute_jax_npbench/config.json create mode 100644 benchmarks/500.scientific/5xx.compute_jax_npbench/input.py create mode 100644 benchmarks/500.scientific/5xx.compute_jax_npbench/python/function.py create mode 100644 benchmarks/500.scientific/5xx.compute_jax_npbench/python/requirements.txt create mode 100644 benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/config.json create mode 100644 benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/input.py create mode 100644 benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/function.py create mode 100644 benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/requirements.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 58f8adb8d..62798ad03 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,7 @@ repos: language: python additional_dependencies: ["black==22.8.0"] entry: black - args: ["--config=.black.toml", "--check", "--diff"] + args: ["--config=.black.toml"] types: [python] files: ^(sebs/|benchmarks/) # - repo: local diff --git a/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/config.json 
b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/config.json new file mode 100644 index 000000000..ff297ac5b --- /dev/null +++ b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 2048, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/input.py b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/input.py new file mode 100644 index 000000000..bb53694c9 --- /dev/null +++ b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/input.py @@ -0,0 +1,17 @@ +size_generators = { + "test": {"ny": 61, "nx": 61, "nit": 5, "rho": 1.0, "nu": 0.1, "F": 1.0}, + "small": {"ny": 121, "nx": 121, "nit": 10, "rho": 1.0, "nu": 0.1, "F": 1.0}, + "large": {"ny": 201, "nx": 201, "nit": 20, "rho": 1.0, "nu": 0.1, "F": 1.0}, +} + + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): + return {"size": size_generators[size]} diff --git a/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/function.py b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/function.py new file mode 100644 index 000000000..5788880b2 --- /dev/null +++ b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/function.py @@ -0,0 +1,279 @@ +# Barba, Lorena A., and Forsyth, Gilbert F. (2018). +# CFD Python: the 12 steps to Navier-Stokes equations. +# Journal of Open Source Education, 1(9), 21, +# https://doi.org/10.21105/jose.00021 +# TODO: License +# (c) 2017 Lorena A. Barba, Gilbert F. Forsyth. +# All content is under Creative Commons Attribution CC-BY 4.0, +# and all code is under BSD-3 clause (previously under MIT, and changed on March 8, 2018). 
+ +import datetime + +import jax.numpy as jnp +import jax +from jax import lax +from functools import partial + + +@partial(jax.jit, static_argnums=(0,)) +def build_up_b(rho, dt, dx, dy, u, v): + b = jnp.zeros_like(u) + b = b.at[1:-1, 1:-1].set( + ( + rho + * ( + 1 + / dt + * ( + (u[1:-1, 2:] - u[1:-1, 0:-2]) / (2 * dx) + + (v[2:, 1:-1] - v[0:-2, 1:-1]) / (2 * dy) + ) + - ((u[1:-1, 2:] - u[1:-1, 0:-2]) / (2 * dx)) ** 2 + - 2 + * ( + (u[2:, 1:-1] - u[0:-2, 1:-1]) + / (2 * dy) + * (v[1:-1, 2:] - v[1:-1, 0:-2]) + / (2 * dx) + ) + - ((v[2:, 1:-1] - v[0:-2, 1:-1]) / (2 * dy)) ** 2 + ) + ) + ) + + # Periodic BC Pressure @ x = 2 + b = b.at[1:-1, -1].set( + ( + rho + * ( + 1 + / dt + * ((u[1:-1, 0] - u[1:-1, -2]) / (2 * dx) + (v[2:, -1] - v[0:-2, -1]) / (2 * dy)) + - ((u[1:-1, 0] - u[1:-1, -2]) / (2 * dx)) ** 2 + - 2 * ((u[2:, -1] - u[0:-2, -1]) / (2 * dy) * (v[1:-1, 0] - v[1:-1, -2]) / (2 * dx)) + - ((v[2:, -1] - v[0:-2, -1]) / (2 * dy)) ** 2 + ) + ) + ) + + # Periodic BC Pressure @ x = 0 + b = b.at[1:-1, 0].set( + ( + rho + * ( + 1 + / dt + * ((u[1:-1, 1] - u[1:-1, -1]) / (2 * dx) + (v[2:, 0] - v[0:-2, 0]) / (2 * dy)) + - ((u[1:-1, 1] - u[1:-1, -1]) / (2 * dx)) ** 2 + - 2 * ((u[2:, 0] - u[0:-2, 0]) / (2 * dy) * (v[1:-1, 1] - v[1:-1, -1]) / (2 * dx)) + - ((v[2:, 0] - v[0:-2, 0]) / (2 * dy)) ** 2 + ) + ) + ) + + return b + + +@partial(jax.jit, static_argnums=(0,)) +def pressure_poisson_periodic(nit, p, dx, dy, b): + def body_func(p, q): + pn = p.copy() + p = p.at[1:-1, 1:-1].set( + ((pn[1:-1, 2:] + pn[1:-1, 0:-2]) * dy**2 + (pn[2:, 1:-1] + pn[0:-2, 1:-1]) * dx**2) + / (2 * (dx**2 + dy**2)) + - dx**2 * dy**2 / (2 * (dx**2 + dy**2)) * b[1:-1, 1:-1] + ) + + # Periodic BC Pressure @ x = 2 + p = p.at[1:-1, -1].set( + ((pn[1:-1, 0] + pn[1:-1, -2]) * dy**2 + (pn[2:, -1] + pn[0:-2, -1]) * dx**2) + / (2 * (dx**2 + dy**2)) + - dx**2 * dy**2 / (2 * (dx**2 + dy**2)) * b[1:-1, -1] + ) + + # Periodic BC Pressure @ x = 0 + p = p.at[1:-1, 0].set( + ( + ((pn[1:-1, 1] + pn[1:-1, -1]) * 
dy**2 + (pn[2:, 0] + pn[0:-2, 0]) * dx**2) + / (2 * (dx**2 + dy**2)) + - dx**2 * dy**2 / (2 * (dx**2 + dy**2)) * b[1:-1, 0] + ) + ) + + # Wall boundary conditions, pressure + p = p.at[-1, :].set(p[-2, :]) # dp/dy = 0 at y = 2 + p = p.at[0, :].set(p[1, :]) # dp/dy = 0 at y = 0 + + return p, None + + p, _ = lax.scan(body_func, p, jnp.arange(nit)) + + +@partial(jax.jit, static_argnums=(0, 7, 8, 9)) +def channel_flow(nit, u, v, dt, dx, dy, p, rho, nu, F): + udiff = 1 + stepcount = 0 + + array_vals = (udiff, stepcount, u, v, p) + + def conf_func(array_vals): + udiff, _, _, _, _ = array_vals + return udiff > 0.001 + + def body_func(array_vals): + _, stepcount, u, v, p = array_vals + + un = u.copy() + vn = v.copy() + + b = build_up_b(rho, dt, dx, dy, u, v) + pressure_poisson_periodic(nit, p, dx, dy, b) + + u = u.at[1:-1, 1:-1].set( + un[1:-1, 1:-1] + - un[1:-1, 1:-1] * dt / dx * (un[1:-1, 1:-1] - un[1:-1, 0:-2]) + - vn[1:-1, 1:-1] * dt / dy * (un[1:-1, 1:-1] - un[0:-2, 1:-1]) + - dt / (2 * rho * dx) * (p[1:-1, 2:] - p[1:-1, 0:-2]) + + nu + * ( + dt / dx**2 * (un[1:-1, 2:] - 2 * un[1:-1, 1:-1] + un[1:-1, 0:-2]) + + dt / dy**2 * (un[2:, 1:-1] - 2 * un[1:-1, 1:-1] + un[0:-2, 1:-1]) + ) + + F * dt + ) + + v = v.at[1:-1, 1:-1].set( + vn[1:-1, 1:-1] + - un[1:-1, 1:-1] * dt / dx * (vn[1:-1, 1:-1] - vn[1:-1, 0:-2]) + - vn[1:-1, 1:-1] * dt / dy * (vn[1:-1, 1:-1] - vn[0:-2, 1:-1]) + - dt / (2 * rho * dy) * (p[2:, 1:-1] - p[0:-2, 1:-1]) + + nu + * ( + dt / dx**2 * (vn[1:-1, 2:] - 2 * vn[1:-1, 1:-1] + vn[1:-1, 0:-2]) + + dt / dy**2 * (vn[2:, 1:-1] - 2 * vn[1:-1, 1:-1] + vn[0:-2, 1:-1]) + ) + ) + + # Periodic BC u @ x = 2 + u = u.at[1:-1, -1].set( + un[1:-1, -1] + - un[1:-1, -1] * dt / dx * (un[1:-1, -1] - un[1:-1, -2]) + - vn[1:-1, -1] * dt / dy * (un[1:-1, -1] - un[0:-2, -1]) + - dt / (2 * rho * dx) * (p[1:-1, 0] - p[1:-1, -2]) + + nu + * ( + dt / dx**2 * (un[1:-1, 0] - 2 * un[1:-1, -1] + un[1:-1, -2]) + + dt / dy**2 * (un[2:, -1] - 2 * un[1:-1, -1] + un[0:-2, -1]) + ) + + F * dt + 
) + + # Periodic BC u @ x = 0 + u = u.at[1:-1, 0].set( + un[1:-1, 0] + - un[1:-1, 0] * dt / dx * (un[1:-1, 0] - un[1:-1, -1]) + - vn[1:-1, 0] * dt / dy * (un[1:-1, 0] - un[0:-2, 0]) + - dt / (2 * rho * dx) * (p[1:-1, 1] - p[1:-1, -1]) + + nu + * ( + dt / dx**2 * (un[1:-1, 1] - 2 * un[1:-1, 0] + un[1:-1, -1]) + + dt / dy**2 * (un[2:, 0] - 2 * un[1:-1, 0] + un[0:-2, 0]) + ) + + F * dt + ) + + # Periodic BC v @ x = 2 + v = v.at[1:-1, -1].set( + vn[1:-1, -1] + - un[1:-1, -1] * dt / dx * (vn[1:-1, -1] - vn[1:-1, -2]) + - vn[1:-1, -1] * dt / dy * (vn[1:-1, -1] - vn[0:-2, -1]) + - dt / (2 * rho * dy) * (p[2:, -1] - p[0:-2, -1]) + + nu + * ( + dt / dx**2 * (vn[1:-1, 0] - 2 * vn[1:-1, -1] + vn[1:-1, -2]) + + dt / dy**2 * (vn[2:, -1] - 2 * vn[1:-1, -1] + vn[0:-2, -1]) + ) + ) + + # Periodic BC v @ x = 0 + v = v.at[1:-1, 0].set( + vn[1:-1, 0] + - un[1:-1, 0] * dt / dx * (vn[1:-1, 0] - vn[1:-1, -1]) + - vn[1:-1, 0] * dt / dy * (vn[1:-1, 0] - vn[0:-2, 0]) + - dt / (2 * rho * dy) * (p[2:, 0] - p[0:-2, 0]) + + nu + * ( + dt / dx**2 * (vn[1:-1, 1] - 2 * vn[1:-1, 0] + vn[1:-1, -1]) + + dt / dy**2 * (vn[2:, 0] - 2 * vn[1:-1, 0] + vn[0:-2, 0]) + ) + ) + + # Wall BC: u,v = 0 @ y = 0,2 + u = u.at[0, :].set(0) + u = u.at[-1, :].set(0) + v = v.at[0, :].set(0) + v = v.at[-1, :].set(0) + + udiff = (jnp.sum(u) - jnp.sum(un)) / jnp.sum(u) + stepcount += 1 + + return (udiff, stepcount, u, v, p) + + _, stepcount, _, _, _ = lax.while_loop(conf_func, body_func, array_vals) + + return stepcount + + +def initialize(ny, nx): + u = jnp.zeros((ny, nx), dtype=jnp.float64) + v = jnp.zeros((ny, nx), dtype=jnp.float64) + p = jnp.ones((ny, nx), dtype=jnp.float64) + dx = 2 / (nx - 1) + dy = 2 / (ny - 1) + dt = 0.1 / ((nx - 1) * (ny - 1)) + return u, v, p, dx, dy, dt + + +def handler(event): + + if "size" in event: + size = event["size"] + ny = size["ny"] + nx = size["nx"] + nit = size["nit"] + rho = size["rho"] + nu = size["nu"] + F = size["F"] + + generate_begin = datetime.datetime.now() + + u, v, p, dx, 
dy, dt = initialize(ny, nx) + + generate_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + + results = channel_flow(nit, u, v, dt, dx, dy, p, rho, nu, F) + + process_end = datetime.datetime.now() + + # y_re_im = jnp.stack([jnp.real(result), jnp.imag(result)], axis=-1).tolist() + + process_time = (process_end - process_begin) / datetime.timedelta(milliseconds=1) + generate_time = (generate_end - generate_begin) / datetime.timedelta(milliseconds=1) + + try: + results = jax.device_get(results) + except Exception: + pass + + if hasattr(results, "item"): + results = results.item() + elif hasattr(results, "tolist"): + results = results.tolist() + + return { + "size": size, + "result": results, + "measurement": {"compute_time": process_time, "generate_time": generate_time}, + } diff --git a/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/requirements.txt b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/requirements.txt new file mode 100644 index 000000000..f31e1afe0 --- /dev/null +++ b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/requirements.txt @@ -0,0 +1 @@ +jax[cuda12] \ No newline at end of file diff --git a/benchmarks/500.scientific/5xx.compute_jax_npbench/config.json b/benchmarks/500.scientific/5xx.compute_jax_npbench/config.json new file mode 100644 index 000000000..ff297ac5b --- /dev/null +++ b/benchmarks/500.scientific/5xx.compute_jax_npbench/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 2048, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/500.scientific/5xx.compute_jax_npbench/input.py b/benchmarks/500.scientific/5xx.compute_jax_npbench/input.py new file mode 100644 index 000000000..56f136720 --- /dev/null +++ b/benchmarks/500.scientific/5xx.compute_jax_npbench/input.py @@ -0,0 +1,17 @@ +size_generators = { + "test": {"M": 2000, "N": 2000}, + "small": {"M": 5000, "N": 5000}, + "large": {"M": 16000, "N": 16000}, +} + + +def generate_input( + 
data_dir, + size, + benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): + return {"size": size_generators[size]} diff --git a/benchmarks/500.scientific/5xx.compute_jax_npbench/python/function.py b/benchmarks/500.scientific/5xx.compute_jax_npbench/python/function.py new file mode 100644 index 000000000..2e16b320d --- /dev/null +++ b/benchmarks/500.scientific/5xx.compute_jax_npbench/python/function.py @@ -0,0 +1,62 @@ +import datetime + +import jax.numpy as jnp +import jax + + +@jax.jit +def compute(array_1, array_2, a, b, c): + return jnp.clip(array_1, 2, 10) * a + array_2 * b + c + + +def initialize(M, N): + from numpy.random import default_rng + + rng = default_rng(42) + array_1 = rng.uniform(0, 1000, size=(M, N)).astype(jnp.int64) + array_2 = rng.uniform(0, 1000, size=(M, N)).astype(jnp.int64) + a = jnp.int64(4) + b = jnp.int64(3) + c = jnp.int64(9) + return array_1, array_2, a, b, c + + +def handler(event): + + if "size" in event: + size = event["size"] + M = size["M"] + N = size["N"] + + generate_begin = datetime.datetime.now() + + array_1, array_2, a, b, c = initialize(M, N) + + generate_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + + results = compute(array_1, array_2, a, b, c) + + process_end = datetime.datetime.now() + + # y_re_im = jnp.stack([jnp.real(result), jnp.imag(result)], axis=-1).tolist() + + process_time = (process_end - process_begin) / datetime.timedelta(milliseconds=1) + generate_time = (generate_end - generate_begin) / datetime.timedelta(milliseconds=1) + + try: + results = jax.device_get(results) + except Exception: + pass + + if getattr(results, "ndim", 0) == 0 or getattr(results, "size", 0) == 1: + results = results.item() + else: + results = results.tolist() + + return { + "size": size, + "result": results, + "measurement": {"compute_time": process_time, "generate_time": generate_time}, + } diff --git a/benchmarks/500.scientific/5xx.compute_jax_npbench/python/requirements.txt 
b/benchmarks/500.scientific/5xx.compute_jax_npbench/python/requirements.txt new file mode 100644 index 000000000..f31e1afe0 --- /dev/null +++ b/benchmarks/500.scientific/5xx.compute_jax_npbench/python/requirements.txt @@ -0,0 +1 @@ +jax[cuda12] \ No newline at end of file diff --git a/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/config.json b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/config.json new file mode 100644 index 000000000..ff297ac5b --- /dev/null +++ b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 2048, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/input.py b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/input.py new file mode 100644 index 000000000..937e96e44 --- /dev/null +++ b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/input.py @@ -0,0 +1,17 @@ +size_generators = { + "test": {"N": 8, "W": 14, "H": 14, "C1": 32, "C2": 8}, + "small": {"N": 8, "W": 28, "H": 28, "C1": 64, "C2": 16}, + "large": {"N": 8, "W": 56, "H": 56, "C1": 128, "C2": 32}, +} + + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): + return {"size": size_generators[size]} diff --git a/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/function.py b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/function.py new file mode 100644 index 000000000..f24b2cc71 --- /dev/null +++ b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/function.py @@ -0,0 +1,123 @@ +import datetime + +import jax.numpy as jnp +import jax +from jax import lax + + +@jax.jit +def relu(x): + return jnp.maximum(x, 0) + + +# Deep learning convolutional operator (stride = 1) +@jax.jit +def conv2d(input, weights): + K = weights.shape[0] # Assuming square kernel 
+ N = input.shape[0] + H_out = input.shape[1] - K + 1 + W_out = input.shape[2] - K + 1 + C_out = weights.shape[3] + output = jnp.empty((N, H_out, W_out, C_out), dtype=jnp.float32) + + def row_update(output, i): + def col_update(output, j): + input_slice = lax.dynamic_slice(input, (0, i, j, 0), (N, K, K, input.shape[-1])) + conv_result = jnp.sum( + input_slice[:, :, :, :, None] * weights[None, :, :, :], axis=(1, 2, 3) + ) + output = lax.dynamic_update_slice(output, conv_result[:, None, None, :], (0, i, j, 0)) + return output, None + + output, _ = lax.scan(col_update, output, jnp.arange(W_out)) + return output, None + + output, _ = lax.scan(row_update, output, jnp.arange(H_out)) + return output + + +# Batch normalization operator, as used in ResNet +@jax.jit +def batchnorm2d(x, eps=1e-5): + mean = jnp.mean(x, axis=0, keepdims=True) + std = jnp.std(x, axis=0, keepdims=True) + return (x - mean) / jnp.sqrt(std + eps) + + +# Bottleneck residual block (after initial convolution, without downsampling) +# in the ResNet-50 CNN (inference) +@jax.jit +def resnet_basicblock(input, conv1, conv2, conv3): + # Pad output of first convolution for second convolution + padded = jnp.zeros( + (input.shape[0], input.shape[1] + 2, input.shape[2] + 2, conv1.shape[3]), + dtype=jnp.float32, + ) + padded = lax.dynamic_update_slice(padded, conv2d(input, conv1), (0, 1, 1, 0)) + x = batchnorm2d(padded) + x = relu(x) + + x = conv2d(x, conv2) + x = batchnorm2d(x) + x = relu(x) + x = conv2d(x, conv3) + x = batchnorm2d(x) + return relu(x + input) + + +def initialize(N, W, H, C1, C2): + from numpy.random import default_rng + + rng = default_rng(42) + + # Input + input = rng.random((N, H, W, C1), dtype=jnp.float32) + # Weights + conv1 = rng.random((1, 1, C1, C2), dtype=jnp.float32) + conv2 = rng.random((3, 3, C2, C2), dtype=jnp.float32) + conv3 = rng.random((1, 1, C2, C1), dtype=jnp.float32) + return (input, conv1, conv2, conv3) + + +def handler(event): + + if "size" in event: + size = event["size"] + 
N = size["N"] + W = size["W"] + H = size["H"] + C1 = size["C1"] + C2 = size["C2"] + + generate_begin = datetime.datetime.now() + + input, conv1, conv2, conv3 = initialize(N, W, H, C1, C2) + + generate_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + + results = resnet_basicblock(input, conv1, conv2, conv3) + + process_end = datetime.datetime.now() + + # y_re_im = jnp.stack([jnp.real(result), jnp.imag(result)], axis=-1).tolist() + + process_time = (process_end - process_begin) / datetime.timedelta(milliseconds=1) + generate_time = (generate_end - generate_begin) / datetime.timedelta(milliseconds=1) + + try: + results = jax.device_get(results) + except Exception: + pass + + if getattr(results, "ndim", 0) == 0 or getattr(results, "size", 0) == 1: + results = results.item() + else: + results = results.tolist() + + return { + "size": size, + "result": results, + "measurement": {"compute_time": process_time, "generate_time": generate_time}, + } diff --git a/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/requirements.txt b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/requirements.txt new file mode 100644 index 000000000..f31e1afe0 --- /dev/null +++ b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/requirements.txt @@ -0,0 +1 @@ +jax[cuda12] \ No newline at end of file From fad77da51efa2090e38f0a46a06917060fc5e85c Mon Sep 17 00:00:00 2001 From: jiahao luan Date: Sat, 15 Nov 2025 06:42:12 +0100 Subject: [PATCH 35/82] reset the config --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 62798ad03..58f8adb8d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,7 @@ repos: language: python additional_dependencies: ["black==22.8.0"] entry: black - args: ["--config=.black.toml"] + args: ["--config=.black.toml", "--check", "--diff"] types: [python] files: 
^(sebs/|benchmarks/) # - repo: local From a563b1677c6025b5674c11ddfec50cfe1d153dab Mon Sep 17 00:00:00 2001 From: McLavish Date: Sat, 15 Nov 2025 16:53:54 +0100 Subject: [PATCH 36/82] experimental local workflows implementation --- .../local/python/function_workflow.py | 96 ++++++ run_local_workflows.sh | 66 ++++ sebs.py | 2 - sebs/benchmark.py | 92 ++--- sebs/faas/system.py | 41 ++- sebs/local/config.py | 14 +- sebs/local/executor.py | 205 +++++++++++ sebs/local/function.py | 41 ++- sebs/local/local.py | 323 ++++++++++++++++-- sebs/local/triggers.py | 70 ++++ sebs/local/workflow.py | 84 +++++ sebs/utils.py | 21 +- 12 files changed, 932 insertions(+), 123 deletions(-) create mode 100644 benchmarks/wrappers/local/python/function_workflow.py create mode 100755 run_local_workflows.sh create mode 100644 sebs/local/executor.py create mode 100644 sebs/local/triggers.py create mode 100644 sebs/local/workflow.py diff --git a/benchmarks/wrappers/local/python/function_workflow.py b/benchmarks/wrappers/local/python/function_workflow.py new file mode 100644 index 000000000..3b359bf84 --- /dev/null +++ b/benchmarks/wrappers/local/python/function_workflow.py @@ -0,0 +1,96 @@ +import datetime +import importlib +import json +import os +import uuid + +from redis import Redis + + +_FUNCTION_HANDLER = None + + +def _load_function_handler(): + global _FUNCTION_HANDLER + if _FUNCTION_HANDLER: + return _FUNCTION_HANDLER + + module_name = os.getenv("SEBS_WORKFLOW_MODULE") + if not module_name: + raise RuntimeError("Environment variable SEBS_WORKFLOW_MODULE is not set.") + + module = importlib.import_module(module_name) + if not hasattr(module, "handler"): + raise RuntimeError(f"Module {module_name} does not provide a handler(payload) function.") + _FUNCTION_HANDLER = module.handler + return _FUNCTION_HANDLER + + +def _maybe_push_measurement(event, duration_start, duration_end): + redis_host = os.getenv("SEBS_REDIS_HOST") + if not redis_host: + return + + workflow_name = 
os.getenv("SEBS_WORKFLOW_NAME", "workflow") + func_name = os.getenv("SEBS_WORKFLOW_FUNC", "function") + request_id = event["request_id"] + + payload = { + "func": func_name, + "start": duration_start, + "end": duration_end, + "is_cold": False, + "container_id": os.getenv("HOSTNAME", "local"), + "provider.request_id": request_id, + } + + func_res = os.getenv("SEBS_FUNCTION_RESULT") + if func_res: + payload["result"] = json.loads(func_res) + + upload_bytes = os.getenv("STORAGE_UPLOAD_BYTES", "0") + download_bytes = os.getenv("STORAGE_DOWNLOAD_BYTES", "0") + if upload_bytes.isdigit(): + payload["blob.upload"] = int(upload_bytes) + if download_bytes.isdigit(): + payload["blob.download"] = int(download_bytes) + + redis = Redis( + host=redis_host, + port=int(os.getenv("SEBS_REDIS_PORT", "6379")), + decode_responses=True, + socket_connect_timeout=10, + password=os.getenv("SEBS_REDIS_PASSWORD"), + ) + + key = os.path.join(workflow_name, func_name, request_id, str(uuid.uuid4())[0:8]) + redis.set(key, json.dumps(payload)) + print(f"[workflow] stored measurement {key}") + + +def handler(event): + """ + Entry point used by the local workflow containers. Expects events with + {"payload": , "request_id": "..."} format and returns the same + structure expected by our workflow orchestrator. 
+ """ + + if "payload" not in event: + raise RuntimeError("Workflow invocation payload must include 'payload' key.") + + request_id = event.get("request_id", str(uuid.uuid4())) + event["request_id"] = request_id + payload = event["payload"] + handler_fn = _load_function_handler() + + begin = datetime.datetime.now().timestamp() + print(f"[workflow] handler input: {event}", flush=True) + result = handler_fn(payload) + end = datetime.datetime.now().timestamp() + + _maybe_push_measurement(event, begin, end) + + return { + "request_id": request_id, + "payload": result, + } diff --git a/run_local_workflows.sh b/run_local_workflows.sh new file mode 100755 index 000000000..61656799e --- /dev/null +++ b/run_local_workflows.sh @@ -0,0 +1,66 @@ +#!/bin/bash +set -euo pipefail + +./sebs.py storage start all config/storage.json --output-json out_storage.json + +MINIO_ADDRESS=$(jq -r '.object.minio.address' out_storage.json) +MINIO_PORT=$(jq -r '.object.minio.mapped_port' out_storage.json) +MINIO_ACCESS=$(jq -r '.object.minio.access_key' out_storage.json) +MINIO_SECRET=$(jq -r '.object.minio.secret_key' out_storage.json) +MINIO_INSTANCE=$(jq -r '.object.minio.instance_id' out_storage.json) +SCYLLA_ADDRESS=$(jq -r '.nosql.scylladb.address' out_storage.json) +SCYLLA_PORT=$(jq -r '.nosql.scylladb.mapped_port' out_storage.json) +SCYLLA_INSTANCE=$(jq -r '.nosql.scylladb.instance_id' out_storage.json) + +for cfg in config/local_workflows.json config/local_deployment.json; do + tmp=$(mktemp) + jq \ + --arg addr "$MINIO_ADDRESS" \ + --argjson port "$MINIO_PORT" \ + --arg access "$MINIO_ACCESS" \ + --arg secret "$MINIO_SECRET" \ + --arg inst "$MINIO_INSTANCE" \ + --arg saddr "$SCYLLA_ADDRESS" \ + --argjson sport "$SCYLLA_PORT" \ + --arg sinst "$SCYLLA_INSTANCE" \ + '(.deployment.local.storage.object.minio.address = $addr) + | (.deployment.local.storage.object.minio.mapped_port = $port) + | (.deployment.local.storage.object.minio.access_key = $access) + | 
(.deployment.local.storage.object.minio.secret_key = $secret) + | (.deployment.local.storage.object.minio.instance_id = $inst) + | (.deployment.local.storage.nosql.scylladb.address = $saddr) + | (.deployment.local.storage.nosql.scylladb.mapped_port = $sport) + | (.deployment.local.storage.nosql.scylladb.instance_id = $sinst) + ' "$cfg" > "$tmp" + mv "$tmp" "$cfg" +done + +if docker ps -a --format '{{.Names}}' | grep -q '^sebs-redis$'; then + docker rm -f sebs-redis >/dev/null +fi +docker run -d --name sebs-redis -p 6380:6379 redis:7 + +WORKFLOWS=( + "610.gen" + "6100.1000-genome" + "6101.1000-genome-individuals" + "620.func-invo" + "6200.trip-booking" + "630.parallel-sleep" + "631.parallel-download" + "640.selfish-detour" + "650.vid" + "660.map-reduce" + "670.auth" + "680.excamera" + "690.ml" +) + +for wf in "${WORKFLOWS[@]}"; do + echo "===== Running $wf =====" + ./sebs.py benchmark workflow "$wf" test \ + --config config/local_workflows.json \ + --deployment local --trigger http --repetitions 1 \ + --output-dir results/local-workflows --verbose || true + sleep 5 +done diff --git a/sebs.py b/sebs.py index fc00af73c..d13e378ab 100755 --- a/sebs.py +++ b/sebs.py @@ -334,8 +334,6 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam sebs_client, deployment_client, ) = parse_common_params(**kwargs) - if isinstance(deployment_client, Local): - raise NotImplementedError("Local workflow deployment is currently not supported.") assert deployment_client.config.resources.redis_host is not None diff --git a/sebs/benchmark.py b/sebs/benchmark.py index dbcae6b43..bd9eb7db7 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -113,9 +113,7 @@ def functions(self) -> Dict[str, Any]: @property def code_location(self): if self.code_package: - return os.path.join( - self._cache_client.cache_dir, self.code_package["location"] - ) + return os.path.join(self._cache_client.cache_dir, self.code_package["location"]) else: return self._code_location @@ 
-179,9 +177,7 @@ def container_deployment(self): @property # noqa: A003 def hash(self): path = os.path.join(self.benchmark_path, self.language_name) - self._hash_value = Benchmark.hash_directory( - path, self._deployment_name, self.language_name - ) + self._hash_value = Benchmark.hash_directory(path, self._deployment_name, self.language_name) return self._hash_value @hash.setter # noqa: A003 @@ -211,18 +207,14 @@ def __init__( self._container_deployment = config.container_deployment self._benchmark_path = find_benchmark(self.benchmark, "benchmarks") if not self._benchmark_path: - raise RuntimeError( - "Benchmark {benchmark} not found!".format(benchmark=self._benchmark) - ) + raise RuntimeError("Benchmark {benchmark} not found!".format(benchmark=self._benchmark)) with open(os.path.join(self.benchmark_path, "config.json")) as json_file: self._benchmark_config: BenchmarkConfig = BenchmarkConfig.deserialize( json.load(json_file) ) if self.language not in self.benchmark_config.languages: raise RuntimeError( - "Benchmark {} not available for language {}".format( - self.benchmark, self.language - ) + "Benchmark {} not available for language {}".format(self.benchmark, self.language) ) self._cache_client = cache_client self._docker_client = docker_client @@ -385,12 +377,24 @@ def add_deployment_files(self, output_dir: str, is_workflow: bool): handler_path = os.path.join(output_dir, "handler.py") handler_function_path = os.path.join(output_dir, "handler_function.py") handler_workflow_path = os.path.join(output_dir, "handler_workflow.py") - if is_workflow: + if is_workflow and os.path.exists(handler_workflow_path): os.rename(handler_workflow_path, handler_path) - os.remove(handler_function_path) - else: + if os.path.exists(handler_function_path): + os.remove(handler_function_path) + elif not is_workflow and os.path.exists(handler_function_path): os.rename(handler_function_path, handler_path) - os.remove(handler_workflow_path) + if os.path.exists(handler_workflow_path): + 
os.remove(handler_workflow_path) + + workflow_entry = os.path.join(output_dir, "function_workflow.py") + function_entry = os.path.join(output_dir, "function.py") + if os.path.exists(workflow_entry): + if is_workflow: + if os.path.exists(function_entry): + os.remove(function_entry) + os.rename(workflow_entry, function_entry) + else: + os.remove(workflow_entry) def add_deployment_package_python(self, output_dir): @@ -424,9 +428,7 @@ def add_deployment_package_nodejs(self, output_dir): ) if len(packages): - package_config = os.path.join( - output_dir, f"package.json.{self._language_version}" - ) + package_config = os.path.join(output_dir, f"package.json.{self._language_version}") if not os.path.exists(package_config): package_config = os.path.join(output_dir, "package.json") @@ -484,9 +486,7 @@ def ensure_image(name: str) -> None: except docker.errors.ImageNotFound: try: self.logging.info( - "Docker pull of image {repo}:{image}".format( - repo=repo_name, image=name - ) + "Docker pull of image {repo}:{image}".format(repo=repo_name, image=name) ) self._docker_client.images.pull(repo_name, name) except docker.errors.APIError: @@ -511,9 +511,7 @@ def ensure_image(name: str) -> None: # Create set of mounted volumes unless Docker volumes are disabled if not self._experiment_config.check_flag("docker_copy_build_files"): - volumes = { - os.path.abspath(output_dir): {"bind": "/mnt/function", "mode": "rw"} - } + volumes = {os.path.abspath(output_dir): {"bind": "/mnt/function", "mode": "rw"}} package_script = os.path.abspath( os.path.join(self._benchmark_path, self.language_name, "package.sh") ) @@ -531,15 +529,11 @@ def ensure_image(name: str) -> None: try: self.logging.info( "Docker build of benchmark dependencies in container " - "of image {repo}:{image}".format( - repo=repo_name, image=image_name - ) + "of image {repo}:{image}".format(repo=repo_name, image=image_name) ) uid = os.getuid() # Standard, simplest build - if not self._experiment_config.check_flag( - 
"docker_copy_build_files" - ): + if not self._experiment_config.check_flag("docker_copy_build_files"): self.logging.info( "Docker mount of benchmark code from path {path}".format( path=os.path.abspath(output_dir) @@ -580,9 +574,7 @@ def ensure_image(name: str) -> None: "Send benchmark code from path {path} to " "Docker instance".format(path=os.path.abspath(output_dir)) ) - tar_archive = os.path.join( - output_dir, os.path.pardir, "function.tar" - ) + tar_archive = os.path.join(output_dir, os.path.pardir, "function.tar") with tarfile.open(tar_archive, "w") as tar: for f in os.listdir(output_dir): tar.add(os.path.join(output_dir, f), arcname=f) @@ -628,18 +620,14 @@ def recalculate_code_size(self): def build( self, - deployment_build_step: Callable[ - ["Benchmark", str, bool, bool], Tuple[str, int, str] - ], + deployment_build_step: Callable[["Benchmark", str, bool, bool], Tuple[str, int, str]], is_workflow: bool, ) -> Tuple[bool, str, bool, str]: # Skip build if files are up to date and user didn't enforce rebuild if self.is_cached and self.is_cached_valid: self.logging.info( - "Using cached benchmark {} at {}".format( - self.benchmark, self.code_location - ) + "Using cached benchmark {} at {}".format(self.benchmark, self.code_location) ) if self.container_deployment: return ( @@ -656,9 +644,7 @@ def build( if not self.is_cached else "cached code package is not up to date/build enforced." ) - self.logging.info( - "Building benchmark {}. Reason: {}".format(self.benchmark, msg) - ) + self.logging.info("Building benchmark {}. 
Reason: {}".format(self.benchmark, msg)) # clear existing cache information self._code_package = None @@ -673,13 +659,11 @@ def build( self.add_deployment_package(self._output_dir) self.install_dependencies(self._output_dir) - self._code_location, self._code_size, self._container_uri = ( - deployment_build_step( - self, - os.path.abspath(self._output_dir), - is_workflow, - self.is_cached_valid, - ) + (self._code_location, self._code_size, self._container_uri,) = deployment_build_step( + self, + os.path.abspath(self._output_dir), + is_workflow, + self.is_cached_valid, ) self.logging.info( ( @@ -808,9 +792,7 @@ def code_package_modify(self, filename: str, data: bytes): if self.code_package_is_archive(): self._update_zip(self.code_location, filename, data) new_size = self.code_package_recompute_size() / 1024.0 / 1024.0 - self.logging.info( - f"Modified zip package {self.code_location}, new size {new_size} MB" - ) + self.logging.info(f"Modified zip package {self.code_location}, new size {new_size} MB") else: raise NotImplementedError() @@ -895,9 +877,7 @@ def load_benchmark_input(path: str) -> BenchmarkModuleInterface: import importlib.machinery import importlib.util - loader = importlib.machinery.SourceFileLoader( - "input", os.path.join(path, "input.py") - ) + loader = importlib.machinery.SourceFileLoader("input", os.path.join(path, "input.py")) spec = importlib.util.spec_from_loader(loader.name, loader) assert spec mod = importlib.util.module_from_spec(spec) diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 0adcfc1d7..6ee7c95d2 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -11,7 +11,13 @@ from sebs.config import SeBSConfig from sebs.faas.resources import SystemResources from sebs.faas.config import Resources -from sebs.faas.function import CloudBenchmark, Function, Trigger, ExecutionResult, Workflow +from sebs.faas.function import ( + CloudBenchmark, + Function, + Trigger, + ExecutionResult, + Workflow, +) from sebs.utils import 
LoggingBase from .config import Config @@ -173,7 +179,11 @@ def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] @abstractmethod def package_code( - self, code_package: Benchmark, directory: str, is_workflow: bool, is_cached: bool + self, + code_package: Benchmark, + directory: str, + is_workflow: bool, + is_cached: bool, ) -> Tuple[str, int, str]: pass @@ -264,7 +274,9 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) if not func_name: func_name = self.default_function_name(code_package) - rebuilt, _, container_deployment, container_uri = code_package.build(self.package_code, False) + rebuilt, _, container_deployment, container_uri = code_package.build( + self.package_code, False + ) """ There's no function with that name? @@ -363,7 +375,7 @@ def update_workflow(self, workflow: Workflow, code_package: Benchmark): def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = None): if code_package.language_version not in self.system_config.supported_language_versions( - self.name(), code_package.language_name + self.name(), code_package.language_name, code_package.architecture ): raise Exception( "Unsupported {language} version {version} in {system}!".format( @@ -375,7 +387,9 @@ def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = N if not workflow_name: workflow_name = self.default_function_name(code_package) - rebuilt, _ = code_package.build(self.package_code, True) + rebuilt, _, container_deployment, container_uri = code_package.build( + self.package_code, True + ) """ There's no function with that name? @@ -412,8 +426,23 @@ def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = N workflow_name=workflow_name, loc=code_location ) ) + needs_refresh = getattr(workflow, "needs_refresh", False) # is the function up-to-date? 
- if workflow.code_package_hash != code_package.hash or rebuilt: + if needs_refresh: + self.logging.info( + f"Cached workflow {workflow_name} requires refreshing local resources." + ) + self.update_workflow(workflow, code_package) + if hasattr(workflow, "needs_refresh"): + workflow.needs_refresh = False + self.cache_client.add_benchmark( + deployment_name=self.name(), + language_name=code_package.language_name, + code_package=code_package, + benchmark=workflow, + ) + code_package.query_cache() + elif workflow.code_package_hash != code_package.hash or rebuilt: self.logging.info( f"Cached workflow {workflow_name} with hash " f"{workflow.code_package_hash} is not up to date with " diff --git a/sebs/local/config.py b/sebs/local/config.py index 0b512c67c..5074a323c 100644 --- a/sebs/local/config.py +++ b/sebs/local/config.py @@ -53,7 +53,8 @@ def initialize(res: Resources, config: dict): def update_cache(self, cache: Cache): super().update_cache(cache) cache.update_config( - val=list(self._allocated_ports), keys=["local", "resources", "allocated_ports"] + val=list(self._allocated_ports), + keys=["local", "resources", "allocated_ports"], ) @staticmethod @@ -63,6 +64,11 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour cached_config = cache.get_config("local") ret._deserialize(ret, config, cached_config) + if "resources" in config: + ret.load_redis(config["resources"]) + elif cached_config and "resources" in cached_config: + ret.load_redis(cached_config["resources"]) + # Load cached values if cached_config and "resources" in cached_config: LocalResources.initialize(ret, cached_config["resources"]) @@ -113,7 +119,11 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config return config_obj def serialize(self) -> dict: - out = {"name": "local", "region": self._region, "resources": self._resources.serialize()} + out = { + "name": "local", + "region": self._region, + "resources": self._resources.serialize(), + } 
return out def update_cache(self, cache: Cache): diff --git a/sebs/local/executor.py b/sebs/local/executor.py new file mode 100644 index 000000000..8212e2522 --- /dev/null +++ b/sebs/local/executor.py @@ -0,0 +1,205 @@ +import concurrent.futures +import copy +import json +from typing import Any, Dict, List, Optional + +import requests + +from sebs.faas.fsm import Loop, Map, Parallel, Repeat, State, Switch, Task + + +def _get_var(obj: Any, path: str) -> Any: + parts = [segment.strip() for segment in path.split(".") if segment.strip()] + value = obj + for part in parts: + try: + value = value[part] + except (KeyError, TypeError): + raise WorkflowExecutionError( + f"Missing key '{part}' while reading path '{path}' in object {value!r}" + ) + return value + + +def _set_var(obj: Any, value: Any, path: str): + parts = [segment.strip() for segment in path.split(".") if segment.strip()] + target = obj + for part in parts[:-1]: + target = target[part] + target[parts[-1]] = value + + +class WorkflowExecutionError(RuntimeError): + pass + + +class LocalWorkflowExecutor: + """ + Execute workflow definitions (benchmarks/600.workflows/*/definition.json) + by invoking local function containers directly. Mirrors the orchestration + semantics implemented in Azure/GCP wrappers. 
+ """ + + def __init__(self, definition_path: str, bindings: Dict[str, Dict[str, str]]): + self._definition_path = definition_path + with open(definition_path) as definition_file: + definition = json.load(definition_file) + self._states = { + name: State.deserialize(name, payload) for name, payload in definition["states"].items() + } + self._root = definition["root"] + self._bindings = bindings + + def run(self, payload: dict, request_id: str) -> dict: + return self._run_state_machine(self._states, self._root, payload, request_id) + + def _run_state_machine( + self, states: Dict[str, State], root_name: str, payload: dict, request_id: str + ) -> dict: + current = states[root_name] + result = payload + while current: + if isinstance(current, Task): + result, current = self._execute_task(states, current, result, request_id) + elif isinstance(current, Switch): + current = self._execute_switch(states, current, result) + elif isinstance(current, Map): + result = self._execute_map(current, result, request_id) + current = states.get(current.next) + elif isinstance(current, Repeat): + result = self._execute_repeat(current, result, request_id) + current = states.get(current.next) + elif isinstance(current, Loop): + self._execute_loop(current, result, request_id) + current = states.get(current.next) + elif isinstance(current, Parallel): + result = self._execute_parallel(current, result, request_id) + current = states.get(current.next) + else: + raise WorkflowExecutionError(f"Undefined state: {current}") + return result + + def _call_function(self, func_name: str, payload: dict, request_id: str) -> dict: + if func_name not in self._bindings: + raise WorkflowExecutionError(f"No binding found for function {func_name}") + binding = self._bindings[func_name] + url = f"http://{binding['host']}:{binding['port']}/" + body_payload = payload + if isinstance(payload, dict): + body_payload = dict(payload) + body_payload.setdefault("request_id", request_id) + 
body_payload.setdefault("request-id", request_id) + response = requests.post( + url, + json={"payload": body_payload, "request_id": request_id}, + timeout=900, + ) + if response.status_code >= 300: + raise WorkflowExecutionError( + f"Invocation of {func_name} at {url} failed with status {response.status_code}" + ) + body = response.json() + if isinstance(body, dict): + candidate = body + if "result" in body and isinstance(body["result"], dict): + candidate = body["result"].get("output", candidate) + if isinstance(candidate, dict) and "payload" in candidate: + return candidate["payload"] + if "payload" in body: + return body["payload"] + return body + + def _execute_task( + self, states: Dict[str, State], state: Task, data: dict, request_id: str + ) -> (dict, Optional[State]): + try: + result = self._call_function(state.func_name, data, request_id) + except Exception: + if state.failure: + return data, states.get(state.failure) + raise + return result, states.get(state.next) + + def _execute_switch( + self, states: Dict[str, State], switch: Switch, data: dict + ) -> Optional[State]: + ops = { + "<": lambda x, y: x < y, + "<=": lambda x, y: x <= y, + "==": lambda x, y: x == y, + ">=": lambda x, y: x >= y, + ">": lambda x, y: x > y, + } + for case in switch.cases: + lhs = _get_var(data, case.var) + if ops[case.op](lhs, case.val): + return states.get(case.next) + if switch.default: + return states.get(switch.default) + return None + + def _build_map_payload(self, element: Any, data: dict, common_params: Optional[str]) -> dict: + if not common_params: + return element + payload: Dict[str, Any] = {"array_element": element} + for param in [entry.strip() for entry in common_params.split(",") if entry.strip()]: + payload[param] = _get_var(data, param) + return payload + + def _execute_map(self, map_state: Map, data: dict, request_id: str) -> dict: + array = _get_var(data, map_state.array) + if not isinstance(array, list): + raise WorkflowExecutionError( + f"Map state 
{map_state.name} expects list at {map_state.array}" + ) + map_states = {n: State.deserialize(n, s) for n, s in map_state.funcs.items()} + results: List[Any] = [] + tasks: List[Any] = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=max(len(array), 1)) as executor: + for element in array: + payload = self._build_map_payload(element, data, map_state.common_params) + tasks.append( + executor.submit( + self._run_state_machine, + map_states, + map_state.root, + payload, + request_id, + ) + ) + for task in tasks: + results.append(task.result()) + _set_var(data, results, map_state.array) + return data + + def _execute_repeat(self, repeat: Repeat, data: dict, request_id: str) -> dict: + result = data + for _ in range(repeat.count): + result = self._call_function(repeat.func_name, result, request_id) + return result + + def _execute_loop(self, loop: Loop, data: dict, request_id: str): + array = _get_var(data, loop.array) + for element in array: + self._call_function(loop.func_name, element, request_id) + + def _execute_parallel(self, parallel: Parallel, data: dict, request_id: str) -> dict: + results: Dict[str, Any] = {} + tasks: List[concurrent.futures.Future] = [] + labels: List[str] = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=len(parallel.funcs)) as executor: + for branch in parallel.funcs: + branch_states = {n: State.deserialize(n, s) for n, s in branch["states"].items()} + labels.append(branch["root"]) + tasks.append( + executor.submit( + self._run_state_machine, + branch_states, + branch["root"], + copy.deepcopy(data), + request_id, + ) + ) + for label, future in zip(labels, tasks): + results[label] = future.result() + return results diff --git a/sebs/local/function.py b/sebs/local/function.py index c940811ce..b92b17325 100644 --- a/sebs/local/function.py +++ b/sebs/local/function.py @@ -52,24 +52,7 @@ def __init__( self._instance = docker_container self._instance_id = docker_container.id self._instance.reload() - networks = 
self._instance.attrs["NetworkSettings"]["Networks"] - self._port = port - - if is_linux(): - self._url = "{IPAddress}:{Port}".format( - IPAddress=networks["bridge"]["IPAddress"], Port=port - ) - if not self._url: - self.logging.error( - f"Couldn't read the IP address of container from attributes " - f"{json.dumps(self._instance.attrs, indent=2)}" - ) - raise RuntimeError( - f"Incorrect detection of IP address for container with id {self._instance_id}" - ) - else: - self._url = f"localhost:{port}" - + self._configure_endpoint(port) self._measurement_pid = measurement_pid @property @@ -92,6 +75,28 @@ def memory_measurement_pid(self) -> Optional[int]: def typename() -> str: return "Local.LocalFunction" + def refresh_endpoint(self, port: int): + self._configure_endpoint(port) + + def _configure_endpoint(self, port: int): + self._instance.reload() + networks = self._instance.attrs["NetworkSettings"]["Networks"] + self._port = port + if is_linux(): + self._url = "{IPAddress}:{Port}".format( + IPAddress=networks["bridge"]["IPAddress"], Port=port + ) + if not self._url: + self.logging.error( + f"Couldn't read the IP address of container from attributes " + f"{json.dumps(self._instance.attrs, indent=2)}" + ) + raise RuntimeError( + f"Incorrect detection of IP address for container with id {self._instance_id}" + ) + else: + self._url = f"localhost:{port}" + def serialize(self) -> dict: return { **super().serialize(), diff --git a/sebs/local/local.py b/sebs/local/local.py index 841251138..9af782df6 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -1,8 +1,11 @@ +import json import os import requests import shutil import time -from typing import cast, Dict, List, Optional, Type, Tuple # noqa +import re +import datetime +from typing import cast, Dict, List, Optional, Type, Tuple, Set # noqa import subprocess import socket @@ -14,6 +17,8 @@ from sebs.utils import LoggingHandlers, is_linux from sebs.local.config import LocalConfig from sebs.local.function import 
LocalFunction +from sebs.local.workflow import LocalWorkflow +from sebs.local.triggers import WorkflowLocalTrigger from sebs.faas.function import ( CloudBenchmark, Function, @@ -25,6 +30,38 @@ from sebs.faas.system import System from sebs.faas.config import Resources from sebs.benchmark import Benchmark +from sebs.faas.fsm import State, Task, Map, Repeat, Loop, Parallel + + +def _collect_task_names(state: State) -> Set[str]: + names: Set[str] = set() + if isinstance(state, Task): + names.add(state.func_name) + elif isinstance(state, Repeat): + names.add(state.func_name) + elif isinstance(state, Loop): + names.add(state.func_name) + elif isinstance(state, Map): + for nested_name, nested_state in state.funcs.items(): + nested_obj = ( + nested_state + if isinstance(nested_state, State) + else State.deserialize(nested_name, nested_state) + ) + names.update(_collect_task_names(nested_obj)) + elif isinstance(state, Parallel): + for subworkflow in state.funcs: + for nested_name, nested_state in subworkflow["states"].items(): + names.update(_collect_task_names(State.deserialize(nested_name, nested_state))) + return names + + +def _workflow_task_names(definition: dict) -> Set[str]: + states = {n: State.deserialize(n, s) for n, s in definition["states"].items()} + names: Set[str] = set() + for state in states.values(): + names.update(_collect_task_names(state)) + return names class Local(System): @@ -45,7 +82,7 @@ def function_type() -> "Type[Function]": @staticmethod def workflow_type() -> "Type[Workflow]": - raise NotImplementedError() + return LocalWorkflow @property def config(self) -> LocalConfig: @@ -93,9 +130,153 @@ def __init__( self._memory_measurement_path: Optional[str] = None # disable external measurements self._measure_interval = -1 + self._bridge_ip: Optional[str] = self._detect_bridge_ip() self.initialize_resources(select_prefix="local") + @staticmethod + def _load_workflow_definition(path: str) -> dict: + with open(path) as definition_file: + return 
json.load(definition_file) + + @staticmethod + def _normalize_workflow_id(name: str) -> str: + sanitized = re.sub(r"[^A-Za-z0-9_-]", "-", name) + if not sanitized: + sanitized = "wf" + if not sanitized[0].isalpha(): + sanitized = f"wf-{sanitized}" + return sanitized + + def _allocate_host_port(self, start_port: int, range_size: int = 1000) -> int: + for port in range(start_port, start_port + range_size): + if port in self.config.resources.allocated_ports: + continue + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + try: + sock.bind(("127.0.0.1", port)) + except socket.error: + continue + self.config.resources.allocated_ports.add(port) + return port + raise RuntimeError( + f"Failed to allocate host port for container: No ports available between " + f"{start_port} and {start_port + range_size - 1}" + ) + + def _detect_bridge_ip(self) -> Optional[str]: + try: + network = self._docker_client.networks.get("bridge") + config = network.attrs.get("IPAM", {}).get("Config", []) + if config: + gateway = config[0].get("Gateway") + if gateway: + return gateway + except docker.errors.DockerException: + pass + return None + + def _function_network_endpoint(self, func: LocalFunction) -> Tuple[str, str]: + host, port = func.url.split(":") + if is_linux(): + return host, port + host_override = os.getenv("DOCKER_HOST_IP") + if host_override: + return host_override, port + return host, port + + def _container_service_address(self, endpoint: str) -> str: + if not endpoint or ":" not in endpoint: + return endpoint + host, port = endpoint.split(":", 1) + if host not in ("127.0.0.1", "localhost"): + return endpoint + if self._bridge_ip is None: + self._bridge_ip = self._detect_bridge_ip() + if self._bridge_ip: + return f"{self._bridge_ip}:{port}" + if is_linux(): + return endpoint + host_override = os.getenv("DOCKER_HOST_IP", "host.docker.internal") + return f"{host_override}:{port}" + + def _workflow_env(self, 
workflow_name: str, module_name: str) -> Dict[str, str]: + overrides = { + "SEBS_WORKFLOW_NAME": workflow_name, + "SEBS_WORKFLOW_FUNC": module_name, + "SEBS_WORKFLOW_MODULE": f"function.{module_name}", + "SEBS_WORKFLOW_LOCAL": "1", + } + redis_host = self.config.resources.redis_host + if redis_host: + if ":" in redis_host: + host, port = redis_host.split(":", 1) + else: + host, port = redis_host, "6379" + container_host = host + if host in ("127.0.0.1", "localhost"): + container_host = self._bridge_ip or host + overrides["SEBS_REDIS_HOST"] = container_host + overrides["SEBS_REDIS_PORT"] = port + if self.config.resources.redis_password: + overrides["SEBS_REDIS_PASSWORD"] = self.config.resources.redis_password + return overrides + + def _prepare_workflow_functions( + self, + code_package: Benchmark, + workflow_name: str, + workflow_id: str, + definition_path: str, + definition: dict, + existing_workflow: Optional[LocalWorkflow] = None, + ) -> Tuple[List[LocalFunction], Dict[str, Dict[str, str]], str]: + + task_names = sorted(_workflow_task_names(definition)) + if not task_names: + raise RuntimeError("Workflow definition does not contain any task states.") + + existing_funcs = ( + {func.name: func for func in existing_workflow.functions} if existing_workflow else {} + ) + + functions: List[LocalFunction] = [] + bindings: Dict[str, Dict[str, str]] = {} + + required_containers = {f"{workflow_name}___{task}" for task in task_names} + obsolete_funcs = set(existing_funcs.keys()) - required_containers + for obsolete in obsolete_funcs: + existing_funcs[obsolete].stop() + + for task_name in task_names: + container_name = f"{workflow_name}___{task_name}" + existing_func = existing_funcs.get(container_name) + if existing_func: + existing_func.stop() + + env = self._workflow_env(workflow_name, task_name) + func_instance = self._start_container(code_package, container_name, existing_func, env) + functions.append(func_instance) + host, port = 
self._function_network_endpoint(func_instance) + workflow_function_name = f"{workflow_id}_{task_name}" + bindings[task_name] = { + "type": "custom", + "operation": "rest:post:/", + "host": host, + "port": port, + "workflow_function_name": workflow_function_name, + } + + resources_dir = os.path.join(code_package.code_location, "workflow_resources") + workflows_dir = os.path.join(resources_dir, "workflows") + os.makedirs(workflows_dir, exist_ok=True) + os.makedirs(resources_dir, exist_ok=True) + definition_copy = os.path.join(workflows_dir, f"{workflow_id}.sw.json") + shutil.copy2(definition_path, definition_copy) + + return functions, bindings, definition_copy + """ Shut down minio storage instance. """ @@ -122,7 +303,11 @@ def shutdown(self): """ def package_code( - self, code_package: Benchmark, directory: str, is_workflow: bool, is_cached: bool + self, + code_package: Benchmark, + directory: str, + is_workflow: bool, + is_cached: bool, ) -> Tuple[str, int, str]: CONFIG_FILES = { @@ -145,7 +330,11 @@ def package_code( return directory, bytes_size, "" def _start_container( - self, code_package: Benchmark, func_name: str, func: Optional[LocalFunction] + self, + code_package: Benchmark, + func_name: str, + func: Optional[LocalFunction], + env_overrides: Optional[Dict[str, str]] = None, ) -> LocalFunction: container_name = "{}:run.local.{}.{}".format( @@ -159,20 +348,33 @@ def _start_container( "CONTAINER_GID": str(os.getgid()), "CONTAINER_USER": self._system_config.username(self.name(), code_package.language_name), } - if self.config.resources.storage_config: - - environment = {**self.config.resources.storage_config.envs(), **environment} + storage_cfg = self.config.resources.storage_config + if storage_cfg: + storage_envs = dict(storage_cfg.envs()) + if "MINIO_ADDRESS" in storage_envs: + storage_envs["MINIO_ADDRESS"] = self._container_service_address( + storage_envs["MINIO_ADDRESS"] + ) + environment = {**storage_envs, **environment} if code_package.uses_nosql: 
nosql_storage = self.system_resources.get_nosql_storage() - environment = {**environment, **nosql_storage.envs()} + nosql_envs = dict(nosql_storage.envs()) + if "NOSQL_STORAGE_ENDPOINT" in nosql_envs: + nosql_envs["NOSQL_STORAGE_ENDPOINT"] = self._container_service_address( + nosql_envs["NOSQL_STORAGE_ENDPOINT"] + ) + environment = {**environment, **nosql_envs} for original_name, actual_name in nosql_storage.get_tables( code_package.benchmark ).items(): environment[f"NOSQL_STORAGE_TABLE_{original_name}"] = actual_name + if env_overrides: + environment.update(env_overrides) + # FIXME: make CPUs configurable # FIXME: configure memory # FIXME: configure timeout @@ -197,34 +399,12 @@ def _start_container( # If SeBS is running on non-linux platforms, # container port must be mapped to host port to make it reachable # Check if the system is NOT Linux or that it is WSL - port = self.DEFAULT_PORT if not is_linux(): - port_found = False - for p in range(self.DEFAULT_PORT, self.DEFAULT_PORT + 1000): - # check no container has been deployed on docker's port p - if p not in self.config.resources.allocated_ports: - # check if port p on the host is free - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - try: - s.bind(("127.0.0.1", p)) - # The port is available - port = p - port_found = True - self.config.resources.allocated_ports.add(p) - break - except socket.error: - # The port is already in use - continue - - if not port_found: - raise RuntimeError( - f"Failed to allocate port for container: No ports available between " - f"{self.DEFAULT_PORT} and {self.DEFAULT_PORT + 999}" - ) - + port = self._allocate_host_port(self.DEFAULT_PORT) container_kwargs["command"] = f"/bin/bash /sebs/run_server.sh {port}" container_kwargs["ports"] = {f"{port}/tcp": port} + else: + port = self.DEFAULT_PORT container = self._docker_client.containers.run(**container_kwargs) @@ -259,6 +439,7 @@ def _start_container( else: 
func.container = container func._measurement_pid = pid + func.refresh_endpoint(port) # Wait until server starts max_attempts = 10 @@ -316,7 +497,7 @@ def update_function( There's only one trigger - HTTP. """ - def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: from sebs.local.function import HTTPTrigger function = cast(LocalFunction, func) @@ -375,15 +556,83 @@ def format_function_name(func_name: str) -> str: return func_name def create_workflow(self, code_package: Benchmark, workflow_name: str) -> Workflow: - raise NotImplementedError() + workflow_name = self.format_function_name(workflow_name) + definition_path = os.path.join(code_package.benchmark_path, "definition.json") + if not os.path.exists(definition_path): + raise ValueError(f"No workflow definition found for {workflow_name}") + + definition = self._load_workflow_definition(definition_path) + workflow_id = self._normalize_workflow_id(workflow_name) + + functions, bindings, definition_output = self._prepare_workflow_functions( + code_package, workflow_name, workflow_id, definition_path, definition + ) + + function_cfg = FunctionConfig.from_benchmark(code_package) + workflow = LocalWorkflow( + workflow_name, + functions, + code_package.benchmark, + workflow_id, + code_package.hash, + function_cfg, + definition_output, + bindings, + ) + trigger = WorkflowLocalTrigger(definition_output, bindings) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + return workflow def create_workflow_trigger( self, workflow: Workflow, trigger_type: Trigger.TriggerType ) -> Trigger: - raise NotImplementedError() + workflow = cast(LocalWorkflow, workflow) + if trigger_type != Trigger.TriggerType.HTTP: + raise RuntimeError("Local workflows currently support only HTTP triggers.") + + trigger = WorkflowLocalTrigger(workflow.definition_path, 
workflow.function_bindings) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + self.cache_client.update_benchmark(workflow) + return trigger def update_workflow(self, workflow: Workflow, code_package: Benchmark): - raise NotImplementedError() + workflow = cast(LocalWorkflow, workflow) + definition_path = os.path.join(code_package.benchmark_path, "definition.json") + if not os.path.exists(definition_path): + raise ValueError(f"No workflow definition found for {workflow.name}") + + definition = self._load_workflow_definition(definition_path) + workflow_id = ( + workflow.workflow_id + if workflow.workflow_id + else self._normalize_workflow_id(workflow.name) + ) + functions, bindings, definition_output = self._prepare_workflow_functions( + code_package, + workflow.name, + workflow_id, + definition_path, + definition, + workflow, + ) + workflow.set_functions(functions) + workflow.definition_path = definition_output + workflow.function_bindings = bindings + workflow.workflow_id = workflow_id + + triggers = workflow.triggers(Trigger.TriggerType.HTTP) + if not triggers: + trigger = WorkflowLocalTrigger(workflow.definition_path, workflow.function_bindings) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + else: + for trigger in triggers: + if isinstance(trigger, WorkflowLocalTrigger): + trigger.update(workflow.definition_path, workflow.function_bindings) + + self.logging.info(f"Updated workflow {workflow.name} definition.") def start_measurements(self, measure_interval: int) -> Optional[str]: diff --git a/sebs/local/triggers.py b/sebs/local/triggers.py new file mode 100644 index 000000000..ea89a4f47 --- /dev/null +++ b/sebs/local/triggers.py @@ -0,0 +1,70 @@ +import concurrent.futures +import datetime +import uuid +from typing import Optional + +from sebs.faas.function import ExecutionResult, Trigger +from sebs.local.executor import LocalWorkflowExecutor, WorkflowExecutionError + + +class 
WorkflowLocalTrigger(Trigger): + def __init__(self, definition_path: str, bindings: dict): + super().__init__() + self._definition_path = definition_path + self._bindings = bindings + self._executor = LocalWorkflowExecutor(definition_path, bindings) + + @staticmethod + def typename() -> str: + return "Local.WorkflowLocalTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.HTTP + + def _invoke(self, payload: dict) -> ExecutionResult: + request_id = str(uuid.uuid4())[0:8] + begin = datetime.datetime.now() + result = ExecutionResult.from_times(begin, begin) + try: + output = self._executor.run(payload, request_id) + end = datetime.datetime.now() + result = ExecutionResult.from_times(begin, end) + result.request_id = request_id + result.output = output + except WorkflowExecutionError as exc: + end = datetime.datetime.now() + result = ExecutionResult.from_times(begin, end) + result.request_id = request_id + result.stats.failure = True + self.logging.error(f"Workflow execution failed: {exc}") + except Exception as exc: + end = datetime.datetime.now() + result = ExecutionResult.from_times(begin, end) + result.request_id = request_id + result.stats.failure = True + self.logging.error(f"Workflow execution error: {exc}") + return result + + def sync_invoke(self, payload: dict) -> ExecutionResult: + return self._invoke(payload) + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + pool = concurrent.futures.ThreadPoolExecutor() + return pool.submit(self._invoke, payload) + + def serialize(self) -> dict: + return { + "type": "LOCAL", + "definition_path": self._definition_path, + "bindings": self._bindings, + } + + @classmethod + def deserialize(cls, obj: dict) -> "WorkflowLocalTrigger": + return cls(obj["definition_path"], obj["bindings"]) + + def update(self, definition_path: str, bindings: dict): + self._definition_path = definition_path + self._bindings = bindings + self._executor = 
LocalWorkflowExecutor(definition_path, bindings) diff --git a/sebs/local/workflow.py b/sebs/local/workflow.py new file mode 100644 index 000000000..29a2dc2a4 --- /dev/null +++ b/sebs/local/workflow.py @@ -0,0 +1,84 @@ +import logging +import os +from typing import Dict, List, Optional + +from sebs.faas.function import FunctionConfig, Workflow +from sebs.local.function import LocalFunction +from sebs.local.triggers import WorkflowLocalTrigger + + +class LocalWorkflow(Workflow): + def __init__( + self, + name: str, + functions: List[LocalFunction], + benchmark: str, + workflow_id: str, + code_package_hash: str, + cfg: FunctionConfig, + definition_path: str, + function_bindings: Dict[str, Dict], + ): + super().__init__(benchmark, name, code_package_hash, cfg) + self._functions: Dict[str, LocalFunction] = {func.name: func for func in functions} + self.workflow_id = workflow_id + self.definition_path = definition_path + self.function_bindings = function_bindings + self.needs_refresh = False + + @property + def functions(self) -> List[LocalFunction]: + return list(self._functions.values()) + + def set_functions(self, functions: List[LocalFunction]): + self._functions = {func.name: func for func in functions} + + def update_function(self, func: LocalFunction): + self._functions[func.name] = func + + @staticmethod + def typename() -> str: + return "Local.Workflow" + + def serialize(self) -> dict: + serialized = { + **super().serialize(), + "functions": [func.serialize() for func in self._functions.values()], + "definition_path": self.definition_path, + "function_bindings": self.function_bindings, + "workflow_id": self.workflow_id, + } + serialized["triggers"] = [] + return serialized + + @staticmethod + def deserialize(cached_config: dict) -> "LocalWorkflow": + funcs: List[LocalFunction] = [] + missing_function = False + for entry in cached_config["functions"]: + try: + funcs.append(LocalFunction.deserialize(entry)) + except RuntimeError as exc: + 
logging.getLogger(__name__).warning( + "Skipping cached function for workflow %s: %s", + cached_config.get("name", ""), + exc, + ) + missing_function = True + cfg = FunctionConfig.deserialize(cached_config["config"]) + workflow = LocalWorkflow( + cached_config["name"], + funcs, + cached_config["benchmark"], + cached_config.get("workflow_id", cached_config["name"]), + cached_config["hash"], + cfg, + cached_config.get("definition_path", ""), + cached_config.get("function_bindings", {}), + ) + workflow.needs_refresh = missing_function + if os.path.exists(workflow.definition_path): + workflow.add_trigger( + WorkflowLocalTrigger(workflow.definition_path, workflow.function_bindings) + ) + return workflow diff --git a/sebs/utils.py b/sebs/utils.py index a0d397199..0bd11692b 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -110,8 +110,21 @@ def replace_string_in_file(path: str, from_str: str, to_str: str): def connect_to_redis_cache(host: str, password: str): + if ":" in host: + redis_host, redis_port = host.split(":", 1) + port = int(redis_port) + else: + redis_host = host + port = 6379 + redis = Redis( - host=host, port=6379, decode_responses=True, socket_keepalive=True, socket_timeout=10, socket_connect_timeout=10, password=password + host=redis_host, + port=port, + decode_responses=True, + socket_keepalive=True, + socket_timeout=10, + socket_connect_timeout=10, + password=password, ) redis.ping() @@ -119,7 +132,11 @@ def connect_to_redis_cache(host: str, password: str): def download_measurements( - redis: Redis, workflow_name: str, after: float, request_id: Optional[str], **static_args + redis: Redis, + workflow_name: str, + after: float, + request_id: Optional[str], + **static_args, ): payloads = [] pattern = f"{workflow_name}/*/{request_id}/*" if request_id else f"{workflow_name}/*" From 379dbf6c55319b4e5fc36201a0fcd65ad86012a6 Mon Sep 17 00:00:00 2001 From: McLavish Date: Sat, 15 Nov 2025 17:03:53 +0100 Subject: [PATCH 37/82] workflow changes for local execution 
--- benchmarks-data | 2 +- benchmarks/600.workflows/610.gen/config.json | 3 +- benchmarks/600.workflows/610.gen/input.py | 13 +- .../610.gen/python/map_astros.py | 13 +- .../610.gen/python/requirements.txt | 1 + .../600.workflows/6100.1000-genome/input.py | 63 ++++++---- .../definition.json | 2 +- .../6101.1000-genome-individuals/input.py | 59 ++++++--- .../python/requirements.txt | 2 +- .../640.selfish-detour/python/measure.py | 26 ++-- .../600.workflows/650.vid/python/analyse.py | 116 ++++++++++++++---- benchmarks/600.workflows/670.auth/config.json | 3 +- benchmarks/600.workflows/670.auth/input.py | 21 ++-- .../600.workflows/690.ml/python/train.py | 22 ++-- benchmarks/wrappers/local/python/storage.py | 56 ++++++--- config/systems.json | 7 +- dockerfiles/local/python/Dockerfile.run | 2 +- 17 files changed, 278 insertions(+), 133 deletions(-) create mode 100644 benchmarks/600.workflows/610.gen/python/requirements.txt diff --git a/benchmarks-data b/benchmarks-data index 7c7f67be6..04bfd102a 160000 --- a/benchmarks-data +++ b/benchmarks-data @@ -1 +1 @@ -Subproject commit 7c7f67be6d6efd94a5de10607136ce237a673ef7 +Subproject commit 04bfd102aea948a8fef27bac786302513ad6b3d4 diff --git a/benchmarks/600.workflows/610.gen/config.json b/benchmarks/600.workflows/610.gen/config.json index 8eae08240..8ff6eec59 100644 --- a/benchmarks/600.workflows/610.gen/config.json +++ b/benchmarks/600.workflows/610.gen/config.json @@ -1,5 +1,6 @@ { "timeout": 120, "memory": 128, - "languages": ["python"] + "languages": ["python"], + "modules": [] } diff --git a/benchmarks/600.workflows/610.gen/input.py b/benchmarks/600.workflows/610.gen/input.py index 68f82e81f..2fcf1fcaa 100644 --- a/benchmarks/600.workflows/610.gen/input.py +++ b/benchmarks/600.workflows/610.gen/input.py @@ -1,5 +1,14 @@ def buckets_count(): return (0, 0) -def generate_input(data_dir, size, input_buckets, output_buckets, upload_func): - return dict() \ No newline at end of file + +def generate_input( + data_dir, + size, + 
benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): + return {} diff --git a/benchmarks/600.workflows/610.gen/python/map_astros.py b/benchmarks/600.workflows/610.gen/python/map_astros.py index b98b5e9d7..49886ee73 100644 --- a/benchmarks/600.workflows/610.gen/python/map_astros.py +++ b/benchmarks/600.workflows/610.gen/python/map_astros.py @@ -1,7 +1,10 @@ def handler(elem): name = elem["name"] - fn, ln = name.split(" ") - name = " ".join([ln, fn]) - elem["name_rev"] = name - - return elem \ No newline at end of file + parts = name.split() + if len(parts) >= 2: + first = parts[0] + last = parts[-1] + elem["name_rev"] = f"{last} {first}" + else: + elem["name_rev"] = name + return elem diff --git a/benchmarks/600.workflows/610.gen/python/requirements.txt b/benchmarks/600.workflows/610.gen/python/requirements.txt new file mode 100644 index 000000000..f2293605c --- /dev/null +++ b/benchmarks/600.workflows/610.gen/python/requirements.txt @@ -0,0 +1 @@ +requests diff --git a/benchmarks/600.workflows/6100.1000-genome/input.py b/benchmarks/600.workflows/6100.1000-genome/input.py index def8d0195..13ed3f8b8 100644 --- a/benchmarks/600.workflows/6100.1000-genome/input.py +++ b/benchmarks/600.workflows/6100.1000-genome/input.py @@ -4,19 +4,40 @@ import io size_generators = { - "test" : (1), - "small": (5), - "small-10": (10), - "large": (10), + "test": 5, + "small": 5, + "small-10": 10, + "large": 10, } + def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): - files = ["ALL.chr21.1250.vcf", "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf", "columns.txt", "AFR", "ALL", "AMR", "EAS", "EUR", "GBR", "SAS"] + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): + files = [ + "ALL.chr21.1250.vcf", + 
"ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf", + "columns.txt", + "AFR", + "ALL", + "AMR", + "EAS", + "EUR", + "GBR", + "SAS", + ] for name in files: - #if name != "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf": + # if name != "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf": path = os.path.join(data_dir, name) upload_func(0, name, path) @@ -26,30 +47,28 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck start_bytes = 0 with open(os.path.join(data_dir, files[0]), "r") as f: content = f.readlines() - #TODO potentially change if input file with different number of lines is to be processed. - range_per_job = 1250 / num_individuals_jobs + # limit content size for local test runs to keep tasks responsive + content = content[: min(len(content), 500)] + range_per_job = len(content) / num_individuals_jobs for i in range(0, num_individuals_jobs): - #actually split file; return it afterwards. see e.g. split.py in 660.map-reduce. - #regex = re.compile('(?!#)') + # actually split file; return it afterwards. see e.g. split.py in 660.map-reduce. + # regex = re.compile('(?!#)') start = i * range_per_job end = i * range_per_job + range_per_job - #print("start: ", start, "end: ", end, "range_per_job: ", range_per_job, "num_individuals_jobs: ", num_individuals_jobs) - #data = list(filter(regex.match, content[int(start):int(end)])) - data = content[int(start):int(end)] - #name with start and end lines is not needed as all individuals jobs can just read their entire file. + # print("start: ", start, "end: ", end, "range_per_job: ", range_per_job, "num_individuals_jobs: ", num_individuals_jobs) + # data = list(filter(regex.match, content[int(start):int(end)])) + data = content[int(start) : int(end)] + # name with start and end lines is not needed as all individuals jobs can just read their entire file. 
name = str(uuid.uuid4())[:8] - + upload_data = io.BytesIO() upload_data.writelines((val).encode("utf-8") for val in data) upload_data.seek(0) - #name = client.upload_stream(output_bucket, name, upload_data) - #TODO keep track of start + stop bytes and return them. + # name = client.upload_stream(output_bucket, name, upload_data) + # TODO keep track of start + stop bytes and return them. nbytes = upload_data.getbuffer().nbytes - output = { - "start_bytes": start_bytes, - "end_bytes": start_bytes + nbytes - 1 - } + output = {"start_bytes": start_bytes, "end_bytes": start_bytes + nbytes - 1} blobs.append(output) start_bytes += nbytes diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/definition.json b/benchmarks/600.workflows/6101.1000-genome-individuals/definition.json index 1f5852d22..d89586cc7 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/definition.json +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/definition.json @@ -5,7 +5,7 @@ "type": "map", "root": "individuals", "array": "blob", - "common_params": "bucket,columns,columns_bucket,populations,sifting_input,individuals_file", + "common_params": "benchmark_bucket,bucket,columns,columns_bucket,populations,sifting_input,individuals_file", "states": { "individuals": { "type": "task", diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/input.py b/benchmarks/600.workflows/6101.1000-genome-individuals/input.py index c30c5bdcc..23770c1ea 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/input.py +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/input.py @@ -4,21 +4,42 @@ import io size_generators = { - "test" : (1), - "small": (5), - "small-10": (10), - "small-20": (20), - "large": (10), + "test": 5, + "small": 5, + "small-10": 10, + "small-20": 20, + "large": 10, } + def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): - files = 
["ALL.chr21.1250.vcf", "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf", "columns.txt", "AFR", "ALL", "AMR", "EAS", "EUR", "GBR", "SAS"] + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): + files = [ + "ALL.chr21.1250.vcf", + "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf", + "columns.txt", + "AFR", + "ALL", + "AMR", + "EAS", + "EUR", + "GBR", + "SAS", + ] for name in files: if name == "ALL.chr21.1250.vcf" or name == "columns.txt": - #if name != "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf": + # if name != "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf": path = os.path.join(data_dir, name) upload_func(0, name, path) @@ -28,27 +49,25 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck start_bytes = 0 with open(os.path.join(data_dir, files[0]), "r") as f: content = f.readlines() - range_per_job = 1250 / num_individuals_jobs + content = content[: min(len(content), 500)] + range_per_job = len(content) / num_individuals_jobs for i in range(0, num_individuals_jobs): - #actually split file; return it afterwards. see e.g. split.py in 660.map-reduce. - #regex = re.compile('(?!#)') + # actually split file; return it afterwards. see e.g. split.py in 660.map-reduce. + # regex = re.compile('(?!#)') start = i * range_per_job end = i * range_per_job + range_per_job - #print("start: ", start, "end: ", end, "range_per_job: ", range_per_job, "num_individuals_jobs: ", num_individuals_jobs) - #data = list(filter(regex.match, content[int(start):int(end)])) - data = content[int(start):int(end)] - #name with start and end lines is not needed as all individuals jobs can just read their entire file. 
+ # print("start: ", start, "end: ", end, "range_per_job: ", range_per_job, "num_individuals_jobs: ", num_individuals_jobs) + # data = list(filter(regex.match, content[int(start):int(end)])) + data = content[int(start) : int(end)] + # name with start and end lines is not needed as all individuals jobs can just read their entire file. name = str(uuid.uuid4())[:8] - + upload_data = io.BytesIO() upload_data.writelines((val).encode("utf-8") for val in data) upload_data.seek(0) nbytes = upload_data.getbuffer().nbytes - output = { - "start_bytes": start_bytes, - "end_bytes": start_bytes + nbytes - 1 - } + output = {"start_bytes": start_bytes, "end_bytes": start_bytes + nbytes - 1} blobs.append(output) start_bytes += nbytes diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt b/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt index 5453e2d48..41155a4d2 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt @@ -1,3 +1,3 @@ #numpy==1.17 -numpy==1.18 #1.16 works on Azure, but not AWS +numpy==1.25 # 1.18 lacks wheels for py3.11, align with 6100 workflow matplotlib diff --git a/benchmarks/600.workflows/640.selfish-detour/python/measure.py b/benchmarks/600.workflows/640.selfish-detour/python/measure.py index 7a0900c8f..bac536cf6 100644 --- a/benchmarks/600.workflows/640.selfish-detour/python/measure.py +++ b/benchmarks/600.workflows/640.selfish-detour/python/measure.py @@ -2,6 +2,7 @@ import json from ctypes import * + def handler(event): num_samples = event["num_samples"] @@ -10,28 +11,31 @@ def handler(event): path = os.path.join(dir, so_file) if not os.path.exists(path): path = os.path.join(dir, os.pardir, so_file) + if not os.path.exists(path): + raise RuntimeError("selfish-detour.so not found in package.") lib = cdll.LoadLibrary(path) lib.get_ticks_per_second.restype = c_double 
lib.selfish_detour.argtypes = [c_int, c_int, POINTER(c_ulonglong)] tps = lib.get_ticks_per_second() - assert(tps > 0) + assert tps > 0 - res = (c_ulonglong*num_samples)() + res = (c_ulonglong * num_samples)() ptr = cast(res, POINTER(c_ulonglong)) lib.selfish_detour(num_samples, 900, ptr) res = list(res) - assert(all(x<=y for x, y in zip(res[2:], res[3:]))) - - payload = json.dumps({ - "min_diff": res[0], - "num_iterations": res[1], - "timestamps": res[2:], - "tps": tps - }) + assert all(x <= y for x, y in zip(res[2:], res[3:])) + + payload = json.dumps( + { + "min_diff": res[0], + "num_iterations": res[1], + "timestamps": res[2:], + "tps": tps, + } + ) os.environ["SEBS_FUNCTION_RESULT"] = payload return "ok" - diff --git a/benchmarks/600.workflows/650.vid/python/analyse.py b/benchmarks/600.workflows/650.vid/python/analyse.py index 1b8f31664..a6c134d6c 100644 --- a/benchmarks/600.workflows/650.vid/python/analyse.py +++ b/benchmarks/600.workflows/650.vid/python/analyse.py @@ -8,18 +8,88 @@ client = storage.storage.get_instance() -labels = ["person", "bicycle", "car", "motorcycle", -"airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", -"stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", -"sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", -"umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", -"snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", -"surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", -"spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", -"pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", -"toilet", "tv", "laptop", "mouse", "remote", "keyboard", -"cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", -"book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" ] +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + 
"airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] def load_model(bucket, weights_blob, config_blob, dest_dir): @@ -35,7 +105,7 @@ def load_model(bucket, weights_blob, config_blob, dest_dir): def load_frames(benchmark_bucket, bucket, blobs, dest_dir): for blob in blobs: - stripped_blob = blob.replace(bucket + '/', '') + stripped_blob = blob.replace(bucket + "/", "") path = os.path.join(dest_dir, stripped_blob) client.download(benchmark_bucket, blob, path) yield cv2.imread(path) @@ -49,14 +119,11 @@ def detect(net, img): out = net.forward() preds = [] - for detection in out[0,0,:,:]: + for detection in out[0, 0, :, :]: score = float(detection[2]) if score > 0.5: class_id = int(detection[1]) - preds.append({ - "class": labels[class_id], - "score": score - }) + preds.append({"class": labels[class_id], "score": score}) return preds @@ -67,14 +134,17 @@ def handler(event): benchmark_bucket = event["benchmark_bucket"] frames = list(load_frames(benchmark_bucket, event["frames_bucket"], event["frames"], tmp_dir)) - net = load_model(benchmark_bucket, 
event["model_bucket"] + '/' + event["model_weights"], event["model_bucket"] + '/' + event["model_config"], tmp_dir) - + net = load_model( + benchmark_bucket, + event["model_bucket"] + "/" + event["model_weights"], + event["model_bucket"] + "/" + event["model_config"], + tmp_dir, + ) preds = [detect(net, frame) for frame in frames] - + frames_names = event["frames"] frames_names = [x.split(".")[0] for x in event["frames"]] - + preds = {f"{frames_names[idx]}": dets for idx, dets in enumerate(preds)} return preds - diff --git a/benchmarks/600.workflows/670.auth/config.json b/benchmarks/600.workflows/670.auth/config.json index e6a65cb35..d6d184e8a 100644 --- a/benchmarks/600.workflows/670.auth/config.json +++ b/benchmarks/600.workflows/670.auth/config.json @@ -1,5 +1,6 @@ { "timeout": 120, "memory": 256, - "languages": ["python"] + "languages": ["python"], + "modules": [] } diff --git a/benchmarks/600.workflows/670.auth/input.py b/benchmarks/600.workflows/670.auth/input.py index d81d24e45..7f0b54cd8 100644 --- a/benchmarks/600.workflows/670.auth/input.py +++ b/benchmarks/600.workflows/670.auth/input.py @@ -1,21 +1,22 @@ import random -size_generators = { - "test" : 10, - "small": 100, - "large": 1000 -} +size_generators = {"test": 10, "small": 100, "large": 1000} def buckets_count(): return (0, 0) -def generate_input(data_dir, size, input_buckets, output_buckets, upload_func): +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): mult = size_generators[size] msg = "Who let the dogs out?\n" * mult - return { - "message": msg, - "token": "allow" - } \ No newline at end of file + return {"message": msg, "token": "allow"} diff --git a/benchmarks/600.workflows/690.ml/python/train.py b/benchmarks/600.workflows/690.ml/python/train.py index d886a3072..9cae0f43f 100644 --- a/benchmarks/600.workflows/690.ml/python/train.py +++ b/benchmarks/600.workflows/690.ml/python/train.py @@ -9,10 +9,12 @@ from 
sklearn.preprocessing import StandardScaler import numpy as np + def str_to_cls(cls_name): - #print(cls_name) + # print(cls_name) return globals()[cls_name] + def load_dataset(benchmark_bucket, bucket, features, labels): dataset_dir = os.path.join("/tmp", str(uuid.uuid4())) os.makedirs(dataset_dir, exist_ok=True) @@ -20,10 +22,9 @@ def load_dataset(benchmark_bucket, bucket, features, labels): features_path = os.path.join(dataset_dir, "features.npy") labels_path = os.path.join(dataset_dir, "labels.npy") - client = storage.storage.get_instance() - client.download(benchmark_bucket, bucket + '/' + features, features_path) - client.download(benchmark_bucket, bucket + '/' + labels, labels_path) + client.download(benchmark_bucket, bucket + "/" + features, features_path) + client.download(benchmark_bucket, bucket + "/" + labels, labels_path) X = np.load(features_path) y = np.load(labels_path) @@ -34,9 +35,7 @@ def load_dataset(benchmark_bucket, bucket, features, labels): def preprocess(X, y): X = StandardScaler().fit_transform(X) - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.4, random_state=123 - ) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=123) return X_train, X_test, y_train, y_test @@ -55,7 +54,8 @@ def handler(schedule): y_key = schedule.pop("labels") bucket = schedule.pop("bucket") benchmark_bucket = schedule.pop("benchmark_bucket") - request_id = schedule.pop("request-id") + schedule.pop("request-id", None) + schedule.pop("request_id", None) clf = str_to_cls(name)(**schedule) @@ -65,8 +65,4 @@ def handler(schedule): train(clf, X_train, y_train) score = val(clf, X_test, y_test) - return { - "name": name, - "score": score - } - + return {"name": name, "score": score} diff --git a/benchmarks/wrappers/local/python/storage.py b/benchmarks/wrappers/local/python/storage.py index 4e1f9c5de..9b96d789e 100644 --- a/benchmarks/wrappers/local/python/storage.py +++ 
b/benchmarks/wrappers/local/python/storage.py @@ -4,36 +4,39 @@ import minio + class storage: instance = None client = None def __init__(self): - if 'MINIO_ADDRESS' in os.environ: - address = os.environ['MINIO_ADDRESS'] - access_key = os.environ['MINIO_ACCESS_KEY'] - secret_key = os.environ['MINIO_SECRET_KEY'] + if "MINIO_ADDRESS" in os.environ: + address = os.environ["MINIO_ADDRESS"] + access_key = os.environ["MINIO_ACCESS_KEY"] + secret_key = os.environ["MINIO_SECRET_KEY"] self.client = minio.Minio( - address, - access_key=access_key, - secret_key=secret_key, - secure=False) + address, access_key=access_key, secret_key=secret_key, secure=False + ) @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) - def upload(self, bucket, file, filepath): - key_name = storage.unique_name(file) + def upload(self, bucket, file, filepath, unique_name=True): + key_name = storage.unique_name(file) if unique_name else file self.client.fput_object(bucket, key_name, filepath) return key_name def download(self, bucket, file, filepath): + data = self.client.get_object(bucket, file) + size = data.headers.get("Content-Length") + if size: + os.environ["STORAGE_DOWNLOAD_BYTES"] = str( + int(os.getenv("STORAGE_DOWNLOAD_BYTES", "0")) + int(size) + ) self.client.fget_object(bucket, file, filepath) def download_directory(self, bucket, prefix, path): @@ -49,15 +52,30 @@ def upload_stream(self, bucket, file, bytes_data): def download_stream(self, bucket, file): data = self.client.get_object(bucket, file) - return data.read() + body = data.read() + os.environ["STORAGE_DOWNLOAD_BYTES"] = str( + int(os.getenv("STORAGE_DOWNLOAD_BYTES", "0")) + len(body) + ) + return body + + def download_within_range(self, bucket, file, 
start_byte, stop_byte): + range_header = f"bytes={start_byte}-{stop_byte}" + resp = self.client.get_object(bucket, file, request_headers={"Range": range_header}) + data = resp.read().decode("utf-8") + os.environ["STORAGE_DOWNLOAD_BYTES"] = str( + int(os.getenv("STORAGE_DOWNLOAD_BYTES", "0")) + len(data.encode("utf-8")) + ) + return data def list_directory(self, bucket, prefix): - objects = self.client.list_objects_v2(bucket, prefix, recursive=True) - for obj in objects: + if hasattr(self.client, "list_objects_v2"): + iterator = self.client.list_objects_v2(bucket, prefix, recursive=True) + else: + iterator = self.client.list_objects(bucket, prefix, recursive=True) + for obj in iterator: yield obj.object_name def get_instance(): if storage.instance is None: storage.instance = storage() return storage.instance - diff --git a/config/systems.json b/config/systems.json index 9acc1dd2d..5747173dc 100644 --- a/config/systems.json +++ b/config/systems.json @@ -34,9 +34,12 @@ "deployment": { "files": [ "storage.py", - "nosql.py" + "nosql.py", + "function_workflow.py" + ], + "packages": [ + "redis" ], - "packages": [], "module_packages": { "nosql": [ "boto3==1.28.3" diff --git a/dockerfiles/local/python/Dockerfile.run b/dockerfiles/local/python/Dockerfile.run index 768472607..2c1e27df7 100755 --- a/dockerfiles/local/python/Dockerfile.run +++ b/dockerfiles/local/python/Dockerfile.run @@ -6,7 +6,7 @@ RUN deps=''\ # for route and sudo && apt-get install --no-install-recommends -y curl gosu net-tools sudo ${deps}\ && apt-get purge -y --auto-remove ${deps}\ - && pip3 install cffi minio bottle + && pip3 install cffi minio bottle redis RUN mkdir -p /sebs COPY dockerfiles/local/run.sh /sebs/ From 54712a95b710354defd052f35e49702b56c695fd Mon Sep 17 00:00:00 2001 From: McLavish Date: Sat, 15 Nov 2025 18:09:46 +0100 Subject: [PATCH 38/82] fix: selfish-detour not including the .so file when packaging --- .../640.selfish-detour/python/package.sh | 4 + .../python/selfish-detour.c | 138 
++++++++++++++++++ run_local_workflows.sh | 7 + sebs/benchmark.py | 2 +- 4 files changed, 150 insertions(+), 1 deletion(-) create mode 100755 benchmarks/600.workflows/640.selfish-detour/python/package.sh create mode 100644 benchmarks/600.workflows/640.selfish-detour/python/selfish-detour.c diff --git a/benchmarks/600.workflows/640.selfish-detour/python/package.sh b/benchmarks/600.workflows/640.selfish-detour/python/package.sh new file mode 100755 index 000000000..05461c070 --- /dev/null +++ b/benchmarks/600.workflows/640.selfish-detour/python/package.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -euo pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +cc -O2 -fPIC -shared "$SCRIPT_DIR/selfish-detour.c" -o "$SCRIPT_DIR/selfish-detour.so" diff --git a/benchmarks/600.workflows/640.selfish-detour/python/selfish-detour.c b/benchmarks/600.workflows/640.selfish-detour/python/selfish-detour.c new file mode 100644 index 000000000..d4559d417 --- /dev/null +++ b/benchmarks/600.workflows/640.selfish-detour/python/selfish-detour.c @@ -0,0 +1,138 @@ +// THIS IS THE SELFISH DETOUR EXAMPLE FROM NETGAUGE https://spcl.inf.ethz.ch/Research/Performance/Netgauge/OS_Noise/ + +#include +#include +#include + +#define UINT64_T uint64_t +#define UINT32_T uint32_t + +typedef struct { + UINT32_T l; + UINT32_T h; +} x86_64_timeval_t; + +#define HRT_TIMESTAMP_T x86_64_timeval_t + +/* TODO: Do we need a while loop here? aka Is rdtsc atomic? 
- check in the documentation */ +#define HRT_GET_TIMESTAMP(t1) __asm__ __volatile__ ("rdtsc" : "=a" (t1.l), "=d" (t1.h)); + +#define HRT_GET_ELAPSED_TICKS(t1, t2, numptr) *numptr = (((( UINT64_T ) t2.h) << 32) | t2.l) - \ + (((( UINT64_T ) t1.h) << 32) | t1.l); + +#define HRT_GET_TIME(t1, time) time = (((( UINT64_T ) t1.h) << 32) | t1.l) + +double get_ticks_per_second() { + #define NUM_TESTS 10 + + HRT_TIMESTAMP_T t1, t2; + uint64_t res[NUM_TESTS]; + uint64_t min=0; + int count; + + for (count=0; count res[count]) min = res[count]; + } + + return ((double) min); +} + +void selfish_detour(int num_runs, int threshold, uint64_t *results) { + int cnt=0, num_not_smaller = 0; + HRT_TIMESTAMP_T current, prev, start; + uint64_t sample = 0; + uint64_t elapsed, thr, min=(uint64_t)~0; + int i; + + // we will do a "calibration run" of the detour benchmark to + // get a reasonable value for the minimal detour time + // just perform the benchmark and record the minimal detour time until + // this minimal detour time does not get smaller for 1000 (as defined by NOT_SMALLER) + // consecutive runs + + #define NOT_SMALLER 100 + #define INNER_TRIES 50 + + thr = min*(threshold/100.0); + while (num_not_smaller < NOT_SMALLER) { + cnt = 0; + + HRT_GET_TIMESTAMP(start); + HRT_GET_TIMESTAMP(current); + + // this is exactly the same loop as below for measurement + while (cnt < INNER_TRIES) { + prev = current; + HRT_GET_TIMESTAMP(current); + + sample++; + + HRT_GET_ELAPSED_TICKS(prev, current, &elapsed); + // != instead of < in the benchmark loop in order to make the + // notsmaller principle useful + if ( elapsed != thr ) { + HRT_GET_ELAPSED_TICKS(start, prev, &results[cnt++]); + HRT_GET_ELAPSED_TICKS(start, current, &results[cnt++]); + } + } + + // find minimum in results array - this is outside the + // calibration/measurement loop! 
+ { + if(min == 0) { + printf("The initialization reached 0 clock cycles - the clock accuracy seems too low (setting min=1 and exiting calibration)\n"); + min = 1; + break; + } + int smaller=0; + for(i = 0; i < INNER_TRIES; i+=2) { + if(results[i+1]-results[i] < min) { + min = results[i+1]-results[i]; + smaller=1; + //printf("[%i] min: %lu\n", r, min); + } + } + if (!smaller) num_not_smaller++; + else num_not_smaller = 0; + } + } + + // now we perform the actual benchmark: Read a time-stamp-counter in a tight + // loop ignore the results if the timestamps are close to each other, as we can assume + // that nobody interrupted us. If the difference of the timestamps exceeds a certain + // threshold, we assume that we have been "hit" by a "noise event" and record the + // time difference for later analysis + + cnt = 2; + sample = 0; + + HRT_GET_TIMESTAMP(start); + HRT_GET_TIMESTAMP(current); + + // perform this outside measurement loop in order to save + // time/increase measurement frequency + thr = min*(threshold/100.0); + while (cnt < num_runs) { + prev = current; + HRT_GET_TIMESTAMP(current); + + sample++; + + HRT_GET_ELAPSED_TICKS(prev, current, &elapsed); + if ( elapsed > thr ) { + HRT_GET_ELAPSED_TICKS(start, prev, &results[cnt++]); + HRT_GET_ELAPSED_TICKS(start, current, &results[cnt++]); + } + } + + results[0] = min; + results[1] = sample; +} \ No newline at end of file diff --git a/run_local_workflows.sh b/run_local_workflows.sh index 61656799e..6f68e14a3 100755 --- a/run_local_workflows.sh +++ b/run_local_workflows.sh @@ -1,6 +1,13 @@ #!/bin/bash set -euo pipefail +if [ ! -f config/local_workflows.json ]; then + cp config/example.json config/local_workflows.json +fi +if [ ! 
-f config/local_deployment.json ]; then + cp config/example.json config/local_deployment.json +fi + ./sebs.py storage start all config/storage.json --output-json out_storage.json MINIO_ADDRESS=$(jq -r '.object.minio.address' out_storage.json) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index bd9eb7db7..98ca47820 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -325,7 +325,7 @@ def query_cache(self): def get_code_files(self, include_config=True): FILES = { - "python": ["*.py"], + "python": ["*.py", "*.sh", "*.c", "*.h", "*.so", "*.so.*"], "nodejs": ["*.js"], } if include_config: From 1d5000077ee9026c8fbafe912979b95241bfdcaa Mon Sep 17 00:00:00 2001 From: McLavish Date: Sat, 15 Nov 2025 18:12:56 +0100 Subject: [PATCH 39/82] changed workflows script to automatically compile detour C code --- .../640.selfish-detour/package.sh | 11 -- .../640.selfish-detour/selfish-detour.c | 138 ------------------ run_local_workflows.sh | 12 ++ 3 files changed, 12 insertions(+), 149 deletions(-) delete mode 100644 benchmarks/600.workflows/640.selfish-detour/package.sh delete mode 100644 benchmarks/600.workflows/640.selfish-detour/selfish-detour.c diff --git a/benchmarks/600.workflows/640.selfish-detour/package.sh b/benchmarks/600.workflows/640.selfish-detour/package.sh deleted file mode 100644 index c1145e436..000000000 --- a/benchmarks/600.workflows/640.selfish-detour/package.sh +++ /dev/null @@ -1,11 +0,0 @@ -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -CUR_DIR=$(pwd) -cd ${SCRIPT_DIR} - -for C_FILE in $(ls *.c) -do - cc -fPIC -shared -o ${C_FILE%%.*}.so ${C_FILE} - rm ${C_FILE} -done - -cd ${CUR_DIR} diff --git a/benchmarks/600.workflows/640.selfish-detour/selfish-detour.c b/benchmarks/600.workflows/640.selfish-detour/selfish-detour.c deleted file mode 100644 index d4559d417..000000000 --- a/benchmarks/600.workflows/640.selfish-detour/selfish-detour.c +++ /dev/null @@ -1,138 +0,0 @@ -// THIS IS THE SELFISH DETOUR EXAMPLE FROM 
NETGAUGE https://spcl.inf.ethz.ch/Research/Performance/Netgauge/OS_Noise/ - -#include -#include -#include - -#define UINT64_T uint64_t -#define UINT32_T uint32_t - -typedef struct { - UINT32_T l; - UINT32_T h; -} x86_64_timeval_t; - -#define HRT_TIMESTAMP_T x86_64_timeval_t - -/* TODO: Do we need a while loop here? aka Is rdtsc atomic? - check in the documentation */ -#define HRT_GET_TIMESTAMP(t1) __asm__ __volatile__ ("rdtsc" : "=a" (t1.l), "=d" (t1.h)); - -#define HRT_GET_ELAPSED_TICKS(t1, t2, numptr) *numptr = (((( UINT64_T ) t2.h) << 32) | t2.l) - \ - (((( UINT64_T ) t1.h) << 32) | t1.l); - -#define HRT_GET_TIME(t1, time) time = (((( UINT64_T ) t1.h) << 32) | t1.l) - -double get_ticks_per_second() { - #define NUM_TESTS 10 - - HRT_TIMESTAMP_T t1, t2; - uint64_t res[NUM_TESTS]; - uint64_t min=0; - int count; - - for (count=0; count res[count]) min = res[count]; - } - - return ((double) min); -} - -void selfish_detour(int num_runs, int threshold, uint64_t *results) { - int cnt=0, num_not_smaller = 0; - HRT_TIMESTAMP_T current, prev, start; - uint64_t sample = 0; - uint64_t elapsed, thr, min=(uint64_t)~0; - int i; - - // we will do a "calibration run" of the detour benchmark to - // get a reasonable value for the minimal detour time - // just perform the benchmark and record the minimal detour time until - // this minimal detour time does not get smaller for 1000 (as defined by NOT_SMALLER) - // consecutive runs - - #define NOT_SMALLER 100 - #define INNER_TRIES 50 - - thr = min*(threshold/100.0); - while (num_not_smaller < NOT_SMALLER) { - cnt = 0; - - HRT_GET_TIMESTAMP(start); - HRT_GET_TIMESTAMP(current); - - // this is exactly the same loop as below for measurement - while (cnt < INNER_TRIES) { - prev = current; - HRT_GET_TIMESTAMP(current); - - sample++; - - HRT_GET_ELAPSED_TICKS(prev, current, &elapsed); - // != instead of < in the benchmark loop in order to make the - // notsmaller principle useful - if ( elapsed != thr ) { - HRT_GET_ELAPSED_TICKS(start, 
prev, &results[cnt++]); - HRT_GET_ELAPSED_TICKS(start, current, &results[cnt++]); - } - } - - // find minimum in results array - this is outside the - // calibration/measurement loop! - { - if(min == 0) { - printf("The initialization reached 0 clock cycles - the clock accuracy seems too low (setting min=1 and exiting calibration)\n"); - min = 1; - break; - } - int smaller=0; - for(i = 0; i < INNER_TRIES; i+=2) { - if(results[i+1]-results[i] < min) { - min = results[i+1]-results[i]; - smaller=1; - //printf("[%i] min: %lu\n", r, min); - } - } - if (!smaller) num_not_smaller++; - else num_not_smaller = 0; - } - } - - // now we perform the actual benchmark: Read a time-stamp-counter in a tight - // loop ignore the results if the timestamps are close to each other, as we can assume - // that nobody interrupted us. If the difference of the timestamps exceeds a certain - // threshold, we assume that we have been "hit" by a "noise event" and record the - // time difference for later analysis - - cnt = 2; - sample = 0; - - HRT_GET_TIMESTAMP(start); - HRT_GET_TIMESTAMP(current); - - // perform this outside measurement loop in order to save - // time/increase measurement frequency - thr = min*(threshold/100.0); - while (cnt < num_runs) { - prev = current; - HRT_GET_TIMESTAMP(current); - - sample++; - - HRT_GET_ELAPSED_TICKS(prev, current, &elapsed); - if ( elapsed > thr ) { - HRT_GET_ELAPSED_TICKS(start, prev, &results[cnt++]); - HRT_GET_ELAPSED_TICKS(start, current, &results[cnt++]); - } - } - - results[0] = min; - results[1] = sample; -} \ No newline at end of file diff --git a/run_local_workflows.sh b/run_local_workflows.sh index 6f68e14a3..ccb555239 100755 --- a/run_local_workflows.sh +++ b/run_local_workflows.sh @@ -1,6 +1,7 @@ #!/bin/bash set -euo pipefail +# Prepare local configuration files if [ ! 
-f config/local_workflows.json ]; then cp config/example.json config/local_workflows.json fi @@ -47,6 +48,17 @@ if docker ps -a --format '{{.Names}}' | grep -q '^sebs-redis$'; then fi docker run -d --name sebs-redis -p 6380:6379 redis:7 +# Ensure native helper for selfish-detour is built before packaging +SELFISH_DIR="benchmarks/600.workflows/640.selfish-detour/python" +SELFISH_SRC="$SELFISH_DIR/selfish-detour.c" +SELFISH_SO="$SELFISH_DIR/selfish-detour.so" +if [ -f "$SELFISH_SRC" ]; then + if [ ! -f "$SELFISH_SO" ] || [ "$SELFISH_SRC" -nt "$SELFISH_SO" ]; then + echo "Compiling selfish-detour shared object..." + gcc -O2 -shared -fPIC -o "$SELFISH_SO" "$SELFISH_SRC" + fi +fi + WORKFLOWS=( "610.gen" "6100.1000-genome" From 7e0d13f6bf987914fae423d05d8d8171b8ddd3a2 Mon Sep 17 00:00:00 2001 From: xipang Date: Sun, 16 Nov 2025 02:02:21 +0100 Subject: [PATCH 40/82] microbenchmark example --- .../050.matmul/config.json | 6 ++ .../000.microbenchmarks/050.matmul/input.py | 8 +++ .../050.matmul/python/function.py | 64 +++++++++++++++++++ .../050.matmul/python/requirements.txt | 1 + .../050.matmul/python/requirements.txt.3.10 | 0 .../050.matmul/python/requirements.txt.3.11 | 0 .../050.matmul/python/requirements.txt.3.12 | 0 .../050.matmul/python/requirements.txt.3.7 | 0 .../050.matmul/python/requirements.txt.3.8 | 0 .../050.matmul/python/requirements.txt.3.9 | 0 .../python/requirements.txt.arm.3.8 | 0 .../python/requirements.txt.arm.3.9 | 0 12 files changed, 79 insertions(+) create mode 100644 benchmarks/000.microbenchmarks/050.matmul/config.json create mode 100644 benchmarks/000.microbenchmarks/050.matmul/input.py create mode 100755 benchmarks/000.microbenchmarks/050.matmul/python/function.py create mode 100755 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt create mode 100644 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.10 create mode 100644 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.11 create mode 100644 
benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.12 create mode 100755 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.7 create mode 100755 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.8 create mode 100755 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.9 create mode 100644 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.arm.3.8 create mode 100644 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.arm.3.9 diff --git a/benchmarks/000.microbenchmarks/050.matmul/config.json b/benchmarks/000.microbenchmarks/050.matmul/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/000.microbenchmarks/050.matmul/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/000.microbenchmarks/050.matmul/input.py b/benchmarks/000.microbenchmarks/050.matmul/input.py new file mode 100644 index 000000000..c1597dca2 --- /dev/null +++ b/benchmarks/000.microbenchmarks/050.matmul/input.py @@ -0,0 +1,8 @@ +size_generators = {"test": 10, "small": 1000, "large": 100000} +reps_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42, "reps": reps_generators[size]} diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/function.py b/benchmarks/000.microbenchmarks/050.matmul/python/function.py new file mode 100755 index 000000000..f7a9c528e --- /dev/null +++ b/benchmarks/000.microbenchmarks/050.matmul/python/function.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +import sys, json, math, torch +import datetime + + +def initialize_torch(NI, NJ, NK, dtype=torch.float32, device="cuda"): + alpha = torch.tensor(1.5, dtype=dtype, device=device) + beta = torch.tensor(1.2, dtype=dtype, device=device) + i = 
torch.arange(NI, device=device) + j = torch.arange(NJ, device=device) + k = torch.arange(NK, device=device) + C = ((i[:, None] * j[None, :] + 1) % NI).to(dtype) / NI + A = ((i[:, None] * (k[None, :] + 1)) % NK).to(dtype) / NK + B = ((k[:, None] * (j[None, :] + 2)) % NJ).to(dtype) / NJ + return alpha, beta, C, A, B + + +def kernel_gemm(alpha, beta, C, A, B, reps): + torch.cuda.synchronize() + _ = alpha * (A @ B) + beta * C # warmup + torch.cuda.synchronize() + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + start.record() + for _ in range(reps): + C = alpha * (A @ B) + beta * C + end.record() + torch.cuda.synchronize() + return C, float(start.elapsed_time(end)) # ms for all reps + + +def handler(event): + + size = event.get("size") + reps = event.get("reps") + if "seed" in event: + import random + + random.seed(event["seed"]) + + seed = event.get("seed") + seed = int(seed) + + matrix_generating_begin = datetime.datetime.now() + alpha, beta, C, A, B = initialize_torch(size, size, size, dtype=torch.float32, device="cuda") + matrix_generating_end = datetime.datetime.now() + + matmul_begin = datetime.datetime.now() + C_out, gpu_ms = kernel_gemm(alpha, beta, C, A, B, reps=reps) + matmul_end = datetime.datetime.now() + + matrix_generating_time = (matrix_generating_end - matrix_generating_begin) / datetime.timedelta( + microseconds=1 + ) + matmul_time = (matmul_end - matmul_begin) / datetime.timedelta(microseconds=1) + + return { + "result": C_out, + "measurement": { + "generating_time": f"{matrix_generating_time} microseconds", + "compute_time": f"{gpu_ms} milliseconds", + "avg_compute_time": f"{gpu_ms / reps} milliseconds", + }, + } diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt @@ -0,0 +1 @@ 
+torch==2.4.1 diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.10 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.11 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.12 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.7 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.8 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.9 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.arm.3.8 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.arm.3.9 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb From 942f5a1f4e94e9f70d2e078fded39ebd622ed2d9 Mon Sep 17 00:00:00 2001 From: Russellpang <127130747+Russellpang@users.noreply.github.com> Date: Sun, 16 Nov 2025 02:13:21 +0100 Subject: [PATCH 41/82] Remove SSH public key from eval command --- "eval \"$(ssh-agent -s)\".pub" | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
"a/eval \"$(ssh-agent -s)\".pub" "b/eval \"$(ssh-agent -s)\".pub" index c616a46e0..8b1378917 100644 --- "a/eval \"$(ssh-agent -s)\".pub" +++ "b/eval \"$(ssh-agent -s)\".pub" @@ -1 +1 @@ -ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHWa1ZYvhuZCapEy9tWqPPdlQpfs15h8zMDtZ/7fUOq0 russellpang0503@gmail.com + From 6bc1dd7deb3a1abcf7efbcb48aba8277d20e0a1c Mon Sep 17 00:00:00 2001 From: Russellpang <127130747+Russellpang@users.noreply.github.com> Date: Sun, 16 Nov 2025 02:15:27 +0100 Subject: [PATCH 42/82] Remove local_deployment.json configuration --- config/local_deployment.json | 126 +---------------------------------- 1 file changed, 1 insertion(+), 125 deletions(-) diff --git a/config/local_deployment.json b/config/local_deployment.json index 773b43943..8b1378917 100644 --- a/config/local_deployment.json +++ b/config/local_deployment.json @@ -1,125 +1 @@ -{ - "experiments": { - "deployment": "aws", - "update_code": false, - "update_storage": false, - "download_results": false, - "architecture": "arm64", - "container_deployment": true, - "runtime": { - "language": "python", - "version": "3.8" - }, - "type": "invocation-overhead", - "perf-cost": { - "benchmark": "110.dynamic-html", - "experiments": [ - "cold", - "warm", - "burst", - "sequential" - ], - "input-size": "test", - "repetitions": 50, - "concurrent-invocations": 50, - "memory-sizes": [ - 128, - 256 - ] - }, - "network-ping-pong": { - "invocations": 50, - "repetitions": 1000, - "threads": 1 - }, - "invocation-overhead": { - "repetitions": 5, - "N": 20, - "type": "payload", - "payload_begin": 1024, - "payload_end": 6251000, - "payload_points": 20, - "code_begin": 1048576, - "code_end": 261619712, - "code_points": 20 - }, - "eviction-model": { - "invocations": 1, - "function_copy_idx": 0, - "repetitions": 5, - "sleep": 1 - } - }, - "deployment": { - "name": "aws", - "aws": { - "region": "us-east-1", - "lambda-role": "" - }, - "azure": { - "region": "westeurope" - }, - "gcp": { - "region": "europe-west1", - "project_name": 
"", - "credentials": "" - }, - "local": { - "storage": { - "object": { - "type": "minio", - "minio": { - "address": "192.168.1.101:9011", - "mapped_port": 9011, - "access_key": "-5X8s-Wf3pQfjnc7kMAlr9HYX96jIMx3P7GSd55IBAY", - "secret_key": "c9e3b900a8d363f9907af7057fb5a8e35cb14ad24e9e474e75b8139323717fdc", - "instance_id": "26785ccd17e17e72255c255d00756a9eaa14b1aeb60a52527b97cfb33eece9e5", - "output_buckets": [], - "input_buckets": [], - "version": "RELEASE.2024-07-16T23-46-41Z", - "data_volume": "minio-volume", - "type": "minio" - } - }, - "nosql": { - "type": "scylladb", - "scylladb": { - "address": "192.168.1.101:9012", - "mapped_port": 9012, - "alternator_port": 8000, - "access_key": "None", - "secret_key": "None", - "instance_id": "28fb7af28043b633b33b4b5999b48c14767f717ed76a73515ff8c68f253baeb1", - "region": "None", - "cpus": 1, - "memory": "750", - "version": "6.0", - "data_volume": "scylladb-volume" - } - } - } - }, - "openwhisk": { - "shutdownStorage": false, - "removeCluster": false, - "wskBypassSecurity": "true", - "wskExec": "wsk", - "experimentalManifest": false, - "docker_registry": { - "registry": "", - "username": "", - "password": "" - }, - "storage": { - "address": "", - "mapped_port": -1, - "access_key": "", - "secret_key": "", - "instance_id": "", - "input_buckets": [], - "output_buckets": [], - "type": "minio" - } - } - } -} + From 460ea1ff405458ad129a7861946d02a5c2676cea Mon Sep 17 00:00:00 2001 From: Russellpang <127130747+Russellpang@users.noreply.github.com> Date: Sun, 16 Nov 2025 02:16:30 +0100 Subject: [PATCH 43/82] Delete out_storage.json configuration file Removed configuration details for MinIO and ScyllaDB. 
--- out_storage.json | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/out_storage.json b/out_storage.json index 6c4a8e799..8b1378917 100644 --- a/out_storage.json +++ b/out_storage.json @@ -1,33 +1 @@ -{ - "object": { - "type": "minio", - "minio": { - "address": "172.17.0.2:9000", - "mapped_port": 9011, - "access_key": "-5X8s-Wf3pQfjnc7kMAlr9HYX96jIMx3P7GSd55IBAY", - "secret_key": "c9e3b900a8d363f9907af7057fb5a8e35cb14ad24e9e474e75b8139323717fdc", - "instance_id": "26785ccd17e17e72255c255d00756a9eaa14b1aeb60a52527b97cfb33eece9e5", - "output_buckets": [], - "input_buckets": [], - "version": "RELEASE.2024-07-16T23-46-41Z", - "data_volume": "minio-volume", - "type": "minio" - } - }, - "nosql": { - "type": "scylladb", - "scylladb": { - "address": "172.17.0.3:8000", - "mapped_port": 9012, - "alternator_port": 8000, - "access_key": "None", - "secret_key": "None", - "instance_id": "28fb7af28043b633b33b4b5999b48c14767f717ed76a73515ff8c68f253baeb1", - "region": "None", - "cpus": 1, - "memory": "750", - "version": "6.0", - "data_volume": "scylladb-volume" - } - } -} \ No newline at end of file + From de41ab63b4d54ae5a718b12b8a220d7a8691a00d Mon Sep 17 00:00:00 2001 From: Russellpang <127130747+Russellpang@users.noreply.github.com> Date: Sun, 16 Nov 2025 02:18:14 +0100 Subject: [PATCH 44/82] Remove SSH private key from eval command Removed sensitive SSH private key from eval command. 
--- "eval \"$(ssh-agent -s)\"" | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git "a/eval \"$(ssh-agent -s)\"" "b/eval \"$(ssh-agent -s)\"" index 892647ba0..8b1378917 100644 --- "a/eval \"$(ssh-agent -s)\"" +++ "b/eval \"$(ssh-agent -s)\"" @@ -1,7 +1 @@ ------BEGIN OPENSSH PRIVATE KEY----- -b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW -QyNTUxOQAAACB1mtWWL4bmQmqRMvbVqjz3ZUKX7NeYfMzA7Wf+31DqtAAAAKA7gCOkO4Aj -pAAAAAtzc2gtZWQyNTUxOQAAACB1mtWWL4bmQmqRMvbVqjz3ZUKX7NeYfMzA7Wf+31DqtA -AAAEBZdoiktY5L2ikHyUK4JfoeaTTX1KBHCtB+muQV2Y68SXWa1ZYvhuZCapEy9tWqPPdl -Qpfs15h8zMDtZ/7fUOq0AAAAGXJ1c3NlbGxwYW5nMDUwM0BnbWFpbC5jb20BAgME ------END OPENSSH PRIVATE KEY----- + From ded520f6177aece8481e547af5e45e5bf955c974 Mon Sep 17 00:00:00 2001 From: xipang Date: Sun, 16 Nov 2025 19:47:11 +0100 Subject: [PATCH 45/82] remove garbage --- "eval \"$(ssh-agent -s)\"" | 1 - "eval \"$(ssh-agent -s)\".pub" | 1 - out_benchmark.json | 40 ----------------------------- out_benchmark_bert.json | 47 ---------------------------------- out_storage.json | 1 - 5 files changed, 90 deletions(-) delete mode 100644 "eval \"$(ssh-agent -s)\"" delete mode 100644 "eval \"$(ssh-agent -s)\".pub" delete mode 100644 out_benchmark.json delete mode 100644 out_benchmark_bert.json delete mode 100644 out_storage.json diff --git "a/eval \"$(ssh-agent -s)\"" "b/eval \"$(ssh-agent -s)\"" deleted file mode 100644 index 8b1378917..000000000 --- "a/eval \"$(ssh-agent -s)\"" +++ /dev/null @@ -1 +0,0 @@ - diff --git "a/eval \"$(ssh-agent -s)\".pub" "b/eval \"$(ssh-agent -s)\".pub" deleted file mode 100644 index 8b1378917..000000000 --- "a/eval \"$(ssh-agent -s)\".pub" +++ /dev/null @@ -1 +0,0 @@ - diff --git a/out_benchmark.json b/out_benchmark.json deleted file mode 100644 index cf96b5c5d..000000000 --- a/out_benchmark.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "functions": [ - { - "benchmark": "110.dynamic-html", - "config": { - "architecture": "x64", - "memory": 128, - "runtime": { - 
"language": "python", - "version": "3.8" - }, - "timeout": 10 - }, - "hash": "c9fbe3e5e85e119e20d6a72651ef23b0", - "instance_id": "be83782894ea459ea3328d4d3b4aeb7173acec54f2c1cebb354f97459b318eef", - "name": "sebs-local-f2033fb2-110.dynamic-html-python-3.8", - "port": 9000, - "triggers": [], - "url": "172.17.0.4:9000" - } - ], - "inputs": [ - { - "random_len": 10, - "username": "testname" - } - ], - "storage": { - "access_key": "FC95zkJKAUbfmXU3ci5lc0SFWhDWYrSgx0nRNycthcY", - "address": "172.17.0.2:9000", - "data_volume": "minio-volume", - "input_buckets": [], - "instance_id": "d14d6dc800ce2f976cc19dc4dd85d2010b65709266f5ae0f1a0c157d338134ec", - "mapped_port": 9011, - "output_buckets": [], - "secret_key": "65eb94d63b0191bf765864ffbd4ee58cc1eb852bdc3a5ca463bca2e7c5915aec", - "type": "minio", - "version": "RELEASE.2024-07-16T23-46-41Z" - } -} \ No newline at end of file diff --git a/out_benchmark_bert.json b/out_benchmark_bert.json deleted file mode 100644 index c706062a2..000000000 --- a/out_benchmark_bert.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "functions": [ - { - "benchmark": "413.image-classification", - "config": { - "architecture": "x64", - "memory": 512, - "runtime": { - "language": "python", - "version": "3.8" - }, - "timeout": 60 - }, - "hash": "445383b434f036f9520743532c7bc0b1", - "instance_id": "a59d2b025927d6350363787bfbab3e7d44b90e0575cd72b939dd9cee291dd63c", - "name": "sebs-local-f2033fb2-413.image-classification-python-3.8", - "port": 9000, - "triggers": [], - "url": "172.17.0.4:9000" - } - ], - "inputs": [ - { - "bucket": { - "bucket": "sebs-benchmarks-local-f2033fb2", - "input": "413.image-classification-1-input", - "model": "413.image-classification-0-input" - }, - "object": { - "input": "800px-Porsche_991_silver_IAA.jpg", - "model": "resnet50.tar.gz" - } - } - ], - "storage": { - "access_key": "-5X8s-Wf3pQfjnc7kMAlr9HYX96jIMx3P7GSd55IBAY", - "address": "192.168.1.101:9011", - "data_volume": "minio-volume", - "input_buckets": [], - "instance_id": 
"26785ccd17e17e72255c255d00756a9eaa14b1aeb60a52527b97cfb33eece9e5", - "mapped_port": 9011, - "output_buckets": [], - "secret_key": "c9e3b900a8d363f9907af7057fb5a8e35cb14ad24e9e474e75b8139323717fdc", - "type": "minio", - "version": "RELEASE.2024-07-16T23-46-41Z" - } -} \ No newline at end of file diff --git a/out_storage.json b/out_storage.json deleted file mode 100644 index 8b1378917..000000000 --- a/out_storage.json +++ /dev/null @@ -1 +0,0 @@ - From 5c85980e1695a1afdd4ce54b9bea223320b9c54d Mon Sep 17 00:00:00 2001 From: Russellpang Date: Mon, 17 Nov 2025 17:18:32 +0100 Subject: [PATCH 46/82] test --- .../400.inference/413.image-classification/python/function.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/400.inference/413.image-classification/python/function.py b/benchmarks/400.inference/413.image-classification/python/function.py index 48e837f70..64795612d 100644 --- a/benchmarks/400.inference/413.image-classification/python/function.py +++ b/benchmarks/400.inference/413.image-classification/python/function.py @@ -159,7 +159,7 @@ def handler(event): download_time = (image_download_end - image_download_begin) / datetime.timedelta(microseconds=1) compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1) - #gpu_time_ms = 0.0 + # gpu_time_ms = 0.0 return { "result": { @@ -173,6 +173,6 @@ def handler(event): "compute_time": compute_time + model_process_time, "model_time": model_process_time, "model_download_time": model_download_time, - #"gpu_time_ms": round(gpu_time_ms, 3), + # "gpu_time_ms": round(gpu_time_ms, 3), }, } From c5782dddcb367178543a8939ae31f1fe6a04c492 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Mon, 17 Nov 2025 17:24:40 +0100 Subject: [PATCH 47/82] test --- .../606.spmv/python/function.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/benchmarks/600.linearalgebra/606.spmv/python/function.py 
b/benchmarks/600.linearalgebra/606.spmv/python/function.py index 16e8744d1..e2c4b0218 100755 --- a/benchmarks/600.linearalgebra/606.spmv/python/function.py +++ b/benchmarks/600.linearalgebra/606.spmv/python/function.py @@ -1,4 +1,4 @@ -import sys, json, math, torch +import torch import datetime @@ -6,19 +6,19 @@ def initialize_torch(N, density=0.01, dtype=torch.float32, device="cuda", seed=4 if seed is not None: torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) - + nnz = int(N * N * density) row_indices = torch.randint(0, N, (nnz,), device=device) col_indices = torch.randint(0, N, (nnz,), device=device) values = torch.randn(nnz, dtype=dtype, device=device) - + indices = torch.stack([row_indices, col_indices]) sparse_matrix = torch.sparse_coo_tensor(indices, values, (N, N), dtype=dtype, device=device) - + sparse_matrix_csr = sparse_matrix.to_sparse_csr() - + x = torch.randn(N, dtype=dtype, device=device) - + return sparse_matrix_csr, x @@ -26,7 +26,7 @@ def kernel_spmv(A, x, reps=100): torch.cuda.synchronize() _ = torch.sparse.mm(A, x.unsqueeze(1)).squeeze() # warmup torch.cuda.synchronize() - + start_evt = torch.cuda.Event(enable_timing=True) end_evt = torch.cuda.Event(enable_timing=True) start_evt.record() @@ -41,26 +41,27 @@ def kernel_spmv(A, x, reps=100): def handler(event): size = event.get("size") density = event.get("density", 0.01) # default 1% density - + if "seed" in event: import random + random.seed(event["seed"]) seed = event.get("seed", 42) seed = int(seed) else: seed = 42 - + gen_begin = datetime.datetime.now() A, x = initialize_torch(size, density=density, dtype=torch.float32, device="cuda", seed=seed) gen_end = datetime.datetime.now() - + comp_begin = datetime.datetime.now() y_out, gpu_ms = kernel_spmv(A, x, reps=100) comp_end = datetime.datetime.now() - + gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1) comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1) - + return { "measurement": { "generating_time": 
gen_us, From 2b52cede914be87e97f751a74bf193e4f3252bfd Mon Sep 17 00:00:00 2001 From: Russellpang Date: Mon, 17 Nov 2025 17:35:28 +0100 Subject: [PATCH 48/82] test --- .../000.microbenchmarks/050.matmul/python/function.py | 8 ++++---- .../400.inference/413.image-classification/input.py | 2 +- .../600.linearalgebra/601.matmul/python/function.py | 2 +- benchmarks/600.linearalgebra/602.axpy/python/function.py | 3 ++- .../600.linearalgebra/603.jacobi2d/python/function.py | 2 +- .../600.linearalgebra/604.cholesky/python/function.py | 3 ++- benchmarks/600.linearalgebra/605.lu/python/function.py | 3 ++- benchmarks/600.linearalgebra/607.fw/python/function.py | 2 +- 8 files changed, 14 insertions(+), 11 deletions(-) diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/function.py b/benchmarks/000.microbenchmarks/050.matmul/python/function.py index f7a9c528e..cf1f5f1a0 100755 --- a/benchmarks/000.microbenchmarks/050.matmul/python/function.py +++ b/benchmarks/000.microbenchmarks/050.matmul/python/function.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -import sys, json, math, torch +import torch import datetime @@ -45,14 +45,14 @@ def handler(event): alpha, beta, C, A, B = initialize_torch(size, size, size, dtype=torch.float32, device="cuda") matrix_generating_end = datetime.datetime.now() - matmul_begin = datetime.datetime.now() + # matmul_begin = datetime.datetime.now() C_out, gpu_ms = kernel_gemm(alpha, beta, C, A, B, reps=reps) - matmul_end = datetime.datetime.now() + # matmul_end = datetime.datetime.now() matrix_generating_time = (matrix_generating_end - matrix_generating_begin) / datetime.timedelta( microseconds=1 ) - matmul_time = (matmul_end - matmul_begin) / datetime.timedelta(microseconds=1) + # matmul_time = (matmul_end - matmul_begin) / datetime.timedelta(microseconds=1) return { "result": C_out, diff --git a/benchmarks/400.inference/413.image-classification/input.py b/benchmarks/400.inference/413.image-classification/input.py index 6ee2fdd08..99e8bc4b3 100644 
--- a/benchmarks/400.inference/413.image-classification/input.py +++ b/benchmarks/400.inference/413.image-classification/input.py @@ -1,4 +1,4 @@ -import glob, os +import os def buckets_count(): diff --git a/benchmarks/600.linearalgebra/601.matmul/python/function.py b/benchmarks/600.linearalgebra/601.matmul/python/function.py index ee1ceaff7..ee88b2e58 100755 --- a/benchmarks/600.linearalgebra/601.matmul/python/function.py +++ b/benchmarks/600.linearalgebra/601.matmul/python/function.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -import sys, json, math, torch +import torch import datetime diff --git a/benchmarks/600.linearalgebra/602.axpy/python/function.py b/benchmarks/600.linearalgebra/602.axpy/python/function.py index 9c31c05bd..79117fa1b 100755 --- a/benchmarks/600.linearalgebra/602.axpy/python/function.py +++ b/benchmarks/600.linearalgebra/602.axpy/python/function.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -import sys, json, torch, datetime +import torch +import datetime def initialize_torch(N, dtype=torch.float32, device="cuda", seed=42): diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py b/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py index b83230f04..ab3771dc7 100755 --- a/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py +++ b/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -import sys, json, math, torch +import torch import datetime diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/function.py b/benchmarks/600.linearalgebra/604.cholesky/python/function.py index 537015e56..5a7ac77d5 100755 --- a/benchmarks/600.linearalgebra/604.cholesky/python/function.py +++ b/benchmarks/600.linearalgebra/604.cholesky/python/function.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -import sys, json, torch, datetime +import torch +import datetime def initialize_torch(N, dtype=torch.float32, device="cuda"): diff --git a/benchmarks/600.linearalgebra/605.lu/python/function.py 
b/benchmarks/600.linearalgebra/605.lu/python/function.py index 492153bb7..fc99a3ab9 100755 --- a/benchmarks/600.linearalgebra/605.lu/python/function.py +++ b/benchmarks/600.linearalgebra/605.lu/python/function.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -import sys, json, torch, datetime +import torch +import datetime def initialize_torch(N, dtype=torch.float32, device="cuda"): diff --git a/benchmarks/600.linearalgebra/607.fw/python/function.py b/benchmarks/600.linearalgebra/607.fw/python/function.py index 2db5e1f18..bee06dd03 100755 --- a/benchmarks/600.linearalgebra/607.fw/python/function.py +++ b/benchmarks/600.linearalgebra/607.fw/python/function.py @@ -1,4 +1,4 @@ -import sys, json, math, torch +import torch import datetime From 6488d6d8ad9d6a035b3a46ec0f77957e42c1d9ac Mon Sep 17 00:00:00 2001 From: Russellpang Date: Mon, 17 Nov 2025 18:35:59 +0100 Subject: [PATCH 49/82] remove unnecessay files --- config/local_deployment.json | 1 - install.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 config/local_deployment.json diff --git a/config/local_deployment.json b/config/local_deployment.json deleted file mode 100644 index 8b1378917..000000000 --- a/config/local_deployment.json +++ /dev/null @@ -1 +0,0 @@ - diff --git a/install.py b/install.py index 78e98dddd..34040b23b 100755 --- a/install.py +++ b/install.py @@ -104,7 +104,7 @@ def execute(cmd, cwd=None): execute(f"git pull", cwd=data_dir) # clone else: - execute(f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}") + execute(f"git clone https://github.com/spcl/serverless-benchmarks-data.git {data_dir}") else: raise error From 55c4ac44c1a8d37957bb2e17a3d9aad1f324d339 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Mon, 17 Nov 2025 18:38:34 +0100 Subject: [PATCH 50/82] fuck you --- install.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/install.py b/install.py index 34040b23b..784b0ab28 100755 --- a/install.py +++ b/install.py 
@@ -104,7 +104,9 @@ def execute(cmd, cwd=None): execute(f"git pull", cwd=data_dir) # clone else: - execute(f"git clone https://github.com/spcl/serverless-benchmarks-data.git {data_dir}") + execute( + f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}" + ) else: raise error From b97b7a56600fc610abadf6f20eb6a590cb5f7106 Mon Sep 17 00:00:00 2001 From: Russellpang <127130747+Russellpang@users.noreply.github.com> Date: Mon, 17 Nov 2025 18:50:48 +0100 Subject: [PATCH 51/82] Refactor argument parsing for cleaner syntax --- install.py | 40 ++++++++++------------------------------ 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/install.py b/install.py index 784b0ab28..b856e45b7 100755 --- a/install.py +++ b/install.py @@ -5,46 +5,28 @@ import subprocess parser = argparse.ArgumentParser(description="Install SeBS and dependencies.") -parser.add_argument( - "--venv", - metavar="DIR", - type=str, - default="python-venv", - help="destination of local Python virtual environment", -) -parser.add_argument( - "--python-path", - metavar="DIR", - type=str, - default="python3", - help="Path to local Python installation.", -) +parser.add_argument('--venv', metavar='DIR', type=str, default="python-venv", help='destination of local Python virtual environment') +parser.add_argument('--python-path', metavar='DIR', type=str, default="python3", help='Path to local Python installation.') for deployment in ["aws", "azure", "gcp", "openwhisk"]: - parser.add_argument( - f"--{deployment}", action="store_const", const=True, default=True, dest=deployment - ) - parser.add_argument( - f"--no-{deployment}", action="store_const", const=False, default=True, dest=deployment - ) + parser.add_argument(f"--{deployment}", action="store_const", const=True, default=True, dest=deployment) + parser.add_argument(f"--no-{deployment}", action="store_const", const=False, default=True, dest=deployment) for deployment in ["local"]: - parser.add_argument( - 
f"--{deployment}", action="store_const", default=True, const=True, dest=deployment - ) + parser.add_argument(f"--{deployment}", action="store_const", default=True, const=True, dest=deployment) parser.add_argument(f"--no-{deployment}", action="store_const", const=False, dest=deployment) parser.add_argument("--with-pypapi", action="store_true") args = parser.parse_args() - def execute(cmd, cwd=None): - ret = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, cwd=cwd) + ret = subprocess.run( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, cwd=cwd + ) if ret.returncode: raise RuntimeError( "Running {} failed!\n Output: {}".format(cmd, ret.stdout.decode("utf-8")) ) return ret.stdout.decode("utf-8") - -env_dir = args.venv +env_dir=args.venv if not os.path.exists(env_dir): print("Creating Python virtualenv at {}".format(env_dir)) @@ -104,9 +86,7 @@ def execute(cmd, cwd=None): execute(f"git pull", cwd=data_dir) # clone else: - execute( - f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}" - ) + execute(f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}") else: raise error From 1998b6b141e578fcc0a3fa42982bb5b94b34f84f Mon Sep 17 00:00:00 2001 From: Russellpang <127130747+Russellpang@users.noreply.github.com> Date: Mon, 17 Nov 2025 19:29:02 +0100 Subject: [PATCH 52/82] Change 'reps' to 'iters' in jacobi2d function --- benchmarks/600.linearalgebra/603.jacobi2d/python/function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py b/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py index ab3771dc7..4dc37e2c6 100755 --- a/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py +++ b/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py @@ -54,7 +54,7 @@ def handler(event): matrix_generating_end = datetime.datetime.now() matmul_begin = datetime.datetime.now() 
- A_out, B_out, gpu_ms = kernel_jacobi2d(A, B, reps=50) + A_out, B_out, gpu_ms = kernel_jacobi2d(A, B, iters=50) matmul_end = datetime.datetime.now() matrix_generating_time = (matrix_generating_end - matrix_generating_begin) / datetime.timedelta( From 2cbd76855149eac137daf8ea021d494dfd20abc9 Mon Sep 17 00:00:00 2001 From: Russellpang <127130747+Russellpang@users.noreply.github.com> Date: Mon, 17 Nov 2025 19:48:19 +0100 Subject: [PATCH 53/82] Delete benchmarks/000.microbenchmarks/050.matmul directory --- .../050.matmul/config.json | 6 -- .../000.microbenchmarks/050.matmul/input.py | 8 --- .../050.matmul/python/function.py | 64 ------------------- .../050.matmul/python/requirements.txt | 1 - .../050.matmul/python/requirements.txt.3.10 | 0 .../050.matmul/python/requirements.txt.3.11 | 0 .../050.matmul/python/requirements.txt.3.12 | 0 .../050.matmul/python/requirements.txt.3.7 | 0 .../050.matmul/python/requirements.txt.3.8 | 0 .../050.matmul/python/requirements.txt.3.9 | 0 .../python/requirements.txt.arm.3.8 | 0 .../python/requirements.txt.arm.3.9 | 0 12 files changed, 79 deletions(-) delete mode 100644 benchmarks/000.microbenchmarks/050.matmul/config.json delete mode 100644 benchmarks/000.microbenchmarks/050.matmul/input.py delete mode 100755 benchmarks/000.microbenchmarks/050.matmul/python/function.py delete mode 100755 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt delete mode 100644 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.10 delete mode 100644 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.11 delete mode 100644 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.12 delete mode 100755 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.7 delete mode 100755 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.8 delete mode 100755 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.9 delete mode 100644 
benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.arm.3.8 delete mode 100644 benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.arm.3.9 diff --git a/benchmarks/000.microbenchmarks/050.matmul/config.json b/benchmarks/000.microbenchmarks/050.matmul/config.json deleted file mode 100644 index e80fb4351..000000000 --- a/benchmarks/000.microbenchmarks/050.matmul/config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "timeout": 120, - "memory": 512, - "languages": ["python"], - "modules": [] -} diff --git a/benchmarks/000.microbenchmarks/050.matmul/input.py b/benchmarks/000.microbenchmarks/050.matmul/input.py deleted file mode 100644 index c1597dca2..000000000 --- a/benchmarks/000.microbenchmarks/050.matmul/input.py +++ /dev/null @@ -1,8 +0,0 @@ -size_generators = {"test": 10, "small": 1000, "large": 100000} -reps_generators = {"test": 10, "small": 100, "large": 1000} - - -def generate_input( - data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func -): - return {"size": size_generators[size], "seed": 42, "reps": reps_generators[size]} diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/function.py b/benchmarks/000.microbenchmarks/050.matmul/python/function.py deleted file mode 100755 index cf1f5f1a0..000000000 --- a/benchmarks/000.microbenchmarks/050.matmul/python/function.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python3 -import torch -import datetime - - -def initialize_torch(NI, NJ, NK, dtype=torch.float32, device="cuda"): - alpha = torch.tensor(1.5, dtype=dtype, device=device) - beta = torch.tensor(1.2, dtype=dtype, device=device) - i = torch.arange(NI, device=device) - j = torch.arange(NJ, device=device) - k = torch.arange(NK, device=device) - C = ((i[:, None] * j[None, :] + 1) % NI).to(dtype) / NI - A = ((i[:, None] * (k[None, :] + 1)) % NK).to(dtype) / NK - B = ((k[:, None] * (j[None, :] + 2)) % NJ).to(dtype) / NJ - return alpha, beta, C, A, B - - -def kernel_gemm(alpha, beta, C, A, B, reps): - 
torch.cuda.synchronize() - _ = alpha * (A @ B) + beta * C # warmup - torch.cuda.synchronize() - start = torch.cuda.Event(enable_timing=True) - end = torch.cuda.Event(enable_timing=True) - start.record() - for _ in range(reps): - C = alpha * (A @ B) + beta * C - end.record() - torch.cuda.synchronize() - return C, float(start.elapsed_time(end)) # ms for all reps - - -def handler(event): - - size = event.get("size") - reps = event.get("reps") - if "seed" in event: - import random - - random.seed(event["seed"]) - - seed = event.get("seed") - seed = int(seed) - - matrix_generating_begin = datetime.datetime.now() - alpha, beta, C, A, B = initialize_torch(size, size, size, dtype=torch.float32, device="cuda") - matrix_generating_end = datetime.datetime.now() - - # matmul_begin = datetime.datetime.now() - C_out, gpu_ms = kernel_gemm(alpha, beta, C, A, B, reps=reps) - # matmul_end = datetime.datetime.now() - - matrix_generating_time = (matrix_generating_end - matrix_generating_begin) / datetime.timedelta( - microseconds=1 - ) - # matmul_time = (matmul_end - matmul_begin) / datetime.timedelta(microseconds=1) - - return { - "result": C_out, - "measurement": { - "generating_time": f"{matrix_generating_time} microseconds", - "compute_time": f"{gpu_ms} milliseconds", - "avg_compute_time": f"{gpu_ms / reps} milliseconds", - }, - } diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt deleted file mode 100755 index d8d966118..000000000 --- a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -torch==2.4.1 diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.10 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.10 deleted file mode 100644 index e69de29bb..000000000 diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.11 
b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.11 deleted file mode 100644 index e69de29bb..000000000 diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.12 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.12 deleted file mode 100644 index e69de29bb..000000000 diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.7 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.7 deleted file mode 100755 index e69de29bb..000000000 diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.8 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.8 deleted file mode 100755 index e69de29bb..000000000 diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.9 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.3.9 deleted file mode 100755 index e69de29bb..000000000 diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.arm.3.8 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.arm.3.8 deleted file mode 100644 index e69de29bb..000000000 diff --git a/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.arm.3.9 b/benchmarks/000.microbenchmarks/050.matmul/python/requirements.txt.arm.3.9 deleted file mode 100644 index e69de29bb..000000000 From efced9c61abbd1ae31dd9443552d258c6fab3a6c Mon Sep 17 00:00:00 2001 From: McLavish Date: Mon, 17 Nov 2025 21:50:59 +0100 Subject: [PATCH 54/82] Revert "changed data submodule to use ssh and not https" This reverts commit 4fca4aa2526a7ae9402a4b52b242d169a815c0a4. 
--- .gitmodules | 2 +- benchmarks-data | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 0969aa83a..c33a17880 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,4 @@ url = https://github.com/mcopik/pypapi.git [submodule "benchmarks-data"] path = benchmarks-data - url = git@github.com:McLavish/serverless-benchmarks-data-dphpc.git + url = https://github.com/McLavish/serverless-benchmarks-data-dphpc.git diff --git a/benchmarks-data b/benchmarks-data index fbb693d2e..25c2bb40b 160000 --- a/benchmarks-data +++ b/benchmarks-data @@ -1 +1 @@ -Subproject commit fbb693d2efc4538d4c3514c0e3567a516a53dd8c +Subproject commit 25c2bb40b8bde342395534b534ba62f8f0ff3549 From bc48b5ed3806f1c0dc1d5314f69878c7d2e92294 Mon Sep 17 00:00:00 2001 From: McLavish Date: Mon, 17 Nov 2025 22:10:14 +0100 Subject: [PATCH 55/82] fix: missing config.json --- benchmarks/400.inference/413.recommendation/config.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 benchmarks/400.inference/413.recommendation/config.json diff --git a/benchmarks/400.inference/413.recommendation/config.json b/benchmarks/400.inference/413.recommendation/config.json new file mode 100644 index 000000000..649bb78d6 --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 1024, + "languages": ["python"], + "modules": ["storage"] +} From fb0fdafe632040f505316798473db18812076267 Mon Sep 17 00:00:00 2001 From: McLavish Date: Tue, 18 Nov 2025 15:09:02 +0100 Subject: [PATCH 56/82] changed genome requirements --- .../600.workflows/6100.1000-genome/python/requirements.txt | 3 +-- .../6101.1000-genome-individuals/python/requirements.txt | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/benchmarks/600.workflows/6100.1000-genome/python/requirements.txt b/benchmarks/600.workflows/6100.1000-genome/python/requirements.txt index c357805d6..21479b821 100644 --- 
a/benchmarks/600.workflows/6100.1000-genome/python/requirements.txt +++ b/benchmarks/600.workflows/6100.1000-genome/python/requirements.txt @@ -1,3 +1,2 @@ -#numpy==1.17 -numpy==1.25 #1.16 works on Azure, but not AWS +numpy==1.17 matplotlib diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt b/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt index 41155a4d2..21479b821 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt @@ -1,3 +1,2 @@ -#numpy==1.17 -numpy==1.25 # 1.18 lacks wheels for py3.11, align with 6100 workflow +numpy==1.17 matplotlib From 4c36a96df214d3905cd6b3398da5f8e4cd57394b Mon Sep 17 00:00:00 2001 From: McLavish Date: Tue, 18 Nov 2025 15:09:37 +0100 Subject: [PATCH 57/82] changed example.json to include workflow resources --- config/example.json | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/config/example.json b/config/example.json index 3133d7249..136fe75bd 100644 --- a/config/example.json +++ b/config/example.json @@ -1,10 +1,10 @@ { "experiments": { - "deployment": "aws", - "update_code": false, + "deployment": "local", + "update_code": true, "update_storage": false, "download_results": false, - "architecture": "arm64", + "architecture": "x64", "container_deployment": true, "runtime": { "language": "python", @@ -51,7 +51,7 @@ } }, "deployment": { - "name": "aws", + "name": "local", "aws": { "region": "us-east-1", "lambda-role": "", @@ -71,6 +71,12 @@ "credentials": "" }, "local": { + "resources": { + "redis": { + "host": "", + "password": "" + } + }, "storage": { "address": "", "mapped_port": -1, From 65067439aac0368d99439520c0c42532838d4aca Mon Sep 17 00:00:00 2001 From: McLavish Date: Tue, 18 Nov 2025 16:01:23 +0100 Subject: [PATCH 58/82] changed convenience script to do all the prereqs for workflows --- 
run_local_workflows.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/run_local_workflows.sh b/run_local_workflows.sh index ccb555239..68c9e86a7 100755 --- a/run_local_workflows.sh +++ b/run_local_workflows.sh @@ -9,6 +9,20 @@ if [ ! -f config/local_deployment.json ]; then cp config/example.json config/local_deployment.json fi +DATA_FLAG="benchmarks-data/600.workflows/6100.1000-genome/ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf" +if [ ! -f "$DATA_FLAG" ]; then + echo "Workflow datasets missing, running download_datasets.sh..." + (cd benchmarks-data/600.workflows && ./download_datasets.sh) +else + echo "Workflow datasets present, skipping download." +fi + +cleanup() { + echo "Stopping all running Docker containers..." + docker ps -q | xargs -r docker stop >/dev/null || true +} +trap cleanup EXIT + ./sebs.py storage start all config/storage.json --output-json out_storage.json MINIO_ADDRESS=$(jq -r '.object.minio.address' out_storage.json) @@ -31,14 +45,20 @@ for cfg in config/local_workflows.json config/local_deployment.json; do --arg saddr "$SCYLLA_ADDRESS" \ --argjson sport "$SCYLLA_PORT" \ --arg sinst "$SCYLLA_INSTANCE" \ + --arg redis_host "localhost:6380" \ + --arg redis_pass "" \ '(.deployment.local.storage.object.minio.address = $addr) | (.deployment.local.storage.object.minio.mapped_port = $port) | (.deployment.local.storage.object.minio.access_key = $access) | (.deployment.local.storage.object.minio.secret_key = $secret) | (.deployment.local.storage.object.minio.instance_id = $inst) + | (.deployment.local.storage.object.type = "minio") | (.deployment.local.storage.nosql.scylladb.address = $saddr) | (.deployment.local.storage.nosql.scylladb.mapped_port = $sport) | (.deployment.local.storage.nosql.scylladb.instance_id = $sinst) + | (.deployment.local.storage.nosql.type = "scylladb") + | (.deployment.local.resources.redis.host = $redis_host) + | (.deployment.local.resources.redis.password = 
$redis_pass) ' "$cfg" > "$tmp" mv "$tmp" "$cfg" done @@ -48,6 +68,8 @@ if docker ps -a --format '{{.Names}}' | grep -q '^sebs-redis$'; then fi docker run -d --name sebs-redis -p 6380:6379 redis:7 +# docker run --network=host --name redis -d redis redis-server --save 60 1 --loglevel warning --requirepass {yourpassword} + # Ensure native helper for selfish-detour is built before packaging SELFISH_DIR="benchmarks/600.workflows/640.selfish-detour/python" SELFISH_SRC="$SELFISH_DIR/selfish-detour.c" From c2d2ed4978b1f56d2ab15f2e20f8c407a05ffdd8 Mon Sep 17 00:00:00 2001 From: McLavish Date: Tue, 18 Nov 2025 16:10:58 +0100 Subject: [PATCH 59/82] removed duplicate pop --- benchmarks/600.workflows/690.ml/python/train.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/600.workflows/690.ml/python/train.py b/benchmarks/600.workflows/690.ml/python/train.py index 9cae0f43f..7d41c09d2 100644 --- a/benchmarks/600.workflows/690.ml/python/train.py +++ b/benchmarks/600.workflows/690.ml/python/train.py @@ -35,7 +35,9 @@ def load_dataset(benchmark_bucket, bucket, features, labels): def preprocess(X, y): X = StandardScaler().fit_transform(X) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=123) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.4, random_state=123 + ) return X_train, X_test, y_train, y_test @@ -55,7 +57,6 @@ def handler(schedule): bucket = schedule.pop("bucket") benchmark_bucket = schedule.pop("benchmark_bucket") schedule.pop("request-id", None) - schedule.pop("request_id", None) clf = str_to_cls(name)(**schedule) From f396272e0b79b19087cc8af6bde6be3d80e64b5c Mon Sep 17 00:00:00 2001 From: McLavish Date: Tue, 18 Nov 2025 16:17:12 +0100 Subject: [PATCH 60/82] deleted untested nodejs --- benchmarks/wrappers/local/nodejs/storage.js | 61 --------------------- 1 file changed, 61 deletions(-) delete mode 100644 benchmarks/wrappers/local/nodejs/storage.js diff --git 
a/benchmarks/wrappers/local/nodejs/storage.js b/benchmarks/wrappers/local/nodejs/storage.js deleted file mode 100644 index 9fb9d45f5..000000000 --- a/benchmarks/wrappers/local/nodejs/storage.js +++ /dev/null @@ -1,61 +0,0 @@ - -const minio = require('minio'), - path = require('path'), - uuid = require('uuid'), - util = require('util'), - stream = require('stream'); - -class minio_storage { - - constructor() { - let address = process.env.MINIO_ADDRESS; - let access_key = process.env.MINIO_ACCESS_KEY; - let secret_key = process.env.MINIO_SECRET_KEY; - this.client = new minio.Client( - { - endPoint: address.split(':')[0], - port: parseInt(address.split(':')[1], 10), - accessKey: access_key, - secretKey: secret_key, - useSSL: false - } - ); - } - - unique_name(file) { - let name = path.parse(file); - let uuid_name = uuid.v4().split('-')[0]; - return path.join(name.dir, util.format('%s.%s%s', name.name, uuid_name, name.ext)); - } - - upload(bucket, file, filepath) { - let uniqueName = this.unique_name(file); - return [uniqueName, this.client.fPutObject(bucket, uniqueName, filepath)]; - }; - - download(bucket, file, filepath) { - return this.client.fGetObject(bucket, file, filepath); - }; - - uploadStream(bucket, file) { - var write_stream = new stream.PassThrough(); - let uniqueName = this.unique_name(file); - let promise = this.client.putObject(bucket, uniqueName, write_stream, write_stream.size); - return [write_stream, promise, uniqueName]; - }; - - downloadStream(bucket, file) { - var read_stream = new stream.PassThrough(); - return this.client.getObject(bucket, file); - }; - - static get_instance() { - if(!this.instance) { - this.instance = new storage(); - } - return this.instance; - } - - -}; -exports.storage = minio_storage; From 7f6f901babf8568c4385e0d3e9f567012af44968 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Tue, 18 Nov 2025 17:39:02 +0100 Subject: [PATCH 61/82] reformat the benchmark workflows folder --- .../610.gen/python/few_people.py | 5 +- 
.../610.gen/python/get_astros.py | 5 +- .../610.gen/python/many_people.py | 5 +- .../610.gen/python/process_astros.py | 5 +- .../6100.1000-genome/python/frequency.py | 305 +++++---- .../6100.1000-genome/python/individuals.py | 82 ++- .../python/individuals_merge.py | 65 +- .../python/mutation_overlap.py | 592 +++++++++++------- .../6100.1000-genome/python/sifting.py | 135 ++-- .../python/individuals.py | 79 ++- .../600.workflows/620.func-invo/input.py | 35 +- .../600.workflows/620.func-invo/python/gen.py | 3 +- .../620.func-invo/python/process.py | 5 +- .../600.workflows/6200.trip-booking/input.py | 29 +- .../6200.trip-booking/python/cancel_flight.py | 4 +- .../6200.trip-booking/python/cancel_hotel.py | 4 +- .../6200.trip-booking/python/cancel_rental.py | 4 +- .../6200.trip-booking/python/confirm.py | 5 +- .../600.workflows/630.parallel-sleep/input.py | 64 +- .../630.parallel-sleep/python/generate.py | 9 +- .../630.parallel-sleep/python/process.py | 4 +- .../631.parallel-download/input.py | 36 +- .../631.parallel-download/python/generate.py | 5 +- .../631.parallel-download/python/process.py | 2 +- .../600.workflows/640.selfish-detour/input.py | 20 +- benchmarks/600.workflows/650.vid/input.py | 20 +- .../600.workflows/650.vid/python/analyse.py | 4 +- .../600.workflows/650.vid/python/decode.py | 25 +- .../600.workflows/650.vid/python/summarize.py | 5 +- .../600.workflows/660.map-reduce/input.py | 22 +- .../660.map-reduce/python/map.py | 7 +- .../660.map-reduce/python/reduce.py | 11 +- .../660.map-reduce/python/shuffle.py | 22 +- .../660.map-reduce/python/split.py | 37 +- .../600.workflows/670.auth/python/auth.py | 6 +- .../600.workflows/680.excamera/input.py | 20 +- .../680.excamera/python/encode.py | 17 +- .../680.excamera/python/rebase.py | 40 +- .../680.excamera/python/reencode.py | 34 +- .../680.excamera/python/split.py | 10 +- benchmarks/600.workflows/690.ml/input.py | 20 +- .../600.workflows/690.ml/python/generate.py | 27 +- 42 files changed, 1068 
insertions(+), 766 deletions(-) diff --git a/benchmarks/600.workflows/610.gen/python/few_people.py b/benchmarks/600.workflows/610.gen/python/few_people.py index 9c70d9fbc..b9555199a 100644 --- a/benchmarks/600.workflows/610.gen/python/few_people.py +++ b/benchmarks/600.workflows/610.gen/python/few_people.py @@ -1,5 +1,2 @@ def handler(event): - return { - "many_astros": False, - **event - } \ No newline at end of file + return {"many_astros": False, **event} diff --git a/benchmarks/600.workflows/610.gen/python/get_astros.py b/benchmarks/600.workflows/610.gen/python/get_astros.py index 627c65231..9532fb816 100644 --- a/benchmarks/600.workflows/610.gen/python/get_astros.py +++ b/benchmarks/600.workflows/610.gen/python/get_astros.py @@ -1,8 +1,7 @@ import requests + def handler(event): res = requests.get("http://api.open-notify.org/astros.json") - return { - "astros": res.json() - } \ No newline at end of file + return {"astros": res.json()} diff --git a/benchmarks/600.workflows/610.gen/python/many_people.py b/benchmarks/600.workflows/610.gen/python/many_people.py index 2d339f325..595eed0dd 100644 --- a/benchmarks/600.workflows/610.gen/python/many_people.py +++ b/benchmarks/600.workflows/610.gen/python/many_people.py @@ -1,5 +1,2 @@ def handler(event): - return { - "many_astros": True, - **event - } \ No newline at end of file + return {"many_astros": True, **event} diff --git a/benchmarks/600.workflows/610.gen/python/process_astros.py b/benchmarks/600.workflows/610.gen/python/process_astros.py index a981660e0..8483a105a 100644 --- a/benchmarks/600.workflows/610.gen/python/process_astros.py +++ b/benchmarks/600.workflows/610.gen/python/process_astros.py @@ -1,5 +1,2 @@ def handler(arr): - return { - "astros": arr, - "done": True - } \ No newline at end of file + return {"astros": arr, "done": True} diff --git a/benchmarks/600.workflows/6100.1000-genome/python/frequency.py b/benchmarks/600.workflows/6100.1000-genome/python/frequency.py index 969a76de2..344ff97b6 100644 
--- a/benchmarks/600.workflows/6100.1000-genome/python/frequency.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/frequency.py @@ -6,7 +6,7 @@ import os.path import matplotlib -matplotlib.use('Agg') +matplotlib.use("Agg") import matplotlib.pyplot as plt import collections from collections import Counter @@ -21,13 +21,13 @@ class ReadData: def read_names(self, POP, pop_dir, columns_file): tic = time.perf_counter() namefile = pop_dir + POP - f = open(namefile, 'r') + f = open(namefile, "r") text = f.read() f.close() text = text.split() all_ids = text[0:] file = columns_file - f = open(file, 'r') + f = open(file, "r") text = f.read() f.close() genome_ids = text.split() @@ -43,7 +43,7 @@ def read_rs_numbers(self, siftfile, SIFT): variations = {} map_variations = {} all_variations = [] - sift_file = open(siftfile, 'r') + sift_file = open(siftfile, "r") for item in sift_file: item = item.split() if len(item) > 2: @@ -53,19 +53,25 @@ def read_rs_numbers(self, siftfile, SIFT): return rs_numbers, map_variations - def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename): + def read_individuals( + self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename + ): tic = time.perf_counter() mutation_index_array = [] for name in ids: - filename = data_dir + individuals_merge_filename + '/' + chrom + '.' + name - f = open(filename, 'r') + filename = data_dir + individuals_merge_filename + "/" + chrom + "." 
+ name + f = open(filename, "r") text = [] for item in f: item = item.split() try: text.append(item[1]) except IndexError as e: - print("ERROR({}): while reading {}: (item: {})".format(str(e), filename, item)) + print( + "ERROR({}): while reading {}: (item: {})".format( + str(e), filename, item + ) + ) sifted_mutations = list(set(rs_numbers).intersection(text)) mutation_index_array.append(sifted_mutations) @@ -73,7 +79,6 @@ def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_f class Results: - def overlap_ind(self, ids, mutation_index_array, n_runs, n_indiv): n_p = len(mutation_index_array) tic = time.perf_counter() @@ -87,7 +92,9 @@ def overlap_ind(self, ids, mutation_index_array, n_runs, n_indiv): for pq in range(n_indiv): if 2 * pq >= len(randomized_list): break - b_multiset = collections.Counter(mutation_index_array[randomized_list[2 * pq]]) + b_multiset = collections.Counter( + mutation_index_array[randomized_list[2 * pq]] + ) r_ids.append(ids[randomized_list[2 * pq]]) result = result + b_multiset random_indiv.append(r_ids) @@ -104,60 +111,61 @@ def histogram_overlap(self, mutation_overlap, n_runs): class PlotData: - def plot_histogram_overlap(self, POP, histogram_overlap, outputFile, n_runs): tic = time.perf_counter() for run in range(n_runs): - output = outputFile + str(run) + '.png' + output = outputFile + str(run) + ".png" final_counts = [count for item, count in histogram_overlap[run].items()] N = len(final_counts) x = range(N) width = 1 / 1.5 bar1 = plt.bar(x, final_counts, width, color="grey") - plt.ylabel('Mutations') - plt.xlabel('Individuals') + plt.ylabel("Mutations") + plt.xlabel("Individuals") plt.xticks(np.arange(1, N + 1)) plt.savefig(output) plt.close() class WriteData: - - def write_histogram_overlap(self, histogram_overlapfile, histogram_overlap, n_runs, n_indiv): + def write_histogram_overlap( + self, histogram_overlapfile, histogram_overlap, n_runs, n_indiv + ): tic = time.perf_counter() for run in range(n_runs): 
- overlapfile = histogram_overlapfile + str(run) + '.txt' - f = open(overlapfile, 'w') - f.write('Number Individuals - Number Mutations \n') + overlapfile = histogram_overlapfile + str(run) + ".txt" + f = open(overlapfile, "w") + f.write("Number Individuals - Number Mutations \n") for i in range(1, n_indiv + 1): if i in histogram_overlap[run]: - f.write(str(i) + '-' + str(histogram_overlap[run][i]) + '\n') + f.write(str(i) + "-" + str(histogram_overlap[run][i]) + "\n") else: - f.write(str(i) + '-' + str(0) + '\n') + f.write(str(i) + "-" + str(0) + "\n") f.close() - def write_mutation_overlap(self, mutation_overlapfile, mutation_overlap, n_runs): tic = time.perf_counter() for run in range(n_runs): - overlapfile = mutation_overlapfile + str(run) + '.txt' - f = open(overlapfile, 'w') - f.write('Mutation Index- Number Overlapings \n') + overlapfile = mutation_overlapfile + str(run) + ".txt" + f = open(overlapfile, "w") + f.write("Mutation Index- Number Overlapings \n") for key, count in mutation_overlap[run].items(): - f.write(key + '-' + str(count) + '\n') + f.write(key + "-" + str(count) + "\n") f.close() def write_random_indiv(self, randomindiv_file, random_indiv, n_runs): tic = time.perf_counter() for run in range(n_runs): - randomfile = randomindiv_file + str(run) + '.txt' - f = open(randomfile, 'w') - f.write('Individuals \n') + randomfile = randomindiv_file + str(run) + ".txt" + f = open(randomfile, "w") + f.write("Individuals \n") for item in random_indiv[run]: f.write("%s\n" % item) f.close() - def write_mutation_index_array(self, mutation_index_array_file, mutation_index_array): + def write_mutation_index_array( + self, mutation_index_array_file, mutation_index_array + ): tic = time.perf_counter() f = open(mutation_index_array_file, "w") for item in mutation_index_array: @@ -166,107 +174,146 @@ def write_mutation_index_array(self, mutation_index_array_file, mutation_index_a def write_map_variations(self, map_variations_file, map_variations): tic = 
time.perf_counter() - f = open(map_variations_file, 'w') + f = open(map_variations_file, "w") for key, count in map_variations.items(): - f.write(key + '\t' + str(count) + '\n') + f.write(key + "\t" + str(count) + "\n") f.close() def handler(event): - POP = event["array_element"] - benchmark_bucket = event["sifting"]["benchmark_bucket"] - output_bucket = event["sifting"]["output_bucket"] - input_bucket = event["sifting"]["input_bucket"] - sifting_filename = event["sifting"]["output_sifting"] - individuals_merge_filename = event["individuals_merge"]["merge_outputfile_name"] - - #download files - siftfile = os.path.join("/tmp", "sifting.txt") - individuals_merge_file = os.path.join("/tmp", "individuals_merge.tar.gz") - pop_file = os.path.join("/tmp", POP) - columns_file = os.path.join("/tmp", "columns.txt") - - client = storage.storage.get_instance() - client.download(benchmark_bucket, output_bucket + '/' + sifting_filename, siftfile) - client.download(benchmark_bucket, output_bucket + '/' + individuals_merge_filename, individuals_merge_file) - client.download(benchmark_bucket, input_bucket + '/' + POP, pop_file) - client.download(benchmark_bucket, input_bucket + '/' + "columns.txt", columns_file) - - #chromosome number, doesn't matter here - just used for naming - c = 21 - - SIFT = 'NO-SIFT' - n_runs = 1000 - n_indiv = 52 - - data_dir = '/tmp/' - pop_dir = '/tmp/' - outdata_dir = "/tmp/chr{0}-{1}-freq/output_no_sift/".format(str(c), str(POP)) - plot_dir = "/tmp/chr{0}-{1}-freq/plots_no_sift/".format(str(c), str(POP)) - - if not os.path.exists(outdata_dir): - os.makedirs(outdata_dir, exist_ok=True) - if not os.path.exists(plot_dir): - os.makedirs(plot_dir, exist_ok=True) - - OutputFormat = '.png' - chrom = 'chr' + str(c) - - font = {'family': 'serif', 'size': 14} - plt.rc('font', **font) - - # untar input data - import tarfile - - tar = tarfile.open(individuals_merge_file) - tar.extractall(path='/tmp/' + individuals_merge_filename) - tar.close() - - rd = ReadData() - 
res = Results() - wr = WriteData() - pd = PlotData() - - histogram_overlapfile = outdata_dir + 'Histogram_mutation_overlap_chr' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '_' - mutation_overlapfile = outdata_dir + 'Mutation_overlap_chr' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '_' - mutation_index_array_file = outdata_dir + 'mutation_index_array' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - histogram_overlap_plot = plot_dir + 'Frequency_mutations' + str(c) + '_s' + \ - str(SIFT) + '_' + POP - map_variations_file = outdata_dir + 'map_variations' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - - randomindiv_file = outdata_dir + 'random_indiv' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '_' - - ids = rd.read_names(POP, pop_dir, columns_file) - n_pairs = len(ids) / 2 - - rs_numbers, map_variations = rd.read_rs_numbers(siftfile, SIFT) - mutation_index_array = rd.read_individuals(ids, rs_numbers, data_dir, chrom, individuals_merge_filename) - - wr.write_map_variations(map_variations_file, map_variations) - wr.write_mutation_index_array(mutation_index_array_file, mutation_index_array) - - mutation_overlap, random_indiv = res.overlap_ind(ids, mutation_index_array, n_runs, n_indiv) - histogram_overlap = res.histogram_overlap(mutation_overlap, n_runs) - - wr.write_mutation_overlap(mutation_overlapfile, mutation_overlap, n_runs) - wr.write_histogram_overlap(histogram_overlapfile, histogram_overlap, n_runs, n_indiv) - wr.write_random_indiv(randomindiv_file, random_indiv, n_runs) - - pd.plot_histogram_overlap(POP, histogram_overlap, histogram_overlap_plot, n_runs) - - # gen final output - tar = tarfile.open('/tmp/chr%s-%s-freq.tar.gz' % (c, POP), 'w:gz') - tar.add(outdata_dir) - tar.add(plot_dir) - tar.close() - result_name = client.upload(benchmark_bucket, output_bucket + '/' + 'chr%s-%s-freq.tar.gz' % (c, POP), '/tmp/chr%s-%s-freq.tar.gz' % (c, POP)) - result_name = result_name.replace(output_bucket + '/', '') - - return { - "output_frequency": 
result_name - } + POP = event["array_element"] + benchmark_bucket = event["sifting"]["benchmark_bucket"] + output_bucket = event["sifting"]["output_bucket"] + input_bucket = event["sifting"]["input_bucket"] + sifting_filename = event["sifting"]["output_sifting"] + individuals_merge_filename = event["individuals_merge"]["merge_outputfile_name"] + + # download files + siftfile = os.path.join("/tmp", "sifting.txt") + individuals_merge_file = os.path.join("/tmp", "individuals_merge.tar.gz") + pop_file = os.path.join("/tmp", POP) + columns_file = os.path.join("/tmp", "columns.txt") + + client = storage.storage.get_instance() + client.download(benchmark_bucket, output_bucket + "/" + sifting_filename, siftfile) + client.download( + benchmark_bucket, + output_bucket + "/" + individuals_merge_filename, + individuals_merge_file, + ) + client.download(benchmark_bucket, input_bucket + "/" + POP, pop_file) + client.download(benchmark_bucket, input_bucket + "/" + "columns.txt", columns_file) + + # chromosome number, doesn't matter here - just used for naming + c = 21 + + SIFT = "NO-SIFT" + n_runs = 1000 + n_indiv = 52 + + data_dir = "/tmp/" + pop_dir = "/tmp/" + outdata_dir = "/tmp/chr{0}-{1}-freq/output_no_sift/".format(str(c), str(POP)) + plot_dir = "/tmp/chr{0}-{1}-freq/plots_no_sift/".format(str(c), str(POP)) + + if not os.path.exists(outdata_dir): + os.makedirs(outdata_dir, exist_ok=True) + if not os.path.exists(plot_dir): + os.makedirs(plot_dir, exist_ok=True) + + OutputFormat = ".png" + chrom = "chr" + str(c) + + font = {"family": "serif", "size": 14} + plt.rc("font", **font) + + # untar input data + import tarfile + + tar = tarfile.open(individuals_merge_file) + tar.extractall(path="/tmp/" + individuals_merge_filename) + tar.close() + + rd = ReadData() + res = Results() + wr = WriteData() + pd = PlotData() + + histogram_overlapfile = ( + outdata_dir + + "Histogram_mutation_overlap_chr" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + "_" + ) + mutation_overlapfile = 
( + outdata_dir + + "Mutation_overlap_chr" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + "_" + ) + mutation_index_array_file = ( + outdata_dir + + "mutation_index_array" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + ".txt" + ) + histogram_overlap_plot = ( + plot_dir + "Frequency_mutations" + str(c) + "_s" + str(SIFT) + "_" + POP + ) + map_variations_file = ( + outdata_dir + "map_variations" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" + ) + + randomindiv_file = ( + outdata_dir + "random_indiv" + str(c) + "_s" + str(SIFT) + "_" + POP + "_" + ) + + ids = rd.read_names(POP, pop_dir, columns_file) + n_pairs = len(ids) / 2 + + rs_numbers, map_variations = rd.read_rs_numbers(siftfile, SIFT) + mutation_index_array = rd.read_individuals( + ids, rs_numbers, data_dir, chrom, individuals_merge_filename + ) + + wr.write_map_variations(map_variations_file, map_variations) + wr.write_mutation_index_array(mutation_index_array_file, mutation_index_array) + + mutation_overlap, random_indiv = res.overlap_ind( + ids, mutation_index_array, n_runs, n_indiv + ) + histogram_overlap = res.histogram_overlap(mutation_overlap, n_runs) + + wr.write_mutation_overlap(mutation_overlapfile, mutation_overlap, n_runs) + wr.write_histogram_overlap( + histogram_overlapfile, histogram_overlap, n_runs, n_indiv + ) + wr.write_random_indiv(randomindiv_file, random_indiv, n_runs) + + pd.plot_histogram_overlap(POP, histogram_overlap, histogram_overlap_plot, n_runs) + + # gen final output + tar = tarfile.open("/tmp/chr%s-%s-freq.tar.gz" % (c, POP), "w:gz") + tar.add(outdata_dir) + tar.add(plot_dir) + tar.close() + result_name = client.upload( + benchmark_bucket, + output_bucket + "/" + "chr%s-%s-freq.tar.gz" % (c, POP), + "/tmp/chr%s-%s-freq.tar.gz" % (c, POP), + ) + result_name = result_name.replace(output_bucket + "/", "") + + return {"output_frequency": result_name} diff --git a/benchmarks/600.workflows/6100.1000-genome/python/individuals.py 
b/benchmarks/600.workflows/6100.1000-genome/python/individuals.py index e156d0f5b..88f789f9a 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/individuals.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/individuals.py @@ -13,11 +13,13 @@ def compress(output, input_dir): with tarfile.open(output, "w:gz") as file: file.add(input_dir, arcname=os.path.basename(input_dir)) + def readfile(file): - with open(file, 'r') as f: + with open(file, "r") as f: content = f.readlines() return content + def handler(event): benchmark_bucket = event["benchmark_bucket"] individuals_bucket = event["bucket"] @@ -29,28 +31,31 @@ def handler(event): columns = event["columns"] columns_bucket = event["columns_bucket"] columns_path = os.path.join("/tmp", "columns.txt") - - client = storage.storage.get_instance() - client.download(benchmark_bucket, columns_bucket + '/' + columns, columns_path) - data = client.download_within_range(benchmark_bucket, columns_bucket + '/' + individuals_input, start_bytes, end_bytes) - - ndir = 'chr{}n-{}/'.format(21, individuals_input) + client = storage.storage.get_instance() + client.download(benchmark_bucket, columns_bucket + "/" + columns, columns_path) + data = client.download_within_range( + benchmark_bucket, + columns_bucket + "/" + individuals_input, + start_bytes, + end_bytes, + ) + + ndir = "chr{}n-{}/".format(21, individuals_input) ndir = os.path.join("/tmp", ndir) os.makedirs(ndir, exist_ok=True) - - regex = re.compile('(?!#)') - #print("data: ", data) + regex = re.compile("(?!#)") + # print("data: ", data) data = data.split("\n") data = list(filter(lambda line: regex.match(line) and line != "", data)) chrp_data = {} - columndata = readfile(columns_path)[0].rstrip('\n').split('\t') + columndata = readfile(columns_path)[0].rstrip("\n").split("\t") start_data = 9 # where the real data start, the first 0|1, 1|1, 1|0 or 0|0 # position of the last element (normally equals to len(data[0].split(' ')) - #end_data = 2504 + # end_data = 
2504 end_data = len(columndata) - start_data for i in range(0, end_data): @@ -60,40 +65,51 @@ def handler(event): filename = "{}/chr{}.{}".format(ndir, "21", name) chrp_data[i] = [] - with open(filename, 'w') as f: + with open(filename, "w") as f: zeilennummer = 0 for line in data: zeilennummer += 1 try: - first = line.split('\t')[col] # first =`echo $l | cut -d -f$i` + first = line.split("\t")[col] # first =`echo $l | cut -d -f$i` except Exception as e: - print("faulty line at col = ", col, "zeilennummer:", zeilennummer, " line : ", line) + print( + "faulty line at col = ", + col, + "zeilennummer:", + zeilennummer, + " line : ", + line, + ) raise e - #second =`echo $l | cut -d -f 2, 3, 4, 5, 8 --output-delimiter = ' '` - second = line.split('\t')[0:8] + # second =`echo $l | cut -d -f 2, 3, 4, 5, 8 --output-delimiter = ' '` + second = line.split("\t")[0:8] # We select the one we want - second = [elem for id, elem in enumerate(second) if id in [1, 2, 3, 4, 7]] - af_value = second[4].split(';')[8].split('=')[1] + second = [ + elem for id, elem in enumerate(second) if id in [1, 2, 3, 4, 7] + ] + af_value = second[4].split(";")[8].split("=")[1] # We replace with AF_Value second[4] = af_value try: - if ',' in af_value: + if "," in af_value: # We only keep the first value if more than one (that's what awk is doing) - af_value = float(af_value.split(',')[0]) + af_value = float(af_value.split(",")[0]) else: af_value = float(af_value) - elem = first.split('|') + elem = first.split("|") # We skip some lines that do not meet these conditions - if af_value >= 0.5 and elem[0] == '0': + if af_value >= 0.5 and elem[0] == "0": chrp_data[i].append(second) - elif af_value < 0.5 and elem[0] == '1': + elif af_value < 0.5 and elem[0] == "1": chrp_data[i].append(second) else: continue - f.write("{0} {1} {2} {3} {4}\n".format( - second[0], second[1], second[2], second[3], second[4]) + f.write( + "{0} {1} {2} {3} {4}\n".format( + second[0], second[1], second[2], second[3], second[4] + ) ) 
except ValueError: continue @@ -102,15 +118,17 @@ def handler(event): # tar -zcf .. /$outputfile . compress(os.path.join("/tmp/", outputfile), ndir) - outputfile_name = client.upload(benchmark_bucket, individuals_bucket + '/' + outputfile, os.path.join("/tmp/", outputfile)) - outputfile_name = outputfile_name.replace(individuals_bucket + '/', '') - + outputfile_name = client.upload( + benchmark_bucket, + individuals_bucket + "/" + outputfile, + os.path.join("/tmp/", outputfile), + ) + outputfile_name = outputfile_name.replace(individuals_bucket + "/", "") + # Cleaning temporary files try: shutil.rmtree(ndir) except OSError as e: print("Error: %s : %s" % (ndir, e.strerror)) - return { - "individuals_output": outputfile_name - } + return {"individuals_output": outputfile_name} diff --git a/benchmarks/600.workflows/6100.1000-genome/python/individuals_merge.py b/benchmarks/600.workflows/6100.1000-genome/python/individuals_merge.py index 7a563366b..0c3254025 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/individuals_merge.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/individuals_merge.py @@ -6,55 +6,63 @@ import shutil import datetime + def handler(event): - benchmark_bucket = event["benchmark_bucket"] - individuals_output_bucket = event["bucket"] - filenames = [] - for elem in event["blob"]: - filenames.append(elem["individuals_output"]) - - #download files - client = storage.storage.get_instance() - for file in filenames: - client.download(benchmark_bucket, individuals_output_bucket + '/' + file, os.path.join('/tmp', file)) - - #call merging with c and directories. 
- outputfile_name, outputfile = merging(21, filenames) - - #upload outputfile - outputfile_name = client.upload(benchmark_bucket, individuals_output_bucket + '/' + outputfile_name, outputfile) - outputfile_name = outputfile_name.replace(individuals_output_bucket + '/', '') - - - return { - "merge_outputfile_name": outputfile_name - } + benchmark_bucket = event["benchmark_bucket"] + individuals_output_bucket = event["bucket"] + filenames = [] + for elem in event["blob"]: + filenames.append(elem["individuals_output"]) + + # download files + client = storage.storage.get_instance() + for file in filenames: + client.download( + benchmark_bucket, + individuals_output_bucket + "/" + file, + os.path.join("/tmp", file), + ) + + # call merging with c and directories. + outputfile_name, outputfile = merging(21, filenames) + + # upload outputfile + outputfile_name = client.upload( + benchmark_bucket, individuals_output_bucket + "/" + outputfile_name, outputfile + ) + outputfile_name = outputfile_name.replace(individuals_output_bucket + "/", "") + + return {"merge_outputfile_name": outputfile_name} + def compress(archive, input_dir): with tarfile.open(archive, "w:gz") as f: f.add(input_dir, arcname="") + def extract_all(archive, output_dir): with tarfile.open(archive, "r:*") as f: f.extractall(output_dir) flist = f.getnames() - if flist[0] == '': + if flist[0] == "": flist = flist[1:] return flist + def readfile(filename): - with open(filename, 'r') as f: + with open(filename, "r") as f: content = f.readlines() return content + def writefile(filename, content): - with open(filename, 'w') as f: + with open(filename, "w") as f: f.writelines(content) + def merging(c, tar_files): tic = time.perf_counter() - merged_dir = "merged_chr{}".format(c) merged_dir = os.path.join("/tmp", merged_dir) os.makedirs(merged_dir, exist_ok=True) @@ -72,10 +80,9 @@ def merging(c, tar_files): else: data[filename] = content - - for filename,content in data.items(): + for filename, content in 
data.items(): writefile(os.path.join(merged_dir, filename), content) - + outputfile_name = "chr{}n.tar.gz".format(c) outputfile = os.path.join("/tmp", outputfile_name) diff --git a/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py b/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py index 2c377e47c..26b3abd81 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py @@ -6,7 +6,8 @@ import os import os.path import matplotlib -matplotlib.use('Agg') + +matplotlib.use("Agg") import matplotlib.pyplot as plt import itertools from matplotlib import pyplot @@ -19,26 +20,26 @@ from . import storage -class ReadData : - def read_names(self, POP, pop_dir, columns_file) : +class ReadData: + def read_names(self, POP, pop_dir, columns_file): tic = time.perf_counter() namefile = pop_dir + POP - f = open(namefile, 'r') + f = open(namefile, "r") text = f.read() f.close() text = text.split() all_ids = text[0:] file = columns_file - f = open(file, 'r') + f = open(file, "r") text = f.read() f.close() genome_ids = text.split() - + ids = list(set(all_ids) & set(genome_ids)) - + return ids - def read_rs_numbers(self, siftfile, SIFT) : + def read_rs_numbers(self, siftfile, SIFT): ## NB This file is in the format of: ## line number, rs number, ENSG number, SIFT, Phenotype tic = time.perf_counter() @@ -46,34 +47,36 @@ def read_rs_numbers(self, siftfile, SIFT) : variations = {} map_variations = {} all_variations = [] - sift_file = open(siftfile,'r') + sift_file = open(siftfile, "r") for item in sift_file: item = item.split() if len(item) > 2: rs_numbers.append(item[1]) map_variations[item[1]] = item[2] - + return rs_numbers, map_variations - - def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename) : + + def read_individuals( + self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename + ): tic = time.perf_counter() 
mutation_index_array = [] - total_mutations={} - total_mutations_list =[] - for name in ids : - filename = data_dir + individuals_merge_filename + '/' + chrom + '.' + name - f = open(filename, 'r') + total_mutations = {} + total_mutations_list = [] + for name in ids: + filename = data_dir + individuals_merge_filename + "/" + chrom + "." + name + f = open(filename, "r") text = f.read() f.close() text = text.split() sifted_mutations = list(set(rs_numbers).intersection(text)) mutation_index_array.append(sifted_mutations) - total_mutations[name]= len(sifted_mutations) + total_mutations[name] = len(sifted_mutations) total_mutations_list.append(len(sifted_mutations)) - - return mutation_index_array, total_mutations, total_mutations_list - - def read_pairs_overlap(self, indpairsfile) : + + return mutation_index_array, total_mutations, total_mutations_list + + def read_pairs_overlap(self, indpairsfile): tic = time.perf_counter() pairs_overlap = np.loadtxt(indpairsfile, unpack=True) pairs_overlap = np.transpose(pairs_overlap) @@ -81,94 +84,93 @@ def read_pairs_overlap(self, indpairsfile) : return pairs_overlap -class Results : - - def group_indivuals(self, total_mutations_list, n_runs) : +class Results: + def group_indivuals(self, total_mutations_list, n_runs): tic = time.perf_counter() n_group = 26 - random_mutations_list= [] + random_mutations_list = [] for run in range(n_runs): random_mutations_list.append(sample(total_mutations_list, n_group)) return random_mutations_list - def pair_individuals(self, mutation_index_array, n_runs) : + def pair_individuals(self, mutation_index_array, n_runs): tic = time.perf_counter() - + n_p = len(mutation_index_array) - n_pairs = int(round(n_p/2)) + n_pairs = int(round(n_p / 2)) list_p = np.linspace(0, n_p - 1, n_p).astype(int) pairs_overlap = np.zeros((n_runs, n_pairs)) - for run in range(n_runs) : - randomized_list = sample(list(list_p) , n_p) - for pq in range(n_pairs) : - array1 = mutation_index_array[randomized_list[2*pq]] - - 
array2 = mutation_index_array[randomized_list[2*pq]] + for run in range(n_runs): + randomized_list = sample(list(list_p), n_p) + for pq in range(n_pairs): + array1 = mutation_index_array[randomized_list[2 * pq]] + + array2 = mutation_index_array[randomized_list[2 * pq]] pair_array = set(array1) & set(array2) pairs_overlap[run][pq] = len(pair_array) return pairs_overlap - def total_pair_individuals (self, mutation_index_array) : + def total_pair_individuals(self, mutation_index_array): tic = time.perf_counter() n_p = len(mutation_index_array) total_pairs_overlap = np.zeros((n_p, n_p)) simetric_overlap = np.zeros((n_p, n_p)) for run in range(n_p): - array1 = mutation_index_array[run] - start = run +1 - for pq in range(start, n_p) : - array2 = mutation_index_array[pq] - pairs_array = set(array1) & set(array2) - total_pairs_overlap[run][pq]=len(pairs_array) - simetric_overlap[run][pq] = len(pairs_array) - simetric_overlap[pq][run]= len(pairs_array) - - return total_pairs_overlap , simetric_overlap - - def half_pair_individuals(self, mutation_index_array) : + array1 = mutation_index_array[run] + start = run + 1 + for pq in range(start, n_p): + array2 = mutation_index_array[pq] + pairs_array = set(array1) & set(array2) + total_pairs_overlap[run][pq] = len(pairs_array) + simetric_overlap[run][pq] = len(pairs_array) + simetric_overlap[pq][run] = len(pairs_array) + + return total_pairs_overlap, simetric_overlap + + def half_pair_individuals(self, mutation_index_array): tic = time.perf_counter() n_p = len(mutation_index_array) - n_pairs = int(round(n_p/2)) + n_pairs = int(round(n_p / 2)) pairs_overlap = np.zeros((n_pairs, n_pairs)) for run in range(n_pairs): array1 = mutation_index_array[run] - index =0 - for pq in range(n_pairs+1, n_p): + index = 0 + for pq in range(n_pairs + 1, n_p): array2 = mutation_index_array[pq] pairs_array = set(array1) & set(array2) - pairs_overlap[run][index]=len(pairs_array) + pairs_overlap[run][index] = len(pairs_array) return pairs_overlap - def 
gene_pairs(self, mutation_index_array) : + def gene_pairs(self, mutation_index_array): tic = time.perf_counter() n_p = len(mutation_index_array) gene_pair_list = {} - for pp in range(n_p) : + for pp in range(n_p): pairs = itertools.combinations(mutation_index_array[pp], 2) - for pair in pairs : + for pair in pairs: key = str(pair) - if key not in gene_pair_list : gene_pair_list[key] = 1 - else : gene_pair_list[key] += 1 + if key not in gene_pair_list: + gene_pair_list[key] = 1 + else: + gene_pair_list[key] += 1 - return gene_pair_list -class PlotData : - def individual_overlap(self, POP, pairs_overlap, outputFile, c, SIFT) : +class PlotData: + def individual_overlap(self, POP, pairs_overlap, outputFile, c, SIFT): tic = time.perf_counter() - - pairs_overlap = np.array(pairs_overlap) + + pairs_overlap = np.array(pairs_overlap) min_p = np.min(pairs_overlap) max_p = np.max(pairs_overlap) nbins = int(max_p) + 1 n_runs = len(pairs_overlap) - nbins = int(np.max(pairs_overlap)) bin_centres = np.linspace(0, nbins, nbins) bin_edges = np.linspace(-0.5, nbins + 0.5, nbins + 1) @@ -177,203 +179,321 @@ def individual_overlap(self, POP, pairs_overlap, outputFile, c, SIFT) : ax = fig.add_subplot(111) hists = [] max_h = 0 - for run in range(n_runs) : - h, edges = np.histogram(pairs_overlap[run], bins = bin_edges) - ax.plot(bin_centres, h, alpha = 0.5) + for run in range(n_runs): + h, edges = np.histogram(pairs_overlap[run], bins=bin_edges) + ax.plot(bin_centres, h, alpha=0.5) if len(h) > 0: max_h = max(max_h, max(h)) - plt.xlabel('Number of overlapping gene mutations', fontsize = 24) - plt.ylabel(r'frequency', fontsize = 28) - text1 = 'population ' + POP + '\n' +\ - 'chromosome ' + str(c) + '\n' + \ - 'SIFT < ' + str(SIFT) + '\n' + \ - str(n_runs) + ' runs' - plt.text(.95, .95, text1, fontsize = 24, - verticalalignment='top', horizontalalignment='right', - transform = ax.transAxes) - plt.savefig(outputFile) + plt.xlabel("Number of overlapping gene mutations", fontsize=24) + 
plt.ylabel(r"frequency", fontsize=28) + text1 = ( + "population " + + POP + + "\n" + + "chromosome " + + str(c) + + "\n" + + "SIFT < " + + str(SIFT) + + "\n" + + str(n_runs) + + " runs" + ) + plt.text( + 0.95, + 0.95, + text1, + fontsize=24, + verticalalignment="top", + horizontalalignment="right", + transform=ax.transAxes, + ) + plt.savefig(outputFile) plt.close() def total_colormap_overlap(self, POP, total_pairs_overlap, outputFile): tic = time.perf_counter() fig = plt.figure() - cmap = mpl.colors.ListedColormap(['blue','black','red', 'green', 'pink']) - img = pyplot.imshow(total_pairs_overlap,interpolation='nearest', cmap = cmap, origin='lower') - pyplot.colorbar(img,cmap=cmap) + cmap = mpl.colors.ListedColormap(["blue", "black", "red", "green", "pink"]) + img = pyplot.imshow( + total_pairs_overlap, interpolation="nearest", cmap=cmap, origin="lower" + ) + pyplot.colorbar(img, cmap=cmap) - plt.savefig(outputFile) + plt.savefig(outputFile) plt.close() -class WriteData : - def write_pair_individuals(self, indpairsfile, pairs_overlap) : +class WriteData: + def write_pair_individuals(self, indpairsfile, pairs_overlap): tic = time.perf_counter() - np.savetxt(indpairsfile, pairs_overlap, fmt = '%i') - - def write_gene_pairs(self, genepairsfile, gene_pair_list) : + np.savetxt(indpairsfile, pairs_overlap, fmt="%i") + + def write_gene_pairs(self, genepairsfile, gene_pair_list): tic = time.perf_counter() - f = open(genepairsfile, 'w') - for key, count in gene_pair_list.items() : - f.write(key + '\t' + str(count) + '\n') + f = open(genepairsfile, "w") + for key, count in gene_pair_list.items(): + f.write(key + "\t" + str(count) + "\n") f.close() - - def write_total_indiv(self, total_mutations_filename, total_mutations) : + + def write_total_indiv(self, total_mutations_filename, total_mutations): tic = time.perf_counter() - f = open(total_mutations_filename, 'w') - for key, count in total_mutations.items() : - f.write(key + '\t' + str(count) + '\n') + f = 
open(total_mutations_filename, "w") + for key, count in total_mutations.items(): + f.write(key + "\t" + str(count) + "\n") f.close() - - def write_random_mutations_list(self, random_mutations_filename, random_mutations_list, n_runs) : + + def write_random_mutations_list( + self, random_mutations_filename, random_mutations_list, n_runs + ): for run in range(n_runs): - filename= random_mutations_filename +'_run_' + str(run) + '.txt' - f = open(filename, 'w') - f.writelines(["%s\n" % item for item in random_mutations_list[run]]) - - def write_mutation_index_array(self, mutation_index_array_file, mutation_index_array): - f=open(mutation_index_array_file,"w") + filename = random_mutations_filename + "_run_" + str(run) + ".txt" + f = open(filename, "w") + f.writelines(["%s\n" % item for item in random_mutations_list[run]]) + + def write_mutation_index_array( + self, mutation_index_array_file, mutation_index_array + ): + f = open(mutation_index_array_file, "w") for item in mutation_index_array: f.write("%s\n" % item) f.close() - def write_map_variations(self, map_variations_file, map_variations) : + def write_map_variations(self, map_variations_file, map_variations): tic = time.perf_counter() - f = open(map_variations_file, 'w') - for key, count in map_variations.items() : - f.write(key + '\t' + str(count) + '\n') + f = open(map_variations_file, "w") + for key, count in map_variations.items(): + f.write(key + "\t" + str(count) + "\n") f.close() - def handler(event): - POP = event["array_element"] - benchmark_bucket = event["sifting"]["benchmark_bucket"] - output_bucket = event["sifting"]["output_bucket"] - input_bucket = event["sifting"]["input_bucket"] - sifting_filename = event["sifting"]["output_sifting"] - individuals_merge_filename = event["individuals_merge"]["merge_outputfile_name"] - - - #download files - siftfile = os.path.join("/tmp", "sifting.txt") - individuals_merge_file = os.path.join("/tmp", "individuals_merge.tar.gz") - pop_file = os.path.join("/tmp", POP) 
- columns_file = os.path.join("/tmp", "columns.txt") - - client = storage.storage.get_instance() - client.download(benchmark_bucket, output_bucket + '/' + sifting_filename, siftfile) - client.download(benchmark_bucket, output_bucket + '/' + individuals_merge_filename, individuals_merge_file) - client.download(benchmark_bucket, input_bucket + '/' + POP, pop_file) - client.download(benchmark_bucket, input_bucket + '/' + "columns.txt", columns_file) - #chromosome no, doesn't matter. - c = 21 - - SIFT = 'NO-SIFT' - n_runs = 1 - - data_dir = '/tmp/' - pop_dir = '/tmp/' - outdata_dir = "/tmp/chr{0}-{1}/output_no_sift/".format(str(c), str(POP)) - plots_dir = "/tmp/chr{0}-{1}/plots_no_sift/".format(str(c), str(POP)) - - if not os.path.exists(outdata_dir): - os.makedirs(outdata_dir, exist_ok=True) - if not os.path.exists(plots_dir): - os.makedirs(plots_dir, exist_ok=True) - - OutputFormat = '.png' - chrom = 'chr' + str(c) - - font = {'family':'serif', - 'size':14 } - plt.rc('font', **font) - - - # untar input data - import tarfile - tar = tarfile.open(individuals_merge_file) - tar.extractall(path='/tmp/' + individuals_merge_filename) - tar.close() - - tic = time.perf_counter() - - rd = ReadData() - res = Results() - wr = WriteData() - pd = PlotData() - - half_indpairsfile = outdata_dir + 'individual_half_pairs_overlap_chr' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - total_indpairsfile = outdata_dir + 'total_individual_pairs_overlap_chr' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - genepairsfile = outdata_dir + 'gene_pairs_count_chr' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - random_indpairsfile = outdata_dir + '100_individual_overlap_chr' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - - colormap = plots_dir + 'colormap_distribution_c' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + OutputFormat - half_overlap = plots_dir + 'half_distribution_c' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + OutputFormat - total_overlap = plots_dir + 
'total_distribution_c' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + OutputFormat - random_overlap = plots_dir + '100_distribution_c' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + OutputFormat - - total_mutations_filename = outdata_dir + 'total_mutations_individual' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - random_mutations_filename = outdata_dir + 'random_mutations_individual' + str(c) + '_s' + \ - str(SIFT) + '_' + POP - - mutation_index_array_file = outdata_dir + 'mutation_index_array' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - - map_variations_file = outdata_dir + 'map_variations' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - - - - ids = rd.read_names(POP, pop_dir, columns_file) - n_pairs = len(ids)/2 - - - rs_numbers, map_variations = rd.read_rs_numbers(siftfile, SIFT) - mutation_index_array, total_mutations, total_mutations_list = rd.read_individuals(ids, rs_numbers, data_dir, chrom, individuals_merge_filename) - wr.write_total_indiv(total_mutations_filename, total_mutations) - wr.write_map_variations(map_variations_file, map_variations) - - #cross-correlations mutations overlapping - half_pairs_overlap = res.half_pair_individuals(mutation_index_array) - total_pairs_overlap, simetric_overlap = res.total_pair_individuals(mutation_index_array) - random_pairs_overlap = res.pair_individuals(mutation_index_array, n_runs) - - wr.write_mutation_index_array(mutation_index_array_file, mutation_index_array) - wr.write_pair_individuals(half_indpairsfile, half_pairs_overlap) - wr.write_pair_individuals(total_indpairsfile, total_pairs_overlap) - wr.write_pair_individuals(random_indpairsfile, random_pairs_overlap,) - - pd.individual_overlap(POP, half_pairs_overlap, half_overlap, c, SIFT) - pd.individual_overlap(POP, simetric_overlap, total_overlap, c, SIFT) - pd.individual_overlap(POP, random_pairs_overlap, random_overlap, c, SIFT) - pd.total_colormap_overlap(POP, total_pairs_overlap, colormap) - - #list of frecuency of mutations in 26 
individuals - random_mutations_list=res.group_indivuals(total_mutations_list, n_runs) - wr.write_random_mutations_list(random_mutations_filename, random_mutations_list, n_runs) - - # gen overlapping - gene_pair_list = res.gene_pairs(mutation_index_array) - wr.write_gene_pairs(genepairsfile, gene_pair_list) - - # gen final output - tar = tarfile.open('/tmp/chr%s-%s.tar.gz' % (c, POP), 'w:gz') - tar.add(outdata_dir) - tar.add(plots_dir) - tar.close() - result_name = client.upload(benchmark_bucket, output_bucket + '/' + 'chr%s-%s.tar.gz' % (c, POP), '/tmp/chr%s-%s.tar.gz' % (c, POP)) - result_name = result_name.replace(output_bucket + '/', '') - - return { - "output_mutation_overlap": result_name - } + POP = event["array_element"] + benchmark_bucket = event["sifting"]["benchmark_bucket"] + output_bucket = event["sifting"]["output_bucket"] + input_bucket = event["sifting"]["input_bucket"] + sifting_filename = event["sifting"]["output_sifting"] + individuals_merge_filename = event["individuals_merge"]["merge_outputfile_name"] + + # download files + siftfile = os.path.join("/tmp", "sifting.txt") + individuals_merge_file = os.path.join("/tmp", "individuals_merge.tar.gz") + pop_file = os.path.join("/tmp", POP) + columns_file = os.path.join("/tmp", "columns.txt") + + client = storage.storage.get_instance() + client.download(benchmark_bucket, output_bucket + "/" + sifting_filename, siftfile) + client.download( + benchmark_bucket, + output_bucket + "/" + individuals_merge_filename, + individuals_merge_file, + ) + client.download(benchmark_bucket, input_bucket + "/" + POP, pop_file) + client.download(benchmark_bucket, input_bucket + "/" + "columns.txt", columns_file) + # chromosome no, doesn't matter. 
+ c = 21 + + SIFT = "NO-SIFT" + n_runs = 1 + + data_dir = "/tmp/" + pop_dir = "/tmp/" + outdata_dir = "/tmp/chr{0}-{1}/output_no_sift/".format(str(c), str(POP)) + plots_dir = "/tmp/chr{0}-{1}/plots_no_sift/".format(str(c), str(POP)) + + if not os.path.exists(outdata_dir): + os.makedirs(outdata_dir, exist_ok=True) + if not os.path.exists(plots_dir): + os.makedirs(plots_dir, exist_ok=True) + + OutputFormat = ".png" + chrom = "chr" + str(c) + + font = {"family": "serif", "size": 14} + plt.rc("font", **font) + + # untar input data + import tarfile + + tar = tarfile.open(individuals_merge_file) + tar.extractall(path="/tmp/" + individuals_merge_filename) + tar.close() + + tic = time.perf_counter() + + rd = ReadData() + res = Results() + wr = WriteData() + pd = PlotData() + + half_indpairsfile = ( + outdata_dir + + "individual_half_pairs_overlap_chr" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + ".txt" + ) + total_indpairsfile = ( + outdata_dir + + "total_individual_pairs_overlap_chr" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + ".txt" + ) + genepairsfile = ( + outdata_dir + + "gene_pairs_count_chr" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + ".txt" + ) + random_indpairsfile = ( + outdata_dir + + "100_individual_overlap_chr" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + ".txt" + ) + + colormap = ( + plots_dir + + "colormap_distribution_c" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + OutputFormat + ) + half_overlap = ( + plots_dir + + "half_distribution_c" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + OutputFormat + ) + total_overlap = ( + plots_dir + + "total_distribution_c" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + OutputFormat + ) + random_overlap = ( + plots_dir + + "100_distribution_c" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + OutputFormat + ) + + total_mutations_filename = ( + outdata_dir + + "total_mutations_individual" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + ".txt" + ) + 
random_mutations_filename = ( + outdata_dir + + "random_mutations_individual" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + ) + + mutation_index_array_file = ( + outdata_dir + + "mutation_index_array" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + ".txt" + ) + + map_variations_file = ( + outdata_dir + "map_variations" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" + ) + + ids = rd.read_names(POP, pop_dir, columns_file) + n_pairs = len(ids) / 2 + + rs_numbers, map_variations = rd.read_rs_numbers(siftfile, SIFT) + mutation_index_array, total_mutations, total_mutations_list = rd.read_individuals( + ids, rs_numbers, data_dir, chrom, individuals_merge_filename + ) + wr.write_total_indiv(total_mutations_filename, total_mutations) + wr.write_map_variations(map_variations_file, map_variations) + + # cross-correlations mutations overlapping + half_pairs_overlap = res.half_pair_individuals(mutation_index_array) + total_pairs_overlap, simetric_overlap = res.total_pair_individuals( + mutation_index_array + ) + random_pairs_overlap = res.pair_individuals(mutation_index_array, n_runs) + + wr.write_mutation_index_array(mutation_index_array_file, mutation_index_array) + wr.write_pair_individuals(half_indpairsfile, half_pairs_overlap) + wr.write_pair_individuals(total_indpairsfile, total_pairs_overlap) + wr.write_pair_individuals( + random_indpairsfile, + random_pairs_overlap, + ) + + pd.individual_overlap(POP, half_pairs_overlap, half_overlap, c, SIFT) + pd.individual_overlap(POP, simetric_overlap, total_overlap, c, SIFT) + pd.individual_overlap(POP, random_pairs_overlap, random_overlap, c, SIFT) + pd.total_colormap_overlap(POP, total_pairs_overlap, colormap) + + # list of frecuency of mutations in 26 individuals + random_mutations_list = res.group_indivuals(total_mutations_list, n_runs) + wr.write_random_mutations_list( + random_mutations_filename, random_mutations_list, n_runs + ) + + # gen overlapping + gene_pair_list = res.gene_pairs(mutation_index_array) + 
wr.write_gene_pairs(genepairsfile, gene_pair_list) + + # gen final output + tar = tarfile.open("/tmp/chr%s-%s.tar.gz" % (c, POP), "w:gz") + tar.add(outdata_dir) + tar.add(plots_dir) + tar.close() + result_name = client.upload( + benchmark_bucket, + output_bucket + "/" + "chr%s-%s.tar.gz" % (c, POP), + "/tmp/chr%s-%s.tar.gz" % (c, POP), + ) + result_name = result_name.replace(output_bucket + "/", "") + + return {"output_mutation_overlap": result_name} diff --git a/benchmarks/600.workflows/6100.1000-genome/python/sifting.py b/benchmarks/600.workflows/6100.1000-genome/python/sifting.py index 2add45bdb..670c3d4e8 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/sifting.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/sifting.py @@ -4,71 +4,80 @@ import subprocess import datetime + def readfile(file): - with open(file, 'r') as f: + with open(file, "r") as f: content = f.readlines() return content + def handler(event): - benchmark_bucket = event["benchmark_bucket"] - input_bucket = event["columns_bucket"] - input_filename = event["sifting_input"] - inputfile = os.path.join("/tmp", "sifting_file.vcf") - - output_bucket = event["bucket"] - - - client = storage.storage.get_instance() - client.download(benchmark_bucket, input_bucket + '/' + input_filename, inputfile) - - #c is the chromosome number - doesn't matter here. 
- c = 21 - final_name = 'sifted.SIFT.chr{}.txt'.format(c) - final = os.path.join("/tmp", final_name) - - rawdata = readfile(inputfile) - - - r1 = re.compile('.*(#).*') - header = len(list(filter(r1.match, rawdata[:1000]))) - - siftfile = 'SIFT.chr{}.vcf'.format(c) - siftfile = os.path.join("/tmp", siftfile) - with open(siftfile, 'w') as f: - subprocess.run(["grep -n \"deleterious\|tolerated\" {}".format(inputfile)], shell=True, stdout=f) - - data_temp = readfile(siftfile) - - r3 = re.compile('.*(rs).*') - data = list(filter(r3.match, data_temp)) - - - with open(final, 'w') as f: - for l in data: - line = str(int(l.split('\t')[0].split(':')[0]) - int(header)) - id = l.split('\t')[2] - - sifts = l.split('\t')[7].split('|') - sifts = sifts[4] + ' ' + sifts[16] + ' ' + sifts[17] - sifts = sifts.replace('(', ' ').replace(')', '') - - temp = (line + ' ' + id + ' ' + sifts).split(' ') - - if temp[3] == '' or temp[4] == '': - f.write("{} {} {}\n".format(temp[0], temp[1], temp[2])) - elif temp[5] == '': - f.write("{} {} {} {}\n".format(temp[0], temp[1], temp[2], temp[4])) - else: - f.write("{} {} {} {} {}\n".format(temp[0], temp[1], temp[2], temp[4], temp[6])) - - os.remove(siftfile) - final_name = client.upload(benchmark_bucket, output_bucket + '/' + final_name, final) - final_name = final_name.replace(output_bucket + '/', '') - - return { - "output_bucket": output_bucket, - "benchmark_bucket": benchmark_bucket, - "output_sifting": final_name, - "populations": event["populations"], - "input_bucket": input_bucket - } + benchmark_bucket = event["benchmark_bucket"] + input_bucket = event["columns_bucket"] + input_filename = event["sifting_input"] + inputfile = os.path.join("/tmp", "sifting_file.vcf") + + output_bucket = event["bucket"] + + client = storage.storage.get_instance() + client.download(benchmark_bucket, input_bucket + "/" + input_filename, inputfile) + + # c is the chromosome number - doesn't matter here. 
+ c = 21 + final_name = "sifted.SIFT.chr{}.txt".format(c) + final = os.path.join("/tmp", final_name) + + rawdata = readfile(inputfile) + + r1 = re.compile(".*(#).*") + header = len(list(filter(r1.match, rawdata[:1000]))) + + siftfile = "SIFT.chr{}.vcf".format(c) + siftfile = os.path.join("/tmp", siftfile) + with open(siftfile, "w") as f: + subprocess.run( + ['grep -n "deleterious\|tolerated" {}'.format(inputfile)], + shell=True, + stdout=f, + ) + + data_temp = readfile(siftfile) + + r3 = re.compile(".*(rs).*") + data = list(filter(r3.match, data_temp)) + + with open(final, "w") as f: + for l in data: + line = str(int(l.split("\t")[0].split(":")[0]) - int(header)) + id = l.split("\t")[2] + + sifts = l.split("\t")[7].split("|") + sifts = sifts[4] + " " + sifts[16] + " " + sifts[17] + sifts = sifts.replace("(", " ").replace(")", "") + + temp = (line + " " + id + " " + sifts).split(" ") + + if temp[3] == "" or temp[4] == "": + f.write("{} {} {}\n".format(temp[0], temp[1], temp[2])) + elif temp[5] == "": + f.write("{} {} {} {}\n".format(temp[0], temp[1], temp[2], temp[4])) + else: + f.write( + "{} {} {} {} {}\n".format( + temp[0], temp[1], temp[2], temp[4], temp[6] + ) + ) + + os.remove(siftfile) + final_name = client.upload( + benchmark_bucket, output_bucket + "/" + final_name, final + ) + final_name = final_name.replace(output_bucket + "/", "") + + return { + "output_bucket": output_bucket, + "benchmark_bucket": benchmark_bucket, + "output_sifting": final_name, + "populations": event["populations"], + "input_bucket": input_bucket, + } diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py b/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py index f02c3b789..dfeb53979 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py @@ -7,16 +7,18 @@ client = storage.storage.get_instance() + def 
compress(output, input_dir): with tarfile.open(output, "w:gz") as file: file.add(input_dir, arcname=os.path.basename(input_dir)) def readfile(file): - with open(file, 'r') as f: + with open(file, "r") as f: content = f.readlines() return content + def handler(event): benchmark_bucket = event["benchmark_bucket"] individuals_bucket = event["bucket"] @@ -28,27 +30,31 @@ def handler(event): columns = event["columns"] columns_bucket = event["columns_bucket"] columns_path = os.path.join("/tmp", "columns.txt") - - client = storage.storage.get_instance() - client.download(benchmark_bucket, columns_bucket + '/' + columns, columns_path) - data = client.download_within_range(benchmark_bucket, columns_bucket + '/' + individuals_input, start_bytes, end_bytes) - ndir = 'chr{}n-{}/'.format(21, individuals_input) + client = storage.storage.get_instance() + client.download(benchmark_bucket, columns_bucket + "/" + columns, columns_path) + data = client.download_within_range( + benchmark_bucket, + columns_bucket + "/" + individuals_input, + start_bytes, + end_bytes, + ) + + ndir = "chr{}n-{}/".format(21, individuals_input) ndir = os.path.join("/tmp", ndir) os.makedirs(ndir, exist_ok=True) - - regex = re.compile('(?!#)') - #print("data: ", data) + regex = re.compile("(?!#)") + # print("data: ", data) data = data.split("\n") data = list(filter(lambda line: regex.match(line) and line != "", data)) chrp_data = {} - columndata = readfile(columns_path)[0].rstrip('\n').split('\t') + columndata = readfile(columns_path)[0].rstrip("\n").split("\t") start_data = 9 # where the real data start, the first 0|1, 1|1, 1|0 or 0|0 # position of the last element (normally equals to len(data[0].split(' ')) - #end_data = 2504 + # end_data = 2504 end_data = len(columndata) - start_data for i in range(0, end_data): @@ -58,40 +64,51 @@ def handler(event): filename = "{}/chr{}.{}".format(ndir, "21", name) chrp_data[i] = [] - with open(filename, 'w') as f: + with open(filename, "w") as f: zeilennummer = 0 for 
line in data: zeilennummer += 1 try: - first = line.split('\t')[col] # first =`echo $l | cut -d -f$i` + first = line.split("\t")[col] # first =`echo $l | cut -d -f$i` except Exception as e: - print("faulty line at col = ", col, "zeilennummer:", zeilennummer, " line : ", line) + print( + "faulty line at col = ", + col, + "zeilennummer:", + zeilennummer, + " line : ", + line, + ) raise e - #second =`echo $l | cut -d -f 2, 3, 4, 5, 8 --output-delimiter = ' '` - second = line.split('\t')[0:8] + # second =`echo $l | cut -d -f 2, 3, 4, 5, 8 --output-delimiter = ' '` + second = line.split("\t")[0:8] # We select the one we want - second = [elem for id, elem in enumerate(second) if id in [1, 2, 3, 4, 7]] - af_value = second[4].split(';')[8].split('=')[1] + second = [ + elem for id, elem in enumerate(second) if id in [1, 2, 3, 4, 7] + ] + af_value = second[4].split(";")[8].split("=")[1] # We replace with AF_Value second[4] = af_value try: - if ',' in af_value: + if "," in af_value: # We only keep the first value if more than one (that's what awk is doing) - af_value = float(af_value.split(',')[0]) + af_value = float(af_value.split(",")[0]) else: af_value = float(af_value) - elem = first.split('|') + elem = first.split("|") # We skip some lines that do not meet these conditions - if af_value >= 0.5 and elem[0] == '0': + if af_value >= 0.5 and elem[0] == "0": chrp_data[i].append(second) - elif af_value < 0.5 and elem[0] == '1': + elif af_value < 0.5 and elem[0] == "1": chrp_data[i].append(second) else: continue - f.write("{0} {1} {2} {3} {4}\n".format( - second[0], second[1], second[2], second[3], second[4]) + f.write( + "{0} {1} {2} {3} {4}\n".format( + second[0], second[1], second[2], second[3], second[4] + ) ) except ValueError: continue @@ -100,8 +117,12 @@ def handler(event): # tar -zcf .. /$outputfile . 
compress(os.path.join("/tmp/", outputfile), ndir) - outputfile_name = client.upload(benchmark_bucket, individuals_bucket + '/' + outputfile, os.path.join("/tmp/", outputfile)) - outputfile_name = outputfile_name.replace(individuals_bucket + '/', '') + outputfile_name = client.upload( + benchmark_bucket, + individuals_bucket + "/" + outputfile, + os.path.join("/tmp/", outputfile), + ) + outputfile_name = outputfile_name.replace(individuals_bucket + "/", "") # Cleaning temporary files try: @@ -109,6 +130,4 @@ def handler(event): except OSError as e: print("Error: %s : %s" % (ndir, e.strerror)) - return { - "individuals_output": outputfile_name - } + return {"individuals_output": outputfile_name} diff --git a/benchmarks/600.workflows/620.func-invo/input.py b/benchmarks/600.workflows/620.func-invo/input.py index afefd5d9a..19b210c21 100644 --- a/benchmarks/600.workflows/620.func-invo/input.py +++ b/benchmarks/600.workflows/620.func-invo/input.py @@ -1,16 +1,25 @@ size_generators = { - 'test' : 10, - 'small' : 2**5, - 'large': 2**20, - '2e5': 2**5, - '2e8': 2**8, - '2e10': 2**10, - '2e12': 2**12, - '2e14': 2**14, - '2e16': 2**16, - '2e18': 2**18, - '2e18-1000': (2**18)-1000 + "test": 10, + "small": 2**5, + "large": 2**20, + "2e5": 2**5, + "2e8": 2**8, + "2e10": 2**10, + "2e12": 2**12, + "2e14": 2**14, + "2e16": 2**16, + "2e18": 2**18, + "2e18-1000": (2**18) - 1000, } -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'size': size_generators[size] } + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): + return {"size": size_generators[size]} diff --git a/benchmarks/600.workflows/620.func-invo/python/gen.py b/benchmarks/600.workflows/620.func-invo/python/gen.py index 60c328fee..801a247a4 100644 --- a/benchmarks/600.workflows/620.func-invo/python/gen.py +++ b/benchmarks/600.workflows/620.func-invo/python/gen.py @@ -1,5 +1,6 @@ from 
random import shuffle + def handler(event): size = int(event["size"]) elems = list(range(size)) @@ -11,4 +12,4 @@ def handler(event): if len(data) > size: break - return {'len' : data[:size]} + return {"len": data[:size]} diff --git a/benchmarks/600.workflows/620.func-invo/python/process.py b/benchmarks/600.workflows/620.func-invo/python/process.py index 084062854..54dc04eda 100644 --- a/benchmarks/600.workflows/620.func-invo/python/process.py +++ b/benchmarks/600.workflows/620.func-invo/python/process.py @@ -1,7 +1,8 @@ from random import shuffle + def handler(event): - size = len(event['len']) + size = len(event["len"]) elems = list(range(size)) shuffle(elems) @@ -11,4 +12,4 @@ def handler(event): if len(data) > size: break - return {'len' : data[:size]} + return {"len": data[:size]} diff --git a/benchmarks/600.workflows/6200.trip-booking/input.py b/benchmarks/600.workflows/6200.trip-booking/input.py index 4c261f755..e4f4a83dd 100644 --- a/benchmarks/600.workflows/6200.trip-booking/input.py +++ b/benchmarks/600.workflows/6200.trip-booking/input.py @@ -1,23 +1,20 @@ - def allocate_nosql() -> dict: return { - "flights": { - "primary_key": "trip_id", - "secondary_key": "flight_id" - }, - "car_rentals": { - "primary_key": "trip_id", - "secondary_key": "rental_id" - }, - "hotel_booking": { - "primary_key": "trip_id", - "secondary_key": "booking_id" - } + "flights": {"primary_key": "trip_id", "secondary_key": "flight_id"}, + "car_rentals": {"primary_key": "trip_id", "secondary_key": "rental_id"}, + "hotel_booking": {"primary_key": "trip_id", "secondary_key": "booking_id"}, } + def generate_input( - data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, ): input_config = {} @@ -37,13 +34,13 @@ def generate_input( "rental_class": "compact", "rental_price_max": "100", "rental_duration": 3, - "rental_requests": ["full_tank", "CDW", 
"assistance"] + "rental_requests": ["full_tank", "CDW", "assistance"], } size_results = { "test": {"result": "success"}, "small": {"result": "failure", "reason": "hotel"}, - "large": {"result": "failure", "reason": "confirm"} + "large": {"result": "failure", "reason": "confirm"}, } trip_details["expected_result"] = size_results[size] diff --git a/benchmarks/600.workflows/6200.trip-booking/python/cancel_flight.py b/benchmarks/600.workflows/6200.trip-booking/python/cancel_flight.py index f94da3dfe..82803e464 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/cancel_flight.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/cancel_flight.py @@ -10,7 +10,9 @@ def handler(event): # Confirm flight nosql_table_name = "flights" flight_id = event["flight_id"] - nosql_client.delete(nosql_table_name, ("trip_id", trip_id), ("flight_id", flight_id)) + nosql_client.delete( + nosql_table_name, ("trip_id", trip_id), ("flight_id", flight_id) + ) event.pop("flight_id") return event diff --git a/benchmarks/600.workflows/6200.trip-booking/python/cancel_hotel.py b/benchmarks/600.workflows/6200.trip-booking/python/cancel_hotel.py index e1f69077d..eefce2c30 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/cancel_hotel.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/cancel_hotel.py @@ -10,6 +10,8 @@ def handler(event): # Confirm flight nosql_table_name = "hotel_booking" booking_id = event["booking_id"] - nosql_client.delete(nosql_table_name, ("trip_id", trip_id), ("booking_id", booking_id)) + nosql_client.delete( + nosql_table_name, ("trip_id", trip_id), ("booking_id", booking_id) + ) return {"trip_id": trip_id, "status": "failure"} diff --git a/benchmarks/600.workflows/6200.trip-booking/python/cancel_rental.py b/benchmarks/600.workflows/6200.trip-booking/python/cancel_rental.py index f8ff38044..5cdd10787 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/cancel_rental.py +++ 
b/benchmarks/600.workflows/6200.trip-booking/python/cancel_rental.py @@ -10,7 +10,9 @@ def handler(event): # Confirm flight nosql_table_name = "car_rentals" rental_id = event["rental_id"] - nosql_client.delete(nosql_table_name, ("trip_id", trip_id), ("rental_id", rental_id)) + nosql_client.delete( + nosql_table_name, ("trip_id", trip_id), ("rental_id", rental_id) + ) event.pop("rental_id") return event diff --git a/benchmarks/600.workflows/6200.trip-booking/python/confirm.py b/benchmarks/600.workflows/6200.trip-booking/python/confirm.py index 3a555f6a4..347eeff5f 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/confirm.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/confirm.py @@ -6,7 +6,10 @@ def handler(event): expected_result = event["expected_result"] - if expected_result["result"] == "failure" and expected_result["reason"] == "confirm": + if ( + expected_result["result"] == "failure" + and expected_result["reason"] == "confirm" + ): raise RuntimeError("Failed to confirm the booking!") trip_id = event["trip_id"] diff --git a/benchmarks/600.workflows/630.parallel-sleep/input.py b/benchmarks/600.workflows/630.parallel-sleep/input.py index 092981d7a..073afa2d3 100644 --- a/benchmarks/600.workflows/630.parallel-sleep/input.py +++ b/benchmarks/600.workflows/630.parallel-sleep/input.py @@ -1,34 +1,44 @@ - #threads-duration +# threads-duration size_generators = { - 'test' : (2, 2), - 'small': (16, 20), - 'large': (50, 2), - '2-1': (2, 1), - '4-1': (4, 1), - '8-1': (8, 1), - '16-1': (16, 1), - '2-5': (2, 5), - '4-5': (4, 5), - '8-5': (8, 5), - '16-5': (16, 5), - '2-10': (2, 10), - '4-10': (4, 10), - '8-10': (8, 10), - '16-10': (16, 10), - '2-15': (2, 15), - '4-15': (4, 15), - '8-15': (8, 15), - '16-15': (16, 15), - '2-20': (2, 20), - '4-20': (4, 20), - '8-20': (8, 20), - '16-20': (16, 20), - '50-1': (50, 1) + "test": (2, 2), + "small": (16, 20), + "large": (50, 2), + "2-1": (2, 1), + "4-1": (4, 1), + "8-1": (8, 1), + "16-1": (16, 1), + "2-5": 
(2, 5), + "4-5": (4, 5), + "8-5": (8, 5), + "16-5": (16, 5), + "2-10": (2, 10), + "4-10": (4, 10), + "8-10": (8, 10), + "16-10": (16, 10), + "2-15": (2, 15), + "4-15": (4, 15), + "8-15": (8, 15), + "16-15": (16, 15), + "2-20": (2, 20), + "4-20": (4, 20), + "8-20": (8, 20), + "16-20": (16, 20), + "50-1": (50, 1), } + def buckets_count(): return (0, 0) -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): count, sleep = size_generators[size] - return { 'count': count, 'sleep': sleep } + return {"count": count, "sleep": sleep} diff --git a/benchmarks/600.workflows/630.parallel-sleep/python/generate.py b/benchmarks/600.workflows/630.parallel-sleep/python/generate.py index c291c6754..c538b6442 100644 --- a/benchmarks/600.workflows/630.parallel-sleep/python/generate.py +++ b/benchmarks/600.workflows/630.parallel-sleep/python/generate.py @@ -1,12 +1,9 @@ def handler(event): count = int(event["count"]) sleep = int(event["sleep"]) - + sleep_list = [] for i in range(0, count): - sleep_list.append({'sleep':sleep}) - + sleep_list.append({"sleep": sleep}) - return { - "buffer": sleep_list - } + return {"buffer": sleep_list} diff --git a/benchmarks/600.workflows/630.parallel-sleep/python/process.py b/benchmarks/600.workflows/630.parallel-sleep/python/process.py index 9e2f1ab05..d56f45960 100644 --- a/benchmarks/600.workflows/630.parallel-sleep/python/process.py +++ b/benchmarks/600.workflows/630.parallel-sleep/python/process.py @@ -1,7 +1,7 @@ import time -def handler(event): - time.sleep(event['sleep']) +def handler(event): + time.sleep(event["sleep"]) return "ok" diff --git a/benchmarks/600.workflows/631.parallel-download/input.py b/benchmarks/600.workflows/631.parallel-download/input.py index fd9d6d7b5..217ddf104 100644 --- a/benchmarks/600.workflows/631.parallel-download/input.py +++ 
b/benchmarks/600.workflows/631.parallel-download/input.py @@ -2,16 +2,16 @@ from random import shuffle size_generators = { - 'test' : (5, 10), - 'small': (20, 2**10), - 'large': (50, 2**10), - '2e10': (20, 2**10), - '2e28': (20, 2**28), - '2e15': (20, 2**15), - '2e20': (20, 2**20), - '2e25': (20, 2**25), - '2e26': (20, 2**26), - '2e27': (20, 2**27) + "test": (5, 10), + "small": (20, 2**10), + "large": (50, 2**10), + "2e10": (20, 2**10), + "2e28": (20, 2**28), + "2e15": (20, 2**15), + "2e20": (20, 2**20), + "2e25": (20, 2**25), + "2e26": (20, 2**26), + "2e27": (20, 2**27), } @@ -32,7 +32,15 @@ def generate(size): yield data -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): count, size_bytes = size_generators[size] data_name = f"data-{size_bytes}.txt" @@ -45,4 +53,8 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck upload_func(0, data_name, data_path) # os.remove(data_path) - return { 'count': count, "bucket": benchmarks_bucket, "blob": input_buckets[0] + '/' + data_name} + return { + "count": count, + "bucket": benchmarks_bucket, + "blob": input_buckets[0] + "/" + data_name, + } diff --git a/benchmarks/600.workflows/631.parallel-download/python/generate.py b/benchmarks/600.workflows/631.parallel-download/python/generate.py index fa20cd018..a2e4164ed 100644 --- a/benchmarks/600.workflows/631.parallel-download/python/generate.py +++ b/benchmarks/600.workflows/631.parallel-download/python/generate.py @@ -2,7 +2,4 @@ def handler(event): count = int(event["count"]) del event["count"] - - return { - "buffer": count * [event] - } + return {"buffer": count * [event]} diff --git a/benchmarks/600.workflows/631.parallel-download/python/process.py b/benchmarks/600.workflows/631.parallel-download/python/process.py index e4d56fe20..70ecbe98c 100644 --- 
a/benchmarks/600.workflows/631.parallel-download/python/process.py +++ b/benchmarks/600.workflows/631.parallel-download/python/process.py @@ -1,5 +1,6 @@ from . import storage + def handler(event): bucket = event["bucket"] blob = event["blob"] @@ -7,5 +8,4 @@ def handler(event): client = storage.storage.get_instance() buffer = client.download_stream(bucket, blob) - return "ok" diff --git a/benchmarks/600.workflows/640.selfish-detour/input.py b/benchmarks/600.workflows/640.selfish-detour/input.py index 69d06fcd5..687e61383 100644 --- a/benchmarks/600.workflows/640.selfish-detour/input.py +++ b/benchmarks/600.workflows/640.selfish-detour/input.py @@ -1,12 +1,22 @@ size_generators = { - 'test' : 100, - 'small': 5000, - 'large': 10000, + "test": 100, + "small": 5000, + "large": 10000, } + def buckets_count(): return (0, 0) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): num_samples = size_generators[size] - return { 'num_samples': num_samples } + return {"num_samples": num_samples} diff --git a/benchmarks/600.workflows/650.vid/input.py b/benchmarks/600.workflows/650.vid/input.py index c1515f901..eafdec029 100644 --- a/benchmarks/600.workflows/650.vid/input.py +++ b/benchmarks/600.workflows/650.vid/input.py @@ -1,7 +1,7 @@ import os size_generators = { - "test" : (3, 10, "video_test.mp4"), + "test": (3, 10, "video_test.mp4"), "small": (10, 5, "video_small.mp4"), "large": (1000, 3, "video_large.mp4"), } @@ -11,9 +11,21 @@ def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): n_frames, batch_size, video_name = size_generators[size] - files = 
["frozen_inference_graph.pb", "faster_rcnn_resnet50_coco_2018_01_28.pbtxt", video_name] + files = [ + "frozen_inference_graph.pb", + "faster_rcnn_resnet50_coco_2018_01_28.pbtxt", + video_name, + ] for name in files: path = os.path.join(data_dir, name) upload_func(0, name, path) @@ -26,5 +38,5 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck "benchmark_bucket": benchmarks_bucket, "input_bucket": input_buckets[0], "model_weights": files[0], - "model_config": files[1] + "model_config": files[1], } diff --git a/benchmarks/600.workflows/650.vid/python/analyse.py b/benchmarks/600.workflows/650.vid/python/analyse.py index a6c134d6c..46d384a2d 100644 --- a/benchmarks/600.workflows/650.vid/python/analyse.py +++ b/benchmarks/600.workflows/650.vid/python/analyse.py @@ -133,7 +133,9 @@ def handler(event): benchmark_bucket = event["benchmark_bucket"] - frames = list(load_frames(benchmark_bucket, event["frames_bucket"], event["frames"], tmp_dir)) + frames = list( + load_frames(benchmark_bucket, event["frames_bucket"], event["frames"], tmp_dir) + ) net = load_model( benchmark_bucket, event["model_bucket"] + "/" + event["model_weights"], diff --git a/benchmarks/600.workflows/650.vid/python/decode.py b/benchmarks/600.workflows/650.vid/python/decode.py index d27b67c3c..88504201c 100644 --- a/benchmarks/600.workflows/650.vid/python/decode.py +++ b/benchmarks/600.workflows/650.vid/python/decode.py @@ -9,12 +9,12 @@ def chunks(lst, n): for i in range(0, len(lst), n): - yield lst[i:i + n] + yield lst[i : i + n] def load_video(benchmark_bucket, bucket, blob, dest_dir): path = os.path.join(dest_dir, blob) - client.download(benchmark_bucket, bucket + '/' + blob, path) + client.download(benchmark_bucket, bucket + "/" + blob, path) return path @@ -36,7 +36,7 @@ def upload_imgs(benchmark_bucket, bucket, paths): for path in paths: name = os.path.basename(path) - yield client.upload(benchmark_bucket, bucket + '/' + name, path) + yield 
client.upload(benchmark_bucket, bucket + "/" + name, path) def handler(event): @@ -56,12 +56,15 @@ def handler(event): frames = list(chunks(paths, batch_size)) return { - "frames": [{ - "frames_bucket": frames_bucket, - "frames": fs, - "benchmark_bucket": benchmark_bucket, - "model_bucket": input_bucket, - "model_config": event["model_config"], - "model_weights": event["model_weights"] - } for fs in frames] + "frames": [ + { + "frames_bucket": frames_bucket, + "frames": fs, + "benchmark_bucket": benchmark_bucket, + "model_bucket": input_bucket, + "model_config": event["model_config"], + "model_weights": event["model_weights"], + } + for fs in frames + ] } diff --git a/benchmarks/600.workflows/650.vid/python/summarize.py b/benchmarks/600.workflows/650.vid/python/summarize.py index 8d290f3f9..a07af9526 100644 --- a/benchmarks/600.workflows/650.vid/python/summarize.py +++ b/benchmarks/600.workflows/650.vid/python/summarize.py @@ -11,8 +11,7 @@ def handler(event): logs = {} for xs in frames: - for key,value in xs.items(): - logs[key] = value + for key, value in xs.items(): + logs[key] = value return logs - diff --git a/benchmarks/600.workflows/660.map-reduce/input.py b/benchmarks/600.workflows/660.map-reduce/input.py index 36b2bcc8f..8d860950d 100644 --- a/benchmarks/600.workflows/660.map-reduce/input.py +++ b/benchmarks/600.workflows/660.map-reduce/input.py @@ -1,18 +1,22 @@ import os import random -size_generators = { - "test" : (50, 3), - "small": (1000, 3), - "large": (100000, 3) -} +size_generators = {"test": (50, 3), "small": (1000, 3), "large": (100000, 3)} def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): mult, n_mappers = size_generators[size] words = ["cat", "dog", "bird", "horse", "pig"] lst = mult * words @@ -21,15 +25,15 @@ def 
generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck list_path = os.path.join(data_dir, "words") list_name = "words" with open(list_path, "w") as f: - f.writelines(w+"\n" for w in lst) + f.writelines(w + "\n" for w in lst) upload_func(0, list_name, list_path) - #os.remove(list_path) + # os.remove(list_path) return { "benchmark_bucket": benchmarks_bucket, "words_bucket": input_buckets[0], "words": list_name, "n_mappers": n_mappers, - "output_bucket": output_buckets[0] + "output_bucket": output_buckets[0], } diff --git a/benchmarks/600.workflows/660.map-reduce/python/map.py b/benchmarks/600.workflows/660.map-reduce/python/map.py index 0ba79ae73..4d51cd857 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/map.py +++ b/benchmarks/600.workflows/660.map-reduce/python/map.py @@ -14,6 +14,7 @@ def count_words(lst): return index + def handler(event): benchmark_bucket = event["benchmark_bucket"] bucket = event["bucket"] @@ -21,16 +22,16 @@ def handler(event): prefix = event["prefix"] client = storage.storage.get_instance() - my_buffer = client.download_stream(benchmark_bucket, bucket + '/' + blob) + my_buffer = client.download_stream(benchmark_bucket, bucket + "/" + blob) words = bytes(my_buffer).decode("utf-8").split("\n") - + index = count_words(words) for word, count in index.items(): data = io.BytesIO() data.write(str(count).encode("utf-8")) data.seek(0) - #client.upload_stream(benchmark_bucket, os.path.join(bucket, prefix, word, blob), data) + # client.upload_stream(benchmark_bucket, os.path.join(bucket, prefix, word, blob), data) client.upload_stream(benchmark_bucket, os.path.join(prefix, word, blob), data) return event diff --git a/benchmarks/600.workflows/660.map-reduce/python/reduce.py b/benchmarks/600.workflows/660.map-reduce/python/reduce.py index 15fe6d707..cd0356a42 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/reduce.py +++ b/benchmarks/600.workflows/660.map-reduce/python/reduce.py @@ -10,14 +10,11 @@ def 
handler(event): client = storage.storage.get_instance() count = 0 - #each blob is one word. - #for blob in client.list_directory(bucket, path): + # each blob is one word. + # for blob in client.list_directory(bucket, path): for blob in client.list_directory(bucket, path): my_buffer = client.download_stream(bucket, blob) count += int(bytes(my_buffer).decode("utf-8")) - #count += int(my_buffer.getvalue().decode("utf-8")) + # count += int(my_buffer.getvalue().decode("utf-8")) - return { - "word": os.path.basename(path), - "count": count - } + return {"word": os.path.basename(path), "count": count} diff --git a/benchmarks/600.workflows/660.map-reduce/python/shuffle.py b/benchmarks/600.workflows/660.map-reduce/python/shuffle.py index 44568f27d..5d014e415 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/shuffle.py +++ b/benchmarks/600.workflows/660.map-reduce/python/shuffle.py @@ -13,15 +13,15 @@ def handler(event): dirs = client.list_directory(benchmark_bucket, prefix) dirs = [p.split(os.sep)[1] for p in dirs] dirs = list(set(dirs)) - lst = [{ - "bucket": benchmark_bucket, - #"dir": os.path.join(bucket, prefix, path) - #TODO add word here. - "dir": os.path.join(prefix, path) - #"dir": os.path.join(bucket, prefix) - } for path in dirs] + lst = [ + { + "bucket": benchmark_bucket, + # "dir": os.path.join(bucket, prefix, path) + # TODO add word here. + "dir": os.path.join(prefix, path) + # "dir": os.path.join(bucket, prefix) + } + for path in dirs + ] - - return { - "list": lst - } + return {"list": lst} diff --git a/benchmarks/600.workflows/660.map-reduce/python/split.py b/benchmarks/600.workflows/660.map-reduce/python/split.py index 941ffdfff..20aaaf0b1 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/split.py +++ b/benchmarks/600.workflows/660.map-reduce/python/split.py @@ -3,11 +3,12 @@ import uuid from . 
import storage + def chunks(lst, n): m = int(len(lst) / n) - for i in range(n-1): - yield lst[i*m:i*m+m] - tail = lst[(n-1)*m:] + for i in range(n - 1): + yield lst[i * m : i * m + m] + tail = lst[(n - 1) * m :] if len(tail) > 0: yield tail @@ -19,7 +20,7 @@ def handler(event): words_path = os.path.join("/tmp", "words.txt") client = storage.storage.get_instance() - client.download(benchmark_bucket, words_bucket + '/' + words_blob, words_path) + client.download(benchmark_bucket, words_bucket + "/" + words_blob, words_path) with open(words_path, "r") as f: list = f.read().split("\n") os.remove(words_path) @@ -29,25 +30,25 @@ def handler(event): map_lists = chunks(list, n_mappers) blobs = [] - for chunk in map_lists: name = str(uuid.uuid4())[:8] data = io.BytesIO() - data.writelines((val+"\n").encode("utf-8") for val in chunk) + data.writelines((val + "\n").encode("utf-8") for val in chunk) data.seek(0) - name = client.upload_stream(benchmark_bucket, output_bucket + '/' + name, data) - stripped_name = name.replace(output_bucket + '/', '') + name = client.upload_stream(benchmark_bucket, output_bucket + "/" + name, data) + stripped_name = name.replace(output_bucket + "/", "") blobs.append(stripped_name) prefix = str(uuid.uuid4())[:8] - lst = [{ - "benchmark_bucket": benchmark_bucket, - "bucket": output_bucket, - "blob": b, - "prefix": prefix - } for b in blobs] - - return { - "list": lst - } + lst = [ + { + "benchmark_bucket": benchmark_bucket, + "bucket": output_bucket, + "blob": b, + "prefix": prefix, + } + for b in blobs + ] + + return {"list": lst} diff --git a/benchmarks/600.workflows/670.auth/python/auth.py b/benchmarks/600.workflows/670.auth/python/auth.py index c7b77649c..d9f4f69b7 100644 --- a/benchmarks/600.workflows/670.auth/python/auth.py +++ b/benchmarks/600.workflows/670.auth/python/auth.py @@ -17,7 +17,7 @@ def AESModeCBC(plaintext): # random initialization vector of 16 bytes blocks_size = 16 iv = "InitializationVe" - pad = 16 - len(plaintext)% 
blocks_size + pad = 16 - len(plaintext) % blocks_size plaintext = str("0" * pad) + plaintext aes = pyaes.AESModeOfOperationCBC(KEY, iv=iv) ciphertext = aes.encrypt(plaintext) @@ -34,6 +34,4 @@ def handler(event): res = AESModeCTR(message) res = base64.b64encode(res).decode("ascii") - return { - "response": res - } + return {"response": res} diff --git a/benchmarks/600.workflows/680.excamera/input.py b/benchmarks/600.workflows/680.excamera/input.py index 687a2eabc..57045e2f4 100644 --- a/benchmarks/600.workflows/680.excamera/input.py +++ b/benchmarks/600.workflows/680.excamera/input.py @@ -1,18 +1,22 @@ import random import os -size_generators = { - "test" : (18, 6), - "small": (30, 6), - "large": (60, 6) -} +size_generators = {"test": (18, 6), "small": (30, 6), "large": (60, 6)} def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): num_frames, batch_size = size_generators[size] for bin in os.listdir(data_dir): @@ -32,7 +36,7 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck new_vid_segs.append(name) upload_func(0, name, path) - assert(len(new_vid_segs) == num_frames) + assert len(new_vid_segs) == num_frames return { "segments": new_vid_segs, @@ -40,5 +44,5 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck "input_bucket": input_buckets[0], "output_bucket": output_buckets[0], "batch_size": batch_size, - "quality": 1 + "quality": 1, } diff --git a/benchmarks/600.workflows/680.excamera/python/encode.py b/benchmarks/600.workflows/680.excamera/python/encode.py index 44a84c5ec..e74e20495 100644 --- a/benchmarks/600.workflows/680.excamera/python/encode.py +++ b/benchmarks/600.workflows/680.excamera/python/encode.py @@ -11,19 +11,22 @@ client = storage.storage.get_instance() + def 
download_bin(benchmark_bucket, bucket, name, dest_dir): path = os.path.join(dest_dir, name) if not os.path.exists(path): - client.download(benchmark_bucket, bucket + '/' + name, path) - subprocess.check_output(f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True) + client.download(benchmark_bucket, bucket + "/" + name, path) + subprocess.check_output( + f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True + ) def upload_files(benchmark_bucket, bucket, paths, prefix): for path in paths: file = os.path.basename(path) file = prefix + file - #print("Uploading", file, "to", path) - client.upload(benchmark_bucket, bucket + '/' + file, path, unique_name=False) + # print("Uploading", file, "to", path) + client.upload(benchmark_bucket, bucket + "/" + file, path, unique_name=False) def run(cmd): @@ -49,13 +52,13 @@ def encode(segs, data_dir, quality): output_path = os.path.join(data_dir, output) cmd = TERMINATE_CHUNK.format(input=input_path, output=output_path) run(cmd) - files.append(output_path+".ivf") + files.append(output_path + ".ivf") input_path = output_path output_path = os.path.join(data_dir, f"{name}-0") cmd = XC_DUMP_0.format(input=input_path, output=output_path) run(cmd) - files.append(output_path+".state") + files.append(output_path + ".state") return files @@ -77,7 +80,7 @@ def handler(event): os.makedirs(data_dir, exist_ok=True) for seg in segs: path = os.path.join(data_dir, seg) - client.download(benchmark_bucket, input_bucket + '/' + seg, path) + client.download(benchmark_bucket, input_bucket + "/" + seg, path) segs = [os.path.splitext(seg)[0] for seg in segs] output_paths = encode(segs, data_dir, quality) diff --git a/benchmarks/600.workflows/680.excamera/python/rebase.py b/benchmarks/600.workflows/680.excamera/python/rebase.py index 809774305..0707fbcc9 100644 --- a/benchmarks/600.workflows/680.excamera/python/rebase.py +++ b/benchmarks/600.workflows/680.excamera/python/rebase.py @@ -9,19 +9,22 @@ client = storage.storage.get_instance() + def 
download_bin(benchmark_bucket, bucket, name, dest_dir): path = os.path.join(dest_dir, name) if not os.path.exists(path): - client.download(benchmark_bucket, bucket + '/' + name, path) - subprocess.check_output(f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True) + client.download(benchmark_bucket, bucket + "/" + name, path) + subprocess.check_output( + f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True + ) def upload_files(benchmark_bucket, bucket, paths, prefix): for path in paths: file = os.path.basename(path) file = prefix + file - #print("Uploading", file, "to", path) - client.upload(benchmark_bucket, bucket + '/' + file, path, unique_name=False) + # print("Uploading", file, "to", path) + client.upload(benchmark_bucket, bucket + "/" + file, path, unique_name=False) def run(cmd): @@ -34,8 +37,8 @@ def run(cmd): def prev_seg_name(seg): - idx = int(seg)-1 - assert(idx >= 0) + idx = int(seg) - 1 + assert idx >= 0 return "{:08d}".format(idx) @@ -49,7 +52,7 @@ def rebase(segs, data_dir, dry_run=False): prev_input_path = os.path.join(data_dir, prev_seg_name(name)) source_state_path = f"{prev_input_path}-1" output_state_path = f"{input_path}-1.state" - extra = f"-O {output_state_path}" if idx != len(segs)-1 else "" + extra = f"-O {output_state_path}" if idx != len(segs) - 1 else "" input_pred_path = f"{input_path}-1" pred_state_path = f"{prev_input_path}-0" @@ -59,18 +62,18 @@ def rebase(segs, data_dir, dry_run=False): source_state=source_state_path, extra=extra, input_pred=input_pred_path, - pred_state=pred_state_path) + pred_state=pred_state_path, + ) if not dry_run: run(cmd) + input_paths.append(input_path + ".y4m") + input_paths.append(source_state_path + ".state") + input_paths.append(input_pred_path + ".ivf") + input_paths.append(pred_state_path + ".state") - input_paths.append(input_path+".y4m") - input_paths.append(source_state_path+".state") - input_paths.append(input_pred_path+".ivf") - input_paths.append(pred_state_path+".state") - - 
output_paths.append(input_path+".ivf") - if idx != len(segs)-1: + output_paths.append(input_path + ".ivf") + if idx != len(segs) - 1: output_paths.append(output_state_path) return input_paths, output_paths @@ -91,16 +94,16 @@ def handler(event): os.makedirs(data_dir, exist_ok=True) input_paths, _ = rebase(segs, data_dir, dry_run=True) - + for path in input_paths: file = os.path.basename(path) try: if ".y4m" in file: - client.download(benchmark_bucket, input_bucket + '/' + file, path) + client.download(benchmark_bucket, input_bucket + "/" + file, path) else: file = prefix + file - client.download(benchmark_bucket, output_bucket + '/' + file, path) + client.download(benchmark_bucket, output_bucket + "/" + file, path) except: # -1.state is generated by rebase itself if not "-1.state" in file: @@ -109,7 +112,6 @@ def handler(event): _, output_paths = rebase(segs, data_dir) upload_files(benchmark_bucket, output_bucket, output_paths, prefix) - shutil.rmtree(data_dir) return event diff --git a/benchmarks/600.workflows/680.excamera/python/reencode.py b/benchmarks/600.workflows/680.excamera/python/reencode.py index ee9b6576a..5a0448fcc 100644 --- a/benchmarks/600.workflows/680.excamera/python/reencode.py +++ b/benchmarks/600.workflows/680.excamera/python/reencode.py @@ -9,18 +9,21 @@ client = storage.storage.get_instance() + def download_bin(benchmark_bucket, bucket, name, dest_dir): path = os.path.join(dest_dir, name) if not os.path.exists(path): - client.download(benchmark_bucket, bucket + '/' + name, path) - subprocess.check_output(f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True) + client.download(benchmark_bucket, bucket + "/" + name, path) + subprocess.check_output( + f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True + ) def upload_files(benchmark_bucket, bucket, paths, prefix): for path in paths: file = os.path.basename(path) file = prefix + file - client.upload(benchmark_bucket, bucket + '/' + file, path, unique_name=False) + 
client.upload(benchmark_bucket, bucket + "/" + file, path, unique_name=False) def run(cmd): @@ -33,8 +36,8 @@ def run(cmd): def prev_seg_name(seg): - idx = int(seg)-1 - assert(idx >= 0) + idx = int(seg) - 1 + assert idx >= 0 return "{:08d}".format(idx) @@ -45,7 +48,7 @@ def reencode_first_frame(segs, data_dir, dry_run=False): name = segs[idx] input_path = os.path.join(data_dir, name) output_path = input_path if idx == 1 else f"{input_path}-1" - source_state_path = os.path.join(data_dir, prev_seg_name(name))+"-0" + source_state_path = os.path.join(data_dir, prev_seg_name(name)) + "-0" output_state_path = f"{input_path}-1.state" extra = f"-O {output_state_path}" if idx == 1 else "" input_pred_path = f"{input_path}-0" @@ -55,15 +58,16 @@ def reencode_first_frame(segs, data_dir, dry_run=False): output=output_path, source_state=source_state_path, extra=extra, - input_pred=input_pred_path) + input_pred=input_pred_path, + ) if not dry_run: run(cmd) - input_paths.append(input_path+".y4m") - input_paths.append(source_state_path+".state") - input_paths.append(input_pred_path+".ivf") + input_paths.append(input_path + ".y4m") + input_paths.append(source_state_path + ".state") + input_paths.append(input_pred_path + ".ivf") - output_paths.append(output_path+".ivf") + output_paths.append(output_path + ".ivf") if idx == 1: output_paths.append(output_state_path) @@ -86,14 +90,12 @@ def handler(event): input_paths, _ = reencode_first_frame(segs, data_dir, dry_run=True) for path in input_paths: file = os.path.basename(path) - + if ".y4m" in file: - client.download(benchmark_bucket, input_bucket + '/' + file, path) + client.download(benchmark_bucket, input_bucket + "/" + file, path) else: file = prefix + file - client.download(benchmark_bucket, output_bucket + '/' + file, path) - - + client.download(benchmark_bucket, output_bucket + "/" + file, path) _, output_paths = reencode_first_frame(segs, data_dir) upload_files(benchmark_bucket, output_bucket, output_paths, prefix) diff --git 
a/benchmarks/600.workflows/680.excamera/python/split.py b/benchmarks/600.workflows/680.excamera/python/split.py index 5ecfad0b6..69801699a 100644 --- a/benchmarks/600.workflows/680.excamera/python/split.py +++ b/benchmarks/600.workflows/680.excamera/python/split.py @@ -1,8 +1,9 @@ import uuid + def chunks(lst, n): for i in range(0, len(lst), n): - yield lst[i:i + n] + yield lst[i : i + n] def handler(event): @@ -15,12 +16,13 @@ def handler(event): return { "segments": [ { - "prefix": str(uuid.uuid4().int & (1<<64)-1)[:8], + "prefix": str(uuid.uuid4().int & (1 << 64) - 1)[:8], "segments": ss, "quality": quality, "input_bucket": input_bucket, "output_bucket": output_bucket, - "benchmark_bucket": benchmark_bucket - } for idx, ss in enumerate(segs) + "benchmark_bucket": benchmark_bucket, + } + for idx, ss in enumerate(segs) ] } diff --git a/benchmarks/600.workflows/690.ml/input.py b/benchmarks/600.workflows/690.ml/input.py index d3f930bc7..e432e9ecb 100644 --- a/benchmarks/600.workflows/690.ml/input.py +++ b/benchmarks/600.workflows/690.ml/input.py @@ -1,5 +1,5 @@ size_generators = { - "test" : (1, 100, 5), + "test": (1, 100, 5), "small": (2, 500, 1024), "large": (3, 1000, 1024), } @@ -8,18 +8,28 @@ {"name": "SVC", "kernel": "linear", "C": 0.025}, {"name": "RandomForestClassifier", "max_depth": 5, "n_estimators": 10}, {"name": "RandomForestClassifier", "max_depth": 5, "n_estimators": 15}, - {"name": "AdaBoostClassifier"} + {"name": "AdaBoostClassifier"}, ] + def buckets_count(): return (0, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): n_classifiers, n_samples, n_features = size_generators[size] return { "classifiers": classifiers[:n_classifiers], - "benchmark_bucket" : benchmarks_bucket, + "benchmark_bucket": benchmarks_bucket, "dataset_bucket": output_buckets[0], 
"n_samples": n_samples, - "n_features": n_features + "n_features": n_features, } diff --git a/benchmarks/600.workflows/690.ml/python/generate.py b/benchmarks/600.workflows/690.ml/python/generate.py index 03fea03db..31dc68233 100644 --- a/benchmarks/600.workflows/690.ml/python/generate.py +++ b/benchmarks/600.workflows/690.ml/python/generate.py @@ -14,7 +14,7 @@ def generate(n_samples, n_features): n_clusters_per_class=2, weights=[0.9, 0.1], flip_y=0.1, - random_state=123 + random_state=123, ) return X, y @@ -30,10 +30,12 @@ def upload_dataset(benchmark_bucket, bucket, X, y): np.save(labels_path, y) client = storage.storage.get_instance() - features = client.upload(benchmark_bucket, bucket + '/' + "features.npy", features_path) - features = features.replace(bucket + '/', '') - labels = client.upload(benchmark_bucket, bucket + '/' + "labels.npy", labels_path) - labels = labels.replace(bucket + '/', '') + features = client.upload( + benchmark_bucket, bucket + "/" + "features.npy", features_path + ) + features = features.replace(bucket + "/", "") + labels = client.upload(benchmark_bucket, bucket + "/" + "labels.npy", labels_path) + labels = labels.replace(bucket + "/", "") return features, labels @@ -48,7 +50,14 @@ def handler(event): X, y = generate(n_samples, n_features) X_key, y_key = upload_dataset(benchmark_bucket, bucket, X, y) - schedules = [{**c, "features": X_key, "labels": y_key, "bucket": bucket, "benchmark_bucket": benchmark_bucket} for c in classifiers] - return { - "schedules": schedules - } + schedules = [ + { + **c, + "features": X_key, + "labels": y_key, + "bucket": bucket, + "benchmark_bucket": benchmark_bucket, + } + for c in classifiers + ] + return {"schedules": schedules} From 591cd3972742c8160c0c1afaab0e729648a16737 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Tue, 18 Nov 2025 17:45:53 +0100 Subject: [PATCH 62/82] reformat the benchmark workflows folder --- .../6100.1000-genome/python/frequency.py | 61 ++-------- 
.../6100.1000-genome/python/individuals.py | 4 +- .../python/mutation_overlap.py | 104 +++--------------- .../6100.1000-genome/python/sifting.py | 10 +- .../python/individuals.py | 4 +- .../6200.trip-booking/python/cancel_flight.py | 4 +- .../6200.trip-booking/python/cancel_hotel.py | 4 +- .../6200.trip-booking/python/cancel_rental.py | 4 +- .../6200.trip-booking/python/confirm.py | 5 +- .../600.workflows/650.vid/python/analyse.py | 4 +- .../680.excamera/python/encode.py | 4 +- .../680.excamera/python/rebase.py | 4 +- .../680.excamera/python/reencode.py | 4 +- .../600.workflows/690.ml/python/generate.py | 4 +- .../600.workflows/690.ml/python/train.py | 4 +- 15 files changed, 40 insertions(+), 184 deletions(-) diff --git a/benchmarks/600.workflows/6100.1000-genome/python/frequency.py b/benchmarks/600.workflows/6100.1000-genome/python/frequency.py index 344ff97b6..5a89a0182 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/frequency.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/frequency.py @@ -53,9 +53,7 @@ def read_rs_numbers(self, siftfile, SIFT): return rs_numbers, map_variations - def read_individuals( - self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename - ): + def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename): tic = time.perf_counter() mutation_index_array = [] for name in ids: @@ -67,11 +65,7 @@ def read_individuals( try: text.append(item[1]) except IndexError as e: - print( - "ERROR({}): while reading {}: (item: {})".format( - str(e), filename, item - ) - ) + print("ERROR({}): while reading {}: (item: {})".format(str(e), filename, item)) sifted_mutations = list(set(rs_numbers).intersection(text)) mutation_index_array.append(sifted_mutations) @@ -92,9 +86,7 @@ def overlap_ind(self, ids, mutation_index_array, n_runs, n_indiv): for pq in range(n_indiv): if 2 * pq >= len(randomized_list): break - b_multiset = collections.Counter( - mutation_index_array[randomized_list[2 * pq]] - ) + 
b_multiset = collections.Counter(mutation_index_array[randomized_list[2 * pq]]) r_ids.append(ids[randomized_list[2 * pq]]) result = result + b_multiset random_indiv.append(r_ids) @@ -128,9 +120,7 @@ def plot_histogram_overlap(self, POP, histogram_overlap, outputFile, n_runs): class WriteData: - def write_histogram_overlap( - self, histogram_overlapfile, histogram_overlap, n_runs, n_indiv - ): + def write_histogram_overlap(self, histogram_overlapfile, histogram_overlap, n_runs, n_indiv): tic = time.perf_counter() for run in range(n_runs): overlapfile = histogram_overlapfile + str(run) + ".txt" @@ -163,9 +153,7 @@ def write_random_indiv(self, randomindiv_file, random_indiv, n_runs): f.write("%s\n" % item) f.close() - def write_mutation_index_array( - self, mutation_index_array_file, mutation_index_array - ): + def write_mutation_index_array(self, mutation_index_array_file, mutation_index_array): tic = time.perf_counter() f = open(mutation_index_array_file, "w") for item in mutation_index_array: @@ -240,34 +228,13 @@ def handler(event): pd = PlotData() histogram_overlapfile = ( - outdata_dir - + "Histogram_mutation_overlap_chr" - + str(c) - + "_s" - + str(SIFT) - + "_" - + POP - + "_" + outdata_dir + "Histogram_mutation_overlap_chr" + str(c) + "_s" + str(SIFT) + "_" + POP + "_" ) mutation_overlapfile = ( - outdata_dir - + "Mutation_overlap_chr" - + str(c) - + "_s" - + str(SIFT) - + "_" - + POP - + "_" + outdata_dir + "Mutation_overlap_chr" + str(c) + "_s" + str(SIFT) + "_" + POP + "_" ) mutation_index_array_file = ( - outdata_dir - + "mutation_index_array" - + str(c) - + "_s" - + str(SIFT) - + "_" - + POP - + ".txt" + outdata_dir + "mutation_index_array" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" ) histogram_overlap_plot = ( plot_dir + "Frequency_mutations" + str(c) + "_s" + str(SIFT) + "_" + POP @@ -276,9 +243,7 @@ def handler(event): outdata_dir + "map_variations" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" ) - randomindiv_file = ( - outdata_dir + 
"random_indiv" + str(c) + "_s" + str(SIFT) + "_" + POP + "_" - ) + randomindiv_file = outdata_dir + "random_indiv" + str(c) + "_s" + str(SIFT) + "_" + POP + "_" ids = rd.read_names(POP, pop_dir, columns_file) n_pairs = len(ids) / 2 @@ -291,15 +256,11 @@ def handler(event): wr.write_map_variations(map_variations_file, map_variations) wr.write_mutation_index_array(mutation_index_array_file, mutation_index_array) - mutation_overlap, random_indiv = res.overlap_ind( - ids, mutation_index_array, n_runs, n_indiv - ) + mutation_overlap, random_indiv = res.overlap_ind(ids, mutation_index_array, n_runs, n_indiv) histogram_overlap = res.histogram_overlap(mutation_overlap, n_runs) wr.write_mutation_overlap(mutation_overlapfile, mutation_overlap, n_runs) - wr.write_histogram_overlap( - histogram_overlapfile, histogram_overlap, n_runs, n_indiv - ) + wr.write_histogram_overlap(histogram_overlapfile, histogram_overlap, n_runs, n_indiv) wr.write_random_indiv(randomindiv_file, random_indiv, n_runs) pd.plot_histogram_overlap(POP, histogram_overlap, histogram_overlap_plot, n_runs) diff --git a/benchmarks/600.workflows/6100.1000-genome/python/individuals.py b/benchmarks/600.workflows/6100.1000-genome/python/individuals.py index 88f789f9a..a341fe200 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/individuals.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/individuals.py @@ -84,9 +84,7 @@ def handler(event): # second =`echo $l | cut -d -f 2, 3, 4, 5, 8 --output-delimiter = ' '` second = line.split("\t")[0:8] # We select the one we want - second = [ - elem for id, elem in enumerate(second) if id in [1, 2, 3, 4, 7] - ] + second = [elem for id, elem in enumerate(second) if id in [1, 2, 3, 4, 7]] af_value = second[4].split(";")[8].split("=")[1] # We replace with AF_Value second[4] = af_value diff --git a/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py b/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py index 26b3abd81..4818b3410 
100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py @@ -56,9 +56,7 @@ def read_rs_numbers(self, siftfile, SIFT): return rs_numbers, map_variations - def read_individuals( - self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename - ): + def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename): tic = time.perf_counter() mutation_index_array = [] total_mutations = {} @@ -216,9 +214,7 @@ def total_colormap_overlap(self, POP, total_pairs_overlap, outputFile): tic = time.perf_counter() fig = plt.figure() cmap = mpl.colors.ListedColormap(["blue", "black", "red", "green", "pink"]) - img = pyplot.imshow( - total_pairs_overlap, interpolation="nearest", cmap=cmap, origin="lower" - ) + img = pyplot.imshow(total_pairs_overlap, interpolation="nearest", cmap=cmap, origin="lower") pyplot.colorbar(img, cmap=cmap) plt.savefig(outputFile) @@ -244,17 +240,13 @@ def write_total_indiv(self, total_mutations_filename, total_mutations): f.write(key + "\t" + str(count) + "\n") f.close() - def write_random_mutations_list( - self, random_mutations_filename, random_mutations_list, n_runs - ): + def write_random_mutations_list(self, random_mutations_filename, random_mutations_list, n_runs): for run in range(n_runs): filename = random_mutations_filename + "_run_" + str(run) + ".txt" f = open(filename, "w") f.writelines(["%s\n" % item for item in random_mutations_list[run]]) - def write_mutation_index_array( - self, mutation_index_array_file, mutation_index_array - ): + def write_mutation_index_array(self, mutation_index_array_file, mutation_index_array): f = open(mutation_index_array_file, "w") for item in mutation_index_array: f.write("%s\n" % item) @@ -348,96 +340,34 @@ def handler(event): + ".txt" ) genepairsfile = ( - outdata_dir - + "gene_pairs_count_chr" - + str(c) - + "_s" - + str(SIFT) - + "_" - + POP - + ".txt" + outdata_dir + 
"gene_pairs_count_chr" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" ) random_indpairsfile = ( - outdata_dir - + "100_individual_overlap_chr" - + str(c) - + "_s" - + str(SIFT) - + "_" - + POP - + ".txt" + outdata_dir + "100_individual_overlap_chr" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" ) colormap = ( - plots_dir - + "colormap_distribution_c" - + str(c) - + "_s" - + str(SIFT) - + "_" - + POP - + OutputFormat + plots_dir + "colormap_distribution_c" + str(c) + "_s" + str(SIFT) + "_" + POP + OutputFormat ) half_overlap = ( - plots_dir - + "half_distribution_c" - + str(c) - + "_s" - + str(SIFT) - + "_" - + POP - + OutputFormat + plots_dir + "half_distribution_c" + str(c) + "_s" + str(SIFT) + "_" + POP + OutputFormat ) total_overlap = ( - plots_dir - + "total_distribution_c" - + str(c) - + "_s" - + str(SIFT) - + "_" - + POP - + OutputFormat + plots_dir + "total_distribution_c" + str(c) + "_s" + str(SIFT) + "_" + POP + OutputFormat ) random_overlap = ( - plots_dir - + "100_distribution_c" - + str(c) - + "_s" - + str(SIFT) - + "_" - + POP - + OutputFormat + plots_dir + "100_distribution_c" + str(c) + "_s" + str(SIFT) + "_" + POP + OutputFormat ) total_mutations_filename = ( - outdata_dir - + "total_mutations_individual" - + str(c) - + "_s" - + str(SIFT) - + "_" - + POP - + ".txt" + outdata_dir + "total_mutations_individual" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" ) random_mutations_filename = ( - outdata_dir - + "random_mutations_individual" - + str(c) - + "_s" - + str(SIFT) - + "_" - + POP + outdata_dir + "random_mutations_individual" + str(c) + "_s" + str(SIFT) + "_" + POP ) mutation_index_array_file = ( - outdata_dir - + "mutation_index_array" - + str(c) - + "_s" - + str(SIFT) - + "_" - + POP - + ".txt" + outdata_dir + "mutation_index_array" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" ) map_variations_file = ( @@ -456,9 +386,7 @@ def handler(event): # cross-correlations mutations overlapping half_pairs_overlap = 
res.half_pair_individuals(mutation_index_array) - total_pairs_overlap, simetric_overlap = res.total_pair_individuals( - mutation_index_array - ) + total_pairs_overlap, simetric_overlap = res.total_pair_individuals(mutation_index_array) random_pairs_overlap = res.pair_individuals(mutation_index_array, n_runs) wr.write_mutation_index_array(mutation_index_array_file, mutation_index_array) @@ -476,9 +404,7 @@ def handler(event): # list of frecuency of mutations in 26 individuals random_mutations_list = res.group_indivuals(total_mutations_list, n_runs) - wr.write_random_mutations_list( - random_mutations_filename, random_mutations_list, n_runs - ) + wr.write_random_mutations_list(random_mutations_filename, random_mutations_list, n_runs) # gen overlapping gene_pair_list = res.gene_pairs(mutation_index_array) diff --git a/benchmarks/600.workflows/6100.1000-genome/python/sifting.py b/benchmarks/600.workflows/6100.1000-genome/python/sifting.py index 670c3d4e8..0517f8ce8 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/sifting.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/sifting.py @@ -62,16 +62,10 @@ def handler(event): elif temp[5] == "": f.write("{} {} {} {}\n".format(temp[0], temp[1], temp[2], temp[4])) else: - f.write( - "{} {} {} {} {}\n".format( - temp[0], temp[1], temp[2], temp[4], temp[6] - ) - ) + f.write("{} {} {} {} {}\n".format(temp[0], temp[1], temp[2], temp[4], temp[6])) os.remove(siftfile) - final_name = client.upload( - benchmark_bucket, output_bucket + "/" + final_name, final - ) + final_name = client.upload(benchmark_bucket, output_bucket + "/" + final_name, final) final_name = final_name.replace(output_bucket + "/", "") return { diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py b/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py index dfeb53979..cca00c95e 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py +++ 
b/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py @@ -83,9 +83,7 @@ def handler(event): # second =`echo $l | cut -d -f 2, 3, 4, 5, 8 --output-delimiter = ' '` second = line.split("\t")[0:8] # We select the one we want - second = [ - elem for id, elem in enumerate(second) if id in [1, 2, 3, 4, 7] - ] + second = [elem for id, elem in enumerate(second) if id in [1, 2, 3, 4, 7]] af_value = second[4].split(";")[8].split("=")[1] # We replace with AF_Value second[4] = af_value diff --git a/benchmarks/600.workflows/6200.trip-booking/python/cancel_flight.py b/benchmarks/600.workflows/6200.trip-booking/python/cancel_flight.py index 82803e464..f94da3dfe 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/cancel_flight.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/cancel_flight.py @@ -10,9 +10,7 @@ def handler(event): # Confirm flight nosql_table_name = "flights" flight_id = event["flight_id"] - nosql_client.delete( - nosql_table_name, ("trip_id", trip_id), ("flight_id", flight_id) - ) + nosql_client.delete(nosql_table_name, ("trip_id", trip_id), ("flight_id", flight_id)) event.pop("flight_id") return event diff --git a/benchmarks/600.workflows/6200.trip-booking/python/cancel_hotel.py b/benchmarks/600.workflows/6200.trip-booking/python/cancel_hotel.py index eefce2c30..e1f69077d 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/cancel_hotel.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/cancel_hotel.py @@ -10,8 +10,6 @@ def handler(event): # Confirm flight nosql_table_name = "hotel_booking" booking_id = event["booking_id"] - nosql_client.delete( - nosql_table_name, ("trip_id", trip_id), ("booking_id", booking_id) - ) + nosql_client.delete(nosql_table_name, ("trip_id", trip_id), ("booking_id", booking_id)) return {"trip_id": trip_id, "status": "failure"} diff --git a/benchmarks/600.workflows/6200.trip-booking/python/cancel_rental.py b/benchmarks/600.workflows/6200.trip-booking/python/cancel_rental.py 
index 5cdd10787..f8ff38044 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/cancel_rental.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/cancel_rental.py @@ -10,9 +10,7 @@ def handler(event): # Confirm flight nosql_table_name = "car_rentals" rental_id = event["rental_id"] - nosql_client.delete( - nosql_table_name, ("trip_id", trip_id), ("rental_id", rental_id) - ) + nosql_client.delete(nosql_table_name, ("trip_id", trip_id), ("rental_id", rental_id)) event.pop("rental_id") return event diff --git a/benchmarks/600.workflows/6200.trip-booking/python/confirm.py b/benchmarks/600.workflows/6200.trip-booking/python/confirm.py index 347eeff5f..3a555f6a4 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/confirm.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/confirm.py @@ -6,10 +6,7 @@ def handler(event): expected_result = event["expected_result"] - if ( - expected_result["result"] == "failure" - and expected_result["reason"] == "confirm" - ): + if expected_result["result"] == "failure" and expected_result["reason"] == "confirm": raise RuntimeError("Failed to confirm the booking!") trip_id = event["trip_id"] diff --git a/benchmarks/600.workflows/650.vid/python/analyse.py b/benchmarks/600.workflows/650.vid/python/analyse.py index 46d384a2d..a6c134d6c 100644 --- a/benchmarks/600.workflows/650.vid/python/analyse.py +++ b/benchmarks/600.workflows/650.vid/python/analyse.py @@ -133,9 +133,7 @@ def handler(event): benchmark_bucket = event["benchmark_bucket"] - frames = list( - load_frames(benchmark_bucket, event["frames_bucket"], event["frames"], tmp_dir) - ) + frames = list(load_frames(benchmark_bucket, event["frames_bucket"], event["frames"], tmp_dir)) net = load_model( benchmark_bucket, event["model_bucket"] + "/" + event["model_weights"], diff --git a/benchmarks/600.workflows/680.excamera/python/encode.py b/benchmarks/600.workflows/680.excamera/python/encode.py index e74e20495..031ee0c02 100644 --- 
a/benchmarks/600.workflows/680.excamera/python/encode.py +++ b/benchmarks/600.workflows/680.excamera/python/encode.py @@ -16,9 +16,7 @@ def download_bin(benchmark_bucket, bucket, name, dest_dir): path = os.path.join(dest_dir, name) if not os.path.exists(path): client.download(benchmark_bucket, bucket + "/" + name, path) - subprocess.check_output( - f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True - ) + subprocess.check_output(f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True) def upload_files(benchmark_bucket, bucket, paths, prefix): diff --git a/benchmarks/600.workflows/680.excamera/python/rebase.py b/benchmarks/600.workflows/680.excamera/python/rebase.py index 0707fbcc9..c83a5659a 100644 --- a/benchmarks/600.workflows/680.excamera/python/rebase.py +++ b/benchmarks/600.workflows/680.excamera/python/rebase.py @@ -14,9 +14,7 @@ def download_bin(benchmark_bucket, bucket, name, dest_dir): path = os.path.join(dest_dir, name) if not os.path.exists(path): client.download(benchmark_bucket, bucket + "/" + name, path) - subprocess.check_output( - f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True - ) + subprocess.check_output(f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True) def upload_files(benchmark_bucket, bucket, paths, prefix): diff --git a/benchmarks/600.workflows/680.excamera/python/reencode.py b/benchmarks/600.workflows/680.excamera/python/reencode.py index 5a0448fcc..9695fa26f 100644 --- a/benchmarks/600.workflows/680.excamera/python/reencode.py +++ b/benchmarks/600.workflows/680.excamera/python/reencode.py @@ -14,9 +14,7 @@ def download_bin(benchmark_bucket, bucket, name, dest_dir): path = os.path.join(dest_dir, name) if not os.path.exists(path): client.download(benchmark_bucket, bucket + "/" + name, path) - subprocess.check_output( - f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True - ) + subprocess.check_output(f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True) def upload_files(benchmark_bucket, bucket, paths, prefix): 
diff --git a/benchmarks/600.workflows/690.ml/python/generate.py b/benchmarks/600.workflows/690.ml/python/generate.py index 31dc68233..b5b1dee36 100644 --- a/benchmarks/600.workflows/690.ml/python/generate.py +++ b/benchmarks/600.workflows/690.ml/python/generate.py @@ -30,9 +30,7 @@ def upload_dataset(benchmark_bucket, bucket, X, y): np.save(labels_path, y) client = storage.storage.get_instance() - features = client.upload( - benchmark_bucket, bucket + "/" + "features.npy", features_path - ) + features = client.upload(benchmark_bucket, bucket + "/" + "features.npy", features_path) features = features.replace(bucket + "/", "") labels = client.upload(benchmark_bucket, bucket + "/" + "labels.npy", labels_path) labels = labels.replace(bucket + "/", "") diff --git a/benchmarks/600.workflows/690.ml/python/train.py b/benchmarks/600.workflows/690.ml/python/train.py index 7d41c09d2..8ee6bfbd6 100644 --- a/benchmarks/600.workflows/690.ml/python/train.py +++ b/benchmarks/600.workflows/690.ml/python/train.py @@ -35,9 +35,7 @@ def load_dataset(benchmark_bucket, bucket, features, labels): def preprocess(X, y): X = StandardScaler().fit_transform(X) - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.4, random_state=123 - ) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=123) return X_train, X_test, y_train, y_test From 218ef8cdba6fdb8f5c67a6b2260506818c1ee5a1 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Tue, 18 Nov 2025 17:48:44 +0100 Subject: [PATCH 63/82] reformat the benchmark workflows folder --- .../wrappers/azure/python/handler_workflow.py | 18 ++- .../wrappers/azure/python/main_workflow.py | 23 ++-- benchmarks/wrappers/azure/python/nosql.py | 6 +- .../wrappers/azure/python/run_subworkflow.py | 103 +++++++++------ .../wrappers/azure/python/run_workflow.py | 124 ++++++++++-------- .../wrappers/gcp/python/handler_workflow.py | 26 ++-- benchmarks/wrappers/gcp/python/storage.py | 10 +- 7 files changed, 176 
insertions(+), 134 deletions(-) diff --git a/benchmarks/wrappers/azure/python/handler_workflow.py b/benchmarks/wrappers/azure/python/handler_workflow.py index 143b5287f..cd9f8c930 100644 --- a/benchmarks/wrappers/azure/python/handler_workflow.py +++ b/benchmarks/wrappers/azure/python/handler_workflow.py @@ -9,6 +9,7 @@ import azure.functions as func from redis import Redis + def probe_cold_start(): is_cold = False fname = os.path.join("/tmp", "cold_run") @@ -23,6 +24,7 @@ def probe_cold_start(): return is_cold, container_id + def main(event, context: func.Context): start = datetime.datetime.now().timestamp() os.environ["STORAGE_UPLOAD_BYTES"] = "0" @@ -32,7 +34,7 @@ def main(event, context: func.Context): func_name = os.path.basename(os.path.dirname(__file__)) # FIXME: sort out workflow and function request id - #event["request-id"] = context.invocation_id + # event["request-id"] = context.invocation_id # this only works on benchmarks where payload is dict event["payload"]["request-id"] = context.invocation_id @@ -53,7 +55,7 @@ def main(event, context: func.Context): "end": end, "is_cold": is_cold, "container_id": container_id, - "provider.request_id": context.invocation_id + "provider.request_id": context.invocation_id, } func_res = os.getenv("SEBS_FUNCTION_RESULT") @@ -70,11 +72,13 @@ def main(event, context: func.Context): payload = json.dumps(payload) - redis = Redis(host={{REDIS_HOST}}, - port=6379, - decode_responses=True, - socket_connect_timeout=10, - password={{REDIS_PASSWORD}}) + redis = Redis( + host={{REDIS_HOST}}, + port=6379, + decode_responses=True, + socket_connect_timeout=10, + password={{REDIS_PASSWORD}}, + ) req_id = event["request_id"] key = os.path.join(workflow_name, func_name, req_id, str(uuid.uuid4())[0:8]) diff --git a/benchmarks/wrappers/azure/python/main_workflow.py b/benchmarks/wrappers/azure/python/main_workflow.py index 0c4e55a0c..81518d911 100644 --- a/benchmarks/wrappers/azure/python/main_workflow.py +++ 
b/benchmarks/wrappers/azure/python/main_workflow.py @@ -8,6 +8,7 @@ import logging + def probe_cold_start(): is_cold = False fname = os.path.join("/tmp", "cold_run") @@ -28,12 +29,12 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu req_id = event["request_id"] logging.info("complete event: ") logging.info(event) - logging.info("req_id in main: ") + logging.info("req_id in main: ") logging.info(req_id) - if 'connection_string' in event: + if "connection_string" in event: logging.info("setting connection string.") - os.environ['STORAGE_CONNECTION_STRING'] = event['connection_string'] + os.environ["STORAGE_CONNECTION_STRING"] = event["connection_string"] begin = datetime.datetime.now() @@ -41,17 +42,17 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu instance_id = await client.start_new("run_workflow", None, event) res = client.create_check_status_response(req, instance_id) - #res = await client.wait_for_completion_or_create_check_status_response(req, instance_id, 1000000) + # res = await client.wait_for_completion_or_create_check_status_response(req, instance_id, 1000000) end = datetime.datetime.now() is_cold, container_id = probe_cold_start() - #status = await client.get_status(instance_id) - #code = 500 if str(status.runtime_status) == "Failed" else 200 - - #try: + # status = await client.get_status(instance_id) + # code = 500 if str(status.runtime_status) == "Failed" else 200 + + # try: # result = json.loads(res.get_body()) - #except json.decoder.JSONDecodeError: + # except json.decoder.JSONDecodeError: # result = res.get_body().decode() body = json.loads(res.get_body()) @@ -68,7 +69,5 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu } return func.HttpResponse( - status_code=res.status_code, - body=json.dumps(body), - mimetype="application/json" + status_code=res.status_code, body=json.dumps(body), mimetype="application/json" ) diff --git 
a/benchmarks/wrappers/azure/python/nosql.py b/benchmarks/wrappers/azure/python/nosql.py index acc211ed2..edf79e444 100644 --- a/benchmarks/wrappers/azure/python/nosql.py +++ b/benchmarks/wrappers/azure/python/nosql.py @@ -90,9 +90,9 @@ def get_instance( database: Optional[str] = None, url: Optional[str] = None, credential: Optional[str] = None ): if nosql.instance is None: - database = os.environ['NOSQL_STORAGE_DATABASE'] - url = os.environ['NOSQL_STORAGE_URL'] - credential = os.environ['NOSQL_STORAGE_CREDS'] + database = os.environ["NOSQL_STORAGE_DATABASE"] + url = os.environ["NOSQL_STORAGE_URL"] + credential = os.environ["NOSQL_STORAGE_CREDS"] assert database is not None and url is not None and credential is not None nosql.instance = nosql(url, credential, database) return nosql.instance diff --git a/benchmarks/wrappers/azure/python/run_subworkflow.py b/benchmarks/wrappers/azure/python/run_subworkflow.py index c2730714e..36df71624 100644 --- a/benchmarks/wrappers/azure/python/run_subworkflow.py +++ b/benchmarks/wrappers/azure/python/run_subworkflow.py @@ -17,7 +17,7 @@ def get_var(obj, path: str): names = path.split(".") - assert(len(names) > 0) + assert len(names) > 0 for n in names: obj = obj[n] @@ -27,12 +27,13 @@ def get_var(obj, path: str): def set_var(obj, val, path: str): names = path.split(".") - assert(len(names) > 0) + assert len(names) > 0 for n in names[:-1]: obj = obj[n] obj[names[-1]] = val + def handler(context: df.DurableOrchestrationContext): start = datetime.datetime.now().timestamp() ts = start @@ -43,8 +44,7 @@ def handler(context: df.DurableOrchestrationContext): res = input["payload"] request_id = input["request_id"] all_states = input["states"] - states = {n: State.deserialize(n, s) - for n, s in all_states.items()} + states = {n: State.deserialize(n, s) for n, s in all_states.items()} current = states[input["root"]] while current: @@ -53,7 +53,7 @@ def handler(context: df.DurableOrchestrationContext): if isinstance(current, Task): input = 
{"payload": res, "request_id": request_id} - duration += (now() - ts) + duration += now() - ts res = yield context.call_activity(current.func_name, input) ts = now() current = states.get(current.next, None) @@ -63,7 +63,7 @@ def handler(context: df.DurableOrchestrationContext): "<=": operator.le, "==": operator.eq, ">=": operator.ge, - ">": operator.gt + ">": operator.gt, } next = None @@ -81,9 +81,9 @@ def handler(context: df.DurableOrchestrationContext): array = get_var(res, current.array) tasks = [] if current.common_params: - #assemble input differently + # assemble input differently for elem in array: - #assemble payload + # assemble payload payload = {} payload["array_element"] = elem params = current.common_params.split(",") @@ -91,12 +91,12 @@ def handler(context: df.DurableOrchestrationContext): payload[param] = get_var(res, param) myinput = {"payload": payload, "request_id": request_id} tasks.append(context.call_activity(current.func_name, myinput)) - else: + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} tasks.append(context.call_activity(current.func_name, myinput)) - duration += (now() - ts) + duration += now() - ts map_res = yield context.task_all(tasks) ts = now() @@ -106,7 +106,7 @@ def handler(context: df.DurableOrchestrationContext): for i in range(current.count): input = {"payload": res, "request_id": request_id} - duration += (now() - ts) + duration += now() - ts res = yield context.call_activity(current.func_name, input) ts = now() @@ -116,7 +116,7 @@ def handler(context: df.DurableOrchestrationContext): for elem in array: input = {"payload": elem, "request_id": request_id} - duration += (now() - ts) + duration += now() - ts yield context.call_activity(current.func_name, input) ts = now() @@ -127,12 +127,13 @@ def handler(context: df.DurableOrchestrationContext): first_states = [] state_to_result = {} for i, subworkflow in enumerate(current.funcs): - parallel_states = {n: State.deserialize(n, s) for n, s in 
subworkflow["states"].items()} + parallel_states = { + n: State.deserialize(n, s) for n, s in subworkflow["states"].items() + } - #for state in parallel_states.values(): + # for state in parallel_states.values(): # state_to_result[state.func_name] = [] - first_state = parallel_states[subworkflow["root"]] first_states.append(first_state) state_to_result[first_state.func_name] = [] @@ -140,24 +141,26 @@ def handler(context: df.DurableOrchestrationContext): if isinstance(first_state, Task): input = {"payload": res, "request_id": request_id} - #task directly here if only one state, task within suborchestrator if multiple states. + # task directly here if only one state, task within suborchestrator if multiple states. if first_state.next: - #call suborchestrator - #FIXME define other parameters. - parallel_task = context.call_sub_orchestrator("run_subworkflow", input, subworkflow["root"], parallel_states) + # call suborchestrator + # FIXME define other parameters. + parallel_task = context.call_sub_orchestrator( + "run_subworkflow", input, subworkflow["root"], parallel_states + ) parallel_tasks.append(parallel_task) else: parallel_tasks.append(context.call_activity(first_state.func_name, input)) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - + state_to_result[first_state.func_name].append(len(parallel_tasks) - 1) + elif isinstance(first_state, Map): array = get_var(res, first_state.array) tasks = [] if first_state.next: - #call suborchestrator. + # call suborchestrator. if first_state.common_params: - #assemble input differently + # assemble input differently for elem in array: payload = {} payload["array_element"] = elem @@ -165,20 +168,28 @@ def handler(context: df.DurableOrchestrationContext): for param in params: payload[param] = get_var(res, param) myinput = {"payload": payload, "request_id": request_id} - #FIXME use right parameters for suborchestrator. 
- parallel_task = context.call_sub_orchestrator("run_subworkflow", myinput, subworkflow["root"], parallel_states) + # FIXME use right parameters for suborchestrator. + parallel_task = context.call_sub_orchestrator( + "run_subworkflow", myinput, subworkflow["root"], parallel_states + ) parallel_tasks.append(parallel_task) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - else: + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} - - parallel_task = context.call_sub_orchestrator("run_subworkflow", myinput, subworkflow["root"], parallel_states) + + parallel_task = context.call_sub_orchestrator( + "run_subworkflow", myinput, subworkflow["root"], parallel_states + ) parallel_tasks.append(parallel_task) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - else: + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + else: if first_state.common_params: - #assemble input differently + # assemble input differently for elem in array: payload = {} payload["array_element"] = elem @@ -186,15 +197,23 @@ def handler(context: df.DurableOrchestrationContext): for param in params: payload[param] = get_var(res, param) myinput = {"payload": payload, "request_id": request_id} - parallel_tasks.append(context.call_activity(first_state.func_name, myinput)) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - else: + parallel_tasks.append( + context.call_activity(first_state.func_name, myinput) + ) + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} - parallel_tasks.append(context.call_activity(first_state.func_name, myinput)) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - - duration += (now() - ts) + parallel_tasks.append( + 
context.call_activity(first_state.func_name, myinput) + ) + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + + duration += now() - ts map_res = yield context.task_all(parallel_tasks) ts = now() res = {} @@ -207,7 +226,7 @@ def handler(context: df.DurableOrchestrationContext): output.append(map_res[index]) res[state.func_name] = output else: - #task state + # task state res[state.func_name] = map_res[indices[0]] current = states.get(current.next, None) @@ -215,9 +234,9 @@ def handler(context: df.DurableOrchestrationContext): else: raise ValueError(f"Undefined state: {current}") - #workflow_name = os.getenv("APPSETTING_WEBSITE_SITE_NAME") + # workflow_name = os.getenv("APPSETTING_WEBSITE_SITE_NAME") func_name = "run_subworkflow" - + return res diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py index d5e111408..3ba00a674 100644 --- a/benchmarks/wrappers/azure/python/run_workflow.py +++ b/benchmarks/wrappers/azure/python/run_workflow.py @@ -17,7 +17,7 @@ def get_var(obj, path: str): names = path.split(".") - assert(len(names) > 0) + assert len(names) > 0 for n in names: obj = obj[n] @@ -27,12 +27,13 @@ def get_var(obj, path: str): def set_var(obj, val, path: str): names = path.split(".") - assert(len(names) > 0) + assert len(names) > 0 for n in names[:-1]: obj = obj[n] obj[names[-1]] = val + def handler(context: df.DurableOrchestrationContext): start = datetime.datetime.now().timestamp() ts = start @@ -42,8 +43,7 @@ def handler(context: df.DurableOrchestrationContext): with open("definition.json") as f: definition = json.load(f) - states = {n: State.deserialize(n, s) - for n, s in definition["states"].items()} + states = {n: State.deserialize(n, s) for n, s in definition["states"].items()} current = states[definition["root"]] input = context.get_input() @@ -57,7 +57,7 @@ def handler(context: df.DurableOrchestrationContext): if isinstance(current, Task): input = {"payload": res, 
"request_id": request_id} - duration += (now() - ts) + duration += now() - ts if current.failure is None: res = yield context.call_activity(current.func_name, input) @@ -77,7 +77,7 @@ def handler(context: df.DurableOrchestrationContext): "<=": operator.le, "==": operator.eq, ">=": operator.ge, - ">": operator.gt + ">": operator.gt, } next = None @@ -99,7 +99,7 @@ def handler(context: df.DurableOrchestrationContext): array = get_var(res, current.array) tasks = [] if first_state.next: - #call suborchestrator - each map task should proceed with next step directly after it finished. + # call suborchestrator - each map task should proceed with next step directly after it finished. if current.common_params: for elem in array: payload = {} @@ -112,20 +112,24 @@ def handler(context: df.DurableOrchestrationContext): myinput["states"] = current.funcs uuid_name = str(uuid.uuid4())[0:4] - parallel_task = context.call_sub_orchestrator("run_subworkflow", myinput, uuid_name) + parallel_task = context.call_sub_orchestrator( + "run_subworkflow", myinput, uuid_name + ) tasks.append(parallel_task) - else: + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} myinput["root"] = current.root myinput["states"] = current.funcs - + uuid_name = str(uuid.uuid4())[0:4] - parallel_task = context.call_sub_orchestrator("run_subworkflow", myinput, uuid_name) + parallel_task = context.call_sub_orchestrator( + "run_subworkflow", myinput, uuid_name + ) tasks.append(parallel_task) else: if current.common_params: - #assemble input differently + # assemble input differently for elem in array: payload = {} payload["array_element"] = elem @@ -134,12 +138,12 @@ def handler(context: df.DurableOrchestrationContext): payload[param] = get_var(res, param) myinput = {"payload": payload, "request_id": request_id} tasks.append(context.call_activity(first_state.func_name, myinput)) - else: + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} 
tasks.append(context.call_activity(first_state.func_name, myinput)) - duration += (now() - ts) + duration += now() - ts map_res = yield context.task_all(tasks) ts = now() @@ -149,7 +153,7 @@ def handler(context: df.DurableOrchestrationContext): for i in range(current.count): input = {"payload": res, "request_id": request_id} - duration += (now() - ts) + duration += now() - ts res = yield context.call_activity(current.func_name, input) ts = now() @@ -159,7 +163,7 @@ def handler(context: df.DurableOrchestrationContext): for elem in array: input = {"payload": elem, "request_id": request_id} - duration += (now() - ts) + duration += now() - ts yield context.call_activity(current.func_name, input) ts = now() @@ -170,33 +174,35 @@ def handler(context: df.DurableOrchestrationContext): first_states = [] state_to_result = {} for subworkflow in current.funcs: - parallel_states = {n: State.deserialize(n, s) for n, s in subworkflow["states"].items()} - + parallel_states = { + n: State.deserialize(n, s) for n, s in subworkflow["states"].items() + } + first_state = parallel_states[subworkflow["root"]] first_states.append(first_state) state_to_result[first_state.func_name] = [] - if isinstance(first_state, Task): + if isinstance(first_state, Task): input = {"payload": res, "request_id": request_id} - #task directly here if only one state, task within suborchestrator if multiple states. + # task directly here if only one state, task within suborchestrator if multiple states. 
if first_state.next: input["root"] = subworkflow["root"] - input["states"] = subworkflow["states"] #parallel_states + input["states"] = subworkflow["states"] # parallel_states parallel_task = context.call_sub_orchestrator("run_subworkflow", input) parallel_tasks.append(parallel_task) else: parallel_tasks.append(context.call_activity(first_state.func_name, input)) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - + state_to_result[first_state.func_name].append(len(parallel_tasks) - 1) + elif isinstance(first_state, Map): array = get_var(res, first_state.array) tasks = [] if first_state.next: - #call suborchestrator. + # call suborchestrator. if first_state.common_params: - #assemble input differently + # assemble input differently for elem in array: payload = {} payload["array_element"] = elem @@ -206,21 +212,29 @@ def handler(context: df.DurableOrchestrationContext): myinput = {"payload": payload, "request_id": request_id} myinput["root"] = subworkflow["root"] myinput["states"] = subworkflow["states"] - parallel_task = context.call_sub_orchestrator("run_subworkflow", myinput) + parallel_task = context.call_sub_orchestrator( + "run_subworkflow", myinput + ) parallel_tasks.append(parallel_task) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - else: + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} - + myinput["root"] = subworkflow["root"] myinput["states"] = subworkflow["states"] - parallel_task = context.call_sub_orchestrator("run_subworkflow", myinput) + parallel_task = context.call_sub_orchestrator( + "run_subworkflow", myinput + ) parallel_tasks.append(parallel_task) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - else: + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + else: if first_state.common_params: - #assemble input differently + # assemble input 
differently for elem in array: payload = {} payload["array_element"] = elem @@ -228,21 +242,29 @@ def handler(context: df.DurableOrchestrationContext): for param in params: payload[param] = get_var(res, param) myinput = {"payload": payload, "request_id": request_id} - parallel_tasks.append(context.call_activity(first_state.func_name, myinput)) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - else: + parallel_tasks.append( + context.call_activity(first_state.func_name, myinput) + ) + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} - parallel_tasks.append(context.call_activity(first_state.func_name, myinput)) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - - duration += (now() - ts) + parallel_tasks.append( + context.call_activity(first_state.func_name, myinput) + ) + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + + duration += now() - ts map_res = yield context.task_all(parallel_tasks) ts = now() res = {} for state in first_states: - #get respective results of map_res related to func according to state_to_result + # get respective results of map_res related to func according to state_to_result indices = state_to_result[state.func_name] if len(indices) > 1: output = [] @@ -250,7 +272,7 @@ def handler(context: df.DurableOrchestrationContext): output.append(map_res[index]) res[state.func_name] = output else: - #task state + # task state res[state.func_name] = map_res[indices[0]] current = states.get(current.next, None) @@ -261,19 +283,17 @@ def handler(context: df.DurableOrchestrationContext): workflow_name = os.getenv("APPSETTING_WEBSITE_SITE_NAME") func_name = "run_workflow" - payload = { - "func": func_name, - "start": start, - "end": start+duration - } + payload = {"func": func_name, "start": start, "end": start + duration} payload = json.dumps(payload) - redis = 
Redis(host={{REDIS_HOST}}, - port=6379, - decode_responses=True, - socket_connect_timeout=10, - password={{REDIS_PASSWORD}}) + redis = Redis( + host={{REDIS_HOST}}, + port=6379, + decode_responses=True, + socket_connect_timeout=10, + password={{REDIS_PASSWORD}}, + ) key = os.path.join(workflow_name, func_name, request_id, str(uuid.uuid4())[0:8]) redis.set(key, payload) diff --git a/benchmarks/wrappers/gcp/python/handler_workflow.py b/benchmarks/wrappers/gcp/python/handler_workflow.py index 083ef0a53..b8a5a0c44 100644 --- a/benchmarks/wrappers/gcp/python/handler_workflow.py +++ b/benchmarks/wrappers/gcp/python/handler_workflow.py @@ -1,4 +1,3 @@ - import datetime import io import json @@ -8,17 +7,16 @@ import importlib # Add current directory to allow location of packages -sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) -if 'NOSQL_STORAGE_DATABASE' in os.environ: +if "NOSQL_STORAGE_DATABASE" in os.environ: from function import nosql - nosql.nosql.get_instance( - os.environ['NOSQL_STORAGE_DATABASE'] - ) + nosql.nosql.get_instance(os.environ["NOSQL_STORAGE_DATABASE"]) from redis import Redis + def probe_cold_start(): is_cold = False fname = os.path.join("/tmp", "cold_run") @@ -41,7 +39,7 @@ def handler(req): provider_request_id = req.headers.get("Function-Execution-Id") event = req.get_json() - event["payload"]['request-id'] = provider_request_id + event["payload"]["request-id"] = provider_request_id full_function_name = os.getenv("MY_FUNCTION_NAME") workflow_name, func_name = full_function_name.split("___") function = importlib.import_module(f"function.{func_name}") @@ -56,7 +54,7 @@ def handler(req): "end": end, "is_cold": is_cold, "container_id": container_id, - "provider.request_id": provider_request_id + "provider.request_id": provider_request_id, } func_res = os.getenv("SEBS_FUNCTION_RESULT") @@ -73,11 +71,13 @@ def 
handler(req): payload = json.dumps(payload) - redis = Redis(host={{REDIS_HOST}}, - port=6379, - decode_responses=True, - socket_connect_timeout=10, - password={{REDIS_PASSWORD}}) + redis = Redis( + host={{REDIS_HOST}}, + port=6379, + decode_responses=True, + socket_connect_timeout=10, + password={{REDIS_PASSWORD}}, + ) req_id = event["request_id"] key = os.path.join(workflow_name, func_name, req_id, str(uuid.uuid4())[0:8]) diff --git a/benchmarks/wrappers/gcp/python/storage.py b/benchmarks/wrappers/gcp/python/storage.py index 4663721b0..a6623b638 100644 --- a/benchmarks/wrappers/gcp/python/storage.py +++ b/benchmarks/wrappers/gcp/python/storage.py @@ -72,27 +72,27 @@ def download_stream(self, bucket, file): incr_io_env(size, "STORAGE_DOWNLOAD_BYTES") data.seek(0) - #return data + # return data return data.getbuffer() def download_within_range(self, bucket, file, start_byte, stop_byte): bucket_instance = self.client.bucket(bucket) blob = bucket_instance.blob(file) - blob.download_to_filename('/tmp/' + file, start=start_byte, end=stop_byte) - with open('/tmp/' + file, 'r') as f: + blob.download_to_filename("/tmp/" + file, start=start_byte, end=stop_byte) + with open("/tmp/" + file, "r") as f: content = f.read() return content def list_directory(self, bucket, prefix): bucket_instance = self.client.bucket(bucket) - #objects = list(self.client.list_blobs(bucket_or_name=bucket_instance,prefix=prefix)) + # objects = list(self.client.list_blobs(bucket_or_name=bucket_instance,prefix=prefix)) objects = self.client.bucket(bucket).list_blobs(prefix=prefix) names = [] for obj in objects: names.append(obj.name) return names - #for obj in objects: + # for obj in objects: # yield obj.name def get_instance(): From 1b072996ae99155c0f4d556e2b6d440ee3b49a8d Mon Sep 17 00:00:00 2001 From: Russellpang Date: Tue, 18 Nov 2025 21:53:50 +0100 Subject: [PATCH 64/82] test tuff auto-fixing flake errors --- .tuff.toml | 7 +++ .../600.workflows/6100.1000-genome/input.py | 1 - 
.../6100.1000-genome/python/frequency.py | 29 ++++++------ .../6100.1000-genome/python/individuals.py | 2 - .../python/individuals_merge.py | 5 +-- .../python/mutation_overlap.py | 44 ++++++++----------- .../6100.1000-genome/python/sifting.py | 1 - .../6101.1000-genome-individuals/input.py | 1 - .../python/individuals.py | 1 - .../600.workflows/6200.trip-booking/input.py | 1 - .../631.parallel-download/python/process.py | 2 +- .../600.workflows/650.vid/python/analyse.py | 7 +-- .../600.workflows/650.vid/python/summarize.py | 6 --- .../660.map-reduce/python/reduce.py | 2 - .../660.map-reduce/python/shuffle.py | 3 +- benchmarks/600.workflows/670.auth/input.py | 1 - .../600.workflows/670.auth/python/auth.py | 2 - .../600.workflows/680.excamera/input.py | 1 - .../680.excamera/python/rebase.py | 2 +- .../600.workflows/690.ml/python/train.py | 3 -- 20 files changed, 46 insertions(+), 75 deletions(-) create mode 100644 .tuff.toml diff --git a/.tuff.toml b/.tuff.toml new file mode 100644 index 000000000..1caa6b79c --- /dev/null +++ b/.tuff.toml @@ -0,0 +1,7 @@ +line-length = 100 +target-version = "py38" +[lint] +select = ["E", "F", "W"] + +[lint.isort] +known-first-party = ["sebs"] diff --git a/benchmarks/600.workflows/6100.1000-genome/input.py b/benchmarks/600.workflows/6100.1000-genome/input.py index 13ed3f8b8..c84d16130 100644 --- a/benchmarks/600.workflows/6100.1000-genome/input.py +++ b/benchmarks/600.workflows/6100.1000-genome/input.py @@ -1,5 +1,4 @@ import os -import re import uuid import io diff --git a/benchmarks/600.workflows/6100.1000-genome/python/frequency.py b/benchmarks/600.workflows/6100.1000-genome/python/frequency.py index 5a89a0182..b2fe9dded 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/frequency.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/frequency.py @@ -11,7 +11,6 @@ import collections from collections import Counter -import datetime import os from . 
import storage @@ -19,7 +18,7 @@ class ReadData: def read_names(self, POP, pop_dir, columns_file): - tic = time.perf_counter() + time.perf_counter() namefile = pop_dir + POP f = open(namefile, "r") text = f.read() @@ -38,11 +37,10 @@ def read_names(self, POP, pop_dir, columns_file): def read_rs_numbers(self, siftfile, SIFT): ## NB This file is in the format of: ## line number, rs number, ENSG number, SIFT, Phenotype - tic = time.perf_counter() + time.perf_counter() rs_numbers = [] variations = {} map_variations = {} - all_variations = [] sift_file = open(siftfile, "r") for item in sift_file: item = item.split() @@ -54,7 +52,7 @@ def read_rs_numbers(self, siftfile, SIFT): return rs_numbers, map_variations def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename): - tic = time.perf_counter() + time.perf_counter() mutation_index_array = [] for name in ids: filename = data_dir + individuals_merge_filename + "/" + chrom + "." + name @@ -75,7 +73,7 @@ def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_f class Results: def overlap_ind(self, ids, mutation_index_array, n_runs, n_indiv): n_p = len(mutation_index_array) - tic = time.perf_counter() + time.perf_counter() list_p = np.linspace(0, n_p - 1, n_p).astype(int) mutation_overlap = [] random_indiv = [] @@ -94,7 +92,7 @@ def overlap_ind(self, ids, mutation_index_array, n_runs, n_indiv): return mutation_overlap, random_indiv def histogram_overlap(self, mutation_overlap, n_runs): - tic = time.perf_counter() + time.perf_counter() histogram_overlap = [] for run in range(n_runs): final_counts = [count for item, count in mutation_overlap[run].items()] @@ -104,14 +102,14 @@ def histogram_overlap(self, mutation_overlap, n_runs): class PlotData: def plot_histogram_overlap(self, POP, histogram_overlap, outputFile, n_runs): - tic = time.perf_counter() + time.perf_counter() for run in range(n_runs): output = outputFile + str(run) + ".png" final_counts = [count for item, count 
in histogram_overlap[run].items()] N = len(final_counts) x = range(N) width = 1 / 1.5 - bar1 = plt.bar(x, final_counts, width, color="grey") + plt.bar(x, final_counts, width, color="grey") plt.ylabel("Mutations") plt.xlabel("Individuals") plt.xticks(np.arange(1, N + 1)) @@ -121,7 +119,7 @@ def plot_histogram_overlap(self, POP, histogram_overlap, outputFile, n_runs): class WriteData: def write_histogram_overlap(self, histogram_overlapfile, histogram_overlap, n_runs, n_indiv): - tic = time.perf_counter() + time.perf_counter() for run in range(n_runs): overlapfile = histogram_overlapfile + str(run) + ".txt" f = open(overlapfile, "w") @@ -134,7 +132,7 @@ def write_histogram_overlap(self, histogram_overlapfile, histogram_overlap, n_ru f.close() def write_mutation_overlap(self, mutation_overlapfile, mutation_overlap, n_runs): - tic = time.perf_counter() + time.perf_counter() for run in range(n_runs): overlapfile = mutation_overlapfile + str(run) + ".txt" f = open(overlapfile, "w") @@ -144,7 +142,7 @@ def write_mutation_overlap(self, mutation_overlapfile, mutation_overlap, n_runs) f.close() def write_random_indiv(self, randomindiv_file, random_indiv, n_runs): - tic = time.perf_counter() + time.perf_counter() for run in range(n_runs): randomfile = randomindiv_file + str(run) + ".txt" f = open(randomfile, "w") @@ -154,14 +152,14 @@ def write_random_indiv(self, randomindiv_file, random_indiv, n_runs): f.close() def write_mutation_index_array(self, mutation_index_array_file, mutation_index_array): - tic = time.perf_counter() + time.perf_counter() f = open(mutation_index_array_file, "w") for item in mutation_index_array: f.write("%s\n" % item) f.close() def write_map_variations(self, map_variations_file, map_variations): - tic = time.perf_counter() + time.perf_counter() f = open(map_variations_file, "w") for key, count in map_variations.items(): f.write(key + "\t" + str(count) + "\n") @@ -209,7 +207,6 @@ def handler(event): if not os.path.exists(plot_dir): 
os.makedirs(plot_dir, exist_ok=True) - OutputFormat = ".png" chrom = "chr" + str(c) font = {"family": "serif", "size": 14} @@ -246,7 +243,7 @@ def handler(event): randomindiv_file = outdata_dir + "random_indiv" + str(c) + "_s" + str(SIFT) + "_" + POP + "_" ids = rd.read_names(POP, pop_dir, columns_file) - n_pairs = len(ids) / 2 + len(ids) / 2 rs_numbers, map_variations = rd.read_rs_numbers(siftfile, SIFT) mutation_index_array = rd.read_individuals( diff --git a/benchmarks/600.workflows/6100.1000-genome/python/individuals.py b/benchmarks/600.workflows/6100.1000-genome/python/individuals.py index a341fe200..922c6b6cd 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/individuals.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/individuals.py @@ -1,10 +1,8 @@ import os -import uuid import tarfile import shutil import re from . import storage -import datetime client = storage.storage.get_instance() diff --git a/benchmarks/600.workflows/6100.1000-genome/python/individuals_merge.py b/benchmarks/600.workflows/6100.1000-genome/python/individuals_merge.py index 0c3254025..7afe3ea26 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/individuals_merge.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/individuals_merge.py @@ -4,7 +4,6 @@ import tarfile import tempfile import shutil -import datetime def handler(event): @@ -61,7 +60,7 @@ def writefile(filename, content): def merging(c, tar_files): - tic = time.perf_counter() + time.perf_counter() merged_dir = "merged_chr{}".format(c) merged_dir = os.path.join("/tmp", merged_dir) @@ -70,7 +69,7 @@ def merging(c, tar_files): data = {} for tar in tar_files: - tic_iter = time.perf_counter() + time.perf_counter() os.makedirs("/tmp/temp_dir", exist_ok=True) with tempfile.TemporaryDirectory(dir="/tmp/temp_dir") as temp_dir: for filename in extract_all(os.path.join("/tmp", tar), temp_dir): diff --git a/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py 
b/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py index 4818b3410..ae32a5882 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py @@ -12,9 +12,6 @@ import itertools from matplotlib import pyplot import matplotlib as mpl -import collections -from collections import Counter -import datetime import os from . import storage @@ -22,7 +19,7 @@ class ReadData: def read_names(self, POP, pop_dir, columns_file): - tic = time.perf_counter() + time.perf_counter() namefile = pop_dir + POP f = open(namefile, "r") text = f.read() @@ -42,11 +39,9 @@ def read_names(self, POP, pop_dir, columns_file): def read_rs_numbers(self, siftfile, SIFT): ## NB This file is in the format of: ## line number, rs number, ENSG number, SIFT, Phenotype - tic = time.perf_counter() + time.perf_counter() rs_numbers = [] - variations = {} map_variations = {} - all_variations = [] sift_file = open(siftfile, "r") for item in sift_file: item = item.split() @@ -57,7 +52,7 @@ def read_rs_numbers(self, siftfile, SIFT): return rs_numbers, map_variations def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename): - tic = time.perf_counter() + time.perf_counter() mutation_index_array = [] total_mutations = {} total_mutations_list = [] @@ -75,7 +70,7 @@ def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_f return mutation_index_array, total_mutations, total_mutations_list def read_pairs_overlap(self, indpairsfile): - tic = time.perf_counter() + time.perf_counter() pairs_overlap = np.loadtxt(indpairsfile, unpack=True) pairs_overlap = np.transpose(pairs_overlap) @@ -84,7 +79,7 @@ def read_pairs_overlap(self, indpairsfile): class Results: def group_indivuals(self, total_mutations_list, n_runs): - tic = time.perf_counter() + time.perf_counter() n_group = 26 random_mutations_list = [] for run in range(n_runs): @@ -92,7 +87,7 @@ def 
group_indivuals(self, total_mutations_list, n_runs): return random_mutations_list def pair_individuals(self, mutation_index_array, n_runs): - tic = time.perf_counter() + time.perf_counter() n_p = len(mutation_index_array) n_pairs = int(round(n_p / 2)) @@ -110,7 +105,7 @@ def pair_individuals(self, mutation_index_array, n_runs): return pairs_overlap def total_pair_individuals(self, mutation_index_array): - tic = time.perf_counter() + time.perf_counter() n_p = len(mutation_index_array) total_pairs_overlap = np.zeros((n_p, n_p)) simetric_overlap = np.zeros((n_p, n_p)) @@ -127,7 +122,7 @@ def total_pair_individuals(self, mutation_index_array): return total_pairs_overlap, simetric_overlap def half_pair_individuals(self, mutation_index_array): - tic = time.perf_counter() + time.perf_counter() n_p = len(mutation_index_array) n_pairs = int(round(n_p / 2)) pairs_overlap = np.zeros((n_pairs, n_pairs)) @@ -143,7 +138,7 @@ def half_pair_individuals(self, mutation_index_array): def gene_pairs(self, mutation_index_array): - tic = time.perf_counter() + time.perf_counter() n_p = len(mutation_index_array) gene_pair_list = {} for pp in range(n_p): @@ -160,11 +155,11 @@ def gene_pairs(self, mutation_index_array): class PlotData: def individual_overlap(self, POP, pairs_overlap, outputFile, c, SIFT): - tic = time.perf_counter() + time.perf_counter() pairs_overlap = np.array(pairs_overlap) - min_p = np.min(pairs_overlap) + np.min(pairs_overlap) max_p = np.max(pairs_overlap) nbins = int(max_p) + 1 n_runs = len(pairs_overlap) @@ -175,7 +170,6 @@ def individual_overlap(self, POP, pairs_overlap, outputFile, c, SIFT): fig = plt.figure(frameon=False, figsize=(10, 9)) ax = fig.add_subplot(111) - hists = [] max_h = 0 for run in range(n_runs): h, edges = np.histogram(pairs_overlap[run], bins=bin_edges) @@ -211,8 +205,8 @@ def individual_overlap(self, POP, pairs_overlap, outputFile, c, SIFT): plt.close() def total_colormap_overlap(self, POP, total_pairs_overlap, outputFile): - tic = 
time.perf_counter() - fig = plt.figure() + time.perf_counter() + plt.figure() cmap = mpl.colors.ListedColormap(["blue", "black", "red", "green", "pink"]) img = pyplot.imshow(total_pairs_overlap, interpolation="nearest", cmap=cmap, origin="lower") pyplot.colorbar(img, cmap=cmap) @@ -223,18 +217,18 @@ def total_colormap_overlap(self, POP, total_pairs_overlap, outputFile): class WriteData: def write_pair_individuals(self, indpairsfile, pairs_overlap): - tic = time.perf_counter() + time.perf_counter() np.savetxt(indpairsfile, pairs_overlap, fmt="%i") def write_gene_pairs(self, genepairsfile, gene_pair_list): - tic = time.perf_counter() + time.perf_counter() f = open(genepairsfile, "w") for key, count in gene_pair_list.items(): f.write(key + "\t" + str(count) + "\n") f.close() def write_total_indiv(self, total_mutations_filename, total_mutations): - tic = time.perf_counter() + time.perf_counter() f = open(total_mutations_filename, "w") for key, count in total_mutations.items(): f.write(key + "\t" + str(count) + "\n") @@ -253,7 +247,7 @@ def write_mutation_index_array(self, mutation_index_array_file, mutation_index_a f.close() def write_map_variations(self, map_variations_file, map_variations): - tic = time.perf_counter() + time.perf_counter() f = open(map_variations_file, "w") for key, count in map_variations.items(): f.write(key + "\t" + str(count) + "\n") @@ -312,7 +306,7 @@ def handler(event): tar.extractall(path="/tmp/" + individuals_merge_filename) tar.close() - tic = time.perf_counter() + time.perf_counter() rd = ReadData() res = Results() @@ -375,7 +369,7 @@ def handler(event): ) ids = rd.read_names(POP, pop_dir, columns_file) - n_pairs = len(ids) / 2 + len(ids) / 2 rs_numbers, map_variations = rd.read_rs_numbers(siftfile, SIFT) mutation_index_array, total_mutations, total_mutations_list = rd.read_individuals( diff --git a/benchmarks/600.workflows/6100.1000-genome/python/sifting.py b/benchmarks/600.workflows/6100.1000-genome/python/sifting.py index 
0517f8ce8..d4b2e4b0c 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/sifting.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/sifting.py @@ -2,7 +2,6 @@ import re from . import storage import subprocess -import datetime def readfile(file): diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/input.py b/benchmarks/600.workflows/6101.1000-genome-individuals/input.py index 23770c1ea..ebb672230 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/input.py +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/input.py @@ -1,5 +1,4 @@ import os -import re import uuid import io diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py b/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py index cca00c95e..922c6b6cd 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py @@ -1,5 +1,4 @@ import os -import uuid import tarfile import shutil import re diff --git a/benchmarks/600.workflows/6200.trip-booking/input.py b/benchmarks/600.workflows/6200.trip-booking/input.py index e4f4a83dd..d5393a2bc 100644 --- a/benchmarks/600.workflows/6200.trip-booking/input.py +++ b/benchmarks/600.workflows/6200.trip-booking/input.py @@ -17,7 +17,6 @@ def generate_input( nosql_func, ): - input_config = {} # test - invoke a single trip, succeed # small - fail in the middle diff --git a/benchmarks/600.workflows/631.parallel-download/python/process.py b/benchmarks/600.workflows/631.parallel-download/python/process.py index 70ecbe98c..7fcabed04 100644 --- a/benchmarks/600.workflows/631.parallel-download/python/process.py +++ b/benchmarks/600.workflows/631.parallel-download/python/process.py @@ -6,6 +6,6 @@ def handler(event): blob = event["blob"] client = storage.storage.get_instance() - buffer = client.download_stream(bucket, blob) + client.download_stream(bucket, blob) return 
"ok" diff --git a/benchmarks/600.workflows/650.vid/python/analyse.py b/benchmarks/600.workflows/650.vid/python/analyse.py index a6c134d6c..f2bc2db45 100644 --- a/benchmarks/600.workflows/650.vid/python/analyse.py +++ b/benchmarks/600.workflows/650.vid/python/analyse.py @@ -1,7 +1,4 @@ import os -import io -import json -import sys from . import storage import cv2 @@ -112,8 +109,8 @@ def load_frames(benchmark_bucket, bucket, blobs, dest_dir): def detect(net, img): - rows = img.shape[0] - cols = img.shape[1] + img.shape[0] + img.shape[1] img = cv2.dnn.blobFromImage(img, size=(300, 300), swapRB=True, crop=False) net.setInput(img) out = net.forward() diff --git a/benchmarks/600.workflows/650.vid/python/summarize.py b/benchmarks/600.workflows/650.vid/python/summarize.py index a07af9526..b7ceeb282 100644 --- a/benchmarks/600.workflows/650.vid/python/summarize.py +++ b/benchmarks/600.workflows/650.vid/python/summarize.py @@ -1,9 +1,3 @@ -import os -import io -import uuid -import json -import sys -from . import storage def handler(event): diff --git a/benchmarks/600.workflows/660.map-reduce/python/reduce.py b/benchmarks/600.workflows/660.map-reduce/python/reduce.py index cd0356a42..1d9d99b9b 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/reduce.py +++ b/benchmarks/600.workflows/660.map-reduce/python/reduce.py @@ -1,6 +1,4 @@ import os -import io -import json from . import storage diff --git a/benchmarks/600.workflows/660.map-reduce/python/shuffle.py b/benchmarks/600.workflows/660.map-reduce/python/shuffle.py index 5d014e415..e47573e4c 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/shuffle.py +++ b/benchmarks/600.workflows/660.map-reduce/python/shuffle.py @@ -1,12 +1,11 @@ import os -import json from . 
import storage def handler(event): lst = event["list"] benchmark_bucket = lst[0]["benchmark_bucket"] - bucket = lst[0]["bucket"] + lst[0]["bucket"] prefix = lst[0]["prefix"] client = storage.storage.get_instance() diff --git a/benchmarks/600.workflows/670.auth/input.py b/benchmarks/600.workflows/670.auth/input.py index 7f0b54cd8..46e264fdf 100644 --- a/benchmarks/600.workflows/670.auth/input.py +++ b/benchmarks/600.workflows/670.auth/input.py @@ -1,4 +1,3 @@ -import random size_generators = {"test": 10, "small": 100, "large": 1000} diff --git a/benchmarks/600.workflows/670.auth/python/auth.py b/benchmarks/600.workflows/670.auth/python/auth.py index d9f4f69b7..e24bc32a1 100644 --- a/benchmarks/600.workflows/670.auth/python/auth.py +++ b/benchmarks/600.workflows/670.auth/python/auth.py @@ -1,5 +1,3 @@ -import random -import string import pyaes import base64 diff --git a/benchmarks/600.workflows/680.excamera/input.py b/benchmarks/600.workflows/680.excamera/input.py index 57045e2f4..74b19aa3e 100644 --- a/benchmarks/600.workflows/680.excamera/input.py +++ b/benchmarks/600.workflows/680.excamera/input.py @@ -1,4 +1,3 @@ -import random import os size_generators = {"test": (18, 6), "small": (30, 6), "large": (60, 6)} diff --git a/benchmarks/600.workflows/680.excamera/python/rebase.py b/benchmarks/600.workflows/680.excamera/python/rebase.py index c83a5659a..eabea0d2b 100644 --- a/benchmarks/600.workflows/680.excamera/python/rebase.py +++ b/benchmarks/600.workflows/680.excamera/python/rebase.py @@ -104,7 +104,7 @@ def handler(event): client.download(benchmark_bucket, output_bucket + "/" + file, path) except: # -1.state is generated by rebase itself - if not "-1.state" in file: + if "-1.state" not in file: raise _, output_paths = rebase(segs, data_dir) diff --git a/benchmarks/600.workflows/690.ml/python/train.py b/benchmarks/600.workflows/690.ml/python/train.py index 8ee6bfbd6..8b1807ce3 100644 --- a/benchmarks/600.workflows/690.ml/python/train.py +++ 
b/benchmarks/600.workflows/690.ml/python/train.py @@ -1,11 +1,8 @@ import os import uuid -import sys from . import storage from sklearn.model_selection import train_test_split -from sklearn.svm import SVC -from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier from sklearn.preprocessing import StandardScaler import numpy as np From 7de931c26e860fa495b51eeffb0debdfd46b6914 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Wed, 19 Nov 2025 15:45:53 +0100 Subject: [PATCH 65/82] formatting --- benchmarks/600.workflows/6200.trip-booking/input.py | 1 - benchmarks/600.workflows/650.vid/python/summarize.py | 2 -- benchmarks/600.workflows/670.auth/input.py | 1 - 3 files changed, 4 deletions(-) diff --git a/benchmarks/600.workflows/6200.trip-booking/input.py b/benchmarks/600.workflows/6200.trip-booking/input.py index d5393a2bc..0305642c8 100644 --- a/benchmarks/600.workflows/6200.trip-booking/input.py +++ b/benchmarks/600.workflows/6200.trip-booking/input.py @@ -17,7 +17,6 @@ def generate_input( nosql_func, ): - # test - invoke a single trip, succeed # small - fail in the middle # large - fail at the last step diff --git a/benchmarks/600.workflows/650.vid/python/summarize.py b/benchmarks/600.workflows/650.vid/python/summarize.py index b7ceeb282..24304557a 100644 --- a/benchmarks/600.workflows/650.vid/python/summarize.py +++ b/benchmarks/600.workflows/650.vid/python/summarize.py @@ -1,5 +1,3 @@ - - def handler(event): frames = event["frames"] diff --git a/benchmarks/600.workflows/670.auth/input.py b/benchmarks/600.workflows/670.auth/input.py index 46e264fdf..2529739e5 100644 --- a/benchmarks/600.workflows/670.auth/input.py +++ b/benchmarks/600.workflows/670.auth/input.py @@ -1,4 +1,3 @@ - size_generators = {"test": 10, "small": 100, "large": 1000} From a08cb6d86be53dc9c1226f5faa6ce266bb838d3a Mon Sep 17 00:00:00 2001 From: xipang Date: Fri, 21 Nov 2025 20:01:54 +0100 Subject: [PATCH 66/82] fix flake8 reported errors --- 
.../600.workflows/6100.1000-genome/input.py | 7 +++-- .../6100.1000-genome/python/frequency.py | 24 +++++++---------- .../python/mutation_overlap.py | 26 +++++++------------ .../6100.1000-genome/python/sifting.py | 10 +++---- .../6101.1000-genome-individuals/input.py | 7 +++-- .../640.selfish-detour/python/measure.py | 4 +-- .../600.workflows/650.vid/python/decode.py | 2 +- .../660.map-reduce/python/split.py | 4 +-- .../680.excamera/python/encode.py | 8 +++++- .../680.excamera/python/rebase.py | 8 ++++-- .../680.excamera/python/reencode.py | 5 +++- .../680.excamera/python/split.py | 2 +- 12 files changed, 52 insertions(+), 55 deletions(-) diff --git a/benchmarks/600.workflows/6100.1000-genome/input.py b/benchmarks/600.workflows/6100.1000-genome/input.py index c84d16130..51d4e234c 100644 --- a/benchmarks/600.workflows/6100.1000-genome/input.py +++ b/benchmarks/600.workflows/6100.1000-genome/input.py @@ -36,7 +36,6 @@ def generate_input( "SAS", ] for name in files: - # if name != "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf": path = os.path.join(data_dir, name) upload_func(0, name, path) @@ -54,10 +53,10 @@ def generate_input( # regex = re.compile('(?!#)') start = i * range_per_job end = i * range_per_job + range_per_job - # print("start: ", start, "end: ", end, "range_per_job: ", range_per_job, "num_individuals_jobs: ", num_individuals_jobs) + # print("start: ", start, "end: ", end, "range_per_job: ", range_per_job) # data = list(filter(regex.match, content[int(start):int(end)])) - data = content[int(start) : int(end)] - # name with start and end lines is not needed as all individuals jobs can just read their entire file. + data = content[int(start):int(end)] + # start/end line names not needed; jobs read entire file chunk. 
name = str(uuid.uuid4())[:8] upload_data = io.BytesIO() diff --git a/benchmarks/600.workflows/6100.1000-genome/python/frequency.py b/benchmarks/600.workflows/6100.1000-genome/python/frequency.py index b2fe9dded..ab22ad4f5 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/frequency.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/frequency.py @@ -1,20 +1,17 @@ +import collections +import os +import tarfile import time - -tic = time.perf_counter() -import numpy as np +from collections import Counter from random import sample -import os.path -import matplotlib -matplotlib.use("Agg") import matplotlib.pyplot as plt -import collections -from collections import Counter - +import numpy as np -import os from . import storage +plt.switch_backend("Agg") + class ReadData: def read_names(self, POP, pop_dir, columns_file): @@ -35,8 +32,8 @@ def read_names(self, POP, pop_dir, columns_file): return ids def read_rs_numbers(self, siftfile, SIFT): - ## NB This file is in the format of: - ## line number, rs number, ENSG number, SIFT, Phenotype + # NB This file is in the format of: + # line number, rs number, ENSG number, SIFT, Phenotype time.perf_counter() rs_numbers = [] variations = {} @@ -212,9 +209,6 @@ def handler(event): font = {"family": "serif", "size": 14} plt.rc("font", **font) - # untar input data - import tarfile - tar = tarfile.open(individuals_merge_file) tar.extractall(path="/tmp/" + individuals_merge_filename) tar.close() diff --git a/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py b/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py index ae32a5882..f0ba2604b 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py @@ -1,21 +1,18 @@ +import itertools +import os +import tarfile import time - -tic = time.perf_counter() -import numpy as np from random import sample -import os -import os.path -import matplotlib 
-matplotlib.use("Agg") +import matplotlib as mpl import matplotlib.pyplot as plt -import itertools +import numpy as np from matplotlib import pyplot -import matplotlib as mpl -import os from . import storage +plt.switch_backend("Agg") + class ReadData: def read_names(self, POP, pop_dir, columns_file): @@ -37,8 +34,8 @@ def read_names(self, POP, pop_dir, columns_file): return ids def read_rs_numbers(self, siftfile, SIFT): - ## NB This file is in the format of: - ## line number, rs number, ENSG number, SIFT, Phenotype + # NB This file is in the format of: + # line number, rs number, ENSG number, SIFT, Phenotype time.perf_counter() rs_numbers = [] map_variations = {} @@ -299,15 +296,10 @@ def handler(event): font = {"family": "serif", "size": 14} plt.rc("font", **font) - # untar input data - import tarfile - tar = tarfile.open(individuals_merge_file) tar.extractall(path="/tmp/" + individuals_merge_filename) tar.close() - time.perf_counter() - rd = ReadData() res = Results() wr = WriteData() diff --git a/benchmarks/600.workflows/6100.1000-genome/python/sifting.py b/benchmarks/600.workflows/6100.1000-genome/python/sifting.py index d4b2e4b0c..e6cbb1938 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/sifting.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/sifting.py @@ -35,7 +35,7 @@ def handler(event): siftfile = os.path.join("/tmp", siftfile) with open(siftfile, "w") as f: subprocess.run( - ['grep -n "deleterious\|tolerated" {}'.format(inputfile)], + ['grep -n "deleterious\\\\|tolerated" {}'.format(inputfile)], shell=True, stdout=f, ) @@ -46,11 +46,11 @@ def handler(event): data = list(filter(r3.match, data_temp)) with open(final, "w") as f: - for l in data: - line = str(int(l.split("\t")[0].split(":")[0]) - int(header)) - id = l.split("\t")[2] + for line_data in data: + line = str(int(line_data.split("\t")[0].split(":")[0]) - int(header)) + id = line_data.split("\t")[2] - sifts = l.split("\t")[7].split("|") + sifts = 
line_data.split("\t")[7].split("|") sifts = sifts[4] + " " + sifts[16] + " " + sifts[17] sifts = sifts.replace("(", " ").replace(")", "") diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/input.py b/benchmarks/600.workflows/6101.1000-genome-individuals/input.py index ebb672230..01f211781 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/input.py +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/input.py @@ -38,7 +38,6 @@ def generate_input( ] for name in files: if name == "ALL.chr21.1250.vcf" or name == "columns.txt": - # if name != "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf": path = os.path.join(data_dir, name) upload_func(0, name, path) @@ -55,10 +54,10 @@ def generate_input( # regex = re.compile('(?!#)') start = i * range_per_job end = i * range_per_job + range_per_job - # print("start: ", start, "end: ", end, "range_per_job: ", range_per_job, "num_individuals_jobs: ", num_individuals_jobs) + # print("start: ", start, "end: ", end, "range_per_job: ", range_per_job) # data = list(filter(regex.match, content[int(start):int(end)])) - data = content[int(start) : int(end)] - # name with start and end lines is not needed as all individuals jobs can just read their entire file. + data = content[int(start):int(end)] + # start/end line names not needed; jobs read entire file chunk. 
name = str(uuid.uuid4())[:8] upload_data = io.BytesIO() diff --git a/benchmarks/600.workflows/640.selfish-detour/python/measure.py b/benchmarks/600.workflows/640.selfish-detour/python/measure.py index bac536cf6..d602c4e3b 100644 --- a/benchmarks/600.workflows/640.selfish-detour/python/measure.py +++ b/benchmarks/600.workflows/640.selfish-detour/python/measure.py @@ -1,6 +1,6 @@ -import os import json -from ctypes import * +import os +from ctypes import POINTER, c_double, c_int, c_ulonglong, cast, cdll def handler(event): diff --git a/benchmarks/600.workflows/650.vid/python/decode.py b/benchmarks/600.workflows/650.vid/python/decode.py index 88504201c..c293cdf30 100644 --- a/benchmarks/600.workflows/650.vid/python/decode.py +++ b/benchmarks/600.workflows/650.vid/python/decode.py @@ -9,7 +9,7 @@ def chunks(lst, n): for i in range(0, len(lst), n): - yield lst[i : i + n] + yield lst[i:i + n] def load_video(benchmark_bucket, bucket, blob, dest_dir): diff --git a/benchmarks/600.workflows/660.map-reduce/python/split.py b/benchmarks/600.workflows/660.map-reduce/python/split.py index 20aaaf0b1..582f53bfe 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/split.py +++ b/benchmarks/600.workflows/660.map-reduce/python/split.py @@ -7,8 +7,8 @@ def chunks(lst, n): m = int(len(lst) / n) for i in range(n - 1): - yield lst[i * m : i * m + m] - tail = lst[(n - 1) * m :] + yield lst[i * m:i * m + m] + tail = lst[(n - 1) * m:] if len(tail) > 0: yield tail diff --git a/benchmarks/600.workflows/680.excamera/python/encode.py b/benchmarks/600.workflows/680.excamera/python/encode.py index 031ee0c02..a79f1fc43 100644 --- a/benchmarks/600.workflows/680.excamera/python/encode.py +++ b/benchmarks/600.workflows/680.excamera/python/encode.py @@ -5,7 +5,13 @@ import logging import shutil -VPXENC = "/tmp/vpxenc --ivf --codec=vp8 --good --cpu-used=0 --end-usage=cq --min-q=0 --max-q=63 --cq-level={quality} --buf-initial-sz=10000 --buf-optimal-sz=20000 --buf-sz=40000 --undershoot-pct=100 
--passes=2 --auto-alt-ref=1 --threads=1 --token-parts=0 --tune=ssim --target-bitrate=4294967295 -o {output}.ivf {input}.y4m" +VPXENC = ( + "/tmp/vpxenc --ivf --codec=vp8 --good --cpu-used=0 --end-usage=cq " + "--min-q=0 --max-q=63 --cq-level={quality} --buf-initial-sz=10000 " + "--buf-optimal-sz=20000 --buf-sz=40000 --undershoot-pct=100 --passes=2 " + "--auto-alt-ref=1 --threads=1 --token-parts=0 --tune=ssim " + "--target-bitrate=4294967295 -o {output}.ivf {input}.y4m" +) TERMINATE_CHUNK = "/tmp/xc-terminate-chunk {input}.ivf {output}.ivf" XC_DUMP_0 = "/tmp/xc-dump {input}.ivf {output}.state" diff --git a/benchmarks/600.workflows/680.excamera/python/rebase.py b/benchmarks/600.workflows/680.excamera/python/rebase.py index eabea0d2b..64a761030 100644 --- a/benchmarks/600.workflows/680.excamera/python/rebase.py +++ b/benchmarks/600.workflows/680.excamera/python/rebase.py @@ -5,7 +5,11 @@ import logging import shutil -XC_ENC_REBASE = "/tmp/xc-enc -W -w 0.75 -i y4m -o {output}.ivf -r -I {source_state}.state -p {input_pred}.ivf -S {pred_state}.state {extra} {input}.y4m" +XC_ENC_REBASE = ( + "/tmp/xc-enc -W -w 0.75 -i y4m -o {output}.ivf -r " + "-I {source_state}.state -p {input_pred}.ivf -S {pred_state}.state " + "{extra} {input}.y4m" +) client = storage.storage.get_instance() @@ -102,7 +106,7 @@ def handler(event): else: file = prefix + file client.download(benchmark_bucket, output_bucket + "/" + file, path) - except: + except Exception: # -1.state is generated by rebase itself if "-1.state" not in file: raise diff --git a/benchmarks/600.workflows/680.excamera/python/reencode.py b/benchmarks/600.workflows/680.excamera/python/reencode.py index 9695fa26f..2b95aea70 100644 --- a/benchmarks/600.workflows/680.excamera/python/reencode.py +++ b/benchmarks/600.workflows/680.excamera/python/reencode.py @@ -5,7 +5,10 @@ import logging import shutil -XC_ENC_FIRST_FRAME = "/tmp/xc-enc -W -w 0.75 -i y4m -o {output}.ivf -r -I {source_state}.state -p {input_pred}.ivf {extra} 
{input}.y4m" +XC_ENC_FIRST_FRAME = ( + "/tmp/xc-enc -W -w 0.75 -i y4m -o {output}.ivf -r " + "-I {source_state}.state -p {input_pred}.ivf {extra} {input}.y4m" +) client = storage.storage.get_instance() diff --git a/benchmarks/600.workflows/680.excamera/python/split.py b/benchmarks/600.workflows/680.excamera/python/split.py index 69801699a..80af78357 100644 --- a/benchmarks/600.workflows/680.excamera/python/split.py +++ b/benchmarks/600.workflows/680.excamera/python/split.py @@ -3,7 +3,7 @@ def chunks(lst, n): for i in range(0, len(lst), n): - yield lst[i : i + n] + yield lst[i:i + n] def handler(event): From db87fac82f2e616ed3146084a5ca3f476f641f67 Mon Sep 17 00:00:00 2001 From: xipang Date: Fri, 21 Nov 2025 20:13:12 +0100 Subject: [PATCH 67/82] fix flake8 reported errors --- benchmarks/600.workflows/6100.1000-genome/input.py | 2 +- .../600.workflows/6101.1000-genome-individuals/input.py | 2 +- benchmarks/600.workflows/650.vid/python/decode.py | 2 +- benchmarks/600.workflows/660.map-reduce/python/split.py | 4 ++-- benchmarks/600.workflows/680.excamera/python/split.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/600.workflows/6100.1000-genome/input.py b/benchmarks/600.workflows/6100.1000-genome/input.py index 51d4e234c..a7619984d 100644 --- a/benchmarks/600.workflows/6100.1000-genome/input.py +++ b/benchmarks/600.workflows/6100.1000-genome/input.py @@ -55,7 +55,7 @@ def generate_input( end = i * range_per_job + range_per_job # print("start: ", start, "end: ", end, "range_per_job: ", range_per_job) # data = list(filter(regex.match, content[int(start):int(end)])) - data = content[int(start):int(end)] + data = content[int(start) : int(end)] # start/end line names not needed; jobs read entire file chunk. 
name = str(uuid.uuid4())[:8] diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/input.py b/benchmarks/600.workflows/6101.1000-genome-individuals/input.py index 01f211781..a89e2252c 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/input.py +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/input.py @@ -56,7 +56,7 @@ def generate_input( end = i * range_per_job + range_per_job # print("start: ", start, "end: ", end, "range_per_job: ", range_per_job) # data = list(filter(regex.match, content[int(start):int(end)])) - data = content[int(start):int(end)] + data = content[int(start) : int(end)] # start/end line names not needed; jobs read entire file chunk. name = str(uuid.uuid4())[:8] diff --git a/benchmarks/600.workflows/650.vid/python/decode.py b/benchmarks/600.workflows/650.vid/python/decode.py index c293cdf30..88504201c 100644 --- a/benchmarks/600.workflows/650.vid/python/decode.py +++ b/benchmarks/600.workflows/650.vid/python/decode.py @@ -9,7 +9,7 @@ def chunks(lst, n): for i in range(0, len(lst), n): - yield lst[i:i + n] + yield lst[i : i + n] def load_video(benchmark_bucket, bucket, blob, dest_dir): diff --git a/benchmarks/600.workflows/660.map-reduce/python/split.py b/benchmarks/600.workflows/660.map-reduce/python/split.py index 582f53bfe..20aaaf0b1 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/split.py +++ b/benchmarks/600.workflows/660.map-reduce/python/split.py @@ -7,8 +7,8 @@ def chunks(lst, n): m = int(len(lst) / n) for i in range(n - 1): - yield lst[i * m:i * m + m] - tail = lst[(n - 1) * m:] + yield lst[i * m : i * m + m] + tail = lst[(n - 1) * m :] if len(tail) > 0: yield tail diff --git a/benchmarks/600.workflows/680.excamera/python/split.py b/benchmarks/600.workflows/680.excamera/python/split.py index 80af78357..69801699a 100644 --- a/benchmarks/600.workflows/680.excamera/python/split.py +++ b/benchmarks/600.workflows/680.excamera/python/split.py @@ -3,7 +3,7 @@ def chunks(lst, n): for i in 
range(0, len(lst), n): - yield lst[i:i + n] + yield lst[i : i + n] def handler(event): From 29faa426fbbb21e424e82241ac85d192c292240e Mon Sep 17 00:00:00 2001 From: xipang Date: Sat, 22 Nov 2025 02:28:02 +0100 Subject: [PATCH 68/82] test flake8 fix on wrappers --- .../wrappers/aws/python/handler_workflow.py | 15 +++++---- .../wrappers/azure/python/handler_workflow.py | 9 ++--- .../wrappers/azure/python/main_workflow.py | 4 ++- .../wrappers/azure/python/run_subworkflow.py | 33 +++++++++---------- .../wrappers/azure/python/run_workflow.py | 27 +++++++-------- benchmarks/wrappers/azure/python/storage.py | 1 - .../wrappers/gcp/python/handler_workflow.py | 26 ++++++++------- benchmarks/wrappers/gcp/python/storage.py | 2 -- sebs/faas/fsm.py | 2 +- 9 files changed, 60 insertions(+), 59 deletions(-) diff --git a/benchmarks/wrappers/aws/python/handler_workflow.py b/benchmarks/wrappers/aws/python/handler_workflow.py index 3f372d895..8bc99ce81 100644 --- a/benchmarks/wrappers/aws/python/handler_workflow.py +++ b/benchmarks/wrappers/aws/python/handler_workflow.py @@ -1,15 +1,12 @@ import datetime -import io import json import os import sys import uuid import importlib -# Add current directory to allow location of packages -sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) - -from redis import Redis +REDIS_HOST = os.getenv("REDIS_HOST", "{{REDIS_HOST}}") +REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "{{REDIS_PASSWORD}}") def probe_cold_start(): @@ -28,6 +25,10 @@ def probe_cold_start(): def handler(event, context): + # Add current directory to allow location of packages + sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) + from redis import Redis + start = datetime.datetime.now().timestamp() os.environ["STORAGE_UPLOAD_BYTES"] = "0" os.environ["STORAGE_DOWNLOAD_BYTES"] = "0" @@ -70,11 +71,11 @@ def handler(event, context): payload = json.dumps(payload) redis = Redis( - host={{REDIS_HOST}}, + 
host=REDIS_HOST, port=6379, decode_responses=True, socket_connect_timeout=10, - password={{REDIS_PASSWORD}}, + password=REDIS_PASSWORD or None, ) req_id = event["request_id"] diff --git a/benchmarks/wrappers/azure/python/handler_workflow.py b/benchmarks/wrappers/azure/python/handler_workflow.py index cd9f8c930..a48b1c6c0 100644 --- a/benchmarks/wrappers/azure/python/handler_workflow.py +++ b/benchmarks/wrappers/azure/python/handler_workflow.py @@ -4,11 +4,12 @@ import uuid import importlib -import logging - import azure.functions as func from redis import Redis +REDIS_HOST = os.getenv("REDIS_HOST", "{{REDIS_HOST}}") +REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "{{REDIS_PASSWORD}}") + def probe_cold_start(): is_cold = False @@ -73,11 +74,11 @@ def main(event, context: func.Context): payload = json.dumps(payload) redis = Redis( - host={{REDIS_HOST}}, + host=REDIS_HOST, port=6379, decode_responses=True, socket_connect_timeout=10, - password={{REDIS_PASSWORD}}, + password=REDIS_PASSWORD or None, ) req_id = event["request_id"] diff --git a/benchmarks/wrappers/azure/python/main_workflow.py b/benchmarks/wrappers/azure/python/main_workflow.py index 81518d911..ce74b4e49 100644 --- a/benchmarks/wrappers/azure/python/main_workflow.py +++ b/benchmarks/wrappers/azure/python/main_workflow.py @@ -42,7 +42,9 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu instance_id = await client.start_new("run_workflow", None, event) res = client.create_check_status_response(req, instance_id) - # res = await client.wait_for_completion_or_create_check_status_response(req, instance_id, 1000000) + # res = await client.wait_for_completion_or_create_check_status_response( + # req, instance_id, 1000000 + # ) end = datetime.datetime.now() diff --git a/benchmarks/wrappers/azure/python/run_subworkflow.py b/benchmarks/wrappers/azure/python/run_subworkflow.py index 36df71624..e8bf98e22 100644 --- a/benchmarks/wrappers/azure/python/run_subworkflow.py +++ 
b/benchmarks/wrappers/azure/python/run_subworkflow.py @@ -1,18 +1,10 @@ -import json -import sys -import os -import uuid -import operator -import logging import datetime +import logging +import operator import azure.durable_functions as df -from redis import Redis -dir_path = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(os.path.join(dir_path, os.path.pardir)) - -from .fsm import * +from .fsm import Loop, Map, Parallel, Repeat, State, Switch, Task def get_var(obj, path: str): @@ -37,7 +29,9 @@ def set_var(obj, val, path: str): def handler(context: df.DurableOrchestrationContext): start = datetime.datetime.now().timestamp() ts = start - now = lambda: datetime.datetime.now().timestamp() + + def now(): + return datetime.datetime.now().timestamp() duration = 0 input = context.get_input() @@ -141,12 +135,15 @@ def handler(context: df.DurableOrchestrationContext): if isinstance(first_state, Task): input = {"payload": res, "request_id": request_id} - # task directly here if only one state, task within suborchestrator if multiple states. + # task directly here if one state, otherwise suborchestrator if first_state.next: # call suborchestrator # FIXME define other parameters. parallel_task = context.call_sub_orchestrator( - "run_subworkflow", input, subworkflow["root"], parallel_states + "run_subworkflow", + input, + subworkflow["root"], + parallel_states, ) parallel_tasks.append(parallel_task) else: @@ -170,7 +167,10 @@ def handler(context: df.DurableOrchestrationContext): myinput = {"payload": payload, "request_id": request_id} # FIXME use right parameters for suborchestrator. 
parallel_task = context.call_sub_orchestrator( - "run_subworkflow", myinput, subworkflow["root"], parallel_states + "run_subworkflow", + myinput, + subworkflow["root"], + parallel_states, ) parallel_tasks.append(parallel_task) state_to_result[first_state.func_name].append( @@ -234,9 +234,6 @@ def handler(context: df.DurableOrchestrationContext): else: raise ValueError(f"Undefined state: {current}") - # workflow_name = os.getenv("APPSETTING_WEBSITE_SITE_NAME") - func_name = "run_subworkflow" - return res diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py index 3ba00a674..f8c0b9328 100644 --- a/benchmarks/wrappers/azure/python/run_workflow.py +++ b/benchmarks/wrappers/azure/python/run_workflow.py @@ -1,18 +1,17 @@ +import datetime import json -import sys +import logging +import operator import os import uuid -import operator -import logging -import datetime import azure.durable_functions as df from redis import Redis -dir_path = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(os.path.join(dir_path, os.path.pardir)) +from .fsm import Map, Loop, Parallel, Repeat, State, Switch, Task -from .fsm import * +REDIS_HOST = os.getenv("REDIS_HOST", "{{REDIS_HOST}}") +REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "{{REDIS_PASSWORD}}") def get_var(obj, path: str): @@ -37,7 +36,9 @@ def set_var(obj, val, path: str): def handler(context: df.DurableOrchestrationContext): start = datetime.datetime.now().timestamp() ts = start - now = lambda: datetime.datetime.now().timestamp() + + def now(): + return datetime.datetime.now().timestamp() duration = 0 with open("definition.json") as f: @@ -66,7 +67,7 @@ def handler(context: df.DurableOrchestrationContext): try: res = yield context.call_activity(current.func_name, input) current = states.get(current.next, None) - except: + except Exception: current = states.get(current.failure, None) ts = now() @@ -99,7 +100,7 @@ def handler(context: 
df.DurableOrchestrationContext): array = get_var(res, current.array) tasks = [] if first_state.next: - # call suborchestrator - each map task should proceed with next step directly after it finished. + # call suborchestrator - each map task continues with next step after finishing if current.common_params: for elem in array: payload = {} @@ -185,7 +186,7 @@ def handler(context: df.DurableOrchestrationContext): if isinstance(first_state, Task): input = {"payload": res, "request_id": request_id} - # task directly here if only one state, task within suborchestrator if multiple states. + # task directly here if one state, else run within suborchestrator if first_state.next: input["root"] = subworkflow["root"] input["states"] = subworkflow["states"] # parallel_states @@ -288,11 +289,11 @@ def handler(context: df.DurableOrchestrationContext): payload = json.dumps(payload) redis = Redis( - host={{REDIS_HOST}}, + host=REDIS_HOST, port=6379, decode_responses=True, socket_connect_timeout=10, - password={{REDIS_PASSWORD}}, + password=REDIS_PASSWORD or None, ) key = os.path.join(workflow_name, func_name, request_id, str(uuid.uuid4())[0:8]) diff --git a/benchmarks/wrappers/azure/python/storage.py b/benchmarks/wrappers/azure/python/storage.py index 6cc739db4..c0e3b0843 100644 --- a/benchmarks/wrappers/azure/python/storage.py +++ b/benchmarks/wrappers/azure/python/storage.py @@ -1,6 +1,5 @@ import os import uuid -from typing import Optional from azure.storage.blob import BlobServiceClient diff --git a/benchmarks/wrappers/gcp/python/handler_workflow.py b/benchmarks/wrappers/gcp/python/handler_workflow.py index b8a5a0c44..d84a066d6 100644 --- a/benchmarks/wrappers/gcp/python/handler_workflow.py +++ b/benchmarks/wrappers/gcp/python/handler_workflow.py @@ -1,20 +1,12 @@ import datetime -import io import json import os import sys import uuid import importlib -# Add current directory to allow location of packages -sys.path.append(os.path.join(os.path.dirname(__file__), 
".python_packages/lib/site-packages")) - -if "NOSQL_STORAGE_DATABASE" in os.environ: - from function import nosql - - nosql.nosql.get_instance(os.environ["NOSQL_STORAGE_DATABASE"]) - -from redis import Redis +REDIS_HOST = os.getenv("REDIS_HOST", "{{REDIS_HOST}}") +REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "{{REDIS_PASSWORD}}") def probe_cold_start(): @@ -33,6 +25,16 @@ def probe_cold_start(): def handler(req): + # Add current directory to allow location of packages + sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) + + if "NOSQL_STORAGE_DATABASE" in os.environ: + from function import nosql + + nosql.nosql.get_instance(os.environ["NOSQL_STORAGE_DATABASE"]) + + from redis import Redis + start = datetime.datetime.now().timestamp() os.environ["STORAGE_UPLOAD_BYTES"] = "0" os.environ["STORAGE_DOWNLOAD_BYTES"] = "0" @@ -72,11 +74,11 @@ def handler(req): payload = json.dumps(payload) redis = Redis( - host={{REDIS_HOST}}, + host=REDIS_HOST, port=6379, decode_responses=True, socket_connect_timeout=10, - password={{REDIS_PASSWORD}}, + password=REDIS_PASSWORD or None, ) req_id = event["request_id"] diff --git a/benchmarks/wrappers/gcp/python/storage.py b/benchmarks/wrappers/gcp/python/storage.py index a6623b638..50a358daa 100644 --- a/benchmarks/wrappers/gcp/python/storage.py +++ b/benchmarks/wrappers/gcp/python/storage.py @@ -85,8 +85,6 @@ def download_within_range(self, bucket, file, start_byte, stop_byte): return content def list_directory(self, bucket, prefix): - bucket_instance = self.client.bucket(bucket) - # objects = list(self.client.list_blobs(bucket_or_name=bucket_instance,prefix=prefix)) objects = self.client.bucket(bucket).list_blobs(prefix=prefix) names = [] for obj in objects: diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index 039457cd4..9f7c620ae 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -1,6 +1,6 @@ from abc import ABC from abc import abstractmethod -from typing import Optional, List, Callable, 
Union, Dict, Type, Tuple +from typing import Optional, List, Callable, Union, Dict, Type import json From ff8e0904d89613aa43ac8cae12d1cea25e59f780 Mon Sep 17 00:00:00 2001 From: xipang Date: Sat, 22 Nov 2025 02:30:04 +0100 Subject: [PATCH 69/82] test flake8 fix on wrappers --- benchmarks/wrappers/azure/python/run_subworkflow.py | 1 + benchmarks/wrappers/azure/python/run_workflow.py | 1 + 2 files changed, 2 insertions(+) diff --git a/benchmarks/wrappers/azure/python/run_subworkflow.py b/benchmarks/wrappers/azure/python/run_subworkflow.py index e8bf98e22..ac8af5611 100644 --- a/benchmarks/wrappers/azure/python/run_subworkflow.py +++ b/benchmarks/wrappers/azure/python/run_subworkflow.py @@ -32,6 +32,7 @@ def handler(context: df.DurableOrchestrationContext): def now(): return datetime.datetime.now().timestamp() + duration = 0 input = context.get_input() diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py index f8c0b9328..f19de7d93 100644 --- a/benchmarks/wrappers/azure/python/run_workflow.py +++ b/benchmarks/wrappers/azure/python/run_workflow.py @@ -39,6 +39,7 @@ def handler(context: df.DurableOrchestrationContext): def now(): return datetime.datetime.now().timestamp() + duration = 0 with open("definition.json") as f: From 44802901d9991ac77e8b7154dbb0bd3b249eec03 Mon Sep 17 00:00:00 2001 From: xipang Date: Sat, 22 Nov 2025 02:39:10 +0100 Subject: [PATCH 70/82] fix interval index linting issue --- benchmarks/600.workflows/6100.1000-genome/input.py | 4 +++- .../600.workflows/6101.1000-genome-individuals/input.py | 4 +++- benchmarks/600.workflows/650.vid/python/decode.py | 3 ++- benchmarks/600.workflows/660.map-reduce/python/split.py | 7 +++++-- benchmarks/600.workflows/680.excamera/python/split.py | 3 ++- 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/benchmarks/600.workflows/6100.1000-genome/input.py b/benchmarks/600.workflows/6100.1000-genome/input.py index a7619984d..bf3ecf95c 100644 --- 
a/benchmarks/600.workflows/6100.1000-genome/input.py +++ b/benchmarks/600.workflows/6100.1000-genome/input.py @@ -55,7 +55,9 @@ def generate_input( end = i * range_per_job + range_per_job # print("start: ", start, "end: ", end, "range_per_job: ", range_per_job) # data = list(filter(regex.match, content[int(start):int(end)])) - data = content[int(start) : int(end)] + # data = content[int(start) : int(end)] + idx = slice(int(start), int(end)) + data = content[idx] # start/end line names not needed; jobs read entire file chunk. name = str(uuid.uuid4())[:8] diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/input.py b/benchmarks/600.workflows/6101.1000-genome-individuals/input.py index a89e2252c..4223d9f05 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/input.py +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/input.py @@ -56,7 +56,9 @@ def generate_input( end = i * range_per_job + range_per_job # print("start: ", start, "end: ", end, "range_per_job: ", range_per_job) # data = list(filter(regex.match, content[int(start):int(end)])) - data = content[int(start) : int(end)] + # data = content[int(start) : int(end)] + idx = slice(int(start), int(end)) + data = content[idx] # start/end line names not needed; jobs read entire file chunk. 
name = str(uuid.uuid4())[:8] diff --git a/benchmarks/600.workflows/650.vid/python/decode.py b/benchmarks/600.workflows/650.vid/python/decode.py index 88504201c..3414ed42b 100644 --- a/benchmarks/600.workflows/650.vid/python/decode.py +++ b/benchmarks/600.workflows/650.vid/python/decode.py @@ -9,7 +9,8 @@ def chunks(lst, n): for i in range(0, len(lst), n): - yield lst[i : i + n] + idx = slice(i, i + n) + yield lst[idx] def load_video(benchmark_bucket, bucket, blob, dest_dir): diff --git a/benchmarks/600.workflows/660.map-reduce/python/split.py b/benchmarks/600.workflows/660.map-reduce/python/split.py index 20aaaf0b1..860e04e41 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/split.py +++ b/benchmarks/600.workflows/660.map-reduce/python/split.py @@ -7,8 +7,11 @@ def chunks(lst, n): m = int(len(lst) / n) for i in range(n - 1): - yield lst[i * m : i * m + m] - tail = lst[(n - 1) * m :] + idx = slice(i * m, i * m + m) + yield lst[idx] + + idx2 = slice((n - 1) * m, len(lst)) + tail = lst[idx2] if len(tail) > 0: yield tail diff --git a/benchmarks/600.workflows/680.excamera/python/split.py b/benchmarks/600.workflows/680.excamera/python/split.py index 69801699a..3a650efa0 100644 --- a/benchmarks/600.workflows/680.excamera/python/split.py +++ b/benchmarks/600.workflows/680.excamera/python/split.py @@ -3,7 +3,8 @@ def chunks(lst, n): for i in range(0, len(lst), n): - yield lst[i : i + n] + idx = slice(i, i + n) + yield lst[idx] def handler(event): From 5c7572af42609b692e8643349033ea0075bc4662 Mon Sep 17 00:00:00 2001 From: McLavish Date: Sat, 6 Dec 2025 15:51:38 +0100 Subject: [PATCH 71/82] re-added missing imports to 690.ml workflow. 
re-added double schedule.pop added directive to ignore linter unused imports errors --- benchmarks/600.workflows/690.ml/python/train.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/benchmarks/600.workflows/690.ml/python/train.py b/benchmarks/600.workflows/690.ml/python/train.py index 8b1807ce3..81e935fa6 100644 --- a/benchmarks/600.workflows/690.ml/python/train.py +++ b/benchmarks/600.workflows/690.ml/python/train.py @@ -1,9 +1,10 @@ import os import uuid from . import storage - from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler +from sklearn.svm import SVC # noqa: F401 +from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier # noqa: F401 import numpy as np @@ -52,6 +53,7 @@ def handler(schedule): bucket = schedule.pop("bucket") benchmark_bucket = schedule.pop("benchmark_bucket") schedule.pop("request-id", None) + schedule.pop("request_id", None) clf = str_to_cls(name)(**schedule) From e779c9da2b08f9bb207b8adf97c5da93843ca692 Mon Sep 17 00:00:00 2001 From: down-street Date: Tue, 9 Dec 2025 04:53:10 +0100 Subject: [PATCH 72/82] add OCR-pipeline workflow without detection (#30) * add OCR-pipeline workflow without detection * add-OCR-pipeline data --- .flake8.cfg | 2 +- .gitignore | 1 + benchmarks-data | 2 +- .../6xx.OCR-pipeline/config.json | 6 ++ .../6xx.OCR-pipeline/definition.json | 39 +++++++++ .../600.workflows/6xx.OCR-pipeline/input.py | 66 +++++++++++++++ .../6xx.OCR-pipeline/python/detect.py | 6 ++ .../6xx.OCR-pipeline/python/merge.py | 73 +++++++++++++++++ .../6xx.OCR-pipeline/python/recognize.py | 80 +++++++++++++++++++ .../6xx.OCR-pipeline/python/requirements.txt | 3 + .../6xx.OCR-pipeline/python/split.py | 29 +++++++ run_local_workflows.sh | 1 + 12 files changed, 306 insertions(+), 2 deletions(-) create mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/config.json create mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/definition.json create mode 100644 
benchmarks/600.workflows/6xx.OCR-pipeline/input.py create mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/python/detect.py create mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/python/merge.py create mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/python/recognize.py create mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/python/requirements.txt create mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/python/split.py diff --git a/.flake8.cfg b/.flake8.cfg index 940dbb99f..18533a19f 100644 --- a/.flake8.cfg +++ b/.flake8.cfg @@ -2,4 +2,4 @@ max-line-length = 100 import-order-style = pep8 application-import-names = sebs - +extend-ignore = E203, W503 diff --git a/.gitignore b/.gitignore index 274165ed8..ed4120268 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ perf-cost* python-venv cache* !cache.py +experiment minio-volume scylladb-volume diff --git a/benchmarks-data b/benchmarks-data index 25c2bb40b..b4239ab43 160000 --- a/benchmarks-data +++ b/benchmarks-data @@ -1 +1 @@ -Subproject commit 25c2bb40b8bde342395534b534ba62f8f0ff3549 +Subproject commit b4239ab431c82829f36d3681f52ad86218a58fd5 diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/config.json b/benchmarks/600.workflows/6xx.OCR-pipeline/config.json new file mode 100644 index 000000000..07b3b1e5d --- /dev/null +++ b/benchmarks/600.workflows/6xx.OCR-pipeline/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 540, + "memory": 256, + "languages": ["python"], + "modules": ["storage"] +} \ No newline at end of file diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/definition.json b/benchmarks/600.workflows/6xx.OCR-pipeline/definition.json new file mode 100644 index 000000000..3f2c4f7cb --- /dev/null +++ b/benchmarks/600.workflows/6xx.OCR-pipeline/definition.json @@ -0,0 +1,39 @@ +{ + "root": "split", + "states": { + "split": { + "type": "task", + "func_name": "split", + "next": "detect-state" + }, + "detect-state": { + "type": "map", + "root": "detect", + "array": "segments", 
+ "next": "recognize-state", + "states": { + "detect": { + "type": "task", + "func_name": "detect" + } + } + }, + "recognize-state": { + "type": "map", + "root": "recognize", + "array": "segments", + "next": "merge-state", + "states": { + "recognize": { + "type": "task", + "func_name": "recognize" + } + } + }, + "merge-state": { + "type": "loop", + "func_name": "merge", + "array": "segments" + } + } +} diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/input.py b/benchmarks/600.workflows/6xx.OCR-pipeline/input.py new file mode 100644 index 000000000..fcc30c594 --- /dev/null +++ b/benchmarks/600.workflows/6xx.OCR-pipeline/input.py @@ -0,0 +1,66 @@ +# benchmarks/600.workflows/6xx.OCR-pipeline/input.py +import os + +size_generators = { + "test": (16, 4), + "small": (64, 8), + "large": (256, 16), +} + + +def buckets_count(): + return (1, 1) + + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): + if data_dir is None: + raise ValueError( + "/path/to/ocr_data/\n" + " └── pages/\n" + " ├── page1.png\n" + " ├── page2.png\n" + " └── ..." 
+ ) + + num_pages, batch_size = size_generators[size] + + # 1) upload pages + pages_dir = os.path.join(data_dir, "pages") + if not os.path.isdir(pages_dir): + raise ValueError(f"pages dir not exist: {pages_dir}") + + page_files = sorted( + f for f in os.listdir(pages_dir) if f.lower().endswith((".png", ".jpg", ".jpeg")) + ) + if not page_files: + raise ValueError(f"no jpg/png under dir: {pages_dir}") + + new_pages = [] + for i in range(num_pages): + page = page_files[i % len(page_files)] + # unified: 00000000.png + ext = os.path.splitext(page)[1].lower() or ".png" + name = f"{i:08d}{ext}" + path = os.path.join(pages_dir, page) + + new_pages.append(name) + upload_func(0, name, path) + + assert len(new_pages) == num_pages + + return { + "segments": new_pages, + "benchmark_bucket": benchmarks_bucket, + "input_bucket": input_buckets[0], + "output_bucket": output_buckets[0], + "batch_size": batch_size, + "lang": "en", + } diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/python/detect.py b/benchmarks/600.workflows/6xx.OCR-pipeline/python/detect.py new file mode 100644 index 000000000..22270a199 --- /dev/null +++ b/benchmarks/600.workflows/6xx.OCR-pipeline/python/detect.py @@ -0,0 +1,6 @@ +# benchmarks/600.workflows/6xx.OCR-pipeline/detect.py + + +def handler(event): + # todo: implement page detection if needed + return event diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/python/merge.py b/benchmarks/600.workflows/6xx.OCR-pipeline/python/merge.py new file mode 100644 index 000000000..b46095436 --- /dev/null +++ b/benchmarks/600.workflows/6xx.OCR-pipeline/python/merge.py @@ -0,0 +1,73 @@ +# benchmarks/600.workflows/6xx.OCR-pipeline/merge.py +import os +import uuid +import shutil + +from . 
import storage + +client = storage.storage.get_instance() + + +def _download_page_txt( + page: str, + tmp_dir: str, + benchmark_bucket: str, + output_bucket: str, + prefix: str, +): + base, _ = os.path.splitext(page) + remote_name = f"{prefix}{base}.txt" + local_txt = os.path.join(tmp_dir, f"{base}.txt") + + try: + client.download( + benchmark_bucket, + f"{output_bucket}/{remote_name}", + local_txt, + ) + return local_txt + except Exception: + # return None + return None + + +def handler(event): + pages = event["segments"] + benchmark_bucket = event["benchmark_bucket"] + output_bucket = event["output_bucket"] + prefix = event["prefix"] + + tmp_dir = os.path.join("/tmp", str(uuid.uuid4())) + os.makedirs(tmp_dir, exist_ok=True) + + try: + page_txt_paths = [] + for page in pages: + p = _download_page_txt(page, tmp_dir, benchmark_bucket, output_bucket, prefix) + page_txt_paths.append((page, p)) + + # merge all pages + final_doc = os.path.join(tmp_dir, "document.txt") + with open(final_doc, "w", encoding="utf-8") as fout: + for page, txt_path in sorted(page_txt_paths, key=lambda x: x[0]): + fout.write(f"===== Page {page} =====\n") + if txt_path and os.path.exists(txt_path): + with open(txt_path, "r", encoding="utf-8") as fin: + fout.write(fin.read()) + else: + fout.write("[NO_TEXT]\n") + fout.write("\n\n") + + # upload final document + remote_name = f"{prefix}document.txt" + client.upload( + benchmark_bucket, + f"{output_bucket}/{remote_name}", + final_doc, + unique_name=False, + ) + + finally: + shutil.rmtree(tmp_dir, ignore_errors=True) + + return event diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/python/recognize.py b/benchmarks/600.workflows/6xx.OCR-pipeline/python/recognize.py new file mode 100644 index 000000000..6d01aadfc --- /dev/null +++ b/benchmarks/600.workflows/6xx.OCR-pipeline/python/recognize.py @@ -0,0 +1,80 @@ +# benchmarks/600.workflows/6xx.OCR-pipeline/recognize.py +import os +import uuid +import shutil +import logging + +import easyocr + 
+from . import storage + +logger = logging.getLogger(__name__) +client = storage.storage.get_instance() + +_readers = {} + + +def _get_reader(lang: str): + """ + lazy initialize easyocr.Reader + lang: 'en', 'ch_sim', 'ch_tra', 'ja' etc + """ + if lang not in _readers: + _readers[lang] = easyocr.Reader([lang], gpu=False) + logger.info("Initialized easyocr.Reader for lang=%s", lang) + return _readers[lang] + + +def handler(event): + input_bucket = event["input_bucket"] + output_bucket = event["output_bucket"] + benchmark_bucket = event["benchmark_bucket"] + pages = event["segments"] + prefix = event["prefix"] + lang = event.get("lang", "en") + + tmp_dir = os.path.join("/tmp", str(uuid.uuid4())) + os.makedirs(tmp_dir, exist_ok=True) + + reader = _get_reader(lang) + + try: + for page in pages: + # 1) download image + local_img = os.path.join(tmp_dir, page) + client.download( + benchmark_bucket, + f"{input_bucket}/{page}", + local_img, + ) + + # 2) OCR + # detail=0 -> only text + try: + results = reader.readtext(local_img, detail=0) + except Exception as e: + logger.error("OCR failed on %s: %s", page, e) + results = [f"[OCR_ERROR] {e}"] + + text = "\n".join(str(x) for x in results) + + # 3) per-page txt + base, _ = os.path.splitext(page) + local_txt = os.path.join(tmp_dir, f"{base}.txt") + with open(local_txt, "w", encoding="utf-8") as f: + f.write(text) + + # 4) upload to output_bucket(with prefix) + remote_name = f"{prefix}{base}.txt" + client.upload( + benchmark_bucket, + f"{output_bucket}/{remote_name}", + local_txt, + unique_name=False, + ) + + finally: + shutil.rmtree(tmp_dir, ignore_errors=True) + + # pass to merge + return event diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/python/requirements.txt b/benchmarks/600.workflows/6xx.OCR-pipeline/python/requirements.txt new file mode 100644 index 000000000..f954cc1b7 --- /dev/null +++ b/benchmarks/600.workflows/6xx.OCR-pipeline/python/requirements.txt @@ -0,0 +1,3 @@ +easyocr +torch +torchvision \ No newline 
at end of file diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/python/split.py b/benchmarks/600.workflows/6xx.OCR-pipeline/python/split.py new file mode 100644 index 000000000..684a2687d --- /dev/null +++ b/benchmarks/600.workflows/6xx.OCR-pipeline/python/split.py @@ -0,0 +1,29 @@ +# benchmarks/600.workflows/6xx.OCR-pipeline/split.py +import uuid + + +def _chunks(lst, n): + for i in range(0, len(lst), n): + yield lst[i : i + n] + + +def handler(event): + segs = _chunks(event["segments"], event["batch_size"]) + input_bucket = event["input_bucket"] + output_bucket = event["output_bucket"] + benchmark_bucket = event["benchmark_bucket"] + lang = event.get("lang", "en") + + return { + "segments": [ + { + "prefix": str(uuid.uuid4().int & ((1 << 64) - 1))[:8], + "segments": ss, + "lang": lang, + "input_bucket": input_bucket, + "output_bucket": output_bucket, + "benchmark_bucket": benchmark_bucket, + } + for ss in segs + ] + } diff --git a/run_local_workflows.sh b/run_local_workflows.sh index 68c9e86a7..d23af3f05 100755 --- a/run_local_workflows.sh +++ b/run_local_workflows.sh @@ -95,6 +95,7 @@ WORKFLOWS=( "670.auth" "680.excamera" "690.ml" + "6xx.OCR-pipeline" ) for wf in "${WORKFLOWS[@]}"; do From eaf1ca9d4ed569d6907c846cd353a6addbb2fb58 Mon Sep 17 00:00:00 2001 From: down-street Date: Tue, 9 Dec 2025 04:59:08 +0100 Subject: [PATCH 73/82] Revert "add OCR-pipeline workflow without detection (#30)" (#31) This reverts commit e779c9da2b08f9bb207b8adf97c5da93843ca692. 
--- .flake8.cfg | 2 +- .gitignore | 1 - benchmarks-data | 2 +- .../6xx.OCR-pipeline/config.json | 6 -- .../6xx.OCR-pipeline/definition.json | 39 --------- .../600.workflows/6xx.OCR-pipeline/input.py | 66 --------------- .../6xx.OCR-pipeline/python/detect.py | 6 -- .../6xx.OCR-pipeline/python/merge.py | 73 ----------------- .../6xx.OCR-pipeline/python/recognize.py | 80 ------------------- .../6xx.OCR-pipeline/python/requirements.txt | 3 - .../6xx.OCR-pipeline/python/split.py | 29 ------- run_local_workflows.sh | 1 - 12 files changed, 2 insertions(+), 306 deletions(-) delete mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/config.json delete mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/definition.json delete mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/input.py delete mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/python/detect.py delete mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/python/merge.py delete mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/python/recognize.py delete mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/python/requirements.txt delete mode 100644 benchmarks/600.workflows/6xx.OCR-pipeline/python/split.py diff --git a/.flake8.cfg b/.flake8.cfg index 18533a19f..940dbb99f 100644 --- a/.flake8.cfg +++ b/.flake8.cfg @@ -2,4 +2,4 @@ max-line-length = 100 import-order-style = pep8 application-import-names = sebs -extend-ignore = E203, W503 + diff --git a/.gitignore b/.gitignore index ed4120268..274165ed8 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,6 @@ perf-cost* python-venv cache* !cache.py -experiment minio-volume scylladb-volume diff --git a/benchmarks-data b/benchmarks-data index b4239ab43..25c2bb40b 160000 --- a/benchmarks-data +++ b/benchmarks-data @@ -1 +1 @@ -Subproject commit b4239ab431c82829f36d3681f52ad86218a58fd5 +Subproject commit 25c2bb40b8bde342395534b534ba62f8f0ff3549 diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/config.json 
b/benchmarks/600.workflows/6xx.OCR-pipeline/config.json deleted file mode 100644 index 07b3b1e5d..000000000 --- a/benchmarks/600.workflows/6xx.OCR-pipeline/config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "timeout": 540, - "memory": 256, - "languages": ["python"], - "modules": ["storage"] -} \ No newline at end of file diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/definition.json b/benchmarks/600.workflows/6xx.OCR-pipeline/definition.json deleted file mode 100644 index 3f2c4f7cb..000000000 --- a/benchmarks/600.workflows/6xx.OCR-pipeline/definition.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "root": "split", - "states": { - "split": { - "type": "task", - "func_name": "split", - "next": "detect-state" - }, - "detect-state": { - "type": "map", - "root": "detect", - "array": "segments", - "next": "recognize-state", - "states": { - "detect": { - "type": "task", - "func_name": "detect" - } - } - }, - "recognize-state": { - "type": "map", - "root": "recognize", - "array": "segments", - "next": "merge-state", - "states": { - "recognize": { - "type": "task", - "func_name": "recognize" - } - } - }, - "merge-state": { - "type": "loop", - "func_name": "merge", - "array": "segments" - } - } -} diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/input.py b/benchmarks/600.workflows/6xx.OCR-pipeline/input.py deleted file mode 100644 index fcc30c594..000000000 --- a/benchmarks/600.workflows/6xx.OCR-pipeline/input.py +++ /dev/null @@ -1,66 +0,0 @@ -# benchmarks/600.workflows/6xx.OCR-pipeline/input.py -import os - -size_generators = { - "test": (16, 4), - "small": (64, 8), - "large": (256, 16), -} - - -def buckets_count(): - return (1, 1) - - -def generate_input( - data_dir, - size, - benchmarks_bucket, - input_buckets, - output_buckets, - upload_func, - nosql_func, -): - if data_dir is None: - raise ValueError( - "/path/to/ocr_data/\n" - " └── pages/\n" - " ├── page1.png\n" - " ├── page2.png\n" - " └── ..." 
- ) - - num_pages, batch_size = size_generators[size] - - # 1) upload pages - pages_dir = os.path.join(data_dir, "pages") - if not os.path.isdir(pages_dir): - raise ValueError(f"pages dir not exist: {pages_dir}") - - page_files = sorted( - f for f in os.listdir(pages_dir) if f.lower().endswith((".png", ".jpg", ".jpeg")) - ) - if not page_files: - raise ValueError(f"no jpg/png under dir: {pages_dir}") - - new_pages = [] - for i in range(num_pages): - page = page_files[i % len(page_files)] - # unified: 00000000.png - ext = os.path.splitext(page)[1].lower() or ".png" - name = f"{i:08d}{ext}" - path = os.path.join(pages_dir, page) - - new_pages.append(name) - upload_func(0, name, path) - - assert len(new_pages) == num_pages - - return { - "segments": new_pages, - "benchmark_bucket": benchmarks_bucket, - "input_bucket": input_buckets[0], - "output_bucket": output_buckets[0], - "batch_size": batch_size, - "lang": "en", - } diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/python/detect.py b/benchmarks/600.workflows/6xx.OCR-pipeline/python/detect.py deleted file mode 100644 index 22270a199..000000000 --- a/benchmarks/600.workflows/6xx.OCR-pipeline/python/detect.py +++ /dev/null @@ -1,6 +0,0 @@ -# benchmarks/600.workflows/6xx.OCR-pipeline/detect.py - - -def handler(event): - # todo: implement page detection if needed - return event diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/python/merge.py b/benchmarks/600.workflows/6xx.OCR-pipeline/python/merge.py deleted file mode 100644 index b46095436..000000000 --- a/benchmarks/600.workflows/6xx.OCR-pipeline/python/merge.py +++ /dev/null @@ -1,73 +0,0 @@ -# benchmarks/600.workflows/6xx.OCR-pipeline/merge.py -import os -import uuid -import shutil - -from . 
import storage - -client = storage.storage.get_instance() - - -def _download_page_txt( - page: str, - tmp_dir: str, - benchmark_bucket: str, - output_bucket: str, - prefix: str, -): - base, _ = os.path.splitext(page) - remote_name = f"{prefix}{base}.txt" - local_txt = os.path.join(tmp_dir, f"{base}.txt") - - try: - client.download( - benchmark_bucket, - f"{output_bucket}/{remote_name}", - local_txt, - ) - return local_txt - except Exception: - # return None - return None - - -def handler(event): - pages = event["segments"] - benchmark_bucket = event["benchmark_bucket"] - output_bucket = event["output_bucket"] - prefix = event["prefix"] - - tmp_dir = os.path.join("/tmp", str(uuid.uuid4())) - os.makedirs(tmp_dir, exist_ok=True) - - try: - page_txt_paths = [] - for page in pages: - p = _download_page_txt(page, tmp_dir, benchmark_bucket, output_bucket, prefix) - page_txt_paths.append((page, p)) - - # merge all pages - final_doc = os.path.join(tmp_dir, "document.txt") - with open(final_doc, "w", encoding="utf-8") as fout: - for page, txt_path in sorted(page_txt_paths, key=lambda x: x[0]): - fout.write(f"===== Page {page} =====\n") - if txt_path and os.path.exists(txt_path): - with open(txt_path, "r", encoding="utf-8") as fin: - fout.write(fin.read()) - else: - fout.write("[NO_TEXT]\n") - fout.write("\n\n") - - # upload final document - remote_name = f"{prefix}document.txt" - client.upload( - benchmark_bucket, - f"{output_bucket}/{remote_name}", - final_doc, - unique_name=False, - ) - - finally: - shutil.rmtree(tmp_dir, ignore_errors=True) - - return event diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/python/recognize.py b/benchmarks/600.workflows/6xx.OCR-pipeline/python/recognize.py deleted file mode 100644 index 6d01aadfc..000000000 --- a/benchmarks/600.workflows/6xx.OCR-pipeline/python/recognize.py +++ /dev/null @@ -1,80 +0,0 @@ -# benchmarks/600.workflows/6xx.OCR-pipeline/recognize.py -import os -import uuid -import shutil -import logging - -import easyocr - 
-from . import storage - -logger = logging.getLogger(__name__) -client = storage.storage.get_instance() - -_readers = {} - - -def _get_reader(lang: str): - """ - lazy initialize easyocr.Reader - lang: 'en', 'ch_sim', 'ch_tra', 'ja' etc - """ - if lang not in _readers: - _readers[lang] = easyocr.Reader([lang], gpu=False) - logger.info("Initialized easyocr.Reader for lang=%s", lang) - return _readers[lang] - - -def handler(event): - input_bucket = event["input_bucket"] - output_bucket = event["output_bucket"] - benchmark_bucket = event["benchmark_bucket"] - pages = event["segments"] - prefix = event["prefix"] - lang = event.get("lang", "en") - - tmp_dir = os.path.join("/tmp", str(uuid.uuid4())) - os.makedirs(tmp_dir, exist_ok=True) - - reader = _get_reader(lang) - - try: - for page in pages: - # 1) download image - local_img = os.path.join(tmp_dir, page) - client.download( - benchmark_bucket, - f"{input_bucket}/{page}", - local_img, - ) - - # 2) OCR - # detail=0 -> only text - try: - results = reader.readtext(local_img, detail=0) - except Exception as e: - logger.error("OCR failed on %s: %s", page, e) - results = [f"[OCR_ERROR] {e}"] - - text = "\n".join(str(x) for x in results) - - # 3) per-page txt - base, _ = os.path.splitext(page) - local_txt = os.path.join(tmp_dir, f"{base}.txt") - with open(local_txt, "w", encoding="utf-8") as f: - f.write(text) - - # 4) upload to output_bucket(with prefix) - remote_name = f"{prefix}{base}.txt" - client.upload( - benchmark_bucket, - f"{output_bucket}/{remote_name}", - local_txt, - unique_name=False, - ) - - finally: - shutil.rmtree(tmp_dir, ignore_errors=True) - - # pass to merge - return event diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/python/requirements.txt b/benchmarks/600.workflows/6xx.OCR-pipeline/python/requirements.txt deleted file mode 100644 index f954cc1b7..000000000 --- a/benchmarks/600.workflows/6xx.OCR-pipeline/python/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -easyocr -torch -torchvision \ No 
newline at end of file diff --git a/benchmarks/600.workflows/6xx.OCR-pipeline/python/split.py b/benchmarks/600.workflows/6xx.OCR-pipeline/python/split.py deleted file mode 100644 index 684a2687d..000000000 --- a/benchmarks/600.workflows/6xx.OCR-pipeline/python/split.py +++ /dev/null @@ -1,29 +0,0 @@ -# benchmarks/600.workflows/6xx.OCR-pipeline/split.py -import uuid - - -def _chunks(lst, n): - for i in range(0, len(lst), n): - yield lst[i : i + n] - - -def handler(event): - segs = _chunks(event["segments"], event["batch_size"]) - input_bucket = event["input_bucket"] - output_bucket = event["output_bucket"] - benchmark_bucket = event["benchmark_bucket"] - lang = event.get("lang", "en") - - return { - "segments": [ - { - "prefix": str(uuid.uuid4().int & ((1 << 64) - 1))[:8], - "segments": ss, - "lang": lang, - "input_bucket": input_bucket, - "output_bucket": output_bucket, - "benchmark_bucket": benchmark_bucket, - } - for ss in segs - ] - } diff --git a/run_local_workflows.sh b/run_local_workflows.sh index d23af3f05..68c9e86a7 100755 --- a/run_local_workflows.sh +++ b/run_local_workflows.sh @@ -95,7 +95,6 @@ WORKFLOWS=( "670.auth" "680.excamera" "690.ml" - "6xx.OCR-pipeline" ) for wf in "${WORKFLOWS[@]}"; do From 692de3a27b2d83946757a1f49adb670e6206b09a Mon Sep 17 00:00:00 2001 From: xipang Date: Sun, 14 Dec 2025 16:28:24 +0100 Subject: [PATCH 74/82] test sonataflow --- QUICKSTART_SONATAFLOW.md | 56 ++++++ config/example.json | 22 +++ config/systems.json | 69 +++++++ sebs.py | 2 +- sebs/faas/config.py | 3 + sebs/sebs.py | 4 + sebs/sonataflow/__init__.py | 6 + sebs/sonataflow/config.py | 138 +++++++++++++ sebs/sonataflow/generator.py | 148 ++++++++++++++ sebs/sonataflow/sonataflow.py | 352 ++++++++++++++++++++++++++++++++++ sebs/sonataflow/triggers.py | 82 ++++++++ sebs/sonataflow/workflow.py | 79 ++++++++ sebs/types.py | 1 + 13 files changed, 961 insertions(+), 1 deletion(-) create mode 100644 QUICKSTART_SONATAFLOW.md create mode 100644 sebs/sonataflow/__init__.py 
create mode 100644 sebs/sonataflow/config.py create mode 100644 sebs/sonataflow/generator.py create mode 100644 sebs/sonataflow/sonataflow.py create mode 100644 sebs/sonataflow/triggers.py create mode 100644 sebs/sonataflow/workflow.py diff --git a/QUICKSTART_SONATAFLOW.md b/QUICKSTART_SONATAFLOW.md new file mode 100644 index 000000000..61efc2d67 --- /dev/null +++ b/QUICKSTART_SONATAFLOW.md @@ -0,0 +1,56 @@ +# SonataFlow quickstart + +This backend generates Serverless Workflow definitions from SeBS workflow specs and drives them through a running SonataFlow (Kogito) instance. Functions still run as local SeBS containers; SonataFlow orchestrates them via HTTP. + +## Prerequisites +- Docker available locally. +- A SonataFlow dev-mode/runtime reachable at `http://localhost:8080` (default). Example: + ```bash + docker run --rm -it -p 8080:8080 \ + -v "$PWD/output/workflow_resources/sonataflow":/home/kogito/serverless-workflow-project/src/main/resources/workflows \ + quay.io/kiegroup/kogito-swf-devmode:latest + ``` + The volume mount should point to the directory where SeBS writes generated `.sw.json` files. +- Local object/NoSQL/redis services (reuse `run_local_workflows.sh` setup or `./sebs.py storage start all config/storage.json`). + +## Configure +Add a `deployment.sonataflow` block to your config (based on `config/example.json`): +```json +{ + "deployment": { + "name": "sonataflow", + "sonataflow": { + "resources": { + "redis": { "host": "localhost:6380", "password": "" }, + "runtime": { "url": "http://localhost:8080", "endpoint_prefix": "services" } + }, + "storage": { + "type": "minio", + "address": "localhost", + "mapped_port": 9000, + "access_key": "minio", + "secret_key": "minio123", + "instance_id": "minio", + "input_buckets": [], + "output_buckets": [] + } + } + } +} +``` +Adjust storage/redis endpoints to match your local services. + +## Run +1. Start storage/redis (as in `run_local_workflows.sh`). +2. 
Start SonataFlow dev-mode and mount the output directory (see above). +3. Execute a workflow benchmark: + ```bash + ./sebs.py benchmark workflow 610.gen test \ + --config config/your-sonataflow-config.json \ + --deployment sonataflow --trigger http --repetitions 1 --verbose + ``` + +On first run SeBS will: +- Package workflow functions into local containers. +- Translate `definition.json` into `workflow_resources/sonataflow/.sw.json` under the generated code package directory (inside your `--output-dir` tree). +- Invoke SonataFlow at `{runtime_url}/{endpoint_prefix}/{workflow_id}` with the workflow payload. diff --git a/config/example.json b/config/example.json index 136fe75bd..15b7c98f3 100644 --- a/config/example.json +++ b/config/example.json @@ -88,6 +88,28 @@ "type": "minio" } }, + "sonataflow": { + "resources": { + "redis": { + "host": "", + "password": "" + }, + "runtime": { + "url": "http://localhost:8080", + "endpoint_prefix": "services" + } + }, + "storage": { + "address": "", + "mapped_port": -1, + "access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio" + } + }, "openwhisk": { "shutdownStorage": false, "removeCluster": false, diff --git a/config/systems.json b/config/systems.json index a5e105faf..6f5c50d4b 100644 --- a/config/systems.json +++ b/config/systems.json @@ -72,6 +72,75 @@ "architecture": ["x64"], "deployments": ["package"] }, + "sonataflow": { + "experiments": { + "python": [ + "papi", + "time", + "disk-io", + "memory" + ], + "nodejs": [ + "time" + ] + }, + "languages": { + "python": { + "base_images": { + "x64": { + "3.7": "python:3.7-slim", + "3.8": "python:3.8-slim", + "3.9": "python:3.9-slim", + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim" + } + }, + "images": [ + "run", + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "storage.py", + "nosql.py", + "function_workflow.py" + ], + "packages": [ + "redis" + ], + "module_packages": { + 
"nosql": [ + "boto3==1.28.3" + ] + } + } + }, + "nodejs": { + "base_images": { + "x64": { + "14": "node:14-slim", + "16": "node:16-slim", + "18": "node:18-slim", + "20": "node:20-slim" + } + }, + "images": [ + "run", + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "storage.js" + ], + "packages": [] + } + } + }, + "architecture": ["x64"], + "deployments": ["package"] + }, "aws": { "languages": { "python": { diff --git a/sebs.py b/sebs.py index d13e378ab..6c32f9b2e 100755 --- a/sebs.py +++ b/sebs.py @@ -91,7 +91,7 @@ def common_params(func): @click.option( "--deployment", default=None, - type=click.Choice(["azure", "aws", "gcp", "local", "openwhisk"]), + type=click.Choice(["azure", "aws", "gcp", "local", "openwhisk", "sonataflow"]), help="Cloud deployment to use.", ) @click.option( diff --git a/sebs/faas/config.py b/sebs/faas/config.py index ad3a631be..6aebd514a 100644 --- a/sebs/faas/config.py +++ b/sebs/faas/config.py @@ -209,6 +209,9 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config name = config["name"] implementations = {"local": LocalConfig.deserialize} + from sebs.sonataflow.config import SonataFlowConfig + + implementations["sonataflow"] = SonataFlowConfig.deserialize if has_platform("aws"): from sebs.aws.config import AWSConfig diff --git a/sebs/sebs.py b/sebs/sebs.py index ab8a54b9d..39489987d 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -90,6 +90,10 @@ def get_deployment( name = dep_config["name"] implementations: Dict[str, Type[FaaSSystem]] = {"local": Local} + from sebs.sonataflow import SonataFlow + + implementations["sonataflow"] = SonataFlow + if has_platform("aws"): from sebs.aws import AWS diff --git a/sebs/sonataflow/__init__.py b/sebs/sonataflow/__init__.py new file mode 100644 index 000000000..8468f1ba6 --- /dev/null +++ b/sebs/sonataflow/__init__.py @@ -0,0 +1,6 @@ +from .sonataflow import SonataFlow +from .config import SonataFlowConfig +from .workflow import SonataFlowWorkflow 
+from .triggers import WorkflowSonataFlowTrigger + +__all__ = ["SonataFlow", "SonataFlowConfig", "SonataFlowWorkflow", "WorkflowSonataFlowTrigger"] diff --git a/sebs/sonataflow/config.py b/sebs/sonataflow/config.py new file mode 100644 index 000000000..2cc0932ab --- /dev/null +++ b/sebs/sonataflow/config.py @@ -0,0 +1,138 @@ +from typing import cast, Optional, Set + +from sebs.cache import Cache +from sebs.faas.config import Config, Credentials, Resources +from sebs.storage.resources import SelfHostedResources +from sebs.storage.config import NoSQLStorageConfig, PersistentStorageConfig +from sebs.utils import LoggingHandlers + + +class SonataFlowCredentials(Credentials): + def serialize(self) -> dict: + return {} + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + return SonataFlowCredentials() + + +class SonataFlowResources(SelfHostedResources): + def __init__( + self, + storage_cfg: Optional[PersistentStorageConfig] = None, + nosql_storage_cfg: Optional[NoSQLStorageConfig] = None, + ): + super().__init__("sonataflow", storage_cfg, nosql_storage_cfg) + self._allocated_ports: Set[int] = set() + self._runtime_url: str = "http://localhost:8080" + self._endpoint_prefix: str = "services" + + @property + def allocated_ports(self) -> set: + return self._allocated_ports + + @property + def runtime_url(self) -> str: + return self._runtime_url + + @property + def endpoint_prefix(self) -> str: + return self._endpoint_prefix + + def serialize(self) -> dict: + out = super().serialize() + out["allocated_ports"] = list(self._allocated_ports) + out["runtime"] = { + "url": self._runtime_url, + "endpoint_prefix": self._endpoint_prefix, + } + return out + + @staticmethod + def _initialize_resources(res: "SonataFlowResources", cfg: dict): + source = cfg.get("resources", cfg) + if "allocated_ports" in source: + res._allocated_ports = set(source["allocated_ports"]) + runtime = source.get("runtime", {}) + res._runtime_url = 
runtime.get("url", res._runtime_url) + res._endpoint_prefix = runtime.get("endpoint_prefix", res._endpoint_prefix) + + @staticmethod + def initialize(res: Resources, config: dict): + resources = cast(SonataFlowResources, res) + SonataFlowResources._initialize_resources(resources, config) + + def update_cache(self, cache: Cache): + super().update_cache(cache) + cache.update_config(val=list(self._allocated_ports), keys=["sonataflow", "resources", "allocated_ports"]) + cache.update_config(val=self._runtime_url, keys=["sonataflow", "resources", "runtime", "url"]) + cache.update_config( + val=self._endpoint_prefix, + keys=["sonataflow", "resources", "runtime", "endpoint_prefix"], + ) + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + ret = SonataFlowResources() + + cached_config = cache.get_config("sonataflow") + ret._deserialize(ret, config, cached_config) + + if "resources" in config: + ret.load_redis(config["resources"]) + elif cached_config and "resources" in cached_config: + ret.load_redis(cached_config["resources"]) + + if cached_config and "resources" in cached_config: + SonataFlowResources._initialize_resources(ret, cached_config["resources"]) + ret.logging_handlers = handlers + ret.logging.info("Using cached resources for SonataFlow") + else: + ret.logging_handlers = handlers + SonataFlowResources._initialize_resources(ret, config) + + return ret + + +class SonataFlowConfig(Config): + def __init__(self): + super().__init__(name="sonataflow") + self._credentials = SonataFlowCredentials() + self._resources = SonataFlowResources() + + @staticmethod + def typename() -> str: + return "SonataFlow.Config" + + @staticmethod + def initialize(cfg: Config, dct: dict): + pass + + @property + def credentials(self) -> SonataFlowCredentials: + return self._credentials + + @property + def resources(self) -> SonataFlowResources: + return self._resources + + @resources.setter + def resources(self, val: SonataFlowResources): 
+ self._resources = val + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + cfg = SonataFlowConfig() + cfg.resources = cast(SonataFlowResources, SonataFlowResources.deserialize(config, cache, handlers)) + cfg.logging_handlers = handlers + return cfg + + def serialize(self) -> dict: + return { + "name": "sonataflow", + "region": self._region, + "resources": self._resources.serialize(), + } + + def update_cache(self, cache: Cache): + self.resources.update_cache(cache) diff --git a/sebs/sonataflow/generator.py b/sebs/sonataflow/generator.py new file mode 100644 index 000000000..1d4c96826 --- /dev/null +++ b/sebs/sonataflow/generator.py @@ -0,0 +1,148 @@ +import json +from typing import Dict, List, Union + +from sebs.faas.fsm import Generator, State, Task, Switch, Map, Repeat, Loop, Parallel + + +class SonataFlowGenerator(Generator): + """ + Translate a SeBS workflow definition into a SonataFlow Serverless Workflow definition. + Currently supports task, switch, map (as foreach), repeat, loop and parallel constructs + with a best-effort mapping to SonataFlow branches. + """ + + def __init__(self, workflow_id: str, bindings: Dict[str, Dict[str, str]]): + super().__init__(export_func=lambda obj: json.dumps(obj, indent=2)) + self._workflow_id = workflow_id + self._bindings = bindings + self._functions: Dict[str, Dict[str, str]] = {} + + def _function_ref(self, func_name: str) -> Dict[str, str]: + binding = self._bindings.get(func_name) + if not binding: + raise ValueError(f"No binding found for function {func_name}") + ref_name = binding.get("workflow_function_name", func_name) + if ref_name not in self._functions: + host = binding["host"] + port = binding["port"] + url = f"http://{host}:{port}/" + self._functions[ref_name] = {"name": ref_name, "operation": url} + return {"refName": ref_name} + + def _default_action(self, func_name: str, payload_ref: str = "${ . 
}") -> Dict[str, object]: + ref = self._function_ref(func_name) + ref["arguments"] = {"payload": payload_ref} + return {"name": func_name, "functionRef": ref} + + def postprocess(self, payloads: List[dict]) -> dict: + return { + "id": self._workflow_id, + "name": self._workflow_id, + "version": "0.1", + "description": "Auto-generated from SeBS workflow definition.", + "functions": list(self._functions.values()), + "start": self.root.name, + "states": payloads, + } + + def encode_task(self, state: Task) -> Union[dict, List[dict]]: + payload: Dict[str, object] = { + "name": state.name, + "type": "operation", + "actions": [self._default_action(state.func_name, "${ . }")], + } + if state.next: + payload["transition"] = state.next + else: + payload["end"] = True + if state.failure is not None: + payload["onErrors"] = [{"transition": state.failure}] + return payload + + def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: + def _condition(case: Switch.Case) -> str: + # Serverless Workflow uses jq-like expressions; keep it simple. + return f"{case.var} {case.op} {json.dumps(case.val)}" + + return { + "name": state.name, + "type": "switch", + "dataConditions": [ + {"condition": _condition(c), "transition": c.next} for c in state.cases + ], + "defaultCondition": {"transition": state.default} if state.default else {"end": True}, + } + + def encode_map(self, state: Map) -> Union[dict, List[dict]]: + iteration_param = "item" + action_args = "${ " + iteration_param + " }" + if state.common_params: + # Merge map element with selected common parameters. + merged = {"array_element": "${ " + iteration_param + " }"} + for param in [p.strip() for p in state.common_params.split(",") if p.strip()]: + merged[param] = "${ ." + param + " }" + action_args = merged # type: ignore + + payload: Dict[str, object] = { + "name": state.name, + "type": "foreach", + "inputCollection": "${ ." + state.array + " }", + "outputCollection": "${ ." 
+ state.array + " }", + "iterationParam": iteration_param, + "actions": [self._default_action(state.root, action_args)], + } + if state.next: + payload["transition"] = state.next + else: + payload["end"] = True + return payload + + def encode_repeat(self, state: Repeat) -> Union[dict, List[dict]]: + # Encode as a foreach over a generated range. + iterations = list(range(state.count)) + payload: Dict[str, object] = { + "name": state.name, + "type": "foreach", + "inputCollection": iterations, + "iterationParam": "idx", + "actions": [self._default_action(state.func_name, "${ . }")], + } + if state.next: + payload["transition"] = state.next + else: + payload["end"] = True + return payload + + def encode_loop(self, state: Loop) -> Union[dict, List[dict]]: + payload: Dict[str, object] = { + "name": state.name, + "type": "foreach", + "inputCollection": "${ ." + state.array + " }", + "iterationParam": "item", + "actions": [self._default_action(state.func_name, "${ .item }")], + } + if state.next: + payload["transition"] = state.next + else: + payload["end"] = True + return payload + + def _encode_branch(self, subworkflow: dict) -> Dict[str, object]: + states = {n: State.deserialize(n, s) for n, s in subworkflow["states"].items()} + payloads: List[dict] = [] + for s in states.values(): + obj = self.encode_state(s) + if isinstance(obj, list): + payloads.extend(obj) + else: + payloads.append(obj) + return {"name": subworkflow["root"], "states": payloads} + + def encode_parallel(self, state: Parallel) -> Union[dict, List[dict]]: + branches = [self._encode_branch(sw) for sw in state.funcs] + payload: Dict[str, object] = {"name": state.name, "type": "parallel", "branches": branches} + if state.next: + payload["transition"] = state.next + else: + payload["end"] = True + return payload diff --git a/sebs/sonataflow/sonataflow.py b/sebs/sonataflow/sonataflow.py new file mode 100644 index 000000000..29eccc6db --- /dev/null +++ b/sebs/sonataflow/sonataflow.py @@ -0,0 +1,352 @@ +import 
os +import shutil +from typing import cast, Dict, List, Optional, Tuple, Set, Type + +import docker + +from sebs.cache import Cache +from sebs.config import SeBSConfig +from sebs.storage.resources import SelfHostedSystemResources +from sebs.utils import LoggingHandlers +from sebs.sonataflow.config import SonataFlowConfig +from sebs.sonataflow.workflow import SonataFlowWorkflow +from sebs.sonataflow.triggers import WorkflowSonataFlowTrigger +from sebs.sonataflow.generator import SonataFlowGenerator +from sebs.faas.function import ( + CloudBenchmark, + Function, + FunctionConfig, + ExecutionResult, + Trigger, + Workflow, +) +from sebs.faas.system import System +from sebs.faas.config import Resources +from sebs.benchmark import Benchmark +from sebs.faas.fsm import State, Task, Map, Repeat, Loop, Parallel +from sebs.local.function import LocalFunction +from sebs.local.local import Local + + +def _collect_task_names(state: State) -> Set[str]: + names: Set[str] = set() + if isinstance(state, Task): + names.add(state.func_name) + elif isinstance(state, Repeat): + names.add(state.func_name) + elif isinstance(state, Loop): + names.add(state.func_name) + elif isinstance(state, Map): + for nested_name, nested_state in state.funcs.items(): + nested_obj = ( + nested_state + if isinstance(nested_state, State) + else State.deserialize(nested_name, nested_state) + ) + names.update(_collect_task_names(nested_obj)) + elif isinstance(state, Parallel): + for subworkflow in state.funcs: + for nested_name, nested_state in subworkflow["states"].items(): + names.update(_collect_task_names(State.deserialize(nested_name, nested_state))) + return names + + +def _workflow_task_names(definition: dict) -> Set[str]: + states = {n: State.deserialize(n, s) for n, s in definition["states"].items()} + names: Set[str] = set() + for state in states.values(): + names.update(_collect_task_names(state)) + return names + + +class SonataFlow(Local): + DEFAULT_PORT = 9000 + + @staticmethod + def name(): + 
return "sonataflow" + + @staticmethod + def typename(): + return "SonataFlow" + + @staticmethod + def function_type() -> "Type[Function]": + return LocalFunction + + @staticmethod + def workflow_type() -> "Type[Workflow]": + return SonataFlowWorkflow + + @property + def config(self) -> SonataFlowConfig: + return self._config + + def __init__( + self, + sebs_config: SeBSConfig, + config: SonataFlowConfig, + cache_client: Cache, + docker_client: docker.client, + logger_handlers: LoggingHandlers, + ): + System.__init__( + self, + sebs_config, + cache_client, + docker_client, + SelfHostedSystemResources( + "sonataflow", config, cache_client, docker_client, logger_handlers + ), + ) + self.logging_handlers = logger_handlers + self._config = config + self._remove_containers = True + self._memory_measurement_path: Optional[str] = None + self._measure_interval = -1 + self._bridge_ip: Optional[str] = self._detect_bridge_ip() + self.initialize_resources(select_prefix="sonataflow") + + # Reuse networking helpers from Local + def _detect_bridge_ip(self) -> Optional[str]: + return Local._detect_bridge_ip(self) + + def _container_service_address(self, endpoint: str) -> str: + return Local._container_service_address(self, endpoint) + + def _function_network_endpoint(self, func: LocalFunction) -> Tuple[str, str]: + return Local._function_network_endpoint(self, func) + + def _workflow_env(self, workflow_name: str, module_name: str) -> Dict[str, str]: + return Local._workflow_env(self, workflow_name, module_name) + + def _allocate_host_port(self, start_port: int, range_size: int = 1000) -> int: + return Local._allocate_host_port(self, start_port, range_size) + + def _start_container( + self, + code_package: Benchmark, + func_name: str, + func: Optional[LocalFunction], + env_overrides: Optional[Dict[str, str]] = None, + ) -> LocalFunction: + return Local._start_container(self, code_package, func_name, func, env_overrides) + + def _load_workflow_definition(self, path: str) -> dict: + 
return Local._load_workflow_definition(path) + + def _prepare_workflow_functions( + self, + code_package: Benchmark, + workflow_name: str, + workflow_id: str, + definition_path: str, + definition: dict, + existing_workflow: Optional[SonataFlowWorkflow] = None, + ) -> Tuple[List[LocalFunction], Dict[str, Dict[str, str]], str]: + task_names = sorted(_workflow_task_names(definition)) + if not task_names: + raise RuntimeError("Workflow definition does not contain any task states.") + + existing_funcs = ( + {func.name: func for func in existing_workflow.functions} if existing_workflow else {} + ) + + functions: List[LocalFunction] = [] + bindings: Dict[str, Dict[str, str]] = {} + + required_containers = {f"{workflow_name}___{task}" for task in task_names} + obsolete_funcs = set(existing_funcs.keys()) - required_containers + for obsolete in obsolete_funcs: + existing_funcs[obsolete].stop() + + for task_name in task_names: + container_name = f"{workflow_name}___{task_name}" + existing_func = existing_funcs.get(container_name) + if existing_func: + existing_func.stop() + + env = self._workflow_env(workflow_name, task_name) + func_instance = self._start_container(code_package, container_name, existing_func, env) + functions.append(func_instance) + host, port = self._function_network_endpoint(func_instance) + workflow_function_name = f"{workflow_id}_{task_name}" + bindings[task_name] = { + "type": "custom", + "operation": "rest:post:/", + "host": host, + "port": port, + "workflow_function_name": workflow_function_name, + } + + resources_dir = os.path.join(code_package.code_location, "workflow_resources") + workflows_dir = os.path.join(resources_dir, "workflows") + os.makedirs(workflows_dir, exist_ok=True) + os.makedirs(resources_dir, exist_ok=True) + definition_copy = os.path.join(workflows_dir, f"{workflow_id}.json") + shutil.copy2(definition_path, definition_copy) + + return functions, bindings, definition_copy + + def create_workflow(self, code_package: Benchmark, 
workflow_name: str) -> Workflow: + workflow_name = self.format_function_name(workflow_name) + definition_path = os.path.join(code_package.benchmark_path, "definition.json") + if not os.path.exists(definition_path): + raise ValueError(f"No workflow definition found for {workflow_name}") + + definition = self._load_workflow_definition(definition_path) + workflow_id = Local._normalize_workflow_id(workflow_name) + + functions, bindings, definition_copy = self._prepare_workflow_functions( + code_package, workflow_name, workflow_id, definition_path, definition + ) + + generator = SonataFlowGenerator(workflow_id, bindings) + generator.parse(definition_path) + sonataflow_definition = generator.generate() + + sf_dir = os.path.join(code_package.code_location, "workflow_resources", "sonataflow") + os.makedirs(sf_dir, exist_ok=True) + sonataflow_path = os.path.join(sf_dir, f"{workflow_id}.sw.json") + with open(sonataflow_path, "w") as outf: + outf.write(sonataflow_definition) + + function_cfg = FunctionConfig.from_benchmark(code_package) + workflow = SonataFlowWorkflow( + workflow_name, + functions, + code_package.benchmark, + workflow_id, + code_package.hash, + function_cfg, + sonataflow_path, + bindings, + ) + trigger = WorkflowSonataFlowTrigger( + workflow.workflow_id, + self.config.resources.runtime_url, + self.config.resources.endpoint_prefix, + ) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + return workflow + + def create_workflow_trigger( + self, workflow: Workflow, trigger_type: Trigger.TriggerType + ) -> Trigger: + workflow = cast(SonataFlowWorkflow, workflow) + if trigger_type != Trigger.TriggerType.HTTP: + raise RuntimeError("SonataFlow workflows currently support only HTTP triggers.") + + trigger = WorkflowSonataFlowTrigger( + workflow.workflow_id, + self.config.resources.runtime_url, + self.config.resources.endpoint_prefix, + ) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + 
self.cache_client.update_benchmark(workflow) + return trigger + + def update_workflow(self, workflow: Workflow, code_package: Benchmark): + workflow = cast(SonataFlowWorkflow, workflow) + definition_path = os.path.join(code_package.benchmark_path, "definition.json") + if not os.path.exists(definition_path): + raise ValueError(f"No workflow definition found for {workflow.name}") + + definition = self._load_workflow_definition(definition_path) + workflow_id = workflow.workflow_id if workflow.workflow_id else Local._normalize_workflow_id(workflow.name) + functions, bindings, _ = self._prepare_workflow_functions( + code_package, + workflow.name, + workflow_id, + definition_path, + definition, + workflow, + ) + + generator = SonataFlowGenerator(workflow_id, bindings) + generator.parse(definition_path) + sonataflow_definition = generator.generate() + sonataflow_path = os.path.join( + code_package.code_location, "workflow_resources", "sonataflow", f"{workflow_id}.sw.json" + ) + os.makedirs(os.path.dirname(sonataflow_path), exist_ok=True) + with open(sonataflow_path, "w") as outf: + outf.write(sonataflow_definition) + + workflow.set_functions(functions) + workflow.definition_path = sonataflow_path + workflow.function_bindings = bindings + workflow.workflow_id = workflow_id + + triggers = workflow.triggers(Trigger.TriggerType.HTTP) + if not triggers: + trigger = WorkflowSonataFlowTrigger( + workflow.workflow_id, + self.config.resources.runtime_url, + self.config.resources.endpoint_prefix, + ) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + else: + for trigger in triggers: + if isinstance(trigger, WorkflowSonataFlowTrigger): + trigger.update(self.config.resources.runtime_url, self.config.resources.endpoint_prefix) + + self.logging.info(f"Updated SonataFlow workflow {workflow.name} definition.") + + def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + 
self.initialize_resources(select_prefix=resource_prefix or "sonataflow") + + def package_code( + self, + code_package: Benchmark, + directory: str, + is_workflow: bool, + is_cached: bool, + ) -> Tuple[str, int, str]: + return Local.package_code(self, code_package, directory, is_workflow, is_cached) + + def create_function( + self, + code_package: Benchmark, + func_name: str, + container_deployment: bool, + container_uri: str, + ) -> Function: + raise RuntimeError("SonataFlow deployment does not support individual function creation.") + + def update_function( + self, + code_package: Benchmark, + func: Function, + container_deployment: bool, + container_uri: str, + ) -> Function: + raise RuntimeError("SonataFlow deployment does not support individual function updates.") + + def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + raise RuntimeError("SonataFlow deployment does not support function triggers.") + + def update_function_trigger(self, func: Function, trigger: Trigger): + raise RuntimeError("SonataFlow deployment does not support function triggers.") + + def execute( + self, + code_package: Benchmark, + trigger: Trigger, + input: dict, + repetitions: int, + sync: bool, + ) -> List[ExecutionResult]: + return Local.execute(self, code_package, trigger, input, repetitions, sync) + + def get_function(self, code_package: Benchmark, func_name: str) -> Function: + raise RuntimeError("Function retrieval is not supported in SonataFlow mode.") + + def prepare_experiment(self, benchmark: CloudBenchmark): + return Local.prepare_experiment(self, benchmark) + + def shutdown(self) -> None: + super().shutdown() diff --git a/sebs/sonataflow/triggers.py b/sebs/sonataflow/triggers.py new file mode 100644 index 000000000..ec966d6f6 --- /dev/null +++ b/sebs/sonataflow/triggers.py @@ -0,0 +1,82 @@ +import concurrent.futures +import datetime +import uuid +from typing import Optional + +import requests + +from sebs.faas.function import 
ExecutionResult, Trigger + + +class WorkflowSonataFlowTrigger(Trigger): + def __init__(self, workflow_id: str, base_url: str, endpoint_prefix: str = "services"): + super().__init__() + self._workflow_id = workflow_id + self._base_url = base_url.rstrip("/") + self._endpoint_prefix = endpoint_prefix.strip("/") + + @staticmethod + def typename() -> str: + return "SonataFlow.WorkflowTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.HTTP + + def _endpoint(self) -> str: + if self._endpoint_prefix: + return f"{self._base_url}/{self._endpoint_prefix}/{self._workflow_id}" + return f"{self._base_url}/{self._workflow_id}" + + def _invoke(self, payload: dict) -> ExecutionResult: + request_id = str(uuid.uuid4())[0:8] + begin = datetime.datetime.now() + result = ExecutionResult.from_times(begin, begin) + try: + resp = requests.post( + self._endpoint(), + json={"payload": payload, "request_id": request_id}, + timeout=900, + ) + end = datetime.datetime.now() + result = ExecutionResult.from_times(begin, end) + result.request_id = request_id + if resp.status_code >= 300: + result.stats.failure = True + self.logging.error( + f"SonataFlow invocation failed ({resp.status_code}): {resp.text}" + ) + else: + result.output = resp.json() + except Exception as exc: + end = datetime.datetime.now() + result = ExecutionResult.from_times(begin, end) + result.request_id = request_id + result.stats.failure = True + self.logging.error(f"SonataFlow invocation error: {exc}") + return result + + def sync_invoke(self, payload: dict) -> ExecutionResult: + return self._invoke(payload) + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + pool = concurrent.futures.ThreadPoolExecutor() + return pool.submit(self._invoke, payload) + + def serialize(self) -> dict: + return { + "type": "SONATAFLOW", + "workflow_id": self._workflow_id, + "base_url": self._base_url, + "endpoint_prefix": self._endpoint_prefix, + } + + @classmethod + def 
deserialize(cls, obj: dict) -> "WorkflowSonataFlowTrigger": + return cls(obj["workflow_id"], obj["base_url"], obj.get("endpoint_prefix", "services")) + + def update(self, base_url: Optional[str] = None, endpoint_prefix: Optional[str] = None): + if base_url: + self._base_url = base_url.rstrip("/") + if endpoint_prefix: + self._endpoint_prefix = endpoint_prefix.strip("/") diff --git a/sebs/sonataflow/workflow.py b/sebs/sonataflow/workflow.py new file mode 100644 index 000000000..ac2e8e6b6 --- /dev/null +++ b/sebs/sonataflow/workflow.py @@ -0,0 +1,79 @@ +import os +import logging +from typing import Dict, List + +from sebs.faas.function import FunctionConfig, Workflow +from sebs.local.function import LocalFunction + + +class SonataFlowWorkflow(Workflow): + def __init__( + self, + name: str, + functions: List[LocalFunction], + benchmark: str, + workflow_id: str, + code_package_hash: str, + cfg: FunctionConfig, + definition_path: str, + function_bindings: Dict[str, Dict], + ): + super().__init__(benchmark, name, code_package_hash, cfg) + self._functions: Dict[str, LocalFunction] = {func.name: func for func in functions} + self.workflow_id = workflow_id + self.definition_path = definition_path + self.function_bindings = function_bindings + self.needs_refresh = False + + @property + def functions(self) -> List[LocalFunction]: + return list(self._functions.values()) + + def set_functions(self, functions: List[LocalFunction]): + self._functions = {func.name: func for func in functions} + + def update_function(self, func: LocalFunction): + self._functions[func.name] = func + + @staticmethod + def typename() -> str: + return "SonataFlow.Workflow" + + def serialize(self) -> dict: + serialized = { + **super().serialize(), + "functions": [func.serialize() for func in self._functions.values()], + "definition_path": self.definition_path, + "function_bindings": self.function_bindings, + "workflow_id": self.workflow_id, + } + serialized["triggers"] = [] + return serialized + + 
@staticmethod + def deserialize(cached_config: dict) -> "SonataFlowWorkflow": + funcs: List[LocalFunction] = [] + missing_function = False + for entry in cached_config["functions"]: + try: + funcs.append(LocalFunction.deserialize(entry)) + except RuntimeError as exc: + logging.getLogger(__name__).warning( + "Skipping cached function for workflow %s: %s", + cached_config.get("name", ""), + exc, + ) + missing_function = True + cfg = FunctionConfig.deserialize(cached_config["config"]) + workflow = SonataFlowWorkflow( + cached_config["name"], + funcs, + cached_config["benchmark"], + cached_config.get("workflow_id", cached_config["name"]), + cached_config["hash"], + cfg, + cached_config.get("definition_path", ""), + cached_config.get("function_bindings", {}), + ) + workflow.needs_refresh = missing_function + return workflow diff --git a/sebs/types.py b/sebs/types.py index b87516fba..914c9af70 100644 --- a/sebs/types.py +++ b/sebs/types.py @@ -12,6 +12,7 @@ class Platforms(str, Enum): GCP = "gcp" LOCAL = "local" OPENWHISK = "openwhisk" + SONATAFLOW = "sonataflow" class Storage(str, Enum): From 87460dcab749b902932062343ba1e4e835a63dcd Mon Sep 17 00:00:00 2001 From: xipang Date: Sun, 14 Dec 2025 16:48:40 +0100 Subject: [PATCH 75/82] test sonataflow --- config/sonataflow_local.json | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 config/sonataflow_local.json diff --git a/config/sonataflow_local.json b/config/sonataflow_local.json new file mode 100644 index 000000000..9f2c9962f --- /dev/null +++ b/config/sonataflow_local.json @@ -0,0 +1,57 @@ +{ + "experiments": { + "deployment": "sonataflow", + "update_code": true, + "update_storage": false, + "download_results": false, + "architecture": "x64", + "container_deployment": true, + "runtime": { + "language": "python", + "version": "3.8" + }, + "type": "invocation-overhead", + "invocation-overhead": { + "repetitions": 1, + "N": 5, + "type": "payload", + "payload_begin": 1024, + 
"payload_end": 2048, + "payload_points": 2, + "code_begin": 1048576, + "code_end": 2097152, + "code_points": 2 + } + }, + "deployment": { + "name": "sonataflow", + "sonataflow": { + "resources": { + "redis": { + "host": "localhost:6380", + "password": "" + }, + "runtime": { + "url": "http://localhost:8080", + "endpoint_prefix": "services" + } + }, + "storage": { + "type": "minio", + "address": "localhost", + "mapped_port": 9000, + "access_key": "minio", + "secret_key": "minio123", + "instance_id": "minio", + "input_buckets": [], + "output_buckets": [] + }, + "nosql": { + "type": "scylladb", + "address": "localhost", + "mapped_port": 9042, + "instance_id": "scylladb" + } + } + } +} From 6874b433371c0ebbfc377e19bba748a92796bd4d Mon Sep 17 00:00:00 2001 From: Russellpang Date: Sun, 14 Dec 2025 22:15:56 +0100 Subject: [PATCH 76/82] log current state --- .dockerignore | 6 + benchmarks-data | 2 +- .../sonataflow/python/function_workflow.py | 96 +++++++++ .../wrappers/sonataflow/python/nosql.py | 131 +++++++++++++ .../wrappers/sonataflow/python/storage.py | 80 ++++++++ config/local_deployment.json | 162 ++++++++++++++++ config/local_workflows.json | 162 ++++++++++++++++ dockerfiles/sonataflow/entrypoint.sh | 21 ++ .../sonataflow/nodejs/Dockerfile.build | 16 ++ dockerfiles/sonataflow/nodejs/Dockerfile.run | 27 +++ dockerfiles/sonataflow/nodejs/config.js | 2 + dockerfiles/sonataflow/nodejs/package.json | 7 + dockerfiles/sonataflow/nodejs/run_server.sh | 3 + dockerfiles/sonataflow/nodejs/runners.json | 6 + dockerfiles/sonataflow/nodejs/server.js | 48 +++++ dockerfiles/sonataflow/nodejs/time-in-proc.js | 72 +++++++ dockerfiles/sonataflow/nodejs/timeit.sh | 11 ++ dockerfiles/sonataflow/nodejs/tools.js | 44 +++++ .../sonataflow/python/Dockerfile.build | 18 ++ dockerfiles/sonataflow/python/Dockerfile.run | 25 +++ .../sonataflow/python/analyzer-runner.py | 64 ++++++ dockerfiles/sonataflow/python/config.py | 5 + dockerfiles/sonataflow/python/papi-runner.py | 104 ++++++++++ 
dockerfiles/sonataflow/python/run_server.sh | 3 + dockerfiles/sonataflow/python/runners.json | 7 + dockerfiles/sonataflow/python/server.py | 38 ++++ dockerfiles/sonataflow/python/time-in-proc.py | 59 ++++++ dockerfiles/sonataflow/python/timeit.sh | 5 + dockerfiles/sonataflow/python/tools.py | 21 ++ dockerfiles/sonataflow/run.sh | 13 ++ dockerfiles/sonataflow/runner.py | 62 ++++++ dockerfiles/sonataflow/time-out-proc.py | 56 ++++++ dockerfiles/sonataflow/utils.py | 21 ++ experiments.json | 125 ++++++++++++ out_storage.json | 33 ++++ .../sebd-610.gen-python-3.8/sonataflow.csv | 1 + .../sonataflow.csv | 1 + .../sonataflow.csv | 1 + .../sonataflow.csv | 1 + .../sonataflow.csv | 1 + .../sonataflow.csv | 1 + .../sonataflow.csv | 1 + .../sonataflow.csv | 1 + .../sebd-650.vid-python-3.8/sonataflow.csv | 1 + .../sonataflow.csv | 1 + .../sebd-670.auth-python-3.8/sonataflow.csv | 1 + .../sonataflow.csv | 1 + .../sebd-690.ml-python-3.8/sonataflow.csv | 1 + run_sonataflow_workflows.sh | 163 ++++++++++++++++ sebs/sonataflow/generator.py | 54 ++++-- sebs/sonataflow/sonataflow.py | 41 +++- sonataflow-workflows/application.properties | 3 + .../sebd_610_gen_python_3_8.sw.json | 137 +++++++++++++ .../sebd_6200_trip_booking_python_3_8.sw.json | 183 ++++++++++++++++++ tools/build_docker_images.py | 2 +- 55 files changed, 2134 insertions(+), 17 deletions(-) create mode 100644 benchmarks/wrappers/sonataflow/python/function_workflow.py create mode 100644 benchmarks/wrappers/sonataflow/python/nosql.py create mode 100644 benchmarks/wrappers/sonataflow/python/storage.py create mode 100644 config/local_deployment.json create mode 100644 config/local_workflows.json create mode 100755 dockerfiles/sonataflow/entrypoint.sh create mode 100755 dockerfiles/sonataflow/nodejs/Dockerfile.build create mode 100755 dockerfiles/sonataflow/nodejs/Dockerfile.run create mode 100644 dockerfiles/sonataflow/nodejs/config.js create mode 100644 dockerfiles/sonataflow/nodejs/package.json create mode 100755 
dockerfiles/sonataflow/nodejs/run_server.sh create mode 100644 dockerfiles/sonataflow/nodejs/runners.json create mode 100644 dockerfiles/sonataflow/nodejs/server.js create mode 100644 dockerfiles/sonataflow/nodejs/time-in-proc.js create mode 100644 dockerfiles/sonataflow/nodejs/timeit.sh create mode 100644 dockerfiles/sonataflow/nodejs/tools.js create mode 100755 dockerfiles/sonataflow/python/Dockerfile.build create mode 100755 dockerfiles/sonataflow/python/Dockerfile.run create mode 100644 dockerfiles/sonataflow/python/analyzer-runner.py create mode 100644 dockerfiles/sonataflow/python/config.py create mode 100644 dockerfiles/sonataflow/python/papi-runner.py create mode 100755 dockerfiles/sonataflow/python/run_server.sh create mode 100644 dockerfiles/sonataflow/python/runners.json create mode 100644 dockerfiles/sonataflow/python/server.py create mode 100644 dockerfiles/sonataflow/python/time-in-proc.py create mode 100755 dockerfiles/sonataflow/python/timeit.sh create mode 100644 dockerfiles/sonataflow/python/tools.py create mode 100644 dockerfiles/sonataflow/run.sh create mode 100644 dockerfiles/sonataflow/runner.py create mode 100644 dockerfiles/sonataflow/time-out-proc.py create mode 100644 dockerfiles/sonataflow/utils.py create mode 100644 experiments.json create mode 100644 out_storage.json create mode 100644 results/local-workflows/results/sebd-610.gen-python-3.8/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-6100.1000-genome-python-3.8/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.8/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-620.func-invo-python-3.8/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-6200.trip-booking-python-3.8/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/sonataflow.csv create mode 100644 
results/local-workflows/results/sebd-631.parallel-download-python-3.8/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-640.selfish-detour-python-3.8/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-650.vid-python-3.8/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-660.map-reduce-python-3.8/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-670.auth-python-3.8/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-680.excamera-python-3.8/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-690.ml-python-3.8/sonataflow.csv create mode 100755 run_sonataflow_workflows.sh create mode 100644 sonataflow-workflows/application.properties create mode 100644 sonataflow-workflows/sebd_610_gen_python_3_8.sw.json create mode 100644 sonataflow-workflows/sebd_6200_trip_booking_python_3_8.sw.json diff --git a/.dockerignore b/.dockerignore index 84416f19a..a62f9158b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -7,3 +7,9 @@ cache python-venv regression-* *_code +scylladb-volume +minio-volume +output +results +*.json +out_storage.json diff --git a/benchmarks-data b/benchmarks-data index 25c2bb40b..6a5990b81 160000 --- a/benchmarks-data +++ b/benchmarks-data @@ -1 +1 @@ -Subproject commit 25c2bb40b8bde342395534b534ba62f8f0ff3549 +Subproject commit 6a5990b81eaa14df5144b41321354903cb4ea3a8 diff --git a/benchmarks/wrappers/sonataflow/python/function_workflow.py b/benchmarks/wrappers/sonataflow/python/function_workflow.py new file mode 100644 index 000000000..3b359bf84 --- /dev/null +++ b/benchmarks/wrappers/sonataflow/python/function_workflow.py @@ -0,0 +1,96 @@ +import datetime +import importlib +import json +import os +import uuid + +from redis import Redis + + +_FUNCTION_HANDLER = None + + +def _load_function_handler(): + global _FUNCTION_HANDLER + if _FUNCTION_HANDLER: + return _FUNCTION_HANDLER + + module_name = os.getenv("SEBS_WORKFLOW_MODULE") + if not 
module_name: + raise RuntimeError("Environment variable SEBS_WORKFLOW_MODULE is not set.") + + module = importlib.import_module(module_name) + if not hasattr(module, "handler"): + raise RuntimeError(f"Module {module_name} does not provide a handler(payload) function.") + _FUNCTION_HANDLER = module.handler + return _FUNCTION_HANDLER + + +def _maybe_push_measurement(event, duration_start, duration_end): + redis_host = os.getenv("SEBS_REDIS_HOST") + if not redis_host: + return + + workflow_name = os.getenv("SEBS_WORKFLOW_NAME", "workflow") + func_name = os.getenv("SEBS_WORKFLOW_FUNC", "function") + request_id = event["request_id"] + + payload = { + "func": func_name, + "start": duration_start, + "end": duration_end, + "is_cold": False, + "container_id": os.getenv("HOSTNAME", "local"), + "provider.request_id": request_id, + } + + func_res = os.getenv("SEBS_FUNCTION_RESULT") + if func_res: + payload["result"] = json.loads(func_res) + + upload_bytes = os.getenv("STORAGE_UPLOAD_BYTES", "0") + download_bytes = os.getenv("STORAGE_DOWNLOAD_BYTES", "0") + if upload_bytes.isdigit(): + payload["blob.upload"] = int(upload_bytes) + if download_bytes.isdigit(): + payload["blob.download"] = int(download_bytes) + + redis = Redis( + host=redis_host, + port=int(os.getenv("SEBS_REDIS_PORT", "6379")), + decode_responses=True, + socket_connect_timeout=10, + password=os.getenv("SEBS_REDIS_PASSWORD"), + ) + + key = os.path.join(workflow_name, func_name, request_id, str(uuid.uuid4())[0:8]) + redis.set(key, json.dumps(payload)) + print(f"[workflow] stored measurement {key}") + + +def handler(event): + """ + Entry point used by the local workflow containers. Expects events with + {"payload": , "request_id": "..."} format and returns the same + structure expected by our workflow orchestrator. 
+ """ + + if "payload" not in event: + raise RuntimeError("Workflow invocation payload must include 'payload' key.") + + request_id = event.get("request_id", str(uuid.uuid4())) + event["request_id"] = request_id + payload = event["payload"] + handler_fn = _load_function_handler() + + begin = datetime.datetime.now().timestamp() + print(f"[workflow] handler input: {event}", flush=True) + result = handler_fn(payload) + end = datetime.datetime.now().timestamp() + + _maybe_push_measurement(event, begin, end) + + return { + "request_id": request_id, + "payload": result, + } diff --git a/benchmarks/wrappers/sonataflow/python/nosql.py b/benchmarks/wrappers/sonataflow/python/nosql.py new file mode 100644 index 000000000..0e816954c --- /dev/null +++ b/benchmarks/wrappers/sonataflow/python/nosql.py @@ -0,0 +1,131 @@ +from decimal import Decimal +from os import environ +from typing import List, Optional, Union, Tuple + +import boto3 + + +class nosql: + + instance: Optional["nosql"] = None + + def __init__(self): + + if environ["NOSQL_STORAGE_TYPE"] != "scylladb": + raise RuntimeError(f"Unsupported NoSQL storage type: {environ['NOSQL_STORAGE_TYPE']}!") + + self.client = boto3.resource( + "dynamodb", + region_name="None", + aws_access_key_id="None", + aws_secret_access_key="None", + endpoint_url=f"http://{environ['NOSQL_STORAGE_ENDPOINT']}", + ) + self._tables = {} + + # Based on: https://github.com/boto/boto3/issues/369#issuecomment-157205696 + def _remove_decimals(self, data: dict) -> Union[dict, list, int, float]: + + if isinstance(data, list): + return [self._remove_decimals(x) for x in data] + elif isinstance(data, dict): + return {k: self._remove_decimals(v) for k, v in data.items()} + elif isinstance(data, Decimal): + if data.as_integer_ratio()[1] == 1: + return int(data) + else: + return float(data) + else: + return data + + def _get_table(self, table_name: str): + + if table_name not in self._tables: + + env_name = f"NOSQL_STORAGE_TABLE_{table_name}" + + if env_name in 
environ: + aws_name = environ[env_name] + self._tables[table_name] = self.client.Table(aws_name) + else: + raise RuntimeError( + f"Couldn't find an environment variable {env_name} for table {table_name}" + ) + + return self._tables[table_name] + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).put_item(Item=data) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> dict: + + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + res = self._get_table(table_name).get_item(Key=data) + return self._remove_decimals(res["Item"]) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + updates: dict, + ): + + key_data = {} + for key in (primary_key, secondary_key): + key_data[key[0]] = key[1] + + update_expression = "SET " + update_values = {} + update_names = {} + + # We use attribute names because DynamoDB reserves some keywords, like 'status' + for key, value in updates.items(): + + update_expression += f" #{key}_name = :{key}_value, " + update_values[f":{key}_value"] = value + update_names[f"#{key}_name"] = key + + update_expression = update_expression[:-2] + + self._get_table(table_name).update_item( + Key=key_data, + UpdateExpression=update_expression, + ExpressionAttributeValues=update_values, + ExpressionAttributeNames=update_names, + ) + + def query(self, table_name: str, primary_key: Tuple[str, str], _: str) -> List[dict]: + + res = self._get_table(table_name).query( + KeyConditionExpression=f"{primary_key[0]} = :keyvalue", + ExpressionAttributeValues={":keyvalue": primary_key[1]}, + )["Items"] + return self._remove_decimals(res) + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + data = {} + for key in 
(primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).delete_item(Key=data) + + @staticmethod + def get_instance(): + if nosql.instance is None: + nosql.instance = nosql() + return nosql.instance diff --git a/benchmarks/wrappers/sonataflow/python/storage.py b/benchmarks/wrappers/sonataflow/python/storage.py new file mode 100644 index 000000000..d2fb5d4d3 --- /dev/null +++ b/benchmarks/wrappers/sonataflow/python/storage.py @@ -0,0 +1,80 @@ +import os +import uuid + +import minio + + +class storage: + instance = None + client = None + + def __init__(self): + if "MINIO_ADDRESS" in os.environ: + address = os.environ["MINIO_ADDRESS"] + access_key = os.environ["MINIO_ACCESS_KEY"] + secret_key = os.environ["MINIO_SECRET_KEY"] + self.client = minio.Minio( + address, access_key=access_key, secret_key=secret_key, secure=False + ) + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) + + def upload(self, bucket, file, filepath, unique_name=True): + key_name = storage.unique_name(file) if unique_name else file + self.client.fput_object(bucket, key_name, filepath) + return key_name + + def download(self, bucket, file, filepath): + data = self.client.get_object(bucket, file) + size = data.headers.get("Content-Length") + if size: + os.environ["STORAGE_DOWNLOAD_BYTES"] = str( + int(os.getenv("STORAGE_DOWNLOAD_BYTES", "0")) + int(size) + ) + self.client.fget_object(bucket, file, filepath) + + def download_directory(self, bucket, prefix, path): + objects = self.client.list_objects_v2(bucket, prefix, recursive=True) + for obj in objects: + file_name = obj.object_name + self.download(bucket, file_name, os.path.join(path, file_name)) + + def upload_stream(self, bucket, file, bytes_data): + key_name = storage.unique_name(file) + self.client.put_object(bucket, key_name, bytes_data, 
bytes_data.getbuffer().nbytes) + return key_name + + def download_stream(self, bucket, file): + data = self.client.get_object(bucket, file) + body = data.read() + os.environ["STORAGE_DOWNLOAD_BYTES"] = str( + int(os.getenv("STORAGE_DOWNLOAD_BYTES", "0")) + len(body) + ) + return body + + def download_within_range(self, bucket, file, start_byte, stop_byte): + range_header = f"bytes={start_byte}-{stop_byte}" + resp = self.client.get_object(bucket, file, request_headers={"Range": range_header}) + data = resp.read().decode("utf-8") + os.environ["STORAGE_DOWNLOAD_BYTES"] = str( + int(os.getenv("STORAGE_DOWNLOAD_BYTES", "0")) + len(data.encode("utf-8")) + ) + return data + + def list_directory(self, bucket, prefix): + if hasattr(self.client, "list_objects_v2"): + iterator = self.client.list_objects_v2(bucket, prefix, recursive=True) + else: + iterator = self.client.list_objects(bucket, prefix, recursive=True) + for obj in iterator: + yield obj.object_name + + def get_instance(): + if storage.instance is None: + storage.instance = storage() + return storage.instance diff --git a/config/local_deployment.json b/config/local_deployment.json new file mode 100644 index 000000000..03bafd186 --- /dev/null +++ b/config/local_deployment.json @@ -0,0 +1,162 @@ +{ + "experiments": { + "deployment": "local", + "update_code": true, + "update_storage": false, + "download_results": false, + "architecture": "x64", + "container_deployment": true, + "runtime": { + "language": "python", + "version": "3.8" + }, + "type": "invocation-overhead", + "perf-cost": { + "benchmark": "110.dynamic-html", + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "repetitions": 50, + "concurrent-invocations": 50, + "memory-sizes": [ + 128, + 256 + ] + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "invocation-overhead": { + "repetitions": 5, + "N": 20, + "type": "payload", + "payload_begin": 1024, + "payload_end": 
6251000, + "payload_points": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20 + }, + "eviction-model": { + "invocations": 1, + "function_copy_idx": 0, + "repetitions": 5, + "sleep": 1 + } + }, + "deployment": { + "name": "sonataflow", + "aws": { + "region": "us-east-1", + "lambda-role": "", + "resources": { + "redis": { + "host": "ec2-54-86-32-136.compute-1.amazonaws.com", + "password": "xB46z3u9I6WJ" + } + } + }, + "azure": { + "region": "westeurope" + }, + "gcp": { + "region": "europe-west1", + "project_name": "", + "credentials": "" + }, + "local": { + "resources": { + "redis": { + "host": "", + "password": "" + } + }, + "storage": { + "address": "", + "mapped_port": -1, + "access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio" + } + }, + "sonataflow": { + "resources": { + "redis": { + "host": "localhost:6380", + "password": "" + }, + "runtime": { + "url": "http://localhost:8080", + "endpoint_prefix": "services" + } + }, + "storage": { + "address": "172.17.0.2:9000", + "mapped_port": 9011, + "access_key": "skPhf3f8aEMLd0P8n81M8OrA6fq8ZKCx6dn313lq2ws", + "secret_key": "4c15b2336fe9e89fac929dd13b4f43e222c9f8f0ae3e528572f46d94e93a1a13", + "instance_id": "b59d6d8581f4d62f8fd53e9d2184f3f9b4ab5661370d42f4dabbe739d6bda579", + "input_buckets": [], + "output_buckets": [], + "type": "minio", + "object": { + "type": "minio", + "minio": { + "address": "172.17.0.2:9000", + "mapped_port": 9011, + "access_key": "dZgkU1z39_K77ONo22flFxi0Std9C-IxRSihbDGV_w8", + "secret_key": "b91c091999ffd06ef36d169ff483456d437dbd56c924e51c3444efab1bc7a1bd", + "instance_id": "5dcae415d9adf6e71fbbf21c2c912eeda32e402d57282f026f42a7ba49ed9217", + "input_buckets": [], + "output_buckets": [] + } + }, + "nosql": { + "type": "scylladb", + "scylladb": { + "address": "172.17.0.3:8000", + "mapped_port": 9012, + "instance_id": "07961446bedf02c284b7657182c3ae4c0f3b25cab2d9a863c14e12dd9d886bdb" + } + } + }, + 
"nosql": { + "type": "scylladb", + "address": "172.17.0.3:8000", + "mapped_port": 9012, + "instance_id": "841c8a0b85fae2647f214170eb8fa666cd7ee01a361a1614f9c752e011b1a757" + } + }, + "openwhisk": { + "shutdownStorage": false, + "removeCluster": false, + "wskBypassSecurity": "true", + "wskExec": "wsk", + "experimentalManifest": false, + "docker_registry": { + "registry": "", + "username": "", + "password": "" + }, + "storage": { + "address": "", + "mapped_port": -1, + "access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio" + } + } + } +} diff --git a/config/local_workflows.json b/config/local_workflows.json new file mode 100644 index 000000000..03bafd186 --- /dev/null +++ b/config/local_workflows.json @@ -0,0 +1,162 @@ +{ + "experiments": { + "deployment": "local", + "update_code": true, + "update_storage": false, + "download_results": false, + "architecture": "x64", + "container_deployment": true, + "runtime": { + "language": "python", + "version": "3.8" + }, + "type": "invocation-overhead", + "perf-cost": { + "benchmark": "110.dynamic-html", + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "repetitions": 50, + "concurrent-invocations": 50, + "memory-sizes": [ + 128, + 256 + ] + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "invocation-overhead": { + "repetitions": 5, + "N": 20, + "type": "payload", + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20 + }, + "eviction-model": { + "invocations": 1, + "function_copy_idx": 0, + "repetitions": 5, + "sleep": 1 + } + }, + "deployment": { + "name": "sonataflow", + "aws": { + "region": "us-east-1", + "lambda-role": "", + "resources": { + "redis": { + "host": "ec2-54-86-32-136.compute-1.amazonaws.com", + "password": "xB46z3u9I6WJ" + } + } + }, + "azure": { + "region": 
"westeurope" + }, + "gcp": { + "region": "europe-west1", + "project_name": "", + "credentials": "" + }, + "local": { + "resources": { + "redis": { + "host": "", + "password": "" + } + }, + "storage": { + "address": "", + "mapped_port": -1, + "access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio" + } + }, + "sonataflow": { + "resources": { + "redis": { + "host": "localhost:6380", + "password": "" + }, + "runtime": { + "url": "http://localhost:8080", + "endpoint_prefix": "services" + } + }, + "storage": { + "address": "172.17.0.2:9000", + "mapped_port": 9011, + "access_key": "skPhf3f8aEMLd0P8n81M8OrA6fq8ZKCx6dn313lq2ws", + "secret_key": "4c15b2336fe9e89fac929dd13b4f43e222c9f8f0ae3e528572f46d94e93a1a13", + "instance_id": "b59d6d8581f4d62f8fd53e9d2184f3f9b4ab5661370d42f4dabbe739d6bda579", + "input_buckets": [], + "output_buckets": [], + "type": "minio", + "object": { + "type": "minio", + "minio": { + "address": "172.17.0.2:9000", + "mapped_port": 9011, + "access_key": "dZgkU1z39_K77ONo22flFxi0Std9C-IxRSihbDGV_w8", + "secret_key": "b91c091999ffd06ef36d169ff483456d437dbd56c924e51c3444efab1bc7a1bd", + "instance_id": "5dcae415d9adf6e71fbbf21c2c912eeda32e402d57282f026f42a7ba49ed9217", + "input_buckets": [], + "output_buckets": [] + } + }, + "nosql": { + "type": "scylladb", + "scylladb": { + "address": "172.17.0.3:8000", + "mapped_port": 9012, + "instance_id": "07961446bedf02c284b7657182c3ae4c0f3b25cab2d9a863c14e12dd9d886bdb" + } + } + }, + "nosql": { + "type": "scylladb", + "address": "172.17.0.3:8000", + "mapped_port": 9012, + "instance_id": "841c8a0b85fae2647f214170eb8fa666cd7ee01a361a1614f9c752e011b1a757" + } + }, + "openwhisk": { + "shutdownStorage": false, + "removeCluster": false, + "wskBypassSecurity": "true", + "wskExec": "wsk", + "experimentalManifest": false, + "docker_registry": { + "registry": "", + "username": "", + "password": "" + }, + "storage": { + "address": "", + "mapped_port": -1, + 
"access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio" + } + } + } +} diff --git a/dockerfiles/sonataflow/entrypoint.sh b/dockerfiles/sonataflow/entrypoint.sh new file mode 100755 index 000000000..5451f551a --- /dev/null +++ b/dockerfiles/sonataflow/entrypoint.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +USER_ID=${CONTAINER_UID} +GROUP_ID=${CONTAINER_GID} +USER=${CONTAINER_USER} + +useradd --non-unique -m -u ${USER_ID} ${USER} +groupmod --non-unique -g ${GROUP_ID} ${USER} +export HOME=/home/${USER} +echo "Running as ${USER}, with ${USER_ID} and ${GROUP_ID}" + +if [ ! -z "$CMD" ]; then + gosu ${USER} $CMD +fi + +chown -R ${USER}:${USER} /sebs/ +echo "$USER ALL=(ALL:ALL) NOPASSWD: ALL" | tee /etc/sudoers.d/dont-prompt-$USER-for-password +usermod -aG sudo ${USER} + +exec gosu ${USER} "$@" + diff --git a/dockerfiles/sonataflow/nodejs/Dockerfile.build b/dockerfiles/sonataflow/nodejs/Dockerfile.build new file mode 100755 index 000000000..f65fd75f7 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/Dockerfile.build @@ -0,0 +1,16 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN apt-get update\ + && apt-get install -y --no-install-recommends zip gosu\ + && apt-get purge -y --auto-remove + +RUN mkdir -p /sebs/ +COPY dockerfiles/nodejs_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/sonataflow/nodejs/Dockerfile.run b/dockerfiles/sonataflow/nodejs/Dockerfile.run new file mode 100755 index 000000000..33e531524 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/Dockerfile.run @@ -0,0 +1,27 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN deps=''\ + && apt-get update\ + && apt-get install -y --no-install-recommends curl net-tools gosu python3 sudo 
${deps}\ + && apt-get purge -y --auto-remove ${deps} + +RUN mkdir -p /sebs +RUN cd /sebs/ && npm install -g uuid strftime express minio +# NODE_PATH=$(npm root --quiet -g) +# https://github.com/moby/moby/issues/29110 +ENV NODE_PATH=/usr/local/lib/node_modules + +COPY dockerfiles/local/*.py /sebs/ +COPY dockerfiles/local/run.sh /sebs/ +COPY dockerfiles/local/nodejs/*.js /sebs/ +COPY dockerfiles/local/nodejs/run_server.sh /sebs/ +COPY dockerfiles/local/nodejs/timeit.sh /sebs/ +COPY dockerfiles/local/nodejs/runners.json /sebs/ +COPY dockerfiles/local/nodejs/package.json /sebs/ + +COPY dockerfiles/local/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh +RUN chmod +x /sebs/run.sh + +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/sonataflow/nodejs/config.js b/dockerfiles/sonataflow/nodejs/config.js new file mode 100644 index 000000000..19e7f075f --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/config.js @@ -0,0 +1,2 @@ +var tools = require('./tools'); +console.log( JSON.stringify(tools.get_config()) ) diff --git a/dockerfiles/sonataflow/nodejs/package.json b/dockerfiles/sonataflow/nodejs/package.json new file mode 100644 index 000000000..635c8b693 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/package.json @@ -0,0 +1,7 @@ +{ + "dependencies": { + "minio": "^7.0.13", + "strftime": "^0.10.0", + "uuid": "^3.4.0" + } +} diff --git a/dockerfiles/sonataflow/nodejs/run_server.sh b/dockerfiles/sonataflow/nodejs/run_server.sh new file mode 100755 index 000000000..c257e1fb7 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/run_server.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +node /sebs/server.js "$@" diff --git a/dockerfiles/sonataflow/nodejs/runners.json b/dockerfiles/sonataflow/nodejs/runners.json new file mode 100644 index 000000000..77eda0117 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/runners.json @@ -0,0 +1,6 @@ +{ + "time" : {"warm" : "time-in-proc.js", "cold" : "time-out-proc.py"}, + "memory": "analyzer-runner.js", + "disk-io": 
"analyzer-runner.js", + "config": ["node", "config.js"] +} diff --git a/dockerfiles/sonataflow/nodejs/server.js b/dockerfiles/sonataflow/nodejs/server.js new file mode 100644 index 000000000..c98b3fa72 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/server.js @@ -0,0 +1,48 @@ +const http = require('http'), + strftime = require('strftime'), + express = require('express'), + f = require('/function/function/function'); +//import { v4 as uuidv4 } from 'uuid'; +const { v4: uuidv4 } = require('uuid'); + + +var app = express(); +app.use(express.json()); + +app.post('/alive', function (req, res) { + res.send(JSON.stringify({ + status: "ok" + })); +}); + +app.post('/', function (req, res) { + + let begin = Date.now(); + let ret = f.handler(req.body); + ret.then((func_res) => { + + let end = Date.now(); + res.setHeader('Content-Type', 'application/json'); + res.end(JSON.stringify({ + begin: strftime('%s.%L', new Date(begin)), + end: strftime('%s.%L', new Date(end)), + request_id: uuidv4(), + is_cold: false, + result: { + output: func_res + } + })); + }, + (reason) => { + console.log('Function invocation failed!'); + console.log(reason); + process.exit(1); + } + ); +}); + +app.listen(port=process.argv[2], function () { + console.log(`Server listening on port ${process.argv[2]}.`); +}); + + diff --git a/dockerfiles/sonataflow/nodejs/time-in-proc.js b/dockerfiles/sonataflow/nodejs/time-in-proc.js new file mode 100644 index 000000000..fd829d9e9 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/time-in-proc.js @@ -0,0 +1,72 @@ + +const tools = require('./tools'), + fs = require('fs'), + strftime = require('strftime'), + f = require('./function/function'), + util = require('util'); +const createCsvWriter = require('csv-writer').createArrayCsvWriter; + +let cfg = JSON.parse(fs.readFileSync(process.argv[2])); +let repetitions = cfg.benchmark.repetitions; +let disable_gc = cfg.benchmark.disable_gc; +let input_data = cfg.input; +let timedata = new Array(repetitions); 
+process.on('unhandledRejection', r => console.log(r)); + +// Due to the async nature of nodejs, we use 'then' functionality +// of promise to make sure that we start a new instance only after finishing +// the previous one. There's no other option to achieve true waiting and we don't +// want to start multiple instances and let them work concurrently. +let measurer = async function(repetition, finish) { + if (repetition < repetitions) { + let begin_timestamp = Date.now(); + let begin = process.hrtime(); + let cpuTimeBegin = process.cpuUsage(); + let ret = f.handler(input_data); + ret.then((res) => { + let cpuTimeEnd = process.cpuUsage(); + let stop_timestamp = Date.now(); + let stop = process.hrtime(begin); + let output_file = tools.get_result_prefix(tools.LOGS_DIR, 'output', 'txt'); + fs.writeFileSync(output_file, JSON.stringify(res)); + let userTime = cpuTimeEnd.user - cpuTimeBegin.user; + let sysTime = cpuTimeEnd.system - cpuTimeBegin.system; + timedata[repetition] = [begin_timestamp, stop_timestamp, stop[0]*1e6 + stop[1]/1e3, userTime, sysTime]; + measurer(repetition + 1, finish); + }, + (reason) => { + console.log('Function invocation failed!'); + console.log(reason); + process.exit(1); + } + ); + } else{ + finish(); + } +} +start = tools.start_benchmarking(); +measurer(0, + () => { + end = tools.stop_benchmarking(); + let result = tools.get_result_prefix(tools.RESULTS_DIR, cfg.benchmark.name, 'csv') + let csvWriter = createCsvWriter({ + path: result, + header: ['Begin','End','Duration','User','Sys'] + }); + for(let i = 0; i < repetitions; ++i) { + timedata[i][0] = strftime('%s.%L', new Date(timedata[i][0])); + timedata[i][1] = strftime('%s.%L', new Date(timedata[i][1])); + } + let p = csvWriter.writeRecords(timedata); + p.then( () => { + let reduce_array = timedata.map( x => { x.pop(); return x} ); + experiment_data = { + repetitions: repetitions, + start: start, + end: end, + timestamps: reduce_array + } + console.log( JSON.stringify({experiment: 
experiment_data, runtime: tools.get_config()}, null, 2) ) + }); + } +); diff --git a/dockerfiles/sonataflow/nodejs/timeit.sh b/dockerfiles/sonataflow/nodejs/timeit.sh new file mode 100644 index 000000000..15fd78b5d --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/timeit.sh @@ -0,0 +1,11 @@ +#!/bin/bash +OUT=$1 +#ts=$(date +%s%N); +export TIMEFORMAT='%3R,%3U,%3S' +time node --expose-gc -e "var fs = require('fs'), f = require('./function/function'); +async function test() { + var input = JSON.parse(fs.readFileSync('input.json', 'utf-8')); + return await f.handler(input); +} +test().then( (data) => console.log(data) );" > $OUT +#tt=$((($(date +%s%N) - $ts)/1000)) ; echo $tt diff --git a/dockerfiles/sonataflow/nodejs/tools.js b/dockerfiles/sonataflow/nodejs/tools.js new file mode 100644 index 000000000..991344979 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/tools.js @@ -0,0 +1,44 @@ + +const glob = require('glob'), + path = require('path'); + +const RESULTS_DIR = 'results'; +exports.RESULTS_DIR = RESULTS_DIR; +const LOGS_DIR = 'logs'; +exports.LOGS_DIR = LOGS_DIR; + + +exports.get_config = function () { + return { + name: 'nodejs', + version: process.version, + modules: process.moduleLoadList + }; +} + +exports.start_benchmarking = function() { + return Date.now() +} + +exports.stop_benchmarking = function() { + return Date.now() +} + +exports.get_result_prefix = function(dirname, name, suffix) { + name = path.join(dirname, name); + let counter = 0 + while( + glob.sync( + name + '_' + counter.toString().padStart(2, '0') + '*.' + suffix + ).length + ) { + counter += 1 + } + // util.format ignores padding zeroes + return name + '_' + counter.toString().padStart(2, '0') + '.' 
+ suffix +} + +exports.process_timestamps = function(timestamps) { + +} + diff --git a/dockerfiles/sonataflow/python/Dockerfile.build b/dockerfiles/sonataflow/python/Dockerfile.build new file mode 100755 index 000000000..5892c6500 --- /dev/null +++ b/dockerfiles/sonataflow/python/Dockerfile.build @@ -0,0 +1,18 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +RUN apt-get update\ + && apt-get install -y --no-install-recommends gcc build-essential python3-dev libxml2 libxml2-dev zlib1g-dev gosu\ + && apt-get purge -y --auto-remove + +RUN mkdir -p /sebs/ +COPY dockerfiles/python_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/sonataflow/python/Dockerfile.run b/dockerfiles/sonataflow/python/Dockerfile.run new file mode 100755 index 000000000..2c1e27df7 --- /dev/null +++ b/dockerfiles/sonataflow/python/Dockerfile.run @@ -0,0 +1,25 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN deps=''\ + && apt-get update\ + # for route and sudo + && apt-get install --no-install-recommends -y curl gosu net-tools sudo ${deps}\ + && apt-get purge -y --auto-remove ${deps}\ + && pip3 install cffi minio bottle redis + +RUN mkdir -p /sebs +COPY dockerfiles/local/run.sh /sebs/ +COPY dockerfiles/local/*.py /sebs/ +COPY dockerfiles/local/python/*.py /sebs/ +COPY dockerfiles/local/python/run_server.sh /sebs/ +COPY dockerfiles/local/python/timeit.sh /sebs/ +COPY dockerfiles/local/python/runners.json /sebs/ +ADD third-party/pypapi/pypapi /sebs/pypapi +ENV PYTHONPATH=/sebs/.python_packages/lib/site-packages:$PYTHONPATH + +COPY dockerfiles/local/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh +RUN chmod +x /sebs/run.sh + +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git 
a/dockerfiles/sonataflow/python/analyzer-runner.py b/dockerfiles/sonataflow/python/analyzer-runner.py new file mode 100644 index 000000000..624459795 --- /dev/null +++ b/dockerfiles/sonataflow/python/analyzer-runner.py @@ -0,0 +1,64 @@ + +import datetime, json, sys, subprocess, os +ip_address = os.environ['DOCKER_HOST_IP'] +cfg = json.load(open(sys.argv[1], 'r')) +ret = subprocess.run(['curl', '-X', 'POST', + '{}:{}/start'.format(ip_address, cfg['benchmark']['analyzer']['analyzer_port']), + '-d', + '{{"uuid": "{}" }}'.format(sys.argv[2])], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) +if ret.returncode != 0: + import sys + print('Analyzer initialization failed!') + print(ret.stderr.decode('utf-8')) + sys.exit(100) + + +from utils import * +from tools import * +# imported function +from function import function + +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] + +timedata = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + res = function.handler(input_data) + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + end = stop_benchmarking() + + ret = subprocess.run( + [ + 'curl', '-X', 'POST', + '{}:{}/stop'.format(ip_address, cfg['benchmark']['analyzer']['analyzer_port']), + '-d', + '{{"uuid": "{}" }}'.format(sys.argv[2]) + ], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if ret.returncode != 0: + import sys + print('Analyzer deinitialization failed!') + print(ret.stderr.decode('utf-8')) + sys.exit(101) + experiment_data = {} + experiment_data['repetitions'] = repetitions + experiment_data['timestamps'] = process_timestamps(timedata) + experiment_data['start'] = str(start) + experiment_data['end'] = str(end) + print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) +except 
Exception as e: + print('Exception caught!') + print(e) + sys.exit(102) +sys.exit(0) diff --git a/dockerfiles/sonataflow/python/config.py b/dockerfiles/sonataflow/python/config.py new file mode 100644 index 000000000..e7115cc73 --- /dev/null +++ b/dockerfiles/sonataflow/python/config.py @@ -0,0 +1,5 @@ +import json + +from tools import * + +print(json.dumps(get_config())) diff --git a/dockerfiles/sonataflow/python/papi-runner.py b/dockerfiles/sonataflow/python/papi-runner.py new file mode 100644 index 000000000..0c82d476d --- /dev/null +++ b/dockerfiles/sonataflow/python/papi-runner.py @@ -0,0 +1,104 @@ + +import datetime, json, sys, traceback, csv + +from utils import * +from tools import * + +# imported function +from function import function + +import pypapi.exceptions + +class papi_benchmarker: + from pypapi import papi_low as papi + from pypapi import events as papi_events + + def __init__(self, papi_cfg): + self.events = [] + self.events_names = [] + self.count = 0 + + self.papi.library_init() + self.events = self.papi.create_eventset() + for event in papi_cfg['events']: + try: + self.papi.add_event(self.events, getattr(self.papi_events, event)) + except pypapi.exceptions.PapiInvalidValueError as err: + print('Adding event {event} failed!'.format(event=event)) + sys.exit(100) + + self.events_names = papi_cfg['events'] + self.count = len(papi_cfg['events']) + self.results = [] + + self.ins_granularity = papi_cfg['overflow_instruction_granularity'] + self.buffer_size = papi_cfg['overflow_buffer_size'] + self.start_time = datetime.datetime.now() + + self.papi.overflow_sampling(self.events, self.papi_events.PAPI_TOT_INS, + int(self.ins_granularity), int(self.buffer_size)) + + def start_overflow(self): + self.papi.start(self.events) + + def stop_overflow(self): + self.papi.stop(self.events) + + def get_results(self): + data = self.papi.overflow_sampling_results(self.events) + for vals in data: + for i in range(0, len(vals), self.count + 1): + chunks = 
vals[i:i+self.count+1] + measurement_time = datetime.datetime.fromtimestamp(chunks[0]/1e6) + time = (measurement_time - self.start_time) / datetime.timedelta(microseconds = 1) + self.results.append([measurement_time.strftime("%s.%f"), time] + list(chunks[1:])) + + def finish(self): + self.papi.cleanup_eventset(self.events) + self.papi.destroy_eventset(self.events) + + +cfg = json.load(open(sys.argv[1], 'r')) +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] +papi_experiments = papi_benchmarker(cfg['benchmark']['papi']) + +timedata = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + papi_experiments.start_overflow() + res = function.handler(input_data) + papi_experiments.stop_overflow() + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + end = stop_benchmarking() +except Exception as e: + print('Exception caught!') + print(e) + traceback.print_exc() + + +papi_experiments.get_results() +papi_experiments.finish() +result = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'csv') +with open(result, 'w') as f: + csv_writer = csv.writer(f) + csv_writer.writerow( + ['Time','RelativeTime'] + papi_experiments.events_names + ) + for val in papi_experiments.results: + csv_writer.writerow(val) + +experiment_data = {} +experiment_data['repetitions'] = repetitions +experiment_data['timestamps'] = process_timestamps(timedata) +experiment_data['start'] = str(start) +experiment_data['end'] = str(end) +print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) diff --git a/dockerfiles/sonataflow/python/run_server.sh b/dockerfiles/sonataflow/python/run_server.sh new file mode 100755 index 000000000..fa9a82297 --- /dev/null +++ b/dockerfiles/sonataflow/python/run_server.sh @@ -0,0 +1,3 @@ +#!/bin/bash + 
+python3 /sebs/server.py "$@" diff --git a/dockerfiles/sonataflow/python/runners.json b/dockerfiles/sonataflow/python/runners.json new file mode 100644 index 000000000..1a7a9d84c --- /dev/null +++ b/dockerfiles/sonataflow/python/runners.json @@ -0,0 +1,7 @@ +{ + "papi": "papi-runner.py", + "time" : {"warm" : "time-in-proc.py", "cold" : "time-out-proc.py"}, + "memory": "analyzer-runner.py", + "disk-io": "analyzer-runner.py", + "config": ["python3", "config.py"] +} diff --git a/dockerfiles/sonataflow/python/server.py b/dockerfiles/sonataflow/python/server.py new file mode 100644 index 000000000..4ed1314f2 --- /dev/null +++ b/dockerfiles/sonataflow/python/server.py @@ -0,0 +1,38 @@ +import datetime +import os +import sys +import uuid + +import bottle +from bottle import route, run, template, request + +CODE_LOCATION='/function' + +@route('/alive', method='GET') +def alive(): + return { + "result:" "ok" + } + +@route('/', method='POST') +def process_request(): + begin = datetime.datetime.now() + from function import function + end = datetime.datetime.now() + # FIXME: measurements? 
+ ret = function.handler(request.json) + + return { + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + "request_id": str(uuid.uuid4()), + "is_cold": False, + "result": { + "output": ret + } + } + +sys.path.append(os.path.join(CODE_LOCATION)) +sys.path.append(os.path.join(CODE_LOCATION, '.python_packages/lib/site-packages/')) +run(host='0.0.0.0', port=int(sys.argv[1]), debug=True) + diff --git a/dockerfiles/sonataflow/python/time-in-proc.py b/dockerfiles/sonataflow/python/time-in-proc.py new file mode 100644 index 000000000..962da527a --- /dev/null +++ b/dockerfiles/sonataflow/python/time-in-proc.py @@ -0,0 +1,59 @@ + +import datetime, json, sys, traceback, csv, resource + +from utils import * +from tools import * + +# imported function +from function import function + + +cfg = json.load(open(sys.argv[1], 'r')) +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] + +timedata = [0] * repetitions +os_times = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + begin_times = resource.getrusage(resource.RUSAGE_SELF) + res = function.handler(input_data) + end_times = resource.getrusage(resource.RUSAGE_SELF) + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + os_times[i] = [begin_times, end_times] + end = stop_benchmarking() +except Exception as e: + print('Exception caught!') + print(e) + traceback.print_exc() + + +result = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'csv') +with open(result, 'w') as f: + csv_writer = csv.writer(f) + csv_writer.writerow(['#Seconds from epoch.microseconds; CPU times are in microseconds']) + csv_writer.writerow(['Begin','End','Duration','User','Sys']) + for i in range(0, len(timedata)): + csv_writer.writerow([ + timedata[i][0].strftime('%s.%f'), + 
timedata[i][1].strftime('%s.%f'), + (timedata[i][1] - timedata[i][0]) / + datetime.timedelta(microseconds=1), + (os_times[i][1].ru_utime - os_times[i][0].ru_utime) * 1e6, + (os_times[i][1].ru_stime - os_times[i][0].ru_stime) * 1e6 + ]) + +experiment_data = {} +experiment_data['repetitions'] = repetitions +experiment_data['timestamps'] = process_timestamps(timedata) +experiment_data['start'] = str(start) +experiment_data['end'] = str(end) +print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) diff --git a/dockerfiles/sonataflow/python/timeit.sh b/dockerfiles/sonataflow/python/timeit.sh new file mode 100755 index 000000000..fed626b97 --- /dev/null +++ b/dockerfiles/sonataflow/python/timeit.sh @@ -0,0 +1,5 @@ +#!/bin/bash +#ts=$(date +%s%N); +export TIMEFORMAT='%3R,%3U,%3S' +time python3 -c "from json import load; from function import function; print(function.handler(load(open('input.json', 'r'))))" > $1 +#tt=$((($(date +%s%N) - $ts)/1000)) ; echo $tt diff --git a/dockerfiles/sonataflow/python/tools.py b/dockerfiles/sonataflow/python/tools.py new file mode 100644 index 000000000..33213f70c --- /dev/null +++ b/dockerfiles/sonataflow/python/tools.py @@ -0,0 +1,21 @@ + +import datetime, gc, platform, os, sys + +def start_benchmarking(disable_gc): + if disable_gc: + gc.disable() + return datetime.datetime.now() + +def stop_benchmarking(): + end = datetime.datetime.now() + gc.enable() + return end + +def get_config(): + # get currently loaded modules + # https://stackoverflow.com/questions/4858100/how-to-list-imported-modules + modulenames = set(sys.modules) & set(globals()) + allmodules = [sys.modules[name] for name in modulenames] + return {'name': 'python', + 'version': platform.python_version(), + 'modules': str(allmodules)} diff --git a/dockerfiles/sonataflow/run.sh b/dockerfiles/sonataflow/run.sh new file mode 100644 index 000000000..9ecc13e5b --- /dev/null +++ b/dockerfiles/sonataflow/run.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +export 
DOCKER_HOST_IP=$(route -n | awk '/UG[ \t]/{print $2}') +EXPERIMENT_INPUT="$1" +file_name=logs/execution_00.log +counter=0 + +while [ -e "${file_name}" ]; do + counter=$((counter + 1)) + file_name=$(printf '%s_%02d.log' "logs/execution" "$(( counter ))") +done + +script -e -c "python3 runner.py ${EXPERIMENT_INPUT}" -f "${file_name}" diff --git a/dockerfiles/sonataflow/runner.py b/dockerfiles/sonataflow/runner.py new file mode 100644 index 000000000..96261fc33 --- /dev/null +++ b/dockerfiles/sonataflow/runner.py @@ -0,0 +1,62 @@ +import csv, gc, sys, imp, datetime, json, os, subprocess, uuid, sys + +from distutils.dir_util import copy_tree +from utils import * + +def get_language(lang): + languages = {'python': 'python3', 'nodejs': 'nodejs'} + return languages[lang] + +def get_runner(experiment, options=None): + runners = json.load(open('runners.json', 'r')) + return runners[experiment][options] if options is not None else runners[experiment] + +def get_runner_cmd(lang, experiment, options): + executable = get_language(lang) + script = get_runner(experiment, options) + script_name, extension = os.path.splitext(script) + # Out-of-proc measurements don't require languge-specific implementations + if extension == '.py': + executable = get_language('python') + return [executable, script] + +def export_storage_config(config): + if config is not None: + os.environ['MINIO_ADDRESS'] = config['address'] + os.environ['MINIO_ACCESS_KEY'] = config['access_key'] + os.environ['MINIO_SECRET_KEY'] = config['secret_key'] + +if __name__ == "__main__": + cfg = json.load(open(sys.argv[1], 'r')) + input_data = cfg['input'] + repetitions = cfg['benchmark']['repetitions'] + experiment = cfg['benchmark']['type'] + language = cfg['benchmark']['language'] + export_storage_config(cfg['benchmark'].get('storage', None)) + experiment_options = cfg['benchmark'].get('experiment_options', None) + + # copy code to main directory + copy_tree('code', '.') + + runner = get_runner_cmd(language, 
experiment, experiment_options) + uuid = uuid.uuid1() + ret = subprocess.run(runner + [sys.argv[1], str(uuid)], stdout=subprocess.PIPE) + if ret.returncode != 0: + print('Experiment finished incorrectly! Exit code {}'.format(ret.returncode)) + print('Output: ', ret.stdout.decode('utf-8')) + sys.exit(1) + + # Dump experiment data + result = {'input': cfg} + try: + experiment_data = json.loads(ret.stdout.decode('utf-8')) + for v in ['experiment', 'runtime']: + result[v] = experiment_data[v] + result_dir = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'json') + with open(result_dir, 'w') as f: + json.dump(result, f, indent = 2) + except json.decoder.JSONDecodeError as e: + print('Experiment output is not valid!') + print(e) + print(ret.stdout.decode('utf-8')) + sys.exit(1) diff --git a/dockerfiles/sonataflow/time-out-proc.py b/dockerfiles/sonataflow/time-out-proc.py new file mode 100644 index 000000000..9613d1ab5 --- /dev/null +++ b/dockerfiles/sonataflow/time-out-proc.py @@ -0,0 +1,56 @@ + +import datetime, json, subprocess, sys, traceback, csv + +from utils import * + +cfg = json.load(open(sys.argv[1], 'r')) +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] +json.dump(input_data, open('input.json', 'w')) + +timedata = [0] * repetitions +durations = [0] * repetitions +try: + start = datetime.datetime.now() + for i in range(0, repetitions): + prefix = get_result_prefix(LOGS_DIR, 'output', 'txt') + begin = datetime.datetime.now() + ret = subprocess.run(['/bin/bash', 'timeit.sh', prefix], + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + stop = datetime.datetime.now() + if ret.returncode != 0: + raise RuntimeError() + timedata[i] = [begin, stop] + # time returns WALL,USER,SYS + durations[i] = ret.stdout.decode('utf-8').rstrip().split(',') + end = datetime.datetime.now() +except ValueError as e: + print('Incorrect output from function') + print(ret.stdout.decode('utf-8')) +except 
Exception as e: + print('Exception caught!') + print(e) + traceback.print_exc() + + +result = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'csv') +with open(result, 'w') as f: + csv_writer = csv.writer(f) + csv_writer.writerow(['#Seconds from epoch.microseconds; Duration in miliseconds']) + csv_writer.writerow(['Begin','End','Wallclock','User','Sys']) + for i in range(0, len(timedata)): + csv_writer.writerow([ + timedata[i][0].strftime('%s.%f'), + timedata[i][1].strftime('%s.%f'), + *durations[i] + ]) + +experiment_data = {} +experiment_data['repetitions'] = repetitions +experiment_data['timestamps'] = process_timestamps(timedata) +experiment_data['start'] = str(start) +experiment_data['end'] = str(end) +ret = subprocess.run(json.load(open('runners.json', 'r'))['config'], stdout=subprocess.PIPE) +config = json.loads(ret.stdout.decode('utf-8')) +print(json.dumps({'experiment': experiment_data, 'runtime': config}, indent=2)) diff --git a/dockerfiles/sonataflow/utils.py b/dockerfiles/sonataflow/utils.py new file mode 100644 index 000000000..087ec397a --- /dev/null +++ b/dockerfiles/sonataflow/utils.py @@ -0,0 +1,21 @@ +import glob, os + +RESULTS_DIR = 'results' +LOGS_DIR = 'logs' + +def get_result_prefix(dirname, name, suffix): + name = os.path.join(dirname, name) + counter = 0 + while glob.glob( '{}_{:02d}*.{}'.format(name, counter, suffix) ): + counter +=1 + return '{}_{:02d}.{}'.format(name, counter, suffix) + +def process_timestamps(timestamps): + # convert list of lists of times data to proper timestamps + return list(map( + lambda times : list(map( + lambda x: x.strftime('%s.%f'), + times + )), + timestamps + )) diff --git a/experiments.json b/experiments.json new file mode 100644 index 000000000..0b74d9f86 --- /dev/null +++ b/experiments.json @@ -0,0 +1,125 @@ +{ + "_invocations": { + "sebd-610.gen-python-3.8": { + "6e0f75aa": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": {}, + "provider_times": { + 
"execution": 0, + "initialization": 0 + }, + "request_id": "6e0f75aa", + "stats": { + "cold_start": false, + "failure": true, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 93292, + "client_begin": "2025-12-14 22:00:40.135117", + "client_end": "2025-12-14 22:00:40.228409", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1765746040.129077, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.17.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "07961446bedf02c284b7657182c3ae4c0f3b25cab2d9a863c14e12dd9d886bdb", + "mapped_port": 9012, + "memory": -1, + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "services", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "dZgkU1z39_K77ONo22flFxi0Std9C-IxRSihbDGV_w8", + "address": "172.17.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "5dcae415d9adf6e71fbbf21c2c912eeda32e402d57282f026f42a7ba49ed9217", + "mapped_port": 9011, + "output_buckets": [], + "secret_key": "b91c091999ffd06ef36d169ff483456d437dbd56c924e51c3444efab1bc7a1bd", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", 
+ "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.8" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1765746040.228735, + "result_bucket": null +} \ No newline at end of file diff --git a/out_storage.json b/out_storage.json new file mode 100644 index 000000000..393a38027 --- /dev/null +++ b/out_storage.json @@ -0,0 +1,33 @@ +{ + "object": { + "type": "minio", + "minio": { + "address": "172.17.0.2:9000", + "mapped_port": 9011, + "access_key": "dZgkU1z39_K77ONo22flFxi0Std9C-IxRSihbDGV_w8", + "secret_key": "b91c091999ffd06ef36d169ff483456d437dbd56c924e51c3444efab1bc7a1bd", + "instance_id": "5dcae415d9adf6e71fbbf21c2c912eeda32e402d57282f026f42a7ba49ed9217", + "output_buckets": [], + "input_buckets": [], + "version": "RELEASE.2024-07-16T23-46-41Z", + "data_volume": "minio-volume", + "type": "minio" + } + }, + "nosql": { + "type": "scylladb", + "scylladb": { + "address": "172.17.0.3:8000", + "mapped_port": 9012, + "alternator_port": 8000, + "access_key": "None", + "secret_key": "None", + "instance_id": "07961446bedf02c284b7657182c3ae4c0f3b25cab2d9a863c14e12dd9d886bdb", + "region": "None", + "cpus": 1, + "memory": "750", + "version": "6.0", + "data_volume": "scylladb-volume" + } + } +} \ No newline at end of file diff --git a/results/local-workflows/results/sebd-610.gen-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-610.gen-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-610.gen-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-6100.1000-genome-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-6100.1000-genome-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ 
b/results/local-workflows/results/sebd-6100.1000-genome-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-620.func-invo-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-620.func-invo-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-620.func-invo-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-6200.trip-booking-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-6200.trip-booking-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-6200.trip-booking-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-631.parallel-download-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-631.parallel-download-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-631.parallel-download-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-640.selfish-detour-python-3.8/sonataflow.csv 
b/results/local-workflows/results/sebd-640.selfish-detour-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-640.selfish-detour-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-650.vid-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-650.vid-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-650.vid-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-660.map-reduce-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-660.map-reduce-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-660.map-reduce-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-670.auth-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-670.auth-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-670.auth-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-680.excamera-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-680.excamera-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-680.excamera-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-690.ml-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-690.ml-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-690.ml-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/run_sonataflow_workflows.sh b/run_sonataflow_workflows.sh new file mode 100755 index 000000000..c6aa17a01 --- /dev/null +++ 
b/run_sonataflow_workflows.sh @@ -0,0 +1,163 @@ +#!/bin/bash +set -euo pipefail + +# Prepare local configuration files +if [ ! -f config/local_workflows.json ]; then + cp config/example.json config/local_workflows.json +fi +if [ ! -f config/local_deployment.json ]; then + cp config/example.json config/local_deployment.json +fi + +DATA_FLAG="benchmarks-data/600.workflows/6100.1000-genome/ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf" +if [ ! -f "$DATA_FLAG" ]; then + echo "Workflow datasets missing, running download_datasets.sh..." + (cd benchmarks-data/600.workflows && ./download_datasets.sh) +else + echo "Workflow datasets present, skipping download." +fi + +cleanup() { + echo "Stopping all running Docker containers..." + docker ps -q | xargs -r docker stop >/dev/null || true +} +trap cleanup EXIT + +./sebs.py storage start all config/storage.json --output-json out_storage.json + +MINIO_ADDRESS=$(jq -r '.object.minio.address' out_storage.json) +MINIO_PORT=$(jq -r '.object.minio.mapped_port' out_storage.json) +MINIO_ACCESS=$(jq -r '.object.minio.access_key' out_storage.json) +MINIO_SECRET=$(jq -r '.object.minio.secret_key' out_storage.json) +MINIO_INSTANCE=$(jq -r '.object.minio.instance_id' out_storage.json) +SCYLLA_ADDRESS=$(jq -r '.nosql.scylladb.address' out_storage.json) +SCYLLA_PORT=$(jq -r '.nosql.scylladb.mapped_port' out_storage.json) +SCYLLA_INSTANCE=$(jq -r '.nosql.scylladb.instance_id' out_storage.json) + +for cfg in config/local_workflows.json config/local_deployment.json; do + tmp=$(mktemp) + jq \ + --arg addr "$MINIO_ADDRESS" \ + --argjson port "$MINIO_PORT" \ + --arg access "$MINIO_ACCESS" \ + --arg secret "$MINIO_SECRET" \ + --arg inst "$MINIO_INSTANCE" \ + --arg saddr "$SCYLLA_ADDRESS" \ + --argjson sport "$SCYLLA_PORT" \ + --arg sinst "$SCYLLA_INSTANCE" \ + --arg redis_host "localhost:6380" \ + --arg redis_pass "" \ + --arg runtime_url "http://localhost:8080" \ + --arg endpoint_prefix "services" \ + 
'(.deployment.name = "sonataflow") + | (.deployment.sonataflow.storage.object.type = "minio") + | (.deployment.sonataflow.storage.object.minio.address = $addr) + | (.deployment.sonataflow.storage.object.minio.mapped_port = $port) + | (.deployment.sonataflow.storage.object.minio.access_key = $access) + | (.deployment.sonataflow.storage.object.minio.secret_key = $secret) + | (.deployment.sonataflow.storage.object.minio.instance_id = $inst) + | (.deployment.sonataflow.storage.object.minio.input_buckets = []) + | (.deployment.sonataflow.storage.object.minio.output_buckets = []) + | (.deployment.sonataflow.storage.nosql.type = "scylladb") + | (.deployment.sonataflow.storage.nosql.scylladb.address = $saddr) + | (.deployment.sonataflow.storage.nosql.scylladb.mapped_port = $sport) + | (.deployment.sonataflow.storage.nosql.scylladb.instance_id = $sinst) + | (.deployment.sonataflow.resources.redis.host = $redis_host) + | (.deployment.sonataflow.resources.redis.password = $redis_pass) + | (.deployment.sonataflow.resources.runtime.url = $runtime_url) + | (.deployment.sonataflow.resources.runtime.endpoint_prefix = $endpoint_prefix) + ' "$cfg" > "$tmp" + mv "$tmp" "$cfg" +done + +if docker ps -a --format '{{.Names}}' | grep -q '^sebs-redis$'; then + docker rm -f sebs-redis >/dev/null +fi +docker run -d --name sebs-redis -p 6380:6379 redis:7 + +# Prepare SonataFlow workflow directory +SONATAFLOW_WORKFLOWS_DIR="$PWD/sonataflow-workflows" +mkdir -p "$SONATAFLOW_WORKFLOWS_DIR" + +# Function to copy workflow definitions to SonataFlow directory after each benchmark +copy_workflows_to_sonataflow() { + find cache -name "*.sw.json" -path "*/sonataflow/*" 2>/dev/null | while read -r swfile; do + cp -f "$swfile" "$SONATAFLOW_WORKFLOWS_DIR/" 2>/dev/null || true + done +} + +# Create Docker network for SonataFlow and functions if it doesn't exist +docker network inspect sebs-network >/dev/null 2>&1 || docker network create sebs-network + +# Note: We'll start SonataFlow runtime AFTER 
generating the first workflow +# so that it can detect workflows at startup and enable the processes generator + +# Ensure native helper for selfish-detour is built before packaging +SELFISH_DIR="benchmarks/600.workflows/640.selfish-detour/python" +SELFISH_SRC="$SELFISH_DIR/selfish-detour.c" +SELFISH_SO="$SELFISH_DIR/selfish-detour.so" +if [ -f "$SELFISH_SRC" ]; then + if [ ! -f "$SELFISH_SO" ] || [ "$SELFISH_SRC" -nt "$SELFISH_SO" ]; then + echo "Compiling selfish-detour shared object..." + gcc -O2 -shared -fPIC -o "$SELFISH_SO" "$SELFISH_SRC" + fi +fi + +WORKFLOWS=( + "610.gen" + "6100.1000-genome" + "6101.1000-genome-individuals" + "620.func-invo" + "6200.trip-booking" + "630.parallel-sleep" + "631.parallel-download" + "640.selfish-detour" + "650.vid" + "660.map-reduce" + "670.auth" + "680.excamera" + "690.ml" +) + +SONATAFLOW_STARTED=false +for wf in "${WORKFLOWS[@]}"; do + echo "===== Running $wf =====" + + # First, create the workflow (without invoking it yet) by running with --repetitions 0 + # This generates the .sw.json file + ./sebs.py benchmark workflow "$wf" test \ + --config config/local_workflows.json \ + --deployment sonataflow --trigger http --repetitions 0 \ + --output-dir results/local-workflows --verbose || true + + # Copy newly generated workflow definitions to SonataFlow directory + copy_workflows_to_sonataflow + echo "Copied workflow definitions to SonataFlow directory" + + # Start SonataFlow runtime on first iteration (after first workflow is generated) + if [ "$SONATAFLOW_STARTED" = false ]; then + echo "Starting SonataFlow runtime container..." 
+ if docker ps -a --format '{{.Names}}' | grep -q '^sonataflow-runtime$'; then + docker rm -f sonataflow-runtime >/dev/null + fi + docker run -d --name sonataflow-runtime --network sebs-network -p 8080:8080 \ + -v "$SONATAFLOW_WORKFLOWS_DIR":/home/kogito/serverless-workflow-project/src/main/resources \ + quay.io/kiegroup/kogito-swf-devmode:latest + + echo "Waiting for SonataFlow runtime to start and load workflows..." + sleep 20 + SONATAFLOW_STARTED=true + else + # Wait for SonataFlow to detect and load the new workflow (dev mode auto-reload) + echo "Waiting for SonataFlow to load workflow..." + sleep 10 + fi + + # Now run the actual benchmark + ./sebs.py benchmark workflow "$wf" test \ + --config config/local_workflows.json \ + --deployment sonataflow --trigger http --repetitions 1 \ + --output-dir results/local-workflows --verbose || true + + sleep 5 +done diff --git a/sebs/sonataflow/generator.py b/sebs/sonataflow/generator.py index 1d4c96826..45f9fdd86 100644 --- a/sebs/sonataflow/generator.py +++ b/sebs/sonataflow/generator.py @@ -16,6 +16,7 @@ def __init__(self, workflow_id: str, bindings: Dict[str, Dict[str, str]]): self._workflow_id = workflow_id self._bindings = bindings self._functions: Dict[str, Dict[str, str]] = {} + self._uses_errors = False # Track if any state uses onErrors def _function_ref(self, func_name: str) -> Dict[str, str]: binding = self._bindings.get(func_name) @@ -25,8 +26,14 @@ def _function_ref(self, func_name: str) -> Dict[str, str]: if ref_name not in self._functions: host = binding["host"] port = binding["port"] + # SonataFlow custom REST function format: operation is "rest:METHOD:URL" + # Use absolute URL since we know the host and port url = f"http://{host}:{port}/" - self._functions[ref_name] = {"name": ref_name, "operation": url} + self._functions[ref_name] = { + "name": ref_name, + "operation": f"rest:post:{url}", + "type": "custom" + } return {"refName": ref_name} def _default_action(self, func_name: str, payload_ref: str = "${ 
. }") -> Dict[str, object]: @@ -35,15 +42,23 @@ def _default_action(self, func_name: str, payload_ref: str = "${ . }") -> Dict[s return {"name": func_name, "functionRef": ref} def postprocess(self, payloads: List[dict]) -> dict: - return { + workflow_def = { "id": self._workflow_id, "name": self._workflow_id, "version": "0.1", + "specVersion": "0.8", "description": "Auto-generated from SeBS workflow definition.", "functions": list(self._functions.values()), "start": self.root.name, "states": payloads, } + # Add error definitions if any state uses onErrors + if self._uses_errors: + workflow_def["errors"] = [{ + "name": "workflow_error", + "code": "*" # Catch all errors + }] + return workflow_def def encode_task(self, state: Task) -> Union[dict, List[dict]]: payload: Dict[str, object] = { @@ -56,7 +71,11 @@ def encode_task(self, state: Task) -> Union[dict, List[dict]]: else: payload["end"] = True if state.failure is not None: - payload["onErrors"] = [{"transition": state.failure}] + self._uses_errors = True + payload["onErrors"] = [{ + "errorRef": "workflow_error", + "transition": state.failure + }] return payload def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: @@ -83,13 +102,18 @@ def encode_map(self, state: Map) -> Union[dict, List[dict]]: merged[param] = "${ ." + param + " }" action_args = merged # type: ignore + # Resolve the actual function name from the root state + # state.root is the name of the nested state, state.funcs contains the state definitions + root_state_def = state.funcs.get(state.root, {}) + func_name = root_state_def.get("func_name", state.root) + payload: Dict[str, object] = { "name": state.name, "type": "foreach", "inputCollection": "${ ." + state.array + " }", "outputCollection": "${ ." 
+ state.array + " }", "iterationParam": iteration_param, - "actions": [self._default_action(state.root, action_args)], + "actions": [self._default_action(func_name, action_args)], } if state.next: payload["transition"] = state.next @@ -128,15 +152,21 @@ def encode_loop(self, state: Loop) -> Union[dict, List[dict]]: return payload def _encode_branch(self, subworkflow: dict) -> Dict[str, object]: + # For SonataFlow, branches cannot contain nested states. + # We need to flatten the subworkflow into actions. + # For now, we'll encode the root state's function call as the branch action. states = {n: State.deserialize(n, s) for n, s in subworkflow["states"].items()} - payloads: List[dict] = [] - for s in states.values(): - obj = self.encode_state(s) - if isinstance(obj, list): - payloads.extend(obj) - else: - payloads.append(obj) - return {"name": subworkflow["root"], "states": payloads} + root_state = states.get(subworkflow["root"]) + if not root_state: + raise ValueError(f"Root state {subworkflow['root']} not found in subworkflow") + + # Extract the function name from the root state + if isinstance(root_state, Task): + func_name = root_state.func_name + action = self._default_action(func_name, "${ . 
}") + return {"name": subworkflow["root"], "actions": [action]} + else: + raise ValueError(f"Parallel branches currently only support Task states, got {type(root_state).__name__}") def encode_parallel(self, state: Parallel) -> Union[dict, List[dict]]: branches = [self._encode_branch(sw) for sw in state.funcs] diff --git a/sebs/sonataflow/sonataflow.py b/sebs/sonataflow/sonataflow.py index 29eccc6db..57c9946d1 100644 --- a/sebs/sonataflow/sonataflow.py +++ b/sebs/sonataflow/sonataflow.py @@ -123,6 +123,23 @@ def _workflow_env(self, workflow_name: str, module_name: str) -> Dict[str, str]: def _allocate_host_port(self, start_port: int, range_size: int = 1000) -> int: return Local._allocate_host_port(self, start_port, range_size) + @staticmethod + def _normalize_workflow_id_for_sonataflow(name: str) -> str: + """ + Normalize workflow ID for SonataFlow. + SonataFlow generates Java classes from workflow IDs, so they must be valid Java identifiers. + Replace hyphens with underscores and ensure it starts with a letter. 
+ """ + import re + # Replace any non-alphanumeric characters (except underscore) with underscore + sanitized = re.sub(r"[^A-Za-z0-9_]", "_", name) + if not sanitized: + sanitized = "wf" + # Ensure it starts with a letter + if not sanitized[0].isalpha(): + sanitized = f"wf_{sanitized}" + return sanitized + def _start_container( self, code_package: Benchmark, @@ -130,7 +147,25 @@ def _start_container( func: Optional[LocalFunction], env_overrides: Optional[Dict[str, str]] = None, ) -> LocalFunction: - return Local._start_container(self, code_package, func_name, func, env_overrides) + # Override to use custom network for SonataFlow + # Create sebs-network if it doesn't exist + try: + self._docker_client.networks.get("sebs-network") + except docker.errors.NotFound: + self._docker_client.networks.create("sebs-network", driver="bridge") + + # Call parent method to start the container + func_instance = Local._start_container(self, code_package, func_name, func, env_overrides) + + # Connect the container to sebs-network + try: + network = self._docker_client.networks.get("sebs-network") + network.connect(func_instance.container.id) + self.logging.info(f"Connected container {func_instance.container.name} to sebs-network") + except Exception as e: + self.logging.warning(f"Failed to connect container to sebs-network: {e}") + + return func_instance def _load_workflow_definition(self, path: str) -> dict: return Local._load_workflow_definition(path) @@ -195,7 +230,7 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> Workfl raise ValueError(f"No workflow definition found for {workflow_name}") definition = self._load_workflow_definition(definition_path) - workflow_id = Local._normalize_workflow_id(workflow_name) + workflow_id = self._normalize_workflow_id_for_sonataflow(workflow_name) functions, bindings, definition_copy = self._prepare_workflow_functions( code_package, workflow_name, workflow_id, definition_path, definition @@ -255,7 +290,7 @@ def 
update_workflow(self, workflow: Workflow, code_package: Benchmark): raise ValueError(f"No workflow definition found for {workflow.name}") definition = self._load_workflow_definition(definition_path) - workflow_id = workflow.workflow_id if workflow.workflow_id else Local._normalize_workflow_id(workflow.name) + workflow_id = workflow.workflow_id if workflow.workflow_id else self._normalize_workflow_id_for_sonataflow(workflow.name) functions, bindings, _ = self._prepare_workflow_functions( code_package, workflow.name, diff --git a/sonataflow-workflows/application.properties b/sonataflow-workflows/application.properties new file mode 100644 index 000000000..9219b2ca7 --- /dev/null +++ b/sonataflow-workflows/application.properties @@ -0,0 +1,3 @@ +# Enable Kogito process/workflow generation +kogito.codegen.processes.enabled=true +quarkus.kogito.codegen.processes.enabled=true diff --git a/sonataflow-workflows/sebd_610_gen_python_3_8.sw.json b/sonataflow-workflows/sebd_610_gen_python_3_8.sw.json new file mode 100644 index 000000000..20b7e997b --- /dev/null +++ b/sonataflow-workflows/sebd_610_gen_python_3_8.sw.json @@ -0,0 +1,137 @@ +{ + "id": "sebd_610_gen_python_3_8", + "name": "sebd_610_gen_python_3_8", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_610_gen_python_3_8_get_astros", + "operation": "rest:post:http://172.17.1.112:9000/", + "type": "custom" + }, + { + "name": "sebd_610_gen_python_3_8_few_people", + "operation": "rest:post:http://172.17.1.111:9000/", + "type": "custom" + }, + { + "name": "sebd_610_gen_python_3_8_many_people", + "operation": "rest:post:http://172.17.1.113:9000/", + "type": "custom" + }, + { + "name": "sebd_610_gen_python_3_8_map_astros", + "operation": "rest:post:http://172.17.1.114:9000/", + "type": "custom" + }, + { + "name": "sebd_610_gen_python_3_8_process_astros", + "operation": "rest:post:http://172.17.1.115:9000/", + "type": "custom" + } 
+ ], + "start": "get_astros", + "states": [ + { + "name": "get_astros", + "type": "operation", + "actions": [ + { + "name": "get_astros", + "functionRef": { + "refName": "sebd_610_gen_python_3_8_get_astros", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "select_astros_number" + }, + { + "name": "select_astros_number", + "type": "switch", + "dataConditions": [ + { + "condition": "astros.number < 10", + "transition": "few_people" + }, + { + "condition": "astros.number >= 10", + "transition": "many_people" + } + ], + "defaultCondition": { + "transition": "few_people" + } + }, + { + "name": "few_people", + "type": "operation", + "actions": [ + { + "name": "few_people", + "functionRef": { + "refName": "sebd_610_gen_python_3_8_few_people", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "map_astros" + }, + { + "name": "many_people", + "type": "operation", + "actions": [ + { + "name": "many_people", + "functionRef": { + "refName": "sebd_610_gen_python_3_8_many_people", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "map_astros" + }, + { + "name": "map_astros", + "type": "foreach", + "inputCollection": "${ .astros.people }", + "outputCollection": "${ .astros.people }", + "iterationParam": "item", + "actions": [ + { + "name": "map_astros", + "functionRef": { + "refName": "sebd_610_gen_python_3_8_map_astros", + "arguments": { + "payload": "${ item }" + } + } + } + ], + "transition": "process_astros" + }, + { + "name": "process_astros", + "type": "operation", + "actions": [ + { + "name": "process_astros", + "functionRef": { + "refName": "sebd_610_gen_python_3_8_process_astros", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/sebd_6200_trip_booking_python_3_8.sw.json b/sonataflow-workflows/sebd_6200_trip_booking_python_3_8.sw.json new file mode 100644 index 000000000..f5a7f4612 --- /dev/null +++ b/sonataflow-workflows/sebd_6200_trip_booking_python_3_8.sw.json @@ -0,0 +1,183 @@ +{ + "id": "sebd_6200_trip_booking_python_3_8", + "name": "sebd_6200_trip_booking_python_3_8", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_6200_trip_booking_python_3_8_reserve_hotel", + "operation": "rest:post:http://172.17.1.104:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_8_reserve_rental", + "operation": "rest:post:http://172.17.1.105:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_8_reserve_flight", + "operation": "rest:post:http://172.17.1.103:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_8_confirm", + "operation": "rest:post:http://172.17.1.102:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_8_cancel_flight", + "operation": "rest:post:http://172.17.1.99:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_8_cancel_rental", + "operation": "rest:post:http://172.17.1.101:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_8_cancel_hotel", + "operation": "rest:post:http://172.17.1.100:9000/", + "type": "custom" + } + ], + "start": "hotel", + "states": [ + { + "name": "hotel", + "type": "operation", + "actions": [ + { + "name": "reserve_hotel", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_8_reserve_hotel", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "transition": "rental" + }, + { + "name": "rental", + "type": "operation", + "actions": [ + { + "name": "reserve_rental", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_8_reserve_rental", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "flight", + "onErrors": [ + { + "errorRef": "workflow_error", + "transition": "cancel_hotel" + } + ] + }, + { + "name": "flight", + "type": "operation", + "actions": [ + { + "name": "reserve_flight", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_8_reserve_flight", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "confirm", + "onErrors": [ + { + "errorRef": "workflow_error", + "transition": "cancel_rental" + } + ] + }, + { + "name": "confirm", + "type": "operation", + "actions": [ + { + "name": "confirm", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_8_confirm", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "end": true, + "onErrors": [ + { + "errorRef": "workflow_error", + "transition": "cancel_flight" + } + ] + }, + { + "name": "cancel_flight", + "type": "operation", + "actions": [ + { + "name": "cancel_flight", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_8_cancel_flight", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "cancel_rental" + }, + { + "name": "cancel_rental", + "type": "operation", + "actions": [ + { + "name": "cancel_rental", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_8_cancel_rental", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "cancel_hotel" + }, + { + "name": "cancel_hotel", + "type": "operation", + "actions": [ + { + "name": "cancel_hotel", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_8_cancel_hotel", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "end": true + } + ], + "errors": [ + { + "name": "workflow_error", + "code": "*" + } + ] +} \ No newline at end of file diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index 5336fb485..7269c2183 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -10,7 +10,7 @@ parser = argparse.ArgumentParser(description="Run local app experiments.") parser.add_argument( - "--deployment", default=None, choices=["local", "aws", "azure", "gcp"], action="store" + "--deployment", default=None, choices=["local", "aws", "azure", "gcp", "sonataflow"], action="store" ) parser.add_argument("--type", default=None, choices=["build", "run", "manage"], action="store") parser.add_argument("--language", default=None, choices=["python", "nodejs"], action="store") From f35071a8240f95a285723890c90bcbb7935ca608 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Mon, 15 Dec 2025 17:00:08 +0100 Subject: [PATCH 77/82] garbage --- QUICKSTART_SONATAFLOW.md | 9 +- config/local_deployment.json | 81 ++++--- config/local_workflows.json | 12 +- experiments.json | 46 +--- out_storage.json | 8 +- .../sebd-610.gen-python-3.11/sonataflow.csv | 1 + .../results/sebd-610.gen-python-3.8/local.csv | 16 ++ .../sonataflow.csv | 1 + .../sonataflow.csv | 1 + .../sonataflow.csv | 1 + .../sebd-620.func-invo-python-3.8/local.csv | 10 + .../sonataflow.csv | 1 + .../local.csv | 5 + .../sonataflow.csv | 1 + .../local.csv | 4 + .../sonataflow.csv | 1 + .../local.csv | 7 + .../sonataflow.csv | 1 + .../local.csv | 2 + .../sebd-650.vid-python-3.11/sonataflow.csv | 1 + .../results/sebd-650.vid-python-3.8/local.csv | 4 + .../sonataflow.csv | 1 + .../sebd-660.map-reduce-python-3.8/local.csv | 11 + .../sebd-670.auth-python-3.11/sonataflow.csv | 1 + .../sebd-670.auth-python-3.8/local.csv | 2 + .../sonataflow.csv | 1 + .../sebd-680.excamera-python-3.8/local.csv | 11 + .../sebd-690.ml-python-3.11/sonataflow.csv | 1 + .../results/sebd-690.ml-python-3.8/local.csv | 3 + 
.../local.csv | 1 + .../local.csv | 1 + run_local_workflows.sh | 27 +-- run_sonataflow_workflows.sh | 208 +++++++++++++++++- sebs/sonataflow/generator.py | 38 ++-- sebs/sonataflow/sonataflow.py | 12 + sebs/sonataflow/triggers.py | 40 +++- .../sebd_6100_1000_genome_python_3_11.sw.json | 135 ++++++++++++ ...000_genome_individuals_python_3_11.sw.json | 45 ++++ ...1000_genome_individuals_python_3_8.sw.json | 45 ++++ .../sebd_610_gen_python_3_11.sw.json | 137 ++++++++++++ .../sebd_610_gen_python_3_8.sw.json | 10 +- ...sebd_6200_trip_booking_python_3_11.sw.json | 183 +++++++++++++++ .../sebd_6200_trip_booking_python_3_8.sw.json | 0 .../sebd_620_func_invo_python_3_11.sw.json | 65 ++++++ ...ebd_630_parallel_sleep_python_3_11.sw.json | 57 +++++ ..._631_parallel_download_python_3_11.sw.json | 57 +++++ ...ebd_640_selfish_detour_python_3_11.sw.json | 33 +++ .../sebd_650_vid_python_3_11.sw.json | 78 +++++++ .../sebd_660_map_reduce_python_3_11.sw.json | 102 +++++++++ .../sebd_670_auth_python_3_11.sw.json | 33 +++ .../sebd_680_excamera_python_3_11.sw.json | 104 +++++++++ .../workflows/sebd_690_ml_python_3_11.sw.json | 57 +++++ 52 files changed, 1578 insertions(+), 134 deletions(-) create mode 100644 results/local-workflows/results/sebd-610.gen-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-610.gen-python-3.8/local.csv create mode 100644 results/local-workflows/results/sebd-6100.1000-genome-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-620.func-invo-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-620.func-invo-python-3.8/local.csv create mode 100644 results/local-workflows/results/sebd-6200.trip-booking-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-6200.trip-booking-python-3.8/local.csv create mode 100644 
results/local-workflows/results/sebd-630.parallel-sleep-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/local.csv create mode 100644 results/local-workflows/results/sebd-631.parallel-download-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-631.parallel-download-python-3.8/local.csv create mode 100644 results/local-workflows/results/sebd-640.selfish-detour-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-640.selfish-detour-python-3.8/local.csv create mode 100644 results/local-workflows/results/sebd-650.vid-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-650.vid-python-3.8/local.csv create mode 100644 results/local-workflows/results/sebd-660.map-reduce-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-660.map-reduce-python-3.8/local.csv create mode 100644 results/local-workflows/results/sebd-670.auth-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-670.auth-python-3.8/local.csv create mode 100644 results/local-workflows/results/sebd-680.excamera-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-680.excamera-python-3.8/local.csv create mode 100644 results/local-workflows/results/sebd-690.ml-python-3.11/sonataflow.csv create mode 100644 results/local-workflows/results/sebd-690.ml-python-3.8/local.csv create mode 100644 results/local-workflows/results/sebd-695.protein-screen-python-3.11/local.csv create mode 100644 results/local-workflows/results/sebd-721.gpu-fraud-detect-python-3.11/local.csv create mode 100644 sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_8.sw.json create mode 100644 
sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json rename sonataflow-workflows/{ => workflows}/sebd_610_gen_python_3_8.sw.json (91%) create mode 100644 sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json rename sonataflow-workflows/{ => workflows}/sebd_6200_trip_booking_python_3_8.sw.json (100%) create mode 100644 sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json diff --git a/QUICKSTART_SONATAFLOW.md b/QUICKSTART_SONATAFLOW.md index 61efc2d67..b1c236946 100644 --- a/QUICKSTART_SONATAFLOW.md +++ b/QUICKSTART_SONATAFLOW.md @@ -11,6 +11,9 @@ This backend generates Serverless Workflow definitions from SeBS workflow specs quay.io/kiegroup/kogito-swf-devmode:latest ``` The volume mount should point to the directory where SeBS writes generated `.sw.json` files. + If you also need to provide `application.properties`, mount a directory to + `/home/kogito/serverless-workflow-project/src/main/resources` that contains both + `application.properties` and a `workflows/` subdirectory. - Local object/NoSQL/redis services (reuse `run_local_workflows.sh` setup or `./sebs.py storage start all config/storage.json`). 
## Configure @@ -22,7 +25,7 @@ Add a `deployment.sonataflow` block to your config (based on `config/example.jso "sonataflow": { "resources": { "redis": { "host": "localhost:6380", "password": "" }, - "runtime": { "url": "http://localhost:8080", "endpoint_prefix": "services" } + "runtime": { "url": "http://localhost:8080", "endpoint_prefix": "" } }, "storage": { "type": "minio", @@ -53,4 +56,6 @@ Adjust storage/redis endpoints to match your local services. On first run SeBS will: - Package workflow functions into local containers. - Translate `definition.json` into `workflow_resources/sonataflow/.sw.json` under the generated code package directory (inside your `--output-dir` tree). -- Invoke SonataFlow at `{runtime_url}/{endpoint_prefix}/{workflow_id}` with the workflow payload. +- Invoke SonataFlow at `{runtime_url}/{workflow_id}` with the workflow payload (and auto-fallback to `/services/{workflow_id}` if needed). + +If SonataFlow dev-mode fails with a “Duplicated item found with id …” error, ensure there is only one `.sw.json` file per workflow id under the mounted resources directory. 
diff --git a/config/local_deployment.json b/config/local_deployment.json index 03bafd186..199a5934a 100644 --- a/config/local_deployment.json +++ b/config/local_deployment.json @@ -51,7 +51,7 @@ } }, "deployment": { - "name": "sonataflow", + "name": "local", "aws": { "region": "us-east-1", "lambda-role": "", @@ -78,20 +78,43 @@ } }, "storage": { - "address": "", - "mapped_port": -1, - "access_key": "", - "secret_key": "", - "instance_id": "", - "input_buckets": [], - "output_buckets": [], - "type": "minio" + "object": { + "type": "minio", + "minio": { + "address": "10.5.38.121:9011", + "mapped_port": 9011, + "access_key": "rB907YMFJW7gUgnUnefzcni9RExzy4aP0vr52tGzYgQ", + "secret_key": "f47e06c50a29f37f68b01eb2b96ea1679cab8b6e72102078fa36ed72a07f8ec9", + "instance_id": "5fac4b34c68172c95f81230df52c2989d06e3f7891b5bd901905c085258e56b9", + "output_buckets": [], + "input_buckets": [], + "version": "RELEASE.2024-07-16T23-46-41Z", + "data_volume": "minio-volume", + "type": "minio" + } + }, + "nosql": { + "type": "scylladb", + "scylladb": { + "address": "10.5.38.121:9012", + "mapped_port": 9012, + "alternator_port": 8000, + "access_key": "None", + "secret_key": "None", + "instance_id": "784e07f6d7cfe6b2670fc65d840fd79864d1e6336422dcc2fb340975b8131a4d", + "region": "None", + "cpus": 1, + "memory": "750", + "version": "6.0", + "data_volume": "scylladb-volume" + } + } } }, "sonataflow": { "resources": { "redis": { - "host": "localhost:6380", + "host": "", "password": "" }, "runtime": { @@ -100,40 +123,14 @@ } }, "storage": { - "address": "172.17.0.2:9000", - "mapped_port": 9011, - "access_key": "skPhf3f8aEMLd0P8n81M8OrA6fq8ZKCx6dn313lq2ws", - "secret_key": "4c15b2336fe9e89fac929dd13b4f43e222c9f8f0ae3e528572f46d94e93a1a13", - "instance_id": "b59d6d8581f4d62f8fd53e9d2184f3f9b4ab5661370d42f4dabbe739d6bda579", + "address": "", + "mapped_port": -1, + "access_key": "", + "secret_key": "", + "instance_id": "", "input_buckets": [], "output_buckets": [], - "type": "minio", - "object": 
{ - "type": "minio", - "minio": { - "address": "172.17.0.2:9000", - "mapped_port": 9011, - "access_key": "dZgkU1z39_K77ONo22flFxi0Std9C-IxRSihbDGV_w8", - "secret_key": "b91c091999ffd06ef36d169ff483456d437dbd56c924e51c3444efab1bc7a1bd", - "instance_id": "5dcae415d9adf6e71fbbf21c2c912eeda32e402d57282f026f42a7ba49ed9217", - "input_buckets": [], - "output_buckets": [] - } - }, - "nosql": { - "type": "scylladb", - "scylladb": { - "address": "172.17.0.3:8000", - "mapped_port": 9012, - "instance_id": "07961446bedf02c284b7657182c3ae4c0f3b25cab2d9a863c14e12dd9d886bdb" - } - } - }, - "nosql": { - "type": "scylladb", - "address": "172.17.0.3:8000", - "mapped_port": 9012, - "instance_id": "841c8a0b85fae2647f214170eb8fa666cd7ee01a361a1614f9c752e011b1a757" + "type": "minio" } }, "openwhisk": { diff --git a/config/local_workflows.json b/config/local_workflows.json index 03bafd186..f3727226e 100644 --- a/config/local_workflows.json +++ b/config/local_workflows.json @@ -8,7 +8,7 @@ "container_deployment": true, "runtime": { "language": "python", - "version": "3.8" + "version": "3.11" }, "type": "invocation-overhead", "perf-cost": { @@ -96,7 +96,7 @@ }, "runtime": { "url": "http://localhost:8080", - "endpoint_prefix": "services" + "endpoint_prefix": "" } }, "storage": { @@ -113,9 +113,9 @@ "minio": { "address": "172.17.0.2:9000", "mapped_port": 9011, - "access_key": "dZgkU1z39_K77ONo22flFxi0Std9C-IxRSihbDGV_w8", - "secret_key": "b91c091999ffd06ef36d169ff483456d437dbd56c924e51c3444efab1bc7a1bd", - "instance_id": "5dcae415d9adf6e71fbbf21c2c912eeda32e402d57282f026f42a7ba49ed9217", + "access_key": "hV4e0XGmzrJlsP8UW0OKDDrVqD1hE0IfCPYwZvTakJE", + "secret_key": "aec745a602364ac958b24787b93c87f469e27524a8ed1e1e420ad5218af38bd8", + "instance_id": "22b8cf06c8f28c429c88732bca34bb3d516d5583af59651c68925cd472de4a85", "input_buckets": [], "output_buckets": [] } @@ -125,7 +125,7 @@ "scylladb": { "address": "172.17.0.3:8000", "mapped_port": 9012, - "instance_id": 
"07961446bedf02c284b7657182c3ae4c0f3b25cab2d9a863c14e12dd9d886bdb" + "instance_id": "a411b4a644addd73d1943883d9094b57f31ad5a87f94ecec9e5784e00118a727" } } }, diff --git a/experiments.json b/experiments.json index 0b74d9f86..c50a66b3e 100644 --- a/experiments.json +++ b/experiments.json @@ -1,35 +1,7 @@ { - "_invocations": { - "sebd-610.gen-python-3.8": { - "6e0f75aa": { - "billing": { - "_billed_time": null, - "_gb_seconds": 0, - "_memory": null - }, - "output": {}, - "provider_times": { - "execution": 0, - "initialization": 0 - }, - "request_id": "6e0f75aa", - "stats": { - "cold_start": false, - "failure": true, - "memory_used": null - }, - "times": { - "benchmark": 0, - "client": 93292, - "client_begin": "2025-12-14 22:00:40.135117", - "client_end": "2025-12-14 22:00:40.228409", - "initialization": 0 - } - } - } - }, + "_invocations": {}, "_metrics": {}, - "begin_time": 1765746040.129077, + "begin_time": 1765812589.989892, "config": { "deployment": { "name": "sonataflow", @@ -42,7 +14,7 @@ "alternator_port": 8000, "cpus": -1, "data_volume": "", - "instance_id": "07961446bedf02c284b7657182c3ae4c0f3b25cab2d9a863c14e12dd9d886bdb", + "instance_id": "a411b4a644addd73d1943883d9094b57f31ad5a87f94ecec9e5784e00118a727", "mapped_port": 9012, "memory": -1, "region": "None", @@ -50,18 +22,18 @@ "version": "" }, "runtime": { - "endpoint_prefix": "services", + "endpoint_prefix": "", "url": "http://localhost:8080" }, "storage": { - "access_key": "dZgkU1z39_K77ONo22flFxi0Std9C-IxRSihbDGV_w8", + "access_key": "hV4e0XGmzrJlsP8UW0OKDDrVqD1hE0IfCPYwZvTakJE", "address": "172.17.0.2:9000", "data_volume": "", "input_buckets": [], - "instance_id": "5dcae415d9adf6e71fbbf21c2c912eeda32e402d57282f026f42a7ba49ed9217", + "instance_id": "22b8cf06c8f28c429c88732bca34bb3d516d5583af59651c68925cd472de4a85", "mapped_port": 9011, "output_buckets": [], - "secret_key": "b91c091999ffd06ef36d169ff483456d437dbd56c924e51c3444efab1bc7a1bd", + "secret_key": 
"aec745a602364ac958b24787b93c87f469e27524a8ed1e1e420ad5218af38bd8", "type": "minio", "version": "" } @@ -114,12 +86,12 @@ "flags": {}, "runtime": { "language": "python", - "version": "3.8" + "version": "3.11" }, "update_code": false, "update_storage": false } }, - "end_time": 1765746040.228735, + "end_time": 1765812589.995802, "result_bucket": null } \ No newline at end of file diff --git a/out_storage.json b/out_storage.json index 393a38027..d3777d636 100644 --- a/out_storage.json +++ b/out_storage.json @@ -4,9 +4,9 @@ "minio": { "address": "172.17.0.2:9000", "mapped_port": 9011, - "access_key": "dZgkU1z39_K77ONo22flFxi0Std9C-IxRSihbDGV_w8", - "secret_key": "b91c091999ffd06ef36d169ff483456d437dbd56c924e51c3444efab1bc7a1bd", - "instance_id": "5dcae415d9adf6e71fbbf21c2c912eeda32e402d57282f026f42a7ba49ed9217", + "access_key": "rB907YMFJW7gUgnUnefzcni9RExzy4aP0vr52tGzYgQ", + "secret_key": "f47e06c50a29f37f68b01eb2b96ea1679cab8b6e72102078fa36ed72a07f8ec9", + "instance_id": "5fac4b34c68172c95f81230df52c2989d06e3f7891b5bd901905c085258e56b9", "output_buckets": [], "input_buckets": [], "version": "RELEASE.2024-07-16T23-46-41Z", @@ -22,7 +22,7 @@ "alternator_port": 8000, "access_key": "None", "secret_key": "None", - "instance_id": "07961446bedf02c284b7657182c3ae4c0f3b25cab2d9a863c14e12dd9d886bdb", + "instance_id": "784e07f6d7cfe6b2670fc65d840fd79864d1e6336422dcc2fb340975b8131a4d", "region": "None", "cpus": 1, "memory": "750", diff --git a/results/local-workflows/results/sebd-610.gen-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-610.gen-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-610.gen-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-610.gen-python-3.8/local.csv b/results/local-workflows/results/sebd-610.gen-python-3.8/local.csv new file mode 100644 index 000000000..0e8e794cc --- /dev/null +++ 
b/results/local-workflows/results/sebd-610.gen-python-3.8/local.csv @@ -0,0 +1,16 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +map_astros,1765748134.354224,1765748134.354254,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.328433,1765748134.328455,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.343733,1765748134.343765,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.332384,1765748134.332417,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.350819,1765748134.35085,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.336082,1765748134.336113,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +process_astros,1765748135.419784,1765748135.419843,False,5c4ed0b239b9,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748135.407162,1765748135.407214,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +get_astros,1765748133.951551,1765748134.3044,False,a8eea933c3b9,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.35759,1765748134.35763,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +many_people,1765748134.315323,1765748134.315371,False,d7e1c83e01e7,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.347346,1765748134.347378,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.324549,1765748134.324576,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.361045,1765748134.361076,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.339969,1765748134.339993,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 diff --git a/results/local-workflows/results/sebd-6100.1000-genome-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-6100.1000-genome-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-6100.1000-genome-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.11/sonataflow.csv 
b/results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-620.func-invo-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-620.func-invo-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-620.func-invo-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-620.func-invo-python-3.8/local.csv b/results/local-workflows/results/sebd-620.func-invo-python-3.8/local.csv new file mode 100644 index 000000000..d9925d643 --- /dev/null +++ b/results/local-workflows/results/sebd-620.func-invo-python-3.8/local.csv @@ -0,0 +1,10 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +process,1765747664.143729,1765747664.143746,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.141557,1765747664.141582,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.136148,1765747664.136173,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.126044,1765747664.126068,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.129636,1765747664.129651,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.13179,1765747664.131804,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.133686,1765747664.133701,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.138938,1765747664.138953,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +gen,1765747664.092395,1765747664.092421,False,4d70bed1abfe,9f28b7c0,0,0,9f28b7c0,0 diff --git a/results/local-workflows/results/sebd-6200.trip-booking-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-6200.trip-booking-python-3.11/sonataflow.csv new file mode 
100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-6200.trip-booking-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-6200.trip-booking-python-3.8/local.csv b/results/local-workflows/results/sebd-6200.trip-booking-python-3.8/local.csv new file mode 100644 index 000000000..3a0822a8d --- /dev/null +++ b/results/local-workflows/results/sebd-6200.trip-booking-python-3.8/local.csv @@ -0,0 +1,5 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +reserve_rental,1765747687.98063,1765747687.9935,False,74204aa51c51,dd46e1af,0,0,dd46e1af,0 +confirm,1765747688.208023,1765747688.224673,False,bf1e5dc4e095,dd46e1af,0,0,dd46e1af,0 +reserve_hotel,1765747687.861356,1765747687.865317,False,a42e4157c8e8,dd46e1af,0,0,dd46e1af,0 +reserve_flight,1765747688.099512,1765747688.10294,False,6aa1001caa28,dd46e1af,0,0,dd46e1af,0 diff --git a/results/local-workflows/results/sebd-630.parallel-sleep-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/local.csv b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/local.csv new file mode 100644 index 000000000..7e81486fd --- /dev/null +++ b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/local.csv @@ -0,0 +1,4 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +generate,1765747700.97717,1765747700.977188,False,320acc55d242,47737366,0,0,47737366,0 +process,1765747701.01707,1765747703.019128,False,640d6a710572,47737366,0,0,47737366,0 +process,1765747703.024391,1765747705.025678,False,640d6a710572,47737366,0,0,47737366,0 diff --git 
a/results/local-workflows/results/sebd-631.parallel-download-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-631.parallel-download-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-631.parallel-download-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-631.parallel-download-python-3.8/local.csv b/results/local-workflows/results/sebd-631.parallel-download-python-3.8/local.csv new file mode 100644 index 000000000..622ea2382 --- /dev/null +++ b/results/local-workflows/results/sebd-631.parallel-download-python-3.8/local.csv @@ -0,0 +1,7 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +process,1765747717.312196,1765747717.315331,False,e4d1911fe02a,acf04ece,0,10,acf04ece,0 +process,1765747717.329192,1765747717.330548,False,e4d1911fe02a,acf04ece,0,50,acf04ece,0 +process,1765747717.322696,1765747717.323698,False,e4d1911fe02a,acf04ece,0,30,acf04ece,0 +process,1765747717.326168,1765747717.32726,False,e4d1911fe02a,acf04ece,0,40,acf04ece,0 +process,1765747717.318985,1765747717.319929,False,e4d1911fe02a,acf04ece,0,20,acf04ece,0 +generate,1765747717.197459,1765747717.197475,False,3ca6c483a8fd,acf04ece,0,0,acf04ece,0 diff --git a/results/local-workflows/results/sebd-640.selfish-detour-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-640.selfish-detour-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-640.selfish-detour-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-640.selfish-detour-python-3.8/local.csv b/results/local-workflows/results/sebd-640.selfish-detour-python-3.8/local.csv new file mode 100644 index 000000000..79d75c4f2 --- /dev/null +++ b/results/local-workflows/results/sebd-640.selfish-detour-python-3.8/local.csv @@ -0,0 +1,2 @@ 
+func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,result.min_diff,result.num_iterations,result.timestamps,result.tps,request_id,rep +measure,1765747728.917587,1765747738.933023,False,c3a5e2a3a4ff,087c8db5,0,0,44,979240,"[950319, 965368, 1582816, 1622185, 2179788, 2180260, 2180260, 2180969, 2181349, 2181798, 2181798, 2182265, 4002006, 4008839, 4267482, 4268238, 4268238, 4269151, 4269151, 4269665, 6421125, 6432373, 6993537, 6994279, 6994279, 6994882, 6995051, 6995671, 6995671, 6996484, 8290402, 8297852, 8840221, 8873054, 11259472, 11272153, 12111464, 12112159, 12112159, 12113344, 12113344, 12114100, 12364730, 12369412, 13678544, 13683824, 14180323, 14181004, 14181004, 14182475, 15857470, 15858100, 15858100, 15859165, 15859165, 15859795, 16097807, 16113998, 16190769, 16191317, 17944018, 17947912, 18517003, 18522260, 19202207, 19202874, 19202874, 19203870, 19204122, 19204947, 19204947, 19205402, 20936138, 20943241, 21139459, 21152149, 22193358, 22206157, 23355330, 23366056, 25774404, 25782254, 25910878, 25923777, 26951622, 26964354, 28193667, 28202125, 30612877, 30620435, 31590019, 31594360, 33031963, 33040423, 33644199, 33644728, 33644728, 33645685]",2419314588.0,087c8db5,0 diff --git a/results/local-workflows/results/sebd-650.vid-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-650.vid-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-650.vid-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-650.vid-python-3.8/local.csv b/results/local-workflows/results/sebd-650.vid-python-3.8/local.csv new file mode 100644 index 000000000..bd473f9f6 --- /dev/null +++ b/results/local-workflows/results/sebd-650.vid-python-3.8/local.csv @@ -0,0 +1,4 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep 
+analyse,1765747762.429102,1765747763.720146,False,3522a8327935,adf53a22,0,127875949,adf53a22,0 +decode,1765747761.878056,1765747762.267019,False,538e1395f46c,adf53a22,0,1258844,adf53a22,0 +summarize,1765747763.755012,1765747763.75504,False,b5931fa654fb,adf53a22,0,0,adf53a22,0 diff --git a/results/local-workflows/results/sebd-660.map-reduce-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-660.map-reduce-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-660.map-reduce-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-660.map-reduce-python-3.8/local.csv b/results/local-workflows/results/sebd-660.map-reduce-python-3.8/local.csv new file mode 100644 index 000000000..52f98c35c --- /dev/null +++ b/results/local-workflows/results/sebd-660.map-reduce-python-3.8/local.csv @@ -0,0 +1,11 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +map,1765747779.271765,1765747779.308599,False,52dfb2249ba4,aac31972,0,761,aac31972,0 +map,1765747779.230342,1765747779.269322,False,52dfb2249ba4,aac31972,0,383,aac31972,0 +reduce,1765747779.575957,1765747779.578511,False,4e82bda63d6d,aac31972,0,24,aac31972,0 +reduce,1765747779.571899,1765747779.57446,False,4e82bda63d6d,aac31972,0,18,aac31972,0 +shuffle,1765747779.450749,1765747779.456999,False,e13b2c9c8fba,aac31972,0,0,aac31972,0 +reduce,1765747779.579605,1765747779.582346,False,4e82bda63d6d,aac31972,0,30,aac31972,0 +split,1765747779.074395,1765747779.128264,False,7fb21a2dee8a,aac31972,0,1150,aac31972,0 +reduce,1765747779.556904,1765747779.564585,False,4e82bda63d6d,aac31972,0,6,aac31972,0 +reduce,1765747779.56733,1765747779.569981,False,4e82bda63d6d,aac31972,0,12,aac31972,0 +map,1765747779.31135,1765747779.350271,False,52dfb2249ba4,aac31972,0,1151,aac31972,0 diff --git a/results/local-workflows/results/sebd-670.auth-python-3.11/sonataflow.csv 
b/results/local-workflows/results/sebd-670.auth-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-670.auth-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-670.auth-python-3.8/local.csv b/results/local-workflows/results/sebd-670.auth-python-3.8/local.csv new file mode 100644 index 000000000..e5d1e62f4 --- /dev/null +++ b/results/local-workflows/results/sebd-670.auth-python-3.8/local.csv @@ -0,0 +1,2 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +auth,1765747791.875554,1765747791.876051,False,cb3d1c2c9564,4824a414,0,0,4824a414,0 diff --git a/results/local-workflows/results/sebd-680.excamera-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-680.excamera-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-680.excamera-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-680.excamera-python-3.8/local.csv b/results/local-workflows/results/sebd-680.excamera-python-3.8/local.csv new file mode 100644 index 000000000..56421e27a --- /dev/null +++ b/results/local-workflows/results/sebd-680.excamera-python-3.8/local.csv @@ -0,0 +1,11 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +reencode,1765747815.673114,1765747816.138713,False,c0eeb709dd78,55c1b086,0,55320350,55c1b086,0 +reencode,1765747815.09418,1765747815.669897,False,c0eeb709dd78,55c1b086,0,42806402,55c1b086,0 +rebase,1765747816.2347,1765747816.765206,False,762c3ef9fc1d,55c1b086,0,24585228,55c1b086,0 +encode,1765747810.208044,1765747812.434979,False,b9794f24732a,55c1b086,0,39678856,55c1b086,0 +rebase,1765747817.294315,1765747817.774332,False,762c3ef9fc1d,55c1b086,0,49348109,55c1b086,0 
+split,1765747808.178205,1765747808.178237,False,ee0abb69372f,55c1b086,0,0,55c1b086,0 +reencode,1765747814.463546,1765747815.091485,False,c0eeb709dd78,55c1b086,0,27515255,55c1b086,0 +rebase,1765747816.769762,1765747817.290442,False,762c3ef9fc1d,55c1b086,0,37002777,55c1b086,0 +encode,1765747812.4392,1765747814.360625,False,b9794f24732a,55c1b086,0,53688436,55c1b086,0 +encode,1765747808.281911,1765747810.204151,False,b9794f24732a,55c1b086,0,23088628,55c1b086,0 diff --git a/results/local-workflows/results/sebd-690.ml-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-690.ml-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-690.ml-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-690.ml-python-3.8/local.csv b/results/local-workflows/results/sebd-690.ml-python-3.8/local.csv new file mode 100644 index 000000000..69fb33dac --- /dev/null +++ b/results/local-workflows/results/sebd-690.ml-python-3.8/local.csv @@ -0,0 +1,3 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +train,1765747848.855069,1765747848.882302,False,724812af4c78,1e122cc3,0,5056,1e122cc3,0 +generate,1765747847.846056,1765747847.870281,False,2e3894765f0e,1e122cc3,0,0,1e122cc3,0 diff --git a/results/local-workflows/results/sebd-695.protein-screen-python-3.11/local.csv b/results/local-workflows/results/sebd-695.protein-screen-python-3.11/local.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-695.protein-screen-python-3.11/local.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-721.gpu-fraud-detect-python-3.11/local.csv b/results/local-workflows/results/sebd-721.gpu-fraud-detect-python-3.11/local.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-721.gpu-fraud-detect-python-3.11/local.csv 
@@ -0,0 +1 @@ + diff --git a/run_local_workflows.sh b/run_local_workflows.sh index 68c9e86a7..00a059817 100755 --- a/run_local_workflows.sh +++ b/run_local_workflows.sh @@ -82,19 +82,20 @@ if [ -f "$SELFISH_SRC" ]; then fi WORKFLOWS=( - "610.gen" - "6100.1000-genome" - "6101.1000-genome-individuals" - "620.func-invo" - "6200.trip-booking" - "630.parallel-sleep" - "631.parallel-download" - "640.selfish-detour" - "650.vid" - "660.map-reduce" - "670.auth" - "680.excamera" - "690.ml" + # "610.gen" + # "6100.1000-genome" + # "6101.1000-genome-individuals" + # "620.func-invo" + # "6200.trip-booking" + # "630.parallel-sleep" + # "631.parallel-download" + # "640.selfish-detour" + # "650.vid" + # "660.map-reduce" + # "670.auth" + # "680.excamera" + # "690.ml" + "721.gpu-fraud-detect" ) for wf in "${WORKFLOWS[@]}"; do diff --git a/run_sonataflow_workflows.sh b/run_sonataflow_workflows.sh index c6aa17a01..1204b6c90 100755 --- a/run_sonataflow_workflows.sh +++ b/run_sonataflow_workflows.sh @@ -1,6 +1,16 @@ #!/bin/bash set -euo pipefail +# Ensure SeBS (python docker SDK) uses the same Docker daemon as `docker` CLI. +# This avoids "No route to host" issues when containers are created in one daemon +# (e.g., `/var/run/docker.sock`) but `docker run` uses another (e.g., Docker Desktop). +if command -v docker >/dev/null 2>&1; then + DOCKER_HOST_FROM_CONTEXT=$(docker context inspect --format '{{.Endpoints.docker.Host}}' 2>/dev/null || true) + if [ -n "${DOCKER_HOST_FROM_CONTEXT:-}" ]; then + export DOCKER_HOST="$DOCKER_HOST_FROM_CONTEXT" + fi +fi + # Prepare local configuration files if [ ! -f config/local_workflows.json ]; then cp config/example.json config/local_workflows.json @@ -17,6 +27,11 @@ else echo "Workflow datasets present, skipping download." fi +RUNTIME_URL="http://localhost:8080" +# Recent `quay.io/kiegroup/kogito-swf-devmode` images expose workflow endpoints at `/{workflowId}`. +# Some older setups used `/services/{workflowId}`; SeBS will auto-fallback on 404. 
+ENDPOINT_PREFIX="" + cleanup() { echo "Stopping all running Docker containers..." docker ps -q | xargs -r docker stop >/dev/null || true @@ -47,8 +62,8 @@ for cfg in config/local_workflows.json config/local_deployment.json; do --arg sinst "$SCYLLA_INSTANCE" \ --arg redis_host "localhost:6380" \ --arg redis_pass "" \ - --arg runtime_url "http://localhost:8080" \ - --arg endpoint_prefix "services" \ + --arg runtime_url "$RUNTIME_URL" \ + --arg endpoint_prefix "$ENDPOINT_PREFIX" \ '(.deployment.name = "sonataflow") | (.deployment.sonataflow.storage.object.type = "minio") | (.deployment.sonataflow.storage.object.minio.address = $addr) @@ -75,15 +90,173 @@ if docker ps -a --format '{{.Names}}' | grep -q '^sebs-redis$'; then fi docker run -d --name sebs-redis -p 6380:6379 redis:7 -# Prepare SonataFlow workflow directory -SONATAFLOW_WORKFLOWS_DIR="$PWD/sonataflow-workflows" +# Prepare SonataFlow resources directory structure expected by kogito-swf-devmode: +# - `src/main/resources/application.properties` +# - `src/main/resources/workflows/*.sw.json` +SONATAFLOW_RESOURCES_DIR="$PWD/sonataflow-workflows" +SONATAFLOW_WORKFLOWS_DIR="$SONATAFLOW_RESOURCES_DIR/workflows" mkdir -p "$SONATAFLOW_WORKFLOWS_DIR" +if [ ! -f "$SONATAFLOW_RESOURCES_DIR/application.properties" ]; then + cat >"$SONATAFLOW_RESOURCES_DIR/application.properties" <<'EOF' +# Enable Kogito process/workflow generation +kogito.codegen.processes.enabled=true +quarkus.kogito.codegen.processes.enabled=true +EOF +fi + +# Read the runtime settings so we only stage matching workflow variants. +RUNTIME_LANG=$(jq -r '.experiments.runtime.language // "python"' config/local_workflows.json) +RUNTIME_VER=$(jq -r '.experiments.runtime.version // "3.11"' config/local_workflows.json) +ARCH=$(jq -r '.experiments.architecture // "x64"' config/local_workflows.json) + +dedupe_sw_files() { + local dir=$1 + declare -A seen=() + # Consider any `.sw.json` under resources (root + workflows/) to avoid Quarkus duplicates. 
+ while IFS= read -r -d '' f; do + local wid + wid=$(jq -r '.id // empty' "$f" 2>/dev/null || true) + [ -n "$wid" ] || continue + if [ -n "${seen[$wid]:-}" ] && [ "${seen[$wid]}" != "$f" ]; then + echo "Removing duplicate workflow id '$wid' at $f (keeping ${seen[$wid]})" + rm -f "$f" + else + seen[$wid]="$f" + fi + done < <(find "$dir" -maxdepth 2 -name "*.sw.json" -print0 2>/dev/null) +} + +# If older runs put `.sw.json` in the resources root, move them into `workflows/` +# so Quarkus only sees a single copy. +while IFS= read -r -d '' f; do + mv -f "$f" "$SONATAFLOW_WORKFLOWS_DIR/" || true +done < <(find "$SONATAFLOW_RESOURCES_DIR" -maxdepth 1 -name "*.sw.json" -print0 2>/dev/null) +dedupe_sw_files "$SONATAFLOW_RESOURCES_DIR" # Function to copy workflow definitions to SonataFlow directory after each benchmark copy_workflows_to_sonataflow() { - find cache -name "*.sw.json" -path "*/sonataflow/*" 2>/dev/null | while read -r swfile; do + find cache -name "*.sw.json" \ + -path "*/sonataflow/${RUNTIME_LANG}/${RUNTIME_VER}/${ARCH}/*" \ + -path "*/workflow_resources/sonataflow/*" 2>/dev/null | while read -r swfile; do cp -f "$swfile" "$SONATAFLOW_WORKFLOWS_DIR/" 2>/dev/null || true done + dedupe_sw_files "$SONATAFLOW_RESOURCES_DIR" +} + +get_workflow_id_for() { + local wf_name=$1 + local pattern="${wf_name//./_}" + for f in "$SONATAFLOW_WORKFLOWS_DIR"/*.sw.json; do + [ -f "$f" ] || continue + if printf '%s\n' "$f" | grep -q "$pattern"; then + jq -r '.id' "$f" + return 0 + fi + done + local newest + newest=$(ls -1t "$SONATAFLOW_WORKFLOWS_DIR"/*.sw.json 2>/dev/null | head -n1) + if [ -n "$newest" ]; then + jq -r '.id' "$newest" + return 0 + fi + return 1 +} + +wait_for_health() { + local url=$1 + local attempts=40 + local delay=3 + echo "Waiting for SonataFlow runtime health at $url ..." + for i in $(seq 1 $attempts); do + code=$(curl -s -o /dev/null -w "%{http_code}" "$url/q/health/ready" || true) + if [ "$code" = "200" ]; then + echo "SonataFlow runtime is ready." 
+ return 0 + fi + sleep "$delay" + done + echo "Warning: SonataFlow runtime health endpoint not ready after $((attempts * delay))s" +} + +wait_for_workflow_endpoint() { + local workflow_id=$1 + local base_url=$2 + local endpoint_prefix=$3 + local prefix="${endpoint_prefix#/}" + local -a urls=() + if [ -n "$prefix" ]; then + urls+=("${base_url%/}/${prefix}/${workflow_id}") + fi + urls+=("${base_url%/}/${workflow_id}") + if [ "$prefix" != "services" ]; then + urls+=("${base_url%/}/services/${workflow_id}") + fi + local attempts=40 + local delay=3 + echo "Waiting for workflow endpoint(s): ${urls[*]} ..." + for i in $(seq 1 $attempts); do + for url in "${urls[@]}"; do + # GET will likely return 405 for POST-only endpoints; 404 means not loaded yet + code=$(curl -s -o /dev/null -w "%{http_code}" "$url" || true) + if [ "$code" != "404" ] && [ "$code" != "000" ]; then + echo "Workflow endpoint responding at $url with HTTP $code." + return 0 + fi + done + sleep "$delay" + done + echo "Warning: Workflow endpoint(s) not responding after $((attempts * delay))s" +} + +preflight_runtime_function_connectivity() { + local sw_json=$1 + if ! command -v docker >/dev/null 2>&1; then + return 0 + fi + if ! docker ps --format '{{.Names}}' 2>/dev/null | grep -q '^sonataflow-runtime$'; then + return 0 + fi + if ! command -v jq >/dev/null 2>&1; then + return 0 + fi + if [ ! -f "$sw_json" ]; then + return 0 + fi + + # Extract function base URLs from `rest:post:http://host:port/` operations. + mapfile -t urls < <(jq -r '.functions[]?.operation // empty' "$sw_json" 2>/dev/null \ + | sed -n 's#^rest:post:##p' | sed -e 's#/*$#/#' | sort -u) + if [ "${#urls[@]}" -eq 0 ]; then + return 0 + fi + + echo "Preflight: checking SonataFlow runtime connectivity to function containers..." + # Try curl first, then wget, then python. 
+ local http_cmd + http_cmd=$(docker exec sonataflow-runtime sh -lc 'if command -v curl >/dev/null 2>&1; then echo curl; elif command -v wget >/dev/null 2>&1; then echo wget; elif command -v python3 >/dev/null 2>&1; then echo python3; elif command -v python >/dev/null 2>&1; then echo python; else echo none; fi' 2>/dev/null || echo none) + if [ "$http_cmd" = "none" ]; then + echo "Preflight skipped: no curl/wget/python found inside sonataflow-runtime." + return 0 + fi + local failed=0 + for u in "${urls[@]}"; do + # Use `/alive` which SeBS function containers expose. + if [ "$http_cmd" = "curl" ]; then + docker exec sonataflow-runtime sh -lc "curl -fsS --max-time 3 '${u}alive' >/dev/null" >/dev/null 2>&1 || failed=1 + elif [ "$http_cmd" = "wget" ]; then + docker exec sonataflow-runtime sh -lc "wget -q -T 3 -O - '${u}alive' >/dev/null" >/dev/null 2>&1 || failed=1 + else + docker exec sonataflow-runtime sh -lc "$http_cmd - <<'PY'\nimport sys, urllib.request\nurl=sys.argv[1]\nurllib.request.urlopen(url, timeout=3).read(1)\nPY\n'${u}alive'" >/dev/null 2>&1 || failed=1 + fi + if [ "$failed" -ne 0 ]; then + echo " Cannot reach ${u}alive from sonataflow-runtime" + fi + done + if [ "$failed" -ne 0 ]; then + echo "Preflight failed: SonataFlow cannot reach one or more function containers." + echo "Hint: ensure sonataflow-runtime and sebd-*___* function containers share a Docker network, and that SeBS and docker CLI use the same Docker daemon/context." + return 1 + fi } # Create Docker network for SonataFlow and functions if it doesn't exist @@ -134,25 +307,46 @@ for wf in "${WORKFLOWS[@]}"; do copy_workflows_to_sonataflow echo "Copied workflow definitions to SonataFlow directory" + if ! 
ls "$SONATAFLOW_WORKFLOWS_DIR"/*.sw.json >/dev/null 2>&1; then + echo "No workflow definitions found in $SONATAFLOW_WORKFLOWS_DIR after generating $wf" + exit 1 + fi + + WF_ID=$(get_workflow_id_for "$wf" || true) + if [ -z "$WF_ID" ] || [ "$WF_ID" = "null" ]; then + echo "Could not determine workflow id for $wf; available definitions:" + ls -l "$SONATAFLOW_WORKFLOWS_DIR" + exit 1 + fi + echo "Workflow id for $wf: $WF_ID" + # Start SonataFlow runtime on first iteration (after first workflow is generated) if [ "$SONATAFLOW_STARTED" = false ]; then echo "Starting SonataFlow runtime container..." if docker ps -a --format '{{.Names}}' | grep -q '^sonataflow-runtime$'; then docker rm -f sonataflow-runtime >/dev/null fi + # Start on `sebs-network` (primary) and also attach to `bridge` so the runtime can reach + # function containers whether SeBS exposes them via `sebs-network` or `bridge`. docker run -d --name sonataflow-runtime --network sebs-network -p 8080:8080 \ - -v "$SONATAFLOW_WORKFLOWS_DIR":/home/kogito/serverless-workflow-project/src/main/resources \ + -v "$SONATAFLOW_RESOURCES_DIR":/home/kogito/serverless-workflow-project/src/main/resources \ quay.io/kiegroup/kogito-swf-devmode:latest + docker network connect bridge sonataflow-runtime >/dev/null 2>&1 || true echo "Waiting for SonataFlow runtime to start and load workflows..." - sleep 20 + wait_for_health "$RUNTIME_URL" + wait_for_workflow_endpoint "$WF_ID" "$RUNTIME_URL" "$ENDPOINT_PREFIX" SONATAFLOW_STARTED=true else # Wait for SonataFlow to detect and load the new workflow (dev mode auto-reload) echo "Waiting for SonataFlow to load workflow..." sleep 10 + wait_for_workflow_endpoint "$WF_ID" "$RUNTIME_URL" "$ENDPOINT_PREFIX" fi + # Ensure runtime can reach function containers before invoking the workflow. 
+ preflight_runtime_function_connectivity "$SONATAFLOW_WORKFLOWS_DIR/${WF_ID}.sw.json" || exit 1 + # Now run the actual benchmark ./sebs.py benchmark workflow "$wf" test \ --config config/local_workflows.json \ diff --git a/sebs/sonataflow/generator.py b/sebs/sonataflow/generator.py index 45f9fdd86..a375b334e 100644 --- a/sebs/sonataflow/generator.py +++ b/sebs/sonataflow/generator.py @@ -32,7 +32,7 @@ def _function_ref(self, func_name: str) -> Dict[str, str]: self._functions[ref_name] = { "name": ref_name, "operation": f"rest:post:{url}", - "type": "custom" + "type": "custom", } return {"refName": ref_name} @@ -54,10 +54,7 @@ def postprocess(self, payloads: List[dict]) -> dict: } # Add error definitions if any state uses onErrors if self._uses_errors: - workflow_def["errors"] = [{ - "name": "workflow_error", - "code": "*" # Catch all errors - }] + workflow_def["errors"] = [{"name": "workflow_error", "code": "*"}] # Catch all errors return workflow_def def encode_task(self, state: Task) -> Union[dict, List[dict]]: @@ -72,10 +69,7 @@ def encode_task(self, state: Task) -> Union[dict, List[dict]]: payload["end"] = True if state.failure is not None: self._uses_errors = True - payload["onErrors"] = [{ - "errorRef": "workflow_error", - "transition": state.failure - }] + payload["onErrors"] = [{"errorRef": "workflow_error", "transition": state.failure}] return payload def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: @@ -152,21 +146,33 @@ def encode_loop(self, state: Loop) -> Union[dict, List[dict]]: return payload def _encode_branch(self, subworkflow: dict) -> Dict[str, object]: - # For SonataFlow, branches cannot contain nested states. - # We need to flatten the subworkflow into actions. - # For now, we'll encode the root state's function call as the branch action. + """ + For SonataFlow, branches are flat lists of actions. We flatten the root state + of each subworkflow to a single action by selecting the function name. 
+ """ states = {n: State.deserialize(n, s) for n, s in subworkflow["states"].items()} root_state = states.get(subworkflow["root"]) if not root_state: raise ValueError(f"Root state {subworkflow['root']} not found in subworkflow") - # Extract the function name from the root state + func_name = None if isinstance(root_state, Task): func_name = root_state.func_name - action = self._default_action(func_name, "${ . }") - return {"name": subworkflow["root"], "actions": [action]} + elif isinstance(root_state, Map): + # Use the mapped state's root function as the branch action. + root_def = root_state.funcs.get(root_state.root, {}) + func_name = root_def.get("func_name", root_state.root) + elif isinstance(root_state, Repeat): + func_name = root_state.func_name + elif isinstance(root_state, Loop): + func_name = root_state.func_name else: - raise ValueError(f"Parallel branches currently only support Task states, got {type(root_state).__name__}") + raise ValueError( + f"Parallel branches currently support Task/Map/Repeat/Loop root states, got {type(root_state).__name__}" + ) + + action = self._default_action(func_name, "${ . }") + return {"name": subworkflow["root"], "actions": [action]} def encode_parallel(self, state: Parallel) -> Union[dict, List[dict]]: branches = [self._encode_branch(sw) for sw in state.funcs] diff --git a/sebs/sonataflow/sonataflow.py b/sebs/sonataflow/sonataflow.py index 57c9946d1..5336dd049 100644 --- a/sebs/sonataflow/sonataflow.py +++ b/sebs/sonataflow/sonataflow.py @@ -115,6 +115,18 @@ def _container_service_address(self, endpoint: str) -> str: return Local._container_service_address(self, endpoint) def _function_network_endpoint(self, func: LocalFunction) -> Tuple[str, str]: + # SonataFlow runtime runs in `sebs-network` and invokes workflow functions from within + # that network. Use the function container's `sebs-network` IP and the container port. 
+ try: + func.container.reload() + networks = func.container.attrs.get("NetworkSettings", {}).get("Networks", {}) + sf_net = networks.get("sebs-network", {}) + ip = sf_net.get("IPAddress") + if ip: + return ip, str(Local.DEFAULT_PORT) + except Exception: + pass + # Fallback to Local behavior (bridge IP + published host port). return Local._function_network_endpoint(self, func) def _workflow_env(self, workflow_name: str, module_name: str) -> Dict[str, str]: diff --git a/sebs/sonataflow/triggers.py b/sebs/sonataflow/triggers.py index ec966d6f6..30f06dfe2 100644 --- a/sebs/sonataflow/triggers.py +++ b/sebs/sonataflow/triggers.py @@ -28,16 +28,52 @@ def _endpoint(self) -> str: return f"{self._base_url}/{self._endpoint_prefix}/{self._workflow_id}" return f"{self._base_url}/{self._workflow_id}" + def _candidate_endpoints(self) -> list[tuple[str, str]]: + """ + Return a list of candidate endpoints to try. + + Kogito/SonataFlow images have historically exposed the workflow start endpoint + either at `/{workflowId}` or at `/services/{workflowId}` depending on version/config. + """ + candidates = [self._endpoint_prefix, "", "services"] + seen: set[str] = set() + out: list[tuple[str, str]] = [] + for prefix in candidates: + prefix = (prefix or "").strip("/") + if prefix in seen: + continue + seen.add(prefix) + if prefix: + out.append((prefix, f"{self._base_url}/{prefix}/{self._workflow_id}")) + else: + out.append((prefix, f"{self._base_url}/{self._workflow_id}")) + return out + def _invoke(self, payload: dict) -> ExecutionResult: request_id = str(uuid.uuid4())[0:8] begin = datetime.datetime.now() result = ExecutionResult.from_times(begin, begin) try: + endpoint_used = self._endpoint() resp = requests.post( - self._endpoint(), + endpoint_used, json={"payload": payload, "request_id": request_id}, timeout=900, ) + if resp.status_code == 404: + # Auto-detect the correct endpoint layout. 
+ for prefix, endpoint in self._candidate_endpoints(): + if endpoint == endpoint_used: + continue + resp = requests.post( + endpoint, + json={"payload": payload, "request_id": request_id}, + timeout=900, + ) + endpoint_used = endpoint + if resp.status_code != 404: + self._endpoint_prefix = prefix + break end = datetime.datetime.now() result = ExecutionResult.from_times(begin, end) result.request_id = request_id @@ -78,5 +114,5 @@ def deserialize(cls, obj: dict) -> "WorkflowSonataFlowTrigger": def update(self, base_url: Optional[str] = None, endpoint_prefix: Optional[str] = None): if base_url: self._base_url = base_url.rstrip("/") - if endpoint_prefix: + if endpoint_prefix is not None: self._endpoint_prefix = endpoint_prefix.strip("/") diff --git a/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json new file mode 100644 index 000000000..ad60a0fe4 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json @@ -0,0 +1,135 @@ +{ + "id": "sebd_6100_1000_genome_python_3_11", + "name": "sebd_6100_1000_genome_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_6100_1000_genome_python_3_11_individuals", + "operation": "rest:post:http://172.17.0.54:9000/", + "type": "custom" + }, + { + "name": "sebd_6100_1000_genome_python_3_11_individuals_merge", + "operation": "rest:post:http://172.17.0.55:9000/", + "type": "custom" + }, + { + "name": "sebd_6100_1000_genome_python_3_11_sifting", + "operation": "rest:post:http://172.17.0.57:9000/", + "type": "custom" + }, + { + "name": "sebd_6100_1000_genome_python_3_11_frequency", + "operation": "rest:post:http://172.17.0.48:9000/", + "type": "custom" + }, + { + "name": "sebd_6100_1000_genome_python_3_11_mutation_overlap", + "operation": "rest:post:http://172.17.0.56:9000/", + "type": "custom" + } + ], + 
"start": "individuals", + "states": [ + { + "name": "individuals", + "type": "foreach", + "inputCollection": "${ .blob }", + "outputCollection": "${ .blob }", + "iterationParam": "item", + "actions": [ + { + "name": "individuals", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_individuals", + "arguments": { + "payload": { + "array_element": "${ item }", + "benchmark_bucket": "${ .benchmark_bucket }", + "bucket": "${ .bucket }", + "columns": "${ .columns }", + "columns_bucket": "${ .columns_bucket }", + "populations": "${ .populations }", + "sifting_input": "${ .sifting_input }", + "individuals_file": "${ .individuals_file }" + } + } + } + } + ], + "transition": "merge_and_sifting" + }, + { + "name": "merge_and_sifting", + "type": "parallel", + "branches": [ + { + "name": "individuals_merge", + "actions": [ + { + "name": "individuals_merge", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_individuals_merge", + "arguments": { + "payload": "${ . }" + } + } + } + ] + }, + { + "name": "sifting", + "actions": [ + { + "name": "sifting", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_sifting", + "arguments": { + "payload": "${ . }" + } + } + } + ] + } + ], + "transition": "frequency_and_overlap" + }, + { + "name": "frequency_and_overlap", + "type": "parallel", + "branches": [ + { + "name": "frequency", + "actions": [ + { + "name": "frequency", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_frequency", + "arguments": { + "payload": "${ . }" + } + } + } + ] + }, + { + "name": "mutation_overlap", + "actions": [ + { + "name": "mutation_overlap", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_mutation_overlap", + "arguments": { + "payload": "${ . 
}" + } + } + } + ] + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json new file mode 100644 index 000000000..d4b8cfe6f --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json @@ -0,0 +1,45 @@ +{ + "id": "sebd_6101_1000_genome_individuals_python_3_11", + "name": "sebd_6101_1000_genome_individuals_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_6101_1000_genome_individuals_python_3_11_individuals", + "operation": "rest:post:http://172.17.0.58:9000/", + "type": "custom" + } + ], + "start": "individuals_state", + "states": [ + { + "name": "individuals_state", + "type": "foreach", + "inputCollection": "${ .blob }", + "outputCollection": "${ .blob }", + "iterationParam": "item", + "actions": [ + { + "name": "individuals", + "functionRef": { + "refName": "sebd_6101_1000_genome_individuals_python_3_11_individuals", + "arguments": { + "payload": { + "array_element": "${ item }", + "benchmark_bucket": "${ .benchmark_bucket }", + "bucket": "${ .bucket }", + "columns": "${ .columns }", + "columns_bucket": "${ .columns_bucket }", + "populations": "${ .populations }", + "sifting_input": "${ .sifting_input }", + "individuals_file": "${ .individuals_file }" + } + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_8.sw.json b/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_8.sw.json new file mode 100644 index 000000000..6c921e38b --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_8.sw.json @@ -0,0 +1,45 @@ +{ + "id": 
"sebd_6101_1000_genome_individuals_python_3_8", + "name": "sebd_6101_1000_genome_individuals_python_3_8", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_6101_1000_genome_individuals_python_3_8_individuals", + "operation": "rest:post:http://172.17.0.19:9000/", + "type": "custom" + } + ], + "start": "individuals_state", + "states": [ + { + "name": "individuals_state", + "type": "foreach", + "inputCollection": "${ .blob }", + "outputCollection": "${ .blob }", + "iterationParam": "item", + "actions": [ + { + "name": "individuals", + "functionRef": { + "refName": "sebd_6101_1000_genome_individuals_python_3_8_individuals", + "arguments": { + "payload": { + "array_element": "${ item }", + "benchmark_bucket": "${ .benchmark_bucket }", + "bucket": "${ .bucket }", + "columns": "${ .columns }", + "columns_bucket": "${ .columns_bucket }", + "populations": "${ .populations }", + "sifting_input": "${ .sifting_input }", + "individuals_file": "${ .individuals_file }" + } + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json new file mode 100644 index 000000000..6efb86d9b --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json @@ -0,0 +1,137 @@ +{ + "id": "sebd_610_gen_python_3_11", + "name": "sebd_610_gen_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_610_gen_python_3_11_get_astros", + "operation": "rest:post:http://172.18.0.3:9000/", + "type": "custom" + }, + { + "name": "sebd_610_gen_python_3_11_few_people", + "operation": "rest:post:http://172.18.0.2:9000/", + "type": "custom" + }, + { + "name": "sebd_610_gen_python_3_11_many_people", + "operation": 
"rest:post:http://172.18.0.4:9000/", + "type": "custom" + }, + { + "name": "sebd_610_gen_python_3_11_map_astros", + "operation": "rest:post:http://172.18.0.5:9000/", + "type": "custom" + }, + { + "name": "sebd_610_gen_python_3_11_process_astros", + "operation": "rest:post:http://172.18.0.6:9000/", + "type": "custom" + } + ], + "start": "get_astros", + "states": [ + { + "name": "get_astros", + "type": "operation", + "actions": [ + { + "name": "get_astros", + "functionRef": { + "refName": "sebd_610_gen_python_3_11_get_astros", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "select_astros_number" + }, + { + "name": "select_astros_number", + "type": "switch", + "dataConditions": [ + { + "condition": "astros.number < 10", + "transition": "few_people" + }, + { + "condition": "astros.number >= 10", + "transition": "many_people" + } + ], + "defaultCondition": { + "transition": "few_people" + } + }, + { + "name": "few_people", + "type": "operation", + "actions": [ + { + "name": "few_people", + "functionRef": { + "refName": "sebd_610_gen_python_3_11_few_people", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "map_astros" + }, + { + "name": "many_people", + "type": "operation", + "actions": [ + { + "name": "many_people", + "functionRef": { + "refName": "sebd_610_gen_python_3_11_many_people", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "transition": "map_astros" + }, + { + "name": "map_astros", + "type": "foreach", + "inputCollection": "${ .astros.people }", + "outputCollection": "${ .astros.people }", + "iterationParam": "item", + "actions": [ + { + "name": "map_astros", + "functionRef": { + "refName": "sebd_610_gen_python_3_11_map_astros", + "arguments": { + "payload": "${ item }" + } + } + } + ], + "transition": "process_astros" + }, + { + "name": "process_astros", + "type": "operation", + "actions": [ + { + "name": "process_astros", + "functionRef": { + "refName": "sebd_610_gen_python_3_11_process_astros", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/sebd_610_gen_python_3_8.sw.json b/sonataflow-workflows/workflows/sebd_610_gen_python_3_8.sw.json similarity index 91% rename from sonataflow-workflows/sebd_610_gen_python_3_8.sw.json rename to sonataflow-workflows/workflows/sebd_610_gen_python_3_8.sw.json index 20b7e997b..fd5b55f57 100644 --- a/sonataflow-workflows/sebd_610_gen_python_3_8.sw.json +++ b/sonataflow-workflows/workflows/sebd_610_gen_python_3_8.sw.json @@ -7,27 +7,27 @@ "functions": [ { "name": "sebd_610_gen_python_3_8_get_astros", - "operation": "rest:post:http://172.17.1.112:9000/", + "operation": "rest:post:http://172.17.0.5:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_8_few_people", - "operation": "rest:post:http://172.17.1.111:9000/", + "operation": "rest:post:http://172.17.0.4:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_8_many_people", - "operation": "rest:post:http://172.17.1.113:9000/", + "operation": "rest:post:http://172.17.0.6:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_8_map_astros", - "operation": "rest:post:http://172.17.1.114:9000/", + "operation": "rest:post:http://172.17.0.7:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_8_process_astros", - "operation": 
"rest:post:http://172.17.1.115:9000/", + "operation": "rest:post:http://172.17.0.8:9000/", "type": "custom" } ], diff --git a/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json new file mode 100644 index 000000000..5bfd6ceb6 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json @@ -0,0 +1,183 @@ +{ + "id": "sebd_6200_trip_booking_python_3_11", + "name": "sebd_6200_trip_booking_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_6200_trip_booking_python_3_11_reserve_hotel", + "operation": "rest:post:http://172.17.0.66:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_reserve_rental", + "operation": "rest:post:http://172.17.0.67:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_reserve_flight", + "operation": "rest:post:http://172.17.0.65:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_confirm", + "operation": "rest:post:http://172.17.0.64:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_cancel_flight", + "operation": "rest:post:http://172.17.0.61:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_cancel_rental", + "operation": "rest:post:http://172.17.0.63:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_cancel_hotel", + "operation": "rest:post:http://172.17.0.62:9000/", + "type": "custom" + } + ], + "start": "hotel", + "states": [ + { + "name": "hotel", + "type": "operation", + "actions": [ + { + "name": "reserve_hotel", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_reserve_hotel", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "transition": "rental" + }, + { + "name": "rental", + "type": "operation", + "actions": [ + { + "name": "reserve_rental", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_reserve_rental", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "flight", + "onErrors": [ + { + "errorRef": "workflow_error", + "transition": "cancel_hotel" + } + ] + }, + { + "name": "flight", + "type": "operation", + "actions": [ + { + "name": "reserve_flight", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_reserve_flight", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "confirm", + "onErrors": [ + { + "errorRef": "workflow_error", + "transition": "cancel_rental" + } + ] + }, + { + "name": "confirm", + "type": "operation", + "actions": [ + { + "name": "confirm", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_confirm", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "end": true, + "onErrors": [ + { + "errorRef": "workflow_error", + "transition": "cancel_flight" + } + ] + }, + { + "name": "cancel_flight", + "type": "operation", + "actions": [ + { + "name": "cancel_flight", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_cancel_flight", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "cancel_rental" + }, + { + "name": "cancel_rental", + "type": "operation", + "actions": [ + { + "name": "cancel_rental", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_cancel_rental", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "cancel_hotel" + }, + { + "name": "cancel_hotel", + "type": "operation", + "actions": [ + { + "name": "cancel_hotel", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_cancel_hotel", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "end": true + } + ], + "errors": [ + { + "name": "workflow_error", + "code": "*" + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/sebd_6200_trip_booking_python_3_8.sw.json b/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_8.sw.json similarity index 100% rename from sonataflow-workflows/sebd_6200_trip_booking_python_3_8.sw.json rename to sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_8.sw.json diff --git a/sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json new file mode 100644 index 000000000..55cd5de05 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json @@ -0,0 +1,65 @@ +{ + "id": "sebd_620_func_invo_python_3_11", + "name": "sebd_620_func_invo_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_620_func_invo_python_3_11_gen", + "operation": "rest:post:http://172.17.0.59:9000/", + "type": "custom" + }, + { + "name": "sebd_620_func_invo_python_3_11_process", + "operation": "rest:post:http://172.17.0.60:9000/", + "type": "custom" + } + ], + "start": "gen", + "states": [ + { + "name": "gen", + "type": "operation", + "actions": [ + { + "name": "gen", + "functionRef": { + "refName": "sebd_620_func_invo_python_3_11_gen", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "process" + }, + { + "name": "process", + "type": "foreach", + "inputCollection": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7 + ], + "iterationParam": "idx", + "actions": [ + { + "name": "process", + "functionRef": { + "refName": "sebd_620_func_invo_python_3_11_process", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json new file mode 100644 index 000000000..8d47cd48c --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json @@ -0,0 +1,57 @@ +{ + "id": "sebd_630_parallel_sleep_python_3_11", + "name": "sebd_630_parallel_sleep_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_630_parallel_sleep_python_3_11_generate", + "operation": "rest:post:http://172.17.0.68:9000/", + "type": "custom" + }, + { + "name": "sebd_630_parallel_sleep_python_3_11_process", + "operation": "rest:post:http://172.17.0.69:9000/", + "type": "custom" + } + ], + "start": "generate", + "states": [ + { + "name": "generate", + "type": "operation", + "actions": [ + { + "name": "generate", + "functionRef": { + "refName": "sebd_630_parallel_sleep_python_3_11_generate", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "transition": "process-state" + }, + { + "name": "process-state", + "type": "foreach", + "inputCollection": "${ .buffer }", + "outputCollection": "${ .buffer }", + "iterationParam": "item", + "actions": [ + { + "name": "process", + "functionRef": { + "refName": "sebd_630_parallel_sleep_python_3_11_process", + "arguments": { + "payload": "${ item }" + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json new file mode 100644 index 000000000..1ad6ffb86 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json @@ -0,0 +1,57 @@ +{ + "id": "sebd_631_parallel_download_python_3_11", + "name": "sebd_631_parallel_download_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_631_parallel_download_python_3_11_generate", + "operation": "rest:post:http://172.17.0.70:9000/", + "type": "custom" + }, + { + "name": "sebd_631_parallel_download_python_3_11_process", + "operation": "rest:post:http://172.17.0.71:9000/", + "type": "custom" + } + ], + "start": "generate", + "states": [ + { + "name": "generate", + "type": "operation", + "actions": [ + { + "name": "generate", + "functionRef": { + "refName": "sebd_631_parallel_download_python_3_11_generate", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "transition": "process1" + }, + { + "name": "process1", + "type": "foreach", + "inputCollection": "${ .buffer }", + "outputCollection": "${ .buffer }", + "iterationParam": "item", + "actions": [ + { + "name": "process", + "functionRef": { + "refName": "sebd_631_parallel_download_python_3_11_process", + "arguments": { + "payload": "${ item }" + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json new file mode 100644 index 000000000..44d626158 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json @@ -0,0 +1,33 @@ +{ + "id": "sebd_640_selfish_detour_python_3_11", + "name": "sebd_640_selfish_detour_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_640_selfish_detour_python_3_11_measure", + "operation": "rest:post:http://172.17.0.44:9000/", + "type": "custom" + } + ], + "start": "measure", + "states": [ + { + "name": "measure", + "type": "operation", + "actions": [ + { + "name": "measure", + "functionRef": { + "refName": "sebd_640_selfish_detour_python_3_11_measure", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json new file mode 100644 index 000000000..05747d39a --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json @@ -0,0 +1,78 @@ +{ + "id": "sebd_650_vid_python_3_11", + "name": "sebd_650_vid_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_650_vid_python_3_11_decode", + "operation": "rest:post:http://172.17.0.46:9000/", + "type": "custom" + }, + { + "name": "sebd_650_vid_python_3_11_analyse", + "operation": "rest:post:http://172.17.0.45:9000/", + "type": "custom" + }, + { + "name": "sebd_650_vid_python_3_11_summarize", + "operation": "rest:post:http://172.17.0.47:9000/", + "type": "custom" + } + ], + "start": "decode", + "states": [ + { + "name": "decode", + "type": "operation", + "actions": [ + { + "name": "decode", + "functionRef": { + "refName": "sebd_650_vid_python_3_11_decode", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "analyse-map" + }, + { + "name": "analyse-map", + "type": "foreach", + "inputCollection": "${ .frames }", + "outputCollection": "${ .frames }", + "iterationParam": "item", + "actions": [ + { + "name": "analyse", + "functionRef": { + "refName": "sebd_650_vid_python_3_11_analyse", + "arguments": { + "payload": "${ item }" + } + } + } + ], + "transition": "summarize" + }, + { + "name": "summarize", + "type": "operation", + "actions": [ + { + "name": "summarize", + "functionRef": { + "refName": "sebd_650_vid_python_3_11_summarize", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json new file mode 100644 index 000000000..a23dcc014 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json @@ -0,0 +1,102 @@ +{ + "id": "sebd_660_map_reduce_python_3_11", + "name": "sebd_660_map_reduce_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_660_map_reduce_python_3_11_split", + "operation": "rest:post:http://172.17.0.35:9000/", + "type": "custom" + }, + { + "name": "sebd_660_map_reduce_python_3_11_map", + "operation": "rest:post:http://172.17.0.32:9000/", + "type": "custom" + }, + { + "name": "sebd_660_map_reduce_python_3_11_shuffle", + "operation": "rest:post:http://172.17.0.34:9000/", + "type": "custom" + }, + { + "name": "sebd_660_map_reduce_python_3_11_reduce", + "operation": "rest:post:http://172.17.0.33:9000/", + "type": "custom" + } + ], + "start": "split", + "states": [ + { + "name": "split", + "type": "operation", + "actions": [ + { + "name": "split", + "functionRef": { + "refName": "sebd_660_map_reduce_python_3_11_split", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "map-state" + }, + { + "name": "map-state", + "type": "foreach", + "inputCollection": "${ .list }", + "outputCollection": "${ .list }", + "iterationParam": "item", + "actions": [ + { + "name": "map", + "functionRef": { + "refName": "sebd_660_map_reduce_python_3_11_map", + "arguments": { + "payload": "${ item }" + } + } + } + ], + "transition": "shuffle" + }, + { + "name": "shuffle", + "type": "operation", + "actions": [ + { + "name": "shuffle", + "functionRef": { + "refName": "sebd_660_map_reduce_python_3_11_shuffle", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "transition": "reduce-state" + }, + { + "name": "reduce-state", + "type": "foreach", + "inputCollection": "${ .list }", + "outputCollection": "${ .list }", + "iterationParam": "item", + "actions": [ + { + "name": "reduce", + "functionRef": { + "refName": "sebd_660_map_reduce_python_3_11_reduce", + "arguments": { + "payload": "${ item }" + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json new file mode 100644 index 000000000..53f4d351d --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json @@ -0,0 +1,33 @@ +{ + "id": "sebd_670_auth_python_3_11", + "name": "sebd_670_auth_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_670_auth_python_3_11_auth", + "operation": "rest:post:http://172.17.0.36:9000/", + "type": "custom" + } + ], + "start": "auth", + "states": [ + { + "name": "auth", + "type": "operation", + "actions": [ + { + "name": "auth", + "functionRef": { + "refName": "sebd_670_auth_python_3_11_auth", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json new file mode 100644 index 000000000..abda66182 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json @@ -0,0 +1,104 @@ +{ + "id": "sebd_680_excamera_python_3_11", + "name": "sebd_680_excamera_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_680_excamera_python_3_11_split", + "operation": "rest:post:http://172.17.0.40:9000/", + "type": "custom" + }, + { + "name": "sebd_680_excamera_python_3_11_encode", + "operation": "rest:post:http://172.17.0.37:9000/", + "type": "custom" + }, + { + "name": "sebd_680_excamera_python_3_11_reencode", + "operation": "rest:post:http://172.17.0.39:9000/", + "type": "custom" + }, + { + "name": "sebd_680_excamera_python_3_11_rebase", + "operation": "rest:post:http://172.17.0.38:9000/", + "type": "custom" + } + ], + "start": "split", + "states": [ + { + "name": "split", + "type": "operation", + "actions": [ + { + "name": "split", + "functionRef": { + "refName": "sebd_680_excamera_python_3_11_split", + "arguments": { + "payload": "${ . 
}" + } + } + } + ], + "transition": "encode-state" + }, + { + "name": "encode-state", + "type": "foreach", + "inputCollection": "${ .segments }", + "outputCollection": "${ .segments }", + "iterationParam": "item", + "actions": [ + { + "name": "encode", + "functionRef": { + "refName": "sebd_680_excamera_python_3_11_encode", + "arguments": { + "payload": "${ item }" + } + } + } + ], + "transition": "reencode-state" + }, + { + "name": "reencode-state", + "type": "foreach", + "inputCollection": "${ .segments }", + "outputCollection": "${ .segments }", + "iterationParam": "item", + "actions": [ + { + "name": "reencode", + "functionRef": { + "refName": "sebd_680_excamera_python_3_11_reencode", + "arguments": { + "payload": "${ item }" + } + } + } + ], + "transition": "rebase-state" + }, + { + "name": "rebase-state", + "type": "foreach", + "inputCollection": "${ .segments }", + "iterationParam": "item", + "actions": [ + { + "name": "rebase", + "functionRef": { + "refName": "sebd_680_excamera_python_3_11_rebase", + "arguments": { + "payload": "${ .item }" + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json new file mode 100644 index 000000000..9934c854e --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json @@ -0,0 +1,57 @@ +{ + "id": "sebd_690_ml_python_3_11", + "name": "sebd_690_ml_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_690_ml_python_3_11_generate", + "operation": "rest:post:http://172.17.0.41:9000/", + "type": "custom" + }, + { + "name": "sebd_690_ml_python_3_11_train", + "operation": "rest:post:http://172.17.0.42:9000/", + "type": "custom" + } + ], + "start": "generate", + "states": [ + { + "name": "generate", + "type": "operation", + "actions": [ + { + "name": 
"generate", + "functionRef": { + "refName": "sebd_690_ml_python_3_11_generate", + "arguments": { + "payload": "${ . }" + } + } + } + ], + "transition": "train-state" + }, + { + "name": "train-state", + "type": "foreach", + "inputCollection": "${ .schedules }", + "outputCollection": "${ .schedules }", + "iterationParam": "item", + "actions": [ + { + "name": "train", + "functionRef": { + "refName": "sebd_690_ml_python_3_11_train", + "arguments": { + "payload": "${ item }" + } + } + } + ], + "end": true + } + ] +} \ No newline at end of file From 9955675e7097b50480d054d12eed461c6c790690 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Tue, 16 Dec 2025 15:42:49 +0100 Subject: [PATCH 78/82] log temp res --- config/local_deployment.json | 28 +++++++++++++++++--- config/local_workflows.json | 8 +++--- experiments.json | 12 ++++----- run_sonataflow_workflows.sh | 51 +++++++++++++++++++++++++++++------- sebs/storage/minio.py | 6 ++++- sebs/storage/scylladb.py | 6 ++++- 6 files changed, 85 insertions(+), 26 deletions(-) diff --git a/config/local_deployment.json b/config/local_deployment.json index 199a5934a..cbd17ee8d 100644 --- a/config/local_deployment.json +++ b/config/local_deployment.json @@ -51,7 +51,7 @@ } }, "deployment": { - "name": "local", + "name": "sonataflow", "aws": { "region": "us-east-1", "lambda-role": "", @@ -114,12 +114,12 @@ "sonataflow": { "resources": { "redis": { - "host": "", + "host": "localhost:6380", "password": "" }, "runtime": { "url": "http://localhost:8080", - "endpoint_prefix": "services" + "endpoint_prefix": "" } }, "storage": { @@ -130,7 +130,27 @@ "instance_id": "", "input_buckets": [], "output_buckets": [], - "type": "minio" + "type": "minio", + "object": { + "type": "minio", + "minio": { + "address": "172.17.0.2:9000", + "mapped_port": 9011, + "access_key": "hmhh57YIS1h2Q5qCAtyBhSByAWsri1dLdyTPq82u9eU", + "secret_key": "d2cd1575c092f49ddd3522f017c60e34bbbbdff72765ff2235211b047e6bd311", + "instance_id": 
"178ad7ca573dff5e10326286bfbd239a2338a1f63b0ec29c93da497f99d0f8b1", + "input_buckets": [], + "output_buckets": [] + } + }, + "nosql": { + "type": "scylladb", + "scylladb": { + "address": "172.17.0.3:8000", + "mapped_port": 9012, + "instance_id": "77290431c8bf4b8a5029dacf6e8a8d47676e4360206b35619565ef5dc65935ed" + } + } } }, "openwhisk": { diff --git a/config/local_workflows.json b/config/local_workflows.json index f3727226e..a7966253e 100644 --- a/config/local_workflows.json +++ b/config/local_workflows.json @@ -113,9 +113,9 @@ "minio": { "address": "172.17.0.2:9000", "mapped_port": 9011, - "access_key": "hV4e0XGmzrJlsP8UW0OKDDrVqD1hE0IfCPYwZvTakJE", - "secret_key": "aec745a602364ac958b24787b93c87f469e27524a8ed1e1e420ad5218af38bd8", - "instance_id": "22b8cf06c8f28c429c88732bca34bb3d516d5583af59651c68925cd472de4a85", + "access_key": "hmhh57YIS1h2Q5qCAtyBhSByAWsri1dLdyTPq82u9eU", + "secret_key": "d2cd1575c092f49ddd3522f017c60e34bbbbdff72765ff2235211b047e6bd311", + "instance_id": "178ad7ca573dff5e10326286bfbd239a2338a1f63b0ec29c93da497f99d0f8b1", "input_buckets": [], "output_buckets": [] } @@ -125,7 +125,7 @@ "scylladb": { "address": "172.17.0.3:8000", "mapped_port": 9012, - "instance_id": "a411b4a644addd73d1943883d9094b57f31ad5a87f94ecec9e5784e00118a727" + "instance_id": "77290431c8bf4b8a5029dacf6e8a8d47676e4360206b35619565ef5dc65935ed" } } }, diff --git a/experiments.json b/experiments.json index c50a66b3e..65d183397 100644 --- a/experiments.json +++ b/experiments.json @@ -1,7 +1,7 @@ { "_invocations": {}, "_metrics": {}, - "begin_time": 1765812589.989892, + "begin_time": 1765895964.383506, "config": { "deployment": { "name": "sonataflow", @@ -14,7 +14,7 @@ "alternator_port": 8000, "cpus": -1, "data_volume": "", - "instance_id": "a411b4a644addd73d1943883d9094b57f31ad5a87f94ecec9e5784e00118a727", + "instance_id": "77290431c8bf4b8a5029dacf6e8a8d47676e4360206b35619565ef5dc65935ed", "mapped_port": 9012, "memory": -1, "region": "None", @@ -26,14 +26,14 @@ "url": 
"http://localhost:8080" }, "storage": { - "access_key": "hV4e0XGmzrJlsP8UW0OKDDrVqD1hE0IfCPYwZvTakJE", + "access_key": "hmhh57YIS1h2Q5qCAtyBhSByAWsri1dLdyTPq82u9eU", "address": "172.17.0.2:9000", "data_volume": "", "input_buckets": [], - "instance_id": "22b8cf06c8f28c429c88732bca34bb3d516d5583af59651c68925cd472de4a85", + "instance_id": "178ad7ca573dff5e10326286bfbd239a2338a1f63b0ec29c93da497f99d0f8b1", "mapped_port": 9011, "output_buckets": [], - "secret_key": "aec745a602364ac958b24787b93c87f469e27524a8ed1e1e420ad5218af38bd8", + "secret_key": "d2cd1575c092f49ddd3522f017c60e34bbbbdff72765ff2235211b047e6bd311", "type": "minio", "version": "" } @@ -92,6 +92,6 @@ "update_storage": false } }, - "end_time": 1765812589.995802, + "end_time": 1765895964.386292, "result_bucket": null } \ No newline at end of file diff --git a/run_sonataflow_workflows.sh b/run_sonataflow_workflows.sh index 1204b6c90..c6373405a 100755 --- a/run_sonataflow_workflows.sh +++ b/run_sonataflow_workflows.sh @@ -1,13 +1,32 @@ #!/bin/bash set -euo pipefail -# Ensure SeBS (python docker SDK) uses the same Docker daemon as `docker` CLI. -# This avoids "No route to host" issues when containers are created in one daemon -# (e.g., `/var/run/docker.sock`) but `docker run` uses another (e.g., Docker Desktop). -if command -v docker >/dev/null 2>&1; then - DOCKER_HOST_FROM_CONTEXT=$(docker context inspect --format '{{.Endpoints.docker.Host}}' 2>/dev/null || true) - if [ -n "${DOCKER_HOST_FROM_CONTEXT:-}" ]; then - export DOCKER_HOST="$DOCKER_HOST_FROM_CONTEXT" +# Use a single Docker daemon for both SeBS (python docker SDK) and `docker` CLI. +# On Linux, prefer the native engine at `/var/run/docker.sock` when available: +# Docker Desktop's VM-backed filesystem sharing can break bind-mounted volumes for MinIO/ScyllaDB. 
+if [ -z "${DOCKER_HOST:-}" ]; then + if [ -S /var/run/docker.sock ] && DOCKER_HOST=unix:///var/run/docker.sock docker info >/dev/null 2>&1; then + export DOCKER_HOST="unix:///var/run/docker.sock" + elif command -v docker >/dev/null 2>&1; then + DOCKER_HOST_FROM_CONTEXT=$(docker context inspect --format '{{.Endpoints.docker.Host}}' 2>/dev/null || true) + if [ -n "${DOCKER_HOST_FROM_CONTEXT:-}" ]; then + export DOCKER_HOST="$DOCKER_HOST_FROM_CONTEXT" + fi + fi +fi + +# Prefer the repo's virtualenv (avoids missing deps when not activated). +SEBS_PYTHON="${SEBS_PYTHON:-}" +if [ -z "${SEBS_PYTHON}" ]; then + if [ -x "$PWD/python-venv/bin/python" ]; then + SEBS_PYTHON="$PWD/python-venv/bin/python" + elif command -v python3 >/dev/null 2>&1; then + SEBS_PYTHON="$(command -v python3)" + elif command -v python >/dev/null 2>&1; then + SEBS_PYTHON="$(command -v python)" + else + echo "ERROR: python not found (set SEBS_PYTHON or install python3)." + exit 1 fi fi @@ -38,7 +57,7 @@ cleanup() { } trap cleanup EXIT -./sebs.py storage start all config/storage.json --output-json out_storage.json +"$SEBS_PYTHON" ./sebs.py storage start all config/storage.json --output-json out_storage.json MINIO_ADDRESS=$(jq -r '.object.minio.address' out_storage.json) MINIO_PORT=$(jq -r '.object.minio.mapped_port' out_storage.json) @@ -49,6 +68,18 @@ SCYLLA_ADDRESS=$(jq -r '.nosql.scylladb.address' out_storage.json) SCYLLA_PORT=$(jq -r '.nosql.scylladb.mapped_port' out_storage.json) SCYLLA_INSTANCE=$(jq -r '.nosql.scylladb.instance_id' out_storage.json) +# Fail fast if storage containers were created in a different daemon/context. +if ! docker inspect "$MINIO_INSTANCE" >/dev/null 2>&1; then + echo "ERROR: MinIO container $MINIO_INSTANCE not found in the current Docker daemon." + echo "Hint: set DOCKER_HOST to the daemon SeBS uses (e.g., unix:///var/run/docker.sock)." + exit 1 +fi +if ! 
docker inspect "$SCYLLA_INSTANCE" >/dev/null 2>&1; then + echo "ERROR: ScyllaDB container $SCYLLA_INSTANCE not found in the current Docker daemon." + echo "Hint: set DOCKER_HOST to the daemon SeBS uses (e.g., unix:///var/run/docker.sock)." + exit 1 +fi + for cfg in config/local_workflows.json config/local_deployment.json; do tmp=$(mktemp) jq \ @@ -298,7 +329,7 @@ for wf in "${WORKFLOWS[@]}"; do # First, create the workflow (without invoking it yet) by running with --repetitions 0 # This generates the .sw.json file - ./sebs.py benchmark workflow "$wf" test \ + "$SEBS_PYTHON" ./sebs.py benchmark workflow "$wf" test \ --config config/local_workflows.json \ --deployment sonataflow --trigger http --repetitions 0 \ --output-dir results/local-workflows --verbose || true @@ -348,7 +379,7 @@ for wf in "${WORKFLOWS[@]}"; do preflight_runtime_function_connectivity "$SONATAFLOW_WORKFLOWS_DIR/${WF_ID}.sw.json" || exit 1 # Now run the actual benchmark - ./sebs.py benchmark workflow "$wf" test \ + "$SEBS_PYTHON" ./sebs.py benchmark workflow "$wf" test \ --config config/local_workflows.json \ --deployment sonataflow --trigger http --repetitions 1 \ --output-dir results/local-workflows --verbose || true diff --git a/sebs/storage/minio.py b/sebs/storage/minio.py index bb9112a22..ea1d486ce 100644 --- a/sebs/storage/minio.py +++ b/sebs/storage/minio.py @@ -279,7 +279,11 @@ def _deserialize( try: obj._storage_container = docker_client.containers.get(instance_id) except docker.errors.NotFound: - raise RuntimeError(f"Storage container {instance_id} does not exist!") + obj.logging.warning( + f"Storage container {instance_id} not found; continuing without container handle." 
+ ) + obj._storage_container = None + obj._cfg.instance_id = "" else: obj._storage_container = None obj._input_prefixes = copy.copy(cached_config.input_buckets) diff --git a/sebs/storage/scylladb.py b/sebs/storage/scylladb.py index aae97815d..fd52ff2c0 100644 --- a/sebs/storage/scylladb.py +++ b/sebs/storage/scylladb.py @@ -196,7 +196,11 @@ def _deserialize( try: obj._storage_container = docker_client.containers.get(instance_id) except docker.errors.NotFound: - raise RuntimeError(f"Storage container {instance_id} does not exist!") + obj.logging.warning( + f"Storage container {instance_id} not found; continuing without container handle." + ) + obj._storage_container = None + obj._cfg.instance_id = "" else: obj._storage_container = None return obj From 300ecb0b019ef0b1217d5b8879a063833c3f3079 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Mon, 29 Dec 2025 13:57:11 +0100 Subject: [PATCH 79/82] log curr re --- config/local_deployment.json | 12 +- config/local_workflows.json | 12 +- config/storage.json | 6 +- experiments.json | 48 ++++- out_storage.json | 16 +- run_sonataflow_workflows.sh | 22 +-- sebs/sonataflow/generator.py | 32 ++- sebs/storage/config.py | 2 + sebs/storage/minio.py | 17 +- sebs/storage/scylladb.py | 17 +- .../sebd_6100_1000_genome_python_3_11.sw.json | 10 +- ...000_genome_individuals_python_3_11.sw.json | 45 ----- ...1000_genome_individuals_python_3_8.sw.json | 45 ----- .../sebd_610_gen_python_3_11.sw.json | 14 +- .../workflows/sebd_610_gen_python_3_8.sw.json | 137 ------------- ...sebd_6200_trip_booking_python_3_11.sw.json | 183 ------------------ .../sebd_6200_trip_booking_python_3_8.sw.json | 183 ------------------ .../sebd_620_func_invo_python_3_11.sw.json | 65 ------- ...ebd_630_parallel_sleep_python_3_11.sw.json | 57 ------ ..._631_parallel_download_python_3_11.sw.json | 57 ------ ...ebd_640_selfish_detour_python_3_11.sw.json | 33 ---- .../sebd_650_vid_python_3_11.sw.json | 78 -------- .../sebd_660_map_reduce_python_3_11.sw.json | 102 
---------- .../sebd_670_auth_python_3_11.sw.json | 33 ---- .../sebd_680_excamera_python_3_11.sw.json | 104 ---------- .../workflows/sebd_690_ml_python_3_11.sw.json | 57 ------ 26 files changed, 141 insertions(+), 1246 deletions(-) delete mode 100644 sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_8.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_610_gen_python_3_8.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_8.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json delete mode 100644 sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json diff --git a/config/local_deployment.json b/config/local_deployment.json index cbd17ee8d..a56e4fb46 100644 --- a/config/local_deployment.json +++ b/config/local_deployment.json @@ -134,11 +134,11 @@ "object": { "type": "minio", "minio": { - "address": "172.17.0.2:9000", + "address": "172.18.0.2:9000", "mapped_port": 9011, - "access_key": "hmhh57YIS1h2Q5qCAtyBhSByAWsri1dLdyTPq82u9eU", - "secret_key": "d2cd1575c092f49ddd3522f017c60e34bbbbdff72765ff2235211b047e6bd311", - "instance_id": 
"178ad7ca573dff5e10326286bfbd239a2338a1f63b0ec29c93da497f99d0f8b1", + "access_key": "1BLdaNtOSM0ydc6JdiCnVUkuFG17-F-WNgEjQDZ5OOk", + "secret_key": "13a8d3970b537c17c25e3a772c6ffaa4635423ca99d942508d3f5a626e242289", + "instance_id": "f95ec8af54b7c94293469dac2e20a46fe244bc33761a75a6b0c1cd9e04f2fcd4", "input_buckets": [], "output_buckets": [] } @@ -146,9 +146,9 @@ "nosql": { "type": "scylladb", "scylladb": { - "address": "172.17.0.3:8000", + "address": "172.18.0.3:8000", "mapped_port": 9012, - "instance_id": "77290431c8bf4b8a5029dacf6e8a8d47676e4360206b35619565ef5dc65935ed" + "instance_id": "6186beb7178ed2d8c9317eafdd2726d66ce0798a65e74b9e6b6234fc9dba2c76" } } } diff --git a/config/local_workflows.json b/config/local_workflows.json index a7966253e..1813356f2 100644 --- a/config/local_workflows.json +++ b/config/local_workflows.json @@ -111,11 +111,11 @@ "object": { "type": "minio", "minio": { - "address": "172.17.0.2:9000", + "address": "172.18.0.2:9000", "mapped_port": 9011, - "access_key": "hmhh57YIS1h2Q5qCAtyBhSByAWsri1dLdyTPq82u9eU", - "secret_key": "d2cd1575c092f49ddd3522f017c60e34bbbbdff72765ff2235211b047e6bd311", - "instance_id": "178ad7ca573dff5e10326286bfbd239a2338a1f63b0ec29c93da497f99d0f8b1", + "access_key": "1BLdaNtOSM0ydc6JdiCnVUkuFG17-F-WNgEjQDZ5OOk", + "secret_key": "13a8d3970b537c17c25e3a772c6ffaa4635423ca99d942508d3f5a626e242289", + "instance_id": "f95ec8af54b7c94293469dac2e20a46fe244bc33761a75a6b0c1cd9e04f2fcd4", "input_buckets": [], "output_buckets": [] } @@ -123,9 +123,9 @@ "nosql": { "type": "scylladb", "scylladb": { - "address": "172.17.0.3:8000", + "address": "172.18.0.3:8000", "mapped_port": 9012, - "instance_id": "77290431c8bf4b8a5029dacf6e8a8d47676e4360206b35619565ef5dc65935ed" + "instance_id": "6186beb7178ed2d8c9317eafdd2726d66ce0798a65e74b9e6b6234fc9dba2c76" } } }, diff --git a/config/storage.json b/config/storage.json index 9ea14d31d..644db56f9 100644 --- a/config/storage.json +++ b/config/storage.json @@ -4,7 +4,8 @@ "minio": { 
"mapped_port": 9011, "version": "RELEASE.2024-07-16T23-46-41Z", - "data_volume": "minio-volume" + "data_volume": "minio-volume", + "network_name": "sebs-network" } }, "nosql": { @@ -14,7 +15,8 @@ "version": "6.0", "cpus": 1, "memory": "750", - "data_volume": "scylladb-volume" + "data_volume": "scylladb-volume", + "network_name": "sebs-network" } } } diff --git a/experiments.json b/experiments.json index 65d183397..3911f9f9d 100644 --- a/experiments.json +++ b/experiments.json @@ -1,7 +1,35 @@ { - "_invocations": {}, + "_invocations": { + "sebd-6100.1000-genome-python-3.11": { + "30bee4aa": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": {}, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "30bee4aa", + "stats": { + "cold_start": false, + "failure": true, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 30220, + "client_begin": "2025-12-28 02:00:14.362132", + "client_end": "2025-12-28 02:00:14.392352", + "initialization": 0 + } + } + } + }, "_metrics": {}, - "begin_time": 1765895964.383506, + "begin_time": 1766883614.36036, "config": { "deployment": { "name": "sonataflow", @@ -10,13 +38,14 @@ "allocated_ports": [], "nosql": { "access_key": "None", - "address": "172.17.0.3:8000", + "address": "172.18.0.3:8000", "alternator_port": 8000, "cpus": -1, "data_volume": "", - "instance_id": "77290431c8bf4b8a5029dacf6e8a8d47676e4360206b35619565ef5dc65935ed", + "instance_id": "6186beb7178ed2d8c9317eafdd2726d66ce0798a65e74b9e6b6234fc9dba2c76", "mapped_port": 9012, "memory": -1, + "network_name": "bridge", "region": "None", "secret_key": "None", "version": "" @@ -26,14 +55,15 @@ "url": "http://localhost:8080" }, "storage": { - "access_key": "hmhh57YIS1h2Q5qCAtyBhSByAWsri1dLdyTPq82u9eU", - "address": "172.17.0.2:9000", + "access_key": "1BLdaNtOSM0ydc6JdiCnVUkuFG17-F-WNgEjQDZ5OOk", + "address": "172.18.0.2:9000", "data_volume": "", "input_buckets": [], - "instance_id": 
"178ad7ca573dff5e10326286bfbd239a2338a1f63b0ec29c93da497f99d0f8b1", + "instance_id": "f95ec8af54b7c94293469dac2e20a46fe244bc33761a75a6b0c1cd9e04f2fcd4", "mapped_port": 9011, + "network_name": "bridge", "output_buckets": [], - "secret_key": "d2cd1575c092f49ddd3522f017c60e34bbbbdff72765ff2235211b047e6bd311", + "secret_key": "13a8d3970b537c17c25e3a772c6ffaa4635423ca99d942508d3f5a626e242289", "type": "minio", "version": "" } @@ -92,6 +122,6 @@ "update_storage": false } }, - "end_time": 1765895964.386292, + "end_time": 1766883614.392648, "result_bucket": null } \ No newline at end of file diff --git a/out_storage.json b/out_storage.json index d3777d636..dc41e0466 100644 --- a/out_storage.json +++ b/out_storage.json @@ -2,32 +2,34 @@ "object": { "type": "minio", "minio": { - "address": "172.17.0.2:9000", + "address": "172.18.0.2:9000", "mapped_port": 9011, - "access_key": "rB907YMFJW7gUgnUnefzcni9RExzy4aP0vr52tGzYgQ", - "secret_key": "f47e06c50a29f37f68b01eb2b96ea1679cab8b6e72102078fa36ed72a07f8ec9", - "instance_id": "5fac4b34c68172c95f81230df52c2989d06e3f7891b5bd901905c085258e56b9", + "access_key": "1BLdaNtOSM0ydc6JdiCnVUkuFG17-F-WNgEjQDZ5OOk", + "secret_key": "13a8d3970b537c17c25e3a772c6ffaa4635423ca99d942508d3f5a626e242289", + "instance_id": "f95ec8af54b7c94293469dac2e20a46fe244bc33761a75a6b0c1cd9e04f2fcd4", "output_buckets": [], "input_buckets": [], "version": "RELEASE.2024-07-16T23-46-41Z", "data_volume": "minio-volume", + "network_name": "sebs-network", "type": "minio" } }, "nosql": { "type": "scylladb", "scylladb": { - "address": "172.17.0.3:8000", + "address": "172.18.0.3:8000", "mapped_port": 9012, "alternator_port": 8000, "access_key": "None", "secret_key": "None", - "instance_id": "784e07f6d7cfe6b2670fc65d840fd79864d1e6336422dcc2fb340975b8131a4d", + "instance_id": "6186beb7178ed2d8c9317eafdd2726d66ce0798a65e74b9e6b6234fc9dba2c76", "region": "None", "cpus": 1, "memory": "750", "version": "6.0", - "data_volume": "scylladb-volume" + "data_volume": 
"scylladb-volume", + "network_name": "sebs-network" } } } \ No newline at end of file diff --git a/run_sonataflow_workflows.sh b/run_sonataflow_workflows.sh index c6373405a..02a014fb5 100755 --- a/run_sonataflow_workflows.sh +++ b/run_sonataflow_workflows.sh @@ -310,17 +310,17 @@ fi WORKFLOWS=( "610.gen" "6100.1000-genome" - "6101.1000-genome-individuals" - "620.func-invo" - "6200.trip-booking" - "630.parallel-sleep" - "631.parallel-download" - "640.selfish-detour" - "650.vid" - "660.map-reduce" - "670.auth" - "680.excamera" - "690.ml" + # "6101.1000-genome-individuals" + # "620.func-invo" + # "6200.trip-booking" + # "630.parallel-sleep" + # "631.parallel-download" + # "640.selfish-detour" + # "650.vid" + # "660.map-reduce" + # "670.auth" + # "680.excamera" + # "690.ml" ) SONATAFLOW_STARTED=false diff --git a/sebs/sonataflow/generator.py b/sebs/sonataflow/generator.py index a375b334e..fb604a8b1 100644 --- a/sebs/sonataflow/generator.py +++ b/sebs/sonataflow/generator.py @@ -74,8 +74,19 @@ def encode_task(self, state: Task) -> Union[dict, List[dict]]: def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: def _condition(case: Switch.Case) -> str: - # Serverless Workflow uses jq-like expressions; keep it simple. - return f"{case.var} {case.op} {json.dumps(case.val)}" + # Serverless Workflow uses jq expressions wrapped in ${ } + var = case.var.strip() + needs_dot_prefix = not var.startswith((".", "$")) and not any(ch in var for ch in " ()|+*/-") + + # Ensure field path has dot prefix for jq + if needs_dot_prefix: + var = "." + self._quote_field_path(var) + elif var.startswith(".") and "." in var[1:]: + # Already has a dot prefix + var = "." 
+ self._quote_field_path(var[1:]) + + # Wrap the condition in ${ } as per SonataFlow documentation + return f"${{ {var} {case.op} {json.dumps(case.val)} }}" return { "name": state.name, @@ -86,6 +97,12 @@ def _condition(case: Switch.Case) -> str: "defaultCondition": {"transition": state.default} if state.default else {"end": True}, } + def _quote_field_path(self, path: str) -> str: + """Return field path as-is for jq expressions. + Simple dot notation like "astros.people" works fine in jq. + """ + return path + def encode_map(self, state: Map) -> Union[dict, List[dict]]: iteration_param = "item" action_args = "${ " + iteration_param + " }" @@ -93,7 +110,8 @@ def encode_map(self, state: Map) -> Union[dict, List[dict]]: # Merge map element with selected common parameters. merged = {"array_element": "${ " + iteration_param + " }"} for param in [p.strip() for p in state.common_params.split(",") if p.strip()]: - merged[param] = "${ ." + param + " }" + quoted_param = self._quote_field_path(param) + merged[param] = "${ ." + quoted_param + " }" action_args = merged # type: ignore # Resolve the actual function name from the root state @@ -101,11 +119,12 @@ def encode_map(self, state: Map) -> Union[dict, List[dict]]: root_state_def = state.funcs.get(state.root, {}) func_name = root_state_def.get("func_name", state.root) + quoted_array = self._quote_field_path(state.array) payload: Dict[str, object] = { "name": state.name, "type": "foreach", - "inputCollection": "${ ." + state.array + " }", - "outputCollection": "${ ." + state.array + " }", + "inputCollection": "${ ." + quoted_array + " }", + "outputCollection": "${ ." 
+ quoted_array + " }", "iterationParam": iteration_param, "actions": [self._default_action(func_name, action_args)], } @@ -132,10 +151,11 @@ def encode_repeat(self, state: Repeat) -> Union[dict, List[dict]]: return payload def encode_loop(self, state: Loop) -> Union[dict, List[dict]]: + quoted_array = self._quote_field_path(state.array) payload: Dict[str, object] = { "name": state.name, "type": "foreach", - "inputCollection": "${ ." + state.array + " }", + "inputCollection": "${ ." + quoted_array + " }", "iterationParam": "item", "actions": [self._default_action(state.func_name, "${ .item }")], } diff --git a/sebs/storage/config.py b/sebs/storage/config.py index cd47df391..b4d92da60 100644 --- a/sebs/storage/config.py +++ b/sebs/storage/config.py @@ -29,6 +29,7 @@ class MinioConfig(PersistentStorageConfig): input_buckets: List[str] = field(default_factory=lambda: []) version: str = "" data_volume: str = "" + network_name: str = "bridge" type: str = "minio" def update_cache(self, path: List[str], cache: Cache): @@ -79,6 +80,7 @@ class ScyllaDBConfig(NoSQLStorageConfig): memory: int = -1 version: str = "" data_volume: str = "" + network_name: str = "bridge" def update_cache(self, path: List[str], cache: Cache): diff --git a/sebs/storage/minio.py b/sebs/storage/minio.py index ea1d486ce..4d17a2155 100644 --- a/sebs/storage/minio.py +++ b/sebs/storage/minio.py @@ -93,7 +93,7 @@ def start(self): self._storage_container = self._docker_client.containers.run( f"minio/minio:{self._cfg.version}", command="server /data", - network_mode="bridge", + network_mode=self._cfg.network_name, ports={"9000": str(self._cfg.mapped_port)}, environment={ "MINIO_ACCESS_KEY": self._cfg.access_key, @@ -129,9 +129,18 @@ def configure_connection(self): # Check if the system is Linux and that it's not WSL if is_linux(): networks = self._storage_container.attrs["NetworkSettings"]["Networks"] - self._cfg.address = "{IPAddress}:{Port}".format( - IPAddress=networks["bridge"]["IPAddress"], Port=9000 - 
) + # Use the configured network name instead of hardcoded "bridge" + network_info = networks.get(self._cfg.network_name) + if network_info: + self._cfg.address = "{IPAddress}:{Port}".format( + IPAddress=network_info["IPAddress"], Port=9000 + ) + else: + # Fallback: use the first available network + first_network = next(iter(networks.values())) + self._cfg.address = "{IPAddress}:{Port}".format( + IPAddress=first_network["IPAddress"], Port=9000 + ) else: # System is either WSL, Windows, or Mac self._cfg.address = f"localhost:{self._cfg.mapped_port}" diff --git a/sebs/storage/scylladb.py b/sebs/storage/scylladb.py index fd52ff2c0..85cc9c18b 100644 --- a/sebs/storage/scylladb.py +++ b/sebs/storage/scylladb.py @@ -90,7 +90,7 @@ def start(self): command=scylladb_args, name="some-scylla", hostname="some-scylla", - network_mode="bridge", + network_mode=self._cfg.network_name, volumes=volumes, ports={"8000": str(self._cfg.mapped_port)}, remove=True, @@ -143,9 +143,18 @@ def configure_connection(self): # Check if the system is Linux and that it's not WSL if platform.system() == "Linux" and "microsoft" not in platform.release().lower(): networks = self._storage_container.attrs["NetworkSettings"]["Networks"] - self._cfg.address = "{IPAddress}:{Port}".format( - IPAddress=networks["bridge"]["IPAddress"], Port=self._cfg.alternator_port - ) + # Use the configured network name instead of hardcoded "bridge" + network_info = networks.get(self._cfg.network_name) + if network_info: + self._cfg.address = "{IPAddress}:{Port}".format( + IPAddress=network_info["IPAddress"], Port=self._cfg.alternator_port + ) + else: + # Fallback: use the first available network + first_network = next(iter(networks.values())) + self._cfg.address = "{IPAddress}:{Port}".format( + IPAddress=first_network["IPAddress"], Port=self._cfg.alternator_port + ) else: # System is either WSL, Windows, or Mac self._cfg.address = f"localhost:{self._cfg.mapped_port}" diff --git 
a/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json index ad60a0fe4..45aa90fba 100644 --- a/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json +++ b/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json @@ -7,27 +7,27 @@ "functions": [ { "name": "sebd_6100_1000_genome_python_3_11_individuals", - "operation": "rest:post:http://172.17.0.54:9000/", + "operation": "rest:post:http://172.18.0.11:9000/", "type": "custom" }, { "name": "sebd_6100_1000_genome_python_3_11_individuals_merge", - "operation": "rest:post:http://172.17.0.55:9000/", + "operation": "rest:post:http://172.18.0.12:9000/", "type": "custom" }, { "name": "sebd_6100_1000_genome_python_3_11_sifting", - "operation": "rest:post:http://172.17.0.57:9000/", + "operation": "rest:post:http://172.18.0.14:9000/", "type": "custom" }, { "name": "sebd_6100_1000_genome_python_3_11_frequency", - "operation": "rest:post:http://172.17.0.48:9000/", + "operation": "rest:post:http://172.18.0.10:9000/", "type": "custom" }, { "name": "sebd_6100_1000_genome_python_3_11_mutation_overlap", - "operation": "rest:post:http://172.17.0.56:9000/", + "operation": "rest:post:http://172.18.0.13:9000/", "type": "custom" } ], diff --git a/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json deleted file mode 100644 index d4b8cfe6f..000000000 --- a/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "id": "sebd_6101_1000_genome_individuals_python_3_11", - "name": "sebd_6101_1000_genome_individuals_python_3_11", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_6101_1000_genome_individuals_python_3_11_individuals", - 
"operation": "rest:post:http://172.17.0.58:9000/", - "type": "custom" - } - ], - "start": "individuals_state", - "states": [ - { - "name": "individuals_state", - "type": "foreach", - "inputCollection": "${ .blob }", - "outputCollection": "${ .blob }", - "iterationParam": "item", - "actions": [ - { - "name": "individuals", - "functionRef": { - "refName": "sebd_6101_1000_genome_individuals_python_3_11_individuals", - "arguments": { - "payload": { - "array_element": "${ item }", - "benchmark_bucket": "${ .benchmark_bucket }", - "bucket": "${ .bucket }", - "columns": "${ .columns }", - "columns_bucket": "${ .columns_bucket }", - "populations": "${ .populations }", - "sifting_input": "${ .sifting_input }", - "individuals_file": "${ .individuals_file }" - } - } - } - } - ], - "end": true - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_8.sw.json b/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_8.sw.json deleted file mode 100644 index 6c921e38b..000000000 --- a/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_8.sw.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "id": "sebd_6101_1000_genome_individuals_python_3_8", - "name": "sebd_6101_1000_genome_individuals_python_3_8", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_6101_1000_genome_individuals_python_3_8_individuals", - "operation": "rest:post:http://172.17.0.19:9000/", - "type": "custom" - } - ], - "start": "individuals_state", - "states": [ - { - "name": "individuals_state", - "type": "foreach", - "inputCollection": "${ .blob }", - "outputCollection": "${ .blob }", - "iterationParam": "item", - "actions": [ - { - "name": "individuals", - "functionRef": { - "refName": "sebd_6101_1000_genome_individuals_python_3_8_individuals", - "arguments": { - "payload": { - "array_element": "${ item }", - 
"benchmark_bucket": "${ .benchmark_bucket }", - "bucket": "${ .bucket }", - "columns": "${ .columns }", - "columns_bucket": "${ .columns_bucket }", - "populations": "${ .populations }", - "sifting_input": "${ .sifting_input }", - "individuals_file": "${ .individuals_file }" - } - } - } - } - ], - "end": true - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json index 6efb86d9b..7206dd887 100644 --- a/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json +++ b/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json @@ -7,27 +7,27 @@ "functions": [ { "name": "sebd_610_gen_python_3_11_get_astros", - "operation": "rest:post:http://172.18.0.3:9000/", + "operation": "rest:post:http://172.18.0.5:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_11_few_people", - "operation": "rest:post:http://172.18.0.2:9000/", + "operation": "rest:post:http://172.18.0.4:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_11_many_people", - "operation": "rest:post:http://172.18.0.4:9000/", + "operation": "rest:post:http://172.18.0.6:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_11_map_astros", - "operation": "rest:post:http://172.18.0.5:9000/", + "operation": "rest:post:http://172.18.0.7:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_11_process_astros", - "operation": "rest:post:http://172.18.0.6:9000/", + "operation": "rest:post:http://172.18.0.8:9000/", "type": "custom" } ], @@ -54,11 +54,11 @@ "type": "switch", "dataConditions": [ { - "condition": "astros.number < 10", + "condition": "${ .astros.number < 10 }", "transition": "few_people" }, { - "condition": "astros.number >= 10", + "condition": "${ .astros.number >= 10 }", "transition": "many_people" } ], diff --git a/sonataflow-workflows/workflows/sebd_610_gen_python_3_8.sw.json b/sonataflow-workflows/workflows/sebd_610_gen_python_3_8.sw.json 
deleted file mode 100644 index fd5b55f57..000000000 --- a/sonataflow-workflows/workflows/sebd_610_gen_python_3_8.sw.json +++ /dev/null @@ -1,137 +0,0 @@ -{ - "id": "sebd_610_gen_python_3_8", - "name": "sebd_610_gen_python_3_8", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_610_gen_python_3_8_get_astros", - "operation": "rest:post:http://172.17.0.5:9000/", - "type": "custom" - }, - { - "name": "sebd_610_gen_python_3_8_few_people", - "operation": "rest:post:http://172.17.0.4:9000/", - "type": "custom" - }, - { - "name": "sebd_610_gen_python_3_8_many_people", - "operation": "rest:post:http://172.17.0.6:9000/", - "type": "custom" - }, - { - "name": "sebd_610_gen_python_3_8_map_astros", - "operation": "rest:post:http://172.17.0.7:9000/", - "type": "custom" - }, - { - "name": "sebd_610_gen_python_3_8_process_astros", - "operation": "rest:post:http://172.17.0.8:9000/", - "type": "custom" - } - ], - "start": "get_astros", - "states": [ - { - "name": "get_astros", - "type": "operation", - "actions": [ - { - "name": "get_astros", - "functionRef": { - "refName": "sebd_610_gen_python_3_8_get_astros", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "select_astros_number" - }, - { - "name": "select_astros_number", - "type": "switch", - "dataConditions": [ - { - "condition": "astros.number < 10", - "transition": "few_people" - }, - { - "condition": "astros.number >= 10", - "transition": "many_people" - } - ], - "defaultCondition": { - "transition": "few_people" - } - }, - { - "name": "few_people", - "type": "operation", - "actions": [ - { - "name": "few_people", - "functionRef": { - "refName": "sebd_610_gen_python_3_8_few_people", - "arguments": { - "payload": "${ . 
}" - } - } - } - ], - "transition": "map_astros" - }, - { - "name": "many_people", - "type": "operation", - "actions": [ - { - "name": "many_people", - "functionRef": { - "refName": "sebd_610_gen_python_3_8_many_people", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "map_astros" - }, - { - "name": "map_astros", - "type": "foreach", - "inputCollection": "${ .astros.people }", - "outputCollection": "${ .astros.people }", - "iterationParam": "item", - "actions": [ - { - "name": "map_astros", - "functionRef": { - "refName": "sebd_610_gen_python_3_8_map_astros", - "arguments": { - "payload": "${ item }" - } - } - } - ], - "transition": "process_astros" - }, - { - "name": "process_astros", - "type": "operation", - "actions": [ - { - "name": "process_astros", - "functionRef": { - "refName": "sebd_610_gen_python_3_8_process_astros", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "end": true - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json deleted file mode 100644 index 5bfd6ceb6..000000000 --- a/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json +++ /dev/null @@ -1,183 +0,0 @@ -{ - "id": "sebd_6200_trip_booking_python_3_11", - "name": "sebd_6200_trip_booking_python_3_11", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_6200_trip_booking_python_3_11_reserve_hotel", - "operation": "rest:post:http://172.17.0.66:9000/", - "type": "custom" - }, - { - "name": "sebd_6200_trip_booking_python_3_11_reserve_rental", - "operation": "rest:post:http://172.17.0.67:9000/", - "type": "custom" - }, - { - "name": "sebd_6200_trip_booking_python_3_11_reserve_flight", - "operation": "rest:post:http://172.17.0.65:9000/", - "type": "custom" - }, - { - "name": 
"sebd_6200_trip_booking_python_3_11_confirm", - "operation": "rest:post:http://172.17.0.64:9000/", - "type": "custom" - }, - { - "name": "sebd_6200_trip_booking_python_3_11_cancel_flight", - "operation": "rest:post:http://172.17.0.61:9000/", - "type": "custom" - }, - { - "name": "sebd_6200_trip_booking_python_3_11_cancel_rental", - "operation": "rest:post:http://172.17.0.63:9000/", - "type": "custom" - }, - { - "name": "sebd_6200_trip_booking_python_3_11_cancel_hotel", - "operation": "rest:post:http://172.17.0.62:9000/", - "type": "custom" - } - ], - "start": "hotel", - "states": [ - { - "name": "hotel", - "type": "operation", - "actions": [ - { - "name": "reserve_hotel", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_11_reserve_hotel", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "rental" - }, - { - "name": "rental", - "type": "operation", - "actions": [ - { - "name": "reserve_rental", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_11_reserve_rental", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "flight", - "onErrors": [ - { - "errorRef": "workflow_error", - "transition": "cancel_hotel" - } - ] - }, - { - "name": "flight", - "type": "operation", - "actions": [ - { - "name": "reserve_flight", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_11_reserve_flight", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "confirm", - "onErrors": [ - { - "errorRef": "workflow_error", - "transition": "cancel_rental" - } - ] - }, - { - "name": "confirm", - "type": "operation", - "actions": [ - { - "name": "confirm", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_11_confirm", - "arguments": { - "payload": "${ . 
}" - } - } - } - ], - "end": true, - "onErrors": [ - { - "errorRef": "workflow_error", - "transition": "cancel_flight" - } - ] - }, - { - "name": "cancel_flight", - "type": "operation", - "actions": [ - { - "name": "cancel_flight", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_11_cancel_flight", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "cancel_rental" - }, - { - "name": "cancel_rental", - "type": "operation", - "actions": [ - { - "name": "cancel_rental", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_11_cancel_rental", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "cancel_hotel" - }, - { - "name": "cancel_hotel", - "type": "operation", - "actions": [ - { - "name": "cancel_hotel", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_11_cancel_hotel", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "end": true - } - ], - "errors": [ - { - "name": "workflow_error", - "code": "*" - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_8.sw.json b/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_8.sw.json deleted file mode 100644 index f5a7f4612..000000000 --- a/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_8.sw.json +++ /dev/null @@ -1,183 +0,0 @@ -{ - "id": "sebd_6200_trip_booking_python_3_8", - "name": "sebd_6200_trip_booking_python_3_8", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_6200_trip_booking_python_3_8_reserve_hotel", - "operation": "rest:post:http://172.17.1.104:9000/", - "type": "custom" - }, - { - "name": "sebd_6200_trip_booking_python_3_8_reserve_rental", - "operation": "rest:post:http://172.17.1.105:9000/", - "type": "custom" - }, - { - "name": "sebd_6200_trip_booking_python_3_8_reserve_flight", - "operation": "rest:post:http://172.17.1.103:9000/", - 
"type": "custom" - }, - { - "name": "sebd_6200_trip_booking_python_3_8_confirm", - "operation": "rest:post:http://172.17.1.102:9000/", - "type": "custom" - }, - { - "name": "sebd_6200_trip_booking_python_3_8_cancel_flight", - "operation": "rest:post:http://172.17.1.99:9000/", - "type": "custom" - }, - { - "name": "sebd_6200_trip_booking_python_3_8_cancel_rental", - "operation": "rest:post:http://172.17.1.101:9000/", - "type": "custom" - }, - { - "name": "sebd_6200_trip_booking_python_3_8_cancel_hotel", - "operation": "rest:post:http://172.17.1.100:9000/", - "type": "custom" - } - ], - "start": "hotel", - "states": [ - { - "name": "hotel", - "type": "operation", - "actions": [ - { - "name": "reserve_hotel", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_8_reserve_hotel", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "rental" - }, - { - "name": "rental", - "type": "operation", - "actions": [ - { - "name": "reserve_rental", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_8_reserve_rental", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "flight", - "onErrors": [ - { - "errorRef": "workflow_error", - "transition": "cancel_hotel" - } - ] - }, - { - "name": "flight", - "type": "operation", - "actions": [ - { - "name": "reserve_flight", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_8_reserve_flight", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "confirm", - "onErrors": [ - { - "errorRef": "workflow_error", - "transition": "cancel_rental" - } - ] - }, - { - "name": "confirm", - "type": "operation", - "actions": [ - { - "name": "confirm", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_8_confirm", - "arguments": { - "payload": "${ . 
}" - } - } - } - ], - "end": true, - "onErrors": [ - { - "errorRef": "workflow_error", - "transition": "cancel_flight" - } - ] - }, - { - "name": "cancel_flight", - "type": "operation", - "actions": [ - { - "name": "cancel_flight", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_8_cancel_flight", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "cancel_rental" - }, - { - "name": "cancel_rental", - "type": "operation", - "actions": [ - { - "name": "cancel_rental", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_8_cancel_rental", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "cancel_hotel" - }, - { - "name": "cancel_hotel", - "type": "operation", - "actions": [ - { - "name": "cancel_hotel", - "functionRef": { - "refName": "sebd_6200_trip_booking_python_3_8_cancel_hotel", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "end": true - } - ], - "errors": [ - { - "name": "workflow_error", - "code": "*" - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json deleted file mode 100644 index 55cd5de05..000000000 --- a/sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "id": "sebd_620_func_invo_python_3_11", - "name": "sebd_620_func_invo_python_3_11", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_620_func_invo_python_3_11_gen", - "operation": "rest:post:http://172.17.0.59:9000/", - "type": "custom" - }, - { - "name": "sebd_620_func_invo_python_3_11_process", - "operation": "rest:post:http://172.17.0.60:9000/", - "type": "custom" - } - ], - "start": "gen", - "states": [ - { - "name": "gen", - "type": "operation", - "actions": [ - { - "name": "gen", - "functionRef": { - "refName": 
"sebd_620_func_invo_python_3_11_gen", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "process" - }, - { - "name": "process", - "type": "foreach", - "inputCollection": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "iterationParam": "idx", - "actions": [ - { - "name": "process", - "functionRef": { - "refName": "sebd_620_func_invo_python_3_11_process", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "end": true - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json deleted file mode 100644 index 8d47cd48c..000000000 --- a/sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "id": "sebd_630_parallel_sleep_python_3_11", - "name": "sebd_630_parallel_sleep_python_3_11", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_630_parallel_sleep_python_3_11_generate", - "operation": "rest:post:http://172.17.0.68:9000/", - "type": "custom" - }, - { - "name": "sebd_630_parallel_sleep_python_3_11_process", - "operation": "rest:post:http://172.17.0.69:9000/", - "type": "custom" - } - ], - "start": "generate", - "states": [ - { - "name": "generate", - "type": "operation", - "actions": [ - { - "name": "generate", - "functionRef": { - "refName": "sebd_630_parallel_sleep_python_3_11_generate", - "arguments": { - "payload": "${ . 
}" - } - } - } - ], - "transition": "process-state" - }, - { - "name": "process-state", - "type": "foreach", - "inputCollection": "${ .buffer }", - "outputCollection": "${ .buffer }", - "iterationParam": "item", - "actions": [ - { - "name": "process", - "functionRef": { - "refName": "sebd_630_parallel_sleep_python_3_11_process", - "arguments": { - "payload": "${ item }" - } - } - } - ], - "end": true - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json deleted file mode 100644 index 1ad6ffb86..000000000 --- a/sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "id": "sebd_631_parallel_download_python_3_11", - "name": "sebd_631_parallel_download_python_3_11", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_631_parallel_download_python_3_11_generate", - "operation": "rest:post:http://172.17.0.70:9000/", - "type": "custom" - }, - { - "name": "sebd_631_parallel_download_python_3_11_process", - "operation": "rest:post:http://172.17.0.71:9000/", - "type": "custom" - } - ], - "start": "generate", - "states": [ - { - "name": "generate", - "type": "operation", - "actions": [ - { - "name": "generate", - "functionRef": { - "refName": "sebd_631_parallel_download_python_3_11_generate", - "arguments": { - "payload": "${ . 
}" - } - } - } - ], - "transition": "process1" - }, - { - "name": "process1", - "type": "foreach", - "inputCollection": "${ .buffer }", - "outputCollection": "${ .buffer }", - "iterationParam": "item", - "actions": [ - { - "name": "process", - "functionRef": { - "refName": "sebd_631_parallel_download_python_3_11_process", - "arguments": { - "payload": "${ item }" - } - } - } - ], - "end": true - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json deleted file mode 100644 index 44d626158..000000000 --- a/sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "id": "sebd_640_selfish_detour_python_3_11", - "name": "sebd_640_selfish_detour_python_3_11", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_640_selfish_detour_python_3_11_measure", - "operation": "rest:post:http://172.17.0.44:9000/", - "type": "custom" - } - ], - "start": "measure", - "states": [ - { - "name": "measure", - "type": "operation", - "actions": [ - { - "name": "measure", - "functionRef": { - "refName": "sebd_640_selfish_detour_python_3_11_measure", - "arguments": { - "payload": "${ . 
}" - } - } - } - ], - "end": true - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json deleted file mode 100644 index 05747d39a..000000000 --- a/sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "id": "sebd_650_vid_python_3_11", - "name": "sebd_650_vid_python_3_11", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_650_vid_python_3_11_decode", - "operation": "rest:post:http://172.17.0.46:9000/", - "type": "custom" - }, - { - "name": "sebd_650_vid_python_3_11_analyse", - "operation": "rest:post:http://172.17.0.45:9000/", - "type": "custom" - }, - { - "name": "sebd_650_vid_python_3_11_summarize", - "operation": "rest:post:http://172.17.0.47:9000/", - "type": "custom" - } - ], - "start": "decode", - "states": [ - { - "name": "decode", - "type": "operation", - "actions": [ - { - "name": "decode", - "functionRef": { - "refName": "sebd_650_vid_python_3_11_decode", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "analyse-map" - }, - { - "name": "analyse-map", - "type": "foreach", - "inputCollection": "${ .frames }", - "outputCollection": "${ .frames }", - "iterationParam": "item", - "actions": [ - { - "name": "analyse", - "functionRef": { - "refName": "sebd_650_vid_python_3_11_analyse", - "arguments": { - "payload": "${ item }" - } - } - } - ], - "transition": "summarize" - }, - { - "name": "summarize", - "type": "operation", - "actions": [ - { - "name": "summarize", - "functionRef": { - "refName": "sebd_650_vid_python_3_11_summarize", - "arguments": { - "payload": "${ . 
}" - } - } - } - ], - "end": true - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json deleted file mode 100644 index a23dcc014..000000000 --- a/sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json +++ /dev/null @@ -1,102 +0,0 @@ -{ - "id": "sebd_660_map_reduce_python_3_11", - "name": "sebd_660_map_reduce_python_3_11", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_660_map_reduce_python_3_11_split", - "operation": "rest:post:http://172.17.0.35:9000/", - "type": "custom" - }, - { - "name": "sebd_660_map_reduce_python_3_11_map", - "operation": "rest:post:http://172.17.0.32:9000/", - "type": "custom" - }, - { - "name": "sebd_660_map_reduce_python_3_11_shuffle", - "operation": "rest:post:http://172.17.0.34:9000/", - "type": "custom" - }, - { - "name": "sebd_660_map_reduce_python_3_11_reduce", - "operation": "rest:post:http://172.17.0.33:9000/", - "type": "custom" - } - ], - "start": "split", - "states": [ - { - "name": "split", - "type": "operation", - "actions": [ - { - "name": "split", - "functionRef": { - "refName": "sebd_660_map_reduce_python_3_11_split", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "map-state" - }, - { - "name": "map-state", - "type": "foreach", - "inputCollection": "${ .list }", - "outputCollection": "${ .list }", - "iterationParam": "item", - "actions": [ - { - "name": "map", - "functionRef": { - "refName": "sebd_660_map_reduce_python_3_11_map", - "arguments": { - "payload": "${ item }" - } - } - } - ], - "transition": "shuffle" - }, - { - "name": "shuffle", - "type": "operation", - "actions": [ - { - "name": "shuffle", - "functionRef": { - "refName": "sebd_660_map_reduce_python_3_11_shuffle", - "arguments": { - "payload": "${ . 
}" - } - } - } - ], - "transition": "reduce-state" - }, - { - "name": "reduce-state", - "type": "foreach", - "inputCollection": "${ .list }", - "outputCollection": "${ .list }", - "iterationParam": "item", - "actions": [ - { - "name": "reduce", - "functionRef": { - "refName": "sebd_660_map_reduce_python_3_11_reduce", - "arguments": { - "payload": "${ item }" - } - } - } - ], - "end": true - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json deleted file mode 100644 index 53f4d351d..000000000 --- a/sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "id": "sebd_670_auth_python_3_11", - "name": "sebd_670_auth_python_3_11", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_670_auth_python_3_11_auth", - "operation": "rest:post:http://172.17.0.36:9000/", - "type": "custom" - } - ], - "start": "auth", - "states": [ - { - "name": "auth", - "type": "operation", - "actions": [ - { - "name": "auth", - "functionRef": { - "refName": "sebd_670_auth_python_3_11_auth", - "arguments": { - "payload": "${ . 
}" - } - } - } - ], - "end": true - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json deleted file mode 100644 index abda66182..000000000 --- a/sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json +++ /dev/null @@ -1,104 +0,0 @@ -{ - "id": "sebd_680_excamera_python_3_11", - "name": "sebd_680_excamera_python_3_11", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_680_excamera_python_3_11_split", - "operation": "rest:post:http://172.17.0.40:9000/", - "type": "custom" - }, - { - "name": "sebd_680_excamera_python_3_11_encode", - "operation": "rest:post:http://172.17.0.37:9000/", - "type": "custom" - }, - { - "name": "sebd_680_excamera_python_3_11_reencode", - "operation": "rest:post:http://172.17.0.39:9000/", - "type": "custom" - }, - { - "name": "sebd_680_excamera_python_3_11_rebase", - "operation": "rest:post:http://172.17.0.38:9000/", - "type": "custom" - } - ], - "start": "split", - "states": [ - { - "name": "split", - "type": "operation", - "actions": [ - { - "name": "split", - "functionRef": { - "refName": "sebd_680_excamera_python_3_11_split", - "arguments": { - "payload": "${ . 
}" - } - } - } - ], - "transition": "encode-state" - }, - { - "name": "encode-state", - "type": "foreach", - "inputCollection": "${ .segments }", - "outputCollection": "${ .segments }", - "iterationParam": "item", - "actions": [ - { - "name": "encode", - "functionRef": { - "refName": "sebd_680_excamera_python_3_11_encode", - "arguments": { - "payload": "${ item }" - } - } - } - ], - "transition": "reencode-state" - }, - { - "name": "reencode-state", - "type": "foreach", - "inputCollection": "${ .segments }", - "outputCollection": "${ .segments }", - "iterationParam": "item", - "actions": [ - { - "name": "reencode", - "functionRef": { - "refName": "sebd_680_excamera_python_3_11_reencode", - "arguments": { - "payload": "${ item }" - } - } - } - ], - "transition": "rebase-state" - }, - { - "name": "rebase-state", - "type": "foreach", - "inputCollection": "${ .segments }", - "iterationParam": "item", - "actions": [ - { - "name": "rebase", - "functionRef": { - "refName": "sebd_680_excamera_python_3_11_rebase", - "arguments": { - "payload": "${ .item }" - } - } - } - ], - "end": true - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json deleted file mode 100644 index 9934c854e..000000000 --- a/sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "id": "sebd_690_ml_python_3_11", - "name": "sebd_690_ml_python_3_11", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_690_ml_python_3_11_generate", - "operation": "rest:post:http://172.17.0.41:9000/", - "type": "custom" - }, - { - "name": "sebd_690_ml_python_3_11_train", - "operation": "rest:post:http://172.17.0.42:9000/", - "type": "custom" - } - ], - "start": "generate", - "states": [ - { - "name": "generate", - "type": "operation", - "actions": [ - { - "name": 
"generate", - "functionRef": { - "refName": "sebd_690_ml_python_3_11_generate", - "arguments": { - "payload": "${ . }" - } - } - } - ], - "transition": "train-state" - }, - { - "name": "train-state", - "type": "foreach", - "inputCollection": "${ .schedules }", - "outputCollection": "${ .schedules }", - "iterationParam": "item", - "actions": [ - { - "name": "train", - "functionRef": { - "refName": "sebd_690_ml_python_3_11_train", - "arguments": { - "payload": "${ item }" - } - } - } - ], - "end": true - } - ] -} \ No newline at end of file From 927f6f0e7990ef0dd047a1f70e175adb7143032c Mon Sep 17 00:00:00 2001 From: Russellpang Date: Tue, 6 Jan 2026 22:24:25 +0100 Subject: [PATCH 80/82] log --- benchmarks-data | 2 +- .../sonataflow/python/function_workflow.py | 31 ++-- config/local_deployment.json | 10 +- config/local_workflows.json | 10 +- dockerfiles/sonataflow/nodejs/server.js | 37 ++--- dockerfiles/sonataflow/python/server.py | 37 +++-- experiments.json | 42 +----- out_storage.json | 8 +- .../sebd-610.gen-python-3.11/sonataflow.csv | 1 + run_sonataflow_workflows.sh | 108 +++++++++++--- sebs.py | 8 +- sebs/sonataflow/generator.py | 8 +- sebs/sonataflow/sonataflow.py | 59 +++++++- sebs/sonataflow/triggers.py | 101 ++++++++++--- .../sebd_6100_1000_genome_python_3_11.sw.json | 135 ------------------ .../sebd_610_gen_python_3_11.sw.json | 25 +++- 16 files changed, 343 insertions(+), 279 deletions(-) create mode 100644 results/sebd-610.gen-python-3.11/sonataflow.csv delete mode 100644 sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json diff --git a/benchmarks-data b/benchmarks-data index 6a5990b81..48c6af825 160000 --- a/benchmarks-data +++ b/benchmarks-data @@ -1 +1 @@ -Subproject commit 6a5990b81eaa14df5144b41321354903cb4ea3a8 +Subproject commit 48c6af825425662691c107c5a52b765f6bf1822c diff --git a/benchmarks/wrappers/sonataflow/python/function_workflow.py b/benchmarks/wrappers/sonataflow/python/function_workflow.py index 3b359bf84..4c0648295 
100644 --- a/benchmarks/wrappers/sonataflow/python/function_workflow.py +++ b/benchmarks/wrappers/sonataflow/python/function_workflow.py @@ -28,13 +28,17 @@ def _load_function_handler(): def _maybe_push_measurement(event, duration_start, duration_end): redis_host = os.getenv("SEBS_REDIS_HOST") + redis_port = int(os.getenv("SEBS_REDIS_PORT", "6379")) if not redis_host: + print(f"[workflow] SEBS_REDIS_HOST not set, skipping measurement", flush=True) return workflow_name = os.getenv("SEBS_WORKFLOW_NAME", "workflow") func_name = os.getenv("SEBS_WORKFLOW_FUNC", "function") request_id = event["request_id"] + print(f"[workflow] attempting to connect to Redis at {redis_host}:{redis_port}", flush=True) + payload = { "func": func_name, "start": duration_start, @@ -55,17 +59,22 @@ def _maybe_push_measurement(event, duration_start, duration_end): if download_bytes.isdigit(): payload["blob.download"] = int(download_bytes) - redis = Redis( - host=redis_host, - port=int(os.getenv("SEBS_REDIS_PORT", "6379")), - decode_responses=True, - socket_connect_timeout=10, - password=os.getenv("SEBS_REDIS_PASSWORD"), - ) - - key = os.path.join(workflow_name, func_name, request_id, str(uuid.uuid4())[0:8]) - redis.set(key, json.dumps(payload)) - print(f"[workflow] stored measurement {key}") + try: + redis = Redis( + host=redis_host, + port=redis_port, + decode_responses=True, + socket_connect_timeout=10, + password=os.getenv("SEBS_REDIS_PASSWORD"), + ) + + key = os.path.join(workflow_name, func_name, request_id, str(uuid.uuid4())[0:8]) + redis.set(key, json.dumps(payload)) + print(f"[workflow] stored measurement {key}", flush=True) + except Exception as e: + print(f"[workflow] ERROR storing measurement to Redis: {e}", flush=True) + import traceback + traceback.print_exc() def handler(event): diff --git a/config/local_deployment.json b/config/local_deployment.json index a56e4fb46..9bf310602 100644 --- a/config/local_deployment.json +++ b/config/local_deployment.json @@ -114,7 +114,7 @@ 
"sonataflow": { "resources": { "redis": { - "host": "localhost:6380", + "host": "localhost:6381", "password": "" }, "runtime": { @@ -136,9 +136,9 @@ "minio": { "address": "172.18.0.2:9000", "mapped_port": 9011, - "access_key": "1BLdaNtOSM0ydc6JdiCnVUkuFG17-F-WNgEjQDZ5OOk", - "secret_key": "13a8d3970b537c17c25e3a772c6ffaa4635423ca99d942508d3f5a626e242289", - "instance_id": "f95ec8af54b7c94293469dac2e20a46fe244bc33761a75a6b0c1cd9e04f2fcd4", + "access_key": "JFWdNebAtS321z6UUsallfkxYXlz1jSJ-5v9Uqx5NEc", + "secret_key": "ae469750563d84c1d8aed83655cdd17443da9ba4ce8729f95268d63a336a61b5", + "instance_id": "d4ea989f0e9206f0df21547073fc3303c5a00fac9dad579025ce9e4bd3756d16", "input_buckets": [], "output_buckets": [] } @@ -148,7 +148,7 @@ "scylladb": { "address": "172.18.0.3:8000", "mapped_port": 9012, - "instance_id": "6186beb7178ed2d8c9317eafdd2726d66ce0798a65e74b9e6b6234fc9dba2c76" + "instance_id": "d1cf3f482b4f9c344271a3dd54da68050eaabf2c047151a816c01046577361d3" } } } diff --git a/config/local_workflows.json b/config/local_workflows.json index 1813356f2..ccc44b3f4 100644 --- a/config/local_workflows.json +++ b/config/local_workflows.json @@ -91,7 +91,7 @@ "sonataflow": { "resources": { "redis": { - "host": "localhost:6380", + "host": "localhost:6381", "password": "" }, "runtime": { @@ -113,9 +113,9 @@ "minio": { "address": "172.18.0.2:9000", "mapped_port": 9011, - "access_key": "1BLdaNtOSM0ydc6JdiCnVUkuFG17-F-WNgEjQDZ5OOk", - "secret_key": "13a8d3970b537c17c25e3a772c6ffaa4635423ca99d942508d3f5a626e242289", - "instance_id": "f95ec8af54b7c94293469dac2e20a46fe244bc33761a75a6b0c1cd9e04f2fcd4", + "access_key": "JFWdNebAtS321z6UUsallfkxYXlz1jSJ-5v9Uqx5NEc", + "secret_key": "ae469750563d84c1d8aed83655cdd17443da9ba4ce8729f95268d63a336a61b5", + "instance_id": "d4ea989f0e9206f0df21547073fc3303c5a00fac9dad579025ce9e4bd3756d16", "input_buckets": [], "output_buckets": [] } @@ -125,7 +125,7 @@ "scylladb": { "address": "172.18.0.3:8000", "mapped_port": 9012, - "instance_id": 
"6186beb7178ed2d8c9317eafdd2726d66ce0798a65e74b9e6b6234fc9dba2c76" + "instance_id": "d1cf3f482b4f9c344271a3dd54da68050eaabf2c047151a816c01046577361d3" } } }, diff --git a/dockerfiles/sonataflow/nodejs/server.js b/dockerfiles/sonataflow/nodejs/server.js index c98b3fa72..cdd0af5aa 100644 --- a/dockerfiles/sonataflow/nodejs/server.js +++ b/dockerfiles/sonataflow/nodejs/server.js @@ -17,26 +17,30 @@ app.post('/alive', function (req, res) { app.post('/', function (req, res) { - let begin = Date.now(); - let ret = f.handler(req.body); - ret.then((func_res) => { + // SonataFlow sends requests wrapped in {"payload": ...} + // Unwrap the payload before passing to the function + let function_input = req.body; + if (req.body && typeof req.body === 'object' && Object.prototype.hasOwnProperty.call(req.body, 'payload')) { + function_input = req.body.payload; + } - let end = Date.now(); + let ret = f.handler(function_input); + ret.then((func_res) => { + let output = func_res; + if (func_res && typeof func_res === 'object' && Object.prototype.hasOwnProperty.call(func_res, 'payload')) { + output = func_res.payload; + } res.setHeader('Content-Type', 'application/json'); - res.end(JSON.stringify({ - begin: strftime('%s.%L', new Date(begin)), - end: strftime('%s.%L', new Date(end)), - request_id: uuidv4(), - is_cold: false, - result: { - output: func_res - } - })); + res.end(JSON.stringify(output)); }, (reason) => { - console.log('Function invocation failed!'); - console.log(reason); - process.exit(1); + console.error('Function invocation failed!'); + console.error('Request body:', JSON.stringify(req.body, null, 2)); + console.error('Error:', reason); + res.status(500).json({ + error: reason.message || String(reason), + stack: reason.stack + }); } ); }); @@ -45,4 +49,3 @@ app.listen(port=process.argv[2], function () { console.log(`Server listening on port ${process.argv[2]}.`); }); - diff --git a/dockerfiles/sonataflow/python/server.py b/dockerfiles/sonataflow/python/server.py index 
4ed1314f2..0917e42db 100644 --- a/dockerfiles/sonataflow/python/server.py +++ b/dockerfiles/sonataflow/python/server.py @@ -11,28 +11,35 @@ @route('/alive', method='GET') def alive(): return { - "result:" "ok" + "result": "ok" } @route('/', method='POST') def process_request(): - begin = datetime.datetime.now() from function import function - end = datetime.datetime.now() - # FIXME: measurements? - ret = function.handler(request.json) + import traceback + try: + # SonataFlow sends requests wrapped in {"payload": ...} + # Unwrap the payload before passing to the function + request_data = request.json + if isinstance(request_data, dict) and "payload" in request_data: + function_input = request_data["payload"] + else: + function_input = request_data - return { - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - "request_id": str(uuid.uuid4()), - "is_cold": False, - "result": { - "output": ret - } - } + ret = function.handler(function_input) + + # Wrap response in payload if not already wrapped + if isinstance(ret, dict) and "payload" in ret: + return ret["payload"] + return ret + except Exception as e: + print(f"Error processing request: {e}", file=sys.stderr) + print(f"Request JSON: {request.json}", file=sys.stderr) + traceback.print_exc() + bottle.response.status = 500 + return {"error": str(e), "traceback": traceback.format_exc()} sys.path.append(os.path.join(CODE_LOCATION)) sys.path.append(os.path.join(CODE_LOCATION, '.python_packages/lib/site-packages/')) run(host='0.0.0.0', port=int(sys.argv[1]), debug=True) - diff --git a/experiments.json b/experiments.json index 3911f9f9d..d67ba145f 100644 --- a/experiments.json +++ b/experiments.json @@ -1,35 +1,7 @@ { - "_invocations": { - "sebd-6100.1000-genome-python-3.11": { - "30bee4aa": { - "billing": { - "_billed_time": null, - "_gb_seconds": 0, - "_memory": null - }, - "output": {}, - "provider_times": { - "execution": 0, - "initialization": 0 - }, - "request_id": "30bee4aa", - "stats": { - 
"cold_start": false, - "failure": true, - "memory_used": null - }, - "times": { - "benchmark": 0, - "client": 30220, - "client_begin": "2025-12-28 02:00:14.362132", - "client_end": "2025-12-28 02:00:14.392352", - "initialization": 0 - } - } - } - }, + "_invocations": {}, "_metrics": {}, - "begin_time": 1766883614.36036, + "begin_time": 1767730851.130424, "config": { "deployment": { "name": "sonataflow", @@ -42,7 +14,7 @@ "alternator_port": 8000, "cpus": -1, "data_volume": "", - "instance_id": "6186beb7178ed2d8c9317eafdd2726d66ce0798a65e74b9e6b6234fc9dba2c76", + "instance_id": "d1cf3f482b4f9c344271a3dd54da68050eaabf2c047151a816c01046577361d3", "mapped_port": 9012, "memory": -1, "network_name": "bridge", @@ -55,15 +27,15 @@ "url": "http://localhost:8080" }, "storage": { - "access_key": "1BLdaNtOSM0ydc6JdiCnVUkuFG17-F-WNgEjQDZ5OOk", + "access_key": "JFWdNebAtS321z6UUsallfkxYXlz1jSJ-5v9Uqx5NEc", "address": "172.18.0.2:9000", "data_volume": "", "input_buckets": [], - "instance_id": "f95ec8af54b7c94293469dac2e20a46fe244bc33761a75a6b0c1cd9e04f2fcd4", + "instance_id": "d4ea989f0e9206f0df21547073fc3303c5a00fac9dad579025ce9e4bd3756d16", "mapped_port": 9011, "network_name": "bridge", "output_buckets": [], - "secret_key": "13a8d3970b537c17c25e3a772c6ffaa4635423ca99d942508d3f5a626e242289", + "secret_key": "ae469750563d84c1d8aed83655cdd17443da9ba4ce8729f95268d63a336a61b5", "type": "minio", "version": "" } @@ -122,6 +94,6 @@ "update_storage": false } }, - "end_time": 1766883614.392648, + "end_time": 1767730851.133506, "result_bucket": null } \ No newline at end of file diff --git a/out_storage.json b/out_storage.json index dc41e0466..9ec7f5912 100644 --- a/out_storage.json +++ b/out_storage.json @@ -4,9 +4,9 @@ "minio": { "address": "172.18.0.2:9000", "mapped_port": 9011, - "access_key": "1BLdaNtOSM0ydc6JdiCnVUkuFG17-F-WNgEjQDZ5OOk", - "secret_key": "13a8d3970b537c17c25e3a772c6ffaa4635423ca99d942508d3f5a626e242289", - "instance_id": 
"f95ec8af54b7c94293469dac2e20a46fe244bc33761a75a6b0c1cd9e04f2fcd4", + "access_key": "JFWdNebAtS321z6UUsallfkxYXlz1jSJ-5v9Uqx5NEc", + "secret_key": "ae469750563d84c1d8aed83655cdd17443da9ba4ce8729f95268d63a336a61b5", + "instance_id": "d4ea989f0e9206f0df21547073fc3303c5a00fac9dad579025ce9e4bd3756d16", "output_buckets": [], "input_buckets": [], "version": "RELEASE.2024-07-16T23-46-41Z", @@ -23,7 +23,7 @@ "alternator_port": 8000, "access_key": "None", "secret_key": "None", - "instance_id": "6186beb7178ed2d8c9317eafdd2726d66ce0798a65e74b9e6b6234fc9dba2c76", + "instance_id": "d1cf3f482b4f9c344271a3dd54da68050eaabf2c047151a816c01046577361d3", "region": "None", "cpus": 1, "memory": "750", diff --git a/results/sebd-610.gen-python-3.11/sonataflow.csv b/results/sebd-610.gen-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/sebd-610.gen-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/run_sonataflow_workflows.sh b/run_sonataflow_workflows.sh index 02a014fb5..6426c4e42 100755 --- a/run_sonataflow_workflows.sh +++ b/run_sonataflow_workflows.sh @@ -57,6 +57,18 @@ cleanup() { } trap cleanup EXIT +# Clean up stale workflow definitions and cached workflows from previous runs +# These contain hardcoded IPs that become invalid when containers restart +echo "Cleaning up stale workflow definitions and cached workflows..." 
+rm -f "$PWD/sonataflow-workflows/workflows"/*.sw.json 2>/dev/null || true +# Delete entire workflow cache directories to force full regeneration +if command -v docker >/dev/null 2>&1 && [ -d cache ]; then + docker run --rm -v "$PWD/cache:/cache" alpine sh -c "find /cache -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} +" 2>/dev/null || true +fi +if [ -d "$PWD/cache" ]; then + rm -rf "$PWD/cache"/* 2>/dev/null || true +fi + "$SEBS_PYTHON" ./sebs.py storage start all config/storage.json --output-json out_storage.json MINIO_ADDRESS=$(jq -r '.object.minio.address' out_storage.json) @@ -91,7 +103,7 @@ for cfg in config/local_workflows.json config/local_deployment.json; do --arg saddr "$SCYLLA_ADDRESS" \ --argjson sport "$SCYLLA_PORT" \ --arg sinst "$SCYLLA_INSTANCE" \ - --arg redis_host "localhost:6380" \ + --arg redis_host "localhost:6381" \ --arg redis_pass "" \ --arg runtime_url "$RUNTIME_URL" \ --arg endpoint_prefix "$ENDPOINT_PREFIX" \ @@ -116,10 +128,16 @@ for cfg in config/local_workflows.json config/local_deployment.json; do mv "$tmp" "$cfg" done -if docker ps -a --format '{{.Names}}' | grep -q '^sebs-redis$'; then - docker rm -f sebs-redis >/dev/null +# Create sebs-network if it doesn't exist (needed before Redis starts) +docker network inspect sebs-network >/dev/null 2>&1 || docker network create sebs-network + +# Start Redis if not already running +if ! 
docker ps --format '{{.Names}}' | grep -q '^sebs-redis$'; then + # Remove any stopped Redis container + docker rm -f sebs-redis >/dev/null 2>&1 || true + # Start Redis on sebs-network so function containers can reach it + docker run -d --name sebs-redis --network sebs-network -p 6381:6379 redis:7 fi -docker run -d --name sebs-redis -p 6380:6379 redis:7 # Prepare SonataFlow resources directory structure expected by kogito-swf-devmode: # - `src/main/resources/application.properties` @@ -222,16 +240,19 @@ wait_for_workflow_endpoint() { if [ "$prefix" != "services" ]; then urls+=("${base_url%/}/services/${workflow_id}") fi - local attempts=40 - local delay=3 + local attempts=60 + local delay=5 echo "Waiting for workflow endpoint(s): ${urls[*]} ..." for i in $(seq 1 $attempts); do for url in "${urls[@]}"; do # GET will likely return 405 for POST-only endpoints; 404 means not loaded yet + # 500/503 mean workflow is loading/compiling, keep waiting code=$(curl -s -o /dev/null -w "%{http_code}" "$url" || true) - if [ "$code" != "404" ] && [ "$code" != "000" ]; then + if [ "$code" = "200" ] || [ "$code" = "405" ]; then echo "Workflow endpoint responding at $url with HTTP $code." return 0 + elif [ "$code" != "404" ] && [ "$code" != "000" ]; then + echo "Workflow endpoint at $url returned HTTP $code (still loading), waiting..." fi done sleep "$delay" @@ -239,6 +260,27 @@ wait_for_workflow_endpoint() { echo "Warning: Workflow endpoint(s) not responding after $((attempts * delay))s" } +ensure_runtime_networks() { + if ! command -v docker >/dev/null 2>&1; then + return 0 + fi + if ! docker ps -a --format '{{.Names}}' 2>/dev/null | grep -q '^sonataflow-runtime$'; then + return 0 + fi + if ! command -v jq >/dev/null 2>&1; then + return 0 + fi + + local networks + networks=$(docker inspect -f '{{json .NetworkSettings.Networks}}' sonataflow-runtime 2>/dev/null || echo "{}") + if ! 
echo "$networks" | jq -e 'has("sebs-network")' >/dev/null 2>&1; then + docker network connect sebs-network sonataflow-runtime >/dev/null 2>&1 || true + fi + if ! echo "$networks" | jq -e 'has("bridge")' >/dev/null 2>&1; then + docker network connect bridge sonataflow-runtime >/dev/null 2>&1 || true + fi +} + preflight_runtime_function_connectivity() { local sw_json=$1 if ! command -v docker >/dev/null 2>&1; then @@ -254,6 +296,8 @@ preflight_runtime_function_connectivity() { return 0 fi + ensure_runtime_networks + # Extract function base URLs from `rest:post:http://host:port/` operations. mapfile -t urls < <(jq -r '.functions[]?.operation // empty' "$sw_json" 2>/dev/null \ | sed -n 's#^rest:post:##p' | sed -e 's#/*$#/#' | sort -u) @@ -266,23 +310,45 @@ preflight_runtime_function_connectivity() { local http_cmd http_cmd=$(docker exec sonataflow-runtime sh -lc 'if command -v curl >/dev/null 2>&1; then echo curl; elif command -v wget >/dev/null 2>&1; then echo wget; elif command -v python3 >/dev/null 2>&1; then echo python3; elif command -v python >/dev/null 2>&1; then echo python; else echo none; fi' 2>/dev/null || echo none) if [ "$http_cmd" = "none" ]; then - echo "Preflight skipped: no curl/wget/python found inside sonataflow-runtime." - return 0 + http_cmd="" fi local failed=0 - for u in "${urls[@]}"; do - # Use `/alive` which SeBS function containers expose. 
- if [ "$http_cmd" = "curl" ]; then - docker exec sonataflow-runtime sh -lc "curl -fsS --max-time 3 '${u}alive' >/dev/null" >/dev/null 2>&1 || failed=1 - elif [ "$http_cmd" = "wget" ]; then - docker exec sonataflow-runtime sh -lc "wget -q -T 3 -O - '${u}alive' >/dev/null" >/dev/null 2>&1 || failed=1 - else - docker exec sonataflow-runtime sh -lc "$http_cmd - <<'PY'\nimport sys, urllib.request\nurl=sys.argv[1]\nurllib.request.urlopen(url, timeout=3).read(1)\nPY\n'${u}alive'" >/dev/null 2>&1 || failed=1 - fi - if [ "$failed" -ne 0 ]; then - echo " Cannot reach ${u}alive from sonataflow-runtime" + if [ -n "$http_cmd" ]; then + for u in "${urls[@]}"; do + # Use `/alive` which SeBS function containers expose. + if [ "$http_cmd" = "curl" ]; then + if ! docker exec sonataflow-runtime sh -lc "curl -fsS --max-time 3 '${u}alive' >/dev/null" >/dev/null 2>&1; then + echo " Cannot reach ${u}alive from sonataflow-runtime" + failed=1 + fi + elif [ "$http_cmd" = "wget" ]; then + if ! docker exec sonataflow-runtime sh -lc "wget -q -T 3 -O - '${u}alive' >/dev/null" >/dev/null 2>&1; then + echo " Cannot reach ${u}alive from sonataflow-runtime" + failed=1 + fi + else + if ! docker exec sonataflow-runtime sh -lc "$http_cmd - <<'PY'\nimport sys, urllib.request\nurl=sys.argv[1]\nurllib.request.urlopen(url, timeout=3).read(1)\nPY\n'${u}alive'" >/dev/null 2>&1; then + echo " Cannot reach ${u}alive from sonataflow-runtime" + failed=1 + fi + fi + done + else + if ! docker network inspect sebs-network >/dev/null 2>&1; then + echo "Preflight skipped: sebs-network not found and sonataflow-runtime lacks curl/wget/python." + return 0 fi - done + docker run --rm --network sebs-network busybox sh -c ' +failed=0 +for u in "$@"; do + if ! 
wget -q -T 3 -O - "${u}alive" >/dev/null 2>&1; then + echo " Cannot reach ${u}alive from sebs-network" + failed=1 + fi +done +exit $failed +' -- "${urls[@]}" || failed=1 + fi if [ "$failed" -ne 0 ]; then echo "Preflight failed: SonataFlow cannot reach one or more function containers." echo "Hint: ensure sonataflow-runtime and sebd-*___* function containers share a Docker network, and that SeBS and docker CLI use the same Docker daemon/context." diff --git a/sebs.py b/sebs.py index 6c32f9b2e..02af08e42 100755 --- a/sebs.py +++ b/sebs.py @@ -37,8 +37,14 @@ def __call__(self, *args, **kwargs): logging.info("# Experiments failed! See out.log for details") finally: # Close + # For SonataFlow deployments, skip shutdown to keep containers alive + # The external script will manage container lifecycle if deployment_client is not None: - deployment_client.shutdown() + deployment_name = getattr(deployment_client.config, '_name', '') + if deployment_name != 'sonataflow': + deployment_client.shutdown() + else: + logging.info("Skipping deployment shutdown for SonataFlow (containers kept alive)") if sebs_client is not None: sebs_client.shutdown() diff --git a/sebs/sonataflow/generator.py b/sebs/sonataflow/generator.py index fb604a8b1..f946c04dc 100644 --- a/sebs/sonataflow/generator.py +++ b/sebs/sonataflow/generator.py @@ -17,6 +17,8 @@ def __init__(self, workflow_id: str, bindings: Dict[str, Dict[str, str]]): self._bindings = bindings self._functions: Dict[str, Dict[str, str]] = {} self._uses_errors = False # Track if any state uses onErrors + # Unwrap SeBS local server responses so workflow state data stays as payload. + self._action_results_expr = "${ .result.output.payload // .payload // . }" def _function_ref(self, func_name: str) -> Dict[str, str]: binding = self._bindings.get(func_name) @@ -39,7 +41,11 @@ def _function_ref(self, func_name: str) -> Dict[str, str]: def _default_action(self, func_name: str, payload_ref: str = "${ . 
}") -> Dict[str, object]: ref = self._function_ref(func_name) ref["arguments"] = {"payload": payload_ref} - return {"name": func_name, "functionRef": ref} + return { + "name": func_name, + "functionRef": ref, + "actionDataFilter": {"results": self._action_results_expr}, + } def postprocess(self, payloads: List[dict]) -> dict: workflow_def = { diff --git a/sebs/sonataflow/sonataflow.py b/sebs/sonataflow/sonataflow.py index 5336dd049..4cb9714cc 100644 --- a/sebs/sonataflow/sonataflow.py +++ b/sebs/sonataflow/sonataflow.py @@ -130,7 +130,32 @@ def _function_network_endpoint(self, func: LocalFunction) -> Tuple[str, str]: return Local._function_network_endpoint(self, func) def _workflow_env(self, workflow_name: str, module_name: str) -> Dict[str, str]: - return Local._workflow_env(self, workflow_name, module_name) + # Get base environment from Local + env = Local._workflow_env(self, workflow_name, module_name) + + # Override Redis configuration for SonataFlow containers on sebs-network + # Function containers are on sebs-network and should use the Redis container hostname + redis_host = self.config.resources.redis_host + if redis_host: + if ":" in redis_host: + host, port = redis_host.split(":", 1) + else: + host, port = redis_host, "6379" + + # If the config specifies localhost, use the Redis container hostname instead + if host in ("127.0.0.1", "localhost"): + env["SEBS_REDIS_HOST"] = "sebs-redis" + env["SEBS_REDIS_PORT"] = "6379" # Use internal port, not mapped port + self.logging.info(f"Overriding Redis config for {module_name}: sebs-redis:6379") + else: + env["SEBS_REDIS_HOST"] = host + env["SEBS_REDIS_PORT"] = port + + if self.config.resources.redis_password: + env["SEBS_REDIS_PASSWORD"] = self.config.resources.redis_password + + self.logging.debug(f"Container env for {module_name}: SEBS_REDIS_HOST={env.get('SEBS_REDIS_HOST')}, SEBS_REDIS_PORT={env.get('SEBS_REDIS_PORT')}") + return env def _allocate_host_port(self, start_port: int, range_size: int = 1000) -> int: 
return Local._allocate_host_port(self, start_port, range_size) @@ -159,6 +184,9 @@ def _start_container( func: Optional[LocalFunction], env_overrides: Optional[Dict[str, str]] = None, ) -> LocalFunction: + import requests + import time + # Override to use custom network for SonataFlow # Create sebs-network if it doesn't exist try: @@ -172,8 +200,33 @@ def _start_container( # Connect the container to sebs-network try: network = self._docker_client.networks.get("sebs-network") - network.connect(func_instance.container.id) - self.logging.info(f"Connected container {func_instance.container.name} to sebs-network") + network.connect(func_instance.container.id, aliases=[func_name]) + self.logging.info( + f"Connected container {func_instance.container.name} to sebs-network (alias {func_name})" + ) + + # Wait for the container to be reachable on sebs-network + # Get the sebs-network IP + func_instance.container.reload() + networks = func_instance.container.attrs.get("NetworkSettings", {}).get("Networks", {}) + sf_net = networks.get("sebs-network", {}) + sebs_ip = sf_net.get("IPAddress") + + if sebs_ip: + # Health check on sebs-network IP + max_attempts = 10 + attempts = 0 + while attempts < max_attempts: + try: + requests.get(f"http://{sebs_ip}:{Local.DEFAULT_PORT}/alive", timeout=1) + self.logging.debug(f"Container {func_instance.container.name} ready on sebs-network at {sebs_ip}") + break + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): + time.sleep(0.25) + attempts += 1 + + if attempts >= max_attempts: + self.logging.warning(f"Container {func_instance.container.name} not responding on sebs-network IP {sebs_ip} after {max_attempts} attempts") except Exception as e: self.logging.warning(f"Failed to connect container to sebs-network: {e}") diff --git a/sebs/sonataflow/triggers.py b/sebs/sonataflow/triggers.py index 30f06dfe2..29bab2100 100644 --- a/sebs/sonataflow/triggers.py +++ b/sebs/sonataflow/triggers.py @@ -50,40 +50,101 @@ def 
_candidate_endpoints(self) -> list[tuple[str, str]]: return out def _invoke(self, payload: dict) -> ExecutionResult: + import time request_id = str(uuid.uuid4())[0:8] begin = datetime.datetime.now() result = ExecutionResult.from_times(begin, begin) try: + body = payload + if isinstance(payload, dict): + body = dict(payload) + body.setdefault("request_id", request_id) endpoint_used = self._endpoint() - resp = requests.post( - endpoint_used, - json={"payload": payload, "request_id": request_id}, - timeout=900, - ) - if resp.status_code == 404: - # Auto-detect the correct endpoint layout. - for prefix, endpoint in self._candidate_endpoints(): - if endpoint == endpoint_used: + + # Retry logic for 404 (workflow not loaded yet) + max_retries = 30 + retry_delay = 2 + resp = None + original_endpoint = endpoint_used + + for attempt in range(max_retries): + # Try the main endpoint first + resp = requests.post( + endpoint_used, + json=body, + timeout=900, + ) + self.logging.debug(f"Attempt {attempt + 1}: {endpoint_used} returned {resp.status_code}") + + # Check if we should retry + if resp.status_code == 404: + # Auto-detect the correct endpoint layout. + found_endpoint = False + for prefix, endpoint in self._candidate_endpoints(): + if endpoint == original_endpoint: + # Already tried this one as the main attempt + continue + self.logging.debug(f"Trying candidate: {endpoint}") + resp = requests.post( + endpoint, + json=body, + timeout=900, + ) + self.logging.debug(f"Candidate {endpoint} returned {resp.status_code}") + if resp.status_code != 404 and resp.status_code != 503: + # Found the correct endpoint! + self._endpoint_prefix = prefix + endpoint_used = endpoint + found_endpoint = True + self.logging.info(f"Found workflow at {endpoint}") + break + + if not found_endpoint and attempt < max_retries - 1: + # Workflow not loaded yet, wait and retry + self.logging.info( + f"Workflow endpoint not ready (404), retrying in {retry_delay}s... 
(attempt {attempt + 1}/{max_retries})" + ) + time.sleep(retry_delay) + # Reset to original endpoint for next attempt + endpoint_used = original_endpoint continue - resp = requests.post( - endpoint, - json={"payload": payload, "request_id": request_id}, - timeout=900, - ) - endpoint_used = endpoint - if resp.status_code != 404: - self._endpoint_prefix = prefix + elif not found_endpoint: + # Final attempt failed + self.logging.error(f"Workflow endpoint not found after {max_retries} attempts") break + elif resp.status_code in [500, 503] and attempt < max_retries - 1: + # Service error (SonataFlow loading/restarting), wait and retry + self.logging.info( + f"SonataFlow not ready ({resp.status_code}), retrying in {retry_delay}s... (attempt {attempt + 1}/{max_retries})" + ) + time.sleep(retry_delay) + endpoint_used = original_endpoint + continue + + # Success or non-retryable error, break out of retry loop + break + end = datetime.datetime.now() result = ExecutionResult.from_times(begin, end) result.request_id = request_id - if resp.status_code >= 300: + if resp and resp.status_code >= 300: result.stats.failure = True + try: + error_text = resp.text[:500] if len(resp.text) > 500 else resp.text + except: + error_text = "" self.logging.error( - f"SonataFlow invocation failed ({resp.status_code}): {resp.text}" + f"SonataFlow invocation failed ({resp.status_code}): {error_text}" ) + elif resp: + try: + result.output = resp.json() + except Exception as e: + result.stats.failure = True + self.logging.error(f"Failed to parse SonataFlow response: {e}") else: - result.output = resp.json() + result.stats.failure = True + self.logging.error("SonataFlow invocation failed: No response received") except Exception as exc: end = datetime.datetime.now() result = ExecutionResult.from_times(begin, end) diff --git a/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json deleted file mode 100644 index 
45aa90fba..000000000 --- a/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json +++ /dev/null @@ -1,135 +0,0 @@ -{ - "id": "sebd_6100_1000_genome_python_3_11", - "name": "sebd_6100_1000_genome_python_3_11", - "version": "0.1", - "specVersion": "0.8", - "description": "Auto-generated from SeBS workflow definition.", - "functions": [ - { - "name": "sebd_6100_1000_genome_python_3_11_individuals", - "operation": "rest:post:http://172.18.0.11:9000/", - "type": "custom" - }, - { - "name": "sebd_6100_1000_genome_python_3_11_individuals_merge", - "operation": "rest:post:http://172.18.0.12:9000/", - "type": "custom" - }, - { - "name": "sebd_6100_1000_genome_python_3_11_sifting", - "operation": "rest:post:http://172.18.0.14:9000/", - "type": "custom" - }, - { - "name": "sebd_6100_1000_genome_python_3_11_frequency", - "operation": "rest:post:http://172.18.0.10:9000/", - "type": "custom" - }, - { - "name": "sebd_6100_1000_genome_python_3_11_mutation_overlap", - "operation": "rest:post:http://172.18.0.13:9000/", - "type": "custom" - } - ], - "start": "individuals", - "states": [ - { - "name": "individuals", - "type": "foreach", - "inputCollection": "${ .blob }", - "outputCollection": "${ .blob }", - "iterationParam": "item", - "actions": [ - { - "name": "individuals", - "functionRef": { - "refName": "sebd_6100_1000_genome_python_3_11_individuals", - "arguments": { - "payload": { - "array_element": "${ item }", - "benchmark_bucket": "${ .benchmark_bucket }", - "bucket": "${ .bucket }", - "columns": "${ .columns }", - "columns_bucket": "${ .columns_bucket }", - "populations": "${ .populations }", - "sifting_input": "${ .sifting_input }", - "individuals_file": "${ .individuals_file }" - } - } - } - } - ], - "transition": "merge_and_sifting" - }, - { - "name": "merge_and_sifting", - "type": "parallel", - "branches": [ - { - "name": "individuals_merge", - "actions": [ - { - "name": "individuals_merge", - "functionRef": { - "refName": 
"sebd_6100_1000_genome_python_3_11_individuals_merge", - "arguments": { - "payload": "${ . }" - } - } - } - ] - }, - { - "name": "sifting", - "actions": [ - { - "name": "sifting", - "functionRef": { - "refName": "sebd_6100_1000_genome_python_3_11_sifting", - "arguments": { - "payload": "${ . }" - } - } - } - ] - } - ], - "transition": "frequency_and_overlap" - }, - { - "name": "frequency_and_overlap", - "type": "parallel", - "branches": [ - { - "name": "frequency", - "actions": [ - { - "name": "frequency", - "functionRef": { - "refName": "sebd_6100_1000_genome_python_3_11_frequency", - "arguments": { - "payload": "${ . }" - } - } - } - ] - }, - { - "name": "mutation_overlap", - "actions": [ - { - "name": "mutation_overlap", - "functionRef": { - "refName": "sebd_6100_1000_genome_python_3_11_mutation_overlap", - "arguments": { - "payload": "${ . }" - } - } - } - ] - } - ], - "end": true - } - ] -} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json index 7206dd887..062f1bdb7 100644 --- a/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json +++ b/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json @@ -7,27 +7,27 @@ "functions": [ { "name": "sebd_610_gen_python_3_11_get_astros", - "operation": "rest:post:http://172.18.0.5:9000/", + "operation": "rest:post:http://172.18.0.20:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_11_few_people", - "operation": "rest:post:http://172.18.0.4:9000/", + "operation": "rest:post:http://172.18.0.19:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_11_many_people", - "operation": "rest:post:http://172.18.0.6:9000/", + "operation": "rest:post:http://172.18.0.21:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_11_map_astros", - "operation": "rest:post:http://172.18.0.7:9000/", + "operation": "rest:post:http://172.18.0.22:9000/", "type": "custom" }, { "name": 
"sebd_610_gen_python_3_11_process_astros", - "operation": "rest:post:http://172.18.0.8:9000/", + "operation": "rest:post:http://172.18.0.23:9000/", "type": "custom" } ], @@ -44,6 +44,9 @@ "arguments": { "payload": "${ . }" } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" } } ], @@ -77,6 +80,9 @@ "arguments": { "payload": "${ . }" } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" } } ], @@ -93,6 +99,9 @@ "arguments": { "payload": "${ . }" } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" } } ], @@ -112,6 +121,9 @@ "arguments": { "payload": "${ item }" } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" } } ], @@ -128,6 +140,9 @@ "arguments": { "payload": "${ . }" } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" } } ], From e2c728c4a891ba5c3fea3b6118d28b014df1a025 Mon Sep 17 00:00:00 2001 From: Russellpang Date: Wed, 7 Jan 2026 19:05:51 +0100 Subject: [PATCH 81/82] log results --- .../6100.1000-genome/python/requirements.txt | 4 +- .../local/python/function_workflow.py | 12 +- .../sonataflow/python/function_workflow.py | 12 +- config/local_deployment.json | 8 +- config/local_workflows.json | 8 +- experiments.json | 64 ++++- experiments_sebd-610.gen-python-3.11.json | 268 ++++++++++++++++++ ...nts_sebd-6100.1000-genome-python-3.11.json | 212 ++++++++++++++ ...1.1000-genome-individuals-python-3.11.json | 127 +++++++++ ...iments_sebd-620.func-invo-python-3.11.json | 127 +++++++++ ...ts_sebd-6200.trip-booking-python-3.11.json | 127 +++++++++ ...s_sebd-630.parallel-sleep-python-3.11.json | 138 +++++++++ ...ebd-631.parallel-download-python-3.11.json | 142 ++++++++++ ...s_sebd-640.selfish-detour-python-3.11.json | 134 +++++++++ experiments_sebd-650.vid-python-3.11.json | 188 ++++++++++++ ...ments_sebd-660.map-reduce-python-3.11.json | 159 +++++++++++ 
experiments_sebd-670.auth-python-3.11.json | 135 +++++++++ ...riments_sebd-680.excamera-python-3.11.json | 184 ++++++++++++ experiments_sebd-690.ml-python-3.11.json | 149 ++++++++++ out_storage.json | 8 +- .../sebd-610.gen-python-3.11/sonataflow.csv | 5 +- .../sonataflow.csv | 4 +- .../sonataflow.csv | 3 +- .../sonataflow.csv | 8 +- .../sonataflow.csv | 3 +- .../sebd-650.vid-python-3.11/sonataflow.csv | 4 +- .../sonataflow.csv | 4 +- .../sebd-670.auth-python-3.11/sonataflow.csv | 3 +- .../sonataflow.csv | 3 +- .../sebd-690.ml-python-3.11/sonataflow.csv | 3 +- run_sonataflow.pid | 1 + run_sonataflow_workflows.sh | 22 +- sebs.py | 6 +- sebs/sonataflow/generator.py | 72 ++++- sebs/sonataflow/triggers.py | 4 +- .../sebd_6100_1000_genome_python_3_11.sw.json | 161 +++++++++++ ...000_genome_individuals_python_3_11.sw.json | 48 ++++ .../sebd_610_gen_python_3_11.sw.json | 12 +- ...sebd_6200_trip_booking_python_3_11.sw.json | 204 +++++++++++++ .../sebd_620_func_invo_python_3_11.sw.json | 71 +++++ ...ebd_630_parallel_sleep_python_3_11.sw.json | 63 ++++ ..._631_parallel_download_python_3_11.sw.json | 63 ++++ ...ebd_640_selfish_detour_python_3_11.sw.json | 36 +++ .../sebd_650_vid_python_3_11.sw.json | 87 ++++++ .../sebd_660_map_reduce_python_3_11.sw.json | 114 ++++++++ .../sebd_670_auth_python_3_11.sw.json | 36 +++ .../sebd_680_excamera_python_3_11.sw.json | 116 ++++++++ .../workflows/sebd_690_ml_python_3_11.sw.json | 63 ++++ 48 files changed, 3359 insertions(+), 66 deletions(-) create mode 100644 experiments_sebd-610.gen-python-3.11.json create mode 100644 experiments_sebd-6100.1000-genome-python-3.11.json create mode 100644 experiments_sebd-6101.1000-genome-individuals-python-3.11.json create mode 100644 experiments_sebd-620.func-invo-python-3.11.json create mode 100644 experiments_sebd-6200.trip-booking-python-3.11.json create mode 100644 experiments_sebd-630.parallel-sleep-python-3.11.json create mode 100644 experiments_sebd-631.parallel-download-python-3.11.json create 
mode 100644 experiments_sebd-640.selfish-detour-python-3.11.json create mode 100644 experiments_sebd-650.vid-python-3.11.json create mode 100644 experiments_sebd-660.map-reduce-python-3.11.json create mode 100644 experiments_sebd-670.auth-python-3.11.json create mode 100644 experiments_sebd-680.excamera-python-3.11.json create mode 100644 experiments_sebd-690.ml-python-3.11.json create mode 100644 run_sonataflow.pid create mode 100644 sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json create mode 100644 sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json diff --git a/benchmarks/600.workflows/6100.1000-genome/python/requirements.txt b/benchmarks/600.workflows/6100.1000-genome/python/requirements.txt index 21479b821..ba14aed5c 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/requirements.txt +++ b/benchmarks/600.workflows/6100.1000-genome/python/requirements.txt @@ -1,2 +1,2 @@ -numpy==1.17 -matplotlib +numpy==1.26.4 +matplotlib==3.8.4 diff --git a/benchmarks/wrappers/local/python/function_workflow.py 
b/benchmarks/wrappers/local/python/function_workflow.py index 3b359bf84..d0d0a0f7a 100644 --- a/benchmarks/wrappers/local/python/function_workflow.py +++ b/benchmarks/wrappers/local/python/function_workflow.py @@ -26,6 +26,16 @@ def _load_function_handler(): return _FUNCTION_HANDLER +def _extract_request_id(event): + request_id = event.get("request_id") + if request_id: + return request_id + payload = event.get("payload") + if isinstance(payload, dict): + return payload.get("request_id") or payload.get("request-id") + return None + + def _maybe_push_measurement(event, duration_start, duration_end): redis_host = os.getenv("SEBS_REDIS_HOST") if not redis_host: @@ -78,7 +88,7 @@ def handler(event): if "payload" not in event: raise RuntimeError("Workflow invocation payload must include 'payload' key.") - request_id = event.get("request_id", str(uuid.uuid4())) + request_id = _extract_request_id(event) or str(uuid.uuid4()) event["request_id"] = request_id payload = event["payload"] handler_fn = _load_function_handler() diff --git a/benchmarks/wrappers/sonataflow/python/function_workflow.py b/benchmarks/wrappers/sonataflow/python/function_workflow.py index 4c0648295..3881b02c8 100644 --- a/benchmarks/wrappers/sonataflow/python/function_workflow.py +++ b/benchmarks/wrappers/sonataflow/python/function_workflow.py @@ -26,6 +26,16 @@ def _load_function_handler(): return _FUNCTION_HANDLER +def _extract_request_id(event): + request_id = event.get("request_id") + if request_id: + return request_id + payload = event.get("payload") + if isinstance(payload, dict): + return payload.get("request_id") or payload.get("request-id") + return None + + def _maybe_push_measurement(event, duration_start, duration_end): redis_host = os.getenv("SEBS_REDIS_HOST") redis_port = int(os.getenv("SEBS_REDIS_PORT", "6379")) @@ -87,7 +97,7 @@ def handler(event): if "payload" not in event: raise RuntimeError("Workflow invocation payload must include 'payload' key.") - request_id = 
event.get("request_id", str(uuid.uuid4())) + request_id = _extract_request_id(event) or str(uuid.uuid4()) event["request_id"] = request_id payload = event["payload"] handler_fn = _load_function_handler() diff --git a/config/local_deployment.json b/config/local_deployment.json index 9bf310602..bd44e9b84 100644 --- a/config/local_deployment.json +++ b/config/local_deployment.json @@ -136,9 +136,9 @@ "minio": { "address": "172.18.0.2:9000", "mapped_port": 9011, - "access_key": "JFWdNebAtS321z6UUsallfkxYXlz1jSJ-5v9Uqx5NEc", - "secret_key": "ae469750563d84c1d8aed83655cdd17443da9ba4ce8729f95268d63a336a61b5", - "instance_id": "d4ea989f0e9206f0df21547073fc3303c5a00fac9dad579025ce9e4bd3756d16", + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", "input_buckets": [], "output_buckets": [] } @@ -148,7 +148,7 @@ "scylladb": { "address": "172.18.0.3:8000", "mapped_port": 9012, - "instance_id": "d1cf3f482b4f9c344271a3dd54da68050eaabf2c047151a816c01046577361d3" + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b" } } } diff --git a/config/local_workflows.json b/config/local_workflows.json index ccc44b3f4..3875462b4 100644 --- a/config/local_workflows.json +++ b/config/local_workflows.json @@ -113,9 +113,9 @@ "minio": { "address": "172.18.0.2:9000", "mapped_port": 9011, - "access_key": "JFWdNebAtS321z6UUsallfkxYXlz1jSJ-5v9Uqx5NEc", - "secret_key": "ae469750563d84c1d8aed83655cdd17443da9ba4ce8729f95268d63a336a61b5", - "instance_id": "d4ea989f0e9206f0df21547073fc3303c5a00fac9dad579025ce9e4bd3756d16", + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", "input_buckets": [], "output_buckets": [] } @@ 
-125,7 +125,7 @@ "scylladb": { "address": "172.18.0.3:8000", "mapped_port": 9012, - "instance_id": "d1cf3f482b4f9c344271a3dd54da68050eaabf2c047151a816c01046577361d3" + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b" } } }, diff --git a/experiments.json b/experiments.json index d67ba145f..cb06338e4 100644 --- a/experiments.json +++ b/experiments.json @@ -1,7 +1,57 @@ { - "_invocations": {}, + "_invocations": { + "sebd-690.ml-python-3.11": { + "e6b2713e": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "f3d89608-c7c2-4054-8849-c2daa7ecece4", + "workflowdata": { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "classifiers": [ + { + "C": 0.025, + "kernel": "linear", + "name": "SVC" + } + ], + "dataset_bucket": "690.ml-0-output", + "n_features": 5, + "n_samples": 100, + "request_id": "e6b2713e", + "schedules": [ + { + "name": "SVC", + "score": 0.925 + } + ] + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "e6b2713e", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 2380060, + "client_begin": "2026-01-07 03:56:42.451065", + "client_end": "2026-01-07 03:56:44.831125", + "initialization": 0 + } + } + } + }, "_metrics": {}, - "begin_time": 1767730851.130424, + "begin_time": 1767754602.448902, "config": { "deployment": { "name": "sonataflow", @@ -14,7 +64,7 @@ "alternator_port": 8000, "cpus": -1, "data_volume": "", - "instance_id": "d1cf3f482b4f9c344271a3dd54da68050eaabf2c047151a816c01046577361d3", + "instance_id": "ec89b6c77f2efe30d9442b9b4fa12242c0e0a47b56fd494db2c2f77d0eac9804", "mapped_port": 9012, "memory": -1, "network_name": "bridge", @@ -27,15 +77,15 @@ "url": "http://localhost:8080" }, "storage": { - "access_key": "JFWdNebAtS321z6UUsallfkxYXlz1jSJ-5v9Uqx5NEc", + "access_key": "veg9pif0_IdAKBcFAOx3tbI0t1FAEkLnOf4GXymojwo", "address": 
"172.18.0.2:9000", "data_volume": "", "input_buckets": [], - "instance_id": "d4ea989f0e9206f0df21547073fc3303c5a00fac9dad579025ce9e4bd3756d16", + "instance_id": "2e7b46cd4f88d0d2c9831c8fc25200176030abbeec32169897cf1558450d920b", "mapped_port": 9011, "network_name": "bridge", "output_buckets": [], - "secret_key": "ae469750563d84c1d8aed83655cdd17443da9ba4ce8729f95268d63a336a61b5", + "secret_key": "0c938f69cd525d8c85a911111e2688c0b837806ef4b2b0e2665b293155401a99", "type": "minio", "version": "" } @@ -94,6 +144,6 @@ "update_storage": false } }, - "end_time": 1767730851.133506, + "end_time": 1767754604.831985, "result_bucket": null } \ No newline at end of file diff --git a/experiments_sebd-610.gen-python-3.11.json b/experiments_sebd-610.gen-python-3.11.json new file mode 100644 index 000000000..388dfe644 --- /dev/null +++ b/experiments_sebd-610.gen-python-3.11.json @@ -0,0 +1,268 @@ +{ + "_invocations": { + "sebd-610.gen-python-3.11": { + "64ea8b9d": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "f4661a5e-1415-4aa9-b4d8-cfe517725d9b", + "workflowdata": { + "astros": { + "astros": { + "message": "success", + "number": 12, + "people": [ + { + "craft": "ISS", + "name": "Oleg Kononenko", + "name_rev": "Kononenko Oleg" + }, + { + "craft": "ISS", + "name": "Nikolai Chub", + "name_rev": "Chub Nikolai" + }, + { + "craft": "ISS", + "name": "Tracy Caldwell Dyson", + "name_rev": "Dyson Tracy" + }, + { + "craft": "ISS", + "name": "Matthew Dominick", + "name_rev": "Dominick Matthew" + }, + { + "craft": "ISS", + "name": "Michael Barratt", + "name_rev": "Barratt Michael" + }, + { + "craft": "ISS", + "name": "Jeanette Epps", + "name_rev": "Epps Jeanette" + }, + { + "craft": "ISS", + "name": "Alexander Grebenkin", + "name_rev": "Grebenkin Alexander" + }, + { + "craft": "ISS", + "name": "Butch Wilmore", + "name_rev": "Wilmore Butch" + }, + { + "craft": "ISS", + "name": "Sunita Williams", + "name_rev": "Williams Sunita" + }, + { 
+ "craft": "Tiangong", + "name": "Li Guangsu", + "name_rev": "Guangsu Li" + }, + { + "craft": "Tiangong", + "name": "Li Cong", + "name_rev": "Cong Li" + }, + { + "craft": "Tiangong", + "name": "Ye Guangfu", + "name_rev": "Guangfu Ye" + } + ] + }, + "many_astros": true, + "message": "success", + "number": 12, + "people": [ + { + "craft": "ISS", + "name": "Oleg Kononenko", + "name_rev": "Kononenko Oleg" + }, + { + "craft": "ISS", + "name": "Nikolai Chub", + "name_rev": "Chub Nikolai" + }, + { + "craft": "ISS", + "name": "Tracy Caldwell Dyson", + "name_rev": "Dyson Tracy" + }, + { + "craft": "ISS", + "name": "Matthew Dominick", + "name_rev": "Dominick Matthew" + }, + { + "craft": "ISS", + "name": "Michael Barratt", + "name_rev": "Barratt Michael" + }, + { + "craft": "ISS", + "name": "Jeanette Epps", + "name_rev": "Epps Jeanette" + }, + { + "craft": "ISS", + "name": "Alexander Grebenkin", + "name_rev": "Grebenkin Alexander" + }, + { + "craft": "ISS", + "name": "Butch Wilmore", + "name_rev": "Wilmore Butch" + }, + { + "craft": "ISS", + "name": "Sunita Williams", + "name_rev": "Williams Sunita" + }, + { + "craft": "Tiangong", + "name": "Li Guangsu", + "name_rev": "Guangsu Li" + }, + { + "craft": "Tiangong", + "name": "Li Cong", + "name_rev": "Cong Li" + }, + { + "craft": "Tiangong", + "name": "Ye Guangfu", + "name_rev": "Guangfu Ye" + } + ], + "request_id": "64ea8b9d" + }, + "done": true, + "many_astros": true, + "request_id": "64ea8b9d" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "64ea8b9d", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 3920895, + "client_begin": "2026-01-07 18:42:35.640173", + "client_end": "2026-01-07 18:42:39.561068", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767807755.637872, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + 
"nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767807759.562647, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-6100.1000-genome-python-3.11.json 
b/experiments_sebd-6100.1000-genome-python-3.11.json new file mode 100644 index 000000000..ba8d54350 --- /dev/null +++ b/experiments_sebd-6100.1000-genome-python-3.11.json @@ -0,0 +1,212 @@ +{ + "_invocations": { + "sebd-6100.1000-genome-python-3.11": { + "9f32c850": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "ab66c56a-339c-4138-bc28-32603f76bfcb", + "workflowdata": { + "_parallel_frequency_and_overlap_0_results": [ + { + "output_frequency": "chr21-AFR-freq.tar.a23079f0.gz" + }, + { + "output_frequency": "chr21-ALL-freq.tar.334d18ab.gz" + }, + { + "output_frequency": "chr21-AMR-freq.tar.ae171774.gz" + }, + { + "output_frequency": "chr21-EAS-freq.tar.66e46eae.gz" + }, + { + "output_frequency": "chr21-EUR-freq.tar.9a95a5c0.gz" + }, + { + "output_frequency": "chr21-GBR-freq.tar.4c2097c9.gz" + } + ], + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "blob": [ + { + "individuals_output": "chr21n-ALL.chr21.1250.vcf.tar.9ca02841.gz" + }, + { + "individuals_output": "chr21n-ALL.chr21.1250.vcf.tar.1d0671ec.gz" + }, + { + "individuals_output": "chr21n-ALL.chr21.1250.vcf.tar.64de5b48.gz" + }, + { + "individuals_output": "chr21n-ALL.chr21.1250.vcf.tar.0941b71e.gz" + }, + { + "individuals_output": "chr21n-ALL.chr21.1250.vcf.tar.720b7319.gz" + } + ], + "bucket": "6100.1000-genome-0-output", + "columns": "columns.txt", + "columns_bucket": "6100.1000-genome-0-input", + "individuals_file": "ALL.chr21.1250.vcf", + "individuals_merge": { + "merge_outputfile_name": "chr21n.tar.a25b5cf3.gz" + }, + "populations": [ + "AFR", + "ALL", + "AMR", + "EAS", + "EUR", + "GBR" + ], + "request_id": "9f32c850", + "sifting": { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "input_bucket": "6100.1000-genome-0-input", + "output_bucket": "6100.1000-genome-0-output", + "output_sifting": "sifted.SIFT.chr21.4c5c74d2.txt", + "populations": [ + { + "output_mutation_overlap": "chr21-AFR.tar.d5be3c0f.gz" + }, + { + 
"output_mutation_overlap": "chr21-ALL.tar.0cc10d0b.gz" + }, + { + "output_mutation_overlap": "chr21-AMR.tar.78ae52f3.gz" + }, + { + "output_mutation_overlap": "chr21-EAS.tar.afbf7206.gz" + }, + { + "output_mutation_overlap": "chr21-EUR.tar.df68aa89.gz" + }, + { + "output_mutation_overlap": "chr21-GBR.tar.02bb7bf3.gz" + } + ] + }, + "sifting_input": "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "9f32c850", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 201910288, + "client_begin": "2026-01-07 18:43:22.499229", + "client_end": "2026-01-07 18:46:44.409517", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767807802.497806, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + 
"function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808004.410645, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-6101.1000-genome-individuals-python-3.11.json b/experiments_sebd-6101.1000-genome-individuals-python-3.11.json new file mode 100644 index 000000000..1f5f9d03a --- /dev/null +++ b/experiments_sebd-6101.1000-genome-individuals-python-3.11.json @@ -0,0 +1,127 @@ +{ + "_invocations": { + "sebd-6101.1000-genome-individuals-python-3.11": { + "ea1e2488": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": {}, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "ea1e2488", + "stats": { + "cold_start": false, + "failure": true, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 59380648, + "client_begin": "2026-01-07 18:48:18.625256", + "client_end": "2026-01-07 18:49:18.005904", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808098.624247, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + 
"data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808158.006176, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-620.func-invo-python-3.11.json b/experiments_sebd-620.func-invo-python-3.11.json new file mode 100644 index 000000000..dde5f32c1 --- /dev/null +++ 
b/experiments_sebd-620.func-invo-python-3.11.json @@ -0,0 +1,127 @@ +{ + "_invocations": { + "sebd-620.func-invo-python-3.11": { + "eddf325d": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": {}, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "eddf325d", + "stats": { + "cold_start": false, + "failure": true, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 58520101, + "client_begin": "2026-01-07 18:49:46.989085", + "client_end": "2026-01-07 18:50:45.509186", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808186.986613, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + 
"payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808245.509385, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-6200.trip-booking-python-3.11.json b/experiments_sebd-6200.trip-booking-python-3.11.json new file mode 100644 index 000000000..ef83535fe --- /dev/null +++ b/experiments_sebd-6200.trip-booking-python-3.11.json @@ -0,0 +1,127 @@ +{ + "_invocations": { + "sebd-6200.trip-booking-python-3.11": { + "4ca0f1dd": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": {}, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "4ca0f1dd", + "stats": { + "cold_start": false, + "failure": true, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 58675677, + "client_begin": "2026-01-07 18:51:26.188106", + "client_end": "2026-01-07 18:52:24.863783", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808286.185467, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + 
"runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808344.864041, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-630.parallel-sleep-python-3.11.json b/experiments_sebd-630.parallel-sleep-python-3.11.json new file mode 100644 index 000000000..ac631aebe --- /dev/null +++ b/experiments_sebd-630.parallel-sleep-python-3.11.json @@ -0,0 +1,138 @@ +{ + "_invocations": { + "sebd-630.parallel-sleep-python-3.11": { + "34a1f4bd": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": 
"569cab83-368a-44ad-9502-13337e95ad53", + "workflowdata": { + "buffer": [ + "ok", + "ok" + ], + "count": 2, + "request_id": "34a1f4bd", + "sleep": 2 + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "34a1f4bd", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 4123684, + "client_begin": "2026-01-07 18:52:54.327731", + "client_end": "2026-01-07 18:52:58.451415", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808374.32533, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + 
"type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808378.453515, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-631.parallel-download-python-3.11.json b/experiments_sebd-631.parallel-download-python-3.11.json new file mode 100644 index 000000000..3f2e770cc --- /dev/null +++ b/experiments_sebd-631.parallel-download-python-3.11.json @@ -0,0 +1,142 @@ +{ + "_invocations": { + "sebd-631.parallel-download-python-3.11": { + "77f89ce3": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "5a60d717-f8ad-463f-8b89-e16ea3d711a3", + "workflowdata": { + "blob": "631.parallel-download-0-input/data-10.txt", + "bucket": "sebs-benchmarks-sonataflow-075e240d", + "buffer": [ + "ok", + "ok", + "ok", + "ok", + "ok" + ], + "count": 5, + "request_id": "77f89ce3" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "77f89ce3", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 169956, + "client_begin": "2026-01-07 18:53:27.747029", + "client_end": "2026-01-07 18:53:27.916985", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808407.745584, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": 
"533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808407.920742, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-640.selfish-detour-python-3.11.json b/experiments_sebd-640.selfish-detour-python-3.11.json new file mode 100644 index 000000000..ed1fdd4ad --- /dev/null +++ b/experiments_sebd-640.selfish-detour-python-3.11.json @@ -0,0 
+1,134 @@ +{ + "_invocations": { + "sebd-640.selfish-detour-python-3.11": { + "4ea3bae4": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "38e353c8-294d-4205-92e4-4947c841396f", + "workflowdata": { + "num_samples": 100, + "request_id": "4ea3bae4", + "response": "ok" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "4ea3bae4", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 10086565, + "client_begin": "2026-01-07 18:53:56.036991", + "client_end": "2026-01-07 18:54:06.123556", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808436.034907, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + 
"N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808446.125367, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-650.vid-python-3.11.json b/experiments_sebd-650.vid-python-3.11.json new file mode 100644 index 000000000..9efb48664 --- /dev/null +++ b/experiments_sebd-650.vid-python-3.11.json @@ -0,0 +1,188 @@ +{ + "_invocations": { + "sebd-650.vid-python-3.11": { + "eb3c486c": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "3d2a17ce-283d-4860-97fc-62c71fec55b7", + "workflowdata": { + "650": [ + { + "class": "car", + "score": 0.7900354862213135 + }, + { + "class": "car", + "score": 0.6695976853370667 + }, + { + "class": "car", + "score": 0.5970374345779419 + }, + { + "class": "car", + "score": 0.5392462015151978 + }, + { + "class": "car", + "score": 0.5122644901275635 + } + ], + "batch_size": 10, + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "frames": [ + { + "650": [ + { + "class": "car", + "score": 0.7900354862213135 + }, + { + "class": "car", + "score": 0.6695976853370667 + }, + { + "class": "car", + "score": 0.5970374345779419 + }, + { + "class": "car", + "score": 0.5392462015151978 + }, + { + "class": "car", + "score": 0.5122644901275635 + } + ] + } + ], + "frames_bucket": "650.vid-0-output", + "input_bucket": 
"650.vid-0-input", + "model_config": "faster_rcnn_resnet50_coco_2018_01_28.pbtxt", + "model_weights": "frozen_inference_graph.pb", + "n_frames": 3, + "request_id": "eb3c486c", + "video": "video_test.mp4" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "eb3c486c", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 2047114, + "client_begin": "2026-01-07 18:54:45.415712", + "client_end": "2026-01-07 18:54:47.462826", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808485.414011, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 
6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808487.464517, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-660.map-reduce-python-3.11.json b/experiments_sebd-660.map-reduce-python-3.11.json new file mode 100644 index 000000000..55d72845f --- /dev/null +++ b/experiments_sebd-660.map-reduce-python-3.11.json @@ -0,0 +1,159 @@ +{ + "_invocations": { + "sebd-660.map-reduce-python-3.11": { + "284ec8d7": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "681af736-c2d6-4eb5-9812-07f41744975c", + "workflowdata": { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "list": [ + { + "count": 50, + "word": "bird" + }, + { + "count": 50, + "word": "pig" + }, + { + "count": 50, + "word": "dog" + }, + { + "count": 50, + "word": "horse" + }, + { + "count": 50, + "word": "cat" + } + ], + "n_mappers": 3, + "output_bucket": "660.map-reduce-0-output", + "request_id": "284ec8d7", + "words": "words", + "words_bucket": "660.map-reduce-0-input" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "284ec8d7", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 558964, + "client_begin": "2026-01-07 18:55:19.711042", + "client_end": "2026-01-07 18:55:20.270006", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808519.709199, + "config": 
{ + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808520.27119, + "result_bucket": null +} \ No newline at end of file diff 
--git a/experiments_sebd-670.auth-python-3.11.json b/experiments_sebd-670.auth-python-3.11.json new file mode 100644 index 000000000..41688dc5b --- /dev/null +++ b/experiments_sebd-670.auth-python-3.11.json @@ -0,0 +1,135 @@ +{ + "_invocations": { + "sebd-670.auth-python-3.11": { + "8d3e9b67": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "21f2650b-6bff-4d69-b5f7-637b65e3393e", + "workflowdata": { + "message": "Who let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\n", + "request_id": "8d3e9b67", + "response": "IoDRYyZtnY6ZtQCw16+m5IDqdXi35RMI2fWGPqKxuvbFQEfrRePgGE9M+Ob2GdkUckqZV0RCbMqRozEgS2WlnzAFiUxJMewZrIJH3Tsg/s7WJuHbr9/uyS78JUtXGPsaZbO2CwfqUDrK3urbpxZuGlFOtOREJypD7i0iQKW1ocZROppH4QMqvzrJ0+LCOvY0yjDa7w4p224s//Rxuhfmjq9nKCRexzfBU3+2jowMdQzCvjWBcCMoifPfiVIW5CVi6z7iATo9swodlLePxspu/zwHX3bKi2IuqcgkRw==", + "token": "allow" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "8d3e9b67", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 45661, + "client_begin": "2026-01-07 18:55:49.146095", + "client_end": "2026-01-07 18:55:49.191756", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808549.14383, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + 
"url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808549.193409, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-680.excamera-python-3.11.json b/experiments_sebd-680.excamera-python-3.11.json new file mode 100644 index 000000000..4f3ec5b13 --- /dev/null +++ b/experiments_sebd-680.excamera-python-3.11.json @@ -0,0 +1,184 @@ +{ + "_invocations": { + "sebd-680.excamera-python-3.11": { + "9ccda4f4": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "75b9689e-a1fb-4f31-83e2-a6fbff49217f", + "workflowdata": { + "batch_size": 
6, + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "input_bucket": "680.excamera-0-input", + "output_bucket": "680.excamera-0-output", + "quality": 1, + "request_id": "9ccda4f4", + "segments": [ + { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "input_bucket": "680.excamera-0-input", + "output_bucket": "680.excamera-0-output", + "prefix": "12945549", + "quality": 1, + "segments": [ + "00000000.y4m", + "00000001.y4m", + "00000002.y4m", + "00000003.y4m", + "00000004.y4m", + "00000005.y4m" + ] + }, + { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "input_bucket": "680.excamera-0-input", + "output_bucket": "680.excamera-0-output", + "prefix": "11097751", + "quality": 1, + "segments": [ + "00000006.y4m", + "00000007.y4m", + "00000008.y4m", + "00000009.y4m", + "00000010.y4m", + "00000011.y4m" + ] + }, + { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "input_bucket": "680.excamera-0-input", + "output_bucket": "680.excamera-0-output", + "prefix": "11371388", + "quality": 1, + "segments": [ + "00000012.y4m", + "00000013.y4m", + "00000014.y4m", + "00000015.y4m", + "00000016.y4m", + "00000017.y4m" + ] + } + ] + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "9ccda4f4", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 9202517, + "client_begin": "2026-01-07 18:56:23.637670", + "client_end": "2026-01-07 18:56:32.840187", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808583.635935, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": 
"bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808592.841583, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-690.ml-python-3.11.json b/experiments_sebd-690.ml-python-3.11.json new file mode 100644 index 000000000..42a973dfe --- /dev/null +++ b/experiments_sebd-690.ml-python-3.11.json @@ -0,0 +1,149 @@ +{ + "_invocations": { + "sebd-690.ml-python-3.11": { + "1e4e7cce": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + 
"output": { + "id": "39f9c1b4-20e2-40aa-b583-18c822b99f55", + "workflowdata": { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "classifiers": [ + { + "C": 0.025, + "kernel": "linear", + "name": "SVC" + } + ], + "dataset_bucket": "690.ml-0-output", + "n_features": 5, + "n_samples": 100, + "request_id": "1e4e7cce", + "schedules": [ + { + "name": "SVC", + "score": 0.925 + } + ] + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "1e4e7cce", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 2207477, + "client_begin": "2026-01-07 18:57:12.129611", + "client_end": "2026-01-07 18:57:14.337088", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808632.127185, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + 
"repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808634.339108, + "result_bucket": null +} \ No newline at end of file diff --git a/out_storage.json b/out_storage.json index 9ec7f5912..0ed1219f4 100644 --- a/out_storage.json +++ b/out_storage.json @@ -4,9 +4,9 @@ "minio": { "address": "172.18.0.2:9000", "mapped_port": 9011, - "access_key": "JFWdNebAtS321z6UUsallfkxYXlz1jSJ-5v9Uqx5NEc", - "secret_key": "ae469750563d84c1d8aed83655cdd17443da9ba4ce8729f95268d63a336a61b5", - "instance_id": "d4ea989f0e9206f0df21547073fc3303c5a00fac9dad579025ce9e4bd3756d16", + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", "output_buckets": [], "input_buckets": [], "version": "RELEASE.2024-07-16T23-46-41Z", @@ -23,7 +23,7 @@ "alternator_port": 8000, "access_key": "None", "secret_key": "None", - "instance_id": "d1cf3f482b4f9c344271a3dd54da68050eaabf2c047151a816c01046577361d3", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", "region": "None", "cpus": 1, "memory": "750", diff --git a/results/local-workflows/results/sebd-610.gen-python-3.11/sonataflow.csv 
b/results/local-workflows/results/sebd-610.gen-python-3.11/sonataflow.csv index 8b1378917..d9821413e 100644 --- a/results/local-workflows/results/sebd-610.gen-python-3.11/sonataflow.csv +++ b/results/local-workflows/results/sebd-610.gen-python-3.11/sonataflow.csv @@ -1 +1,4 @@ - +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +process_astros,1767807759.496729,1767807759.496746,False,242064b010bb,64ea8b9d,0,0,64ea8b9d,0 +get_astros,1767807755.751181,1767807759.31053,False,aa63d8a3dc01,64ea8b9d,0,0,64ea8b9d,0 +many_people,1767807759.382087,1767807759.382103,False,bdd11f6cfcf6,64ea8b9d,0,0,64ea8b9d,0 diff --git a/results/local-workflows/results/sebd-6100.1000-genome-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-6100.1000-genome-python-3.11/sonataflow.csv index 8b1378917..3eabafc79 100644 --- a/results/local-workflows/results/sebd-6100.1000-genome-python-3.11/sonataflow.csv +++ b/results/local-workflows/results/sebd-6100.1000-genome-python-3.11/sonataflow.csv @@ -1 +1,3 @@ - +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +sifting,1767807876.686747,1767807877.226287,False,db9ff909d2b3,9f32c850,0,267806263,9f32c850,0 +individuals_merge,1767807877.313827,1767807878.402442,False,86874c18a723,9f32c850,0,312614,9f32c850,0 diff --git a/results/local-workflows/results/sebd-630.parallel-sleep-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.11/sonataflow.csv index 8b1378917..54922a532 100644 --- a/results/local-workflows/results/sebd-630.parallel-sleep-python-3.11/sonataflow.csv +++ b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.11/sonataflow.csv @@ -1 +1,2 @@ - +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +generate,1767808374.378038,1767808374.378049,False,a5a5d0db5fd7,34a1f4bd,0,0,34a1f4bd,0 diff --git 
a/results/local-workflows/results/sebd-631.parallel-download-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-631.parallel-download-python-3.11/sonataflow.csv index 8b1378917..ac543654e 100644 --- a/results/local-workflows/results/sebd-631.parallel-download-python-3.11/sonataflow.csv +++ b/results/local-workflows/results/sebd-631.parallel-download-python-3.11/sonataflow.csv @@ -1 +1,7 @@ - +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +generate,1767808407.789139,1767808407.789153,False,6afde6a7f512,77f89ce3,0,0,77f89ce3,0 +process,1767808407.898752,1767808407.899614,False,78336b62f860,77f89ce3,0,50,77f89ce3,0 +process,1767808407.887955,1767808407.889048,False,78336b62f860,77f89ce3,0,20,77f89ce3,0 +process,1767808407.895273,1767808407.896161,False,78336b62f860,77f89ce3,0,40,77f89ce3,0 +process,1767808407.891864,1767808407.892736,False,78336b62f860,77f89ce3,0,30,77f89ce3,0 +process,1767808407.88167,1767808407.884182,False,78336b62f860,77f89ce3,0,10,77f89ce3,0 diff --git a/results/local-workflows/results/sebd-640.selfish-detour-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-640.selfish-detour-python-3.11/sonataflow.csv index 8b1378917..b4963046f 100644 --- a/results/local-workflows/results/sebd-640.selfish-detour-python-3.11/sonataflow.csv +++ b/results/local-workflows/results/sebd-640.selfish-detour-python-3.11/sonataflow.csv @@ -1 +1,2 @@ - +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,result.min_diff,result.num_iterations,result.timestamps,result.tps,request_id,rep +measure,1767808436.089672,1767808446.114368,False,a77dea6ab60d,4ea3bae4,0,0,63,3736172,"[794517, 812041, 1365470, 1416311, 1589671, 1590327, 1590327, 1591748, 1591748, 1592707, 3547264, 3547902, 3548869, 3549806, 3784359, 3802844, 5150022, 5163813, 6150680, 6151259, 6151259, 6151936, 6151936, 6152710, 6152710, 6153507, 6203325, 6238664, 8622529, 8637723, 10450174, 10451438, 
10451438, 10452326, 11041670, 11053240, 11535090, 11549288, 12586443, 12600715, 13460793, 13469871, 15879957, 15882613, 18299035, 18301563, 20718237, 20720724, 23137441, 23139983, 25556576, 25559681, 26047252, 26057661, 27092134, 27102539, 27975776, 27978485, 30394846, 30397351, 32814042, 32816527, 35233248, 35235823, 35616767, 35627172, 36656456, 36666967, 37652387, 37654945, 38227374, 38237766, 39266235, 39276596, 40071587, 40074198, 42490783, 42493252, 44909918, 44912259, 45137720, 45148432, 46176010, 46186393, 47329187, 47332054, 49748387, 49751090, 52167514, 52170349, 52551582, 52561924, 53592872, 53603318, 54586653, 54589317, 54632072, 54642627]",2419324120.0,4ea3bae4,0 diff --git a/results/local-workflows/results/sebd-650.vid-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-650.vid-python-3.11/sonataflow.csv index 8b1378917..532a5d6fe 100644 --- a/results/local-workflows/results/sebd-650.vid-python-3.11/sonataflow.csv +++ b/results/local-workflows/results/sebd-650.vid-python-3.11/sonataflow.csv @@ -1 +1,3 @@ - +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +decode,1767808485.59046,1767808486.000908,False,880ac8f8fb66,eb3c486c,0,1258844,eb3c486c,0 +summarize,1767808487.453,1767808487.453017,False,3c3d7afcc453,eb3c486c,0,0,eb3c486c,0 diff --git a/results/local-workflows/results/sebd-660.map-reduce-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-660.map-reduce-python-3.11/sonataflow.csv index 8b1378917..b50c2d26e 100644 --- a/results/local-workflows/results/sebd-660.map-reduce-python-3.11/sonataflow.csv +++ b/results/local-workflows/results/sebd-660.map-reduce-python-3.11/sonataflow.csv @@ -1 +1,3 @@ - +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +split,1767808519.80488,1767808519.838882,False,e86dfb1e3fe5,284ec8d7,0,1150,284ec8d7,0 +shuffle,1767808520.140384,1767808520.145531,False,e3d0e3b63419,284ec8d7,0,0,284ec8d7,0 diff 
--git a/results/local-workflows/results/sebd-670.auth-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-670.auth-python-3.11/sonataflow.csv index 8b1378917..b5bcff1ee 100644 --- a/results/local-workflows/results/sebd-670.auth-python-3.11/sonataflow.csv +++ b/results/local-workflows/results/sebd-670.auth-python-3.11/sonataflow.csv @@ -1 +1,2 @@ - +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +auth,1767808549.184025,1767808549.184416,False,1a4ee4af7980,8d3e9b67,0,0,8d3e9b67,0 diff --git a/results/local-workflows/results/sebd-680.excamera-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-680.excamera-python-3.11/sonataflow.csv index 8b1378917..301a80872 100644 --- a/results/local-workflows/results/sebd-680.excamera-python-3.11/sonataflow.csv +++ b/results/local-workflows/results/sebd-680.excamera-python-3.11/sonataflow.csv @@ -1 +1,2 @@ - +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +split,1767808583.672572,1767808583.672602,False,b06eec9c6029,9ccda4f4,0,0,9ccda4f4,0 diff --git a/results/local-workflows/results/sebd-690.ml-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-690.ml-python-3.11/sonataflow.csv index 8b1378917..92a0e087a 100644 --- a/results/local-workflows/results/sebd-690.ml-python-3.11/sonataflow.csv +++ b/results/local-workflows/results/sebd-690.ml-python-3.11/sonataflow.csv @@ -1 +1,2 @@ - +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +generate,1767808633.177356,1767808633.211837,False,4afe812649fc,1e4e7cce,0,0,1e4e7cce,0 diff --git a/run_sonataflow.pid b/run_sonataflow.pid new file mode 100644 index 000000000..524339ec5 --- /dev/null +++ b/run_sonataflow.pid @@ -0,0 +1 @@ +169510 diff --git a/run_sonataflow_workflows.sh b/run_sonataflow_workflows.sh index 6426c4e42..da9af15a4 100755 --- a/run_sonataflow_workflows.sh +++ b/run_sonataflow_workflows.sh 
@@ -376,17 +376,17 @@ fi WORKFLOWS=( "610.gen" "6100.1000-genome" - # "6101.1000-genome-individuals" - # "620.func-invo" - # "6200.trip-booking" - # "630.parallel-sleep" - # "631.parallel-download" - # "640.selfish-detour" - # "650.vid" - # "660.map-reduce" - # "670.auth" - # "680.excamera" - # "690.ml" + "6101.1000-genome-individuals" + "620.func-invo" + "6200.trip-booking" + "630.parallel-sleep" + "631.parallel-download" + "640.selfish-detour" + "650.vid" + "660.map-reduce" + "670.auth" + "680.excamera" + "690.ml" ) SONATAFLOW_STARTED=false diff --git a/sebs.py b/sebs.py index 02af08e42..cba6537e6 100755 --- a/sebs.py +++ b/sebs.py @@ -396,9 +396,11 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam df = pd.DataFrame(measurements) df.to_csv(path, index=False) - with open("experiments.json", "w") as out_f: + # Use workflow name to create unique file + experiment_file = f"experiments_{workflow.name}.json" + with open(experiment_file, "w") as out_f: out_f.write(sebs.utils.serialize(result)) - sebs_client.logging.info("Save results to {}".format(os.path.abspath("experiments.json"))) + sebs_client.logging.info("Save results to {}".format(os.path.abspath(experiment_file))) @benchmark.command() diff --git a/sebs/sonataflow/generator.py b/sebs/sonataflow/generator.py index f946c04dc..04c0fb3d1 100644 --- a/sebs/sonataflow/generator.py +++ b/sebs/sonataflow/generator.py @@ -1,5 +1,5 @@ import json -from typing import Dict, List, Union +from typing import Dict, List, Optional, Union from sebs.faas.fsm import Generator, State, Task, Switch, Map, Repeat, Loop, Parallel @@ -18,7 +18,8 @@ def __init__(self, workflow_id: str, bindings: Dict[str, Dict[str, str]]): self._functions: Dict[str, Dict[str, str]] = {} self._uses_errors = False # Track if any state uses onErrors # Unwrap SeBS local server responses so workflow state data stays as payload. - self._action_results_expr = "${ .result.output.payload // .payload // . 
}" + self._action_results_expr_inner = ".result.output.payload // .payload // ." + self._action_results_expr = f"${{ {self._action_results_expr_inner} }}" def _function_ref(self, func_name: str) -> Dict[str, str]: binding = self._bindings.get(func_name) @@ -111,10 +112,10 @@ def _quote_field_path(self, path: str) -> str: def encode_map(self, state: Map) -> Union[dict, List[dict]]: iteration_param = "item" - action_args = "${ " + iteration_param + " }" + action_args = "${ ." + iteration_param + " }" if state.common_params: # Merge map element with selected common parameters. - merged = {"array_element": "${ " + iteration_param + " }"} + merged = {"array_element": "${ ." + iteration_param + " }"} for param in [p.strip() for p in state.common_params.split(",") if p.strip()]: quoted_param = self._quote_field_path(param) merged[param] = "${ ." + quoted_param + " }" @@ -126,11 +127,13 @@ def encode_map(self, state: Map) -> Union[dict, List[dict]]: func_name = root_state_def.get("func_name", state.root) quoted_array = self._quote_field_path(state.array) + output_array = getattr(state, "output_array", state.array) + quoted_output = self._quote_field_path(output_array) payload: Dict[str, object] = { "name": state.name, "type": "foreach", "inputCollection": "${ ." + quoted_array + " }", - "outputCollection": "${ ." + quoted_array + " }", + "outputCollection": "${ ." + quoted_output + " }", "iterationParam": iteration_param, "actions": [self._default_action(func_name, action_args)], } @@ -197,14 +200,59 @@ def _encode_branch(self, subworkflow: dict) -> Dict[str, object]: f"Parallel branches currently support Task/Map/Repeat/Loop root states, got {type(root_state).__name__}" ) + results_expr = ( + f"${{ {{\"{subworkflow['root']}\": {self._action_results_expr_inner}}} }}" + ) action = self._default_action(func_name, "${ . 
}") + action["actionDataFilter"] = {"results": results_expr} return {"name": subworkflow["root"], "actions": [action]} def encode_parallel(self, state: Parallel) -> Union[dict, List[dict]]: - branches = [self._encode_branch(sw) for sw in state.funcs] - payload: Dict[str, object] = {"name": state.name, "type": "parallel", "branches": branches} - if state.next: - payload["transition"] = state.next - else: - payload["end"] = True - return payload + branch_roots: List[State] = [] + has_complex = False + for subworkflow in state.funcs: + states = {n: State.deserialize(n, s) for n, s in subworkflow["states"].items()} + root_state = states.get(subworkflow["root"]) + if root_state is None: + raise ValueError(f"Root state {subworkflow['root']} not found in subworkflow") + branch_roots.append(root_state) + if not isinstance(root_state, Task): + has_complex = True + + if not has_complex: + branches = [self._encode_branch(sw) for sw in state.funcs] + payload: Dict[str, object] = {"name": state.name, "type": "parallel", "branches": branches} + if state.next: + payload["transition"] = state.next + else: + payload["end"] = True + return payload + + def _clone_state(root: State, name: str, next_name: Optional[str]) -> State: + if isinstance(root, Task): + return Task(name, root.func_name, next_name, root.failure) + if isinstance(root, Map): + return Map(name, root.funcs, root.array, root.root, next_name, root.common_params) + if isinstance(root, Repeat): + return Repeat(name, root.func_name, root.count, next_name) + if isinstance(root, Loop): + return Loop(name, root.func_name, root.array, next_name) + raise ValueError( + f"Parallel branch {name} uses unsupported root state type {type(root).__name__}" + ) + + encoded_states: List[dict] = [] + for idx, root in enumerate(branch_roots): + branch_name = state.name if idx == 0 else root.name + next_name = ( + branch_roots[idx + 1].name if idx < len(branch_roots) - 1 else state.next + ) + cloned = _clone_state(root, branch_name, 
next_name) + if isinstance(cloned, Map) and idx < len(branch_roots) - 1: + cloned.output_array = f"_parallel_{state.name}_{idx}_results" + encoded = self.encode_state(cloned) + if isinstance(encoded, list): + encoded_states.extend(encoded) + else: + encoded_states.append(encoded) + return encoded_states diff --git a/sebs/sonataflow/triggers.py b/sebs/sonataflow/triggers.py index 29bab2100..75eea7e53 100644 --- a/sebs/sonataflow/triggers.py +++ b/sebs/sonataflow/triggers.py @@ -127,7 +127,7 @@ def _invoke(self, payload: dict) -> ExecutionResult: end = datetime.datetime.now() result = ExecutionResult.from_times(begin, end) result.request_id = request_id - if resp and resp.status_code >= 300: + if resp is not None and resp.status_code >= 300: result.stats.failure = True try: error_text = resp.text[:500] if len(resp.text) > 500 else resp.text @@ -136,7 +136,7 @@ def _invoke(self, payload: dict) -> ExecutionResult: self.logging.error( f"SonataFlow invocation failed ({resp.status_code}): {error_text}" ) - elif resp: + elif resp is not None: try: result.output = resp.json() except Exception as e: diff --git a/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json new file mode 100644 index 000000000..24e1535b3 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json @@ -0,0 +1,161 @@ +{ + "id": "sebd_6100_1000_genome_python_3_11", + "name": "sebd_6100_1000_genome_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_6100_1000_genome_python_3_11_individuals", + "operation": "rest:post:http://172.18.0.12:9000/", + "type": "custom" + }, + { + "name": "sebd_6100_1000_genome_python_3_11_individuals_merge", + "operation": "rest:post:http://172.18.0.13:9000/", + "type": "custom" + }, + { + "name": "sebd_6100_1000_genome_python_3_11_sifting", + 
"operation": "rest:post:http://172.18.0.15:9000/", + "type": "custom" + }, + { + "name": "sebd_6100_1000_genome_python_3_11_frequency", + "operation": "rest:post:http://172.18.0.11:9000/", + "type": "custom" + }, + { + "name": "sebd_6100_1000_genome_python_3_11_mutation_overlap", + "operation": "rest:post:http://172.18.0.14:9000/", + "type": "custom" + } + ], + "start": "individuals", + "states": [ + { + "name": "individuals", + "type": "foreach", + "inputCollection": "${ .blob }", + "outputCollection": "${ .blob }", + "iterationParam": "item", + "actions": [ + { + "name": "individuals", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_individuals", + "arguments": { + "payload": { + "array_element": "${ .item }", + "benchmark_bucket": "${ .benchmark_bucket }", + "bucket": "${ .bucket }", + "columns": "${ .columns }", + "columns_bucket": "${ .columns_bucket }", + "populations": "${ .populations }", + "sifting_input": "${ .sifting_input }", + "individuals_file": "${ .individuals_file }" + } + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "merge_and_sifting" + }, + { + "name": "merge_and_sifting", + "type": "parallel", + "branches": [ + { + "name": "individuals_merge", + "actions": [ + { + "name": "individuals_merge", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_individuals_merge", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ {\"individuals_merge\": .result.output.payload // .payload // .} }" + } + } + ] + }, + { + "name": "sifting", + "actions": [ + { + "name": "sifting", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_sifting", + "arguments": { + "payload": "${ . 
}" + } + }, + "actionDataFilter": { + "results": "${ {\"sifting\": .result.output.payload // .payload // .} }" + } + } + ] + } + ], + "transition": "frequency_and_overlap" + }, + { + "name": "frequency_and_overlap", + "type": "foreach", + "inputCollection": "${ .sifting.populations }", + "outputCollection": "${ ._parallel_frequency_and_overlap_0_results }", + "iterationParam": "item", + "actions": [ + { + "name": "frequency", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_frequency", + "arguments": { + "payload": { + "array_element": "${ .item }", + "sifting": "${ .sifting }", + "individuals_merge": "${ .individuals_merge }" + } + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "mutation_overlap" + }, + { + "name": "mutation_overlap", + "type": "foreach", + "inputCollection": "${ .sifting.populations }", + "outputCollection": "${ .sifting.populations }", + "iterationParam": "item", + "actions": [ + { + "name": "mutation_overlap", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_mutation_overlap", + "arguments": { + "payload": { + "array_element": "${ .item }", + "sifting": "${ .sifting }", + "individuals_merge": "${ .individuals_merge }" + } + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json new file mode 100644 index 000000000..3f5c104e9 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json @@ -0,0 +1,48 @@ +{ + "id": "sebd_6101_1000_genome_individuals_python_3_11", + "name": "sebd_6101_1000_genome_individuals_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_6101_1000_genome_individuals_python_3_11_individuals", + "operation": "rest:post:http://172.18.0.16:9000/", + "type": "custom" + } + ], + "start": "individuals_state", + "states": [ + { + "name": "individuals_state", + "type": "foreach", + "inputCollection": "${ .blob }", + "outputCollection": "${ .blob }", + "iterationParam": "item", + "actions": [ + { + "name": "individuals", + "functionRef": { + "refName": "sebd_6101_1000_genome_individuals_python_3_11_individuals", + "arguments": { + "payload": { + "array_element": "${ .item }", + "benchmark_bucket": "${ .benchmark_bucket }", + "bucket": "${ .bucket }", + "columns": "${ .columns }", + "columns_bucket": "${ .columns_bucket }", + "populations": "${ .populations }", + "sifting_input": "${ .sifting_input }", + "individuals_file": "${ .individuals_file }" + } + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json index 062f1bdb7..b4e018e2f 100644 --- a/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json +++ b/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json @@ -7,27 +7,27 @@ "functions": [ { "name": "sebd_610_gen_python_3_11_get_astros", - "operation": "rest:post:http://172.18.0.20:9000/", + "operation": "rest:post:http://172.18.0.6:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_11_few_people", - "operation": "rest:post:http://172.18.0.19:9000/", + "operation": "rest:post:http://172.18.0.5:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_11_many_people", - "operation": "rest:post:http://172.18.0.21:9000/", + "operation": "rest:post:http://172.18.0.7:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_11_map_astros", - "operation": "rest:post:http://172.18.0.22:9000/", + "operation": "rest:post:http://172.18.0.8:9000/", "type": "custom" }, { "name": "sebd_610_gen_python_3_11_process_astros", - "operation": "rest:post:http://172.18.0.23:9000/", + "operation": "rest:post:http://172.18.0.9:9000/", "type": "custom" } ], @@ -119,7 +119,7 @@ "functionRef": { "refName": "sebd_610_gen_python_3_11_map_astros", "arguments": { - "payload": "${ item }" + "payload": "${ .item }" } }, "actionDataFilter": { diff --git a/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json new file mode 100644 index 000000000..0d1a95f88 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json @@ -0,0 +1,204 @@ +{ + "id": "sebd_6200_trip_booking_python_3_11", + "name": "sebd_6200_trip_booking_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow 
definition.", + "functions": [ + { + "name": "sebd_6200_trip_booking_python_3_11_reserve_hotel", + "operation": "rest:post:http://172.18.0.24:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_reserve_rental", + "operation": "rest:post:http://172.18.0.25:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_reserve_flight", + "operation": "rest:post:http://172.18.0.23:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_confirm", + "operation": "rest:post:http://172.18.0.22:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_cancel_flight", + "operation": "rest:post:http://172.18.0.19:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_cancel_rental", + "operation": "rest:post:http://172.18.0.21:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_cancel_hotel", + "operation": "rest:post:http://172.18.0.20:9000/", + "type": "custom" + } + ], + "start": "hotel", + "states": [ + { + "name": "hotel", + "type": "operation", + "actions": [ + { + "name": "reserve_hotel", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_reserve_hotel", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "rental" + }, + { + "name": "rental", + "type": "operation", + "actions": [ + { + "name": "reserve_rental", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_reserve_rental", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "transition": "flight", + "onErrors": [ + { + "errorRef": "workflow_error", + "transition": "cancel_hotel" + } + ] + }, + { + "name": "flight", + "type": "operation", + "actions": [ + { + "name": "reserve_flight", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_reserve_flight", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "confirm", + "onErrors": [ + { + "errorRef": "workflow_error", + "transition": "cancel_rental" + } + ] + }, + { + "name": "confirm", + "type": "operation", + "actions": [ + { + "name": "confirm", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_confirm", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true, + "onErrors": [ + { + "errorRef": "workflow_error", + "transition": "cancel_flight" + } + ] + }, + { + "name": "cancel_flight", + "type": "operation", + "actions": [ + { + "name": "cancel_flight", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_cancel_flight", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "cancel_rental" + }, + { + "name": "cancel_rental", + "type": "operation", + "actions": [ + { + "name": "cancel_rental", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_cancel_rental", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "cancel_hotel" + }, + { + "name": "cancel_hotel", + "type": "operation", + "actions": [ + { + "name": "cancel_hotel", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_cancel_hotel", + "arguments": { + "payload": "${ . 
}" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ], + "errors": [ + { + "name": "workflow_error", + "code": "*" + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json new file mode 100644 index 000000000..42ad38bfb --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json @@ -0,0 +1,71 @@ +{ + "id": "sebd_620_func_invo_python_3_11", + "name": "sebd_620_func_invo_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_620_func_invo_python_3_11_gen", + "operation": "rest:post:http://172.18.0.17:9000/", + "type": "custom" + }, + { + "name": "sebd_620_func_invo_python_3_11_process", + "operation": "rest:post:http://172.18.0.18:9000/", + "type": "custom" + } + ], + "start": "gen", + "states": [ + { + "name": "gen", + "type": "operation", + "actions": [ + { + "name": "gen", + "functionRef": { + "refName": "sebd_620_func_invo_python_3_11_gen", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "process" + }, + { + "name": "process", + "type": "foreach", + "inputCollection": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7 + ], + "iterationParam": "idx", + "actions": [ + { + "name": "process", + "functionRef": { + "refName": "sebd_620_func_invo_python_3_11_process", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json new file mode 100644 index 000000000..85f05c30c --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json @@ -0,0 +1,63 @@ +{ + "id": "sebd_630_parallel_sleep_python_3_11", + "name": "sebd_630_parallel_sleep_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_630_parallel_sleep_python_3_11_generate", + "operation": "rest:post:http://172.18.0.26:9000/", + "type": "custom" + }, + { + "name": "sebd_630_parallel_sleep_python_3_11_process", + "operation": "rest:post:http://172.18.0.27:9000/", + "type": "custom" + } + ], + "start": "generate", + "states": [ + { + "name": "generate", + "type": "operation", + "actions": [ + { + "name": "generate", + "functionRef": { + "refName": "sebd_630_parallel_sleep_python_3_11_generate", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "process-state" + }, + { + "name": "process-state", + "type": "foreach", + "inputCollection": "${ .buffer }", + "outputCollection": "${ .buffer }", + "iterationParam": "item", + "actions": [ + { + "name": "process", + "functionRef": { + "refName": "sebd_630_parallel_sleep_python_3_11_process", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json new file mode 100644 index 000000000..3e89cb0d2 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json @@ -0,0 +1,63 @@ +{ + "id": "sebd_631_parallel_download_python_3_11", + "name": "sebd_631_parallel_download_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_631_parallel_download_python_3_11_generate", + "operation": "rest:post:http://172.18.0.28:9000/", + "type": "custom" + }, + { + "name": "sebd_631_parallel_download_python_3_11_process", + "operation": "rest:post:http://172.18.0.29:9000/", + "type": "custom" + } + ], + "start": "generate", + "states": [ + { + "name": "generate", + "type": "operation", + "actions": [ + { + "name": "generate", + "functionRef": { + "refName": "sebd_631_parallel_download_python_3_11_generate", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "process1" + }, + { + "name": "process1", + "type": "foreach", + "inputCollection": "${ .buffer }", + "outputCollection": "${ .buffer }", + "iterationParam": "item", + "actions": [ + { + "name": "process", + "functionRef": { + "refName": "sebd_631_parallel_download_python_3_11_process", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json new file mode 100644 index 000000000..1595ed5c6 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json @@ -0,0 +1,36 @@ +{ + "id": "sebd_640_selfish_detour_python_3_11", + "name": "sebd_640_selfish_detour_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_640_selfish_detour_python_3_11_measure", + "operation": "rest:post:http://172.18.0.30:9000/", + "type": "custom" + } + ], + "start": "measure", + "states": [ + { + "name": "measure", + "type": "operation", + "actions": [ + { + "name": "measure", + "functionRef": { + "refName": "sebd_640_selfish_detour_python_3_11_measure", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json new file mode 100644 index 000000000..b3333d736 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json @@ -0,0 +1,87 @@ +{ + "id": "sebd_650_vid_python_3_11", + "name": "sebd_650_vid_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_650_vid_python_3_11_decode", + "operation": "rest:post:http://172.18.0.32:9000/", + "type": "custom" + }, + { + "name": "sebd_650_vid_python_3_11_analyse", + "operation": "rest:post:http://172.18.0.31:9000/", + "type": "custom" + }, + { + "name": "sebd_650_vid_python_3_11_summarize", + "operation": "rest:post:http://172.18.0.33:9000/", + "type": "custom" + } + ], + "start": "decode", + "states": [ + { + "name": "decode", + "type": "operation", + "actions": [ + { + "name": "decode", + "functionRef": { + "refName": "sebd_650_vid_python_3_11_decode", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "analyse-map" + }, + { + "name": "analyse-map", + "type": "foreach", + "inputCollection": "${ .frames }", + "outputCollection": "${ .frames }", + "iterationParam": "item", + "actions": [ + { + "name": "analyse", + "functionRef": { + "refName": "sebd_650_vid_python_3_11_analyse", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "summarize" + }, + { + "name": "summarize", + "type": "operation", + "actions": [ + { + "name": "summarize", + "functionRef": { + "refName": "sebd_650_vid_python_3_11_summarize", + "arguments": { + "payload": "${ . 
}" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json new file mode 100644 index 000000000..de2f7de52 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json @@ -0,0 +1,114 @@ +{ + "id": "sebd_660_map_reduce_python_3_11", + "name": "sebd_660_map_reduce_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_660_map_reduce_python_3_11_split", + "operation": "rest:post:http://172.18.0.37:9000/", + "type": "custom" + }, + { + "name": "sebd_660_map_reduce_python_3_11_map", + "operation": "rest:post:http://172.18.0.34:9000/", + "type": "custom" + }, + { + "name": "sebd_660_map_reduce_python_3_11_shuffle", + "operation": "rest:post:http://172.18.0.36:9000/", + "type": "custom" + }, + { + "name": "sebd_660_map_reduce_python_3_11_reduce", + "operation": "rest:post:http://172.18.0.35:9000/", + "type": "custom" + } + ], + "start": "split", + "states": [ + { + "name": "split", + "type": "operation", + "actions": [ + { + "name": "split", + "functionRef": { + "refName": "sebd_660_map_reduce_python_3_11_split", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "map-state" + }, + { + "name": "map-state", + "type": "foreach", + "inputCollection": "${ .list }", + "outputCollection": "${ .list }", + "iterationParam": "item", + "actions": [ + { + "name": "map", + "functionRef": { + "refName": "sebd_660_map_reduce_python_3_11_map", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "transition": "shuffle" + }, + { + "name": "shuffle", + "type": "operation", + "actions": [ + { + "name": "shuffle", + "functionRef": { + "refName": "sebd_660_map_reduce_python_3_11_shuffle", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "reduce-state" + }, + { + "name": "reduce-state", + "type": "foreach", + "inputCollection": "${ .list }", + "outputCollection": "${ .list }", + "iterationParam": "item", + "actions": [ + { + "name": "reduce", + "functionRef": { + "refName": "sebd_660_map_reduce_python_3_11_reduce", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json new file mode 100644 index 000000000..1e0d05182 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json @@ -0,0 +1,36 @@ +{ + "id": "sebd_670_auth_python_3_11", + "name": "sebd_670_auth_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_670_auth_python_3_11_auth", + "operation": "rest:post:http://172.18.0.38:9000/", + "type": "custom" + } + ], + "start": "auth", + "states": [ + { + "name": "auth", + "type": "operation", + "actions": [ + { + "name": "auth", + "functionRef": { + "refName": "sebd_670_auth_python_3_11_auth", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json new file mode 100644 index 000000000..0ac5c43b5 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json @@ -0,0 +1,116 @@ +{ + "id": "sebd_680_excamera_python_3_11", + "name": "sebd_680_excamera_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_680_excamera_python_3_11_split", + "operation": "rest:post:http://172.18.0.42:9000/", + "type": "custom" + }, + { + "name": "sebd_680_excamera_python_3_11_encode", + "operation": "rest:post:http://172.18.0.39:9000/", + "type": "custom" + }, + { + "name": "sebd_680_excamera_python_3_11_reencode", + "operation": "rest:post:http://172.18.0.41:9000/", + "type": "custom" + }, + { + "name": "sebd_680_excamera_python_3_11_rebase", + "operation": "rest:post:http://172.18.0.40:9000/", + "type": "custom" + } + ], + "start": "split", + "states": [ + { + "name": "split", + "type": "operation", + "actions": [ + { + "name": "split", + "functionRef": { + "refName": "sebd_680_excamera_python_3_11_split", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "encode-state" + }, + { + "name": "encode-state", + "type": "foreach", + "inputCollection": "${ .segments }", + "outputCollection": "${ .segments }", + "iterationParam": "item", + "actions": [ + { + "name": "encode", + "functionRef": { + "refName": "sebd_680_excamera_python_3_11_encode", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "transition": "reencode-state" + }, + { + "name": "reencode-state", + "type": "foreach", + "inputCollection": "${ .segments }", + "outputCollection": "${ .segments }", + "iterationParam": "item", + "actions": [ + { + "name": "reencode", + "functionRef": { + "refName": "sebd_680_excamera_python_3_11_reencode", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "rebase-state" + }, + { + "name": "rebase-state", + "type": "foreach", + "inputCollection": "${ .segments }", + "iterationParam": "item", + "actions": [ + { + "name": "rebase", + "functionRef": { + "refName": "sebd_680_excamera_python_3_11_rebase", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json new file mode 100644 index 000000000..981de4db8 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json @@ -0,0 +1,63 @@ +{ + "id": "sebd_690_ml_python_3_11", + "name": "sebd_690_ml_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_690_ml_python_3_11_generate", + "operation": "rest:post:http://172.18.0.43:9000/", + "type": "custom" + }, + { + "name": "sebd_690_ml_python_3_11_train", + "operation": "rest:post:http://172.18.0.44:9000/", + "type": "custom" + } + ], + "start": "generate", + "states": [ + { + "name": "generate", + "type": "operation", + "actions": [ + { + "name": "generate", + "functionRef": { + "refName": "sebd_690_ml_python_3_11_generate", + "arguments": { + "payload": "${ . 
}" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "train-state" + }, + { + "name": "train-state", + "type": "foreach", + "inputCollection": "${ .schedules }", + "outputCollection": "${ .schedules }", + "iterationParam": "item", + "actions": [ + { + "name": "train", + "functionRef": { + "refName": "sebd_690_ml_python_3_11_train", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file From 447863a243e4b5ce1a570f403a7c43cab6e7f66c Mon Sep 17 00:00:00 2001 From: Russellpang Date: Thu, 8 Jan 2026 02:51:29 +0100 Subject: [PATCH 82/82] fix sonataflow --- .../python/requirements.txt | 4 ++-- .../6200.trip-booking/python/reserve_flight.py | 11 ++++++++++- .../6200.trip-booking/python/reserve_hotel.py | 9 ++++++++- .../6200.trip-booking/python/reserve_rental.py | 11 ++++++++++- config/local_deployment.json | 8 ++++---- config/local_workflows.json | 8 ++++---- sebs/sonataflow/generator.py | 6 ++++-- 7 files changed, 42 insertions(+), 15 deletions(-) diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt b/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt index 21479b821..ba14aed5c 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt @@ -1,2 +1,2 @@ -numpy==1.17 -matplotlib +numpy==1.26.4 +matplotlib==3.8.4 diff --git a/benchmarks/600.workflows/6200.trip-booking/python/reserve_flight.py b/benchmarks/600.workflows/6200.trip-booking/python/reserve_flight.py index fe55ed0c1..35fde12cc 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/reserve_flight.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/reserve_flight.py @@ -1,8 +1,17 @@ +import uuid + from . 
import nosql nosql_client = nosql.nosql.get_instance() nosql_table_name = "flights" +def _get_request_id(event): + request_id = event.get("request-id") or event.get("request_id") or event.get("requestId") + if not request_id: + request_id = uuid.uuid4().hex + event["request-id"] = request_id + return request_id + def handler(event): @@ -12,7 +21,7 @@ def handler(event): # We start with the hotel trip_id = event["trip_id"] - flight_id = event["request-id"] + flight_id = _get_request_id(event) # Simulate return from a service flight_price = "1000" diff --git a/benchmarks/600.workflows/6200.trip-booking/python/reserve_hotel.py b/benchmarks/600.workflows/6200.trip-booking/python/reserve_hotel.py index 623d1a8b0..a9d963583 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/reserve_hotel.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/reserve_hotel.py @@ -5,6 +5,13 @@ nosql_client = nosql.nosql.get_instance() nosql_table_name = "hotel_booking" +def _get_request_id(event): + request_id = event.get("request-id") or event.get("request_id") or event.get("requestId") + if not request_id: + request_id = uuid.uuid4().hex + event["request-id"] = request_id + return request_id + def handler(event): @@ -14,7 +21,7 @@ def handler(event): # We start with the hotel trip_id = str(uuid.uuid4().hex) - hotel_booking_id = event["request-id"] + hotel_booking_id = _get_request_id(event) # Simulate return from a service hotel_price = "130" diff --git a/benchmarks/600.workflows/6200.trip-booking/python/reserve_rental.py b/benchmarks/600.workflows/6200.trip-booking/python/reserve_rental.py index 8cf0b11fc..41be88a79 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/reserve_rental.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/reserve_rental.py @@ -1,8 +1,17 @@ +import uuid + from . 
import nosql nosql_client = nosql.nosql.get_instance() nosql_table_name = "car_rentals" +def _get_request_id(event): + request_id = event.get("request-id") or event.get("request_id") or event.get("requestId") + if not request_id: + request_id = uuid.uuid4().hex + event["request-id"] = request_id + return request_id + def handler(event): @@ -12,7 +21,7 @@ def handler(event): # We start with the hotel trip_id = event["trip_id"] - rental_id = event["request-id"] + rental_id = _get_request_id(event) # Simulate return from a service car_price = "125" diff --git a/config/local_deployment.json b/config/local_deployment.json index bd44e9b84..4d1174e3a 100644 --- a/config/local_deployment.json +++ b/config/local_deployment.json @@ -136,9 +136,9 @@ "minio": { "address": "172.18.0.2:9000", "mapped_port": 9011, - "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", - "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", - "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "access_key": "66tQNYoeOvF8igk220P1R2waQrtalkZh07nynxuEdCE", + "secret_key": "4fbddba8e5b88597f4c4b781de22de7cff97ed5f6671ef515756574bb0a9491c", + "instance_id": "33484801c78fe94ba30e3f1976962e8aa83610a87aa814abf523618642dc3f89", "input_buckets": [], "output_buckets": [] } @@ -148,7 +148,7 @@ "scylladb": { "address": "172.18.0.3:8000", "mapped_port": 9012, - "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b" + "instance_id": "35280a9874ae24335c7da349a729f0a6e02b6d70af340038580e3f4758b83605" } } } diff --git a/config/local_workflows.json b/config/local_workflows.json index 3875462b4..2722611e1 100644 --- a/config/local_workflows.json +++ b/config/local_workflows.json @@ -113,9 +113,9 @@ "minio": { "address": "172.18.0.2:9000", "mapped_port": 9011, - "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", - "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", - "instance_id": 
"ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "access_key": "66tQNYoeOvF8igk220P1R2waQrtalkZh07nynxuEdCE", + "secret_key": "4fbddba8e5b88597f4c4b781de22de7cff97ed5f6671ef515756574bb0a9491c", + "instance_id": "33484801c78fe94ba30e3f1976962e8aa83610a87aa814abf523618642dc3f89", "input_buckets": [], "output_buckets": [] } @@ -125,7 +125,7 @@ "scylladb": { "address": "172.18.0.3:8000", "mapped_port": 9012, - "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b" + "instance_id": "35280a9874ae24335c7da349a729f0a6e02b6d70af340038580e3f4758b83605" } } }, diff --git a/sebs/sonataflow/generator.py b/sebs/sonataflow/generator.py index 04c0fb3d1..c8ae813c4 100644 --- a/sebs/sonataflow/generator.py +++ b/sebs/sonataflow/generator.py @@ -41,7 +41,8 @@ def _function_ref(self, func_name: str) -> Dict[str, str]: def _default_action(self, func_name: str, payload_ref: str = "${ . }") -> Dict[str, object]: ref = self._function_ref(func_name) - ref["arguments"] = {"payload": payload_ref} + request_id_expr = '${ .request_id // .requestId // .["request-id"] }' + ref["arguments"] = {"payload": payload_ref, "request_id": request_id_expr} return { "name": func_name, "functionRef": ref, @@ -146,10 +147,11 @@ def encode_map(self, state: Map) -> Union[dict, List[dict]]: def encode_repeat(self, state: Repeat) -> Union[dict, List[dict]]: # Encode as a foreach over a generated range. iterations = list(range(state.count)) + input_expr = f"${{ {json.dumps(iterations)} }}" payload: Dict[str, object] = { "name": state.name, "type": "foreach", - "inputCollection": iterations, + "inputCollection": input_expr, "iterationParam": "idx", "actions": [self._default_action(state.func_name, "${ . }")], }