diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index a841c1584..000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,81 +0,0 @@ -version: 2.1 - -orbs: - python: circleci/python@1.4.0 - -jobs: - linting: - executor: python/default - steps: - - checkout - - restore_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - - run: - command: | - sudo apt update && sudo apt install libcurl4-openssl-dev - name: Install curl-config from Ubuntu APT - - run: - command: | - python3 install.py --aws --azure --gcp --no-local - name: Install pip dependencies - - run: - command: | - . python-venv/bin/activate - black sebs --check --config .black.toml - name: Python code formatting with black - - run: - command: | - . python-venv/bin/activate - flake8 sebs --config=.flake8.cfg --tee --output-file flake-reports - name: Python code lint with flake8 - - run: - command: | - . python-venv/bin/activate - mypy sebs --config-file=.mypy.ini - name: Python static code verification with mypy - - store_artifacts: - path: flake-reports - destination: flake-reports - test-aws: - executor: python/default - steps: - - checkout - - setup_remote_docker - - restore_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - - run: - command: | - if [[ -d $HOME/docker ]]; - then - ls $HOME/docker/*.tar.gz | xargs -I {file} sh -c "zcat {file} | docker load"; - else - docker pull mcopik/serverless-benchmarks:build.aws.python.3.7 - docker pull mcopik/serverless-benchmarks:build.aws.nodejs.12.x - fi - name: Load Docker images - - run: - command: | - python3 install.py --aws - name: Install pip dependencies - - run: - command: | - mkdir -p $HOME/docker - docker images mcopik/serverless-benchmarks --filter='dangling=false' --format '{{.Repository}}:{{.Tag}} {{.ID}}' |\ - xargs -n 2 -t sh -c 'test -e $HOME/docker/$1.tar.gz || docker save $0 | gzip -2 > $HOME/docker/$1.tar.gz' - name: Save Docker images - - save_cache: - key: deps1-{{ .Branch 
}}-{{ checksum "requirements.txt" }} - paths: - - "sebs-virtualenv" - - $HOME/docker - - run: - command: | - . sebs-virtualenv/bin/activate - tests/test_runner.py --deployment aws - name: Execute AWS tests - -workflows: - main: - jobs: - - linting - diff --git a/.dockerignore b/.dockerignore index 84416f19a..a62f9158b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -7,3 +7,9 @@ cache python-venv regression-* *_code +scylladb-volume +minio-volume +output +results +*.json +out_storage.json diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 000000000..1043be62e --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,55 @@ +name: Lint + +on: + push: + pull_request: + +jobs: + linting: + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v4 + + - name: Set up Python + id: setup-python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Cache virtualenv + uses: actions/cache@v4 + with: + path: python-venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('requirements.txt') }}-${{ github.ref_name }} + restore-keys: | + venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('requirements.txt') }}- + venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}- + + - name: Install system packages + run: | + sudo apt-get update + sudo apt-get install -y libcurl4-openssl-dev + + - name: Install Python dependencies (via install.py) + run: | + python3 install.py --no-aws --no-azure --no-gcp --no-openwhisk --no-local + + - name: Black (check) + run: | + . python-venv/bin/activate + black benchmarks --check --config .black.toml + + - name: Flake8 (lint) + run: | + . 
python-venv/bin/activate + # write to file and echo to stdout (requires flake8 with --tee support) + flake8 benchmarks --config=.flake8.cfg --tee --output-file flake-reports + + - name: Upload flake report + if: always() + uses: actions/upload-artifact@v4 + with: + name: flake-reports + path: flake-reports diff --git a/.gitignore b/.gitignore index 0712f6d7b..274165ed8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,6 @@ cache* minio-volume scylladb-volume - # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -188,3 +187,6 @@ cache # IntelliJ IDEA files .idea *.iml + +# MacOS Finder +**/.DS_Store \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 4feae9bfb..c33a17880 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,4 @@ url = https://github.com/mcopik/pypapi.git [submodule "benchmarks-data"] path = benchmarks-data - url = https://github.com/spcl/serverless-benchmarks-data.git + url = https://github.com/McLavish/serverless-benchmarks-data-dphpc.git diff --git a/.mypy.ini b/.mypy.ini index a8860d33e..21f3c0ad6 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -3,6 +3,9 @@ [mypy-docker] ignore_missing_imports = True +[mypy-docker.*] +ignore_missing_imports = True + [mypy-tzlocal] ignore_missing_imports = True diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..58f8adb8d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,30 @@ +# .pre-commit-config.yaml +repos: + - repo: local + hooks: + - id: flake8-local + name: flake8 (project env) + language: python + additional_dependencies: ["flake8==7.1.1"] + entry: flake8 + args: ["--config=.flake8.cfg"] + types: [python] + files: ^(sebs/|benchmarks/) + - repo: local + hooks: + - id: black-check-local + name: black --check (project env) + language: python + additional_dependencies: ["black==22.8.0"] + entry: black + args: ["--config=.black.toml", "--check", "--diff"] + types: [python] + files: ^(sebs/|benchmarks/) + # - repo: local + # 
hooks: + # - id: mypy-local + # name: mypy (project venv) + # language: system + # entry: bash -lc 'python -m mypy --config-file=.mypy.ini sebs' + # types: [python] + diff --git a/.tuff.toml b/.tuff.toml new file mode 100644 index 000000000..1caa6b79c --- /dev/null +++ b/.tuff.toml @@ -0,0 +1,7 @@ +line-length = 100 +target-version = "py38" +[lint] +select = ["E", "F", "W"] + +[lint.isort] +known-first-party = ["sebs"] diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..127ae8a76 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,15 @@ +{ + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true + }, + + "black-formatter.importStrategy": "fromEnvironment", + "black-formatter.path": [], + "black-formatter.args": ["--config=.black.toml"], + + "flake8.importStrategy": "fromEnvironment", + "flake8.path": [], + "flake8.args": ["--config=.flake8.cfg"], + "flake8.enabled": true +} diff --git a/QUICKSTART_SONATAFLOW.md b/QUICKSTART_SONATAFLOW.md new file mode 100644 index 000000000..b1c236946 --- /dev/null +++ b/QUICKSTART_SONATAFLOW.md @@ -0,0 +1,61 @@ +# SonataFlow quickstart + +This backend generates Serverless Workflow definitions from SeBS workflow specs and drives them through a running SonataFlow (Kogito) instance. Functions still run as local SeBS containers; SonataFlow orchestrates them via HTTP. + +## Prerequisites +- Docker available locally. +- A SonataFlow dev-mode/runtime reachable at `http://localhost:8080` (default). Example: + ```bash + docker run --rm -it -p 8080:8080 \ + -v "$PWD/output/workflow_resources/sonataflow":/home/kogito/serverless-workflow-project/src/main/resources/workflows \ + quay.io/kiegroup/kogito-swf-devmode:latest + ``` + The volume mount should point to the directory where SeBS writes generated `.sw.json` files. 
+ If you also need to provide `application.properties`, mount a directory to + `/home/kogito/serverless-workflow-project/src/main/resources` that contains both + `application.properties` and a `workflows/` subdirectory. +- Local object/NoSQL/redis services (reuse `run_local_workflows.sh` setup or `./sebs.py storage start all config/storage.json`). + +## Configure +Add a `deployment.sonataflow` block to your config (based on `config/example.json`): +```json +{ + "deployment": { + "name": "sonataflow", + "sonataflow": { + "resources": { + "redis": { "host": "localhost:6380", "password": "" }, + "runtime": { "url": "http://localhost:8080", "endpoint_prefix": "" } + }, + "storage": { + "type": "minio", + "address": "localhost", + "mapped_port": 9000, + "access_key": "minio", + "secret_key": "minio123", + "instance_id": "minio", + "input_buckets": [], + "output_buckets": [] + } + } + } +} +``` +Adjust storage/redis endpoints to match your local services. + +## Run +1. Start storage/redis (as in `run_local_workflows.sh`). +2. Start SonataFlow dev-mode and mount the output directory (see above). +3. Execute a workflow benchmark: + ```bash + ./sebs.py benchmark workflow 610.gen test \ + --config config/your-sonataflow-config.json \ + --deployment sonataflow --trigger http --repetitions 1 --verbose + ``` + +On first run SeBS will: +- Package workflow functions into local containers. +- Translate `definition.json` into `workflow_resources/sonataflow/.sw.json` under the generated code package directory (inside your `--output-dir` tree). +- Invoke SonataFlow at `{runtime_url}/{workflow_id}` with the workflow payload (and auto-fallback to `/services/{workflow_id}` if needed). + +If SonataFlow dev-mode fails with a “Duplicated item found with id …” error, ensure there is only one `.sw.json` file per workflow id under the mounted resources directory. 
diff --git a/benchmarks-data b/benchmarks-data index 7c7f67be6..48c6af825 160000 --- a/benchmarks-data +++ b/benchmarks-data @@ -1 +1 @@ -Subproject commit 7c7f67be6d6efd94a5de10607136ce237a673ef7 +Subproject commit 48c6af825425662691c107c5a52b765f6bf1822c diff --git a/benchmarks/000.microbenchmarks/010.sleep/input.py b/benchmarks/000.microbenchmarks/010.sleep/input.py index 041d2ba7f..af0427a6c 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/input.py +++ b/benchmarks/000.microbenchmarks/010.sleep/input.py @@ -1,12 +1,11 @@ +size_generators = {"test": 1, "small": 100, "large": 1000} -size_generators = { - 'test' : 1, - 'small' : 100, - 'large': 1000 -} def buckets_count(): return (0, 0) -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'sleep': size_generators[size] } + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"sleep": size_generators[size]} diff --git a/benchmarks/000.microbenchmarks/010.sleep/python/function.py b/benchmarks/000.microbenchmarks/010.sleep/python/function.py index 7dda59a57..64be15557 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/python/function.py +++ b/benchmarks/000.microbenchmarks/010.sleep/python/function.py @@ -1,9 +1,9 @@ - from time import sleep + def handler(event): # start timing - sleep_time = event.get('sleep') + sleep_time = event.get("sleep") sleep(sleep_time) - return { 'result': sleep_time } + return {"result": sleep_time} diff --git a/benchmarks/000.microbenchmarks/020.network-benchmark/input.py b/benchmarks/000.microbenchmarks/020.network-benchmark/input.py index 0d969bc74..8f43ffc5a 100644 --- a/benchmarks/000.microbenchmarks/020.network-benchmark/input.py +++ b/benchmarks/000.microbenchmarks/020.network-benchmark/input.py @@ -2,10 +2,12 @@ def buckets_count(): return 0, 1 -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, 
nosql_func): +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): return { - 'bucket': { - 'bucket': benchmarks_bucket, - 'output': output_paths[0], + "bucket": { + "bucket": benchmarks_bucket, + "output": output_paths[0], }, } diff --git a/benchmarks/000.microbenchmarks/020.network-benchmark/python/function.py b/benchmarks/000.microbenchmarks/020.network-benchmark/python/function.py index eb8ccdcf2..58c376a2d 100644 --- a/benchmarks/000.microbenchmarks/020.network-benchmark/python/function.py +++ b/benchmarks/000.microbenchmarks/020.network-benchmark/python/function.py @@ -1,27 +1,26 @@ import csv -import json import os.path import socket from datetime import datetime -from time import sleep from . import storage + def handler(event): - request_id = event['request-id'] - address = event['server-address'] - port = event['server-port'] - repetitions = event['repetitions'] - output_bucket = event.get('bucket').get('bucket') - output_prefix = event.get('bucket').get('output') + request_id = event["request-id"] + address = event["server-address"] + port = event["server-port"] + repetitions = event["repetitions"] + output_bucket = event.get("bucket").get("bucket") + output_prefix = event.get("bucket").get("output") times = [] i = 0 socket.setdefaulttimeout(3) server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(('', 0)) - message = request_id.encode('utf-8') + server_socket.bind(("", 0)) + message = request_id.encode("utf-8") adr = (address, port) consecutive_failures = 0 while i < repetitions + 1: @@ -43,16 +42,16 @@ def handler(event): consecutive_failures = 0 server_socket.settimeout(2) server_socket.close() - + if consecutive_failures != 5: - with open('/tmp/data.csv', 'w', newline='') as csvfile: - writer = csv.writer(csvfile, delimiter=',') - writer.writerow(["id", "client_send", "client_rcv"]) + with 
open("/tmp/data.csv", "w", newline="") as csvfile: + writer = csv.writer(csvfile, delimiter=",") + writer.writerow(["id", "client_send", "client_rcv"]) for row in times: writer.writerow(row) - + client = storage.storage.get_instance() - filename = 'results-{}.csv'.format(request_id) - key = client.upload(output_bucket, os.path.join(output_prefix, filename), '/tmp/data.csv') + filename = "results-{}.csv".format(request_id) + key = client.upload(output_bucket, os.path.join(output_prefix, filename), "/tmp/data.csv") - return { 'result': key } + return {"result": key} diff --git a/benchmarks/000.microbenchmarks/030.clock-synchronization/input.py b/benchmarks/000.microbenchmarks/030.clock-synchronization/input.py index 427215380..8f43ffc5a 100644 --- a/benchmarks/000.microbenchmarks/030.clock-synchronization/input.py +++ b/benchmarks/000.microbenchmarks/030.clock-synchronization/input.py @@ -1,12 +1,13 @@ - - def buckets_count(): return 0, 1 -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): return { - 'bucket': { - 'bucket': benchmarks_bucket, - 'output': output_paths[0], + "bucket": { + "bucket": benchmarks_bucket, + "output": output_paths[0], }, } diff --git a/benchmarks/000.microbenchmarks/030.clock-synchronization/python/function.py b/benchmarks/000.microbenchmarks/030.clock-synchronization/python/function.py index 9ffd978ae..9cf93eccf 100644 --- a/benchmarks/000.microbenchmarks/030.clock-synchronization/python/function.py +++ b/benchmarks/000.microbenchmarks/030.clock-synchronization/python/function.py @@ -1,28 +1,27 @@ import csv -import json import os import socket from datetime import datetime -from time import sleep from . 
import storage + def handler(event): - request_id = event['request-id'] - address = event['server-address'] - port = event['server-port'] - repetitions = event['repetitions'] - output_bucket = event.get('bucket').get('bucket') - output_prefix = event.get('bucket').get('output') + request_id = event["request-id"] + address = event["server-address"] + port = event["server-port"] + repetitions = event["repetitions"] + output_bucket = event.get("bucket").get("bucket") + output_prefix = event.get("bucket").get("output") times = [] print("Starting communication with {}:{}".format(address, port)) i = 0 socket.setdefaulttimeout(4) server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(('', 0)) - message = request_id.encode('utf-8') + server_socket.bind(("", 0)) + message = request_id.encode("utf-8") adr = (address, port) consecutive_failures = 0 measurements_not_smaller = 0 @@ -43,11 +42,13 @@ def handler(event): if i > 0: times.append([i, send_begin, recv_end]) cur_time = recv_end - send_begin - print("Time {} Min Time {} NotSmaller {}".format(cur_time, cur_min, measurements_not_smaller)) + print( + "Time {} Min Time {} NotSmaller {}".format(cur_time, cur_min, measurements_not_smaller) + ) if cur_time > cur_min and cur_min > 0: measurements_not_smaller += 1 if measurements_not_smaller == repetitions: - message = "stop".encode('utf-8') + message = "stop".encode("utf-8") server_socket.sendto(message, adr) break else: @@ -57,18 +58,18 @@ def handler(event): consecutive_failures = 0 server_socket.settimeout(4) server_socket.close() - + if consecutive_failures != 5: - with open('/tmp/data.csv', 'w', newline='') as csvfile: - writer = csv.writer(csvfile, delimiter=',') - writer.writerow(["id", "client_send", "client_rcv"]) + with open("/tmp/data.csv", "w", newline="") as csvfile: + writer = csv.writer(csvfile, delimiter=",") + writer.writerow(["id", "client_send", "client_rcv"]) 
for row in times: writer.writerow(row) - + client = storage.storage.get_instance() - filename = 'results-{}.csv'.format(request_id) - key = client.upload(output_bucket, os.path.join(output_prefix, filename), '/tmp/data.csv') + filename = "results-{}.csv".format(request_id) + key = client.upload(output_bucket, os.path.join(output_prefix, filename), "/tmp/data.csv") else: key = None - return { 'result': {'bucket-key': key, 'timestamp': event['income-timestamp']} } + return {"result": {"bucket-key": key, "timestamp": event["income-timestamp"]}} diff --git a/benchmarks/000.microbenchmarks/040.server-reply/input.py b/benchmarks/000.microbenchmarks/040.server-reply/input.py index 041d2ba7f..af0427a6c 100644 --- a/benchmarks/000.microbenchmarks/040.server-reply/input.py +++ b/benchmarks/000.microbenchmarks/040.server-reply/input.py @@ -1,12 +1,11 @@ +size_generators = {"test": 1, "small": 100, "large": 1000} -size_generators = { - 'test' : 1, - 'small' : 100, - 'large': 1000 -} def buckets_count(): return (0, 0) -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'sleep': size_generators[size] } + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"sleep": size_generators[size]} diff --git a/benchmarks/000.microbenchmarks/040.server-reply/python/function.py b/benchmarks/000.microbenchmarks/040.server-reply/python/function.py index fb5b57aa3..4c2a294ba 100644 --- a/benchmarks/000.microbenchmarks/040.server-reply/python/function.py +++ b/benchmarks/000.microbenchmarks/040.server-reply/python/function.py @@ -1,11 +1,10 @@ - import socket -from time import sleep + def handler(event): # start timing - addr = (event.get('ip-address'), event.get('port')) + addr = (event.get("ip-address"), event.get("port")) socket.setdefaulttimeout(20) s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect(addr) diff --git 
a/benchmarks/100.webapps/110.dynamic-html/input.py b/benchmarks/100.webapps/110.dynamic-html/input.py index 98dac88b2..c20154ec3 100644 --- a/benchmarks/100.webapps/110.dynamic-html/input.py +++ b/benchmarks/100.webapps/110.dynamic-html/input.py @@ -1,11 +1,9 @@ +size_generators = {"test": 10, "small": 1000, "large": 100000} -size_generators = { - 'test' : 10, - 'small' : 1000, - 'large': 100000 -} -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - input_config = {'username': 'testname'} - input_config['random_len'] = size_generators[size] +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + input_config = {"username": "testname"} + input_config["random_len"] = size_generators[size] return input_config diff --git a/benchmarks/100.webapps/110.dynamic-html/python/function.py b/benchmarks/100.webapps/110.dynamic-html/python/function.py index 7c990f4eb..6f7b42bc5 100644 --- a/benchmarks/100.webapps/110.dynamic-html/python/function.py +++ b/benchmarks/100.webapps/110.dynamic-html/python/function.py @@ -1,22 +1,21 @@ -from datetime import datetime -from random import sample +from datetime import datetime +from random import sample from os import path -from time import time -import os from jinja2 import Template SCRIPT_DIR = path.abspath(path.join(path.dirname(__file__))) + def handler(event): # start timing - name = event.get('username') - size = event.get('random_len') + name = event.get("username") + size = event.get("random_len") cur_time = datetime.now() random_numbers = sample(range(0, 1000000), size) - template = Template( open(path.join(SCRIPT_DIR, 'templates', 'template.html'), 'r').read()) - html = template.render(username = name, cur_time = cur_time, random_numbers = random_numbers) + template = Template(open(path.join(SCRIPT_DIR, "templates", "template.html"), "r").read()) + html = template.render(username=name, cur_time=cur_time, 
random_numbers=random_numbers) # end timing - # dump stats - return {'result': html} + # dump stats + return {"result": html} diff --git a/benchmarks/100.webapps/120.uploader/input.py b/benchmarks/100.webapps/120.uploader/input.py index ce6169ccb..7aafb2b22 100644 --- a/benchmarks/100.webapps/120.uploader/input.py +++ b/benchmarks/100.webapps/120.uploader/input.py @@ -1,19 +1,25 @@ - url_generators = { # source: mlperf fake_imagenet.sh. 230 kB - 'test' : 'https://upload.wikimedia.org/wikipedia/commons/thumb/e/e7/Jammlich_crop.jpg/800px-Jammlich_crop.jpg', + "test": ( + "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e7/" + "Jammlich_crop.jpg/800px-Jammlich_crop.jpg" + ), # video: HPX source code, 6.7 MB - 'small': 'https://github.com/STEllAR-GROUP/hpx/archive/refs/tags/1.4.0.zip', + "small": "https://github.com/STEllAR-GROUP/hpx/archive/refs/tags/1.4.0.zip", # resnet model from pytorch. 98M - 'large': 'https://download.pytorch.org/models/resnet50-19c8e357.pth' + "large": "https://download.pytorch.org/models/resnet50-19c8e357.pth", } + def buckets_count(): return (0, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): - input_config = {'object': {}, 'bucket': {}} - input_config['object']['url'] = url_generators[size] - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['output'] = output_buckets[0] + +def generate_input( + data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func +): + input_config = {"object": {}, "bucket": {}} + input_config["object"]["url"] = url_generators[size] + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["output"] = output_buckets[0] return input_config diff --git a/benchmarks/100.webapps/120.uploader/python/function.py b/benchmarks/100.webapps/120.uploader/python/function.py index c13656d0c..cb17131f1 100755 --- a/benchmarks/100.webapps/120.uploader/python/function.py +++ 
b/benchmarks/100.webapps/120.uploader/python/function.py @@ -1,24 +1,31 @@ - import datetime import os -import uuid import urllib.request from . import storage + client = storage.storage.get_instance() +SEBS_USER_AGENT = ( + "SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2" +) + def handler(event): - - bucket = event.get('bucket').get('bucket') - output_prefix = event.get('bucket').get('output') - url = event.get('object').get('url') + + bucket = event.get("bucket").get("bucket") + output_prefix = event.get("bucket").get("output") + url = event.get("object").get("url") name = os.path.basename(url) - download_path = '/tmp/{}'.format(name) + download_path = "/tmp/{}".format(name) process_begin = datetime.datetime.now() - urllib.request.urlretrieve(url, filename=download_path) + req = urllib.request.Request(url) + req.add_header("User-Agent", SEBS_USER_AGENT) + with open(download_path, "wb") as f: + with urllib.request.urlopen(req) as response: + f.write(response.read()) size = os.path.getsize(download_path) process_end = datetime.datetime.now() @@ -29,16 +36,12 @@ def handler(event): process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'url': url, - 'key': key_name - }, - 'measurement': { - 'download_time': 0, - 'download_size': 0, - 'upload_time': upload_time, - 'upload_size': size, - 'compute_time': process_time - } + "result": {"bucket": bucket, "url": url, "key": key_name}, + "measurement": { + "download_time": 0, + "download_size": 0, + "upload_time": upload_time, + "upload_size": size, + "compute_time": process_time, + }, } diff --git a/benchmarks/200.multimedia/210.thumbnailer/input.py b/benchmarks/200.multimedia/210.thumbnailer/input.py index 8943effed..6f04bfafb 100644 --- a/benchmarks/200.multimedia/210.thumbnailer/input.py +++ 
b/benchmarks/200.multimedia/210.thumbnailer/input.py @@ -1,9 +1,12 @@ -import glob, os +import glob +import os + def buckets_count(): return (1, 1) -''' + +""" Generate test, small and large workload for thumbnailer. :param data_dir: directory where benchmark data is placed @@ -11,19 +14,23 @@ def buckets_count(): :param input_buckets: input storage containers for this benchmark :param output_buckets: :param upload_func: upload function taking three params(bucket_idx, key, filepath) -''' -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): - for file in glob.glob(os.path.join(data_dir, '*.jpg')): + for file in glob.glob(os.path.join(data_dir, "*.jpg")): img = os.path.relpath(file, data_dir) upload_func(0, img, file) - #TODO: multiple datasets - input_config = {'object': {}, 'bucket': {}} - input_config['object']['key'] = img - input_config['object']['width'] = 200 - input_config['object']['height'] = 200 - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[0] - input_config['bucket']['output'] = output_paths[0] + # TODO: multiple datasets + input_config = {"object": {}, "bucket": {}} + input_config["object"]["key"] = img + input_config["object"]["width"] = 200 + input_config["object"]["height"] = 200 + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[0] + input_config["bucket"]["output"] = output_paths[0] return input_config diff --git a/benchmarks/200.multimedia/210.thumbnailer/python/function.py b/benchmarks/200.multimedia/210.thumbnailer/python/function.py index 20527067b..2df0a7bfb 100755 --- a/benchmarks/200.multimedia/210.thumbnailer/python/function.py +++ b/benchmarks/200.multimedia/210.thumbnailer/python/function.py @@ -1,44 +1,45 @@ import datetime import io import os -import sys 
-import uuid from urllib.parse import unquote_plus from PIL import Image from . import storage + client = storage.storage.get_instance() # Disk-based solution -#def resize_image(image_path, resized_path, w, h): +# def resize_image(image_path, resized_path, w, h): # with Image.open(image_path) as image: # image.thumbnail((w,h)) # image.save(resized_path) + # Memory-based solution def resize_image(image_bytes, w, h): with Image.open(io.BytesIO(image_bytes)) as image: - image.thumbnail((w,h)) + image.thumbnail((w, h)) out = io.BytesIO() - image.save(out, format='jpeg') + image.save(out, format="jpeg") # necessary to rewind to the beginning of the buffer out.seek(0) return out + def handler(event): - - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - output_prefix = event.get('bucket').get('output') - key = unquote_plus(event.get('object').get('key')) - width = event.get('object').get('width') - height = event.get('object').get('height') + + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + output_prefix = event.get("bucket").get("output") + key = unquote_plus(event.get("object").get("key")) + width = event.get("object").get("width") + height = event.get("object").get("height") # UUID to handle multiple calls - #download_path = '/tmp/{}-{}'.format(uuid.uuid4(), key) - #upload_path = '/tmp/resized-{}'.format(key) - #client.download(input_bucket, key, download_path) - #resize_image(download_path, upload_path, width, height) - #client.upload(output_bucket, key, upload_path) + # download_path = '/tmp/{}-{}'.format(uuid.uuid4(), key) + # upload_path = '/tmp/resized-{}'.format(key) + # client.download(input_bucket, key, download_path) + # resize_image(download_path, upload_path, width, height) + # client.upload(output_bucket, key, upload_path) download_begin = datetime.datetime.now() img = client.download_stream(bucket, os.path.join(input_prefix, key)) download_end = 
datetime.datetime.now() @@ -56,15 +57,12 @@ def handler(event): upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'key': key_name - }, - 'measurement': { - 'download_time': download_time, - 'download_size': len(img), - 'upload_time': upload_time, - 'upload_size': resized_size, - 'compute_time': process_time - } + "result": {"bucket": bucket, "key": key_name}, + "measurement": { + "download_time": download_time, + "download_size": len(img), + "upload_time": upload_time, + "upload_size": resized_size, + "compute_time": process_time, + }, } diff --git a/benchmarks/200.multimedia/220.video-processing/input.py b/benchmarks/200.multimedia/220.video-processing/input.py index 6da31647f..86c7191cb 100644 --- a/benchmarks/200.multimedia/220.video-processing/input.py +++ b/benchmarks/200.multimedia/220.video-processing/input.py @@ -1,9 +1,12 @@ -import glob, os +import glob +import os + def buckets_count(): return (1, 1) -''' + +""" Generate test, small and large workload for thumbnailer. 
:param data_dir: directory where benchmark data is placed @@ -11,17 +14,21 @@ def buckets_count(): :param input_buckets: input storage containers for this benchmark :param output_buckets: :param upload_func: upload function taking three params(bucket_idx, key, filepath) -''' -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - for file in glob.glob(os.path.join(data_dir, '*.mp4')): +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + for file in glob.glob(os.path.join(data_dir, "*.mp4")): img = os.path.relpath(file, data_dir) upload_func(0, img, file) - #TODO: multiple datasets - input_config = {'object': {}, 'bucket': {}} - input_config['object']['key'] = img - input_config['object']['op'] = 'watermark' - input_config['object']['duration'] = 1 - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[0] - input_config['bucket']['output'] = output_paths[0] + # TODO: multiple datasets + input_config = {"object": {}, "bucket": {}} + input_config["object"]["key"] = img + input_config["object"]["op"] = "watermark" + input_config["object"]["duration"] = 1 + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[0] + input_config["bucket"]["output"] = output_paths[0] return input_config diff --git a/benchmarks/200.multimedia/220.video-processing/python/function.py b/benchmarks/200.multimedia/220.video-processing/python/function.py index 9f8a869aa..ab132ba2e 100755 --- a/benchmarks/200.multimedia/220.video-processing/python/function.py +++ b/benchmarks/200.multimedia/220.video-processing/python/function.py @@ -7,62 +7,84 @@ from . 
import storage + client = storage.storage.get_instance() SCRIPT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) + def call_ffmpeg(args): - ret = subprocess.run([os.path.join(SCRIPT_DIR, 'ffmpeg', 'ffmpeg'), '-y'] + args, - #subprocess might inherit Lambda's input for some reason - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ret = subprocess.run( + [os.path.join(SCRIPT_DIR, "ffmpeg", "ffmpeg"), "-y"] + args, + # subprocess might inherit Lambda's input for some reason + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, ) if ret.returncode != 0: - print('Invocation of ffmpeg failed!') - print('Out: ', ret.stdout.decode('utf-8')) + print("Invocation of ffmpeg failed!") + print("Out: ", ret.stdout.decode("utf-8")) raise RuntimeError() + # https://superuser.com/questions/556029/how-do-i-convert-a-video-to-gif-using-ffmpeg-with-reasonable-quality def to_gif(video, duration, event): - output = '/tmp/processed-{}.gif'.format(os.path.basename(video)) - call_ffmpeg(["-i", video, - "-t", - "{0}".format(duration), - "-vf", - "fps=10,scale=320:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse", - "-loop", "0", - output]) + output = "/tmp/processed-{}.gif".format(os.path.basename(video)) + call_ffmpeg( + [ + "-i", + video, + "-t", + "{0}".format(duration), + "-vf", + "fps=10,scale=320:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse", + "-loop", + "0", + output, + ] + ) return output + # https://devopstar.com/2019/01/28/serverless-watermark-using-aws-lambda-layers-ffmpeg/ def watermark(video, duration, event): - output = '/tmp/processed-{}'.format(os.path.basename(video)) + output = "/tmp/processed-{}".format(os.path.basename(video)) watermark_file = os.path.dirname(os.path.realpath(__file__)) - call_ffmpeg([ - "-i", video, - "-i", os.path.join(watermark_file, os.path.join('resources', 'watermark.png')), - "-t", "{0}".format(duration), - "-filter_complex", 
"overlay=main_w/2-overlay_w/2:main_h/2-overlay_h/2", - output]) + call_ffmpeg( + [ + "-i", + video, + "-i", + os.path.join(watermark_file, os.path.join("resources", "watermark.png")), + "-t", + "{0}".format(duration), + "-filter_complex", + "overlay=main_w/2-overlay_w/2:main_h/2-overlay_h/2", + output, + ] + ) return output + def transcode_mp3(video, duration, event): pass -operations = { 'transcode' : transcode_mp3, 'extract-gif' : to_gif, 'watermark' : watermark } + +operations = {"transcode": transcode_mp3, "extract-gif": to_gif, "watermark": watermark} + def handler(event): - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - output_prefix = event.get('bucket').get('output') - key = event.get('object').get('key') - duration = event.get('object').get('duration') - op = event.get('object').get('op') - download_path = '/tmp/{}'.format(key) + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + output_prefix = event.get("bucket").get("output") + key = event.get("object").get("key") + duration = event.get("object").get("duration") + op = event.get("object").get("op") + download_path = "/tmp/{}".format(key) # Restore executable permission - ffmpeg_binary = os.path.join(SCRIPT_DIR, 'ffmpeg', 'ffmpeg') + ffmpeg_binary = os.path.join(SCRIPT_DIR, "ffmpeg", "ffmpeg") # needed on Azure but read-only filesystem on AWS try: st = os.stat(ffmpeg_binary) @@ -89,16 +111,12 @@ def handler(event): upload_time = (upload_stop - upload_begin) / datetime.timedelta(microseconds=1) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'key': upload_key - }, - 'measurement': { - 'download_time': download_time, - 'download_size': download_size, - 'upload_time': upload_time, - 'upload_size': upload_size, - 'compute_time': process_time - } - } - + "result": {"bucket": bucket, "key": upload_key}, + "measurement": { + "download_time": 
download_time, + "download_size": download_size, + "upload_time": upload_time, + "upload_size": upload_size, + "compute_time": process_time, + }, + } diff --git a/benchmarks/300.utilities/311.compression/input.py b/benchmarks/300.utilities/311.compression/input.py index 5f88bc91a..e9e706bd5 100644 --- a/benchmarks/300.utilities/311.compression/input.py +++ b/benchmarks/300.utilities/311.compression/input.py @@ -1,4 +1,5 @@ -import glob, os +import os + def buckets_count(): return (1, 1) @@ -9,11 +10,12 @@ def upload_files(data_root, data_dir, upload_func): for root, dirs, files in os.walk(data_dir): prefix = os.path.relpath(root, data_root) for file in files: - file_name = prefix + '/' + file + file_name = prefix + "/" + file filepath = os.path.join(root, file) upload_func(0, file_name, filepath) -''' + +""" Generate test, small and large workload for compression test. :param data_dir: directory where benchmark data is placed @@ -21,8 +23,12 @@ def upload_files(data_root, data_dir, upload_func): :param input_buckets: input storage containers for this benchmark :param output_buckets: :param upload_func: upload function taking three params(bucket_idx, key, filepath) -''' -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): # upload different datasets datasets = [] @@ -30,9 +36,9 @@ def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, datasets.append(dir) upload_files(data_dir, os.path.join(data_dir, dir), upload_func) - input_config = {'object': {}, 'bucket': {}} - input_config['object']['key'] = datasets[0] - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[0] - input_config['bucket']['output'] = output_paths[0] + input_config = {"object": {}, "bucket": {}} + input_config["object"]["key"] = datasets[0] + 
input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[0] + input_config["bucket"]["output"] = output_paths[0] return input_config diff --git a/benchmarks/300.utilities/311.compression/python/function.py b/benchmarks/300.utilities/311.compression/python/function.py index f758e14e4..8ceb52d2f 100755 --- a/benchmarks/300.utilities/311.compression/python/function.py +++ b/benchmarks/300.utilities/311.compression/python/function.py @@ -1,13 +1,13 @@ import datetime -import io import os import shutil import uuid -import zlib from . import storage + client = storage.storage.get_instance() + def parse_directory(directory): size = 0 @@ -16,13 +16,14 @@ def parse_directory(directory): size += os.path.getsize(os.path.join(root, file)) return size + def handler(event): - - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - output_prefix = event.get('bucket').get('output') - key = event.get('object').get('key') - download_path = '/tmp/{}-{}'.format(key, uuid.uuid4()) + + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + output_prefix = event.get("bucket").get("output") + key = event.get("object").get("key") + download_path = "/tmp/{}-{}".format(key, uuid.uuid4()) os.makedirs(download_path) s3_download_begin = datetime.datetime.now() @@ -31,29 +32,27 @@ def handler(event): size = parse_directory(download_path) compress_begin = datetime.datetime.now() - shutil.make_archive(os.path.join(download_path, key), 'zip', root_dir=download_path) + shutil.make_archive(os.path.join(download_path, key), "zip", root_dir=download_path) compress_end = datetime.datetime.now() s3_upload_begin = datetime.datetime.now() - archive_name = '{}.zip'.format(key) + archive_name = "{}.zip".format(key) archive_size = os.path.getsize(os.path.join(download_path, archive_name)) - key_name = client.upload(bucket, os.path.join(output_prefix, archive_name), 
os.path.join(download_path, archive_name)) + key_name = client.upload( + bucket, os.path.join(output_prefix, archive_name), os.path.join(download_path, archive_name) + ) s3_upload_stop = datetime.datetime.now() download_time = (s3_download_stop - s3_download_begin) / datetime.timedelta(microseconds=1) upload_time = (s3_upload_stop - s3_upload_begin) / datetime.timedelta(microseconds=1) process_time = (compress_end - compress_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'key': key_name - }, - 'measurement': { - 'download_time': download_time, - 'download_size': size, - 'upload_time': upload_time, - 'upload_size': archive_size, - 'compute_time': process_time - } - } - + "result": {"bucket": bucket, "key": key_name}, + "measurement": { + "download_time": download_time, + "download_size": size, + "upload_time": upload_time, + "upload_size": archive_size, + "compute_time": process_time, + }, + } diff --git a/benchmarks/400.inference/411.image-recognition/input.py b/benchmarks/400.inference/411.image-recognition/input.py index 45d7215a6..c5ce190d0 100644 --- a/benchmarks/400.inference/411.image-recognition/input.py +++ b/benchmarks/400.inference/411.image-recognition/input.py @@ -1,18 +1,21 @@ -import glob, os +import os + def buckets_count(): return (2, 0) + def upload_files(data_root, data_dir, upload_func): for root, dirs, files in os.walk(data_dir): prefix = os.path.relpath(root, data_root) for file in files: - file_name = prefix + '/' + file + file_name = prefix + "/" + file filepath = os.path.join(root, file) upload_func(0, file_name, filepath) -''' + +""" Generate test, small and large workload for compression test. 
:param data_dir: directory where benchmark data is placed @@ -20,25 +23,29 @@ def upload_files(data_root, data_dir, upload_func): :param input_buckets: input storage containers for this benchmark :param output_buckets: :param upload_func: upload function taking three params(bucket_idx, key, filepath) -''' -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): # upload model - model_name = 'resnet50-19c8e357.pth' - upload_func(0, model_name, os.path.join(data_dir, 'model', model_name)) + model_name = "resnet50-19c8e357.pth" + upload_func(0, model_name, os.path.join(data_dir, "model", model_name)) input_images = [] - resnet_path = os.path.join(data_dir, 'fake-resnet') - with open(os.path.join(resnet_path, 'val_map.txt'), 'r') as f: + resnet_path = os.path.join(data_dir, "fake-resnet") + with open(os.path.join(resnet_path, "val_map.txt"), "r") as f: for line in f: img, img_class = line.split() input_images.append((img, img_class)) upload_func(1, img, os.path.join(resnet_path, img)) - - input_config = {'object': {}, 'bucket': {}} - input_config['object']['model'] = model_name - input_config['object']['input'] = input_images[0][0] - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[1] - input_config['bucket']['model'] = input_paths[0] + + input_config = {"object": {}, "bucket": {}} + input_config["object"]["model"] = model_name + input_config["object"]["input"] = input_images[0][0] + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[1] + input_config["bucket"]["model"] = input_paths[0] return input_config diff --git a/benchmarks/400.inference/411.image-recognition/python/function.py b/benchmarks/400.inference/411.image-recognition/python/function.py index 411386419..0cfa1c57f 100644 --- 
a/benchmarks/400.inference/411.image-recognition/python/function.py +++ b/benchmarks/400.inference/411.image-recognition/python/function.py @@ -1,14 +1,20 @@ - -import datetime, json, os, uuid +import datetime +import json +import os +import uuid # Extract zipped torch model - used in Python 3.8 and 3.9 # The reason is that torch versions supported for these Python # versions are too large for Lambda packages. -if os.path.exists('function/torch.zip'): - import zipfile, sys +if os.path.exists("function/torch.zip"): + import sys + import zipfile + # we cannot write to the read-only filesystem - zipfile.ZipFile('function/torch.zip').extractall('/tmp/') - sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages')) + zipfile.ZipFile("function/torch.zip").extractall("/tmp/") + sys.path.append( + os.path.join(os.path.dirname(__file__), "/tmp/.python_packages/lib/site-packages") + ) from PIL import Image import torch @@ -16,21 +22,23 @@ from torchvision.models import resnet50 from . 
import storage + client = storage.storage.get_instance() SCRIPT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) -class_idx = json.load(open(os.path.join(SCRIPT_DIR, "imagenet_class_index.json"), 'r')) +class_idx = json.load(open(os.path.join(SCRIPT_DIR, "imagenet_class_index.json"), "r")) idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))] model = None + def handler(event): - - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - model_prefix = event.get('bucket').get('model') - key = event.get('object').get('input') - model_key = event.get('object').get('model') - download_path = '/tmp/{}-{}'.format(key, uuid.uuid4()) + + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + model_prefix = event.get("bucket").get("model") + key = event.get("object").get("input") + model_key = event.get("object").get("model") + download_path = "/tmp/{}-{}".format(key, uuid.uuid4()) image_download_begin = datetime.datetime.now() image_path = download_path @@ -40,7 +48,7 @@ def handler(event): global model if not model: model_download_begin = datetime.datetime.now() - model_path = os.path.join('/tmp', model_key) + model_path = os.path.join("/tmp", model_key) client.download(bucket, os.path.join(model_prefix, model_key), model_path) model_download_end = datetime.datetime.now() model_process_begin = datetime.datetime.now() @@ -53,36 +61,38 @@ def handler(event): model_download_end = model_download_begin model_process_begin = datetime.datetime.now() model_process_end = model_process_begin - + process_begin = datetime.datetime.now() input_image = Image.open(image_path) - preprocess = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ]) + preprocess = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + 
transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ) input_tensor = preprocess(input_image) - input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model + input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model output = model(input_batch) _, index = torch.max(output, 1) - # The output has unnormalized scores. To get probabilities, you can run a softmax on it. - prob = torch.nn.functional.softmax(output[0], dim=0) - _, indices = torch.sort(output, descending = True) ret = idx2label[index] process_end = datetime.datetime.now() - download_time = (image_download_end- image_download_begin) / datetime.timedelta(microseconds=1) - model_download_time = (model_download_end - model_download_begin) / datetime.timedelta(microseconds=1) - model_process_time = (model_process_end - model_process_begin) / datetime.timedelta(microseconds=1) + download_time = (image_download_end - image_download_begin) / datetime.timedelta(microseconds=1) + model_download_time = (model_download_end - model_download_begin) / datetime.timedelta( + microseconds=1 + ) + model_process_time = (model_process_end - model_process_begin) / datetime.timedelta( + microseconds=1 + ) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': {'idx': index.item(), 'class': ret}, - 'measurement': { - 'download_time': download_time + model_download_time, - 'compute_time': process_time + model_process_time, - 'model_time': model_process_time, - 'model_download_time': model_download_time - } - } - + "result": {"idx": index.item(), "class": ret}, + "measurement": { + "download_time": download_time + model_download_time, + "compute_time": process_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + }, + } diff --git a/benchmarks/400.inference/412.language-bert/config.json 
b/benchmarks/400.inference/412.language-bert/config.json new file mode 100644 index 000000000..94ede7925 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 512, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/400.inference/412.language-bert/input.py b/benchmarks/400.inference/412.language-bert/input.py new file mode 100644 index 000000000..9af7ecb56 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/input.py @@ -0,0 +1,33 @@ +import os + + +def buckets_count(): + # model bucket and text bucket + return (2, 0) + + +def upload_files(data_root, data_dir, upload_func): + for root, _, files in os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + filepath = os.path.join(root, file) + relative_key = os.path.join(prefix, file) + upload_func(0, relative_key, filepath) + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + model_archive = "bert-tiny-onnx.tar.gz" + upload_func(0, model_archive, os.path.join(data_dir, "model", model_archive)) + + text_filename = "sentences.jsonl" + upload_func(1, text_filename, os.path.join(data_dir, "text", text_filename)) + + input_config = {"object": {}, "bucket": {}} + input_config["object"]["model"] = model_archive + input_config["object"]["input"] = text_filename + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["model"] = input_paths[0] + input_config["bucket"]["text"] = input_paths[1] + return input_config diff --git a/benchmarks/400.inference/412.language-bert/python/function.py b/benchmarks/400.inference/412.language-bert/python/function.py new file mode 100644 index 000000000..7e4f981ef --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/function.py @@ -0,0 +1,157 @@ +import datetime +import json +import os +import tarfile +import uuid +from typing import Dict, List, Optional + 
+import numpy as np +import onnxruntime as ort +from tokenizers import Tokenizer + +from . import storage + +client = storage.storage.get_instance() + +MODEL_ARCHIVE = "bert-tiny-onnx.tar.gz" +MODEL_DIRECTORY = "/tmp/bert_language_model" +MODEL_SUBDIR = "bert-tiny-onnx" + +_session: Optional[ort.InferenceSession] = None +_tokenizer: Optional[Tokenizer] = None +_labels: Optional[Dict[int, str]] = None + + +def _ensure_model(bucket: str, model_prefix: str): + """ + Lazily download and initialize the ONNX model and tokenizer. + """ + global _session, _tokenizer, _labels + + model_path = os.path.join(MODEL_DIRECTORY, MODEL_SUBDIR) + model_download_begin = datetime.datetime.now() + model_download_end = model_download_begin + + if _session is None or _tokenizer is None or _labels is None: + if not os.path.exists(model_path): + os.makedirs(MODEL_DIRECTORY, exist_ok=True) + archive_path = os.path.join("/tmp", f"{uuid.uuid4()}-{MODEL_ARCHIVE}") + client.download(bucket, os.path.join(model_prefix, MODEL_ARCHIVE), archive_path) + model_download_end = datetime.datetime.now() + + with tarfile.open(archive_path, "r:gz") as tar: + tar.extractall(MODEL_DIRECTORY) + os.remove(archive_path) + else: + model_download_begin = datetime.datetime.now() + model_download_end = model_download_begin + + model_process_begin = datetime.datetime.now() + tokenizer_path = os.path.join(model_path, "tokenizer.json") + _tokenizer = Tokenizer.from_file(tokenizer_path) + _tokenizer.enable_truncation(max_length=128) + _tokenizer.enable_padding(length=128) + + label_map_path = os.path.join(model_path, "label_map.json") + with open(label_map_path, "r") as f: + raw_labels = json.load(f) + _labels = {int(idx): label for idx, label in raw_labels.items()} + + onnx_path = os.path.join(model_path, "model.onnx") + + available = ort.get_available_providers() + if "CUDAExecutionProvider" not in available: + raise RuntimeError(f"CUDAExecutionProvider unavailable (have: {available})") + + _session = 
ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"]) + model_process_end = datetime.datetime.now() + else: + model_process_begin = datetime.datetime.now() + model_process_end = model_process_begin + + model_download_time = (model_download_end - model_download_begin) / datetime.timedelta( + microseconds=1 + ) + model_process_time = (model_process_end - model_process_begin) / datetime.timedelta( + microseconds=1 + ) + + return model_download_time, model_process_time + + +def _prepare_inputs(sentences: List[str]): + assert _tokenizer is not None + + encodings = _tokenizer.encode_batch(sentences) + + input_ids = np.array([enc.ids for enc in encodings], dtype=np.int64) + attention_mask = np.array([enc.attention_mask for enc in encodings], dtype=np.int64) + token_type_ids = np.array( + [enc.type_ids if enc.type_ids else [0] * len(enc.ids) for enc in encodings], + dtype=np.int64, + ) + + return { + "input_ids": input_ids, + "attention_mask": attention_mask, + "token_type_ids": token_type_ids, + } + + +def _softmax(logits: np.ndarray) -> np.ndarray: + shifted = logits - np.max(logits, axis=1, keepdims=True) + exp = np.exp(shifted) + return exp / np.sum(exp, axis=1, keepdims=True) + + +def handler(event): + bucket = event.get("bucket", {}).get("bucket") + model_prefix = event.get("bucket", {}).get("model") + text_prefix = event.get("bucket", {}).get("text") + text_key = event.get("object", {}).get("input") + + download_begin = datetime.datetime.now() + text_download_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(text_key)}") + client.download(bucket, os.path.join(text_prefix, text_key), text_download_path) + download_end = datetime.datetime.now() + + model_download_time, model_process_time = _ensure_model(bucket, model_prefix) + assert _session is not None and _labels is not None and _tokenizer is not None + + with open(text_download_path, "r") as f: + sentences = [json.loads(line)["text"] for line in f if line.strip()] + + 
os.remove(text_download_path) + + inference_begin = datetime.datetime.now() + inputs = _prepare_inputs(sentences) + outputs = _session.run(None, inputs) + logits = outputs[0] + probabilities = _softmax(logits) + inference_end = datetime.datetime.now() + + results = [] + for sentence, probs in zip(sentences, probabilities): + label_idx = int(np.argmax(probs)) + label = _labels.get(label_idx, str(label_idx)) + results.append( + { + "text": sentence, + "label": label, + "confidence": float(probs[label_idx]), + "raw_scores": probs.tolist(), + } + ) + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1) + + return { + "result": {"predictions": results}, + "measurement": { + "download_time": download_time + model_download_time, + "compute_time": compute_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + }, + } diff --git a/benchmarks/400.inference/412.language-bert/python/init.sh b/benchmarks/400.inference/412.language-bert/python/init.sh new file mode 100755 index 000000000..160852abe --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/init.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +# No additional initialization required for the BERT inference benchmark. diff --git a/benchmarks/400.inference/412.language-bert/python/package.sh b/benchmarks/400.inference/412.language-bert/python/package.sh new file mode 100644 index 000000000..edb27ebe0 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/package.sh @@ -0,0 +1,35 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . 
-type d -name "bin" -not -path "*/torch/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . -name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +TORCH_DIR=".python_packages/lib/site-packages/torch" +if [ -d "$1/${TORCH_DIR}" ]; then + cd $1 + zip -qr torch.zip ${TORCH_DIR} + rm -rf ${TORCH_DIR} + cd ${CUR_DIR} + echo "Torch-zipped size $(du -sh $1 | cut -f1)" +fi diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt b/benchmarks/400.inference/412.language-bert/python/requirements.txt new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 diff --git 
a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/413.image-classification/config.json b/benchmarks/400.inference/413.image-classification/config.json new file mode 100644 index 000000000..94ede7925 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 512, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/400.inference/413.image-classification/input.py b/benchmarks/400.inference/413.image-classification/input.py new file mode 100644 index 000000000..99e8bc4b3 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/input.py @@ -0,0 +1,51 @@ +import os + + +def buckets_count(): + return (2, 0) + + +def upload_files(data_root, data_dir, upload_func): + + for root, dirs, files in os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + file_name = prefix + "/" + file + filepath = os.path.join(root, file) + upload_func(0, file_name, filepath) + + +""" + Generate test, small and large workload for compression test. 
+ + :param data_dir: directory where benchmark data is placed + :param size: workload size + :param input_buckets: input storage containers for this benchmark + :param output_buckets: + :param upload_func: upload function taking three params(bucket_idx, key, filepath) +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + + # upload model + model_name = "resnet50.tar.gz" + upload_func(0, model_name, os.path.join(data_dir, "model", model_name)) + + input_images = [] + resnet_path = os.path.join(data_dir, "data") + with open(os.path.join(resnet_path, "val_map.txt"), "r") as f: + for line in f: + img, img_class = line.split() + input_images.append((img, img_class)) + upload_func(1, img, os.path.join(resnet_path, img)) + + input_config = {"object": {}, "bucket": {}} + input_config["object"]["model"] = model_name + input_config["object"]["input"] = input_images[0][0] + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[1] + input_config["bucket"]["model"] = input_paths[0] + return input_config diff --git a/benchmarks/400.inference/413.image-classification/python/function.py b/benchmarks/400.inference/413.image-classification/python/function.py new file mode 100644 index 000000000..64795612d --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/function.py @@ -0,0 +1,178 @@ +import datetime +import json +import os +import shutil +import tarfile +import uuid +from typing import List, Optional, Tuple + +import numpy as np +import onnxruntime as ort +from PIL import Image + +from . 
import storage + +client = storage.storage.get_instance() + +SCRIPT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) +class_idx = json.load(open(os.path.join(SCRIPT_DIR, "imagenet_class_index.json"), "r")) +idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))] + +MODEL_ARCHIVE = "resnet50.tar.gz" +MODEL_DIRECTORY = "/tmp/image_classification_model" +MODEL_SUBDIR = "resnet50" + +_session: Optional[ort.InferenceSession] = None +_session_input: Optional[str] = None +_session_output: Optional[str] = None +_cached_model_key: Optional[str] = None + +_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32) +_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32) + + +def _ensure_model(bucket: str, model_prefix: str, model_key: str) -> Tuple[float, float]: + """ + Lazily download, extract, and initialize the ONNX ResNet model. + """ + global _session, _session_input, _session_output, _cached_model_key + + effective_model_key = model_key or MODEL_ARCHIVE + model_download_begin = datetime.datetime.now() + model_download_end = model_download_begin + + if _session is None or _cached_model_key != effective_model_key: + archive_basename = os.path.basename(effective_model_key) + archive_path = os.path.join("/tmp", f"{uuid.uuid4()}-{archive_basename}") + model_dir = os.path.join(MODEL_DIRECTORY, MODEL_SUBDIR) + + if os.path.exists(model_dir): + shutil.rmtree(model_dir) + os.makedirs(MODEL_DIRECTORY, exist_ok=True) + + client.download(bucket, os.path.join(model_prefix, effective_model_key), archive_path) + model_download_end = datetime.datetime.now() + + with tarfile.open(archive_path, "r:gz") as tar: + tar.extractall(MODEL_DIRECTORY) + os.remove(archive_path) + + model_process_begin = datetime.datetime.now() + onnx_path = os.path.join(model_dir, "model.onnx") + if not os.path.exists(onnx_path): + raise FileNotFoundError(f"Expected ONNX model at {onnx_path}") + + available = ort.get_available_providers() + if "CUDAExecutionProvider" not in available: + 
raise RuntimeError(f"CUDAExecutionProvider unavailable (providers: {available})") + + _session = ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"]) + _session_input = _session.get_inputs()[0].name + _session_output = _session.get_outputs()[0].name + _cached_model_key = effective_model_key + model_process_end = datetime.datetime.now() + else: + model_process_begin = datetime.datetime.now() + model_process_end = model_process_begin + + model_download_time = (model_download_end - model_download_begin) / datetime.timedelta( + microseconds=1 + ) + model_process_time = (model_process_end - model_process_begin) / datetime.timedelta( + microseconds=1 + ) + + return model_download_time, model_process_time + + +def _resize_shorter_side(image: Image.Image, size: int) -> Image.Image: + width, height = image.size + if width < height: + new_width = size + new_height = int(round(size * height / width)) + else: + new_height = size + new_width = int(round(size * width / height)) + resample = getattr(Image, "Resampling", Image).BILINEAR + return image.resize((new_width, new_height), resample=resample) + + +def _center_crop(image: Image.Image, size: int) -> Image.Image: + width, height = image.size + left = max(0, int(round((width - size) / 2))) + top = max(0, int(round((height - size) / 2))) + right = left + size + bottom = top + size + return image.crop((left, top, right, bottom)) + + +def _prepare_tensor(image_path: str) -> np.ndarray: + image = Image.open(image_path).convert("RGB") + image = _resize_shorter_side(image, 256) + image = _center_crop(image, 224) + + np_image = np.asarray(image).astype(np.float32) / 255.0 + np_image = (np_image - _MEAN) / _STD + np_image = np.transpose(np_image, (2, 0, 1)) + return np_image[np.newaxis, :] + + +def _softmax(logits: np.ndarray) -> np.ndarray: + shifted = logits - np.max(logits, axis=1, keepdims=True) + exp = np.exp(shifted) + return exp / np.sum(exp, axis=1, keepdims=True) + + +def _run_inference(batch: np.ndarray) -> 
Tuple[int, float, List[int]]: + assert _session is not None and _session_input is not None and _session_output is not None + + outputs = _session.run([_session_output], {_session_input: batch}) + logits = outputs[0] + probs = _softmax(logits) + top1_idx = int(np.argmax(probs, axis=1)[0]) + top1_conf = float(probs[0, top1_idx]) + top5_idx = np.argsort(probs[0])[::-1][:5].tolist() + + return top1_idx, top1_conf, top5_idx + + +def handler(event): + bucket = event.get("bucket", {}).get("bucket") + input_prefix = event.get("bucket", {}).get("input") + model_prefix = event.get("bucket", {}).get("model") + key = event.get("object", {}).get("input") + model_key = event.get("object", {}).get("model") + + download_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(key)}") + image_download_begin = datetime.datetime.now() + client.download(bucket, os.path.join(input_prefix, key), download_path) + image_download_end = datetime.datetime.now() + + model_download_time, model_process_time = _ensure_model(bucket, model_prefix, model_key) + + inference_begin = datetime.datetime.now() + input_batch = _prepare_tensor(download_path) + top1_idx, top1_conf, top5_idx = _run_inference(input_batch) + inference_end = datetime.datetime.now() + + os.remove(download_path) + + download_time = (image_download_end - image_download_begin) / datetime.timedelta(microseconds=1) + compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1) + # gpu_time_ms = 0.0 + + return { + "result": { + "idx": top1_idx, + "class": idx2label[top1_idx], + "confidence": top1_conf, + "top5_idx": top5_idx, + }, + "measurement": { + "download_time": download_time + model_download_time, + "compute_time": compute_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + # "gpu_time_ms": round(gpu_time_ms, 3), + }, + } diff --git a/benchmarks/400.inference/413.image-classification/python/imagenet_class_index.json 
b/benchmarks/400.inference/413.image-classification/python/imagenet_class_index.json new file mode 100755 index 000000000..5fe0dfefc --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/imagenet_class_index.json @@ -0,0 +1 @@ +{"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", 
"African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": ["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": 
["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], "113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": 
["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": ["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": 
["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": ["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": 
["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], "268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", 
"leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": ["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": 
["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", "Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": 
["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": ["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", 
"chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", "cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": 
["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", "fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": 
["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", "jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", 
"Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", "mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": 
["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": ["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", 
"sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], "785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", 
"submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", "sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", 
"waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", "water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", 
"Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], "957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]} \ No newline at end of file diff --git a/benchmarks/400.inference/413.image-classification/python/init.sh b/benchmarks/400.inference/413.image-classification/python/init.sh new file mode 100755 index 000000000..71a2e39c0 --- /dev/null +++ 
b/benchmarks/400.inference/413.image-classification/python/init.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +DIR=$1 +VERBOSE=$2 +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +path="${SCRIPT_DIR}/imagenet_class_index.json" +if [ "$VERBOSE" = true ]; then + echo "Update ${DIR} with json ${path}" +fi +cp ${path} ${DIR} diff --git a/benchmarks/400.inference/413.image-classification/python/package.sh b/benchmarks/400.inference/413.image-classification/python/package.sh new file mode 100644 index 000000000..038fac7c5 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/package.sh @@ -0,0 +1,32 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . -type d -name "bin" -not -path "*/torch/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . 
-name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then + zip -qr torch.zip $1/torch + rm -rf $1/torch + echo "Torch-zipped size $(du -sh ${CUR_DIR} | cut -f1)" +fi diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt b/benchmarks/400.inference/413.image-classification/python/requirements.txt new file mode 100755 index 000000000..01d9a45b4 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt @@ -0,0 +1,5 @@ +numpy>=1.22,<2.0 +pillow>=9.5,<10.0 +torch==2.4.1 +torchvision==0.19.1 +typing-extensions>=4.8 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 new file mode 100644 index 000000000..96299cb57 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 @@ -0,0 +1,4 @@ +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 new file mode 100644 index 000000000..96299cb57 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 @@ -0,0 +1,4 @@ +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.12 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.12 new file mode 100644 index 000000000..96299cb57 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.12 @@ -0,0 +1,4 @@ +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git 
a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 new file mode 100755 index 000000000..01d9a45b4 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 @@ -0,0 +1,5 @@ +numpy>=1.22,<2.0 +pillow>=9.5,<10.0 +torch==2.4.1 +torchvision==0.19.1 +typing-extensions>=4.8 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 new file mode 100755 index 000000000..96299cb57 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 @@ -0,0 +1,4 @@ +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.8 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..01d9a45b4 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.8 @@ -0,0 +1,5 @@ +numpy>=1.22,<2.0 +pillow>=9.5,<10.0 +torch==2.4.1 +torchvision==0.19.1 +typing-extensions>=4.8 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.9 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..96299cb57 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.9 @@ -0,0 +1,4 @@ +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/400.inference/413.recommendation/config.json b/benchmarks/400.inference/413.recommendation/config.json new file mode 100644 index 000000000..649bb78d6 --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 1024, + "languages": ["python"], + 
"modules": ["storage"] +} diff --git a/benchmarks/400.inference/413.recommendation/input.py b/benchmarks/400.inference/413.recommendation/input.py new file mode 100644 index 000000000..4e48cfa52 --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/input.py @@ -0,0 +1,30 @@ +import os + + +def buckets_count(): + return (2, 0) + + +def upload_files(data_root, data_dir, upload_func): + for root, _, files in os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + upload_func(0, os.path.join(prefix, file), os.path.join(root, file)) + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + model_file = "dlrm_tiny.pt" + upload_func(0, model_file, os.path.join(data_dir, "model", model_file)) + + requests_file = "requests.jsonl" + upload_func(1, requests_file, os.path.join(data_dir, "data", requests_file)) + + cfg = {"object": {}, "bucket": {}} + cfg["object"]["model"] = model_file + cfg["object"]["requests"] = requests_file + cfg["bucket"]["bucket"] = benchmarks_bucket + cfg["bucket"]["model"] = input_paths[0] + cfg["bucket"]["requests"] = input_paths[1] + return cfg diff --git a/benchmarks/400.inference/413.recommendation/python/function.py b/benchmarks/400.inference/413.recommendation/python/function.py new file mode 100644 index 000000000..e7b4ae73c --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/function.py @@ -0,0 +1,144 @@ +import datetime +import json +import os +import uuid + +import torch +import torch.nn as nn + +from . 
import storage + +client = storage.storage.get_instance() + +MODEL_FILE = "dlrm_tiny.pt" +MODEL_CACHE = "/tmp/dlrm_gpu_model" + +_model = None +_device = torch.device("cpu") + + +class TinyDLRM(nn.Module): + def __init__(self, num_users, num_items, num_categories, embed_dim=8): + super().__init__() + self.user_emb = nn.Embedding(num_users, embed_dim) + self.item_emb = nn.Embedding(num_items, embed_dim) + self.category_emb = nn.Embedding(num_categories, embed_dim) + in_dim = embed_dim * 3 + 2 + hidden = 16 + self.mlp = nn.Sequential( + nn.Linear(in_dim, hidden), + nn.ReLU(), + nn.Linear(hidden, 1), + ) + + def forward(self, user_id, item_id, category_id, dense): + features = torch.cat( + [ + self.user_emb(user_id), + self.item_emb(item_id), + self.category_emb(category_id), + dense, + ], + dim=-1, + ) + return torch.sigmoid(self.mlp(features)) + + +def _select_device(): + if torch.cuda.is_available(): + return torch.device("cuda") + raise RuntimeError("CUDA is not available") + return torch.device("cpu") + + +def _load_model(bucket, prefix): + global _model, _device + + if _model is not None: + return 0.0, 0.0 + + download_begin = datetime.datetime.now() + os.makedirs(MODEL_CACHE, exist_ok=True) + tmp_path = os.path.join("/tmp", f"{uuid.uuid4()}-{MODEL_FILE}") + client.download(bucket, os.path.join(prefix, MODEL_FILE), tmp_path) + download_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + checkpoint = torch.load(tmp_path, map_location="cpu") + meta = checkpoint["meta"] + _device = _select_device() + model = TinyDLRM( + meta["num_users"], meta["num_items"], meta["num_categories"], meta["embed_dim"] + ) + model.load_state_dict(checkpoint["state_dict"]) + model.to(_device) + model.eval() + _model = model + os.remove(tmp_path) + process_end = datetime.datetime.now() + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + return 
download_time, process_time + + +def _prepare_batch(requests): + user_ids = torch.tensor([req["user_id"] for req in requests], dtype=torch.long, device=_device) + item_ids = torch.tensor([req["item_id"] for req in requests], dtype=torch.long, device=_device) + category_ids = torch.tensor( + [req["category_id"] for req in requests], dtype=torch.long, device=_device + ) + dense = torch.tensor( + [req.get("dense", [0.0, 0.0]) for req in requests], dtype=torch.float32, device=_device + ) + return user_ids, item_ids, category_ids, dense + + +def handler(event): + bucket = event.get("bucket", {}).get("bucket") + model_prefix = event.get("bucket", {}).get("model") + requests_prefix = event.get("bucket", {}).get("requests") + requests_key = event.get("object", {}).get("requests") + + download_begin = datetime.datetime.now() + req_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(requests_key)}") + client.download(bucket, os.path.join(requests_prefix, requests_key), req_path) + download_end = datetime.datetime.now() + + model_download_time, model_process_time = _load_model(bucket, model_prefix) + + with open(req_path, "r") as f: + payloads = [json.loads(line) for line in f if line.strip()] + os.remove(req_path) + + inference_begin = datetime.datetime.now() + user_ids, item_ids, category_ids, dense = _prepare_batch(payloads) + + with torch.no_grad(): + scores = _model(user_ids, item_ids, category_ids, dense).squeeze(-1).tolist() + inference_end = datetime.datetime.now() + + predictions = [] + for req, score in zip(payloads, scores): + predictions.append( + { + "user_id": req["user_id"], + "item_id": req["item_id"], + "category_id": req["category_id"], + "score": score, + "device": str(_device), + } + ) + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1) + + return { + "result": {"predictions": predictions}, + "measurement": { + 
"download_time": download_time + model_download_time, + "compute_time": compute_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + }, + } diff --git a/benchmarks/400.inference/413.recommendation/python/init.sh b/benchmarks/400.inference/413.recommendation/python/init.sh new file mode 100644 index 000000000..f42329404 --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/init.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +# No additional initialization required for GPU recommendation benchmark. diff --git a/benchmarks/400.inference/413.recommendation/python/package.sh b/benchmarks/400.inference/413.recommendation/python/package.sh new file mode 100644 index 000000000..64e9deacb --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/package.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +PACKAGE_DIR=$1 +echo "DLRM GPU package size $(du -sh $1 | cut -f1)" diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt b/benchmarks/400.inference/413.recommendation/python/requirements.txt new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.10 b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.10 new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.10 @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.11 b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.11 new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.11 @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.8 
b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.8 new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.8 @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.9 b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.9 new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.9 @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/500.scientific/501.graph-pagerank/input.py b/benchmarks/500.scientific/501.graph-pagerank/input.py index e20a6dcd1..a4ab10fb8 100644 --- a/benchmarks/500.scientific/501.graph-pagerank/input.py +++ b/benchmarks/500.scientific/501.graph-pagerank/input.py @@ -1,8 +1,7 @@ -size_generators = { - 'test' : 10, - 'small' : 10000, - 'large': 100000 -} +size_generators = {"test": 10, "small": 10000, "large": 100000} -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'size': size_generators[size], 'seed': 42} + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/500.scientific/501.graph-pagerank/python/function.py b/benchmarks/500.scientific/501.graph-pagerank/python/function.py index 0e462e9b4..461fc14a9 100755 --- a/benchmarks/500.scientific/501.graph-pagerank/python/function.py +++ b/benchmarks/500.scientific/501.graph-pagerank/python/function.py @@ -1,9 +1,10 @@ import datetime import igraph + def handler(event): - size = event.get('size') + size = event.get("size") if "seed" in event: import random @@ -17,13 +18,15 @@ def handler(event): result = graph.pagerank() process_end = datetime.datetime.now() - graph_generating_time = (graph_generating_end - graph_generating_begin) / 
datetime.timedelta(microseconds=1) + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta( + microseconds=1 + ) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': result[0], - 'measurement': { - 'graph_generating_time': graph_generating_time, - 'compute_time': process_time - } + "result": result[0], + "measurement": { + "graph_generating_time": graph_generating_time, + "compute_time": process_time, + }, } diff --git a/benchmarks/500.scientific/502.graph-mst/input.py b/benchmarks/500.scientific/502.graph-mst/input.py index e20a6dcd1..a4ab10fb8 100644 --- a/benchmarks/500.scientific/502.graph-mst/input.py +++ b/benchmarks/500.scientific/502.graph-mst/input.py @@ -1,8 +1,7 @@ -size_generators = { - 'test' : 10, - 'small' : 10000, - 'large': 100000 -} +size_generators = {"test": 10, "small": 10000, "large": 100000} -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'size': size_generators[size], 'seed': 42} + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/500.scientific/502.graph-mst/python/function.py b/benchmarks/500.scientific/502.graph-mst/python/function.py index b63fbdce2..69ad77678 100755 --- a/benchmarks/500.scientific/502.graph-mst/python/function.py +++ b/benchmarks/500.scientific/502.graph-mst/python/function.py @@ -1,9 +1,10 @@ import datetime import igraph + def handler(event): - size = event.get('size') + size = event.get("size") if "seed" in event: import random @@ -17,13 +18,15 @@ def handler(event): result = graph.spanning_tree(None, False) process_end = datetime.datetime.now() - graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + graph_generating_time = (graph_generating_end - 
graph_generating_begin) / datetime.timedelta( + microseconds=1 + ) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': result[0], - 'measurement': { - 'graph_generating_time': graph_generating_time, - 'compute_time': process_time - } + "result": result[0], + "measurement": { + "graph_generating_time": graph_generating_time, + "compute_time": process_time, + }, } diff --git a/benchmarks/500.scientific/503.graph-bfs/input.py b/benchmarks/500.scientific/503.graph-bfs/input.py index e20a6dcd1..a4ab10fb8 100644 --- a/benchmarks/500.scientific/503.graph-bfs/input.py +++ b/benchmarks/500.scientific/503.graph-bfs/input.py @@ -1,8 +1,7 @@ -size_generators = { - 'test' : 10, - 'small' : 10000, - 'large': 100000 -} +size_generators = {"test": 10, "small": 10000, "large": 100000} -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'size': size_generators[size], 'seed': 42} + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/500.scientific/503.graph-bfs/python/function.py b/benchmarks/500.scientific/503.graph-bfs/python/function.py index 18423ae1a..51a37346b 100755 --- a/benchmarks/500.scientific/503.graph-bfs/python/function.py +++ b/benchmarks/500.scientific/503.graph-bfs/python/function.py @@ -1,9 +1,10 @@ import datetime import igraph + def handler(event): - size = event.get('size') + size = event.get("size") if "seed" in event: import random @@ -17,13 +18,15 @@ def handler(event): result = graph.bfs(0) process_end = datetime.datetime.now() - graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta( + microseconds=1 + ) process_time = (process_end - process_begin) / 
datetime.timedelta(microseconds=1) return { - 'result': result, - 'measurement': { - 'graph_generating_time': graph_generating_time, - 'compute_time': process_time - } + "result": result, + "measurement": { + "graph_generating_time": graph_generating_time, + "compute_time": process_time, + }, } diff --git a/benchmarks/500.scientific/504.dna-visualisation/input.py b/benchmarks/500.scientific/504.dna-visualisation/input.py index a9f376ea2..ea26f48c0 100644 --- a/benchmarks/500.scientific/504.dna-visualisation/input.py +++ b/benchmarks/500.scientific/504.dna-visualisation/input.py @@ -1,16 +1,21 @@ -import glob, os +import glob +import os + def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - for file in glob.glob(os.path.join(data_dir, '*.fasta')): +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + + for file in glob.glob(os.path.join(data_dir, "*.fasta")): data = os.path.relpath(file, data_dir) upload_func(0, data, file) - input_config = {'object': {}, 'bucket': {}} - input_config['object']['key'] = data - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[0] - input_config['bucket']['output'] = output_paths[0] + input_config = {"object": {}, "bucket": {}} + input_config["object"]["key"] = data + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[0] + input_config["bucket"]["output"] = output_paths[0] return input_config diff --git a/benchmarks/500.scientific/504.dna-visualisation/python/function.py b/benchmarks/500.scientific/504.dna-visualisation/python/function.py index 8362a73a1..ca9f5975e 100755 --- a/benchmarks/500.scientific/504.dna-visualisation/python/function.py +++ b/benchmarks/500.scientific/504.dna-visualisation/python/function.py @@ -1,17 +1,23 @@ -import datetime, io, json, os +import datetime 
+import io +import json +import os + # using https://squiggle.readthedocs.io/en/latest/ from squiggle import transform from . import storage + client = storage.storage.get_instance() + def handler(event): - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - output_prefix = event.get('bucket').get('output') - key = event.get('object').get('key') - download_path = '/tmp/{}'.format(key) + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + output_prefix = event.get("bucket").get("output") + key = event.get("object").get("key") + download_path = "/tmp/{}".format(key) download_begin = datetime.datetime.now() client.download(bucket, os.path.join(input_prefix, key), download_path) @@ -34,13 +40,10 @@ def handler(event): process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'key': key_name - }, - 'measurement': { - 'download_time': download_time, - 'compute_time': process_time, - 'upload_time': process_time - } + "result": {"bucket": bucket, "key": key_name}, + "measurement": { + "download_time": download_time, + "compute_time": process_time, + "upload_time": upload_time, + }, } diff --git a/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/config.json b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/config.json new file mode 100644 index 000000000..ff297ac5b --- /dev/null +++ b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 2048, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/input.py b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/input.py new file mode 100644 index 000000000..bb53694c9 --- /dev/null +++ b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/input.py @@ -0,0 +1,17 @@ +size_generators = { + "test": {"ny": 61, "nx": 61, "nit": 5, 
"rho": 1.0, "nu": 0.1, "F": 1.0}, + "small": {"ny": 121, "nx": 121, "nit": 10, "rho": 1.0, "nu": 0.1, "F": 1.0}, + "large": {"ny": 201, "nx": 201, "nit": 20, "rho": 1.0, "nu": 0.1, "F": 1.0}, +} + + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): + return {"size": size_generators[size]} diff --git a/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/function.py b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/function.py new file mode 100644 index 000000000..5788880b2 --- /dev/null +++ b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/function.py @@ -0,0 +1,279 @@ +# Barba, Lorena A., and Forsyth, Gilbert F. (2018). +# CFD Python: the 12 steps to Navier-Stokes equations. +# Journal of Open Source Education, 1(9), 21, +# https://doi.org/10.21105/jose.00021 +# TODO: License +# (c) 2017 Lorena A. Barba, Gilbert F. Forsyth. +# All content is under Creative Commons Attribution CC-BY 4.0, +# and all code is under BSD-3 clause (previously under MIT, and changed on March 8, 2018). 
import datetime

import jax.numpy as jnp
import jax
from jax import lax
from functools import partial


@partial(jax.jit, static_argnums=(0,))
def build_up_b(rho, dt, dx, dy, u, v):
    """Assemble the source term b of the pressure Poisson equation.

    Interior update plus the two periodic boundary columns (x = 0 and
    x = 2). rho is marked static so jit specializes per density value.
    """
    b = jnp.zeros_like(u)
    b = b.at[1:-1, 1:-1].set(
        rho
        * (
            1
            / dt
            * (
                (u[1:-1, 2:] - u[1:-1, 0:-2]) / (2 * dx)
                + (v[2:, 1:-1] - v[0:-2, 1:-1]) / (2 * dy)
            )
            - ((u[1:-1, 2:] - u[1:-1, 0:-2]) / (2 * dx)) ** 2
            - 2
            * (
                (u[2:, 1:-1] - u[0:-2, 1:-1])
                / (2 * dy)
                * (v[1:-1, 2:] - v[1:-1, 0:-2])
                / (2 * dx)
            )
            - ((v[2:, 1:-1] - v[0:-2, 1:-1]) / (2 * dy)) ** 2
        )
    )

    # Periodic BC pressure @ x = 2 (wraps to column 0)
    b = b.at[1:-1, -1].set(
        rho
        * (
            1
            / dt
            * ((u[1:-1, 0] - u[1:-1, -2]) / (2 * dx) + (v[2:, -1] - v[0:-2, -1]) / (2 * dy))
            - ((u[1:-1, 0] - u[1:-1, -2]) / (2 * dx)) ** 2
            - 2 * ((u[2:, -1] - u[0:-2, -1]) / (2 * dy) * (v[1:-1, 0] - v[1:-1, -2]) / (2 * dx))
            - ((v[2:, -1] - v[0:-2, -1]) / (2 * dy)) ** 2
        )
    )

    # Periodic BC pressure @ x = 0 (wraps to column -1)
    b = b.at[1:-1, 0].set(
        rho
        * (
            1
            / dt
            * ((u[1:-1, 1] - u[1:-1, -1]) / (2 * dx) + (v[2:, 0] - v[0:-2, 0]) / (2 * dy))
            - ((u[1:-1, 1] - u[1:-1, -1]) / (2 * dx)) ** 2
            - 2 * ((u[2:, 0] - u[0:-2, 0]) / (2 * dy) * (v[1:-1, 1] - v[1:-1, -1]) / (2 * dx))
            - ((v[2:, 0] - v[0:-2, 0]) / (2 * dy)) ** 2
        )
    )

    return b


@partial(jax.jit, static_argnums=(0,))
def pressure_poisson_periodic(nit, p, dx, dy, b):
    """Run `nit` Jacobi sweeps of the periodic pressure Poisson solve.

    Returns the updated pressure field. JAX arrays are immutable, so the
    caller MUST use the return value; the input `p` is never modified.
    (BUG FIX: the original version dropped the scan result and returned
    None, so the pressure field was never advanced.)
    """

    def body_func(p, q):
        pn = p.copy()
        p = p.at[1:-1, 1:-1].set(
            ((pn[1:-1, 2:] + pn[1:-1, 0:-2]) * dy**2 + (pn[2:, 1:-1] + pn[0:-2, 1:-1]) * dx**2)
            / (2 * (dx**2 + dy**2))
            - dx**2 * dy**2 / (2 * (dx**2 + dy**2)) * b[1:-1, 1:-1]
        )

        # Periodic BC pressure @ x = 2
        p = p.at[1:-1, -1].set(
            ((pn[1:-1, 0] + pn[1:-1, -2]) * dy**2 + (pn[2:, -1] + pn[0:-2, -1]) * dx**2)
            / (2 * (dx**2 + dy**2))
            - dx**2 * dy**2 / (2 * (dx**2 + dy**2)) * b[1:-1, -1]
        )

        # Periodic BC pressure @ x = 0
        p = p.at[1:-1, 0].set(
            ((pn[1:-1, 1] + pn[1:-1, -1]) * dy**2 + (pn[2:, 0] + pn[0:-2, 0]) * dx**2)
            / (2 * (dx**2 + dy**2))
            - dx**2 * dy**2 / (2 * (dx**2 + dy**2)) * b[1:-1, 0]
        )

        # Wall boundary conditions: dp/dy = 0 at y = 0 and y = 2
        p = p.at[-1, :].set(p[-2, :])
        p = p.at[0, :].set(p[1, :])

        return p, None

    p, _ = lax.scan(body_func, p, jnp.arange(nit))
    return p


@partial(jax.jit, static_argnums=(0, 7, 8, 9))
def channel_flow(nit, u, v, dt, dx, dy, p, rho, nu, F):
    """Advance the periodic channel flow until the relative change in
    sum(u) drops below 1e-3; returns the number of steps taken."""
    udiff = 1
    stepcount = 0

    array_vals = (udiff, stepcount, u, v, p)

    def conf_func(array_vals):
        udiff, _, _, _, _ = array_vals
        return udiff > 0.001

    def body_func(array_vals):
        _, stepcount, u, v, p = array_vals

        un = u.copy()
        vn = v.copy()

        b = build_up_b(rho, dt, dx, dy, u, v)
        # BUG FIX: assign the solver's return value. JAX arrays are
        # immutable, so the previous bare call had no effect and the
        # pressure gradient terms below always saw the initial field.
        p = pressure_poisson_periodic(nit, p, dx, dy, b)

        u = u.at[1:-1, 1:-1].set(
            un[1:-1, 1:-1]
            - un[1:-1, 1:-1] * dt / dx * (un[1:-1, 1:-1] - un[1:-1, 0:-2])
            - vn[1:-1, 1:-1] * dt / dy * (un[1:-1, 1:-1] - un[0:-2, 1:-1])
            - dt / (2 * rho * dx) * (p[1:-1, 2:] - p[1:-1, 0:-2])
            + nu
            * (
                dt / dx**2 * (un[1:-1, 2:] - 2 * un[1:-1, 1:-1] + un[1:-1, 0:-2])
                + dt / dy**2 * (un[2:, 1:-1] - 2 * un[1:-1, 1:-1] + un[0:-2, 1:-1])
            )
            + F * dt
        )

        v = v.at[1:-1, 1:-1].set(
            vn[1:-1, 1:-1]
            - un[1:-1, 1:-1] * dt / dx * (vn[1:-1, 1:-1] - vn[1:-1, 0:-2])
            - vn[1:-1, 1:-1] * dt / dy * (vn[1:-1, 1:-1] - vn[0:-2, 1:-1])
            - dt / (2 * rho * dy) * (p[2:, 1:-1] - p[0:-2, 1:-1])
            + nu
            * (
                dt / dx**2 * (vn[1:-1, 2:] - 2 * vn[1:-1, 1:-1] + vn[1:-1, 0:-2])
                + dt / dy**2 * (vn[2:, 1:-1] - 2 * vn[1:-1, 1:-1] + vn[0:-2, 1:-1])
            )
        )

        # Periodic BC u @ x = 2
        u = u.at[1:-1, -1].set(
            un[1:-1, -1]
            - un[1:-1, -1] * dt / dx * (un[1:-1, -1] - un[1:-1, -2])
            - vn[1:-1, -1] * dt / dy * (un[1:-1, -1] - un[0:-2, -1])
            - dt / (2 * rho * dx) * (p[1:-1, 0] - p[1:-1, -2])
            + nu
            * (
                dt / dx**2 * (un[1:-1, 0] - 2 * un[1:-1, -1] + un[1:-1, -2])
                + dt / dy**2 * (un[2:, -1] - 2 * un[1:-1, -1] + un[0:-2, -1])
            )
            + F * dt
        )

        # Periodic BC u @ x = 0
        u = u.at[1:-1, 0].set(
            un[1:-1, 0]
            - un[1:-1, 0] * dt / dx * (un[1:-1, 0] - un[1:-1, -1])
            - vn[1:-1, 0] * dt / dy * (un[1:-1, 0] - un[0:-2, 0])
            - dt / (2 * rho * dx) * (p[1:-1, 1] - p[1:-1, -1])
            + nu
            * (
                dt / dx**2 * (un[1:-1, 1] - 2 * un[1:-1, 0] + un[1:-1, -1])
                + dt / dy**2 * (un[2:, 0] - 2 * un[1:-1, 0] + un[0:-2, 0])
            )
            + F * dt
        )

        # Periodic BC v @ x = 2
        v = v.at[1:-1, -1].set(
            vn[1:-1, -1]
            - un[1:-1, -1] * dt / dx * (vn[1:-1, -1] - vn[1:-1, -2])
            - vn[1:-1, -1] * dt / dy * (vn[1:-1, -1] - vn[0:-2, -1])
            - dt / (2 * rho * dy) * (p[2:, -1] - p[0:-2, -1])
            + nu
            * (
                dt / dx**2 * (vn[1:-1, 0] - 2 * vn[1:-1, -1] + vn[1:-1, -2])
                + dt / dy**2 * (vn[2:, -1] - 2 * vn[1:-1, -1] + vn[0:-2, -1])
            )
        )

        # Periodic BC v @ x = 0
        v = v.at[1:-1, 0].set(
            vn[1:-1, 0]
            - un[1:-1, 0] * dt / dx * (vn[1:-1, 0] - vn[1:-1, -1])
            - vn[1:-1, 0] * dt / dy * (vn[1:-1, 0] - vn[0:-2, 0])
            - dt / (2 * rho * dy) * (p[2:, 0] - p[0:-2, 0])
            + nu
            * (
                dt / dx**2 * (vn[1:-1, 1] - 2 * vn[1:-1, 0] + vn[1:-1, -1])
                + dt / dy**2 * (vn[2:, 0] - 2 * vn[1:-1, 0] + vn[0:-2, 0])
            )
        )

        # Wall BC: u, v = 0 @ y = 0, 2
        u = u.at[0, :].set(0)
        u = u.at[-1, :].set(0)
        v = v.at[0, :].set(0)
        v = v.at[-1, :].set(0)

        udiff = (jnp.sum(u) - jnp.sum(un)) / jnp.sum(u)
        stepcount += 1

        return (udiff, stepcount, u, v, p)

    _, stepcount, _, _, _ = lax.while_loop(conf_func, body_func, array_vals)

    return stepcount


def initialize(ny, nx):
    """Zero velocity fields, unit pressure, and grid spacings for a 2x2 domain.

    NOTE(review): jnp.float64 only takes effect when JAX x64 mode is enabled;
    under the default configuration these arrays are silently float32.
    """
    u = jnp.zeros((ny, nx), dtype=jnp.float64)
    v = jnp.zeros((ny, nx), dtype=jnp.float64)
    p = jnp.ones((ny, nx), dtype=jnp.float64)
    dx = 2 / (nx - 1)
    dy = 2 / (ny - 1)
    dt = 0.1 / ((nx - 1) * (ny - 1))
    return u, v, p, dx, dy, dt


def handler(event):
    """Benchmark entry point: run the channel-flow solver for the given size.

    Returns the step count plus generate/compute timings in milliseconds.
    NOTE(review): if 'size' is missing from the event, the locals below are
    undefined and a NameError follows — presumably callers always send it.
    """
    if "size" in event:
        size = event["size"]
        ny = size["ny"]
        nx = size["nx"]
        nit = size["nit"]
        rho = size["rho"]
        nu = size["nu"]
        F = size["F"]

    generate_begin = datetime.datetime.now()
    u, v, p, dx, dy, dt = initialize(ny, nx)
    generate_end = datetime.datetime.now()

    process_begin = datetime.datetime.now()
    results = channel_flow(nit, u, v, dt, dx, dy, p, rho, nu, F)
    process_end = datetime.datetime.now()

    process_time = (process_end - process_begin) / datetime.timedelta(milliseconds=1)
    generate_time = (generate_end - generate_begin) / datetime.timedelta(milliseconds=1)

    # Best effort: pull the result off the device; keep as-is on failure.
    try:
        results = jax.device_get(results)
    except Exception:
        pass

    if hasattr(results, "item"):
        results = results.item()
    elif hasattr(results, "tolist"):
        results = results.tolist()

    return {
        "size": size,
        "result": results,
        "measurement": {"compute_time": process_time, "generate_time": generate_time},
    }
data_dir, + size, + benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): + return {"size": size_generators[size]} diff --git a/benchmarks/500.scientific/5xx.compute_jax_npbench/python/function.py b/benchmarks/500.scientific/5xx.compute_jax_npbench/python/function.py new file mode 100644 index 000000000..2e16b320d --- /dev/null +++ b/benchmarks/500.scientific/5xx.compute_jax_npbench/python/function.py @@ -0,0 +1,62 @@ +import datetime + +import jax.numpy as jnp +import jax + + +@jax.jit +def compute(array_1, array_2, a, b, c): + return jnp.clip(array_1, 2, 10) * a + array_2 * b + c + + +def initialize(M, N): + from numpy.random import default_rng + + rng = default_rng(42) + array_1 = rng.uniform(0, 1000, size=(M, N)).astype(jnp.int64) + array_2 = rng.uniform(0, 1000, size=(M, N)).astype(jnp.int64) + a = jnp.int64(4) + b = jnp.int64(3) + c = jnp.int64(9) + return array_1, array_2, a, b, c + + +def handler(event): + + if "size" in event: + size = event["size"] + M = size["M"] + N = size["N"] + + generate_begin = datetime.datetime.now() + + array_1, array_2, a, b, c = initialize(M, N) + + generate_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + + results = compute(array_1, array_2, a, b, c) + + process_end = datetime.datetime.now() + + # y_re_im = jnp.stack([jnp.real(result), jnp.imag(result)], axis=-1).tolist() + + process_time = (process_end - process_begin) / datetime.timedelta(milliseconds=1) + generate_time = (generate_end - generate_begin) / datetime.timedelta(milliseconds=1) + + try: + results = jax.device_get(results) + except Exception: + pass + + if getattr(results, "ndim", 0) == 0 or getattr(results, "size", 0) == 1: + results = results.item() + else: + results = results.tolist() + + return { + "size": size, + "result": results, + "measurement": {"compute_time": process_time, "generate_time": generate_time}, + } diff --git a/benchmarks/500.scientific/5xx.compute_jax_npbench/python/requirements.txt 
b/benchmarks/500.scientific/5xx.compute_jax_npbench/python/requirements.txt new file mode 100644 index 000000000..f31e1afe0 --- /dev/null +++ b/benchmarks/500.scientific/5xx.compute_jax_npbench/python/requirements.txt @@ -0,0 +1 @@ +jax[cuda12] \ No newline at end of file diff --git a/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/config.json b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/config.json new file mode 100644 index 000000000..ff297ac5b --- /dev/null +++ b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 2048, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/input.py b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/input.py new file mode 100644 index 000000000..937e96e44 --- /dev/null +++ b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/input.py @@ -0,0 +1,17 @@ +size_generators = { + "test": {"N": 8, "W": 14, "H": 14, "C1": 32, "C2": 8}, + "small": {"N": 8, "W": 28, "H": 28, "C1": 64, "C2": 16}, + "large": {"N": 8, "W": 56, "H": 56, "C1": 128, "C2": 32}, +} + + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): + return {"size": size_generators[size]} diff --git a/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/function.py b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/function.py new file mode 100644 index 000000000..f24b2cc71 --- /dev/null +++ b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/function.py @@ -0,0 +1,123 @@ +import datetime + +import jax.numpy as jnp +import jax +from jax import lax + + +@jax.jit +def relu(x): + return jnp.maximum(x, 0) + + +# Deep learning convolutional operator (stride = 1) +@jax.jit +def conv2d(input, weights): + K = weights.shape[0] # Assuming square kernel 
+ N = input.shape[0] + H_out = input.shape[1] - K + 1 + W_out = input.shape[2] - K + 1 + C_out = weights.shape[3] + output = jnp.empty((N, H_out, W_out, C_out), dtype=jnp.float32) + + def row_update(output, i): + def col_update(output, j): + input_slice = lax.dynamic_slice(input, (0, i, j, 0), (N, K, K, input.shape[-1])) + conv_result = jnp.sum( + input_slice[:, :, :, :, None] * weights[None, :, :, :], axis=(1, 2, 3) + ) + output = lax.dynamic_update_slice(output, conv_result[:, None, None, :], (0, i, j, 0)) + return output, None + + output, _ = lax.scan(col_update, output, jnp.arange(W_out)) + return output, None + + output, _ = lax.scan(row_update, output, jnp.arange(H_out)) + return output + + +# Batch normalization operator, as used in ResNet +@jax.jit +def batchnorm2d(x, eps=1e-5): + mean = jnp.mean(x, axis=0, keepdims=True) + std = jnp.std(x, axis=0, keepdims=True) + return (x - mean) / jnp.sqrt(std + eps) + + +# Bottleneck residual block (after initial convolution, without downsampling) +# in the ResNet-50 CNN (inference) +@jax.jit +def resnet_basicblock(input, conv1, conv2, conv3): + # Pad output of first convolution for second convolution + padded = jnp.zeros( + (input.shape[0], input.shape[1] + 2, input.shape[2] + 2, conv1.shape[3]), + dtype=jnp.float32, + ) + padded = lax.dynamic_update_slice(padded, conv2d(input, conv1), (0, 1, 1, 0)) + x = batchnorm2d(padded) + x = relu(x) + + x = conv2d(x, conv2) + x = batchnorm2d(x) + x = relu(x) + x = conv2d(x, conv3) + x = batchnorm2d(x) + return relu(x + input) + + +def initialize(N, W, H, C1, C2): + from numpy.random import default_rng + + rng = default_rng(42) + + # Input + input = rng.random((N, H, W, C1), dtype=jnp.float32) + # Weights + conv1 = rng.random((1, 1, C1, C2), dtype=jnp.float32) + conv2 = rng.random((3, 3, C2, C2), dtype=jnp.float32) + conv3 = rng.random((1, 1, C2, C1), dtype=jnp.float32) + return (input, conv1, conv2, conv3) + + +def handler(event): + + if "size" in event: + size = event["size"] + 
N = size["N"] + W = size["W"] + H = size["H"] + C1 = size["C1"] + C2 = size["C2"] + + generate_begin = datetime.datetime.now() + + input, conv1, conv2, conv3 = initialize(N, W, H, C1, C2) + + generate_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + + results = resnet_basicblock(input, conv1, conv2, conv3) + + process_end = datetime.datetime.now() + + # y_re_im = jnp.stack([jnp.real(result), jnp.imag(result)], axis=-1).tolist() + + process_time = (process_end - process_begin) / datetime.timedelta(milliseconds=1) + generate_time = (generate_end - generate_begin) / datetime.timedelta(milliseconds=1) + + try: + results = jax.device_get(results) + except Exception: + pass + + if getattr(results, "ndim", 0) == 0 or getattr(results, "size", 0) == 1: + results = results.item() + else: + results = results.tolist() + + return { + "size": size, + "result": results, + "measurement": {"compute_time": process_time, "generate_time": generate_time}, + } diff --git a/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/requirements.txt b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/requirements.txt new file mode 100644 index 000000000..f31e1afe0 --- /dev/null +++ b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/requirements.txt @@ -0,0 +1 @@ +jax[cuda12] \ No newline at end of file diff --git a/benchmarks/600.linearalgebra/601.matmul/config.json b/benchmarks/600.linearalgebra/601.matmul/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/601.matmul/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/601.matmul/input.py b/benchmarks/600.linearalgebra/601.matmul/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/601.matmul/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, 
#!/usr/bin/env python3
import torch
import datetime
import time


def initialize_torch(NI, NJ, NK, dtype=torch.float32, device="cuda"):
    """Deterministic PolyBench-style GEMM operands.

    Returns (alpha, beta, C, A, B) with C: (NI, NJ), A: (NI, NK), B: (NK, NJ),
    all values in [0, 1) derived from index arithmetic (no RNG).
    """
    alpha = torch.tensor(1.5, dtype=dtype, device=device)
    beta = torch.tensor(1.2, dtype=dtype, device=device)
    i = torch.arange(NI, device=device)
    j = torch.arange(NJ, device=device)
    k = torch.arange(NK, device=device)
    C = ((i[:, None] * j[None, :] + 1) % NI).to(dtype) / NI
    A = ((i[:, None] * (k[None, :] + 1)) % NK).to(dtype) / NK
    B = ((k[:, None] * (j[None, :] + 2)) % NJ).to(dtype) / NJ
    return alpha, beta, C, A, B


def kernel_gemm(alpha, beta, C, A, B, reps=1):
    """Time `reps` GEMM updates C = alpha * (A @ B) + beta * C.

    Uses CUDA events for device-side timing when the tensors live on a GPU;
    falls back to time.perf_counter on CPU so the kernel also runs (and is
    testable) without CUDA. Returns (C, elapsed_ms) for all reps combined.
    """
    on_gpu = C.is_cuda
    if on_gpu:
        torch.cuda.synchronize()
    _ = alpha * (A @ B) + beta * C  # warmup
    if on_gpu:
        torch.cuda.synchronize()
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        for _ in range(reps):
            C = alpha * (A @ B) + beta * C
        end.record()
        torch.cuda.synchronize()
        elapsed_ms = float(start.elapsed_time(end))
    else:
        t0 = time.perf_counter()
        for _ in range(reps):
            C = alpha * (A @ B) + beta * C
        elapsed_ms = (time.perf_counter() - t0) * 1000.0
    return C, elapsed_ms  # ms for all reps


def handler(event):
    """Benchmark entry point: one GEMM of size x size matrices.

    Returns wall-clock generate/compute times in microseconds and the
    device-side kernel time in milliseconds.
    """
    size = event.get("size")
    if "seed" in event:
        import random

        random.seed(event["seed"])

    # NOTE(review): seed only affects Python's `random` here; the
    # deterministic initializer below does not consume it.
    seed = event.get("seed", 42)
    seed = int(seed)

    matrix_generating_begin = datetime.datetime.now()
    alpha, beta, C, A, B = initialize_torch(size, size, size, dtype=torch.float32, device="cuda")
    matrix_generating_end = datetime.datetime.now()

    matmul_begin = datetime.datetime.now()
    C_out, gpu_ms = kernel_gemm(alpha, beta, C, A, B, reps=1)
    matmul_end = datetime.datetime.now()

    matrix_generating_time = (matrix_generating_end - matrix_generating_begin) / datetime.timedelta(
        microseconds=1
    )
    matmul_time = (matmul_end - matmul_begin) / datetime.timedelta(microseconds=1)

    return {
        # "result": result[0],
        "measurement": {
            "generating_time": matrix_generating_time,
            "compute_time": matmul_time,
            # Consistency fix: 602/603/604/605 all report the device-side
            # timing; matmul computed gpu_ms but previously dropped it.
            "gpu_time": gpu_ms,
        },
    }
#!/usr/bin/env python3
import torch
import datetime
import time


def initialize_torch(N, dtype=torch.float32, device="cuda", seed=42):
    """Seeded random AXPY operands: scalar alpha and vectors x, y of length N."""
    if seed is not None:
        torch.manual_seed(seed)
        # Documented to be silently ignored when CUDA is unavailable.
        torch.cuda.manual_seed_all(seed)
    alpha = torch.randn((), dtype=dtype, device=device)
    x = torch.randn(N, dtype=dtype, device=device)
    y = torch.randn(N, dtype=dtype, device=device)
    return alpha, x, y


def kernel_axpy(alpha, x, y, reps=100):
    """Time `reps` AXPY updates y = alpha * x + y.

    Uses CUDA events on GPU tensors; falls back to time.perf_counter on CPU
    so the kernel also runs without CUDA (the original unconditionally called
    torch.cuda.synchronize() and crashed off-GPU). The input tensor y is not
    mutated; the accumulated result is returned. Returns (y, elapsed_ms).
    """
    on_gpu = x.is_cuda
    if on_gpu:
        torch.cuda.synchronize()
    _ = alpha * x + y  # warmup
    if on_gpu:
        torch.cuda.synchronize()
        start_evt = torch.cuda.Event(enable_timing=True)
        end_evt = torch.cuda.Event(enable_timing=True)
        start_evt.record()
        for _ in range(reps):
            y = alpha * x + y
        end_evt.record()
        torch.cuda.synchronize()
        elapsed_ms = float(start_evt.elapsed_time(end_evt))
    else:
        t0 = time.perf_counter()
        for _ in range(reps):
            y = alpha * x + y
        elapsed_ms = (time.perf_counter() - t0) * 1000.0
    return y, elapsed_ms


def handler(event):
    """Benchmark entry point: 100 AXPY updates on a vector of length size.

    Returns wall-clock generate/compute times in microseconds and the
    device-side kernel time in milliseconds.
    """
    size = event.get("size")
    if "seed" in event:
        import random

        random.seed(event["seed"])

    seed = event.get("seed", 42)
    seed = int(seed)

    gen_begin = datetime.datetime.now()
    alpha, x, y = initialize_torch(size, dtype=torch.float32, device="cuda", seed=seed)
    gen_end = datetime.datetime.now()

    comp_begin = datetime.datetime.now()
    y_out, gpu_ms = kernel_axpy(alpha, x, y, reps=100)
    comp_end = datetime.datetime.now()

    gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1)
    comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1)

    return {
        "measurement": {
            "generating_time": gen_us,
            "compute_time": comp_us,
            "gpu_time": gpu_ms,
        }
    }
#!/usr/bin/env python3
import torch
import datetime
import time


def initialize_torch(N, dtype=torch.float32, device="cuda"):
    """Deterministic Jacobi-2D inputs (PolyBench-style):
    A[i, j] = i * (j + 2) / N and B[i, j] = i * (j + 3) / N."""
    i = torch.arange(N, device=device, dtype=dtype).view(-1, 1)
    j = torch.arange(N, device=device, dtype=dtype).view(1, -1)

    A = i * (j + 2) / N
    B = i * (j + 3) / N
    return A, B


def _jacobi_step(A, B):
    """One Jacobi sweep: update B's interior from A, then A's from B (in place)."""
    B_inner = 0.2 * (A[1:-1, 1:-1] + A[1:-1, :-2] + A[1:-1, 2:] + A[2:, 1:-1] + A[:-2, 1:-1])
    B[1:-1, 1:-1].copy_(B_inner)
    A_inner = 0.2 * (B[1:-1, 1:-1] + B[1:-1, :-2] + B[1:-1, 2:] + B[2:, 1:-1] + B[:-2, 1:-1])
    A[1:-1, 1:-1].copy_(A_inner)


def kernel_jacobi2d(A, B, iters=50):
    """Run one warmup sweep plus `iters` timed sweeps (both mutate A and B).

    Uses CUDA events on GPU tensors; falls back to time.perf_counter on CPU
    so the kernel also runs without CUDA. Returns (A, B, elapsed_ms).
    """
    on_gpu = A.is_cuda
    if on_gpu:
        torch.cuda.synchronize()
    # Warmup sweep; guarded because N <= 2 has no interior points.
    if A.shape[0] > 2 and A.shape[1] > 2:
        _jacobi_step(A, B)
    if on_gpu:
        torch.cuda.synchronize()
        start_evt = torch.cuda.Event(enable_timing=True)
        end_evt = torch.cuda.Event(enable_timing=True)
        start_evt.record()
        for _ in range(iters):
            _jacobi_step(A, B)
        end_evt.record()
        torch.cuda.synchronize()
        elapsed_ms = float(start_evt.elapsed_time(end_evt))
    else:
        t0 = time.perf_counter()
        for _ in range(iters):
            _jacobi_step(A, B)
        elapsed_ms = (time.perf_counter() - t0) * 1000.0
    return A, B, elapsed_ms


def handler(event):
    """Benchmark entry point: 50 Jacobi-2D sweeps on size x size grids.

    Returns wall-clock generate/compute times in microseconds and the
    device-side kernel time in milliseconds.
    """
    size = event.get("size")
    if "seed" in event:
        import random

        random.seed(event["seed"])

    seed = event.get("seed", 42)
    seed = int(seed)

    matrix_generating_begin = datetime.datetime.now()
    A, B = initialize_torch(size, dtype=torch.float32, device="cuda")
    matrix_generating_end = datetime.datetime.now()

    matmul_begin = datetime.datetime.now()
    A_out, B_out, gpu_ms = kernel_jacobi2d(A, B, iters=50)
    matmul_end = datetime.datetime.now()

    matrix_generating_time = (matrix_generating_end - matrix_generating_begin) / datetime.timedelta(
        microseconds=1
    )
    matmul_time = (matmul_end - matmul_begin) / datetime.timedelta(microseconds=1)

    return {
        # "result": result[0],
        "measurement": {
            "generating_time": matrix_generating_time,
            "compute_time": matmul_time,
            "gpu_time": gpu_ms,
        },
    }
a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git 
#!/usr/bin/env python3
import torch
import datetime
import time


def initialize_torch(N, dtype=torch.float32, device="cuda"):
    """Build a deterministic symmetric positive-definite matrix A = L @ L.T,
    where L is lower triangular with unit diagonal (PolyBench-style)."""
    j = torch.arange(N, device=device)
    v = (torch.remainder(-j, N).to(dtype) / N) + 1

    L = v.expand(N, -1).clone()
    L = torch.tril(L)
    L.fill_diagonal_(1.0)

    A = L @ L.transpose(-1, -2)
    return A


def kernel_cholesky(A):
    """Factor A repeatedly (N = A.size(0) times) and time the loop.

    BUG FIX: the warmup result is now kept, so for N == 0 the function
    returns the (empty) factor instead of raising UnboundLocalError.
    Uses CUDA events on GPU tensors; falls back to time.perf_counter on CPU
    so the kernel also runs without CUDA. Returns (L, elapsed_ms).
    """
    on_gpu = A.is_cuda
    if on_gpu:
        torch.cuda.synchronize()
    # Warmup; also the result when the timed loop body never runs (N == 0).
    L = torch.linalg.cholesky(A)
    if on_gpu:
        torch.cuda.synchronize()
        start_evt = torch.cuda.Event(enable_timing=True)
        end_evt = torch.cuda.Event(enable_timing=True)
        start_evt.record()
        for _ in range(A.size(0)):
            L = torch.linalg.cholesky(A)
        end_evt.record()
        torch.cuda.synchronize()
        elapsed_ms = float(start_evt.elapsed_time(end_evt))
    else:
        t0 = time.perf_counter()
        for _ in range(A.size(0)):
            L = torch.linalg.cholesky(A)
        elapsed_ms = (time.perf_counter() - t0) * 1000.0
    return L, elapsed_ms


def handler(event):
    """Benchmark entry point: repeated Cholesky factorization of a size x size
    SPD matrix. Returns wall-clock generate/compute times in microseconds and
    the device-side kernel time in milliseconds."""
    size = event.get("size")
    if "seed" in event:
        import random

        random.seed(event["seed"])

    seed = event.get("seed", 42)
    seed = int(seed)

    gen_begin = datetime.datetime.now()
    A = initialize_torch(size, dtype=torch.float32, device="cuda")
    gen_end = datetime.datetime.now()

    comp_begin = datetime.datetime.now()
    L, gpu_ms = kernel_cholesky(A)
    comp_end = datetime.datetime.now()

    gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1)
    comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1)

    return {
        "measurement": {
            "generating_time": gen_us,
            "compute_time": comp_us,
            "gpu_time": gpu_ms,
        }
    }
b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/config.json b/benchmarks/600.linearalgebra/605.lu/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/605.lu/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/605.lu/input.py b/benchmarks/600.linearalgebra/605.lu/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/605.lu/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/605.lu/python/function.py b/benchmarks/600.linearalgebra/605.lu/python/function.py new file mode 100755 index 000000000..fc99a3ab9 --- /dev/null +++ b/benchmarks/600.linearalgebra/605.lu/python/function.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +import torch +import datetime + + +def initialize_torch(N, dtype=torch.float32, device="cuda"): + col = torch.arange(N, device=device) + base = (torch.remainder(-col, N).to(dtype) / N) + 1 + + A = torch.tril(base.expand(N, N)).clone() + + A.fill_diagonal_(torch.tensor(1.0, dtype=dtype, device=device)) + + A = A @ A.T + return A + + +def _kernel_lu(B: torch.Tensor) -> 
torch.Tensor: + n = B.shape[0] + for i in range(n): + for j in range(i): + B[i, j] = B[i, j] - (B[i, :j] @ B[:j, j]) + B[i, j] = B[i, j] / B[j, j] + for j in range(i, n): + B[i, j] = B[i, j] - (B[i, :i] @ B[:i, j]) + return B + + +def kernel(A: torch.Tensor): + torch.cuda.synchronize() + + _ = _kernel_lu(A.clone()) # Warm-up + + torch.cuda.synchronize() + + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + + start_evt.record() + B = None + for _ in range(A.size(0)): + B = _kernel_lu(A.clone()) + end_evt.record() + + torch.cuda.synchronize() + + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return B, gpu_ms + + +def handler(event): + size = event.get("size") + if "seed" in event: + import random + + random.seed(event["seed"]) + + seed = event.get("seed", 42) + seed = int(seed) + + gen_begin = datetime.datetime.now() + A = initialize_torch(size, dtype=torch.float32, device="cuda") + gen_end = datetime.datetime.now() + + comp_begin = datetime.datetime.now() + B, gpu_ms = kernel(A) + comp_end = datetime.datetime.now() + + gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1) + comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1) + + return { + "measurement": { + "generating_time": gen_us, + "compute_time": comp_us, + "gpu_time": gpu_ms, + } + } diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.11 new file mode 100644 
index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/config.json b/benchmarks/600.linearalgebra/606.spmv/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/606.spmv/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/606.spmv/input.py b/benchmarks/600.linearalgebra/606.spmv/input.py new file mode 100644 index 000000000..e0f215890 --- /dev/null +++ b/benchmarks/600.linearalgebra/606.spmv/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42, 
"density": 0.01} diff --git a/benchmarks/600.linearalgebra/606.spmv/python/function.py b/benchmarks/600.linearalgebra/606.spmv/python/function.py new file mode 100755 index 000000000..e2c4b0218 --- /dev/null +++ b/benchmarks/600.linearalgebra/606.spmv/python/function.py @@ -0,0 +1,71 @@ +import torch +import datetime + + +def initialize_torch(N, density=0.01, dtype=torch.float32, device="cuda", seed=42): + if seed is not None: + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + nnz = int(N * N * density) + row_indices = torch.randint(0, N, (nnz,), device=device) + col_indices = torch.randint(0, N, (nnz,), device=device) + values = torch.randn(nnz, dtype=dtype, device=device) + + indices = torch.stack([row_indices, col_indices]) + sparse_matrix = torch.sparse_coo_tensor(indices, values, (N, N), dtype=dtype, device=device) + + sparse_matrix_csr = sparse_matrix.to_sparse_csr() + + x = torch.randn(N, dtype=dtype, device=device) + + return sparse_matrix_csr, x + + +def kernel_spmv(A, x, reps=100): + torch.cuda.synchronize() + _ = torch.sparse.mm(A, x.unsqueeze(1)).squeeze() # warmup + torch.cuda.synchronize() + + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + for _ in range(reps): + y = torch.sparse.mm(A, x.unsqueeze(1)).squeeze() + end_evt.record() + torch.cuda.synchronize() + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return y, gpu_ms + + +def handler(event): + size = event.get("size") + density = event.get("density", 0.01) # default 1% density + + if "seed" in event: + import random + + random.seed(event["seed"]) + seed = event.get("seed", 42) + seed = int(seed) + else: + seed = 42 + + gen_begin = datetime.datetime.now() + A, x = initialize_torch(size, density=density, dtype=torch.float32, device="cuda", seed=seed) + gen_end = datetime.datetime.now() + + comp_begin = datetime.datetime.now() + y_out, gpu_ms = kernel_spmv(A, x, reps=100) + comp_end = datetime.datetime.now() + 
+ gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1) + comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1) + + return { + "measurement": { + "generating_time": gen_us, + "compute_time": comp_us, + "gpu_time": gpu_ms, + } + } diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git 
a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/config.json b/benchmarks/600.linearalgebra/607.fw/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/607.fw/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/607.fw/input.py b/benchmarks/600.linearalgebra/607.fw/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/607.fw/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/607.fw/python/function.py b/benchmarks/600.linearalgebra/607.fw/python/function.py new file mode 100755 index 000000000..bee06dd03 --- /dev/null +++ b/benchmarks/600.linearalgebra/607.fw/python/function.py @@ -0,0 +1,71 @@ +import torch +import datetime + + +def initialize_torch(N, dtype=torch.int32, device="cuda", seed=42): + if seed is not None: + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + i, j = torch.meshgrid( + torch.arange(N, device=device), torch.arange(N, device=device), indexing="ij" + ) + path = ((i * j) % 7 + 1).to(dtype) + + mask = ((i + j) % 13 == 0) | ((i + j) % 7 == 0) | ((i + j) % 11 == 0) + path = path.masked_fill(mask, torch.as_tensor(999, dtype=dtype, device=device)) + return path + + +def kernel_fw(path): + torch.cuda.synchronize() + path2 = path.clone() + n = path2.size(0) + for k in range(n): + for i in range(n): + path2[i, :] = torch.minimum(path2[i, :], path2[i, k] + path2[k, :]) # warmup + torch.cuda.synchronize() 
+ + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + n = path.size(0) + for k in range(n): + for i in range(n): + path[i, :] = torch.minimum(path[i, :], path[i, k] + path[k, :]) + end_evt.record() + torch.cuda.synchronize() + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return path, gpu_ms + + +def handler(event): + size = event.get("size") + + if "seed" in event: + import random + + random.seed(event["seed"]) + seed = event.get("seed", 42) + seed = int(seed) + else: + seed = 42 + + gen_begin = datetime.datetime.now() + path = initialize_torch(size, dtype=torch.float32, device="cuda", seed=seed) + gen_end = datetime.datetime.now() + + comp_begin = datetime.datetime.now() + path_out, gpu_ms = kernel_fw(path) + comp_end = datetime.datetime.now() + + gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1) + comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1) + + return { + "measurement": { + "generating_time": gen_us, + "compute_time": comp_us, + "gpu_time": gpu_ms, + } + } diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git 
a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.workflows/610.gen/config.json b/benchmarks/600.workflows/610.gen/config.json index 8eae08240..8ff6eec59 100644 --- a/benchmarks/600.workflows/610.gen/config.json +++ b/benchmarks/600.workflows/610.gen/config.json @@ -1,5 +1,6 @@ { "timeout": 120, "memory": 128, - "languages": ["python"] + "languages": ["python"], + "modules": [] } diff --git a/benchmarks/600.workflows/610.gen/input.py b/benchmarks/600.workflows/610.gen/input.py index 68f82e81f..2fcf1fcaa 100644 --- a/benchmarks/600.workflows/610.gen/input.py +++ b/benchmarks/600.workflows/610.gen/input.py @@ -1,5 +1,14 @@ def buckets_count(): return (0, 0) -def generate_input(data_dir, size, input_buckets, output_buckets, upload_func): - return dict() \ No newline at end of file + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): + return {} diff --git a/benchmarks/600.workflows/610.gen/python/few_people.py b/benchmarks/600.workflows/610.gen/python/few_people.py index 
9c70d9fbc..b9555199a 100644 --- a/benchmarks/600.workflows/610.gen/python/few_people.py +++ b/benchmarks/600.workflows/610.gen/python/few_people.py @@ -1,5 +1,2 @@ def handler(event): - return { - "many_astros": False, - **event - } \ No newline at end of file + return {"many_astros": False, **event} diff --git a/benchmarks/600.workflows/610.gen/python/get_astros.py b/benchmarks/600.workflows/610.gen/python/get_astros.py index 627c65231..9532fb816 100644 --- a/benchmarks/600.workflows/610.gen/python/get_astros.py +++ b/benchmarks/600.workflows/610.gen/python/get_astros.py @@ -1,8 +1,7 @@ import requests + def handler(event): res = requests.get("http://api.open-notify.org/astros.json") - return { - "astros": res.json() - } \ No newline at end of file + return {"astros": res.json()} diff --git a/benchmarks/600.workflows/610.gen/python/many_people.py b/benchmarks/600.workflows/610.gen/python/many_people.py index 2d339f325..595eed0dd 100644 --- a/benchmarks/600.workflows/610.gen/python/many_people.py +++ b/benchmarks/600.workflows/610.gen/python/many_people.py @@ -1,5 +1,2 @@ def handler(event): - return { - "many_astros": True, - **event - } \ No newline at end of file + return {"many_astros": True, **event} diff --git a/benchmarks/600.workflows/610.gen/python/map_astros.py b/benchmarks/600.workflows/610.gen/python/map_astros.py index b98b5e9d7..49886ee73 100644 --- a/benchmarks/600.workflows/610.gen/python/map_astros.py +++ b/benchmarks/600.workflows/610.gen/python/map_astros.py @@ -1,7 +1,10 @@ def handler(elem): name = elem["name"] - fn, ln = name.split(" ") - name = " ".join([ln, fn]) - elem["name_rev"] = name - - return elem \ No newline at end of file + parts = name.split() + if len(parts) >= 2: + first = parts[0] + last = parts[-1] + elem["name_rev"] = f"{last} {first}" + else: + elem["name_rev"] = name + return elem diff --git a/benchmarks/600.workflows/610.gen/python/process_astros.py b/benchmarks/600.workflows/610.gen/python/process_astros.py index 
a981660e0..8483a105a 100644 --- a/benchmarks/600.workflows/610.gen/python/process_astros.py +++ b/benchmarks/600.workflows/610.gen/python/process_astros.py @@ -1,5 +1,2 @@ def handler(arr): - return { - "astros": arr, - "done": True - } \ No newline at end of file + return {"astros": arr, "done": True} diff --git a/benchmarks/600.workflows/610.gen/python/requirements.txt b/benchmarks/600.workflows/610.gen/python/requirements.txt new file mode 100644 index 000000000..f2293605c --- /dev/null +++ b/benchmarks/600.workflows/610.gen/python/requirements.txt @@ -0,0 +1 @@ +requests diff --git a/benchmarks/600.workflows/6100.1000-genome/input.py b/benchmarks/600.workflows/6100.1000-genome/input.py index def8d0195..bf3ecf95c 100644 --- a/benchmarks/600.workflows/6100.1000-genome/input.py +++ b/benchmarks/600.workflows/6100.1000-genome/input.py @@ -1,22 +1,41 @@ import os -import re import uuid import io size_generators = { - "test" : (1), - "small": (5), - "small-10": (10), - "large": (10), + "test": 5, + "small": 5, + "small-10": 10, + "large": 10, } + def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): - files = ["ALL.chr21.1250.vcf", "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf", "columns.txt", "AFR", "ALL", "AMR", "EAS", "EUR", "GBR", "SAS"] + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): + files = [ + "ALL.chr21.1250.vcf", + "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf", + "columns.txt", + "AFR", + "ALL", + "AMR", + "EAS", + "EUR", + "GBR", + "SAS", + ] for name in files: - #if name != "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf": path = os.path.join(data_dir, name) upload_func(0, name, path) @@ -26,30 +45,30 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck 
start_bytes = 0 with open(os.path.join(data_dir, files[0]), "r") as f: content = f.readlines() - #TODO potentially change if input file with different number of lines is to be processed. - range_per_job = 1250 / num_individuals_jobs + # limit content size for local test runs to keep tasks responsive + content = content[: min(len(content), 500)] + range_per_job = len(content) / num_individuals_jobs for i in range(0, num_individuals_jobs): - #actually split file; return it afterwards. see e.g. split.py in 660.map-reduce. - #regex = re.compile('(?!#)') + # actually split file; return it afterwards. see e.g. split.py in 660.map-reduce. + # regex = re.compile('(?!#)') start = i * range_per_job end = i * range_per_job + range_per_job - #print("start: ", start, "end: ", end, "range_per_job: ", range_per_job, "num_individuals_jobs: ", num_individuals_jobs) - #data = list(filter(regex.match, content[int(start):int(end)])) - data = content[int(start):int(end)] - #name with start and end lines is not needed as all individuals jobs can just read their entire file. + # print("start: ", start, "end: ", end, "range_per_job: ", range_per_job) + # data = list(filter(regex.match, content[int(start):int(end)])) + # data = content[int(start) : int(end)] + idx = slice(int(start), int(end)) + data = content[idx] + # start/end line names not needed; jobs read entire file chunk. name = str(uuid.uuid4())[:8] - + upload_data = io.BytesIO() upload_data.writelines((val).encode("utf-8") for val in data) upload_data.seek(0) - #name = client.upload_stream(output_bucket, name, upload_data) - #TODO keep track of start + stop bytes and return them. + # name = client.upload_stream(output_bucket, name, upload_data) + # TODO keep track of start + stop bytes and return them. 
nbytes = upload_data.getbuffer().nbytes - output = { - "start_bytes": start_bytes, - "end_bytes": start_bytes + nbytes - 1 - } + output = {"start_bytes": start_bytes, "end_bytes": start_bytes + nbytes - 1} blobs.append(output) start_bytes += nbytes diff --git a/benchmarks/600.workflows/6100.1000-genome/python/frequency.py b/benchmarks/600.workflows/6100.1000-genome/python/frequency.py index 969a76de2..ab22ad4f5 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/frequency.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/frequency.py @@ -1,33 +1,29 @@ +import collections +import os +import tarfile import time - -tic = time.perf_counter() -import numpy as np +from collections import Counter from random import sample -import os.path -import matplotlib -matplotlib.use('Agg') import matplotlib.pyplot as plt -import collections -from collections import Counter - -import datetime +import numpy as np -import os from . import storage +plt.switch_backend("Agg") + class ReadData: def read_names(self, POP, pop_dir, columns_file): - tic = time.perf_counter() + time.perf_counter() namefile = pop_dir + POP - f = open(namefile, 'r') + f = open(namefile, "r") text = f.read() f.close() text = text.split() all_ids = text[0:] file = columns_file - f = open(file, 'r') + f = open(file, "r") text = f.read() f.close() genome_ids = text.split() @@ -36,14 +32,13 @@ def read_names(self, POP, pop_dir, columns_file): return ids def read_rs_numbers(self, siftfile, SIFT): - ## NB This file is in the format of: - ## line number, rs number, ENSG number, SIFT, Phenotype - tic = time.perf_counter() + # NB This file is in the format of: + # line number, rs number, ENSG number, SIFT, Phenotype + time.perf_counter() rs_numbers = [] variations = {} map_variations = {} - all_variations = [] - sift_file = open(siftfile, 'r') + sift_file = open(siftfile, "r") for item in sift_file: item = item.split() if len(item) > 2: @@ -54,18 +49,18 @@ def read_rs_numbers(self, siftfile, SIFT): return 
rs_numbers, map_variations def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename): - tic = time.perf_counter() + time.perf_counter() mutation_index_array = [] for name in ids: - filename = data_dir + individuals_merge_filename + '/' + chrom + '.' + name - f = open(filename, 'r') + filename = data_dir + individuals_merge_filename + "/" + chrom + "." + name + f = open(filename, "r") text = [] for item in f: item = item.split() try: text.append(item[1]) except IndexError as e: - print("ERROR({}): while reading {}: (item: {})".format(str(e), filename, item)) + print("ERROR({}): while reading {}: (item: {})".format(str(e), filename, item)) sifted_mutations = list(set(rs_numbers).intersection(text)) mutation_index_array.append(sifted_mutations) @@ -73,10 +68,9 @@ def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_f class Results: - def overlap_ind(self, ids, mutation_index_array, n_runs, n_indiv): n_p = len(mutation_index_array) - tic = time.perf_counter() + time.perf_counter() list_p = np.linspace(0, n_p - 1, n_p).astype(int) mutation_overlap = [] random_indiv = [] @@ -95,7 +89,7 @@ def overlap_ind(self, ids, mutation_index_array, n_runs, n_indiv): return mutation_overlap, random_indiv def histogram_overlap(self, mutation_overlap, n_runs): - tic = time.perf_counter() + time.perf_counter() histogram_overlap = [] for run in range(n_runs): final_counts = [count for item, count in mutation_overlap[run].items()] @@ -104,169 +98,174 @@ def histogram_overlap(self, mutation_overlap, n_runs): class PlotData: - def plot_histogram_overlap(self, POP, histogram_overlap, outputFile, n_runs): - tic = time.perf_counter() + time.perf_counter() for run in range(n_runs): - output = outputFile + str(run) + '.png' + output = outputFile + str(run) + ".png" final_counts = [count for item, count in histogram_overlap[run].items()] N = len(final_counts) x = range(N) width = 1 / 1.5 - bar1 = plt.bar(x, final_counts, width, 
color="grey") - plt.ylabel('Mutations') - plt.xlabel('Individuals') + plt.bar(x, final_counts, width, color="grey") + plt.ylabel("Mutations") + plt.xlabel("Individuals") plt.xticks(np.arange(1, N + 1)) plt.savefig(output) plt.close() class WriteData: - def write_histogram_overlap(self, histogram_overlapfile, histogram_overlap, n_runs, n_indiv): - tic = time.perf_counter() + time.perf_counter() for run in range(n_runs): - overlapfile = histogram_overlapfile + str(run) + '.txt' - f = open(overlapfile, 'w') - f.write('Number Individuals - Number Mutations \n') + overlapfile = histogram_overlapfile + str(run) + ".txt" + f = open(overlapfile, "w") + f.write("Number Individuals - Number Mutations \n") for i in range(1, n_indiv + 1): if i in histogram_overlap[run]: - f.write(str(i) + '-' + str(histogram_overlap[run][i]) + '\n') + f.write(str(i) + "-" + str(histogram_overlap[run][i]) + "\n") else: - f.write(str(i) + '-' + str(0) + '\n') + f.write(str(i) + "-" + str(0) + "\n") f.close() - def write_mutation_overlap(self, mutation_overlapfile, mutation_overlap, n_runs): - tic = time.perf_counter() + time.perf_counter() for run in range(n_runs): - overlapfile = mutation_overlapfile + str(run) + '.txt' - f = open(overlapfile, 'w') - f.write('Mutation Index- Number Overlapings \n') + overlapfile = mutation_overlapfile + str(run) + ".txt" + f = open(overlapfile, "w") + f.write("Mutation Index- Number Overlapings \n") for key, count in mutation_overlap[run].items(): - f.write(key + '-' + str(count) + '\n') + f.write(key + "-" + str(count) + "\n") f.close() def write_random_indiv(self, randomindiv_file, random_indiv, n_runs): - tic = time.perf_counter() + time.perf_counter() for run in range(n_runs): - randomfile = randomindiv_file + str(run) + '.txt' - f = open(randomfile, 'w') - f.write('Individuals \n') + randomfile = randomindiv_file + str(run) + ".txt" + f = open(randomfile, "w") + f.write("Individuals \n") for item in random_indiv[run]: f.write("%s\n" % item) f.close() def 
write_mutation_index_array(self, mutation_index_array_file, mutation_index_array): - tic = time.perf_counter() + time.perf_counter() f = open(mutation_index_array_file, "w") for item in mutation_index_array: f.write("%s\n" % item) f.close() def write_map_variations(self, map_variations_file, map_variations): - tic = time.perf_counter() - f = open(map_variations_file, 'w') + time.perf_counter() + f = open(map_variations_file, "w") for key, count in map_variations.items(): - f.write(key + '\t' + str(count) + '\n') + f.write(key + "\t" + str(count) + "\n") f.close() def handler(event): - POP = event["array_element"] - benchmark_bucket = event["sifting"]["benchmark_bucket"] - output_bucket = event["sifting"]["output_bucket"] - input_bucket = event["sifting"]["input_bucket"] - sifting_filename = event["sifting"]["output_sifting"] - individuals_merge_filename = event["individuals_merge"]["merge_outputfile_name"] - - #download files - siftfile = os.path.join("/tmp", "sifting.txt") - individuals_merge_file = os.path.join("/tmp", "individuals_merge.tar.gz") - pop_file = os.path.join("/tmp", POP) - columns_file = os.path.join("/tmp", "columns.txt") - - client = storage.storage.get_instance() - client.download(benchmark_bucket, output_bucket + '/' + sifting_filename, siftfile) - client.download(benchmark_bucket, output_bucket + '/' + individuals_merge_filename, individuals_merge_file) - client.download(benchmark_bucket, input_bucket + '/' + POP, pop_file) - client.download(benchmark_bucket, input_bucket + '/' + "columns.txt", columns_file) - - #chromosome number, doesn't matter here - just used for naming - c = 21 - - SIFT = 'NO-SIFT' - n_runs = 1000 - n_indiv = 52 - - data_dir = '/tmp/' - pop_dir = '/tmp/' - outdata_dir = "/tmp/chr{0}-{1}-freq/output_no_sift/".format(str(c), str(POP)) - plot_dir = "/tmp/chr{0}-{1}-freq/plots_no_sift/".format(str(c), str(POP)) - - if not os.path.exists(outdata_dir): - os.makedirs(outdata_dir, exist_ok=True) - if not os.path.exists(plot_dir): 
- os.makedirs(plot_dir, exist_ok=True) - - OutputFormat = '.png' - chrom = 'chr' + str(c) - - font = {'family': 'serif', 'size': 14} - plt.rc('font', **font) - - # untar input data - import tarfile - - tar = tarfile.open(individuals_merge_file) - tar.extractall(path='/tmp/' + individuals_merge_filename) - tar.close() - - rd = ReadData() - res = Results() - wr = WriteData() - pd = PlotData() - - histogram_overlapfile = outdata_dir + 'Histogram_mutation_overlap_chr' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '_' - mutation_overlapfile = outdata_dir + 'Mutation_overlap_chr' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '_' - mutation_index_array_file = outdata_dir + 'mutation_index_array' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - histogram_overlap_plot = plot_dir + 'Frequency_mutations' + str(c) + '_s' + \ - str(SIFT) + '_' + POP - map_variations_file = outdata_dir + 'map_variations' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - - randomindiv_file = outdata_dir + 'random_indiv' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '_' - - ids = rd.read_names(POP, pop_dir, columns_file) - n_pairs = len(ids) / 2 - - rs_numbers, map_variations = rd.read_rs_numbers(siftfile, SIFT) - mutation_index_array = rd.read_individuals(ids, rs_numbers, data_dir, chrom, individuals_merge_filename) - - wr.write_map_variations(map_variations_file, map_variations) - wr.write_mutation_index_array(mutation_index_array_file, mutation_index_array) - - mutation_overlap, random_indiv = res.overlap_ind(ids, mutation_index_array, n_runs, n_indiv) - histogram_overlap = res.histogram_overlap(mutation_overlap, n_runs) - - wr.write_mutation_overlap(mutation_overlapfile, mutation_overlap, n_runs) - wr.write_histogram_overlap(histogram_overlapfile, histogram_overlap, n_runs, n_indiv) - wr.write_random_indiv(randomindiv_file, random_indiv, n_runs) - - pd.plot_histogram_overlap(POP, histogram_overlap, histogram_overlap_plot, n_runs) - - # gen final output - tar = 
tarfile.open('/tmp/chr%s-%s-freq.tar.gz' % (c, POP), 'w:gz') - tar.add(outdata_dir) - tar.add(plot_dir) - tar.close() - result_name = client.upload(benchmark_bucket, output_bucket + '/' + 'chr%s-%s-freq.tar.gz' % (c, POP), '/tmp/chr%s-%s-freq.tar.gz' % (c, POP)) - result_name = result_name.replace(output_bucket + '/', '') - - return { - "output_frequency": result_name - } + POP = event["array_element"] + benchmark_bucket = event["sifting"]["benchmark_bucket"] + output_bucket = event["sifting"]["output_bucket"] + input_bucket = event["sifting"]["input_bucket"] + sifting_filename = event["sifting"]["output_sifting"] + individuals_merge_filename = event["individuals_merge"]["merge_outputfile_name"] + + # download files + siftfile = os.path.join("/tmp", "sifting.txt") + individuals_merge_file = os.path.join("/tmp", "individuals_merge.tar.gz") + pop_file = os.path.join("/tmp", POP) + columns_file = os.path.join("/tmp", "columns.txt") + + client = storage.storage.get_instance() + client.download(benchmark_bucket, output_bucket + "/" + sifting_filename, siftfile) + client.download( + benchmark_bucket, + output_bucket + "/" + individuals_merge_filename, + individuals_merge_file, + ) + client.download(benchmark_bucket, input_bucket + "/" + POP, pop_file) + client.download(benchmark_bucket, input_bucket + "/" + "columns.txt", columns_file) + + # chromosome number, doesn't matter here - just used for naming + c = 21 + + SIFT = "NO-SIFT" + n_runs = 1000 + n_indiv = 52 + + data_dir = "/tmp/" + pop_dir = "/tmp/" + outdata_dir = "/tmp/chr{0}-{1}-freq/output_no_sift/".format(str(c), str(POP)) + plot_dir = "/tmp/chr{0}-{1}-freq/plots_no_sift/".format(str(c), str(POP)) + + if not os.path.exists(outdata_dir): + os.makedirs(outdata_dir, exist_ok=True) + if not os.path.exists(plot_dir): + os.makedirs(plot_dir, exist_ok=True) + + chrom = "chr" + str(c) + + font = {"family": "serif", "size": 14} + plt.rc("font", **font) + + tar = tarfile.open(individuals_merge_file) + 
tar.extractall(path="/tmp/" + individuals_merge_filename) + tar.close() + + rd = ReadData() + res = Results() + wr = WriteData() + pd = PlotData() + + histogram_overlapfile = ( + outdata_dir + "Histogram_mutation_overlap_chr" + str(c) + "_s" + str(SIFT) + "_" + POP + "_" + ) + mutation_overlapfile = ( + outdata_dir + "Mutation_overlap_chr" + str(c) + "_s" + str(SIFT) + "_" + POP + "_" + ) + mutation_index_array_file = ( + outdata_dir + "mutation_index_array" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" + ) + histogram_overlap_plot = ( + plot_dir + "Frequency_mutations" + str(c) + "_s" + str(SIFT) + "_" + POP + ) + map_variations_file = ( + outdata_dir + "map_variations" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" + ) + + randomindiv_file = outdata_dir + "random_indiv" + str(c) + "_s" + str(SIFT) + "_" + POP + "_" + + ids = rd.read_names(POP, pop_dir, columns_file) + len(ids) / 2 + + rs_numbers, map_variations = rd.read_rs_numbers(siftfile, SIFT) + mutation_index_array = rd.read_individuals( + ids, rs_numbers, data_dir, chrom, individuals_merge_filename + ) + + wr.write_map_variations(map_variations_file, map_variations) + wr.write_mutation_index_array(mutation_index_array_file, mutation_index_array) + + mutation_overlap, random_indiv = res.overlap_ind(ids, mutation_index_array, n_runs, n_indiv) + histogram_overlap = res.histogram_overlap(mutation_overlap, n_runs) + + wr.write_mutation_overlap(mutation_overlapfile, mutation_overlap, n_runs) + wr.write_histogram_overlap(histogram_overlapfile, histogram_overlap, n_runs, n_indiv) + wr.write_random_indiv(randomindiv_file, random_indiv, n_runs) + + pd.plot_histogram_overlap(POP, histogram_overlap, histogram_overlap_plot, n_runs) + + # gen final output + tar = tarfile.open("/tmp/chr%s-%s-freq.tar.gz" % (c, POP), "w:gz") + tar.add(outdata_dir) + tar.add(plot_dir) + tar.close() + result_name = client.upload( + benchmark_bucket, + output_bucket + "/" + "chr%s-%s-freq.tar.gz" % (c, POP), + "/tmp/chr%s-%s-freq.tar.gz" 
% (c, POP), + ) + result_name = result_name.replace(output_bucket + "/", "") + + return {"output_frequency": result_name} diff --git a/benchmarks/600.workflows/6100.1000-genome/python/individuals.py b/benchmarks/600.workflows/6100.1000-genome/python/individuals.py index e156d0f5b..922c6b6cd 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/individuals.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/individuals.py @@ -1,10 +1,8 @@ import os -import uuid import tarfile import shutil import re from . import storage -import datetime client = storage.storage.get_instance() @@ -13,11 +11,13 @@ def compress(output, input_dir): with tarfile.open(output, "w:gz") as file: file.add(input_dir, arcname=os.path.basename(input_dir)) + def readfile(file): - with open(file, 'r') as f: + with open(file, "r") as f: content = f.readlines() return content + def handler(event): benchmark_bucket = event["benchmark_bucket"] individuals_bucket = event["bucket"] @@ -29,28 +29,31 @@ def handler(event): columns = event["columns"] columns_bucket = event["columns_bucket"] columns_path = os.path.join("/tmp", "columns.txt") - - client = storage.storage.get_instance() - client.download(benchmark_bucket, columns_bucket + '/' + columns, columns_path) - data = client.download_within_range(benchmark_bucket, columns_bucket + '/' + individuals_input, start_bytes, end_bytes) - - ndir = 'chr{}n-{}/'.format(21, individuals_input) + client = storage.storage.get_instance() + client.download(benchmark_bucket, columns_bucket + "/" + columns, columns_path) + data = client.download_within_range( + benchmark_bucket, + columns_bucket + "/" + individuals_input, + start_bytes, + end_bytes, + ) + + ndir = "chr{}n-{}/".format(21, individuals_input) ndir = os.path.join("/tmp", ndir) os.makedirs(ndir, exist_ok=True) - - regex = re.compile('(?!#)') - #print("data: ", data) + regex = re.compile("(?!#)") + # print("data: ", data) data = data.split("\n") data = list(filter(lambda line: 
regex.match(line) and line != "", data)) chrp_data = {} - columndata = readfile(columns_path)[0].rstrip('\n').split('\t') + columndata = readfile(columns_path)[0].rstrip("\n").split("\t") start_data = 9 # where the real data start, the first 0|1, 1|1, 1|0 or 0|0 # position of the last element (normally equals to len(data[0].split(' ')) - #end_data = 2504 + # end_data = 2504 end_data = len(columndata) - start_data for i in range(0, end_data): @@ -60,40 +63,49 @@ def handler(event): filename = "{}/chr{}.{}".format(ndir, "21", name) chrp_data[i] = [] - with open(filename, 'w') as f: + with open(filename, "w") as f: zeilennummer = 0 for line in data: zeilennummer += 1 try: - first = line.split('\t')[col] # first =`echo $l | cut -d -f$i` + first = line.split("\t")[col] # first =`echo $l | cut -d -f$i` except Exception as e: - print("faulty line at col = ", col, "zeilennummer:", zeilennummer, " line : ", line) + print( + "faulty line at col = ", + col, + "zeilennummer:", + zeilennummer, + " line : ", + line, + ) raise e - #second =`echo $l | cut -d -f 2, 3, 4, 5, 8 --output-delimiter = ' '` - second = line.split('\t')[0:8] + # second =`echo $l | cut -d -f 2, 3, 4, 5, 8 --output-delimiter = ' '` + second = line.split("\t")[0:8] # We select the one we want second = [elem for id, elem in enumerate(second) if id in [1, 2, 3, 4, 7]] - af_value = second[4].split(';')[8].split('=')[1] + af_value = second[4].split(";")[8].split("=")[1] # We replace with AF_Value second[4] = af_value try: - if ',' in af_value: + if "," in af_value: # We only keep the first value if more than one (that's what awk is doing) - af_value = float(af_value.split(',')[0]) + af_value = float(af_value.split(",")[0]) else: af_value = float(af_value) - elem = first.split('|') + elem = first.split("|") # We skip some lines that do not meet these conditions - if af_value >= 0.5 and elem[0] == '0': + if af_value >= 0.5 and elem[0] == "0": chrp_data[i].append(second) - elif af_value < 0.5 and elem[0] == '1': + 
elif af_value < 0.5 and elem[0] == "1": chrp_data[i].append(second) else: continue - f.write("{0} {1} {2} {3} {4}\n".format( - second[0], second[1], second[2], second[3], second[4]) + f.write( + "{0} {1} {2} {3} {4}\n".format( + second[0], second[1], second[2], second[3], second[4] + ) ) except ValueError: continue @@ -102,15 +114,17 @@ def handler(event): # tar -zcf .. /$outputfile . compress(os.path.join("/tmp/", outputfile), ndir) - outputfile_name = client.upload(benchmark_bucket, individuals_bucket + '/' + outputfile, os.path.join("/tmp/", outputfile)) - outputfile_name = outputfile_name.replace(individuals_bucket + '/', '') - + outputfile_name = client.upload( + benchmark_bucket, + individuals_bucket + "/" + outputfile, + os.path.join("/tmp/", outputfile), + ) + outputfile_name = outputfile_name.replace(individuals_bucket + "/", "") + # Cleaning temporary files try: shutil.rmtree(ndir) except OSError as e: print("Error: %s : %s" % (ndir, e.strerror)) - return { - "individuals_output": outputfile_name - } + return {"individuals_output": outputfile_name} diff --git a/benchmarks/600.workflows/6100.1000-genome/python/individuals_merge.py b/benchmarks/600.workflows/6100.1000-genome/python/individuals_merge.py index 7a563366b..7afe3ea26 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/individuals_merge.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/individuals_merge.py @@ -4,56 +4,63 @@ import tarfile import tempfile import shutil -import datetime + def handler(event): - benchmark_bucket = event["benchmark_bucket"] - individuals_output_bucket = event["bucket"] - filenames = [] - for elem in event["blob"]: - filenames.append(elem["individuals_output"]) - - #download files - client = storage.storage.get_instance() - for file in filenames: - client.download(benchmark_bucket, individuals_output_bucket + '/' + file, os.path.join('/tmp', file)) - - #call merging with c and directories. 
- outputfile_name, outputfile = merging(21, filenames) - - #upload outputfile - outputfile_name = client.upload(benchmark_bucket, individuals_output_bucket + '/' + outputfile_name, outputfile) - outputfile_name = outputfile_name.replace(individuals_output_bucket + '/', '') - - - return { - "merge_outputfile_name": outputfile_name - } + benchmark_bucket = event["benchmark_bucket"] + individuals_output_bucket = event["bucket"] + filenames = [] + for elem in event["blob"]: + filenames.append(elem["individuals_output"]) + + # download files + client = storage.storage.get_instance() + for file in filenames: + client.download( + benchmark_bucket, + individuals_output_bucket + "/" + file, + os.path.join("/tmp", file), + ) + + # call merging with c and directories. + outputfile_name, outputfile = merging(21, filenames) + + # upload outputfile + outputfile_name = client.upload( + benchmark_bucket, individuals_output_bucket + "/" + outputfile_name, outputfile + ) + outputfile_name = outputfile_name.replace(individuals_output_bucket + "/", "") + + return {"merge_outputfile_name": outputfile_name} + def compress(archive, input_dir): with tarfile.open(archive, "w:gz") as f: f.add(input_dir, arcname="") + def extract_all(archive, output_dir): with tarfile.open(archive, "r:*") as f: f.extractall(output_dir) flist = f.getnames() - if flist[0] == '': + if flist[0] == "": flist = flist[1:] return flist + def readfile(filename): - with open(filename, 'r') as f: + with open(filename, "r") as f: content = f.readlines() return content + def writefile(filename, content): - with open(filename, 'w') as f: + with open(filename, "w") as f: f.writelines(content) -def merging(c, tar_files): - tic = time.perf_counter() +def merging(c, tar_files): + time.perf_counter() merged_dir = "merged_chr{}".format(c) merged_dir = os.path.join("/tmp", merged_dir) @@ -62,7 +69,7 @@ def merging(c, tar_files): data = {} for tar in tar_files: - tic_iter = time.perf_counter() + time.perf_counter() 
os.makedirs("/tmp/temp_dir", exist_ok=True) with tempfile.TemporaryDirectory(dir="/tmp/temp_dir") as temp_dir: for filename in extract_all(os.path.join("/tmp", tar), temp_dir): @@ -72,10 +79,9 @@ def merging(c, tar_files): else: data[filename] = content - - for filename,content in data.items(): + for filename, content in data.items(): writefile(os.path.join(merged_dir, filename), content) - + outputfile_name = "chr{}n.tar.gz".format(c) outputfile = os.path.join("/tmp", outputfile_name) diff --git a/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py b/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py index 2c377e47c..f0ba2604b 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/mutation_overlap.py @@ -1,379 +1,411 @@ +import itertools +import os +import tarfile import time - -tic = time.perf_counter() -import numpy as np from random import sample -import os -import os.path -import matplotlib -matplotlib.use('Agg') + +import matplotlib as mpl import matplotlib.pyplot as plt -import itertools +import numpy as np from matplotlib import pyplot -import matplotlib as mpl -import collections -from collections import Counter -import datetime -import os from . 
import storage +plt.switch_backend("Agg") -class ReadData : - def read_names(self, POP, pop_dir, columns_file) : - tic = time.perf_counter() + +class ReadData: + def read_names(self, POP, pop_dir, columns_file): + time.perf_counter() namefile = pop_dir + POP - f = open(namefile, 'r') + f = open(namefile, "r") text = f.read() f.close() text = text.split() all_ids = text[0:] file = columns_file - f = open(file, 'r') + f = open(file, "r") text = f.read() f.close() genome_ids = text.split() - + ids = list(set(all_ids) & set(genome_ids)) - + return ids - def read_rs_numbers(self, siftfile, SIFT) : - ## NB This file is in the format of: - ## line number, rs number, ENSG number, SIFT, Phenotype - tic = time.perf_counter() + def read_rs_numbers(self, siftfile, SIFT): + # NB This file is in the format of: + # line number, rs number, ENSG number, SIFT, Phenotype + time.perf_counter() rs_numbers = [] - variations = {} map_variations = {} - all_variations = [] - sift_file = open(siftfile,'r') + sift_file = open(siftfile, "r") for item in sift_file: item = item.split() if len(item) > 2: rs_numbers.append(item[1]) map_variations[item[1]] = item[2] - + return rs_numbers, map_variations - - def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename) : - tic = time.perf_counter() + + def read_individuals(self, ids, rs_numbers, data_dir, chrom, individuals_merge_filename): + time.perf_counter() mutation_index_array = [] - total_mutations={} - total_mutations_list =[] - for name in ids : - filename = data_dir + individuals_merge_filename + '/' + chrom + '.' + name - f = open(filename, 'r') + total_mutations = {} + total_mutations_list = [] + for name in ids: + filename = data_dir + individuals_merge_filename + "/" + chrom + "." 
+ name + f = open(filename, "r") text = f.read() f.close() text = text.split() sifted_mutations = list(set(rs_numbers).intersection(text)) mutation_index_array.append(sifted_mutations) - total_mutations[name]= len(sifted_mutations) + total_mutations[name] = len(sifted_mutations) total_mutations_list.append(len(sifted_mutations)) - - return mutation_index_array, total_mutations, total_mutations_list - - def read_pairs_overlap(self, indpairsfile) : - tic = time.perf_counter() + + return mutation_index_array, total_mutations, total_mutations_list + + def read_pairs_overlap(self, indpairsfile): + time.perf_counter() pairs_overlap = np.loadtxt(indpairsfile, unpack=True) pairs_overlap = np.transpose(pairs_overlap) return pairs_overlap -class Results : - - def group_indivuals(self, total_mutations_list, n_runs) : - tic = time.perf_counter() +class Results: + def group_indivuals(self, total_mutations_list, n_runs): + time.perf_counter() n_group = 26 - random_mutations_list= [] + random_mutations_list = [] for run in range(n_runs): random_mutations_list.append(sample(total_mutations_list, n_group)) return random_mutations_list - def pair_individuals(self, mutation_index_array, n_runs) : - tic = time.perf_counter() - + def pair_individuals(self, mutation_index_array, n_runs): + time.perf_counter() + n_p = len(mutation_index_array) - n_pairs = int(round(n_p/2)) + n_pairs = int(round(n_p / 2)) list_p = np.linspace(0, n_p - 1, n_p).astype(int) pairs_overlap = np.zeros((n_runs, n_pairs)) - for run in range(n_runs) : - randomized_list = sample(list(list_p) , n_p) - for pq in range(n_pairs) : - array1 = mutation_index_array[randomized_list[2*pq]] - - array2 = mutation_index_array[randomized_list[2*pq]] + for run in range(n_runs): + randomized_list = sample(list(list_p), n_p) + for pq in range(n_pairs): + array1 = mutation_index_array[randomized_list[2 * pq]] + + array2 = mutation_index_array[randomized_list[2 * pq]] pair_array = set(array1) & set(array2) pairs_overlap[run][pq] = 
len(pair_array) return pairs_overlap - def total_pair_individuals (self, mutation_index_array) : - tic = time.perf_counter() + def total_pair_individuals(self, mutation_index_array): + time.perf_counter() n_p = len(mutation_index_array) total_pairs_overlap = np.zeros((n_p, n_p)) simetric_overlap = np.zeros((n_p, n_p)) for run in range(n_p): - array1 = mutation_index_array[run] - start = run +1 - for pq in range(start, n_p) : - array2 = mutation_index_array[pq] - pairs_array = set(array1) & set(array2) - total_pairs_overlap[run][pq]=len(pairs_array) - simetric_overlap[run][pq] = len(pairs_array) - simetric_overlap[pq][run]= len(pairs_array) - - return total_pairs_overlap , simetric_overlap - - def half_pair_individuals(self, mutation_index_array) : - tic = time.perf_counter() + array1 = mutation_index_array[run] + start = run + 1 + for pq in range(start, n_p): + array2 = mutation_index_array[pq] + pairs_array = set(array1) & set(array2) + total_pairs_overlap[run][pq] = len(pairs_array) + simetric_overlap[run][pq] = len(pairs_array) + simetric_overlap[pq][run] = len(pairs_array) + + return total_pairs_overlap, simetric_overlap + + def half_pair_individuals(self, mutation_index_array): + time.perf_counter() n_p = len(mutation_index_array) - n_pairs = int(round(n_p/2)) + n_pairs = int(round(n_p / 2)) pairs_overlap = np.zeros((n_pairs, n_pairs)) for run in range(n_pairs): array1 = mutation_index_array[run] - index =0 - for pq in range(n_pairs+1, n_p): + index = 0 + for pq in range(n_pairs + 1, n_p): array2 = mutation_index_array[pq] pairs_array = set(array1) & set(array2) - pairs_overlap[run][index]=len(pairs_array) + pairs_overlap[run][index] = len(pairs_array) return pairs_overlap - def gene_pairs(self, mutation_index_array) : + def gene_pairs(self, mutation_index_array): - tic = time.perf_counter() + time.perf_counter() n_p = len(mutation_index_array) gene_pair_list = {} - for pp in range(n_p) : + for pp in range(n_p): pairs = 
itertools.combinations(mutation_index_array[pp], 2) - for pair in pairs : + for pair in pairs: key = str(pair) - if key not in gene_pair_list : gene_pair_list[key] = 1 - else : gene_pair_list[key] += 1 + if key not in gene_pair_list: + gene_pair_list[key] = 1 + else: + gene_pair_list[key] += 1 - return gene_pair_list -class PlotData : - def individual_overlap(self, POP, pairs_overlap, outputFile, c, SIFT) : - tic = time.perf_counter() - - pairs_overlap = np.array(pairs_overlap) +class PlotData: + def individual_overlap(self, POP, pairs_overlap, outputFile, c, SIFT): + time.perf_counter() + + pairs_overlap = np.array(pairs_overlap) - min_p = np.min(pairs_overlap) + np.min(pairs_overlap) max_p = np.max(pairs_overlap) nbins = int(max_p) + 1 n_runs = len(pairs_overlap) - nbins = int(np.max(pairs_overlap)) bin_centres = np.linspace(0, nbins, nbins) bin_edges = np.linspace(-0.5, nbins + 0.5, nbins + 1) fig = plt.figure(frameon=False, figsize=(10, 9)) ax = fig.add_subplot(111) - hists = [] max_h = 0 - for run in range(n_runs) : - h, edges = np.histogram(pairs_overlap[run], bins = bin_edges) - ax.plot(bin_centres, h, alpha = 0.5) + for run in range(n_runs): + h, edges = np.histogram(pairs_overlap[run], bins=bin_edges) + ax.plot(bin_centres, h, alpha=0.5) if len(h) > 0: max_h = max(max_h, max(h)) - plt.xlabel('Number of overlapping gene mutations', fontsize = 24) - plt.ylabel(r'frequency', fontsize = 28) - text1 = 'population ' + POP + '\n' +\ - 'chromosome ' + str(c) + '\n' + \ - 'SIFT < ' + str(SIFT) + '\n' + \ - str(n_runs) + ' runs' - plt.text(.95, .95, text1, fontsize = 24, - verticalalignment='top', horizontalalignment='right', - transform = ax.transAxes) - plt.savefig(outputFile) + plt.xlabel("Number of overlapping gene mutations", fontsize=24) + plt.ylabel(r"frequency", fontsize=28) + text1 = ( + "population " + + POP + + "\n" + + "chromosome " + + str(c) + + "\n" + + "SIFT < " + + str(SIFT) + + "\n" + + str(n_runs) + + " runs" + ) + plt.text( + 0.95, + 0.95, + 
text1, + fontsize=24, + verticalalignment="top", + horizontalalignment="right", + transform=ax.transAxes, + ) + plt.savefig(outputFile) plt.close() def total_colormap_overlap(self, POP, total_pairs_overlap, outputFile): - tic = time.perf_counter() - fig = plt.figure() - cmap = mpl.colors.ListedColormap(['blue','black','red', 'green', 'pink']) - img = pyplot.imshow(total_pairs_overlap,interpolation='nearest', cmap = cmap, origin='lower') - pyplot.colorbar(img,cmap=cmap) + time.perf_counter() + plt.figure() + cmap = mpl.colors.ListedColormap(["blue", "black", "red", "green", "pink"]) + img = pyplot.imshow(total_pairs_overlap, interpolation="nearest", cmap=cmap, origin="lower") + pyplot.colorbar(img, cmap=cmap) - plt.savefig(outputFile) + plt.savefig(outputFile) plt.close() -class WriteData : - def write_pair_individuals(self, indpairsfile, pairs_overlap) : - tic = time.perf_counter() - np.savetxt(indpairsfile, pairs_overlap, fmt = '%i') - - def write_gene_pairs(self, genepairsfile, gene_pair_list) : - tic = time.perf_counter() - f = open(genepairsfile, 'w') - for key, count in gene_pair_list.items() : - f.write(key + '\t' + str(count) + '\n') +class WriteData: + def write_pair_individuals(self, indpairsfile, pairs_overlap): + time.perf_counter() + np.savetxt(indpairsfile, pairs_overlap, fmt="%i") + + def write_gene_pairs(self, genepairsfile, gene_pair_list): + time.perf_counter() + f = open(genepairsfile, "w") + for key, count in gene_pair_list.items(): + f.write(key + "\t" + str(count) + "\n") f.close() - - def write_total_indiv(self, total_mutations_filename, total_mutations) : - tic = time.perf_counter() - f = open(total_mutations_filename, 'w') - for key, count in total_mutations.items() : - f.write(key + '\t' + str(count) + '\n') + + def write_total_indiv(self, total_mutations_filename, total_mutations): + time.perf_counter() + f = open(total_mutations_filename, "w") + for key, count in total_mutations.items(): + f.write(key + "\t" + str(count) + "\n") f.close() 
- - def write_random_mutations_list(self, random_mutations_filename, random_mutations_list, n_runs) : + + def write_random_mutations_list(self, random_mutations_filename, random_mutations_list, n_runs): for run in range(n_runs): - filename= random_mutations_filename +'_run_' + str(run) + '.txt' - f = open(filename, 'w') - f.writelines(["%s\n" % item for item in random_mutations_list[run]]) - + filename = random_mutations_filename + "_run_" + str(run) + ".txt" + f = open(filename, "w") + f.writelines(["%s\n" % item for item in random_mutations_list[run]]) + def write_mutation_index_array(self, mutation_index_array_file, mutation_index_array): - f=open(mutation_index_array_file,"w") + f = open(mutation_index_array_file, "w") for item in mutation_index_array: f.write("%s\n" % item) f.close() - def write_map_variations(self, map_variations_file, map_variations) : - tic = time.perf_counter() - f = open(map_variations_file, 'w') - for key, count in map_variations.items() : - f.write(key + '\t' + str(count) + '\n') + def write_map_variations(self, map_variations_file, map_variations): + time.perf_counter() + f = open(map_variations_file, "w") + for key, count in map_variations.items(): + f.write(key + "\t" + str(count) + "\n") f.close() - def handler(event): - POP = event["array_element"] - benchmark_bucket = event["sifting"]["benchmark_bucket"] - output_bucket = event["sifting"]["output_bucket"] - input_bucket = event["sifting"]["input_bucket"] - sifting_filename = event["sifting"]["output_sifting"] - individuals_merge_filename = event["individuals_merge"]["merge_outputfile_name"] - - - #download files - siftfile = os.path.join("/tmp", "sifting.txt") - individuals_merge_file = os.path.join("/tmp", "individuals_merge.tar.gz") - pop_file = os.path.join("/tmp", POP) - columns_file = os.path.join("/tmp", "columns.txt") - - client = storage.storage.get_instance() - client.download(benchmark_bucket, output_bucket + '/' + sifting_filename, siftfile) - 
client.download(benchmark_bucket, output_bucket + '/' + individuals_merge_filename, individuals_merge_file) - client.download(benchmark_bucket, input_bucket + '/' + POP, pop_file) - client.download(benchmark_bucket, input_bucket + '/' + "columns.txt", columns_file) - #chromosome no, doesn't matter. - c = 21 - - SIFT = 'NO-SIFT' - n_runs = 1 - - data_dir = '/tmp/' - pop_dir = '/tmp/' - outdata_dir = "/tmp/chr{0}-{1}/output_no_sift/".format(str(c), str(POP)) - plots_dir = "/tmp/chr{0}-{1}/plots_no_sift/".format(str(c), str(POP)) - - if not os.path.exists(outdata_dir): - os.makedirs(outdata_dir, exist_ok=True) - if not os.path.exists(plots_dir): - os.makedirs(plots_dir, exist_ok=True) - - OutputFormat = '.png' - chrom = 'chr' + str(c) - - font = {'family':'serif', - 'size':14 } - plt.rc('font', **font) - - - # untar input data - import tarfile - tar = tarfile.open(individuals_merge_file) - tar.extractall(path='/tmp/' + individuals_merge_filename) - tar.close() - - tic = time.perf_counter() - - rd = ReadData() - res = Results() - wr = WriteData() - pd = PlotData() - - half_indpairsfile = outdata_dir + 'individual_half_pairs_overlap_chr' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - total_indpairsfile = outdata_dir + 'total_individual_pairs_overlap_chr' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - genepairsfile = outdata_dir + 'gene_pairs_count_chr' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - random_indpairsfile = outdata_dir + '100_individual_overlap_chr' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - - colormap = plots_dir + 'colormap_distribution_c' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + OutputFormat - half_overlap = plots_dir + 'half_distribution_c' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + OutputFormat - total_overlap = plots_dir + 'total_distribution_c' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + OutputFormat - random_overlap = plots_dir + '100_distribution_c' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + 
OutputFormat - - total_mutations_filename = outdata_dir + 'total_mutations_individual' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - random_mutations_filename = outdata_dir + 'random_mutations_individual' + str(c) + '_s' + \ - str(SIFT) + '_' + POP - - mutation_index_array_file = outdata_dir + 'mutation_index_array' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - - map_variations_file = outdata_dir + 'map_variations' + str(c) + '_s' + \ - str(SIFT) + '_' + POP + '.txt' - - - - ids = rd.read_names(POP, pop_dir, columns_file) - n_pairs = len(ids)/2 - - - rs_numbers, map_variations = rd.read_rs_numbers(siftfile, SIFT) - mutation_index_array, total_mutations, total_mutations_list = rd.read_individuals(ids, rs_numbers, data_dir, chrom, individuals_merge_filename) - wr.write_total_indiv(total_mutations_filename, total_mutations) - wr.write_map_variations(map_variations_file, map_variations) - - #cross-correlations mutations overlapping - half_pairs_overlap = res.half_pair_individuals(mutation_index_array) - total_pairs_overlap, simetric_overlap = res.total_pair_individuals(mutation_index_array) - random_pairs_overlap = res.pair_individuals(mutation_index_array, n_runs) - - wr.write_mutation_index_array(mutation_index_array_file, mutation_index_array) - wr.write_pair_individuals(half_indpairsfile, half_pairs_overlap) - wr.write_pair_individuals(total_indpairsfile, total_pairs_overlap) - wr.write_pair_individuals(random_indpairsfile, random_pairs_overlap,) - - pd.individual_overlap(POP, half_pairs_overlap, half_overlap, c, SIFT) - pd.individual_overlap(POP, simetric_overlap, total_overlap, c, SIFT) - pd.individual_overlap(POP, random_pairs_overlap, random_overlap, c, SIFT) - pd.total_colormap_overlap(POP, total_pairs_overlap, colormap) - - #list of frecuency of mutations in 26 individuals - random_mutations_list=res.group_indivuals(total_mutations_list, n_runs) - wr.write_random_mutations_list(random_mutations_filename, random_mutations_list, n_runs) - - # 
gen overlapping - gene_pair_list = res.gene_pairs(mutation_index_array) - wr.write_gene_pairs(genepairsfile, gene_pair_list) - - # gen final output - tar = tarfile.open('/tmp/chr%s-%s.tar.gz' % (c, POP), 'w:gz') - tar.add(outdata_dir) - tar.add(plots_dir) - tar.close() - result_name = client.upload(benchmark_bucket, output_bucket + '/' + 'chr%s-%s.tar.gz' % (c, POP), '/tmp/chr%s-%s.tar.gz' % (c, POP)) - result_name = result_name.replace(output_bucket + '/', '') - - return { - "output_mutation_overlap": result_name - } + POP = event["array_element"] + benchmark_bucket = event["sifting"]["benchmark_bucket"] + output_bucket = event["sifting"]["output_bucket"] + input_bucket = event["sifting"]["input_bucket"] + sifting_filename = event["sifting"]["output_sifting"] + individuals_merge_filename = event["individuals_merge"]["merge_outputfile_name"] + + # download files + siftfile = os.path.join("/tmp", "sifting.txt") + individuals_merge_file = os.path.join("/tmp", "individuals_merge.tar.gz") + pop_file = os.path.join("/tmp", POP) + columns_file = os.path.join("/tmp", "columns.txt") + + client = storage.storage.get_instance() + client.download(benchmark_bucket, output_bucket + "/" + sifting_filename, siftfile) + client.download( + benchmark_bucket, + output_bucket + "/" + individuals_merge_filename, + individuals_merge_file, + ) + client.download(benchmark_bucket, input_bucket + "/" + POP, pop_file) + client.download(benchmark_bucket, input_bucket + "/" + "columns.txt", columns_file) + # chromosome no, doesn't matter. 
+ c = 21 + + SIFT = "NO-SIFT" + n_runs = 1 + + data_dir = "/tmp/" + pop_dir = "/tmp/" + outdata_dir = "/tmp/chr{0}-{1}/output_no_sift/".format(str(c), str(POP)) + plots_dir = "/tmp/chr{0}-{1}/plots_no_sift/".format(str(c), str(POP)) + + if not os.path.exists(outdata_dir): + os.makedirs(outdata_dir, exist_ok=True) + if not os.path.exists(plots_dir): + os.makedirs(plots_dir, exist_ok=True) + + OutputFormat = ".png" + chrom = "chr" + str(c) + + font = {"family": "serif", "size": 14} + plt.rc("font", **font) + + tar = tarfile.open(individuals_merge_file) + tar.extractall(path="/tmp/" + individuals_merge_filename) + tar.close() + + rd = ReadData() + res = Results() + wr = WriteData() + pd = PlotData() + + half_indpairsfile = ( + outdata_dir + + "individual_half_pairs_overlap_chr" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + ".txt" + ) + total_indpairsfile = ( + outdata_dir + + "total_individual_pairs_overlap_chr" + + str(c) + + "_s" + + str(SIFT) + + "_" + + POP + + ".txt" + ) + genepairsfile = ( + outdata_dir + "gene_pairs_count_chr" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" + ) + random_indpairsfile = ( + outdata_dir + "100_individual_overlap_chr" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" + ) + + colormap = ( + plots_dir + "colormap_distribution_c" + str(c) + "_s" + str(SIFT) + "_" + POP + OutputFormat + ) + half_overlap = ( + plots_dir + "half_distribution_c" + str(c) + "_s" + str(SIFT) + "_" + POP + OutputFormat + ) + total_overlap = ( + plots_dir + "total_distribution_c" + str(c) + "_s" + str(SIFT) + "_" + POP + OutputFormat + ) + random_overlap = ( + plots_dir + "100_distribution_c" + str(c) + "_s" + str(SIFT) + "_" + POP + OutputFormat + ) + + total_mutations_filename = ( + outdata_dir + "total_mutations_individual" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" + ) + random_mutations_filename = ( + outdata_dir + "random_mutations_individual" + str(c) + "_s" + str(SIFT) + "_" + POP + ) + + mutation_index_array_file = ( + outdata_dir + 
"mutation_index_array" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" + ) + + map_variations_file = ( + outdata_dir + "map_variations" + str(c) + "_s" + str(SIFT) + "_" + POP + ".txt" + ) + + ids = rd.read_names(POP, pop_dir, columns_file) + len(ids) / 2 + + rs_numbers, map_variations = rd.read_rs_numbers(siftfile, SIFT) + mutation_index_array, total_mutations, total_mutations_list = rd.read_individuals( + ids, rs_numbers, data_dir, chrom, individuals_merge_filename + ) + wr.write_total_indiv(total_mutations_filename, total_mutations) + wr.write_map_variations(map_variations_file, map_variations) + + # cross-correlations mutations overlapping + half_pairs_overlap = res.half_pair_individuals(mutation_index_array) + total_pairs_overlap, simetric_overlap = res.total_pair_individuals(mutation_index_array) + random_pairs_overlap = res.pair_individuals(mutation_index_array, n_runs) + + wr.write_mutation_index_array(mutation_index_array_file, mutation_index_array) + wr.write_pair_individuals(half_indpairsfile, half_pairs_overlap) + wr.write_pair_individuals(total_indpairsfile, total_pairs_overlap) + wr.write_pair_individuals( + random_indpairsfile, + random_pairs_overlap, + ) + + pd.individual_overlap(POP, half_pairs_overlap, half_overlap, c, SIFT) + pd.individual_overlap(POP, simetric_overlap, total_overlap, c, SIFT) + pd.individual_overlap(POP, random_pairs_overlap, random_overlap, c, SIFT) + pd.total_colormap_overlap(POP, total_pairs_overlap, colormap) + + # list of frecuency of mutations in 26 individuals + random_mutations_list = res.group_indivuals(total_mutations_list, n_runs) + wr.write_random_mutations_list(random_mutations_filename, random_mutations_list, n_runs) + + # gen overlapping + gene_pair_list = res.gene_pairs(mutation_index_array) + wr.write_gene_pairs(genepairsfile, gene_pair_list) + + # gen final output + tar = tarfile.open("/tmp/chr%s-%s.tar.gz" % (c, POP), "w:gz") + tar.add(outdata_dir) + tar.add(plots_dir) + tar.close() + result_name = 
client.upload( + benchmark_bucket, + output_bucket + "/" + "chr%s-%s.tar.gz" % (c, POP), + "/tmp/chr%s-%s.tar.gz" % (c, POP), + ) + result_name = result_name.replace(output_bucket + "/", "") + + return {"output_mutation_overlap": result_name} diff --git a/benchmarks/600.workflows/6100.1000-genome/python/requirements.txt b/benchmarks/600.workflows/6100.1000-genome/python/requirements.txt index c357805d6..ba14aed5c 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/requirements.txt +++ b/benchmarks/600.workflows/6100.1000-genome/python/requirements.txt @@ -1,3 +1,2 @@ -#numpy==1.17 -numpy==1.25 #1.16 works on Azure, but not AWS -matplotlib +numpy==1.26.4 +matplotlib==3.8.4 diff --git a/benchmarks/600.workflows/6100.1000-genome/python/sifting.py b/benchmarks/600.workflows/6100.1000-genome/python/sifting.py index 2add45bdb..e6cbb1938 100644 --- a/benchmarks/600.workflows/6100.1000-genome/python/sifting.py +++ b/benchmarks/600.workflows/6100.1000-genome/python/sifting.py @@ -2,73 +2,75 @@ import re from . import storage import subprocess -import datetime + def readfile(file): - with open(file, 'r') as f: + with open(file, "r") as f: content = f.readlines() return content + def handler(event): - benchmark_bucket = event["benchmark_bucket"] - input_bucket = event["columns_bucket"] - input_filename = event["sifting_input"] - inputfile = os.path.join("/tmp", "sifting_file.vcf") - - output_bucket = event["bucket"] - - - client = storage.storage.get_instance() - client.download(benchmark_bucket, input_bucket + '/' + input_filename, inputfile) - - #c is the chromosome number - doesn't matter here. 
- c = 21 - final_name = 'sifted.SIFT.chr{}.txt'.format(c) - final = os.path.join("/tmp", final_name) - - rawdata = readfile(inputfile) - - - r1 = re.compile('.*(#).*') - header = len(list(filter(r1.match, rawdata[:1000]))) - - siftfile = 'SIFT.chr{}.vcf'.format(c) - siftfile = os.path.join("/tmp", siftfile) - with open(siftfile, 'w') as f: - subprocess.run(["grep -n \"deleterious\|tolerated\" {}".format(inputfile)], shell=True, stdout=f) - - data_temp = readfile(siftfile) - - r3 = re.compile('.*(rs).*') - data = list(filter(r3.match, data_temp)) - - - with open(final, 'w') as f: - for l in data: - line = str(int(l.split('\t')[0].split(':')[0]) - int(header)) - id = l.split('\t')[2] - - sifts = l.split('\t')[7].split('|') - sifts = sifts[4] + ' ' + sifts[16] + ' ' + sifts[17] - sifts = sifts.replace('(', ' ').replace(')', '') - - temp = (line + ' ' + id + ' ' + sifts).split(' ') - - if temp[3] == '' or temp[4] == '': - f.write("{} {} {}\n".format(temp[0], temp[1], temp[2])) - elif temp[5] == '': - f.write("{} {} {} {}\n".format(temp[0], temp[1], temp[2], temp[4])) - else: - f.write("{} {} {} {} {}\n".format(temp[0], temp[1], temp[2], temp[4], temp[6])) - - os.remove(siftfile) - final_name = client.upload(benchmark_bucket, output_bucket + '/' + final_name, final) - final_name = final_name.replace(output_bucket + '/', '') - - return { - "output_bucket": output_bucket, - "benchmark_bucket": benchmark_bucket, - "output_sifting": final_name, - "populations": event["populations"], - "input_bucket": input_bucket - } + benchmark_bucket = event["benchmark_bucket"] + input_bucket = event["columns_bucket"] + input_filename = event["sifting_input"] + inputfile = os.path.join("/tmp", "sifting_file.vcf") + + output_bucket = event["bucket"] + + client = storage.storage.get_instance() + client.download(benchmark_bucket, input_bucket + "/" + input_filename, inputfile) + + # c is the chromosome number - doesn't matter here. 
+ c = 21 + final_name = "sifted.SIFT.chr{}.txt".format(c) + final = os.path.join("/tmp", final_name) + + rawdata = readfile(inputfile) + + r1 = re.compile(".*(#).*") + header = len(list(filter(r1.match, rawdata[:1000]))) + + siftfile = "SIFT.chr{}.vcf".format(c) + siftfile = os.path.join("/tmp", siftfile) + with open(siftfile, "w") as f: + subprocess.run( + ['grep -n "deleterious\\\\|tolerated" {}'.format(inputfile)], + shell=True, + stdout=f, + ) + + data_temp = readfile(siftfile) + + r3 = re.compile(".*(rs).*") + data = list(filter(r3.match, data_temp)) + + with open(final, "w") as f: + for line_data in data: + line = str(int(line_data.split("\t")[0].split(":")[0]) - int(header)) + id = line_data.split("\t")[2] + + sifts = line_data.split("\t")[7].split("|") + sifts = sifts[4] + " " + sifts[16] + " " + sifts[17] + sifts = sifts.replace("(", " ").replace(")", "") + + temp = (line + " " + id + " " + sifts).split(" ") + + if temp[3] == "" or temp[4] == "": + f.write("{} {} {}\n".format(temp[0], temp[1], temp[2])) + elif temp[5] == "": + f.write("{} {} {} {}\n".format(temp[0], temp[1], temp[2], temp[4])) + else: + f.write("{} {} {} {} {}\n".format(temp[0], temp[1], temp[2], temp[4], temp[6])) + + os.remove(siftfile) + final_name = client.upload(benchmark_bucket, output_bucket + "/" + final_name, final) + final_name = final_name.replace(output_bucket + "/", "") + + return { + "output_bucket": output_bucket, + "benchmark_bucket": benchmark_bucket, + "output_sifting": final_name, + "populations": event["populations"], + "input_bucket": input_bucket, + } diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/definition.json b/benchmarks/600.workflows/6101.1000-genome-individuals/definition.json index 1f5852d22..d89586cc7 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/definition.json +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/definition.json @@ -5,7 +5,7 @@ "type": "map", "root": "individuals", "array": "blob", - 
"common_params": "bucket,columns,columns_bucket,populations,sifting_input,individuals_file", + "common_params": "benchmark_bucket,bucket,columns,columns_bucket,populations,sifting_input,individuals_file", "states": { "individuals": { "type": "task", diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/input.py b/benchmarks/600.workflows/6101.1000-genome-individuals/input.py index c30c5bdcc..4223d9f05 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/input.py +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/input.py @@ -1,24 +1,43 @@ import os -import re import uuid import io size_generators = { - "test" : (1), - "small": (5), - "small-10": (10), - "small-20": (20), - "large": (10), + "test": 5, + "small": 5, + "small-10": 10, + "small-20": 20, + "large": 10, } + def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): - files = ["ALL.chr21.1250.vcf", "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf", "columns.txt", "AFR", "ALL", "AMR", "EAS", "EUR", "GBR", "SAS"] + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): + files = [ + "ALL.chr21.1250.vcf", + "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf", + "columns.txt", + "AFR", + "ALL", + "AMR", + "EAS", + "EUR", + "GBR", + "SAS", + ] for name in files: if name == "ALL.chr21.1250.vcf" or name == "columns.txt": - #if name != "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf": path = os.path.join(data_dir, name) upload_func(0, name, path) @@ -28,27 +47,27 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck start_bytes = 0 with open(os.path.join(data_dir, files[0]), "r") as f: content = f.readlines() - range_per_job = 1250 / num_individuals_jobs + content = content[: min(len(content), 500)] + 
range_per_job = len(content) / num_individuals_jobs for i in range(0, num_individuals_jobs): - #actually split file; return it afterwards. see e.g. split.py in 660.map-reduce. - #regex = re.compile('(?!#)') + # actually split file; return it afterwards. see e.g. split.py in 660.map-reduce. + # regex = re.compile('(?!#)') start = i * range_per_job end = i * range_per_job + range_per_job - #print("start: ", start, "end: ", end, "range_per_job: ", range_per_job, "num_individuals_jobs: ", num_individuals_jobs) - #data = list(filter(regex.match, content[int(start):int(end)])) - data = content[int(start):int(end)] - #name with start and end lines is not needed as all individuals jobs can just read their entire file. + # print("start: ", start, "end: ", end, "range_per_job: ", range_per_job) + # data = list(filter(regex.match, content[int(start):int(end)])) + # data = content[int(start) : int(end)] + idx = slice(int(start), int(end)) + data = content[idx] + # start/end line names not needed; jobs read entire file chunk. 
name = str(uuid.uuid4())[:8] - + upload_data = io.BytesIO() upload_data.writelines((val).encode("utf-8") for val in data) upload_data.seek(0) nbytes = upload_data.getbuffer().nbytes - output = { - "start_bytes": start_bytes, - "end_bytes": start_bytes + nbytes - 1 - } + output = {"start_bytes": start_bytes, "end_bytes": start_bytes + nbytes - 1} blobs.append(output) start_bytes += nbytes diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py b/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py index f02c3b789..922c6b6cd 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/python/individuals.py @@ -1,5 +1,4 @@ import os -import uuid import tarfile import shutil import re @@ -7,16 +6,18 @@ client = storage.storage.get_instance() + def compress(output, input_dir): with tarfile.open(output, "w:gz") as file: file.add(input_dir, arcname=os.path.basename(input_dir)) def readfile(file): - with open(file, 'r') as f: + with open(file, "r") as f: content = f.readlines() return content + def handler(event): benchmark_bucket = event["benchmark_bucket"] individuals_bucket = event["bucket"] @@ -28,27 +29,31 @@ def handler(event): columns = event["columns"] columns_bucket = event["columns_bucket"] columns_path = os.path.join("/tmp", "columns.txt") - - client = storage.storage.get_instance() - client.download(benchmark_bucket, columns_bucket + '/' + columns, columns_path) - data = client.download_within_range(benchmark_bucket, columns_bucket + '/' + individuals_input, start_bytes, end_bytes) - ndir = 'chr{}n-{}/'.format(21, individuals_input) + client = storage.storage.get_instance() + client.download(benchmark_bucket, columns_bucket + "/" + columns, columns_path) + data = client.download_within_range( + benchmark_bucket, + columns_bucket + "/" + individuals_input, + start_bytes, + end_bytes, + ) + + ndir = 
"chr{}n-{}/".format(21, individuals_input) ndir = os.path.join("/tmp", ndir) os.makedirs(ndir, exist_ok=True) - - regex = re.compile('(?!#)') - #print("data: ", data) + regex = re.compile("(?!#)") + # print("data: ", data) data = data.split("\n") data = list(filter(lambda line: regex.match(line) and line != "", data)) chrp_data = {} - columndata = readfile(columns_path)[0].rstrip('\n').split('\t') + columndata = readfile(columns_path)[0].rstrip("\n").split("\t") start_data = 9 # where the real data start, the first 0|1, 1|1, 1|0 or 0|0 # position of the last element (normally equals to len(data[0].split(' ')) - #end_data = 2504 + # end_data = 2504 end_data = len(columndata) - start_data for i in range(0, end_data): @@ -58,40 +63,49 @@ def handler(event): filename = "{}/chr{}.{}".format(ndir, "21", name) chrp_data[i] = [] - with open(filename, 'w') as f: + with open(filename, "w") as f: zeilennummer = 0 for line in data: zeilennummer += 1 try: - first = line.split('\t')[col] # first =`echo $l | cut -d -f$i` + first = line.split("\t")[col] # first =`echo $l | cut -d -f$i` except Exception as e: - print("faulty line at col = ", col, "zeilennummer:", zeilennummer, " line : ", line) + print( + "faulty line at col = ", + col, + "zeilennummer:", + zeilennummer, + " line : ", + line, + ) raise e - #second =`echo $l | cut -d -f 2, 3, 4, 5, 8 --output-delimiter = ' '` - second = line.split('\t')[0:8] + # second =`echo $l | cut -d -f 2, 3, 4, 5, 8 --output-delimiter = ' '` + second = line.split("\t")[0:8] # We select the one we want second = [elem for id, elem in enumerate(second) if id in [1, 2, 3, 4, 7]] - af_value = second[4].split(';')[8].split('=')[1] + af_value = second[4].split(";")[8].split("=")[1] # We replace with AF_Value second[4] = af_value try: - if ',' in af_value: + if "," in af_value: # We only keep the first value if more than one (that's what awk is doing) - af_value = float(af_value.split(',')[0]) + af_value = float(af_value.split(",")[0]) else: af_value = 
float(af_value) - elem = first.split('|') + elem = first.split("|") # We skip some lines that do not meet these conditions - if af_value >= 0.5 and elem[0] == '0': + if af_value >= 0.5 and elem[0] == "0": chrp_data[i].append(second) - elif af_value < 0.5 and elem[0] == '1': + elif af_value < 0.5 and elem[0] == "1": chrp_data[i].append(second) else: continue - f.write("{0} {1} {2} {3} {4}\n".format( - second[0], second[1], second[2], second[3], second[4]) + f.write( + "{0} {1} {2} {3} {4}\n".format( + second[0], second[1], second[2], second[3], second[4] + ) ) except ValueError: continue @@ -100,8 +114,12 @@ def handler(event): # tar -zcf .. /$outputfile . compress(os.path.join("/tmp/", outputfile), ndir) - outputfile_name = client.upload(benchmark_bucket, individuals_bucket + '/' + outputfile, os.path.join("/tmp/", outputfile)) - outputfile_name = outputfile_name.replace(individuals_bucket + '/', '') + outputfile_name = client.upload( + benchmark_bucket, + individuals_bucket + "/" + outputfile, + os.path.join("/tmp/", outputfile), + ) + outputfile_name = outputfile_name.replace(individuals_bucket + "/", "") # Cleaning temporary files try: @@ -109,6 +127,4 @@ def handler(event): except OSError as e: print("Error: %s : %s" % (ndir, e.strerror)) - return { - "individuals_output": outputfile_name - } + return {"individuals_output": outputfile_name} diff --git a/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt b/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt index 5453e2d48..ba14aed5c 100644 --- a/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt +++ b/benchmarks/600.workflows/6101.1000-genome-individuals/python/requirements.txt @@ -1,3 +1,2 @@ -#numpy==1.17 -numpy==1.18 #1.16 works on Azure, but not AWS -matplotlib +numpy==1.26.4 +matplotlib==3.8.4 diff --git a/benchmarks/600.workflows/620.func-invo/input.py b/benchmarks/600.workflows/620.func-invo/input.py index 
afefd5d9a..19b210c21 100644 --- a/benchmarks/600.workflows/620.func-invo/input.py +++ b/benchmarks/600.workflows/620.func-invo/input.py @@ -1,16 +1,25 @@ size_generators = { - 'test' : 10, - 'small' : 2**5, - 'large': 2**20, - '2e5': 2**5, - '2e8': 2**8, - '2e10': 2**10, - '2e12': 2**12, - '2e14': 2**14, - '2e16': 2**16, - '2e18': 2**18, - '2e18-1000': (2**18)-1000 + "test": 10, + "small": 2**5, + "large": 2**20, + "2e5": 2**5, + "2e8": 2**8, + "2e10": 2**10, + "2e12": 2**12, + "2e14": 2**14, + "2e16": 2**16, + "2e18": 2**18, + "2e18-1000": (2**18) - 1000, } -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'size': size_generators[size] } + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): + return {"size": size_generators[size]} diff --git a/benchmarks/600.workflows/620.func-invo/python/gen.py b/benchmarks/600.workflows/620.func-invo/python/gen.py index 60c328fee..801a247a4 100644 --- a/benchmarks/600.workflows/620.func-invo/python/gen.py +++ b/benchmarks/600.workflows/620.func-invo/python/gen.py @@ -1,5 +1,6 @@ from random import shuffle + def handler(event): size = int(event["size"]) elems = list(range(size)) @@ -11,4 +12,4 @@ def handler(event): if len(data) > size: break - return {'len' : data[:size]} + return {"len": data[:size]} diff --git a/benchmarks/600.workflows/620.func-invo/python/process.py b/benchmarks/600.workflows/620.func-invo/python/process.py index 084062854..54dc04eda 100644 --- a/benchmarks/600.workflows/620.func-invo/python/process.py +++ b/benchmarks/600.workflows/620.func-invo/python/process.py @@ -1,7 +1,8 @@ from random import shuffle + def handler(event): - size = len(event['len']) + size = len(event["len"]) elems = list(range(size)) shuffle(elems) @@ -11,4 +12,4 @@ def handler(event): if len(data) > size: break - return {'len' : data[:size]} + return {"len": data[:size]} diff --git 
a/benchmarks/600.workflows/6200.trip-booking/input.py b/benchmarks/600.workflows/6200.trip-booking/input.py index 4c261f755..0305642c8 100644 --- a/benchmarks/600.workflows/6200.trip-booking/input.py +++ b/benchmarks/600.workflows/6200.trip-booking/input.py @@ -1,27 +1,22 @@ - def allocate_nosql() -> dict: return { - "flights": { - "primary_key": "trip_id", - "secondary_key": "flight_id" - }, - "car_rentals": { - "primary_key": "trip_id", - "secondary_key": "rental_id" - }, - "hotel_booking": { - "primary_key": "trip_id", - "secondary_key": "booking_id" - } + "flights": {"primary_key": "trip_id", "secondary_key": "flight_id"}, + "car_rentals": {"primary_key": "trip_id", "secondary_key": "rental_id"}, + "hotel_booking": {"primary_key": "trip_id", "secondary_key": "booking_id"}, } + def generate_input( - data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, ): - input_config = {} - # test - invoke a single trip, succeed # small - fail in the middle # large - fail at the last step @@ -37,13 +32,13 @@ def generate_input( "rental_class": "compact", "rental_price_max": "100", "rental_duration": 3, - "rental_requests": ["full_tank", "CDW", "assistance"] + "rental_requests": ["full_tank", "CDW", "assistance"], } size_results = { "test": {"result": "success"}, "small": {"result": "failure", "reason": "hotel"}, - "large": {"result": "failure", "reason": "confirm"} + "large": {"result": "failure", "reason": "confirm"}, } trip_details["expected_result"] = size_results[size] diff --git a/benchmarks/600.workflows/6200.trip-booking/python/reserve_flight.py b/benchmarks/600.workflows/6200.trip-booking/python/reserve_flight.py index fe55ed0c1..35fde12cc 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/reserve_flight.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/reserve_flight.py @@ -1,8 +1,17 @@ +import uuid + from . 
import nosql nosql_client = nosql.nosql.get_instance() nosql_table_name = "flights" +def _get_request_id(event): + request_id = event.get("request-id") or event.get("request_id") or event.get("requestId") + if not request_id: + request_id = uuid.uuid4().hex + event["request-id"] = request_id + return request_id + def handler(event): @@ -12,7 +21,7 @@ def handler(event): # We start with the hotel trip_id = event["trip_id"] - flight_id = event["request-id"] + flight_id = _get_request_id(event) # Simulate return from a service flight_price = "1000" diff --git a/benchmarks/600.workflows/6200.trip-booking/python/reserve_hotel.py b/benchmarks/600.workflows/6200.trip-booking/python/reserve_hotel.py index 623d1a8b0..a9d963583 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/reserve_hotel.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/reserve_hotel.py @@ -5,6 +5,13 @@ nosql_client = nosql.nosql.get_instance() nosql_table_name = "hotel_booking" +def _get_request_id(event): + request_id = event.get("request-id") or event.get("request_id") or event.get("requestId") + if not request_id: + request_id = uuid.uuid4().hex + event["request-id"] = request_id + return request_id + def handler(event): @@ -14,7 +21,7 @@ def handler(event): # We start with the hotel trip_id = str(uuid.uuid4().hex) - hotel_booking_id = event["request-id"] + hotel_booking_id = _get_request_id(event) # Simulate return from a service hotel_price = "130" diff --git a/benchmarks/600.workflows/6200.trip-booking/python/reserve_rental.py b/benchmarks/600.workflows/6200.trip-booking/python/reserve_rental.py index 8cf0b11fc..41be88a79 100644 --- a/benchmarks/600.workflows/6200.trip-booking/python/reserve_rental.py +++ b/benchmarks/600.workflows/6200.trip-booking/python/reserve_rental.py @@ -1,8 +1,17 @@ +import uuid + from . 
import nosql nosql_client = nosql.nosql.get_instance() nosql_table_name = "car_rentals" +def _get_request_id(event): + request_id = event.get("request-id") or event.get("request_id") or event.get("requestId") + if not request_id: + request_id = uuid.uuid4().hex + event["request-id"] = request_id + return request_id + def handler(event): @@ -12,7 +21,7 @@ def handler(event): # We start with the hotel trip_id = event["trip_id"] - rental_id = event["request-id"] + rental_id = _get_request_id(event) # Simulate return from a service car_price = "125" diff --git a/benchmarks/600.workflows/630.parallel-sleep/input.py b/benchmarks/600.workflows/630.parallel-sleep/input.py index 092981d7a..073afa2d3 100644 --- a/benchmarks/600.workflows/630.parallel-sleep/input.py +++ b/benchmarks/600.workflows/630.parallel-sleep/input.py @@ -1,34 +1,44 @@ - #threads-duration +# threads-duration size_generators = { - 'test' : (2, 2), - 'small': (16, 20), - 'large': (50, 2), - '2-1': (2, 1), - '4-1': (4, 1), - '8-1': (8, 1), - '16-1': (16, 1), - '2-5': (2, 5), - '4-5': (4, 5), - '8-5': (8, 5), - '16-5': (16, 5), - '2-10': (2, 10), - '4-10': (4, 10), - '8-10': (8, 10), - '16-10': (16, 10), - '2-15': (2, 15), - '4-15': (4, 15), - '8-15': (8, 15), - '16-15': (16, 15), - '2-20': (2, 20), - '4-20': (4, 20), - '8-20': (8, 20), - '16-20': (16, 20), - '50-1': (50, 1) + "test": (2, 2), + "small": (16, 20), + "large": (50, 2), + "2-1": (2, 1), + "4-1": (4, 1), + "8-1": (8, 1), + "16-1": (16, 1), + "2-5": (2, 5), + "4-5": (4, 5), + "8-5": (8, 5), + "16-5": (16, 5), + "2-10": (2, 10), + "4-10": (4, 10), + "8-10": (8, 10), + "16-10": (16, 10), + "2-15": (2, 15), + "4-15": (4, 15), + "8-15": (8, 15), + "16-15": (16, 15), + "2-20": (2, 20), + "4-20": (4, 20), + "8-20": (8, 20), + "16-20": (16, 20), + "50-1": (50, 1), } + def buckets_count(): return (0, 0) -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): + +def generate_input( + data_dir, + size, + 
benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): count, sleep = size_generators[size] - return { 'count': count, 'sleep': sleep } + return {"count": count, "sleep": sleep} diff --git a/benchmarks/600.workflows/630.parallel-sleep/python/generate.py b/benchmarks/600.workflows/630.parallel-sleep/python/generate.py index c291c6754..c538b6442 100644 --- a/benchmarks/600.workflows/630.parallel-sleep/python/generate.py +++ b/benchmarks/600.workflows/630.parallel-sleep/python/generate.py @@ -1,12 +1,9 @@ def handler(event): count = int(event["count"]) sleep = int(event["sleep"]) - + sleep_list = [] for i in range(0, count): - sleep_list.append({'sleep':sleep}) - + sleep_list.append({"sleep": sleep}) - return { - "buffer": sleep_list - } + return {"buffer": sleep_list} diff --git a/benchmarks/600.workflows/630.parallel-sleep/python/process.py b/benchmarks/600.workflows/630.parallel-sleep/python/process.py index 9e2f1ab05..d56f45960 100644 --- a/benchmarks/600.workflows/630.parallel-sleep/python/process.py +++ b/benchmarks/600.workflows/630.parallel-sleep/python/process.py @@ -1,7 +1,7 @@ import time -def handler(event): - time.sleep(event['sleep']) +def handler(event): + time.sleep(event["sleep"]) return "ok" diff --git a/benchmarks/600.workflows/631.parallel-download/input.py b/benchmarks/600.workflows/631.parallel-download/input.py index fd9d6d7b5..217ddf104 100644 --- a/benchmarks/600.workflows/631.parallel-download/input.py +++ b/benchmarks/600.workflows/631.parallel-download/input.py @@ -2,16 +2,16 @@ from random import shuffle size_generators = { - 'test' : (5, 10), - 'small': (20, 2**10), - 'large': (50, 2**10), - '2e10': (20, 2**10), - '2e28': (20, 2**28), - '2e15': (20, 2**15), - '2e20': (20, 2**20), - '2e25': (20, 2**25), - '2e26': (20, 2**26), - '2e27': (20, 2**27) + "test": (5, 10), + "small": (20, 2**10), + "large": (50, 2**10), + "2e10": (20, 2**10), + "2e28": (20, 2**28), + "2e15": (20, 2**15), + "2e20": (20, 2**20), + 
"2e25": (20, 2**25), + "2e26": (20, 2**26), + "2e27": (20, 2**27), } @@ -32,7 +32,15 @@ def generate(size): yield data -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): count, size_bytes = size_generators[size] data_name = f"data-{size_bytes}.txt" @@ -45,4 +53,8 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck upload_func(0, data_name, data_path) # os.remove(data_path) - return { 'count': count, "bucket": benchmarks_bucket, "blob": input_buckets[0] + '/' + data_name} + return { + "count": count, + "bucket": benchmarks_bucket, + "blob": input_buckets[0] + "/" + data_name, + } diff --git a/benchmarks/600.workflows/631.parallel-download/python/generate.py b/benchmarks/600.workflows/631.parallel-download/python/generate.py index fa20cd018..a2e4164ed 100644 --- a/benchmarks/600.workflows/631.parallel-download/python/generate.py +++ b/benchmarks/600.workflows/631.parallel-download/python/generate.py @@ -2,7 +2,4 @@ def handler(event): count = int(event["count"]) del event["count"] - - return { - "buffer": count * [event] - } + return {"buffer": count * [event]} diff --git a/benchmarks/600.workflows/631.parallel-download/python/process.py b/benchmarks/600.workflows/631.parallel-download/python/process.py index e4d56fe20..7fcabed04 100644 --- a/benchmarks/600.workflows/631.parallel-download/python/process.py +++ b/benchmarks/600.workflows/631.parallel-download/python/process.py @@ -1,11 +1,11 @@ from . 
import storage + def handler(event): bucket = event["bucket"] blob = event["blob"] client = storage.storage.get_instance() - buffer = client.download_stream(bucket, blob) - + client.download_stream(bucket, blob) return "ok" diff --git a/benchmarks/600.workflows/640.selfish-detour/input.py b/benchmarks/600.workflows/640.selfish-detour/input.py index 69d06fcd5..687e61383 100644 --- a/benchmarks/600.workflows/640.selfish-detour/input.py +++ b/benchmarks/600.workflows/640.selfish-detour/input.py @@ -1,12 +1,22 @@ size_generators = { - 'test' : 100, - 'small': 5000, - 'large': 10000, + "test": 100, + "small": 5000, + "large": 10000, } + def buckets_count(): return (0, 0) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): num_samples = size_generators[size] - return { 'num_samples': num_samples } + return {"num_samples": num_samples} diff --git a/benchmarks/600.workflows/640.selfish-detour/package.sh b/benchmarks/600.workflows/640.selfish-detour/package.sh deleted file mode 100644 index c1145e436..000000000 --- a/benchmarks/600.workflows/640.selfish-detour/package.sh +++ /dev/null @@ -1,11 +0,0 @@ -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -CUR_DIR=$(pwd) -cd ${SCRIPT_DIR} - -for C_FILE in $(ls *.c) -do - cc -fPIC -shared -o ${C_FILE%%.*}.so ${C_FILE} - rm ${C_FILE} -done - -cd ${CUR_DIR} diff --git a/benchmarks/600.workflows/640.selfish-detour/python/measure.py b/benchmarks/600.workflows/640.selfish-detour/python/measure.py index 7a0900c8f..d602c4e3b 100644 --- a/benchmarks/600.workflows/640.selfish-detour/python/measure.py +++ b/benchmarks/600.workflows/640.selfish-detour/python/measure.py @@ -1,6 +1,7 @@ -import os import json -from ctypes import * +import os +from ctypes import POINTER, c_double, c_int, c_ulonglong, cast, cdll + def 
handler(event): num_samples = event["num_samples"] @@ -10,28 +11,31 @@ def handler(event): path = os.path.join(dir, so_file) if not os.path.exists(path): path = os.path.join(dir, os.pardir, so_file) + if not os.path.exists(path): + raise RuntimeError("selfish-detour.so not found in package.") lib = cdll.LoadLibrary(path) lib.get_ticks_per_second.restype = c_double lib.selfish_detour.argtypes = [c_int, c_int, POINTER(c_ulonglong)] tps = lib.get_ticks_per_second() - assert(tps > 0) + assert tps > 0 - res = (c_ulonglong*num_samples)() + res = (c_ulonglong * num_samples)() ptr = cast(res, POINTER(c_ulonglong)) lib.selfish_detour(num_samples, 900, ptr) res = list(res) - assert(all(x<=y for x, y in zip(res[2:], res[3:]))) - - payload = json.dumps({ - "min_diff": res[0], - "num_iterations": res[1], - "timestamps": res[2:], - "tps": tps - }) + assert all(x <= y for x, y in zip(res[2:], res[3:])) + + payload = json.dumps( + { + "min_diff": res[0], + "num_iterations": res[1], + "timestamps": res[2:], + "tps": tps, + } + ) os.environ["SEBS_FUNCTION_RESULT"] = payload return "ok" - diff --git a/benchmarks/600.workflows/640.selfish-detour/python/package.sh b/benchmarks/600.workflows/640.selfish-detour/python/package.sh new file mode 100755 index 000000000..05461c070 --- /dev/null +++ b/benchmarks/600.workflows/640.selfish-detour/python/package.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -euo pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +cc -O2 -fPIC -shared "$SCRIPT_DIR/selfish-detour.c" -o "$SCRIPT_DIR/selfish-detour.so" diff --git a/benchmarks/600.workflows/640.selfish-detour/selfish-detour.c b/benchmarks/600.workflows/640.selfish-detour/python/selfish-detour.c similarity index 100% rename from benchmarks/600.workflows/640.selfish-detour/selfish-detour.c rename to benchmarks/600.workflows/640.selfish-detour/python/selfish-detour.c diff --git a/benchmarks/600.workflows/650.vid/input.py b/benchmarks/600.workflows/650.vid/input.py index 
c1515f901..eafdec029 100644 --- a/benchmarks/600.workflows/650.vid/input.py +++ b/benchmarks/600.workflows/650.vid/input.py @@ -1,7 +1,7 @@ import os size_generators = { - "test" : (3, 10, "video_test.mp4"), + "test": (3, 10, "video_test.mp4"), "small": (10, 5, "video_small.mp4"), "large": (1000, 3, "video_large.mp4"), } @@ -11,9 +11,21 @@ def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): n_frames, batch_size, video_name = size_generators[size] - files = ["frozen_inference_graph.pb", "faster_rcnn_resnet50_coco_2018_01_28.pbtxt", video_name] + files = [ + "frozen_inference_graph.pb", + "faster_rcnn_resnet50_coco_2018_01_28.pbtxt", + video_name, + ] for name in files: path = os.path.join(data_dir, name) upload_func(0, name, path) @@ -26,5 +38,5 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck "benchmark_bucket": benchmarks_bucket, "input_bucket": input_buckets[0], "model_weights": files[0], - "model_config": files[1] + "model_config": files[1], } diff --git a/benchmarks/600.workflows/650.vid/python/analyse.py b/benchmarks/600.workflows/650.vid/python/analyse.py index 1b8f31664..f2bc2db45 100644 --- a/benchmarks/600.workflows/650.vid/python/analyse.py +++ b/benchmarks/600.workflows/650.vid/python/analyse.py @@ -1,25 +1,92 @@ import os -import io -import json -import sys from . 
import storage import cv2 client = storage.storage.get_instance() -labels = ["person", "bicycle", "car", "motorcycle", -"airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", -"stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", -"sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", -"umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", -"snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", -"surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", -"spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", -"pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", -"toilet", "tv", "laptop", "mouse", "remote", "keyboard", -"cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", -"book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" ] +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] def 
load_model(bucket, weights_blob, config_blob, dest_dir): @@ -35,28 +102,25 @@ def load_model(bucket, weights_blob, config_blob, dest_dir): def load_frames(benchmark_bucket, bucket, blobs, dest_dir): for blob in blobs: - stripped_blob = blob.replace(bucket + '/', '') + stripped_blob = blob.replace(bucket + "/", "") path = os.path.join(dest_dir, stripped_blob) client.download(benchmark_bucket, blob, path) yield cv2.imread(path) def detect(net, img): - rows = img.shape[0] - cols = img.shape[1] + img.shape[0] + img.shape[1] img = cv2.dnn.blobFromImage(img, size=(300, 300), swapRB=True, crop=False) net.setInput(img) out = net.forward() preds = [] - for detection in out[0,0,:,:]: + for detection in out[0, 0, :, :]: score = float(detection[2]) if score > 0.5: class_id = int(detection[1]) - preds.append({ - "class": labels[class_id], - "score": score - }) + preds.append({"class": labels[class_id], "score": score}) return preds @@ -67,14 +131,17 @@ def handler(event): benchmark_bucket = event["benchmark_bucket"] frames = list(load_frames(benchmark_bucket, event["frames_bucket"], event["frames"], tmp_dir)) - net = load_model(benchmark_bucket, event["model_bucket"] + '/' + event["model_weights"], event["model_bucket"] + '/' + event["model_config"], tmp_dir) - + net = load_model( + benchmark_bucket, + event["model_bucket"] + "/" + event["model_weights"], + event["model_bucket"] + "/" + event["model_config"], + tmp_dir, + ) preds = [detect(net, frame) for frame in frames] - + frames_names = event["frames"] frames_names = [x.split(".")[0] for x in event["frames"]] - + preds = {f"{frames_names[idx]}": dets for idx, dets in enumerate(preds)} return preds - diff --git a/benchmarks/600.workflows/650.vid/python/decode.py b/benchmarks/600.workflows/650.vid/python/decode.py index d27b67c3c..3414ed42b 100644 --- a/benchmarks/600.workflows/650.vid/python/decode.py +++ b/benchmarks/600.workflows/650.vid/python/decode.py @@ -9,12 +9,13 @@ def chunks(lst, n): for i in range(0, len(lst), n): 
- yield lst[i:i + n] + idx = slice(i, i + n) + yield lst[idx] def load_video(benchmark_bucket, bucket, blob, dest_dir): path = os.path.join(dest_dir, blob) - client.download(benchmark_bucket, bucket + '/' + blob, path) + client.download(benchmark_bucket, bucket + "/" + blob, path) return path @@ -36,7 +37,7 @@ def upload_imgs(benchmark_bucket, bucket, paths): for path in paths: name = os.path.basename(path) - yield client.upload(benchmark_bucket, bucket + '/' + name, path) + yield client.upload(benchmark_bucket, bucket + "/" + name, path) def handler(event): @@ -56,12 +57,15 @@ def handler(event): frames = list(chunks(paths, batch_size)) return { - "frames": [{ - "frames_bucket": frames_bucket, - "frames": fs, - "benchmark_bucket": benchmark_bucket, - "model_bucket": input_bucket, - "model_config": event["model_config"], - "model_weights": event["model_weights"] - } for fs in frames] + "frames": [ + { + "frames_bucket": frames_bucket, + "frames": fs, + "benchmark_bucket": benchmark_bucket, + "model_bucket": input_bucket, + "model_config": event["model_config"], + "model_weights": event["model_weights"], + } + for fs in frames + ] } diff --git a/benchmarks/600.workflows/650.vid/python/summarize.py b/benchmarks/600.workflows/650.vid/python/summarize.py index 8d290f3f9..24304557a 100644 --- a/benchmarks/600.workflows/650.vid/python/summarize.py +++ b/benchmarks/600.workflows/650.vid/python/summarize.py @@ -1,18 +1,9 @@ -import os -import io -import uuid -import json -import sys -from . 
import storage - - def handler(event): frames = event["frames"] logs = {} for xs in frames: - for key,value in xs.items(): - logs[key] = value + for key, value in xs.items(): + logs[key] = value return logs - diff --git a/benchmarks/600.workflows/660.map-reduce/input.py b/benchmarks/600.workflows/660.map-reduce/input.py index 36b2bcc8f..8d860950d 100644 --- a/benchmarks/600.workflows/660.map-reduce/input.py +++ b/benchmarks/600.workflows/660.map-reduce/input.py @@ -1,18 +1,22 @@ import os import random -size_generators = { - "test" : (50, 3), - "small": (1000, 3), - "large": (100000, 3) -} +size_generators = {"test": (50, 3), "small": (1000, 3), "large": (100000, 3)} def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): mult, n_mappers = size_generators[size] words = ["cat", "dog", "bird", "horse", "pig"] lst = mult * words @@ -21,15 +25,15 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck list_path = os.path.join(data_dir, "words") list_name = "words" with open(list_path, "w") as f: - f.writelines(w+"\n" for w in lst) + f.writelines(w + "\n" for w in lst) upload_func(0, list_name, list_path) - #os.remove(list_path) + # os.remove(list_path) return { "benchmark_bucket": benchmarks_bucket, "words_bucket": input_buckets[0], "words": list_name, "n_mappers": n_mappers, - "output_bucket": output_buckets[0] + "output_bucket": output_buckets[0], } diff --git a/benchmarks/600.workflows/660.map-reduce/python/map.py b/benchmarks/600.workflows/660.map-reduce/python/map.py index 0ba79ae73..4d51cd857 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/map.py +++ b/benchmarks/600.workflows/660.map-reduce/python/map.py @@ -14,6 +14,7 @@ def count_words(lst): return index + def handler(event): benchmark_bucket = 
event["benchmark_bucket"] bucket = event["bucket"] @@ -21,16 +22,16 @@ def handler(event): prefix = event["prefix"] client = storage.storage.get_instance() - my_buffer = client.download_stream(benchmark_bucket, bucket + '/' + blob) + my_buffer = client.download_stream(benchmark_bucket, bucket + "/" + blob) words = bytes(my_buffer).decode("utf-8").split("\n") - + index = count_words(words) for word, count in index.items(): data = io.BytesIO() data.write(str(count).encode("utf-8")) data.seek(0) - #client.upload_stream(benchmark_bucket, os.path.join(bucket, prefix, word, blob), data) + # client.upload_stream(benchmark_bucket, os.path.join(bucket, prefix, word, blob), data) client.upload_stream(benchmark_bucket, os.path.join(prefix, word, blob), data) return event diff --git a/benchmarks/600.workflows/660.map-reduce/python/reduce.py b/benchmarks/600.workflows/660.map-reduce/python/reduce.py index 15fe6d707..1d9d99b9b 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/reduce.py +++ b/benchmarks/600.workflows/660.map-reduce/python/reduce.py @@ -1,6 +1,4 @@ import os -import io -import json from . import storage @@ -10,14 +8,11 @@ def handler(event): client = storage.storage.get_instance() count = 0 - #each blob is one word. - #for blob in client.list_directory(bucket, path): + # each blob is one word. 
+ # for blob in client.list_directory(bucket, path): for blob in client.list_directory(bucket, path): my_buffer = client.download_stream(bucket, blob) count += int(bytes(my_buffer).decode("utf-8")) - #count += int(my_buffer.getvalue().decode("utf-8")) + # count += int(my_buffer.getvalue().decode("utf-8")) - return { - "word": os.path.basename(path), - "count": count - } + return {"word": os.path.basename(path), "count": count} diff --git a/benchmarks/600.workflows/660.map-reduce/python/shuffle.py b/benchmarks/600.workflows/660.map-reduce/python/shuffle.py index 44568f27d..e47573e4c 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/shuffle.py +++ b/benchmarks/600.workflows/660.map-reduce/python/shuffle.py @@ -1,27 +1,26 @@ import os -import json from . import storage def handler(event): lst = event["list"] benchmark_bucket = lst[0]["benchmark_bucket"] - bucket = lst[0]["bucket"] + lst[0]["bucket"] prefix = lst[0]["prefix"] client = storage.storage.get_instance() dirs = client.list_directory(benchmark_bucket, prefix) dirs = [p.split(os.sep)[1] for p in dirs] dirs = list(set(dirs)) - lst = [{ - "bucket": benchmark_bucket, - #"dir": os.path.join(bucket, prefix, path) - #TODO add word here. - "dir": os.path.join(prefix, path) - #"dir": os.path.join(bucket, prefix) - } for path in dirs] + lst = [ + { + "bucket": benchmark_bucket, + # "dir": os.path.join(bucket, prefix, path) + # TODO add word here. + "dir": os.path.join(prefix, path) + # "dir": os.path.join(bucket, prefix) + } + for path in dirs + ] - - return { - "list": lst - } + return {"list": lst} diff --git a/benchmarks/600.workflows/660.map-reduce/python/split.py b/benchmarks/600.workflows/660.map-reduce/python/split.py index 941ffdfff..860e04e41 100644 --- a/benchmarks/600.workflows/660.map-reduce/python/split.py +++ b/benchmarks/600.workflows/660.map-reduce/python/split.py @@ -3,11 +3,15 @@ import uuid from . 
import storage + def chunks(lst, n): m = int(len(lst) / n) - for i in range(n-1): - yield lst[i*m:i*m+m] - tail = lst[(n-1)*m:] + for i in range(n - 1): + idx = slice(i * m, i * m + m) + yield lst[idx] + + idx2 = slice((n - 1) * m, len(lst)) + tail = lst[idx2] if len(tail) > 0: yield tail @@ -19,7 +23,7 @@ def handler(event): words_path = os.path.join("/tmp", "words.txt") client = storage.storage.get_instance() - client.download(benchmark_bucket, words_bucket + '/' + words_blob, words_path) + client.download(benchmark_bucket, words_bucket + "/" + words_blob, words_path) with open(words_path, "r") as f: list = f.read().split("\n") os.remove(words_path) @@ -29,25 +33,25 @@ def handler(event): map_lists = chunks(list, n_mappers) blobs = [] - for chunk in map_lists: name = str(uuid.uuid4())[:8] data = io.BytesIO() - data.writelines((val+"\n").encode("utf-8") for val in chunk) + data.writelines((val + "\n").encode("utf-8") for val in chunk) data.seek(0) - name = client.upload_stream(benchmark_bucket, output_bucket + '/' + name, data) - stripped_name = name.replace(output_bucket + '/', '') + name = client.upload_stream(benchmark_bucket, output_bucket + "/" + name, data) + stripped_name = name.replace(output_bucket + "/", "") blobs.append(stripped_name) prefix = str(uuid.uuid4())[:8] - lst = [{ - "benchmark_bucket": benchmark_bucket, - "bucket": output_bucket, - "blob": b, - "prefix": prefix - } for b in blobs] - - return { - "list": lst - } + lst = [ + { + "benchmark_bucket": benchmark_bucket, + "bucket": output_bucket, + "blob": b, + "prefix": prefix, + } + for b in blobs + ] + + return {"list": lst} diff --git a/benchmarks/600.workflows/670.auth/config.json b/benchmarks/600.workflows/670.auth/config.json index e6a65cb35..d6d184e8a 100644 --- a/benchmarks/600.workflows/670.auth/config.json +++ b/benchmarks/600.workflows/670.auth/config.json @@ -1,5 +1,6 @@ { "timeout": 120, "memory": 256, - "languages": ["python"] + "languages": ["python"], + "modules": [] } diff --git 
a/benchmarks/600.workflows/670.auth/input.py b/benchmarks/600.workflows/670.auth/input.py index d81d24e45..2529739e5 100644 --- a/benchmarks/600.workflows/670.auth/input.py +++ b/benchmarks/600.workflows/670.auth/input.py @@ -1,21 +1,20 @@ -import random - -size_generators = { - "test" : 10, - "small": 100, - "large": 1000 -} +size_generators = {"test": 10, "small": 100, "large": 1000} def buckets_count(): return (0, 0) -def generate_input(data_dir, size, input_buckets, output_buckets, upload_func): +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): mult = size_generators[size] msg = "Who let the dogs out?\n" * mult - return { - "message": msg, - "token": "allow" - } \ No newline at end of file + return {"message": msg, "token": "allow"} diff --git a/benchmarks/600.workflows/670.auth/python/auth.py b/benchmarks/600.workflows/670.auth/python/auth.py index c7b77649c..e24bc32a1 100644 --- a/benchmarks/600.workflows/670.auth/python/auth.py +++ b/benchmarks/600.workflows/670.auth/python/auth.py @@ -1,5 +1,3 @@ -import random -import string import pyaes import base64 @@ -17,7 +15,7 @@ def AESModeCBC(plaintext): # random initialization vector of 16 bytes blocks_size = 16 iv = "InitializationVe" - pad = 16 - len(plaintext)% blocks_size + pad = 16 - len(plaintext) % blocks_size plaintext = str("0" * pad) + plaintext aes = pyaes.AESModeOfOperationCBC(KEY, iv=iv) ciphertext = aes.encrypt(plaintext) @@ -34,6 +32,4 @@ def handler(event): res = AESModeCTR(message) res = base64.b64encode(res).decode("ascii") - return { - "response": res - } + return {"response": res} diff --git a/benchmarks/600.workflows/680.excamera/input.py b/benchmarks/600.workflows/680.excamera/input.py index 687a2eabc..74b19aa3e 100644 --- a/benchmarks/600.workflows/680.excamera/input.py +++ b/benchmarks/600.workflows/680.excamera/input.py @@ -1,18 +1,21 @@ -import random import os -size_generators = { - "test" : (18, 6), - 
"small": (30, 6), - "large": (60, 6) -} +size_generators = {"test": (18, 6), "small": (30, 6), "large": (60, 6)} def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): num_frames, batch_size = size_generators[size] for bin in os.listdir(data_dir): @@ -32,7 +35,7 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck new_vid_segs.append(name) upload_func(0, name, path) - assert(len(new_vid_segs) == num_frames) + assert len(new_vid_segs) == num_frames return { "segments": new_vid_segs, @@ -40,5 +43,5 @@ def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buck "input_bucket": input_buckets[0], "output_bucket": output_buckets[0], "batch_size": batch_size, - "quality": 1 + "quality": 1, } diff --git a/benchmarks/600.workflows/680.excamera/python/encode.py b/benchmarks/600.workflows/680.excamera/python/encode.py index 44a84c5ec..a79f1fc43 100644 --- a/benchmarks/600.workflows/680.excamera/python/encode.py +++ b/benchmarks/600.workflows/680.excamera/python/encode.py @@ -5,16 +5,23 @@ import logging import shutil -VPXENC = "/tmp/vpxenc --ivf --codec=vp8 --good --cpu-used=0 --end-usage=cq --min-q=0 --max-q=63 --cq-level={quality} --buf-initial-sz=10000 --buf-optimal-sz=20000 --buf-sz=40000 --undershoot-pct=100 --passes=2 --auto-alt-ref=1 --threads=1 --token-parts=0 --tune=ssim --target-bitrate=4294967295 -o {output}.ivf {input}.y4m" +VPXENC = ( + "/tmp/vpxenc --ivf --codec=vp8 --good --cpu-used=0 --end-usage=cq " + "--min-q=0 --max-q=63 --cq-level={quality} --buf-initial-sz=10000 " + "--buf-optimal-sz=20000 --buf-sz=40000 --undershoot-pct=100 --passes=2 " + "--auto-alt-ref=1 --threads=1 --token-parts=0 --tune=ssim " + "--target-bitrate=4294967295 -o {output}.ivf {input}.y4m" +) TERMINATE_CHUNK = 
"/tmp/xc-terminate-chunk {input}.ivf {output}.ivf" XC_DUMP_0 = "/tmp/xc-dump {input}.ivf {output}.state" client = storage.storage.get_instance() + def download_bin(benchmark_bucket, bucket, name, dest_dir): path = os.path.join(dest_dir, name) if not os.path.exists(path): - client.download(benchmark_bucket, bucket + '/' + name, path) + client.download(benchmark_bucket, bucket + "/" + name, path) subprocess.check_output(f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True) @@ -22,8 +29,8 @@ def upload_files(benchmark_bucket, bucket, paths, prefix): for path in paths: file = os.path.basename(path) file = prefix + file - #print("Uploading", file, "to", path) - client.upload(benchmark_bucket, bucket + '/' + file, path, unique_name=False) + # print("Uploading", file, "to", path) + client.upload(benchmark_bucket, bucket + "/" + file, path, unique_name=False) def run(cmd): @@ -49,13 +56,13 @@ def encode(segs, data_dir, quality): output_path = os.path.join(data_dir, output) cmd = TERMINATE_CHUNK.format(input=input_path, output=output_path) run(cmd) - files.append(output_path+".ivf") + files.append(output_path + ".ivf") input_path = output_path output_path = os.path.join(data_dir, f"{name}-0") cmd = XC_DUMP_0.format(input=input_path, output=output_path) run(cmd) - files.append(output_path+".state") + files.append(output_path + ".state") return files @@ -77,7 +84,7 @@ def handler(event): os.makedirs(data_dir, exist_ok=True) for seg in segs: path = os.path.join(data_dir, seg) - client.download(benchmark_bucket, input_bucket + '/' + seg, path) + client.download(benchmark_bucket, input_bucket + "/" + seg, path) segs = [os.path.splitext(seg)[0] for seg in segs] output_paths = encode(segs, data_dir, quality) diff --git a/benchmarks/600.workflows/680.excamera/python/rebase.py b/benchmarks/600.workflows/680.excamera/python/rebase.py index 809774305..64a761030 100644 --- a/benchmarks/600.workflows/680.excamera/python/rebase.py +++ 
b/benchmarks/600.workflows/680.excamera/python/rebase.py @@ -5,14 +5,19 @@ import logging import shutil -XC_ENC_REBASE = "/tmp/xc-enc -W -w 0.75 -i y4m -o {output}.ivf -r -I {source_state}.state -p {input_pred}.ivf -S {pred_state}.state {extra} {input}.y4m" +XC_ENC_REBASE = ( + "/tmp/xc-enc -W -w 0.75 -i y4m -o {output}.ivf -r " + "-I {source_state}.state -p {input_pred}.ivf -S {pred_state}.state " + "{extra} {input}.y4m" +) client = storage.storage.get_instance() + def download_bin(benchmark_bucket, bucket, name, dest_dir): path = os.path.join(dest_dir, name) if not os.path.exists(path): - client.download(benchmark_bucket, bucket + '/' + name, path) + client.download(benchmark_bucket, bucket + "/" + name, path) subprocess.check_output(f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True) @@ -20,8 +25,8 @@ def upload_files(benchmark_bucket, bucket, paths, prefix): for path in paths: file = os.path.basename(path) file = prefix + file - #print("Uploading", file, "to", path) - client.upload(benchmark_bucket, bucket + '/' + file, path, unique_name=False) + # print("Uploading", file, "to", path) + client.upload(benchmark_bucket, bucket + "/" + file, path, unique_name=False) def run(cmd): @@ -34,8 +39,8 @@ def run(cmd): def prev_seg_name(seg): - idx = int(seg)-1 - assert(idx >= 0) + idx = int(seg) - 1 + assert idx >= 0 return "{:08d}".format(idx) @@ -49,7 +54,7 @@ def rebase(segs, data_dir, dry_run=False): prev_input_path = os.path.join(data_dir, prev_seg_name(name)) source_state_path = f"{prev_input_path}-1" output_state_path = f"{input_path}-1.state" - extra = f"-O {output_state_path}" if idx != len(segs)-1 else "" + extra = f"-O {output_state_path}" if idx != len(segs) - 1 else "" input_pred_path = f"{input_path}-1" pred_state_path = f"{prev_input_path}-0" @@ -59,18 +64,18 @@ def rebase(segs, data_dir, dry_run=False): source_state=source_state_path, extra=extra, input_pred=input_pred_path, - pred_state=pred_state_path) + pred_state=pred_state_path, + ) if not 
dry_run: run(cmd) + input_paths.append(input_path + ".y4m") + input_paths.append(source_state_path + ".state") + input_paths.append(input_pred_path + ".ivf") + input_paths.append(pred_state_path + ".state") - input_paths.append(input_path+".y4m") - input_paths.append(source_state_path+".state") - input_paths.append(input_pred_path+".ivf") - input_paths.append(pred_state_path+".state") - - output_paths.append(input_path+".ivf") - if idx != len(segs)-1: + output_paths.append(input_path + ".ivf") + if idx != len(segs) - 1: output_paths.append(output_state_path) return input_paths, output_paths @@ -91,25 +96,24 @@ def handler(event): os.makedirs(data_dir, exist_ok=True) input_paths, _ = rebase(segs, data_dir, dry_run=True) - + for path in input_paths: file = os.path.basename(path) try: if ".y4m" in file: - client.download(benchmark_bucket, input_bucket + '/' + file, path) + client.download(benchmark_bucket, input_bucket + "/" + file, path) else: file = prefix + file - client.download(benchmark_bucket, output_bucket + '/' + file, path) - except: + client.download(benchmark_bucket, output_bucket + "/" + file, path) + except Exception: # -1.state is generated by rebase itself - if not "-1.state" in file: + if "-1.state" not in file: raise _, output_paths = rebase(segs, data_dir) upload_files(benchmark_bucket, output_bucket, output_paths, prefix) - shutil.rmtree(data_dir) return event diff --git a/benchmarks/600.workflows/680.excamera/python/reencode.py b/benchmarks/600.workflows/680.excamera/python/reencode.py index ee9b6576a..2b95aea70 100644 --- a/benchmarks/600.workflows/680.excamera/python/reencode.py +++ b/benchmarks/600.workflows/680.excamera/python/reencode.py @@ -5,14 +5,18 @@ import logging import shutil -XC_ENC_FIRST_FRAME = "/tmp/xc-enc -W -w 0.75 -i y4m -o {output}.ivf -r -I {source_state}.state -p {input_pred}.ivf {extra} {input}.y4m" +XC_ENC_FIRST_FRAME = ( + "/tmp/xc-enc -W -w 0.75 -i y4m -o {output}.ivf -r " + "-I {source_state}.state -p {input_pred}.ivf 
{extra} {input}.y4m" +) client = storage.storage.get_instance() + def download_bin(benchmark_bucket, bucket, name, dest_dir): path = os.path.join(dest_dir, name) if not os.path.exists(path): - client.download(benchmark_bucket, bucket + '/' + name, path) + client.download(benchmark_bucket, bucket + "/" + name, path) subprocess.check_output(f"chmod +x {path}", stderr=subprocess.STDOUT, shell=True) @@ -20,7 +24,7 @@ def upload_files(benchmark_bucket, bucket, paths, prefix): for path in paths: file = os.path.basename(path) file = prefix + file - client.upload(benchmark_bucket, bucket + '/' + file, path, unique_name=False) + client.upload(benchmark_bucket, bucket + "/" + file, path, unique_name=False) def run(cmd): @@ -33,8 +37,8 @@ def run(cmd): def prev_seg_name(seg): - idx = int(seg)-1 - assert(idx >= 0) + idx = int(seg) - 1 + assert idx >= 0 return "{:08d}".format(idx) @@ -45,7 +49,7 @@ def reencode_first_frame(segs, data_dir, dry_run=False): name = segs[idx] input_path = os.path.join(data_dir, name) output_path = input_path if idx == 1 else f"{input_path}-1" - source_state_path = os.path.join(data_dir, prev_seg_name(name))+"-0" + source_state_path = os.path.join(data_dir, prev_seg_name(name)) + "-0" output_state_path = f"{input_path}-1.state" extra = f"-O {output_state_path}" if idx == 1 else "" input_pred_path = f"{input_path}-0" @@ -55,15 +59,16 @@ def reencode_first_frame(segs, data_dir, dry_run=False): output=output_path, source_state=source_state_path, extra=extra, - input_pred=input_pred_path) + input_pred=input_pred_path, + ) if not dry_run: run(cmd) - input_paths.append(input_path+".y4m") - input_paths.append(source_state_path+".state") - input_paths.append(input_pred_path+".ivf") + input_paths.append(input_path + ".y4m") + input_paths.append(source_state_path + ".state") + input_paths.append(input_pred_path + ".ivf") - output_paths.append(output_path+".ivf") + output_paths.append(output_path + ".ivf") if idx == 1: output_paths.append(output_state_path) @@ 
-86,14 +91,12 @@ def handler(event): input_paths, _ = reencode_first_frame(segs, data_dir, dry_run=True) for path in input_paths: file = os.path.basename(path) - + if ".y4m" in file: - client.download(benchmark_bucket, input_bucket + '/' + file, path) + client.download(benchmark_bucket, input_bucket + "/" + file, path) else: file = prefix + file - client.download(benchmark_bucket, output_bucket + '/' + file, path) - - + client.download(benchmark_bucket, output_bucket + "/" + file, path) _, output_paths = reencode_first_frame(segs, data_dir) upload_files(benchmark_bucket, output_bucket, output_paths, prefix) diff --git a/benchmarks/600.workflows/680.excamera/python/split.py b/benchmarks/600.workflows/680.excamera/python/split.py index 5ecfad0b6..3a650efa0 100644 --- a/benchmarks/600.workflows/680.excamera/python/split.py +++ b/benchmarks/600.workflows/680.excamera/python/split.py @@ -1,8 +1,10 @@ import uuid + def chunks(lst, n): for i in range(0, len(lst), n): - yield lst[i:i + n] + idx = slice(i, i + n) + yield lst[idx] def handler(event): @@ -15,12 +17,13 @@ def handler(event): return { "segments": [ { - "prefix": str(uuid.uuid4().int & (1<<64)-1)[:8], + "prefix": str(uuid.uuid4().int & (1 << 64) - 1)[:8], "segments": ss, "quality": quality, "input_bucket": input_bucket, "output_bucket": output_bucket, - "benchmark_bucket": benchmark_bucket - } for idx, ss in enumerate(segs) + "benchmark_bucket": benchmark_bucket, + } + for idx, ss in enumerate(segs) ] } diff --git a/benchmarks/600.workflows/690.ml/input.py b/benchmarks/600.workflows/690.ml/input.py index d3f930bc7..e432e9ecb 100644 --- a/benchmarks/600.workflows/690.ml/input.py +++ b/benchmarks/600.workflows/690.ml/input.py @@ -1,5 +1,5 @@ size_generators = { - "test" : (1, 100, 5), + "test": (1, 100, 5), "small": (2, 500, 1024), "large": (3, 1000, 1024), } @@ -8,18 +8,28 @@ {"name": "SVC", "kernel": "linear", "C": 0.025}, {"name": "RandomForestClassifier", "max_depth": 5, "n_estimators": 10}, {"name": 
"RandomForestClassifier", "max_depth": 5, "n_estimators": 15}, - {"name": "AdaBoostClassifier"} + {"name": "AdaBoostClassifier"}, ] + def buckets_count(): return (0, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_buckets, + output_buckets, + upload_func, + nosql_func, +): n_classifiers, n_samples, n_features = size_generators[size] return { "classifiers": classifiers[:n_classifiers], - "benchmark_bucket" : benchmarks_bucket, + "benchmark_bucket": benchmarks_bucket, "dataset_bucket": output_buckets[0], "n_samples": n_samples, - "n_features": n_features + "n_features": n_features, } diff --git a/benchmarks/600.workflows/690.ml/python/generate.py b/benchmarks/600.workflows/690.ml/python/generate.py index 03fea03db..b5b1dee36 100644 --- a/benchmarks/600.workflows/690.ml/python/generate.py +++ b/benchmarks/600.workflows/690.ml/python/generate.py @@ -14,7 +14,7 @@ def generate(n_samples, n_features): n_clusters_per_class=2, weights=[0.9, 0.1], flip_y=0.1, - random_state=123 + random_state=123, ) return X, y @@ -30,10 +30,10 @@ def upload_dataset(benchmark_bucket, bucket, X, y): np.save(labels_path, y) client = storage.storage.get_instance() - features = client.upload(benchmark_bucket, bucket + '/' + "features.npy", features_path) - features = features.replace(bucket + '/', '') - labels = client.upload(benchmark_bucket, bucket + '/' + "labels.npy", labels_path) - labels = labels.replace(bucket + '/', '') + features = client.upload(benchmark_bucket, bucket + "/" + "features.npy", features_path) + features = features.replace(bucket + "/", "") + labels = client.upload(benchmark_bucket, bucket + "/" + "labels.npy", labels_path) + labels = labels.replace(bucket + "/", "") return features, labels @@ -48,7 +48,14 @@ def handler(event): X, y = generate(n_samples, n_features) X_key, y_key = upload_dataset(benchmark_bucket, bucket, X, y) - 
schedules = [{**c, "features": X_key, "labels": y_key, "bucket": bucket, "benchmark_bucket": benchmark_bucket} for c in classifiers] - return { - "schedules": schedules - } + schedules = [ + { + **c, + "features": X_key, + "labels": y_key, + "bucket": bucket, + "benchmark_bucket": benchmark_bucket, + } + for c in classifiers + ] + return {"schedules": schedules} diff --git a/benchmarks/600.workflows/690.ml/python/train.py b/benchmarks/600.workflows/690.ml/python/train.py index d886a3072..81e935fa6 100644 --- a/benchmarks/600.workflows/690.ml/python/train.py +++ b/benchmarks/600.workflows/690.ml/python/train.py @@ -1,18 +1,18 @@ import os import uuid -import sys from . import storage - from sklearn.model_selection import train_test_split -from sklearn.svm import SVC -from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier from sklearn.preprocessing import StandardScaler +from sklearn.svm import SVC # noqa: F401 +from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier # noqa: F401 import numpy as np + def str_to_cls(cls_name): - #print(cls_name) + # print(cls_name) return globals()[cls_name] + def load_dataset(benchmark_bucket, bucket, features, labels): dataset_dir = os.path.join("/tmp", str(uuid.uuid4())) os.makedirs(dataset_dir, exist_ok=True) @@ -20,10 +20,9 @@ def load_dataset(benchmark_bucket, bucket, features, labels): features_path = os.path.join(dataset_dir, "features.npy") labels_path = os.path.join(dataset_dir, "labels.npy") - client = storage.storage.get_instance() - client.download(benchmark_bucket, bucket + '/' + features, features_path) - client.download(benchmark_bucket, bucket + '/' + labels, labels_path) + client.download(benchmark_bucket, bucket + "/" + features, features_path) + client.download(benchmark_bucket, bucket + "/" + labels, labels_path) X = np.load(features_path) y = np.load(labels_path) @@ -34,9 +33,7 @@ def load_dataset(benchmark_bucket, bucket, features, labels): def preprocess(X, y): X = 
StandardScaler().fit_transform(X) - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.4, random_state=123 - ) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=123) return X_train, X_test, y_train, y_test @@ -55,7 +52,8 @@ def handler(schedule): y_key = schedule.pop("labels") bucket = schedule.pop("bucket") benchmark_bucket = schedule.pop("benchmark_bucket") - request_id = schedule.pop("request-id") + schedule.pop("request-id", None) + schedule.pop("request_id", None) clf = str_to_cls(name)(**schedule) @@ -65,8 +63,4 @@ def handler(schedule): train(clf, X_train, y_train) score = val(clf, X_test, y_test) - return { - "name": name, - "score": score - } - + return {"name": name, "score": score} diff --git a/benchmarks/wrappers/aws/python/handler_function.py b/benchmarks/wrappers/aws/python/handler_function.py index 907b2c612..f5a1d4195 100644 --- a/benchmarks/wrappers/aws/python/handler_function.py +++ b/benchmarks/wrappers/aws/python/handler_function.py @@ -1,39 +1,46 @@ - -import datetime, io, json, os, sys, uuid +import datetime +import io +import json +import os +import sys +import uuid # Add current directory to allow location of packages -sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) # TODO: usual trigger # implement support for S3 and others + + def handler(event, context): income_timestamp = datetime.datetime.now().timestamp() # HTTP trigger with API Gateaway - if 'body' in event: - event = json.loads(event['body']) + if "body" in event: + event = json.loads(event["body"]) req_id = context.aws_request_id - event['request-id'] = req_id - event['income-timestamp'] = income_timestamp + event["request-id"] = req_id + event["income-timestamp"] = income_timestamp begin = datetime.datetime.now() from function import function + ret = function.handler(event) end = 
datetime.datetime.now() - log_data = { - 'output': ret['result'] - } - if 'measurement' in ret: - log_data['measurement'] = ret['measurement'] - if 'logs' in event: - log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + log_data = {"output": ret["result"]} + if "measurement" in ret: + log_data["measurement"] = ret["measurement"] + if "logs" in event: + log_data["time"] = (end - begin) / datetime.timedelta(microseconds=1) results_begin = datetime.datetime.now() from function import storage + storage_inst = storage.storage.get_instance() - b = event.get('logs').get('bucket') - storage_inst.upload_stream(b, '{}.json'.format(req_id), - io.BytesIO(json.dumps(log_data).encode('utf-8'))) + b = event.get("logs").get("bucket") + storage_inst.upload_stream( + b, "{}.json".format(req_id), io.BytesIO(json.dumps(log_data).encode("utf-8")) + ) results_end = datetime.datetime.now() results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) else: @@ -41,14 +48,14 @@ def handler(event, context): # cold test is_cold = False - fname = os.path.join('/tmp', 'cold_run') + fname = os.path.join("/tmp", "cold_run") if not os.path.exists(fname): is_cold = True container_id = str(uuid.uuid4())[0:8] - with open(fname, 'a') as f: + with open(fname, "a") as f: f.write(container_id) else: - with open(fname, 'r') as f: + with open(fname, "r") as f: container_id = f.read() cold_start_var = "" @@ -56,16 +63,17 @@ def handler(event, context): cold_start_var = os.environ["cold_start"] return { - 'statusCode': 200, - 'body': json.dumps({ - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'results_time': results_time, - 'is_cold': is_cold, - 'result': log_data, - 'request_id': context.aws_request_id, - 'cold_start_var': cold_start_var, - 'container_id': container_id, - }) + "statusCode": 200, + "body": json.dumps( + { + "begin": begin.strftime("%s.%f"), + "end": end.strftime("%s.%f"), + "results_time": results_time, + "is_cold": is_cold, + 
"result": log_data, + "request_id": context.aws_request_id, + "cold_start_var": cold_start_var, + "container_id": container_id, + } + ), } - diff --git a/benchmarks/wrappers/aws/python/handler_workflow.py b/benchmarks/wrappers/aws/python/handler_workflow.py index 3f372d895..8bc99ce81 100644 --- a/benchmarks/wrappers/aws/python/handler_workflow.py +++ b/benchmarks/wrappers/aws/python/handler_workflow.py @@ -1,15 +1,12 @@ import datetime -import io import json import os import sys import uuid import importlib -# Add current directory to allow location of packages -sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) - -from redis import Redis +REDIS_HOST = os.getenv("REDIS_HOST", "{{REDIS_HOST}}") +REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "{{REDIS_PASSWORD}}") def probe_cold_start(): @@ -28,6 +25,10 @@ def probe_cold_start(): def handler(event, context): + # Add current directory to allow location of packages + sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) + from redis import Redis + start = datetime.datetime.now().timestamp() os.environ["STORAGE_UPLOAD_BYTES"] = "0" os.environ["STORAGE_DOWNLOAD_BYTES"] = "0" @@ -70,11 +71,11 @@ def handler(event, context): payload = json.dumps(payload) redis = Redis( - host={{REDIS_HOST}}, + host=REDIS_HOST, port=6379, decode_responses=True, socket_connect_timeout=10, - password={{REDIS_PASSWORD}}, + password=REDIS_PASSWORD or None, ) req_id = event["request_id"] diff --git a/benchmarks/wrappers/aws/python/setup.py b/benchmarks/wrappers/aws/python/setup.py index b3d878351..016974465 100644 --- a/benchmarks/wrappers/aws/python/setup.py +++ b/benchmarks/wrappers/aws/python/setup.py @@ -2,14 +2,13 @@ from glob import glob from pkg_resources import parse_requirements -with open('requirements.txt') as f: +with open("requirements.txt") as f: requirements = [str(r) for r in parse_requirements(f)] setup( - name='function', + name="function", 
install_requires=requirements, - packages=['function'], - package_dir={'function': '.'}, - package_data={'function': glob('**', recursive=True)}, + packages=["function"], + package_dir={"function": "."}, + package_data={"function": glob("**", recursive=True)}, ) - diff --git a/benchmarks/wrappers/aws/python/storage.py b/benchmarks/wrappers/aws/python/storage.py index c3b0cc5db..f791a263d 100644 --- a/benchmarks/wrappers/aws/python/storage.py +++ b/benchmarks/wrappers/aws/python/storage.py @@ -20,18 +20,16 @@ class storage: client = None def __init__(self): - self.client = boto3.client('s3') + self.client = boto3.client("s3") @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) - def upload(self, bucket, file, filepath, unique_name = True): + def upload(self, bucket, file, filepath, unique_name=True): incr_io_env_file(filepath, "STORAGE_UPLOAD_BYTES") key_name = storage.unique_name(file) if unique_name else file @@ -44,8 +42,8 @@ def download(self, bucket, file, filepath): def download_directory(self, bucket, prefix, path): objects = self.client.list_objects_v2(Bucket=bucket, Prefix=prefix) - for obj in objects['Contents']: - file_name = obj['Key'] + for obj in objects["Contents"]: + file_name = obj["Key"] path_to_file = os.path.dirname(file_name) os.makedirs(os.path.join(path, path_to_file), exist_ok=True) self.download(bucket, file_name, os.path.join(path, file_name)) @@ -64,15 +62,17 @@ def download_stream(self, bucket, file): self.client.download_fileobj(bucket, file, data) incr_io_env(data.tell(), "STORAGE_DOWNLOAD_BYTES") return data.getbuffer() - + def download_within_range(self, bucket, file, start_byte, stop_byte): - resp = self.client.get_object(Bucket=bucket, Key=file, 
Range='bytes={}-{}'.format(start_byte, stop_byte)) - return resp['Body'].read().decode('utf-8') + resp = self.client.get_object( + Bucket=bucket, Key=file, Range="bytes={}-{}".format(start_byte, stop_byte) + ) + return resp["Body"].read().decode("utf-8") def list_directory(self, bucket, prefix): objects = self.client.list_objects_v2(Bucket=bucket, Prefix=prefix) - for obj in objects['Contents']: - yield obj['Key'] + for obj in objects["Contents"]: + yield obj["Key"] def get_instance(): if storage.instance is None: diff --git a/benchmarks/wrappers/azure/python/handler_function.py b/benchmarks/wrappers/azure/python/handler_function.py index fc3f51b5a..8606dc103 100644 --- a/benchmarks/wrappers/azure/python/handler_function.py +++ b/benchmarks/wrappers/azure/python/handler_function.py @@ -1,8 +1,29 @@ - -import datetime, io, json, os, uuid +import datetime +import io +import json +import os +import uuid import azure.functions as func + +if "NOSQL_STORAGE_DATABASE" in os.environ: + + from . import nosql + + nosql.nosql.get_instance( + os.environ["NOSQL_STORAGE_DATABASE"], + os.environ["NOSQL_STORAGE_URL"], + os.environ["NOSQL_STORAGE_CREDS"], + ) + +if "STORAGE_CONNECTION_STRING" in os.environ: + + from . 
import storage + + client = storage.storage.get_instance(os.environ["STORAGE_CONNECTION_STRING"]) + + # TODO: usual trigger # implement support for blob and others def main(req: func.HttpRequest, starter: str, context: func.Context) -> func.HttpResponse: @@ -11,29 +32,30 @@ def main(req: func.HttpRequest, starter: str, context: func.Context) -> func.Htt req_json = req.get_json() # FIXME: proper placement of request - #req_json['request-id'] = context.invocation_id - req_json['payload']['request-id'] = context.invocation_id - req_json['income-timestamp'] = income_timestamp + # req_json['request-id'] = context.invocation_id + req_json["payload"]["request-id"] = context.invocation_id + req_json["income-timestamp"] = income_timestamp begin = datetime.datetime.now() # We are deployed in the same directory from . import function - ret = function.handler(req_json['payload']) + + ret = function.handler(req_json["payload"]) end = datetime.datetime.now() - log_data = { - 'output': ret['result'] - } - if 'measurement' in ret: - log_data['measurement'] = ret['measurement'] - if 'logs' in req_json: - log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + log_data = {"output": ret["result"]} + if "measurement" in ret: + log_data["measurement"] = ret["measurement"] + if "logs" in req_json: + log_data["time"] = (end - begin) / datetime.timedelta(microseconds=1) results_begin = datetime.datetime.now() from . 
import storage + storage_inst = storage.storage.get_instance() - b = req_json.get('logs').get('bucket') + b = req_json.get("logs").get("bucket") req_id = context.invocation_id - storage_inst.upload_stream(b, '{}.json'.format(req_id), - io.BytesIO(json.dumps(log_data).encode('utf-8'))) + storage_inst.upload_stream( + b, "{}.json".format(req_id), io.BytesIO(json.dumps(log_data).encode("utf-8")) + ) results_end = datetime.datetime.now() results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) else: @@ -41,14 +63,14 @@ def main(req: func.HttpRequest, starter: str, context: func.Context) -> func.Htt # cold test is_cold = False - fname = os.path.join('/tmp','cold_run') + fname = os.path.join("/tmp", "cold_run") if not os.path.exists(fname): is_cold = True container_id = str(uuid.uuid4())[0:8] - with open(fname, 'a') as f: + with open(fname, "a") as f: f.write(container_id) else: - with open(fname, 'r') as f: + with open(fname, "r") as f: container_id = f.read() is_cold_worker = False @@ -60,17 +82,18 @@ def main(req: func.HttpRequest, starter: str, context: func.Context) -> func.Htt is_cold_worker = True return func.HttpResponse( - json.dumps({ - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'results_time': results_time, - 'result': log_data, - 'is_cold': is_cold, - 'is_cold_worker': is_cold_worker, - 'container_id': container_id, - 'environ_container_id': os.environ['CONTAINER_NAME'], - 'request_id': context.invocation_id - }), - mimetype="application/json" + json.dumps( + { + "begin": begin.strftime("%s.%f"), + "end": end.strftime("%s.%f"), + "results_time": results_time, + "result": log_data, + "is_cold": is_cold, + "is_cold_worker": is_cold_worker, + "container_id": container_id, + "environ_container_id": os.environ["CONTAINER_NAME"], + "request_id": context.invocation_id, + } + ), + mimetype="application/json", ) - diff --git a/benchmarks/wrappers/azure/python/handler_workflow.py 
b/benchmarks/wrappers/azure/python/handler_workflow.py index 143b5287f..a48b1c6c0 100644 --- a/benchmarks/wrappers/azure/python/handler_workflow.py +++ b/benchmarks/wrappers/azure/python/handler_workflow.py @@ -4,11 +4,13 @@ import uuid import importlib -import logging - import azure.functions as func from redis import Redis +REDIS_HOST = os.getenv("REDIS_HOST", "{{REDIS_HOST}}") +REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "{{REDIS_PASSWORD}}") + + def probe_cold_start(): is_cold = False fname = os.path.join("/tmp", "cold_run") @@ -23,6 +25,7 @@ def probe_cold_start(): return is_cold, container_id + def main(event, context: func.Context): start = datetime.datetime.now().timestamp() os.environ["STORAGE_UPLOAD_BYTES"] = "0" @@ -32,7 +35,7 @@ def main(event, context: func.Context): func_name = os.path.basename(os.path.dirname(__file__)) # FIXME: sort out workflow and function request id - #event["request-id"] = context.invocation_id + # event["request-id"] = context.invocation_id # this only works on benchmarks where payload is dict event["payload"]["request-id"] = context.invocation_id @@ -53,7 +56,7 @@ def main(event, context: func.Context): "end": end, "is_cold": is_cold, "container_id": container_id, - "provider.request_id": context.invocation_id + "provider.request_id": context.invocation_id, } func_res = os.getenv("SEBS_FUNCTION_RESULT") @@ -70,11 +73,13 @@ def main(event, context: func.Context): payload = json.dumps(payload) - redis = Redis(host={{REDIS_HOST}}, - port=6379, - decode_responses=True, - socket_connect_timeout=10, - password={{REDIS_PASSWORD}}) + redis = Redis( + host=REDIS_HOST, + port=6379, + decode_responses=True, + socket_connect_timeout=10, + password=REDIS_PASSWORD or None, + ) req_id = event["request_id"] key = os.path.join(workflow_name, func_name, req_id, str(uuid.uuid4())[0:8]) diff --git a/benchmarks/wrappers/azure/python/main_workflow.py b/benchmarks/wrappers/azure/python/main_workflow.py index 0c4e55a0c..ce74b4e49 100644 --- 
a/benchmarks/wrappers/azure/python/main_workflow.py +++ b/benchmarks/wrappers/azure/python/main_workflow.py @@ -8,6 +8,7 @@ import logging + def probe_cold_start(): is_cold = False fname = os.path.join("/tmp", "cold_run") @@ -28,12 +29,12 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu req_id = event["request_id"] logging.info("complete event: ") logging.info(event) - logging.info("req_id in main: ") + logging.info("req_id in main: ") logging.info(req_id) - if 'connection_string' in event: + if "connection_string" in event: logging.info("setting connection string.") - os.environ['STORAGE_CONNECTION_STRING'] = event['connection_string'] + os.environ["STORAGE_CONNECTION_STRING"] = event["connection_string"] begin = datetime.datetime.now() @@ -41,17 +42,19 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu instance_id = await client.start_new("run_workflow", None, event) res = client.create_check_status_response(req, instance_id) - #res = await client.wait_for_completion_or_create_check_status_response(req, instance_id, 1000000) + # res = await client.wait_for_completion_or_create_check_status_response( + # req, instance_id, 1000000 + # ) end = datetime.datetime.now() is_cold, container_id = probe_cold_start() - #status = await client.get_status(instance_id) - #code = 500 if str(status.runtime_status) == "Failed" else 200 - - #try: + # status = await client.get_status(instance_id) + # code = 500 if str(status.runtime_status) == "Failed" else 200 + + # try: # result = json.loads(res.get_body()) - #except json.decoder.JSONDecodeError: + # except json.decoder.JSONDecodeError: # result = res.get_body().decode() body = json.loads(res.get_body()) @@ -68,7 +71,5 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu } return func.HttpResponse( - status_code=res.status_code, - body=json.dumps(body), - mimetype="application/json" + status_code=res.status_code, 
body=json.dumps(body), mimetype="application/json" ) diff --git a/benchmarks/wrappers/azure/python/nosql.py b/benchmarks/wrappers/azure/python/nosql.py index acc211ed2..edf79e444 100644 --- a/benchmarks/wrappers/azure/python/nosql.py +++ b/benchmarks/wrappers/azure/python/nosql.py @@ -90,9 +90,9 @@ def get_instance( database: Optional[str] = None, url: Optional[str] = None, credential: Optional[str] = None ): if nosql.instance is None: - database = os.environ['NOSQL_STORAGE_DATABASE'] - url = os.environ['NOSQL_STORAGE_URL'] - credential = os.environ['NOSQL_STORAGE_CREDS'] + database = os.environ["NOSQL_STORAGE_DATABASE"] + url = os.environ["NOSQL_STORAGE_URL"] + credential = os.environ["NOSQL_STORAGE_CREDS"] assert database is not None and url is not None and credential is not None nosql.instance = nosql(url, credential, database) return nosql.instance diff --git a/benchmarks/wrappers/azure/python/run_subworkflow.py b/benchmarks/wrappers/azure/python/run_subworkflow.py index c2730714e..ac8af5611 100644 --- a/benchmarks/wrappers/azure/python/run_subworkflow.py +++ b/benchmarks/wrappers/azure/python/run_subworkflow.py @@ -1,23 +1,15 @@ -import json -import sys -import os -import uuid -import operator -import logging import datetime +import logging +import operator import azure.durable_functions as df -from redis import Redis - -dir_path = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(os.path.join(dir_path, os.path.pardir)) -from .fsm import * +from .fsm import Loop, Map, Parallel, Repeat, State, Switch, Task def get_var(obj, path: str): names = path.split(".") - assert(len(names) > 0) + assert len(names) > 0 for n in names: obj = obj[n] @@ -27,24 +19,27 @@ def get_var(obj, path: str): def set_var(obj, val, path: str): names = path.split(".") - assert(len(names) > 0) + assert len(names) > 0 for n in names[:-1]: obj = obj[n] obj[names[-1]] = val + def handler(context: df.DurableOrchestrationContext): start = datetime.datetime.now().timestamp() ts = start 
- now = lambda: datetime.datetime.now().timestamp() + + def now(): + return datetime.datetime.now().timestamp() + duration = 0 input = context.get_input() res = input["payload"] request_id = input["request_id"] all_states = input["states"] - states = {n: State.deserialize(n, s) - for n, s in all_states.items()} + states = {n: State.deserialize(n, s) for n, s in all_states.items()} current = states[input["root"]] while current: @@ -53,7 +48,7 @@ def handler(context: df.DurableOrchestrationContext): if isinstance(current, Task): input = {"payload": res, "request_id": request_id} - duration += (now() - ts) + duration += now() - ts res = yield context.call_activity(current.func_name, input) ts = now() current = states.get(current.next, None) @@ -63,7 +58,7 @@ def handler(context: df.DurableOrchestrationContext): "<=": operator.le, "==": operator.eq, ">=": operator.ge, - ">": operator.gt + ">": operator.gt, } next = None @@ -81,9 +76,9 @@ def handler(context: df.DurableOrchestrationContext): array = get_var(res, current.array) tasks = [] if current.common_params: - #assemble input differently + # assemble input differently for elem in array: - #assemble payload + # assemble payload payload = {} payload["array_element"] = elem params = current.common_params.split(",") @@ -91,12 +86,12 @@ def handler(context: df.DurableOrchestrationContext): payload[param] = get_var(res, param) myinput = {"payload": payload, "request_id": request_id} tasks.append(context.call_activity(current.func_name, myinput)) - else: + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} tasks.append(context.call_activity(current.func_name, myinput)) - duration += (now() - ts) + duration += now() - ts map_res = yield context.task_all(tasks) ts = now() @@ -106,7 +101,7 @@ def handler(context: df.DurableOrchestrationContext): for i in range(current.count): input = {"payload": res, "request_id": request_id} - duration += (now() - ts) + duration += now() - ts res = yield 
context.call_activity(current.func_name, input) ts = now() @@ -116,7 +111,7 @@ def handler(context: df.DurableOrchestrationContext): for elem in array: input = {"payload": elem, "request_id": request_id} - duration += (now() - ts) + duration += now() - ts yield context.call_activity(current.func_name, input) ts = now() @@ -127,12 +122,13 @@ def handler(context: df.DurableOrchestrationContext): first_states = [] state_to_result = {} for i, subworkflow in enumerate(current.funcs): - parallel_states = {n: State.deserialize(n, s) for n, s in subworkflow["states"].items()} + parallel_states = { + n: State.deserialize(n, s) for n, s in subworkflow["states"].items() + } - #for state in parallel_states.values(): + # for state in parallel_states.values(): # state_to_result[state.func_name] = [] - first_state = parallel_states[subworkflow["root"]] first_states.append(first_state) state_to_result[first_state.func_name] = [] @@ -140,24 +136,29 @@ def handler(context: df.DurableOrchestrationContext): if isinstance(first_state, Task): input = {"payload": res, "request_id": request_id} - #task directly here if only one state, task within suborchestrator if multiple states. + # task directly here if one state, otherwise suborchestrator if first_state.next: - #call suborchestrator - #FIXME define other parameters. - parallel_task = context.call_sub_orchestrator("run_subworkflow", input, subworkflow["root"], parallel_states) + # call suborchestrator + # FIXME define other parameters. 
+ parallel_task = context.call_sub_orchestrator( + "run_subworkflow", + input, + subworkflow["root"], + parallel_states, + ) parallel_tasks.append(parallel_task) else: parallel_tasks.append(context.call_activity(first_state.func_name, input)) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - + state_to_result[first_state.func_name].append(len(parallel_tasks) - 1) + elif isinstance(first_state, Map): array = get_var(res, first_state.array) tasks = [] if first_state.next: - #call suborchestrator. + # call suborchestrator. if first_state.common_params: - #assemble input differently + # assemble input differently for elem in array: payload = {} payload["array_element"] = elem @@ -165,20 +166,31 @@ def handler(context: df.DurableOrchestrationContext): for param in params: payload[param] = get_var(res, param) myinput = {"payload": payload, "request_id": request_id} - #FIXME use right parameters for suborchestrator. - parallel_task = context.call_sub_orchestrator("run_subworkflow", myinput, subworkflow["root"], parallel_states) + # FIXME use right parameters for suborchestrator. 
+ parallel_task = context.call_sub_orchestrator( + "run_subworkflow", + myinput, + subworkflow["root"], + parallel_states, + ) parallel_tasks.append(parallel_task) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - else: + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} - - parallel_task = context.call_sub_orchestrator("run_subworkflow", myinput, subworkflow["root"], parallel_states) + + parallel_task = context.call_sub_orchestrator( + "run_subworkflow", myinput, subworkflow["root"], parallel_states + ) parallel_tasks.append(parallel_task) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - else: + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + else: if first_state.common_params: - #assemble input differently + # assemble input differently for elem in array: payload = {} payload["array_element"] = elem @@ -186,15 +198,23 @@ def handler(context: df.DurableOrchestrationContext): for param in params: payload[param] = get_var(res, param) myinput = {"payload": payload, "request_id": request_id} - parallel_tasks.append(context.call_activity(first_state.func_name, myinput)) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - else: + parallel_tasks.append( + context.call_activity(first_state.func_name, myinput) + ) + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} - parallel_tasks.append(context.call_activity(first_state.func_name, myinput)) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - - duration += (now() - ts) + parallel_tasks.append( + context.call_activity(first_state.func_name, myinput) + ) + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + + duration += now() - ts map_res = yield 
context.task_all(parallel_tasks) ts = now() res = {} @@ -207,7 +227,7 @@ def handler(context: df.DurableOrchestrationContext): output.append(map_res[index]) res[state.func_name] = output else: - #task state + # task state res[state.func_name] = map_res[indices[0]] current = states.get(current.next, None) @@ -215,9 +235,6 @@ def handler(context: df.DurableOrchestrationContext): else: raise ValueError(f"Undefined state: {current}") - #workflow_name = os.getenv("APPSETTING_WEBSITE_SITE_NAME") - func_name = "run_subworkflow" - return res diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py index d5e111408..f19de7d93 100644 --- a/benchmarks/wrappers/azure/python/run_workflow.py +++ b/benchmarks/wrappers/azure/python/run_workflow.py @@ -1,23 +1,22 @@ +import datetime import json -import sys +import logging +import operator import os import uuid -import operator -import logging -import datetime import azure.durable_functions as df from redis import Redis -dir_path = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(os.path.join(dir_path, os.path.pardir)) +from .fsm import Map, Loop, Parallel, Repeat, State, Switch, Task -from .fsm import * +REDIS_HOST = os.getenv("REDIS_HOST", "{{REDIS_HOST}}") +REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "{{REDIS_PASSWORD}}") def get_var(obj, path: str): names = path.split(".") - assert(len(names) > 0) + assert len(names) > 0 for n in names: obj = obj[n] @@ -27,23 +26,26 @@ def get_var(obj, path: str): def set_var(obj, val, path: str): names = path.split(".") - assert(len(names) > 0) + assert len(names) > 0 for n in names[:-1]: obj = obj[n] obj[names[-1]] = val + def handler(context: df.DurableOrchestrationContext): start = datetime.datetime.now().timestamp() ts = start - now = lambda: datetime.datetime.now().timestamp() + + def now(): + return datetime.datetime.now().timestamp() + duration = 0 with open("definition.json") as f: definition = json.load(f) - states = 
{n: State.deserialize(n, s) - for n, s in definition["states"].items()} + states = {n: State.deserialize(n, s) for n, s in definition["states"].items()} current = states[definition["root"]] input = context.get_input() @@ -57,7 +59,7 @@ def handler(context: df.DurableOrchestrationContext): if isinstance(current, Task): input = {"payload": res, "request_id": request_id} - duration += (now() - ts) + duration += now() - ts if current.failure is None: res = yield context.call_activity(current.func_name, input) @@ -66,7 +68,7 @@ def handler(context: df.DurableOrchestrationContext): try: res = yield context.call_activity(current.func_name, input) current = states.get(current.next, None) - except: + except Exception: current = states.get(current.failure, None) ts = now() @@ -77,7 +79,7 @@ def handler(context: df.DurableOrchestrationContext): "<=": operator.le, "==": operator.eq, ">=": operator.ge, - ">": operator.gt + ">": operator.gt, } next = None @@ -99,7 +101,7 @@ def handler(context: df.DurableOrchestrationContext): array = get_var(res, current.array) tasks = [] if first_state.next: - #call suborchestrator - each map task should proceed with next step directly after it finished. 
+ # call suborchestrator - each map task continues with next step after finishing if current.common_params: for elem in array: payload = {} @@ -112,20 +114,24 @@ def handler(context: df.DurableOrchestrationContext): myinput["states"] = current.funcs uuid_name = str(uuid.uuid4())[0:4] - parallel_task = context.call_sub_orchestrator("run_subworkflow", myinput, uuid_name) + parallel_task = context.call_sub_orchestrator( + "run_subworkflow", myinput, uuid_name + ) tasks.append(parallel_task) - else: + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} myinput["root"] = current.root myinput["states"] = current.funcs - + uuid_name = str(uuid.uuid4())[0:4] - parallel_task = context.call_sub_orchestrator("run_subworkflow", myinput, uuid_name) + parallel_task = context.call_sub_orchestrator( + "run_subworkflow", myinput, uuid_name + ) tasks.append(parallel_task) else: if current.common_params: - #assemble input differently + # assemble input differently for elem in array: payload = {} payload["array_element"] = elem @@ -134,12 +140,12 @@ def handler(context: df.DurableOrchestrationContext): payload[param] = get_var(res, param) myinput = {"payload": payload, "request_id": request_id} tasks.append(context.call_activity(first_state.func_name, myinput)) - else: + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} tasks.append(context.call_activity(first_state.func_name, myinput)) - duration += (now() - ts) + duration += now() - ts map_res = yield context.task_all(tasks) ts = now() @@ -149,7 +155,7 @@ def handler(context: df.DurableOrchestrationContext): for i in range(current.count): input = {"payload": res, "request_id": request_id} - duration += (now() - ts) + duration += now() - ts res = yield context.call_activity(current.func_name, input) ts = now() @@ -159,7 +165,7 @@ def handler(context: df.DurableOrchestrationContext): for elem in array: input = {"payload": elem, "request_id": request_id} - duration += (now() - ts) 
+ duration += now() - ts yield context.call_activity(current.func_name, input) ts = now() @@ -170,33 +176,35 @@ def handler(context: df.DurableOrchestrationContext): first_states = [] state_to_result = {} for subworkflow in current.funcs: - parallel_states = {n: State.deserialize(n, s) for n, s in subworkflow["states"].items()} - + parallel_states = { + n: State.deserialize(n, s) for n, s in subworkflow["states"].items() + } + first_state = parallel_states[subworkflow["root"]] first_states.append(first_state) state_to_result[first_state.func_name] = [] - if isinstance(first_state, Task): + if isinstance(first_state, Task): input = {"payload": res, "request_id": request_id} - #task directly here if only one state, task within suborchestrator if multiple states. + # task directly here if one state, else run within suborchestrator if first_state.next: input["root"] = subworkflow["root"] - input["states"] = subworkflow["states"] #parallel_states + input["states"] = subworkflow["states"] # parallel_states parallel_task = context.call_sub_orchestrator("run_subworkflow", input) parallel_tasks.append(parallel_task) else: parallel_tasks.append(context.call_activity(first_state.func_name, input)) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - + state_to_result[first_state.func_name].append(len(parallel_tasks) - 1) + elif isinstance(first_state, Map): array = get_var(res, first_state.array) tasks = [] if first_state.next: - #call suborchestrator. + # call suborchestrator. 
if first_state.common_params: - #assemble input differently + # assemble input differently for elem in array: payload = {} payload["array_element"] = elem @@ -206,21 +214,29 @@ def handler(context: df.DurableOrchestrationContext): myinput = {"payload": payload, "request_id": request_id} myinput["root"] = subworkflow["root"] myinput["states"] = subworkflow["states"] - parallel_task = context.call_sub_orchestrator("run_subworkflow", myinput) + parallel_task = context.call_sub_orchestrator( + "run_subworkflow", myinput + ) parallel_tasks.append(parallel_task) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - else: + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} - + myinput["root"] = subworkflow["root"] myinput["states"] = subworkflow["states"] - parallel_task = context.call_sub_orchestrator("run_subworkflow", myinput) + parallel_task = context.call_sub_orchestrator( + "run_subworkflow", myinput + ) parallel_tasks.append(parallel_task) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - else: + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + else: if first_state.common_params: - #assemble input differently + # assemble input differently for elem in array: payload = {} payload["array_element"] = elem @@ -228,21 +244,29 @@ def handler(context: df.DurableOrchestrationContext): for param in params: payload[param] = get_var(res, param) myinput = {"payload": payload, "request_id": request_id} - parallel_tasks.append(context.call_activity(first_state.func_name, myinput)) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - else: + parallel_tasks.append( + context.call_activity(first_state.func_name, myinput) + ) + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + else: for elem in array: myinput = {"payload": elem, "request_id": request_id} - 
parallel_tasks.append(context.call_activity(first_state.func_name, myinput)) - state_to_result[first_state.func_name].append(len(parallel_tasks)-1) - - duration += (now() - ts) + parallel_tasks.append( + context.call_activity(first_state.func_name, myinput) + ) + state_to_result[first_state.func_name].append( + len(parallel_tasks) - 1 + ) + + duration += now() - ts map_res = yield context.task_all(parallel_tasks) ts = now() res = {} for state in first_states: - #get respective results of map_res related to func according to state_to_result + # get respective results of map_res related to func according to state_to_result indices = state_to_result[state.func_name] if len(indices) > 1: output = [] @@ -250,7 +274,7 @@ def handler(context: df.DurableOrchestrationContext): output.append(map_res[index]) res[state.func_name] = output else: - #task state + # task state res[state.func_name] = map_res[indices[0]] current = states.get(current.next, None) @@ -261,19 +285,17 @@ def handler(context: df.DurableOrchestrationContext): workflow_name = os.getenv("APPSETTING_WEBSITE_SITE_NAME") func_name = "run_workflow" - payload = { - "func": func_name, - "start": start, - "end": start+duration - } + payload = {"func": func_name, "start": start, "end": start + duration} payload = json.dumps(payload) - redis = Redis(host={{REDIS_HOST}}, - port=6379, - decode_responses=True, - socket_connect_timeout=10, - password={{REDIS_PASSWORD}}) + redis = Redis( + host=REDIS_HOST, + port=6379, + decode_responses=True, + socket_connect_timeout=10, + password=REDIS_PASSWORD or None, + ) key = os.path.join(workflow_name, func_name, request_id, str(uuid.uuid4())[0:8]) redis.set(key, payload) diff --git a/benchmarks/wrappers/azure/python/storage.py b/benchmarks/wrappers/azure/python/storage.py index 413418e33..c0e3b0843 100644 --- a/benchmarks/wrappers/azure/python/storage.py +++ b/benchmarks/wrappers/azure/python/storage.py @@ -1,7 +1,5 @@ - import os import uuid -from typing import Optional from 
azure.storage.blob import BlobServiceClient @@ -26,20 +24,18 @@ def __init__(self, connection_string: str): @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, container, file, filepath, unique_name=True): incr_io_env_file(filepath, "STORAGE_UPLOAD_BYTES") - with open(filepath, 'rb') as data: + with open(filepath, "rb") as data: return self.upload_stream(container, file, data, unique_name=unique_name) def download(self, container, file, filepath): - with open(filepath, 'wb') as download_file: - download_file.write( self.download_stream(container, file) ) + with open(filepath, "wb") as download_file: + download_file.write(self.download_stream(container, file)) incr_io_env_file(filepath, "STORAGE_DOWNLOAD_BYTES") def download_directory(self, container, prefix, path): @@ -57,10 +53,7 @@ def upload_stream(self, container, file, data, unique_name=True): incr_io_env(size, "STORAGE_UPLOAD_BYTES") data.seek(0) key_name = storage.unique_name(file) if unique_name else file - client = self.client.get_blob_client( - container=container, - blob=key_name - ) + client = self.client.get_blob_client(container=container, blob=key_name) overwrite = not unique_name client.upload_blob(data, overwrite=overwrite) return key_name @@ -74,10 +67,12 @@ def download_stream(self, container, file): def download_within_range(self, container, file, start_byte, stop_byte): client = self.client.get_blob_client(container=container, blob=file) - data = client.download_blob(offset=start_byte, length=(stop_byte-start_byte), encoding='UTF-8').readall() + data = client.download_blob( + offset=start_byte, length=(stop_byte - start_byte), encoding="UTF-8" + ).readall() incr_io_env(len(data), "STORAGE_DOWNLOAD_BYTES") - 
return data #.decode('utf-8') + return data # .decode('utf-8') def list_directory(self, container, prefix): client = self.client.get_container_client(container=container) @@ -88,7 +83,7 @@ def list_directory(self, container, prefix): @staticmethod def get_instance(): if storage.instance is None: - connection_string = os.environ['STORAGE_CONNECTION_STRING'] + connection_string = os.environ["STORAGE_CONNECTION_STRING"] assert connection_string is not None storage.instance = storage(connection_string) return storage.instance diff --git a/benchmarks/wrappers/gcp/nodejs/storage.js b/benchmarks/wrappers/gcp/nodejs/storage.js index fd67a4ace..859c693a5 100644 --- a/benchmarks/wrappers/gcp/nodejs/storage.js +++ b/benchmarks/wrappers/gcp/nodejs/storage.js @@ -20,7 +20,7 @@ class gcp_storage { upload(container, file, filepath) { let bucket = this.storage.bucket(container); let uniqueName = this.unique_name(file); - let options = {destination: uniqueName}; + let options = {destination: uniqueName, resumable: false}; return [uniqueName, bucket.upload(filepath, options)]; }; diff --git a/benchmarks/wrappers/gcp/python/handler_function.py b/benchmarks/wrappers/gcp/python/handler_function.py index 9b6989611..57e1d000b 100644 --- a/benchmarks/wrappers/gcp/python/handler_function.py +++ b/benchmarks/wrappers/gcp/python/handler_function.py @@ -1,44 +1,46 @@ -import datetime, io, json, os, uuid, sys +import datetime +import io +import json +import os +import sys +import uuid -sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) # This variable is defined by SeBS during function creation. 
-if 'NOSQL_STORAGE_DATABASE' in os.environ: +if "NOSQL_STORAGE_DATABASE" in os.environ: from function import nosql - nosql.nosql.get_instance( - os.environ['NOSQL_STORAGE_DATABASE'] - ) + nosql.nosql.get_instance(os.environ["NOSQL_STORAGE_DATABASE"]) def handler(req): income_timestamp = datetime.datetime.now().timestamp() - req_id = req.headers.get('Function-Execution-Id') - + req_id = req.headers.get("Function-Execution-Id") req_json = req.get_json() - req_json['request-id'] = req_id - req_json['income-timestamp'] = income_timestamp + req_json["request-id"] = req_id + req_json["income-timestamp"] = income_timestamp begin = datetime.datetime.now() # We are deployed in the same directorygit status from function import function + ret = function.handler(req_json) end = datetime.datetime.now() - - log_data = { - 'output': ret['result'] - } - if 'measurement' in ret: - log_data['measurement'] = ret['measurement'] - if 'logs' in req_json: - log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + log_data = {"output": ret["result"]} + if "measurement" in ret: + log_data["measurement"] = ret["measurement"] + if "logs" in req_json: + log_data["time"] = (end - begin) / datetime.timedelta(microseconds=1) results_begin = datetime.datetime.now() from function import storage + storage_inst = storage.storage.get_instance() - b = req_json.get('logs').get('bucket') - storage_inst.upload_stream(b, '{}.json'.format(req_id), - io.BytesIO(json.dumps(log_data).encode('utf-8'))) + b = req_json.get("logs").get("bucket") + storage_inst.upload_stream( + b, "{}.json".format(req_id), io.BytesIO(json.dumps(log_data).encode("utf-8")) + ) results_end = datetime.datetime.now() results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) else: @@ -46,27 +48,33 @@ def handler(req): # cold test is_cold = False - fname = os.path.join('/tmp', 'cold_run') + fname = os.path.join("/tmp", "cold_run") if not os.path.exists(fname): is_cold = True container_id = 
str(uuid.uuid4())[0:8] - with open(fname, 'a') as f: + with open(fname, "a") as f: f.write(container_id) else: - with open(fname, 'r') as f: + with open(fname, "r") as f: container_id = f.read() cold_start_var = "" if "cold_start" in os.environ: cold_start_var = os.environ["cold_start"] - return json.dumps({ - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'results_time': results_time, - 'is_cold': is_cold, - 'result': log_data, - 'request_id': req_id, - 'cold_start_var': cold_start_var, - 'container_id': container_id, - }), 200, {'ContentType': 'application/json'} + return ( + json.dumps( + { + "begin": begin.strftime("%s.%f"), + "end": end.strftime("%s.%f"), + "results_time": results_time, + "is_cold": is_cold, + "result": log_data, + "request_id": req_id, + "cold_start_var": cold_start_var, + "container_id": container_id, + } + ), + 200, + {"ContentType": "application/json"}, + ) diff --git a/benchmarks/wrappers/gcp/python/handler_workflow.py b/benchmarks/wrappers/gcp/python/handler_workflow.py index 083ef0a53..d84a066d6 100644 --- a/benchmarks/wrappers/gcp/python/handler_workflow.py +++ b/benchmarks/wrappers/gcp/python/handler_workflow.py @@ -1,23 +1,13 @@ - import datetime -import io import json import os import sys import uuid import importlib -# Add current directory to allow location of packages -sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) - -if 'NOSQL_STORAGE_DATABASE' in os.environ: - from function import nosql +REDIS_HOST = os.getenv("REDIS_HOST", "{{REDIS_HOST}}") +REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "{{REDIS_PASSWORD}}") - nosql.nosql.get_instance( - os.environ['NOSQL_STORAGE_DATABASE'] - ) - -from redis import Redis def probe_cold_start(): is_cold = False @@ -35,13 +25,23 @@ def probe_cold_start(): def handler(req): + # Add current directory to allow location of packages + sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) + + 
if "NOSQL_STORAGE_DATABASE" in os.environ: + from function import nosql + + nosql.nosql.get_instance(os.environ["NOSQL_STORAGE_DATABASE"]) + + from redis import Redis + start = datetime.datetime.now().timestamp() os.environ["STORAGE_UPLOAD_BYTES"] = "0" os.environ["STORAGE_DOWNLOAD_BYTES"] = "0" provider_request_id = req.headers.get("Function-Execution-Id") event = req.get_json() - event["payload"]['request-id'] = provider_request_id + event["payload"]["request-id"] = provider_request_id full_function_name = os.getenv("MY_FUNCTION_NAME") workflow_name, func_name = full_function_name.split("___") function = importlib.import_module(f"function.{func_name}") @@ -56,7 +56,7 @@ def handler(req): "end": end, "is_cold": is_cold, "container_id": container_id, - "provider.request_id": provider_request_id + "provider.request_id": provider_request_id, } func_res = os.getenv("SEBS_FUNCTION_RESULT") @@ -73,11 +73,13 @@ def handler(req): payload = json.dumps(payload) - redis = Redis(host={{REDIS_HOST}}, - port=6379, - decode_responses=True, - socket_connect_timeout=10, - password={{REDIS_PASSWORD}}) + redis = Redis( + host=REDIS_HOST, + port=6379, + decode_responses=True, + socket_connect_timeout=10, + password=REDIS_PASSWORD or None, + ) req_id = event["request_id"] key = os.path.join(workflow_name, func_name, req_id, str(uuid.uuid4())[0:8]) diff --git a/benchmarks/wrappers/gcp/python/storage.py b/benchmarks/wrappers/gcp/python/storage.py index ee3e6fc17..50a358daa 100644 --- a/benchmarks/wrappers/gcp/python/storage.py +++ b/benchmarks/wrappers/gcp/python/storage.py @@ -25,11 +25,9 @@ def __init__(self): @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, bucket, file, filepath, 
unique_name=True): incr_io_env_file(filepath, "STORAGE_UPLOAD_BYTES") @@ -74,27 +72,25 @@ def download_stream(self, bucket, file): incr_io_env(size, "STORAGE_DOWNLOAD_BYTES") data.seek(0) - #return data + # return data return data.getbuffer() def download_within_range(self, bucket, file, start_byte, stop_byte): bucket_instance = self.client.bucket(bucket) blob = bucket_instance.blob(file) - blob.download_to_filename('/tmp/' + file, start=start_byte, end=stop_byte) - with open('/tmp/' + file, 'r') as f: + blob.download_to_filename("/tmp/" + file, start=start_byte, end=stop_byte) + with open("/tmp/" + file, "r") as f: content = f.read() return content def list_directory(self, bucket, prefix): - bucket_instance = self.client.bucket(bucket) - #objects = list(self.client.list_blobs(bucket_or_name=bucket_instance,prefix=prefix)) objects = self.client.bucket(bucket).list_blobs(prefix=prefix) names = [] for obj in objects: names.append(obj.name) return names - #for obj in objects: + # for obj in objects: # yield obj.name def get_instance(): diff --git a/benchmarks/wrappers/local/nodejs/storage.js b/benchmarks/wrappers/local/nodejs/storage.js deleted file mode 100644 index 9fb9d45f5..000000000 --- a/benchmarks/wrappers/local/nodejs/storage.js +++ /dev/null @@ -1,61 +0,0 @@ - -const minio = require('minio'), - path = require('path'), - uuid = require('uuid'), - util = require('util'), - stream = require('stream'); - -class minio_storage { - - constructor() { - let address = process.env.MINIO_ADDRESS; - let access_key = process.env.MINIO_ACCESS_KEY; - let secret_key = process.env.MINIO_SECRET_KEY; - this.client = new minio.Client( - { - endPoint: address.split(':')[0], - port: parseInt(address.split(':')[1], 10), - accessKey: access_key, - secretKey: secret_key, - useSSL: false - } - ); - } - - unique_name(file) { - let name = path.parse(file); - let uuid_name = uuid.v4().split('-')[0]; - return path.join(name.dir, util.format('%s.%s%s', name.name, uuid_name, name.ext)); - } 
- - upload(bucket, file, filepath) { - let uniqueName = this.unique_name(file); - return [uniqueName, this.client.fPutObject(bucket, uniqueName, filepath)]; - }; - - download(bucket, file, filepath) { - return this.client.fGetObject(bucket, file, filepath); - }; - - uploadStream(bucket, file) { - var write_stream = new stream.PassThrough(); - let uniqueName = this.unique_name(file); - let promise = this.client.putObject(bucket, uniqueName, write_stream, write_stream.size); - return [write_stream, promise, uniqueName]; - }; - - downloadStream(bucket, file) { - var read_stream = new stream.PassThrough(); - return this.client.getObject(bucket, file); - }; - - static get_instance() { - if(!this.instance) { - this.instance = new storage(); - } - return this.instance; - } - - -}; -exports.storage = minio_storage; diff --git a/benchmarks/wrappers/local/python/function_workflow.py b/benchmarks/wrappers/local/python/function_workflow.py new file mode 100644 index 000000000..d0d0a0f7a --- /dev/null +++ b/benchmarks/wrappers/local/python/function_workflow.py @@ -0,0 +1,106 @@ +import datetime +import importlib +import json +import os +import uuid + +from redis import Redis + + +_FUNCTION_HANDLER = None + + +def _load_function_handler(): + global _FUNCTION_HANDLER + if _FUNCTION_HANDLER: + return _FUNCTION_HANDLER + + module_name = os.getenv("SEBS_WORKFLOW_MODULE") + if not module_name: + raise RuntimeError("Environment variable SEBS_WORKFLOW_MODULE is not set.") + + module = importlib.import_module(module_name) + if not hasattr(module, "handler"): + raise RuntimeError(f"Module {module_name} does not provide a handler(payload) function.") + _FUNCTION_HANDLER = module.handler + return _FUNCTION_HANDLER + + +def _extract_request_id(event): + request_id = event.get("request_id") + if request_id: + return request_id + payload = event.get("payload") + if isinstance(payload, dict): + return payload.get("request_id") or payload.get("request-id") + return None + + +def 
_maybe_push_measurement(event, duration_start, duration_end): + redis_host = os.getenv("SEBS_REDIS_HOST") + if not redis_host: + return + + workflow_name = os.getenv("SEBS_WORKFLOW_NAME", "workflow") + func_name = os.getenv("SEBS_WORKFLOW_FUNC", "function") + request_id = event["request_id"] + + payload = { + "func": func_name, + "start": duration_start, + "end": duration_end, + "is_cold": False, + "container_id": os.getenv("HOSTNAME", "local"), + "provider.request_id": request_id, + } + + func_res = os.getenv("SEBS_FUNCTION_RESULT") + if func_res: + payload["result"] = json.loads(func_res) + + upload_bytes = os.getenv("STORAGE_UPLOAD_BYTES", "0") + download_bytes = os.getenv("STORAGE_DOWNLOAD_BYTES", "0") + if upload_bytes.isdigit(): + payload["blob.upload"] = int(upload_bytes) + if download_bytes.isdigit(): + payload["blob.download"] = int(download_bytes) + + redis = Redis( + host=redis_host, + port=int(os.getenv("SEBS_REDIS_PORT", "6379")), + decode_responses=True, + socket_connect_timeout=10, + password=os.getenv("SEBS_REDIS_PASSWORD"), + ) + + key = os.path.join(workflow_name, func_name, request_id, str(uuid.uuid4())[0:8]) + redis.set(key, json.dumps(payload)) + print(f"[workflow] stored measurement {key}") + + +def handler(event): + """ + Entry point used by the local workflow containers. Expects events with + {"payload": , "request_id": "..."} format and returns the same + structure expected by our workflow orchestrator. 
+ """ + + if "payload" not in event: + raise RuntimeError("Workflow invocation payload must include 'payload' key.") + + request_id = _extract_request_id(event) or str(uuid.uuid4()) + event["request_id"] = request_id + payload = event["payload"] + handler_fn = _load_function_handler() + + begin = datetime.datetime.now().timestamp() + print(f"[workflow] handler input: {event}", flush=True) + result = handler_fn(payload) + end = datetime.datetime.now().timestamp() + + _maybe_push_measurement(event, begin, end) + + return { + "request_id": request_id, + "payload": result, + } diff --git a/benchmarks/wrappers/local/python/storage.py b/benchmarks/wrappers/local/python/storage.py index 4e1f9c5de..d2fb5d4d3 100644 --- a/benchmarks/wrappers/local/python/storage.py +++ b/benchmarks/wrappers/local/python/storage.py @@ -1,39 +1,41 @@ -import io import os import uuid import minio + class storage: instance = None client = None def __init__(self): - if 'MINIO_ADDRESS' in os.environ: - address = os.environ['MINIO_ADDRESS'] - access_key = os.environ['MINIO_ACCESS_KEY'] - secret_key = os.environ['MINIO_SECRET_KEY'] + if "MINIO_ADDRESS" in os.environ: + address = os.environ["MINIO_ADDRESS"] + access_key = os.environ["MINIO_ACCESS_KEY"] + secret_key = os.environ["MINIO_SECRET_KEY"] self.client = minio.Minio( - address, - access_key=access_key, - secret_key=secret_key, - secure=False) + address, access_key=access_key, secret_key=secret_key, secure=False + ) @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) - def upload(self, bucket, file, filepath): - key_name = storage.unique_name(file) + def upload(self, bucket, file, filepath, unique_name=True): + key_name = storage.unique_name(file) if unique_name else file 
self.client.fput_object(bucket, key_name, filepath) return key_name def download(self, bucket, file, filepath): + data = self.client.get_object(bucket, file) + size = data.headers.get("Content-Length") + if size: + os.environ["STORAGE_DOWNLOAD_BYTES"] = str( + int(os.getenv("STORAGE_DOWNLOAD_BYTES", "0")) + int(size) + ) self.client.fget_object(bucket, file, filepath) def download_directory(self, bucket, prefix, path): @@ -49,15 +51,30 @@ def upload_stream(self, bucket, file, bytes_data): def download_stream(self, bucket, file): data = self.client.get_object(bucket, file) - return data.read() + body = data.read() + os.environ["STORAGE_DOWNLOAD_BYTES"] = str( + int(os.getenv("STORAGE_DOWNLOAD_BYTES", "0")) + len(body) + ) + return body + + def download_within_range(self, bucket, file, start_byte, stop_byte): + range_header = f"bytes={start_byte}-{stop_byte}" + resp = self.client.get_object(bucket, file, request_headers={"Range": range_header}) + data = resp.read().decode("utf-8") + os.environ["STORAGE_DOWNLOAD_BYTES"] = str( + int(os.getenv("STORAGE_DOWNLOAD_BYTES", "0")) + len(data.encode("utf-8")) + ) + return data def list_directory(self, bucket, prefix): - objects = self.client.list_objects_v2(bucket, prefix, recursive=True) - for obj in objects: + if hasattr(self.client, "list_objects_v2"): + iterator = self.client.list_objects_v2(bucket, prefix, recursive=True) + else: + iterator = self.client.list_objects(bucket, prefix, recursive=True) + for obj in iterator: yield obj.object_name def get_instance(): if storage.instance is None: storage.instance = storage() return storage.instance - diff --git a/benchmarks/wrappers/openwhisk/python/__main__.py b/benchmarks/wrappers/openwhisk/python/__main__.py index 3ae44f9c2..3833bff8c 100644 --- a/benchmarks/wrappers/openwhisk/python/__main__.py +++ b/benchmarks/wrappers/openwhisk/python/__main__.py @@ -2,24 +2,30 @@ import datetime import os + def main(args): logging.getLogger().setLevel(logging.INFO) begin = 
datetime.datetime.now() - args['request-id'] = os.getenv('__OW_ACTIVATION_ID') - args['income-timestamp'] = begin.timestamp() + args["request-id"] = os.getenv("__OW_ACTIVATION_ID") + args["income-timestamp"] = begin.timestamp() - for arg in ["MINIO_STORAGE_CONNECTION_URL", "MINIO_STORAGE_ACCESS_KEY", "MINIO_STORAGE_SECRET_KEY"]: + for arg in [ + "MINIO_STORAGE_CONNECTION_URL", + "MINIO_STORAGE_ACCESS_KEY", + "MINIO_STORAGE_SECRET_KEY", + ]: os.environ[arg] = args[arg] del args[arg] key_list = list(args.keys()) for arg in key_list: - if 'NOSQL_STORAGE_' in arg: + if "NOSQL_STORAGE_" in arg: os.environ[arg] = args[arg] del args[arg] try: from function import function + ret = function.handler(args) end = datetime.datetime.now() logging.info("Function result: {}".format(ret)) @@ -38,7 +44,7 @@ def main(args): return { "begin": begin.strftime("%s.%f"), "end": end.strftime("%s.%f"), - "request_id": os.getenv('__OW_ACTIVATION_ID'), + "request_id": os.getenv("__OW_ACTIVATION_ID"), "results_time": results_time, "is_cold": is_cold, "result": log_data, @@ -49,7 +55,7 @@ def main(args): return { "begin": begin.strftime("%s.%f"), "end": end.strftime("%s.%f"), - "request_id": os.getenv('__OW_ACTIVATION_ID'), + "request_id": os.getenv("__OW_ACTIVATION_ID"), "results_time": results_time, - "result": f"Error - invocation failed! Reason: {e}" + "result": f"Error - invocation failed! 
Reason: {e}", } diff --git a/benchmarks/wrappers/openwhisk/python/nosql.py b/benchmarks/wrappers/openwhisk/python/nosql.py index da8245009..4a8676d36 100644 --- a/benchmarks/wrappers/openwhisk/python/nosql.py +++ b/benchmarks/wrappers/openwhisk/python/nosql.py @@ -5,6 +5,7 @@ import boto3 from botocore.client import Config + class nosql: instance: Optional["nosql"] = None @@ -14,14 +15,14 @@ def __init__(self): if environ["NOSQL_STORAGE_TYPE"] != "scylladb": raise RuntimeError(f"Unsupported NoSQL storage type: {environ['NOSQL_STORAGE_TYPE']}!") - config = Config(connect_timeout=5, retries={'max_attempts': 0}) + config = Config(connect_timeout=5, retries={"max_attempts": 0}) self.client = boto3.resource( "dynamodb", region_name="None", aws_access_key_id="None", aws_secret_access_key="None", endpoint_url=f"http://{environ['NOSQL_STORAGE_ENDPOINT']}", - config=config + config=config, ) self._tables = {} diff --git a/benchmarks/wrappers/openwhisk/python/setup.py b/benchmarks/wrappers/openwhisk/python/setup.py index b942d059b..016974465 100644 --- a/benchmarks/wrappers/openwhisk/python/setup.py +++ b/benchmarks/wrappers/openwhisk/python/setup.py @@ -2,13 +2,13 @@ from glob import glob from pkg_resources import parse_requirements -with open('requirements.txt') as f: +with open("requirements.txt") as f: requirements = [str(r) for r in parse_requirements(f)] setup( - name='function', + name="function", install_requires=requirements, - packages=['function'], - package_dir={'function': '.'}, - package_data={'function': glob('**', recursive=True)}, -) \ No newline at end of file + packages=["function"], + package_dir={"function": "."}, + package_data={"function": glob("**", recursive=True)}, +) diff --git a/benchmarks/wrappers/openwhisk/python/storage.py b/benchmarks/wrappers/openwhisk/python/storage.py index 76c7e3e8e..09b9e78a7 100644 --- a/benchmarks/wrappers/openwhisk/python/storage.py +++ b/benchmarks/wrappers/openwhisk/python/storage.py @@ -1,8 +1,8 @@ +import logging 
import os import uuid -import json + import minio -import logging class storage: @@ -25,14 +25,14 @@ def __init__(self): maxsize=10, retries=urllib3.Retry( total=5, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504] - ) + ), ) self.client = minio.Minio( os.getenv("MINIO_STORAGE_CONNECTION_URL"), access_key=os.getenv("MINIO_STORAGE_ACCESS_KEY"), secret_key=os.getenv("MINIO_STORAGE_SECRET_KEY"), secure=False, - http_client=mgr + http_client=mgr, ) except Exception as e: logging.info(e) @@ -41,12 +41,9 @@ def __init__(self): @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) - + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, bucket, file, filepath): key_name = storage.unique_name(file) @@ -64,9 +61,7 @@ def download_directory(self, bucket, prefix, path): def upload_stream(self, bucket, file, bytes_data): key_name = storage.unique_name(file) - self.client.put_object( - bucket, key_name, bytes_data, bytes_data.getbuffer().nbytes - ) + self.client.put_object(bucket, key_name, bytes_data, bytes_data.getbuffer().nbytes) return key_name def download_stream(self, bucket, file): diff --git a/benchmarks/wrappers/sonataflow/python/function_workflow.py b/benchmarks/wrappers/sonataflow/python/function_workflow.py new file mode 100644 index 000000000..3881b02c8 --- /dev/null +++ b/benchmarks/wrappers/sonataflow/python/function_workflow.py @@ -0,0 +1,115 @@ +import datetime +import importlib +import json +import os +import uuid + +from redis import Redis + + +_FUNCTION_HANDLER = None + + +def _load_function_handler(): + global _FUNCTION_HANDLER + if _FUNCTION_HANDLER: + return _FUNCTION_HANDLER + + module_name = os.getenv("SEBS_WORKFLOW_MODULE") + if not module_name: + raise RuntimeError("Environment variable SEBS_WORKFLOW_MODULE is 
not set.") + + module = importlib.import_module(module_name) + if not hasattr(module, "handler"): + raise RuntimeError(f"Module {module_name} does not provide a handler(payload) function.") + _FUNCTION_HANDLER = module.handler + return _FUNCTION_HANDLER + + +def _extract_request_id(event): + request_id = event.get("request_id") + if request_id: + return request_id + payload = event.get("payload") + if isinstance(payload, dict): + return payload.get("request_id") or payload.get("request-id") + return None + + +def _maybe_push_measurement(event, duration_start, duration_end): + redis_host = os.getenv("SEBS_REDIS_HOST") + redis_port = int(os.getenv("SEBS_REDIS_PORT", "6379")) + if not redis_host: + print(f"[workflow] SEBS_REDIS_HOST not set, skipping measurement", flush=True) + return + + workflow_name = os.getenv("SEBS_WORKFLOW_NAME", "workflow") + func_name = os.getenv("SEBS_WORKFLOW_FUNC", "function") + request_id = event["request_id"] + + print(f"[workflow] attempting to connect to Redis at {redis_host}:{redis_port}", flush=True) + + payload = { + "func": func_name, + "start": duration_start, + "end": duration_end, + "is_cold": False, + "container_id": os.getenv("HOSTNAME", "local"), + "provider.request_id": request_id, + } + + func_res = os.getenv("SEBS_FUNCTION_RESULT") + if func_res: + payload["result"] = json.loads(func_res) + + upload_bytes = os.getenv("STORAGE_UPLOAD_BYTES", "0") + download_bytes = os.getenv("STORAGE_DOWNLOAD_BYTES", "0") + if upload_bytes.isdigit(): + payload["blob.upload"] = int(upload_bytes) + if download_bytes.isdigit(): + payload["blob.download"] = int(download_bytes) + + try: + redis = Redis( + host=redis_host, + port=redis_port, + decode_responses=True, + socket_connect_timeout=10, + password=os.getenv("SEBS_REDIS_PASSWORD"), + ) + + key = os.path.join(workflow_name, func_name, request_id, str(uuid.uuid4())[0:8]) + redis.set(key, json.dumps(payload)) + print(f"[workflow] stored measurement {key}", flush=True) + except Exception as e: 
+ print(f"[workflow] ERROR storing measurement to Redis: {e}", flush=True) + import traceback + traceback.print_exc() + + +def handler(event): + """ + Entry point used by the local workflow containers. Expects events with + {"payload": , "request_id": "..."} format and returns the same + structure expected by our workflow orchestrator. + """ + + if "payload" not in event: + raise RuntimeError("Workflow invocation payload must include 'payload' key.") + + request_id = _extract_request_id(event) or str(uuid.uuid4()) + event["request_id"] = request_id + payload = event["payload"] + handler_fn = _load_function_handler() + + begin = datetime.datetime.now().timestamp() + print(f"[workflow] handler input: {event}", flush=True) + result = handler_fn(payload) + end = datetime.datetime.now().timestamp() + + _maybe_push_measurement(event, begin, end) + + return { + "request_id": request_id, + "payload": result, + } diff --git a/benchmarks/wrappers/sonataflow/python/nosql.py b/benchmarks/wrappers/sonataflow/python/nosql.py new file mode 100644 index 000000000..0e816954c --- /dev/null +++ b/benchmarks/wrappers/sonataflow/python/nosql.py @@ -0,0 +1,131 @@ +from decimal import Decimal +from os import environ +from typing import List, Optional, Union, Tuple + +import boto3 + + +class nosql: + + instance: Optional["nosql"] = None + + def __init__(self): + + if environ["NOSQL_STORAGE_TYPE"] != "scylladb": + raise RuntimeError(f"Unsupported NoSQL storage type: {environ['NOSQL_STORAGE_TYPE']}!") + + self.client = boto3.resource( + "dynamodb", + region_name="None", + aws_access_key_id="None", + aws_secret_access_key="None", + endpoint_url=f"http://{environ['NOSQL_STORAGE_ENDPOINT']}", + ) + self._tables = {} + + # Based on: https://github.com/boto/boto3/issues/369#issuecomment-157205696 + def _remove_decimals(self, data: dict) -> Union[dict, list, int, float]: + + if isinstance(data, list): + return [self._remove_decimals(x) for x in data] + elif isinstance(data, dict): + return {k: 
self._remove_decimals(v) for k, v in data.items()} + elif isinstance(data, Decimal): + if data.as_integer_ratio()[1] == 1: + return int(data) + else: + return float(data) + else: + return data + + def _get_table(self, table_name: str): + + if table_name not in self._tables: + + env_name = f"NOSQL_STORAGE_TABLE_{table_name}" + + if env_name in environ: + aws_name = environ[env_name] + self._tables[table_name] = self.client.Table(aws_name) + else: + raise RuntimeError( + f"Couldn't find an environment variable {env_name} for table {table_name}" + ) + + return self._tables[table_name] + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).put_item(Item=data) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> dict: + + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + res = self._get_table(table_name).get_item(Key=data) + return self._remove_decimals(res["Item"]) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + updates: dict, + ): + + key_data = {} + for key in (primary_key, secondary_key): + key_data[key[0]] = key[1] + + update_expression = "SET " + update_values = {} + update_names = {} + + # We use attribute names because DynamoDB reserves some keywords, like 'status' + for key, value in updates.items(): + + update_expression += f" #{key}_name = :{key}_value, " + update_values[f":{key}_value"] = value + update_names[f"#{key}_name"] = key + + update_expression = update_expression[:-2] + + self._get_table(table_name).update_item( + Key=key_data, + UpdateExpression=update_expression, + ExpressionAttributeValues=update_values, + ExpressionAttributeNames=update_names, + ) + + def query(self, table_name: str, primary_key: Tuple[str, str], _: str) -> 
List[dict]: + + res = self._get_table(table_name).query( + KeyConditionExpression=f"{primary_key[0]} = :keyvalue", + ExpressionAttributeValues={":keyvalue": primary_key[1]}, + )["Items"] + return self._remove_decimals(res) + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).delete_item(Key=data) + + @staticmethod + def get_instance(): + if nosql.instance is None: + nosql.instance = nosql() + return nosql.instance diff --git a/benchmarks/wrappers/sonataflow/python/storage.py b/benchmarks/wrappers/sonataflow/python/storage.py new file mode 100644 index 000000000..d2fb5d4d3 --- /dev/null +++ b/benchmarks/wrappers/sonataflow/python/storage.py @@ -0,0 +1,80 @@ +import os +import uuid + +import minio + + +class storage: + instance = None + client = None + + def __init__(self): + if "MINIO_ADDRESS" in os.environ: + address = os.environ["MINIO_ADDRESS"] + access_key = os.environ["MINIO_ACCESS_KEY"] + secret_key = os.environ["MINIO_SECRET_KEY"] + self.client = minio.Minio( + address, access_key=access_key, secret_key=secret_key, secure=False + ) + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) + + def upload(self, bucket, file, filepath, unique_name=True): + key_name = storage.unique_name(file) if unique_name else file + self.client.fput_object(bucket, key_name, filepath) + return key_name + + def download(self, bucket, file, filepath): + data = self.client.get_object(bucket, file) + size = data.headers.get("Content-Length") + if size: + os.environ["STORAGE_DOWNLOAD_BYTES"] = str( + int(os.getenv("STORAGE_DOWNLOAD_BYTES", "0")) + int(size) + ) + self.client.fget_object(bucket, file, filepath) + + def download_directory(self, bucket, prefix, path): + objects = 
self.client.list_objects_v2(bucket, prefix, recursive=True) + for obj in objects: + file_name = obj.object_name + self.download(bucket, file_name, os.path.join(path, file_name)) + + def upload_stream(self, bucket, file, bytes_data): + key_name = storage.unique_name(file) + self.client.put_object(bucket, key_name, bytes_data, bytes_data.getbuffer().nbytes) + return key_name + + def download_stream(self, bucket, file): + data = self.client.get_object(bucket, file) + body = data.read() + os.environ["STORAGE_DOWNLOAD_BYTES"] = str( + int(os.getenv("STORAGE_DOWNLOAD_BYTES", "0")) + len(body) + ) + return body + + def download_within_range(self, bucket, file, start_byte, stop_byte): + range_header = f"bytes={start_byte}-{stop_byte}" + resp = self.client.get_object(bucket, file, request_headers={"Range": range_header}) + data = resp.read().decode("utf-8") + os.environ["STORAGE_DOWNLOAD_BYTES"] = str( + int(os.getenv("STORAGE_DOWNLOAD_BYTES", "0")) + len(data.encode("utf-8")) + ) + return data + + def list_directory(self, bucket, prefix): + if hasattr(self.client, "list_objects_v2"): + iterator = self.client.list_objects_v2(bucket, prefix, recursive=True) + else: + iterator = self.client.list_objects(bucket, prefix, recursive=True) + for obj in iterator: + yield obj.object_name + + def get_instance(): + if storage.instance is None: + storage.instance = storage() + return storage.instance diff --git a/config/example.json b/config/example.json index 3133d7249..15b7c98f3 100644 --- a/config/example.json +++ b/config/example.json @@ -1,10 +1,10 @@ { "experiments": { - "deployment": "aws", - "update_code": false, + "deployment": "local", + "update_code": true, "update_storage": false, "download_results": false, - "architecture": "arm64", + "architecture": "x64", "container_deployment": true, "runtime": { "language": "python", @@ -51,7 +51,7 @@ } }, "deployment": { - "name": "aws", + "name": "local", "aws": { "region": "us-east-1", "lambda-role": "", @@ -71,6 +71,34 @@ 
"credentials": "" }, "local": { + "resources": { + "redis": { + "host": "", + "password": "" + } + }, + "storage": { + "address": "", + "mapped_port": -1, + "access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio" + } + }, + "sonataflow": { + "resources": { + "redis": { + "host": "", + "password": "" + }, + "runtime": { + "url": "http://localhost:8080", + "endpoint_prefix": "services" + } + }, "storage": { "address": "", "mapped_port": -1, diff --git a/config/local_deployment.json b/config/local_deployment.json new file mode 100644 index 000000000..4d1174e3a --- /dev/null +++ b/config/local_deployment.json @@ -0,0 +1,179 @@ +{ + "experiments": { + "deployment": "local", + "update_code": true, + "update_storage": false, + "download_results": false, + "architecture": "x64", + "container_deployment": true, + "runtime": { + "language": "python", + "version": "3.8" + }, + "type": "invocation-overhead", + "perf-cost": { + "benchmark": "110.dynamic-html", + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "repetitions": 50, + "concurrent-invocations": 50, + "memory-sizes": [ + 128, + 256 + ] + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "invocation-overhead": { + "repetitions": 5, + "N": 20, + "type": "payload", + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20 + }, + "eviction-model": { + "invocations": 1, + "function_copy_idx": 0, + "repetitions": 5, + "sleep": 1 + } + }, + "deployment": { + "name": "sonataflow", + "aws": { + "region": "us-east-1", + "lambda-role": "", + "resources": { + "redis": { + "host": "ec2-54-86-32-136.compute-1.amazonaws.com", + "password": "xB46z3u9I6WJ" + } + } + }, + "azure": { + "region": "westeurope" + }, + "gcp": { + "region": "europe-west1", + "project_name": "", + "credentials": "" + 
}, + "local": { + "resources": { + "redis": { + "host": "", + "password": "" + } + }, + "storage": { + "object": { + "type": "minio", + "minio": { + "address": "10.5.38.121:9011", + "mapped_port": 9011, + "access_key": "rB907YMFJW7gUgnUnefzcni9RExzy4aP0vr52tGzYgQ", + "secret_key": "f47e06c50a29f37f68b01eb2b96ea1679cab8b6e72102078fa36ed72a07f8ec9", + "instance_id": "5fac4b34c68172c95f81230df52c2989d06e3f7891b5bd901905c085258e56b9", + "output_buckets": [], + "input_buckets": [], + "version": "RELEASE.2024-07-16T23-46-41Z", + "data_volume": "minio-volume", + "type": "minio" + } + }, + "nosql": { + "type": "scylladb", + "scylladb": { + "address": "10.5.38.121:9012", + "mapped_port": 9012, + "alternator_port": 8000, + "access_key": "None", + "secret_key": "None", + "instance_id": "784e07f6d7cfe6b2670fc65d840fd79864d1e6336422dcc2fb340975b8131a4d", + "region": "None", + "cpus": 1, + "memory": "750", + "version": "6.0", + "data_volume": "scylladb-volume" + } + } + } + }, + "sonataflow": { + "resources": { + "redis": { + "host": "localhost:6381", + "password": "" + }, + "runtime": { + "url": "http://localhost:8080", + "endpoint_prefix": "" + } + }, + "storage": { + "address": "", + "mapped_port": -1, + "access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio", + "object": { + "type": "minio", + "minio": { + "address": "172.18.0.2:9000", + "mapped_port": 9011, + "access_key": "66tQNYoeOvF8igk220P1R2waQrtalkZh07nynxuEdCE", + "secret_key": "4fbddba8e5b88597f4c4b781de22de7cff97ed5f6671ef515756574bb0a9491c", + "instance_id": "33484801c78fe94ba30e3f1976962e8aa83610a87aa814abf523618642dc3f89", + "input_buckets": [], + "output_buckets": [] + } + }, + "nosql": { + "type": "scylladb", + "scylladb": { + "address": "172.18.0.3:8000", + "mapped_port": 9012, + "instance_id": "35280a9874ae24335c7da349a729f0a6e02b6d70af340038580e3f4758b83605" + } + } + } + }, + "openwhisk": { + "shutdownStorage": false, + "removeCluster": 
false, + "wskBypassSecurity": "true", + "wskExec": "wsk", + "experimentalManifest": false, + "docker_registry": { + "registry": "", + "username": "", + "password": "" + }, + "storage": { + "address": "", + "mapped_port": -1, + "access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio" + } + } + } +} diff --git a/config/local_workflows.json b/config/local_workflows.json new file mode 100644 index 000000000..2722611e1 --- /dev/null +++ b/config/local_workflows.json @@ -0,0 +1,162 @@ +{ + "experiments": { + "deployment": "local", + "update_code": true, + "update_storage": false, + "download_results": false, + "architecture": "x64", + "container_deployment": true, + "runtime": { + "language": "python", + "version": "3.11" + }, + "type": "invocation-overhead", + "perf-cost": { + "benchmark": "110.dynamic-html", + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "repetitions": 50, + "concurrent-invocations": 50, + "memory-sizes": [ + 128, + 256 + ] + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "invocation-overhead": { + "repetitions": 5, + "N": 20, + "type": "payload", + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20 + }, + "eviction-model": { + "invocations": 1, + "function_copy_idx": 0, + "repetitions": 5, + "sleep": 1 + } + }, + "deployment": { + "name": "sonataflow", + "aws": { + "region": "us-east-1", + "lambda-role": "", + "resources": { + "redis": { + "host": "ec2-54-86-32-136.compute-1.amazonaws.com", + "password": "xB46z3u9I6WJ" + } + } + }, + "azure": { + "region": "westeurope" + }, + "gcp": { + "region": "europe-west1", + "project_name": "", + "credentials": "" + }, + "local": { + "resources": { + "redis": { + "host": "", + "password": "" + } + }, + "storage": { + "address": "", + "mapped_port": -1, + 
"access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio" + } + }, + "sonataflow": { + "resources": { + "redis": { + "host": "localhost:6381", + "password": "" + }, + "runtime": { + "url": "http://localhost:8080", + "endpoint_prefix": "" + } + }, + "storage": { + "address": "172.17.0.2:9000", + "mapped_port": 9011, + "access_key": "skPhf3f8aEMLd0P8n81M8OrA6fq8ZKCx6dn313lq2ws", + "secret_key": "4c15b2336fe9e89fac929dd13b4f43e222c9f8f0ae3e528572f46d94e93a1a13", + "instance_id": "b59d6d8581f4d62f8fd53e9d2184f3f9b4ab5661370d42f4dabbe739d6bda579", + "input_buckets": [], + "output_buckets": [], + "type": "minio", + "object": { + "type": "minio", + "minio": { + "address": "172.18.0.2:9000", + "mapped_port": 9011, + "access_key": "66tQNYoeOvF8igk220P1R2waQrtalkZh07nynxuEdCE", + "secret_key": "4fbddba8e5b88597f4c4b781de22de7cff97ed5f6671ef515756574bb0a9491c", + "instance_id": "33484801c78fe94ba30e3f1976962e8aa83610a87aa814abf523618642dc3f89", + "input_buckets": [], + "output_buckets": [] + } + }, + "nosql": { + "type": "scylladb", + "scylladb": { + "address": "172.18.0.3:8000", + "mapped_port": 9012, + "instance_id": "35280a9874ae24335c7da349a729f0a6e02b6d70af340038580e3f4758b83605" + } + } + }, + "nosql": { + "type": "scylladb", + "address": "172.17.0.3:8000", + "mapped_port": 9012, + "instance_id": "841c8a0b85fae2647f214170eb8fa666cd7ee01a361a1614f9c752e011b1a757" + } + }, + "openwhisk": { + "shutdownStorage": false, + "removeCluster": false, + "wskBypassSecurity": "true", + "wskExec": "wsk", + "experimentalManifest": false, + "docker_registry": { + "registry": "", + "username": "", + "password": "" + }, + "storage": { + "address": "", + "mapped_port": -1, + "access_key": "", + "secret_key": "", + "instance_id": "", + "input_buckets": [], + "output_buckets": [], + "type": "minio" + } + } + } +} diff --git a/config/sonataflow_local.json b/config/sonataflow_local.json new file mode 100644 index 
000000000..9f2c9962f --- /dev/null +++ b/config/sonataflow_local.json @@ -0,0 +1,57 @@ +{ + "experiments": { + "deployment": "sonataflow", + "update_code": true, + "update_storage": false, + "download_results": false, + "architecture": "x64", + "container_deployment": true, + "runtime": { + "language": "python", + "version": "3.8" + }, + "type": "invocation-overhead", + "invocation-overhead": { + "repetitions": 1, + "N": 5, + "type": "payload", + "payload_begin": 1024, + "payload_end": 2048, + "payload_points": 2, + "code_begin": 1048576, + "code_end": 2097152, + "code_points": 2 + } + }, + "deployment": { + "name": "sonataflow", + "sonataflow": { + "resources": { + "redis": { + "host": "localhost:6380", + "password": "" + }, + "runtime": { + "url": "http://localhost:8080", + "endpoint_prefix": "services" + } + }, + "storage": { + "type": "minio", + "address": "localhost", + "mapped_port": 9000, + "access_key": "minio", + "secret_key": "minio123", + "instance_id": "minio", + "input_buckets": [], + "output_buckets": [] + }, + "nosql": { + "type": "scylladb", + "address": "localhost", + "mapped_port": 9042, + "instance_id": "scylladb" + } + } + } +} diff --git a/config/storage.json b/config/storage.json index 9ea14d31d..644db56f9 100644 --- a/config/storage.json +++ b/config/storage.json @@ -4,7 +4,8 @@ "minio": { "mapped_port": 9011, "version": "RELEASE.2024-07-16T23-46-41Z", - "data_volume": "minio-volume" + "data_volume": "minio-volume", + "network_name": "sebs-network" } }, "nosql": { @@ -14,7 +15,8 @@ "version": "6.0", "cpus": 1, "memory": "750", - "data_volume": "scylladb-volume" + "data_volume": "scylladb-volume", + "network_name": "sebs-network" } } } diff --git a/config/systems.json b/config/systems.json index 9acc1dd2d..6f5c50d4b 100644 --- a/config/systems.json +++ b/config/systems.json @@ -34,9 +34,81 @@ "deployment": { "files": [ "storage.py", - "nosql.py" + "nosql.py", + "function_workflow.py" + ], + "packages": [ + "redis" + ], + "module_packages": { + 
"nosql": [ + "boto3==1.28.3" + ] + } + } + }, + "nodejs": { + "base_images": { + "x64": { + "14": "node:14-slim", + "16": "node:16-slim", + "18": "node:18-slim", + "20": "node:20-slim" + } + }, + "images": [ + "run", + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "storage.js" + ], + "packages": [] + } + } + }, + "architecture": ["x64"], + "deployments": ["package"] + }, + "sonataflow": { + "experiments": { + "python": [ + "papi", + "time", + "disk-io", + "memory" + ], + "nodejs": [ + "time" + ] + }, + "languages": { + "python": { + "base_images": { + "x64": { + "3.7": "python:3.7-slim", + "3.8": "python:3.8-slim", + "3.9": "python:3.9-slim", + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim" + } + }, + "images": [ + "run", + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "storage.py", + "nosql.py", + "function_workflow.py" + ], + "packages": [ + "redis" ], - "packages": [], "module_packages": { "nosql": [ "boto3==1.28.3" @@ -208,7 +280,6 @@ "python": { "base_images": { "x64": { - "3.7": "ubuntu:22.04", "3.8": "ubuntu:22.04", "3.9": "ubuntu:22.04", "3.10": "ubuntu:22.04", @@ -243,10 +314,6 @@ "nodejs": { "base_images": { "x64": { - "10": "ubuntu:18.04", - "12": "ubuntu:18.04", - "14": "ubuntu:18.04", - "16": "ubuntu:18.04", "18": "ubuntu:22.04", "20": "ubuntu:22.04" } diff --git a/dockerfiles/local/python/Dockerfile.run b/dockerfiles/local/python/Dockerfile.run index 768472607..2c1e27df7 100755 --- a/dockerfiles/local/python/Dockerfile.run +++ b/dockerfiles/local/python/Dockerfile.run @@ -6,7 +6,7 @@ RUN deps=''\ # for route and sudo && apt-get install --no-install-recommends -y curl gosu net-tools sudo ${deps}\ && apt-get purge -y --auto-remove ${deps}\ - && pip3 install cffi minio bottle + && pip3 install cffi minio bottle redis RUN mkdir -p /sebs COPY dockerfiles/local/run.sh /sebs/ diff --git a/dockerfiles/sonataflow/entrypoint.sh b/dockerfiles/sonataflow/entrypoint.sh new file mode 100755 index 
000000000..5451f551a --- /dev/null +++ b/dockerfiles/sonataflow/entrypoint.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +USER_ID=${CONTAINER_UID} +GROUP_ID=${CONTAINER_GID} +USER=${CONTAINER_USER} + +useradd --non-unique -m -u ${USER_ID} ${USER} +groupmod --non-unique -g ${GROUP_ID} ${USER} +export HOME=/home/${USER} +echo "Running as ${USER}, with ${USER_ID} and ${GROUP_ID}" + +if [ ! -z "$CMD" ]; then + gosu ${USER} $CMD +fi + +chown -R ${USER}:${USER} /sebs/ +echo "$USER ALL=(ALL:ALL) NOPASSWD: ALL" | tee /etc/sudoers.d/dont-prompt-$USER-for-password +usermod -aG sudo ${USER} + +exec gosu ${USER} "$@" + diff --git a/dockerfiles/sonataflow/nodejs/Dockerfile.build b/dockerfiles/sonataflow/nodejs/Dockerfile.build new file mode 100755 index 000000000..f65fd75f7 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/Dockerfile.build @@ -0,0 +1,16 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN apt-get update\ + && apt-get install -y --no-install-recommends zip gosu\ + && apt-get purge -y --auto-remove + +RUN mkdir -p /sebs/ +COPY dockerfiles/nodejs_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/sonataflow/nodejs/Dockerfile.run b/dockerfiles/sonataflow/nodejs/Dockerfile.run new file mode 100755 index 000000000..33e531524 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/Dockerfile.run @@ -0,0 +1,27 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN deps=''\ + && apt-get update\ + && apt-get install -y --no-install-recommends curl net-tools gosu python3 sudo ${deps}\ + && apt-get purge -y --auto-remove ${deps} + +RUN mkdir -p /sebs +RUN cd /sebs/ && npm install -g uuid strftime express minio +# NODE_PATH=$(npm root --quiet -g) +# https://github.com/moby/moby/issues/29110 +ENV NODE_PATH=/usr/local/lib/node_modules + 
+COPY dockerfiles/local/*.py /sebs/ +COPY dockerfiles/local/run.sh /sebs/ +COPY dockerfiles/local/nodejs/*.js /sebs/ +COPY dockerfiles/local/nodejs/run_server.sh /sebs/ +COPY dockerfiles/local/nodejs/timeit.sh /sebs/ +COPY dockerfiles/local/nodejs/runners.json /sebs/ +COPY dockerfiles/local/nodejs/package.json /sebs/ + +COPY dockerfiles/local/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh +RUN chmod +x /sebs/run.sh + +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/sonataflow/nodejs/config.js b/dockerfiles/sonataflow/nodejs/config.js new file mode 100644 index 000000000..19e7f075f --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/config.js @@ -0,0 +1,2 @@ +var tools = require('./tools'); +console.log( JSON.stringify(tools.get_config()) ) diff --git a/dockerfiles/sonataflow/nodejs/package.json b/dockerfiles/sonataflow/nodejs/package.json new file mode 100644 index 000000000..635c8b693 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/package.json @@ -0,0 +1,7 @@ +{ + "dependencies": { + "minio": "^7.0.13", + "strftime": "^0.10.0", + "uuid": "^3.4.0" + } +} diff --git a/dockerfiles/sonataflow/nodejs/run_server.sh b/dockerfiles/sonataflow/nodejs/run_server.sh new file mode 100755 index 000000000..c257e1fb7 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/run_server.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +node /sebs/server.js "$@" diff --git a/dockerfiles/sonataflow/nodejs/runners.json b/dockerfiles/sonataflow/nodejs/runners.json new file mode 100644 index 000000000..77eda0117 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/runners.json @@ -0,0 +1,6 @@ +{ + "time" : {"warm" : "time-in-proc.js", "cold" : "time-out-proc.py"}, + "memory": "analyzer-runner.js", + "disk-io": "analyzer-runner.js", + "config": ["node", "config.js"] +} diff --git a/dockerfiles/sonataflow/nodejs/server.js b/dockerfiles/sonataflow/nodejs/server.js new file mode 100644 index 000000000..cdd0af5aa --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/server.js @@ -0,0 
+1,51 @@ +const http = require('http'), + strftime = require('strftime'), + express = require('express'), + f = require('/function/function/function'); +//import { v4 as uuidv4 } from 'uuid'; +const { v4: uuidv4 } = require('uuid'); + + +var app = express(); +app.use(express.json()); + +app.post('/alive', function (req, res) { + res.send(JSON.stringify({ + status: "ok" + })); +}); + +app.post('/', function (req, res) { + + // SonataFlow sends requests wrapped in {"payload": ...} + // Unwrap the payload before passing to the function + let function_input = req.body; + if (req.body && typeof req.body === 'object' && Object.prototype.hasOwnProperty.call(req.body, 'payload')) { + function_input = req.body.payload; + } + + let ret = f.handler(function_input); + ret.then((func_res) => { + let output = func_res; + if (func_res && typeof func_res === 'object' && Object.prototype.hasOwnProperty.call(func_res, 'payload')) { + output = func_res.payload; + } + res.setHeader('Content-Type', 'application/json'); + res.end(JSON.stringify(output)); + }, + (reason) => { + console.error('Function invocation failed!'); + console.error('Request body:', JSON.stringify(req.body, null, 2)); + console.error('Error:', reason); + res.status(500).json({ + error: reason.message || String(reason), + stack: reason.stack + }); + } + ); +}); + +app.listen(port=process.argv[2], function () { + console.log(`Server listening on port ${process.argv[2]}.`); +}); + diff --git a/dockerfiles/sonataflow/nodejs/time-in-proc.js b/dockerfiles/sonataflow/nodejs/time-in-proc.js new file mode 100644 index 000000000..fd829d9e9 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/time-in-proc.js @@ -0,0 +1,72 @@ + +const tools = require('./tools'), + fs = require('fs'), + strftime = require('strftime'), + f = require('./function/function'), + util = require('util'); +const createCsvWriter = require('csv-writer').createArrayCsvWriter; + +let cfg = JSON.parse(fs.readFileSync(process.argv[2])); +let repetitions = 
cfg.benchmark.repetitions; +let disable_gc = cfg.benchmark.disable_gc; +let input_data = cfg.input; +let timedata = new Array(repetitions); +process.on('unhandledRejection', r => console.log(r)); + +// Due to the async nature of nodejs, we use 'then' functionality +// of promise to make sure that we start a new instance only after finishing +// the previous one. There's no other option to achieve true waiting and we don't +// want to start multiple instances and let them work concurrently. +let measurer = async function(repetition, finish) { + if (repetition < repetitions) { + let begin_timestamp = Date.now(); + let begin = process.hrtime(); + let cpuTimeBegin = process.cpuUsage(); + let ret = f.handler(input_data); + ret.then((res) => { + let cpuTimeEnd = process.cpuUsage(); + let stop_timestamp = Date.now(); + let stop = process.hrtime(begin); + let output_file = tools.get_result_prefix(tools.LOGS_DIR, 'output', 'txt'); + fs.writeFileSync(output_file, JSON.stringify(res)); + let userTime = cpuTimeEnd.user - cpuTimeBegin.user; + let sysTime = cpuTimeEnd.system - cpuTimeBegin.system; + timedata[repetition] = [begin_timestamp, stop_timestamp, stop[0]*1e6 + stop[1]/1e3, userTime, sysTime]; + measurer(repetition + 1, finish); + }, + (reason) => { + console.log('Function invocation failed!'); + console.log(reason); + process.exit(1); + } + ); + } else{ + finish(); + } +} +start = tools.start_benchmarking(); +measurer(0, + () => { + end = tools.stop_benchmarking(); + let result = tools.get_result_prefix(tools.RESULTS_DIR, cfg.benchmark.name, 'csv') + let csvWriter = createCsvWriter({ + path: result, + header: ['Begin','End','Duration','User','Sys'] + }); + for(let i = 0; i < repetitions; ++i) { + timedata[i][0] = strftime('%s.%L', new Date(timedata[i][0])); + timedata[i][1] = strftime('%s.%L', new Date(timedata[i][1])); + } + let p = csvWriter.writeRecords(timedata); + p.then( () => { + let reduce_array = timedata.map( x => { x.pop(); return x} ); + experiment_data = { 
+ repetitions: repetitions, + start: start, + end: end, + timestamps: reduce_array + } + console.log( JSON.stringify({experiment: experiment_data, runtime: tools.get_config()}, null, 2) ) + }); + } +); diff --git a/dockerfiles/sonataflow/nodejs/timeit.sh b/dockerfiles/sonataflow/nodejs/timeit.sh new file mode 100644 index 000000000..15fd78b5d --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/timeit.sh @@ -0,0 +1,11 @@ +#!/bin/bash +OUT=$1 +#ts=$(date +%s%N); +export TIMEFORMAT='%3R,%3U,%3S' +time node --expose-gc -e "var fs = require('fs'), f = require('./function/function'); +async function test() { + var input = JSON.parse(fs.readFileSync('input.json', 'utf-8')); + return await f.handler(input); +} +test().then( (data) => console.log(data) );" > $OUT +#tt=$((($(date +%s%N) - $ts)/1000)) ; echo $tt diff --git a/dockerfiles/sonataflow/nodejs/tools.js b/dockerfiles/sonataflow/nodejs/tools.js new file mode 100644 index 000000000..991344979 --- /dev/null +++ b/dockerfiles/sonataflow/nodejs/tools.js @@ -0,0 +1,44 @@ + +const glob = require('glob'), + path = require('path'); + +const RESULTS_DIR = 'results'; +exports.RESULTS_DIR = RESULTS_DIR; +const LOGS_DIR = 'logs'; +exports.LOGS_DIR = LOGS_DIR; + + +exports.get_config = function () { + return { + name: 'nodejs', + version: process.version, + modules: process.moduleLoadList + }; +} + +exports.start_benchmarking = function() { + return Date.now() +} + +exports.stop_benchmarking = function() { + return Date.now() +} + +exports.get_result_prefix = function(dirname, name, suffix) { + name = path.join(dirname, name); + let counter = 0 + while( + glob.sync( + name + '_' + counter.toString().padStart(2, '0') + '*.' + suffix + ).length + ) { + counter += 1 + } + // util.format ignores padding zeroes + return name + '_' + counter.toString().padStart(2, '0') + '.' 
+ suffix +} + +exports.process_timestamps = function(timestamps) { + +} + diff --git a/dockerfiles/sonataflow/python/Dockerfile.build b/dockerfiles/sonataflow/python/Dockerfile.build new file mode 100755 index 000000000..5892c6500 --- /dev/null +++ b/dockerfiles/sonataflow/python/Dockerfile.build @@ -0,0 +1,18 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +RUN apt-get update\ + && apt-get install -y --no-install-recommends gcc build-essential python3-dev libxml2 libxml2-dev zlib1g-dev gosu\ + && apt-get purge -y --auto-remove + +RUN mkdir -p /sebs/ +COPY dockerfiles/python_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/sonataflow/python/Dockerfile.run b/dockerfiles/sonataflow/python/Dockerfile.run new file mode 100755 index 000000000..2c1e27df7 --- /dev/null +++ b/dockerfiles/sonataflow/python/Dockerfile.run @@ -0,0 +1,25 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN deps=''\ + && apt-get update\ + # for route and sudo + && apt-get install --no-install-recommends -y curl gosu net-tools sudo ${deps}\ + && apt-get purge -y --auto-remove ${deps}\ + && pip3 install cffi minio bottle redis + +RUN mkdir -p /sebs +COPY dockerfiles/local/run.sh /sebs/ +COPY dockerfiles/local/*.py /sebs/ +COPY dockerfiles/local/python/*.py /sebs/ +COPY dockerfiles/local/python/run_server.sh /sebs/ +COPY dockerfiles/local/python/timeit.sh /sebs/ +COPY dockerfiles/local/python/runners.json /sebs/ +ADD third-party/pypapi/pypapi /sebs/pypapi +ENV PYTHONPATH=/sebs/.python_packages/lib/site-packages:$PYTHONPATH + +COPY dockerfiles/local/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh +RUN chmod +x /sebs/run.sh + +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git 
a/dockerfiles/sonataflow/python/analyzer-runner.py b/dockerfiles/sonataflow/python/analyzer-runner.py new file mode 100644 index 000000000..624459795 --- /dev/null +++ b/dockerfiles/sonataflow/python/analyzer-runner.py @@ -0,0 +1,64 @@ + +import datetime, json, sys, subprocess, os +ip_address = os.environ['DOCKER_HOST_IP'] +cfg = json.load(open(sys.argv[1], 'r')) +ret = subprocess.run(['curl', '-X', 'POST', + '{}:{}/start'.format(ip_address, cfg['benchmark']['analyzer']['analyzer_port']), + '-d', + '{{"uuid": "{}" }}'.format(sys.argv[2])], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) +if ret.returncode != 0: + import sys + print('Analyzer initialization failed!') + print(ret.stderr.decode('utf-8')) + sys.exit(100) + + +from utils import * +from tools import * +# imported function +from function import function + +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] + +timedata = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + res = function.handler(input_data) + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + end = stop_benchmarking() + + ret = subprocess.run( + [ + 'curl', '-X', 'POST', + '{}:{}/stop'.format(ip_address, cfg['benchmark']['analyzer']['analyzer_port']), + '-d', + '{{"uuid": "{}" }}'.format(sys.argv[2]) + ], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if ret.returncode != 0: + import sys + print('Analyzer deinitialization failed!') + print(ret.stderr.decode('utf-8')) + sys.exit(101) + experiment_data = {} + experiment_data['repetitions'] = repetitions + experiment_data['timestamps'] = process_timestamps(timedata) + experiment_data['start'] = str(start) + experiment_data['end'] = str(end) + print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) +except 
Exception as e: + print('Exception caught!') + print(e) + sys.exit(102) +sys.exit(0) diff --git a/dockerfiles/sonataflow/python/config.py b/dockerfiles/sonataflow/python/config.py new file mode 100644 index 000000000..e7115cc73 --- /dev/null +++ b/dockerfiles/sonataflow/python/config.py @@ -0,0 +1,5 @@ +import json + +from tools import * + +print(json.dumps(get_config())) diff --git a/dockerfiles/sonataflow/python/papi-runner.py b/dockerfiles/sonataflow/python/papi-runner.py new file mode 100644 index 000000000..0c82d476d --- /dev/null +++ b/dockerfiles/sonataflow/python/papi-runner.py @@ -0,0 +1,104 @@ + +import datetime, json, sys, traceback, csv + +from utils import * +from tools import * + +# imported function +from function import function + +import pypapi.exceptions + +class papi_benchmarker: + from pypapi import papi_low as papi + from pypapi import events as papi_events + + def __init__(self, papi_cfg): + self.events = [] + self.events_names = [] + self.count = 0 + + self.papi.library_init() + self.events = self.papi.create_eventset() + for event in papi_cfg['events']: + try: + self.papi.add_event(self.events, getattr(self.papi_events, event)) + except pypapi.exceptions.PapiInvalidValueError as err: + print('Adding event {event} failed!'.format(event=event)) + sys.exit(100) + + self.events_names = papi_cfg['events'] + self.count = len(papi_cfg['events']) + self.results = [] + + self.ins_granularity = papi_cfg['overflow_instruction_granularity'] + self.buffer_size = papi_cfg['overflow_buffer_size'] + self.start_time = datetime.datetime.now() + + self.papi.overflow_sampling(self.events, self.papi_events.PAPI_TOT_INS, + int(self.ins_granularity), int(self.buffer_size)) + + def start_overflow(self): + self.papi.start(self.events) + + def stop_overflow(self): + self.papi.stop(self.events) + + def get_results(self): + data = self.papi.overflow_sampling_results(self.events) + for vals in data: + for i in range(0, len(vals), self.count + 1): + chunks = 
vals[i:i+self.count+1] + measurement_time = datetime.datetime.fromtimestamp(chunks[0]/1e6) + time = (measurement_time - self.start_time) / datetime.timedelta(microseconds = 1) + self.results.append([measurement_time.strftime("%s.%f"), time] + list(chunks[1:])) + + def finish(self): + self.papi.cleanup_eventset(self.events) + self.papi.destroy_eventset(self.events) + + +cfg = json.load(open(sys.argv[1], 'r')) +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] +papi_experiments = papi_benchmarker(cfg['benchmark']['papi']) + +timedata = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + papi_experiments.start_overflow() + res = function.handler(input_data) + papi_experiments.stop_overflow() + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + end = stop_benchmarking() +except Exception as e: + print('Exception caught!') + print(e) + traceback.print_exc() + + +papi_experiments.get_results() +papi_experiments.finish() +result = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'csv') +with open(result, 'w') as f: + csv_writer = csv.writer(f) + csv_writer.writerow( + ['Time','RelativeTime'] + papi_experiments.events_names + ) + for val in papi_experiments.results: + csv_writer.writerow(val) + +experiment_data = {} +experiment_data['repetitions'] = repetitions +experiment_data['timestamps'] = process_timestamps(timedata) +experiment_data['start'] = str(start) +experiment_data['end'] = str(end) +print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) diff --git a/dockerfiles/sonataflow/python/run_server.sh b/dockerfiles/sonataflow/python/run_server.sh new file mode 100755 index 000000000..fa9a82297 --- /dev/null +++ b/dockerfiles/sonataflow/python/run_server.sh @@ -0,0 +1,3 @@ +#!/bin/bash + 
+python3 /sebs/server.py "$@" diff --git a/dockerfiles/sonataflow/python/runners.json b/dockerfiles/sonataflow/python/runners.json new file mode 100644 index 000000000..1a7a9d84c --- /dev/null +++ b/dockerfiles/sonataflow/python/runners.json @@ -0,0 +1,7 @@ +{ + "papi": "papi-runner.py", + "time" : {"warm" : "time-in-proc.py", "cold" : "time-out-proc.py"}, + "memory": "analyzer-runner.py", + "disk-io": "analyzer-runner.py", + "config": ["python3", "config.py"] +} diff --git a/dockerfiles/sonataflow/python/server.py b/dockerfiles/sonataflow/python/server.py new file mode 100644 index 000000000..0917e42db --- /dev/null +++ b/dockerfiles/sonataflow/python/server.py @@ -0,0 +1,45 @@ +import datetime +import os +import sys +import uuid + +import bottle +from bottle import route, run, template, request + +CODE_LOCATION='/function' + +@route('/alive', method='GET') +def alive(): + return { + "result": "ok" + } + +@route('/', method='POST') +def process_request(): + from function import function + import traceback + try: + # SonataFlow sends requests wrapped in {"payload": ...} + # Unwrap the payload before passing to the function + request_data = request.json + if isinstance(request_data, dict) and "payload" in request_data: + function_input = request_data["payload"] + else: + function_input = request_data + + ret = function.handler(function_input) + + # Wrap response in payload if not already wrapped + if isinstance(ret, dict) and "payload" in ret: + return ret["payload"] + return ret + except Exception as e: + print(f"Error processing request: {e}", file=sys.stderr) + print(f"Request JSON: {request.json}", file=sys.stderr) + traceback.print_exc() + bottle.response.status = 500 + return {"error": str(e), "traceback": traceback.format_exc()} + +sys.path.append(os.path.join(CODE_LOCATION)) +sys.path.append(os.path.join(CODE_LOCATION, '.python_packages/lib/site-packages/')) +run(host='0.0.0.0', port=int(sys.argv[1]), debug=True) diff --git 
a/dockerfiles/sonataflow/python/time-in-proc.py b/dockerfiles/sonataflow/python/time-in-proc.py new file mode 100644 index 000000000..962da527a --- /dev/null +++ b/dockerfiles/sonataflow/python/time-in-proc.py @@ -0,0 +1,59 @@ + +import datetime, json, sys, traceback, csv, resource + +from utils import * +from tools import * + +# imported function +from function import function + + +cfg = json.load(open(sys.argv[1], 'r')) +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] + +timedata = [0] * repetitions +os_times = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + begin_times = resource.getrusage(resource.RUSAGE_SELF) + res = function.handler(input_data) + end_times = resource.getrusage(resource.RUSAGE_SELF) + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + os_times[i] = [begin_times, end_times] + end = stop_benchmarking() +except Exception as e: + print('Exception caught!') + print(e) + traceback.print_exc() + + +result = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'csv') +with open(result, 'w') as f: + csv_writer = csv.writer(f) + csv_writer.writerow(['#Seconds from epoch.microseconds; CPU times are in microseconds']) + csv_writer.writerow(['Begin','End','Duration','User','Sys']) + for i in range(0, len(timedata)): + csv_writer.writerow([ + timedata[i][0].strftime('%s.%f'), + timedata[i][1].strftime('%s.%f'), + (timedata[i][1] - timedata[i][0]) / + datetime.timedelta(microseconds=1), + (os_times[i][1].ru_utime - os_times[i][0].ru_utime) * 1e6, + (os_times[i][1].ru_stime - os_times[i][0].ru_stime) * 1e6 + ]) + +experiment_data = {} +experiment_data['repetitions'] = repetitions +experiment_data['timestamps'] = process_timestamps(timedata) +experiment_data['start'] = str(start) +experiment_data['end'] = 
str(end) +print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) diff --git a/dockerfiles/sonataflow/python/timeit.sh b/dockerfiles/sonataflow/python/timeit.sh new file mode 100755 index 000000000..fed626b97 --- /dev/null +++ b/dockerfiles/sonataflow/python/timeit.sh @@ -0,0 +1,5 @@ +#!/bin/bash +#ts=$(date +%s%N); +export TIMEFORMAT='%3R,%3U,%3S' +time python3 -c "from json import load; from function import function; print(function.handler(load(open('input.json', 'r'))))" > $1 +#tt=$((($(date +%s%N) - $ts)/1000)) ; echo $tt diff --git a/dockerfiles/sonataflow/python/tools.py b/dockerfiles/sonataflow/python/tools.py new file mode 100644 index 000000000..33213f70c --- /dev/null +++ b/dockerfiles/sonataflow/python/tools.py @@ -0,0 +1,21 @@ + +import datetime, gc, platform, os, sys + +def start_benchmarking(disable_gc): + if disable_gc: + gc.disable() + return datetime.datetime.now() + +def stop_benchmarking(): + end = datetime.datetime.now() + gc.enable() + return end + +def get_config(): + # get currently loaded modules + # https://stackoverflow.com/questions/4858100/how-to-list-imported-modules + modulenames = set(sys.modules) & set(globals()) + allmodules = [sys.modules[name] for name in modulenames] + return {'name': 'python', + 'version': platform.python_version(), + 'modules': str(allmodules)} diff --git a/dockerfiles/sonataflow/run.sh b/dockerfiles/sonataflow/run.sh new file mode 100644 index 000000000..9ecc13e5b --- /dev/null +++ b/dockerfiles/sonataflow/run.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +export DOCKER_HOST_IP=$(route -n | awk '/UG[ \t]/{print $2}') +EXPERIMENT_INPUT="$1" +file_name=logs/execution_00.log +counter=0 + +while [ -e "${file_name}" ]; do + counter=$((counter + 1)) + file_name=$(printf '%s_%02d.log' "logs/execution" "$(( counter ))") +done + +script -e -c "python3 runner.py ${EXPERIMENT_INPUT}" -f "${file_name}" diff --git a/dockerfiles/sonataflow/runner.py b/dockerfiles/sonataflow/runner.py new file mode 100644 
index 000000000..96261fc33 --- /dev/null +++ b/dockerfiles/sonataflow/runner.py @@ -0,0 +1,62 @@ +import csv, gc, sys, imp, datetime, json, os, subprocess, uuid, sys + +from distutils.dir_util import copy_tree +from utils import * + +def get_language(lang): + languages = {'python': 'python3', 'nodejs': 'nodejs'} + return languages[lang] + +def get_runner(experiment, options=None): + runners = json.load(open('runners.json', 'r')) + return runners[experiment][options] if options is not None else runners[experiment] + +def get_runner_cmd(lang, experiment, options): + executable = get_language(lang) + script = get_runner(experiment, options) + script_name, extension = os.path.splitext(script) + # Out-of-proc measurements don't require languge-specific implementations + if extension == '.py': + executable = get_language('python') + return [executable, script] + +def export_storage_config(config): + if config is not None: + os.environ['MINIO_ADDRESS'] = config['address'] + os.environ['MINIO_ACCESS_KEY'] = config['access_key'] + os.environ['MINIO_SECRET_KEY'] = config['secret_key'] + +if __name__ == "__main__": + cfg = json.load(open(sys.argv[1], 'r')) + input_data = cfg['input'] + repetitions = cfg['benchmark']['repetitions'] + experiment = cfg['benchmark']['type'] + language = cfg['benchmark']['language'] + export_storage_config(cfg['benchmark'].get('storage', None)) + experiment_options = cfg['benchmark'].get('experiment_options', None) + + # copy code to main directory + copy_tree('code', '.') + + runner = get_runner_cmd(language, experiment, experiment_options) + uuid = uuid.uuid1() + ret = subprocess.run(runner + [sys.argv[1], str(uuid)], stdout=subprocess.PIPE) + if ret.returncode != 0: + print('Experiment finished incorrectly! 
Exit code {}'.format(ret.returncode)) + print('Output: ', ret.stdout.decode('utf-8')) + sys.exit(1) + + # Dump experiment data + result = {'input': cfg} + try: + experiment_data = json.loads(ret.stdout.decode('utf-8')) + for v in ['experiment', 'runtime']: + result[v] = experiment_data[v] + result_dir = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'json') + with open(result_dir, 'w') as f: + json.dump(result, f, indent = 2) + except json.decoder.JSONDecodeError as e: + print('Experiment output is not valid!') + print(e) + print(ret.stdout.decode('utf-8')) + sys.exit(1) diff --git a/dockerfiles/sonataflow/time-out-proc.py b/dockerfiles/sonataflow/time-out-proc.py new file mode 100644 index 000000000..9613d1ab5 --- /dev/null +++ b/dockerfiles/sonataflow/time-out-proc.py @@ -0,0 +1,56 @@ + +import datetime, json, subprocess, sys, traceback, csv + +from utils import * + +cfg = json.load(open(sys.argv[1], 'r')) +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] +json.dump(input_data, open('input.json', 'w')) + +timedata = [0] * repetitions +durations = [0] * repetitions +try: + start = datetime.datetime.now() + for i in range(0, repetitions): + prefix = get_result_prefix(LOGS_DIR, 'output', 'txt') + begin = datetime.datetime.now() + ret = subprocess.run(['/bin/bash', 'timeit.sh', prefix], + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + stop = datetime.datetime.now() + if ret.returncode != 0: + raise RuntimeError() + timedata[i] = [begin, stop] + # time returns WALL,USER,SYS + durations[i] = ret.stdout.decode('utf-8').rstrip().split(',') + end = datetime.datetime.now() +except ValueError as e: + print('Incorrect output from function') + print(ret.stdout.decode('utf-8')) +except Exception as e: + print('Exception caught!') + print(e) + traceback.print_exc() + + +result = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'csv') +with open(result, 'w') as f: + csv_writer = 
csv.writer(f) + csv_writer.writerow(['#Seconds from epoch.microseconds; Duration in miliseconds']) + csv_writer.writerow(['Begin','End','Wallclock','User','Sys']) + for i in range(0, len(timedata)): + csv_writer.writerow([ + timedata[i][0].strftime('%s.%f'), + timedata[i][1].strftime('%s.%f'), + *durations[i] + ]) + +experiment_data = {} +experiment_data['repetitions'] = repetitions +experiment_data['timestamps'] = process_timestamps(timedata) +experiment_data['start'] = str(start) +experiment_data['end'] = str(end) +ret = subprocess.run(json.load(open('runners.json', 'r'))['config'], stdout=subprocess.PIPE) +config = json.loads(ret.stdout.decode('utf-8')) +print(json.dumps({'experiment': experiment_data, 'runtime': config}, indent=2)) diff --git a/dockerfiles/sonataflow/utils.py b/dockerfiles/sonataflow/utils.py new file mode 100644 index 000000000..087ec397a --- /dev/null +++ b/dockerfiles/sonataflow/utils.py @@ -0,0 +1,21 @@ +import glob, os + +RESULTS_DIR = 'results' +LOGS_DIR = 'logs' + +def get_result_prefix(dirname, name, suffix): + name = os.path.join(dirname, name) + counter = 0 + while glob.glob( '{}_{:02d}*.{}'.format(name, counter, suffix) ): + counter +=1 + return '{}_{:02d}.{}'.format(name, counter, suffix) + +def process_timestamps(timestamps): + # convert list of lists of times data to proper timestamps + return list(map( + lambda times : list(map( + lambda x: x.strftime('%s.%f'), + times + )), + timestamps + )) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index e292a4b04..6977672d6 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -10,6 +10,8 @@ | Multimedia | 220.video-processing | Python | x64, arm64 | Add a watermark and generate gif of a video file. | | Utilities | 311.compression | Python | x64, arm64 | Create a .zip file for a group of files in storage and return to user to download. | | Inference | 411.image-recognition | Python | x64 | Image recognition with ResNet and pytorch. 
| +| Inference | 412.language-bert | Python | x64 | Sentence classification with a compact BERT model served via ONNX Runtime. | +| Inference | 413.recommendation | Python | x64 | GPU DLRM-inspired recommender scoring implemented in PyTorch. | | Scientific | 501.graph-pagerank | Python | x64, arm64 | PageRank implementation with igraph. | | Scientific | 502.graph-mst | Python | x64, arm64 | Minimum spanning tree (MST) implementation with igraph. | | Scientific | 503.graph-bfs | Python | x64, arm64 | Breadth-first search (BFS) implementation with igraph. | @@ -70,6 +72,14 @@ It implements the .zip file creation with the help of the `shutil` standard libr The benchmark is inspired by MLPerf and implements image recognition with Resnet50. It downloads the input and model from the storage and uses the CPU-only `pytorch` library in Python. +### Language Inference + +This benchmark runs sequence classification with a compact BERT model exported to ONNX. The function downloads the model archive and text samples from storage, tokenizes the sentences, executes the ONNX Runtime session, and returns the predicted labels together with confidences. + +### Recommendation + +Inspired by MLPerf’s DLRM v2, this benchmark ships a tiny PyTorch DLRM model that optionally runs on CUDA when available. The function downloads the model and request batch, moves the network to GPU if possible, performs batched inference, and reports recommendation scores alongside timing measurements. + ## Scientific ### Graph PageRank, BFS, MST @@ -87,4 +97,3 @@ This benchmark is inspired by the [DNAVisualization](https://github.com/Benjamin ## Applications **(WiP)** Coming soon! 
- diff --git a/experiments.json b/experiments.json new file mode 100644 index 000000000..cb06338e4 --- /dev/null +++ b/experiments.json @@ -0,0 +1,149 @@ +{ + "_invocations": { + "sebd-690.ml-python-3.11": { + "e6b2713e": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "f3d89608-c7c2-4054-8849-c2daa7ecece4", + "workflowdata": { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "classifiers": [ + { + "C": 0.025, + "kernel": "linear", + "name": "SVC" + } + ], + "dataset_bucket": "690.ml-0-output", + "n_features": 5, + "n_samples": 100, + "request_id": "e6b2713e", + "schedules": [ + { + "name": "SVC", + "score": 0.925 + } + ] + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "e6b2713e", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 2380060, + "client_begin": "2026-01-07 03:56:42.451065", + "client_end": "2026-01-07 03:56:44.831125", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767754602.448902, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "ec89b6c77f2efe30d9442b9b4fa12242c0e0a47b56fd494db2c2f77d0eac9804", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "veg9pif0_IdAKBcFAOx3tbI0t1FAEkLnOf4GXymojwo", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "2e7b46cd4f88d0d2c9831c8fc25200176030abbeec32169897cf1558450d920b", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": 
"0c938f69cd525d8c85a911111e2688c0b837806ef4b2b0e2665b293155401a99", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767754604.831985, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-610.gen-python-3.11.json b/experiments_sebd-610.gen-python-3.11.json new file mode 100644 index 000000000..388dfe644 --- /dev/null +++ b/experiments_sebd-610.gen-python-3.11.json @@ -0,0 +1,268 @@ +{ + "_invocations": { + "sebd-610.gen-python-3.11": { + "64ea8b9d": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "f4661a5e-1415-4aa9-b4d8-cfe517725d9b", + "workflowdata": { + "astros": { + "astros": { + "message": "success", + "number": 12, + "people": [ + { + "craft": "ISS", + "name": "Oleg Kononenko", + "name_rev": "Kononenko Oleg" + }, + { + "craft": "ISS", + "name": "Nikolai Chub", + "name_rev": "Chub Nikolai" + }, + { + "craft": "ISS", + "name": "Tracy Caldwell Dyson", + "name_rev": "Dyson Tracy" + }, + { + "craft": "ISS", + "name": "Matthew Dominick", + 
"name_rev": "Dominick Matthew" + }, + { + "craft": "ISS", + "name": "Michael Barratt", + "name_rev": "Barratt Michael" + }, + { + "craft": "ISS", + "name": "Jeanette Epps", + "name_rev": "Epps Jeanette" + }, + { + "craft": "ISS", + "name": "Alexander Grebenkin", + "name_rev": "Grebenkin Alexander" + }, + { + "craft": "ISS", + "name": "Butch Wilmore", + "name_rev": "Wilmore Butch" + }, + { + "craft": "ISS", + "name": "Sunita Williams", + "name_rev": "Williams Sunita" + }, + { + "craft": "Tiangong", + "name": "Li Guangsu", + "name_rev": "Guangsu Li" + }, + { + "craft": "Tiangong", + "name": "Li Cong", + "name_rev": "Cong Li" + }, + { + "craft": "Tiangong", + "name": "Ye Guangfu", + "name_rev": "Guangfu Ye" + } + ] + }, + "many_astros": true, + "message": "success", + "number": 12, + "people": [ + { + "craft": "ISS", + "name": "Oleg Kononenko", + "name_rev": "Kononenko Oleg" + }, + { + "craft": "ISS", + "name": "Nikolai Chub", + "name_rev": "Chub Nikolai" + }, + { + "craft": "ISS", + "name": "Tracy Caldwell Dyson", + "name_rev": "Dyson Tracy" + }, + { + "craft": "ISS", + "name": "Matthew Dominick", + "name_rev": "Dominick Matthew" + }, + { + "craft": "ISS", + "name": "Michael Barratt", + "name_rev": "Barratt Michael" + }, + { + "craft": "ISS", + "name": "Jeanette Epps", + "name_rev": "Epps Jeanette" + }, + { + "craft": "ISS", + "name": "Alexander Grebenkin", + "name_rev": "Grebenkin Alexander" + }, + { + "craft": "ISS", + "name": "Butch Wilmore", + "name_rev": "Wilmore Butch" + }, + { + "craft": "ISS", + "name": "Sunita Williams", + "name_rev": "Williams Sunita" + }, + { + "craft": "Tiangong", + "name": "Li Guangsu", + "name_rev": "Guangsu Li" + }, + { + "craft": "Tiangong", + "name": "Li Cong", + "name_rev": "Cong Li" + }, + { + "craft": "Tiangong", + "name": "Ye Guangfu", + "name_rev": "Guangfu Ye" + } + ], + "request_id": "64ea8b9d" + }, + "done": true, + "many_astros": true, + "request_id": "64ea8b9d" + } + }, + "provider_times": { + "execution": 0, + 
"initialization": 0 + }, + "request_id": "64ea8b9d", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 3920895, + "client_begin": "2026-01-07 18:42:35.640173", + "client_end": "2026-01-07 18:42:39.561068", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767807755.637872, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + 
"experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767807759.562647, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-6100.1000-genome-python-3.11.json b/experiments_sebd-6100.1000-genome-python-3.11.json new file mode 100644 index 000000000..ba8d54350 --- /dev/null +++ b/experiments_sebd-6100.1000-genome-python-3.11.json @@ -0,0 +1,212 @@ +{ + "_invocations": { + "sebd-6100.1000-genome-python-3.11": { + "9f32c850": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "ab66c56a-339c-4138-bc28-32603f76bfcb", + "workflowdata": { + "_parallel_frequency_and_overlap_0_results": [ + { + "output_frequency": "chr21-AFR-freq.tar.a23079f0.gz" + }, + { + "output_frequency": "chr21-ALL-freq.tar.334d18ab.gz" + }, + { + "output_frequency": "chr21-AMR-freq.tar.ae171774.gz" + }, + { + "output_frequency": "chr21-EAS-freq.tar.66e46eae.gz" + }, + { + "output_frequency": "chr21-EUR-freq.tar.9a95a5c0.gz" + }, + { + "output_frequency": "chr21-GBR-freq.tar.4c2097c9.gz" + } + ], + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "blob": [ + { + "individuals_output": "chr21n-ALL.chr21.1250.vcf.tar.9ca02841.gz" + }, + { + "individuals_output": "chr21n-ALL.chr21.1250.vcf.tar.1d0671ec.gz" + }, + { + "individuals_output": "chr21n-ALL.chr21.1250.vcf.tar.64de5b48.gz" + }, + { + "individuals_output": "chr21n-ALL.chr21.1250.vcf.tar.0941b71e.gz" + }, + { + "individuals_output": "chr21n-ALL.chr21.1250.vcf.tar.720b7319.gz" + } + ], + "bucket": "6100.1000-genome-0-output", + "columns": "columns.txt", + "columns_bucket": "6100.1000-genome-0-input", + "individuals_file": "ALL.chr21.1250.vcf", + "individuals_merge": { + "merge_outputfile_name": 
"chr21n.tar.a25b5cf3.gz" + }, + "populations": [ + "AFR", + "ALL", + "AMR", + "EAS", + "EUR", + "GBR" + ], + "request_id": "9f32c850", + "sifting": { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "input_bucket": "6100.1000-genome-0-input", + "output_bucket": "6100.1000-genome-0-output", + "output_sifting": "sifted.SIFT.chr21.4c5c74d2.txt", + "populations": [ + { + "output_mutation_overlap": "chr21-AFR.tar.d5be3c0f.gz" + }, + { + "output_mutation_overlap": "chr21-ALL.tar.0cc10d0b.gz" + }, + { + "output_mutation_overlap": "chr21-AMR.tar.78ae52f3.gz" + }, + { + "output_mutation_overlap": "chr21-EAS.tar.afbf7206.gz" + }, + { + "output_mutation_overlap": "chr21-EUR.tar.df68aa89.gz" + }, + { + "output_mutation_overlap": "chr21-GBR.tar.02bb7bf3.gz" + } + ] + }, + "sifting_input": "ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "9f32c850", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 201910288, + "client_begin": "2026-01-07 18:43:22.499229", + "client_end": "2026-01-07 18:46:44.409517", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767807802.497806, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": 
[], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808004.410645, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-6101.1000-genome-individuals-python-3.11.json b/experiments_sebd-6101.1000-genome-individuals-python-3.11.json new file mode 100644 index 000000000..1f5f9d03a --- /dev/null +++ b/experiments_sebd-6101.1000-genome-individuals-python-3.11.json @@ -0,0 +1,127 @@ +{ + "_invocations": { + "sebd-6101.1000-genome-individuals-python-3.11": { + "ea1e2488": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": {}, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "ea1e2488", + "stats": { + "cold_start": false, + "failure": true, + "memory_used": null + }, + "times": { + 
"benchmark": 0, + "client": 59380648, + "client_begin": "2026-01-07 18:48:18.625256", + "client_end": "2026-01-07 18:49:18.005904", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808098.624247, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } 
+ }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808158.006176, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-620.func-invo-python-3.11.json b/experiments_sebd-620.func-invo-python-3.11.json new file mode 100644 index 000000000..dde5f32c1 --- /dev/null +++ b/experiments_sebd-620.func-invo-python-3.11.json @@ -0,0 +1,127 @@ +{ + "_invocations": { + "sebd-620.func-invo-python-3.11": { + "eddf325d": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": {}, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "eddf325d", + "stats": { + "cold_start": false, + "failure": true, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 58520101, + "client_begin": "2026-01-07 18:49:46.989085", + "client_end": "2026-01-07 18:50:45.509186", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808186.986613, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": 
"70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808245.509385, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-6200.trip-booking-python-3.11.json b/experiments_sebd-6200.trip-booking-python-3.11.json new file mode 100644 index 000000000..ef83535fe --- /dev/null +++ b/experiments_sebd-6200.trip-booking-python-3.11.json @@ -0,0 +1,127 @@ +{ + "_invocations": { + "sebd-6200.trip-booking-python-3.11": { + "4ca0f1dd": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": {}, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "4ca0f1dd", + "stats": { + "cold_start": false, + "failure": true, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 58675677, + "client_begin": "2026-01-07 18:51:26.188106", + "client_end": "2026-01-07 18:52:24.863783", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808286.185467, + "config": 
{ + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808344.864041, + "result_bucket": null +} \ No newline at end of file diff 
--git a/experiments_sebd-630.parallel-sleep-python-3.11.json b/experiments_sebd-630.parallel-sleep-python-3.11.json new file mode 100644 index 000000000..ac631aebe --- /dev/null +++ b/experiments_sebd-630.parallel-sleep-python-3.11.json @@ -0,0 +1,138 @@ +{ + "_invocations": { + "sebd-630.parallel-sleep-python-3.11": { + "34a1f4bd": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "569cab83-368a-44ad-9502-13337e95ad53", + "workflowdata": { + "buffer": [ + "ok", + "ok" + ], + "count": 2, + "request_id": "34a1f4bd", + "sleep": 2 + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "34a1f4bd", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 4123684, + "client_begin": "2026-01-07 18:52:54.327731", + "client_end": "2026-01-07 18:52:58.451415", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808374.32533, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, 
+ "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808378.453515, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-631.parallel-download-python-3.11.json b/experiments_sebd-631.parallel-download-python-3.11.json new file mode 100644 index 000000000..3f2e770cc --- /dev/null +++ b/experiments_sebd-631.parallel-download-python-3.11.json @@ -0,0 +1,142 @@ +{ + "_invocations": { + "sebd-631.parallel-download-python-3.11": { + "77f89ce3": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "5a60d717-f8ad-463f-8b89-e16ea3d711a3", + "workflowdata": { + "blob": "631.parallel-download-0-input/data-10.txt", + "bucket": "sebs-benchmarks-sonataflow-075e240d", + "buffer": [ + "ok", + "ok", + "ok", + "ok", + "ok" + ], + "count": 5, + "request_id": "77f89ce3" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "77f89ce3", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 169956, + "client_begin": 
"2026-01-07 18:53:27.747029", + "client_end": "2026-01-07 18:53:27.916985", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808407.745584, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": 
"python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808407.920742, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-640.selfish-detour-python-3.11.json b/experiments_sebd-640.selfish-detour-python-3.11.json new file mode 100644 index 000000000..ed1fdd4ad --- /dev/null +++ b/experiments_sebd-640.selfish-detour-python-3.11.json @@ -0,0 +1,134 @@ +{ + "_invocations": { + "sebd-640.selfish-detour-python-3.11": { + "4ea3bae4": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "38e353c8-294d-4205-92e4-4947c841396f", + "workflowdata": { + "num_samples": 100, + "request_id": "4ea3bae4", + "response": "ok" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "4ea3bae4", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 10086565, + "client_begin": "2026-01-07 18:53:56.036991", + "client_end": "2026-01-07 18:54:06.123556", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808436.034907, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + 
"network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808446.125367, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-650.vid-python-3.11.json b/experiments_sebd-650.vid-python-3.11.json new file mode 100644 index 000000000..9efb48664 --- /dev/null +++ b/experiments_sebd-650.vid-python-3.11.json @@ -0,0 +1,188 @@ +{ + "_invocations": { + "sebd-650.vid-python-3.11": { + "eb3c486c": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "3d2a17ce-283d-4860-97fc-62c71fec55b7", + "workflowdata": { + "650": [ + { + "class": "car", + "score": 0.7900354862213135 + }, + { + "class": "car", + "score": 0.6695976853370667 + }, + { + "class": "car", + "score": 0.5970374345779419 + }, + { + "class": "car", + "score": 0.5392462015151978 + }, + { + "class": "car", + "score": 0.5122644901275635 + } + ], + "batch_size": 10, + 
"benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "frames": [ + { + "650": [ + { + "class": "car", + "score": 0.7900354862213135 + }, + { + "class": "car", + "score": 0.6695976853370667 + }, + { + "class": "car", + "score": 0.5970374345779419 + }, + { + "class": "car", + "score": 0.5392462015151978 + }, + { + "class": "car", + "score": 0.5122644901275635 + } + ] + } + ], + "frames_bucket": "650.vid-0-output", + "input_bucket": "650.vid-0-input", + "model_config": "faster_rcnn_resnet50_coco_2018_01_28.pbtxt", + "model_weights": "frozen_inference_graph.pb", + "n_frames": 3, + "request_id": "eb3c486c", + "video": "video_test.mp4" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "eb3c486c", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 2047114, + "client_begin": "2026-01-07 18:54:45.415712", + "client_end": "2026-01-07 18:54:47.462826", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808485.414011, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": 
"70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808487.464517, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-660.map-reduce-python-3.11.json b/experiments_sebd-660.map-reduce-python-3.11.json new file mode 100644 index 000000000..55d72845f --- /dev/null +++ b/experiments_sebd-660.map-reduce-python-3.11.json @@ -0,0 +1,159 @@ +{ + "_invocations": { + "sebd-660.map-reduce-python-3.11": { + "284ec8d7": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "681af736-c2d6-4eb5-9812-07f41744975c", + "workflowdata": { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "list": [ + { + "count": 50, + "word": "bird" + }, + { + "count": 50, + "word": "pig" + }, + { + "count": 50, + "word": "dog" + }, + { + "count": 50, + "word": "horse" + }, + { + "count": 50, + "word": "cat" + } + ], + "n_mappers": 3, + "output_bucket": "660.map-reduce-0-output", + "request_id": "284ec8d7", + 
"words": "words", + "words_bucket": "660.map-reduce-0-input" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "284ec8d7", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 558964, + "client_begin": "2026-01-07 18:55:19.711042", + "client_end": "2026-01-07 18:55:20.270006", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808519.709199, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, 
+ "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808520.27119, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-670.auth-python-3.11.json b/experiments_sebd-670.auth-python-3.11.json new file mode 100644 index 000000000..41688dc5b --- /dev/null +++ b/experiments_sebd-670.auth-python-3.11.json @@ -0,0 +1,135 @@ +{ + "_invocations": { + "sebd-670.auth-python-3.11": { + "8d3e9b67": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "21f2650b-6bff-4d69-b5f7-637b65e3393e", + "workflowdata": { + "message": "Who let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\nWho let the dogs out?\n", + "request_id": "8d3e9b67", + "response": "IoDRYyZtnY6ZtQCw16+m5IDqdXi35RMI2fWGPqKxuvbFQEfrRePgGE9M+Ob2GdkUckqZV0RCbMqRozEgS2WlnzAFiUxJMewZrIJH3Tsg/s7WJuHbr9/uyS78JUtXGPsaZbO2CwfqUDrK3urbpxZuGlFOtOREJypD7i0iQKW1ocZROppH4QMqvzrJ0+LCOvY0yjDa7w4p224s//Rxuhfmjq9nKCRexzfBU3+2jowMdQzCvjWBcCMoifPfiVIW5CVi6z7iATo9swodlLePxspu/zwHX3bKi2IuqcgkRw==", + "token": "allow" + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "8d3e9b67", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 45661, + "client_begin": "2026-01-07 18:55:49.146095", + "client_end": "2026-01-07 18:55:49.191756", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808549.14383, + "config": { 
+ "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808549.193409, + "result_bucket": null +} \ No newline at end of file diff 
--git a/experiments_sebd-680.excamera-python-3.11.json b/experiments_sebd-680.excamera-python-3.11.json new file mode 100644 index 000000000..4f3ec5b13 --- /dev/null +++ b/experiments_sebd-680.excamera-python-3.11.json @@ -0,0 +1,184 @@ +{ + "_invocations": { + "sebd-680.excamera-python-3.11": { + "9ccda4f4": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "75b9689e-a1fb-4f31-83e2-a6fbff49217f", + "workflowdata": { + "batch_size": 6, + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "input_bucket": "680.excamera-0-input", + "output_bucket": "680.excamera-0-output", + "quality": 1, + "request_id": "9ccda4f4", + "segments": [ + { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "input_bucket": "680.excamera-0-input", + "output_bucket": "680.excamera-0-output", + "prefix": "12945549", + "quality": 1, + "segments": [ + "00000000.y4m", + "00000001.y4m", + "00000002.y4m", + "00000003.y4m", + "00000004.y4m", + "00000005.y4m" + ] + }, + { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "input_bucket": "680.excamera-0-input", + "output_bucket": "680.excamera-0-output", + "prefix": "11097751", + "quality": 1, + "segments": [ + "00000006.y4m", + "00000007.y4m", + "00000008.y4m", + "00000009.y4m", + "00000010.y4m", + "00000011.y4m" + ] + }, + { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "input_bucket": "680.excamera-0-input", + "output_bucket": "680.excamera-0-output", + "prefix": "11371388", + "quality": 1, + "segments": [ + "00000012.y4m", + "00000013.y4m", + "00000014.y4m", + "00000015.y4m", + "00000016.y4m", + "00000017.y4m" + ] + } + ] + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "9ccda4f4", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 9202517, + "client_begin": "2026-01-07 18:56:23.637670", + "client_end": "2026-01-07 
18:56:32.840187", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808583.635935, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + "input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + 
"update_storage": false + } + }, + "end_time": 1767808592.841583, + "result_bucket": null +} \ No newline at end of file diff --git a/experiments_sebd-690.ml-python-3.11.json b/experiments_sebd-690.ml-python-3.11.json new file mode 100644 index 000000000..42a973dfe --- /dev/null +++ b/experiments_sebd-690.ml-python-3.11.json @@ -0,0 +1,149 @@ +{ + "_invocations": { + "sebd-690.ml-python-3.11": { + "1e4e7cce": { + "billing": { + "_billed_time": null, + "_gb_seconds": 0, + "_memory": null + }, + "output": { + "id": "39f9c1b4-20e2-40aa-b583-18c822b99f55", + "workflowdata": { + "benchmark_bucket": "sebs-benchmarks-sonataflow-075e240d", + "classifiers": [ + { + "C": 0.025, + "kernel": "linear", + "name": "SVC" + } + ], + "dataset_bucket": "690.ml-0-output", + "n_features": 5, + "n_samples": 100, + "request_id": "1e4e7cce", + "schedules": [ + { + "name": "SVC", + "score": 0.925 + } + ] + } + }, + "provider_times": { + "execution": 0, + "initialization": 0 + }, + "request_id": "1e4e7cce", + "stats": { + "cold_start": false, + "failure": false, + "memory_used": null + }, + "times": { + "benchmark": 0, + "client": 2207477, + "client_begin": "2026-01-07 18:57:12.129611", + "client_end": "2026-01-07 18:57:14.337088", + "initialization": 0 + } + } + } + }, + "_metrics": {}, + "begin_time": 1767808632.127185, + "config": { + "deployment": { + "name": "sonataflow", + "region": "", + "resources": { + "allocated_ports": [], + "nosql": { + "access_key": "None", + "address": "172.18.0.3:8000", + "alternator_port": 8000, + "cpus": -1, + "data_volume": "", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "mapped_port": 9012, + "memory": -1, + "network_name": "bridge", + "region": "None", + "secret_key": "None", + "version": "" + }, + "runtime": { + "endpoint_prefix": "", + "url": "http://localhost:8080" + }, + "storage": { + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "address": "172.18.0.2:9000", + "data_volume": "", + 
"input_buckets": [], + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "mapped_port": 9011, + "network_name": "bridge", + "output_buckets": [], + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "type": "minio", + "version": "" + } + } + }, + "experiments": { + "architecture": "x64", + "container_deployment": false, + "download_results": false, + "experiments": { + "eviction-model": { + "function_copy_idx": 0, + "invocations": 1, + "repetitions": 5, + "sleep": 1 + }, + "invocation-overhead": { + "N": 20, + "code_begin": 1048576, + "code_end": 261619712, + "code_points": 20, + "payload_begin": 1024, + "payload_end": 6251000, + "payload_points": 20, + "repetitions": 5, + "type": "payload" + }, + "network-ping-pong": { + "invocations": 50, + "repetitions": 1000, + "threads": 1 + }, + "perf-cost": { + "benchmark": "110.dynamic-html", + "concurrent-invocations": 50, + "experiments": [ + "cold", + "warm", + "burst", + "sequential" + ], + "input-size": "test", + "memory-sizes": [ + 128, + 256 + ], + "repetitions": 50 + } + }, + "flags": {}, + "runtime": { + "language": "python", + "version": "3.11" + }, + "update_code": false, + "update_storage": false + } + }, + "end_time": 1767808634.339108, + "result_bucket": null +} \ No newline at end of file diff --git a/install.py b/install.py index 57f047d23..b856e45b7 100755 --- a/install.py +++ b/install.py @@ -86,7 +86,7 @@ def execute(cmd, cwd=None): execute(f"git pull", cwd=data_dir) # clone else: - execute(f"git clone https://github.com/spcl/serverless-benchmarks-data.git {data_dir}") + execute(f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}") else: raise error @@ -99,4 +99,3 @@ def execute(cmd, cwd=None): execute("python3 setup.py build") execute("python3 pypapi/papi_build.py") os.chdir(cur_dir) - diff --git a/out_storage.json b/out_storage.json new file mode 100644 index 000000000..0ed1219f4 --- /dev/null +++ 
b/out_storage.json @@ -0,0 +1,35 @@ +{ + "object": { + "type": "minio", + "minio": { + "address": "172.18.0.2:9000", + "mapped_port": 9011, + "access_key": "8TOsGofwZjq6cEUVkb6FMFBt1HVgNW0rTPG2OjFFXUk", + "secret_key": "70b6e79a2ea05da0119a4ca4bf4a5f22ed95722796bf686915d2fa4d86c43e8d", + "instance_id": "ab6b3bc1b23989b211b8fd94bddc0bf59d1d032d1ab3a0a71c8c2d0408c8cb52", + "output_buckets": [], + "input_buckets": [], + "version": "RELEASE.2024-07-16T23-46-41Z", + "data_volume": "minio-volume", + "network_name": "sebs-network", + "type": "minio" + } + }, + "nosql": { + "type": "scylladb", + "scylladb": { + "address": "172.18.0.3:8000", + "mapped_port": 9012, + "alternator_port": 8000, + "access_key": "None", + "secret_key": "None", + "instance_id": "533665fb786cf926f5029ae13b41a9e46e70e06b5bed8d99d14811b3d676255b", + "region": "None", + "cpus": 1, + "memory": "750", + "version": "6.0", + "data_volume": "scylladb-volume", + "network_name": "sebs-network" + } + } +} \ No newline at end of file diff --git a/results/local-workflows/results/sebd-610.gen-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-610.gen-python-3.11/sonataflow.csv new file mode 100644 index 000000000..d9821413e --- /dev/null +++ b/results/local-workflows/results/sebd-610.gen-python-3.11/sonataflow.csv @@ -0,0 +1,4 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +process_astros,1767807759.496729,1767807759.496746,False,242064b010bb,64ea8b9d,0,0,64ea8b9d,0 +get_astros,1767807755.751181,1767807759.31053,False,aa63d8a3dc01,64ea8b9d,0,0,64ea8b9d,0 +many_people,1767807759.382087,1767807759.382103,False,bdd11f6cfcf6,64ea8b9d,0,0,64ea8b9d,0 diff --git a/results/local-workflows/results/sebd-610.gen-python-3.8/local.csv b/results/local-workflows/results/sebd-610.gen-python-3.8/local.csv new file mode 100644 index 000000000..0e8e794cc --- /dev/null +++ b/results/local-workflows/results/sebd-610.gen-python-3.8/local.csv @@ -0,0 +1,16 @@ 
+func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +map_astros,1765748134.354224,1765748134.354254,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.328433,1765748134.328455,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.343733,1765748134.343765,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.332384,1765748134.332417,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.350819,1765748134.35085,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.336082,1765748134.336113,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +process_astros,1765748135.419784,1765748135.419843,False,5c4ed0b239b9,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748135.407162,1765748135.407214,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +get_astros,1765748133.951551,1765748134.3044,False,a8eea933c3b9,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.35759,1765748134.35763,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +many_people,1765748134.315323,1765748134.315371,False,d7e1c83e01e7,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.347346,1765748134.347378,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.324549,1765748134.324576,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.361045,1765748134.361076,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 +map_astros,1765748134.339969,1765748134.339993,False,5aa2bf260f08,4460a6b8,0,0,4460a6b8,0 diff --git a/results/local-workflows/results/sebd-610.gen-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-610.gen-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-610.gen-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-6100.1000-genome-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-6100.1000-genome-python-3.11/sonataflow.csv new file mode 100644 index 000000000..3eabafc79 
--- /dev/null +++ b/results/local-workflows/results/sebd-6100.1000-genome-python-3.11/sonataflow.csv @@ -0,0 +1,3 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +sifting,1767807876.686747,1767807877.226287,False,db9ff909d2b3,9f32c850,0,267806263,9f32c850,0 +individuals_merge,1767807877.313827,1767807878.402442,False,86874c18a723,9f32c850,0,312614,9f32c850,0 diff --git a/results/local-workflows/results/sebd-6100.1000-genome-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-6100.1000-genome-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-6100.1000-genome-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-6101.1000-genome-individuals-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-620.func-invo-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-620.func-invo-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-620.func-invo-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-620.func-invo-python-3.8/local.csv b/results/local-workflows/results/sebd-620.func-invo-python-3.8/local.csv 
new file mode 100644 index 000000000..d9925d643 --- /dev/null +++ b/results/local-workflows/results/sebd-620.func-invo-python-3.8/local.csv @@ -0,0 +1,10 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +process,1765747664.143729,1765747664.143746,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.141557,1765747664.141582,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.136148,1765747664.136173,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.126044,1765747664.126068,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.129636,1765747664.129651,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.13179,1765747664.131804,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.133686,1765747664.133701,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +process,1765747664.138938,1765747664.138953,False,c1359898b4fe,9f28b7c0,0,0,9f28b7c0,0 +gen,1765747664.092395,1765747664.092421,False,4d70bed1abfe,9f28b7c0,0,0,9f28b7c0,0 diff --git a/results/local-workflows/results/sebd-620.func-invo-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-620.func-invo-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-620.func-invo-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-6200.trip-booking-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-6200.trip-booking-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-6200.trip-booking-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-6200.trip-booking-python-3.8/local.csv b/results/local-workflows/results/sebd-6200.trip-booking-python-3.8/local.csv new file mode 100644 index 000000000..3a0822a8d --- /dev/null +++ 
b/results/local-workflows/results/sebd-6200.trip-booking-python-3.8/local.csv @@ -0,0 +1,5 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +reserve_rental,1765747687.98063,1765747687.9935,False,74204aa51c51,dd46e1af,0,0,dd46e1af,0 +confirm,1765747688.208023,1765747688.224673,False,bf1e5dc4e095,dd46e1af,0,0,dd46e1af,0 +reserve_hotel,1765747687.861356,1765747687.865317,False,a42e4157c8e8,dd46e1af,0,0,dd46e1af,0 +reserve_flight,1765747688.099512,1765747688.10294,False,6aa1001caa28,dd46e1af,0,0,dd46e1af,0 diff --git a/results/local-workflows/results/sebd-6200.trip-booking-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-6200.trip-booking-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-6200.trip-booking-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-630.parallel-sleep-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.11/sonataflow.csv new file mode 100644 index 000000000..54922a532 --- /dev/null +++ b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.11/sonataflow.csv @@ -0,0 +1,2 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +generate,1767808374.378038,1767808374.378049,False,a5a5d0db5fd7,34a1f4bd,0,0,34a1f4bd,0 diff --git a/results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/local.csv b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/local.csv new file mode 100644 index 000000000..7e81486fd --- /dev/null +++ b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/local.csv @@ -0,0 +1,4 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +generate,1765747700.97717,1765747700.977188,False,320acc55d242,47737366,0,0,47737366,0 
+process,1765747701.01707,1765747703.019128,False,640d6a710572,47737366,0,0,47737366,0 +process,1765747703.024391,1765747705.025678,False,640d6a710572,47737366,0,0,47737366,0 diff --git a/results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-630.parallel-sleep-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-631.parallel-download-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-631.parallel-download-python-3.11/sonataflow.csv new file mode 100644 index 000000000..ac543654e --- /dev/null +++ b/results/local-workflows/results/sebd-631.parallel-download-python-3.11/sonataflow.csv @@ -0,0 +1,7 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +generate,1767808407.789139,1767808407.789153,False,6afde6a7f512,77f89ce3,0,0,77f89ce3,0 +process,1767808407.898752,1767808407.899614,False,78336b62f860,77f89ce3,0,50,77f89ce3,0 +process,1767808407.887955,1767808407.889048,False,78336b62f860,77f89ce3,0,20,77f89ce3,0 +process,1767808407.895273,1767808407.896161,False,78336b62f860,77f89ce3,0,40,77f89ce3,0 +process,1767808407.891864,1767808407.892736,False,78336b62f860,77f89ce3,0,30,77f89ce3,0 +process,1767808407.88167,1767808407.884182,False,78336b62f860,77f89ce3,0,10,77f89ce3,0 diff --git a/results/local-workflows/results/sebd-631.parallel-download-python-3.8/local.csv b/results/local-workflows/results/sebd-631.parallel-download-python-3.8/local.csv new file mode 100644 index 000000000..622ea2382 --- /dev/null +++ b/results/local-workflows/results/sebd-631.parallel-download-python-3.8/local.csv @@ -0,0 +1,7 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep 
+process,1765747717.312196,1765747717.315331,False,e4d1911fe02a,acf04ece,0,10,acf04ece,0 +process,1765747717.329192,1765747717.330548,False,e4d1911fe02a,acf04ece,0,50,acf04ece,0 +process,1765747717.322696,1765747717.323698,False,e4d1911fe02a,acf04ece,0,30,acf04ece,0 +process,1765747717.326168,1765747717.32726,False,e4d1911fe02a,acf04ece,0,40,acf04ece,0 +process,1765747717.318985,1765747717.319929,False,e4d1911fe02a,acf04ece,0,20,acf04ece,0 +generate,1765747717.197459,1765747717.197475,False,3ca6c483a8fd,acf04ece,0,0,acf04ece,0 diff --git a/results/local-workflows/results/sebd-631.parallel-download-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-631.parallel-download-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-631.parallel-download-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-640.selfish-detour-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-640.selfish-detour-python-3.11/sonataflow.csv new file mode 100644 index 000000000..b4963046f --- /dev/null +++ b/results/local-workflows/results/sebd-640.selfish-detour-python-3.11/sonataflow.csv @@ -0,0 +1,2 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,result.min_diff,result.num_iterations,result.timestamps,result.tps,request_id,rep +measure,1767808436.089672,1767808446.114368,False,a77dea6ab60d,4ea3bae4,0,0,63,3736172,"[794517, 812041, 1365470, 1416311, 1589671, 1590327, 1590327, 1591748, 1591748, 1592707, 3547264, 3547902, 3548869, 3549806, 3784359, 3802844, 5150022, 5163813, 6150680, 6151259, 6151259, 6151936, 6151936, 6152710, 6152710, 6153507, 6203325, 6238664, 8622529, 8637723, 10450174, 10451438, 10451438, 10452326, 11041670, 11053240, 11535090, 11549288, 12586443, 12600715, 13460793, 13469871, 15879957, 15882613, 18299035, 18301563, 20718237, 20720724, 23137441, 23139983, 25556576, 25559681, 26047252, 26057661, 
27092134, 27102539, 27975776, 27978485, 30394846, 30397351, 32814042, 32816527, 35233248, 35235823, 35616767, 35627172, 36656456, 36666967, 37652387, 37654945, 38227374, 38237766, 39266235, 39276596, 40071587, 40074198, 42490783, 42493252, 44909918, 44912259, 45137720, 45148432, 46176010, 46186393, 47329187, 47332054, 49748387, 49751090, 52167514, 52170349, 52551582, 52561924, 53592872, 53603318, 54586653, 54589317, 54632072, 54642627]",2419324120.0,4ea3bae4,0 diff --git a/results/local-workflows/results/sebd-640.selfish-detour-python-3.8/local.csv b/results/local-workflows/results/sebd-640.selfish-detour-python-3.8/local.csv new file mode 100644 index 000000000..79d75c4f2 --- /dev/null +++ b/results/local-workflows/results/sebd-640.selfish-detour-python-3.8/local.csv @@ -0,0 +1,2 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,result.min_diff,result.num_iterations,result.timestamps,result.tps,request_id,rep +measure,1765747728.917587,1765747738.933023,False,c3a5e2a3a4ff,087c8db5,0,0,44,979240,"[950319, 965368, 1582816, 1622185, 2179788, 2180260, 2180260, 2180969, 2181349, 2181798, 2181798, 2182265, 4002006, 4008839, 4267482, 4268238, 4268238, 4269151, 4269151, 4269665, 6421125, 6432373, 6993537, 6994279, 6994279, 6994882, 6995051, 6995671, 6995671, 6996484, 8290402, 8297852, 8840221, 8873054, 11259472, 11272153, 12111464, 12112159, 12112159, 12113344, 12113344, 12114100, 12364730, 12369412, 13678544, 13683824, 14180323, 14181004, 14181004, 14182475, 15857470, 15858100, 15858100, 15859165, 15859165, 15859795, 16097807, 16113998, 16190769, 16191317, 17944018, 17947912, 18517003, 18522260, 19202207, 19202874, 19202874, 19203870, 19204122, 19204947, 19204947, 19205402, 20936138, 20943241, 21139459, 21152149, 22193358, 22206157, 23355330, 23366056, 25774404, 25782254, 25910878, 25923777, 26951622, 26964354, 28193667, 28202125, 30612877, 30620435, 31590019, 31594360, 33031963, 33040423, 33644199, 33644728, 33644728, 
33645685]",2419314588.0,087c8db5,0 diff --git a/results/local-workflows/results/sebd-640.selfish-detour-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-640.selfish-detour-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-640.selfish-detour-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-650.vid-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-650.vid-python-3.11/sonataflow.csv new file mode 100644 index 000000000..532a5d6fe --- /dev/null +++ b/results/local-workflows/results/sebd-650.vid-python-3.11/sonataflow.csv @@ -0,0 +1,3 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +decode,1767808485.59046,1767808486.000908,False,880ac8f8fb66,eb3c486c,0,1258844,eb3c486c,0 +summarize,1767808487.453,1767808487.453017,False,3c3d7afcc453,eb3c486c,0,0,eb3c486c,0 diff --git a/results/local-workflows/results/sebd-650.vid-python-3.8/local.csv b/results/local-workflows/results/sebd-650.vid-python-3.8/local.csv new file mode 100644 index 000000000..bd473f9f6 --- /dev/null +++ b/results/local-workflows/results/sebd-650.vid-python-3.8/local.csv @@ -0,0 +1,4 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +analyse,1765747762.429102,1765747763.720146,False,3522a8327935,adf53a22,0,127875949,adf53a22,0 +decode,1765747761.878056,1765747762.267019,False,538e1395f46c,adf53a22,0,1258844,adf53a22,0 +summarize,1765747763.755012,1765747763.75504,False,b5931fa654fb,adf53a22,0,0,adf53a22,0 diff --git a/results/local-workflows/results/sebd-650.vid-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-650.vid-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-650.vid-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git 
a/results/local-workflows/results/sebd-660.map-reduce-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-660.map-reduce-python-3.11/sonataflow.csv new file mode 100644 index 000000000..b50c2d26e --- /dev/null +++ b/results/local-workflows/results/sebd-660.map-reduce-python-3.11/sonataflow.csv @@ -0,0 +1,3 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +split,1767808519.80488,1767808519.838882,False,e86dfb1e3fe5,284ec8d7,0,1150,284ec8d7,0 +shuffle,1767808520.140384,1767808520.145531,False,e3d0e3b63419,284ec8d7,0,0,284ec8d7,0 diff --git a/results/local-workflows/results/sebd-660.map-reduce-python-3.8/local.csv b/results/local-workflows/results/sebd-660.map-reduce-python-3.8/local.csv new file mode 100644 index 000000000..52f98c35c --- /dev/null +++ b/results/local-workflows/results/sebd-660.map-reduce-python-3.8/local.csv @@ -0,0 +1,11 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +map,1765747779.271765,1765747779.308599,False,52dfb2249ba4,aac31972,0,761,aac31972,0 +map,1765747779.230342,1765747779.269322,False,52dfb2249ba4,aac31972,0,383,aac31972,0 +reduce,1765747779.575957,1765747779.578511,False,4e82bda63d6d,aac31972,0,24,aac31972,0 +reduce,1765747779.571899,1765747779.57446,False,4e82bda63d6d,aac31972,0,18,aac31972,0 +shuffle,1765747779.450749,1765747779.456999,False,e13b2c9c8fba,aac31972,0,0,aac31972,0 +reduce,1765747779.579605,1765747779.582346,False,4e82bda63d6d,aac31972,0,30,aac31972,0 +split,1765747779.074395,1765747779.128264,False,7fb21a2dee8a,aac31972,0,1150,aac31972,0 +reduce,1765747779.556904,1765747779.564585,False,4e82bda63d6d,aac31972,0,6,aac31972,0 +reduce,1765747779.56733,1765747779.569981,False,4e82bda63d6d,aac31972,0,12,aac31972,0 +map,1765747779.31135,1765747779.350271,False,52dfb2249ba4,aac31972,0,1151,aac31972,0 diff --git a/results/local-workflows/results/sebd-660.map-reduce-python-3.8/sonataflow.csv 
b/results/local-workflows/results/sebd-660.map-reduce-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-660.map-reduce-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-670.auth-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-670.auth-python-3.11/sonataflow.csv new file mode 100644 index 000000000..b5bcff1ee --- /dev/null +++ b/results/local-workflows/results/sebd-670.auth-python-3.11/sonataflow.csv @@ -0,0 +1,2 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +auth,1767808549.184025,1767808549.184416,False,1a4ee4af7980,8d3e9b67,0,0,8d3e9b67,0 diff --git a/results/local-workflows/results/sebd-670.auth-python-3.8/local.csv b/results/local-workflows/results/sebd-670.auth-python-3.8/local.csv new file mode 100644 index 000000000..e5d1e62f4 --- /dev/null +++ b/results/local-workflows/results/sebd-670.auth-python-3.8/local.csv @@ -0,0 +1,2 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +auth,1765747791.875554,1765747791.876051,False,cb3d1c2c9564,4824a414,0,0,4824a414,0 diff --git a/results/local-workflows/results/sebd-670.auth-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-670.auth-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-670.auth-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-680.excamera-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-680.excamera-python-3.11/sonataflow.csv new file mode 100644 index 000000000..301a80872 --- /dev/null +++ b/results/local-workflows/results/sebd-680.excamera-python-3.11/sonataflow.csv @@ -0,0 +1,2 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep 
+split,1767808583.672572,1767808583.672602,False,b06eec9c6029,9ccda4f4,0,0,9ccda4f4,0 diff --git a/results/local-workflows/results/sebd-680.excamera-python-3.8/local.csv b/results/local-workflows/results/sebd-680.excamera-python-3.8/local.csv new file mode 100644 index 000000000..56421e27a --- /dev/null +++ b/results/local-workflows/results/sebd-680.excamera-python-3.8/local.csv @@ -0,0 +1,11 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +reencode,1765747815.673114,1765747816.138713,False,c0eeb709dd78,55c1b086,0,55320350,55c1b086,0 +reencode,1765747815.09418,1765747815.669897,False,c0eeb709dd78,55c1b086,0,42806402,55c1b086,0 +rebase,1765747816.2347,1765747816.765206,False,762c3ef9fc1d,55c1b086,0,24585228,55c1b086,0 +encode,1765747810.208044,1765747812.434979,False,b9794f24732a,55c1b086,0,39678856,55c1b086,0 +rebase,1765747817.294315,1765747817.774332,False,762c3ef9fc1d,55c1b086,0,49348109,55c1b086,0 +split,1765747808.178205,1765747808.178237,False,ee0abb69372f,55c1b086,0,0,55c1b086,0 +reencode,1765747814.463546,1765747815.091485,False,c0eeb709dd78,55c1b086,0,27515255,55c1b086,0 +rebase,1765747816.769762,1765747817.290442,False,762c3ef9fc1d,55c1b086,0,37002777,55c1b086,0 +encode,1765747812.4392,1765747814.360625,False,b9794f24732a,55c1b086,0,53688436,55c1b086,0 +encode,1765747808.281911,1765747810.204151,False,b9794f24732a,55c1b086,0,23088628,55c1b086,0 diff --git a/results/local-workflows/results/sebd-680.excamera-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-680.excamera-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-680.excamera-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-690.ml-python-3.11/sonataflow.csv b/results/local-workflows/results/sebd-690.ml-python-3.11/sonataflow.csv new file mode 100644 index 000000000..92a0e087a --- /dev/null +++ 
b/results/local-workflows/results/sebd-690.ml-python-3.11/sonataflow.csv @@ -0,0 +1,2 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +generate,1767808633.177356,1767808633.211837,False,4afe812649fc,1e4e7cce,0,0,1e4e7cce,0 diff --git a/results/local-workflows/results/sebd-690.ml-python-3.8/local.csv b/results/local-workflows/results/sebd-690.ml-python-3.8/local.csv new file mode 100644 index 000000000..69fb33dac --- /dev/null +++ b/results/local-workflows/results/sebd-690.ml-python-3.8/local.csv @@ -0,0 +1,3 @@ +func,start,end,is_cold,container_id,provider.request_id,blob.upload,blob.download,request_id,rep +train,1765747848.855069,1765747848.882302,False,724812af4c78,1e122cc3,0,5056,1e122cc3,0 +generate,1765747847.846056,1765747847.870281,False,2e3894765f0e,1e122cc3,0,0,1e122cc3,0 diff --git a/results/local-workflows/results/sebd-690.ml-python-3.8/sonataflow.csv b/results/local-workflows/results/sebd-690.ml-python-3.8/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-690.ml-python-3.8/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-695.protein-screen-python-3.11/local.csv b/results/local-workflows/results/sebd-695.protein-screen-python-3.11/local.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-695.protein-screen-python-3.11/local.csv @@ -0,0 +1 @@ + diff --git a/results/local-workflows/results/sebd-721.gpu-fraud-detect-python-3.11/local.csv b/results/local-workflows/results/sebd-721.gpu-fraud-detect-python-3.11/local.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/local-workflows/results/sebd-721.gpu-fraud-detect-python-3.11/local.csv @@ -0,0 +1 @@ + diff --git a/results/sebd-610.gen-python-3.11/sonataflow.csv b/results/sebd-610.gen-python-3.11/sonataflow.csv new file mode 100644 index 000000000..8b1378917 --- /dev/null 
+++ b/results/sebd-610.gen-python-3.11/sonataflow.csv @@ -0,0 +1 @@ + diff --git a/run_local_workflows.sh b/run_local_workflows.sh new file mode 100755 index 000000000..00a059817 --- /dev/null +++ b/run_local_workflows.sh @@ -0,0 +1,108 @@ +#!/bin/bash +set -euo pipefail + +# Prepare local configuration files +if [ ! -f config/local_workflows.json ]; then + cp config/example.json config/local_workflows.json +fi +if [ ! -f config/local_deployment.json ]; then + cp config/example.json config/local_deployment.json +fi + +DATA_FLAG="benchmarks-data/600.workflows/6100.1000-genome/ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf" +if [ ! -f "$DATA_FLAG" ]; then + echo "Workflow datasets missing, running download_datasets.sh..." + (cd benchmarks-data/600.workflows && ./download_datasets.sh) +else + echo "Workflow datasets present, skipping download." +fi + +cleanup() { + echo "Stopping all running Docker containers..." + docker ps -q | xargs -r docker stop >/dev/null || true +} +trap cleanup EXIT + +./sebs.py storage start all config/storage.json --output-json out_storage.json + +MINIO_ADDRESS=$(jq -r '.object.minio.address' out_storage.json) +MINIO_PORT=$(jq -r '.object.minio.mapped_port' out_storage.json) +MINIO_ACCESS=$(jq -r '.object.minio.access_key' out_storage.json) +MINIO_SECRET=$(jq -r '.object.minio.secret_key' out_storage.json) +MINIO_INSTANCE=$(jq -r '.object.minio.instance_id' out_storage.json) +SCYLLA_ADDRESS=$(jq -r '.nosql.scylladb.address' out_storage.json) +SCYLLA_PORT=$(jq -r '.nosql.scylladb.mapped_port' out_storage.json) +SCYLLA_INSTANCE=$(jq -r '.nosql.scylladb.instance_id' out_storage.json) + +for cfg in config/local_workflows.json config/local_deployment.json; do + tmp=$(mktemp) + jq \ + --arg addr "$MINIO_ADDRESS" \ + --argjson port "$MINIO_PORT" \ + --arg access "$MINIO_ACCESS" \ + --arg secret "$MINIO_SECRET" \ + --arg inst "$MINIO_INSTANCE" \ + --arg saddr "$SCYLLA_ADDRESS" \ + --argjson sport "$SCYLLA_PORT" \ + 
--arg sinst "$SCYLLA_INSTANCE" \ + --arg redis_host "localhost:6380" \ + --arg redis_pass "" \ + '(.deployment.local.storage.object.minio.address = $addr) + | (.deployment.local.storage.object.minio.mapped_port = $port) + | (.deployment.local.storage.object.minio.access_key = $access) + | (.deployment.local.storage.object.minio.secret_key = $secret) + | (.deployment.local.storage.object.minio.instance_id = $inst) + | (.deployment.local.storage.object.type = "minio") + | (.deployment.local.storage.nosql.scylladb.address = $saddr) + | (.deployment.local.storage.nosql.scylladb.mapped_port = $sport) + | (.deployment.local.storage.nosql.scylladb.instance_id = $sinst) + | (.deployment.local.storage.nosql.type = "scylladb") + | (.deployment.local.resources.redis.host = $redis_host) + | (.deployment.local.resources.redis.password = $redis_pass) + ' "$cfg" > "$tmp" + mv "$tmp" "$cfg" +done + +if docker ps -a --format '{{.Names}}' | grep -q '^sebs-redis$'; then + docker rm -f sebs-redis >/dev/null +fi +docker run -d --name sebs-redis -p 6380:6379 redis:7 + +# docker run --network=host --name redis -d redis redis-server --save 60 1 --loglevel warning --requirepass {yourpassword} + +# Ensure native helper for selfish-detour is built before packaging +SELFISH_DIR="benchmarks/600.workflows/640.selfish-detour/python" +SELFISH_SRC="$SELFISH_DIR/selfish-detour.c" +SELFISH_SO="$SELFISH_DIR/selfish-detour.so" +if [ -f "$SELFISH_SRC" ]; then + if [ ! -f "$SELFISH_SO" ] || [ "$SELFISH_SRC" -nt "$SELFISH_SO" ]; then + echo "Compiling selfish-detour shared object..." 
+ gcc -O2 -shared -fPIC -o "$SELFISH_SO" "$SELFISH_SRC" + fi +fi + +WORKFLOWS=( + # "610.gen" + # "6100.1000-genome" + # "6101.1000-genome-individuals" + # "620.func-invo" + # "6200.trip-booking" + # "630.parallel-sleep" + # "631.parallel-download" + # "640.selfish-detour" + # "650.vid" + # "660.map-reduce" + # "670.auth" + # "680.excamera" + # "690.ml" + "721.gpu-fraud-detect" +) + +for wf in "${WORKFLOWS[@]}"; do + echo "===== Running $wf =====" + ./sebs.py benchmark workflow "$wf" test \ + --config config/local_workflows.json \ + --deployment local --trigger http --repetitions 1 \ + --output-dir results/local-workflows --verbose || true + sleep 5 +done diff --git a/run_sonataflow.pid b/run_sonataflow.pid new file mode 100644 index 000000000..524339ec5 --- /dev/null +++ b/run_sonataflow.pid @@ -0,0 +1 @@ +169510 diff --git a/run_sonataflow_workflows.sh b/run_sonataflow_workflows.sh new file mode 100755 index 000000000..da9af15a4 --- /dev/null +++ b/run_sonataflow_workflows.sh @@ -0,0 +1,454 @@ +#!/bin/bash +set -euo pipefail + +# Use a single Docker daemon for both SeBS (python docker SDK) and `docker` CLI. +# On Linux, prefer the native engine at `/var/run/docker.sock` when available: +# Docker Desktop's VM-backed filesystem sharing can break bind-mounted volumes for MinIO/ScyllaDB. +if [ -z "${DOCKER_HOST:-}" ]; then + if [ -S /var/run/docker.sock ] && DOCKER_HOST=unix:///var/run/docker.sock docker info >/dev/null 2>&1; then + export DOCKER_HOST="unix:///var/run/docker.sock" + elif command -v docker >/dev/null 2>&1; then + DOCKER_HOST_FROM_CONTEXT=$(docker context inspect --format '{{.Endpoints.docker.Host}}' 2>/dev/null || true) + if [ -n "${DOCKER_HOST_FROM_CONTEXT:-}" ]; then + export DOCKER_HOST="$DOCKER_HOST_FROM_CONTEXT" + fi + fi +fi + +# Prefer the repo's virtualenv (avoids missing deps when not activated). 
+SEBS_PYTHON="${SEBS_PYTHON:-}" +if [ -z "${SEBS_PYTHON}" ]; then + if [ -x "$PWD/python-venv/bin/python" ]; then + SEBS_PYTHON="$PWD/python-venv/bin/python" + elif command -v python3 >/dev/null 2>&1; then + SEBS_PYTHON="$(command -v python3)" + elif command -v python >/dev/null 2>&1; then + SEBS_PYTHON="$(command -v python)" + else + echo "ERROR: python not found (set SEBS_PYTHON or install python3)." + exit 1 + fi +fi + +# Prepare local configuration files +if [ ! -f config/local_workflows.json ]; then + cp config/example.json config/local_workflows.json +fi +if [ ! -f config/local_deployment.json ]; then + cp config/example.json config/local_deployment.json +fi + +DATA_FLAG="benchmarks-data/600.workflows/6100.1000-genome/ALL.chr21.phase3_shapeit2_mvncall_integrated_v5.20130502.sites.annotation.vcf" +if [ ! -f "$DATA_FLAG" ]; then + echo "Workflow datasets missing, running download_datasets.sh..." + (cd benchmarks-data/600.workflows && ./download_datasets.sh) +else + echo "Workflow datasets present, skipping download." +fi + +RUNTIME_URL="http://localhost:8080" +# Recent `quay.io/kiegroup/kogito-swf-devmode` images expose workflow endpoints at `/{workflowId}`. +# Some older setups used `/services/{workflowId}`; SeBS will auto-fallback on 404. +ENDPOINT_PREFIX="" + +cleanup() { + echo "Stopping all running Docker containers..." + docker ps -q | xargs -r docker stop >/dev/null || true +} +trap cleanup EXIT + +# Clean up stale workflow definitions and cached workflows from previous runs +# These contain hardcoded IPs that become invalid when containers restart +echo "Cleaning up stale workflow definitions and cached workflows..." 
+rm -f "$PWD/sonataflow-workflows/workflows"/*.sw.json 2>/dev/null || true +# Delete entire workflow cache directories to force full regeneration +if command -v docker >/dev/null 2>&1 && [ -d cache ]; then + docker run --rm -v "$PWD/cache:/cache" alpine sh -c "find /cache -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} +" 2>/dev/null || true +fi +if [ -d "$PWD/cache" ]; then + rm -rf "$PWD/cache"/* 2>/dev/null || true +fi + +"$SEBS_PYTHON" ./sebs.py storage start all config/storage.json --output-json out_storage.json + +MINIO_ADDRESS=$(jq -r '.object.minio.address' out_storage.json) +MINIO_PORT=$(jq -r '.object.minio.mapped_port' out_storage.json) +MINIO_ACCESS=$(jq -r '.object.minio.access_key' out_storage.json) +MINIO_SECRET=$(jq -r '.object.minio.secret_key' out_storage.json) +MINIO_INSTANCE=$(jq -r '.object.minio.instance_id' out_storage.json) +SCYLLA_ADDRESS=$(jq -r '.nosql.scylladb.address' out_storage.json) +SCYLLA_PORT=$(jq -r '.nosql.scylladb.mapped_port' out_storage.json) +SCYLLA_INSTANCE=$(jq -r '.nosql.scylladb.instance_id' out_storage.json) + +# Fail fast if storage containers were created in a different daemon/context. +if ! docker inspect "$MINIO_INSTANCE" >/dev/null 2>&1; then + echo "ERROR: MinIO container $MINIO_INSTANCE not found in the current Docker daemon." + echo "Hint: set DOCKER_HOST to the daemon SeBS uses (e.g., unix:///var/run/docker.sock)." + exit 1 +fi +if ! docker inspect "$SCYLLA_INSTANCE" >/dev/null 2>&1; then + echo "ERROR: ScyllaDB container $SCYLLA_INSTANCE not found in the current Docker daemon." + echo "Hint: set DOCKER_HOST to the daemon SeBS uses (e.g., unix:///var/run/docker.sock)." 
+  exit 1
+fi
+
+for cfg in config/local_workflows.json config/local_deployment.json; do
+  tmp=$(mktemp)
+  jq \
+    --arg addr "$MINIO_ADDRESS" \
+    --argjson port "$MINIO_PORT" \
+    --arg access "$MINIO_ACCESS" \
+    --arg secret "$MINIO_SECRET" \
+    --arg inst "$MINIO_INSTANCE" \
+    --arg saddr "$SCYLLA_ADDRESS" \
+    --argjson sport "$SCYLLA_PORT" \
+    --arg sinst "$SCYLLA_INSTANCE" \
+    --arg redis_host "localhost:6381" \
+    --arg redis_pass "" \
+    --arg runtime_url "$RUNTIME_URL" \
+    --arg endpoint_prefix "$ENDPOINT_PREFIX" \
+    '(.deployment.name = "sonataflow")
+    | (.deployment.sonataflow.storage.object.type = "minio")
+    | (.deployment.sonataflow.storage.object.minio.address = $addr)
+    | (.deployment.sonataflow.storage.object.minio.mapped_port = $port)
+    | (.deployment.sonataflow.storage.object.minio.access_key = $access)
+    | (.deployment.sonataflow.storage.object.minio.secret_key = $secret)
+    | (.deployment.sonataflow.storage.object.minio.instance_id = $inst)
+    | (.deployment.sonataflow.storage.object.minio.input_buckets = [])
+    | (.deployment.sonataflow.storage.object.minio.output_buckets = [])
+    | (.deployment.sonataflow.storage.nosql.type = "scylladb")
+    | (.deployment.sonataflow.storage.nosql.scylladb.address = $saddr)
+    | (.deployment.sonataflow.storage.nosql.scylladb.mapped_port = $sport)
+    | (.deployment.sonataflow.storage.nosql.scylladb.instance_id = $sinst)
+    | (.deployment.sonataflow.resources.redis.host = $redis_host)
+    | (.deployment.sonataflow.resources.redis.password = $redis_pass)
+    | (.deployment.sonataflow.resources.runtime.url = $runtime_url)
+    | (.deployment.sonataflow.resources.runtime.endpoint_prefix = $endpoint_prefix)
+    ' "$cfg" > "$tmp"
+  mv "$tmp" "$cfg"
+done
+
+# Create sebs-network if it doesn't exist (needed before Redis starts)
+docker network inspect sebs-network >/dev/null 2>&1 || docker network create sebs-network
+
+# Start Redis if not already running
+if ! docker ps --format '{{.Names}}' | grep -q '^sebs-redis$'; then
+  # Remove any stopped Redis container
+  docker rm -f sebs-redis >/dev/null 2>&1 || true
+  # Start Redis on sebs-network so function containers can reach it
+  docker run -d --name sebs-redis --network sebs-network -p 6381:6379 redis:7
+fi
+
+# Prepare SonataFlow resources directory structure expected by kogito-swf-devmode:
+# - `src/main/resources/application.properties`
+# - `src/main/resources/workflows/*.sw.json`
+SONATAFLOW_RESOURCES_DIR="$PWD/sonataflow-workflows"
+SONATAFLOW_WORKFLOWS_DIR="$SONATAFLOW_RESOURCES_DIR/workflows"
+mkdir -p "$SONATAFLOW_WORKFLOWS_DIR"
+if [ ! -f "$SONATAFLOW_RESOURCES_DIR/application.properties" ]; then
+  cat >"$SONATAFLOW_RESOURCES_DIR/application.properties" <<'EOF'
+# Enable Kogito process/workflow generation
+kogito.codegen.processes.enabled=true
+quarkus.kogito.codegen.processes.enabled=true
+EOF
+fi
+
+# Read the runtime settings so we only stage matching workflow variants.
+RUNTIME_LANG=$(jq -r '.experiments.runtime.language // "python"' config/local_workflows.json)
+RUNTIME_VER=$(jq -r '.experiments.runtime.version // "3.11"' config/local_workflows.json)
+ARCH=$(jq -r '.experiments.architecture // "x64"' config/local_workflows.json)
+
+dedupe_sw_files() {
+  local dir=$1
+  declare -A seen=()
+  # Consider any `.sw.json` under resources (root + workflows/) to avoid Quarkus duplicates.
+  while IFS= read -r -d '' f; do
+    local wid
+    wid=$(jq -r '.id // empty' "$f" 2>/dev/null || true)
+    [ -n "$wid" ] || continue
+    if [ -n "${seen[$wid]:-}" ] && [ "${seen[$wid]}" != "$f" ]; then
+      echo "Removing duplicate workflow id '$wid' at $f (keeping ${seen[$wid]})"
+      rm -f "$f"
+    else
+      seen[$wid]="$f"
+    fi
+  done < <(find "$dir" -maxdepth 2 -name "*.sw.json" -print0 2>/dev/null)
+}
+
+# If older runs put `.sw.json` in the resources root, move them into `workflows/`
+# so Quarkus only sees a single copy.
+while IFS= read -r -d '' f; do
+  mv -f "$f" "$SONATAFLOW_WORKFLOWS_DIR/" || true
+done < <(find "$SONATAFLOW_RESOURCES_DIR" -maxdepth 1 -name "*.sw.json" -print0 2>/dev/null)
+dedupe_sw_files "$SONATAFLOW_RESOURCES_DIR"
+
+# Function to copy workflow definitions to SonataFlow directory after each benchmark
+copy_workflows_to_sonataflow() {
+  find cache -name "*.sw.json" \
+    -path "*/sonataflow/${RUNTIME_LANG}/${RUNTIME_VER}/${ARCH}/*" \
+    -path "*/workflow_resources/sonataflow/*" 2>/dev/null | while read -r swfile; do
+    cp -f "$swfile" "$SONATAFLOW_WORKFLOWS_DIR/" 2>/dev/null || true
+  done
+  dedupe_sw_files "$SONATAFLOW_RESOURCES_DIR"
+}
+
+get_workflow_id_for() {
+  local wf_name=$1
+  local pattern="${wf_name//./_}"
+  for f in "$SONATAFLOW_WORKFLOWS_DIR"/*.sw.json; do
+    [ -f "$f" ] || continue
+    if printf '%s\n' "$f" | grep -q "$pattern"; then
+      jq -r '.id' "$f"
+      return 0
+    fi
+  done
+  local newest
+  newest=$(ls -1t "$SONATAFLOW_WORKFLOWS_DIR"/*.sw.json 2>/dev/null | head -n1)
+  if [ -n "$newest" ]; then
+    jq -r '.id' "$newest"
+    return 0
+  fi
+  return 1
+}
+
+wait_for_health() {
+  local url=$1
+  local attempts=40
+  local delay=3
+  echo "Waiting for SonataFlow runtime health at $url ..."
+  for i in $(seq 1 $attempts); do
+    code=$(curl -s -o /dev/null -w "%{http_code}" "$url/q/health/ready" || true)
+    if [ "$code" = "200" ]; then
+      echo "SonataFlow runtime is ready."
+      return 0
+    fi
+    sleep "$delay"
+  done
+  echo "Warning: SonataFlow runtime health endpoint not ready after $((attempts * delay))s"
+}
+
+wait_for_workflow_endpoint() {
+  local workflow_id=$1
+  local base_url=$2
+  local endpoint_prefix=$3
+  local prefix="${endpoint_prefix#/}"
+  local -a urls=()
+  if [ -n "$prefix" ]; then
+    urls+=("${base_url%/}/${prefix}/${workflow_id}")
+  fi
+  urls+=("${base_url%/}/${workflow_id}")
+  if [ "$prefix" != "services" ]; then
+    urls+=("${base_url%/}/services/${workflow_id}")
+  fi
+  local attempts=60
+  local delay=5
+  echo "Waiting for workflow endpoint(s): ${urls[*]} ..."
+  for i in $(seq 1 $attempts); do
+    for url in "${urls[@]}"; do
+      # GET will likely return 405 for POST-only endpoints; 404 means not loaded yet
+      # 500/503 mean workflow is loading/compiling, keep waiting
+      code=$(curl -s -o /dev/null -w "%{http_code}" "$url" || true)
+      if [ "$code" = "200" ] || [ "$code" = "405" ]; then
+        echo "Workflow endpoint responding at $url with HTTP $code."
+        return 0
+      elif [ "$code" != "404" ] && [ "$code" != "000" ]; then
+        echo "Workflow endpoint at $url returned HTTP $code (still loading), waiting..."
+      fi
+    done
+    sleep "$delay"
+  done
+  echo "Warning: Workflow endpoint(s) not responding after $((attempts * delay))s"
+}
+
+ensure_runtime_networks() {
+  if ! command -v docker >/dev/null 2>&1; then
+    return 0
+  fi
+  if ! docker ps -a --format '{{.Names}}' 2>/dev/null | grep -q '^sonataflow-runtime$'; then
+    return 0
+  fi
+  if ! command -v jq >/dev/null 2>&1; then
+    return 0
+  fi
+
+  local networks
+  networks=$(docker inspect -f '{{json .NetworkSettings.Networks}}' sonataflow-runtime 2>/dev/null || echo "{}")
+  if ! echo "$networks" | jq -e 'has("sebs-network")' >/dev/null 2>&1; then
+    docker network connect sebs-network sonataflow-runtime >/dev/null 2>&1 || true
+  fi
+  if ! echo "$networks" | jq -e 'has("bridge")' >/dev/null 2>&1; then
+    docker network connect bridge sonataflow-runtime >/dev/null 2>&1 || true
+  fi
+}
+
+preflight_runtime_function_connectivity() {
+  local sw_json=$1
+  if ! command -v docker >/dev/null 2>&1; then
+    return 0
+  fi
+  if ! docker ps --format '{{.Names}}' 2>/dev/null | grep -q '^sonataflow-runtime$'; then
+    return 0
+  fi
+  if ! command -v jq >/dev/null 2>&1; then
+    return 0
+  fi
+  if [ ! -f "$sw_json" ]; then
+    return 0
+  fi
+
+  ensure_runtime_networks
+
+  # Extract function base URLs from `rest:post:http://host:port/` operations.
+  mapfile -t urls < <(jq -r '.functions[]?.operation // empty' "$sw_json" 2>/dev/null \
+    | sed -n 's#^rest:post:##p' | sed -e 's#/*$#/#' | sort -u)
+  if [ "${#urls[@]}" -eq 0 ]; then
+    return 0
+  fi
+
+  echo "Preflight: checking SonataFlow runtime connectivity to function containers..."
+  # Try curl first, then wget, then python.
+  local http_cmd
+  http_cmd=$(docker exec sonataflow-runtime sh -lc 'if command -v curl >/dev/null 2>&1; then echo curl; elif command -v wget >/dev/null 2>&1; then echo wget; elif command -v python3 >/dev/null 2>&1; then echo python3; elif command -v python >/dev/null 2>&1; then echo python; else echo none; fi' 2>/dev/null || echo none)
+  if [ "$http_cmd" = "none" ]; then
+    http_cmd=""
+  fi
+  local failed=0
+  if [ -n "$http_cmd" ]; then
+    for u in "${urls[@]}"; do
+      # Use `/alive` which SeBS function containers expose.
+      if [ "$http_cmd" = "curl" ]; then
+        if ! docker exec sonataflow-runtime sh -lc "curl -fsS --max-time 3 '${u}alive' >/dev/null" >/dev/null 2>&1; then
+          echo "  Cannot reach ${u}alive from sonataflow-runtime"
+          failed=1
+        fi
+      elif [ "$http_cmd" = "wget" ]; then
+        if ! docker exec sonataflow-runtime sh -lc "wget -q -T 3 -O - '${u}alive' >/dev/null" >/dev/null 2>&1; then
+          echo "  Cannot reach ${u}alive from sonataflow-runtime"
+          failed=1
+        fi
+      else
+        if ! docker exec sonataflow-runtime sh -lc "$http_cmd - <<'PY'\nimport sys, urllib.request\nurl=sys.argv[1]\nurllib.request.urlopen(url, timeout=3).read(1)\nPY\n'${u}alive'" >/dev/null 2>&1; then
+          echo "  Cannot reach ${u}alive from sonataflow-runtime"
+          failed=1
+        fi
+      fi
+    done
+  else
+    if ! docker network inspect sebs-network >/dev/null 2>&1; then
+      echo "Preflight skipped: sebs-network not found and sonataflow-runtime lacks curl/wget/python."
+      return 0
+    fi
+    docker run --rm --network sebs-network busybox sh -c '
+failed=0
+for u in "$@"; do
+  if ! wget -q -T 3 -O - "${u}alive" >/dev/null 2>&1; then
+    echo "  Cannot reach ${u}alive from sebs-network"
+    failed=1
+  fi
+done
+exit $failed
+' -- "${urls[@]}" || failed=1
+  fi
+  if [ "$failed" -ne 0 ]; then
+    echo "Preflight failed: SonataFlow cannot reach one or more function containers."
+    echo "Hint: ensure sonataflow-runtime and sebs-*___* function containers share a Docker network, and that SeBS and docker CLI use the same Docker daemon/context."
+    return 1
+  fi
+}
+
+# Create Docker network for SonataFlow and functions if it doesn't exist
+docker network inspect sebs-network >/dev/null 2>&1 || docker network create sebs-network
+
+# Note: We'll start SonataFlow runtime AFTER generating the first workflow
+# so that it can detect workflows at startup and enable the processes generator
+
+# Ensure native helper for selfish-detour is built before packaging
+SELFISH_DIR="benchmarks/600.workflows/640.selfish-detour/python"
+SELFISH_SRC="$SELFISH_DIR/selfish-detour.c"
+SELFISH_SO="$SELFISH_DIR/selfish-detour.so"
+if [ -f "$SELFISH_SRC" ]; then
+  if [ ! -f "$SELFISH_SO" ] || [ "$SELFISH_SRC" -nt "$SELFISH_SO" ]; then
+    echo "Compiling selfish-detour shared object..."
+    gcc -O2 -shared -fPIC -o "$SELFISH_SO" "$SELFISH_SRC"
+  fi
+fi
+
+WORKFLOWS=(
+  "610.gen"
+  "6100.1000-genome"
+  "6101.1000-genome-individuals"
+  "620.func-invo"
+  "6200.trip-booking"
+  "630.parallel-sleep"
+  "631.parallel-download"
+  "640.selfish-detour"
+  "650.vid"
+  "660.map-reduce"
+  "670.auth"
+  "680.excamera"
+  "690.ml"
+)
+
+SONATAFLOW_STARTED=false
+for wf in "${WORKFLOWS[@]}"; do
+  echo "===== Running $wf ====="
+
+  # First, create the workflow (without invoking it yet) by running with --repetitions 0
+  # This generates the .sw.json file
+  "$SEBS_PYTHON" ./sebs.py benchmark workflow "$wf" test \
+    --config config/local_workflows.json \
+    --deployment sonataflow --trigger http --repetitions 0 \
+    --output-dir results/local-workflows --verbose || true
+
+  # Copy newly generated workflow definitions to SonataFlow directory
+  copy_workflows_to_sonataflow
+  echo "Copied workflow definitions to SonataFlow directory"
+
+  if ! ls "$SONATAFLOW_WORKFLOWS_DIR"/*.sw.json >/dev/null 2>&1; then
+    echo "No workflow definitions found in $SONATAFLOW_WORKFLOWS_DIR after generating $wf"
+    exit 1
+  fi
+
+  WF_ID=$(get_workflow_id_for "$wf" || true)
+  if [ -z "$WF_ID" ] || [ "$WF_ID" = "null" ]; then
+    echo "Could not determine workflow id for $wf; available definitions:"
+    ls -l "$SONATAFLOW_WORKFLOWS_DIR"
+    exit 1
+  fi
+  echo "Workflow id for $wf: $WF_ID"
+
+  # Start SonataFlow runtime on first iteration (after first workflow is generated)
+  if [ "$SONATAFLOW_STARTED" = false ]; then
+    echo "Starting SonataFlow runtime container..."
+    if docker ps -a --format '{{.Names}}' | grep -q '^sonataflow-runtime$'; then
+      docker rm -f sonataflow-runtime >/dev/null
+    fi
+    # Start on `sebs-network` (primary) and also attach to `bridge` so the runtime can reach
+    # function containers whether SeBS exposes them via `sebs-network` or `bridge`.
+    docker run -d --name sonataflow-runtime --network sebs-network -p 8080:8080 \
+      -v "$SONATAFLOW_RESOURCES_DIR":/home/kogito/serverless-workflow-project/src/main/resources \
+      quay.io/kiegroup/kogito-swf-devmode:latest
+    docker network connect bridge sonataflow-runtime >/dev/null 2>&1 || true
+
+    echo "Waiting for SonataFlow runtime to start and load workflows..."
+    wait_for_health "$RUNTIME_URL"
+    wait_for_workflow_endpoint "$WF_ID" "$RUNTIME_URL" "$ENDPOINT_PREFIX"
+    SONATAFLOW_STARTED=true
+  else
+    # Wait for SonataFlow to detect and load the new workflow (dev mode auto-reload)
+    echo "Waiting for SonataFlow to load workflow..."
+    sleep 10
+    wait_for_workflow_endpoint "$WF_ID" "$RUNTIME_URL" "$ENDPOINT_PREFIX"
+  fi
+
+  # Ensure runtime can reach function containers before invoking the workflow.
+  preflight_runtime_function_connectivity "$SONATAFLOW_WORKFLOWS_DIR/${WF_ID}.sw.json" || exit 1
+
+  # Now run the actual benchmark
+  "$SEBS_PYTHON" ./sebs.py benchmark workflow "$wf" test \
+    --config config/local_workflows.json \
+    --deployment sonataflow --trigger http --repetitions 1 \
+    --output-dir results/local-workflows --verbose || true
+
+  sleep 5
+done
diff --git a/sebs.py b/sebs.py
index fc00af73c..cba6537e6 100755
--- a/sebs.py
+++ b/sebs.py
@@ -37,8 +37,14 @@ def __call__(self, *args, **kwargs):
         logging.info("# Experiments failed!
See out.log for details") finally: # Close + # For SonataFlow deployments, skip shutdown to keep containers alive + # The external script will manage container lifecycle if deployment_client is not None: - deployment_client.shutdown() + deployment_name = getattr(deployment_client.config, '_name', '') + if deployment_name != 'sonataflow': + deployment_client.shutdown() + else: + logging.info("Skipping deployment shutdown for SonataFlow (containers kept alive)") if sebs_client is not None: sebs_client.shutdown() @@ -91,7 +97,7 @@ def common_params(func): @click.option( "--deployment", default=None, - type=click.Choice(["azure", "aws", "gcp", "local", "openwhisk"]), + type=click.Choice(["azure", "aws", "gcp", "local", "openwhisk", "sonataflow"]), help="Cloud deployment to use.", ) @click.option( @@ -334,8 +340,6 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam sebs_client, deployment_client, ) = parse_common_params(**kwargs) - if isinstance(deployment_client, Local): - raise NotImplementedError("Local workflow deployment is currently not supported.") assert deployment_client.config.resources.redis_host is not None @@ -392,9 +396,11 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam df = pd.DataFrame(measurements) df.to_csv(path, index=False) - with open("experiments.json", "w") as out_f: + # Use workflow name to create unique file + experiment_file = f"experiments_{workflow.name}.json" + with open(experiment_file, "w") as out_f: out_f.write(sebs.utils.serialize(result)) - sebs_client.logging.info("Save results to {}".format(os.path.abspath("experiments.json"))) + sebs_client.logging.info("Save results to {}".format(os.path.abspath(experiment_file))) @benchmark.command() diff --git a/sebs/benchmark.py b/sebs/benchmark.py index dbcae6b43..98ca47820 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -113,9 +113,7 @@ def functions(self) -> Dict[str, Any]: @property def code_location(self): if 
self.code_package: - return os.path.join( - self._cache_client.cache_dir, self.code_package["location"] - ) + return os.path.join(self._cache_client.cache_dir, self.code_package["location"]) else: return self._code_location @@ -179,9 +177,7 @@ def container_deployment(self): @property # noqa: A003 def hash(self): path = os.path.join(self.benchmark_path, self.language_name) - self._hash_value = Benchmark.hash_directory( - path, self._deployment_name, self.language_name - ) + self._hash_value = Benchmark.hash_directory(path, self._deployment_name, self.language_name) return self._hash_value @hash.setter # noqa: A003 @@ -211,18 +207,14 @@ def __init__( self._container_deployment = config.container_deployment self._benchmark_path = find_benchmark(self.benchmark, "benchmarks") if not self._benchmark_path: - raise RuntimeError( - "Benchmark {benchmark} not found!".format(benchmark=self._benchmark) - ) + raise RuntimeError("Benchmark {benchmark} not found!".format(benchmark=self._benchmark)) with open(os.path.join(self.benchmark_path, "config.json")) as json_file: self._benchmark_config: BenchmarkConfig = BenchmarkConfig.deserialize( json.load(json_file) ) if self.language not in self.benchmark_config.languages: raise RuntimeError( - "Benchmark {} not available for language {}".format( - self.benchmark, self.language - ) + "Benchmark {} not available for language {}".format(self.benchmark, self.language) ) self._cache_client = cache_client self._docker_client = docker_client @@ -333,7 +325,7 @@ def query_cache(self): def get_code_files(self, include_config=True): FILES = { - "python": ["*.py"], + "python": ["*.py", "*.sh", "*.c", "*.h", "*.so", "*.so.*"], "nodejs": ["*.js"], } if include_config: @@ -385,12 +377,24 @@ def add_deployment_files(self, output_dir: str, is_workflow: bool): handler_path = os.path.join(output_dir, "handler.py") handler_function_path = os.path.join(output_dir, "handler_function.py") handler_workflow_path = os.path.join(output_dir, 
"handler_workflow.py") - if is_workflow: + if is_workflow and os.path.exists(handler_workflow_path): os.rename(handler_workflow_path, handler_path) - os.remove(handler_function_path) - else: + if os.path.exists(handler_function_path): + os.remove(handler_function_path) + elif not is_workflow and os.path.exists(handler_function_path): os.rename(handler_function_path, handler_path) - os.remove(handler_workflow_path) + if os.path.exists(handler_workflow_path): + os.remove(handler_workflow_path) + + workflow_entry = os.path.join(output_dir, "function_workflow.py") + function_entry = os.path.join(output_dir, "function.py") + if os.path.exists(workflow_entry): + if is_workflow: + if os.path.exists(function_entry): + os.remove(function_entry) + os.rename(workflow_entry, function_entry) + else: + os.remove(workflow_entry) def add_deployment_package_python(self, output_dir): @@ -424,9 +428,7 @@ def add_deployment_package_nodejs(self, output_dir): ) if len(packages): - package_config = os.path.join( - output_dir, f"package.json.{self._language_version}" - ) + package_config = os.path.join(output_dir, f"package.json.{self._language_version}") if not os.path.exists(package_config): package_config = os.path.join(output_dir, "package.json") @@ -484,9 +486,7 @@ def ensure_image(name: str) -> None: except docker.errors.ImageNotFound: try: self.logging.info( - "Docker pull of image {repo}:{image}".format( - repo=repo_name, image=name - ) + "Docker pull of image {repo}:{image}".format(repo=repo_name, image=name) ) self._docker_client.images.pull(repo_name, name) except docker.errors.APIError: @@ -511,9 +511,7 @@ def ensure_image(name: str) -> None: # Create set of mounted volumes unless Docker volumes are disabled if not self._experiment_config.check_flag("docker_copy_build_files"): - volumes = { - os.path.abspath(output_dir): {"bind": "/mnt/function", "mode": "rw"} - } + volumes = {os.path.abspath(output_dir): {"bind": "/mnt/function", "mode": "rw"}} package_script = 
os.path.abspath( os.path.join(self._benchmark_path, self.language_name, "package.sh") ) @@ -531,15 +529,11 @@ def ensure_image(name: str) -> None: try: self.logging.info( "Docker build of benchmark dependencies in container " - "of image {repo}:{image}".format( - repo=repo_name, image=image_name - ) + "of image {repo}:{image}".format(repo=repo_name, image=image_name) ) uid = os.getuid() # Standard, simplest build - if not self._experiment_config.check_flag( - "docker_copy_build_files" - ): + if not self._experiment_config.check_flag("docker_copy_build_files"): self.logging.info( "Docker mount of benchmark code from path {path}".format( path=os.path.abspath(output_dir) @@ -580,9 +574,7 @@ def ensure_image(name: str) -> None: "Send benchmark code from path {path} to " "Docker instance".format(path=os.path.abspath(output_dir)) ) - tar_archive = os.path.join( - output_dir, os.path.pardir, "function.tar" - ) + tar_archive = os.path.join(output_dir, os.path.pardir, "function.tar") with tarfile.open(tar_archive, "w") as tar: for f in os.listdir(output_dir): tar.add(os.path.join(output_dir, f), arcname=f) @@ -628,18 +620,14 @@ def recalculate_code_size(self): def build( self, - deployment_build_step: Callable[ - ["Benchmark", str, bool, bool], Tuple[str, int, str] - ], + deployment_build_step: Callable[["Benchmark", str, bool, bool], Tuple[str, int, str]], is_workflow: bool, ) -> Tuple[bool, str, bool, str]: # Skip build if files are up to date and user didn't enforce rebuild if self.is_cached and self.is_cached_valid: self.logging.info( - "Using cached benchmark {} at {}".format( - self.benchmark, self.code_location - ) + "Using cached benchmark {} at {}".format(self.benchmark, self.code_location) ) if self.container_deployment: return ( @@ -656,9 +644,7 @@ def build( if not self.is_cached else "cached code package is not up to date/build enforced." ) - self.logging.info( - "Building benchmark {}. 
Reason: {}".format(self.benchmark, msg) - ) + self.logging.info("Building benchmark {}. Reason: {}".format(self.benchmark, msg)) # clear existing cache information self._code_package = None @@ -673,13 +659,11 @@ def build( self.add_deployment_package(self._output_dir) self.install_dependencies(self._output_dir) - self._code_location, self._code_size, self._container_uri = ( - deployment_build_step( - self, - os.path.abspath(self._output_dir), - is_workflow, - self.is_cached_valid, - ) + (self._code_location, self._code_size, self._container_uri,) = deployment_build_step( + self, + os.path.abspath(self._output_dir), + is_workflow, + self.is_cached_valid, ) self.logging.info( ( @@ -808,9 +792,7 @@ def code_package_modify(self, filename: str, data: bytes): if self.code_package_is_archive(): self._update_zip(self.code_location, filename, data) new_size = self.code_package_recompute_size() / 1024.0 / 1024.0 - self.logging.info( - f"Modified zip package {self.code_location}, new size {new_size} MB" - ) + self.logging.info(f"Modified zip package {self.code_location}, new size {new_size} MB") else: raise NotImplementedError() @@ -895,9 +877,7 @@ def load_benchmark_input(path: str) -> BenchmarkModuleInterface: import importlib.machinery import importlib.util - loader = importlib.machinery.SourceFileLoader( - "input", os.path.join(path, "input.py") - ) + loader = importlib.machinery.SourceFileLoader("input", os.path.join(path, "input.py")) spec = importlib.util.spec_from_loader(loader.name, loader) assert spec mod = importlib.util.module_from_spec(spec) diff --git a/sebs/faas/config.py b/sebs/faas/config.py index ad3a631be..6aebd514a 100644 --- a/sebs/faas/config.py +++ b/sebs/faas/config.py @@ -209,6 +209,9 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config name = config["name"] implementations = {"local": LocalConfig.deserialize} + from sebs.sonataflow.config import SonataFlowConfig + + implementations["sonataflow"] = 
SonataFlowConfig.deserialize if has_platform("aws"): from sebs.aws.config import AWSConfig diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index 039457cd4..9f7c620ae 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -1,6 +1,6 @@ from abc import ABC from abc import abstractmethod -from typing import Optional, List, Callable, Union, Dict, Type, Tuple +from typing import Optional, List, Callable, Union, Dict, Type import json diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 0adcfc1d7..6ee7c95d2 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -11,7 +11,13 @@ from sebs.config import SeBSConfig from sebs.faas.resources import SystemResources from sebs.faas.config import Resources -from sebs.faas.function import CloudBenchmark, Function, Trigger, ExecutionResult, Workflow +from sebs.faas.function import ( + CloudBenchmark, + Function, + Trigger, + ExecutionResult, + Workflow, +) from sebs.utils import LoggingBase from .config import Config @@ -173,7 +179,11 @@ def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] @abstractmethod def package_code( - self, code_package: Benchmark, directory: str, is_workflow: bool, is_cached: bool + self, + code_package: Benchmark, + directory: str, + is_workflow: bool, + is_cached: bool, ) -> Tuple[str, int, str]: pass @@ -264,7 +274,9 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) if not func_name: func_name = self.default_function_name(code_package) - rebuilt, _, container_deployment, container_uri = code_package.build(self.package_code, False) + rebuilt, _, container_deployment, container_uri = code_package.build( + self.package_code, False + ) """ There's no function with that name? 
@@ -363,7 +375,7 @@ def update_workflow(self, workflow: Workflow, code_package: Benchmark): def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = None): if code_package.language_version not in self.system_config.supported_language_versions( - self.name(), code_package.language_name + self.name(), code_package.language_name, code_package.architecture ): raise Exception( "Unsupported {language} version {version} in {system}!".format( @@ -375,7 +387,9 @@ def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = N if not workflow_name: workflow_name = self.default_function_name(code_package) - rebuilt, _ = code_package.build(self.package_code, True) + rebuilt, _, container_deployment, container_uri = code_package.build( + self.package_code, True + ) """ There's no function with that name? @@ -412,8 +426,23 @@ def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = N workflow_name=workflow_name, loc=code_location ) ) + needs_refresh = getattr(workflow, "needs_refresh", False) # is the function up-to-date? - if workflow.code_package_hash != code_package.hash or rebuilt: + if needs_refresh: + self.logging.info( + f"Cached workflow {workflow_name} requires refreshing local resources." 
+ ) + self.update_workflow(workflow, code_package) + if hasattr(workflow, "needs_refresh"): + workflow.needs_refresh = False + self.cache_client.add_benchmark( + deployment_name=self.name(), + language_name=code_package.language_name, + code_package=code_package, + benchmark=workflow, + ) + code_package.query_cache() + elif workflow.code_package_hash != code_package.hash or rebuilt: self.logging.info( f"Cached workflow {workflow_name} with hash " f"{workflow.code_package_hash} is not up to date with " diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 9163bd62d..d1c726d10 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -340,12 +340,16 @@ def create_function_trigger( ) deployed = False + begin = time.time() while not deployed: status_res = our_function_req.execute() if status_res["status"] == "ACTIVE": deployed = True else: time.sleep(3) + if time.time() - begin > 300: # wait 5 minutes; TODO: make it configurable + self.logging.error(f"Failed to deploy function: {function.name}") + raise RuntimeError("Deployment timeout!") self.logging.info(f"Function {function.name} - deployed!") invoke_url = status_res["httpsTrigger"]["url"] diff --git a/sebs/local/config.py b/sebs/local/config.py index 0b512c67c..5074a323c 100644 --- a/sebs/local/config.py +++ b/sebs/local/config.py @@ -53,7 +53,8 @@ def initialize(res: Resources, config: dict): def update_cache(self, cache: Cache): super().update_cache(cache) cache.update_config( - val=list(self._allocated_ports), keys=["local", "resources", "allocated_ports"] + val=list(self._allocated_ports), + keys=["local", "resources", "allocated_ports"], ) @staticmethod @@ -63,6 +64,11 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour cached_config = cache.get_config("local") ret._deserialize(ret, config, cached_config) + if "resources" in config: + ret.load_redis(config["resources"]) + elif cached_config and "resources" in cached_config: + ret.load_redis(cached_config["resources"]) + # Load cached 
values if cached_config and "resources" in cached_config: LocalResources.initialize(ret, cached_config["resources"]) @@ -113,7 +119,11 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config return config_obj def serialize(self) -> dict: - out = {"name": "local", "region": self._region, "resources": self._resources.serialize()} + out = { + "name": "local", + "region": self._region, + "resources": self._resources.serialize(), + } return out def update_cache(self, cache: Cache): diff --git a/sebs/local/executor.py b/sebs/local/executor.py new file mode 100644 index 000000000..8212e2522 --- /dev/null +++ b/sebs/local/executor.py @@ -0,0 +1,205 @@ +import concurrent.futures +import copy +import json +from typing import Any, Dict, List, Optional + +import requests + +from sebs.faas.fsm import Loop, Map, Parallel, Repeat, State, Switch, Task + + +def _get_var(obj: Any, path: str) -> Any: + parts = [segment.strip() for segment in path.split(".") if segment.strip()] + value = obj + for part in parts: + try: + value = value[part] + except (KeyError, TypeError): + raise WorkflowExecutionError( + f"Missing key '{part}' while reading path '{path}' in object {value!r}" + ) + return value + + +def _set_var(obj: Any, value: Any, path: str): + parts = [segment.strip() for segment in path.split(".") if segment.strip()] + target = obj + for part in parts[:-1]: + target = target[part] + target[parts[-1]] = value + + +class WorkflowExecutionError(RuntimeError): + pass + + +class LocalWorkflowExecutor: + """ + Execute workflow definitions (benchmarks/600.workflows/*/definition.json) + by invoking local function containers directly. Mirrors the orchestration + semantics implemented in Azure/GCP wrappers. 
+ """ + + def __init__(self, definition_path: str, bindings: Dict[str, Dict[str, str]]): + self._definition_path = definition_path + with open(definition_path) as definition_file: + definition = json.load(definition_file) + self._states = { + name: State.deserialize(name, payload) for name, payload in definition["states"].items() + } + self._root = definition["root"] + self._bindings = bindings + + def run(self, payload: dict, request_id: str) -> dict: + return self._run_state_machine(self._states, self._root, payload, request_id) + + def _run_state_machine( + self, states: Dict[str, State], root_name: str, payload: dict, request_id: str + ) -> dict: + current = states[root_name] + result = payload + while current: + if isinstance(current, Task): + result, current = self._execute_task(states, current, result, request_id) + elif isinstance(current, Switch): + current = self._execute_switch(states, current, result) + elif isinstance(current, Map): + result = self._execute_map(current, result, request_id) + current = states.get(current.next) + elif isinstance(current, Repeat): + result = self._execute_repeat(current, result, request_id) + current = states.get(current.next) + elif isinstance(current, Loop): + self._execute_loop(current, result, request_id) + current = states.get(current.next) + elif isinstance(current, Parallel): + result = self._execute_parallel(current, result, request_id) + current = states.get(current.next) + else: + raise WorkflowExecutionError(f"Undefined state: {current}") + return result + + def _call_function(self, func_name: str, payload: dict, request_id: str) -> dict: + if func_name not in self._bindings: + raise WorkflowExecutionError(f"No binding found for function {func_name}") + binding = self._bindings[func_name] + url = f"http://{binding['host']}:{binding['port']}/" + body_payload = payload + if isinstance(payload, dict): + body_payload = dict(payload) + body_payload.setdefault("request_id", request_id) + 
body_payload.setdefault("request-id", request_id) + response = requests.post( + url, + json={"payload": body_payload, "request_id": request_id}, + timeout=900, + ) + if response.status_code >= 300: + raise WorkflowExecutionError( + f"Invocation of {func_name} at {url} failed with status {response.status_code}" + ) + body = response.json() + if isinstance(body, dict): + candidate = body + if "result" in body and isinstance(body["result"], dict): + candidate = body["result"].get("output", candidate) + if isinstance(candidate, dict) and "payload" in candidate: + return candidate["payload"] + if "payload" in body: + return body["payload"] + return body + + def _execute_task( + self, states: Dict[str, State], state: Task, data: dict, request_id: str + ) -> (dict, Optional[State]): + try: + result = self._call_function(state.func_name, data, request_id) + except Exception: + if state.failure: + return data, states.get(state.failure) + raise + return result, states.get(state.next) + + def _execute_switch( + self, states: Dict[str, State], switch: Switch, data: dict + ) -> Optional[State]: + ops = { + "<": lambda x, y: x < y, + "<=": lambda x, y: x <= y, + "==": lambda x, y: x == y, + ">=": lambda x, y: x >= y, + ">": lambda x, y: x > y, + } + for case in switch.cases: + lhs = _get_var(data, case.var) + if ops[case.op](lhs, case.val): + return states.get(case.next) + if switch.default: + return states.get(switch.default) + return None + + def _build_map_payload(self, element: Any, data: dict, common_params: Optional[str]) -> dict: + if not common_params: + return element + payload: Dict[str, Any] = {"array_element": element} + for param in [entry.strip() for entry in common_params.split(",") if entry.strip()]: + payload[param] = _get_var(data, param) + return payload + + def _execute_map(self, map_state: Map, data: dict, request_id: str) -> dict: + array = _get_var(data, map_state.array) + if not isinstance(array, list): + raise WorkflowExecutionError( + f"Map state 
{map_state.name} expects list at {map_state.array}" + ) + map_states = {n: State.deserialize(n, s) for n, s in map_state.funcs.items()} + results: List[Any] = [] + tasks: List[Any] = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=max(len(array), 1)) as executor: + for element in array: + payload = self._build_map_payload(element, data, map_state.common_params) + tasks.append( + executor.submit( + self._run_state_machine, + map_states, + map_state.root, + payload, + request_id, + ) + ) + for task in tasks: + results.append(task.result()) + _set_var(data, results, map_state.array) + return data + + def _execute_repeat(self, repeat: Repeat, data: dict, request_id: str) -> dict: + result = data + for _ in range(repeat.count): + result = self._call_function(repeat.func_name, result, request_id) + return result + + def _execute_loop(self, loop: Loop, data: dict, request_id: str): + array = _get_var(data, loop.array) + for element in array: + self._call_function(loop.func_name, element, request_id) + + def _execute_parallel(self, parallel: Parallel, data: dict, request_id: str) -> dict: + results: Dict[str, Any] = {} + tasks: List[concurrent.futures.Future] = [] + labels: List[str] = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=len(parallel.funcs)) as executor: + for branch in parallel.funcs: + branch_states = {n: State.deserialize(n, s) for n, s in branch["states"].items()} + labels.append(branch["root"]) + tasks.append( + executor.submit( + self._run_state_machine, + branch_states, + branch["root"], + copy.deepcopy(data), + request_id, + ) + ) + for label, future in zip(labels, tasks): + results[label] = future.result() + return results diff --git a/sebs/local/function.py b/sebs/local/function.py index c940811ce..b92b17325 100644 --- a/sebs/local/function.py +++ b/sebs/local/function.py @@ -52,24 +52,7 @@ def __init__( self._instance = docker_container self._instance_id = docker_container.id self._instance.reload() - networks = 
self._instance.attrs["NetworkSettings"]["Networks"] - self._port = port - - if is_linux(): - self._url = "{IPAddress}:{Port}".format( - IPAddress=networks["bridge"]["IPAddress"], Port=port - ) - if not self._url: - self.logging.error( - f"Couldn't read the IP address of container from attributes " - f"{json.dumps(self._instance.attrs, indent=2)}" - ) - raise RuntimeError( - f"Incorrect detection of IP address for container with id {self._instance_id}" - ) - else: - self._url = f"localhost:{port}" - + self._configure_endpoint(port) self._measurement_pid = measurement_pid @property @@ -92,6 +75,28 @@ def memory_measurement_pid(self) -> Optional[int]: def typename() -> str: return "Local.LocalFunction" + def refresh_endpoint(self, port: int): + self._configure_endpoint(port) + + def _configure_endpoint(self, port: int): + self._instance.reload() + networks = self._instance.attrs["NetworkSettings"]["Networks"] + self._port = port + if is_linux(): + self._url = "{IPAddress}:{Port}".format( + IPAddress=networks["bridge"]["IPAddress"], Port=port + ) + if not self._url: + self.logging.error( + f"Couldn't read the IP address of container from attributes " + f"{json.dumps(self._instance.attrs, indent=2)}" + ) + raise RuntimeError( + f"Incorrect detection of IP address for container with id {self._instance_id}" + ) + else: + self._url = f"localhost:{port}" + def serialize(self) -> dict: return { **super().serialize(), diff --git a/sebs/local/local.py b/sebs/local/local.py index 841251138..582e02653 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -1,8 +1,11 @@ +import json import os import requests import shutil import time -from typing import cast, Dict, List, Optional, Type, Tuple # noqa +import re +import datetime +from typing import cast, Dict, List, Optional, Type, Tuple, Set # noqa import subprocess import socket @@ -14,6 +17,8 @@ from sebs.utils import LoggingHandlers, is_linux from sebs.local.config import LocalConfig from sebs.local.function import 
LocalFunction +from sebs.local.workflow import LocalWorkflow +from sebs.local.triggers import WorkflowLocalTrigger from sebs.faas.function import ( CloudBenchmark, Function, @@ -25,6 +30,38 @@ from sebs.faas.system import System from sebs.faas.config import Resources from sebs.benchmark import Benchmark +from sebs.faas.fsm import State, Task, Map, Repeat, Loop, Parallel + + +def _collect_task_names(state: State) -> Set[str]: + names: Set[str] = set() + if isinstance(state, Task): + names.add(state.func_name) + elif isinstance(state, Repeat): + names.add(state.func_name) + elif isinstance(state, Loop): + names.add(state.func_name) + elif isinstance(state, Map): + for nested_name, nested_state in state.funcs.items(): + nested_obj = ( + nested_state + if isinstance(nested_state, State) + else State.deserialize(nested_name, nested_state) + ) + names.update(_collect_task_names(nested_obj)) + elif isinstance(state, Parallel): + for subworkflow in state.funcs: + for nested_name, nested_state in subworkflow["states"].items(): + names.update(_collect_task_names(State.deserialize(nested_name, nested_state))) + return names + + +def _workflow_task_names(definition: dict) -> Set[str]: + states = {n: State.deserialize(n, s) for n, s in definition["states"].items()} + names: Set[str] = set() + for state in states.values(): + names.update(_collect_task_names(state)) + return names class Local(System): @@ -45,7 +82,7 @@ def function_type() -> "Type[Function]": @staticmethod def workflow_type() -> "Type[Workflow]": - raise NotImplementedError() + return LocalWorkflow @property def config(self) -> LocalConfig: @@ -93,9 +130,153 @@ def __init__( self._memory_measurement_path: Optional[str] = None # disable external measurements self._measure_interval = -1 + self._bridge_ip: Optional[str] = self._detect_bridge_ip() self.initialize_resources(select_prefix="local") + @staticmethod + def _load_workflow_definition(path: str) -> dict: + with open(path) as definition_file: + return 
json.load(definition_file) + + @staticmethod + def _normalize_workflow_id(name: str) -> str: + sanitized = re.sub(r"[^A-Za-z0-9_-]", "-", name) + if not sanitized: + sanitized = "wf" + if not sanitized[0].isalpha(): + sanitized = f"wf-{sanitized}" + return sanitized + + def _allocate_host_port(self, start_port: int, range_size: int = 1000) -> int: + for port in range(start_port, start_port + range_size): + if port in self.config.resources.allocated_ports: + continue + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + try: + sock.bind(("127.0.0.1", port)) + except socket.error: + continue + self.config.resources.allocated_ports.add(port) + return port + raise RuntimeError( + f"Failed to allocate host port for container: No ports available between " + f"{start_port} and {start_port + range_size - 1}" + ) + + def _detect_bridge_ip(self) -> Optional[str]: + try: + network = self._docker_client.networks.get("bridge") + config = network.attrs.get("IPAM", {}).get("Config", []) + if config: + gateway = config[0].get("Gateway") + if gateway: + return gateway + except docker.errors.DockerException: + pass + return None + + def _function_network_endpoint(self, func: LocalFunction) -> Tuple[str, str]: + host, port = func.url.split(":") + if is_linux(): + return host, port + host_override = os.getenv("DOCKER_HOST_IP") + if host_override: + return host_override, port + return host, port + + def _container_service_address(self, endpoint: str) -> str: + if not endpoint or ":" not in endpoint: + return endpoint + host, port = endpoint.split(":", 1) + if host not in ("127.0.0.1", "localhost"): + return endpoint + if self._bridge_ip is None: + self._bridge_ip = self._detect_bridge_ip() + if self._bridge_ip: + return f"{self._bridge_ip}:{port}" + if is_linux(): + return endpoint + host_override = os.getenv("DOCKER_HOST_IP", "host.docker.internal") + return f"{host_override}:{port}" + + def _workflow_env(self, 
workflow_name: str, module_name: str) -> Dict[str, str]: + overrides = { + "SEBS_WORKFLOW_NAME": workflow_name, + "SEBS_WORKFLOW_FUNC": module_name, + "SEBS_WORKFLOW_MODULE": f"function.{module_name}", + "SEBS_WORKFLOW_LOCAL": "1", + } + redis_host = self.config.resources.redis_host + if redis_host: + if ":" in redis_host: + host, port = redis_host.split(":", 1) + else: + host, port = redis_host, "6379" + container_host = host + if host in ("127.0.0.1", "localhost"): + container_host = self._bridge_ip or host + overrides["SEBS_REDIS_HOST"] = container_host + overrides["SEBS_REDIS_PORT"] = port + if self.config.resources.redis_password: + overrides["SEBS_REDIS_PASSWORD"] = self.config.resources.redis_password + return overrides + + def _prepare_workflow_functions( + self, + code_package: Benchmark, + workflow_name: str, + workflow_id: str, + definition_path: str, + definition: dict, + existing_workflow: Optional[LocalWorkflow] = None, + ) -> Tuple[List[LocalFunction], Dict[str, Dict[str, str]], str]: + + task_names = sorted(_workflow_task_names(definition)) + if not task_names: + raise RuntimeError("Workflow definition does not contain any task states.") + + existing_funcs = ( + {func.name: func for func in existing_workflow.functions} if existing_workflow else {} + ) + + functions: List[LocalFunction] = [] + bindings: Dict[str, Dict[str, str]] = {} + + required_containers = {f"{workflow_name}___{task}" for task in task_names} + obsolete_funcs = set(existing_funcs.keys()) - required_containers + for obsolete in obsolete_funcs: + existing_funcs[obsolete].stop() + + for task_name in task_names: + container_name = f"{workflow_name}___{task_name}" + existing_func = existing_funcs.get(container_name) + if existing_func: + existing_func.stop() + + env = self._workflow_env(workflow_name, task_name) + func_instance = self._start_container(code_package, container_name, existing_func, env) + functions.append(func_instance) + host, port = 
self._function_network_endpoint(func_instance) + workflow_function_name = f"{workflow_id}_{task_name}" + bindings[task_name] = { + "type": "custom", + "operation": "rest:post:/", + "host": host, + "port": port, + "workflow_function_name": workflow_function_name, + } + + resources_dir = os.path.join(code_package.code_location, "workflow_resources") + workflows_dir = os.path.join(resources_dir, "workflows") + os.makedirs(workflows_dir, exist_ok=True) + os.makedirs(resources_dir, exist_ok=True) + definition_copy = os.path.join(workflows_dir, f"{workflow_id}.sw.json") + shutil.copy2(definition_path, definition_copy) + + return functions, bindings, definition_copy + """ Shut down minio storage instance. """ @@ -122,7 +303,11 @@ def shutdown(self): """ def package_code( - self, code_package: Benchmark, directory: str, is_workflow: bool, is_cached: bool + self, + code_package: Benchmark, + directory: str, + is_workflow: bool, + is_cached: bool, ) -> Tuple[str, int, str]: CONFIG_FILES = { @@ -145,7 +330,11 @@ def package_code( return directory, bytes_size, "" def _start_container( - self, code_package: Benchmark, func_name: str, func: Optional[LocalFunction] + self, + code_package: Benchmark, + func_name: str, + func: Optional[LocalFunction], + env_overrides: Optional[Dict[str, str]] = None, ) -> LocalFunction: container_name = "{}:run.local.{}.{}".format( @@ -159,20 +348,33 @@ def _start_container( "CONTAINER_GID": str(os.getgid()), "CONTAINER_USER": self._system_config.username(self.name(), code_package.language_name), } - if self.config.resources.storage_config: - - environment = {**self.config.resources.storage_config.envs(), **environment} + storage_cfg = self.config.resources.storage_config + if storage_cfg: + storage_envs = dict(storage_cfg.envs()) + if "MINIO_ADDRESS" in storage_envs: + storage_envs["MINIO_ADDRESS"] = self._container_service_address( + storage_envs["MINIO_ADDRESS"] + ) + environment = {**storage_envs, **environment} if code_package.uses_nosql: 
nosql_storage = self.system_resources.get_nosql_storage() - environment = {**environment, **nosql_storage.envs()} + nosql_envs = dict(nosql_storage.envs()) + if "NOSQL_STORAGE_ENDPOINT" in nosql_envs: + nosql_envs["NOSQL_STORAGE_ENDPOINT"] = self._container_service_address( + nosql_envs["NOSQL_STORAGE_ENDPOINT"] + ) + environment = {**environment, **nosql_envs} for original_name, actual_name in nosql_storage.get_tables( code_package.benchmark ).items(): environment[f"NOSQL_STORAGE_TABLE_{original_name}"] = actual_name + if env_overrides: + environment.update(env_overrides) + # FIXME: make CPUs configurable # FIXME: configure memory # FIXME: configure timeout @@ -197,36 +399,19 @@ def _start_container( # If SeBS is running on non-linux platforms, # container port must be mapped to host port to make it reachable # Check if the system is NOT Linux or that it is WSL - port = self.DEFAULT_PORT if not is_linux(): - port_found = False - for p in range(self.DEFAULT_PORT, self.DEFAULT_PORT + 1000): - # check no container has been deployed on docker's port p - if p not in self.config.resources.allocated_ports: - # check if port p on the host is free - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - try: - s.bind(("127.0.0.1", p)) - # The port is available - port = p - port_found = True - self.config.resources.allocated_ports.add(p) - break - except socket.error: - # The port is already in use - continue - - if not port_found: - raise RuntimeError( - f"Failed to allocate port for container: No ports available between " - f"{self.DEFAULT_PORT} and {self.DEFAULT_PORT + 999}" - ) - + port = self._allocate_host_port(self.DEFAULT_PORT) container_kwargs["command"] = f"/bin/bash /sebs/run_server.sh {port}" container_kwargs["ports"] = {f"{port}/tcp": port} + else: + port = self.DEFAULT_PORT - container = self._docker_client.containers.run(**container_kwargs) + from docker.types import DeviceRequest + + container 
= self._docker_client.containers.run( + **container_kwargs, + device_requests=[DeviceRequest(driver="nvidia", count=-1, capabilities=[["gpu"]])], + ) pid: Optional[int] = None if self.measurements_enabled and self._memory_measurement_path is not None: @@ -259,6 +444,7 @@ def _start_container( else: func.container = container func._measurement_pid = pid + func.refresh_endpoint(port) # Wait until server starts max_attempts = 10 @@ -316,7 +502,7 @@ def update_function( There's only one trigger - HTTP. """ - def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: from sebs.local.function import HTTPTrigger function = cast(LocalFunction, func) @@ -375,15 +561,83 @@ def format_function_name(func_name: str) -> str: return func_name def create_workflow(self, code_package: Benchmark, workflow_name: str) -> Workflow: - raise NotImplementedError() + workflow_name = self.format_function_name(workflow_name) + definition_path = os.path.join(code_package.benchmark_path, "definition.json") + if not os.path.exists(definition_path): + raise ValueError(f"No workflow definition found for {workflow_name}") + + definition = self._load_workflow_definition(definition_path) + workflow_id = self._normalize_workflow_id(workflow_name) + + functions, bindings, definition_output = self._prepare_workflow_functions( + code_package, workflow_name, workflow_id, definition_path, definition + ) + + function_cfg = FunctionConfig.from_benchmark(code_package) + workflow = LocalWorkflow( + workflow_name, + functions, + code_package.benchmark, + workflow_id, + code_package.hash, + function_cfg, + definition_output, + bindings, + ) + trigger = WorkflowLocalTrigger(definition_output, bindings) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + return workflow def create_workflow_trigger( self, workflow: Workflow, trigger_type: 
Trigger.TriggerType ) -> Trigger: - raise NotImplementedError() + workflow = cast(LocalWorkflow, workflow) + if trigger_type != Trigger.TriggerType.HTTP: + raise RuntimeError("Local workflows currently support only HTTP triggers.") + + trigger = WorkflowLocalTrigger(workflow.definition_path, workflow.function_bindings) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + self.cache_client.update_benchmark(workflow) + return trigger def update_workflow(self, workflow: Workflow, code_package: Benchmark): - raise NotImplementedError() + workflow = cast(LocalWorkflow, workflow) + definition_path = os.path.join(code_package.benchmark_path, "definition.json") + if not os.path.exists(definition_path): + raise ValueError(f"No workflow definition found for {workflow.name}") + + definition = self._load_workflow_definition(definition_path) + workflow_id = ( + workflow.workflow_id + if workflow.workflow_id + else self._normalize_workflow_id(workflow.name) + ) + functions, bindings, definition_output = self._prepare_workflow_functions( + code_package, + workflow.name, + workflow_id, + definition_path, + definition, + workflow, + ) + workflow.set_functions(functions) + workflow.definition_path = definition_output + workflow.function_bindings = bindings + workflow.workflow_id = workflow_id + + triggers = workflow.triggers(Trigger.TriggerType.HTTP) + if not triggers: + trigger = WorkflowLocalTrigger(workflow.definition_path, workflow.function_bindings) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + else: + for trigger in triggers: + if isinstance(trigger, WorkflowLocalTrigger): + trigger.update(workflow.definition_path, workflow.function_bindings) + + self.logging.info(f"Updated workflow {workflow.name} definition.") def start_measurements(self, measure_interval: int) -> Optional[str]: diff --git a/sebs/local/triggers.py b/sebs/local/triggers.py new file mode 100644 index 000000000..ea89a4f47 --- /dev/null +++ 
b/sebs/local/triggers.py @@ -0,0 +1,70 @@ +import concurrent.futures +import datetime +import uuid +from typing import Optional + +from sebs.faas.function import ExecutionResult, Trigger +from sebs.local.executor import LocalWorkflowExecutor, WorkflowExecutionError + + +class WorkflowLocalTrigger(Trigger): + def __init__(self, definition_path: str, bindings: dict): + super().__init__() + self._definition_path = definition_path + self._bindings = bindings + self._executor = LocalWorkflowExecutor(definition_path, bindings) + + @staticmethod + def typename() -> str: + return "Local.WorkflowLocalTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.HTTP + + def _invoke(self, payload: dict) -> ExecutionResult: + request_id = str(uuid.uuid4())[0:8] + begin = datetime.datetime.now() + result = ExecutionResult.from_times(begin, begin) + try: + output = self._executor.run(payload, request_id) + end = datetime.datetime.now() + result = ExecutionResult.from_times(begin, end) + result.request_id = request_id + result.output = output + except WorkflowExecutionError as exc: + end = datetime.datetime.now() + result = ExecutionResult.from_times(begin, end) + result.request_id = request_id + result.stats.failure = True + self.logging.error(f"Workflow execution failed: {exc}") + except Exception as exc: + end = datetime.datetime.now() + result = ExecutionResult.from_times(begin, end) + result.request_id = request_id + result.stats.failure = True + self.logging.error(f"Workflow execution error: {exc}") + return result + + def sync_invoke(self, payload: dict) -> ExecutionResult: + return self._invoke(payload) + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + pool = concurrent.futures.ThreadPoolExecutor() + return pool.submit(self._invoke, payload) + + def serialize(self) -> dict: + return { + "type": "LOCAL", + "definition_path": self._definition_path, + "bindings": self._bindings, + } + + @classmethod + def 
deserialize(cls, obj: dict) -> "WorkflowLocalTrigger": + return cls(obj["definition_path"], obj["bindings"]) + + def update(self, definition_path: str, bindings: dict): + self._definition_path = definition_path + self._bindings = bindings + self._executor = LocalWorkflowExecutor(definition_path, bindings) diff --git a/sebs/local/workflow.py b/sebs/local/workflow.py new file mode 100644 index 000000000..29a2dc2a4 --- /dev/null +++ b/sebs/local/workflow.py @@ -0,0 +1,84 @@ +import logging +import os +from typing import Dict, List, Optional + +from sebs.faas.function import FunctionConfig, Workflow +from sebs.local.function import LocalFunction +from sebs.local.triggers import WorkflowLocalTrigger + + +class LocalWorkflow(Workflow): + def __init__( + self, + name: str, + functions: List[LocalFunction], + benchmark: str, + workflow_id: str, + code_package_hash: str, + cfg: FunctionConfig, + definition_path: str, + function_bindings: Dict[str, Dict], + ): + super().__init__(benchmark, name, code_package_hash, cfg) + self._functions: Dict[str, LocalFunction] = {func.name: func for func in functions} + self.workflow_id = workflow_id + self.definition_path = definition_path + self.function_bindings = function_bindings + self.needs_refresh = False + + @property + def functions(self) -> List[LocalFunction]: + return list(self._functions.values()) + + def set_functions(self, functions: List[LocalFunction]): + self._functions = {func.name: func for func in functions} + + def update_function(self, func: LocalFunction): + self._functions[func.name] = func + + @staticmethod + def typename() -> str: + return "Local.Workflow" + + def serialize(self) -> dict: + serialized = { + **super().serialize(), + "functions": [func.serialize() for func in self._functions.values()], + "definition_path": self.definition_path, + "function_bindings": self.function_bindings, + "workflow_id": self.workflow_id, + } + serialized["triggers"] = [] + return serialized + + @staticmethod + def 
deserialize(cached_config: dict) -> "LocalWorkflow": + funcs: List[LocalFunction] = [] + missing_function = False + for entry in cached_config["functions"]: + try: + funcs.append(LocalFunction.deserialize(entry)) + except RuntimeError as exc: + logging.getLogger(__name__).warning( + "Skipping cached function for workflow %s: %s", + cached_config.get("name", ""), + exc, + ) + missing_function = True + cfg = FunctionConfig.deserialize(cached_config["config"]) + workflow = LocalWorkflow( + cached_config["name"], + funcs, + cached_config["benchmark"], + cached_config.get("workflow_id", cached_config["name"]), + cached_config["hash"], + cfg, + cached_config.get("definition_path", ""), + cached_config.get("function_bindings", {}), + ) + workflow.needs_refresh = missing_function + if os.path.exists(workflow.definition_path): + workflow.add_trigger( + WorkflowLocalTrigger(workflow.definition_path, workflow.function_bindings) + ) + return workflow diff --git a/sebs/regression.py b/sebs/regression.py index 4d73d3b00..7d5cd5929 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -21,6 +21,8 @@ "220.video-processing", "311.compression", "411.image-recognition", + "412.language-bert", + "413.recommendation", "501.graph-pagerank", "502.graph-mst", "503.graph-bfs", diff --git a/sebs/sebs.py b/sebs/sebs.py index ab8a54b9d..39489987d 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -90,6 +90,10 @@ def get_deployment( name = dep_config["name"] implementations: Dict[str, Type[FaaSSystem]] = {"local": Local} + from sebs.sonataflow import SonataFlow + + implementations["sonataflow"] = SonataFlow + if has_platform("aws"): from sebs.aws import AWS diff --git a/sebs/sonataflow/__init__.py b/sebs/sonataflow/__init__.py new file mode 100644 index 000000000..8468f1ba6 --- /dev/null +++ b/sebs/sonataflow/__init__.py @@ -0,0 +1,6 @@ +from .sonataflow import SonataFlow +from .config import SonataFlowConfig +from .workflow import SonataFlowWorkflow +from .triggers import 
WorkflowSonataFlowTrigger + +__all__ = ["SonataFlow", "SonataFlowConfig", "SonataFlowWorkflow", "WorkflowSonataFlowTrigger"] diff --git a/sebs/sonataflow/config.py b/sebs/sonataflow/config.py new file mode 100644 index 000000000..2cc0932ab --- /dev/null +++ b/sebs/sonataflow/config.py @@ -0,0 +1,138 @@ +from typing import cast, Optional, Set + +from sebs.cache import Cache +from sebs.faas.config import Config, Credentials, Resources +from sebs.storage.resources import SelfHostedResources +from sebs.storage.config import NoSQLStorageConfig, PersistentStorageConfig +from sebs.utils import LoggingHandlers + + +class SonataFlowCredentials(Credentials): + def serialize(self) -> dict: + return {} + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + return SonataFlowCredentials() + + +class SonataFlowResources(SelfHostedResources): + def __init__( + self, + storage_cfg: Optional[PersistentStorageConfig] = None, + nosql_storage_cfg: Optional[NoSQLStorageConfig] = None, + ): + super().__init__("sonataflow", storage_cfg, nosql_storage_cfg) + self._allocated_ports: Set[int] = set() + self._runtime_url: str = "http://localhost:8080" + self._endpoint_prefix: str = "services" + + @property + def allocated_ports(self) -> set: + return self._allocated_ports + + @property + def runtime_url(self) -> str: + return self._runtime_url + + @property + def endpoint_prefix(self) -> str: + return self._endpoint_prefix + + def serialize(self) -> dict: + out = super().serialize() + out["allocated_ports"] = list(self._allocated_ports) + out["runtime"] = { + "url": self._runtime_url, + "endpoint_prefix": self._endpoint_prefix, + } + return out + + @staticmethod + def _initialize_resources(res: "SonataFlowResources", cfg: dict): + source = cfg.get("resources", cfg) + if "allocated_ports" in source: + res._allocated_ports = set(source["allocated_ports"]) + runtime = source.get("runtime", {}) + res._runtime_url = runtime.get("url", 
res._runtime_url) + res._endpoint_prefix = runtime.get("endpoint_prefix", res._endpoint_prefix) + + @staticmethod + def initialize(res: Resources, config: dict): + resources = cast(SonataFlowResources, res) + SonataFlowResources._initialize_resources(resources, config) + + def update_cache(self, cache: Cache): + super().update_cache(cache) + cache.update_config(val=list(self._allocated_ports), keys=["sonataflow", "resources", "allocated_ports"]) + cache.update_config(val=self._runtime_url, keys=["sonataflow", "resources", "runtime", "url"]) + cache.update_config( + val=self._endpoint_prefix, + keys=["sonataflow", "resources", "runtime", "endpoint_prefix"], + ) + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + ret = SonataFlowResources() + + cached_config = cache.get_config("sonataflow") + ret._deserialize(ret, config, cached_config) + + if "resources" in config: + ret.load_redis(config["resources"]) + elif cached_config and "resources" in cached_config: + ret.load_redis(cached_config["resources"]) + + if cached_config and "resources" in cached_config: + SonataFlowResources._initialize_resources(ret, cached_config["resources"]) + ret.logging_handlers = handlers + ret.logging.info("Using cached resources for SonataFlow") + else: + ret.logging_handlers = handlers + SonataFlowResources._initialize_resources(ret, config) + + return ret + + +class SonataFlowConfig(Config): + def __init__(self): + super().__init__(name="sonataflow") + self._credentials = SonataFlowCredentials() + self._resources = SonataFlowResources() + + @staticmethod + def typename() -> str: + return "SonataFlow.Config" + + @staticmethod + def initialize(cfg: Config, dct: dict): + pass + + @property + def credentials(self) -> SonataFlowCredentials: + return self._credentials + + @property + def resources(self) -> SonataFlowResources: + return self._resources + + @resources.setter + def resources(self, val: SonataFlowResources): + self._resources 
= val + + @staticmethod + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + cfg = SonataFlowConfig() + cfg.resources = cast(SonataFlowResources, SonataFlowResources.deserialize(config, cache, handlers)) + cfg.logging_handlers = handlers + return cfg + + def serialize(self) -> dict: + return { + "name": "sonataflow", + "region": self._region, + "resources": self._resources.serialize(), + } + + def update_cache(self, cache: Cache): + self.resources.update_cache(cache) diff --git a/sebs/sonataflow/generator.py b/sebs/sonataflow/generator.py new file mode 100644 index 000000000..c8ae813c4 --- /dev/null +++ b/sebs/sonataflow/generator.py @@ -0,0 +1,260 @@ +import json +from typing import Dict, List, Optional, Union + +from sebs.faas.fsm import Generator, State, Task, Switch, Map, Repeat, Loop, Parallel + + +class SonataFlowGenerator(Generator): + """ + Translate a SeBS workflow definition into a SonataFlow Serverless Workflow definition. + Currently supports task, switch, map (as foreach), repeat, loop and parallel constructs + with a best-effort mapping to SonataFlow branches. + """ + + def __init__(self, workflow_id: str, bindings: Dict[str, Dict[str, str]]): + super().__init__(export_func=lambda obj: json.dumps(obj, indent=2)) + self._workflow_id = workflow_id + self._bindings = bindings + self._functions: Dict[str, Dict[str, str]] = {} + self._uses_errors = False # Track if any state uses onErrors + # Unwrap SeBS local server responses so workflow state data stays as payload. + self._action_results_expr_inner = ".result.output.payload // .payload // ." 
+ self._action_results_expr = f"${{ {self._action_results_expr_inner} }}" + + def _function_ref(self, func_name: str) -> Dict[str, str]: + binding = self._bindings.get(func_name) + if not binding: + raise ValueError(f"No binding found for function {func_name}") + ref_name = binding.get("workflow_function_name", func_name) + if ref_name not in self._functions: + host = binding["host"] + port = binding["port"] + # SonataFlow custom REST function format: operation is "rest:METHOD:URL" + # Use absolute URL since we know the host and port + url = f"http://{host}:{port}/" + self._functions[ref_name] = { + "name": ref_name, + "operation": f"rest:post:{url}", + "type": "custom", + } + return {"refName": ref_name} + + def _default_action(self, func_name: str, payload_ref: str = "${ . }") -> Dict[str, object]: + ref = self._function_ref(func_name) + request_id_expr = '${ .request_id // .requestId // .["request-id"] }' + ref["arguments"] = {"payload": payload_ref, "request_id": request_id_expr} + return { + "name": func_name, + "functionRef": ref, + "actionDataFilter": {"results": self._action_results_expr}, + } + + def postprocess(self, payloads: List[dict]) -> dict: + workflow_def = { + "id": self._workflow_id, + "name": self._workflow_id, + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": list(self._functions.values()), + "start": self.root.name, + "states": payloads, + } + # Add error definitions if any state uses onErrors + if self._uses_errors: + workflow_def["errors"] = [{"name": "workflow_error", "code": "*"}] # Catch all errors + return workflow_def + + def encode_task(self, state: Task) -> Union[dict, List[dict]]: + payload: Dict[str, object] = { + "name": state.name, + "type": "operation", + "actions": [self._default_action(state.func_name, "${ . 
}")], + } + if state.next: + payload["transition"] = state.next + else: + payload["end"] = True + if state.failure is not None: + self._uses_errors = True + payload["onErrors"] = [{"errorRef": "workflow_error", "transition": state.failure}] + return payload + + def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: + def _condition(case: Switch.Case) -> str: + # Serverless Workflow uses jq expressions wrapped in ${ } + var = case.var.strip() + needs_dot_prefix = not var.startswith((".", "$")) and not any(ch in var for ch in " ()|+*/-") + + # Ensure field path has dot prefix for jq + if needs_dot_prefix: + var = "." + self._quote_field_path(var) + elif var.startswith(".") and "." in var[1:]: + # Already has a dot prefix + var = "." + self._quote_field_path(var[1:]) + + # Wrap the condition in ${ } as per SonataFlow documentation + return f"${{ {var} {case.op} {json.dumps(case.val)} }}" + + return { + "name": state.name, + "type": "switch", + "dataConditions": [ + {"condition": _condition(c), "transition": c.next} for c in state.cases + ], + "defaultCondition": {"transition": state.default} if state.default else {"end": True}, + } + + def _quote_field_path(self, path: str) -> str: + """Return field path as-is for jq expressions. + Simple dot notation like "astros.people" works fine in jq. + """ + return path + + def encode_map(self, state: Map) -> Union[dict, List[dict]]: + iteration_param = "item" + action_args = "${ ." + iteration_param + " }" + if state.common_params: + # Merge map element with selected common parameters. + merged = {"array_element": "${ ." + iteration_param + " }"} + for param in [p.strip() for p in state.common_params.split(",") if p.strip()]: + quoted_param = self._quote_field_path(param) + merged[param] = "${ ." 
+ quoted_param + " }" + action_args = merged # type: ignore + + # Resolve the actual function name from the root state + # state.root is the name of the nested state, state.funcs contains the state definitions + root_state_def = state.funcs.get(state.root, {}) + func_name = root_state_def.get("func_name", state.root) + + quoted_array = self._quote_field_path(state.array) + output_array = getattr(state, "output_array", state.array) + quoted_output = self._quote_field_path(output_array) + payload: Dict[str, object] = { + "name": state.name, + "type": "foreach", + "inputCollection": "${ ." + quoted_array + " }", + "outputCollection": "${ ." + quoted_output + " }", + "iterationParam": iteration_param, + "actions": [self._default_action(func_name, action_args)], + } + if state.next: + payload["transition"] = state.next + else: + payload["end"] = True + return payload + + def encode_repeat(self, state: Repeat) -> Union[dict, List[dict]]: + # Encode as a foreach over a generated range. + iterations = list(range(state.count)) + input_expr = f"${{ {json.dumps(iterations)} }}" + payload: Dict[str, object] = { + "name": state.name, + "type": "foreach", + "inputCollection": input_expr, + "iterationParam": "idx", + "actions": [self._default_action(state.func_name, "${ . }")], + } + if state.next: + payload["transition"] = state.next + else: + payload["end"] = True + return payload + + def encode_loop(self, state: Loop) -> Union[dict, List[dict]]: + quoted_array = self._quote_field_path(state.array) + payload: Dict[str, object] = { + "name": state.name, + "type": "foreach", + "inputCollection": "${ ." + quoted_array + " }", + "iterationParam": "item", + "actions": [self._default_action(state.func_name, "${ .item }")], + } + if state.next: + payload["transition"] = state.next + else: + payload["end"] = True + return payload + + def _encode_branch(self, subworkflow: dict) -> Dict[str, object]: + """ + For SonataFlow, branches are flat lists of actions. 
We flatten the root state + of each subworkflow to a single action by selecting the function name. + """ + states = {n: State.deserialize(n, s) for n, s in subworkflow["states"].items()} + root_state = states.get(subworkflow["root"]) + if not root_state: + raise ValueError(f"Root state {subworkflow['root']} not found in subworkflow") + + func_name = None + if isinstance(root_state, Task): + func_name = root_state.func_name + elif isinstance(root_state, Map): + # Use the mapped state's root function as the branch action. + root_def = root_state.funcs.get(root_state.root, {}) + func_name = root_def.get("func_name", root_state.root) + elif isinstance(root_state, Repeat): + func_name = root_state.func_name + elif isinstance(root_state, Loop): + func_name = root_state.func_name + else: + raise ValueError( + f"Parallel branches currently support Task/Map/Repeat/Loop root states, got {type(root_state).__name__}" + ) + + results_expr = ( + f"${{ {{\"{subworkflow['root']}\": {self._action_results_expr_inner}}} }}" + ) + action = self._default_action(func_name, "${ . 
}") + action["actionDataFilter"] = {"results": results_expr} + return {"name": subworkflow["root"], "actions": [action]} + + def encode_parallel(self, state: Parallel) -> Union[dict, List[dict]]: + branch_roots: List[State] = [] + has_complex = False + for subworkflow in state.funcs: + states = {n: State.deserialize(n, s) for n, s in subworkflow["states"].items()} + root_state = states.get(subworkflow["root"]) + if root_state is None: + raise ValueError(f"Root state {subworkflow['root']} not found in subworkflow") + branch_roots.append(root_state) + if not isinstance(root_state, Task): + has_complex = True + + if not has_complex: + branches = [self._encode_branch(sw) for sw in state.funcs] + payload: Dict[str, object] = {"name": state.name, "type": "parallel", "branches": branches} + if state.next: + payload["transition"] = state.next + else: + payload["end"] = True + return payload + + def _clone_state(root: State, name: str, next_name: Optional[str]) -> State: + if isinstance(root, Task): + return Task(name, root.func_name, next_name, root.failure) + if isinstance(root, Map): + return Map(name, root.funcs, root.array, root.root, next_name, root.common_params) + if isinstance(root, Repeat): + return Repeat(name, root.func_name, root.count, next_name) + if isinstance(root, Loop): + return Loop(name, root.func_name, root.array, next_name) + raise ValueError( + f"Parallel branch {name} uses unsupported root state type {type(root).__name__}" + ) + + encoded_states: List[dict] = [] + for idx, root in enumerate(branch_roots): + branch_name = state.name if idx == 0 else root.name + next_name = ( + branch_roots[idx + 1].name if idx < len(branch_roots) - 1 else state.next + ) + cloned = _clone_state(root, branch_name, next_name) + if isinstance(cloned, Map) and idx < len(branch_roots) - 1: + cloned.output_array = f"_parallel_{state.name}_{idx}_results" + encoded = self.encode_state(cloned) + if isinstance(encoded, list): + encoded_states.extend(encoded) + else: + 
encoded_states.append(encoded) + return encoded_states diff --git a/sebs/sonataflow/sonataflow.py b/sebs/sonataflow/sonataflow.py new file mode 100644 index 000000000..4cb9714cc --- /dev/null +++ b/sebs/sonataflow/sonataflow.py @@ -0,0 +1,452 @@ +import os +import shutil +from typing import cast, Dict, List, Optional, Tuple, Set, Type + +import docker + +from sebs.cache import Cache +from sebs.config import SeBSConfig +from sebs.storage.resources import SelfHostedSystemResources +from sebs.utils import LoggingHandlers +from sebs.sonataflow.config import SonataFlowConfig +from sebs.sonataflow.workflow import SonataFlowWorkflow +from sebs.sonataflow.triggers import WorkflowSonataFlowTrigger +from sebs.sonataflow.generator import SonataFlowGenerator +from sebs.faas.function import ( + CloudBenchmark, + Function, + FunctionConfig, + ExecutionResult, + Trigger, + Workflow, +) +from sebs.faas.system import System +from sebs.faas.config import Resources +from sebs.benchmark import Benchmark +from sebs.faas.fsm import State, Task, Map, Repeat, Loop, Parallel +from sebs.local.function import LocalFunction +from sebs.local.local import Local + + +def _collect_task_names(state: State) -> Set[str]: + names: Set[str] = set() + if isinstance(state, Task): + names.add(state.func_name) + elif isinstance(state, Repeat): + names.add(state.func_name) + elif isinstance(state, Loop): + names.add(state.func_name) + elif isinstance(state, Map): + for nested_name, nested_state in state.funcs.items(): + nested_obj = ( + nested_state + if isinstance(nested_state, State) + else State.deserialize(nested_name, nested_state) + ) + names.update(_collect_task_names(nested_obj)) + elif isinstance(state, Parallel): + for subworkflow in state.funcs: + for nested_name, nested_state in subworkflow["states"].items(): + names.update(_collect_task_names(State.deserialize(nested_name, nested_state))) + return names + + +def _workflow_task_names(definition: dict) -> Set[str]: + states = {n: 
State.deserialize(n, s) for n, s in definition["states"].items()} + names: Set[str] = set() + for state in states.values(): + names.update(_collect_task_names(state)) + return names + + +class SonataFlow(Local): + DEFAULT_PORT = 9000 + + @staticmethod + def name(): + return "sonataflow" + + @staticmethod + def typename(): + return "SonataFlow" + + @staticmethod + def function_type() -> "Type[Function]": + return LocalFunction + + @staticmethod + def workflow_type() -> "Type[Workflow]": + return SonataFlowWorkflow + + @property + def config(self) -> SonataFlowConfig: + return self._config + + def __init__( + self, + sebs_config: SeBSConfig, + config: SonataFlowConfig, + cache_client: Cache, + docker_client: docker.client, + logger_handlers: LoggingHandlers, + ): + System.__init__( + self, + sebs_config, + cache_client, + docker_client, + SelfHostedSystemResources( + "sonataflow", config, cache_client, docker_client, logger_handlers + ), + ) + self.logging_handlers = logger_handlers + self._config = config + self._remove_containers = True + self._memory_measurement_path: Optional[str] = None + self._measure_interval = -1 + self._bridge_ip: Optional[str] = self._detect_bridge_ip() + self.initialize_resources(select_prefix="sonataflow") + + # Reuse networking helpers from Local + def _detect_bridge_ip(self) -> Optional[str]: + return Local._detect_bridge_ip(self) + + def _container_service_address(self, endpoint: str) -> str: + return Local._container_service_address(self, endpoint) + + def _function_network_endpoint(self, func: LocalFunction) -> Tuple[str, str]: + # SonataFlow runtime runs in `sebs-network` and invokes workflow functions from within + # that network. Use the function container's `sebs-network` IP and the container port. 
+ try: + func.container.reload() + networks = func.container.attrs.get("NetworkSettings", {}).get("Networks", {}) + sf_net = networks.get("sebs-network", {}) + ip = sf_net.get("IPAddress") + if ip: + return ip, str(Local.DEFAULT_PORT) + except Exception: + pass + # Fallback to Local behavior (bridge IP + published host port). + return Local._function_network_endpoint(self, func) + + def _workflow_env(self, workflow_name: str, module_name: str) -> Dict[str, str]: + # Get base environment from Local + env = Local._workflow_env(self, workflow_name, module_name) + + # Override Redis configuration for SonataFlow containers on sebs-network + # Function containers are on sebs-network and should use the Redis container hostname + redis_host = self.config.resources.redis_host + if redis_host: + if ":" in redis_host: + host, port = redis_host.split(":", 1) + else: + host, port = redis_host, "6379" + + # If the config specifies localhost, use the Redis container hostname instead + if host in ("127.0.0.1", "localhost"): + env["SEBS_REDIS_HOST"] = "sebs-redis" + env["SEBS_REDIS_PORT"] = "6379" # Use internal port, not mapped port + self.logging.info(f"Overriding Redis config for {module_name}: sebs-redis:6379") + else: + env["SEBS_REDIS_HOST"] = host + env["SEBS_REDIS_PORT"] = port + + if self.config.resources.redis_password: + env["SEBS_REDIS_PASSWORD"] = self.config.resources.redis_password + + self.logging.debug(f"Container env for {module_name}: SEBS_REDIS_HOST={env.get('SEBS_REDIS_HOST')}, SEBS_REDIS_PORT={env.get('SEBS_REDIS_PORT')}") + return env + + def _allocate_host_port(self, start_port: int, range_size: int = 1000) -> int: + return Local._allocate_host_port(self, start_port, range_size) + + @staticmethod + def _normalize_workflow_id_for_sonataflow(name: str) -> str: + """ + Normalize workflow ID for SonataFlow. + SonataFlow generates Java classes from workflow IDs, so they must be valid Java identifiers. 
+ Replace hyphens with underscores and ensure it starts with a letter. + """ + import re + # Replace any non-alphanumeric characters (except underscore) with underscore + sanitized = re.sub(r"[^A-Za-z0-9_]", "_", name) + if not sanitized: + sanitized = "wf" + # Ensure it starts with a letter + if not sanitized[0].isalpha(): + sanitized = f"wf_{sanitized}" + return sanitized + + def _start_container( + self, + code_package: Benchmark, + func_name: str, + func: Optional[LocalFunction], + env_overrides: Optional[Dict[str, str]] = None, + ) -> LocalFunction: + import requests + import time + + # Override to use custom network for SonataFlow + # Create sebs-network if it doesn't exist + try: + self._docker_client.networks.get("sebs-network") + except docker.errors.NotFound: + self._docker_client.networks.create("sebs-network", driver="bridge") + + # Call parent method to start the container + func_instance = Local._start_container(self, code_package, func_name, func, env_overrides) + + # Connect the container to sebs-network + try: + network = self._docker_client.networks.get("sebs-network") + network.connect(func_instance.container.id, aliases=[func_name]) + self.logging.info( + f"Connected container {func_instance.container.name} to sebs-network (alias {func_name})" + ) + + # Wait for the container to be reachable on sebs-network + # Get the sebs-network IP + func_instance.container.reload() + networks = func_instance.container.attrs.get("NetworkSettings", {}).get("Networks", {}) + sf_net = networks.get("sebs-network", {}) + sebs_ip = sf_net.get("IPAddress") + + if sebs_ip: + # Health check on sebs-network IP + max_attempts = 10 + attempts = 0 + while attempts < max_attempts: + try: + requests.get(f"http://{sebs_ip}:{Local.DEFAULT_PORT}/alive", timeout=1) + self.logging.debug(f"Container {func_instance.container.name} ready on sebs-network at {sebs_ip}") + break + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): + time.sleep(0.25) + attempts 
+= 1 + + if attempts >= max_attempts: + self.logging.warning(f"Container {func_instance.container.name} not responding on sebs-network IP {sebs_ip} after {max_attempts} attempts") + except Exception as e: + self.logging.warning(f"Failed to connect container to sebs-network: {e}") + + return func_instance + + def _load_workflow_definition(self, path: str) -> dict: + return Local._load_workflow_definition(path) + + def _prepare_workflow_functions( + self, + code_package: Benchmark, + workflow_name: str, + workflow_id: str, + definition_path: str, + definition: dict, + existing_workflow: Optional[SonataFlowWorkflow] = None, + ) -> Tuple[List[LocalFunction], Dict[str, Dict[str, str]], str]: + task_names = sorted(_workflow_task_names(definition)) + if not task_names: + raise RuntimeError("Workflow definition does not contain any task states.") + + existing_funcs = ( + {func.name: func for func in existing_workflow.functions} if existing_workflow else {} + ) + + functions: List[LocalFunction] = [] + bindings: Dict[str, Dict[str, str]] = {} + + required_containers = {f"{workflow_name}___{task}" for task in task_names} + obsolete_funcs = set(existing_funcs.keys()) - required_containers + for obsolete in obsolete_funcs: + existing_funcs[obsolete].stop() + + for task_name in task_names: + container_name = f"{workflow_name}___{task_name}" + existing_func = existing_funcs.get(container_name) + if existing_func: + existing_func.stop() + + env = self._workflow_env(workflow_name, task_name) + func_instance = self._start_container(code_package, container_name, existing_func, env) + functions.append(func_instance) + host, port = self._function_network_endpoint(func_instance) + workflow_function_name = f"{workflow_id}_{task_name}" + bindings[task_name] = { + "type": "custom", + "operation": "rest:post:/", + "host": host, + "port": port, + "workflow_function_name": workflow_function_name, + } + + resources_dir = os.path.join(code_package.code_location, "workflow_resources") + 
workflows_dir = os.path.join(resources_dir, "workflows") + os.makedirs(workflows_dir, exist_ok=True) + os.makedirs(resources_dir, exist_ok=True) + definition_copy = os.path.join(workflows_dir, f"{workflow_id}.json") + shutil.copy2(definition_path, definition_copy) + + return functions, bindings, definition_copy + + def create_workflow(self, code_package: Benchmark, workflow_name: str) -> Workflow: + workflow_name = self.format_function_name(workflow_name) + definition_path = os.path.join(code_package.benchmark_path, "definition.json") + if not os.path.exists(definition_path): + raise ValueError(f"No workflow definition found for {workflow_name}") + + definition = self._load_workflow_definition(definition_path) + workflow_id = self._normalize_workflow_id_for_sonataflow(workflow_name) + + functions, bindings, definition_copy = self._prepare_workflow_functions( + code_package, workflow_name, workflow_id, definition_path, definition + ) + + generator = SonataFlowGenerator(workflow_id, bindings) + generator.parse(definition_path) + sonataflow_definition = generator.generate() + + sf_dir = os.path.join(code_package.code_location, "workflow_resources", "sonataflow") + os.makedirs(sf_dir, exist_ok=True) + sonataflow_path = os.path.join(sf_dir, f"{workflow_id}.sw.json") + with open(sonataflow_path, "w") as outf: + outf.write(sonataflow_definition) + + function_cfg = FunctionConfig.from_benchmark(code_package) + workflow = SonataFlowWorkflow( + workflow_name, + functions, + code_package.benchmark, + workflow_id, + code_package.hash, + function_cfg, + sonataflow_path, + bindings, + ) + trigger = WorkflowSonataFlowTrigger( + workflow.workflow_id, + self.config.resources.runtime_url, + self.config.resources.endpoint_prefix, + ) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + return workflow + + def create_workflow_trigger( + self, workflow: Workflow, trigger_type: Trigger.TriggerType + ) -> Trigger: + workflow = cast(SonataFlowWorkflow, 
workflow) + if trigger_type != Trigger.TriggerType.HTTP: + raise RuntimeError("SonataFlow workflows currently support only HTTP triggers.") + + trigger = WorkflowSonataFlowTrigger( + workflow.workflow_id, + self.config.resources.runtime_url, + self.config.resources.endpoint_prefix, + ) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + self.cache_client.update_benchmark(workflow) + return trigger + + def update_workflow(self, workflow: Workflow, code_package: Benchmark): + workflow = cast(SonataFlowWorkflow, workflow) + definition_path = os.path.join(code_package.benchmark_path, "definition.json") + if not os.path.exists(definition_path): + raise ValueError(f"No workflow definition found for {workflow.name}") + + definition = self._load_workflow_definition(definition_path) + workflow_id = workflow.workflow_id if workflow.workflow_id else self._normalize_workflow_id_for_sonataflow(workflow.name) + functions, bindings, _ = self._prepare_workflow_functions( + code_package, + workflow.name, + workflow_id, + definition_path, + definition, + workflow, + ) + + generator = SonataFlowGenerator(workflow_id, bindings) + generator.parse(definition_path) + sonataflow_definition = generator.generate() + sonataflow_path = os.path.join( + code_package.code_location, "workflow_resources", "sonataflow", f"{workflow_id}.sw.json" + ) + os.makedirs(os.path.dirname(sonataflow_path), exist_ok=True) + with open(sonataflow_path, "w") as outf: + outf.write(sonataflow_definition) + + workflow.set_functions(functions) + workflow.definition_path = sonataflow_path + workflow.function_bindings = bindings + workflow.workflow_id = workflow_id + + triggers = workflow.triggers(Trigger.TriggerType.HTTP) + if not triggers: + trigger = WorkflowSonataFlowTrigger( + workflow.workflow_id, + self.config.resources.runtime_url, + self.config.resources.endpoint_prefix, + ) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + else: + for trigger 
in triggers: + if isinstance(trigger, WorkflowSonataFlowTrigger): + trigger.update(self.config.resources.runtime_url, self.config.resources.endpoint_prefix) + + self.logging.info(f"Updated SonataFlow workflow {workflow.name} definition.") + + def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + self.initialize_resources(select_prefix=resource_prefix or "sonataflow") + + def package_code( + self, + code_package: Benchmark, + directory: str, + is_workflow: bool, + is_cached: bool, + ) -> Tuple[str, int, str]: + return Local.package_code(self, code_package, directory, is_workflow, is_cached) + + def create_function( + self, + code_package: Benchmark, + func_name: str, + container_deployment: bool, + container_uri: str, + ) -> Function: + raise RuntimeError("SonataFlow deployment does not support individual function creation.") + + def update_function( + self, + code_package: Benchmark, + func: Function, + container_deployment: bool, + container_uri: str, + ) -> Function: + raise RuntimeError("SonataFlow deployment does not support individual function updates.") + + def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + raise RuntimeError("SonataFlow deployment does not support function triggers.") + + def update_function_trigger(self, func: Function, trigger: Trigger): + raise RuntimeError("SonataFlow deployment does not support function triggers.") + + def execute( + self, + code_package: Benchmark, + trigger: Trigger, + input: dict, + repetitions: int, + sync: bool, + ) -> List[ExecutionResult]: + return Local.execute(self, code_package, trigger, input, repetitions, sync) + + def get_function(self, code_package: Benchmark, func_name: str) -> Function: + raise RuntimeError("Function retrieval is not supported in SonataFlow mode.") + + def prepare_experiment(self, benchmark: CloudBenchmark): + return Local.prepare_experiment(self, benchmark) + + def shutdown(self) -> None: + 
super().shutdown() diff --git a/sebs/sonataflow/triggers.py b/sebs/sonataflow/triggers.py new file mode 100644 index 000000000..75eea7e53 --- /dev/null +++ b/sebs/sonataflow/triggers.py @@ -0,0 +1,179 @@ +import concurrent.futures +import datetime +import uuid +from typing import Optional + +import requests + +from sebs.faas.function import ExecutionResult, Trigger + + +class WorkflowSonataFlowTrigger(Trigger): + def __init__(self, workflow_id: str, base_url: str, endpoint_prefix: str = "services"): + super().__init__() + self._workflow_id = workflow_id + self._base_url = base_url.rstrip("/") + self._endpoint_prefix = endpoint_prefix.strip("/") + + @staticmethod + def typename() -> str: + return "SonataFlow.WorkflowTrigger" + + @staticmethod + def trigger_type() -> Trigger.TriggerType: + return Trigger.TriggerType.HTTP + + def _endpoint(self) -> str: + if self._endpoint_prefix: + return f"{self._base_url}/{self._endpoint_prefix}/{self._workflow_id}" + return f"{self._base_url}/{self._workflow_id}" + + def _candidate_endpoints(self) -> list[tuple[str, str]]: + """ + Return a list of candidate endpoints to try. + + Kogito/SonataFlow images have historically exposed the workflow start endpoint + either at `/{workflowId}` or at `/services/{workflowId}` depending on version/config. 
+ """ + candidates = [self._endpoint_prefix, "", "services"] + seen: set[str] = set() + out: list[tuple[str, str]] = [] + for prefix in candidates: + prefix = (prefix or "").strip("/") + if prefix in seen: + continue + seen.add(prefix) + if prefix: + out.append((prefix, f"{self._base_url}/{prefix}/{self._workflow_id}")) + else: + out.append((prefix, f"{self._base_url}/{self._workflow_id}")) + return out + + def _invoke(self, payload: dict) -> ExecutionResult: + import time + request_id = str(uuid.uuid4())[0:8] + begin = datetime.datetime.now() + result = ExecutionResult.from_times(begin, begin) + try: + body = payload + if isinstance(payload, dict): + body = dict(payload) + body.setdefault("request_id", request_id) + endpoint_used = self._endpoint() + + # Retry logic for 404 (workflow not loaded yet) + max_retries = 30 + retry_delay = 2 + resp = None + original_endpoint = endpoint_used + + for attempt in range(max_retries): + # Try the main endpoint first + resp = requests.post( + endpoint_used, + json=body, + timeout=900, + ) + self.logging.debug(f"Attempt {attempt + 1}: {endpoint_used} returned {resp.status_code}") + + # Check if we should retry + if resp.status_code == 404: + # Auto-detect the correct endpoint layout. + found_endpoint = False + for prefix, endpoint in self._candidate_endpoints(): + if endpoint == original_endpoint: + # Already tried this one as the main attempt + continue + self.logging.debug(f"Trying candidate: {endpoint}") + resp = requests.post( + endpoint, + json=body, + timeout=900, + ) + self.logging.debug(f"Candidate {endpoint} returned {resp.status_code}") + if resp.status_code != 404 and resp.status_code != 503: + # Found the correct endpoint! 
+ self._endpoint_prefix = prefix + endpoint_used = endpoint + found_endpoint = True + self.logging.info(f"Found workflow at {endpoint}") + break + + if not found_endpoint and attempt < max_retries - 1: + # Workflow not loaded yet, wait and retry + self.logging.info( + f"Workflow endpoint not ready (404), retrying in {retry_delay}s... (attempt {attempt + 1}/{max_retries})" + ) + time.sleep(retry_delay) + # Reset to original endpoint for next attempt + endpoint_used = original_endpoint + continue + elif not found_endpoint: + # Final attempt failed + self.logging.error(f"Workflow endpoint not found after {max_retries} attempts") + break + elif resp.status_code in [500, 503] and attempt < max_retries - 1: + # Service error (SonataFlow loading/restarting), wait and retry + self.logging.info( + f"SonataFlow not ready ({resp.status_code}), retrying in {retry_delay}s... (attempt {attempt + 1}/{max_retries})" + ) + time.sleep(retry_delay) + endpoint_used = original_endpoint + continue + + # Success or non-retryable error, break out of retry loop + break + + end = datetime.datetime.now() + result = ExecutionResult.from_times(begin, end) + result.request_id = request_id + if resp is not None and resp.status_code >= 300: + result.stats.failure = True + try: + error_text = resp.text[:500] if len(resp.text) > 500 else resp.text + except: + error_text = "" + self.logging.error( + f"SonataFlow invocation failed ({resp.status_code}): {error_text}" + ) + elif resp is not None: + try: + result.output = resp.json() + except Exception as e: + result.stats.failure = True + self.logging.error(f"Failed to parse SonataFlow response: {e}") + else: + result.stats.failure = True + self.logging.error("SonataFlow invocation failed: No response received") + except Exception as exc: + end = datetime.datetime.now() + result = ExecutionResult.from_times(begin, end) + result.request_id = request_id + result.stats.failure = True + self.logging.error(f"SonataFlow invocation error: {exc}") + return 
result + + def sync_invoke(self, payload: dict) -> ExecutionResult: + return self._invoke(payload) + + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + pool = concurrent.futures.ThreadPoolExecutor() + return pool.submit(self._invoke, payload) + + def serialize(self) -> dict: + return { + "type": "SONATAFLOW", + "workflow_id": self._workflow_id, + "base_url": self._base_url, + "endpoint_prefix": self._endpoint_prefix, + } + + @classmethod + def deserialize(cls, obj: dict) -> "WorkflowSonataFlowTrigger": + return cls(obj["workflow_id"], obj["base_url"], obj.get("endpoint_prefix", "services")) + + def update(self, base_url: Optional[str] = None, endpoint_prefix: Optional[str] = None): + if base_url: + self._base_url = base_url.rstrip("/") + if endpoint_prefix is not None: + self._endpoint_prefix = endpoint_prefix.strip("/") diff --git a/sebs/sonataflow/workflow.py b/sebs/sonataflow/workflow.py new file mode 100644 index 000000000..ac2e8e6b6 --- /dev/null +++ b/sebs/sonataflow/workflow.py @@ -0,0 +1,79 @@ +import os +import logging +from typing import Dict, List + +from sebs.faas.function import FunctionConfig, Workflow +from sebs.local.function import LocalFunction + + +class SonataFlowWorkflow(Workflow): + def __init__( + self, + name: str, + functions: List[LocalFunction], + benchmark: str, + workflow_id: str, + code_package_hash: str, + cfg: FunctionConfig, + definition_path: str, + function_bindings: Dict[str, Dict], + ): + super().__init__(benchmark, name, code_package_hash, cfg) + self._functions: Dict[str, LocalFunction] = {func.name: func for func in functions} + self.workflow_id = workflow_id + self.definition_path = definition_path + self.function_bindings = function_bindings + self.needs_refresh = False + + @property + def functions(self) -> List[LocalFunction]: + return list(self._functions.values()) + + def set_functions(self, functions: List[LocalFunction]): + self._functions = {func.name: func for func in functions} + + def 
update_function(self, func: LocalFunction): + self._functions[func.name] = func + + @staticmethod + def typename() -> str: + return "SonataFlow.Workflow" + + def serialize(self) -> dict: + serialized = { + **super().serialize(), + "functions": [func.serialize() for func in self._functions.values()], + "definition_path": self.definition_path, + "function_bindings": self.function_bindings, + "workflow_id": self.workflow_id, + } + serialized["triggers"] = [] + return serialized + + @staticmethod + def deserialize(cached_config: dict) -> "SonataFlowWorkflow": + funcs: List[LocalFunction] = [] + missing_function = False + for entry in cached_config["functions"]: + try: + funcs.append(LocalFunction.deserialize(entry)) + except RuntimeError as exc: + logging.getLogger(__name__).warning( + "Skipping cached function for workflow %s: %s", + cached_config.get("name", ""), + exc, + ) + missing_function = True + cfg = FunctionConfig.deserialize(cached_config["config"]) + workflow = SonataFlowWorkflow( + cached_config["name"], + funcs, + cached_config["benchmark"], + cached_config.get("workflow_id", cached_config["name"]), + cached_config["hash"], + cfg, + cached_config.get("definition_path", ""), + cached_config.get("function_bindings", {}), + ) + workflow.needs_refresh = missing_function + return workflow diff --git a/sebs/storage/config.py b/sebs/storage/config.py index cd47df391..b4d92da60 100644 --- a/sebs/storage/config.py +++ b/sebs/storage/config.py @@ -29,6 +29,7 @@ class MinioConfig(PersistentStorageConfig): input_buckets: List[str] = field(default_factory=lambda: []) version: str = "" data_volume: str = "" + network_name: str = "bridge" type: str = "minio" def update_cache(self, path: List[str], cache: Cache): @@ -79,6 +80,7 @@ class ScyllaDBConfig(NoSQLStorageConfig): memory: int = -1 version: str = "" data_volume: str = "" + network_name: str = "bridge" def update_cache(self, path: List[str], cache: Cache): diff --git a/sebs/storage/minio.py b/sebs/storage/minio.py 
index bb9112a22..4d17a2155 100644 --- a/sebs/storage/minio.py +++ b/sebs/storage/minio.py @@ -93,7 +93,7 @@ def start(self): self._storage_container = self._docker_client.containers.run( f"minio/minio:{self._cfg.version}", command="server /data", - network_mode="bridge", + network_mode=self._cfg.network_name, ports={"9000": str(self._cfg.mapped_port)}, environment={ "MINIO_ACCESS_KEY": self._cfg.access_key, @@ -129,9 +129,18 @@ def configure_connection(self): # Check if the system is Linux and that it's not WSL if is_linux(): networks = self._storage_container.attrs["NetworkSettings"]["Networks"] - self._cfg.address = "{IPAddress}:{Port}".format( - IPAddress=networks["bridge"]["IPAddress"], Port=9000 - ) + # Use the configured network name instead of hardcoded "bridge" + network_info = networks.get(self._cfg.network_name) + if network_info: + self._cfg.address = "{IPAddress}:{Port}".format( + IPAddress=network_info["IPAddress"], Port=9000 + ) + else: + # Fallback: use the first available network + first_network = next(iter(networks.values())) + self._cfg.address = "{IPAddress}:{Port}".format( + IPAddress=first_network["IPAddress"], Port=9000 + ) else: # System is either WSL, Windows, or Mac self._cfg.address = f"localhost:{self._cfg.mapped_port}" @@ -279,7 +288,11 @@ def _deserialize( try: obj._storage_container = docker_client.containers.get(instance_id) except docker.errors.NotFound: - raise RuntimeError(f"Storage container {instance_id} does not exist!") + obj.logging.warning( + f"Storage container {instance_id} not found; continuing without container handle." 
+ ) + obj._storage_container = None + obj._cfg.instance_id = "" else: obj._storage_container = None obj._input_prefixes = copy.copy(cached_config.input_buckets) diff --git a/sebs/storage/scylladb.py b/sebs/storage/scylladb.py index aae97815d..85cc9c18b 100644 --- a/sebs/storage/scylladb.py +++ b/sebs/storage/scylladb.py @@ -90,7 +90,7 @@ def start(self): command=scylladb_args, name="some-scylla", hostname="some-scylla", - network_mode="bridge", + network_mode=self._cfg.network_name, volumes=volumes, ports={"8000": str(self._cfg.mapped_port)}, remove=True, @@ -143,9 +143,18 @@ def configure_connection(self): # Check if the system is Linux and that it's not WSL if platform.system() == "Linux" and "microsoft" not in platform.release().lower(): networks = self._storage_container.attrs["NetworkSettings"]["Networks"] - self._cfg.address = "{IPAddress}:{Port}".format( - IPAddress=networks["bridge"]["IPAddress"], Port=self._cfg.alternator_port - ) + # Use the configured network name instead of hardcoded "bridge" + network_info = networks.get(self._cfg.network_name) + if network_info: + self._cfg.address = "{IPAddress}:{Port}".format( + IPAddress=network_info["IPAddress"], Port=self._cfg.alternator_port + ) + else: + # Fallback: use the first available network + first_network = next(iter(networks.values())) + self._cfg.address = "{IPAddress}:{Port}".format( + IPAddress=first_network["IPAddress"], Port=self._cfg.alternator_port + ) else: # System is either WSL, Windows, or Mac self._cfg.address = f"localhost:{self._cfg.mapped_port}" @@ -196,7 +205,11 @@ def _deserialize( try: obj._storage_container = docker_client.containers.get(instance_id) except docker.errors.NotFound: - raise RuntimeError(f"Storage container {instance_id} does not exist!") + obj.logging.warning( + f"Storage container {instance_id} not found; continuing without container handle." 
+ ) + obj._storage_container = None + obj._cfg.instance_id = "" else: obj._storage_container = None return obj diff --git a/sebs/types.py b/sebs/types.py index b87516fba..914c9af70 100644 --- a/sebs/types.py +++ b/sebs/types.py @@ -12,6 +12,7 @@ class Platforms(str, Enum): GCP = "gcp" LOCAL = "local" OPENWHISK = "openwhisk" + SONATAFLOW = "sonataflow" class Storage(str, Enum): diff --git a/sebs/utils.py b/sebs/utils.py index a0d397199..0bd11692b 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -110,8 +110,21 @@ def replace_string_in_file(path: str, from_str: str, to_str: str): def connect_to_redis_cache(host: str, password: str): + if ":" in host: + redis_host, redis_port = host.split(":", 1) + port = int(redis_port) + else: + redis_host = host + port = 6379 + redis = Redis( - host=host, port=6379, decode_responses=True, socket_keepalive=True, socket_timeout=10, socket_connect_timeout=10, password=password + host=redis_host, + port=port, + decode_responses=True, + socket_keepalive=True, + socket_timeout=10, + socket_connect_timeout=10, + password=password, ) redis.ping() @@ -119,7 +132,11 @@ def connect_to_redis_cache(host: str, password: str): def download_measurements( - redis: Redis, workflow_name: str, after: float, request_id: Optional[str], **static_args + redis: Redis, + workflow_name: str, + after: float, + request_id: Optional[str], + **static_args, ): payloads = [] pattern = f"{workflow_name}/*/{request_id}/*" if request_id else f"{workflow_name}/*" diff --git a/sonataflow-workflows/application.properties b/sonataflow-workflows/application.properties new file mode 100644 index 000000000..9219b2ca7 --- /dev/null +++ b/sonataflow-workflows/application.properties @@ -0,0 +1,3 @@ +# Enable Kogito process/workflow generation +kogito.codegen.processes.enabled=true +quarkus.kogito.codegen.processes.enabled=true diff --git a/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json 
b/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json new file mode 100644 index 000000000..24e1535b3 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_6100_1000_genome_python_3_11.sw.json @@ -0,0 +1,161 @@ +{ + "id": "sebd_6100_1000_genome_python_3_11", + "name": "sebd_6100_1000_genome_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_6100_1000_genome_python_3_11_individuals", + "operation": "rest:post:http://172.18.0.12:9000/", + "type": "custom" + }, + { + "name": "sebd_6100_1000_genome_python_3_11_individuals_merge", + "operation": "rest:post:http://172.18.0.13:9000/", + "type": "custom" + }, + { + "name": "sebd_6100_1000_genome_python_3_11_sifting", + "operation": "rest:post:http://172.18.0.15:9000/", + "type": "custom" + }, + { + "name": "sebd_6100_1000_genome_python_3_11_frequency", + "operation": "rest:post:http://172.18.0.11:9000/", + "type": "custom" + }, + { + "name": "sebd_6100_1000_genome_python_3_11_mutation_overlap", + "operation": "rest:post:http://172.18.0.14:9000/", + "type": "custom" + } + ], + "start": "individuals", + "states": [ + { + "name": "individuals", + "type": "foreach", + "inputCollection": "${ .blob }", + "outputCollection": "${ .blob }", + "iterationParam": "item", + "actions": [ + { + "name": "individuals", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_individuals", + "arguments": { + "payload": { + "array_element": "${ .item }", + "benchmark_bucket": "${ .benchmark_bucket }", + "bucket": "${ .bucket }", + "columns": "${ .columns }", + "columns_bucket": "${ .columns_bucket }", + "populations": "${ .populations }", + "sifting_input": "${ .sifting_input }", + "individuals_file": "${ .individuals_file }" + } + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "transition": "merge_and_sifting" + }, + { + "name": "merge_and_sifting", + "type": "parallel", + "branches": [ + { + "name": "individuals_merge", + "actions": [ + { + "name": "individuals_merge", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_individuals_merge", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ {\"individuals_merge\": .result.output.payload // .payload // .} }" + } + } + ] + }, + { + "name": "sifting", + "actions": [ + { + "name": "sifting", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_sifting", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ {\"sifting\": .result.output.payload // .payload // .} }" + } + } + ] + } + ], + "transition": "frequency_and_overlap" + }, + { + "name": "frequency_and_overlap", + "type": "foreach", + "inputCollection": "${ .sifting.populations }", + "outputCollection": "${ ._parallel_frequency_and_overlap_0_results }", + "iterationParam": "item", + "actions": [ + { + "name": "frequency", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_frequency", + "arguments": { + "payload": { + "array_element": "${ .item }", + "sifting": "${ .sifting }", + "individuals_merge": "${ .individuals_merge }" + } + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "transition": "mutation_overlap" + }, + { + "name": "mutation_overlap", + "type": "foreach", + "inputCollection": "${ .sifting.populations }", + "outputCollection": "${ .sifting.populations }", + "iterationParam": "item", + "actions": [ + { + "name": "mutation_overlap", + "functionRef": { + "refName": "sebd_6100_1000_genome_python_3_11_mutation_overlap", + "arguments": { + "payload": { + "array_element": "${ .item }", + "sifting": "${ .sifting }", + "individuals_merge": "${ .individuals_merge }" + } + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json new file mode 100644 index 000000000..3f5c104e9 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_6101_1000_genome_individuals_python_3_11.sw.json @@ -0,0 +1,48 @@ +{ + "id": "sebd_6101_1000_genome_individuals_python_3_11", + "name": "sebd_6101_1000_genome_individuals_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_6101_1000_genome_individuals_python_3_11_individuals", + "operation": "rest:post:http://172.18.0.16:9000/", + "type": "custom" + } + ], + "start": "individuals_state", + "states": [ + { + "name": "individuals_state", + "type": "foreach", + "inputCollection": "${ .blob }", + "outputCollection": "${ .blob }", + "iterationParam": "item", + "actions": [ + { + "name": "individuals", + "functionRef": { + "refName": "sebd_6101_1000_genome_individuals_python_3_11_individuals", + "arguments": { + "payload": { + "array_element": "${ .item }", + "benchmark_bucket": "${ .benchmark_bucket }", + "bucket": "${ .bucket }", + "columns": "${ .columns }", + "columns_bucket": "${ .columns_bucket }", + 
"populations": "${ .populations }", + "sifting_input": "${ .sifting_input }", + "individuals_file": "${ .individuals_file }" + } + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json new file mode 100644 index 000000000..b4e018e2f --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_610_gen_python_3_11.sw.json @@ -0,0 +1,152 @@ +{ + "id": "sebd_610_gen_python_3_11", + "name": "sebd_610_gen_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_610_gen_python_3_11_get_astros", + "operation": "rest:post:http://172.18.0.6:9000/", + "type": "custom" + }, + { + "name": "sebd_610_gen_python_3_11_few_people", + "operation": "rest:post:http://172.18.0.5:9000/", + "type": "custom" + }, + { + "name": "sebd_610_gen_python_3_11_many_people", + "operation": "rest:post:http://172.18.0.7:9000/", + "type": "custom" + }, + { + "name": "sebd_610_gen_python_3_11_map_astros", + "operation": "rest:post:http://172.18.0.8:9000/", + "type": "custom" + }, + { + "name": "sebd_610_gen_python_3_11_process_astros", + "operation": "rest:post:http://172.18.0.9:9000/", + "type": "custom" + } + ], + "start": "get_astros", + "states": [ + { + "name": "get_astros", + "type": "operation", + "actions": [ + { + "name": "get_astros", + "functionRef": { + "refName": "sebd_610_gen_python_3_11_get_astros", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "transition": "select_astros_number" + }, + { + "name": "select_astros_number", + "type": "switch", + "dataConditions": [ + { + "condition": "${ .astros.number < 10 }", + "transition": "few_people" + }, + { + "condition": "${ .astros.number >= 10 }", + "transition": "many_people" + } + ], + "defaultCondition": { + "transition": "few_people" + } + }, + { + "name": "few_people", + "type": "operation", + "actions": [ + { + "name": "few_people", + "functionRef": { + "refName": "sebd_610_gen_python_3_11_few_people", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "map_astros" + }, + { + "name": "many_people", + "type": "operation", + "actions": [ + { + "name": "many_people", + "functionRef": { + "refName": "sebd_610_gen_python_3_11_many_people", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "map_astros" + }, + { + "name": "map_astros", + "type": "foreach", + "inputCollection": "${ .astros.people }", + "outputCollection": "${ .astros.people }", + "iterationParam": "item", + "actions": [ + { + "name": "map_astros", + "functionRef": { + "refName": "sebd_610_gen_python_3_11_map_astros", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "process_astros" + }, + { + "name": "process_astros", + "type": "operation", + "actions": [ + { + "name": "process_astros", + "functionRef": { + "refName": "sebd_610_gen_python_3_11_process_astros", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json new file mode 100644 index 000000000..0d1a95f88 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_6200_trip_booking_python_3_11.sw.json @@ -0,0 +1,204 @@ +{ + "id": "sebd_6200_trip_booking_python_3_11", + "name": "sebd_6200_trip_booking_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_6200_trip_booking_python_3_11_reserve_hotel", + "operation": "rest:post:http://172.18.0.24:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_reserve_rental", + "operation": "rest:post:http://172.18.0.25:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_reserve_flight", + "operation": "rest:post:http://172.18.0.23:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_confirm", + "operation": "rest:post:http://172.18.0.22:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_cancel_flight", + "operation": "rest:post:http://172.18.0.19:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_cancel_rental", + "operation": "rest:post:http://172.18.0.21:9000/", + "type": "custom" + }, + { + "name": "sebd_6200_trip_booking_python_3_11_cancel_hotel", + "operation": "rest:post:http://172.18.0.20:9000/", + "type": "custom" + } + ], + "start": "hotel", + "states": [ + { + "name": "hotel", + "type": "operation", + "actions": [ + { + "name": "reserve_hotel", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_reserve_hotel", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "transition": "rental" + }, + { + "name": "rental", + "type": "operation", + "actions": [ + { + "name": "reserve_rental", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_reserve_rental", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "flight", + "onErrors": [ + { + "errorRef": "workflow_error", + "transition": "cancel_hotel" + } + ] + }, + { + "name": "flight", + "type": "operation", + "actions": [ + { + "name": "reserve_flight", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_reserve_flight", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "confirm", + "onErrors": [ + { + "errorRef": "workflow_error", + "transition": "cancel_rental" + } + ] + }, + { + "name": "confirm", + "type": "operation", + "actions": [ + { + "name": "confirm", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_confirm", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true, + "onErrors": [ + { + "errorRef": "workflow_error", + "transition": "cancel_flight" + } + ] + }, + { + "name": "cancel_flight", + "type": "operation", + "actions": [ + { + "name": "cancel_flight", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_cancel_flight", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "cancel_rental" + }, + { + "name": "cancel_rental", + "type": "operation", + "actions": [ + { + "name": "cancel_rental", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_cancel_rental", + "arguments": { + "payload": "${ . 
}" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "cancel_hotel" + }, + { + "name": "cancel_hotel", + "type": "operation", + "actions": [ + { + "name": "cancel_hotel", + "functionRef": { + "refName": "sebd_6200_trip_booking_python_3_11_cancel_hotel", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ], + "errors": [ + { + "name": "workflow_error", + "code": "*" + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json new file mode 100644 index 000000000..42ad38bfb --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_620_func_invo_python_3_11.sw.json @@ -0,0 +1,71 @@ +{ + "id": "sebd_620_func_invo_python_3_11", + "name": "sebd_620_func_invo_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_620_func_invo_python_3_11_gen", + "operation": "rest:post:http://172.18.0.17:9000/", + "type": "custom" + }, + { + "name": "sebd_620_func_invo_python_3_11_process", + "operation": "rest:post:http://172.18.0.18:9000/", + "type": "custom" + } + ], + "start": "gen", + "states": [ + { + "name": "gen", + "type": "operation", + "actions": [ + { + "name": "gen", + "functionRef": { + "refName": "sebd_620_func_invo_python_3_11_gen", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "transition": "process" + }, + { + "name": "process", + "type": "foreach", + "inputCollection": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7 + ], + "iterationParam": "idx", + "actions": [ + { + "name": "process", + "functionRef": { + "refName": "sebd_620_func_invo_python_3_11_process", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json new file mode 100644 index 000000000..85f05c30c --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_630_parallel_sleep_python_3_11.sw.json @@ -0,0 +1,63 @@ +{ + "id": "sebd_630_parallel_sleep_python_3_11", + "name": "sebd_630_parallel_sleep_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_630_parallel_sleep_python_3_11_generate", + "operation": "rest:post:http://172.18.0.26:9000/", + "type": "custom" + }, + { + "name": "sebd_630_parallel_sleep_python_3_11_process", + "operation": "rest:post:http://172.18.0.27:9000/", + "type": "custom" + } + ], + "start": "generate", + "states": [ + { + "name": "generate", + "type": "operation", + "actions": [ + { + "name": "generate", + "functionRef": { + "refName": "sebd_630_parallel_sleep_python_3_11_generate", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "transition": "process-state" + }, + { + "name": "process-state", + "type": "foreach", + "inputCollection": "${ .buffer }", + "outputCollection": "${ .buffer }", + "iterationParam": "item", + "actions": [ + { + "name": "process", + "functionRef": { + "refName": "sebd_630_parallel_sleep_python_3_11_process", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json new file mode 100644 index 000000000..3e89cb0d2 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_631_parallel_download_python_3_11.sw.json @@ -0,0 +1,63 @@ +{ + "id": "sebd_631_parallel_download_python_3_11", + "name": "sebd_631_parallel_download_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_631_parallel_download_python_3_11_generate", + "operation": "rest:post:http://172.18.0.28:9000/", + "type": "custom" + }, + { + "name": "sebd_631_parallel_download_python_3_11_process", + "operation": "rest:post:http://172.18.0.29:9000/", + "type": "custom" + } + ], + "start": "generate", + "states": [ + { + "name": "generate", + "type": "operation", + "actions": [ + { + "name": "generate", + "functionRef": { + "refName": "sebd_631_parallel_download_python_3_11_generate", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "transition": "process1" + }, + { + "name": "process1", + "type": "foreach", + "inputCollection": "${ .buffer }", + "outputCollection": "${ .buffer }", + "iterationParam": "item", + "actions": [ + { + "name": "process", + "functionRef": { + "refName": "sebd_631_parallel_download_python_3_11_process", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json new file mode 100644 index 000000000..1595ed5c6 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_640_selfish_detour_python_3_11.sw.json @@ -0,0 +1,36 @@ +{ + "id": "sebd_640_selfish_detour_python_3_11", + "name": "sebd_640_selfish_detour_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_640_selfish_detour_python_3_11_measure", + "operation": "rest:post:http://172.18.0.30:9000/", + "type": "custom" + } + ], + "start": "measure", + "states": [ + { + "name": "measure", + "type": "operation", + "actions": [ + { + "name": "measure", + "functionRef": { + "refName": "sebd_640_selfish_detour_python_3_11_measure", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json new file mode 100644 index 000000000..b3333d736 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_650_vid_python_3_11.sw.json @@ -0,0 +1,87 @@ +{ + "id": "sebd_650_vid_python_3_11", + "name": "sebd_650_vid_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_650_vid_python_3_11_decode", + "operation": "rest:post:http://172.18.0.32:9000/", + "type": "custom" + }, + { + "name": "sebd_650_vid_python_3_11_analyse", + "operation": "rest:post:http://172.18.0.31:9000/", + "type": "custom" + }, + { + "name": "sebd_650_vid_python_3_11_summarize", + "operation": "rest:post:http://172.18.0.33:9000/", + "type": "custom" + } + ], + "start": "decode", + "states": [ + { + "name": "decode", + "type": "operation", + "actions": [ + { + "name": "decode", + "functionRef": { + "refName": "sebd_650_vid_python_3_11_decode", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "analyse-map" + }, + { + "name": "analyse-map", + "type": "foreach", + "inputCollection": "${ .frames }", + "outputCollection": "${ .frames }", + "iterationParam": "item", + "actions": [ + { + "name": "analyse", + "functionRef": { + "refName": "sebd_650_vid_python_3_11_analyse", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "summarize" + }, + { + "name": "summarize", + "type": "operation", + "actions": [ + { + "name": "summarize", + "functionRef": { + "refName": "sebd_650_vid_python_3_11_summarize", + "arguments": { + "payload": "${ . 
}" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json new file mode 100644 index 000000000..de2f7de52 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_660_map_reduce_python_3_11.sw.json @@ -0,0 +1,114 @@ +{ + "id": "sebd_660_map_reduce_python_3_11", + "name": "sebd_660_map_reduce_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_660_map_reduce_python_3_11_split", + "operation": "rest:post:http://172.18.0.37:9000/", + "type": "custom" + }, + { + "name": "sebd_660_map_reduce_python_3_11_map", + "operation": "rest:post:http://172.18.0.34:9000/", + "type": "custom" + }, + { + "name": "sebd_660_map_reduce_python_3_11_shuffle", + "operation": "rest:post:http://172.18.0.36:9000/", + "type": "custom" + }, + { + "name": "sebd_660_map_reduce_python_3_11_reduce", + "operation": "rest:post:http://172.18.0.35:9000/", + "type": "custom" + } + ], + "start": "split", + "states": [ + { + "name": "split", + "type": "operation", + "actions": [ + { + "name": "split", + "functionRef": { + "refName": "sebd_660_map_reduce_python_3_11_split", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "map-state" + }, + { + "name": "map-state", + "type": "foreach", + "inputCollection": "${ .list }", + "outputCollection": "${ .list }", + "iterationParam": "item", + "actions": [ + { + "name": "map", + "functionRef": { + "refName": "sebd_660_map_reduce_python_3_11_map", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "transition": "shuffle" + }, + { + "name": "shuffle", + "type": "operation", + "actions": [ + { + "name": "shuffle", + "functionRef": { + "refName": "sebd_660_map_reduce_python_3_11_shuffle", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "reduce-state" + }, + { + "name": "reduce-state", + "type": "foreach", + "inputCollection": "${ .list }", + "outputCollection": "${ .list }", + "iterationParam": "item", + "actions": [ + { + "name": "reduce", + "functionRef": { + "refName": "sebd_660_map_reduce_python_3_11_reduce", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json new file mode 100644 index 000000000..1e0d05182 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_670_auth_python_3_11.sw.json @@ -0,0 +1,36 @@ +{ + "id": "sebd_670_auth_python_3_11", + "name": "sebd_670_auth_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_670_auth_python_3_11_auth", + "operation": "rest:post:http://172.18.0.38:9000/", + "type": "custom" + } + ], + "start": "auth", + "states": [ + { + "name": "auth", + "type": "operation", + "actions": [ + { + "name": "auth", + "functionRef": { + "refName": "sebd_670_auth_python_3_11_auth", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json new file mode 100644 index 000000000..0ac5c43b5 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_680_excamera_python_3_11.sw.json @@ -0,0 +1,116 @@ +{ + "id": "sebd_680_excamera_python_3_11", + "name": "sebd_680_excamera_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_680_excamera_python_3_11_split", + "operation": "rest:post:http://172.18.0.42:9000/", + "type": "custom" + }, + { + "name": "sebd_680_excamera_python_3_11_encode", + "operation": "rest:post:http://172.18.0.39:9000/", + "type": "custom" + }, + { + "name": "sebd_680_excamera_python_3_11_reencode", + "operation": "rest:post:http://172.18.0.41:9000/", + "type": "custom" + }, + { + "name": "sebd_680_excamera_python_3_11_rebase", + "operation": "rest:post:http://172.18.0.40:9000/", + "type": "custom" + } + ], + "start": "split", + "states": [ + { + "name": "split", + "type": "operation", + "actions": [ + { + "name": "split", + "functionRef": { + "refName": "sebd_680_excamera_python_3_11_split", + "arguments": { + "payload": "${ . }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "encode-state" + }, + { + "name": "encode-state", + "type": "foreach", + "inputCollection": "${ .segments }", + "outputCollection": "${ .segments }", + "iterationParam": "item", + "actions": [ + { + "name": "encode", + "functionRef": { + "refName": "sebd_680_excamera_python_3_11_encode", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . 
}" + } + } + ], + "transition": "reencode-state" + }, + { + "name": "reencode-state", + "type": "foreach", + "inputCollection": "${ .segments }", + "outputCollection": "${ .segments }", + "iterationParam": "item", + "actions": [ + { + "name": "reencode", + "functionRef": { + "refName": "sebd_680_excamera_python_3_11_reencode", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "rebase-state" + }, + { + "name": "rebase-state", + "type": "foreach", + "inputCollection": "${ .segments }", + "iterationParam": "item", + "actions": [ + { + "name": "rebase", + "functionRef": { + "refName": "sebd_680_excamera_python_3_11_rebase", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json b/sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json new file mode 100644 index 000000000..981de4db8 --- /dev/null +++ b/sonataflow-workflows/workflows/sebd_690_ml_python_3_11.sw.json @@ -0,0 +1,63 @@ +{ + "id": "sebd_690_ml_python_3_11", + "name": "sebd_690_ml_python_3_11", + "version": "0.1", + "specVersion": "0.8", + "description": "Auto-generated from SeBS workflow definition.", + "functions": [ + { + "name": "sebd_690_ml_python_3_11_generate", + "operation": "rest:post:http://172.18.0.43:9000/", + "type": "custom" + }, + { + "name": "sebd_690_ml_python_3_11_train", + "operation": "rest:post:http://172.18.0.44:9000/", + "type": "custom" + } + ], + "start": "generate", + "states": [ + { + "name": "generate", + "type": "operation", + "actions": [ + { + "name": "generate", + "functionRef": { + "refName": "sebd_690_ml_python_3_11_generate", + "arguments": { + "payload": "${ . 
}" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "transition": "train-state" + }, + { + "name": "train-state", + "type": "foreach", + "inputCollection": "${ .schedules }", + "outputCollection": "${ .schedules }", + "iterationParam": "item", + "actions": [ + { + "name": "train", + "functionRef": { + "refName": "sebd_690_ml_python_3_11_train", + "arguments": { + "payload": "${ .item }" + } + }, + "actionDataFilter": { + "results": "${ .result.output.payload // .payload // . }" + } + } + ], + "end": true + } + ] +} \ No newline at end of file diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index 5336fb485..7269c2183 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -10,7 +10,7 @@ parser = argparse.ArgumentParser(description="Run local app experiments.") parser.add_argument( - "--deployment", default=None, choices=["local", "aws", "azure", "gcp"], action="store" + "--deployment", default=None, choices=["local", "aws", "azure", "gcp", "sonataflow"], action="store" ) parser.add_argument("--type", default=None, choices=["build", "run", "manage"], action="store") parser.add_argument("--language", default=None, choices=["python", "nodejs"], action="store")