From d6562a4824ec1c5cfbe2fc1a4aae9f3c6a92ffb0 Mon Sep 17 00:00:00 2001
From: micoleaoo
Date: Thu, 12 Feb 2026 13:05:18 +0000
Subject: [PATCH 1/4] Fixed HTTP service compatibility with smoke tests,
 updated README files for both DTS and tests.

---
 docker/dts/README.md           | 44 +++++++++++++------
 docker/dts/docker-compose.yaml |  1 +
 docker/dts/http/main_flask.py  | 53 ++++++++++++++++++++---
 tests/README.md                | 78 +++++++++++++++++++++++++++++++---
 4 files changed, 153 insertions(+), 23 deletions(-)

diff --git a/docker/dts/README.md b/docker/dts/README.md
index 0ecd021..adcc12f 100644
--- a/docker/dts/README.md
+++ b/docker/dts/README.md
@@ -1,4 +1,4 @@
-The `docker-compose.yaml` file in this directory is primarily intended for testing the DTS services of the TESP API.
+The `docker-compose.yaml` file in this directory is primarily intended for testing the DTS services of the TESP API. Currently, **only HTTP service is compatible** with the smoke tests.
 To ensure proper execution of the tests, run `docker-compose.yaml` both in this directory and in the `/tesp-api` directory with:
 ```
 docker compose up -d --build
@@ -24,11 +24,20 @@ The `clients` container contains clients for the used protocols. So you doesn't
 
 ### HTTP
 
-Upload
-`curl -i -X POST -F "file=@up-file.txt;filename=file.txt" service-http:5000/upload`
+The HTTP service provides multiple routes:
 
-Download
-`curl -X GET -o down-file.txt service-http:5000/download/file.txt`
+**Upload** (saves to `/data/uploaded_data/`)
+`curl -i -X POST -F "file=@up-file.txt" localhost:5000/upload`
+
+**Download (legacy route)**
+`curl -X GET -o down-file.txt localhost:5000/download/file.txt`
+
+**Browse test_data (smoke test compatible)**
+`curl localhost:5000/test_data/test.txt`
+`curl localhost:5000/test_data/input_dir/`
+
+**List all files**
+`curl localhost:5000/list`
 
 ### S3
 Create bucket
@@ -89,19 +98,30 @@ lftp -p 2121 -e "put /tmp/sample.jpg; get $(basename $SAMPLE_DATE) -o /tmp/sampl
 
 ##### Upload
 
-POST + `http://localhost:5000/upload` while payload is sent as 'file' HTTP body parameter.
+POST to `http://localhost:5000/upload` with file as multipart form data:
+```bash
+curl -F "file=@/tmp/qwerty" http://localhost:5000/upload
 ```
+
+With subdirectory:
+```bash
 curl -F "file=@/tmp/qwerty" http://localhost:5000/upload/foo/bar
 ```
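+
+A successful upload returns a small JSON confirmation, matching the `jsonify` response added
+in `main_flask.py` below. For the subdirectory example above it would look roughly like this
+(a sketch; the exact `saved_as` value depends on the uploaded filename):
+```
+{"status": "ok", "saved_as": "/data/uploaded_data/foo/bar/qwerty"}
+```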
-
 ##### Download
 
-GET + `http://localhost:5000/download` while payload is sent as 'file' HTTP body parameter.
-```
-wget http://localhost:5000/download/foo/bar/qwerty
-```
+Legacy route:
+```bash
+wget http://localhost:5000/download/foo/bar/qwerty
+```
+Smoke test compatible route (serves from mounted test_data):
+```bash
+curl http://localhost:5000/test_data/test.txt
+```
 
 ##### List of all data
-GET + `http://localhost:5000/list`
+
+```bash
+curl http://localhost:5000/list
+```
diff --git a/docker/dts/docker-compose.yaml b/docker/dts/docker-compose.yaml
index 5434d6d..f72438b 100644
--- a/docker/dts/docker-compose.yaml
+++ b/docker/dts/docker-compose.yaml
@@ -31,6 +31,7 @@ services:
       - "5000:5000"
     volumes:
       - ../../tests/test_data:/data
+      - ../../tests/uploaded_data:/data/uploaded_data
 
   tests-clients:
     build:
diff --git a/docker/dts/http/main_flask.py b/docker/dts/http/main_flask.py
index df94809..e598e64 100644
--- a/docker/dts/http/main_flask.py
+++ b/docker/dts/http/main_flask.py
@@ -1,11 +1,12 @@
 import os
 import subprocess
-from flask import Flask, request, send_file, jsonify
+from flask import Flask, request, send_file, send_from_directory, jsonify, abort
 from werkzeug.utils import secure_filename
 from logger_config import logger
 
 app = Flask(__name__)
 DATA_DIR = '/data'
+UPLOAD_DIR = '/data/uploaded_data'
 
 
 @app.route('/upload', defaults={'target_path': ''}, methods=['POST'], strict_slashes=False)
@@ -18,16 +19,29 @@ def upload_file(target_path):
     if file.filename == '':
         return 'No selected file', 400
 
-    target_dir = os.path.join(DATA_DIR, target_path)
+    # Support nested paths in filename (e.g., "subdir/file.txt")
+    raw_filename = file.filename
+    secure_parts = [secure_filename(part) for part in raw_filename.split('/') if part]
+
+    if target_path:
+        target_dir = os.path.join(UPLOAD_DIR, target_path)
+    else:
+        target_dir = UPLOAD_DIR
+
+    if len(secure_parts) > 1:
+        target_dir = os.path.join(target_dir, *secure_parts[:-1])
+
     os.makedirs(target_dir, exist_ok=True)
-
-    filename = secure_filename(file.filename)
-    file.save(os.path.join(target_dir, filename))
-    return 'File uploaded successfully.', 200
+    filename = secure_parts[-1] if secure_parts else secure_filename(raw_filename)
+    save_path = os.path.join(target_dir, filename)
+    file.save(save_path)
+
+    return jsonify({"status": "ok", "saved_as": save_path}), 200
 
 
 @app.route('/download/<path:file_path>', methods=['GET'])
 def download_file(file_path):
+    """Legacy download route for backwards compatibility."""
     logger.info(f"path { file_path }")
     target_file = os.path.join(DATA_DIR, file_path)
 
@@ -37,6 +51,31 @@
     return 'File not found.', 404
 
 
+@app.route('/test_data/', methods=['GET'])
+@app.route('/test_data/<path:subpath>', methods=['GET'])
+def browse_test_data(subpath=''):
+    """Serve files and directory listings from /data (mounted as test_data)."""
+    full_path = os.path.join(DATA_DIR, subpath)
+
+    if os.path.isdir(full_path):
+        # Directory listing
+        items = os.listdir(full_path)
+        links = []
+        for item in items:
+            item_path = os.path.join('test_data', subpath, item) if subpath else os.path.join('test_data', item)
+            if os.path.isdir(os.path.join(full_path, item)):
+                item_path += '/'
+            links.append(f'<a href="/{item_path}">{item}</a>')
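+        # Illustrative result of the listing assembled above for GET /test_data/
+        # with test.txt and input_dir/ present (a sketch only; the reconstructed
+        # markup may differ from the original implementation):
+        #   <a href="/test_data/test.txt">test.txt</a>
+        #   <a href="/test_data/input_dir/">input_dir/</a>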
") + return "" + "\n".join(links) + "" + + elif os.path.isfile(full_path): + # Serve file + return send_from_directory(os.path.dirname(full_path), os.path.basename(full_path)) + + else: + abort(404, description=f"{subpath} not found") + + @app.route('/list', methods=['GET']) def list_data(): try: @@ -55,4 +94,6 @@ def list_data(): if __name__ == '__main__': + os.makedirs(UPLOAD_DIR, exist_ok=True) app.run(host='0.0.0.0', debug=True, port=5000) + diff --git a/tests/README.md b/tests/README.md index b2c706f..40d8a54 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1,10 +1,78 @@ # Testing -To test functionality of TESP API, running `docker-compose.yaml` with: + +Integration tests for TESP-API. All tests run against a live TESP-API instance at `http://localhost:8080`. + +## Prerequisites + +Start TESP-API + MongoDB + Pulsar from the project root: + +```bash +docker compose --profile pulsar up -d --build ``` -docker compose up -d --build + +## Running Tests + +### 1. Start the upload server + +For I/O tests (`test_inputs`, `test_dir_input`, `test_volumes`, `test_envs`), the test fixtures download/upload files via HTTP. Start the local file server: + +```bash +python3 tests/upload_server.py ``` -is necessary both in `/tesp-api` and `/tesp-api/docker/dts` -Tests themselves can be run with: + +This serves `tests/test_data/` on port 5000 and accepts uploads to `tests/uploaded_data/`. + +### 2. Run smoke tests + +```bash +python3 -m pytest tests/smoke_tests.py -v ``` -python3 -m pytest smoke_tests.py + +### Load/stress test (100 concurrent tasks) + +```bash +python3 -m pytest tests/load_stress_test.py -v ``` + +## Platform Notes + +The test fixtures use `http://172.17.0.1:5000` to reach the host from inside containers. This works on **Linux** (Docker bridge gateway IP). + + +## Test Fixtures + +All task JSON fixtures are in `test_jsons/`. Key files: + +| Fixture | Description | Requires upload_server | +|---|---|---| +| `state_true.json` | Simple task that should succeed | No | +| `state_false.json` | Task designed to fail | No | +| `cancel.json` | Long-running task (`sleep infinity`) for cancellation | No | +| `multi_true.json` | Three sequential executors | No | +| `inputs.json` | HTTP download + inline content | **Yes** | +| `dir-io.json` | Directory input/output via HTTP | **Yes** | +| `volumes.json` | Shared volume between executors | **Yes** | +| `envs.json` | Environment variables with output upload | **Yes** | +| `workdir.json` | Working directory test | No | + +## DTS (Data Transfer Services) + +The `docker/dts/` directory contains an **alternative** test infrastructure with MinIO (S3), FTP, and HTTP services running in containers. At the moment, only HTTP service is compatible with the smoke tests. + +### Using DTS instead of upload_server.py + +```bash +# Start DTS (from project root) +cd docker/dts && docker compose up -d --build && cd ../.. + +# Run tests (DTS serves on the same port 5000) +python3 -m pytest tests/smoke_tests.py -v +``` + +DTS mounts `tests/test_data/` and `tests/uploaded_data/` so the same fixture URLs work. 
+ +## Cleanup + +The test suite automatically cleans up: +- `uploaded_data/` directory created by upload tests +- Leftover Ubuntu containers from failed tests From 0dec36885b6c1bc0f5f72f87b77d4914fc7802d4 Mon Sep 17 00:00:00 2001 From: micoleaoo Date: Thu, 12 Feb 2026 15:19:47 +0000 Subject: [PATCH 2/4] dts readme update --- docker/dts/README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docker/dts/README.md b/docker/dts/README.md index adcc12f..5e4bcfe 100644 --- a/docker/dts/README.md +++ b/docker/dts/README.md @@ -1,20 +1,26 @@ -The `docker-compose.yaml` file in this directory is primarily intended for testing the DTS services of the TESP API. Currently, **only HTTP service is compatible** with the smoke tests. +The `docker-compose.yaml` file in this directory is primarily intended for testing the DTS services of the TESP API. Smoke tests exercise **only the HTTP service**. To ensure proper execution of the tests, run `docker-compose.yaml` both in this directory and in the `/tesp-api` directory with: ``` docker compose up -d --build ``` - # DTS Example of data transfer server using HTTP, S3 and FTP. -Project uses Docker and deploy 4 containers: +### Current status + +- HTTP is the only DTS service implemented in this repository and used by smoke tests. +- S3 runs MinIO as an external service for manual testing. +- FTP runs the upstream `ftpserver` container and uses S3 (MinIO) as its storage backend. + +`docker-compose.yaml` deploys 4 containers: - s3 - ftp - http - clients -The `clients` container contains clients for the used protocols. So you doesn't need to install the clients on you local computer to test it. +The `clients` container contains clients for the used protocols, so you do not need to install the clients on your local computer to test it. + ## Deploy From de724eacf9643d0246ca4efb9dfbddf837153171 Mon Sep 17 00:00:00 2001 From: micoleaoo Date: Fri, 13 Feb 2026 08:57:36 +0000 Subject: [PATCH 3/4] updated readme - limitations, phrasing, formatting --- README.md | 139 ++++++++++++++++++++++++------------------------------ 1 file changed, 62 insertions(+), 77 deletions(-) diff --git a/README.md b/README.md index a76f6d4..81f735a 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,13 @@ [![python](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/download) [![last-commit](https://img.shields.io/github/last-commit/CESNET/tesp-api)]() -This project is an effort to create Open-source implementation of a task execution engine based on the [TES standard](https://github.com/ga4gh/task-execution-schemas) -distributing executions to services exposing [Pulsar](https://github.com/galaxyproject/pulsar) application. For more details -on `TES`, see the Task Execution Schemas [documentation](https://ga4gh.github.io/task-execution-schemas/docs/). `Pulsar` -is a Python server application that allows a [Galaxy](https://github.com/galaxyproject/galaxy) server to run jobs on remote systems. The original intention of this -project was to modify the `Pulsar` project (e.g. via forking) so its Rest API would be compatible with the `TES` standard. -Later a decision was made that rather a separate microservice will be created, decoupled from the `Pulsar`, implementing the `TES` -standard and distributing `TES` tasks execution to `Pulsar` applications. 
+This is a task execution microservice based on the [TES standard](https://github.com/ga4gh/task-execution-schemas) that sends job executions to [Pulsar](https://github.com/galaxyproject/pulsar) application. + +Read about our project on the [Galaxy Hub](https://galaxyproject.org/news/2025-10-06-tesp-api/) and [e-INFRA CZ Blog](https://blog.e-infra.cz/blog/tesp-api/). + +This effort is part of the [EuroScienceGateway](https://galaxyproject.org/projects/esg/) project. +For more details on TES, see the [Task Execution Schemas documentation](https://ga4gh.github.io/task-execution-schemas/docs/). +Pulsar is a Python server application that allows a [Galaxy](https://github.com/galaxyproject/galaxy) server to run jobs on remote systems. ## Quick start @@ -22,19 +22,19 @@ The most straightforward way to deploy the TESP is to use Docker Compose. ``` docker compose up -d ``` -Expecting exetrnal Pulsar configured in `settings.toml` before the compose is run. -So far only REST Pulsar communication is supported. +Starts the API and MongoDB containers. Configure an external Pulsar in `settings.toml` +(default points to `http://localhost:8913`). REST is the default; AMQP is used only +if `pulsar.amqp_url` is set. #### With pulsar_rest service: ``` docker compose --profile pulsar up -d ``` +Starts a local Pulsar REST container in the same compose network. -
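+
+A minimal `settings.toml` sketch for the Pulsar connection (illustrative only: the default
+REST endpoint and the `pulsar.amqp_url` switch are stated above, while the exact key names
+are assumptions):
+```
+[default.pulsar]
+# REST endpoint of the Pulsar service ("url" is an assumed key name)
+url = "http://localhost:8913"
+# Setting pulsar.amqp_url switches communication from REST to AMQP
+# amqp_url = "pyamqp://guest:guest@localhost:5672//"
+```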
-
 
-Depending on you Docker and Docker Compose installation, you may need to use `docker-compose` (with hyphen) instead. +Depending on your Docker and Docker Compose installation, you may need to use `docker-compose` (with hyphen) instead. You might encounter a timeout error in container runtime which can be solved by correct `mtu` configuration either in the `docker-compose.yaml`: ``` @@ -47,17 +47,19 @@ networks: or directly in your `/etc/docker/daemon.json`: ``` { - "mtu": 1442 + "mtu": 1442 } ``` -The `docker-compose.yaml` spins also collection of [Data Transfer Services](docker/dts/README.md) which can be used for testing. +The Data Transfer Services (HTTP/S3/FTP) are defined in [docker/dts](docker/dts/README.md) +and run via a separate compose file. +  ### Usage If the TESP is running, you can try to submit a task. One way is to use cURL. Although the project is still in development, the TESP should be compatible with TES so you can try TES clients such as Snakemake or Nextflow. The example below shows how to submit task using cURL. #### 1. Create JSON file -The first step you need to take is to prepare JSON file with the task. For inspiration you can use [tests](https://github.com/CESNET/tesp-api/tree/dev/tests/test_jsons) located in this repository, or [TES documentation](https://ga4gh.github.io/task-execution-schemas/docs/). +The first step you need to take is to prepare JSON file with the task. For inspiration you can use [tests/test_jsons](tests/test_jsons) located in this repository, or [TES documentation](https://ga4gh.github.io/task-execution-schemas/docs/). Example JSON file: ``` @@ -89,10 +91,10 @@ Please check the URL of the running TES and the file with the task you just crea curl http://localhost:8080/v1/tasks -X POST -H "Content-Type: application/json" -d $(sed -e "s/ //g" example.json | tr -d '\n') ``` (The only reason for the subshell is to remove whitespaces and newlines.) -After the task is submitted, the endpoint returns the task ID. This is usefull to check the task status. +After the task is submitted, the endpoint returns the task ID. This is useful to check the task status. #### 3. Check the task status -There are more usefull endpoints to check the task status. +There are more useful endpoints to check the task status. List all tasks: ``` @@ -130,8 +132,9 @@ instead of starting the project locally without `docker`. In that case only thos | poetry | 1.1.13+ | _pip install poetry_ | | mongodb | 4.4+ | _docker-compose uses latest_ | | pulsar | 0.14.13 | _actively trying to support latest. Must have access to docker with the same host as pulsar application itself_ | -| ftp server | - | _no real recommendation here. docker-compose uses [ftpserver](https://github.com/fclairamb/ftpserver) so local alternative should support same fpt commands_. | +| ftp server | - | _optional for I/O testing. The [docker/dts](docker/dts/README.md) stack provides FTP/S3/HTTP services_. | +  ### Configuring TESP API `TESP API` uses [dynaconf](https://www.dynaconf.com/) for its configuration. Configuration is currently set up by using [./settings.toml](https://github.com/CESNET/tesp-api/blob/main/settings.toml) file. This file declares sections which represent different environments for `TESP API`. Default section @@ -153,13 +156,25 @@ To apply different environment (i.e. to switch which section will be picked by ` `FASTAPI_PROFILE` must be set to the concrete name of such section (e.g. 
`FASTAPI_PROFILE=dev-docker` which can be seen in the [./docker/tesp_api/Dockerfile](https://github.com/CESNET/tesp-api/blob/main/docker/tesp_api/Dockerfile)) +  +### Authentication +`TESP API` can run without authentication (default). To enable Basic Auth, set `basic_auth.enable = true` +and configure `basic_auth.username` and `basic_auth.password` in `settings.toml`. To enable OAuth2, +set `oauth.enable = true` and pass a Bearer token; the token is validated via the issuer in its `iss` +claim using OIDC discovery. + +Container execution runtime is controlled by the `CONTAINER_TYPE` environment variable (`docker` or +`singularity`). The default is `docker`. + +  ### Configuring required services You can have a look at [./docker-compose.yaml](https://github.com/CESNET/tesp-api/blob/main/docker-compose.yaml) to see how the infrastructure for development should look like. Of course, you can configure those services in your preferred way if you are going to start the project without `docker` or if you are trying to create other than `development` environment but some things -must remain as they are. For example, `TESP API` currently supports communication with `Pulsar` only through its Rest API and -therefore `Pulsar` must be configured in such a way. +must remain as they are. For example, `TESP API` currently communicates with `Pulsar` via REST by default; configure Pulsar for +REST unless you set `pulsar.amqp_url` to enable AMQP. +  ### Current Docker services All the current `Docker` services which will be used when the project is started with `docker-compose` have common directory [./docker](https://github.com/CESNET/tesp-api/tree/main/docker) for configurations, data, logs and Dockerfiles if required. @@ -169,15 +184,12 @@ example trying to create data folder for given service. Such issues should be re which ports to be used etc. Following services are currently defined by [./docker-compose.yaml](https://github.com/CESNET/tesp-api/blob/main/docker-compose.yaml) - **tesp-api** - This project itself. Depends on mongodb - **tesp-db** - [MongoDB](https://www.mongodb.com/) instance for persistence layer -- **pulsar_rest** - `Pulsar` configured to use Rest API with access to a docker instance thanks to [DIND](https://hub.docker.com/_/docker). -- **pulsar_amqp** - currently disabled, will be used in the future development -- **ftpserver** - online storage for `TES` tasks input/output content -- **minio** - currently acting only as a storage backend for the `ftpserver` with simple web interface to access data. +- **pulsar_rest** - `Pulsar` configured to use REST API with access to a docker instance thanks to [DIND](https://hub.docker.com/_/docker) (enabled with `--profile pulsar`). -**Folder [./docker/minio/initial_data](https://github.com/CESNET/tesp-api/tree/main/docker/minio/initial_data) contains startup -folders for `minio` service which must be copied to the `./docker/minio/data` folder before starting up the infrastructure. Those data -configure `minio` to start with already created bucket and user which will be used by `ftpserver` for access.** +If you want HTTP/FTP/S3 data transfer services for testing, use the separate +[docker/dts](docker/dts/README.md) compose stack. +  ### Run the project This project uses [Poetry](https://python-poetry.org/) for `dependency management` and `packaging`. `Poetry` makes it easy to install libraries required by `TESP API`. 
It uses [./pyproject.toml](https://github.com/CESNET/tesp-api/blob/feature/TESP-0-github-proper-readme/pyproject.toml)
@@ -210,26 +222,29 @@ initialized properly or whether any errors occurred.
 - **http://localhost:8080/** - will redirect to Swagger documentation of `TESP API`. This endpoint also currently acts
 as a frontend. You can use it to execute REST based calls expected by the `TESP API`. Swagger is automatically generated
 from the sources, and therefore it corresponds to the very current state of the `TESP API` interface.
-- **http://localhost:40949/** - `minio` web interface. Use `admin` and `!Password123` credentials to login. Make sure
-that bucket `tesp-ftp` is already present, otherwise see [Current Docker services](#current-docker-services) section of this readme to properly
-prepare infrastructure before the startup.
+- If you run the DTS stack from [docker/dts](docker/dts/README.md), MinIO console is available at
+  **http://localhost:9001/** with `root` / `123456789` credentials.
 
 ### Executing simple TES task
 This section will demonstrate execution of simple `TES` task which will calculate _[md5sum](https://en.wikipedia.org/wiki/Md5sum)_
 hash of given input. There are more approaches of how I/O can be handled by `TES` but main goal here is to demonstrate
 `ftp server` as well.
 
-1. Head over to **http://localhost:40949/buckets/tesp-ftp/browse** and upload a new file with your preferred name and content (e.g. name
-`holy_file` and content `Hello World!`). This file will now be accessible trough `ftpserver` service and will be used as
-an input file for this demonstration.
+If you want to use the bundled HTTP/FTP/S3 services, start the DTS stack in [docker/dts](docker/dts/README.md)
+and adapt hostnames/ports to match your network setup.
+
+1. Upload a new file with your preferred name and content (e.g. name `holy_file` and content `Hello World!`) to your
+FTP-backed storage. If you run the DTS stack, use the MinIO console at **http://localhost:9001/** to create a bucket
+and upload the file. This file will be accessible through your FTP service and will be used as an input file for this
+demonstration.
 2. Go to **http://localhost:8080/** and use `POST /v1/tasks` request to create following `TES` task (task is sent in the request body).
-In the `"inputs.url"` replace `<your_file_name>` with the file name you chose in the previous step. If http status of
+In the `"inputs.url"` replace `<your_file_name>` with the file name you chose in the previous step. If http status of
 returned response is 200, the response will contain `id` of created task in the response body which will be used to
 reference this task later on.
 ```json
 {
     "inputs": [
         {
-            "url": "ftp://ftpserver:2121/<your_file_name>",
+            "url": "ftp://<your-ftp-host>:2121/<your_file_name>",
             "path": "/data/file1",
             "type": "FILE"
         }
     ],
     "outputs": [
         {
             "path": "/data/outfile",
-            "url": "ftp://ftpserver:2121/outfile-1",
+            "url": "ftp://<your-ftp-host>:2121/outfile-1",
             "type": "FILE"
         }
     ],
@@ -259,57 +274,27 @@ previous step. This request also supports `view` query parameter which can be us
 set to the state `COMPLETE` or one of the error states. In case of an error state, depending on its type, the error
 will be part of the task logs in the response (use `FULL` view), or you can inspect the logs of `TESP API` service, where
 error should be logged with respective message.
-4. 
Once the task completes you can head over back to **http://localhost:40949/buckets/tesp-ftp/browse** where you should find -uploaded `outfile-1` with output content of executed _[md5sum](https://en.wikipedia.org/wiki/Md5sum)_. You can play around +4. Once the task completes you can check your FTP-backed storage (for the DTS stack, use the MinIO console at +**http://localhost:9001/**) where you should find uploaded `outfile-1` with output content of executed +_[md5sum](https://en.wikipedia.org/wiki/Md5sum)_. You can play around by creating different tasks, just be sure to only use functionality which is currently supported - see [Known limitations](#known-limitations). For example, you can omit `inputs.url` and instead use `inputs.content` which allows you to create input in place, or you can also omit `outputs` and `executors.stdout` in which case the output will be present in the `logs.logs.stdout` as executor is no longer configured to redirect stdout into the file. +  ### Known limitations of TESP API | Domain | Limitation | |----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| _Pulsar_ | `TESP API` communicates with `Pulsar` only through its REST API, missing functionality for message queues | | _Pulsar_ | `TESP API` should be able to dispatch executions to multiple `Pulsar` services via different types of `Pulsar` interfaces. Currently, only one `Pulsar` service is supported | | _Pulsar_ | `Pulsar` must be "polled" for job state. Preferably `Pulsar` should notify `TESP API` about state change. This is already default behavior when using `Pulsar` with message queues | -| _TES_ | Canceling `TES` task does not immediately stop the task. Task even cannot be canceled while it is running. | -| _TES_ | `TES` does not state specific urls to be supported for file transfer (e.g. tasks `inputs.url`). Only FTP is supported for now | -| _TES_ | tasks `inputs.type` and `outputs.type` can be either DIRECTORY or FILE. Only FILE is supported, DIRECTORY will lead to undefined behavior for now | -| _TES_ | tasks `resources` currently do not change execution behavior in any way. This configuration will take effect once `Pulsar` limitations are resolved | -| _TES_ | tasks `executors.workdir` and `executors.env` functionality is not yet implemented. You can use them but they will have no effect | -| _TES_ | tasks `volumes` and `tags` functionality is not yet implemented. You use them but they will have no effect | -| _TES_ | tasks `logs.outputs` functionality is not yet implemented. However this limitation can be bypassed with tasks `outputs` | +| _TES_ | Canceling a `TES` task calls Pulsar's cancel endpoint but container termination depends on Pulsar/runtime behavior. In-flight tasks may still complete. | +| _TES_ | Only `cpu_cores` and `ram_gb` are mapped to container runtime flags. Other resource fields (disk, preemptible, zones) are stored but not enforced. | +| _TES_ | Task `tags` are accepted and stored but not used by the scheduler or runtime. | +| _TES_ | Task `logs.outputs` is not populated. Use `outputs` to persist result files. |   -## GIT -Current main branch is `origin/main`. This happens to be also a release branch for now. Developers should typically derive their -own feature branches such as e.g. `feature/TESP-111-task-monitoring`. This project has not yet configured any CI/CD. 
Releases are -done manually by creating a tag in the current release branch. There is not yet configured any issue tracking software but for -any possible future integration this project should reference commits, branches PR's etc ... with prefix `TESP-0` as a reference -to a work that has been done before such integration. Pull request should be merged using `Squash and merge` option with message format `Merge pull request # from `. -Since there is no CI/CD setup this is only opinionated view on how branching policies should work and for now everything is possible. -## License - -[![license](https://img.shields.io/github/license/CESNET/tesp-api)](https://github.com/CESNET/tesp-api/blob/main/LICENSE.md) -``` -Copyright (c) 2022 Norbert Dopjera - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -``` +History note: _The original intention of this project was to modify the `Pulsar` project so its Rest API would be compatible with the `TES` standard. +Later a decision was made that rather a separate microservice will be created, decoupled from the `Pulsar`, implementing the `TES` +standard and distributing `TES` tasks execution to `Pulsar` applications._ From 36cbc969ad7dab42b2121d00eb041f561abfff6f Mon Sep 17 00:00:00 2001 From: micoleaoo Date: Fri, 13 Feb 2026 09:07:56 +0000 Subject: [PATCH 4/4] Revert "updated readme - limitations, phrasing, formatting" This reverts commit de724eacf9643d0246ca4efb9dfbddf837153171. forgot to pull repo --- README.md | 139 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 77 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 81f735a..a76f6d4 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,13 @@ [![python](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/download) [![last-commit](https://img.shields.io/github/last-commit/CESNET/tesp-api)]() -This is a task execution microservice based on the [TES standard](https://github.com/ga4gh/task-execution-schemas) that sends job executions to [Pulsar](https://github.com/galaxyproject/pulsar) application. - -Read about our project on the [Galaxy Hub](https://galaxyproject.org/news/2025-10-06-tesp-api/) and [e-INFRA CZ Blog](https://blog.e-infra.cz/blog/tesp-api/). - -This effort is part of the [EuroScienceGateway](https://galaxyproject.org/projects/esg/) project. -For more details on TES, see the [Task Execution Schemas documentation](https://ga4gh.github.io/task-execution-schemas/docs/). 
-Pulsar is a Python server application that allows a [Galaxy](https://github.com/galaxyproject/galaxy) server to run jobs on remote systems. +This project is an effort to create Open-source implementation of a task execution engine based on the [TES standard](https://github.com/ga4gh/task-execution-schemas) +distributing executions to services exposing [Pulsar](https://github.com/galaxyproject/pulsar) application. For more details +on `TES`, see the Task Execution Schemas [documentation](https://ga4gh.github.io/task-execution-schemas/docs/). `Pulsar` +is a Python server application that allows a [Galaxy](https://github.com/galaxyproject/galaxy) server to run jobs on remote systems. The original intention of this +project was to modify the `Pulsar` project (e.g. via forking) so its Rest API would be compatible with the `TES` standard. +Later a decision was made that rather a separate microservice will be created, decoupled from the `Pulsar`, implementing the `TES` +standard and distributing `TES` tasks execution to `Pulsar` applications. ## Quick start @@ -22,19 +22,19 @@ The most straightforward way to deploy the TESP is to use Docker Compose. ``` docker compose up -d ``` -Starts the API and MongoDB containers. Configure an external Pulsar in `settings.toml` -(default points to `http://localhost:8913`). REST is the default; AMQP is used only -if `pulsar.amqp_url` is set. +Expecting exetrnal Pulsar configured in `settings.toml` before the compose is run. +So far only REST Pulsar communication is supported. #### With pulsar_rest service: ``` docker compose --profile pulsar up -d ``` -Starts a local Pulsar REST container in the same compose network. +
+
 
-Depending on your Docker and Docker Compose installation, you may need to use `docker-compose` (with hyphen) instead. +Depending on you Docker and Docker Compose installation, you may need to use `docker-compose` (with hyphen) instead. You might encounter a timeout error in container runtime which can be solved by correct `mtu` configuration either in the `docker-compose.yaml`: ``` @@ -47,19 +47,17 @@ networks: or directly in your `/etc/docker/daemon.json`: ``` { - "mtu": 1442 + "mtu": 1442 } ``` -The Data Transfer Services (HTTP/S3/FTP) are defined in [docker/dts](docker/dts/README.md) -and run via a separate compose file. +The `docker-compose.yaml` spins also collection of [Data Transfer Services](docker/dts/README.md) which can be used for testing. -  ### Usage If the TESP is running, you can try to submit a task. One way is to use cURL. Although the project is still in development, the TESP should be compatible with TES so you can try TES clients such as Snakemake or Nextflow. The example below shows how to submit task using cURL. #### 1. Create JSON file -The first step you need to take is to prepare JSON file with the task. For inspiration you can use [tests/test_jsons](tests/test_jsons) located in this repository, or [TES documentation](https://ga4gh.github.io/task-execution-schemas/docs/). +The first step you need to take is to prepare JSON file with the task. For inspiration you can use [tests](https://github.com/CESNET/tesp-api/tree/dev/tests/test_jsons) located in this repository, or [TES documentation](https://ga4gh.github.io/task-execution-schemas/docs/). Example JSON file: ``` @@ -91,10 +89,10 @@ Please check the URL of the running TES and the file with the task you just crea curl http://localhost:8080/v1/tasks -X POST -H "Content-Type: application/json" -d $(sed -e "s/ //g" example.json | tr -d '\n') ``` (The only reason for the subshell is to remove whitespaces and newlines.) -After the task is submitted, the endpoint returns the task ID. This is useful to check the task status. +After the task is submitted, the endpoint returns the task ID. This is usefull to check the task status. #### 3. Check the task status -There are more useful endpoints to check the task status. +There are more usefull endpoints to check the task status. List all tasks: ``` @@ -132,9 +130,8 @@ instead of starting the project locally without `docker`. In that case only thos | poetry | 1.1.13+ | _pip install poetry_ | | mongodb | 4.4+ | _docker-compose uses latest_ | | pulsar | 0.14.13 | _actively trying to support latest. Must have access to docker with the same host as pulsar application itself_ | -| ftp server | - | _optional for I/O testing. The [docker/dts](docker/dts/README.md) stack provides FTP/S3/HTTP services_. | +| ftp server | - | _no real recommendation here. docker-compose uses [ftpserver](https://github.com/fclairamb/ftpserver) so local alternative should support same fpt commands_. | -  ### Configuring TESP API `TESP API` uses [dynaconf](https://www.dynaconf.com/) for its configuration. Configuration is currently set up by using [./settings.toml](https://github.com/CESNET/tesp-api/blob/main/settings.toml) file. This file declares sections which represent different environments for `TESP API`. Default section @@ -156,25 +153,13 @@ To apply different environment (i.e. to switch which section will be picked by ` `FASTAPI_PROFILE` must be set to the concrete name of such section (e.g. 
`FASTAPI_PROFILE=dev-docker` which can be seen in the [./docker/tesp_api/Dockerfile](https://github.com/CESNET/tesp-api/blob/main/docker/tesp_api/Dockerfile)) -  -### Authentication -`TESP API` can run without authentication (default). To enable Basic Auth, set `basic_auth.enable = true` -and configure `basic_auth.username` and `basic_auth.password` in `settings.toml`. To enable OAuth2, -set `oauth.enable = true` and pass a Bearer token; the token is validated via the issuer in its `iss` -claim using OIDC discovery. - -Container execution runtime is controlled by the `CONTAINER_TYPE` environment variable (`docker` or -`singularity`). The default is `docker`. - -  ### Configuring required services You can have a look at [./docker-compose.yaml](https://github.com/CESNET/tesp-api/blob/main/docker-compose.yaml) to see how the infrastructure for development should look like. Of course, you can configure those services in your preferred way if you are going to start the project without `docker` or if you are trying to create other than `development` environment but some things -must remain as they are. For example, `TESP API` currently communicates with `Pulsar` via REST by default; configure Pulsar for -REST unless you set `pulsar.amqp_url` to enable AMQP. +must remain as they are. For example, `TESP API` currently supports communication with `Pulsar` only through its Rest API and +therefore `Pulsar` must be configured in such a way. -  ### Current Docker services All the current `Docker` services which will be used when the project is started with `docker-compose` have common directory [./docker](https://github.com/CESNET/tesp-api/tree/main/docker) for configurations, data, logs and Dockerfiles if required. @@ -184,12 +169,15 @@ example trying to create data folder for given service. Such issues should be re which ports to be used etc. Following services are currently defined by [./docker-compose.yaml](https://github.com/CESNET/tesp-api/blob/main/docker-compose.yaml) - **tesp-api** - This project itself. Depends on mongodb - **tesp-db** - [MongoDB](https://www.mongodb.com/) instance for persistence layer -- **pulsar_rest** - `Pulsar` configured to use REST API with access to a docker instance thanks to [DIND](https://hub.docker.com/_/docker) (enabled with `--profile pulsar`). +- **pulsar_rest** - `Pulsar` configured to use Rest API with access to a docker instance thanks to [DIND](https://hub.docker.com/_/docker). +- **pulsar_amqp** - currently disabled, will be used in the future development +- **ftpserver** - online storage for `TES` tasks input/output content +- **minio** - currently acting only as a storage backend for the `ftpserver` with simple web interface to access data. -If you want HTTP/FTP/S3 data transfer services for testing, use the separate -[docker/dts](docker/dts/README.md) compose stack. +**Folder [./docker/minio/initial_data](https://github.com/CESNET/tesp-api/tree/main/docker/minio/initial_data) contains startup +folders for `minio` service which must be copied to the `./docker/minio/data` folder before starting up the infrastructure. Those data +configure `minio` to start with already created bucket and user which will be used by `ftpserver` for access.** -  ### Run the project This project uses [Poetry](https://python-poetry.org/) for `dependency management` and `packaging`. `Poetry` makes it easy to install libraries required by `TESP API`. 
It uses [./pyproject.toml](https://github.com/CESNET/tesp-api/blob/feature/TESP-0-github-proper-readme/pyproject.toml)
@@ -222,29 +210,26 @@ initialized properly or whether any errors occurred.
 - **http://localhost:8080/** - will redirect to Swagger documentation of `TESP API`. This endpoint also currently acts
 as a frontend. You can use it to execute REST based calls expected by the `TESP API`. Swagger is automatically generated
 from the sources, and therefore it corresponds to the very current state of the `TESP API` interface.
-- If you run the DTS stack from [docker/dts](docker/dts/README.md), MinIO console is available at
-  **http://localhost:9001/** with `root` / `123456789` credentials.
+- **http://localhost:40949/** - `minio` web interface. Use `admin` and `!Password123` credentials to login. Make sure
+that bucket `tesp-ftp` is already present, otherwise see [Current Docker services](#current-docker-services) section of this readme to properly
+prepare infrastructure before the startup.
 
 ### Executing simple TES task
 This section will demonstrate execution of simple `TES` task which will calculate _[md5sum](https://en.wikipedia.org/wiki/Md5sum)_
 hash of given input. There are more approaches of how I/O can be handled by `TES` but main goal here is to demonstrate
 `ftp server` as well.
 
-If you want to use the bundled HTTP/FTP/S3 services, start the DTS stack in [docker/dts](docker/dts/README.md)
-and adapt hostnames/ports to match your network setup.
-
-1. Upload a new file with your preferred name and content (e.g. name `holy_file` and content `Hello World!`) to your
-FTP-backed storage. If you run the DTS stack, use the MinIO console at **http://localhost:9001/** to create a bucket
-and upload the file. This file will be accessible through your FTP service and will be used as an input file for this
-demonstration.
+1. Head over to **http://localhost:40949/buckets/tesp-ftp/browse** and upload a new file with your preferred name and content (e.g. name
+`holy_file` and content `Hello World!`). This file will now be accessible trough `ftpserver` service and will be used as
+an input file for this demonstration.
 2. Go to **http://localhost:8080/** and use `POST /v1/tasks` request to create following `TES` task (task is sent in the request body).
-In the `"inputs.url"` replace `<your_file_name>` with the file name you chose in the previous step. If http status of
+In the `"inputs.url"` replace `<your_file_name>` with the file name you chose in the previous step. If http status of
 returned response is 200, the response will contain `id` of created task in the response body which will be used to
 reference this task later on.
 ```json
 {
     "inputs": [
         {
-            "url": "ftp://<your-ftp-host>:2121/<your_file_name>",
+            "url": "ftp://ftpserver:2121/<your_file_name>",
             "path": "/data/file1",
             "type": "FILE"
         }
     ],
     "outputs": [
         {
             "path": "/data/outfile",
-            "url": "ftp://<your-ftp-host>:2121/outfile-1",
+            "url": "ftp://ftpserver:2121/outfile-1",
             "type": "FILE"
         }
     ],
@@ -274,27 +259,57 @@ previous step. This request also supports `view` query parameter which can be us
 set to the state `COMPLETE` or one of the error states. In case of an error state, depending on its type, the error
 will be part of the task logs in the response (use `FULL` view), or you can inspect the logs of `TESP API` service, where
 error should be logged with respective message.
-4. 
Once the task completes you can check your FTP-backed storage (for the DTS stack, use the MinIO console at -**http://localhost:9001/**) where you should find uploaded `outfile-1` with output content of executed -_[md5sum](https://en.wikipedia.org/wiki/Md5sum)_. You can play around +4. Once the task completes you can head over back to **http://localhost:40949/buckets/tesp-ftp/browse** where you should find +uploaded `outfile-1` with output content of executed _[md5sum](https://en.wikipedia.org/wiki/Md5sum)_. You can play around by creating different tasks, just be sure to only use functionality which is currently supported - see [Known limitations](#known-limitations). For example, you can omit `inputs.url` and instead use `inputs.content` which allows you to create input in place, or you can also omit `outputs` and `executors.stdout` in which case the output will be present in the `logs.logs.stdout` as executor is no longer configured to redirect stdout into the file. -  ### Known limitations of TESP API | Domain | Limitation | |----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| _Pulsar_ | `TESP API` communicates with `Pulsar` only through its REST API, missing functionality for message queues | | _Pulsar_ | `TESP API` should be able to dispatch executions to multiple `Pulsar` services via different types of `Pulsar` interfaces. Currently, only one `Pulsar` service is supported | | _Pulsar_ | `Pulsar` must be "polled" for job state. Preferably `Pulsar` should notify `TESP API` about state change. This is already default behavior when using `Pulsar` with message queues | -| _TES_ | Canceling a `TES` task calls Pulsar's cancel endpoint but container termination depends on Pulsar/runtime behavior. In-flight tasks may still complete. | -| _TES_ | Only `cpu_cores` and `ram_gb` are mapped to container runtime flags. Other resource fields (disk, preemptible, zones) are stored but not enforced. | -| _TES_ | Task `tags` are accepted and stored but not used by the scheduler or runtime. | -| _TES_ | Task `logs.outputs` is not populated. Use `outputs` to persist result files. | +| _TES_ | Canceling `TES` task does not immediately stop the task. Task even cannot be canceled while it is running. | +| _TES_ | `TES` does not state specific urls to be supported for file transfer (e.g. tasks `inputs.url`). Only FTP is supported for now | +| _TES_ | tasks `inputs.type` and `outputs.type` can be either DIRECTORY or FILE. Only FILE is supported, DIRECTORY will lead to undefined behavior for now | +| _TES_ | tasks `resources` currently do not change execution behavior in any way. This configuration will take effect once `Pulsar` limitations are resolved | +| _TES_ | tasks `executors.workdir` and `executors.env` functionality is not yet implemented. You can use them but they will have no effect | +| _TES_ | tasks `volumes` and `tags` functionality is not yet implemented. You use them but they will have no effect | +| _TES_ | tasks `logs.outputs` functionality is not yet implemented. However this limitation can be bypassed with tasks `outputs` |   +## GIT +Current main branch is `origin/main`. This happens to be also a release branch for now. Developers should typically derive their +own feature branches such as e.g. `feature/TESP-111-task-monitoring`. This project has not yet configured any CI/CD. 
Releases are +done manually by creating a tag in the current release branch. There is not yet configured any issue tracking software but for +any possible future integration this project should reference commits, branches PR's etc ... with prefix `TESP-0` as a reference +to a work that has been done before such integration. Pull request should be merged using `Squash and merge` option with message format `Merge pull request # from `. +Since there is no CI/CD setup this is only opinionated view on how branching policies should work and for now everything is possible. -History note: _The original intention of this project was to modify the `Pulsar` project so its Rest API would be compatible with the `TES` standard. -Later a decision was made that rather a separate microservice will be created, decoupled from the `Pulsar`, implementing the `TES` -standard and distributing `TES` tasks execution to `Pulsar` applications._ +## License + +[![license](https://img.shields.io/github/license/CESNET/tesp-api)](https://github.com/CESNET/tesp-api/blob/main/LICENSE.md) +``` +Copyright (c) 2022 Norbert Dopjera + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +```